# -*- coding: utf-8 -*-
import logging
import os
import shutil
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from pydub import AudioSegment
from pydub.silence import split_on_silence

# Module-level lock; process_audio_file holds it while writing its Markdown
# output file.
file_write_lock = threading.Lock()


# Logging configuration
def configure_logging():
    """Attach a file handler and a console handler to the root logger.

    The root logger level is set to INFO.  Records go to
    ``audio_transcription.log`` in the current working directory (written as
    UTF-8 so non-ASCII file names are logged intact) and to the console
    stream, both using the same timestamped format.

    The handlers are registered under fixed names, so calling this function
    more than once does not install duplicates (which would repeat every log
    line).
    """
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    installed_names = {handler.name for handler in logger.handlers}

    # File log handler
    if 'audio_transcription_file' not in installed_names:
        file_handler = logging.FileHandler('audio_transcription.log', encoding='utf-8')
        file_handler.name = 'audio_transcription_file'
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Console log handler
    if 'audio_transcription_console' not in installed_names:
        console_handler = logging.StreamHandler()
        console_handler.name = 'audio_transcription_console'
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)


# Runs at import time, so the root logger's handlers are installed before any
# function below logs.
configure_logging()


def process_audio_file(audio_file_path):
    """Transcribe one WAV file and write the text to ``media/<name>.md``.

    The audio is split on silence; each chunk is exported to a temporary WAV
    file under ``media`` and posted to the SiliconFlow transcription endpoint
    from a pool of five worker threads.  The returned texts are written in
    chunk order, one paragraph per chunk.

    A chunk whose export or request fails is logged, copied to
    ``media/error`` when its exported file exists, and contributes an empty
    paragraph.  Any other failure during processing is logged and swallowed
    rather than raised.

    The API key is read from the ``SILICONFLOW_API_KEY`` environment variable
    when set; otherwise the key embedded in this file is used.

    Args:
        audio_file_path: Path to the WAV file to transcribe.
    """
    file_name_with_extension = os.path.basename(audio_file_path)
    file_name_without_extension = os.path.splitext(file_name_with_extension)[0]
    logging.info(f"Starting processing {file_name_with_extension}")

    try:
        audio_file = AudioSegment.from_wav(audio_file_path)
        audio_chunks = split_on_silence(audio_file, min_silence_len=500, silence_thresh=-40)

        # Re-split chunks longer than 60 seconds using a shorter silence window.
        new_audio_chunks = []
        for chunk in audio_chunks:
            if len(chunk) > 60000:  # len() of a segment is its duration in milliseconds
                new_audio_chunks.extend(split_on_silence(chunk, min_silence_len=250, silence_thresh=-40))
            else:
                new_audio_chunks.append(chunk)

        url = "https://api.siliconflow.cn/v1/audio/transcriptions"
        # NOTE(review): a credential is committed in source. The environment
        # variable takes precedence; the literal remains only so existing
        # deployments keep working and should be rotated and removed.
        api_key = os.environ.get(
            "SILICONFLOW_API_KEY",
            "sk-lakndqcjlmtukekcliwkkryaxquifduhvzgcnlhofzvofllv",
        )
        headers = {
            "Authorization": f"Bearer {api_key}"
        }
        # (connect, read) timeouts in seconds, so a stalled connection cannot
        # block a worker thread forever.
        request_timeout = (10, 300)

        def send_request(chunk, index):
            """Export one chunk, upload it, and return ``(index, text)``."""
            chunk_file_name = f"{file_name_without_extension}_chunk_{index}.wav"
            # Bound before the try block so the except/finally clauses can
            # always refer to it.
            audio_part_path = os.path.join('media', chunk_file_name)
            try:
                # export() hands back the open output file; close it so the
                # data is flushed and the handle is released deterministically.
                chunk.export(audio_part_path, format="wav").close()
                logging.info(f'Exported chunk file {audio_part_path} for {file_name_with_extension}')

                # Keep the upload handle open only for the duration of the
                # request; otherwise it leaks and blocks os.remove on Windows.
                with open(audio_part_path, 'rb') as audio_part:
                    multipart_form_data = {
                        'file': (chunk_file_name, audio_part),
                        'model': (None, 'FunAudioLLM/SenseVoiceSmall'),
                    }
                    response = requests.post(
                        url,
                        files=multipart_form_data,
                        headers=headers,
                        timeout=request_timeout,
                    )

                # Surface HTTP errors explicitly instead of as a KeyError on
                # the missing "text" field.
                response.raise_for_status()
                text = response.json()["text"]
                print(text)
                return index, text  # index lets the caller restore chunk order
            except Exception as e:
                logging.error(f'Error processing {file_name_with_extension}, chunk {index}: {str(e)}')
                # Keep a copy of the failed chunk in the error folder. The
                # copy is best-effort: the export itself may be what failed.
                try:
                    if os.path.exists(audio_part_path):
                        error_dir = os.path.join(os.getcwd(), 'media', 'error')
                        # exist_ok avoids a race between worker threads
                        # creating the directory at the same time.
                        os.makedirs(error_dir, exist_ok=True)
                        error_path = os.path.join(error_dir, chunk_file_name)
                        shutil.copy(audio_part_path, error_path)
                        logging.error(f'Copied request failed chunk file {file_name_with_extension} to {error_path}')
                except OSError as copy_error:
                    logging.error(
                        f'Could not preserve failed chunk {index} of {file_name_with_extension}: {str(copy_error)}'
                    )
                return index, ""  # empty text keeps the chunk's slot in the output
            finally:
                if os.path.exists(audio_part_path):
                    os.remove(audio_part_path)

        sentences = []  # (index, text) pairs, collected in completion order

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = {executor.submit(send_request, chunk, i): i for i, chunk in enumerate(new_audio_chunks)}

            for future in as_completed(futures):
                index = futures[future]
                try:
                    sentences.append(future.result())
                except Exception as exc:
                    logging.error(f'Request {index} for {file_name_with_extension} generated an exception: {str(exc)}')

        # Restore the original audio order before building the Markdown text.
        sentences.sort(key=lambda item: item[0])
        markdown_content = "".join(f"{text}\n\n" for _, text in sentences)

        with file_write_lock:  # serialize Markdown writes across worker threads
            md_file_path = os.path.join('media', file_name_without_extension + '.md')
            with open(md_file_path, "w", encoding="utf-8") as f:
                f.write(markdown_content)

        logging.info(f"Finished processing {file_name_with_extension}")
    except Exception as e:
        logging.error(f"Failed to process {file_name_with_extension}: {str(e)}")


def main():
    """Submit every ``.wav`` file directly inside ``media`` for transcription.

    The matching file names are printed, then each file is handed to
    ``process_audio_file`` on a pool of five worker threads.  The call returns
    once the pool has shut down, i.e. after all submitted work has finished.
    """
    media_dir = 'media'

    audio_files = []
    for entry in os.listdir(media_dir):
        if entry.endswith('.wav'):
            audio_files.append(entry)
    print(audio_files)

    with ThreadPoolExecutor(max_workers=5) as pool:
        for wav_name in audio_files:
            pool.submit(process_audio_file, os.path.join(media_dir, wav_name))


# Run the batch transcription only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()