songyi/sense_voice_process.py

import os
import shutil
from concurrent.futures.thread import ThreadPoolExecutor
import logging
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess
from mpmath import convert


def configure_logging():
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # 文件日志处理器
    file_handler = logging.FileHandler('audio_transcription.log')
    file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(file_formatter)
    logger.addHandler(file_handler)

    # 控制台日志处理器
    console_handler = logging.StreamHandler()
    console_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)


configure_logging()

use_remote_api = False
process_workers = 5 if use_remote_api else 1

conerted = False

model_dir = "iic/SenseVoiceSmall"

model = AutoModel(
    model=model_dir,
    trust_remote_code=True,
    remote_code="./model.py",
    vad_model="fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
    device="cuda:0",
)

import os
from pydub import AudioSegment


def mp3_to_wav(mp3_path):
    """
    将 MP3 文件转换为 WAV 格式，保存到同一目录下，返回 WAV 文件路径。

    参数:
        mp3_path (str): 原始 MP3 文件的路径。

    返回:
        str: 转换后的 WAV 文件路径。
    """
    # 检查文件是否存在
    if not os.path.isfile(mp3_path):
        raise FileNotFoundError(f"文件未找到: {mp3_path}")

    # 获取文件所在目录和文件名（不含扩展名）
    folder = os.path.dirname(mp3_path)
    filename_wo_ext = os.path.splitext(os.path.basename(mp3_path))[0]

    # 构造 WAV 文件路径
    wav_path = os.path.join(folder, f"{filename_wo_ext}.wav")

    # 读取 MP3 并导出为 WAV
    sound = AudioSegment.from_mp3(mp3_path)
    sound.export(wav_path, format="wav")

    return wav_path

def short_audio_process(audio_file_path):
    print("logging file name:", audio_file_path)
    if audio_file_path.endswith(".mp3"):
        wav_path = mp3_to_wav(audio_file_path)
        conerted = True
    else:
        wav_path = audio_file_path
    res = model.generate(
        input=wav_path,
        cache={},
        language="auto",  # "zh", "en", "yue", "ja", "ko", "nospeech"
        use_itn=True,
        batch_size_s=60,
        merge_vad=True,  #
        merge_length_s=15,
    )
    text = rich_transcription_postprocess(res[0]["text"])
    if conerted:
        os.remove(wav_path)
    return text


def process_audio_file(audio_file_path):
    file_name_with_extension = os.path.basename(audio_file_path)
    file_name_without_extension = os.path.splitext(file_name_with_extension)[0]
    logging.info(f"Starting processing {file_name_with_extension}")
    # 获取 WAV 文件所在的目录
    wav_dir = os.path.dirname(audio_file_path)
    # 获取 MP4 文件的文件名（不包含扩展名）
    wav_filename = os.path.splitext(os.path.basename(audio_file_path))[0]
    # 生成对应的 WAV 文件路径
    md_file = os.path.join(wav_dir, f"{wav_filename}.md")

    # en
    res = model.generate(
        input=audio_file_path,
        cache={},
        language="auto",  # "zh", "en", "yue", "ja", "ko", "nospeech"
        use_itn=True,
        batch_size_s=60,
        merge_vad=True,  #
        merge_length_s=15,
    )
    text = rich_transcription_postprocess(res[0]["text"])
    print(text)
    # 按照音频的顺序写入Markdown文件
    markdown_content = ""
    markdown_content = text

    # with file_write_lock:  # 确保文件写入操作的线程安全
    # md_file_path = os.path.join('media', file_name_without_extension + '.md')
    with open(md_file, "w", encoding="utf-8") as f:
        f.write(markdown_content)


def main():
    # all_files = os.listdir('media')
    # audio_files = [file for file in all_files if file.endswith('.wav')]
    audio_files = []
    for root, dirs, files in os.walk('media'):
        for file in files:
            if file.endswith('.wav'):
                audio_files.append(os.path.join(root, file))
    print(audio_files)

    with ThreadPoolExecutor(max_workers=process_workers) as executor:
        for audio_file in audio_files:
            audio_file_path = os.path.join(audio_file)
            executor.submit(process_audio_file, audio_file_path)


if __name__ == "__main__":
    main()