From ff14e1d04a6ceb6550e38499a25fe28fb5ff7f61 Mon Sep 17 00:00:00 2001 From: an-lee Date: Wed, 24 Jan 2024 15:05:56 +0800 Subject: [PATCH] Fix: record without ffmpeg & others (#201) * remove ffmpeg transcode when save recording * fix model download * transcode record audio in renderer * fix transcribe dead loop when whisper not working * force to select a model * check model before transcribe --- enjoy/src/i18n/en.json | 1 + enjoy/src/i18n/zh-CN.json | 1 + enjoy/src/main/db/models/recording.ts | 17 +++++------- enjoy/src/main/downloader.ts | 6 ++++- enjoy/src/main/whisper.ts | 12 +++++++++ .../components/medias/media-transcription.tsx | 10 +++---- .../src/renderer/components/record-button.tsx | 26 +++++++++++++++++-- .../components/whisper-model-options.tsx | 2 +- enjoy/src/renderer/pages/landing.tsx | 2 +- 9 files changed, 55 insertions(+), 22 deletions(-) diff --git a/enjoy/src/i18n/en.json b/enjoy/src/i18n/en.json index f9483a31..602fb329 100644 --- a/enjoy/src/i18n/en.json +++ b/enjoy/src/i18n/en.json @@ -295,6 +295,7 @@ "language": "Language", "sttAiModel": "STT AI model", "checkingWhisper": "Checking whisper status", + "pleaseDownloadWhisperModelFirst": "Please download whisper model first", "whisperIsWorkingGood": "Whisper is working good", "whisperIsNotWorking": "Whisper is not working", "checkingWhisperModel": "Checking whisper model", diff --git a/enjoy/src/i18n/zh-CN.json b/enjoy/src/i18n/zh-CN.json index 33600938..984802ce 100644 --- a/enjoy/src/i18n/zh-CN.json +++ b/enjoy/src/i18n/zh-CN.json @@ -294,6 +294,7 @@ "language": "语言", "sttAiModel": "语音转文本 AI 模型", "checkingWhisper": "正在检查 Whisper", + "pleaseDownloadWhisperModelFirst": "请先下载 Whisper 模型", "whisperIsWorkingGood": "Whisper 正常工作", "whisperIsNotWorking": "Whisper 无法正常工作,请尝试更换模型后重试,或联系开发者", "checkingWhisperModel": "正在检查 Whisper 模型", diff --git a/enjoy/src/main/db/models/recording.ts b/enjoy/src/main/db/models/recording.ts index 03cc5307..452d63e5 100644 --- a/enjoy/src/main/db/models/recording.ts +++ 
b/enjoy/src/main/db/models/recording.ts @@ -22,7 +22,6 @@ import settings from "@main/settings"; import { hashFile } from "@/utils"; import log from "electron-log/main"; import storage from "@main/storage"; -import Ffmpeg from "@main/ffmpeg"; import { Client } from "@/api"; import { WEB_API_URL } from "@/constants"; import { AzureSpeechSdk } from "@main/azure-speech-sdk"; @@ -283,20 +282,16 @@ export class Recording extends Model { params; const format = blob.type.split("/")[1]; - const tempfile = path.join(settings.cachePath(), `${Date.now()}.${format}`); - await fs.outputFile(tempfile, Buffer.from(blob.arrayBuffer)); - const wavFile = path.join( + const file = path.join( settings.userDataPath(), "recordings", - `${Date.now()}.wav` + `${Date.now()}.${format}` ); + await fs.outputFile(file, Buffer.from(blob.arrayBuffer)); - const ffmpeg = new Ffmpeg(); - await ffmpeg.convertToWav(tempfile, wavFile); - - const md5 = await hashFile(wavFile, { algo: "md5" }); - const filename = `${md5}.wav`; - fs.renameSync(wavFile, path.join(path.dirname(wavFile), filename)); + const md5 = await hashFile(file, { algo: "md5" }); + const filename = `${md5}.${format}`; + fs.renameSync(file, path.join(path.dirname(file), filename)); return this.create( { diff --git a/enjoy/src/main/downloader.ts b/enjoy/src/main/downloader.ts index 01577156..20252a44 100644 --- a/enjoy/src/main/downloader.ts +++ b/enjoy/src/main/downloader.ts @@ -24,7 +24,11 @@ class Downloader { webContents.downloadURL(url); webContents.session.on("will-download", (_event, item, _webContents) => { if (savePath) { - item.setSavePath(savePath); + if (fs.statSync(savePath).isDirectory()) { + item.setSavePath(path.join(savePath, item.getFilename())); + } else { + item.setSavePath(savePath); + } } else { item.setSavePath( path.join(app.getPath("downloads"), item.getFilename()) diff --git a/enjoy/src/main/whisper.ts b/enjoy/src/main/whisper.ts index afa43414..b939f0e9 100644 --- a/enjoy/src/main/whisper.ts +++ 
b/enjoy/src/main/whisper.ts @@ -5,6 +5,7 @@ import { WHISPER_MODELS_OPTIONS, PROCESS_TIMEOUT } from "@/constants"; import { exec } from "child_process"; import fs from "fs-extra"; import log from "electron-log/main"; +import { t } from "i18next"; const logger = log.scope("whisper"); const MAGIC_TOKENS = ["Mrs.", "Ms.", "Mr.", "Dr.", "Prof.", "St."]; @@ -18,6 +19,13 @@ class Whipser { } currentModel() { + if (!this.config.availableModels) return; + if (!this.config.model) { + const model = this.config.availableModels[0]; + settings.setSync("whisper.model", this.config.availableModels[0].name); + return model.savePath; + } + return (this.config.availableModels || []).find( (m) => m.name === this.config.model )?.savePath; @@ -177,6 +185,10 @@ class Whipser { return fs.readJson(outputFile); } + if (!this.currentModel()) { + throw new Error(t("pleaseDownloadWhisperModelFirst")); + } + const command = [ `"${this.binMain}"`, `--file "${file}"`, diff --git a/enjoy/src/renderer/components/medias/media-transcription.tsx b/enjoy/src/renderer/components/medias/media-transcription.tsx index 68b71f26..30497464 100644 --- a/enjoy/src/renderer/components/medias/media-transcription.tsx +++ b/enjoy/src/renderer/components/medias/media-transcription.tsx @@ -112,6 +112,10 @@ export const MediaTranscription = (props: { addDblistener(fetchSegmentStats); fetchSegmentStats(); + if (transcription?.state == "pending") { + generate(); + } + return () => { removeDbListener(fetchSegmentStats); }; @@ -126,12 +130,6 @@ export const MediaTranscription = (props: { } as ScrollIntoViewOptions); }, [currentSegmentIndex, transcription]); - useEffect(() => { - if (transcription?.state !== "pending") return; - - generate(); - }, [transcription]); - if (!transcription) return (
diff --git a/enjoy/src/renderer/components/record-button.tsx b/enjoy/src/renderer/components/record-button.tsx index 474f7380..49f0ee38 100644 --- a/enjoy/src/renderer/components/record-button.tsx +++ b/enjoy/src/renderer/components/record-button.tsx @@ -7,6 +7,7 @@ import WaveSurfer from "wavesurfer.js"; import { cn } from "@renderer/lib/utils"; import { RadialProgress, toast } from "@renderer/components/ui"; import { useHotkeys } from "react-hotkeys-hook"; +import { fetchFile } from "@ffmpeg/util"; export const RecordButton = (props: { className?: string; @@ -117,6 +118,26 @@ const RecordButtonPopover = (props: { onRecordEnd: (blob: Blob, duration: number) => void; }) => { const containerRef = useRef(); + const { ffmpeg } = useContext(AppSettingsProviderContext); + + const transcode = async (blob: Blob) => { + const input = `input.${blob.type.split("/")[1]}`; + const output = input.replace(/\.[^/.]+$/, ".wav"); + await ffmpeg.writeFile(input, await fetchFile(blob)); + await ffmpeg.exec([ + "-i", + input, + "-ar", + "16000", + "-ac", + "1", + "-c:a", + "pcm_s16le", + output, + ]); + const data = await ffmpeg.readFile(output); + return new Blob([data], { type: "audio/wav" }); + }; useEffect(() => { if (!containerRef.current) return; @@ -136,9 +157,10 @@ const RecordButtonPopover = (props: { startAt = Date.now(); }); - record.on("record-end", (blob: Blob) => { + record.on("record-end", async (blob: Blob) => { const duration = Date.now() - startAt; - props.onRecordEnd(blob, duration); + const output = await transcode(blob); + props.onRecordEnd(output, duration); }); RecordPlugin.getAvailableAudioDevices() diff --git a/enjoy/src/renderer/components/whisper-model-options.tsx b/enjoy/src/renderer/components/whisper-model-options.tsx index 7bfbc148..d50ec1be 100644 --- a/enjoy/src/renderer/components/whisper-model-options.tsx +++ b/enjoy/src/renderer/components/whisper-model-options.tsx @@ -132,7 +132,7 @@ export const WhisperModelOptions = () => { if (state.state === 
"completed") { model.downloaded = true; setWhisperModel(model.name); - } else if (state.state === "cancelled") { + } else if (state.state === "cancelled" || state.state === "interrupted") { model.downloaded = false; model.downloadState = null; } diff --git a/enjoy/src/renderer/pages/landing.tsx b/enjoy/src/renderer/pages/landing.tsx index 6c2dffe5..5b79f27b 100644 --- a/enjoy/src/renderer/pages/landing.tsx +++ b/enjoy/src/renderer/pages/landing.tsx @@ -32,7 +32,7 @@ export default () => { setCurrentStepValid(!!libraryPath); break; case 3: - setCurrentStepValid(true); + setCurrentStepValid(Boolean(whisperConfig.model)); break; case 4: setCurrentStepValid(initialized);