From ff14e1d04a6ceb6550e38499a25fe28fb5ff7f61 Mon Sep 17 00:00:00 2001
From: an-lee <an.lee.work@gmail.com>
Date: Wed, 24 Jan 2024 15:05:56 +0800
Subject: [PATCH] Fix: record without ffmpeg & others (#201)

* remove ffmpeg transcode when save recording

* fix model download

* transcode recorded audio in renderer

* fix transcription infinite loop when whisper is not working

* force to select a model

* check model before transcribe
---
 enjoy/src/i18n/en.json                        |  1 +
 enjoy/src/i18n/zh-CN.json                     |  1 +
 enjoy/src/main/db/models/recording.ts         | 17 +++++-------
 enjoy/src/main/downloader.ts                  |  6 ++++-
 enjoy/src/main/whisper.ts                     | 12 +++++++++
 .../components/medias/media-transcription.tsx | 10 +++----
 .../src/renderer/components/record-button.tsx | 26 +++++++++++++++++--
 .../components/whisper-model-options.tsx      |  2 +-
 enjoy/src/renderer/pages/landing.tsx          |  2 +-
 9 files changed, 55 insertions(+), 22 deletions(-)
diff --git a/enjoy/src/i18n/en.json b/enjoy/src/i18n/en.json
index f9483a31..602fb329 100644
--- a/enjoy/src/i18n/en.json
+++ b/enjoy/src/i18n/en.json
@@ -295,6 +295,7 @@
   "language": "Language",
   "sttAiModel": "STT AI model",
   "checkingWhisper": "Checking whisper status",
+  "pleaseDownloadWhisperModelFirst": "Please download whisper model first",
   "whisperIsWorkingGood": "Whisper is working good",
   "whisperIsNotWorking": "Whisper is not working",
   "checkingWhisperModel": "Checking whisper model",
diff --git a/enjoy/src/i18n/zh-CN.json b/enjoy/src/i18n/zh-CN.json
index 33600938..984802ce 100644
--- a/enjoy/src/i18n/zh-CN.json
+++ b/enjoy/src/i18n/zh-CN.json
@@ -294,6 +294,7 @@
   "language": "语言",
   "sttAiModel": "语音转文本 AI 模型",
   "checkingWhisper": "正在检查 Whisper",
+  "pleaseDownloadWhisperModelFirst": "请先下载 Whisper 模型",
   "whisperIsWorkingGood": "Whisper 正常工作",
   "whisperIsNotWorking": "Whisper 无法正常工作，请尝试更换模型后重试，或联系开发者",
   "checkingWhisperModel": "正在检查 Whisper 模型",
diff --git a/enjoy/src/main/db/models/recording.ts b/enjoy/src/main/db/models/recording.ts
index 03cc5307..452d63e5 100644
--- a/enjoy/src/main/db/models/recording.ts
+++ b/enjoy/src/main/db/models/recording.ts
@@ -22,7 +22,6 @@ import settings from "@main/settings";
 import { hashFile } from "@/utils";
 import log from "electron-log/main";
 import storage from "@main/storage";
-import Ffmpeg from "@main/ffmpeg";
 import { Client } from "@/api";
 import { WEB_API_URL } from "@/constants";
 import { AzureSpeechSdk } from "@main/azure-speech-sdk";
@@ -283,20 +282,16 @@ export class Recording extends Model<Recording> {
       params;
 
     const format = blob.type.split("/")[1];
-    const tempfile = path.join(settings.cachePath(), `${Date.now()}.${format}`);
-    await fs.outputFile(tempfile, Buffer.from(blob.arrayBuffer));
-    const wavFile = path.join(
+    const file = path.join(
       settings.userDataPath(),
       "recordings",
-      `${Date.now()}.wav`
+      `${Date.now()}.${format}`
     );
+    await fs.outputFile(file, Buffer.from(blob.arrayBuffer));
 
-    const ffmpeg = new Ffmpeg();
-    await ffmpeg.convertToWav(tempfile, wavFile);
-
-    const md5 = await hashFile(wavFile, { algo: "md5" });
-    const filename = `${md5}.wav`;
-    fs.renameSync(wavFile, path.join(path.dirname(wavFile), filename));
+    const md5 = await hashFile(file, { algo: "md5" });
+    const filename = `${md5}.${format}`;
+    fs.renameSync(file, path.join(path.dirname(file), filename));
 
     return this.create(
       {
diff --git a/enjoy/src/main/downloader.ts b/enjoy/src/main/downloader.ts
index 01577156..20252a44 100644
--- a/enjoy/src/main/downloader.ts
+++ b/enjoy/src/main/downloader.ts
@@ -24,7 +24,11 @@ class Downloader {
       webContents.downloadURL(url);
       webContents.session.on("will-download", (_event, item, _webContents) => {
         if (savePath) {
-          item.setSavePath(savePath);
+          if (fs.statSync(savePath).isDirectory()) {
+            item.setSavePath(path.join(savePath, item.getFilename()));
+          } else {
+            item.setSavePath(savePath);
+          }
         } else {
           item.setSavePath(
             path.join(app.getPath("downloads"), item.getFilename())
diff --git a/enjoy/src/main/whisper.ts b/enjoy/src/main/whisper.ts
index afa43414..b939f0e9 100644
--- a/enjoy/src/main/whisper.ts
+++ b/enjoy/src/main/whisper.ts
@@ -5,6 +5,7 @@ import { WHISPER_MODELS_OPTIONS, PROCESS_TIMEOUT } from "@/constants";
 import { exec } from "child_process";
 import fs from "fs-extra";
 import log from "electron-log/main";
+import { t } from "i18next";
 
 const logger = log.scope("whisper");
 const MAGIC_TOKENS = ["Mrs.", "Ms.", "Mr.", "Dr.", "Prof.", "St."];
@@ -18,6 +19,13 @@ class Whipser {
   }
 
   currentModel() {
+    if (!this.config.availableModels) return;
+    if (!this.config.model) {
+      const model = this.config.availableModels[0];
+      settings.setSync("whisper.model", this.config.availableModels[0].name);
+      return model.savePath;
+    }
+
     return (this.config.availableModels || []).find(
       (m) => m.name === this.config.model
     )?.savePath;
@@ -177,6 +185,10 @@ class Whipser {
       return fs.readJson(outputFile);
     }
 
+    if (!this.currentModel()) {
+      throw new Error(t("pleaseDownloadWhisperModelFirst"));
+    }
+
     const command = [
       `"${this.binMain}"`,
       `--file "${file}"`,
diff --git a/enjoy/src/renderer/components/medias/media-transcription.tsx b/enjoy/src/renderer/components/medias/media-transcription.tsx
index 68b71f26..30497464 100644
--- a/enjoy/src/renderer/components/medias/media-transcription.tsx
+++ b/enjoy/src/renderer/components/medias/media-transcription.tsx
@@ -112,6 +112,10 @@ export const MediaTranscription = (props: {
     addDblistener(fetchSegmentStats);
     fetchSegmentStats();
 
+    if (transcription?.state == "pending") {
+      generate();
+    }
+
     return () => {
       removeDbListener(fetchSegmentStats);
     };
@@ -126,12 +130,6 @@ export const MediaTranscription = (props: {
       } as ScrollIntoViewOptions);
   }, [currentSegmentIndex, transcription]);
 
-  useEffect(() => {
-    if (transcription?.state !== "pending") return;
-
-    generate();
-  }, [transcription]);
-
   if (!transcription)
     return (
       <div className="p-4 w-full">
diff --git a/enjoy/src/renderer/components/record-button.tsx b/enjoy/src/renderer/components/record-button.tsx
index 474f7380..49f0ee38 100644
--- a/enjoy/src/renderer/components/record-button.tsx
+++ b/enjoy/src/renderer/components/record-button.tsx
@@ -7,6 +7,7 @@ import WaveSurfer from "wavesurfer.js";
 import { cn } from "@renderer/lib/utils";
 import { RadialProgress, toast } from "@renderer/components/ui";
 import { useHotkeys } from "react-hotkeys-hook";
+import { fetchFile } from "@ffmpeg/util";
 
 export const RecordButton = (props: {
   className?: string;
@@ -117,6 +118,26 @@ const RecordButtonPopover = (props: {
   onRecordEnd: (blob: Blob, duration: number) => void;
 }) => {
   const containerRef = useRef<HTMLDivElement>();
+  const { ffmpeg } = useContext(AppSettingsProviderContext);
+
+  const transcode = async (blob: Blob) => {
+    const input = `input.${blob.type.split("/")[1]}`;
+    const output = input.replace(/\.[^/.]+$/, ".wav");
+    await ffmpeg.writeFile(input, await fetchFile(blob));
+    await ffmpeg.exec([
+      "-i",
+      input,
+      "-ar",
+      "16000",
+      "-ac",
+      "1",
+      "-c:a",
+      "pcm_s16le",
+      output,
+    ]);
+    const data = await ffmpeg.readFile(output);
+    return new Blob([data], { type: "audio/wav" });
+  };
 
   useEffect(() => {
     if (!containerRef.current) return;
@@ -136,9 +157,10 @@ const RecordButtonPopover = (props: {
       startAt = Date.now();
     });
 
-    record.on("record-end", (blob: Blob) => {
+    record.on("record-end", async (blob: Blob) => {
       const duration = Date.now() - startAt;
-      props.onRecordEnd(blob, duration);
+      const output = await transcode(blob);
+      props.onRecordEnd(output, duration);
     });
 
     RecordPlugin.getAvailableAudioDevices()
diff --git a/enjoy/src/renderer/components/whisper-model-options.tsx b/enjoy/src/renderer/components/whisper-model-options.tsx
index 7bfbc148..d50ec1be 100644
--- a/enjoy/src/renderer/components/whisper-model-options.tsx
+++ b/enjoy/src/renderer/components/whisper-model-options.tsx
@@ -132,7 +132,7 @@ export const WhisperModelOptions = () => {
       if (state.state === "completed") {
         model.downloaded = true;
         setWhisperModel(model.name);
-      } else if (state.state === "cancelled") {
+      } else if (state.state === "cancelled" || state.state === "interrupted") {
         model.downloaded = false;
         model.downloadState = null;
       }
diff --git a/enjoy/src/renderer/pages/landing.tsx b/enjoy/src/renderer/pages/landing.tsx
index 6c2dffe5..5b79f27b 100644
--- a/enjoy/src/renderer/pages/landing.tsx
+++ b/enjoy/src/renderer/pages/landing.tsx
@@ -32,7 +32,7 @@ export default () => {
         setCurrentStepValid(!!libraryPath);
         break;
       case 3:
-        setCurrentStepValid(true);
+        setCurrentStepValid(Boolean(whisperConfig.model));
         break;
       case 4:
         setCurrentStepValid(initialized);