Fix: record without ffmpeg & others (#201)

* remove ffmpeg transcode when save recording

* fix model download

* transcode record audio in renderer

* fix transcribe dead loop when whisper not working

* force to select a model

* check model before transcribe
This commit is contained in:
an-lee
2024-01-24 15:05:56 +08:00
committed by GitHub
parent e9fece9280
commit ff14e1d04a
9 changed files with 55 additions and 22 deletions

View File

@@ -295,6 +295,7 @@
"language": "Language",
"sttAiModel": "STT AI model",
"checkingWhisper": "Checking whisper status",
"pleaseDownloadWhisperModelFirst": "Please download whisper model first",
"whisperIsWorkingGood": "Whisper is working good",
"whisperIsNotWorking": "Whisper is not working",
"checkingWhisperModel": "Checking whisper model",

View File

@@ -294,6 +294,7 @@
"language": "语言",
"sttAiModel": "语音转文本 AI 模型",
"checkingWhisper": "正在检查 Whisper",
"pleaseDownloadWhisperModelFirst": "请先下载 Whisper 模型",
"whisperIsWorkingGood": "Whisper 正常工作",
"whisperIsNotWorking": "Whisper 无法正常工作,请尝试更换模型后重试,或联系开发者",
"checkingWhisperModel": "正在检查 Whisper 模型",

View File

@@ -22,7 +22,6 @@ import settings from "@main/settings";
import { hashFile } from "@/utils";
import log from "electron-log/main";
import storage from "@main/storage";
import Ffmpeg from "@main/ffmpeg";
import { Client } from "@/api";
import { WEB_API_URL } from "@/constants";
import { AzureSpeechSdk } from "@main/azure-speech-sdk";
@@ -283,20 +282,16 @@ export class Recording extends Model<Recording> {
params;
const format = blob.type.split("/")[1];
const tempfile = path.join(settings.cachePath(), `${Date.now()}.${format}`);
await fs.outputFile(tempfile, Buffer.from(blob.arrayBuffer));
const wavFile = path.join(
const file = path.join(
settings.userDataPath(),
"recordings",
`${Date.now()}.wav`
`${Date.now()}.${format}`
);
await fs.outputFile(file, Buffer.from(blob.arrayBuffer));
const ffmpeg = new Ffmpeg();
await ffmpeg.convertToWav(tempfile, wavFile);
const md5 = await hashFile(wavFile, { algo: "md5" });
const filename = `${md5}.wav`;
fs.renameSync(wavFile, path.join(path.dirname(wavFile), filename));
const md5 = await hashFile(file, { algo: "md5" });
const filename = `${md5}.${format}`;
fs.renameSync(file, path.join(path.dirname(file), filename));
return this.create(
{

View File

@@ -24,7 +24,11 @@ class Downloader {
webContents.downloadURL(url);
webContents.session.on("will-download", (_event, item, _webContents) => {
if (savePath) {
item.setSavePath(savePath);
if (fs.statSync(savePath).isDirectory()) {
item.setSavePath(path.join(savePath, item.getFilename()));
} else {
item.setSavePath(savePath);
}
} else {
item.setSavePath(
path.join(app.getPath("downloads"), item.getFilename())

View File

@@ -5,6 +5,7 @@ import { WHISPER_MODELS_OPTIONS, PROCESS_TIMEOUT } from "@/constants";
import { exec } from "child_process";
import fs from "fs-extra";
import log from "electron-log/main";
import { t } from "i18next";
const logger = log.scope("whisper");
const MAGIC_TOKENS = ["Mrs.", "Ms.", "Mr.", "Dr.", "Prof.", "St."];
@@ -18,6 +19,13 @@ class Whipser {
}
currentModel() {
if (!this.config.availableModels) return;
if (!this.config.model) {
const model = this.config.availableModels[0];
settings.setSync("whisper.model", this.config.availableModels[0].name);
return model.savePath;
}
return (this.config.availableModels || []).find(
(m) => m.name === this.config.model
)?.savePath;
@@ -177,6 +185,10 @@ class Whipser {
return fs.readJson(outputFile);
}
if (!this.currentModel()) {
throw new Error(t("pleaseDownloadWhisperModelFirst"));
}
const command = [
`"${this.binMain}"`,
`--file "${file}"`,

View File

@@ -112,6 +112,10 @@ export const MediaTranscription = (props: {
addDblistener(fetchSegmentStats);
fetchSegmentStats();
if (transcription?.state == "pending") {
generate();
}
return () => {
removeDbListener(fetchSegmentStats);
};
@@ -126,12 +130,6 @@ export const MediaTranscription = (props: {
} as ScrollIntoViewOptions);
}, [currentSegmentIndex, transcription]);
useEffect(() => {
if (transcription?.state !== "pending") return;
generate();
}, [transcription]);
if (!transcription)
return (
<div className="p-4 w-full">

View File

@@ -7,6 +7,7 @@ import WaveSurfer from "wavesurfer.js";
import { cn } from "@renderer/lib/utils";
import { RadialProgress, toast } from "@renderer/components/ui";
import { useHotkeys } from "react-hotkeys-hook";
import { fetchFile } from "@ffmpeg/util";
export const RecordButton = (props: {
className?: string;
@@ -117,6 +118,26 @@ const RecordButtonPopover = (props: {
onRecordEnd: (blob: Blob, duration: number) => void;
}) => {
const containerRef = useRef<HTMLDivElement>();
const { ffmpeg } = useContext(AppSettingsProviderContext);
const transcode = async (blob: Blob) => {
const input = `input.${blob.type.split("/")[1]}`;
const output = input.replace(/\.[^/.]+$/, ".wav");
await ffmpeg.writeFile(input, await fetchFile(blob));
await ffmpeg.exec([
"-i",
input,
"-ar",
"16000",
"-ac",
"1",
"-c:a",
"pcm_s16le",
output,
]);
const data = await ffmpeg.readFile(output);
return new Blob([data], { type: "audio/wav" });
};
useEffect(() => {
if (!containerRef.current) return;
@@ -136,9 +157,10 @@ const RecordButtonPopover = (props: {
startAt = Date.now();
});
record.on("record-end", (blob: Blob) => {
record.on("record-end", async (blob: Blob) => {
const duration = Date.now() - startAt;
props.onRecordEnd(blob, duration);
const output = await transcode(blob);
props.onRecordEnd(output, duration);
});
RecordPlugin.getAvailableAudioDevices()

View File

@@ -132,7 +132,7 @@ export const WhisperModelOptions = () => {
if (state.state === "completed") {
model.downloaded = true;
setWhisperModel(model.name);
} else if (state.state === "cancelled") {
} else if (state.state === "cancelled" || state.state === "interrupted") {
model.downloaded = false;
model.downloadState = null;
}

View File

@@ -32,7 +32,7 @@ export default () => {
setCurrentStepValid(!!libraryPath);
break;
case 3:
setCurrentStepValid(true);
setCurrentStepValid(Boolean(whisperConfig.model));
break;
case 4:
setCurrentStepValid(initialized);