Fix some bugs (#1199)

* refactor echogarden & log
* more logs
* fix audio/video create with compressing config
* upgrade echogarden
2024-11-25 11:52:27 +08:00
parent 15746d4f5a
commit 3c337c2f98
8 changed files with 774 additions and 177 deletions
--- a/enjoy/package.json
+++ b/enjoy/package.json
@@ -141,7 +141,7 @@
    "dayjs": "^1.11.13",
    "decamelize": "^6.0.0",
    "decamelize-keys": "^2.0.1",
-    "echogarden": "2.0.3",
+    "echogarden": "^2.0.7",
    "electron-context-menu": "^4.0.4",
    "electron-log": "^5.2.2",
    "electron-settings": "^4.0.4",
--- a/enjoy/src/main/db/handlers/audios-handler.ts
+++ b/enjoy/src/main/db/handlers/audios-handler.ts
@@ -72,6 +72,7 @@ class AudiosHandler {
      compressing?: boolean;
    } = {}
  ) {
+    logger.info("Creating audio...", { uri, params });
    let file = uri;
    let source;
    if (uri.startsWith("http")) {
--- a/enjoy/src/main/db/handlers/videos-handler.ts
+++ b/enjoy/src/main/db/handlers/videos-handler.ts
@@ -71,6 +71,7 @@ class VideosHandler {
      compressing?: boolean;
    } = {}
  ) {
+    logger.info("Creating video...", { uri, params });
    let file = uri;
    let source;
    if (uri.startsWith("http")) {
--- a/enjoy/src/main/echogarden.ts
+++ b/enjoy/src/main/echogarden.ts
@@ -15,7 +15,6 @@ import {
  type Timeline,
  type TimelineEntry,
 } from "echogarden/dist/utilities/Timeline.d.js";
-import { WhisperOptions } from "echogarden/dist/recognition/WhisperSTT.js";
 import { ensureAndGetPackagesDir } from "echogarden/dist/utilities/PackageManager.js";
 import path from "path";
 import log from "@main/logger";
@@ -71,7 +70,12 @@ class EchogardenWrapper {
        // Set the whisper executable path for macOS
        if (process.platform === "darwin") {
          options.whisperCpp = options.whisperCpp || {};
-          options.whisperCpp.executablePath = path.join(__dirname, "lib", "whisper", "main");
+          options.whisperCpp.executablePath = path.join(
+            __dirname,
+            "lib",
+            "whisper",
+            "main"
+          );
        }

        // Call the original recognize function
@@ -84,8 +88,46 @@ class EchogardenWrapper {
          .catch(reject);
      });
    };
-    this.align = Echogarden.align;
-    this.alignSegments = Echogarden.alignSegments;
+    // NOTE(review): the listener below is process-wide, so a rejection that is
+    // unrelated to this call would reject it as well — confirm this is intended.
+    this.align = (input, transcript, options) => {
+      return new Promise((resolve, reject) => {
+        // Detach the temporary listener; must run on every exit path,
+        // otherwise a failed alignment leaks a listener on the process
+        const detach = () =>
+          process.removeListener("unhandledRejection", handler);
+        // Any unhandled rejection raised while pending rejects this call
+        const handler = (reason: unknown) => {
+          detach();
+          reject(reason);
+        };
+        process.on("unhandledRejection", handler);
+
+        Echogarden.align(input, transcript, options)
+          .then(resolve, reject)
+          .finally(detach);
+      });
+    };
+    // NOTE(review): the listener below is process-wide, so a rejection that is
+    // unrelated to this call would reject it as well — confirm this is intended.
+    this.alignSegments = (input, timeline, options) => {
+      return new Promise((resolve, reject) => {
+        // Detach the temporary listener; must run on every exit path,
+        // otherwise a failed alignment leaks a listener on the process
+        const detach = () =>
+          process.removeListener("unhandledRejection", handler);
+        // Any unhandled rejection raised while pending rejects this call
+        const handler = (reason: unknown) => {
+          detach();
+          reject(reason);
+        };
+        process.on("unhandledRejection", handler);
+
+        Echogarden.alignSegments(input, timeline, options)
+          .then(resolve, reject)
+          .finally(detach);
+      });
+    };
    this.denoise = Echogarden.denoise;
    this.encodeRawAudioToWave = encodeRawAudioToWave;
    this.decodeWaveToRawAudio = decodeWaveToRawAudio;
@@ -105,13 +147,13 @@ class EchogardenWrapper {
      },
      whisperCpp: {
        model: "tiny.en",
-      }
+      },
    }
  ) {
    const sampleFile = path.join(__dirname, "samples", "jfk.wav");

    try {
-      logger.info("check:", options);
+      logger.info("echogarden-check:", options);
      const result = await this.recognize(sampleFile, options);
      logger.info("transcript:", result?.transcript);
      fs.writeJsonSync(
@@ -138,6 +180,7 @@ class EchogardenWrapper {
   * @returns A promise that resolves to the enjoy:// protocal URL of the transcoded WAV file.
   */
  async transcode(url: string, sampleRate = 16000): Promise<string> {
+    logger.info("echogarden-transcode:", url, sampleRate);
    const filePath = enjoyUrlToPath(url);
    const rawAudio = await this.ensureRawAudio(filePath, sampleRate);
    const audioBuffer = this.encodeRawAudioToWave(rawAudio);
@@ -152,7 +195,7 @@ class EchogardenWrapper {
    ipcMain.handle(
      "echogarden-recognize",
      async (_event, url: string, options: RecognitionOptions) => {
-        logger.debug("echogarden-recognize:", options);
+        logger.info("echogarden-recognize:", options);
        try {
          const input = enjoyUrlToPath(url);
          return await this.recognize(input, options);
@@ -171,7 +214,7 @@ class EchogardenWrapper {
        transcript: string,
        options: AlignmentOptions
      ) => {
-        logger.debug("echogarden-align:", transcript, options);
+        logger.info("echogarden-align:", options);
        try {
          return await this.align(input, transcript, options);
        } catch (err) {
@@ -189,7 +232,7 @@ class EchogardenWrapper {
        timeline: Timeline,
        options: AlignmentOptions
      ) => {
-        logger.debug("echogarden-align-segments:", timeline, options);
+        logger.info("echogarden-align-segments:", options);
        if (typeof input === "string") {
          input = enjoyUrlToPath(input);
        }
@@ -211,7 +254,7 @@ class EchogardenWrapper {
        transcript: string,
        language: string
      ) => {
-        logger.debug("echogarden-word-to-sentence-timeline:", transcript);
+        logger.info("echogarden-word-to-sentence-timeline:", language);

        const { segmentTimeline } =
          await this.wordTimelineToSegmentSentenceTimeline(
@@ -237,6 +280,7 @@ class EchogardenWrapper {
    ipcMain.handle(
      "echogarden-transcode",
      async (_event, url: string, sampleRate?: number) => {
+        logger.info("echogarden-transcode:", url, sampleRate);
        try {
          return await this.transcode(url, sampleRate);
        } catch (err) {
@@ -247,6 +291,7 @@ class EchogardenWrapper {
    );

    ipcMain.handle("echogarden-check", async (_event, options: any) => {
+      logger.info("echogarden-check:", options);
      return this.check(options);
    });

--- a/enjoy/src/renderer/components/medias/media-add-button.tsx
+++ b/enjoy/src/renderer/components/medias/media-add-button.tsx
@@ -56,7 +56,7 @@ export const MediaAddButton = (props: { type?: "Audio" | "Video" }) => {
    if (files.length > 1) {
      Promise.allSettled(
        files.map((f) =>
-          EnjoyApp[type.toLowerCase() as "audios" | "videos"].create(f, {
+          EnjoyApp[`${type.toLowerCase()}s` as "audios" | "videos"].create(f, {
            compressing,
          })
        )
@@ -97,8 +97,8 @@ export const MediaAddButton = (props: { type?: "Audio" | "Video" }) => {
          setOpen(false);
        });
    } else {
-      EnjoyApp.audios
-        .create(uri)
+      EnjoyApp[`${type.toLowerCase()}s` as "audios" | "videos"]
+        .create(uri, { compressing })
        .then((media) => {
          toast.success(t("resourceAdded"));
          navigate(`/${type.toLowerCase()}s/${media.id}`);
--- a/enjoy/src/renderer/context/db-provider.tsx
+++ b/enjoy/src/renderer/context/db-provider.tsx
@@ -1,6 +1,8 @@
 import { createContext, useState, useEffect, useContext } from "react";
 import log from "electron-log/renderer";

+const logger = log.scope("db-provider.tsx");
+
 type DbStateEnum =
  | "connected"
  | "connecting"
@@ -83,7 +85,7 @@ export const DbProvider = ({ children }: { children: React.ReactNode }) => {
  useEffect(() => {
    if (state === "connected") {
      EnjoyApp.db.onTransaction((_event, state) => {
-        log.debug("db-on-transaction", state);
+        logger.debug("db-on-transaction", state);

        const event = new CustomEvent("db-on-transaction", { detail: state });
        document.dispatchEvent(event);
--- a/enjoy/src/renderer/hooks/use-transcribe.tsx
+++ b/enjoy/src/renderer/hooks/use-transcribe.tsx
@@ -14,11 +14,14 @@ import {
  TimelineEntry,
  type TimelineEntryType,
 } from "echogarden/dist/utilities/Timeline";
-import { parseText } from "media-captions";
+import { type ParsedCaptionsResult, parseText } from "media-captions";
 import { SttEngineOptionEnum } from "@/types/enums";
 import { RecognitionResult } from "echogarden/dist/api/API.js";
 import take from "lodash/take";
 import sortedUniqBy from "lodash/sortedUniqBy";
+import log from "electron-log/renderer";
+
+const logger = log.scope("use-transcribe.tsx");

 // test a text string has any punctuations or not
 // some transcribed text may not have any punctuations
@@ -133,6 +136,7 @@ export const useTranscribe = () => {
      };
    } else if (transcript) {
      setOutput("Aligning the transcript...");
+      logger.info("Aligning the transcript...");
      const alignmentResult = await EnjoyApp.echogarden.align(
        new Uint8Array(await blob.arrayBuffer()),
        transcript,
@@ -171,7 +175,13 @@ export const useTranscribe = () => {
    transcript: string;
    segmentTimeline: TimelineEntry[];
  }> => {
-    const caption = await parseText(originalText, { type: "srt" });
+    let caption: ParsedCaptionsResult;
+    try {
+      caption = await parseText(originalText, { type: "srt" });
+    } catch (err) {
+      logger.error("parseTextFailed", { error: err.message });
+      throw err;
+    }

    if (caption.cues.length > 0) {
      // valid srt file
@@ -210,7 +220,7 @@ export const useTranscribe = () => {
          transcript = punctuatedText;
        } catch (err) {
          toast.error(err.message);
-          console.warn(err);
+          logger.error("punctuateTextFailed", { error: err.message });
        }
      }

@@ -237,6 +247,7 @@ export const useTranscribe = () => {
    let model: string;

    let res: RecognitionResult;
+    logger.info("Start transcribing from Whisper...");
    try {
      model =
        echogardenSttConfig[
@@ -282,6 +293,7 @@ export const useTranscribe = () => {
    });

    setOutput("Transcribing from OpenAI...");
+    logger.info("Start transcribing from OpenAI...");
    try {
      const res: {
        text: string;
@@ -325,6 +337,7 @@ export const useTranscribe = () => {
    segmentTimeline: TimelineEntry[];
  }> => {
    setOutput("Transcribing from Cloudflare...");
+    logger.info("Start transcribing from Cloudflare...");
    try {
      const res: CfWhipserOutputType = (
        await axios.postForm(
@@ -394,6 +407,7 @@ export const useTranscribe = () => {
    const reco = new sdk.SpeechRecognizer(config, audioConfig);

    setOutput("Transcribing from Azure...");
+    logger.info("Start transcribing from Azure...");
    let results: SpeechRecognitionResultType[] = [];

    const { transcript, segmentTimeline }: any = await new Promise(
@@ -412,18 +426,18 @@ export const useTranscribe = () => {

        reco.canceled = (_s, e) => {
          if (e.reason === sdk.CancellationReason.Error) {
+            logger.error("CANCELED: Reason=" + e.reason);
            return reject(new Error(e.errorDetails));
          }

          reco.stopContinuousRecognitionAsync();
-          console.log("CANCELED: Reason=" + e.reason);
+          logger.info("CANCELED: Reason=" + e.reason);
        };

        reco.sessionStopped = async (_s, e) => {
-          console.log(
+          logger.info(
            "Session stopped. Stop continuous recognition.",
-            e.sessionId,
-            results
+            e.sessionId
          );
          reco.stopContinuousRecognitionAsync();

@@ -460,6 +474,7 @@ export const useTranscribe = () => {
              segmentTimeline,
            });
          } catch (err) {
+            logger.error("azureTranscribeFailed", { error: err.message });
            reject(t("azureTranscribeFailed", { error: err.message }));
          }
        };
--- a/yarn.lock
+++ b/yarn.lock