Feat: use ffmpeg wasm (#177)

* add ffmpeg wasm

* refactor whisper config

* refactor ffmpeg config

* remove deprecated code

* refactor model download

* decouple whisper & ffmpeg when transcribing

* do not always toast not found error

* ffmpeg works in renderer

* transcode in renderer & transcribe in main

* improve transcode

* refactor

* remove ffmpeg check

* fix whisper config check

* update whisper check

* enable SharedArrayBuffer

* remove ffmpeg setting

* fix UI

* handle ffmpeg not ready

* may create issue when whisper check failed

* upgrade deps

* hide ffmpeg setting if not ready

* hide record button when not in region
This commit is contained in:
an-lee
2024-01-23 14:39:16 +08:00
committed by GitHub
parent 7c9170170d
commit 3a98bd9f40
50 changed files with 1414 additions and 1199 deletions

View File

@@ -8,6 +8,7 @@ type EnjoyAppType = {
apiUrl: () => Promise<string>;
quit: () => Promise<void>;
openDevTools: () => Promise<void>;
createIssue: (title: string, body: string) => Promise<void>;
version: string;
};
system: {
@@ -71,17 +72,12 @@ type EnjoyAppType = {
setLibrary: (library: string) => Promise<void>;
getUser: () => Promise<UserType>;
setUser: (user: UserType) => Promise<void>;
getWhisperModel: () => Promise<string>;
setWhisperModel: (model: string) => Promise<void>;
getWhisperModelsPath: () => Promise<string>;
getUserDataPath: () => Promise<string>;
getLlm: (provider: SupportedLlmProviderType) => Promise<LlmProviderType>;
setLlm: (
provider: SupportedLlmProviderType,
LlmProviderType
) => Promise<void>;
getFfmpegConfig: () => Promise<FfmpegConfigType>;
setFfmpegConfig: (config: FfmpegConfigType) => Promise<void>;
getLanguage: () => Promise<string>;
switchLanguage: (language: string) => Promise<void>;
};
@@ -176,15 +172,17 @@ type EnjoyAppType = {
createSpeech: (id: string, configuration?: any) => Promise<SpeechType>;
};
whisper: {
availableModels: () => Promise<string[]>;
downloadModel: (name: string) => Promise<any>;
check: () => Promise<boolean>;
transcribe: (
config: () => Promise<WhisperConfigType>;
check: () => Promise<{ success: boolean; log: string }>;
setModel: (model: string) => Promise<WhisperConfigType>;
transcribeBlob: (
blob: { type: string; arrayBuffer: ArrayBuffer },
prompt?: string
) => Promise<{ file: string; content: string }>;
};
ffmpeg: {
config: () => Promise<FfmpegConfigType>;
setConfig: (config: FfmpegConfigType) => Promise<FfmpegConfigType>;
download: () => Promise<FfmpegConfigType>;
check: () => Promise<boolean>;
discover: () => Promise<{
@@ -195,7 +193,8 @@ type EnjoyAppType = {
};
download: {
onState: (callback: (event, state) => void) => void;
cancel: (filename: string) => void;
start: (url: string, savePath?: string) => void;
cancel: (filename: string) => Promise<void>;
cancelAll: () => void;
dashboard: () => Promise<DownloadStateType[]>;
removeAllListeners: () => void;
@@ -208,7 +207,7 @@ type EnjoyAppType = {
};
transcriptions: {
findOrCreate: (params: any) => Promise<TranscriptionType>;
process: (params: any) => Promise<void>;
process: (params: any, options: any) => Promise<void>;
update: (id: string, params: any) => Promise<void>;
};
waveforms: {

159
enjoy/src/types/index.d.ts vendored Normal file
View File

@@ -0,0 +1,159 @@
// These declarations let TypeScript pick up the magic constants that are
// auto-generated by Forge's Vite plugin. They tell the Electron app where to
// look for the Vite-bundled app code (dev server URL in development, bundled
// output name in production).
declare const MAIN_WINDOW_VITE_DEV_SERVER_URL: string;
declare const MAIN_WINDOW_VITE_NAME: string;

// "compromise-paragraphs" ships no type definitions; declare it as an
// untyped module so its import compiles.
declare module "compromise-paragraphs";
/** LLM providers the app supports (see EnjoyAppType.settings.getLlm/setLlm). */
type SupportedLlmProviderType = "openai" | "googleGenerativeAi";

/** Per-provider LLM connection settings; all fields optional (unset = default). */
type LlmProviderType = {
  key?: string; // API key for the provider
  model?: string; // model identifier to use
  baseUrl?: string; // endpoint override — presumably for proxies; confirm
};
/**
 * Progress snapshot of one download, delivered via
 * EnjoyAppType.download.onState / dashboard(). The state values mirror
 * Electron's DownloadItem states.
 */
type DownloadStateType = {
  name: string; // file name of the download
  state: "progressing" | "interrupted" | "completed" | "cancelled";
  received: number; // amount received so far (presumably bytes — confirm)
  total: number; // total amount expected
};

/** An in-app toast/notification message. */
type NotificationType = {
  type: "info" | "error" | "warning" | "success";
  message: string;
};
/**
 * Whisper (speech-to-text) configuration, returned by
 * EnjoyAppType.whisper.config()/setModel().
 */
type WhisperConfigType = {
  /** Catalogue of models available for download. */
  availableModels: {
    type: string;
    name: string;
    size: string; // size as a string — presumably human-readable; confirm
    url: string; // download URL for the model file
    savePath: string; // local path the model is saved to
  }[];
  modelsPath: string; // directory containing downloaded models
  model: string; // currently selected model name
  ready?: boolean; // set when whisper passed its check (see whisper.check)
};
/**
 * Shape of the JSON produced by a whisper.cpp transcription run
 * (model info, run parameters, and the transcribed segments).
 */
type WhisperOutputType = {
  model: {
    audio: {
      cts: number;
      head: number;
      layer: number;
      state: number;
    };
    ftype: number;
    mels: number;
    multilingual: number;
    text: {
      cts: number;
      head: number;
      layer: number;
      state: number;
    };
    type: string;
    vocab: number;
  };
  params: {
    language: string; // language requested for the run
    model: string;
    translate: boolean;
  };
  result: {
    /**
     * @deprecated Misspelling kept for backward compatibility — whisper.cpp
     * actually emits `result.language`; prefer `language` below.
     */
    languate: string;
    /** Language of the result, as emitted by whisper.cpp (`result.language`). */
    language?: string;
  };
  systeminfo: string;
  transcription: TranscriptionSegmentType[];
};

/** One transcribed segment with both millisecond offsets and timestamp strings. */
type TranscriptionSegmentType = {
  offsets: {
    from: number; // segment start offset
    to: number; // segment end offset
  };
  text: string; // transcribed text of the segment
  timestamps: {
    from: string; // start timestamp, e.g. "00:00:00,000" — format per whisper.cpp
    to: string;
  };
};
/**
 * A create/update/destroy event for a local record — presumably broadcast to
 * keep views in sync with the database; confirm against the emitter.
 */
type TransactionStateType = {
  model: string; // model name the event refers to
  id: string; // id of the affected record
  action: "create" | "update" | "destroy";
  record?: AudioType | UserType | RecordingType; // payload, when available
};
/**
 * Discovered ffmpeg/ffprobe setup on the host, returned by
 * EnjoyAppType.ffmpeg.config()/discover()/download().
 */
type FfmpegConfigType = {
  os: string; // host operating system
  arch: string; // host CPU architecture
  commandExists: boolean; // whether the ffmpeg command was found
  ffmpegPath?: string; // resolved path to the ffmpeg binary, if any
  ffprobePath?: string; // resolved path to the ffprobe binary, if any
  scanDirs: string[]; // directories searched for the binaries
  ready: boolean; // whether ffmpeg is usable
};
/**
 * A dictionary lookup of a word within a context sentence; resolution may
 * still be in flight (see `status`).
 */
type LookupType = {
  id: string;
  word: string; // word that was looked up
  context: string; // the sentence/phrase the word appeared in
  contextTranslation: string;
  status?: "pending" | "completed" | "failed";
  meaning?: MeaningType; // resolved meaning, once completed
  meaningOptions?: MeaningType[]; // candidate meanings to choose from
  createdAt: string;
  updatedAt: string;
};

/** A single word meaning (definition + translation) and the lookups using it. */
type MeaningType = {
  id: string;
  word: string;
  lemma?: string; // base form of the word
  pronunciation?: string;
  pos?: string; // part of speech
  definition: string;
  translation: string;
  lookups: LookupType[]; // lookups that resolved to this meaning
};
/** Pagination metadata from the Pagy-style API responses. */
type PagyResponseType = {
  page: number; // current page number
  next: number | null; // next page number, or null when on the last page
};
/** An audiobook entry fetched from Audible. */
type AudibleBookType = {
  title: string;
  subtitle: string;
  author: string;
  narrator: string;
  cover?: string; // cover image URL
  language?: string;
  sample?: string; // audio sample URL
  url: string; // link to the book page
};

/** A TED talk entry. */
type TedTalkType = {
  title: string;
  presenterDisplayName: string;
  slug: string; // URL slug of the talk
  canonicalUrl: string;
  duration: string;
  publishedAt: string;
  /** Image variants for the talk's primary image. */
  primaryImageSet: {
    url: string;
    aspectRatioName: string;
  }[];
};

/** A TED "Ideas" article entry. */
type TedIdeaType = {
  url: string;
  cover?: string; // cover image URL
  title: string;
  description: string;
};