Feat improve media player page (#320)

* download transcription from api when transcribed

* add friendly loading panel on audio page

* add loading panel for video page

* fix locale

* remove ubuntu 20.04 for e2e test

* do not auto upload recording
Author: an-lee
Date: 2024-02-19 11:01:52 +08:00 (committed by GitHub)
Parent: 9a605b9f39
Commit: 06f8d32169
15 changed files with 347 additions and 75 deletions

View File

@@ -23,7 +23,6 @@ jobs:
macos-14,
windows-2019,
windows-latest,
- ubuntu-20.04,
ubuntu-latest,
]
steps:

View File

@@ -8,7 +8,7 @@
"types": "./src/types.d.ts",
"scripts": {
"predev": "yarn run download",
"dev": "rimraf .vite && yarn run download && WEB_API_URL=http://localhost:3000 SETTINGS_PATH=./tmp LIBRARY_PATH=./tmp electron-forge start",
"dev": "rimraf .vite && yarn run download && WEB_API_URL=http://localhost:3000 SETTINGS_PATH=${PWD}/enjoy/tmp LIBRARY_PATH=${PWD}/enjoy/tmp electron-forge start",
"start": "rimraf .vite && yarn run download && electron-forge start",
"package": "rimraf .vite && yarn run download && electron-forge package",
"make": "rimraf .vite && yarn run download && electron-forge make",

View File

@@ -450,5 +450,14 @@
"syncingRecordings": "Syncing {{count}} recordings",
"failedToSyncRecordings": "Syncing recordings failed",
"downloadUrlNotResolved": "Download URL not resolved",
"resolvingDownloadUrl": "Resolving download URL"
"resolvingDownloadUrl": "Resolving download URL",
"waveformIsDecoded": "Waveform is decoded",
"decodingWaveform": "Decoding waveform",
"transcribedSuccessfully": "Transcribed successfully",
"transcribing": "Transcribing",
"notTranscribedYet": "Not transcribed yet",
"preparingAudio": "Preparing audio",
"preparingVideo": "Preparing video",
"itMayTakeAWhileToPrepareForTheFirstLoad": "It may take a while to prepare for the first load. Please be patient.",
"loadingTranscription": "Loading transcription"
}

View File

@@ -449,5 +449,14 @@
"syncingRecordings": "{{count}} 条录音正在同步",
"failedToSyncRecordings": "同步录音失败",
"downloadUrlNotResolved": "无法解析下载地址",
"resolvingDownloadUrl": "正在解析下载地址"
"resolvingDownloadUrl": "正在解析下载地址",
"waveformIsDecoded": "波形已解码",
"decodingWaveform": "正在解码波形",
"transcribedSuccessfully": "语音转文本成功",
"transcribing": "正在语音转文本",
"notTranscribedYet": "尚未语音转文本",
"preparingAudio": "正在准备音频",
"preparingVideo": "正在准备视频",
"itMayTakeAWhileToPrepareForTheFirstLoad": "首次加载可能需要一些时间,请耐心等候",
"loadingTranscription": "正在加载语音文本"
}

View File

@@ -174,7 +174,8 @@ export class Audio extends Model<Audio> {
});
return webApi.syncAudio(this.toJSON()).then(() => {
- this.update({ syncedAt: new Date() });
+ const now = new Date();
+ this.update({ syncedAt: now, updatedAt: now });
});
}

View File

@@ -134,12 +134,10 @@ export class Recording extends Model<Recording> {
}
async sync() {
- this.upload().catch(() => {});
const webApi = new Client({
baseUrl: process.env.WEB_API_URL || WEB_API_URL,
accessToken: settings.getSync("user.accessToken") as string,
- logger: log.scope("recording/sync"),
+ logger,
});
return webApi.syncRecording(this.toJSON()).then(() => {
@@ -156,11 +154,12 @@ export class Recording extends Model<Recording> {
return assessment;
}
await this.upload();
+ await this.sync();
const webApi = new Client({
baseUrl: process.env.WEB_API_URL || WEB_API_URL,
accessToken: settings.getSync("user.accessToken") as string,
- logger: log.scope("recording/assess"),
+ logger,
});
const { token, region } = await webApi.generateSpeechToken();
@@ -221,7 +220,7 @@ export class Recording extends Model<Recording> {
@AfterCreate
static autoSync(recording: Recording) {
- // auto upload should not block the main thread
+ // auto sync should not block the main thread
recording.sync().catch(() => {});
}

View File

@@ -69,6 +69,7 @@ export class Transcription extends Model<Transcription> {
}
async sync() {
if (this.isSynced) return;
+ if (this.getDataValue("state") !== "finished") return;
const webApi = new Client({
@@ -77,7 +78,8 @@ export class Transcription extends Model<Transcription> {
logger,
});
return webApi.syncTranscription(this.toJSON()).then(() => {
- this.update({ syncedAt: new Date() });
+ const now = new Date();
+ this.update({ syncedAt: now, updatedAt: now });
});
}
@@ -86,6 +88,13 @@ export class Transcription extends Model<Transcription> {
this.notify(transcription, "update");
}
+ @AfterUpdate
+ static syncAfterUpdate(transcription: Transcription) {
+ transcription.sync().catch((err) => {
+ logger.error("sync error", err);
+ });
+ }
@AfterDestroy
static notifyForDestroy(transcription: Transcription) {
this.notify(transcription, "destroy");
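
The new @AfterUpdate hook and the two guards in sync() work together as a loop breaker: a successful sync writes syncedAt/updatedAt, which fires syncAfterUpdate again, and that second pass returns early because the record is already synced. Stamping both fields with the same Date is what keeps the synced check true after the write itself. A minimal sketch of the interplay (not the project's actual model; isSynced is assumed to compare the two timestamps):

class TranscriptionSketch {
  state: "pending" | "processing" | "finished" = "pending";
  syncedAt?: Date;
  updatedAt: Date = new Date();

  get isSynced(): boolean {
    return !!this.syncedAt && this.syncedAt >= this.updatedAt;
  }

  async sync(): Promise<void> {
    if (this.isSynced) return; // breaks the update -> sync -> update cycle
    if (this.state !== "finished") return; // never push unfinished results
    // ...push to the web API here...
    const now = new Date();
    this.syncedAt = now;
    this.updatedAt = now; // same instant, so isSynced holds afterwards
  }

  // what the @AfterUpdate hook delegates to
  afterUpdate(): void {
    this.sync().catch((err) => console.error("sync error", err));
  }
}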

View File

@@ -191,11 +191,12 @@ export class Video extends Model<Video> {
const webApi = new Client({
baseUrl: process.env.WEB_API_URL || WEB_API_URL,
accessToken: settings.getSync("user.accessToken") as string,
- logger: log.scope("video/sync"),
+ logger,
});
return webApi.syncVideo(this.toJSON()).then(() => {
- this.update({ syncedAt: new Date() });
+ const now = new Date();
+ this.update({ syncedAt: now, updatedAt: now });
});
}

View File

@@ -2,6 +2,7 @@ import { useEffect, useState, useContext } from "react";
import {
DbProviderContext,
AppSettingsProviderContext,
+ AISettingsProviderContext,
} from "@renderer/context";
import {
LoaderSpin,
@@ -10,7 +11,7 @@ import {
MediaPlayer,
MediaTranscription,
} from "@renderer/components";
- import { LoaderIcon } from "lucide-react";
+ import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import {
AlertDialog,
AlertDialogHeader,
@@ -20,22 +21,34 @@ import {
AlertDialogFooter,
AlertDialogCancel,
Button,
+ PingPoint,
+ Progress,
ScrollArea,
toast,
} from "@renderer/components/ui";
import { t } from "i18next";
+ import { useTranscribe } from "@renderer/hooks";
+ import { useNavigate } from "react-router-dom";
export const AudioDetail = (props: { id?: string; md5?: string }) => {
+ const navigate = useNavigate();
const { id, md5 } = props;
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
+ const { whisperConfig } = useContext(AISettingsProviderContext);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const [audio, setAudio] = useState<AudioType | null>(null);
const [transcription, setTranscription] = useState<TranscriptionType>(null);
- const [initialized, setInitialized] = useState<boolean>(false);
const [sharing, setSharing] = useState<boolean>(false);
+ // Transcription controls
+ const [transcribing, setTranscribing] = useState<boolean>(false);
+ const { transcribe } = useTranscribe();
+ const [transcribingProgress, setTranscribingProgress] = useState<number>(0);
+ // Player controls
+ const [initialized, setInitialized] = useState<boolean>(false);
const [currentTime, setCurrentTime] = useState<number>(0);
const [seek, setSeek] = useState<{
seekTo: number;
@@ -56,6 +69,56 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
}
};
+ const generateTranscription = async () => {
+ if (transcribing) return;
+ setTranscribing(true);
+ setTranscribingProgress(0);
+ try {
+ const { engine, model, result } = await transcribe(audio.src);
+ await EnjoyApp.transcriptions.update(transcription.id, {
+ state: "finished",
+ result,
+ engine,
+ model,
+ });
+ } catch (err) {
+ toast.error(err.message);
+ }
+ setTranscribing(false);
+ };
+ const findTranscriptionFromWebApi = async () => {
+ const res = await webApi.transcriptions({
+ targetMd5: audio.md5,
+ });
+ const transcript = (res?.transcriptions || []).filter((t) =>
+ ["base", "small", "medium", "large", "whisper-1"].includes(t.model)
+ )?.[0];
+ if (!transcript) {
+ throw new Error("Transcription not found");
+ }
+ await EnjoyApp.transcriptions.update(transcription.id, {
+ state: "finished",
+ result: transcript.result,
+ engine: transcript.engine,
+ model: transcript.model,
+ });
+ };
+ const findOrGenerateTranscription = async () => {
+ try {
+ await findTranscriptionFromWebApi();
+ } catch (err) {
+ console.error(err);
+ await generateTranscription();
+ }
+ };
const handleShare = async () => {
if (!audio.source && !audio.isUploaded) {
try {
@@ -110,11 +173,26 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
}, [audio]);
useEffect(() => {
+ if (!transcription) return;
addDblistener(onTransactionUpdate);
+ if (transcription?.state == "pending") {
+ findOrGenerateTranscription();
+ }
+ if (whisperConfig.service === "local") {
+ EnjoyApp.whisper.onProgress((_, p: number) => {
+ if (p > 100) p = 100;
+ setTranscribingProgress(p);
+ });
+ }
return () => {
removeDbListener(onTransactionUpdate);
+ EnjoyApp.whisper.removeProgressListeners();
};
- }, [transcription]);
+ }, [md5, transcription]);
if (!audio) {
return <LoaderSpin />;
@@ -183,8 +261,10 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
mediaId={audio.id}
mediaType="Audio"
mediaName={audio.name}
- mediaUrl={audio.src}
transcription={transcription}
+ transcribing={transcribing}
+ progress={transcribingProgress}
+ transcribe={generateTranscription}
currentSegmentIndex={currentSegmentIndex}
onSelectSegment={(index) => {
if (currentSegmentIndex === index) return;
@@ -219,11 +299,69 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
</AlertDialogContent>
</AlertDialog>
- {!initialized && (
- <div className="top-0 w-full h-full absolute z-30 bg-background/10 flex items-center justify-center">
- <LoaderIcon className="text-muted-foreground animate-spin w-8 h-8" />
- </div>
- )}
+ {/* Show loading progress until waveform is decoded & transcribed */}
+ <AlertDialog open={!initialized || !Boolean(transcription?.result)}>
+ <AlertDialogContent>
+ <AlertDialogHeader>
+ <AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
+ <AlertDialogDescription>
+ {t("itMayTakeAWhileToPrepareForTheFirstLoad")}
+ </AlertDialogDescription>
+ </AlertDialogHeader>
+ <div className="py-4">
+ {initialized ? (
+ <div className="mb-4 flex items-center space-x-4">
+ <CheckCircleIcon className="w-4 h-4 text-green-500" />
+ <span>{t("waveformIsDecoded")}</span>
+ </div>
+ ) : (
+ <div className="mb-4 flex items-center space-x-4">
+ <LoaderIcon className="w-4 h-4 animate-spin" />
+ <span>{t("decodingWaveform")}</span>
+ </div>
+ )}
+ {!transcription ? (
+ <div className="flex items-center space-x-4">
+ <PingPoint colorClassName="bg-muted" />
+ <span>{t("loadingTranscription")}</span>
+ </div>
+ ) : transcription.result ? (
+ <div className="flex items-center space-x-4">
+ <CheckCircleIcon className="w-4 h-4 text-green-500" />
+ <span>{t("transcribedSuccessfully")}</span>
+ </div>
+ ) : transcribing ? (
+ <div className="">
+ <div className="flex items-center space-x-4 mb-2">
+ <PingPoint colorClassName="bg-yellow-500" />
+ <span>{t("transcribing")}</span>
+ </div>
+ {whisperConfig.service === "local" && (
+ <Progress value={transcribingProgress} />
+ )}
+ </div>
+ ) : (
+ <div className="flex items-center space-x-4">
+ <PingPoint colorClassName="bg-muted" />
+ <div className="inline">
+ <span>{t("notTranscribedYet")}</span>
+ <Button className="ml-4" size="sm">
+ {t("transcribe")}
+ </Button>
+ </div>
+ </div>
+ )}
+ </div>
+ <AlertDialogFooter>
+ <Button variant="secondary" onClick={() => navigate(-1)}>
+ {t("cancel")}
+ </Button>
+ </AlertDialogFooter>
+ </AlertDialogContent>
+ </AlertDialog>
</div>
);
};
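
The page now resolves a transcription in two steps: it first asks the web API for an existing transcription with the same md5 (accepting only known model names) and only falls back to generating one locally. While that runs, the loading dialog is driven by two independent readiness flags, one per status row. A sketch of its open condition as derived state (names mirror the diff):

// The dialog closes only when both preparation steps have finished.
const waveformReady = initialized; // set once wavesurfer decodes the audio
const transcriptReady = Boolean(transcription?.result); // set once a result exists
const showPreparingDialog = !waveformReady || !transcriptReady;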

View File

@@ -319,7 +319,7 @@ export const MediaPlayer = (props: {
const subscriptions = [
wavesurfer.on("play", () => setIsPlaying(true)),
wavesurfer.on("pause", () => setIsPlaying(false)),
- wavesurfer.on("loading", (percent: number) => console.log(percent)),
+ wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
wavesurfer.on("timeupdate", (time: number) => setCurrentTime(time)),
wavesurfer.on("decode", () => {
if (waveform?.frequencies) return;
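
A hedged note on the event wiring: this assumes wavesurfer.js v7, where on() returns an unsubscribe function, so collecting the return values lets one cleanup call detach every handler at once:

// Sketch of the subscription/cleanup pattern, inside the component's effect;
// wavesurfer and setIsPlaying are the names used in the diff above.
const subscriptions = [
  wavesurfer.on("play", () => setIsPlaying(true)),
  wavesurfer.on("pause", () => setIsPlaying(false)),
  wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
];
return () => subscriptions.forEach((unsubscribe) => unsubscribe());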

View File

@@ -12,7 +12,6 @@ import {
ScrollArea,
Button,
PingPoint,
- toast,
} from "@renderer/components/ui";
import React, { useEffect, useContext, useState } from "react";
import { t } from "i18next";
@@ -22,14 +21,15 @@ import {
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
- import { useTranscribe } from "@renderer/hooks";
export const MediaTranscription = (props: {
transcription: TranscriptionType;
+ progress: number;
+ transcribe: () => void;
+ transcribing: boolean;
mediaId: string;
mediaType: "Audio" | "Video";
mediaName?: string;
- mediaUrl: string;
currentSegmentIndex?: number;
onSelectSegment?: (index: number) => void;
}) => {
@@ -38,41 +38,20 @@ export const MediaTranscription = (props: {
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const {
transcription,
+ transcribing,
+ progress,
+ transcribe,
mediaId,
mediaType,
mediaName,
- mediaUrl,
currentSegmentIndex,
onSelectSegment,
} = props;
const containerRef = React.createRef<HTMLDivElement>();
- const [transcribing, setTranscribing] = useState<boolean>(false);
- const { transcribe } = useTranscribe();
- const [progress, setProgress] = useState<number>(0);
const [recordingStats, setRecordingStats] =
useState<SegementRecordingStatsType>([]);
- const generate = async () => {
- if (transcribing) return;
- setTranscribing(true);
- setProgress(0);
- try {
- const { engine, model, result } = await transcribe(mediaUrl);
- await EnjoyApp.transcriptions.update(transcription.id, {
- state: "finished",
- result,
- engine,
- model,
- });
- } catch (err) {
- toast.error(err.message);
- }
- setTranscribing(false);
- };
const fetchSegmentStats = async () => {
if (!mediaId) return;
@@ -85,22 +64,10 @@ export const MediaTranscription = (props: {
addDblistener(fetchSegmentStats);
fetchSegmentStats();
- if (transcription?.state == "pending") {
- generate();
- }
- if (whisperConfig.service === "local") {
- EnjoyApp.whisper.onProgress((_, p: number) => {
- if (p > 100) p = 100;
- setProgress(p);
- });
- }
return () => {
removeDbListener(fetchSegmentStats);
- EnjoyApp.whisper.removeProgressListeners();
};
- }, [mediaId, mediaType, transcription]);
+ }, [transcription]);
useEffect(() => {
containerRef.current
@@ -159,7 +126,7 @@ export const MediaTranscription = (props: {
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
- <AlertDialogAction onClick={generate}>
+ <AlertDialogAction onClick={transcribe}>
{t("transcribe")}
</AlertDialogAction>
</AlertDialogFooter>
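
MediaTranscription is now a controlled component: it no longer owns transcribing state or calls useTranscribe itself, and the dialog's action simply invokes the transcribe prop. Its props contract after this change, consolidated from the diff as a sketch:

type MediaTranscriptionProps = {
  transcription: TranscriptionType;
  transcribing: boolean; // owned by the parent page
  progress: number; // 0-100, fed by the parent's whisper listener
  transcribe: () => void; // parent-supplied callback
  mediaId: string;
  mediaType: "Audio" | "Video";
  mediaName?: string;
  currentSegmentIndex?: number;
  onSelectSegment?: (index: number) => void;
};

Lifting this state up lets the audio and video pages share one transcription flow and render its progress in their own loading panels.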

View File

@@ -38,7 +38,7 @@ export const PostActions = (props: { post: PostType }) => {
const [copied, setCopied] = useState<boolean>(false);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const [asking, setAsking] = useState<boolean>(false);
- const [aiReplies, setAiReplies] = useState<MessageType[]>([]);
+ const [aiReplies, setAiReplies] = useState<Partial<MessageType>[]>([]);
const handleAddMedium = async () => {
if (post.targetType !== "Medium") return;
@@ -185,7 +185,7 @@ export const PostActions = (props: { post: PostType }) => {
);
};
- const AIReplies = (props: { replies: MessageType[] }) => {
+ const AIReplies = (props: { replies: Partial<MessageType>[] }) => {
return (
<div>
<div className="space-y-2">
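
Widening MessageType[] to Partial<MessageType>[] presumably reflects that AI replies are rendered before they are persisted, so they lack server-assigned fields. An illustrative sketch (field names are hypothetical, not the project's exact type):

type MessageTypeSketch = { id: string; content: string; createdAt: string };

// A reply fresh from the model has content but no id/createdAt yet,
// so only a Partial element type accepts it.
const aiReplies: Partial<MessageTypeSketch>[] = [];
aiReplies.push({ content: "Here is a suggested answer." }); // OK
// A plain MessageTypeSketch[] would reject this push: missing id, createdAt.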

View File

@@ -51,11 +51,6 @@ export const TedTalksSegment = () => {
coverUrl: selectedTalk?.primaryImageSet[0].url,
})
.then((record) => {
- if (!record) {
- toast.error(t("failedToDownload"));
- return;
- }
if (type === "video") {
navigate(`/videos/${record.id}`);
} else {

View File

@@ -2,6 +2,7 @@ import { useEffect, useState, useContext } from "react";
import {
DbProviderContext,
AppSettingsProviderContext,
+ AISettingsProviderContext,
} from "@renderer/context";
import {
LoaderSpin,
@@ -10,7 +11,7 @@ import {
MediaPlayer,
MediaTranscription,
} from "@renderer/components";
- import { LoaderIcon } from "lucide-react";
+ import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import {
AlertDialog,
AlertDialogHeader,
@@ -20,22 +21,34 @@ import {
AlertDialogFooter,
AlertDialogCancel,
Button,
+ PingPoint,
+ Progress,
ScrollArea,
toast,
} from "@renderer/components/ui";
import { t } from "i18next";
+ import { useTranscribe } from "@renderer/hooks";
+ import { useNavigate } from "react-router-dom";
export const VideoDetail = (props: { id?: string; md5?: string }) => {
+ const navigate = useNavigate();
const { id, md5 } = props;
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
+ const { whisperConfig } = useContext(AISettingsProviderContext);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const [video, setVideo] = useState<VideoType | null>(null);
const [transcription, setTranscription] = useState<TranscriptionType>(null);
- const [initialized, setInitialized] = useState<boolean>(false);
const [sharing, setSharing] = useState<boolean>(false);
+ // Transcription controls
+ const [transcribing, setTranscribing] = useState<boolean>(false);
+ const { transcribe } = useTranscribe();
+ const [transcribingProgress, setTranscribingProgress] = useState<number>(0);
+ // Player controls
+ const [initialized, setInitialized] = useState<boolean>(false);
const [currentTime, setCurrentTime] = useState<number>(0);
const [seek, setSeek] = useState<{
seekTo: number;
@@ -58,6 +71,56 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
}
};
+ const generateTranscription = async () => {
+ if (transcribing) return;
+ setTranscribing(true);
+ setTranscribingProgress(0);
+ try {
+ const { engine, model, result } = await transcribe(video.src);
+ await EnjoyApp.transcriptions.update(transcription.id, {
+ state: "finished",
+ result,
+ engine,
+ model,
+ });
+ } catch (err) {
+ toast.error(err.message);
+ }
+ setTranscribing(false);
+ };
+ const findTranscriptionFromWebApi = async () => {
+ const res = await webApi.transcriptions({
+ targetMd5: video.md5,
+ });
+ const transcript = (res?.transcriptions || []).filter((t) =>
+ ["base", "small", "medium", "large", "whisper-1"].includes(t.model)
+ )?.[0];
+ if (!transcript) {
+ throw new Error("Transcription not found");
+ }
+ await EnjoyApp.transcriptions.update(transcription.id, {
+ state: "finished",
+ result: transcript.result,
+ engine: transcript.engine,
+ model: transcript.model,
+ });
+ };
+ const findOrGenerateTranscription = async () => {
+ try {
+ await findTranscriptionFromWebApi();
+ } catch (err) {
+ console.error(err);
+ await generateTranscription();
+ }
+ };
const handleShare = async () => {
if (!video.source.startsWith("http")) {
toast.error(t("shareFailed"), {
@@ -116,11 +179,26 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
}, [video]);
useEffect(() => {
+ if (!transcription) return;
addDblistener(onTransactionUpdate);
+ if (transcription?.state == "pending") {
+ findOrGenerateTranscription();
+ }
+ if (whisperConfig.service === "local") {
+ EnjoyApp.whisper.onProgress((_, p: number) => {
+ if (p > 100) p = 100;
+ setTranscribingProgress(p);
+ });
+ }
return () => {
removeDbListener(onTransactionUpdate);
+ EnjoyApp.whisper.removeProgressListeners();
};
- }, [transcription]);
+ }, [md5, transcription]);
if (!video) {
return <LoaderSpin />;
@@ -193,9 +271,11 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
<MediaTranscription
mediaId={video.id}
mediaType="Video"
- mediaUrl={video.src}
mediaName={video.name}
transcription={transcription}
+ transcribing={transcribing}
+ progress={transcribingProgress}
+ transcribe={generateTranscription}
currentSegmentIndex={currentSegmentIndex}
onSelectSegment={(index) => {
if (currentSegmentIndex === index) return;
@@ -232,6 +312,70 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
</AlertDialogContent>
</AlertDialog>
+ {/* Show loading progress until waveform is decoded & transcribed */}
+ <AlertDialog open={!initialized || !Boolean(transcription?.result)}>
+ <AlertDialogContent>
+ <AlertDialogHeader>
+ <AlertDialogTitle>{t("preparingVideo")}</AlertDialogTitle>
+ <AlertDialogDescription>
+ {t("itMayTakeAWhileToPrepareForTheFirstLoad")}
+ </AlertDialogDescription>
+ </AlertDialogHeader>
+ <div className="py-4">
+ {initialized ? (
+ <div className="mb-4 flex items-center space-x-4">
+ <CheckCircleIcon className="w-4 h-4 text-green-500" />
+ <span>{t("waveformIsDecoded")}</span>
+ </div>
+ ) : (
+ <div className="mb-4 flex items-center space-x-4">
+ <LoaderIcon className="w-4 h-4 animate-spin" />
+ <span>{t("decodingWaveform")}</span>
+ </div>
+ )}
+ {!transcription ? (
+ <div className="flex items-center space-x-4">
+ <PingPoint colorClassName="bg-muted" />
+ <span>{t("loadingTranscription")}</span>
+ </div>
+ ) : transcription.result ? (
+ <div className="flex items-center space-x-4">
+ <CheckCircleIcon className="w-4 h-4 text-green-500" />
+ <span>{t("transcribedSuccessfully")}</span>
+ </div>
+ ) : transcribing ? (
+ <div className="">
+ <div className="flex items-center space-x-4 mb-2">
+ <PingPoint colorClassName="bg-yellow-500" />
+ <span>{t("transcribing")}</span>
+ </div>
+ {whisperConfig.service === "local" && (
+ <Progress value={transcribingProgress} />
+ )}
+ </div>
+ ) : (
+ <div className="flex items-center space-x-4">
+ <PingPoint colorClassName="bg-muted" />
+ <div className="inline">
+ <span>{t("notTranscribedYet")}</span>
+ <Button className="ml-4" size="sm">
+ {t("transcribe")}
+ </Button>
+ </div>
+ </div>
+ )}
+ </div>
+ <AlertDialogFooter>
+ <Button variant="secondary" onClick={() => navigate(-1)}>
+ {t("cancel")}
+ </Button>
+ </AlertDialogFooter>
+ </AlertDialogContent>
+ </AlertDialog>
{!initialized && (
<div className="top-0 w-full h-full absolute z-30 bg-background/10 flex items-center justify-center">
<LoaderIcon className="text-muted-foreground animate-spin w-8 h-8" />

View File

@@ -3,6 +3,7 @@ type TranscriptionType = {
targetId: string;
targetType: string;
state: "pending" | "processing" | "finished";
+ engine: string;
model: string;
result: TranscriptionResultSegmentGroupType[];
};
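
With engine now recorded alongside model, consumers can report where a result came from and can narrow on the state union before touching result. A hedged usage sketch:

// Narrow on state first: result is only meaningful once finished.
function describeTranscription(t: TranscriptionType): string {
  if (t.state !== "finished") return `transcription is ${t.state}`;
  return `${t.result.length} segment groups via ${t.engine}/${t.model}`;
}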