Feat improve media player page (#320)

* Download the transcription from the API when one already exists

* Add a friendly loading panel to the audio page

* Add a loading panel to the video page

* Fix locale

* Remove Ubuntu 20.04 from the e2e test matrix

* Do not auto-upload recordings
Author: an-lee
Date: 2024-02-19 11:01:52 +08:00
Committed by: GitHub
Parent: 9a605b9f39
Commit: 06f8d32169
15 changed files with 347 additions and 75 deletions

View File

@@ -2,6 +2,7 @@ import { useEffect, useState, useContext } from "react";
import {
DbProviderContext,
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
import {
LoaderSpin,
@@ -10,7 +11,7 @@ import {
MediaPlayer,
MediaTranscription,
} from "@renderer/components";
- import { LoaderIcon } from "lucide-react";
+ import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import {
AlertDialog,
AlertDialogHeader,
@@ -20,22 +21,34 @@ import {
AlertDialogFooter,
AlertDialogCancel,
Button,
PingPoint,
Progress,
ScrollArea,
toast,
} from "@renderer/components/ui";
import { t } from "i18next";
import { useTranscribe } from "@renderer/hooks";
import { useNavigate } from "react-router-dom";
export const AudioDetail = (props: { id?: string; md5?: string }) => {
const navigate = useNavigate();
const { id, md5 } = props;
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const { whisperConfig } = useContext(AISettingsProviderContext);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const [audio, setAudio] = useState<AudioType | null>(null);
const [transcription, setTranscription] = useState<TranscriptionType>(null);
- const [initialized, setInitialized] = useState<boolean>(false);
const [sharing, setSharing] = useState<boolean>(false);
// Transcription controls
const [transcribing, setTranscribing] = useState<boolean>(false);
const { transcribe } = useTranscribe();
const [transcribingProgress, setTranscribingProgress] = useState<number>(0);
// Player controls
+ const [initialized, setInitialized] = useState<boolean>(false);
const [currentTime, setCurrentTime] = useState<number>(0);
const [seek, setSeek] = useState<{
seekTo: number;
@@ -56,6 +69,56 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
}
};
const generateTranscription = async () => {
if (transcribing) return;
setTranscribing(true);
setTranscribingProgress(0);
try {
const { engine, model, result } = await transcribe(audio.src);
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result,
engine,
model,
});
} catch (err) {
toast.error(err.message);
}
setTranscribing(false);
};
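For context, generateTranscription leans on the useTranscribe hook. Inferring only from the call site above, the hook's contract looks roughly like this; a sketch, since the real hook lives in @renderer/hooks and may carry extra options or fields:

// Assumed contract of useTranscribe, inferred from the call above.
type TranscribeOutcome = {
  engine: string; // the service that produced the result
  model: string;  // e.g. "base", "small", "medium", "large", "whisper-1"
  result: any;    // transcription payload persisted on the record
};

declare function useTranscribe(): {
  transcribe: (mediaSrc: string) => Promise<TranscribeOutcome>;
};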
const findTranscriptionFromWebApi = async () => {
const res = await webApi.transcriptions({
targetMd5: audio.md5,
});
const transcript = (res?.transcriptions || []).filter((t) =>
["base", "small", "medium", "large", "whisper-1"].includes(t.model)
)?.[0];
if (!transcript) {
throw new Error("Transcription not found");
}
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result: transcript.result,
engine: transcript.engine,
model: transcript.model,
});
};
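findTranscriptionFromWebApi asks the web API for candidate transcriptions matching the file's MD5 and accepts only whisper-family models. The response shape it relies on, inferred from usage (the real API client may return more fields):

// Assumed shape of webApi.transcriptions({ targetMd5 }), inferred from usage.
type TranscriptionsResponse = {
  transcriptions?: Array<{
    engine: string;
    model: string; // checked against the whisper model whitelist above
    result: any;
  }>;
};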
const findOrGenerateTranscription = async () => {
try {
await findTranscriptionFromWebApi();
} catch (err) {
console.error(err);
await generateTranscription();
}
};
const handleShare = async () => {
if (!audio.source && !audio.isUploaded) {
try {
@@ -110,11 +173,26 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
}, [audio]);
useEffect(() => {
if (!transcription) return;
addDblistener(onTransactionUpdate);
if (transcription?.state == "pending") {
findOrGenerateTranscription();
}
if (whisperConfig.service === "local") {
EnjoyApp.whisper.onProgress((_, p: number) => {
if (p > 100) p = 100;
setTranscribingProgress(p);
});
}
return () => {
removeDbListener(onTransactionUpdate);
EnjoyApp.whisper.removeProgressListeners();
};
- }, [transcription]);
+ }, [md5, transcription]);
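The progress wiring in this effect assumes EnjoyApp.whisper exposes an event-style IPC surface. A sketch of that assumed interface, based only on the two calls above; the real EnjoyApp typings live elsewhere in the app:

// Assumed preload-side surface for whisper progress events.
interface WhisperApi {
  onProgress(listener: (event: unknown, progress: number) => void): void;
  removeProgressListeners(): void;
}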
if (!audio) {
return <LoaderSpin />;
@@ -183,8 +261,10 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
mediaId={audio.id}
mediaType="Audio"
mediaName={audio.name}
- mediaUrl={audio.src}
transcription={transcription}
transcribing={transcribing}
progress={transcribingProgress}
transcribe={generateTranscription}
currentSegmentIndex={currentSegmentIndex}
onSelectSegment={(index) => {
if (currentSegmentIndex === index) return;
@@ -219,11 +299,69 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
</AlertDialogContent>
</AlertDialog>
{!initialized && (
<div className="top-0 w-full h-full absolute z-30 bg-background/10 flex items-center justify-center">
<LoaderIcon className="text-muted-foreground animate-spin w-8 h-8" />
</div>
)}
{/* Show loading progress until waveform is decoded & transcribed */}
<AlertDialog open={!initialized || !Boolean(transcription?.result)}>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
<AlertDialogDescription>
{t("itMayTakeAWhileToPrepareForTheFirstLoad")}
</AlertDialogDescription>
</AlertDialogHeader>
<div className="py-4">
{initialized ? (
<div className="mb-4 flex items-center space-x-4">
<CheckCircleIcon className="w-4 h-4 text-green-500" />
<span>{t("waveformIsDecoded")}</span>
</div>
) : (
<div className="mb-4 flex items-center space-x-4">
<LoaderIcon className="w-4 h-4 animate-spin" />
<span>{t("decodingWaveform")}</span>
</div>
)}
{!transcription ? (
<div className="flex items-center space-x-4">
<PingPoint colorClassName="bg-muted" />
<span>{t("loadingTranscription")}</span>
</div>
) : transcription.result ? (
<div className="flex items-center space-x-4">
<CheckCircleIcon className="w-4 h-4 text-green-500" />
<span>{t("transcribedSuccessfully")}</span>
</div>
) : transcribing ? (
<div className="">
<div className="flex items-center space-x-4 mb-2">
<PingPoint colorClassName="bg-yellow-500" />
<span>{t("transcribing")}</span>
</div>
{whisperConfig.service === "local" && (
<Progress value={transcribingProgress} />
)}
</div>
) : (
<div className="flex items-center space-x-4">
<PingPoint colorClassName="bg-muted" />
<div className="inline">
<span>{t("notTranscribedYet")}</span>
<Button className="ml-4" size="sm" onClick={generateTranscription}>
{t("transcribe")}
</Button>
</div>
</div>
)}
</div>
<AlertDialogFooter>
<Button variant="secondary" onClick={() => navigate(-1)}>
{t("cancel")}
</Button>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
</div>
);
};
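For context, the component accepts either an id or an md5. A hypothetical route wiring for the page; the actual router setup is not part of this diff:

// Hypothetical usage; the real route configuration lives elsewhere in the app.
import { useParams } from "react-router-dom";
import { AudioDetail } from "@renderer/components"; // import path assumed

export const AudioPage = () => {
  const { id } = useParams<{ id: string }>();
  return <AudioDetail id={id} />;
};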

View File

@@ -319,7 +319,7 @@ export const MediaPlayer = (props: {
const subscriptions = [
wavesurfer.on("play", () => setIsPlaying(true)),
wavesurfer.on("pause", () => setIsPlaying(false)),
wavesurfer.on("loading", (percent: number) => console.log(percent)),
wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
wavesurfer.on("timeupdate", (time: number) => setCurrentTime(time)),
wavesurfer.on("decode", () => {
if (waveform?.frequencies) return;

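The subscriptions array works because in wavesurfer.js v7 wavesurfer.on returns an unsubscribe function, so the effect can release every listener in one pass. A condensed sketch of the pattern, simplified from the code above rather than a drop-in replacement:

// Condensed from the effect above: collect unsubscribers, release on cleanup.
useEffect(() => {
  const subscriptions = [
    wavesurfer.on("play", () => setIsPlaying(true)),
    wavesurfer.on("pause", () => setIsPlaying(false)),
  ];
  return () => subscriptions.forEach((unsubscribe) => unsubscribe());
}, [wavesurfer]);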
View File

@@ -12,7 +12,6 @@ import {
ScrollArea,
Button,
PingPoint,
- toast,
} from "@renderer/components/ui";
import React, { useEffect, useContext, useState } from "react";
import { t } from "i18next";
@@ -22,14 +21,15 @@ import {
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
- import { useTranscribe } from "@renderer/hooks";
export const MediaTranscription = (props: {
transcription: TranscriptionType;
+ progress: number;
+ transcribe: () => void;
+ transcribing: boolean;
mediaId: string;
mediaType: "Audio" | "Video";
mediaName?: string;
- mediaUrl: string;
currentSegmentIndex?: number;
onSelectSegment?: (index: number) => void;
}) => {
@@ -38,41 +38,20 @@ export const MediaTranscription = (props: {
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const {
transcription,
+ transcribing,
+ progress,
+ transcribe,
mediaId,
mediaType,
mediaName,
- mediaUrl,
currentSegmentIndex,
onSelectSegment,
} = props;
const containerRef = React.createRef<HTMLDivElement>();
- const [transcribing, setTranscribing] = useState<boolean>(false);
- const { transcribe } = useTranscribe();
- const [progress, setProgress] = useState<number>(0);
const [recordingStats, setRecordingStats] =
useState<SegementRecordingStatsType>([]);
- const generate = async () => {
- if (transcribing) return;
- setTranscribing(true);
- setProgress(0);
- try {
- const { engine, model, result } = await transcribe(mediaUrl);
- await EnjoyApp.transcriptions.update(transcription.id, {
- state: "finished",
- result,
- engine,
- model,
- });
- } catch (err) {
- toast.error(err.message);
- }
- setTranscribing(false);
- };
const fetchSegmentStats = async () => {
if (!mediaId) return;
@@ -85,22 +64,10 @@ export const MediaTranscription = (props: {
addDblistener(fetchSegmentStats);
fetchSegmentStats();
- if (transcription?.state == "pending") {
- generate();
- }
- if (whisperConfig.service === "local") {
- EnjoyApp.whisper.onProgress((_, p: number) => {
- if (p > 100) p = 100;
- setProgress(p);
- });
- }
return () => {
removeDbListener(fetchSegmentStats);
- EnjoyApp.whisper.removeProgressListeners();
};
- }, [mediaId, mediaType, transcription]);
+ }, [transcription]);
useEffect(() => {
containerRef.current
@@ -159,7 +126,7 @@ export const MediaTranscription = (props: {
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
- <AlertDialogAction onClick={generate}>
+ <AlertDialogAction onClick={transcribe}>
{t("transcribe")}
</AlertDialogAction>
</AlertDialogFooter>

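After this change MediaTranscription no longer transcribes on its own; the parent page owns the transcribing state, the progress value, and the transcribe action. Consolidating the scattered hunks above, the component's prop contract is roughly the following (types are the app's ambient types):

// The component's props after this commit, consolidated from the diff above.
interface MediaTranscriptionProps {
  transcription: TranscriptionType;
  progress: number;              // 0-100, local whisper progress
  transcribe: () => void;        // supplied by AudioDetail/VideoDetail
  transcribing: boolean;
  mediaId: string;
  mediaType: "Audio" | "Video";
  mediaName?: string;
  currentSegmentIndex?: number;
  onSelectSegment?: (index: number) => void;
}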
View File

@@ -38,7 +38,7 @@ export const PostActions = (props: { post: PostType }) => {
const [copied, setCopied] = useState<boolean>(false);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const [asking, setAsking] = useState<boolean>(false);
- const [aiReplies, setAiReplies] = useState<MessageType[]>([]);
+ const [aiReplies, setAiReplies] = useState<Partial<MessageType>[]>([]);
const handleAddMedium = async () => {
if (post.targetType !== "Medium") return;
@@ -185,7 +185,7 @@ export const PostActions = (props: { post: PostType }) => {
);
};
- const AIReplies = (props: { replies: MessageType[] }) => {
+ const AIReplies = (props: { replies: Partial<MessageType>[] }) => {
return (
<div>
<div className="space-y-2">

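Loosening the state to Partial<MessageType> lets the list hold provisional AI replies before the server fills in the full record. A small illustration; the field names here are hypothetical:

// Hypothetical illustration: a draft AI reply can render before it has
// server-assigned fields such as an id or timestamps.
const draftReply: Partial<MessageType> = {
  content: "Here's one way to think about it...",
};
setAiReplies((replies) => [...replies, draftReply]);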
View File

@@ -51,11 +51,6 @@ export const TedTalksSegment = () => {
coverUrl: selectedTalk?.primaryImageSet[0].url,
})
.then((record) => {
- if (!record) {
- toast.error(t("failedToDownload"));
- return;
- }
if (type === "video") {
navigate(`/videos/${record.id}`);
} else {

View File

@@ -2,6 +2,7 @@ import { useEffect, useState, useContext } from "react";
import {
DbProviderContext,
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
import {
LoaderSpin,
@@ -10,7 +11,7 @@ import {
MediaPlayer,
MediaTranscription,
} from "@renderer/components";
- import { LoaderIcon } from "lucide-react";
+ import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import {
AlertDialog,
AlertDialogHeader,
@@ -20,22 +21,34 @@ import {
AlertDialogFooter,
AlertDialogCancel,
Button,
PingPoint,
Progress,
ScrollArea,
toast,
} from "@renderer/components/ui";
import { t } from "i18next";
import { useTranscribe } from "@renderer/hooks";
import { useNavigate } from "react-router-dom";
export const VideoDetail = (props: { id?: string; md5?: string }) => {
const navigate = useNavigate();
const { id, md5 } = props;
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const { whisperConfig } = useContext(AISettingsProviderContext);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const [video, setVideo] = useState<VideoType | null>(null);
const [transcription, setTranscription] = useState<TranscriptionType>(null);
- const [initialized, setInitialized] = useState<boolean>(false);
const [sharing, setSharing] = useState<boolean>(false);
// Transcription controls
const [transcribing, setTranscribing] = useState<boolean>(false);
const { transcribe } = useTranscribe();
const [transcribingProgress, setTranscribingProgress] = useState<number>(0);
// Player controls
+ const [initialized, setInitialized] = useState<boolean>(false);
const [currentTime, setCurrentTime] = useState<number>(0);
const [seek, setSeek] = useState<{
seekTo: number;
@@ -58,6 +71,56 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
}
};
const generateTranscription = async () => {
if (transcribing) return;
setTranscribing(true);
setTranscribingProgress(0);
try {
const { engine, model, result } = await transcribe(video.src);
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result,
engine,
model,
});
} catch (err) {
toast.error(err.message);
}
setTranscribing(false);
};
const findTranscriptionFromWebApi = async () => {
const res = await webApi.transcriptions({
targetMd5: video.md5,
});
const transcript = (res?.transcriptions || []).filter((t) =>
["base", "small", "medium", "large", "whisper-1"].includes(t.model)
)?.[0];
if (!transcript) {
throw new Error("Transcription not found");
}
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result: transcript.result,
engine: transcript.engine,
model: transcript.model,
});
};
const findOrGenerateTranscription = async () => {
try {
await findTranscriptionFromWebApi();
} catch (err) {
console.error(err);
await generateTranscription();
}
};
const handleShare = async () => {
if (!video.source.startsWith("http")) {
toast.error(t("shareFailed"), {
@@ -116,11 +179,26 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
}, [video]);
useEffect(() => {
if (!transcription) return;
addDblistener(onTransactionUpdate);
if (transcription?.state == "pending") {
findOrGenerateTranscription();
}
if (whisperConfig.service === "local") {
EnjoyApp.whisper.onProgress((_, p: number) => {
if (p > 100) p = 100;
setTranscribingProgress(p);
});
}
return () => {
removeDbListener(onTransactionUpdate);
EnjoyApp.whisper.removeProgressListeners();
};
- }, [transcription]);
+ }, [md5, transcription]);
if (!video) {
return <LoaderSpin />;
@@ -193,9 +271,11 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
<MediaTranscription
mediaId={video.id}
mediaType="Video"
- mediaUrl={video.src}
mediaName={video.name}
transcription={transcription}
transcribing={transcribing}
progress={transcribingProgress}
transcribe={generateTranscription}
currentSegmentIndex={currentSegmentIndex}
onSelectSegment={(index) => {
if (currentSegmentIndex === index) return;
@@ -232,6 +312,70 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
</AlertDialogContent>
</AlertDialog>
{/* Show loading progress until waveform is decoded & transcribed */}
<AlertDialog open={!initialized || !Boolean(transcription?.result)}>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("preparingVideo")}</AlertDialogTitle>
<AlertDialogDescription>
{t("itMayTakeAWhileToPrepareForTheFirstLoad")}
</AlertDialogDescription>
</AlertDialogHeader>
<div className="py-4">
{initialized ? (
<div className="mb-4 flex items-center space-x-4">
<CheckCircleIcon className="w-4 h-4 text-green-500" />
<span>{t("waveformIsDecoded")}</span>
</div>
) : (
<div className="mb-4 flex items-center space-x-4">
<LoaderIcon className="w-4 h-4 animate-spin" />
<span>{t("decodingWaveform")}</span>
</div>
)}
{!transcription ? (
<div className="flex items-center space-x-4">
<PingPoint colorClassName="bg-muted" />
<span>{t("loadingTranscription")}</span>
</div>
) : transcription.result ? (
<div className="flex items-center space-x-4">
<CheckCircleIcon className="w-4 h-4 text-green-500" />
<span>{t("transcribedSuccessfully")}</span>
</div>
) : transcribing ? (
<div className="">
<div className="flex items-center space-x-4 mb-2">
<PingPoint colorClassName="bg-yellow-500" />
<span>{t("transcribing")}</span>
</div>
{whisperConfig.service === "local" && (
<Progress value={transcribingProgress} />
)}
</div>
) : (
<div className="flex items-center space-x-4">
<PingPoint colorClassName="bg-muted" />
<div className="inline">
<span>{t("notTranscribedYet")}</span>
<Button className="ml-4" size="sm" onClick={generateTranscription}>
{t("transcribe")}
</Button>
</div>
</div>
)}
</div>
<AlertDialogFooter>
<Button variant="secondary" onClick={() => navigate(-1)}>
{t("cancel")}
</Button>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
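The dialog's open expression keeps it on screen until both preparation steps finish; it is equivalent to tracking a single readiness flag:

// Equivalent formulation of the dialog's open condition above.
const ready = initialized && Boolean(transcription?.result);
// ... <AlertDialog open={!ready}>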
{!initialized && (
<div className="top-0 w-full h-full absolute z-30 bg-background/10 flex items-center justify-center">
<LoaderIcon className="text-muted-foreground animate-spin w-8 h-8" />