From e0b2f59a234b9d6edcb1fcca09aec1a7dd04d6e4 Mon Sep 17 00:00:00 2001
From: an-lee <an.lee.work@gmail.com>
Date: Sun, 19 May 2024 09:05:02 +0800
Subject: [PATCH] Feat: read & record through the transcription (#619)

* may read & record fulltext of transcription

* may delete & remove recording

* may assess recording

* fix pronunciation assessment result style

* fix style

* fix style

* fix player config

* update locale & fix warning
---
 enjoy/src/i18n/en.json                        |   3 +-
 enjoy/src/i18n/zh-CN.json                     |   3 +-
 enjoy/src/renderer/components/medias/index.ts |   1 +
 .../media-transcription-read-button.tsx       | 397 ++++++++++++++++++
 .../components/medias/media-transcription.tsx |   3 +
 .../pronunciation-assessment-word-result.tsx  |   4 +-
 .../recordings/recording-detail.tsx           |  10 +-
 enjoy/src/renderer/hooks/use-recordings.tsx   |   2 +-
 8 files changed, 414 insertions(+), 9 deletions(-)
 create mode 100644 enjoy/src/renderer/components/medias/media-transcription-read-button.tsx

diff --git a/enjoy/src/i18n/en.json b/enjoy/src/i18n/en.json
index a761aad2..c739f617 100644
--- a/enjoy/src/i18n/en.json
+++ b/enjoy/src/i18n/en.json
@@ -588,5 +588,6 @@
   "saveTranscription": "Save transcription",
   "areYouSureToSaveTranscription": "It will perform a force-alignment between the audio and your edited transcription. Are you sure to continue?",
   "summarize": "Summarize",
-  "noResultsFound": "No results found"
+  "noResultsFound": "No results found",
+  "readThrough": "Read through"
 }
diff --git a/enjoy/src/i18n/zh-CN.json b/enjoy/src/i18n/zh-CN.json
index ddd3c35a..e9aee25c 100644
--- a/enjoy/src/i18n/zh-CN.json
+++ b/enjoy/src/i18n/zh-CN.json
@@ -588,5 +588,6 @@
   "saveTranscription": "保存语音文本",
   "areYouSureToSaveTranscription": "即将根据您修改后的语音文本对语音重新进行对齐，确定要继续吗？",
   "summarize": "提炼主题",
-  "noResultsFound": "没有找到结果"
+  "noResultsFound": "没有找到结果",
+  "readThrough": "朗读全文"
 }
diff --git a/enjoy/src/renderer/components/medias/index.ts b/enjoy/src/renderer/components/medias/index.ts
index 968448f9..4c1e9241 100644
--- a/enjoy/src/renderer/components/medias/index.ts
+++ b/enjoy/src/renderer/components/medias/index.ts
@@ -5,6 +5,7 @@ export * from "./media-recordings";
 export * from "./media-current-recording";
 export * from "./media-recorder";
 export * from "./media-transcription";
+export * from "./media-transcription-read-button";
 export * from "./media-transcription-form";
 export * from "./media-player";
 export * from "./media-provider";
diff --git a/enjoy/src/renderer/components/medias/media-transcription-read-button.tsx b/enjoy/src/renderer/components/medias/media-transcription-read-button.tsx
new file mode 100644
index 00000000..de6bf702
--- /dev/null
+++ b/enjoy/src/renderer/components/medias/media-transcription-read-button.tsx
@@ -0,0 +1,397 @@
+import {
+  AppSettingsProviderContext,
+  MediaPlayerProviderContext,
+} from "@renderer/context";
+import { useContext, useEffect, useRef, useState } from "react";
+import {
+  AlertDialog,
+  AlertDialogAction,
+  AlertDialogCancel,
+  AlertDialogContent,
+  AlertDialogDescription,
+  AlertDialogFooter,
+  AlertDialogHeader,
+  AlertDialogTitle,
+  Button,
+  Dialog,
+  DialogContent,
+  DialogTrigger,
+  DropdownMenu,
+  DropdownMenuContent,
+  DropdownMenuItem,
+  DropdownMenuTrigger,
+  ScrollArea,
+  Sheet,
+  SheetClose,
+  SheetContent,
+  SheetHeader,
+  toast,
+} from "@renderer/components/ui";
+import { TimelineEntry } from "echogarden/dist/utilities/Timeline";
+import { t } from "i18next";
+import WaveSurfer from "wavesurfer.js";
+import {
+  ChevronDownIcon,
+  DownloadIcon,
+  GaugeCircleIcon,
+  LoaderIcon,
+  MicIcon,
+  MoreHorizontalIcon,
+  SquareIcon,
+  Trash2Icon,
+} from "lucide-react";
+import RecordPlugin from "wavesurfer.js/dist/plugins/record";
+import { useRecordings } from "@/renderer/hooks";
+import { formatDateTime } from "@/renderer/lib/utils";
+import { MediaPlayer, MediaProvider } from "@vidstack/react";
+import {
+  DefaultAudioLayout,
+  defaultLayoutIcons,
+} from "@vidstack/react/player/layouts/default";
+import { RecordingDetail } from "@renderer/components";
+
+const TEN_MINUTES = 60 * 10;
+let interval: NodeJS.Timeout;
+
+export const MediaTranscriptionReadButton = () => {
+  const [open, setOpen] = useState(false);
+  const [deleting, setDeleting] = useState<RecordingType>(null);
+  const { EnjoyApp } = useContext(AppSettingsProviderContext);
+  const { media, transcription } = useContext(MediaPlayerProviderContext);
+  const [assessing, setAssessing] = useState<RecordingType>();
+
+  // Destroys the recording currently held in `deleting`; does nothing when no
+  // recording is pending deletion.
+  const handleDelete = () => {
+    if (deleting) EnjoyApp.recordings.destroy(deleting.id);
+  };
+
+  // Opens a save dialog pre-filled with the recording's filename, then starts
+  // the download and reports its progress and outcome through a toast.
+  const handleDownload = (recording: RecordingType) => {
+    const { filename, src } = recording;
+    const saveOptions = { title: t("download"), defaultPath: filename };
+
+    EnjoyApp.dialog
+      .showSaveDialog(saveOptions)
+      .then((savePath) => {
+        // No path came back (presumably the dialog was dismissed).
+        if (!savePath) return;
+
+        toast.promise(EnjoyApp.download.start(src, savePath as string), {
+          loading: t("downloading", { file: filename }),
+          success: () => t("downloadedSuccessfully"),
+          error: t("downloadFailed"),
+          position: "bottom-right",
+        });
+      })
+      .catch((err) => {
+        if (err) toast.error(err.message);
+      });
+  };
+
+  const {
+    recordings,
+    fetchRecordings,
+    loading: loadingRecordings,
+    hasMore: hasMoreRecordings,
+  } = useRecordings(media, -1);
+
+  return (
+    <>
+      <Dialog open={open} onOpenChange={setOpen}>
+        <DialogTrigger asChild>
+          <Button variant="outline" size="sm" className="hidden lg:block">
+            {t("readThrough")}
+          </Button>
+        </DialogTrigger>
+        <DialogContent
+          onPointerDownOutside={(event) => event.preventDefault()}
+          className="max-w-screen-md xl:max-w-screen-lg h-5/6 flex flex-col p-0"
+        >
+          <ScrollArea className="flex-1 px-6 pt-4">
+            <div className="select-text mx-auto w-full max-w-prose">
+              <h3 className="font-bold text-xl my-4">{media.name}</h3>
+              {transcription.result.timeline.map(
+                (sentence: TimelineEntry, index: number) => (
+                  <div key={index} className="flex flex-start space-x-2 mb-4">
+                    <span className="text-sm text-muted-foreground min-w-max leading-8">
+                      #{index + 1}
+                    </span>
+                    <div className="text-lg leading-8">{sentence.text}</div>
+                  </div>
+                )
+              )}
+            </div>
+            <div className="mt-12">
+              {recordings.map((recording) => (
+                <div
+                  key={recording.id}
+                  className="mx-auto w-full max-w-prose px-4 mb-4"
+                  id={recording.id}
+                >
+                  <div className="flex items-center justify-end space-x-2 mb-2">
+                    <span className="text-sm text-muted-foreground">
+                      {formatDateTime(recording.createdAt)}
+                    </span>
+                    <DropdownMenu>
+                      <DropdownMenuTrigger>
+                        <MoreHorizontalIcon className="w-4 h-4" />
+                      </DropdownMenuTrigger>
+
+                      <DropdownMenuContent>
+                        <DropdownMenuItem
+                          className="cursor-pointer"
+                          onClick={() => handleDownload(recording)}
+                        >
+                          <DownloadIcon className="w-4 h-4 mr-2" />
+                          <span>{t("download")}</span>
+                        </DropdownMenuItem>
+                        <DropdownMenuItem
+                          className="cursor-pointer"
+                          onClick={() => setAssessing(recording)}
+                        >
+                          <GaugeCircleIcon
+                            className={`w-4 h-4 mr-2
+                    ${
+                      recording.pronunciationAssessment
+                        ? recording.pronunciationAssessment
+                            .pronunciationScore >= 80
+                          ? "text-green-500"
+                          : recording.pronunciationAssessment
+                              .pronunciationScore >= 60
+                          ? "text-yellow-600"
+                          : "text-red-500"
+                        : ""
+                    }
+                    `}
+                          />
+                          <span>{t("pronunciationAssessment")}</span>
+                        </DropdownMenuItem>
+                        <DropdownMenuItem
+                          className="text-destructive cursor-pointer"
+                          onClick={() => setDeleting(recording)}
+                        >
+                          <Trash2Icon className="w-4 h-4 mr-2" />
+                          <span>{t("delete")}</span>
+                        </DropdownMenuItem>
+                      </DropdownMenuContent>
+                    </DropdownMenu>
+                  </div>
+                  <MediaPlayer
+                    duration={recording.duration / 1000}
+                    src={recording.src}
+                  >
+                    <MediaProvider />
+                    <DefaultAudioLayout icons={defaultLayoutIcons} />
+                  </MediaPlayer>
+                </div>
+              ))}
+              {hasMoreRecordings && (
+                <div className="flex items-center justify-center">
+                  <Button
+                    variant="secondary"
+                    onClick={() => fetchRecordings(recordings.length)}
+                  >
+                    {loadingRecordings && (
+                      <LoaderIcon className="w-4 h-4 animate-spin" />
+                    )}
+                    <span>{t("loadMore")}</span>
+                  </Button>
+                </div>
+              )}
+            </div>
+          </ScrollArea>
+          <div className="h-16 border-t">
+            {open && <RecorderButton onRecorded={() => fetchRecordings(0)} />}
+          </div>
+        </DialogContent>
+      </Dialog>
+
+      <AlertDialog
+        open={!!deleting}
+        onOpenChange={(value) => {
+          if (value) return;
+          setDeleting(null);
+        }}
+      >
+        <AlertDialogContent>
+          <AlertDialogHeader>
+            <AlertDialogTitle>{t("deleteRecording")}</AlertDialogTitle>
+            <AlertDialogDescription>
+              {t("deleteRecordingConfirmation")}
+            </AlertDialogDescription>
+          </AlertDialogHeader>
+          <AlertDialogFooter>
+            <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
+            <AlertDialogAction asChild>
+              <Button onClick={handleDelete}>{t("delete")}</Button>
+            </AlertDialogAction>
+          </AlertDialogFooter>
+        </AlertDialogContent>
+      </AlertDialog>
+
+      <Sheet
+        open={Boolean(assessing)}
+        onOpenChange={(open) => {
+          if (!open) setAssessing(undefined);
+        }}
+      >
+        <SheetContent
+          side="bottom"
+          className="rounded-t-2xl shadow-lg max-h-screen overflow-y-scroll"
+          displayClose={false}
+        >
+          <SheetHeader className="flex items-center justify-center -mt-4 mb-2">
+            <SheetClose>
+              <ChevronDownIcon />
+            </SheetClose>
+          </SheetHeader>
+
+          {assessing && <RecordingDetail recording={assessing} />}
+        </SheetContent>
+      </Sheet>
+    </>
+  );
+};
+
+const RecorderButton = (props: { onRecorded: () => void }) => {
+  const { onRecorded } = props;
+  const { media, transcription } = useContext(MediaPlayerProviderContext);
+  const { EnjoyApp } = useContext(AppSettingsProviderContext);
+  const [isRecording, setIsRecording] = useState(false);
+  const [recorder, setRecorder] = useState<RecordPlugin>();
+  const [access, setAccess] = useState<boolean>(false);
+  const [duration, setDuration] = useState<number>(0);
+  const ref = useRef(null);
+
+  // Queries microphone access through EnjoyApp and mirrors the answer into
+  // `access`, showing a warning toast when access is not granted.
+  const askForMediaAccess = () => {
+    EnjoyApp.system.preferences
+      .mediaAccess("microphone")
+      .then((granted) => {
+        setAccess(Boolean(granted));
+        if (!granted) toast.warning(t("noMicrophoneAccess"));
+      });
+  };
+
+  // Starts a take on the first available audio input and ticks `duration`
+  // every 100 ms; the take is stopped once `duration` reaches TEN_MINUTES.
+  // Shows a toast instead when the recorder is not ready or no input is found.
+  const startRecord = () => {
+    if (isRecording) return;
+    if (!recorder) return void toast.warning(t("noMicrophoneAccess"));
+
+    RecordPlugin.getAvailableAudioDevices()
+      .then((devices) => {
+        const device = devices.find((d) => d.kind === "audioinput");
+        if (!device) return void toast.error(t("cannotFindMicrophone"));
+
+        recorder.startRecording({ deviceId: device.deviceId });
+        setIsRecording(true);
+        setDuration(0);
+        // Drop any ticker left over from an earlier take before starting anew.
+        if (interval) clearInterval(interval);
+        interval = setInterval(() => {
+          setDuration((duration) => {
+            if (duration >= TEN_MINUTES) recorder.stopRecording();
+            return duration + 0.1;
+          });
+        }, 100);
+      })
+      // Device enumeration may reject; report it rather than leave it unhandled.
+      .catch((err) => toast.error(err.message));
+  };
+
+  // Saves the take for the current media, with the full transcription (one
+  // sentence per line) as reference text. NOTE(review): `duration` is read from
+  // the closure the "record-end" listener captured — confirm it is not stale.
+  const createRecording = async (blob: Blob) => {
+    if (!media) return;
+    toast.promise(
+      async () => {
+        const sentences: TimelineEntry[] = transcription.result.timeline;
+        await EnjoyApp.recordings.create({
+          targetId: media.id,
+          targetType: media.mediaType,
+          blob: {
+            type: blob.type.split(";")[0],
+            arrayBuffer: await blob.arrayBuffer(),
+          },
+          referenceId: -1,
+          referenceText: sentences.map((s) => s.text).join("\n"),
+          duration,
+        });
+        onRecorded();
+      },
+      {
+        loading: t("savingRecording"),
+        success: t("recordingSaved"),
+        error: (e) => t("failedToSaveRecording") + " : " + e.message,
+        position: "bottom-right",
+      }
+    );
+  };
+
+  // Once microphone access is granted and the container is mounted, creates a
+  // WaveSurfer instance with the record plugin and saves each take on its end.
+  useEffect(() => {
+    if (!access || !ref?.current) return;
+    const ws = WaveSurfer.create({
+      container: ref.current,
+      fillParent: true,
+      height: 40,
+      autoCenter: false,
+      normalize: false,
+    });
+    const record = ws.registerPlugin(RecordPlugin.create());
+    setRecorder(record);
+
+    record.on("record-end", async (blob: Blob) => {
+      if (interval) clearInterval(interval); // stop the duration ticker with the take
+      createRecording(blob);
+      setIsRecording(false);
+    });
+
+    return () => {
+      if (interval) clearInterval(interval);
+      record.stopRecording(); // local plugin: `recorder` state is stale in this closure
+      ws.destroy();
+    };
+  }, [access, ref]);
+
+  useEffect(() => {
+    askForMediaAccess();
+  }, []);
+  return (
+    <div className="h-16 flex items-center justify-center px-6">
+      <div
+        ref={ref}
+        className={isRecording ? "w-full mr-4" : "w-0 overflow-hidden"}
+      ></div>
+      {isRecording && (
+        <div className="text-muted-foreground text-sm w-24 mr-4">
+          {duration.toFixed(1)} / {TEN_MINUTES}
+        </div>
+      )}
+      <Button
+        variant="ghost"
+        className="aspect-square p-0 h-12 rounded-full bg-red-500 hover:bg-red-500/90"
+        onClick={() => {
+          if (isRecording) {
+            recorder?.stopRecording();
+          } else {
+            startRecord();
+          }
+        }}
+      >
+        {isRecording ? (
+          <SquareIcon fill="white" className="w-6 h-6 text-white" />
+        ) : (
+          <MicIcon className="w-6 h-6 text-white" />
+        )}
+      </Button>
+    </div>
+  );
+};
diff --git a/enjoy/src/renderer/components/medias/media-transcription.tsx b/enjoy/src/renderer/components/medias/media-transcription.tsx
index da8a94fa..8af316c6 100644
--- a/enjoy/src/renderer/components/medias/media-transcription.tsx
+++ b/enjoy/src/renderer/components/medias/media-transcription.tsx
@@ -27,6 +27,7 @@ import {
 import { AlignmentResult } from "echogarden/dist/api/API.d.js";
 import { formatDuration } from "@renderer/lib/utils";
 import { MediaTranscriptionForm } from "./media-transcription-form";
+import { MediaTranscriptionReadButton } from "./media-transcription-read-button";
 
 export const MediaTranscription = () => {
   const containerRef = useRef<HTMLDivElement>();
@@ -115,6 +116,7 @@ export const MediaTranscription = () => {
             <span className="capitalize">{t("transcript")}</span>
           </div>
           <div className="flex space-x-2">
+            <MediaTranscriptionReadButton />
             <AlertDialog>
               <AlertDialogTrigger asChild>
                 <Button
@@ -148,6 +150,7 @@ export const MediaTranscription = () => {
                 </AlertDialogFooter>
               </AlertDialogContent>
             </AlertDialog>
+
             <MediaTranscriptionForm />
           </div>
         </div>
diff --git a/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-word-result.tsx b/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-word-result.tsx
index b504030e..4c02a6dd 100644
--- a/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-word-result.tsx
+++ b/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-word-result.tsx
@@ -75,9 +75,9 @@ export const PronunciationAssessmentWordResult = (props: {
             onClick={() => {
               onSeek && onSeek(result.offset / 1e7);
             }}
-            className="text-center"
+            className="text-center mb-3"
           >
-            <div className="">
+            <div className="mb-1">
               {result.phonemes.map((phoneme, index) => (
                 <span
                   key={index}
diff --git a/enjoy/src/renderer/components/recordings/recording-detail.tsx b/enjoy/src/renderer/components/recordings/recording-detail.tsx
index f228dc55..bb5f731a 100644
--- a/enjoy/src/renderer/components/recordings/recording-detail.tsx
+++ b/enjoy/src/renderer/components/recordings/recording-detail.tsx
@@ -58,10 +58,12 @@ export const RecordingDetail = (props: { recording: RecordingType }) => {
           }}
         />
       ) : (
-        <ScrollArea className="h-72 py-4 px-8">
-          <p className="text-xl font-serif tracking-wide">
-            {recording?.referenceText}
-          </p>
+        <ScrollArea className="h-72 py-4 px-8 select-text">
+          {(recording?.referenceText || "").split("\n").map((line, index) => (
+            <div key={index} className="text-xl font-serif tracking-wide mb-2">
+              {line}
+            </div>
+          ))}
         </ScrollArea>
       )}
 
diff --git a/enjoy/src/renderer/hooks/use-recordings.tsx b/enjoy/src/renderer/hooks/use-recordings.tsx
index 0f5d2d99..5578ff8d 100644
--- a/enjoy/src/renderer/hooks/use-recordings.tsx
+++ b/enjoy/src/renderer/hooks/use-recordings.tsx
@@ -1,4 +1,4 @@
-import { useState, useContext, useEffect, useRef, useReducer } from "react";
+import { useState, useContext, useEffect, useReducer } from "react";
 import {
   AppSettingsProviderContext,
   DbProviderContext,