Feat: unscripted pronunciation assessment (#666)

* clean code * add pronunciation assessment page * load assessments * recording target constraint * display assessment card * update style * may check assessment detail * fix style * add new assessment page * update pronunciation assessment form * add language column to models * create pronunciation assessment * upload file to assess * locales * add source for assessment * display language
2024-06-13 12:55:11 +08:00
parent 7f9e997dad
commit e124609437
29 changed files with 1070 additions and 87 deletions
--- a/enjoy/src/renderer/components/medias/media-transcription-read-button.tsx
+++ b/enjoy/src/renderer/components/medias/media-transcription-read-button.tsx
@@ -41,8 +41,8 @@ import {
  Trash2Icon,
 } from "lucide-react";
 import RecordPlugin from "wavesurfer.js/dist/plugins/record";
-import { useRecordings } from "@/renderer/hooks";
-import { formatDateTime } from "@/renderer/lib/utils";
+import { useRecordings } from "@renderer/hooks";
+import { formatDateTime } from "@renderer/lib/utils";
 import { MediaPlayer, MediaProvider } from "@vidstack/react";
 import {
  DefaultAudioLayout,
--- a/enjoy/src/renderer/components/misc/sidebar.tsx
+++ b/enjoy/src/renderer/components/misc/sidebar.tsx
@@ -14,7 +14,6 @@ import {
  DropdownMenuSubTrigger,
  DropdownMenuItem,
  Separator,
-  toast,
 } from "@renderer/components/ui";
 import {
  SettingsIcon,
@@ -30,6 +29,7 @@ import {
  HelpCircleIcon,
  ExternalLinkIcon,
  NotebookPenIcon,
+  SpeechIcon,
 } from "lucide-react";
 import { useLocation, Link } from "react-router-dom";
 import { t } from "i18next";
@@ -44,7 +44,6 @@ export const Sidebar = () => {
  const { EnjoyApp, cable } = useContext(AppSettingsProviderContext);

  useEffect(() => {
-    console.log("Subscrbing ->");
    const channel = new NoticiationsChannel(cable);
    channel.subscribe();
  }, []);
@@ -108,6 +107,15 @@ export const Sidebar = () => {
              testid="sidebar-conversations"
            />

+            <SidebarItem
+              href="/pronunciation_assessments"
+              label={t("sidebar.pronunciationAssessment")}
+              tooltip={t("sidebar.pronunciationAssessment")}
+              active={activeTab.startsWith("/pronunciation_assessments")}
+              Icon={SpeechIcon}
+              testid="sidebar-pronunciation-assessments"
+            />
+
            <SidebarItem
              href="/notes"
              label={t("sidebar.notes")}
--- a/enjoy/src/renderer/components/pronunciation-assessments/index.ts
+++ b/enjoy/src/renderer/components/pronunciation-assessments/index.ts
@@ -1,4 +1,6 @@
-export * from "./pronunciation-assessment-word-result";
+export * from "./pronunciation-assessment-card";
+export * from "./pronunciation-assessment-form";
 export * from "./pronunciation-assessment-fulltext-result";
 export * from "./pronunciation-assessment-score-result";
 export * from "./pronunciation-assessment-score-icon";
+export * from "./pronunciation-assessment-word-result";
--- a/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-card.tsx
+++ b/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-card.tsx
@@ -0,0 +1,138 @@
+import {
+  Button,
+  DropdownMenu,
+  DropdownMenuContent,
+  DropdownMenuTrigger,
+  DropdownMenuItem,
+  RadialProgress,
+  Badge,
+} from "@renderer/components/ui";
+import { scoreColor } from "./pronunciation-assessment-score-result";
+import { t } from "i18next";
+import { formatDateTime } from "@/renderer/lib/utils";
+import { MoreHorizontalIcon, Trash2Icon } from "lucide-react";
+import { Link } from "react-router-dom";
+
+/**
+ * Summary card for one pronunciation assessment.
+ *
+ * Renders the reference text, every score that is an actual number, a link
+ * back to the source audio/video segment when the target is an Audio or Video,
+ * the language badge, the creation time, a delete menu and a "detail" button.
+ *
+ * A `key` placed on the element returned by a component has no effect, so the
+ * list that renders these cards is responsible for keying them.
+ *
+ * @param props.pronunciationAssessment - The assessment to display.
+ * @param props.onSelect - Called with the assessment when "detail" is clicked.
+ * @param props.onDelete - Called with the assessment when "delete" is chosen.
+ */
+export const PronunciationAssessmentCard = (props: {
+  pronunciationAssessment: PronunciationAssessmentType;
+  onSelect: (assessment: PronunciationAssessmentType) => void;
+  onDelete: (assessment: PronunciationAssessmentType) => void;
+}) => {
+  const { pronunciationAssessment: assessment, onSelect, onDelete } = props;
+
+  // Scores that are missing (not a number) are dropped up front so the render
+  // below never has to return `undefined` from a `.map` callback.
+  const scores = [
+    {
+      id: "pronunciationScore",
+      label: t("models.pronunciationAssessment.pronunciationScore"),
+      value: assessment.pronunciationScore,
+    },
+    {
+      id: "accuracyScore",
+      label: t("models.pronunciationAssessment.accuracyScore"),
+      value: assessment.accuracyScore,
+    },
+    {
+      id: "fluencyScore",
+      label: t("models.pronunciationAssessment.fluencyScore"),
+      value: assessment.fluencyScore,
+    },
+    {
+      id: "completenessScore",
+      label: t("models.pronunciationAssessment.completenessScore"),
+      value: assessment.completenessScore,
+    },
+    {
+      id: "prosodyScore",
+      label: t("models.pronunciationAssessment.prosodyScore"),
+      value: assessment.prosodyScore,
+    },
+    {
+      id: "grammarScore",
+      label: t("models.pronunciationAssessment.grammarScore"),
+      value: assessment.grammarScore,
+    },
+    {
+      id: "vocabularyScore",
+      label: t("models.pronunciationAssessment.vocabularyScore"),
+      value: assessment.vocabularyScore,
+    },
+    {
+      id: "topicScore",
+      label: t("models.pronunciationAssessment.topicScore"),
+      value: assessment.topicScore,
+    },
+  ].filter(({ value }) => typeof value === "number");
+
+  return (
+    <div className="bg-background p-4 rounded-lg border hover:shadow">
+      <div className="flex items-start space-x-4">
+        <div className="flex-1 flex flex-col min-h-32">
+          <div className="select-text line-clamp-2 text-muted-foreground font-serif pl-3 border-l-4 mb-4">
+            {/* `target` is treated as optional further down, so guard it here too. */}
+            {assessment.referenceText || assessment.target?.referenceText}
+          </div>
+          <div className="flex items-center gap-2 flex-wrap mb-4">
+            {scores.map(({ id, label, value }) => (
+              <div key={id} className="flex items-center space-x-2 mb-2">
+                <span className="text-muted-foreground text-sm">{label}:</span>
+                <span className={`text-sm font-bold ${scoreColor(value || 0)}`}>
+                  {value}
+                </span>
+              </div>
+            ))}
+          </div>
+          {["Audio", "Video"].includes(assessment.target?.targetType) && (
+            <div className="flex items-center gap-2 mb-4">
+              <span className="text-sm">{t("source")}:</span>
+              <Link
+                to={`/${assessment.target.targetType.toLowerCase()}s/${
+                  assessment.target.targetId
+                }?segmentIndex=${assessment.target.referenceId}`}
+                className="text-sm"
+              >
+                {t(assessment.target?.targetType?.toLowerCase())}
+              </Link>
+            </div>
+          )}
+          <div className="mt-auto flex items-center gap-4">
+            {assessment.language && (
+              <Badge variant="secondary">{assessment.language}</Badge>
+            )}
+            <div className="text-xs text-muted-foreground">
+              {formatDateTime(assessment.createdAt)}
+            </div>
+            <DropdownMenu>
+              <DropdownMenuTrigger>
+                <MoreHorizontalIcon className="w-4 h-4" />
+              </DropdownMenuTrigger>
+              <DropdownMenuContent>
+                <DropdownMenuItem
+                  className="text-destructive cursor-pointer"
+                  onClick={() => onDelete(assessment)}
+                >
+                  <Trash2Icon className="w-4 h-4 mr-2" />
+                  <span>{t("delete")}</span>
+                </DropdownMenuItem>
+              </DropdownMenuContent>
+            </DropdownMenu>
+          </div>
+        </div>
+        <div className="h-32">
+          <RadialProgress
+            className="w-20 h-20 mx-auto mb-2"
+            ringClassName={`${scoreColor(assessment.pronunciationScore || 0)}`}
+            progress={assessment.pronunciationScore || 0}
+            fontSize={24}
+          />
+          <div className="flex justify-center">
+            <Button
+              onClick={() => onSelect(assessment)}
+              variant="secondary"
+              size="sm"
+            >
+              {t("detail")}
+            </Button>
+          </div>
+        </div>
+      </div>
+    </div>
+  );
+};
--- a/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-form.tsx
+++ b/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-form.tsx
@@ -0,0 +1,343 @@
+import {
+  Button,
+  Input,
+  SelectContent,
+  SelectTrigger,
+  SelectValue,
+  Select,
+  SelectItem,
+  Form,
+  FormField,
+  FormItem,
+  FormLabel,
+  FormMessage,
+  Textarea,
+  toast,
+  Tabs,
+  TabsList,
+  TabsTrigger,
+  TabsContent,
+} from "@renderer/components/ui";
+import { t } from "i18next";
+import { useNavigate } from "react-router-dom";
+import { useContext, useEffect, useRef, useState } from "react";
+import { AppSettingsProviderContext } from "@/renderer/context";
+import { LANGUAGES } from "@/constants";
+import { z } from "zod";
+import { useForm } from "react-hook-form";
+import { zodResolver } from "@hookform/resolvers/zod";
+import { LoaderIcon, MicIcon, SquareIcon } from "lucide-react";
+import WaveSurfer from "wavesurfer.js";
+import RecordPlugin from "wavesurfer.js/dist/plugins/record";
+
+// Validation schema for the pronunciation assessment form.
+// `file` and `recording` are both optional at the schema level; the submit
+// handler is what requires at least one of them to be present.
+const pronunciationAssessmentSchema = z.object({
+  // Value of the upload `<input type="file">`.
+  file: z.instanceof(FileList).optional(),
+  // Blob handed over by the recorder button when a recording finishes.
+  recording: z.instanceof(Blob).optional(),
+  // Language code; must be at least two characters long.
+  language: z.string().min(2),
+  // Optional text the speech is compared against; may be left blank.
+  referenceText: z.string().optional(),
+});
+
+/**
+ * Form that creates a pronunciation assessment from either a fresh microphone
+ * recording or an uploaded audio file, plus a language and an optional
+ * reference text. On success it navigates to `/pronunciation_assessments`.
+ */
+export const PronunciationAssessmentForm = () => {
+  const navigate = useNavigate();
+  const { EnjoyApp, learningLanguage } = useContext(AppSettingsProviderContext);
+  const [submitting, setSubmitting] = useState(false);
+  const [recordingUrl, setRecordingUrl] = useState<string>();
+
+  const form = useForm<z.infer<typeof pronunciationAssessmentSchema>>({
+    resolver: zodResolver(pronunciationAssessmentSchema),
+    values: {
+      language: learningLanguage,
+      referenceText: "",
+    },
+  });
+
+  const fileField = form.register("file");
+  const recordedBlob = form.watch("recording");
+
+  // Create exactly one object URL per recording and revoke it once the
+  // recording changes or the form unmounts, instead of minting a new URL on
+  // every render and never releasing it.
+  useEffect(() => {
+    if (!recordedBlob) {
+      setRecordingUrl(undefined);
+      return;
+    }
+
+    const url = URL.createObjectURL(recordedBlob);
+    setRecordingUrl(url);
+    return () => URL.revokeObjectURL(url);
+  }, [recordedBlob]);
+
+  /**
+   * Sends the recording (preferred) or the first uploaded file to be assessed.
+   * Shows an error and aborts when neither is available.
+   */
+  const onSubmit = (data: z.infer<typeof pronunciationAssessmentSchema>) => {
+    const { language, referenceText, file, recording } = data;
+
+    // A `File` is a `Blob`, so both sources can be handled uniformly.
+    const source: Blob | undefined = recording ?? file?.[0];
+    if (!source) {
+      toast.error(t("noFileOrRecording"));
+      form.setError("recording", { message: t("noFileOrRecording") });
+      return;
+    }
+
+    // Lock the submit button before any async work starts so a second click
+    // cannot trigger a duplicate assessment.
+    setSubmitting(true);
+
+    const assess = async () => {
+      const arrayBuffer = await source.arrayBuffer();
+      await EnjoyApp.pronunciationAssessments.create({
+        language,
+        referenceText,
+        blob: {
+          type: source.type,
+          arrayBuffer,
+        },
+      });
+      navigate("/pronunciation_assessments");
+    };
+
+    toast.promise(
+      assess().finally(() => setSubmitting(false)),
+      {
+        loading: t("assessing"),
+        success: t("assessedSuccessfully"),
+        error: (err) => err.message,
+      }
+    );
+  };
+
+  return (
+    <div className="max-w-screen-md mx-auto">
+      <Form {...form}>
+        <form
+          onSubmit={form.handleSubmit(onSubmit)}
+          className="h-full flex flex-col"
+        >
+          <Tabs className="mb-6" defaultValue="record">
+            <TabsList className="mb-2">
+              <TabsTrigger value="record">{t("record")}</TabsTrigger>
+              <TabsTrigger value="upload">{t("upload")}</TabsTrigger>
+            </TabsList>
+            <TabsContent value="upload">
+              <div className="grid gap-4">
+                <FormField
+                  control={form.control}
+                  name="file"
+                  render={() => (
+                    <FormItem className="grid w-full items-center gap-1.5">
+                      <Input
+                        placeholder={t("upload")}
+                        type="file"
+                        className="cursor-pointer"
+                        accept="audio/*"
+                        {...fileField}
+                      />
+                      <FormMessage />
+                    </FormItem>
+                  )}
+                />
+              </div>
+            </TabsContent>
+            <TabsContent value="record">
+              <div className="grid gap-4 border p-4 rounded-lg">
+                <FormField
+                  control={form.control}
+                  name="recording"
+                  render={({ field }) => (
+                    <FormItem className="grid w-full items-center gap-1.5">
+                      <Input
+                        placeholder={t("recording")}
+                        type="file"
+                        className="hidden"
+                        accept="audio/*"
+                        {...fileField}
+                      />
+                      <RecorderButton
+                        onStart={() => {
+                          form.resetField("recording");
+                        }}
+                        onFinish={(blob) => {
+                          field.onChange(blob);
+                        }}
+                      />
+                    </FormItem>
+                  )}
+                />
+                {recordingUrl && (
+                  <div className="">
+                    {/* Keyed by URL so a new recording remounts the player. */}
+                    <audio key={recordingUrl} controls className="w-full">
+                      <source src={recordingUrl} />
+                    </audio>
+                  </div>
+                )}
+              </div>
+            </TabsContent>
+          </Tabs>
+          <div className="mb-6">
+            <FormField
+              control={form.control}
+              name="language"
+              render={({ field }) => (
+                <FormItem className="grid w-full items-center gap-1.5">
+                  <FormLabel>{t("language")}</FormLabel>
+                  <Select value={field.value} onValueChange={field.onChange}>
+                    <SelectTrigger>
+                      <SelectValue />
+                    </SelectTrigger>
+                    <SelectContent>
+                      {LANGUAGES.map((language) => (
+                        <SelectItem key={language.code} value={language.code}>
+                          {language.name}
+                        </SelectItem>
+                      ))}
+                    </SelectContent>
+                  </Select>
+                  <FormMessage />
+                </FormItem>
+              )}
+            />
+          </div>
+          <div className="mb-6">
+            <FormField
+              control={form.control}
+              name="referenceText"
+              render={({ field }) => (
+                <FormItem className="grid w-full items-center gap-1.5">
+                  <FormLabel>{t("referenceText")}</FormLabel>
+                  <Textarea
+                    placeholder={t("inputReferenceTextOrLeaveItBlank")}
+                    className="h-64"
+                    {...field}
+                  />
+                  <FormMessage />
+                </FormItem>
+              )}
+            />
+          </div>
+          <div className="mt-6">
+            <Button
+              disabled={submitting || !form.formState.isDirty}
+              className="w-full h-12"
+              data-testid="conversation-form-submit"
+              size="lg"
+              type="submit"
+            >
+              {submitting && <LoaderIcon className="mr-2 animate-spin" />}
+              {t("confirm")}
+            </Button>
+          </div>
+        </form>
+      </Form>
+    </div>
+  );
+};
+
+// Maximum length of one recording, in seconds.
+const TEN_MINUTES = 60 * 10;
+
+/**
+ * Round record/stop button with a live waveform and an elapsed-time counter.
+ *
+ * Microphone access is requested on mount; the recorder is only created once
+ * access has been granted. A recording is stopped automatically when it
+ * reaches `TEN_MINUTES` seconds.
+ *
+ * @param props.onStart - Optional; called right before a recording starts.
+ * @param props.onFinish - Called with the recorded blob when recording ends.
+ */
+const RecorderButton = (props: {
+  onStart?: () => void;
+  onFinish: (blob: Blob) => void;
+}) => {
+  const { onStart, onFinish } = props;
+  const { EnjoyApp } = useContext(AppSettingsProviderContext);
+  const [isRecording, setIsRecording] = useState(false);
+  const [recorder, setRecorder] = useState<RecordPlugin>();
+  const [access, setAccess] = useState<boolean>(false);
+  const [duration, setDuration] = useState<number>(0);
+  const ref = useRef<HTMLDivElement>(null);
+  // The timer handle lives in a ref so each mounted button owns its own timer
+  // instead of sharing one module-level variable.
+  const timerRef = useRef<ReturnType<typeof setInterval> | null>(null);
+  // Always points at the latest `onFinish`, because the "record-end" listener
+  // is registered once and would otherwise keep calling an outdated callback.
+  const onFinishRef = useRef(onFinish);
+
+  useEffect(() => {
+    onFinishRef.current = onFinish;
+  });
+
+  const stopTimer = () => {
+    if (timerRef.current !== null) {
+      clearInterval(timerRef.current);
+      timerRef.current = null;
+    }
+  };
+
+  const askForMediaAccess = () => {
+    EnjoyApp.system.preferences.mediaAccess("microphone").then((access) => {
+      if (access) {
+        setAccess(true);
+      } else {
+        setAccess(false);
+        toast.warning(t("noMicrophoneAccess"));
+      }
+    });
+  };
+
+  const startRecord = async () => {
+    if (isRecording) return;
+    if (!recorder) {
+      toast.warning(t("noMicrophoneAccess"));
+      return;
+    }
+
+    // `onStart` is optional, so it must not be called unconditionally.
+    onStart?.();
+
+    try {
+      const devices = await RecordPlugin.getAvailableAudioDevices();
+      const device = devices.find((d) => d.kind === "audioinput");
+      if (!device) {
+        toast.error(t("cannotFindMicrophone"));
+        return;
+      }
+
+      await recorder.startRecording({ deviceId: device.deviceId });
+      setIsRecording(true);
+      setDuration(0);
+
+      // Derive the elapsed time from the wall clock rather than accumulating
+      // 0.1 per tick, and keep the stop call out of the state updater.
+      stopTimer();
+      const startedAt = Date.now();
+      timerRef.current = setInterval(() => {
+        const elapsed = (Date.now() - startedAt) / 1000;
+        setDuration(elapsed);
+        if (elapsed >= TEN_MINUTES) {
+          stopTimer();
+          recorder.stopRecording();
+        }
+      }, 100);
+    } catch (err: unknown) {
+      toast.error(err instanceof Error ? err.message : String(err));
+    }
+  };
+
+  useEffect(() => {
+    if (!access) return;
+    if (!ref.current) return;
+
+    const ws = WaveSurfer.create({
+      container: ref.current,
+      fillParent: true,
+      height: 40,
+      autoCenter: false,
+      normalize: false,
+    });
+
+    const record = ws.registerPlugin(RecordPlugin.create());
+    setRecorder(record);
+
+    record.on("record-end", (blob: Blob) => {
+      stopTimer();
+      onFinishRef.current(blob);
+      setIsRecording(false);
+    });
+
+    return () => {
+      stopTimer();
+      // Use the local plugin instance: the `recorder` state captured by this
+      // effect is still undefined when the cleanup closure is created.
+      record.stopRecording();
+      ws.destroy();
+    };
+  }, [access]);
+
+  useEffect(() => {
+    askForMediaAccess();
+  }, []);
+
+  return (
+    <div className="w-full">
+      <div className="flex items-center justify-center">
+        <Button
+          type="button"
+          variant="ghost"
+          className="aspect-square p-0 h-12 rounded-full bg-red-500 hover:bg-red-500/90"
+          onClick={() => {
+            if (isRecording) {
+              recorder?.stopRecording();
+            } else {
+              void startRecord();
+            }
+          }}
+        >
+          {isRecording ? (
+            <SquareIcon fill="white" className="w-6 h-6 text-white" />
+          ) : (
+            <MicIcon className="w-6 h-6 text-white" />
+          )}
+        </Button>
+      </div>
+      <div className="w-full flex items-center">
+        <div
+          ref={ref}
+          className={isRecording ? "w-full mr-4" : "h-0 overflow-hidden"}
+        ></div>
+        {isRecording && (
+          <div className="text-muted-foreground text-sm w-24">
+            {duration.toFixed(1)} / {TEN_MINUTES}
+          </div>
+        )}
+      </div>
+    </div>
+  );
+};
--- a/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-fulltext-result.tsx
+++ b/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-fulltext-result.tsx
@@ -55,7 +55,7 @@ export const PronunciationAssessmentFulltextResult = (props: {
  }, []);

  return (
-    <ScrollArea className="h-72 py-4 px-8">
+    <ScrollArea className="min-h-72 py-4 px-8">
      <div className="flex items-start justify-between space-x-6">
        <div className="flex-1 py-4">
          {words.map((result, index: number) => (
--- a/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-score-result.tsx
+++ b/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-score-result.tsx
@@ -9,11 +9,11 @@ export const PronunciationAssessmentScoreResult = (props: {
  fluencyScore?: number;
  completenessScore?: number;
  prosodyScore?: number;
-  assessing: boolean;
-  onAssess: () => void;
+  assessing?: boolean;
+  onAssess?: () => void;
 }) => {
  const {
-    assessing,
+    assessing = false,
    onAssess,
    pronunciationScore,
    accuracyScore,
@@ -142,7 +142,7 @@ const ScoreBarComponent = ({
  );
 };

-const scoreColor = (score: number, type: "text" | "bg" = "text") => {
+export const scoreColor = (score: number, type: "text" | "bg" = "text") => {
  if (!score) return "gray";

  if (score >= 80) return type == "text" ? "text-green-600" : "bg-green-600";
--- a/enjoy/src/renderer/components/recordings/recording-detail.tsx
+++ b/enjoy/src/renderer/components/recordings/recording-detail.tsx
@@ -8,11 +8,15 @@ import { useState, useContext } from "react";
 import { AppSettingsProviderContext } from "@renderer/context";
 import { Tooltip } from "react-tooltip";

-export const RecordingDetail = (props: { recording: RecordingType }) => {
+export const RecordingDetail = (props: {
+  recording: RecordingType;
+  pronunciationAssessment?: PronunciationAssessmentType;
+}) => {
  const { recording } = props;
  if (!recording) return;

-  const { pronunciationAssessment } = recording;
+  const pronunciationAssessment =
+    props.pronunciationAssessment || recording.pronunciationAssessment;
  const { result } = pronunciationAssessment || {};
  const [currentTime, setCurrentTime] = useState<number>(0);
  const [seek, setSeek] = useState<{
@@ -58,7 +62,7 @@ export const RecordingDetail = (props: { recording: RecordingType }) => {
          }}
        />
      ) : (
-        <ScrollArea className="h-72 py-4 px-8 select-text">
+        <ScrollArea className="min-h-72 py-4 px-8 select-text">
          {(recording?.referenceText || "").split("\n").map((line, index) => (
            <div key={index} className="text-xl font-serif tracking-wide mb-2">
              {line}
--- a/enjoy/src/renderer/components/ui/badage.tsx
+++ b/enjoy/src/renderer/components/ui/badage.tsx
@@ -0,0 +1,36 @@
+import * as React from "react";
+import { cva, type VariantProps } from "class-variance-authority";
+
+import { cn } from "@renderer/lib/utils";
+
+// Class-name builder for the badge: a shared base class string plus one extra
+// class string per `variant`. `default` is used when no variant is passed.
+const badgeVariants = cva(
+  "inline-flex items-center rounded-md border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2",
+  {
+    variants: {
+      variant: {
+        default:
+          "border-transparent bg-primary text-primary-foreground shadow hover:bg-primary/80",
+        secondary:
+          "border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80",
+        destructive:
+          "border-transparent bg-destructive text-destructive-foreground shadow hover:bg-destructive/80",
+        outline: "text-foreground",
+      },
+    },
+    defaultVariants: {
+      variant: "default",
+    },
+  }
+);
+
+/** Props of `Badge`: any `<div>` attribute plus the `variant` option. */
+export interface BadgeProps
+  extends React.HTMLAttributes<HTMLDivElement>,
+    VariantProps<typeof badgeVariants> {}
+
+/**
+ * Small inline label rendered as a `<div>`.
+ * The variant classes are merged with the caller's `className`; every other
+ * prop is forwarded to the `<div>` unchanged.
+ */
+function Badge(props: BadgeProps) {
+  const { className, variant, ...divProps } = props;
+  const classes = cn(badgeVariants({ variant }), className);
+
+  return <div className={classes} {...divProps} />;
+}
+
+export { Badge, badgeVariants };
--- a/enjoy/src/renderer/components/ui/index.ts
+++ b/enjoy/src/renderer/components/ui/index.ts
@@ -1,5 +1,6 @@
 export * from "./accordion";
 export * from "./alert";
+export * from "./badage";
 export * from "./button";
 export * from "./menubar";
 export * from "./progress";