Feat summarize audio topic (#594)

* refactor ai commands

* fix json command

* fix extract story command

* allow summarizing topic for audio
an-lee authored on 2024-05-09 10:54:11 +08:00, committed by GitHub
parent 5436b2038c, commit 69a6f721ca
18 changed files with 245 additions and 232 deletions
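In short, the refactor routes every AI command through two shared helpers: textCommand for plain-text completions and jsonCommand for schema-validated JSON output, with each command formatting its prompt up front. A minimal sketch of the resulting call pattern, assuming the helper names and signatures shown in the diffs below (the prompt text and engine options here are illustrative, not from the commit):

import { z } from "zod";
import { ChatPromptTemplate } from "@langchain/core/prompts";
// Both helpers are re-exported from the commands index in this commit.
import { jsonCommand, textCommand } from "@commands";

const demo = async () => {
  // Engine options normally come from useAiCommand's currentEngine; these values are placeholders.
  const options = { key: "sk-...", modelName: "gpt-4-turbo", baseUrl: undefined as string | undefined };

  // Text commands: render the full prompt string first, then delegate to textCommand.
  const prompt = await ChatPromptTemplate.fromMessages([
    ["system", "Summarize the topic in four to five words."],
    ["human", "{text}"],
  ]).format({ text: "..." });
  const title = await textCommand(prompt, options);

  // JSON commands: pass a zod schema; jsonCommand enforces JSON-mode structured output.
  const schema = z.object({ words: z.array(z.string()) });
  const extraction = await jsonCommand(prompt, { ...options, schema });

  return { title, extraction };
};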

View File

@@ -1,5 +1,5 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
+import { textCommand } from "./text.command";
 
 export const analyzeCommand = async (
   text: string,
@@ -10,29 +10,14 @@ export const analyzeCommand = async (
     baseUrl?: string;
   }
 ): Promise<string> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
-
   if (!text) throw new Error("Text is required");
 
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    configuration: {
-      baseURL: baseUrl,
-    },
-    cache: false,
-    verbose: true,
-    maxRetries: 2,
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", SYSTEM_PROMPT],
     ["human", text],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({});
-  return response.text;
+  ]).format({});
+
+  return textCommand(prompt, options);
 };
 
 const SYSTEM_PROMPT = `你是我的英语教练,我将提供英语文本,你将帮助我分析文本的句子结构、语法和词汇/短语,并对文本进行详细解释。请用中文回答,并按以下格式返回结果:

View File

@@ -1,11 +1,9 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
-import { zodToJsonSchema } from "zod-to-json-schema";
 import { z } from "zod";
-import { RESPONSE_JSON_FORMAT_MODELS } from "@/constants";
+import { jsonCommand } from "./json.command";
 
 export const extractStoryCommand = async (
-  content: string,
+  text: string,
   options: {
     key: string;
     modelName?: string;
@@ -13,61 +11,27 @@ export const extractStoryCommand = async (
     baseUrl?: string;
   }
 ): Promise<{ words: string[]; idioms: string[] }> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
-
-  if (RESPONSE_JSON_FORMAT_MODELS.indexOf(modelName) === -1) {
-    modelName = "gpt-4-turbo";
-  }
-
-  const saveExtraction = z.object({
+  const schema = z.object({
     words: z.array(z.string().describe("extracted word")),
     idioms: z.array(z.string().describe("extracted idiom")),
   });
 
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    modelKwargs: {
-      response_format: {
-        type: "json_object",
-      },
-    },
-    configuration: {
-      baseURL: baseUrl,
-    },
-    cache: true,
-    verbose: true,
-    maxRetries: 2,
-  }).bind({
-    tools: [
-      {
-        type: "function",
-        function: {
-          name: "save_extraction",
-          description: "Save the extracted words and idioms from a text",
-          parameters: zodToJsonSchema(saveExtraction),
-        },
-      },
-    ],
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", EXTRACT_STORY_PROMPT],
     ["human", "{text}"],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({
+  ]).format({
     learning_language: "English",
-    text: content,
+    text,
   });
 
-  return JSON.parse(
-    response.additional_kwargs?.tool_calls?.[0]?.function?.arguments || "{}"
-  );
+  return jsonCommand(prompt, { ...options, schema });
 };
 
 const EXTRACT_STORY_PROMPT = `
-I am an {learning_language} beginner and only have a grasp of 500 high-frequency basic words. You are an {learning_language} learning assistant robot, and your task is to analyze the article I provide and extract all the meaningful words and idioms that I may not be familiar with. Specifically, it should include common words used in uncommon ways. Return in JSON format.
+I am an {learning_language} beginner and only have a grasp of 500 high-frequency basic words. You are an {learning_language} learning assistant robot, and your task is to analyze the article I provide and extract all the meaningful words and idioms that I may not be familiar with. Specifically, it should include common words used in uncommon ways. Return in JSON format like the following:
+{{
+  words: ["word1", "word2", ...],
+  idioms: ["idiom1", "idiom2", ...]
+}}
 `;

View File

@@ -2,5 +2,8 @@ export * from "./extract-story.command";
 export * from "./lookup.command";
 export * from "./translate.command";
 export * from "./ipa.command";
+export * from "./json.command";
 export * from "./analyze.command";
 export * from "./punctuate.command";
+export * from "./summarize-topic.command";
+export * from "./text.command";

View File

@@ -1,11 +1,6 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
 import { z } from "zod";
-import {
-  StructuredOutputParser,
-  OutputFixingParser,
-} from "langchain/output_parsers";
-import { RESPONSE_JSON_FORMAT_MODELS } from "@/constants";
+import { jsonCommand } from "./json.command";
 
 export const ipaCommand = async (
   text: string,
@@ -16,14 +11,9 @@ export const ipaCommand = async (
     baseUrl?: string;
   }
 ): Promise<{ words?: { word?: string; ipa?: string }[] }> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
-
   if (!text) throw new Error("Text is required");
 
-  if (RESPONSE_JSON_FORMAT_MODELS.indexOf(modelName) === -1) {
-    modelName = "gpt-4-turbo";
-  }
-
-  const responseSchema = z.object({
+  const schema = z.object({
     words: z.array(
       z.object({
         word: z.string().nonempty(),
@@ -32,51 +22,15 @@ export const ipaCommand = async (
     ),
   });
 
-  const parser = StructuredOutputParser.fromZodSchema(responseSchema);
-  const fixParser = OutputFixingParser.fromLLM(
-    new ChatOpenAI({
-      openAIApiKey: key,
-      modelName,
-      temperature: 0,
-      configuration: {
-        baseURL: baseUrl,
-      },
-    }),
-    parser
-  );
-
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    configuration: {
-      baseURL: baseUrl,
-    },
-    modelKwargs: {
-      response_format: {
-        type: "json_object",
-      },
-    },
-    cache: true,
-    verbose: true,
-    maxRetries: 2,
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", SYSTEM_PROMPT],
     ["human", "{text}"],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({
+  ]).format({
     learning_language: "English",
     text,
   });
 
-  try {
-    return await parser.parse(response.text);
-  } catch (e) {
-    return await fixParser.parse(response.text);
-  }
+  return jsonCommand(prompt, { ...options, schema });
 };
 
 const SYSTEM_PROMPT = `Generate an array of JSON objects for each {learning_language} word in the given text, with each object containing two keys: 'word' and 'ipa', where 'ipa' is the International Phonetic Alphabet (IPA) representation of the word. Return the array in JSON format only. The output should be structured like this:

View File

@@ -0,0 +1,48 @@
+import { ChatOpenAI } from "@langchain/openai";
+import { RESPONSE_JSON_FORMAT_MODELS } from "@/constants";
+import { zodToJsonSchema } from "zod-to-json-schema";
+
+export const jsonCommand = async (
+  prompt: string,
+  options: {
+    key: string;
+    modelName?: string;
+    temperature?: number;
+    baseUrl?: string;
+    schema: any;
+  }
+): Promise<any> => {
+  const { key, temperature = 0, baseUrl, schema } = options;
+  let { modelName = "gpt-4-turbo" } = options;
+
+  if (RESPONSE_JSON_FORMAT_MODELS.indexOf(modelName) === -1) {
+    modelName = "gpt-4-turbo";
+  }
+
+  const chatModel = new ChatOpenAI({
+    openAIApiKey: key,
+    modelName,
+    temperature,
+    modelKwargs: {
+      response_format: {
+        type: "json_object",
+      },
+    },
+    configuration: {
+      baseURL: baseUrl,
+    },
+    cache: true,
+    verbose: true,
+    maxRetries: 1,
+  });
+
+  const structuredOutput = chatModel.withStructuredOutput(
+    zodToJsonSchema(schema),
+    {
+      method: "jsonMode",
+    }
+  );
+
+  const response = await structuredOutput.invoke(prompt);
+  return response;
+};

View File

@@ -1,11 +1,6 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
 import { z } from "zod";
-import {
-  StructuredOutputParser,
-  OutputFixingParser,
-} from "langchain/output_parsers";
-import { RESPONSE_JSON_FORMAT_MODELS } from "@/constants";
+import { jsonCommand } from "./json.command";
 
 export const lookupCommand = async (
   params: {
@@ -29,16 +24,9 @@ export const lookupCommand = async (
     translation?: string;
     lemma?: string;
   }> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
-
-  if (RESPONSE_JSON_FORMAT_MODELS.indexOf(modelName) === -1) {
-    modelName = "gpt-4-turbo";
-  }
-
   const { word, context, meaningOptions } = params;
 
-  const responseSchema = z.object({
+  const schema = z.object({
     id: z.string().optional(),
     word: z.string().optional(),
     context_translation: z.string().optional(),
@@ -49,37 +37,10 @@ export const lookupCommand = async (
     lemma: z.string().optional(),
   });
 
-  const parser = StructuredOutputParser.fromZodSchema(responseSchema);
-  const fixParser = OutputFixingParser.fromLLM(
-    new ChatOpenAI({
-      openAIApiKey: key,
-      modelName,
-      temperature: 0,
-      configuration: {
-        baseURL: baseUrl,
-      },
-    }),
-    parser
-  );
-
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    configuration: {
-      baseURL: baseUrl,
-    },
-    cache: true,
-    verbose: true,
-    maxRetries: 2,
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", DICITIONARY_PROMPT],
     ["human", "{input}"],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({
+  ]).format({
     learning_language: "English",
     native_language: "Chinese",
     input: JSON.stringify({
@@ -89,11 +50,7 @@ export const lookupCommand = async (
     }),
   });
 
-  try {
-    return await parser.parse(response.text);
-  } catch (e) {
-    return await fixParser.parse(response.text);
-  }
+  return jsonCommand(prompt, { ...options, schema });
 };
 
 const DICITIONARY_PROMPT = `You are an {learning_language}-{native_language} dictionary. I will provide "word(it also maybe a phrase)" and "context" as input, you should return the "word", "lemma", "pronunciation", "pos(part of speech, maybe empty for phrase)", "definition", "translation" and "context_translation" as output. If I provide "definitions", you should try to select the appropriate one for the given context, and return the id of selected definition as "id". If none are suitable, generate a new definition for me. If no context is provided, return the most common definition. If you do not know the appropriate definition, return an empty string for "definition" and "translation".

View File

@@ -1,5 +1,5 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
+import { textCommand } from "./text.command";
 
 export const punctuateCommand = async (
   text: string,
@@ -10,29 +10,14 @@ export const punctuateCommand = async (
     baseUrl?: string;
   }
 ): Promise<string> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
-
   if (!text) throw new Error("Text is required");
 
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    configuration: {
-      baseURL: baseUrl,
-    },
-    cache: false,
-    verbose: true,
-    maxRetries: 2,
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", SYSTEM_PROMPT],
     ["human", text],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({});
-  return response.text;
+  ]).format({});
+
+  return textCommand(prompt, options);
 };
 
 const SYSTEM_PROMPT = `Please add proper punctuation to the text I provide you. Return the corrected text only.`;

View File

@@ -0,0 +1,24 @@
+import { ChatPromptTemplate } from "@langchain/core/prompts";
+import { textCommand } from "./text.command";
+
+export const summarizeTopicCommand = async (
+  text: string,
+  options: {
+    key: string;
+    modelName?: string;
+    temperature?: number;
+    baseUrl?: string;
+  }
+): Promise<string> => {
+  if (!text) throw new Error("Text is required");
+
+  const prompt = await ChatPromptTemplate.fromMessages([
+    ["system", SYSTEM_PROMPT],
+    ["human", text],
+  ]).format({});
+
+  return textCommand(prompt, options);
+};
+
+const SYSTEM_PROMPT =
+  "Please generate a four to five word title summarizing our conversation without any lead-in, punctuation, quotation marks, periods, symbols, bold text, or additional text. Remove enclosing quotation marks.";

View File

@@ -0,0 +1,31 @@
+import { ChatOpenAI } from "@langchain/openai";
+
+export const textCommand = async (
+  prompt: string,
+  options: {
+    key: string;
+    modelName?: string;
+    temperature?: number;
+    baseUrl?: string;
+    systemPrompt?: string;
+  }
+): Promise<string> => {
+  const { key, temperature = 0, baseUrl } = options;
+  let { modelName = "gpt-4-turbo" } = options;
+
+  const chatModel = new ChatOpenAI({
+    openAIApiKey: key,
+    modelName,
+    temperature,
+    configuration: {
+      baseURL: baseUrl,
+    },
+    cache: false,
+    verbose: true,
+    maxRetries: 1,
+  });
+
+  const response = await chatModel.invoke(prompt);
+  return response.text;
+};

View File

@@ -1,5 +1,5 @@
-import { ChatOpenAI } from "@langchain/openai";
 import { ChatPromptTemplate } from "@langchain/core/prompts";
+import { textCommand } from "./text.command";
 
 export const translateCommand = async (
   text: string,
@@ -10,32 +10,17 @@ export const translateCommand = async (
     baseUrl?: string;
   }
 ): Promise<string> => {
-  const { key, temperature = 0, baseUrl } = options;
-  let { modelName = "gpt-4-turbo" } = options;
-
   if (!text) throw new Error("Text is required");
 
-  const chatModel = new ChatOpenAI({
-    openAIApiKey: key,
-    modelName,
-    temperature,
-    configuration: {
-      baseURL: baseUrl,
-    },
-    cache: false,
-    verbose: true,
-    maxRetries: 2,
-  });
-
-  const prompt = ChatPromptTemplate.fromMessages([
+  const prompt = await ChatPromptTemplate.fromMessages([
     ["system", SYSTEM_PROMPT],
     ["human", TRANSLATION_PROMPT],
-  ]);
-
-  const response = await prompt.pipe(chatModel).invoke({
+  ]).format({
     native_language: "Chinese",
     text,
   });
 
-  return response.text;
+  return textCommand(prompt, options);
 };
 
 const SYSTEM_PROMPT =

View File

@@ -570,5 +570,6 @@
   "noNotesYet": "No notes yet",
   "editTranscription": "Edit transcription",
   "saveTranscription": "Save transcription",
-  "areYouSureToSaveTranscription": "It will perform a force-alignment between the audio and your edited transcription. Are you sure to continue?"
+  "areYouSureToSaveTranscription": "It will perform a force-alignment between the audio and your edited transcription. Are you sure to continue?",
+  "summarize": "Summarize"
 }

View File

@@ -569,5 +569,6 @@
   "noNotesYet": "还没有笔记",
   "editTranscription": "编辑语音文本",
   "saveTranscription": "保存语音文本",
-  "areYouSureToSaveTranscription": "即将根据您修改后的语音文本对语音重新进行对齐,确定要继续吗?"
+  "areYouSureToSaveTranscription": "即将根据您修改后的语音文本对语音重新进行对齐,确定要继续吗?",
+  "summarize": "提炼主题"
 }

View File

@@ -1,16 +1,66 @@
-import { useContext } from "react";
-import { MediaPlayerProviderContext } from "@renderer/context";
+import { useContext, useState } from "react";
+import {
+  AppSettingsProviderContext,
+  MediaPlayerProviderContext,
+} from "@renderer/context";
 import { formatDuration, formatDateTime } from "@renderer/lib/utils";
 import { t } from "i18next";
+import { Button, toast } from "@renderer/components/ui";
+import { useAiCommand } from "@renderer/hooks";
+import { LoaderIcon } from "lucide-react";
 
 export const MediaInfoPanel = () => {
-  const { media } = useContext(MediaPlayerProviderContext);
+  const { media, transcription } = useContext(MediaPlayerProviderContext);
+  const { EnjoyApp } = useContext(AppSettingsProviderContext);
+  const { summarizeTopic } = useAiCommand();
+  const [summarizing, setSummarizing] = useState<boolean>(false);
+
+  const handleSummarize = async () => {
+    setSummarizing(true);
+
+    try {
+      const topic = await summarizeTopic(transcription.result.transcript);
+
+      if (media.mediaType === "Video") {
+        await EnjoyApp.videos.update(media.id, {
+          name: topic,
+        });
+      } else if (media.mediaType === "Audio") {
+        await EnjoyApp.audios.update(media.id, {
+          name: topic,
+        });
+      }
+    } catch (error) {
+      toast.error(error);
+    }
+
+    setSummarizing(false);
+  };
 
   if (!media) return null;
 
   return (
     <div className="px-4" data-testid="media-info-panel">
+      <div className="mb-2">
+        <div className="flex items-center justify-between">
+          <div className="capitalize text-sm text-muted-foreground mb-1">
+            {t("models.audio.name")}
+          </div>
+          <Button
+            disabled={summarizing}
+            onClick={handleSummarize}
+            variant="outline"
+            size="sm"
+          >
+            {summarizing && (
+              <LoaderIcon className="animate-spin mr-2" size={16} />
+            )}
+            {t("summarize")}
+          </Button>
+        </div>
+        <div className="">{media.name}</div>
+      </div>
       {[
-        { label: t("models.audio.name"), value: media.name },
         {
           label: t("models.audio.duration"),
           value: formatDuration(media.duration),
View File

@@ -34,7 +34,7 @@ import { useCopyToClipboard } from "@uidotdev/usehooks";
 import { t } from "i18next";
 import { AppSettingsProviderContext } from "@renderer/context";
 import Markdown from "react-markdown";
-import { useConversation } from "@renderer/hooks";
+import { useConversation, useAiCommand } from "@renderer/hooks";
 
 export const AssistantMessageComponent = (props: {
   message: MessageType;
@@ -52,6 +52,7 @@ export const AssistantMessageComponent = (props: {
   const [shadowing, setShadowing] = useState<boolean>(false);
   const { EnjoyApp } = useContext(AppSettingsProviderContext);
   const { tts } = useConversation();
+  const { summarizeTopic } = useAiCommand();
 
   useEffect(() => {
     if (speech) return;
@@ -100,11 +101,19 @@ export const AssistantMessageComponent = (props: {
     if (!audio) {
       setResourcing(true);
+
+      let title =
+        speech.text.length > 20
+          ? speech.text.substring(0, 17).trim() + "..."
+          : speech.text;
+
+      try {
+        title = await summarizeTopic(speech.text);
+      } catch (e) {
+        console.warn(e);
+      }
+
       await EnjoyApp.audios.create(speech.filePath, {
-        name:
-          speech.text.length > 20
-            ? speech.text.substring(0, 17).trim() + "..."
-            : speech.text,
+        name: title,
         originalText: speech.text,
       });
       setResourcing(false);
@@ -169,7 +178,7 @@ export const AssistantMessageComponent = (props: {
           new URL(props.href ?? "");
           props.target = "_blank";
           props.rel = "noopener noreferrer";
-        } catch (e) { }
+        } catch (e) {}
 
         return <a {...props}>{children}</a>;
       },

View File

@@ -510,7 +510,7 @@ export const MediaPlayerProvider = ({
     EnjoyApp.waveforms.find(media.md5).then((waveform) => {
       setWaveForm(waveform);
     });
-  }, [media]);
+  }, [media?.md5]);
 
   /*
    * Initialize wavesurfer when container ref is available
@@ -524,7 +524,7 @@ export const MediaPlayerProvider = ({
       setDecoded(false);
       setDecodeError(null);
     };
-  }, [media, ref, mediaProvider, layout?.playerHeight]);
+  }, [media?.src, ref, mediaProvider, layout?.playerHeight]);
 
   useEffect(() => {
     calculateHeight();

View File

@@ -9,6 +9,7 @@ import {
   translateCommand,
   analyzeCommand,
   punctuateCommand,
+  summarizeTopicCommand,
 } from "@commands";
 
 export const useAiCommand = () => {
@@ -108,14 +109,23 @@ export const useAiCommand = () => {
       key: currentEngine.key,
       modelName: currentEngine.model,
       baseUrl: currentEngine.baseUrl,
-    })
+    });
+  };
+
+  const summarizeTopic = async (text: string) => {
+    return summarizeTopicCommand(text, {
+      key: currentEngine.key,
+      modelName: currentEngine.model,
+      baseUrl: currentEngine.baseUrl,
+    });
   };
 
   return {
     lookupWord,
     extractStory,
     translate,
     analyzeText,
-    punctuateText
+    punctuateText,
+    summarizeTopic,
   };
 };

View File

@@ -5,17 +5,20 @@ import {
 } from "@renderer/context";
 import { toast } from "@renderer/components/ui";
 import { t } from "i18next";
+import { useThrottle } from "@uidotdev/usehooks";
 
 export const useAudio = (options: { id?: string; md5?: string }) => {
   const { id, md5 } = options;
   const { EnjoyApp } = useContext(AppSettingsProviderContext);
   const { addDblistener, removeDbListener } = useContext(DbProviderContext);
   const [audio, setAudio] = useState<AudioType>(null);
+  const throttledAudio = useThrottle(audio, 500);
 
   const onAudioUpdate = (event: CustomEvent) => {
     const { model, action, record } = event.detail || {};
-    if (model !== "Audio") return;
-    if (record?.id != audio?.id) return;
+    if (model != "Audio") return;
+    if (id && record.id !== id) return;
+    if (md5 && record.md5 !== md5) return;
     if (action !== "update") return;
 
     setAudio(record);
@@ -38,6 +41,6 @@ export const useAudio = (options: { id?: string; md5?: string }) => {
   }, [id, md5]);
 
   return {
-    audio,
+    audio: throttledAudio,
   };
 };

View File

@@ -5,17 +5,20 @@ import {
 } from "@renderer/context";
 import { toast } from "@renderer/components/ui";
 import { t } from "i18next";
+import { useThrottle } from "@uidotdev/usehooks";
 
 export const useVideo = (options: { id?: string; md5?: string }) => {
   const { id, md5 } = options;
   const { EnjoyApp } = useContext(AppSettingsProviderContext);
   const { addDblistener, removeDbListener } = useContext(DbProviderContext);
   const [video, setVideo] = useState<VideoType>(null);
+  const throttledVideo = useThrottle(video, 500);
 
   const onAudioUpdate = (event: CustomEvent) => {
     const { model, action, record } = event.detail || {};
-    if (model !== "Audio") return;
-    if (record?.id != video?.id) return;
+    if (model !== "Video") return;
+    if (id && record.id !== id) return;
+    if (md5 && record.md5 !== md5) return;
     if (action !== "update") return;
 
     setVideo(record);
@@ -38,6 +41,6 @@ export const useVideo = (options: { id?: string; md5?: string }) => {
   }, [id, md5]);
 
   return {
-    video,
+    video: throttledVideo,
   };
 };