From 5efc5fc1db5d389762ac733c3a2954d73ce26ba1 Mon Sep 17 00:00:00 2001 From: an-lee Date: Fri, 19 Jan 2024 16:52:49 +0800 Subject: [PATCH] Feat: AI commands (#145) * upgrade deps * add extract command * add lookup.command * update lookup command * fix locals * may lookup one by one * update lookup method * add translate command * cache translation by default * open ai default settings * use openai config in context * refactor * genreate ipa * update UI * handle ai generate fail --- enjoy/package.json | 1 + enjoy/src/api/client.ts | 25 +- enjoy/src/commands/extract-story.command.ts | 71 +++++ enjoy/src/commands/index.ts | 4 + enjoy/src/commands/ipa.command.ts | 88 ++++++ enjoy/src/commands/lookup.command.ts | 145 +++++++++ enjoy/src/commands/translate.command.ts | 49 ++++ enjoy/src/i18n/en.json | 20 +- enjoy/src/i18n/zh-CN.json | 22 +- enjoy/src/main/db/models/conversation.ts | 3 + .../src/renderer/components/lookup-result.tsx | 99 ++++--- .../components/meanings/meaning-card.tsx | 2 +- .../components/medias/media-caption.tsx | 274 ++++++++++++++---- .../components/medias/media-player.tsx | 1 + .../components/preferences/basic-settings.tsx | 170 ++++++++--- .../src/renderer/components/stories/index.ts | 1 + .../components/stories/story-toolbar.tsx | 158 +++------- .../components/stories/story-viewer.tsx | 23 +- .../stories/story-vocabulary-sheet.tsx | 155 ++++++++++ enjoy/src/renderer/pages/story-preview.tsx | 4 +- enjoy/src/renderer/pages/story.tsx | 207 ++++++++++--- enjoy/src/types.d.ts | 2 + enjoy/src/types/story.d.ts | 8 + enjoy/tsconfig.json | 3 +- enjoy/vite.main.config.mts | 1 + enjoy/vite.renderer.config.mts | 1 + yarn.lock | 8 + 27 files changed, 1227 insertions(+), 318 deletions(-) create mode 100644 enjoy/src/commands/extract-story.command.ts create mode 100644 enjoy/src/commands/index.ts create mode 100644 enjoy/src/commands/ipa.command.ts create mode 100644 enjoy/src/commands/lookup.command.ts create mode 100644 enjoy/src/commands/translate.command.ts 
create mode 100644 enjoy/src/renderer/components/stories/story-vocabulary-sheet.tsx diff --git a/enjoy/package.json b/enjoy/package.json index c4b69fcf..9d5307a9 100644 --- a/enjoy/package.json +++ b/enjoy/package.json @@ -112,6 +112,7 @@ "fs-extra": "^11.2.0", "html-to-text": "^9.0.5", "i18next": "^23.7.16", + "js-md5": "^0.8.3", "langchain": "^0.1.4", "lodash": "^4.17.21", "lucide-react": "^0.312.0", diff --git a/enjoy/src/api/client.ts b/enjoy/src/api/client.ts index ebf90218..90246be6 100644 --- a/enjoy/src/api/client.ts +++ b/enjoy/src/api/client.ts @@ -172,6 +172,17 @@ export class Client { return this.api.post("/api/lookups", decamelizeKeys(params)); } + updateLookup( + id: string, + params: { + meaning: Partial; + sourceId?: string; + sourceType?: string; + } + ): Promise { + return this.api.put(`/api/lookups/${id}`, decamelizeKeys(params)); + } + lookupInBatch( lookups: { word: string; @@ -185,8 +196,17 @@ export class Client { }); } - extractVocabularyFromStory(storyId: string): Promise { - return this.api.post(`/api/stories/${storyId}/extract_vocabulary`); + extractVocabularyFromStory( + storyId: string, + extraction?: { + words?: string[]; + idioms?: string[]; + } + ): Promise { + return this.api.post( + `/api/stories/${storyId}/extract_vocabulary`, + decamelizeKeys({ extraction }) + ); } storyMeanings( @@ -194,7 +214,6 @@ export class Client { params?: { page?: number; items?: number; - storyId?: string; } ): Promise< { diff --git a/enjoy/src/commands/extract-story.command.ts b/enjoy/src/commands/extract-story.command.ts new file mode 100644 index 00000000..2d728287 --- /dev/null +++ b/enjoy/src/commands/extract-story.command.ts @@ -0,0 +1,71 @@ +import { ChatOpenAI } from "@langchain/openai"; +import { ChatPromptTemplate } from "langchain/prompts"; +import { zodToJsonSchema } from "zod-to-json-schema"; +import { z } from "zod"; + +export const extractStoryCommand = async ( + content: string, + options: { + key: string; + modelName?: string; + 
temperature?: number; + baseUrl?: string; + } +): Promise<{ words: string[]; idioms: string[] }> => { + const { + key, + modelName = "gpt-3.5-turbo-1106", + temperature = 0, + baseUrl, + } = options; + + const saveExtraction = z.object({ + words: z.array(z.string().describe("extracted word")), + idioms: z.array(z.string().describe("extracted idiom")), + }); + + const chatModel = new ChatOpenAI({ + openAIApiKey: key, + modelName, + temperature, + modelKwargs: { + response_format: { + type: "json_object", + }, + }, + configuration: { + baseURL: baseUrl, + }, + cache: true, + verbose: true, + }).bind({ + tools: [ + { + type: "function", + function: { + name: "save_extraction", + description: "Save the extracted words and idioms from a text", + parameters: zodToJsonSchema(saveExtraction), + }, + }, + ], + }); + + const prompt = ChatPromptTemplate.fromMessages([ + ["system", EXTRACT_STORY_PROMPT], + ["human", "{text}"], + ]); + + const response = await prompt.pipe(chatModel).invoke({ + learning_language: "English", + text: content, + }); + + return JSON.parse( + response.additional_kwargs?.tool_calls?.[0]?.function?.arguments || "{}" + ); +}; + +const EXTRACT_STORY_PROMPT = ` +I am an {learning_language} beginner and only have a grasp of 500 high-frequency basic words. You are an {learning_language} learning assistant robot, and your task is to analyze the article I provide and extract all the meaningful words and idioms that I may not be familiar with. Specifically, it should include common words used in uncommon ways. Return in JSON format. 
+`; diff --git a/enjoy/src/commands/index.ts b/enjoy/src/commands/index.ts new file mode 100644 index 00000000..82846215 --- /dev/null +++ b/enjoy/src/commands/index.ts @@ -0,0 +1,4 @@ +export * from "./extract-story.command"; +export * from "./lookup.command"; +export * from "./translate.command"; +export * from "./ipa.command"; diff --git a/enjoy/src/commands/ipa.command.ts b/enjoy/src/commands/ipa.command.ts new file mode 100644 index 00000000..215d803a --- /dev/null +++ b/enjoy/src/commands/ipa.command.ts @@ -0,0 +1,88 @@ +import { ChatOpenAI } from "@langchain/openai"; +import { ChatPromptTemplate } from "langchain/prompts"; +import { z } from "zod"; +import { + StructuredOutputParser, + OutputFixingParser, +} from "langchain/output_parsers"; + +export const ipaCommand = async ( + text: string, + options: { + key: string; + modelName?: string; + temperature?: number; + baseUrl?: string; + } +): Promise<{ words?: { word?: string; ipa?: string }[] }> => { + const { + key, + modelName = "gpt-3.5-turbo-1106", + temperature = 0, + baseUrl, + } = options; + + const responseSchema = z.object({ + words: z.array( + z.object({ + word: z.string().nonempty(), + ipa: z.string().nonempty(), + }) + ), + }); + + const parser = StructuredOutputParser.fromZodSchema(responseSchema); + const fixParser = OutputFixingParser.fromLLM( + new ChatOpenAI({ + openAIApiKey: key, + temperature: 0, + configuration: { + baseURL: baseUrl, + }, + }), + parser + ); + + const chatModel = new ChatOpenAI({ + openAIApiKey: key, + modelName, + temperature, + configuration: { + baseURL: baseUrl, + }, + modelKwargs: { + response_format: { + type: "json_object", + }, + }, + cache: true, + verbose: true, + }); + + const prompt = ChatPromptTemplate.fromMessages([ + ["system", SYSTEM_PROMPT], + ["human", "{text}"], + ]); + + const response = await prompt.pipe(chatModel).invoke({ + learning_language: "English", + text, + }); + + try { + return await parser.parse(response.text); + } catch (e) { + return 
await fixParser.parse(response.text); + } +}; + +const SYSTEM_PROMPT = `Generate an array of JSON objects for each {learning_language} word in the given text, with each object containing two keys: 'word' and 'ipa', where 'ipa' is the International Phonetic Alphabet (IPA) representation of the word. Return the array in JSON format only. The output should be structured like this: + +{{ + words: [ + {{ + word: "word", + ipa: "ipa" + }} + ] +}}`; diff --git a/enjoy/src/commands/lookup.command.ts b/enjoy/src/commands/lookup.command.ts new file mode 100644 index 00000000..d15f7ec0 --- /dev/null +++ b/enjoy/src/commands/lookup.command.ts @@ -0,0 +1,145 @@ +import { ChatOpenAI } from "@langchain/openai"; +import { ChatPromptTemplate } from "langchain/prompts"; +import { z } from "zod"; +import { + StructuredOutputParser, + OutputFixingParser, +} from "langchain/output_parsers"; + +export const lookupCommand = async ( + params: { + word: string; + context: string; + meaningOptions?: Partial[]; + }, + options: { + key: string; + modelName?: string; + temperature?: number; + baseUrl?: string; + } +): Promise<{ + id?: string; + word?: string; + context_translation?: string; + pos?: string; + pronunciation?: string; + definition?: string; + translation?: string; + lemma?: string; +}> => { + const { + key, + modelName = "gpt-3.5-turbo-1106", + temperature = 0, + baseUrl, + } = options; + const { word, context, meaningOptions } = params; + + const responseSchema = z.object({ + id: z.string().optional(), + word: z.string().optional(), + context_translation: z.string().optional(), + pos: z.string().optional(), + pronunciation: z.string().optional(), + definition: z.string().optional(), + translation: z.string().optional(), + lemma: z.string().optional(), + }); + + const parser = StructuredOutputParser.fromZodSchema(responseSchema); + const fixParser = OutputFixingParser.fromLLM( + new ChatOpenAI({ + openAIApiKey: key, + temperature: 0, + configuration: { + baseURL: baseUrl, + }, + 
}), + parser + ); + + const chatModel = new ChatOpenAI({ + openAIApiKey: key, + modelName, + temperature, + configuration: { + baseURL: baseUrl, + }, + cache: true, + verbose: true, + }); + + const prompt = ChatPromptTemplate.fromMessages([ + ["system", DICITIONARY_PROMPT], + ["human", "{input}"], + ]); + + const response = await prompt.pipe(chatModel).invoke({ + learning_language: "English", + native_language: "Chinese", + input: JSON.stringify({ + word, + context, + definitions: meaningOptions, + }), + }); + + try { + return await parser.parse(response.text); + } catch (e) { + return await fixParser.parse(response.text); + } +}; + +const DICITIONARY_PROMPT = `You are an {learning_language}-{native_language} dictionary. I will provide "word(it also maybe a phrase)" and "context" as input, you should return the "word", "lemma", "pronunciation", "pos(part of speech, maybe empty for phrase)", "definition", "translation" and "context_translation" as output. If I provide "definitions", you should try to select the appropriate one for the given context, and return the id of selected definition as "id". If none are suitable, generate a new definition for me. If no context is provided, return the most common definition. If you do not know the appropriate definition, return an empty string for "definition" and "translation". + Always return output in JSON format. + + # Example 1, with empty definitions + + {{ + "word": "booked", + "context": "She'd *booked* a table for four at their favourite restaurant.", + "definitions": [] + }} + + + + {{ + "word": "booked", + "lemma": "book", + "pronunciation": "bʊk", + "pos": "verb", + "definition": "to arrange to have a seat, room, performer, etc. 
at a particular time in the future", + "translation": "预订", + "context_translation": "她已经在他们最喜欢的餐厅预订了四人桌位。" + }} + + + # Example 2, with definitions + + {{ + "word": "booked", + "context": "She'd *booked* a table for four at their favourite restaurant.", + "definitions": [ + {{ + "id": "767ddbf3-c08a-42e1-95c8-c48e681f3486", + "pos": "noun", + "definition": "a written text that can be published in printed or electronic form", + }}, + {{ + "id": "37940295-ef93-4873-af60-f03bf7e271f0", + "pos": "verb", + "definition": "to arrange to have a seat, room, performer, etc. at a particular time in the future", + }} + ] + }} + + + + {{ + "id": "37940295-ef93-4873-af60-f03bf7e271f0", + "context_translation": "她已经在他们最喜欢的餐厅预订了四人桌位。" + }} + + `; diff --git a/enjoy/src/commands/translate.command.ts b/enjoy/src/commands/translate.command.ts new file mode 100644 index 00000000..07020917 --- /dev/null +++ b/enjoy/src/commands/translate.command.ts @@ -0,0 +1,49 @@ +import { ChatOpenAI } from "@langchain/openai"; +import { ChatPromptTemplate } from "langchain/prompts"; + +export const translateCommand = async ( + text: string, + options: { + key: string; + modelName?: string; + temperature?: number; + baseUrl?: string; + } +): Promise => { + const { + key, + modelName = "gpt-3.5-turbo-1106", + temperature = 0, + baseUrl, + } = options; + + const chatModel = new ChatOpenAI({ + openAIApiKey: key, + modelName, + temperature, + configuration: { + baseURL: baseUrl, + }, + cache: true, + verbose: true, + }); + + const prompt = ChatPromptTemplate.fromMessages([ + ["system", SYSTEM_PROMPT], + ["human", TRANSLATION_PROMPT], + ]); + + const response = await prompt.pipe(chatModel).invoke({ + native_language: "Chinese", + text, + }); + + return response.text; +}; + +const SYSTEM_PROMPT = + "You are a professional, authentic translation engine, only returns translations."; +const TRANSLATION_PROMPT = `Translate the text to {native_language} Language, please do not explain my original text.: + 
+{text} +`; diff --git a/enjoy/src/i18n/en.json b/enjoy/src/i18n/en.json index bd66a496..c7e4fc81 100644 --- a/enjoy/src/i18n/en.json +++ b/enjoy/src/i18n/en.json @@ -87,7 +87,7 @@ "ttsBaseUrl": "TTS base URL", "notFound": "Conversation not found", "contentRequired": "Content required", - "failedToGenerateResponse": "Failed to generate response" + "failedToGenerateResponse": "Failed to generate response, please retry" }, "pronunciationAssessment": { "pronunciationScore": "Pronunciation Score", @@ -156,6 +156,8 @@ "autoCenter": "auto center", "inlineCaption": "inline caption", "autoScroll": "auto scroll", + "translate": "translate", + "displayIpa": "display IPA", "detail": "detail", "remove": "remove", "share": "share", @@ -295,7 +297,12 @@ "whisperIsNotWorking": "Whisper is not working", "relaunchIsNeededAfterChanged": "Relaunch is needed after changed", "openaiKeySaved": "OpenAI key saved", + "openaiConfigSaved": "OpenAI config saved", "openaiKeyRequired": "OpenAI key required", + "baseUrl": "baseURL", + "model": "model", + "key": "key", + "leaveEmptyToUseDefault": "Leave empty to use default", "newConversation": "New conversation", "startConversation": "Start conversation", "editConversation": "Edit conversation", @@ -336,8 +343,17 @@ "backSide": "back side", "aiExtractVocabulary": "AI extract vocabulary", "toggleReadable": "Toggle readable", + "extracting": "Extracting", + "extractionFailed": "Extraction failed", + "extractedSuccessfully": "Extracted successfully", + "lookUp": "Look up", + "lookUpAll": "Look up all", "lookingUp": "Looking up", - "thereAreLookupsPending": "There are {{count}} lookups pending", + "pending": "Pending", + "thereAreLookupsProcessing": "There are {{count}} lookups processing", + "thereAreLookupsPending": "There are {{count}} lookups waiting", + "lookupFailed": "Lookup failed", + "lookedUpSuccessfully": "Looked up successfully", "noRecordsFound": "No records found", "pleaseTryLater": "Please try later", "author": "author", diff --git 
a/enjoy/src/i18n/zh-CN.json b/enjoy/src/i18n/zh-CN.json index d58c4f65..e040d1e8 100644 --- a/enjoy/src/i18n/zh-CN.json +++ b/enjoy/src/i18n/zh-CN.json @@ -65,7 +65,7 @@ "conversation": { "name": "对话标题", "engine": "AI 引擎", - "baseUrl": "请求地址", + "baseUrl": "接口地址", "configuration": "AI 配置", "model": "AI 模型", "roleDefinition": "角色定义", @@ -87,7 +87,7 @@ "ttsBaseUrl": "TTS 请求地址", "notFound": "未找到对话", "contentRequired": "对话内容不能为空", - "failedToGenerateResponse": "生成失败" + "failedToGenerateResponse": "生成失败,请重试" }, "pronunciationAssessment": { "pronunciationScore": "发音得分", @@ -156,6 +156,8 @@ "autoCenter": "自动居中", "inlineCaption": "内联字幕", "autoScroll": "自动滚动", + "translate": "翻译", + "displayIpa": "标注音标", "detail": "详情", "remove": "删除", "share": "分享", @@ -294,7 +296,12 @@ "whisperIsNotWorking": "Whisper 无法正常工作,请尝试更换模型后重试,或联系开发者", "relaunchIsNeededAfterChanged": "更改后需要重新启动", "openaiKeySaved": "OpenAI 密钥已保存", + "openaiConfigSaved": "OpenAI 配置已保存", "openaiKeyRequired": "未提供 OpenAI 密钥", + "baseUrl": "接口地址", + "model": "模型", + "key": "密钥", + "leaveEmptyToUseDefault": "留空则使用默认值", "newConversation": "新对话", "startConversation": "开始对话", "editConversation": "编辑对话", @@ -335,8 +342,17 @@ "backSide": "反面", "aiExtractVocabulary": "AI 提取生词", "toggleReadable": "切换阅读模式", + "extracting": "正在提取", + "extractionFailed": "提取失败", + "extractedSuccessfully": "提取成功", + "lookUp": "查询", + "lookUpAll": "全部查询", "lookingUp": "正在查询", - "thereAreLookupsPending": "有{{count}}个单词正在查询", + "pending": "等待中", + "thereAreLookupsProcessing": "有{{count}}个单词正在查询", + "thereAreLookupsPending": "有{{count}}个单词正在等待查询", + "lookupFailed": "查询失败", + "lookedUpSuccessfully": "查询成功", "noRecordsFound": "没有找到记录", "pleaseTryLater": "请稍后再试", "author": "作者", diff --git a/enjoy/src/main/db/models/conversation.ts b/enjoy/src/main/db/models/conversation.ts index 47a9d34d..53de178d 100644 --- a/enjoy/src/main/db/models/conversation.ts +++ b/enjoy/src/main/db/models/conversation.ts @@ -297,6 +297,9 @@ export class Conversation extends 
Model { const replies = await Promise.all( response.map(async (generation) => { + if (!generation?.text) { + throw new Error(t("models.conversation.failedToGenerateResponse")); + } return await Message.create( { conversationId: this.id, diff --git a/enjoy/src/renderer/components/lookup-result.tsx b/enjoy/src/renderer/components/lookup-result.tsx index bc2ab41b..4a97e90b 100644 --- a/enjoy/src/renderer/components/lookup-result.tsx +++ b/enjoy/src/renderer/components/lookup-result.tsx @@ -1,9 +1,14 @@ -import { AppSettingsProviderContext } from "@renderer/context"; +import { + AppSettingsProviderContext, + AISettingsProviderContext, +} from "@renderer/context"; import { useState, useContext, useEffect } from "react"; import { LoaderSpin, MeaningCard } from "@renderer/components"; import { Button } from "@renderer/components/ui"; import { t } from "i18next"; import { XCircleIcon } from "lucide-react"; +import { toast } from "@renderer/components/ui"; +import { lookupCommand } from "@commands"; export const LookupResult = (props: { word: string; @@ -13,49 +18,70 @@ export const LookupResult = (props: { onResult?: (meaning: MeaningType) => void; }) => { const { word, context, sourceId, sourceType, onResult } = props; - const [timer, setTimer] = useState(); const [result, setResult] = useState(); const [loading, setLoading] = useState(true); if (!word) return null; const { webApi } = useContext(AppSettingsProviderContext); + const { openai } = useContext(AISettingsProviderContext); - const lookup = (retries = 0) => { + const processLookup = async () => { if (!word) return; - if (retries > 3) { - setLoading(false); - return; - } + if (!loading) return; - retries += 1; - webApi - .lookup({ - word, - context, - sourceId, - sourceType, - }) - .then((res) => { - if (res?.meaning) { - setResult(res); - setLoading(false); - onResult && onResult(res.meaning); - } else { - // Retry after 1.5s - const _timeout = setTimeout(() => { - lookup(retries); - }, 1500); - 
setTimer(_timeout); + setLoading(true); + const lookup = await webApi.lookup({ + word, + context, + sourceId, + sourceType, + }); + + if (lookup.meaning) { + setResult(lookup); + setLoading(false); + onResult && onResult(lookup.meaning); + } else { + if (!openai?.key) { + toast.error(t("openaiApiKeyRequired")); + return; + } + + lookupCommand( + { + word, + context, + meaningOptions: lookup.meaningOptions, + }, + { + key: openai.key, } - }); + ) + .then((res) => { + if (res.context_translation?.trim()) { + webApi + .updateLookup(lookup.id, { + meaning: res, + sourceId, + sourceType, + }) + .then((lookup) => { + setResult(lookup); + onResult && onResult(lookup.meaning); + }); + } + }) + .catch((err) => { + toast.error(`${t("lookupFailed")}: ${err.message}`); + }) + .finally(() => { + setLoading(false); + }); + } }; useEffect(() => { - lookup(); - - return () => { - if (timer) clearTimeout(timer); - }; + processLookup(); }, [word, context]); if (result?.meaning) { @@ -95,14 +121,7 @@ export const LookupResult = (props: {
{word}
-
diff --git a/enjoy/src/renderer/components/meanings/meaning-card.tsx b/enjoy/src/renderer/components/meanings/meaning-card.tsx index 12ce1acf..362a4149 100644 --- a/enjoy/src/renderer/components/meanings/meaning-card.tsx +++ b/enjoy/src/renderer/components/meanings/meaning-card.tsx @@ -23,7 +23,7 @@ export const MeaningCard = (props: { const lookups = [lookup, ..._lookups].filter(Boolean); return ( -
+
{word}
{pos && ( diff --git a/enjoy/src/renderer/components/medias/media-caption.tsx b/enjoy/src/renderer/components/medias/media-caption.tsx index ba568706..6b9b7735 100644 --- a/enjoy/src/renderer/components/medias/media-caption.tsx +++ b/enjoy/src/renderer/components/medias/media-caption.tsx @@ -1,13 +1,31 @@ -import { useState, useEffect } from "react"; +import { useState, useEffect, useContext } from "react"; import { cn } from "@renderer/lib/utils"; import { Button, + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuTrigger, Popover, PopoverContent, PopoverAnchor, + toast, } from "@renderer/components/ui"; import { LookupResult } from "@renderer/components"; -import { LanguagesIcon, PlayIcon } from "lucide-react"; +import { + ChevronDownIcon, + LanguagesIcon, + PlayIcon, + LoaderIcon, + SpeechIcon, +} from "lucide-react"; +import { translateCommand, ipaCommand } from "@commands"; +import { + AppSettingsProviderContext, + AISettingsProviderContext, +} from "@renderer/context"; +import { t } from "i18next"; +import { md5 } from "js-md5"; export const MediaCaption = (props: { mediaId: string; @@ -36,6 +54,92 @@ export const MediaCaption = (props: { left: number; }; }>(); + const [translation, setTranslation] = useState(); + const [translating, setTranslating] = useState(false); + const [displayTranslation, setDisplayTranslation] = useState(false); + + const [ipa, setIpa] = useState<{ word?: string; ipa?: string }[]>([]); + const [ipaGenerating, setIpaGenerating] = useState(false); + const [displayIpa, setDisplayIpa] = useState(false); + + const { EnjoyApp } = useContext(AppSettingsProviderContext); + const { openai } = useContext(AISettingsProviderContext); + + const toogleIPA = async () => { + if (ipaGenerating) return; + + if (ipa.length > 0) { + setDisplayIpa(!displayIpa); + return; + } + + const hash = md5.create(); + hash.update(transcription.text); + const cacheKey = `ipa-${hash.hex()}`; + const cached = await 
EnjoyApp.cacheObjects.get(cacheKey); + if (cached) { + setIpa(cached); + return; + } + + if (!openai?.key) { + toast.error(t("openaiApiKeyRequired")); + return; + } + setIpaGenerating(true); + + ipaCommand(transcription.text, { + key: openai.key, + }) + .then((result) => { + if (result?.words?.length > 0) { + setIpa(result.words); + EnjoyApp.cacheObjects.set(cacheKey, result.words); + setDisplayIpa(true); + } + }) + .finally(() => { + setIpaGenerating(false); + }); + }; + + const translate = async () => { + if (translating) return; + + if (translation) { + setDisplayTranslation(!displayTranslation); + return; + } + + const hash = md5.create(); + hash.update(transcription.text); + const cacheKey = `translate-${hash.hex()}`; + const cached = await EnjoyApp.cacheObjects.get(cacheKey); + if (cached) { + setTranslation(cached); + return; + } + + if (!openai?.key) { + toast.error(t("openaiApiKeyRequired")); + return; + } + setTranslating(true); + + translateCommand(transcription.text, { + key: openai.key, + }) + .then((result) => { + if (result) { + setTranslation(result); + EnjoyApp.cacheObjects.set(cacheKey, result); + setDisplayTranslation(true); + } + }) + .finally(() => { + setTranslating(false); + }); + }; useEffect(() => { if (!transcription) return; @@ -54,67 +158,117 @@ export const MediaCaption = (props: { return (
-
- {(transcription.segments || []).map((w, index) => ( - { - setSelected({ - index, - word: w.text, - position: { - top: - event.currentTarget.offsetTop + - event.currentTarget.offsetHeight, - left: event.currentTarget.offsetLeft, - }, - }); +
+
+
+ {(transcription.segments || []).map((w, index) => ( +
{ + setSelected({ + index, + word: w.text, + position: { + top: + event.currentTarget.offsetTop + + event.currentTarget.offsetHeight, + left: event.currentTarget.offsetLeft, + }, + }); - setIsPlaying(false); - if (onSeek) onSeek(w.offsets.from / 1000); - }} - > - {w.text} - - ))} - - { - if (!value) setSelected(null); - }} - > - - - {selected?.word && ( - w.text) - .join(" ") - .trim()} - mediaId={props.mediaId} - mediaType={props.mediaType} - onPlay={() => { - setIsPlaying(true); + setIsPlaying(false); + if (onSeek) onSeek(w.offsets.from / 1000); }} - /> - )} - - + > +
{w.text}
+ {displayIpa && + ipa.find( + (i) => + i.word.trim() === w.text.replace(/[\.,?!]/g, "").trim() + )?.ipa && ( +
+ { + ipa.find( + (i) => + i.word.trim() === + w.text.replace(/[\.,?!]/g, "").trim() + )?.ipa + } +
+ )} +
+ ))} +
+ {displayTranslation && translation && ( +
+ {translation} +
+ )} +
+ + + + + + + + {translating ? ( + + ) : ( + + )} + {t("translate")} + + + {ipaGenerating ? ( + + ) : ( + + )} + {t("displayIpa")} + + +
+ + { + if (!value) setSelected(null); + }} + > + + + {selected?.word && ( + w.text) + .join(" ") + .trim()} + mediaId={props.mediaId} + mediaType={props.mediaType} + onPlay={() => { + setIsPlaying(true); + }} + /> + )} + +
); }; diff --git a/enjoy/src/renderer/components/medias/media-player.tsx b/enjoy/src/renderer/components/medias/media-player.tsx index 28ef95cc..689e3a0f 100644 --- a/enjoy/src/renderer/components/medias/media-player.tsx +++ b/enjoy/src/renderer/components/medias/media-player.tsx @@ -538,6 +538,7 @@ export const MediaPlayer = (props: { {initialized && (
{ const OpenaiSettings = () => { const { openai, setOpenai } = useContext(AISettingsProviderContext); const [editing, setEditing] = useState(false); - const ref = useRef(); - const handleSave = () => { - if (!ref.current) return; + const openAiConfigSchema = z.object({ + key: z.string().optional(), + model: z.enum(LLM_PROVIDERS.openai.models), + baseUrl: z.string().optional(), + }); + const form = useForm>({ + resolver: zodResolver(openAiConfigSchema), + values: { + key: openai?.key, + model: openai?.model, + baseUrl: openai?.baseUrl, + }, + }); + + const onSubmit = async (data: z.infer) => { setOpenai({ - key: ref.current.value, + ...data, }); setEditing(false); - - toast.success(t("openaiKeySaved")); + toast.success(t("openaiConfigSaved")); }; - useEffect(() => { - if (editing) { - ref.current?.focus(); - } - }, [editing]); - return ( -
-
-
Open AI
-
-
- - - {editing && ( - - )} +
+ +
+
+
Open AI
+
+ ( + +
+ {t("key")}: + +
+ +
+ )} + /> + ( + +
+ {t("model")}: + +
+ +
+ )} + /> + ( + +
+ {t("baseUrl")}: + +
+ +
+ )} + /> +
+
+ +
+ +
-
-
- -
-
+ + ); }; diff --git a/enjoy/src/renderer/components/stories/index.ts b/enjoy/src/renderer/components/stories/index.ts index 7bcc4271..34383ecc 100644 --- a/enjoy/src/renderer/components/stories/index.ts +++ b/enjoy/src/renderer/components/stories/index.ts @@ -4,6 +4,7 @@ export * from "./story-preview-toolbar"; export * from "./story-toolbar"; export * from "./story-viewer"; export * from "./story-content"; +export * from "./story-vocabulary-sheet"; export * from "./stories-segment"; export * from "./ted-ideas-segment"; diff --git a/enjoy/src/renderer/components/stories/story-toolbar.tsx b/enjoy/src/renderer/components/stories/story-toolbar.tsx index b3740b3e..bf4cb475 100644 --- a/enjoy/src/renderer/components/stories/story-toolbar.tsx +++ b/enjoy/src/renderer/components/stories/story-toolbar.tsx @@ -1,7 +1,4 @@ import { - Alert, - AlertTitle, - AlertDescription, AlertDialog, AlertDialogTrigger, AlertDialogContent, @@ -12,16 +9,9 @@ import { AlertDialogCancel, AlertDialogAction, Button, - ScrollArea, - Separator, - Sheet, - SheetHeader, - SheetContent, FloatingToolbar, ToolbarButton, } from "@renderer/components/ui"; -import { MeaningCard, NoRecordsFound, LoaderSpin } from "@renderer/components"; -import { useState } from "react"; import { HighlighterIcon, ScanTextIcon, @@ -46,123 +36,65 @@ export const StoryToolbar = (props: { meanings?: MeaningType[]; marked?: boolean; toggleMarked?: () => void; - pendingLookups?: LookupType[]; handleShare?: () => void; + vocabularyVisible: boolean; + setVocabularyVisible?: (value: boolean) => void; }) => { const { starred, toggleStarred, - extracted, scanning, onScan, marked, toggleMarked, - meanings = [], - pendingLookups = [], handleShare, + vocabularyVisible, + setVocabularyVisible, } = props; - const [vocabularyVisible, setVocabularyVisible] = useState( - !extracted - ); - return ( - <> - - { - onScan(); - setVocabularyVisible(!vocabularyVisible); - }} - > - {scanning ? 
( - - ) : ( - - )} - - - - - - - - - - - - - - - - {t("shareStory")} - - {t("areYouSureToShareThisStoryToCommunity")} - - - - {t("cancel")} - - - - - - - - - { - if (!value) setVocabularyVisible(null); + + { + onScan(); + setVocabularyVisible(!vocabularyVisible); }} > - - -
- - {t("keyVocabulary")} - - - ({meanings.length}) - -
-
-
- - {extracted ? ( - <> - {pendingLookups.length > 0 && ( - - - {t("lookingUp")} - - {t("thereAreLookupsPending", { - count: pendingLookups.length, - })} - - - )} - - {meanings.length > 0 ? ( - meanings.map((meaning) => ( -
- - -
- )) - ) : ( - - )} - - ) : ( - - )} -
-
-
-
- + {scanning ? ( + + ) : ( + + )} + + + + + + + + + + + + + + + + {t("shareStory")} + + {t("areYouSureToShareThisStoryToCommunity")} + + + + {t("cancel")} + + + + + + + ); }; diff --git a/enjoy/src/renderer/components/stories/story-viewer.tsx b/enjoy/src/renderer/components/stories/story-viewer.tsx index 9f30eea3..022fde66 100644 --- a/enjoy/src/renderer/components/stories/story-viewer.tsx +++ b/enjoy/src/renderer/components/stories/story-viewer.tsx @@ -9,7 +9,7 @@ import { PopoverAnchor, } from "@renderer/components/ui"; import { SelectionMenu } from "@renderer/components"; -import { debounce , uniq } from "lodash"; +import { debounce, uniq } from "lodash"; import Mark from "mark.js"; export const StoryViewer = (props: { @@ -17,7 +17,7 @@ export const StoryViewer = (props: { marked?: boolean; meanings?: MeaningType[]; setMeanings: (meanings: MeaningType[]) => void; - pendingLookups?: LookupType[]; + pendingLookups?: Partial[]; doc: any; }) => { const navigate = useNavigate(); @@ -48,6 +48,8 @@ export const StoryViewer = (props: { const handleSelectionChanged = debounce(() => { const selection = document.getSelection(); + if (!ref.current?.contains(selection.anchorNode.parentElement)) return; + const word = selection .toString() .trim() @@ -73,17 +75,16 @@ export const StoryViewer = (props: { return () => { document.removeEventListener("selectionchange", handleSelectionChanged); }; - }, [story]); + }, [story, ref]); useEffect(() => { - const words = uniq([ - ...meanings.map((m) => m.word), - ...pendingLookups.map((l) => l.word), - ]); - if (words.length === 0) return; - const marker = new Mark(ref.current); if (marked) { + const words = uniq([ + ...meanings.map((m) => m.word), + ...pendingLookups.map((l) => l.word), + ]); + if (words.length === 0) return; marker.mark(words, { separateWordSearch: false, caseSensitive: false, @@ -92,6 +93,10 @@ export const StoryViewer = (props: { } else { marker.unmark(); } + + return () => { + marker.unmark(); + }; }, [meanings, 
pendingLookups, marked]); return ( diff --git a/enjoy/src/renderer/components/stories/story-vocabulary-sheet.tsx b/enjoy/src/renderer/components/stories/story-vocabulary-sheet.tsx new file mode 100644 index 00000000..f204966e --- /dev/null +++ b/enjoy/src/renderer/components/stories/story-vocabulary-sheet.tsx @@ -0,0 +1,155 @@ +import { + Alert, + AlertTitle, + AlertDescription, + Button, + ScrollArea, + Separator, + Sheet, + SheetHeader, + SheetContent, +} from "@renderer/components/ui"; +import { MeaningCard, NoRecordsFound, LoaderSpin } from "@renderer/components"; +import { LoaderIcon, LanguagesIcon } from "lucide-react"; +import { t } from "i18next"; + +export const StoryVocabularySheet = (props: { + extracted: boolean; + meanings?: MeaningType[]; + pendingLookups?: Partial[]; + vocabularyVisible?: boolean; + setVocabularyVisible?: (value: boolean) => void; + lookingUpInBatch?: boolean; + setLookupInBatch?: (value: boolean) => void; + processLookup?: (lookup: Partial) => void; + lookingUp?: boolean; +}) => { + const { + extracted, + meanings = [], + pendingLookups = [], + vocabularyVisible, + setVocabularyVisible, + lookingUpInBatch, + setLookupInBatch, + processLookup, + lookingUp, + } = props; + + return ( + { + if (!value) setVocabularyVisible(null); + }} + > + + +
+ + {t("keyVocabulary")} + + + ({meanings.length}) + +
+
+
+ + {extracted ? ( + <> + {pendingLookups.length > 0 && ( + + {lookingUpInBatch ? ( + <> + + {t("lookingUp")} + +
+ {t("thereAreLookupsPending", { + count: pendingLookups.length, + })} +
+
+ +
+
+ + ) : ( + <> + + {t("pending")} + +
+ {t("thereAreLookupsPending", { + count: pendingLookups.length, + })} +
+
+ +
+
+ + )} +
+ )} + + {meanings.length > 0 && + meanings.map((meaning) => ( +
+ + +
+ ))} + + {pendingLookups.length > 0 && + pendingLookups.map((lookup) => ( +
+
+
{lookup.word}
+ +
+
+
+ {t("context")}: +
+
+ {lookup.context} +
+
+ +
+ ))} + + {meanings.length === 0 && pendingLookups.length === 0 && ( + + )} + + ) : ( + + )} +
+
+
+
+ ); +}; diff --git a/enjoy/src/renderer/pages/story-preview.tsx b/enjoy/src/renderer/pages/story-preview.tsx index 75d824ca..86d809af 100644 --- a/enjoy/src/renderer/pages/story-preview.tsx +++ b/enjoy/src/renderer/pages/story-preview.tsx @@ -53,8 +53,8 @@ export default () => { webApi .createStory({ - url: story.metadata?.url || story.url, ...story, + url: story.metadata?.url || story.url, } as CreateStoryParamsType) .then((story) => { navigate(`/stories/${story.id}`); @@ -168,7 +168,7 @@ export default () => { useEffect(() => { if (readable) { - EnjoyApp.view.hide(); + EnjoyApp.view.hide().catch(console.error); } else if (!loading) { const rect = containerRef.current.getBoundingClientRect(); EnjoyApp.view.show({ diff --git a/enjoy/src/renderer/pages/story.tsx b/enjoy/src/renderer/pages/story.tsx index b32c3011..742a069a 100644 --- a/enjoy/src/renderer/pages/story.tsx +++ b/enjoy/src/renderer/pages/story.tsx @@ -5,31 +5,42 @@ import { PagePlaceholder, StoryToolbar, StoryViewer, + StoryVocabularySheet, } from "@renderer/components"; import { useState, useContext, useEffect } from "react"; import { useParams } from "react-router-dom"; -import { AppSettingsProviderContext } from "@renderer/context"; +import { + AppSettingsProviderContext, + AISettingsProviderContext, +} from "@renderer/context"; +import { extractStoryCommand, lookupCommand } from "@/commands"; import nlp from "compromise"; import paragraphs from "compromise-paragraphs"; nlp.plugin(paragraphs); -let timeout: NodeJS.Timeout = null; export default () => { const { id } = useParams<{ id: string }>(); const { webApi } = useContext(AppSettingsProviderContext); + const { openai } = useContext(AISettingsProviderContext); const [loading, setLoading] = useState(true); const [story, setStory] = useState(); const [meanings, setMeanings] = useState([]); - const [pendingLookups, setPendingLookups] = useState([]); - const [scanning, setScanning] = useState(false); + const [pendingLookups, setPendingLookups] = 
useState[]>( + [] + ); + const [scanning, setScanning] = useState(true); const [marked, setMarked] = useState(true); const [doc, setDoc] = useState(null); + const [vocabularyVisible, setVocabularyVisible] = useState(false); + const [lookingUpInBatch, setLookupInBatch] = useState(false); + const [lookingUp, setLookingUp] = useState(false); const fetchStory = async () => { webApi .story(id) .then((story) => { setStory(story); + setVocabularyVisible(!story.extracted); const doc = nlp(story.content); doc.cache(); setDoc(doc); @@ -47,28 +58,75 @@ export default () => { if (!response) return; setMeanings(response.meanings); - setPendingLookups(response.pendingLookups); - - if (response.pendingLookups.length > 0) { - if (timeout) clearTimeout(timeout); - - timeout = setTimeout(() => { - fetchMeanings(); - }, 3000); - } + setPendingLookups(response.pendingLookups || []); }) .finally(() => { setScanning(false); }); }; - const lookupVocabulary = () => { - if (story?.extracted) return; + const extractVocabulary = async () => { + if (!story) return; + + let { words = [], idioms = [] } = story?.extraction || {}; + if (story?.extracted && (words.length > 0 || idioms.length > 0)) return; + + toast.promise( + async () => { + if (words.length === 0 && idioms.length === 0) { + if (!openai?.key) { + toast.error(t("openaiKeyRequired")); + return; + } + + try { + const res = await extractStoryCommand(story.content, { + key: openai.key, + }); + + words = res.words || []; + idioms = res.idioms || []; + } catch (error) { + console.error(error); + toast.error(t("extractionFailed"), { + description: error.message, + }); + return; + } + } + + webApi + .extractVocabularyFromStory(id, { + words, + idioms, + }) + .then(() => { + fetchStory(); + }) + .finally(() => { + setScanning(false); + }); + }, + { + loading: t("extracting"), + success: t("extractedSuccessfully"), + error: (err) => t("extractionFailed", { error: err.message }), + position: "bottom-right", + } + ); + }; + + const 
buildVocabulary = () => { + if (!story?.extraction) return; + if (meanings.length > 0 || pendingLookups.length > 0) return; if (!doc) return; + if (scanning) return; - const vocabulary: any[] = []; + const { words = [], idioms = [] } = story.extraction || {}; - story.vocabulary.forEach((word) => { + const lookups: any[] = []; + + [...words, ...idioms].forEach((word) => { const m = doc.lookup(word); const sentences = m.sentences().json(); @@ -79,7 +137,7 @@ export default () => { return; } - vocabulary.push({ + lookups.push({ word, context, sourceId: story.id, @@ -88,19 +146,24 @@ export default () => { }); }); - webApi.lookupInBatch(vocabulary).then((response) => { - const { errors } = response; - if (errors.length > 0) { - console.warn(errors); - return; - } + const pendings = lookups + .filter( + (v) => + meanings.findIndex( + (m) => m.word.toLowerCase() === v.word.toLowerCase() + ) < 0 + ) + .filter( + (v) => + pendingLookups.findIndex( + (l) => l.word.toLowerCase() === v.word.toLowerCase() + ) < 0 + ); - webApi.extractVocabularyFromStory(id).then(() => { - fetchStory(); - if (pendingLookups.length > 0) return; + if (pendings.length === 0) return; - fetchMeanings(); - }); + webApi.lookupInBatch(pendings).then(() => { + fetchMeanings(); }); }; @@ -131,18 +194,76 @@ export default () => { }); }; + const processLookup = async (pendingLookup: Partial) => { + if (lookingUp) return; + + const { meaningOptions = [] } = await webApi.lookup({ + word: pendingLookup.word, + context: pendingLookup.context, + sourceId: story.id, + sourceType: "Story", + }); + if (!openai?.key) { + toast.error(t("openaiApiKeyRequired")); + return; + } + + setLookingUp(true); + toast.promise( + lookupCommand( + { + word: pendingLookup.word, + context: pendingLookup.context, + meaningOptions, + }, + { + key: openai.key, + } + ) + .then((res) => { + if (res.context_translation?.trim()) { + webApi + .updateLookup(pendingLookup.id, { + meaning: res, + sourceId: story.id, + sourceType: "Story", + 
}) + .then(() => { + fetchMeanings(); + }); + } + }) + .finally(() => { + setLookingUp(false); + }), + { + loading: t("lookingUp"), + success: t("lookedUpSuccessfully"), + error: (err) => t("lookupFailed", { error: err.message }), + position: "bottom-right", + } + ); + }; + useEffect(() => { fetchStory(); fetchMeanings(); - - return () => { - if (timeout) clearTimeout(timeout); - }; }, [id]); useEffect(() => { - lookupVocabulary(); - }, [story]); + extractVocabulary(); + }, [story?.extracted]); + + useEffect(() => { + buildVocabulary(); + }, [pendingLookups, meanings, story?.extraction]); + + useEffect(() => { + if (!lookingUpInBatch) return; + if (pendingLookups.length === 0) return; + + processLookup(pendingLookups[0]); + }, [pendingLookups, lookingUpInBatch]); if (loading) { return ( @@ -174,19 +295,31 @@ export default () => { extracted={story.extracted} starred={story.starred} toggleStarred={toggleStarred} - pendingLookups={pendingLookups} handleShare={handleShare} + vocabularyVisible={vocabularyVisible} + setVocabularyVisible={setVocabularyVisible} /> + ); }; diff --git a/enjoy/src/types.d.ts b/enjoy/src/types.d.ts index 68d5219e..8e1241ce 100644 --- a/enjoy/src/types.d.ts +++ b/enjoy/src/types.d.ts @@ -10,6 +10,7 @@ type SupportedLlmProviderType = "openai" | "googleGenerativeAi"; type LlmProviderType = { key?: string; model?: string; + baseUrl?: string; }; type DownloadStateType = { @@ -92,6 +93,7 @@ type LookupType = { contextTranslation: string; status?: "pending" | "completed" | "failed"; meaning?: MeaningType; + meaningOptions?: MeaningType[]; createdAt: string; updatedAt: string; }; diff --git a/enjoy/src/types/story.d.ts b/enjoy/src/types/story.d.ts index f3c63e02..612b4c45 100644 --- a/enjoy/src/types/story.d.ts +++ b/enjoy/src/types/story.d.ts @@ -6,6 +6,10 @@ type StoryType = { metadata: { [key: string]: string; }; + extraction?: { + words?: string[]; + idioms?: string[]; + }; vocabulary?: string[]; extracted?: boolean; starred?: boolean; @@ -21,4 
+25,8 @@ type CreateStoryParamsType = { metadata: { [key: string]: string; }; + extraction?: { + words?: string[]; + idioms?: string[]; + } }; diff --git a/enjoy/tsconfig.json b/enjoy/tsconfig.json index 37860d29..a10922e4 100644 --- a/enjoy/tsconfig.json +++ b/enjoy/tsconfig.json @@ -15,7 +15,8 @@ "paths": { "@/*": ["./src/*"], "@renderer/*": ["./src/renderer/*"], - "@main/*": ["./src/main/*"] + "@main/*": ["./src/main/*"], + "@commands": ["./src/commands"] }, "emitDecoratorMetadata": true, "experimentalDecorators": true, diff --git a/enjoy/vite.main.config.mts b/enjoy/vite.main.config.mts index f6eeff09..cbc19a39 100644 --- a/enjoy/vite.main.config.mts +++ b/enjoy/vite.main.config.mts @@ -12,6 +12,7 @@ export default defineConfig({ alias: { "@": path.resolve(__dirname, "./src"), "@main": path.resolve(__dirname, "./src/main"), + "@commands": path.resolve(__dirname, "./src/commands"), }, }, build: { diff --git a/enjoy/vite.renderer.config.mts b/enjoy/vite.renderer.config.mts index 7651b8c6..8d7d8644 100644 --- a/enjoy/vite.renderer.config.mts +++ b/enjoy/vite.renderer.config.mts @@ -20,6 +20,7 @@ export default defineConfig({ alias: { "@": path.resolve(__dirname, "./src"), "@renderer": path.resolve(__dirname, "./src/renderer"), + "@commands": path.resolve(__dirname, "./src/commands"), }, }, }); diff --git a/yarn.lock b/yarn.lock index 18b8df33..9eb55869 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5803,6 +5803,7 @@ __metadata: fs-extra: "npm:^11.2.0" html-to-text: "npm:^9.0.5" i18next: "npm:^23.7.16" + js-md5: "npm:^0.8.3" langchain: "npm:^0.1.4" lodash: "npm:^4.17.21" lucide-react: "npm:^0.312.0" @@ -7955,6 +7956,13 @@ __metadata: languageName: node linkType: hard +"js-md5@npm:^0.8.3": + version: 0.8.3 + resolution: "js-md5@npm:0.8.3" + checksum: f7e41e95f8e5eb5eeb43085bec3832ae3dfe0020c42fcca5a4efe571213391a9e9594db31bd34624b7280af4f1f12c751b6a50074a15346ecf40a0d54115d77f + languageName: node + linkType: hard + "js-tiktoken@npm:^1.0.7, js-tiktoken@npm:^1.0.8": 
version: 1.0.8 resolution: "js-tiktoken@npm:1.0.8"