Improve settings (#1174)

* more settings for local whisper

* clean code

* allow opening echogarden packages dir

* upgrade deps

* handle unhandled rejection from recognize

* fix

* add tts settings

* update ui

* allow opening library path
an-lee
2024-11-11 18:07:16 +08:00
committed by GitHub
parent 6c8da3075a
commit 521ee76af7
31 changed files with 849 additions and 324 deletions

View File

@@ -18,7 +18,7 @@
},
"devDependencies": {
"autoprefixer": "^10.4.20",
"postcss": "^8.4.47",
"postcss": "^8.4.48",
"sass": "^1.80.6",
"tailwindcss": "^3.4.14"
}

View File

@@ -85,7 +85,7 @@
"typescript": "^5.6.3",
"vite": "^5.4.10",
"vite-plugin-static-copy": "^2.1.0",
"zx": "^8.2.0"
"zx": "^8.2.1"
},
"dependencies": {
"@andrkrn/ffprobe-static": "^5.2.0",
@@ -141,7 +141,7 @@
"dayjs": "^1.11.13",
"decamelize": "^6.0.0",
"decamelize-keys": "^2.0.1",
"echogarden": "^1.8.7",
"echogarden": "^2.0.0",
"electron-context-menu": "^4.0.4",
"electron-log": "^5.2.2",
"electron-settings": "^4.0.4",
@@ -159,7 +159,7 @@
"langchain": "^0.3.5",
"lodash": "^4.17.21",
"lru-cache": "^11.0.2",
"lucide-react": "^0.455.0",
"lucide-react": "^0.456.0",
"mark.js": "^8.11.1",
"microsoft-cognitiveservices-speech-sdk": "^1.41.0",
"mime-types": "^2.1.35",
@@ -167,7 +167,7 @@
"next-themes": "^0.4.3",
"openai": "^4.71.1",
"pitchfinder": "^2.3.2",
"postcss": "^8.4.47",
"postcss": "^8.4.48",
"proxy-agent": "^6.4.0",
"react": "^18.3.1",
"react-activity-calendar": "^2.7.1",
@@ -175,7 +175,7 @@
"react-audio-voice-recorder": "^2.2.0",
"react-dom": "^18.3.1",
"react-frame-component": "^5.2.7",
"react-hook-form": "^7.53.1",
"react-hook-form": "^7.53.2",
"react-hotkeys-hook": "^4.6.1",
"react-i18next": "^15.1.1",
"react-markdown": "^9.0.1",

View File

@@ -907,5 +907,17 @@
"failedToLoadLink": "Failed to load link",
"refreshSpeech": "Refresh speech",
"locateParagraph": "Locate paragraph",
"close": "Close"
"close": "Close",
"config": "Config",
"temperature": "Temperature",
"encoderProvider": "Encoder Provider",
"decoderProvider": "Decoder Provider",
"enableGPU": "Enable GPU",
"openPackagesDir": "Open models dir",
"whisperModelDescription": "Model will be downloaded when first used.",
"whisperEngineDescription": "OpenAI Whisper with inference done via the ONNX runtime.",
"whisperCppEngineDescription": "C++ port of the Whisper architecture.",
"ttsService": "Text to Speech Service",
"openaiTtsServiceDescription": "Use OpenAI TTS service from your own key.",
"enjoyTtsServiceDescription": "Use TTS service provided by Enjoy. OpenAI or Azure is supported."
}

View File

@@ -907,5 +907,17 @@
"failedToLoadLink": "加载链接失败",
"refreshSpeech": "刷新语音",
"locateParagraph": "定位段落",
"close": "关闭"
"close": "关闭",
"config": "配置",
"temperature": "温度",
"encoderProvider": "编码器",
"decoderProvider": "解码器",
"enableGPU": "启用 GPU",
"openPackagesDir": "打开模型目录",
"whisperModelDescription": "模型首次使用时会下载。",
"whisperEngineDescription": "OpenAI Whisper 使用 ONNX 运行时进行推理。",
"whisperCppEngineDescription": "Whisper 的 C++ 实现。",
"ttsService": "文字转语音服务",
"openaiTtsServiceDescription": "使用您自己的 API key 来使用 OpenAI TTS 服务。",
"enjoyTtsServiceDescription": "使用 Enjoy 提供的 TTS 服务,支持 OpenAI 或 Azure。"
}

View File

@@ -259,7 +259,9 @@ export class Document extends Model<Document> {
}
logger.debug("detected file type", filePath, mimeType, extension);
if (!DocumentFormats.includes(extension)) {
if (extension === "zip" && filePath.endsWith(".epub")) {
extension = "epub";
} else if (!DocumentFormats.includes(extension)) {
logger.error("unsupported file type", filePath, extension);
throw new Error(
t("models.document.fileNotSupported", { file: filePath })

View File

@@ -15,6 +15,8 @@ import {
type Timeline,
type TimelineEntry,
} from "echogarden/dist/utilities/Timeline.d.js";
import { WhisperOptions } from "echogarden/dist/recognition/WhisperSTT.js";
import { ensureAndGetPackagesDir } from "echogarden/dist/utilities/PackageManager.js";
import path from "path";
import log from "@main/logger";
import url from "url";
@@ -25,7 +27,6 @@ import { enjoyUrlToPath, pathToEnjoyUrl } from "./utils";
import { UserSetting } from "./db/models";
import { UserSettingKeyEnum } from "@/types/enums";
import { WHISPER_MODELS } from "@/constants";
import { WhisperOptions } from "echogarden/dist/recognition/WhisperSTT.js";
Echogarden.setGlobalOption(
"ffmpegPath",
@@ -59,7 +60,27 @@ class EchogardenWrapper {
public wordTimelineToSegmentSentenceTimeline: typeof wordTimelineToSegmentSentenceTimeline;
constructor() {
this.recognize = Echogarden.recognize;
this.recognize = (sampleFile: string, options: RecognitionOptions) => {
return new Promise((resolve, reject) => {
const handler = (reason: any) => {
// Remove the handler after it's triggered
process.removeListener("unhandledRejection", handler);
reject(reason);
};
// Add temporary unhandledRejection listener
process.on("unhandledRejection", handler);
// Call the original recognize function
Echogarden.recognize(sampleFile, options)
.then((result) => {
// Remove the handler if successful
process.removeListener("unhandledRejection", handler);
resolve(result);
})
.catch((err) => {
// Also remove the handler on a normal rejection
process.removeListener("unhandledRejection", handler);
reject(err);
});
});
};
this.align = Echogarden.align;
this.alignSegments = Echogarden.alignSegments;
this.denoise = Echogarden.denoise;
@@ -78,23 +99,15 @@ class EchogardenWrapper {
engine: "whisper",
whisper: {
model: "tiny.en",
language: "en",
} as WhisperOptions,
},
}
) {
const sampleFile = path.join(__dirname, "samples", "jfk.wav");
try {
const whisperModel = await UserSetting.get(UserSettingKeyEnum.WHISPER);
if (WHISPER_MODELS.includes(whisperModel)) {
options.whisper.model = whisperModel;
}
} catch (e) {
logger.error(e);
}
try {
logger.info("check:", options);
const result = await this.recognize(sampleFile, options);
logger.info(result);
logger.info(result?.transcript);
fs.writeJsonSync(
path.join(settings.cachePath(), "echogarden-check.json"),
result,
@@ -225,6 +238,10 @@ class EchogardenWrapper {
ipcMain.handle("echogarden-check", async (_event, options: any) => {
return this.check(options);
});
ipcMain.handle("echogarden-get-packages-dir", async (_event) => {
return ensureAndGetPackagesDir();
});
}
}
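
The wrapper above guards against echogarden surfacing recognition failures as process-level unhandledRejection events instead of normal promise rejections. The same pattern in isolation, as a minimal sketch (the withRejectionGuard name is illustrative):

function withRejectionGuard<T>(riskyCall: () => Promise<T>): Promise<T> {
  return new Promise((resolve, reject) => {
    const handler = (reason: unknown) => {
      // Treat a stray unhandledRejection as this call's failure
      process.removeListener("unhandledRejection", handler);
      reject(reason);
    };
    process.on("unhandledRejection", handler);
    riskyCall()
      .then((result) => {
        process.removeListener("unhandledRejection", handler);
        resolve(result);
      })
      .catch((err) => {
        process.removeListener("unhandledRejection", handler);
        reject(err);
      });
  });
}

The listener is removed on every exit path, so it cannot leak across calls.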

View File

@@ -476,6 +476,9 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", {
},
},
echogarden: {
getPackagesDir: () => {
return ipcRenderer.invoke("echogarden-get-packages-dir");
},
recognize: (input: string, options: RecognitionOptions) => {
return ipcRenderer.invoke("echogarden-recognize", input, options);
},
@@ -505,8 +508,8 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", {
transcode: (input: string) => {
return ipcRenderer.invoke("echogarden-transcode", input);
},
check: () => {
return ipcRenderer.invoke("echogarden-check");
check: (options: RecognitionOptions) => {
return ipcRenderer.invoke("echogarden-check", options);
},
},
ffmpeg: {
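
With this bridge in place, renderer code can resolve and open the packages directory; a minimal sketch, assuming the shell.openPath bridge used elsewhere in this commit:

const dir: string = await window.__ENJOY_APP__.echogarden.getPackagesDir();
// Reveal the models/packages directory in the OS file manager
await window.__ENJOY_APP__.shell.openPath(dir);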

View File

@@ -48,7 +48,7 @@ export const ChatAgentForm = (props: {
const { EnjoyApp, learningLanguage, webApi } = useContext(
AppSettingsProviderContext
);
const { currentTtsEngine } = useContext(AISettingsProviderContext);
const { ttsConfig } = useContext(AISettingsProviderContext);
const [selectedTemplate, setSelectedTemplate] = useState<string>("custom");
const [templates, setTemplates] = useState<
{
@@ -104,10 +104,10 @@ export const ChatAgentForm = (props: {
const { type, name, description, config } = data;
if (type === ChatAgentTypeEnum.TTS) {
config.tts = {
engine: config.tts?.engine || currentTtsEngine.name,
model: config.tts?.model || currentTtsEngine.model,
language: config.tts?.language || learningLanguage,
voice: config.tts?.voice || currentTtsEngine.voice,
engine: config.tts?.engine || ttsConfig.engine,
model: config.tts?.model || ttsConfig.model,
language: config.tts?.language || ttsConfig.language,
voice: config.tts?.voice || ttsConfig.voice,
};
}

View File

@@ -30,7 +30,7 @@ export const ChatList = (props: {
setCurrentChat: (chat: ChatType) => void;
}) => {
const { chats, chatAgent, currentChat, setCurrentChat } = props;
const { sttEngine, currentGptEngine, currentTtsEngine } = useContext(
const { sttEngine, currentGptEngine, ttsConfig } = useContext(
AISettingsProviderContext
);
const { EnjoyApp, learningLanguage } = useContext(AppSettingsProviderContext);
@@ -78,10 +78,10 @@ export const ChatList = (props: {
agent.type === ChatAgentTypeEnum.TTS
? {
tts: {
engine: currentTtsEngine.name,
model: currentTtsEngine.model,
voice: currentTtsEngine.voice,
language: learningLanguage,
engine: ttsConfig.engine,
model: ttsConfig.model,
voice: ttsConfig.voice,
language: ttsConfig.language,
...agent.config.tts,
},
}
@@ -92,10 +92,10 @@ export const ChatList = (props: {
model: currentGptEngine.models.default,
},
tts: {
engine: currentTtsEngine.name,
model: currentTtsEngine.model,
voice: currentTtsEngine.voice,
language: learningLanguage,
engine: ttsConfig.engine,
model: ttsConfig.model,
voice: ttsConfig.voice,
language: ttsConfig.language,
},
};
return {

View File

@@ -68,10 +68,8 @@ const ChatMemberSetting = (props: {
onFinish?: () => void;
}) => {
const { chat, agentMembers, onFinish } = props;
const { EnjoyApp, learningLanguage } = useContext(AppSettingsProviderContext);
const { currentGptEngine, currentTtsEngine } = useContext(
AISettingsProviderContext
);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const { currentGptEngine, ttsConfig } = useContext(AISettingsProviderContext);
const [memberTab, setMemberTab] = useState(agentMembers[0]?.userId);
const [query, setQuery] = useState("");
const [chatAgents, setChatAgents] = useState<ChatAgentType[]>([]);
@@ -90,10 +88,10 @@ const ChatMemberSetting = (props: {
model: currentGptEngine.models.default,
},
tts: {
engine: currentTtsEngine.name,
model: currentTtsEngine.model,
voice: currentTtsEngine.voice,
language: learningLanguage,
engine: ttsConfig.engine,
model: ttsConfig.model,
voice: ttsConfig.voice,
language: ttsConfig.language,
},
},
})

View File

@@ -12,7 +12,8 @@ import {
import { t } from "i18next";
import { TTSForm } from "@renderer/components";
import { LoaderIcon } from "lucide-react";
import { useState } from "react";
import { useContext, useState } from "react";
import { AISettingsProviderContext } from "@renderer/context";
const documentConfigSchema = z.object({
config: z.object({
@@ -33,6 +34,7 @@ export const DocumentConfigForm = (props: {
}) => {
const { config, onSubmit } = props;
const [submitting, setSubmitting] = useState<boolean>(false);
const { ttsConfig } = useContext(AISettingsProviderContext);
const form = useForm<z.infer<typeof documentConfigSchema>>({
resolver: zodResolver(documentConfigSchema),
@@ -42,12 +44,7 @@ export const DocumentConfigForm = (props: {
config: {
autoTranslate: true,
autoNextSpeech: true,
tts: {
engine: "openai",
model: "openai/tts-1",
language: "en-US",
voice: "alloy",
},
tts: ttsConfig,
},
},
});

View File

@@ -79,7 +79,6 @@ export const MediaTranscriptionGenerateButton = (props: {
generateTranscription({
originalText: data.text,
language: data.language,
model: data.model,
service: data.service as SttEngineOptionEnum | "upload",
isolate: data.isolate,
})

View File

@@ -70,7 +70,6 @@ const LoadingContent = () => {
generateTranscription({
originalText: data.text,
language: data.language,
model: data.model,
service: data.service as SttEngineOptionEnum | "upload",
isolate: data.isolate,
});

View File

@@ -0,0 +1,336 @@
import { t } from "i18next";
import {
Button,
Form,
FormControl,
FormDescription,
FormField,
FormItem,
FormLabel,
Input,
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
Switch,
} from "@renderer/components/ui";
import { AppSettingsProviderContext } from "@renderer/context";
import { useContext, useEffect, useState } from "react";
import { z } from "zod";
import { useForm } from "react-hook-form";
import { zodResolver } from "@hookform/resolvers/zod";
import { WHISPER_MODELS } from "@/constants";
const echogardenSttConfigSchema = z.object({
engine: z.enum(["whisper", "whisper.cpp"]),
whisper: z.object({
model: z.string(),
temperature: z.number(),
prompt: z.string(),
encoderProvider: z.enum(["cpu", "dml", "cuda"]),
decoderProvider: z.enum(["cpu", "dml", "cuda"]),
}),
whisperCpp: z.object({
model: z.string(),
temperature: z.number(),
prompt: z.string(),
enableGPU: z.boolean(),
}),
});
export const EchogardenSttSettings = (props: {
echogardenSttConfig: EchogardenSttConfigType;
onSave: (data: z.infer<typeof echogardenSttConfigSchema>) => void;
}) => {
const { echogardenSttConfig, onSave } = props;
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const [platformInfo, setPlatformInfo] = useState<{
platform: string;
arch: string;
version: string;
}>();
const [packagesDir, setPackagesDir] = useState<string>();
const form = useForm<z.infer<typeof echogardenSttConfigSchema>>({
resolver: zodResolver(echogardenSttConfigSchema),
values: {
engine: echogardenSttConfig?.engine,
whisper: {
model: "tiny",
temperature: 0.1,
prompt: "",
encoderProvider: "cpu",
decoderProvider: "cpu",
...echogardenSttConfig?.whisper,
},
whisperCpp: {
model: "tiny",
temperature: 0.1,
prompt: "",
enableGPU: false,
...echogardenSttConfig?.whisperCpp,
},
},
});
const onSubmit = async (data: z.infer<typeof echogardenSttConfigSchema>) => {
onSave({
engine: data.engine || "whisper",
whisper: {
...data.whisper,
model: data.whisper.model || "tiny",
},
whisperCpp: {
...data.whisperCpp,
model: data.whisperCpp.model || "tiny",
},
});
};
const handleOpenPackagesDir = () => {
if (!packagesDir) return;
EnjoyApp.shell.openPath(packagesDir);
};
useEffect(() => {
EnjoyApp.app.getPlatformInfo().then(setPlatformInfo);
EnjoyApp.echogarden.getPackagesDir().then(setPackagesDir);
}, []);
return (
<Form {...form}>
<form onSubmit={form.handleSubmit(onSubmit)}>
<div className="text-sm text-muted-foreground space-y-3 mb-4">
<FormField
control={form.control}
name="engine"
render={({ field }) => (
<FormItem>
<FormLabel>{t("engine")}</FormLabel>
<FormControl>
<Select value={field.value} onValueChange={field.onChange}>
<SelectTrigger className="min-w-fit">
<SelectValue placeholder="engine"></SelectValue>
</SelectTrigger>
<SelectContent>
<SelectItem value="whisper">Whisper</SelectItem>
<SelectItem value="whisper.cpp">Whisper.cpp</SelectItem>
</SelectContent>
</Select>
</FormControl>
<FormDescription>
{form.watch("engine") === "whisper"
? t("whisperEngineDescription")
: t("whisperCppEngineDescription")}
</FormDescription>
</FormItem>
)}
/>
<FormField
control={form.control}
name="whisper.model"
render={({ field }) => (
<FormItem>
<FormLabel>{t("model")}</FormLabel>
<FormControl>
<Select value={field.value} onValueChange={field.onChange}>
<SelectTrigger className="min-w-fit">
<SelectValue placeholder="model"></SelectValue>
</SelectTrigger>
<SelectContent>
{WHISPER_MODELS.map((model) => (
<SelectItem key={model} value={model}>
{model}
</SelectItem>
))}
</SelectContent>
</Select>
</FormControl>
<FormDescription>
{t("whisperModelDescription")}
{packagesDir && (
<Button
size="icon"
variant="link"
className="ml-2"
type="button"
onClick={handleOpenPackagesDir}
>
{t("openPackagesDir")}
</Button>
)}
</FormDescription>
</FormItem>
)}
/>
{form.watch("engine") === "whisper" && (
<>
<FormField
control={form.control}
name="whisper.temperature"
render={({ field }) => (
<FormItem>
<FormLabel>{t("temperature")}</FormLabel>
<FormControl>
<Input
type="number"
step={0.1}
min={0}
max={1}
{...field}
/>
</FormControl>
</FormItem>
)}
/>
<FormField
control={form.control}
name="whisper.prompt"
render={({ field }) => (
<FormItem>
<FormLabel>{t("prompt")}</FormLabel>
<FormControl>
<Input placeholder={t("prompt")} {...field} />
</FormControl>
</FormItem>
)}
/>
<FormField
control={form.control}
name="whisper.encoderProvider"
render={({ field }) => (
<FormItem>
<FormLabel>{t("encoderProvider")}</FormLabel>
<FormControl>
<Select
value={field.value}
onValueChange={field.onChange}
>
<SelectTrigger className="min-w-fit">
<SelectValue placeholder="provider"></SelectValue>
</SelectTrigger>
<SelectContent>
<SelectItem value="cpu">CPU</SelectItem>
<SelectItem
disabled={platformInfo?.platform !== "win32"}
value="dml"
>
DML
</SelectItem>
<SelectItem
disabled={platformInfo?.platform !== "linux"}
value="cuda"
>
CUDA
</SelectItem>
</SelectContent>
</Select>
</FormControl>
</FormItem>
)}
/>
<FormField
control={form.control}
name="whisper.decoderProvider"
render={({ field }) => (
<FormItem>
<FormLabel>{t("decoderProvider")}</FormLabel>
<FormControl>
<Select
value={field.value}
onValueChange={field.onChange}
>
<SelectTrigger className="min-w-fit">
<SelectValue placeholder="provider"></SelectValue>
</SelectTrigger>
<SelectContent>
<SelectItem value="cpu">CPU</SelectItem>
<SelectItem
disabled={platformInfo?.platform !== "win32"}
value="dml"
>
DML
</SelectItem>
<SelectItem
disabled={platformInfo?.platform !== "linux"}
value="cuda"
>
CUDA
</SelectItem>
</SelectContent>
</Select>
</FormControl>
</FormItem>
)}
/>
</>
)}
{form.watch("engine") === "whisper.cpp" && (
<>
<FormField
control={form.control}
name="whisperCpp.temperature"
render={({ field }) => (
<FormItem>
<FormLabel>{t("temperature")}</FormLabel>
<FormControl>
<Input
type="number"
step={0.1}
min={0}
max={1}
{...field}
/>
</FormControl>
</FormItem>
)}
/>
<FormField
control={form.control}
name="whisperCpp.prompt"
render={({ field }) => (
<FormItem>
<FormLabel>{t("prompt")}</FormLabel>
<FormControl>
<Input placeholder={t("prompt")} {...field} />
</FormControl>
</FormItem>
)}
/>
<FormField
control={form.control}
name="whisperCpp.enableGPU"
render={({ field }) => (
<FormItem>
<div className="flex items-center space-x-2">
<FormLabel>{t("enableGPU")}</FormLabel>
<FormControl>
<Switch
checked={field.value}
onCheckedChange={field.onChange}
/>
</FormControl>
</div>
</FormItem>
)}
/>
</>
)}
</div>
<div className="flex items-center justify-end space-x-2">
<Button size="sm" type="submit">
{t("save")}
</Button>
</div>
</form>
</Form>
);
};

View File

@@ -15,6 +15,7 @@ export * from "./openai-settings";
export * from "./library-settings";
export * from "./disk-usage";
export * from "./stt-settings";
export * from "./tts-settings";
export * from "./user-settings";
export * from "./email-settings";
@@ -34,3 +35,5 @@ export * from "./recorder-settings";
export * from "./vocabulary-settings";
export * from "./dict-settings";
export * from "./echogarden-stt-settings";

View File

@@ -21,6 +21,10 @@ export const LibrarySettings = () => {
}
};
const openLibraryDir = () => {
EnjoyApp.shell.openPath(libraryPath);
};
return (
<div className="flex items-start justify-between py-4">
<div className="">
@@ -30,6 +34,9 @@ export const LibrarySettings = () => {
<div className="">
<div className="flex items-center justify-end space-x-2 mb-2">
<Button variant="secondary" size="sm" onClick={openLibraryDir}>
{t("open")}
</Button>
<Button
variant="secondary"
size="sm"

View File

@@ -9,7 +9,8 @@ import {
UserSettings,
BalanceSettings,
LibrarySettings,
WhisperSettings,
SttSettings,
TtsSettings,
OpenaiSettings,
ProxySettings,
ResetSettings,
@@ -40,7 +41,9 @@ export const Preferences = () => {
<Separator />
<LearningLanguageSettings />
<Separator />
<WhisperSettings />
<SttSettings />
<Separator />
<TtsSettings />
<Separator />
<DefaultEngineSettings />
<Separator />

View File

@@ -1,11 +1,6 @@
import { t } from "i18next";
import {
Button,
Dialog,
DialogTrigger,
DialogContent,
DialogHeader,
DialogDescription,
toast,
Select,
SelectTrigger,
@@ -17,46 +12,52 @@ import {
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
import { useContext, useState } from "react";
import { AlertCircleIcon } from "lucide-react";
import { useContext, useEffect, useState } from "react";
import { SttEngineOptionEnum } from "@/types/enums";
import { WHISPER_MODELS } from "@/constants";
import { EchogardenSttSettings } from "@renderer/components";
export const WhisperSettings = () => {
const { sttEngine, whisperModel, setWhisperModel, setSttEngine } = useContext(
AISettingsProviderContext
);
export const SttSettings = () => {
const {
sttEngine,
setSttEngine,
echogardenSttConfig,
setEchogardenSttConfig,
} = useContext(AISettingsProviderContext);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const [stderr, setStderr] = useState("");
const [editing, setEditing] = useState(false);
const handleCheck = async () => {
toast.promise(
async () => {
const { success, log } = await EnjoyApp.echogarden.check();
const { success, log } = await EnjoyApp.echogarden.check(
echogardenSttConfig
);
if (success) {
setStderr("");
return Promise.resolve();
} else {
setStderr(log);
return Promise.reject();
return Promise.reject(log);
}
},
{
loading: t("checkingWhisper"),
success: t("whisperIsWorkingGood"),
error: t("whisperIsNotWorking"),
error: (error) => t("whisperIsNotWorking") + ": " + error,
}
);
};
useEffect(() => {
if (sttEngine !== SttEngineOptionEnum.LOCAL) {
setEditing(false);
}
}, [sttEngine]);
return (
<div className="flex items-start justify-between py-4">
<div className="">
<div className="flex items-center mb-2">
<span>{t("sttAiService")}</span>
{stderr && (
<AlertCircleIcon className="ml-2 w-4 h-4 text-yellow-500" />
)}
</div>
<div className="text-sm text-muted-foreground">
{sttEngine === SttEngineOptionEnum.LOCAL &&
@@ -68,6 +69,27 @@ export const WhisperSettings = () => {
{sttEngine === SttEngineOptionEnum.OPENAI &&
t("openaiSpeechToTextDescription")}
</div>
<div
className={`text-sm text-muted-foreground mt-4 px-1 ${
editing ? "" : "hidden"
}`}
>
<EchogardenSttSettings
echogardenSttConfig={echogardenSttConfig}
onSave={(data) => {
setEchogardenSttConfig(data as EchogardenSttConfigType)
.then(() => {
toast.success(t("saved"));
})
.catch((error) => {
toast.error(error.message);
})
.finally(() => {
setEditing(false);
});
}}
/>
</div>
</div>
<div className="flex items-center space-x-2">
@@ -96,26 +118,18 @@ export const WhisperSettings = () => {
{sttEngine === SttEngineOptionEnum.LOCAL && (
<>
<Select
value={whisperModel}
onValueChange={(value) => {
setWhisperModel(value);
}}
<Button
onClick={() => setEditing(!editing)}
variant="secondary"
size="sm"
>
<SelectTrigger className="min-w-fit">
<SelectValue placeholder="service"></SelectValue>
</SelectTrigger>
<SelectContent>
{WHISPER_MODELS.map((model) => (
<SelectItem key={model} value={model}>
{model}
</SelectItem>
))}
</SelectContent>
</Select>
<Button onClick={handleCheck} variant="secondary" size="sm">
{t("check")}
{editing ? t("cancel") : t("config")}
</Button>
{!editing && (
<Button onClick={handleCheck} variant="secondary" size="sm">
{t("check")}
</Button>
)}
</>
)}
</div>

View File

@@ -0,0 +1,81 @@
import { t } from "i18next";
import { Button, toast, Form } from "@renderer/components/ui";
import { AISettingsProviderContext } from "@renderer/context";
import { useContext, useState } from "react";
import { z } from "zod";
import { useForm } from "react-hook-form";
import { zodResolver } from "@hookform/resolvers/zod";
import { TTSForm } from "@renderer/components";
const ttsConfigSchema = z.object({
config: z.object({
tts: z.object({
engine: z.string().min(1),
model: z.string().min(1),
language: z.string().min(1),
voice: z.string().min(1),
}),
}),
});
export const TtsSettings = () => {
const [editing, setEditing] = useState(false);
const { ttsConfig, setTtsConfig } = useContext(AISettingsProviderContext);
const form = useForm<z.infer<typeof ttsConfigSchema>>({
resolver: zodResolver(ttsConfigSchema),
values: {
config: {
tts: ttsConfig,
},
},
});
const onSubmit = (data: z.infer<typeof ttsConfigSchema>) => {
setTtsConfig(data.config.tts as TtsConfigType)
.then(() => toast.success(t("saved")))
.finally(() => setEditing(false));
};
return (
<Form {...form}>
<form onSubmit={form.handleSubmit(onSubmit)}>
<div className="flex items-start justify-between py-4">
<div className="">
<div className="flex items-center mb-2">
<span>{t("ttsService")}</span>
</div>
<div className="text-sm text-muted-foreground mb-3">
{form.watch("config.tts.engine") === "openai"
? t("openaiTtsServiceDescription")
: t("enjoyTtsServiceDescription")}
</div>
<div
className={`text-sm text-muted-foreground space-y-3 px-1 ${
editing ? "" : "hidden"
}`}
>
<TTSForm form={form} />
</div>
</div>
<div className="flex items-center space-x-2">
<Button
variant={editing ? "outline" : "secondary"}
size="sm"
type="reset"
onClick={(event) => {
event.preventDefault();
form.reset();
setEditing(!editing);
}}
>
{editing ? t("cancel") : t("edit")}
</Button>
<Button className={editing ? "" : "hidden"} size="sm" type="submit">
{t("save")}
</Button>
</div>
</div>
</form>
</Form>
);
};
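
For reference, a value accepted by ttsConfigSchema above, mirroring the defaults the AI settings provider seeds (the language is illustrative):

const formValues = {
  config: {
    tts: {
      engine: "enjoyai",
      model: "openai/tts-1",
      language: "en-US",
      voice: "alloy",
    },
  },
};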

View File

@@ -39,7 +39,6 @@ import { SttEngineOptionEnum } from "@/types/enums";
const transcriptionSchema = z.object({
language: z.string(),
service: z.union([z.nativeEnum(SttEngineOptionEnum), z.literal("upload")]),
model: z.string().optional(),
text: z.string().optional(),
isolate: z.boolean().optional(),
});
@@ -61,14 +60,15 @@ export const TranscriptionCreateForm = (props: {
originalText,
} = props;
const { learningLanguage } = useContext(AppSettingsProviderContext);
const { sttEngine, whisperModel } = useContext(AISettingsProviderContext);
const { sttEngine, echogardenSttConfig } = useContext(
AISettingsProviderContext
);
const form = useForm<z.infer<typeof transcriptionSchema>>({
resolver: zodResolver(transcriptionSchema),
values: {
language: learningLanguage,
service: originalText ? "upload" : sttEngine,
model: sttEngine === SttEngineOptionEnum.LOCAL ? whisperModel : "",
text: originalText,
isolate: false,
},
@@ -184,8 +184,22 @@ export const TranscriptionCreateForm = (props: {
</SelectContent>
</Select>
<FormDescription>
{form.watch("service") === SttEngineOptionEnum.LOCAL &&
t("localSpeechToTextDescription")}
{form.watch("service") === SttEngineOptionEnum.LOCAL && (
<>
<div>{t("localSpeechToTextDescription")}</div>
<div>
* {t("model")}: {echogardenSttConfig.engine} /{" "}
{
echogardenSttConfig[
echogardenSttConfig.engine.replace(".cpp", "Cpp") as
| "whisper"
| "whisperCpp"
]?.model
}
</div>
</>
)}
{form.watch("service") === SttEngineOptionEnum.ENJOY_AZURE &&
t("enjoyAzureSpeechToTextDescription")}
{form.watch("service") ===
@@ -200,34 +214,6 @@ export const TranscriptionCreateForm = (props: {
)}
/>
{form.watch("service") === SttEngineOptionEnum.LOCAL && (
<FormField
control={form.control}
name="model"
render={({ field }) => (
<FormItem className="grid w-full items-center">
<FormLabel>{t("model")}</FormLabel>
<Select
disabled={transcribing}
value={field.value}
onValueChange={field.onChange}
>
<SelectTrigger>
<SelectValue />
</SelectTrigger>
<SelectContent>
{WHISPER_MODELS.map((model) => (
<SelectItem key={model} value={model}>
{model}
</SelectItem>
))}
</SelectContent>
</Select>
</FormItem>
)}
/>
)}
<FormField
control={form.control}
name="language"

View File

@@ -10,15 +10,16 @@ import { WHISPER_MODELS } from "@/constants";
type AISettingsProviderState = {
sttEngine?: SttEngineOptionEnum;
setSttEngine?: (name: string) => Promise<void>;
whisperModel?: string;
setWhisperModel?: (name: string) => void;
openai?: LlmProviderType;
setOpenai?: (config: LlmProviderType) => void;
setGptEngine?: (engine: GptEngineSettingType) => void;
currentGptEngine?: GptEngineSettingType;
currentTtsEngine?: TtsEngineSettingType;
gptProviders?: typeof GPT_PROVIDERS;
ttsProviders?: typeof TTS_PROVIDERS;
ttsConfig?: TtsConfigType;
setTtsConfig?: (config: TtsConfigType) => Promise<void>;
echogardenSttConfig?: EchogardenSttConfigType;
setEchogardenSttConfig?: (config: EchogardenSttConfigType) => Promise<void>;
};
const initialState: AISettingsProviderState = {};
@@ -31,6 +32,18 @@ export const AISettingsProvider = ({
}: {
children: React.ReactNode;
}) => {
const { EnjoyApp, libraryPath, user, apiUrl, webApi, learningLanguage } =
useContext(AppSettingsProviderContext);
const [gptProviders, setGptProviders] = useState<any>(GPT_PROVIDERS);
const [ttsProviders, setTtsProviders] = useState<any>(TTS_PROVIDERS);
const db = useContext(DbProviderContext);
const [sttEngine, setSttEngine] = useState<SttEngineOptionEnum>(
SttEngineOptionEnum.ENJOY_AZURE
);
const [ttsConfig, setTtsConfig] = useState<TtsConfigType>(null);
const [echogardenSttConfig, setEchogardenSttConfig] =
useState<EchogardenSttConfigType>(null);
const [gptEngine, setGptEngine] = useState<GptEngineSettingType>({
name: "enjoyai",
models: {
@@ -38,15 +51,6 @@ export const AISettingsProvider = ({
},
});
const [openai, setOpenai] = useState<LlmProviderType>(null);
const [whisperModel, setWhisperModel] = useState<string>(null);
const [sttEngine, setSttEngine] = useState<SttEngineOptionEnum>(
SttEngineOptionEnum.ENJOY_AZURE
);
const { EnjoyApp, libraryPath, user, apiUrl, webApi, learningLanguage } =
useContext(AppSettingsProviderContext);
const [gptProviders, setGptProviders] = useState<any>(GPT_PROVIDERS);
const [ttsProviders, setTtsProviders] = useState<any>(TTS_PROVIDERS);
const db = useContext(DbProviderContext);
const refreshGptProviders = async () => {
let providers = GPT_PROVIDERS;
@@ -87,42 +91,82 @@ export const AISettingsProvider = ({
setTtsProviders({ ...providers });
};
const refreshWhisperModel = async () => {
const whisperModel = await EnjoyApp.userSettings.get(
UserSettingKeyEnum.WHISPER
);
if (WHISPER_MODELS.includes(whisperModel)) {
setWhisperModel(whisperModel);
} else {
let model = "tiny";
if (whisperModel.match(/tiny/)) {
model = "tiny";
} else if (whisperModel.match(/base/)) {
model = "base";
} else if (whisperModel.match(/small/)) {
model = "small";
} else if (whisperModel.match(/medium/)) {
model = "medium";
} else if (whisperModel.match(/large/)) {
model = "large-v3-turbo";
}
if (
learningLanguage.match(/en/) &&
model.match(/tiny|base|small|medium/)
) {
model = `${model}.en`;
}
setWhisperModel(model);
const refreshTtsConfig = async () => {
let config = await EnjoyApp.userSettings.get(UserSettingKeyEnum.TTS_CONFIG);
if (!config) {
config = {
engine: "enjoyai",
model: "openai/tts-1",
voice: "alloy",
language: learningLanguage,
};
EnjoyApp.userSettings.set(UserSettingKeyEnum.TTS_CONFIG, config);
}
setTtsConfig(config);
};
const handleSetWhisperModel = async (name: string) => {
if (WHISPER_MODELS.includes(name)) {
setWhisperModel(name);
EnjoyApp.userSettings.set(UserSettingKeyEnum.WHISPER, name);
const handleSetTtsConfig = async (config: TtsConfigType) => {
return EnjoyApp.userSettings
.set(UserSettingKeyEnum.TTS_CONFIG, config)
.then(() => {
setTtsConfig(config);
});
};
const refreshEchogardenSttConfig = async () => {
let config = await EnjoyApp.userSettings.get(UserSettingKeyEnum.ECHOGARDEN);
if (!config) {
let model = "tiny";
const whisperModel = await EnjoyApp.userSettings.get(
UserSettingKeyEnum.WHISPER
);
if (WHISPER_MODELS.includes(whisperModel)) {
model = whisperModel;
} else {
if (whisperModel.match(/tiny/)) {
model = "tiny";
} else if (whisperModel.match(/base/)) {
model = "base";
} else if (whisperModel.match(/small/)) {
model = "small";
} else if (whisperModel.match(/medium/)) {
model = "medium";
} else if (whisperModel.match(/large/)) {
model = "large-v3-turbo";
}
if (
learningLanguage.match(/en/) &&
model.match(/tiny|base|small|medium/)
) {
model = `${model}.en`;
}
}
config = {
engine: "whisper",
whisper: {
model,
temperature: 0.2,
prompt: "",
encoderProvider: "cpu",
decoderProvider: "cpu",
},
};
EnjoyApp.userSettings.set(UserSettingKeyEnum.ECHOGARDEN, config);
}
setEchogardenSttConfig(config);
};
const handleSetEchogardenSttConfig = async (
config: EchogardenSttConfigType
) => {
return EnjoyApp.userSettings
.set(UserSettingKeyEnum.ECHOGARDEN, config)
.then(() => {
setEchogardenSttConfig(config);
});
};
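
A worked example of the migration above: suppose the legacy WHISPER setting holds a name outside WHISPER_MODELS that matches /base/ (say "whisper-base", a hypothetical value) and learningLanguage is "en-US". The fallback normalizes it to "base", appends ".en" for an English learner, and seeds:

const seeded: EchogardenSttConfigType = {
  engine: "whisper",
  whisper: {
    model: "base.en",
    temperature: 0.2,
    prompt: "",
    encoderProvider: "cpu",
    decoderProvider: "cpu",
  },
};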
useEffect(() => {
@@ -190,7 +234,8 @@ export const AISettingsProvider = ({
});
}
refreshWhisperModel();
refreshEchogardenSttConfig();
refreshTtsConfig();
};
const handleSetOpenai = async (config: LlmProviderType) => {
@@ -218,26 +263,15 @@ export const AISettingsProvider = ({
key: user?.accessToken,
baseUrl: `${apiUrl}/api/ai`,
}),
currentTtsEngine:
gptEngine.name === "openai"
? {
name: "openai",
model: "tts-1",
voice: "alloy",
language: learningLanguage,
}
: {
name: "enjoyai",
model: "openai/tts-1",
voice: "alloy",
language: learningLanguage,
},
openai,
setOpenai: (config: LlmProviderType) => handleSetOpenai(config),
whisperModel,
setWhisperModel: handleSetWhisperModel,
echogardenSttConfig,
setEchogardenSttConfig: (config: EchogardenSttConfigType) =>
handleSetEchogardenSttConfig(config),
sttEngine,
setSttEngine: (name: SttEngineOptionEnum) => handleSetSttEngine(name),
ttsConfig,
setTtsConfig: (config: TtsConfigType) => handleSetTtsConfig(config),
gptProviders,
ttsProviders,
}}

View File

@@ -42,8 +42,8 @@ export const CopilotProvider = ({
const [active, setActive] = useState(false);
const [currentChat, setCurrentChat] = useState<ChatType>(null);
const [occupiedChat, setOccupiedChat] = useState<ChatType | null>(null);
const { EnjoyApp, learningLanguage } = useContext(AppSettingsProviderContext);
const { sttEngine, currentGptEngine, currentTtsEngine } = useContext(
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const { sttEngine, currentGptEngine, ttsConfig } = useContext(
AISettingsProviderContext
);
const { currentHotkeys } = useContext(HotKeysSettingsProviderContext);
@@ -100,10 +100,10 @@ export const CopilotProvider = ({
agent.type === ChatAgentTypeEnum.TTS
? {
tts: {
engine: currentTtsEngine.name,
model: currentTtsEngine.model,
voice: currentTtsEngine.voice,
language: learningLanguage,
engine: ttsConfig.engine,
model: ttsConfig.model,
voice: ttsConfig.voice,
language: ttsConfig.language,
...agent.config.tts,
},
}
@@ -114,10 +114,10 @@ export const CopilotProvider = ({
model: currentGptEngine.models.default,
},
tts: {
engine: currentTtsEngine.name,
model: currentTtsEngine.model,
voice: currentTtsEngine.voice,
language: learningLanguage,
engine: ttsConfig.engine,
model: ttsConfig.model,
voice: ttsConfig.voice,
language: ttsConfig.language,
},
};
return {

View File

@@ -29,9 +29,7 @@ export const useChatSession = (chatId: string) => {
const { EnjoyApp, user, apiUrl, learningLanguage } = useContext(
AppSettingsProviderContext
);
const { currentGptEngine, currentTtsEngine } = useContext(
AISettingsProviderContext
);
const { currentGptEngine, ttsConfig } = useContext(AISettingsProviderContext);
const { openai } = useContext(AISettingsProviderContext);
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const [chatMessages, dispatchChatMessages] = useReducer(
@@ -386,10 +384,10 @@ export const useChatSession = (chatId: string) => {
agent.type === ChatAgentTypeEnum.TTS
? {
tts: {
engine: currentTtsEngine.name,
model: currentTtsEngine.model,
voice: currentTtsEngine.voice,
language: learningLanguage,
engine: ttsConfig.engine,
model: ttsConfig.model,
voice: ttsConfig.voice,
language: ttsConfig.language,
...agent.config.tts,
},
}
@@ -402,10 +400,10 @@ export const useChatSession = (chatId: string) => {
model: currentGptEngine.models.default,
},
tts: {
engine: currentTtsEngine.name,
model: currentTtsEngine.model,
voice: currentTtsEngine.voice,
language: learningLanguage,
engine: ttsConfig.engine,
model: ttsConfig.model,
voice: ttsConfig.voice,
language: ttsConfig.language,
},
};

View File

@@ -11,11 +11,11 @@ export const useSpeech = () => {
const { EnjoyApp, webApi, user, apiUrl, learningLanguage } = useContext(
AppSettingsProviderContext
);
const { openai, currentGptEngine } = useContext(AISettingsProviderContext);
const { openai, ttsConfig } = useContext(AISettingsProviderContext);
const tts = async (params: Partial<SpeechType>) => {
const { configuration } = params;
const { engine, model = "tts-1", voice } = configuration || {};
const { engine, model, voice } = configuration || ttsConfig;
let buffer;
if (model.match(/^(openai|tts-)/)) {
@@ -47,9 +47,9 @@ export const useSpeech = () => {
const openaiTTS = async (params: Partial<SpeechType>) => {
const { configuration } = params;
const {
engine = currentGptEngine.name,
model = "tts-1",
voice = "alloy",
engine = ttsConfig.engine,
model = ttsConfig.model,
voice = ttsConfig.voice,
baseUrl,
} = configuration || {};
@@ -85,8 +85,8 @@ export const useSpeech = () => {
const azureTTS = async (
params: Partial<SpeechType>
): Promise<ArrayBuffer> => {
const { configuration, text } = params;
const { model, voice } = configuration || {};
const { configuration = ttsConfig, text } = params;
const { model, voice } = configuration;
if (model !== "azure/speech") return;

View File

@@ -26,7 +26,7 @@ const punctuationsPattern = /\w[.,!?](\s|$)/g;
export const useTranscribe = () => {
const { EnjoyApp, user, webApi } = useContext(AppSettingsProviderContext);
const { openai, whisperModel } = useContext(AISettingsProviderContext);
const { openai, echogardenSttConfig } = useContext(AISettingsProviderContext);
const { punctuateText } = useAiCommand();
const [output, setOutput] = useState<string>("");
@@ -47,7 +47,6 @@ export const useTranscribe = () => {
params?: {
targetId?: string;
targetType?: string;
model?: string;
originalText?: string;
language: string;
service: SttEngineOptionEnum | "upload";
@@ -65,7 +64,6 @@ export const useTranscribe = () => {
}> => {
const url = await transcode(mediaSrc);
const {
model,
targetId,
targetType,
originalText,
@@ -81,7 +79,9 @@ export const useTranscribe = () => {
if (service === "upload" && originalText) {
result = await alignText(originalText);
} else if (service === SttEngineOptionEnum.LOCAL) {
result = await transcribeByLocal(url, { language, model });
result = await transcribeByLocal(url, {
language,
});
} else if (service === SttEngineOptionEnum.ENJOY_CLOUDFLARE) {
result = await transcribeByCloudflareAi(blob);
} else if (service === SttEngineOptionEnum.OPENAI) {
@@ -223,27 +223,28 @@ export const useTranscribe = () => {
const transcribeByLocal = async (
url: string,
options: { language: string; model?: string }
options: { language: string }
): Promise<{
engine: string;
model: string;
transcript: string;
segmentTimeline: TimelineEntry[];
}> => {
let { language, model = whisperModel } = options || {};
let { language } = options || {};
const languageCode = language.split("-")[0];
if (model.match(/en/) && languageCode !== "en") {
model = model.replace(".en", "");
}
let model: string;
let res: RecognitionResult;
try {
model =
echogardenSttConfig[
echogardenSttConfig.engine.replace(".cpp", "Cpp") as
| "whisper"
| "whisperCpp"
].model;
res = await EnjoyApp.echogarden.recognize(url, {
engine: "whisper",
language: languageCode,
whisper: {
model,
},
...echogardenSttConfig,
});
} catch (err) {
throw new Error(t("whisperTranscribeFailed", { error: err.message }));
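
The engine.replace(".cpp", "Cpp") expression above maps the engine name onto the key holding that engine's options; the same lookup as a small helper (the helper name is illustrative):

type SttEngine = "whisper" | "whisper.cpp";
type EngineOptionsKey = "whisper" | "whisperCpp";

// "whisper" -> "whisper", "whisper.cpp" -> "whisperCpp"
function engineOptionsKey(engine: SttEngine): EngineOptionsKey {
  return engine.replace(".cpp", "Cpp") as EngineOptionsKey;
}

engineOptionsKey("whisper.cpp"); // => "whisperCpp"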

View File

@@ -12,7 +12,9 @@ import { SttEngineOptionEnum } from "@/types/enums";
import { t } from "i18next";
export const useTranscriptions = (media: AudioType | VideoType) => {
const { sttEngine } = useContext(AISettingsProviderContext);
const { sttEngine, echogardenSttConfig } = useContext(
AISettingsProviderContext
);
const { EnjoyApp, learningLanguage, webApi } = useContext(
AppSettingsProviderContext
);
@@ -113,7 +115,6 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
const generateTranscription = async (params?: {
originalText?: string;
language?: string;
model?: string;
service?: SttEngineOptionEnum | "upload";
isolate?: boolean;
}) => {
@@ -121,7 +122,6 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
originalText,
language = learningLanguage,
service = sttEngine,
model: whisperModel,
isolate = false,
} = params || {};
setService(service);
@@ -144,7 +144,6 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
{
targetId: media.id,
targetType: media.mediaType,
model: whisperModel,
originalText,
language,
service,

View File

@@ -276,6 +276,7 @@ type EnjoyAppType = {
delete: (id: string) => Promise<void>;
};
echogarden: {
getPackagesDir: () => Promise<string>;
recognize: (
input: string,
options: RecognitionOptions

View File

@@ -8,9 +8,11 @@ export enum UserSettingKeyEnum {
HOTKEYS = "hotkeys",
GPT_ENGINE = "gpt_engine",
STT_ENGINE = "stt_engine",
TTS_CONFIG = "tts_config",
VOCABULARY = "vocabulary",
DICTS = "dicts",
RECORDER = "recorder",
ECHOGARDEN = "echogarden",
}
export enum SttEngineOptionEnum {

View File

@@ -260,3 +260,20 @@ type TranscribeResultType = {
tokenId?: number;
url: string;
};
type EchogardenSttConfigType = {
engine: "whisper" | "whisper.cpp";
whisper: {
model: string;
temperature?: number;
prompt?: string;
encoderProvider?: "cpu" | "dml" | "cuda";
decoderProvider?: "cpu" | "dml" | "cuda";
};
whisperCpp?: {
model: string;
temperature?: number;
prompt?: string;
enableGPU?: boolean;
};
};
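
An example value conforming to EchogardenSttConfigType (model names are illustrative; the whisperCpp block is optional):

const config: EchogardenSttConfigType = {
  engine: "whisper.cpp",
  whisper: {
    model: "tiny.en",
    temperature: 0.2,
    encoderProvider: "cpu",
    decoderProvider: "cpu",
  },
  whisperCpp: {
    model: "tiny.en",
    enableGPU: false,
  },
};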

View File

@@ -28,6 +28,7 @@ export default defineConfig((env) => {
"echogarden/dist/api/API.js",
"echogarden/dist/audio/AudioUtilities.js",
"echogarden/dist/utilities/Timeline.js",
"echogarden/dist/utilities/PackageManager.js",
],
output: {
strict: false,

yarn.lock (181 changed lines)
View File

@@ -33,7 +33,7 @@ __metadata:
autoprefixer: "npm:^10.4.20"
nuxt: "npm:^3.14.159"
nuxt-og-image: "npm:^3.0.8"
postcss: "npm:^8.4.47"
postcss: "npm:^8.4.48"
sass: "npm:^1.80.6"
tailwindcss: "npm:^3.4.14"
vue: "npm:^3.5.12"
@@ -354,7 +354,7 @@ __metadata:
languageName: node
linkType: hard
"@aws-sdk/client-polly@npm:^3.670.0":
"@aws-sdk/client-polly@npm:^3.687.0":
version: 3.687.0
resolution: "@aws-sdk/client-polly@npm:3.687.0"
dependencies:
@@ -613,7 +613,7 @@ __metadata:
languageName: node
linkType: hard
"@aws-sdk/client-transcribe-streaming@npm:^3.672.0":
"@aws-sdk/client-transcribe-streaming@npm:^3.687.0":
version: 3.687.0
resolution: "@aws-sdk/client-transcribe-streaming@npm:3.687.0"
dependencies:
@@ -1681,10 +1681,18 @@ __metadata:
languageName: node
linkType: hard
"@echogarden/espeak-ng-emscripten@npm:^0.2.0":
version: 0.2.0
resolution: "@echogarden/espeak-ng-emscripten@npm:0.2.0"
checksum: 10c0/ec2669328e6f6629c5d416d5f15af5ff7de93fc01b44297022a982c0661c9df9cdcf3754f81c4ef77d6ca31fd84674c19a4fe94cc4f9bdc097315111cc157415
"@echogarden/audio-io@npm:^0.2.3":
version: 0.2.3
resolution: "@echogarden/audio-io@npm:0.2.3"
checksum: 10c0/b8b0de7f370d0115ece37272f5b012094c77de661c17407a667db1e65bbf6876bf832260234af17018fcaf92ef2480e8948e9481bdc2e8902d279b72669d99c6
conditions: (os=win32 | os=darwin | os=linux)
languageName: node
linkType: hard
"@echogarden/espeak-ng-emscripten@npm:^0.3.0":
version: 0.3.0
resolution: "@echogarden/espeak-ng-emscripten@npm:0.3.0"
checksum: 10c0/7163023b91394eda5ded0fd2e819a14944edd4888beee4c1b87f1095ec536802749636c8f69144391d2ebf171ed23c217a2aa44ac48959f71ee8391ccdc47880
languageName: node
linkType: hard
@@ -1702,59 +1710,59 @@ __metadata:
languageName: node
linkType: hard
"@echogarden/fvad-wasm@npm:^0.1.2":
version: 0.1.2
resolution: "@echogarden/fvad-wasm@npm:0.1.2"
checksum: 10c0/e5ebd0c8bddd19f26cb4862f86fffa842f941c969ffb7da726c936fc338e2cb317ef99039fe7c5472ac370af909618ad08139b8b95dce8b07993de432b86d56a
"@echogarden/fvad-wasm@npm:^0.2.0":
version: 0.2.0
resolution: "@echogarden/fvad-wasm@npm:0.2.0"
checksum: 10c0/d5480abf5c555ffa7fbe3f75a18b3c5f9b0d5d64b0d31f92097269637e827f3253556b39e1d52bbabeae604c43c8f1479d1bfd798d7fee112cdb3a250124a093
languageName: node
linkType: hard
"@echogarden/kissfft-wasm@npm:^0.1.1":
version: 0.1.1
resolution: "@echogarden/kissfft-wasm@npm:0.1.1"
checksum: 10c0/3c034ebb6f64f1551ec70314ca373aec5c4907cc36f6efa9aa62d759840acbf471c50f697b71f910adcf5fab09cf3d918405a76e78d7ba94ffae9baf885c780e
"@echogarden/kissfft-wasm@npm:^0.2.0":
version: 0.2.0
resolution: "@echogarden/kissfft-wasm@npm:0.2.0"
checksum: 10c0/f02a8c6101e50f7c01d50181295e8638bbb11d23e601b4dfda482cbac90f617a1116ea0177263429e3647b686052d0ab95ca56fb2a77e75ac76cccee9d4996a9
languageName: node
linkType: hard
"@echogarden/pffft-wasm@npm:^0.3.0":
version: 0.3.0
resolution: "@echogarden/pffft-wasm@npm:0.3.0"
checksum: 10c0/7425828ed23e1a1dbd77b940b0b141738337df21c0c71f8b9dbb21706b32832f70e61b0a4a441ce7bf3053433a52d48a1b905978ef01122fae0415bf31a83e46
"@echogarden/pffft-wasm@npm:^0.4.2":
version: 0.4.2
resolution: "@echogarden/pffft-wasm@npm:0.4.2"
checksum: 10c0/4e9ffc24195f5fa44f5623124f8815125af12c0c8934e7388b69f424418aab73e7e3acdf2c98894a60eed5509dccdb6844622f1232927ee6d51b2b7444beae4c
languageName: node
linkType: hard
"@echogarden/rnnoise-wasm@npm:^0.1.1":
version: 0.1.1
resolution: "@echogarden/rnnoise-wasm@npm:0.1.1"
checksum: 10c0/806e51f68836d10319b40bd82dd24b02acc340c6b60b4bcd9609d8f480cd233a76c14a40a135cabb45c530c0187afb61a1ad6b8709c50de4559ec2136d45097f
"@echogarden/rnnoise-wasm@npm:^0.2.0":
version: 0.2.0
resolution: "@echogarden/rnnoise-wasm@npm:0.2.0"
checksum: 10c0/b38a3e66f377de5429f3be57140b3fcea55727ef516ed25e53a310261156e90cda44c5c53d83f3061edff063ac7954bab3f62b9dd94aed86bd67e59bedfbd3bc
languageName: node
linkType: hard
"@echogarden/rubberband-wasm@npm:^0.1.1":
version: 0.1.1
resolution: "@echogarden/rubberband-wasm@npm:0.1.1"
checksum: 10c0/4d254c511f65bfb425aaedebb59de174938bbbde5b8c64e758bc6b633e0a9764cbd6091e077e38d4e5bf8b462785b7ed769786e44c285543d0bd87f9b7ab6376
"@echogarden/rubberband-wasm@npm:^0.2.0":
version: 0.2.0
resolution: "@echogarden/rubberband-wasm@npm:0.2.0"
checksum: 10c0/e37c947a241efdd489f42502184700cce2d01b7f3ceb74461c88364a7eb0407ac745cda459d8afe577f76dc77629047a3529237b561be4ca9dd246a8482f5474
languageName: node
linkType: hard
"@echogarden/sonic-wasm@npm:^0.1.1":
version: 0.1.1
resolution: "@echogarden/sonic-wasm@npm:0.1.1"
checksum: 10c0/74872334ee730e03d21191d2e38aba4516a4ebe49380f4d2baf0da62d7d23a89d08839d2096de8b0bac548199c285d895466e51e83d24b841c4f8f08a52a6594
"@echogarden/sonic-wasm@npm:^0.2.0":
version: 0.2.0
resolution: "@echogarden/sonic-wasm@npm:0.2.0"
checksum: 10c0/936a042f0e262062f87c97afbfd7b8ce573416843198bc8be31cf45ea4827bb4bb1e47d91a5e0c0dc752afa8726395e85230c52f9c12b9f94f404e51a7792cf0
languageName: node
linkType: hard
"@echogarden/speex-resampler-wasm@npm:^0.1.1":
version: 0.1.1
resolution: "@echogarden/speex-resampler-wasm@npm:0.1.1"
checksum: 10c0/f210506b865a5ed42dba6bdeaa5360ee591424a4007bf1c2f0a845ae78cec4fe0068ccf78c895f0f5cbf63778b3cccde2d21dc01bf51e83dd1e7a7ca963c26d9
"@echogarden/speex-resampler-wasm@npm:^0.2.1":
version: 0.2.1
resolution: "@echogarden/speex-resampler-wasm@npm:0.2.1"
checksum: 10c0/ec7a1c49c7d4d2e056cb850271d4e368a25fcd0c89b2b8c1f206e6d401b51ead4ca1d67fec9272554948f7c58ac35c9f048eb3f52605a4ad28f93ee278dddf95
languageName: node
linkType: hard
"@echogarden/svoxpico-wasm@npm:^0.1.0":
version: 0.1.0
resolution: "@echogarden/svoxpico-wasm@npm:0.1.0"
checksum: 10c0/1a787402601146ab175359831feff3dde22ec17771303a75103d157b04a2a323172681306e2bf4edb7b2b8626db52683d28202779332e48be6df178b95772d32
"@echogarden/svoxpico-wasm@npm:^0.2.0":
version: 0.2.0
resolution: "@echogarden/svoxpico-wasm@npm:0.2.0"
checksum: 10c0/142454b6d5c065e3c68beffa9122d382e8a0be0fb2ca983f1abb8249ba2b5f479813665a54e4069b98a3341e230e952b1a84cb8dd478f331716699866a55edda
languageName: node
linkType: hard
@@ -9284,22 +9292,6 @@ __metadata:
languageName: node
linkType: hard
"buffer-indexof@npm:~0.0.0":
version: 0.0.2
resolution: "buffer-indexof@npm:0.0.2"
checksum: 10c0/495124d3eacdfae2da6f815881e1c837459e48144ec46c246199ca3efc7052e767822784fd5399ca65a4043ba364b3a38b6b06dc6df44aab1aedea17544fc9fc
languageName: node
linkType: hard
"buffer-split@npm:^1.0.0":
version: 1.0.0
resolution: "buffer-split@npm:1.0.0"
dependencies:
buffer-indexof: "npm:~0.0.0"
checksum: 10c0/59280260d41c3871d227cd605343fc48a3ae3b3a7b799f3fec9a94e7275e040bdffcd84c3ae4a8e68de7f8232dee495f8d1c410b1466fc4be265d2d3410c8f75
languageName: node
linkType: hard
"buffer-to-arraybuffer@npm:0.0.6":
version: 0.0.6
resolution: "buffer-to-arraybuffer@npm:0.0.6"
@@ -11600,27 +11592,27 @@ __metadata:
languageName: node
linkType: hard
"echogarden@npm:^1.8.7":
version: 1.8.7
resolution: "echogarden@npm:1.8.7"
"echogarden@npm:^2.0.0":
version: 2.0.0
resolution: "echogarden@npm:2.0.0"
dependencies:
"@aws-sdk/client-polly": "npm:^3.670.0"
"@aws-sdk/client-transcribe-streaming": "npm:^3.672.0"
"@echogarden/espeak-ng-emscripten": "npm:^0.2.0"
"@aws-sdk/client-polly": "npm:^3.687.0"
"@aws-sdk/client-transcribe-streaming": "npm:^3.687.0"
"@echogarden/audio-io": "npm:^0.2.3"
"@echogarden/espeak-ng-emscripten": "npm:^0.3.0"
"@echogarden/fasttext-wasm": "npm:^0.1.0"
"@echogarden/flite-wasi": "npm:^0.1.1"
"@echogarden/fvad-wasm": "npm:^0.1.2"
"@echogarden/kissfft-wasm": "npm:^0.1.1"
"@echogarden/pffft-wasm": "npm:^0.3.0"
"@echogarden/rnnoise-wasm": "npm:^0.1.1"
"@echogarden/rubberband-wasm": "npm:^0.1.1"
"@echogarden/sonic-wasm": "npm:^0.1.1"
"@echogarden/speex-resampler-wasm": "npm:^0.1.1"
"@echogarden/svoxpico-wasm": "npm:^0.1.0"
"@echogarden/fvad-wasm": "npm:^0.2.0"
"@echogarden/kissfft-wasm": "npm:^0.2.0"
"@echogarden/pffft-wasm": "npm:^0.4.2"
"@echogarden/rnnoise-wasm": "npm:^0.2.0"
"@echogarden/rubberband-wasm": "npm:^0.2.0"
"@echogarden/sonic-wasm": "npm:^0.2.0"
"@echogarden/speex-resampler-wasm": "npm:^0.2.1"
"@echogarden/svoxpico-wasm": "npm:^0.2.0"
"@echogarden/transformers-nodejs-lite": "npm:^2.17.1-lite.3"
"@mozilla/readability": "npm:^0.5.0"
alawmulaw: "npm:^6.0.0"
buffer-split: "npm:^1.0.0"
chalk: "npm:^5.3.0"
cldr-segmentation: "npm:^2.2.1"
command-exists: "npm:^1.2.9"
@@ -11638,8 +11630,8 @@ __metadata:
microsoft-cognitiveservices-speech-sdk: "npm:^1.41.0"
moving-median: "npm:^1.0.0"
msgpack-lite: "npm:^0.1.26"
onnxruntime-node: "npm:^1.19.2"
openai: "npm:^4.67.3"
onnxruntime-node: "npm:^1.20.0"
openai: "npm:^4.71.1"
sam-js: "npm:^0.3.1"
strip-ansi: "npm:^7.1.0"
tar: "npm:^7.4.3"
@@ -12069,7 +12061,7 @@ __metadata:
dayjs: "npm:^1.11.13"
decamelize: "npm:^6.0.0"
decamelize-keys: "npm:^2.0.1"
echogarden: "npm:^1.8.7"
echogarden: "npm:^2.0.0"
electron: "npm:^33.2.0"
electron-context-menu: "npm:^4.0.4"
electron-devtools-installer: "npm:^3.2.0"
@@ -12094,7 +12086,7 @@ __metadata:
langchain: "npm:^0.3.5"
lodash: "npm:^4.17.21"
lru-cache: "npm:^11.0.2"
lucide-react: "npm:^0.455.0"
lucide-react: "npm:^0.456.0"
mark.js: "npm:^8.11.1"
microsoft-cognitiveservices-speech-sdk: "npm:^1.41.0"
mime-types: "npm:^2.1.35"
@@ -12103,7 +12095,7 @@ __metadata:
octokit: "npm:^4.0.2"
openai: "npm:^4.71.1"
pitchfinder: "npm:^2.3.2"
postcss: "npm:^8.4.47"
postcss: "npm:^8.4.48"
progress: "npm:^2.0.3"
prop-types: "npm:^15.8.1"
proxy-agent: "npm:^6.4.0"
@@ -12113,7 +12105,7 @@ __metadata:
react-audio-voice-recorder: "npm:^2.2.0"
react-dom: "npm:^18.3.1"
react-frame-component: "npm:^5.2.7"
react-hook-form: "npm:^7.53.1"
react-hook-form: "npm:^7.53.2"
react-hotkeys-hook: "npm:^4.6.1"
react-i18next: "npm:^15.1.1"
react-markdown: "npm:^9.0.1"
@@ -12146,7 +12138,7 @@ __metadata:
wavesurfer.js: "npm:^7.8.8"
zod: "npm:^3.23.8"
zod-to-json-schema: "npm:^3.23.5"
zx: "npm:^8.2.0"
zx: "npm:^8.2.1"
languageName: unknown
linkType: soft
@@ -16209,12 +16201,12 @@ __metadata:
languageName: node
linkType: hard
"lucide-react@npm:^0.455.0":
version: 0.455.0
resolution: "lucide-react@npm:0.455.0"
"lucide-react@npm:^0.456.0":
version: 0.456.0
resolution: "lucide-react@npm:0.456.0"
peerDependencies:
react: ^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0-rc
checksum: 10c0/879f51dc0143c36452022ee852cdc9c8967d644f4102ba5311d55a6fe63a0c9a5963069e56b4d034579e9e502e032736009c592e3706067cd0cf9ca2556e07f9
checksum: 10c0/a56f6922381ff529f1df93c3fca47766682a8e427ec266fd1f4bcf063f65f93460f9e7982abf79d79e5f7bb50b8013d0493a26f78677de9b1c06b6b5a6343d56
languageName: node
linkType: hard
@@ -18510,7 +18502,7 @@ __metadata:
languageName: node
linkType: hard
"onnxruntime-node@npm:^1.19.2":
"onnxruntime-node@npm:^1.20.0":
version: 1.20.0
resolution: "onnxruntime-node@npm:1.20.0"
dependencies:
@@ -18544,7 +18536,7 @@ __metadata:
languageName: node
linkType: hard
"openai@npm:^4.67.3, openai@npm:^4.71.0, openai@npm:^4.71.1":
"openai@npm:^4.71.0, openai@npm:^4.71.1":
version: 4.71.1
resolution: "openai@npm:4.71.1"
dependencies:
@@ -19172,7 +19164,7 @@ __metadata:
languageName: node
linkType: hard
"picocolors@npm:^1.0.0, picocolors@npm:^1.0.1, picocolors@npm:^1.1.0":
"picocolors@npm:^1.0.0, picocolors@npm:^1.0.1, picocolors@npm:^1.1.0, picocolors@npm:^1.1.1":
version: 1.1.1
resolution: "picocolors@npm:1.1.1"
checksum: 10c0/e2e3e8170ab9d7c7421969adaa7e1b31434f789afb9b3f115f6b96d91945041ac3ceb02e9ec6fe6510ff036bcc0bf91e69a1772edc0b707e12b19c0f2d6bcf58
@@ -19712,6 +19704,17 @@ __metadata:
languageName: node
linkType: hard
"postcss@npm:^8.4.48":
version: 8.4.48
resolution: "postcss@npm:8.4.48"
dependencies:
nanoid: "npm:^3.3.7"
picocolors: "npm:^1.1.1"
source-map-js: "npm:^1.2.1"
checksum: 10c0/d586361fda12fc7ab5650ce9b5763fc61d6ea2cecac9da98fceea6a3f27e42ed34db830582411bc06743492d9bb414c52b0c81da65440682d244d692da2f928a
languageName: node
linkType: hard
"postject@npm:^1.0.0-alpha.6":
version: 1.0.0-alpha.6
resolution: "postject@npm:1.0.0-alpha.6"
@@ -20094,7 +20097,7 @@ __metadata:
languageName: node
linkType: hard
"react-hook-form@npm:^7.53.1":
"react-hook-form@npm:^7.53.2":
version: 7.53.2
resolution: "react-hook-form@npm:7.53.2"
peerDependencies:
@@ -24434,9 +24437,9 @@ __metadata:
languageName: node
linkType: hard
"zx@npm:^8.2.0":
version: 8.2.0
resolution: "zx@npm:8.2.0"
"zx@npm:^8.2.1":
version: 8.2.1
resolution: "zx@npm:8.2.1"
dependencies:
"@types/fs-extra": "npm:>=11"
"@types/node": "npm:>=20"
@@ -24447,6 +24450,6 @@ __metadata:
optional: true
bin:
zx: build/cli.js
checksum: 10c0/67baf00280343259f04b2bf58b2dc7c90abc7b42f3b4ca2794ea59bf988c53707c08c8427dbf22e88606c321a08254bec0b27400840029f4086ba3c43b8056a8
checksum: 10c0/39aac596a031eb149d91c54359ab74969e5135ce3de401dabcae06b16516fdf28aa97c01b5c2509e46a5bcf9d3d941f349e5ac2df861f67aa6755df84e629bc5
languageName: node
linkType: hard