Feat: support azure tts (#651)

* upgrade deps

* update tts providers

* refactor conversation form

* update tts providers config

* add azure tts api

* fix azure sdk usage

* consume/revoke token

* upgrade deps
This commit is contained in:
an-lee
2024-06-04 10:09:04 +08:00
committed by GitHub
parent 6d687b2d53
commit a3208d876f
12 changed files with 3093 additions and 3406 deletions

View File

@@ -8,7 +8,7 @@
"markdown-it-sub": "^2.0.0",
"markdown-it-sup": "^2.0.0",
"mermaid": "^10.9.1",
"sass": "^1.77.2",
"sass": "^1.77.4",
"vitepress": "^1.2.2",
"vitepress-plugin-mermaid": "^2.0.16",
"vue": "^3.4.27"

View File

@@ -15,7 +15,7 @@
"make": "rimraf .vite && yarn run download && electron-forge make",
"publish": "rimraf .vite && yarn run download && electron-forge publish",
"lint": "eslint --ext .ts,.tsx .",
"test": "yarn run package && playwright test",
"test": "yarn run package && yarn run playwright test",
"test:main": "yarn run playwright test e2e/main.spec.ts",
"test:renderer": "yarn run playwright test e2e/renderer.spec.ts",
"create-migration": "zx ./src/main/db/create-migration.mjs",
@@ -51,18 +51,18 @@
"@types/intl-tel-input": "^18.1.4",
"@types/lodash": "^4.17.4",
"@types/mark.js": "^8.11.12",
"@types/node": "^20.12.12",
"@types/node": "^20.14.1",
"@types/react": "^18.3.3",
"@types/react-dom": "^18.3.0",
"@types/validator": "^13.11.10",
"@types/wavesurfer.js": "^6.0.12",
"@typescript-eslint/eslint-plugin": "^7.10.0",
"@typescript-eslint/parser": "^7.10.0",
"@typescript-eslint/eslint-plugin": "^7.12.0",
"@typescript-eslint/parser": "^7.12.0",
"@vitejs/plugin-react": "^4.3.0",
"autoprefixer": "^10.4.19",
"electron": "^30.0.8",
"electron": "^30.0.9",
"electron-playwright-helpers": "^1.7.1",
"eslint": "^9.3.0",
"eslint": "^9.4.0",
"eslint-import-resolver-typescript": "^3.6.1",
"eslint-plugin-import": "^2.29.1",
"flora-colossus": "^2.0.0",
@@ -75,15 +75,15 @@
"ts-node": "^10.9.2",
"tslib": "^2.6.2",
"typescript": "^5.4.5",
"vite": "^5.2.11",
"vite": "^5.2.12",
"vite-plugin-static-copy": "^1.0.5",
"zx": "^8.1.1"
"zx": "^8.1.2"
},
"dependencies": {
"@andrkrn/ffprobe-static": "^5.2.0",
"@electron-forge/publisher-s3": "^7.4.0",
"@hookform/resolvers": "^3.4.2",
"@langchain/community": "^0.2.3",
"@langchain/community": "^0.2.5",
"@langchain/google-genai": "^0.0.16",
"@mozilla/readability": "^0.5.0",
"@radix-ui/react-accordion": "^1.1.2",
@@ -142,13 +142,13 @@
"i18next": "^23.11.5",
"intl-tel-input": "^23.0.10",
"js-md5": "^0.8.3",
"langchain": "^0.2.2",
"langchain": "^0.2.4",
"lodash": "^4.17.21",
"lucide-react": "^0.379.0",
"lucide-react": "^0.383.0",
"mark.js": "^8.11.1",
"microsoft-cognitiveservices-speech-sdk": "^1.36.0",
"next-themes": "^0.3.0",
"openai": "^4.47.1",
"openai": "^4.47.3",
"pitchfinder": "^2.3.2",
"postcss": "^8.4.38",
"proxy-agent": "^6.4.0",

View File

@@ -283,12 +283,26 @@ export class Client {
}
/**
 * Requests a speech token from the API.
 *
 * Sends a POST to `/api/speech/tokens`. The body is `params` (or an empty
 * object when `params` is omitted) after it has been passed through
 * `decamelizeKeys` — presumably converting the camelCase keys to the
 * snake_case form the server expects; confirm against the helper.
 */
generateSpeechToken(params?: {
purpose?: string;
targetType?: string;
targetId?: string;
}): Promise<{ token: string; region: string }> {
input?: string;
}): Promise<{ id: number; token: string; region: string }> {
return this.api.post("/api/speech/tokens", decamelizeKeys(params || {}));
}
/**
 * Marks a speech token as consumed.
 *
 * Issues a PUT to `/api/speech/tokens/{id}` with `state: "consumed"`.
 *
 * @param id - Identifier of the speech token to update.
 * @returns The result of `this.api.put` for that request.
 */
consumeSpeechToken(id: number) {
  const endpoint = `/api/speech/tokens/${id}`;
  const payload = { state: "consumed" };

  return this.api.put(endpoint, payload);
}
/**
 * Marks a speech token as revoked.
 *
 * Issues a PUT to `/api/speech/tokens/{id}` with `state: "revoked"`.
 *
 * @param id - Identifier of the speech token to update.
 * @returns The result of `this.api.put` for that request.
 */
revokeSpeechToken(id: number) {
  const endpoint = `/api/speech/tokens/${id}`;
  const payload = { state: "revoked" };

  return this.api.put(endpoint, payload);
}
syncPronunciationAssessment(
pronunciationAssessment: Partial<PronunciationAssessmentType>
) {

View File

@@ -1,777 +0,0 @@
import * as z from "zod";
import { t } from "i18next";
import { useForm } from "react-hook-form";
import { zodResolver } from "@hookform/resolvers/zod";
import {
AlertDialog,
AlertDialogAction,
AlertDialogCancel,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
AlertDialogTrigger,
Button,
FormField,
Form,
FormItem,
FormLabel,
FormControl,
FormDescription,
FormMessage,
Input,
ScrollArea,
Select,
SelectTrigger,
SelectValue,
SelectContent,
SelectItem,
Textarea,
toast,
} from "@renderer/components/ui";
import { useState, useEffect, useContext } from "react";
import {
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
import { LoaderIcon } from "lucide-react";
import { useNavigate } from "react-router-dom";
import {
GPT_PROVIDERS,
TTS_PROVIDERS,
GPTShareButton,
} from "@renderer/components";
// Text-to-speech settings stored under `configuration.tts`.
const ttsConfigurationSchema = z.object({
  engine: z.enum(["openai", "enjoyai"]).default("enjoyai"),
  model: z.string().default("tts-1"),
  voice: z.string(),
  baseUrl: z.string().optional(),
});

// Per-conversation settings: the conversation type, the GPT tuning knobs and
// the nested TTS settings.
const conversationConfigurationSchema = z.object({
  type: z.enum(["gpt", "tts"]),
  model: z.string().optional(),
  baseUrl: z.string().optional(),
  roleDefinition: z.string().optional(),
  temperature: z.number().min(0).max(1).default(0.2),
  numberOfChoices: z.number().min(1).default(1),
  maxTokens: z.number().min(-1).default(2000),
  presencePenalty: z.number().min(-2).max(2).default(0),
  frequencyPenalty: z.number().min(-2).max(2).default(0),
  historyBufferSize: z.number().min(0).default(10),
  tts: ttsConfigurationSchema,
});

// Shape of the values the conversation form validates through `zodResolver`.
const conversationFormSchema = z.object({
  name: z.string().optional(),
  engine: z
    .enum(["enjoyai", "openai", "ollama", "googleGenerativeAi"])
    .default("openai"),
  configuration: conversationConfigurationSchema,
});
/**
 * Form used both to start a new conversation and to edit an existing one.
 *
 * The form always shows the name, the conversation type and the TTS settings;
 * the GPT settings are shown only while the type is `"gpt"`. Which optional
 * fields appear is driven by the `configurable` list of the selected GPT / TTS
 * provider. Provider lists start from `GPT_PROVIDERS` / `TTS_PROVIDERS` and are
 * refreshed once on mount from the remote config (and, for ollama, from the
 * local `/api/tags` endpoint).
 *
 * @param props.conversation - Conversation to edit, or a preset for a new one.
 *   When it has an `id` the form updates it, disables the type/engine
 *   selectors and offers a delete action; otherwise it creates a conversation.
 * @param props.onFinish - Optional callback invoked after a successful
 *   create/update.
 */
export const ConversationForm = (props: {
  conversation: Partial<ConversationType>;
  onFinish?: () => void;
}) => {
  const { conversation, onFinish } = props;
  const [submitting, setSubmitting] = useState<boolean>(false);
  const [gptProviders, setGptProviders] = useState<any>(GPT_PROVIDERS);
  const [ttsProviders, setTtsProviders] = useState<any>(TTS_PROVIDERS);
  const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
  const { openai } = useContext(AISettingsProviderContext);
  const navigate = useNavigate();

  // Merge the remote GPT provider config over the built-in list, then try to
  // discover the models served by a local ollama instance. Both steps are
  // best-effort: a failure only logs a warning.
  // NOTE(review): Object.assign writes into the shared GPT_PROVIDERS object
  // itself; kept as-is because other modules may rely on that — confirm.
  const refreshGptProviders = async () => {
    let providers = GPT_PROVIDERS;

    try {
      const config = await webApi.config("gpt_providers");
      providers = Object.assign(providers, config);
    } catch (e) {
      console.warn(`Failed to fetch remote GPT config: ${e.message}`);
    }

    try {
      const response = await fetch(providers["ollama"]?.baseUrl + "/api/tags");
      providers["ollama"].models = (await response.json()).models.map(
        (m: any) => m.name
      );
    } catch (e) {
      console.warn(`No ollama server found: ${e.message}`);
    }

    setGptProviders({ ...providers });
  };

  // Delete the conversation being edited and go back to the list.
  const destroyConversation = async () => {
    if (!conversation.id) return;

    EnjoyApp.conversations.destroy(conversation.id).then(() => {
      navigate(`/conversations`);
    });
  };

  // Merge the remote TTS provider config over the built-in list (best-effort).
  const refreshTtsProviders = async () => {
    let providers = TTS_PROVIDERS;

    try {
      const config = await webApi.config("tts_providers");
      providers = Object.assign(providers, config);
    } catch (e) {
      console.warn(`Failed to fetch remote TTS config: ${e.message}`);
    }

    setTtsProviders({ ...providers });
  };

  useEffect(() => {
    refreshGptProviders();
    refreshTtsProviders();
  }, []);

  // Deep copy so that filling in defaults never touches the `conversation` prop.
  const defaultConfig = JSON.parse(JSON.stringify(conversation || {}));

  // Pre-fill model / base URL from the user's OpenAI settings when missing.
  if (defaultConfig.engine === "openai" && openai) {
    if (!defaultConfig.configuration) {
      defaultConfig.configuration = {};
    }
    if (!defaultConfig.configuration.model) {
      defaultConfig.configuration.model = openai.model;
    }
    if (!defaultConfig.configuration.baseUrl) {
      defaultConfig.configuration.baseUrl = openai.baseUrl;
    }
  }

  // `configuration` may be absent (it is only created above for the openai
  // engine), so it must be optional-chained too or this line throws.
  if (defaultConfig.configuration?.tts?.engine === "openai" && openai) {
    if (!defaultConfig.configuration.tts.baseUrl) {
      defaultConfig.configuration.tts.baseUrl = openai.baseUrl;
    }
  }

  const form = useForm<z.infer<typeof conversationFormSchema>>({
    resolver: zodResolver(conversationFormSchema),
    // @ts-ignore
    values: conversation?.id
      ? {
          name: conversation.name,
          engine: conversation.engine,
          configuration: {
            type: conversation.configuration.type || "gpt",
            ...conversation.configuration,
          },
        }
      : {
          name: defaultConfig.name,
          engine: defaultConfig.engine,
          configuration: {
            ...defaultConfig.configuration,
          },
        },
  });

  // Strip the settings the selected provider does not support and fill in the
  // default base URLs. Mutates and returns `data.configuration`.
  const validateConfiguration = (
    data: z.infer<typeof conversationFormSchema>
  ) => {
    const { engine, configuration } = data;

    Object.keys(configuration).forEach((key) => {
      if (key === "type") return;

      if (
        configuration.type === "gpt" &&
        !gptProviders[engine]?.configurable.includes(key)
      ) {
        // @ts-ignore
        delete configuration[key];
      }

      // NOTE(review): this looks up the TTS provider by the GPT `engine` and
      // tests top-level configuration keys against it; it may have been meant
      // to use `configuration.tts.engine` and the keys of `configuration.tts`.
      // Behaviour left unchanged — confirm intent.
      if (
        configuration.type === "tts" &&
        !ttsProviders[engine]?.configurable.includes(key)
      ) {
        // @ts-ignore
        delete configuration.tts[key];
      }
    });

    // use default base url if not set
    if (!configuration.baseUrl) {
      configuration.baseUrl = gptProviders[engine]?.baseUrl;
    }

    // use default base url if not set
    // NOTE(review): the TTS default is taken from the GPT provider — confirm.
    if (!configuration?.tts?.baseUrl) {
      // @ts-ignore
      configuration.tts ||= {};
      configuration.tts.baseUrl = gptProviders[engine]?.baseUrl;
    }

    return configuration;
  };

  // Validate, then create or update the conversation. Any failure is shown as
  // a toast instead of being left as an unhandled promise rejection.
  const onSubmit = async (data: z.infer<typeof conversationFormSchema>) => {
    const { name, engine } = data;
    let configuration = data.configuration;

    setSubmitting(true);

    try {
      configuration = validateConfiguration(data);
    } catch (e) {
      toast.error(e.message);
      setSubmitting(false);
      return;
    }

    try {
      if (conversation?.id) {
        await EnjoyApp.conversations.update(conversation.id, {
          name,
          configuration,
        });
      } else {
        await EnjoyApp.conversations.create({
          name,
          engine,
          configuration,
        });
      }

      onFinish?.();
    } catch (e) {
      toast.error(e.message);
    } finally {
      setSubmitting(false);
    }
  };

  // Resolve the selected providers once per render instead of once per field.
  const gptProvider = gptProviders[form.watch("engine")];
  const ttsProvider = ttsProviders[form.watch("configuration.tts.engine")];
  const isGptConfigurable = (key: string): boolean =>
    Boolean(gptProvider?.configurable?.includes(key));
  const isTtsConfigurable = (key: string): boolean =>
    Boolean(ttsProvider?.configurable?.includes(key));

  return (
    <Form {...form}>
      <form
        onSubmit={form.handleSubmit(onSubmit)}
        className="h-full flex flex-col pt-6"
        data-testid="conversation-form"
      >
        <div className="mb-4 px-6 flex items-center space-x-4">
          <div className="text-lg font-bold">
            {conversation.id ? t("editConversation") : t("startConversation")}
          </div>
          <GPTShareButton conversation={conversation} />
        </div>

        <ScrollArea className="flex-1 px-4">
          <div className="space-y-4 px-2 mb-6">
            <FormField
              control={form.control}
              name="name"
              render={({ field }) => (
                <FormItem>
                  <FormLabel>{t("models.conversation.name")}</FormLabel>
                  <Input value={field.value} onChange={field.onChange} />
                  <FormMessage />
                </FormItem>
              )}
            />

            <FormField
              control={form.control}
              name="configuration.type"
              render={({ field }) => (
                <FormItem>
                  <FormLabel>{t("models.conversation.type")}</FormLabel>
                  <Select
                    disabled={Boolean(conversation?.id)}
                    onValueChange={field.onChange}
                    value={field.value}
                  >
                    <FormControl>
                      <SelectTrigger>
                        <SelectValue placeholder={t("selectAiType")} />
                      </SelectTrigger>
                    </FormControl>
                    <SelectContent>
                      <SelectItem key="gpt" value="gpt">
                        GPT
                      </SelectItem>
                      <SelectItem key="tts" value="tts">
                        TTS
                      </SelectItem>
                    </SelectContent>
                  </Select>
                  <FormMessage />
                </FormItem>
              )}
            />

            {form.watch("configuration.type") === "gpt" && (
              <>
                <FormField
                  control={form.control}
                  name="engine"
                  render={({ field }) => (
                    <FormItem>
                      <FormLabel>{t("models.conversation.engine")}</FormLabel>
                      <Select
                        disabled={Boolean(conversation?.id)}
                        onValueChange={field.onChange}
                        value={field.value}
                      >
                        <FormControl>
                          <SelectTrigger>
                            <SelectValue placeholder={t("selectAiEngine")} />
                          </SelectTrigger>
                        </FormControl>
                        <SelectContent>
                          {Object.keys(gptProviders).map((key) => (
                            <SelectItem key={key} value={key}>
                              {gptProviders[key].name}
                            </SelectItem>
                          ))}
                        </SelectContent>
                      </Select>
                      <FormDescription>
                        {gptProvider?.description}
                      </FormDescription>
                      <FormMessage />
                    </FormItem>
                  )}
                />

                <FormField
                  control={form.control}
                  name="configuration.model"
                  render={({ field }) => (
                    <FormItem>
                      <FormLabel>{t("models.conversation.model")}</FormLabel>
                      <Select
                        onValueChange={field.onChange}
                        value={field.value}
                      >
                        <FormControl>
                          <SelectTrigger>
                            <SelectValue placeholder={t("selectAiModel")} />
                          </SelectTrigger>
                        </FormControl>
                        <SelectContent>
                          {(gptProvider?.models || []).map((option: string) => (
                            <SelectItem key={option} value={option}>
                              {option}
                            </SelectItem>
                          ))}
                        </SelectContent>
                      </Select>
                      <FormMessage />
                    </FormItem>
                  )}
                />

                <FormField
                  control={form.control}
                  name="configuration.roleDefinition"
                  render={({ field }) => (
                    <FormItem>
                      <FormLabel>
                        {t("models.conversation.roleDefinition")}
                      </FormLabel>
                      <Textarea
                        placeholder={t(
                          "models.conversation.roleDefinitionPlaceholder"
                        )}
                        className="h-64"
                        {...field}
                      />
                      <FormMessage />
                    </FormItem>
                  )}
                />

                {isGptConfigurable("temperature") && (
                  <FormField
                    control={form.control}
                    name="configuration.temperature"
                    render={({ field }) => (
                      <FormItem>
                        <FormLabel>
                          {t("models.conversation.temperature")}
                        </FormLabel>
                        <Input
                          type="number"
                          min="0"
                          max="1.0"
                          step="0.1"
                          value={field.value}
                          onChange={(event) => {
                            field.onChange(
                              event.target.value
                                ? parseFloat(event.target.value)
                                : 0.0
                            );
                          }}
                        />
                        <FormDescription>
                          {t("models.conversation.temperatureDescription")}
                        </FormDescription>
                        <FormMessage />
                      </FormItem>
                    )}
                  />
                )}

                {isGptConfigurable("maxTokens") && (
                  <FormField
                    control={form.control}
                    name="configuration.maxTokens"
                    render={({ field }) => (
                      <FormItem>
                        <FormLabel>
                          {t("models.conversation.maxTokens")}
                        </FormLabel>
                        <Input
                          type="number"
                          min="0"
                          value={field.value}
                          onChange={(event) => {
                            if (!event.target.value) return;
                            field.onChange(parseInt(event.target.value));
                          }}
                        />
                        <FormDescription>
                          {t("models.conversation.maxTokensDescription")}
                        </FormDescription>
                        <FormMessage />
                      </FormItem>
                    )}
                  />
                )}

                {isGptConfigurable("presencePenalty") && (
                  <FormField
                    control={form.control}
                    name="configuration.presencePenalty"
                    render={({ field }) => (
                      <FormItem>
                        <FormLabel>
                          {t("models.conversation.presencePenalty")}
                        </FormLabel>
                        <Input
                          type="number"
                          min="-2"
                          step="0.1"
                          max="2"
                          value={field.value}
                          onChange={(event) => {
                            if (!event.target.value) return;
                            // The input steps by 0.1, so the value must be
                            // parsed as a float; parseInt dropped the fraction.
                            field.onChange(parseFloat(event.target.value));
                          }}
                        />
                        <FormDescription>
                          {t("models.conversation.presencePenaltyDescription")}
                        </FormDescription>
                        <FormMessage />
                      </FormItem>
                    )}
                  />
                )}

                {isGptConfigurable("frequencyPenalty") && (
                  <FormField
                    control={form.control}
                    name="configuration.frequencyPenalty"
                    render={({ field }) => (
                      <FormItem>
                        <FormLabel>
                          {t("models.conversation.frequencyPenalty")}
                        </FormLabel>
                        <Input
                          type="number"
                          min="-2"
                          step="0.1"
                          max="2"
                          value={field.value}
                          onChange={(event) => {
                            if (!event.target.value) return;
                            // Fractional steps: parse as float, not int.
                            field.onChange(parseFloat(event.target.value));
                          }}
                        />
                        <FormDescription>
                          {t("models.conversation.frequencyPenaltyDescription")}
                        </FormDescription>
                        <FormMessage />
                      </FormItem>
                    )}
                  />
                )}

                {isGptConfigurable("numberOfChoices") && (
                  <FormField
                    control={form.control}
                    name="configuration.numberOfChoices"
                    render={({ field }) => (
                      <FormItem>
                        <FormLabel>
                          {t("models.conversation.numberOfChoices")}
                        </FormLabel>
                        <Input
                          type="number"
                          min="1"
                          step="1.0"
                          value={field.value}
                          onChange={(event) => {
                            field.onChange(
                              event.target.value
                                ? parseInt(event.target.value)
                                : 1.0
                            );
                          }}
                        />
                        <FormDescription>
                          {t("models.conversation.numberOfChoicesDescription")}
                        </FormDescription>
                        <FormMessage />
                      </FormItem>
                    )}
                  />
                )}

                <FormField
                  control={form.control}
                  name="configuration.historyBufferSize"
                  render={({ field }) => (
                    <FormItem>
                      <FormLabel>
                        {t("models.conversation.historyBufferSize")}
                      </FormLabel>
                      <Input
                        type="number"
                        min="0"
                        step="1"
                        max="100"
                        value={field.value}
                        onChange={(event) => {
                          field.onChange(
                            event.target.value
                              ? parseInt(event.target.value)
                              : 0
                          );
                        }}
                      />
                      <FormDescription>
                        {t("models.conversation.historyBufferSizeDescription")}
                      </FormDescription>
                      <FormMessage />
                    </FormItem>
                  )}
                />

                {isGptConfigurable("baseUrl") && (
                  <FormField
                    control={form.control}
                    name="configuration.baseUrl"
                    render={({ field }) => (
                      <FormItem>
                        <FormLabel>
                          {t("models.conversation.baseUrl")}
                        </FormLabel>
                        <Input
                          {...field}
                          placeholder={t(
                            "models.conversation.baseUrlDescription"
                          )}
                        />
                        <FormMessage />
                      </FormItem>
                    )}
                  />
                )}
              </>
            )}

            <FormField
              control={form.control}
              name="configuration.tts.engine"
              render={({ field }) => (
                <FormItem>
                  <FormLabel>{t("models.conversation.ttsEngine")}</FormLabel>
                  <Select
                    onValueChange={field.onChange}
                    defaultValue={field.value}
                    value={field.value}
                  >
                    <FormControl>
                      <SelectTrigger>
                        <SelectValue placeholder={t("selectTtsEngine")} />
                      </SelectTrigger>
                    </FormControl>
                    <SelectContent>
                      {Object.keys(ttsProviders).map((key) => (
                        <SelectItem key={key} value={key}>
                          {ttsProviders[key].name}
                        </SelectItem>
                      ))}
                    </SelectContent>
                  </Select>
                  <FormMessage />
                </FormItem>
              )}
            />

            {isTtsConfigurable("model") && (
              <FormField
                control={form.control}
                name="configuration.tts.model"
                render={({ field }) => (
                  <FormItem>
                    <FormLabel>{t("models.conversation.ttsModel")}</FormLabel>
                    <Select
                      onValueChange={field.onChange}
                      defaultValue={field.value}
                      value={field.value}
                    >
                      <FormControl>
                        <SelectTrigger>
                          <SelectValue placeholder={t("selectTtsModel")} />
                        </SelectTrigger>
                      </FormControl>
                      <SelectContent>
                        {(ttsProvider?.models || []).map((model: string) => (
                          <SelectItem key={model} value={model}>
                            {model}
                          </SelectItem>
                        ))}
                      </SelectContent>
                    </Select>
                    <FormMessage />
                  </FormItem>
                )}
              />
            )}

            {isTtsConfigurable("voice") && (
              <FormField
                control={form.control}
                name="configuration.tts.voice"
                render={({ field }) => (
                  <FormItem>
                    <FormLabel>{t("models.conversation.ttsVoice")}</FormLabel>
                    <Select
                      onValueChange={field.onChange}
                      defaultValue={field.value}
                      value={field.value}
                    >
                      <FormControl>
                        <SelectTrigger>
                          <SelectValue placeholder={t("selectTtsVoice")} />
                        </SelectTrigger>
                      </FormControl>
                      <SelectContent>
                        {(ttsProvider?.voices || []).map((voice: string) => (
                          <SelectItem key={voice} value={voice}>
                            <span className="capitalize">{voice}</span>
                          </SelectItem>
                        ))}
                      </SelectContent>
                    </Select>
                    <FormMessage />
                  </FormItem>
                )}
              />
            )}

            {isTtsConfigurable("baseUrl") && (
              <FormField
                control={form.control}
                name="configuration.tts.baseUrl"
                render={({ field }) => (
                  <FormItem>
                    <FormLabel>{t("models.conversation.ttsBaseUrl")}</FormLabel>
                    <Input
                      {...field}
                      placeholder={t(
                        "models.conversation.ttsBaseUrlDescription"
                      )}
                    />
                    <FormMessage />
                  </FormItem>
                )}
              />
            )}
          </div>
        </ScrollArea>

        <div className="flex justify-center space-x-4 py-6 px-6 border-t shadow">
          {conversation.id && (
            <AlertDialog>
              <AlertDialogTrigger asChild>
                <Button
                  className="w-full h-12 text-destructive"
                  size="lg"
                  variant="secondary"
                >
                  {t("delete")}
                </Button>
              </AlertDialogTrigger>
              <AlertDialogContent>
                <AlertDialogHeader>
                  <AlertDialogTitle>{t("deleteConversation")}</AlertDialogTitle>
                </AlertDialogHeader>
                <AlertDialogDescription>
                  {t("deleteConversationConfirmation")}
                </AlertDialogDescription>
                <AlertDialogFooter>
                  <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
                  <AlertDialogAction
                    className="bg-destructive hover:bg-destructive-hover"
                    onClick={destroyConversation}
                  >
                    {t("delete")}
                  </AlertDialogAction>
                </AlertDialogFooter>
              </AlertDialogContent>
            </AlertDialog>
          )}

          {/* When editing, submitting is only enabled once something changed. */}
          <Button
            disabled={
              submitting || (conversation.id && !form.formState.isDirty)
            }
            className="w-full h-12"
            data-testid="conversation-form-submit"
            size="lg"
            type="submit"
          >
            {submitting && <LoaderIcon className="mr-2 animate-spin" />}
            {t("confirm")}
          </Button>
        </div>
      </form>
    </Form>
  );
};

View File

@@ -0,0 +1,291 @@
import { t } from "i18next";
import { useForm } from "react-hook-form";
import {
FormField,
FormItem,
FormLabel,
FormControl,
FormDescription,
FormMessage,
Input,
Select,
SelectTrigger,
SelectValue,
SelectContent,
SelectItem,
Textarea,
} from "@renderer/components/ui";
/**
 * GPT section of the conversation form.
 *
 * Always renders the engine selector, model selector, role definition and
 * history buffer size. Temperature, max tokens, presence/frequency penalty,
 * number of choices and base URL are rendered only when the selected
 * provider lists the corresponding key in its `configurable` array.
 *
 * @param props.conversation - Conversation being edited; when it has an `id`
 *   the engine selector is disabled.
 * @param props.form - react-hook-form instance that owns the field values.
 * @param props.gptProviders - Map of engine key to provider descriptor. This
 *   component reads `name`, `description`, `models` and `configurable` from
 *   each entry.
 */
export const ConversationFormGPT = (props: {
  conversation: Partial<ConversationType>;
  form: ReturnType<typeof useForm>;
  gptProviders: any;
}) => {
  const { form, gptProviders, conversation } = props;

  // Resolve the selected provider once per render instead of once per field.
  const provider = gptProviders[form.watch("engine")];

  // Tolerates an unknown engine as well as a provider entry that has no
  // `configurable` list, both of which simply hide the optional field.
  const isConfigurable = (key: string): boolean =>
    Boolean(provider?.configurable?.includes(key));

  return (
    <>
      <FormField
        control={form.control}
        name="engine"
        render={({ field }) => (
          <FormItem>
            <FormLabel>{t("models.conversation.engine")}</FormLabel>
            <Select
              disabled={Boolean(conversation?.id)}
              onValueChange={field.onChange}
              value={field.value}
            >
              <FormControl>
                <SelectTrigger>
                  <SelectValue placeholder={t("selectAiEngine")} />
                </SelectTrigger>
              </FormControl>
              <SelectContent>
                {Object.keys(gptProviders).map((key) => (
                  <SelectItem key={key} value={key}>
                    {gptProviders[key].name}
                  </SelectItem>
                ))}
              </SelectContent>
            </Select>
            <FormDescription>{provider?.description}</FormDescription>
            <FormMessage />
          </FormItem>
        )}
      />

      <FormField
        control={form.control}
        name="configuration.model"
        render={({ field }) => (
          <FormItem>
            <FormLabel>{t("models.conversation.model")}</FormLabel>
            <Select onValueChange={field.onChange} value={field.value}>
              <FormControl>
                <SelectTrigger>
                  <SelectValue placeholder={t("selectAiModel")} />
                </SelectTrigger>
              </FormControl>
              <SelectContent>
                {(provider?.models || []).map((option: string) => (
                  <SelectItem key={option} value={option}>
                    {option}
                  </SelectItem>
                ))}
              </SelectContent>
            </Select>
            <FormMessage />
          </FormItem>
        )}
      />

      <FormField
        control={form.control}
        name="configuration.roleDefinition"
        render={({ field }) => (
          <FormItem>
            <FormLabel>{t("models.conversation.roleDefinition")}</FormLabel>
            <Textarea
              placeholder={t("models.conversation.roleDefinitionPlaceholder")}
              className="h-64"
              {...field}
            />
            <FormMessage />
          </FormItem>
        )}
      />

      {isConfigurable("temperature") && (
        <FormField
          control={form.control}
          name="configuration.temperature"
          render={({ field }) => (
            <FormItem>
              <FormLabel>{t("models.conversation.temperature")}</FormLabel>
              <Input
                type="number"
                min="0"
                max="1.0"
                step="0.1"
                value={field.value}
                onChange={(event) => {
                  field.onChange(
                    event.target.value ? parseFloat(event.target.value) : 0.0
                  );
                }}
              />
              <FormDescription>
                {t("models.conversation.temperatureDescription")}
              </FormDescription>
              <FormMessage />
            </FormItem>
          )}
        />
      )}

      {isConfigurable("maxTokens") && (
        <FormField
          control={form.control}
          name="configuration.maxTokens"
          render={({ field }) => (
            <FormItem>
              <FormLabel>{t("models.conversation.maxTokens")}</FormLabel>
              <Input
                type="number"
                min="0"
                value={field.value}
                onChange={(event) => {
                  if (!event.target.value) return;
                  field.onChange(parseInt(event.target.value));
                }}
              />
              <FormDescription>
                {t("models.conversation.maxTokensDescription")}
              </FormDescription>
              <FormMessage />
            </FormItem>
          )}
        />
      )}

      {isConfigurable("presencePenalty") && (
        <FormField
          control={form.control}
          name="configuration.presencePenalty"
          render={({ field }) => (
            <FormItem>
              <FormLabel>{t("models.conversation.presencePenalty")}</FormLabel>
              <Input
                type="number"
                min="-2"
                step="0.1"
                max="2"
                value={field.value}
                onChange={(event) => {
                  if (!event.target.value) return;
                  // The input steps by 0.1, so the value must be parsed as a
                  // float; parseInt silently dropped the fractional part.
                  field.onChange(parseFloat(event.target.value));
                }}
              />
              <FormDescription>
                {t("models.conversation.presencePenaltyDescription")}
              </FormDescription>
              <FormMessage />
            </FormItem>
          )}
        />
      )}

      {isConfigurable("frequencyPenalty") && (
        <FormField
          control={form.control}
          name="configuration.frequencyPenalty"
          render={({ field }) => (
            <FormItem>
              <FormLabel>{t("models.conversation.frequencyPenalty")}</FormLabel>
              <Input
                type="number"
                min="-2"
                step="0.1"
                max="2"
                value={field.value}
                onChange={(event) => {
                  if (!event.target.value) return;
                  // Fractional steps: parse as float, not int.
                  field.onChange(parseFloat(event.target.value));
                }}
              />
              <FormDescription>
                {t("models.conversation.frequencyPenaltyDescription")}
              </FormDescription>
              <FormMessage />
            </FormItem>
          )}
        />
      )}

      {isConfigurable("numberOfChoices") && (
        <FormField
          control={form.control}
          name="configuration.numberOfChoices"
          render={({ field }) => (
            <FormItem>
              <FormLabel>{t("models.conversation.numberOfChoices")}</FormLabel>
              <Input
                type="number"
                min="1"
                step="1.0"
                value={field.value}
                onChange={(event) => {
                  field.onChange(
                    event.target.value ? parseInt(event.target.value) : 1.0
                  );
                }}
              />
              <FormDescription>
                {t("models.conversation.numberOfChoicesDescription")}
              </FormDescription>
              <FormMessage />
            </FormItem>
          )}
        />
      )}

      <FormField
        control={form.control}
        name="configuration.historyBufferSize"
        render={({ field }) => (
          <FormItem>
            <FormLabel>{t("models.conversation.historyBufferSize")}</FormLabel>
            <Input
              type="number"
              min="0"
              step="1"
              max="100"
              value={field.value}
              onChange={(event) => {
                field.onChange(
                  event.target.value ? parseInt(event.target.value) : 0
                );
              }}
            />
            <FormDescription>
              {t("models.conversation.historyBufferSizeDescription")}
            </FormDescription>
            <FormMessage />
          </FormItem>
        )}
      />

      {isConfigurable("baseUrl") && (
        <FormField
          control={form.control}
          name="configuration.baseUrl"
          render={({ field }) => (
            <FormItem>
              <FormLabel>{t("models.conversation.baseUrl")}</FormLabel>
              <Input
                {...field}
                placeholder={t("models.conversation.baseUrlDescription")}
              />
              <FormMessage />
            </FormItem>
          )}
        />
      )}
    </>
  );
};

View File

@@ -0,0 +1,163 @@
import { t } from "i18next";
import { useForm } from "react-hook-form";
import {
FormField,
FormItem,
FormLabel,
FormControl,
FormMessage,
Input,
Select,
SelectTrigger,
SelectValue,
SelectContent,
SelectItem,
} from "@renderer/components/ui";
import { useContext } from "react";
import { AppSettingsProviderContext } from "@renderer/context";
/**
 * Text-to-speech section of the conversation form.
 *
 * Always renders the TTS engine selector. The model, voice and base URL
 * fields are rendered only when the selected provider lists the corresponding
 * key in its `configurable` array.
 *
 * Voice entries may be plain strings or objects with `value`, `label` and
 * `language`; object entries are offered only when their `language` equals
 * the current learning language.
 *
 * @param props.form - react-hook-form instance that owns the field values.
 * @param props.ttsProviders - Map of engine key to provider descriptor. This
 *   component reads `name`, `models`, `voices` and `configurable` from each
 *   entry.
 */
export const ConversationFormTTS = (props: {
  form: ReturnType<typeof useForm>;
  ttsProviders: any;
}) => {
  const { form, ttsProviders } = props;
  const { learningLanguage } = useContext(AppSettingsProviderContext);

  // Resolve the watched values once per render instead of once per field.
  const engine = form.watch("configuration.tts.engine");
  const model = form.watch("configuration.tts.model");
  const provider = ttsProviders[engine];

  // Tolerates an unknown engine as well as a provider entry that has no
  // `configurable` list, both of which simply hide the optional field.
  const isConfigurable = (key: string): boolean =>
    Boolean(provider?.configurable?.includes(key));

  // For "enjoyai" the voices are grouped under the part of the model name that
  // precedes the first "/". The model can be missing on a stored configuration,
  // so it is checked before splitting instead of throwing during render.
  const voiceGroup = typeof model === "string" ? model.split("/")[0] : "";
  const candidateVoices =
    engine === "enjoyai" ? provider?.voices?.[voiceGroup] : provider?.voices;
  const voices: any[] = Array.isArray(candidateVoices) ? candidateVoices : [];

  // String voices are always offered; object voices only for the language
  // currently being learned.
  const selectableVoices = voices.filter(
    (voice: any) =>
      typeof voice === "string" || voice?.language === learningLanguage
  );

  return (
    <>
      <FormField
        control={form.control}
        name="configuration.tts.engine"
        render={({ field }) => (
          <FormItem>
            <FormLabel>{t("models.conversation.ttsEngine")}</FormLabel>
            <Select
              onValueChange={field.onChange}
              defaultValue={field.value}
              value={field.value}
            >
              <FormControl>
                <SelectTrigger>
                  <SelectValue placeholder={t("selectTtsEngine")} />
                </SelectTrigger>
              </FormControl>
              <SelectContent>
                {Object.keys(ttsProviders).map((key) => (
                  <SelectItem key={key} value={key}>
                    {ttsProviders[key].name}
                  </SelectItem>
                ))}
              </SelectContent>
            </Select>
            <FormMessage />
          </FormItem>
        )}
      />

      {isConfigurable("model") && (
        <FormField
          control={form.control}
          name="configuration.tts.model"
          render={({ field }) => (
            <FormItem>
              <FormLabel>{t("models.conversation.ttsModel")}</FormLabel>
              <Select
                onValueChange={field.onChange}
                defaultValue={field.value}
                value={field.value}
              >
                <FormControl>
                  <SelectTrigger>
                    <SelectValue placeholder={t("selectTtsModel")} />
                  </SelectTrigger>
                </FormControl>
                <SelectContent>
                  {(provider?.models || []).map((option: string) => (
                    <SelectItem key={option} value={option}>
                      {option}
                    </SelectItem>
                  ))}
                </SelectContent>
              </Select>
              <FormMessage />
            </FormItem>
          )}
        />
      )}

      {isConfigurable("voice") && (
        <FormField
          control={form.control}
          name="configuration.tts.voice"
          render={({ field }) => (
            <FormItem>
              <FormLabel>{t("models.conversation.ttsVoice")}</FormLabel>
              <Select
                onValueChange={field.onChange}
                defaultValue={field.value}
                value={field.value}
              >
                <FormControl>
                  <SelectTrigger>
                    <SelectValue placeholder={t("selectTtsVoice")} />
                  </SelectTrigger>
                </FormControl>
                <SelectContent>
                  {selectableVoices.map((voice: any) =>
                    typeof voice === "string" ? (
                      <SelectItem key={voice} value={voice}>
                        <span className="capitalize">{voice}</span>
                      </SelectItem>
                    ) : (
                      <SelectItem key={voice.value} value={voice.value}>
                        <span className="capitalize">{voice.label}</span>
                      </SelectItem>
                    )
                  )}
                </SelectContent>
              </Select>
              <FormMessage />
            </FormItem>
          )}
        />
      )}

      {isConfigurable("baseUrl") && (
        <FormField
          control={form.control}
          name="configuration.tts.baseUrl"
          render={({ field }) => (
            <FormItem>
              <FormLabel>{t("models.conversation.ttsBaseUrl")}</FormLabel>
              <Input
                {...field}
                placeholder={t("models.conversation.ttsBaseUrlDescription")}
              />
              <FormMessage />
            </FormItem>
          )}
        />
      )}
    </>
  );
};

View File

@@ -0,0 +1,369 @@
import * as z from "zod";
import { t } from "i18next";
import { useForm } from "react-hook-form";
import { zodResolver } from "@hookform/resolvers/zod";
import {
AlertDialog,
AlertDialogAction,
AlertDialogCancel,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
AlertDialogTrigger,
Button,
FormField,
Form,
FormItem,
FormLabel,
FormControl,
FormMessage,
Input,
ScrollArea,
Select,
SelectTrigger,
SelectValue,
SelectContent,
SelectItem,
toast,
} from "@renderer/components/ui";
import { useState, useEffect, useContext } from "react";
import {
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
import { LoaderIcon } from "lucide-react";
import { useNavigate } from "react-router-dom";
import {
GPT_PROVIDERS,
TTS_PROVIDERS,
GPTShareButton,
ConversationFormGPT,
ConversationFormTTS,
} from "@renderer/components";
// Text-to-speech settings stored under `configuration.tts`.
const ttsConfigurationSchema = z.object({
  engine: z.enum(["openai", "enjoyai"]).default("enjoyai"),
  model: z.string().default("openai/tts-1"),
  voice: z.string(),
  baseUrl: z.string().optional(),
});

// Per-conversation settings: the conversation type, the GPT tuning knobs and
// the nested TTS settings.
const conversationConfigurationSchema = z.object({
  type: z.enum(["gpt", "tts"]),
  model: z.string().optional(),
  baseUrl: z.string().optional(),
  roleDefinition: z.string().optional(),
  temperature: z.number().min(0).max(1).default(0.2),
  numberOfChoices: z.number().min(1).default(1),
  maxTokens: z.number().min(-1).default(2000),
  presencePenalty: z.number().min(-2).max(2).default(0),
  frequencyPenalty: z.number().min(-2).max(2).default(0),
  historyBufferSize: z.number().min(0).default(10),
  tts: ttsConfigurationSchema,
});

// Shape of the values the conversation form validates through `zodResolver`.
const conversationFormSchema = z.object({
  name: z.string().optional(),
  engine: z
    .enum(["enjoyai", "openai", "ollama", "googleGenerativeAi"])
    .default("openai"),
  configuration: conversationConfigurationSchema,
});
export const ConversationForm = (props: {
conversation: Partial<ConversationType>;
onFinish?: () => void;
}) => {
const { conversation, onFinish } = props;
const [submitting, setSubmitting] = useState<boolean>(false);
const [gptProviders, setGptProviders] = useState<any>(GPT_PROVIDERS);
const [ttsProviders, setTtsProviders] = useState<any>(TTS_PROVIDERS);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const { openai } = useContext(AISettingsProviderContext);
const navigate = useNavigate();
const refreshGptProviders = async () => {
let providers = GPT_PROVIDERS;
try {
const config = await webApi.config("gpt_providers");
providers = Object.assign(providers, config);
} catch (e) {
console.warn(`Failed to fetch remote GPT config: ${e.message}`);
}
try {
const response = await fetch(providers["ollama"]?.baseUrl + "/api/tags");
providers["ollama"].models = (await response.json()).models.map(
(m: any) => m.name
);
} catch (e) {
console.warn(`No ollama server found: ${e.message}`);
}
setGptProviders({ ...providers });
};
const destroyConversation = async () => {
if (!conversation.id) return;
EnjoyApp.conversations.destroy(conversation.id).then(() => {
navigate(`/conversations`);
});
};
const refreshTtsProviders = async () => {
let providers = TTS_PROVIDERS;
try {
const config = await webApi.config("tts_providers_v2");
providers = Object.assign(providers, config);
} catch (e) {
console.warn(`Failed to fetch remote TTS config: ${e.message}`);
}
setTtsProviders({ ...providers });
};
useEffect(() => {
refreshGptProviders();
refreshTtsProviders();
}, []);
const defaultConfig = JSON.parse(JSON.stringify(conversation || {}));
if (defaultConfig.engine === "openai" && openai) {
if (!defaultConfig.configuration) {
defaultConfig.configuration = {};
}
if (!defaultConfig.configuration.model) {
defaultConfig.configuration.model = openai.model;
}
if (!defaultConfig.configuration.baseUrl) {
defaultConfig.configuration.baseUrl = openai.baseUrl;
}
}
if (defaultConfig.configuration.tts?.engine === "openai" && openai) {
if (!defaultConfig.configuration.tts?.baseUrl) {
defaultConfig.configuration.tts.baseUrl = openai.baseUrl;
}
}
const form = useForm<z.infer<typeof conversationFormSchema>>({
resolver: zodResolver(conversationFormSchema),
// @ts-ignore
values: conversation?.id
? {
name: conversation.name,
engine: conversation.engine,
configuration: {
type: conversation.configuration.type || "gpt",
...conversation.configuration,
},
}
: {
name: defaultConfig.name,
engine: defaultConfig.engine,
configuration: {
...defaultConfig.configuration,
},
},
});
/**
 * Form submit handler: validate the configuration, then update the existing
 * conversation (when it has an id) or create a new one.
 *
 * Validation and persistence failures are both reported with an error toast.
 * `submitting` is always reset, and `onFinish` is invoked only after a
 * successful save.
 *
 * @param data - values produced by the conversation form
 */
const onSubmit = async (data: z.infer<typeof conversationFormSchema>) => {
  const { name, engine } = data;
  let configuration = data.configuration;
  setSubmitting(true);

  try {
    configuration = validateConfiguration(data);
  } catch (e) {
    toast.error(e.message);
    setSubmitting(false);
    return;
  }

  try {
    if (conversation?.id) {
      // The engine of an existing conversation is not sent with the update.
      await EnjoyApp.conversations.update(conversation.id, {
        name,
        configuration,
      });
    } else {
      await EnjoyApp.conversations.create({
        name,
        engine,
        configuration,
      });
    }
    onFinish && onFinish();
  } catch (err) {
    // Previously a failed save was an unhandled rejection with no feedback.
    toast.error(err.message);
  } finally {
    setSubmitting(false);
  }
};
/**
 * Strip settings the selected provider does not declare as configurable and
 * fill in default base URLs. Mutates and returns `data.configuration`.
 *
 * @param data - values produced by the conversation form
 * @returns the cleaned-up configuration object
 */
const validateConfiguration = (
  data: z.infer<typeof conversationFormSchema>
) => {
  const { engine, configuration } = data;

  Object.keys(configuration).forEach((key) => {
    // "type" selects gpt/tts and is always kept.
    if (key === "type") return;

    if (
      configuration.type === "gpt" &&
      !gptProviders[engine]?.configurable?.includes(key)
    ) {
      // @ts-ignore
      delete configuration[key];
    }

    if (
      configuration.type === "tts" &&
      !ttsProviders[engine]?.configurable?.includes(key)
    ) {
      // `tts` may be missing from the form values; deleting through an
      // optional chain is then a no-op instead of a TypeError.
      // @ts-ignore
      delete configuration.tts?.[key];
    }
  });

  // use default base url if not set
  if (!configuration.baseUrl) {
    configuration.baseUrl = gptProviders[engine]?.baseUrl;
  }

  // use default base url if not set
  // NOTE(review): the TTS default is read from gptProviders, not ttsProviders,
  // and is keyed by the conversation engine rather than tts.engine — confirm
  // this is intended.
  if (!configuration?.tts?.baseUrl) {
    configuration.tts ||= {};
    configuration.tts.baseUrl = gptProviders[engine]?.baseUrl;
  }

  return configuration;
};
return (
<Form {...form}>
<form
onSubmit={form.handleSubmit(onSubmit)}
className="h-full flex flex-col pt-6"
data-testid="conversation-form"
>
<div className="mb-4 px-6 flex items-center space-x-4">
<div className="text-lg font-bold">
{conversation.id ? t("editConversation") : t("startConversation")}
</div>
<GPTShareButton conversation={conversation} />
</div>
<ScrollArea className="flex-1 px-4">
<div className="space-y-4 px-2 mb-6">
<FormField
control={form.control}
name="name"
render={({ field }) => (
<FormItem>
<FormLabel>{t("models.conversation.name")}</FormLabel>
<Input value={field.value} onChange={field.onChange} />
<FormMessage />
</FormItem>
)}
/>
<FormField
control={form.control}
name="configuration.type"
render={({ field }) => (
<FormItem>
<FormLabel>{t("models.conversation.type")}</FormLabel>
<Select
disabled={Boolean(conversation?.id)}
onValueChange={field.onChange}
value={field.value}
>
<FormControl>
<SelectTrigger>
<SelectValue placeholder={t("selectAiType")} />
</SelectTrigger>
</FormControl>
<SelectContent>
<SelectItem key="gpt" value="gpt">
GPT
</SelectItem>
<SelectItem key="tts" value="tts">
TTS
</SelectItem>
</SelectContent>
</Select>
<FormMessage />
</FormItem>
)}
/>
{form.watch("configuration.type") === "gpt" && (
<ConversationFormGPT
form={form}
gptProviders={gptProviders}
conversation={conversation}
/>
)}
<ConversationFormTTS form={form} ttsProviders={ttsProviders} />
</div>
</ScrollArea>
<div className="flex justify-center space-x-4 py-6 px-6 border-t shadow">
{conversation.id && (
<AlertDialog>
<AlertDialogTrigger asChild>
<Button
className="w-full h-12 text-destructive"
size="lg"
variant="secondary"
>
{t("delete")}
</Button>
</AlertDialogTrigger>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("deleteConversation")}</AlertDialogTitle>
</AlertDialogHeader>
<AlertDialogDescription>
{t("deleteConversationConfirmation")}
</AlertDialogDescription>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<AlertDialogAction
className="bg-destructive hover:bg-destructive-hover"
onClick={destroyConversation}
>
{t("delete")}
</AlertDialogAction>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
)}
<Button
disabled={
submitting || (conversation.id && !form.formState.isDirty)
}
className="w-full h-12"
data-testid="conversation-form-submit"
size="lg"
type="submit"
>
{submitting && <LoaderIcon className="mr-2 animate-spin" />}
{t("confirm")}
</Button>
</div>
</form>
</Form>
);
};

View File

@@ -1,5 +1,7 @@
export * from "./conversation-card";
export * from "./conversation-form";
export * from "./conversation-form/index";
export * from "./conversation-form/conversation-form-gpt";
export * from "./conversation-form/conversation-form-tts";
export * from "./conversation-shortcuts";
export * from "./speech-player";

View File

@@ -3,9 +3,675 @@ import { t } from "i18next";
export const TTS_PROVIDERS: { [key: string]: any } = {
enjoyai: {
name: "EnjoyAI",
models: ["tts-1", "tts-1-hd"],
voices: ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
configurable: ["voice"],
models: ["openai/tts-1", "openai/tts-1-hd", "azure/speech"],
voices: {
openai: ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
azure: [
{
label: "Katja (Female)",
value: "de-DE-KatjaNeural",
language: "de-DE",
},
{
label: "Conrad (Male)",
value: "de-DE-ConradNeural",
language: "de-DE",
},
{
label: "Amala (Female)",
value: "de-DE-AmalaNeural",
language: "de-DE",
},
{
label: "Bernd (Male)",
value: "de-DE-BerndNeural",
language: "de-DE",
},
{
label: "Christoph (Male)",
value: "de-DE-ChristophNeural",
language: "de-DE",
},
{
label: "Elke (Female)",
value: "de-DE-ElkeNeural",
language: "de-DE",
},
{
label: "Gisela (Female, Child)",
value: "de-DE-GiselaNeural",
language: "de-DE",
},
{
label: "Kasper (Male)",
value: "de-DE-KasperNeural",
language: "de-DE",
},
{
label: "Killian (Male)",
value: "de-DE-KillianNeural",
language: "de-DE",
},
{
label: "Klarissa (Female)",
value: "de-DE-KlarissaNeural",
language: "de-DE",
},
{
label: "Klaus (Male)",
value: "de-DE-KlausNeural",
language: "de-DE",
},
{
label: "Louisa (Female)",
value: "de-DE-LouisaNeural",
language: "de-DE",
},
{
label: "Maja (Female)",
value: "de-DE-MajaNeural",
language: "de-DE",
},
{ label: "Ralf (Male)", value: "de-DE-RalfNeural", language: "de-DE" },
{
label: "Tanja (Female)",
value: "de-DE-TanjaNeural",
language: "de-DE",
},
{
label: "Sonia (Female)",
value: "en-GB-SoniaNeural",
language: "en-GB",
},
{ label: "Ryan (Male)", value: "en-GB-RyanNeural", language: "en-GB" },
{
label: "Libby (Female)",
value: "en-GB-LibbyNeural",
language: "en-GB",
},
{
label: "Abbi (Female)",
value: "en-GB-AbbiNeural",
language: "en-GB",
},
{
label: "Alfie (Male)",
value: "en-GB-AlfieNeural",
language: "en-GB",
},
{
label: "Bella (Female)",
value: "en-GB-BellaNeural",
language: "en-GB",
},
{
label: "Elliot (Male)",
value: "en-GB-ElliotNeural",
language: "en-GB",
},
{
label: "Ethan (Male)",
value: "en-GB-EthanNeural",
language: "en-GB",
},
{
label: "Hollie (Female)",
value: "en-GB-HollieNeural",
language: "en-GB",
},
{
label: "Maisie (Female, Child)",
value: "en-GB-MaisieNeural",
language: "en-GB",
},
{ label: "Noah (Male)", value: "en-GB-NoahNeural", language: "en-GB" },
{
label: "Oliver (Male)",
value: "en-GB-OliverNeural",
language: "en-GB",
},
{
label: "Olivia (Female)",
value: "en-GB-OliviaNeural",
language: "en-GB",
},
{
label: "Thomas (Male)",
value: "en-GB-ThomasNeural",
language: "en-GB",
},
{ label: "Ava (Female)", value: "en-US-AvaNeural", language: "en-US" },
{
label: "Andrew (Male)",
value: "en-US-AndrewNeural",
language: "en-US",
},
{
label: "Emma (Female)",
value: "en-US-EmmaNeural",
language: "en-US",
},
{
label: "Brian (Male)",
value: "en-US-BrianNeural",
language: "en-US",
},
{
label: "Jenny (Female)",
value: "en-US-JennyNeural",
language: "en-US",
},
{ label: "Guy (Male)", value: "en-US-GuyNeural", language: "en-US" },
{
label: "Aria (Female)",
value: "en-US-AriaNeural",
language: "en-US",
},
{
label: "Davis (Male)",
value: "en-US-DavisNeural",
language: "en-US",
},
{
label: "Jane (Female)",
value: "en-US-JaneNeural",
language: "en-US",
},
{
label: "Jason (Male)",
value: "en-US-JasonNeural",
language: "en-US",
},
{
label: "Sara (Female)",
value: "en-US-SaraNeural",
language: "en-US",
},
{ label: "Tony (Male)", value: "en-US-TonyNeural", language: "en-US" },
{
label: "Nancy (Female)",
value: "en-US-NancyNeural",
language: "en-US",
},
{
label: "Amber (Female)",
value: "en-US-AmberNeural",
language: "en-US",
},
{
label: "Ana (Female, Child)",
value: "en-US-AnaNeural",
language: "en-US",
},
{
label: "Ashley (Female)",
value: "en-US-AshleyNeural",
language: "en-US",
},
{
label: "Brandon (Male)",
value: "en-US-BrandonNeural",
language: "en-US",
},
{
label: "Christopher (Male)",
value: "en-US-ChristopherNeural",
language: "en-US",
},
{
label: "Cora (Female)",
value: "en-US-CoraNeural",
language: "en-US",
},
{
label: "Elizabeth (Female)",
value: "en-US-ElizabethNeural",
language: "en-US",
},
{ label: "Eric (Male)", value: "en-US-EricNeural", language: "en-US" },
{
label: "Jacob (Male)",
value: "en-US-JacobNeural",
language: "en-US",
},
{
label: "Michelle (Female)",
value: "en-US-MichelleNeural",
language: "en-US",
},
{
label: "Monica (Female)",
value: "en-US-MonicaNeural",
language: "en-US",
},
{
label: "Roger (Male)",
value: "en-US-RogerNeural",
language: "en-US",
},
{
label: "Steffan (Male)",
value: "en-US-SteffanNeural",
language: "en-US",
},
{
label: "AIGenerate1 (Male)",
value: "en-US-AIGenerate1Neural",
language: "en-US",
},
{
label: "AIGenerate2 (Female)",
value: "en-US-AIGenerate2Neural",
language: "en-US",
},
{
label: "Elvira (Female)",
value: "es-ES-ElviraNeural",
language: "es-ES",
},
{
label: "Alvaro (Male)",
value: "es-ES-AlvaroNeural",
language: "es-ES",
},
{
label: "Abril (Female)",
value: "es-ES-AbrilNeural",
language: "es-ES",
},
{
label: "Arnau (Male)",
value: "es-ES-ArnauNeural",
language: "es-ES",
},
{
label: "Dario (Male)",
value: "es-ES-DarioNeural",
language: "es-ES",
},
{
label: "Elias (Male)",
value: "es-ES-EliasNeural",
language: "es-ES",
},
{
label: "Estrella (Female)",
value: "es-ES-EstrellaNeural",
language: "es-ES",
},
{
label: "Irene (Female)",
value: "es-ES-IreneNeural",
language: "es-ES",
},
{
label: "Laia (Female)",
value: "es-ES-LaiaNeural",
language: "es-ES",
},
{ label: "Lia (Female)", value: "es-ES-LiaNeural", language: "es-ES" },
{ label: "Nil (Male)", value: "es-ES-NilNeural", language: "es-ES" },
{ label: "Saul (Male)", value: "es-ES-SaulNeural", language: "es-ES" },
{ label: "Teo (Male)", value: "es-ES-TeoNeural", language: "es-ES" },
{
label: "Triana (Female)",
value: "es-ES-TrianaNeural",
language: "es-ES",
},
{
label: "Vera (Female)",
value: "es-ES-VeraNeural",
language: "es-ES",
},
{
label: "Ximena (Female)",
value: "es-ES-XimenaNeural",
language: "es-ES",
},
{
label: "Denise (Female)",
value: "fr-FR-DeniseNeural",
language: "fr-FR",
},
{
label: "Henri (Male)",
value: "fr-FR-HenriNeural",
language: "fr-FR",
},
{
label: "Alain (Male)",
value: "fr-FR-AlainNeural",
language: "fr-FR",
},
{
label: "Brigitte (Female)",
value: "fr-FR-BrigitteNeural",
language: "fr-FR",
},
{
label: "Celeste (Female)",
value: "fr-FR-CelesteNeural",
language: "fr-FR",
},
{
label: "Claude (Male)",
value: "fr-FR-ClaudeNeural",
language: "fr-FR",
},
{
label: "Coralie (Female)",
value: "fr-FR-CoralieNeural",
language: "fr-FR",
},
{
label: "Eloise (Female, Child)",
value: "fr-FR-EloiseNeural",
language: "fr-FR",
},
{
label: "Jacqueline (Female)",
value: "fr-FR-JacquelineNeural",
language: "fr-FR",
},
{
label: "Jerome (Male)",
value: "fr-FR-JeromeNeural",
language: "fr-FR",
},
{
label: "Josephine (Female)",
value: "fr-FR-JosephineNeural",
language: "fr-FR",
},
{
label: "Maurice (Male)",
value: "fr-FR-MauriceNeural",
language: "fr-FR",
},
{ label: "Yves (Male)", value: "fr-FR-YvesNeural", language: "fr-FR" },
{
label: "Yvette (Female)",
value: "fr-FR-YvetteNeural",
language: "fr-FR",
},
{
label: "Elsa (Female)",
value: "it-IT-ElsaNeural",
language: "it-IT",
},
{
label: "Isabella (Female)",
value: "it-IT-IsabellaNeural",
language: "it-IT",
},
{
label: "Diego (Male)",
value: "it-IT-DiegoNeural",
language: "it-IT",
},
{
label: "Benigno (Male)",
value: "it-IT-BenignoNeural",
language: "it-IT",
},
{
label: "Calimero (Male)",
value: "it-IT-CalimeroNeural",
language: "it-IT",
},
{
label: "Cataldo (Male)",
value: "it-IT-CataldoNeural",
language: "it-IT",
},
{
label: "Fabiola (Female)",
value: "it-IT-FabiolaNeural",
language: "it-IT",
},
{
label: "Fiamma (Female)",
value: "it-IT-FiammaNeural",
language: "it-IT",
},
{
label: "Gianni (Male)",
value: "it-IT-GianniNeural",
language: "it-IT",
},
{
label: "Imelda (Female)",
value: "it-IT-ImeldaNeural",
language: "it-IT",
},
{
label: "Irma (Female)",
value: "it-IT-IrmaNeural",
language: "it-IT",
},
{
label: "Lisandro (Male)",
value: "it-IT-LisandroNeural",
language: "it-IT",
},
{
label: "Palmira (Female)",
value: "it-IT-PalmiraNeural",
language: "it-IT",
},
{
label: "Pierina (Female)",
value: "it-IT-PierinaNeural",
language: "it-IT",
},
{
label: "Rinaldo (Male)",
value: "it-IT-RinaldoNeural",
language: "it-IT",
},
{
label: "Giuseppe (Male)",
value: "it-IT-GiuseppeNeural",
language: "it-IT",
},
{
label: "NanamiNeural (Female)",
value: "ja-JP-NanamiNeural",
language: "ja-JP",
},
{
label: "KeitaNeural (Male)",
value: "ja-JP-KeitaNeural",
language: "ja-JP",
},
{
label: "AoiNeural (Female)",
value: "ja-JP-AoiNeural",
language: "ja-JP",
},
{
label: "DaichiNeural (Male)",
value: "ja-JP-DaichiNeural",
language: "ja-JP",
},
{
label: "MayuNeural (Female)",
value: "ja-JP-MayuNeural",
language: "ja-JP",
},
{
label: "NaokiNeural (Male)",
value: "ja-JP-NaokiNeural",
language: "ja-JP",
},
{
label: "ShioriNeural (Female)",
value: "ja-JP-ShioriNeural",
language: "ja-JP",
},
{
label: "SunHi (Female)",
value: "ko-KR-SunHiNeural",
language: "ko-KR",
},
{
label: "InJoon (Male)",
value: "ko-KR-InJoonNeural",
language: "ko-KR",
},
{
label: "BongJin (Male)",
value: "ko-KR-BongJinNeural",
language: "ko-KR",
},
{
label: "GookMin (Male)",
value: "ko-KR-GookMinNeural",
language: "ko-KR",
},
{
label: "JiMin (Female)",
value: "ko-KR-JiMinNeural",
language: "ko-KR",
},
{
label: "SeoHyeon (Female)",
value: "ko-KR-SeoHyeonNeural",
language: "ko-KR",
},
{
label: "SoonBok (Female)",
value: "ko-KR-SoonBokNeural",
language: "ko-KR",
},
{
label: "YuJin (Female)",
value: "ko-KR-YuJinNeural",
language: "ko-KR",
},
{
label: "Hyunsu (Male)",
value: "ko-KR-HyunsuNeural",
language: "ko-KR",
},
{
label: "Xiaoxiao (Female)",
value: "zh-CN-XiaoxiaoNeural",
language: "zh-CN",
},
{
label: "Yunxi (Male)",
value: "zh-CN-YunxiNeural",
language: "zh-CN",
},
{
label: "Yunjian (Male)",
value: "zh-CN-YunjianNeural",
language: "zh-CN",
},
{
label: "Xiaoyi (Female)",
value: "zh-CN-XiaoyiNeural",
language: "zh-CN",
},
{
label: "Yunyang (Male)",
value: "zh-CN-YunyangNeural",
language: "zh-CN",
},
{
label: "Xiaochen (Female)",
value: "zh-CN-XiaochenNeural",
language: "zh-CN",
},
{
label: "Xiaohan (Female)",
value: "zh-CN-XiaohanNeural",
language: "zh-CN",
},
{
label: "Xiaomeng (Female)",
value: "zh-CN-XiaomengNeural",
language: "zh-CN",
},
{
label: "Xiaomo (Female)",
value: "zh-CN-XiaomoNeural",
language: "zh-CN",
},
{
label: "Xiaoqiu (Female)",
value: "zh-CN-XiaoqiuNeural",
language: "zh-CN",
},
{
label: "Xiaorui (Female)",
value: "zh-CN-XiaoruiNeural",
language: "zh-CN",
},
{
label: "Xiaoshuang (Female, Child)",
value: "zh-CN-XiaoshuangNeural",
language: "zh-CN",
},
{
label: "Xiaoyan (Female)",
value: "zh-CN-XiaoyanNeural",
language: "zh-CN",
},
{
label: "Xiaoyou (Female, Child)",
value: "zh-CN-XiaoyouNeural",
language: "zh-CN",
},
{
label: "Xiaozhen (Female)",
value: "zh-CN-XiaozhenNeural",
language: "zh-CN",
},
{
label: "Yunfeng (Male)",
value: "zh-CN-YunfengNeural",
language: "zh-CN",
},
{
label: "Yunhao (Male)",
value: "zh-CN-YunhaoNeural",
language: "zh-CN",
},
{
label: "Yunxia (Male)",
value: "zh-CN-YunxiaNeural",
language: "zh-CN",
},
{
label: "Yunye (Male)",
value: "zh-CN-YunyeNeural",
language: "zh-CN",
},
{
label: "Yunze (Male)",
value: "zh-CN-YunzeNeural",
language: "zh-CN",
},
{
label: "Xiaorou (Female)",
value: "zh-CN-XiaorouNeural",
language: "zh-CN",
},
{
label: "XiaoxiaoDialects (Female)",
value: "zh-CN-XiaoxiaoDialectsNeural",
language: "zh-CN",
},
{
label: "Yunjie (Male)",
value: "zh-CN-YunjieNeural",
language: "zh-CN",
},
],
},
configurable: ["model", "voice"],
},
openai: {
name: "OpenAI",

View File

@@ -15,9 +15,12 @@ import {
import OpenAI from "openai";
import { type LLMResult } from "@langchain/core/outputs";
import { v4 } from "uuid";
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
export const useConversation = () => {
const { EnjoyApp, user, apiUrl } = useContext(AppSettingsProviderContext);
const { EnjoyApp, webApi, user, apiUrl, learningLanguage } = useContext(
AppSettingsProviderContext
);
const { openai, googleGenerativeAi, currentEngine } = useContext(
AISettingsProviderContext
);
@@ -226,6 +229,35 @@ export const useConversation = () => {
};
/**
 * Synthesize speech for `params.text` and persist it as a speech record.
 *
 * The backend is chosen from the model name: names starting with "openai"
 * and bare "tts-*" names (such as "tts-1" and "tts-1-hd") go to OpenAI,
 * names starting with "azure" go to Azure Speech. The model defaults to
 * "tts-1" when the configuration does not name one.
 *
 * @param params - text, source reference and TTS configuration
 * @returns the result of `EnjoyApp.speeches.create`
 * @throws Error when no backend produced audio for the requested model
 */
const tts = async (params: Partial<SpeechType>) => {
  const { configuration } = params;
  const { engine, model = "tts-1", voice } = configuration || {};

  let buffer;
  if (model.startsWith("openai") || model.startsWith("tts-")) {
    buffer = await openaiTTS(params);
  } else if (model.startsWith("azure")) {
    buffer = await azureTTS(params);
  }

  // Fail loudly instead of saving a speech record without audio.
  if (!buffer) {
    throw new Error(`Unsupported TTS model: ${model}`);
  }

  return EnjoyApp.speeches.create(
    {
      text: params.text,
      sourceType: params.sourceType,
      sourceId: params.sourceId,
      configuration: {
        engine,
        model,
        voice,
      },
    },
    {
      type: "audio/mp3",
      arrayBuffer: buffer,
    }
  );
};
const openaiTTS = async (params: Partial<SpeechType>) => {
const { configuration } = params;
const {
engine = currentEngine.name,
@@ -256,27 +288,56 @@ export const useConversation = () => {
const file = await client.audio.speech.create({
input: params.text,
model,
model: model.replace("openai-", ""),
voice,
});
const buffer = await file.arrayBuffer();
return EnjoyApp.speeches.create(
{
text: params.text,
sourceType: params.sourceType,
sourceId: params.sourceId,
configuration: {
engine,
model,
voice,
},
},
{
type: "audio/mp3",
arrayBuffer: buffer,
}
return file.arrayBuffer();
};
/**
 * Synthesize `params.text` with the Azure Speech SDK.
 *
 * A speech token is requested from the web API first. It is consumed when
 * synthesis yields audio and revoked otherwise.
 *
 * @param params - text and TTS configuration (model must be "azure/speech")
 * @returns the synthesized audio bytes
 * @throws Error when the model is not "azure/speech" or synthesis fails
 */
const azureTTS = async (
  params: Partial<SpeechType>
): Promise<ArrayBuffer> => {
  const { configuration, text } = params;
  const { model, voice } = configuration || {};

  // Previously this returned undefined, contradicting the declared type.
  if (model !== "azure/speech") {
    throw new Error(`Unsupported Azure TTS model: ${model}`);
  }

  const { id, token, region } = await webApi.generateSpeechToken({
    purpose: "tts",
    input: text,
  });

  const speechConfig = sdk.SpeechConfig.fromAuthorizationToken(token, region);
  const audioConfig = sdk.AudioConfig.fromDefaultSpeakerOutput();
  // NOTE(review): this sets the recognition language on a synthesis config;
  // confirm whether speechSynthesisLanguage was intended.
  speechConfig.speechRecognitionLanguage = learningLanguage;
  speechConfig.speechSynthesisVoiceName = voice;

  const speechSynthesizer = new sdk.SpeechSynthesizer(
    speechConfig,
    audioConfig
  );

  return new Promise((resolve, reject) => {
    speechSynthesizer.speakTextAsync(
      text,
      (result) => {
        speechSynthesizer.close();

        if (result && result.audioData) {
          webApi.consumeSpeechToken(id);
          resolve(result.audioData);
        } else {
          webApi.revokeSpeechToken(id);
          // Reject with an Error so callers reading `.message` get a reason.
          reject(
            new Error(
              result?.errorDetails || "Azure speech synthesis returned no audio"
            )
          );
        }
      },
      (error) => {
        speechSynthesizer.close();
        webApi.revokeSpeechToken(id);
        // The SDK reports failures as plain strings; wrap them in an Error.
        reject(new Error(String(error)));
      }
    );
  });
};
return {

View File

@@ -135,7 +135,7 @@ export default () => {
let presets = GPT_PRESETS;
let defaultGptPreset = {
key: "custom",
engine: "enjoyai",
engine: currentEngine.name,
name: t("custom"),
configuration: {
type: "gpt",
@@ -153,8 +153,8 @@ export default () => {
configuration: {
type: "tts",
tts: {
engine: currentEngine.name,
model: "tts-1",
engine: "enjoyai",
model: "openai/tts-1",
voice: "alloy",
},
},
@@ -173,8 +173,11 @@ export default () => {
defaultGpt.configuration.tts.engine = currentEngine.name;
defaultGptPreset = defaultGpt;
defaultTts.engine = currentEngine.name;
defaultTts.configuration.tts.engine = currentEngine.name;
if (currentEngine.name === "openai") {
defaultTts.configuration.tts.engine = "openai";
defaultTts.configuration.tts.model = "tts-1";
defaultTts.configuration.tts.voice = "alloy";
}
defaultTtsPreset = defaultTts;
} catch (error) {
console.error(error);

4071
yarn.lock

File diff suppressed because it is too large Load Diff