Fix: handle echogarden align error (#620)
* may regenerate when "no matching voice found"
* may regenerate if the original text has no matching voice
* specify language to avoid "no matching voice found"
This commit is contained in:
@@ -129,7 +129,7 @@
|
||||
"dayjs": "^1.11.11",
|
||||
"decamelize": "^6.0.0",
|
||||
"decamelize-keys": "^2.0.1",
|
||||
"echogarden": "^1.4.3",
|
||||
"echogarden": "^1.4.4",
|
||||
"electron-context-menu": "^4.0.0",
|
||||
"electron-log": "^5.1.4",
|
||||
"electron-settings": "^4.0.4",
|
||||
|
||||
@@ -100,6 +100,7 @@ class EchogardenWrapper {
|
||||
transcript: string,
|
||||
options: AlignmentOptions
|
||||
) => {
|
||||
logger.debug("echogarden-align:", transcript, options);
|
||||
try {
|
||||
return await this.align(input, transcript, options);
|
||||
} catch (err) {
|
||||
|
||||
@@ -34,7 +34,7 @@ export const MediaLoadingModal = () => {
|
||||
|
||||
return (
|
||||
<AlertDialog open={!decoded || !Boolean(transcription?.result?.timeline)}>
|
||||
<AlertDialogOverlay className="z-[100]" />
|
||||
<AlertDialogOverlay className="" />
|
||||
<AlertDialogContent className="z-[100]">
|
||||
<AlertDialogHeader>
|
||||
<AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
|
||||
@@ -55,9 +55,7 @@ export const MediaLoadingModal = () => {
|
||||
<XCircleIcon className="w-4 h-4 text-destructive" />
|
||||
</div>
|
||||
<div className="select-text">
|
||||
<div className="mb-2">
|
||||
{decodeError}
|
||||
</div>
|
||||
<div className="mb-2">{decodeError}</div>
|
||||
<div className="text-sm text-muted-foreground">
|
||||
{t("failedToDecodeWaveform")}:{" "}
|
||||
<span className="break-all ">{media?.src}</span>
|
||||
@@ -97,12 +95,17 @@ export const MediaLoadingModal = () => {
|
||||
<div className="inline">
|
||||
<span>{t("notTranscribedYet")}</span>
|
||||
{decoded && (
|
||||
<Button
|
||||
onClick={generateTranscription}
|
||||
className="ml-4"
|
||||
size="sm"
|
||||
>
|
||||
{t("transcribe")}
|
||||
<Button asChild className="ml-4" size="sm">
|
||||
<a
|
||||
className="cursor-pointer"
|
||||
onClick={() =>
|
||||
generateTranscription({
|
||||
originalText: "",
|
||||
})
|
||||
}
|
||||
>
|
||||
{t("regenerate")}
|
||||
</a>
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -57,7 +57,7 @@ export const TranscriptionForm = (props: {
|
||||
const handleSave = async () => {
|
||||
setSubmiting(true);
|
||||
try {
|
||||
await generateTranscription(content);
|
||||
await generateTranscription({ originalText: content });
|
||||
setOpen(false);
|
||||
} catch (e) {
|
||||
toast.error(e.message);
|
||||
|
||||
@@ -144,7 +144,13 @@ export const MediaTranscription = () => {
|
||||
</AlertDialogHeader>
|
||||
<AlertDialogFooter>
|
||||
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
|
||||
<AlertDialogAction onClick={() => generateTranscription("")}>
|
||||
<AlertDialogAction
|
||||
onClick={() =>
|
||||
generateTranscription({
|
||||
originalText: "",
|
||||
})
|
||||
}
|
||||
>
|
||||
{t("transcribe")}
|
||||
</AlertDialogAction>
|
||||
</AlertDialogFooter>
|
||||
|
||||
@@ -284,7 +284,11 @@ export const AssistantMessageComponent = (props: {
|
||||
</DropdownMenu>
|
||||
</div>
|
||||
|
||||
<Sheet open={shadowing} onOpenChange={(value) => setShadowing(value)}>
|
||||
<Sheet
|
||||
modal={false}
|
||||
open={shadowing}
|
||||
onOpenChange={(value) => setShadowing(value)}
|
||||
>
|
||||
<SheetContent
|
||||
side="bottom"
|
||||
className="h-screen p-0"
|
||||
|
||||
@@ -66,7 +66,10 @@ type MediaPlayerContextType = {
|
||||
pitchChart: Chart;
|
||||
// Transcription
|
||||
transcription: TranscriptionType;
|
||||
generateTranscription: (text?: string) => void;
|
||||
generateTranscription: (params?: {
|
||||
originalText?: string;
|
||||
language?: string;
|
||||
}) => void;
|
||||
transcribing: boolean;
|
||||
transcribingProgress: number;
|
||||
transcriptionDraft: TranscriptionType["result"];
|
||||
|
||||
@@ -34,6 +34,7 @@ export const useTranscribe = () => {
|
||||
targetId?: string;
|
||||
targetType?: string;
|
||||
originalText?: string;
|
||||
language?: string;
|
||||
}
|
||||
): Promise<{
|
||||
engine: string;
|
||||
@@ -42,7 +43,12 @@ export const useTranscribe = () => {
|
||||
originalText?: string;
|
||||
}> => {
|
||||
const url = await transcode(mediaSrc);
|
||||
const { targetId, targetType, originalText } = params || {};
|
||||
const {
|
||||
targetId,
|
||||
targetType,
|
||||
originalText,
|
||||
language = "english",
|
||||
} = params || {};
|
||||
const blob = await (await fetch(url)).blob();
|
||||
|
||||
let result;
|
||||
@@ -75,7 +81,10 @@ export const useTranscribe = () => {
|
||||
|
||||
const alignmentResult = await EnjoyApp.echogarden.align(
|
||||
new Uint8Array(await blob.arrayBuffer()),
|
||||
transcript
|
||||
transcript,
|
||||
{
|
||||
language,
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
@@ -197,7 +206,7 @@ export const useTranscribe = () => {
|
||||
resolve({
|
||||
engine: "azure",
|
||||
model: "whisper",
|
||||
text: results.map((result) => result.DisplayText).join(' '),
|
||||
text: results.map((result) => result.DisplayText).join(" "),
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
@@ -8,7 +8,6 @@ import {
|
||||
import { toast } from "@renderer/components/ui";
|
||||
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";
|
||||
import { MAGIC_TOKEN_REGEX, END_OF_SENTENCE_REGEX } from "@/constants";
|
||||
import { ca } from "@vidstack/react/types/vidstack-react";
|
||||
|
||||
export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
const { whisperConfig } = useContext(AISettingsProviderContext);
|
||||
@@ -53,7 +52,11 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
});
|
||||
};
|
||||
|
||||
const generateTranscription = async (originalText?: string) => {
|
||||
const generateTranscription = async (params?: {
|
||||
originalText?: string;
|
||||
language?: string;
|
||||
}) => {
|
||||
let { originalText, language } = params || {};
|
||||
if (originalText === undefined) {
|
||||
if (transcription?.targetId === media.id) {
|
||||
originalText = transcription.result?.originalText;
|
||||
@@ -72,6 +75,7 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
targetId: media.id,
|
||||
targetType: media.mediaType,
|
||||
originalText,
|
||||
language,
|
||||
});
|
||||
|
||||
let timeline: TimelineEntry[] = [];
|
||||
|
||||
Reference in New Issue
Block a user