Fix: handle echogarden align error (#620)

* may regenerate when "no matching voice found"

* may regenerate if the original text has no matching voice

* specify language to avoid "no matching voice found"
This commit is contained in:
an-lee
2024-05-20 10:30:28 +08:00
committed by GitHub
parent e0b2f59a23
commit cdbaf89005
10 changed files with 997 additions and 464 deletions

View File

@@ -129,7 +129,7 @@
"dayjs": "^1.11.11",
"decamelize": "^6.0.0",
"decamelize-keys": "^2.0.1",
"echogarden": "^1.4.3",
"echogarden": "^1.4.4",
"electron-context-menu": "^4.0.0",
"electron-log": "^5.1.4",
"electron-settings": "^4.0.4",

View File

@@ -100,6 +100,7 @@ class EchogardenWrapper {
transcript: string,
options: AlignmentOptions
) => {
logger.debug("echogarden-align:", transcript, options);
try {
return await this.align(input, transcript, options);
} catch (err) {

View File

@@ -34,7 +34,7 @@ export const MediaLoadingModal = () => {
return (
<AlertDialog open={!decoded || !Boolean(transcription?.result?.timeline)}>
<AlertDialogOverlay className="z-[100]" />
<AlertDialogOverlay className="" />
<AlertDialogContent className="z-[100]">
<AlertDialogHeader>
<AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
@@ -55,9 +55,7 @@ export const MediaLoadingModal = () => {
<XCircleIcon className="w-4 h-4 text-destructive" />
</div>
<div className="select-text">
<div className="mb-2">
{decodeError}
</div>
<div className="mb-2">{decodeError}</div>
<div className="text-sm text-muted-foreground">
{t("failedToDecodeWaveform")}:{" "}
<span className="break-all ">{media?.src}</span>
@@ -97,12 +95,17 @@ export const MediaLoadingModal = () => {
<div className="inline">
<span>{t("notTranscribedYet")}</span>
{decoded && (
<Button
onClick={generateTranscription}
className="ml-4"
size="sm"
>
{t("transcribe")}
<Button asChild className="ml-4" size="sm">
<a
className="cursor-pointer"
onClick={() =>
generateTranscription({
originalText: "",
})
}
>
{t("regenerate")}
</a>
</Button>
)}
</div>

View File

@@ -57,7 +57,7 @@ export const TranscriptionForm = (props: {
const handleSave = async () => {
setSubmiting(true);
try {
await generateTranscription(content);
await generateTranscription({ originalText: content });
setOpen(false);
} catch (e) {
toast.error(e.message);

View File

@@ -144,7 +144,13 @@ export const MediaTranscription = () => {
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<AlertDialogAction onClick={() => generateTranscription("")}>
<AlertDialogAction
onClick={() =>
generateTranscription({
originalText: "",
})
}
>
{t("transcribe")}
</AlertDialogAction>
</AlertDialogFooter>

View File

@@ -284,7 +284,11 @@ export const AssistantMessageComponent = (props: {
</DropdownMenu>
</div>
<Sheet open={shadowing} onOpenChange={(value) => setShadowing(value)}>
<Sheet
modal={false}
open={shadowing}
onOpenChange={(value) => setShadowing(value)}
>
<SheetContent
side="bottom"
className="h-screen p-0"

View File

@@ -66,7 +66,10 @@ type MediaPlayerContextType = {
pitchChart: Chart;
// Transcription
transcription: TranscriptionType;
generateTranscription: (text?: string) => void;
generateTranscription: (params?: {
originalText?: string;
language?: string;
}) => void;
transcribing: boolean;
transcribingProgress: number;
transcriptionDraft: TranscriptionType["result"];

View File

@@ -34,6 +34,7 @@ export const useTranscribe = () => {
targetId?: string;
targetType?: string;
originalText?: string;
language?: string;
}
): Promise<{
engine: string;
@@ -42,7 +43,12 @@ export const useTranscribe = () => {
originalText?: string;
}> => {
const url = await transcode(mediaSrc);
const { targetId, targetType, originalText } = params || {};
const {
targetId,
targetType,
originalText,
language = "english",
} = params || {};
const blob = await (await fetch(url)).blob();
let result;
@@ -75,7 +81,10 @@ export const useTranscribe = () => {
const alignmentResult = await EnjoyApp.echogarden.align(
new Uint8Array(await blob.arrayBuffer()),
transcript
transcript,
{
language,
}
);
return {
@@ -197,7 +206,7 @@ export const useTranscribe = () => {
resolve({
engine: "azure",
model: "whisper",
text: results.map((result) => result.DisplayText).join(' '),
text: results.map((result) => result.DisplayText).join(" "),
});
};

View File

@@ -8,7 +8,6 @@ import {
import { toast } from "@renderer/components/ui";
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";
import { MAGIC_TOKEN_REGEX, END_OF_SENTENCE_REGEX } from "@/constants";
import { ca } from "@vidstack/react/types/vidstack-react";
export const useTranscriptions = (media: AudioType | VideoType) => {
const { whisperConfig } = useContext(AISettingsProviderContext);
@@ -53,7 +52,11 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
});
};
const generateTranscription = async (originalText?: string) => {
const generateTranscription = async (params?: {
originalText?: string;
language?: string;
}) => {
let { originalText, language } = params || {};
if (originalText === undefined) {
if (transcription?.targetId === media.id) {
originalText = transcription.result?.originalText;
@@ -72,6 +75,7 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
targetId: media.id,
targetType: media.mediaType,
originalText,
language,
});
let timeline: TimelineEntry[] = [];

1391
yarn.lock

File diff suppressed because it is too large Load Diff