Fix: handle echogarden align error (#620)

* may regenerate when "no matching voice found"

* may regenerate if the original text has no matching voice

* specify language to avoid "no matching voice found"
This commit is contained in:
an-lee
2024-05-20 10:30:28 +08:00
committed by GitHub
parent e0b2f59a23
commit cdbaf89005
10 changed files with 997 additions and 464 deletions

View File

@@ -129,7 +129,7 @@
"dayjs": "^1.11.11", "dayjs": "^1.11.11",
"decamelize": "^6.0.0", "decamelize": "^6.0.0",
"decamelize-keys": "^2.0.1", "decamelize-keys": "^2.0.1",
"echogarden": "^1.4.3", "echogarden": "^1.4.4",
"electron-context-menu": "^4.0.0", "electron-context-menu": "^4.0.0",
"electron-log": "^5.1.4", "electron-log": "^5.1.4",
"electron-settings": "^4.0.4", "electron-settings": "^4.0.4",

View File

@@ -100,6 +100,7 @@ class EchogardenWrapper {
transcript: string, transcript: string,
options: AlignmentOptions options: AlignmentOptions
) => { ) => {
logger.debug("echogarden-align:", transcript, options);
try { try {
return await this.align(input, transcript, options); return await this.align(input, transcript, options);
} catch (err) { } catch (err) {

View File

@@ -34,7 +34,7 @@ export const MediaLoadingModal = () => {
return ( return (
<AlertDialog open={!decoded || !Boolean(transcription?.result?.timeline)}> <AlertDialog open={!decoded || !Boolean(transcription?.result?.timeline)}>
<AlertDialogOverlay className="z-[100]" /> <AlertDialogOverlay className="" />
<AlertDialogContent className="z-[100]"> <AlertDialogContent className="z-[100]">
<AlertDialogHeader> <AlertDialogHeader>
<AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle> <AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
@@ -55,9 +55,7 @@ export const MediaLoadingModal = () => {
<XCircleIcon className="w-4 h-4 text-destructive" /> <XCircleIcon className="w-4 h-4 text-destructive" />
</div> </div>
<div className="select-text"> <div className="select-text">
<div className="mb-2"> <div className="mb-2">{decodeError}</div>
{decodeError}
</div>
<div className="text-sm text-muted-foreground"> <div className="text-sm text-muted-foreground">
{t("failedToDecodeWaveform")}:{" "} {t("failedToDecodeWaveform")}:{" "}
<span className="break-all ">{media?.src}</span> <span className="break-all ">{media?.src}</span>
@@ -97,12 +95,17 @@ export const MediaLoadingModal = () => {
<div className="inline"> <div className="inline">
<span>{t("notTranscribedYet")}</span> <span>{t("notTranscribedYet")}</span>
{decoded && ( {decoded && (
<Button <Button asChild className="ml-4" size="sm">
onClick={generateTranscription} <a
className="ml-4" className="cursor-pointer"
size="sm" onClick={() =>
> generateTranscription({
{t("transcribe")} originalText: "",
})
}
>
{t("regenerate")}
</a>
</Button> </Button>
)} )}
</div> </div>

View File

@@ -57,7 +57,7 @@ export const TranscriptionForm = (props: {
const handleSave = async () => { const handleSave = async () => {
setSubmiting(true); setSubmiting(true);
try { try {
await generateTranscription(content); await generateTranscription({ originalText: content });
setOpen(false); setOpen(false);
} catch (e) { } catch (e) {
toast.error(e.message); toast.error(e.message);

View File

@@ -144,7 +144,13 @@ export const MediaTranscription = () => {
</AlertDialogHeader> </AlertDialogHeader>
<AlertDialogFooter> <AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel> <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<AlertDialogAction onClick={() => generateTranscription("")}> <AlertDialogAction
onClick={() =>
generateTranscription({
originalText: "",
})
}
>
{t("transcribe")} {t("transcribe")}
</AlertDialogAction> </AlertDialogAction>
</AlertDialogFooter> </AlertDialogFooter>

View File

@@ -284,7 +284,11 @@ export const AssistantMessageComponent = (props: {
</DropdownMenu> </DropdownMenu>
</div> </div>
<Sheet open={shadowing} onOpenChange={(value) => setShadowing(value)}> <Sheet
modal={false}
open={shadowing}
onOpenChange={(value) => setShadowing(value)}
>
<SheetContent <SheetContent
side="bottom" side="bottom"
className="h-screen p-0" className="h-screen p-0"

View File

@@ -66,7 +66,10 @@ type MediaPlayerContextType = {
pitchChart: Chart; pitchChart: Chart;
// Transcription // Transcription
transcription: TranscriptionType; transcription: TranscriptionType;
generateTranscription: (text?: string) => void; generateTranscription: (params?: {
originalText?: string;
language?: string;
}) => void;
transcribing: boolean; transcribing: boolean;
transcribingProgress: number; transcribingProgress: number;
transcriptionDraft: TranscriptionType["result"]; transcriptionDraft: TranscriptionType["result"];

View File

@@ -34,6 +34,7 @@ export const useTranscribe = () => {
targetId?: string; targetId?: string;
targetType?: string; targetType?: string;
originalText?: string; originalText?: string;
language?: string;
} }
): Promise<{ ): Promise<{
engine: string; engine: string;
@@ -42,7 +43,12 @@ export const useTranscribe = () => {
originalText?: string; originalText?: string;
}> => { }> => {
const url = await transcode(mediaSrc); const url = await transcode(mediaSrc);
const { targetId, targetType, originalText } = params || {}; const {
targetId,
targetType,
originalText,
language = "english",
} = params || {};
const blob = await (await fetch(url)).blob(); const blob = await (await fetch(url)).blob();
let result; let result;
@@ -75,7 +81,10 @@ export const useTranscribe = () => {
const alignmentResult = await EnjoyApp.echogarden.align( const alignmentResult = await EnjoyApp.echogarden.align(
new Uint8Array(await blob.arrayBuffer()), new Uint8Array(await blob.arrayBuffer()),
transcript transcript,
{
language,
}
); );
return { return {
@@ -197,7 +206,7 @@ export const useTranscribe = () => {
resolve({ resolve({
engine: "azure", engine: "azure",
model: "whisper", model: "whisper",
text: results.map((result) => result.DisplayText).join(' '), text: results.map((result) => result.DisplayText).join(" "),
}); });
}; };

View File

@@ -8,7 +8,6 @@ import {
import { toast } from "@renderer/components/ui"; import { toast } from "@renderer/components/ui";
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js"; import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";
import { MAGIC_TOKEN_REGEX, END_OF_SENTENCE_REGEX } from "@/constants"; import { MAGIC_TOKEN_REGEX, END_OF_SENTENCE_REGEX } from "@/constants";
import { ca } from "@vidstack/react/types/vidstack-react";
export const useTranscriptions = (media: AudioType | VideoType) => { export const useTranscriptions = (media: AudioType | VideoType) => {
const { whisperConfig } = useContext(AISettingsProviderContext); const { whisperConfig } = useContext(AISettingsProviderContext);
@@ -53,7 +52,11 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
}); });
}; };
const generateTranscription = async (originalText?: string) => { const generateTranscription = async (params?: {
originalText?: string;
language?: string;
}) => {
let { originalText, language } = params || {};
if (originalText === undefined) { if (originalText === undefined) {
if (transcription?.targetId === media.id) { if (transcription?.targetId === media.id) {
originalText = transcription.result?.originalText; originalText = transcription.result?.originalText;
@@ -72,6 +75,7 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
targetId: media.id, targetId: media.id,
targetType: media.mediaType, targetType: media.mediaType,
originalText, originalText,
language,
}); });
let timeline: TimelineEntry[] = []; let timeline: TimelineEntry[] = [];

1391
yarn.lock

File diff suppressed because it is too large Load Diff