* escape spaces in the command line

* disable record button when no reference

* notify when transcription has been running for too long

* fix release CI

* fix UI & remove deprecated code

* clear zombie transcribe processes on startup

* fix file removal when a duplicated audio/video is added

* update latest whisper for win32
This commit is contained in:
an-lee
2024-01-18 18:21:55 +08:00
committed by GitHub
parent 8f4503db37
commit f04dd1e3c8
20 changed files with 120 additions and 349 deletions

View File

@@ -21,4 +21,5 @@ jobs:
- if: matrix.os == 'macos-latest'
env:
GITHUB_TOKEN: ${{ secrets.PUBLISH_TOKEN }}
PACKAGE_OS_ARCH: arm64
run: yarn run publish:enjoy --arch=arm64

View File

@@ -178,6 +178,7 @@
"failedToLogin": "Failed to login",
"invalidRedirectUrl": "Invalid redirect url",
"transcribe": "Transcribe",
"stillTranscribing": "AI is still working on the transcription. Please wait or switch to a smaller model to make it faster.",
"unableToSetLibraryPath": "Unable to set library path to {{path}}",
"nthStep": "{{current}}/{{totalSteps}} Step",
"open": "Open",
@@ -265,6 +266,7 @@
"allResources": "all resources",
"playbackRate": "playback rate",
"transcription": "transcription",
"transcript": "transcript",
"regenerate": "regenerate",
"holdAndSpeak": "Hold and speak",
"releaseToStop": "Release to stop",

View File

@@ -178,6 +178,7 @@
"invalidRedirectUrl": "无效的重定向 URL",
"delete": "删除",
"transcribe": "语音转文本",
"stillTranscribing": "语音转文本仍在进行中,请耐心等候。或者您可以切换到另一个更小的模型以加快速度。",
"unableToSetLibraryPath": "无法设置资源库保存路径 {{path}}",
"nthStep": "第 {{current}}/{{totalSteps}} 步",
"open": "打开",
@@ -265,6 +266,7 @@
"allResources": "所有资源",
"playbackRate": "播放速度",
"transcription": "语音文本",
"transcript": "字幕",
"regenerate": "重新生成",
"holdAndSpeak": "按住并说话",
"releaseToStop": "松开停止",

View File

@@ -80,12 +80,24 @@ class AudiosHandler {
});
}
audio.transcribe().catch((err) => {
const timeout = setTimeout(() => {
event.sender.send("on-notification", {
type: "error",
message: err.message,
type: "warning",
message: t("stillTranscribing"),
});
}, 1000 * 10);
audio
.transcribe()
.catch((err) => {
event.sender.send("on-notification", {
type: "error",
message: err.message,
});
})
.finally(() => {
clearTimeout(timeout);
});
});
}
private async create(

View File

@@ -1,6 +1,7 @@
import { ipcMain, IpcMainEvent } from "electron";
import { Transcription, Audio, Video } from "@main/db/models";
import { WhereOptions, Attributes } from "sequelize";
import { t } from "i18next";
import log from "electron-log/main";
const logger = log.scope("db/handlers/transcriptions-handler");
@@ -30,12 +31,24 @@ class TranscriptionsHandler {
});
if (transcription.state === "pending") {
transcription.process().catch((err) => {
const timeout = setTimeout(() => {
event.sender.send("on-notification", {
type: "error",
message: err.message,
type: "warning",
message: t("stillTranscribing"),
});
}, 1000 * 10);
transcription
.process()
.catch((err) => {
event.sender.send("on-notification", {
type: "error",
message: err.message,
});
})
.finally(() => {
clearTimeout(timeout);
});
});
}
return transcription.toJSON();
@@ -86,7 +99,24 @@ class TranscriptionsHandler {
throw new Error("models.transcription.notFound");
}
transcription.process({ force: true });
const timeout = setTimeout(() => {
event.sender.send("on-notification", {
type: "warning",
message: t("stillTranscribing"),
});
}, 1000 * 10);
transcription
.process({ force: true })
.catch((err) => {
event.sender.send("on-notification", {
type: "error",
message: err.message,
});
})
.finally(() => {
clearTimeout(timeout);
});
})
.catch((err) => {
logger.error(err);

View File

@@ -80,12 +80,24 @@ class VideosHandler {
});
}
video.transcribe().catch((err) => {
const timeout = setTimeout(() => {
event.sender.send("on-notification", {
type: "error",
message: err.message,
type: "warning",
message: t("stillTranscribing"),
});
}, 1000 * 10);
video
.transcribe()
.catch((err) => {
event.sender.send("on-notification", {
type: "error",
message: err.message,
});
})
.finally(() => {
clearTimeout(timeout);
});
});
}
private async create(

View File

@@ -65,19 +65,20 @@ db.connect = async () => {
await sequelize.sync();
await sequelize.authenticate();
// TODO:
// clear the large waveform data in DB.
// Remove this in next release
const caches = await CacheObject.findAll({
attributes: ["id", "key"],
// kill the zombie transcribe processes
Transcription.findAll({
where: {
state: "processing",
},
}).then((transcriptions) => {
transcriptions.forEach((transcription) => {
if (transcription.result) {
transcription.update({ state: "finished" });
} else {
transcription.update({ state: "pending" });
}
});
});
const cacheIds: string[] = [];
caches.forEach((cache) => {
if (cache.key.startsWith("waveform")) {
cacheIds.push(cache.id);
}
});
await CacheObject.destroy({ where: { id: cacheIds } });
// vacuum the database
await sequelize.query("VACUUM");

View File

@@ -257,6 +257,16 @@ export class Audio extends Model<Audio> {
const md5 = await hashFile(filePath, { algo: "md5" });
// check if file already exists
const existing = await Audio.findOne({
where: {
md5,
},
});
if (existing) {
throw new Error(t("audioAlreadyAddedToLibrary", { file: filePath }));
}
// Generate ID
const userId = settings.getSync("user.id");
const id = uuidv5(`${userId}/${md5}`, uuidv5.URL);

View File

@@ -279,6 +279,16 @@ export class Video extends Model<Video> {
const md5 = await hashFile(filePath, { algo: "md5" });
// check if file already exists
const existing = await Video.findOne({
where: {
md5,
},
});
if (existing) {
throw new Error(t("videoAlreadyAddedToLibrary", { file: filePath }));
}
// Generate ID
const userId = settings.getSync("user.id");
const id = uuidv5(`${userId}/${md5}`, uuidv5.URL);

View File

@@ -73,11 +73,11 @@ class Whipser {
);
const command = [
this.binMain,
`"${this.binMain}"`,
`--file "${waveFile}"`,
`--model ${settings.whisperModelPath()}`,
`--model "${settings.whisperModelPath()}"`,
"--output-json",
`--output-file ${path.join(tmpDir, filename)}`,
`--output-file "${path.join(tmpDir, filename)}"`,
...extra,
].join(" ");

View File

@@ -1,149 +0,0 @@
import { useState, useEffect } from "react";
import { cn } from "@renderer/lib/utils";
import {
Button,
Popover,
PopoverContent,
PopoverAnchor,
} from "@renderer/components/ui";
import { LookupResult } from "@renderer/components";
import { LanguagesIcon, PlayIcon } from "lucide-react";
/**
 * Renders the words of the current transcription segment group for an audio
 * file, highlighting the word whose time range contains `currentTime`.
 * Clicking a word pauses playback, seeks to the word's start, and anchors a
 * popover menu (play / translate) at the clicked word's on-screen position.
 *
 * NOTE(review): per this diff, the component was deleted in this commit;
 * this is the removed file's content.
 */
export const AudioCaption = (props: {
  audioId: string;
  // current playback position, in seconds
  currentTime: number;
  transcription: TranscriptionGroupType;
  // seek callback; receives a target time in seconds
  onSeek?: (time: number) => void;
  className?: string;
  isPlaying: boolean;
  setIsPlaying: (isPlaying: boolean) => void;
}) => {
  const {
    transcription,
    currentTime,
    onSeek,
    className,
    isPlaying,
    setIsPlaying,
  } = props;

  // Index of the segment (word) currently being spoken.
  const [activeIndex, setActiveIndex] = useState<number>(0);
  // The word the user clicked, plus the pixel offsets used to anchor the
  // lookup popover right under it.
  const [selected, setSelected] = useState<{
    index: number;
    word: string;
    position?: {
      top: number;
      left: number;
    };
  }>();

  // Follow playback: segment offsets are in milliseconds, currentTime is in
  // seconds, so convert before comparing against each [from, to) range.
  useEffect(() => {
    if (!transcription) return;
    const time = Math.round(currentTime * 1000);
    const index = transcription.segments.findIndex(
      (w) => time >= w.offsets.from && time < w.offsets.to
    );
    if (index !== activeIndex) {
      setActiveIndex(index);
    }
  }, [currentTime, transcription]);

  if (!transcription) return null;
  // Nothing to render before the first word of this group starts.
  if (Math.round(currentTime * 1000) < transcription.offsets.from) return null;

  return (
    <div className={cn("relative px-4 py-2 text-lg", className)}>
      <div className="flex flex-wrap">
        {(transcription.segments || []).map((w, index) => (
          <span
            key={index}
            className={`mr-1 cursor-pointer hover:bg-red-500/10 ${
              index === activeIndex ? "text-red-500" : ""
            }`}
            onClick={(event) => {
              {/* Anchor the popover just below the clicked word, then pause
                  and jump playback to that word's start (ms -> s). */}
              setSelected({
                index,
                word: w.text,
                position: {
                  top:
                    event.currentTarget.offsetTop +
                    event.currentTarget.offsetHeight,
                  left: event.currentTarget.offsetLeft,
                },
              });
              setIsPlaying(false);
              if (onSeek) onSeek(w.offsets.from / 1000);
            }}
          >
            {w.text}
          </span>
        ))}
        {/* Popover is only open while a word is selected AND playback is
            paused; resuming play closes it implicitly. */}
        <Popover
          open={Boolean(selected) && !isPlaying}
          onOpenChange={(value) => {
            if (!value) setSelected(null);
          }}
        >
          <PopoverAnchor
            className="absolute w-0 h-0"
            style={{
              top: selected?.position?.top,
              left: selected?.position?.left,
            }}
          ></PopoverAnchor>
          <PopoverContent
            className="w-full max-w-md p-0"
            updatePositionStrategy="always"
          >
            {selected?.word && (
              <AudioCaptionSelectionMenu
                word={selected.word}
                context={transcription.segments.map((w) => w.text).join(" ").trim()}
                audioId={props.audioId}
                onPlay={() => {
                  setIsPlaying(true);
                }}
              />
            )}
          </PopoverContent>
        </Popover>
      </div>
    </div>
  );
};
/**
 * Small action menu shown under a clicked caption word: a play button and a
 * translate button. Pressing translate replaces the buttons with the lookup
 * result for the word in its sentence context.
 */
const AudioCaptionSelectionMenu = (props: {
  word: string;
  context: string;
  audioId: string;
  onPlay: () => void;
}) => {
  const { word, context, audioId, onPlay } = props;
  // Whether the user has switched from the button row to the lookup panel.
  const [translating, setTranslating] = useState<boolean>(false);

  if (!word) return null;

  return translating ? (
    <LookupResult
      word={word}
      context={context}
      sourceId={audioId}
      sourceType={"Audio"}
    />
  ) : (
    <div className="flex items-center p-1">
      <Button onClick={onPlay} variant="ghost" size="icon">
        <PlayIcon size={16} />
      </Button>
      <Button onClick={() => setTranslating(true)} variant="ghost" size="icon">
        <LanguagesIcon size={16} />
      </Button>
    </div>
  );
};

View File

@@ -1,165 +0,0 @@
import {
AlertDialog,
AlertDialogTrigger,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogContent,
AlertDialogTitle,
AlertDialogDescription,
AlertDialogCancel,
AlertDialogAction,
Skeleton,
ScrollArea,
Button,
PingPoint,
} from "@renderer/components/ui";
import React, { useEffect, useContext } from "react";
import { t } from "i18next";
import { LoaderIcon, CheckCircleIcon, MicIcon } from "lucide-react";
import {
DbProviderContext,
AppSettingsProviderContext,
} from "@renderer/context";
/**
 * Side panel showing an audio file's transcription segment by segment, with
 * a per-segment mic icon for segments that already have user recordings, and
 * a confirm dialog that asks the main process to (re)generate the
 * transcription.
 *
 * NOTE(review): per this diff, the component was deleted in this commit;
 * this is the removed file's content.
 */
export const AudioTranscription = (props: {
  audio: AudioType | null;
  // index of the segment currently active in the player, if any
  currentSegmentIndex?: number;
  // called when the user clicks a segment row
  onSelectSegment?: (index: number) => void;
}) => {
  const { addDblistener, removeDbListener } = useContext(DbProviderContext);
  const { EnjoyApp } = useContext(AppSettingsProviderContext);
  const { audio, currentSegmentIndex, onSelectSegment } = props;
  const containerRef = React.createRef<HTMLDivElement>();
  // Recording stats grouped by segment index; drives the mic indicator.
  const [recordingStats, setRecordingStats] =
    React.useState<SegementRecordingStatsType>([]);

  // Fire-and-forget: main process transcribes and notifies via DB events.
  const regenerate = async () => {
    if (!audio) return;
    EnjoyApp.audios.transcribe(audio.id);
  };

  const fetchSegmentStats = async () => {
    if (!audio) return;
    EnjoyApp.recordings.groupBySegment(audio.id).then((stats) => {
      setRecordingStats(stats);
    });
  };

  // Refresh the stats on every DB change while mounted.
  useEffect(() => {
    addDblistener(fetchSegmentStats);
    fetchSegmentStats();
    return () => {
      removeDbListener(fetchSegmentStats);
    };
  }, [audio]);

  // Keep the active segment scrolled into view when it changes.
  useEffect(() => {
    containerRef.current
      ?.querySelector(`#segment-${currentSegmentIndex}`)
      ?.scrollIntoView({
        block: "center",
        inline: "center",
      } as ScrollIntoViewOptions);
  }, [currentSegmentIndex, audio?.transcription]);

  if (!audio)
    return (
      <div className="p-4 w-full">
        <TranscriptionPlaceholder />
      </div>
    );

  return (
    <div className="w-full h-full flex flex-col">
      {/* Header: status dot + (re)generate button behind a confirm dialog */}
      <div className="mb-4 flex items-cener justify-between">
        <div className="flex items-center space-x-2">
          {audio.transcribing ? (
            <PingPoint colorClassName="bg-yellow-500" />
          ) : audio.isTranscribed ? (
            <CheckCircleIcon className="text-green-500 w-4 h-4" />
          ) : (
            <PingPoint colorClassName="bg-mute" />
          )}
          <span className="">{t("transcription")}</span>
        </div>
        <AlertDialog>
          <AlertDialogTrigger asChild>
            <Button disabled={audio.transcribing} className="capitalize">
              {audio.transcribing && (
                <LoaderIcon className="animate-spin w-4 mr-2" />
              )}
              {audio.isTranscribed ? t("regenerate") : t("transcribe")}
            </Button>
          </AlertDialogTrigger>
          <AlertDialogContent>
            <AlertDialogHeader>
              <AlertDialogTitle>{t("transcribe")}</AlertDialogTitle>
              <AlertDialogDescription>
                {t("transcribeAudioConfirmation", {
                  name: audio.name,
                })}
              </AlertDialogDescription>
            </AlertDialogHeader>
            <AlertDialogFooter>
              <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
              <AlertDialogAction
                className="bg-destructive"
                onClick={regenerate}
              >
                {t("transcribe")}
              </AlertDialogAction>
            </AlertDialogFooter>
          </AlertDialogContent>
        </AlertDialog>
      </div>
      {audio.transcription ? (
        <ScrollArea ref={containerRef} className="flex-1">
          {/* NOTE(review): the map variable `t` shadows the i18next `t`
              imported above — inside this callback `t` is a segment. */}
          {audio.transcription.map((t, index) => (
            <div
              key={index}
              id={`segment-${index}`}
              className={`py-1 px-2 mb-2 cursor-pointer hover:bg-yellow-400/25 ${
                currentSegmentIndex === index ? "bg-yellow-400/25" : ""
              }`}
              onClick={() => {
                onSelectSegment?.(index);
              }}
            >
              <div className="flex items-center justify-between">
                <span className="text-xs opacity-50">#{index + 1}</span>
                <div className="flex items-center space-x-2">
                  {(recordingStats || []).findIndex(
                    (s) => s.segmentIndex === index
                  ) !== -1 && <MicIcon className="w-3 h-3 text-sky-500" />}
                  <span className="text-xs opacity-50">
                    {t.timestamps.from.split(",")[0]}
                  </span>
                </div>
              </div>
              <p className="">{t.text}</p>
            </div>
          ))}
        </ScrollArea>
      ) : (
        <TranscriptionPlaceholder />
      )}
    </div>
  );
};
/**
 * Loading skeleton shown while no transcription is available: five
 * full-width placeholder bars followed by one shorter trailing bar.
 */
export const TranscriptionPlaceholder = () => {
  const rows = Array.from({ length: 5 });
  return (
    <div className="p-4">
      {rows.map((_, index) => (
        <Skeleton key={index} className="h-4 w-full mb-4" />
      ))}
      <Skeleton className="h-4 w-3/5" />
    </div>
  );
};

View File

@@ -95,7 +95,7 @@ export const MediaTranscription = (props: {
) : (
<PingPoint colorClassName="bg-mute" />
)}
<span className="">{t("transcription")}</span>
<span className="capitalize">{t("transcript")}</span>
</div>
<AlertDialog>
<AlertDialogTrigger asChild>
@@ -121,7 +121,6 @@ export const MediaTranscription = (props: {
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<AlertDialogAction
className="bg-destructive"
onClick={regenerate}
>
{t("transcribe")}
@@ -132,7 +131,7 @@ export const MediaTranscription = (props: {
</div>
{transcription?.result ? (
<ScrollArea ref={containerRef} className="flex-1">
<ScrollArea ref={containerRef} className="flex-1 px-2">
{transcription.result.map((t, index) => (
<div
key={index}

View File

@@ -170,7 +170,7 @@ export const RecordingsList = (props: {
<div className="z-50 bottom-16 left-1/2 w-0 h-0 absolute flex items-center justify-center">
<RecordButton
disabled={!referenceId == undefined}
disabled={referenceId == undefined || !Boolean(referenceText)}
onRecordEnd={createRecording}
/>
</div>

View File

@@ -31,15 +31,21 @@ export default defineConfig({
viteStaticCopy({
targets: [
{
src: `lib/whisper.cpp/${os.arch()}/${os.platform()}/*`,
src: `lib/whisper.cpp/${
process.env.PACKAGE_OS_ARCH || os.arch()
}/${os.platform()}/*`,
dest: "lib/whisper",
},
{
src: `lib/youtubedr/${os.arch()}/${os.platform()}/*`,
src: `lib/youtubedr/${
process.env.PACKAGE_OS_ARCH || os.arch()
}/${os.platform()}/*`,
dest: "lib/youtubedr",
},
{
src: `lib/ffmpeg//${os.arch()}/${os.platform()}/*`,
src: `lib/ffmpeg//${
process.env.PACKAGE_OS_ARCH || os.arch()
}/${os.platform()}/*`,
dest: "lib/ffmpeg",
},
{