Fix bugs (#153)
* escape space in command line
* disable record button when no reference
* notify when transcription is working too long
* fix release CI
* fix UI & remove deprecated codes
* clear zombie transcribe processes on startup
* fix file removal when a duplicated audio/video is added
* update latest whisper for win32
.github/workflows/release-enjoy-app.yml
@@ -21,4 +21,5 @@ jobs:
       - if: matrix.os == 'macos-latest'
         env:
           GITHUB_TOKEN: ${{ secrets.PUBLISH_TOKEN }}
+          PACKAGE_OS_ARCH: arm64
         run: yarn run publish:enjoy --arch=arm64
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -178,6 +178,7 @@
   "failedToLogin": "Failed to login",
   "invalidRedirectUrl": "Invalid redirect url",
   "transcribe": "Transcribe",
+  "stillTranscribing": "AI is still working on the transcription. Please wait or switch to a smaller model to make it faster.",
   "unableToSetLibraryPath": "Unable to set library path to {{path}}",
   "nthStep": "{{current}}/{{totalSteps}} Step",
   "open": "Open",
@@ -265,6 +266,7 @@
   "allResources": "all resources",
   "playbackRate": "playback rate",
   "transcription": "transcription",
+  "transcript": "transcript",
   "regenerate": "regenerate",
   "holdAndSpeak": "Hold and speak",
   "releaseToStop": "Release to stop",
@@ -178,6 +178,7 @@
   "invalidRedirectUrl": "无效的重定向 URL",
   "delete": "删除",
   "transcribe": "语音转文本",
+  "stillTranscribing": "语音转文本仍在进行中,请耐心等候。或者您可以切换到另一个更小的模型以加快速度。",
   "unableToSetLibraryPath": "无法设置资源库保存路径 {{path}}",
   "nthStep": "第 {{current}}/{{totalSteps}} 步",
   "open": "打开",
@@ -265,6 +266,7 @@
   "allResources": "所有资源",
   "playbackRate": "播放速度",
   "transcription": "语音文本",
+  "transcript": "字幕",
   "regenerate": "重新生成",
   "holdAndSpeak": "按住并说话",
   "releaseToStop": "松开停止",
@@ -80,12 +80,24 @@ class AudiosHandler {
       });
     }
 
-    audio.transcribe().catch((err) => {
+    const timeout = setTimeout(() => {
       event.sender.send("on-notification", {
-        type: "error",
-        message: err.message,
+        type: "warning",
+        message: t("stillTranscribing"),
       });
-    });
+    }, 1000 * 10);
+
+    audio
+      .transcribe()
+      .catch((err) => {
+        event.sender.send("on-notification", {
+          type: "error",
+          message: err.message,
+        });
+      })
+      .finally(() => {
+        clearTimeout(timeout);
+      });
   }
 
   private async create(
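The hunk above is the pattern this commit repeats in the transcriptions and videos handlers: start a 10-second timer that warns the renderer the transcription is still running, and cancel it once the promise settles. A minimal standalone sketch of that pattern, with `task` and `notify` as illustrative stand-ins rather than the project's actual API:

```ts
// Sketch: emit a "still working" warning if a task runs longer than 10s,
// report errors, and always cancel the pending warning when it settles.
type Notification = { type: "warning" | "error"; message: string };

function runWithSlowWarning<T>(
  task: () => Promise<T>,
  notify: (n: Notification) => void,
  warningMessage: string,
  delayMs = 10 * 1000
): Promise<T | void> {
  const timeout = setTimeout(() => {
    notify({ type: "warning", message: warningMessage });
  }, delayMs);

  return task()
    .catch((err: Error) => {
      notify({ type: "error", message: err.message });
    })
    .finally(() => {
      // Without this, the warning would still fire after a fast finish.
      clearTimeout(timeout);
    });
}
```

The `.finally` is the important part: it keeps the 10-second warning from firing after a transcription that already succeeded or failed.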
@@ -1,6 +1,7 @@
 import { ipcMain, IpcMainEvent } from "electron";
 import { Transcription, Audio, Video } from "@main/db/models";
 import { WhereOptions, Attributes } from "sequelize";
+import { t } from "i18next";
 import log from "electron-log/main";
 
 const logger = log.scope("db/handlers/transcriptions-handler");
@@ -30,12 +31,24 @@ class TranscriptionsHandler {
     });
 
     if (transcription.state === "pending") {
-      transcription.process().catch((err) => {
+      const timeout = setTimeout(() => {
         event.sender.send("on-notification", {
-          type: "error",
-          message: err.message,
+          type: "warning",
+          message: t("stillTranscribing"),
         });
-      });
+      }, 1000 * 10);
+
+      transcription
+        .process()
+        .catch((err) => {
+          event.sender.send("on-notification", {
+            type: "error",
+            message: err.message,
+          });
+        })
+        .finally(() => {
+          clearTimeout(timeout);
+        });
     }
 
     return transcription.toJSON();
@@ -86,7 +99,24 @@ class TranscriptionsHandler {
           throw new Error("models.transcription.notFound");
         }
 
-        transcription.process({ force: true });
+        const timeout = setTimeout(() => {
+          event.sender.send("on-notification", {
+            type: "warning",
+            message: t("stillTranscribing"),
+          });
+        }, 1000 * 10);
+
+        transcription
+          .process({ force: true })
+          .catch((err) => {
+            event.sender.send("on-notification", {
+              type: "error",
+              message: err.message,
+            });
+          })
+          .finally(() => {
+            clearTimeout(timeout);
+          });
       })
       .catch((err) => {
         logger.error(err);
@@ -80,12 +80,24 @@ class VideosHandler {
       });
     }
 
-    video.transcribe().catch((err) => {
+    const timeout = setTimeout(() => {
       event.sender.send("on-notification", {
-        type: "error",
-        message: err.message,
+        type: "warning",
+        message: t("stillTranscribing"),
       });
-    });
+    }, 1000 * 10);
+
+    video
+      .transcribe()
+      .catch((err) => {
+        event.sender.send("on-notification", {
+          type: "error",
+          message: err.message,
+        });
+      })
+      .finally(() => {
+        clearTimeout(timeout);
+      });
   }
 
   private async create(
@@ -65,19 +65,20 @@ db.connect = async () => {
   await sequelize.sync();
   await sequelize.authenticate();
 
-  // TODO:
-  // clear the large waveform data in DB.
-  // Remove this in next release
-  const caches = await CacheObject.findAll({
-    attributes: ["id", "key"],
+  // kill the zombie transcribe processes
+  Transcription.findAll({
+    where: {
+      state: "processing",
+    },
+  }).then((transcriptions) => {
+    transcriptions.forEach((transcription) => {
+      if (transcription.result) {
+        transcription.update({ state: "finished" });
+      } else {
+        transcription.update({ state: "pending" });
+      }
+    });
   });
-  const cacheIds: string[] = [];
-  caches.forEach((cache) => {
-    if (cache.key.startsWith("waveform")) {
-      cacheIds.push(cache.id);
-    }
-  });
-  await CacheObject.destroy({ where: { id: cacheIds } });
 
   // vacuum the database
   await sequelize.query("VACUUM");
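This startup cleanup resets transcriptions left in `processing` by a whisper process that died: ones that already have a `result` are marked `finished`, the rest go back to `pending`. The same idea could also be written as two bulk updates instead of a per-row loop; a sketch, assuming `result` is a nullable column on the `Transcription` model and reusing the app's `@main/db/models` import path:

```ts
import { Op } from "sequelize";
import { Transcription } from "@main/db/models";

// Reset transcriptions stranded in "processing" by a killed whisper process.
async function clearZombieTranscriptions(): Promise<void> {
  // A stored result means the work completed before the process died.
  await Transcription.update(
    { state: "finished" },
    { where: { state: "processing", result: { [Op.not]: null } } }
  );
  // Anything still "processing" has no result: send it back to the queue.
  await Transcription.update(
    { state: "pending" },
    { where: { state: "processing" } }
  );
}
```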
@@ -257,6 +257,16 @@ export class Audio extends Model<Audio> {
     const md5 = await hashFile(filePath, { algo: "md5" });
 
+    // check if file already exists
+    const existing = await Audio.findOne({
+      where: {
+        md5,
+      },
+    });
+    if (existing) {
+      throw new Error(t("audioAlreadyAddedToLibrary", { file: filePath }));
+    }
+
     // Generate ID
     const userId = settings.getSync("user.id");
     const id = uuidv5(`${userId}/${md5}`, uuidv5.URL);
@@ -279,6 +279,16 @@ export class Video extends Model<Video> {
     const md5 = await hashFile(filePath, { algo: "md5" });
 
+    // check if file already exists
+    const existing = await Video.findOne({
+      where: {
+        md5,
+      },
+    });
+    if (existing) {
+      throw new Error(t("videoAlreadyAddedToLibrary", { file: filePath }));
+    }
+
     // Generate ID
     const userId = settings.getSync("user.id");
     const id = uuidv5(`${userId}/${md5}`, uuidv5.URL);
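Both the Audio and Video hunks add the same guard: hash the file, look it up by `md5`, and throw before insertion if it already exists. The check matters because the record id is a UUIDv5 derived from `${userId}/${md5}`, so re-adding the same file would deterministically produce the same id and would likely fail on the primary key anyway; catching it first surfaces the `audioAlreadyAddedToLibrary` / `videoAlreadyAddedToLibrary` message instead of a raw constraint error. A small sketch of that deterministic-id property (the values below are illustrative):

```ts
import { v5 as uuidv5 } from "uuid";

// Same user + same file content => same id, every time.
const userId = "some-user-id";
const md5 = "9e107d9d372bb6826bd81d3542a419d6";

const first = uuidv5(`${userId}/${md5}`, uuidv5.URL);
const second = uuidv5(`${userId}/${md5}`, uuidv5.URL);

console.log(first === second); // true — hence the explicit duplicate check
```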
@@ -73,11 +73,11 @@ class Whipser {
     );
 
     const command = [
-      this.binMain,
+      `"${this.binMain}"`,
       `--file "${waveFile}"`,
-      `--model ${settings.whisperModelPath()}`,
+      `--model "${settings.whisperModelPath()}"`,
       "--output-json",
-      `--output-file ${path.join(tmpDir, filename)}`,
+      `--output-file "${path.join(tmpDir, filename)}"`,
       ...extra,
     ].join(" ");
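The fix above wraps the binary, model, and output paths in double quotes so the whisper command built by string-joining survives paths that contain spaces (the win32 install path being the usual offender). An alternative that sidesteps shell quoting entirely is to pass the arguments as an array to a non-shell spawn; a sketch with illustrative paths, not the project's actual invocation:

```ts
import { execFile } from "child_process";

// Passing argv as an array means spaces in paths never need escaping.
const binMain = "C:\\Program Files\\Enjoy\\lib\\whisper\\main.exe";
const args = [
  "--file", "C:\\Users\\Some User\\cache\\recording.wav",
  "--model", "C:\\Users\\Some User\\models\\ggml-base.en.bin",
  "--output-json",
  "--output-file", "C:\\Users\\Some User\\cache\\recording",
];

execFile(binMain, args, (err, stdout, stderr) => {
  if (err) {
    console.error(stderr);
    throw err;
  }
  console.log(stdout);
});
```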
@@ -1,149 +0,0 @@
import { useState, useEffect } from "react";
import { cn } from "@renderer/lib/utils";
import {
  Button,
  Popover,
  PopoverContent,
  PopoverAnchor,
} from "@renderer/components/ui";
import { LookupResult } from "@renderer/components";
import { LanguagesIcon, PlayIcon } from "lucide-react";

export const AudioCaption = (props: {
  audioId: string;
  currentTime: number;
  transcription: TranscriptionGroupType;
  onSeek?: (time: number) => void;
  className?: string;
  isPlaying: boolean;
  setIsPlaying: (isPlaying: boolean) => void;
}) => {
  const {
    transcription,
    currentTime,
    onSeek,
    className,
    isPlaying,
    setIsPlaying,
  } = props;
  const [activeIndex, setActiveIndex] = useState<number>(0);
  const [selected, setSelected] = useState<{
    index: number;
    word: string;
    position?: {
      top: number;
      left: number;
    };
  }>();

  useEffect(() => {
    if (!transcription) return;
    const time = Math.round(currentTime * 1000);
    const index = transcription.segments.findIndex(
      (w) => time >= w.offsets.from && time < w.offsets.to
    );

    if (index !== activeIndex) {
      setActiveIndex(index);
    }
  }, [currentTime, transcription]);

  if (!transcription) return null;
  if (Math.round(currentTime * 1000) < transcription.offsets.from) return null;

  return (
    <div className={cn("relative px-4 py-2 text-lg", className)}>
      <div className="flex flex-wrap">
        {(transcription.segments || []).map((w, index) => (
          <span
            key={index}
            className={`mr-1 cursor-pointer hover:bg-red-500/10 ${
              index === activeIndex ? "text-red-500" : ""
            }`}
            onClick={(event) => {
              setSelected({
                index,
                word: w.text,
                position: {
                  top:
                    event.currentTarget.offsetTop +
                    event.currentTarget.offsetHeight,
                  left: event.currentTarget.offsetLeft,
                },
              });

              setIsPlaying(false);
              if (onSeek) onSeek(w.offsets.from / 1000);
            }}
          >
            {w.text}
          </span>
        ))}

        <Popover
          open={Boolean(selected) && !isPlaying}
          onOpenChange={(value) => {
            if (!value) setSelected(null);
          }}
        >
          <PopoverAnchor
            className="absolute w-0 h-0"
            style={{
              top: selected?.position?.top,
              left: selected?.position?.left,
            }}
          ></PopoverAnchor>
          <PopoverContent
            className="w-full max-w-md p-0"
            updatePositionStrategy="always"
          >
            {selected?.word && (
              <AudioCaptionSelectionMenu
                word={selected.word}
                context={transcription.segments.map((w) => w.text).join(" ").trim()}
                audioId={props.audioId}
                onPlay={() => {
                  setIsPlaying(true);
                }}
              />
            )}
          </PopoverContent>
        </Popover>
      </div>
    </div>
  );
};

const AudioCaptionSelectionMenu = (props: {
  word: string;
  context: string;
  audioId: string;
  onPlay: () => void;
}) => {
  const { word, context, audioId, onPlay } = props;
  const [translating, setTranslating] = useState<boolean>(false);

  if (!word) return null;

  if (translating) {
    return (
      <LookupResult
        word={word}
        context={context}
        sourceId={audioId}
        sourceType={"Audio"}
      />
    );
  }

  return (
    <div className="flex items-center p-1">
      <Button onClick={onPlay} variant="ghost" size="icon">
        <PlayIcon size={16} />
      </Button>
      <Button onClick={() => setTranslating(true)} variant="ghost" size="icon">
        <LanguagesIcon size={16} />
      </Button>
    </div>
  );
};
@@ -1,165 +0,0 @@
import {
  AlertDialog,
  AlertDialogTrigger,
  AlertDialogFooter,
  AlertDialogHeader,
  AlertDialogContent,
  AlertDialogTitle,
  AlertDialogDescription,
  AlertDialogCancel,
  AlertDialogAction,
  Skeleton,
  ScrollArea,
  Button,
  PingPoint,
} from "@renderer/components/ui";
import React, { useEffect, useContext } from "react";
import { t } from "i18next";
import { LoaderIcon, CheckCircleIcon, MicIcon } from "lucide-react";
import {
  DbProviderContext,
  AppSettingsProviderContext,
} from "@renderer/context";

export const AudioTranscription = (props: {
  audio: AudioType | null;
  currentSegmentIndex?: number;
  onSelectSegment?: (index: number) => void;
}) => {
  const { addDblistener, removeDbListener } = useContext(DbProviderContext);
  const { EnjoyApp } = useContext(AppSettingsProviderContext);
  const { audio, currentSegmentIndex, onSelectSegment } = props;
  const containerRef = React.createRef<HTMLDivElement>();

  const [recordingStats, setRecordingStats] =
    React.useState<SegementRecordingStatsType>([]);

  const regenerate = async () => {
    if (!audio) return;

    EnjoyApp.audios.transcribe(audio.id);
  };

  const fetchSegmentStats = async () => {
    if (!audio) return;

    EnjoyApp.recordings.groupBySegment(audio.id).then((stats) => {
      setRecordingStats(stats);
    });
  };

  useEffect(() => {
    addDblistener(fetchSegmentStats);
    fetchSegmentStats();

    return () => {
      removeDbListener(fetchSegmentStats);
    };
  }, [audio]);

  useEffect(() => {
    containerRef.current
      ?.querySelector(`#segment-${currentSegmentIndex}`)
      ?.scrollIntoView({
        block: "center",
        inline: "center",
      } as ScrollIntoViewOptions);
  }, [currentSegmentIndex, audio?.transcription]);

  if (!audio)
    return (
      <div className="p-4 w-full">
        <TranscriptionPlaceholder />
      </div>
    );

  return (
    <div className="w-full h-full flex flex-col">
      <div className="mb-4 flex items-cener justify-between">
        <div className="flex items-center space-x-2">
          {audio.transcribing ? (
            <PingPoint colorClassName="bg-yellow-500" />
          ) : audio.isTranscribed ? (
            <CheckCircleIcon className="text-green-500 w-4 h-4" />
          ) : (
            <PingPoint colorClassName="bg-mute" />
          )}
          <span className="">{t("transcription")}</span>
        </div>
        <AlertDialog>
          <AlertDialogTrigger asChild>
            <Button disabled={audio.transcribing} className="capitalize">
              {audio.transcribing && (
                <LoaderIcon className="animate-spin w-4 mr-2" />
              )}
              {audio.isTranscribed ? t("regenerate") : t("transcribe")}
            </Button>
          </AlertDialogTrigger>
          <AlertDialogContent>
            <AlertDialogHeader>
              <AlertDialogTitle>{t("transcribe")}</AlertDialogTitle>
              <AlertDialogDescription>
                {t("transcribeAudioConfirmation", {
                  name: audio.name,
                })}
              </AlertDialogDescription>
            </AlertDialogHeader>
            <AlertDialogFooter>
              <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
              <AlertDialogAction
                className="bg-destructive"
                onClick={regenerate}
              >
                {t("transcribe")}
              </AlertDialogAction>
            </AlertDialogFooter>
          </AlertDialogContent>
        </AlertDialog>
      </div>

      {audio.transcription ? (
        <ScrollArea ref={containerRef} className="flex-1">
          {audio.transcription.map((t, index) => (
            <div
              key={index}
              id={`segment-${index}`}
              className={`py-1 px-2 mb-2 cursor-pointer hover:bg-yellow-400/25 ${
                currentSegmentIndex === index ? "bg-yellow-400/25" : ""
              }`}
              onClick={() => {
                onSelectSegment?.(index);
              }}
            >
              <div className="flex items-center justify-between">
                <span className="text-xs opacity-50">#{index + 1}</span>

                <div className="flex items-center space-x-2">
                  {(recordingStats || []).findIndex(
                    (s) => s.segmentIndex === index
                  ) !== -1 && <MicIcon className="w-3 h-3 text-sky-500" />}
                  <span className="text-xs opacity-50">
                    {t.timestamps.from.split(",")[0]}
                  </span>
                </div>
              </div>
              <p className="">{t.text}</p>
            </div>
          ))}
        </ScrollArea>
      ) : (
        <TranscriptionPlaceholder />
      )}
    </div>
  );
};

export const TranscriptionPlaceholder = () => {
  return (
    <div className="p-4">
      {Array.from({ length: 5 }).map((_, i) => (
        <Skeleton key={i} className="h-4 w-full mb-4" />
      ))}
      <Skeleton className="h-4 w-3/5" />
    </div>
  );
};
@@ -95,7 +95,7 @@ export const MediaTranscription = (props: {
         ) : (
           <PingPoint colorClassName="bg-mute" />
         )}
-        <span className="">{t("transcription")}</span>
+        <span className="capitalize">{t("transcript")}</span>
       </div>
       <AlertDialog>
         <AlertDialogTrigger asChild>
@@ -121,7 +121,6 @@ export const MediaTranscription = (props: {
           <AlertDialogFooter>
             <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
             <AlertDialogAction
-              className="bg-destructive"
               onClick={regenerate}
             >
               {t("transcribe")}
@@ -132,7 +131,7 @@ export const MediaTranscription = (props: {
       </div>
 
       {transcription?.result ? (
-        <ScrollArea ref={containerRef} className="flex-1">
+        <ScrollArea ref={containerRef} className="flex-1 px-2">
           {transcription.result.map((t, index) => (
             <div
               key={index}
@@ -170,7 +170,7 @@ export const RecordingsList = (props: {
 
       <div className="z-50 bottom-16 left-1/2 w-0 h-0 absolute flex items-center justify-center">
         <RecordButton
-          disabled={!referenceId == undefined}
+          disabled={referenceId == undefined || !Boolean(referenceText)}
           onRecordEnd={createRecording}
         />
       </div>
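The removed condition was a precedence bug: `!referenceId` is evaluated first and yields a boolean, and a boolean loosely compared to `undefined` is always `false`, so the record button was never actually disabled. The replacement checks the reference id and the reference text directly. A tiny demonstration (variables below are illustrative):

```ts
const referenceId: string | undefined = undefined;
const referenceText: string | undefined = undefined;

// (!referenceId) == undefined  →  true == undefined  →  false, for any value.
console.log(!referenceId == undefined); // false → button stayed enabled

// The fixed expression disables the button when either piece is missing.
console.log(referenceId == undefined || !Boolean(referenceText)); // true → disabled
```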
@@ -31,15 +31,21 @@ export default defineConfig({
     viteStaticCopy({
       targets: [
         {
-          src: `lib/whisper.cpp/${os.arch()}/${os.platform()}/*`,
+          src: `lib/whisper.cpp/${
+            process.env.PACKAGE_OS_ARCH || os.arch()
+          }/${os.platform()}/*`,
           dest: "lib/whisper",
         },
         {
-          src: `lib/youtubedr/${os.arch()}/${os.platform()}/*`,
+          src: `lib/youtubedr/${
+            process.env.PACKAGE_OS_ARCH || os.arch()
+          }/${os.platform()}/*`,
           dest: "lib/youtubedr",
         },
         {
-          src: `lib/ffmpeg//${os.arch()}/${os.platform()}/*`,
+          src: `lib/ffmpeg//${
+            process.env.PACKAGE_OS_ARCH || os.arch()
+          }/${os.platform()}/*`,
           dest: "lib/ffmpeg",
         },
         {
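These copy-target changes pair with the workflow change at the top of the commit: the macOS release job exports `PACKAGE_OS_ARCH=arm64` while publishing with `--arch=arm64`, and the config now prefers that variable over `os.arch()` so the bundled whisper.cpp, youtubedr, and ffmpeg binaries match the architecture being packaged rather than whatever the runner reports. A sketch of the resolution logic, with `libGlob` as an illustrative helper rather than anything in the repo:

```ts
import os from "os";

// Prefer the arch requested by the release workflow over the host arch.
function libGlob(lib: string): string {
  const arch = process.env.PACKAGE_OS_ARCH || os.arch();
  return `lib/${lib}/${arch}/${os.platform()}/*`;
}

// On a macOS runner with PACKAGE_OS_ARCH=arm64:
//   libGlob("whisper.cpp") === "lib/whisper.cpp/arm64/darwin/*"
console.log(libGlob("whisper.cpp"));
```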