Feat: customize settings before transcribing (#699)
* transcribe with language * avoid using .en model to transcribe non-English audio * save language in transcription/audio/video * may select language when regenerating transcription * may select service when regenerating * refactor transcription form * refactor transcription create form * refactor media loading modal * display ipa per language * refactor ipa mappings * parse subtitle files
This commit is contained in:
@@ -260,7 +260,9 @@ export class Client {
|
||||
return this.api.post("/api/transcriptions", decamelizeKeys(transcription));
|
||||
}
|
||||
|
||||
syncSegment(segment: Partial<Omit<SegmentType, "audio" | "video">>) {
|
||||
syncSegment(
|
||||
segment: Partial<Omit<SegmentType, "audio" | "video" | "target">>
|
||||
) {
|
||||
return this.api.post("/api/segments", decamelizeKeys(segment));
|
||||
}
|
||||
|
||||
|
||||
@@ -604,5 +604,8 @@
|
||||
"referenceText": "Reference text",
|
||||
"inputReferenceTextOrLeaveItBlank": "Input the reference text or leave it blank",
|
||||
"assessing": "Assessing",
|
||||
"assessedSuccessfully": "Assessed successfully"
|
||||
"assessedSuccessfully": "Assessed successfully",
|
||||
"optinal": "Optional",
|
||||
"uploadTranscriptFile": "Upload transcript file(.txt/.srt/.vtt)",
|
||||
"onlyTextFileIsSupported": "Only text file is supported"
|
||||
}
|
||||
|
||||
@@ -604,5 +604,8 @@
|
||||
"referenceText": "参考文本",
|
||||
"inputReferenceTextOrLeaveItBlank": "输入参考文本,或者留空",
|
||||
"assessing": "正在评估",
|
||||
"assessedSuccessfully": "评估成功"
|
||||
"assessedSuccessfully": "评估成功",
|
||||
"optinal": "可选",
|
||||
"uploadTranscriptFile": "上传字幕文件(.txt/.srt/.vtt)",
|
||||
"onlyTextFileIsSupported": "仅支持文本文件"
|
||||
}
|
||||
|
||||
@@ -11,10 +11,10 @@ const logger = log.scope("db/handlers/audios-handler");
|
||||
|
||||
class AudiosHandler {
|
||||
private async findAll(
|
||||
event: IpcMainEvent,
|
||||
_event: IpcMainEvent,
|
||||
options: FindOptions<Attributes<Audio>>
|
||||
) {
|
||||
return Audio.findAll({
|
||||
const audios = await Audio.findAll({
|
||||
order: [["updatedAt", "DESC"]],
|
||||
include: [
|
||||
{
|
||||
@@ -25,46 +25,30 @@ class AudiosHandler {
|
||||
},
|
||||
],
|
||||
...options,
|
||||
})
|
||||
.then((audios) => {
|
||||
if (!audios) {
|
||||
return [];
|
||||
}
|
||||
return audios.map((audio) => audio.toJSON());
|
||||
})
|
||||
.catch((err) => {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
if (!audios) {
|
||||
return [];
|
||||
}
|
||||
return audios.map((audio) => audio.toJSON());
|
||||
}
|
||||
|
||||
private async findOne(
|
||||
event: IpcMainEvent,
|
||||
_event: IpcMainEvent,
|
||||
where: WhereOptions<Attributes<Audio>>
|
||||
) {
|
||||
return Audio.findOne({
|
||||
const audio = await Audio.findOne({
|
||||
where: {
|
||||
...where,
|
||||
},
|
||||
})
|
||||
.then((audio) => {
|
||||
if (!audio) return;
|
||||
});
|
||||
if (!audio) return;
|
||||
|
||||
if (!audio.isSynced) {
|
||||
audio.sync().catch(() => {});
|
||||
}
|
||||
if (!audio.isSynced) {
|
||||
audio.sync().catch(() => {});
|
||||
}
|
||||
|
||||
return audio.toJSON();
|
||||
})
|
||||
.catch((err) => {
|
||||
logger.error(err);
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
});
|
||||
return audio.toJSON();
|
||||
}
|
||||
|
||||
private async create(
|
||||
@@ -79,22 +63,15 @@ class AudiosHandler {
|
||||
let file = uri;
|
||||
let source;
|
||||
if (uri.startsWith("http")) {
|
||||
try {
|
||||
if (youtubedr.validateYtURL(uri)) {
|
||||
file = await youtubedr.autoDownload(uri);
|
||||
} else {
|
||||
file = await downloader.download(uri, {
|
||||
webContents: event.sender,
|
||||
});
|
||||
}
|
||||
if (!file) throw new Error("Failed to download file");
|
||||
source = uri;
|
||||
} catch (err) {
|
||||
return event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: t("models.audio.failedToDownloadFile", { file: uri }),
|
||||
if (youtubedr.validateYtURL(uri)) {
|
||||
file = await youtubedr.autoDownload(uri);
|
||||
} else {
|
||||
file = await downloader.download(uri, {
|
||||
webContents: event.sender,
|
||||
});
|
||||
}
|
||||
if (!file) throw new Error("Failed to download file");
|
||||
source = uri;
|
||||
}
|
||||
|
||||
try {
|
||||
@@ -119,73 +96,42 @@ class AudiosHandler {
|
||||
|
||||
return audio.toJSON();
|
||||
} catch (err) {
|
||||
return event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: t("models.audio.failedToAdd", { error: err.message }),
|
||||
});
|
||||
logger.error(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
private async update(
|
||||
event: IpcMainEvent,
|
||||
_event: IpcMainEvent,
|
||||
id: string,
|
||||
params: Attributes<Audio>
|
||||
) {
|
||||
const { name, description, metadata } = params;
|
||||
const { name, description, metadata, language } = params;
|
||||
|
||||
return Audio.findOne({
|
||||
where: { id },
|
||||
})
|
||||
.then((audio) => {
|
||||
if (!audio) {
|
||||
throw new Error(t("models.audio.notFound"));
|
||||
}
|
||||
audio.update({ name, description, metadata });
|
||||
})
|
||||
.catch((err) => {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
});
|
||||
const audio = await Audio.findByPk(id);
|
||||
|
||||
if (!audio) {
|
||||
throw new Error(t("models.audio.notFound"));
|
||||
}
|
||||
return await audio.update({ name, description, metadata, language });
|
||||
}
|
||||
|
||||
private async destroy(event: IpcMainEvent, id: string) {
|
||||
return Audio.findOne({
|
||||
where: { id },
|
||||
}).then((audio) => {
|
||||
if (!audio) {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: t("models.audio.notFound"),
|
||||
});
|
||||
}
|
||||
audio.destroy();
|
||||
});
|
||||
private async destroy(_event: IpcMainEvent, id: string) {
|
||||
const audio = await Audio.findByPk(id);
|
||||
|
||||
if (!audio) {
|
||||
throw new Error(t("models.audio.notFound"));
|
||||
}
|
||||
return await audio.destroy();
|
||||
}
|
||||
|
||||
private async upload(event: IpcMainEvent, id: string) {
|
||||
const audio = await Audio.findOne({
|
||||
where: { id },
|
||||
});
|
||||
const audio = await Audio.findByPk(id);
|
||||
if (!audio) {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: t("models.audio.notFound"),
|
||||
});
|
||||
throw new Error(t("models.audio.notFound"));
|
||||
}
|
||||
|
||||
audio
|
||||
.upload()
|
||||
.then((res) => {
|
||||
return res;
|
||||
})
|
||||
.catch((err) => {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
});
|
||||
return await audio.upload();
|
||||
}
|
||||
|
||||
private async crop(
|
||||
@@ -193,9 +139,7 @@ class AudiosHandler {
|
||||
id: string,
|
||||
params: { startTime: number; endTime: number }
|
||||
) {
|
||||
const audio = await Audio.findOne({
|
||||
where: { id },
|
||||
});
|
||||
const audio = await Audio.findByPk(id);
|
||||
if (!audio) {
|
||||
throw new Error(t("models.audio.notFound"));
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ import log from "@main/logger";
|
||||
|
||||
const logger = log.scope("db/handlers/transcriptions-handler");
|
||||
class TranscriptionsHandler {
|
||||
private async findOrCreate(event: IpcMainEvent, where: Transcription) {
|
||||
private async findOrCreate(_event: IpcMainEvent, where: Transcription) {
|
||||
try {
|
||||
const { targetType, targetId } = where;
|
||||
let target: Video | Audio = null;
|
||||
@@ -31,10 +31,8 @@ class TranscriptionsHandler {
|
||||
|
||||
return transcription.toJSON();
|
||||
} catch (err) {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
logger.error(err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,24 +41,19 @@ class TranscriptionsHandler {
|
||||
id: string,
|
||||
params: Attributes<Transcription>
|
||||
) {
|
||||
const { result, engine, model, state } = params;
|
||||
const { result, engine, model, state, language } = params;
|
||||
|
||||
return Transcription.findOne({
|
||||
where: { id },
|
||||
})
|
||||
.then((transcription) => {
|
||||
if (!transcription) {
|
||||
throw new Error("models.transcription.notFound");
|
||||
}
|
||||
transcription.update({ result, engine, model, state });
|
||||
})
|
||||
.catch((err) => {
|
||||
logger.error(err);
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
});
|
||||
const transcription = await Transcription.findByPk(id);
|
||||
if (!transcription) {
|
||||
throw new Error("models.transcription.notFound");
|
||||
}
|
||||
return await transcription.update({
|
||||
result,
|
||||
engine,
|
||||
model,
|
||||
state,
|
||||
language,
|
||||
});
|
||||
}
|
||||
|
||||
register() {
|
||||
|
||||
@@ -11,10 +11,10 @@ const logger = log.scope("db/handlers/videos-handler");
|
||||
|
||||
class VideosHandler {
|
||||
private async findAll(
|
||||
event: IpcMainEvent,
|
||||
_event: IpcMainEvent,
|
||||
options: FindOptions<Attributes<Video>>
|
||||
) {
|
||||
return Video.findAll({
|
||||
const videos = await Video.findAll({
|
||||
order: [["updatedAt", "DESC"]],
|
||||
include: [
|
||||
{
|
||||
@@ -25,46 +25,29 @@ class VideosHandler {
|
||||
},
|
||||
],
|
||||
...options,
|
||||
})
|
||||
.then((videos) => {
|
||||
if (!videos) {
|
||||
return [];
|
||||
}
|
||||
return videos.map((video) => video.toJSON());
|
||||
})
|
||||
.catch((err) => {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
});
|
||||
});
|
||||
if (!videos) {
|
||||
return [];
|
||||
}
|
||||
return videos.map((video) => video.toJSON());
|
||||
}
|
||||
|
||||
private async findOne(
|
||||
event: IpcMainEvent,
|
||||
_event: IpcMainEvent,
|
||||
where: WhereOptions<Attributes<Video>>
|
||||
) {
|
||||
return Video.findOne({
|
||||
const video = await Video.findOne({
|
||||
where: {
|
||||
...where,
|
||||
},
|
||||
})
|
||||
.then((video) => {
|
||||
if (!video) return;
|
||||
});
|
||||
if (!video) return;
|
||||
|
||||
if (!video.isSynced) {
|
||||
video.sync().catch(() => {});
|
||||
}
|
||||
if (!video.isSynced) {
|
||||
video.sync().catch(() => {});
|
||||
}
|
||||
|
||||
return video.toJSON();
|
||||
})
|
||||
.catch((err) => {
|
||||
logger.error(err);
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
});
|
||||
return video.toJSON();
|
||||
}
|
||||
|
||||
private async create(
|
||||
@@ -90,10 +73,8 @@ class VideosHandler {
|
||||
if (!file) throw new Error("Failed to download file");
|
||||
source = uri;
|
||||
} catch (err) {
|
||||
return event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: t("models.video.failedToDownloadFile", { file: uri }),
|
||||
});
|
||||
logger.error(err);
|
||||
throw new Error(t("models.video.failedToDownloadFile", { file: uri }));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,72 +86,46 @@ class VideosHandler {
|
||||
return video.toJSON();
|
||||
})
|
||||
.catch((err) => {
|
||||
return event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: t("models.video.failedToAdd", { error: err.message }),
|
||||
});
|
||||
logger.error(err);
|
||||
throw new Error(t("models.video.failedToAdd", { error: err.message }));
|
||||
});
|
||||
}
|
||||
|
||||
private async update(
|
||||
event: IpcMainEvent,
|
||||
_event: IpcMainEvent,
|
||||
id: string,
|
||||
params: Attributes<Video>
|
||||
) {
|
||||
const { name, description, metadata } = params;
|
||||
const { name, description, metadata, language } = params;
|
||||
|
||||
return Video.findOne({
|
||||
where: { id },
|
||||
})
|
||||
.then((video) => {
|
||||
if (!video) {
|
||||
throw new Error(t("models.video.notFound"));
|
||||
}
|
||||
video.update({ name, description, metadata });
|
||||
})
|
||||
.catch((err) => {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
});
|
||||
const video = await Video.findByPk(id);
|
||||
if (!video) {
|
||||
throw new Error(t("models.video.notFound"));
|
||||
}
|
||||
video.update({ name, description, metadata, language });
|
||||
}
|
||||
|
||||
private async destroy(event: IpcMainEvent, id: string) {
|
||||
return Video.findOne({
|
||||
where: { id },
|
||||
}).then((video) => {
|
||||
if (!video) {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: t("models.video.notFound"),
|
||||
});
|
||||
}
|
||||
video.destroy();
|
||||
});
|
||||
const video = await Video.findByPk(id);
|
||||
if (!video) {
|
||||
throw new Error(t("models.video.notFound"));
|
||||
}
|
||||
return await video.destroy();
|
||||
}
|
||||
|
||||
private async upload(event: IpcMainEvent, id: string) {
|
||||
const video = await Video.findOne({
|
||||
where: { id },
|
||||
});
|
||||
const video = await Video.findByPk(id);
|
||||
if (!video) {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: t("models.video.notFound"),
|
||||
});
|
||||
throw new Error(t("models.video.notFound"));
|
||||
}
|
||||
|
||||
video
|
||||
.upload()
|
||||
.then((res) => {
|
||||
return res;
|
||||
})
|
||||
.catch((err) => {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
logger.error(err);
|
||||
throw err;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -70,6 +70,9 @@ export class Segment extends Model<Segment> {
|
||||
@Column(DataType.DATE)
|
||||
uploadedAt: Date;
|
||||
|
||||
@Column(DataType.VIRTUAL)
|
||||
target: Audio | Video;
|
||||
|
||||
@BelongsTo(() => Audio, { foreignKey: "targetId", constraints: false })
|
||||
audio: Audio;
|
||||
|
||||
@@ -208,6 +211,22 @@ export class Segment extends Model<Segment> {
|
||||
logger.error("sync error", err);
|
||||
});
|
||||
});
|
||||
|
||||
if (!Array.isArray(segments)) segments = [segments];
|
||||
|
||||
for (const instance of segments) {
|
||||
if (instance.targetType === "Audio" && instance.audio) {
|
||||
instance.target = instance.audio.toJSON();
|
||||
}
|
||||
if (instance.targetType === "Video" && instance.video) {
|
||||
instance.target = instance.video.toJSON();
|
||||
}
|
||||
// To prevent mistakes:
|
||||
delete instance.audio;
|
||||
delete instance.dataValues.audio;
|
||||
delete instance.video;
|
||||
delete instance.dataValues.video;
|
||||
}
|
||||
}
|
||||
|
||||
@AfterCreate
|
||||
|
||||
@@ -147,6 +147,7 @@ class Whipser {
|
||||
};
|
||||
},
|
||||
options?: {
|
||||
language?: string;
|
||||
force?: boolean;
|
||||
extra?: string[];
|
||||
onProgress?: (progress: number) => void;
|
||||
@@ -174,9 +175,13 @@ class Whipser {
|
||||
throw new Error("No file or blob provided");
|
||||
}
|
||||
|
||||
const model = this.currentModel();
|
||||
const { force = false, extra = [], language, onProgress } = options || {};
|
||||
|
||||
const model = this.currentModel();
|
||||
if (language && !language.startsWith("en") && model.name.includes("en")) {
|
||||
throw new Error(`Model ${model.name} does not support ${language}`);
|
||||
}
|
||||
|
||||
const { force = false, extra = [], onProgress } = options || {};
|
||||
const filename = path.basename(file, path.extname(file));
|
||||
const tmpDir = settings.cachePath();
|
||||
const outputFile = path.join(tmpDir, filename + ".json");
|
||||
@@ -197,7 +202,7 @@ class Whipser {
|
||||
path.join(tmpDir, filename),
|
||||
"--print-progress",
|
||||
"--language",
|
||||
model.name.includes("en") ? "en" : "auto",
|
||||
model.name.includes("en") ? "en" : language?.split("-")?.[0] || "auto",
|
||||
...extra,
|
||||
];
|
||||
|
||||
@@ -252,7 +257,7 @@ class Whipser {
|
||||
return this.config;
|
||||
});
|
||||
|
||||
ipcMain.handle("whisper-set-model", async (event, model) => {
|
||||
ipcMain.handle("whisper-set-model", async (_event, model) => {
|
||||
const originalModel = settings.getSync("whisper.model");
|
||||
settings.setSync("whisper.model", model);
|
||||
this.config = settings.whisperConfig();
|
||||
@@ -267,35 +272,22 @@ class Whipser {
|
||||
})
|
||||
.catch((err) => {
|
||||
settings.setSync("whisper.model", originalModel);
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
throw err;
|
||||
});
|
||||
});
|
||||
|
||||
ipcMain.handle("whisper-set-service", async (event, service) => {
|
||||
ipcMain.handle("whisper-set-service", async (_event, service) => {
|
||||
if (service === "local") {
|
||||
try {
|
||||
await this.check();
|
||||
settings.setSync("whisper.service", service);
|
||||
this.config.service = service;
|
||||
return this.config;
|
||||
} catch (err) {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
}
|
||||
await this.check();
|
||||
settings.setSync("whisper.service", service);
|
||||
this.config.service = service;
|
||||
return this.config;
|
||||
} else if (["cloudflare", "azure", "openai"].includes(service)) {
|
||||
settings.setSync("whisper.service", service);
|
||||
this.config.service = service;
|
||||
return this.config;
|
||||
} else {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: "Unknown service",
|
||||
});
|
||||
throw new Error("Unknown service");
|
||||
}
|
||||
});
|
||||
|
||||
@@ -304,19 +296,12 @@ class Whipser {
|
||||
});
|
||||
|
||||
ipcMain.handle("whisper-transcribe", async (event, params, options) => {
|
||||
try {
|
||||
return await this.transcribe(params, {
|
||||
...options,
|
||||
onProgress: (progress) => {
|
||||
event.sender.send("whisper-on-progress", progress);
|
||||
},
|
||||
});
|
||||
} catch (err) {
|
||||
event.sender.send("on-notification", {
|
||||
type: "error",
|
||||
message: err.message,
|
||||
});
|
||||
}
|
||||
return await this.transcribe(params, {
|
||||
...options,
|
||||
onProgress: (progress) => {
|
||||
event.sender.send("whisper-on-progress", progress);
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
ipcMain.handle("whisper-abort", async (_event) => {
|
||||
|
||||
@@ -454,6 +454,7 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", {
|
||||
};
|
||||
},
|
||||
options?: {
|
||||
language?: string;
|
||||
force?: boolean;
|
||||
extra?: string[];
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { Link } from "react-router-dom";
|
||||
import { cn } from "@renderer/lib/utils";
|
||||
import { AudioLinesIcon } from "lucide-react";
|
||||
import { Badge } from "@renderer/components/ui";
|
||||
|
||||
export const AudioCard = (props: {
|
||||
audio: Partial<AudioType>;
|
||||
@@ -12,9 +13,9 @@ export const AudioCard = (props: {
|
||||
<div className={cn("w-full", className)}>
|
||||
<Link to={`/audios/${audio.id}`}>
|
||||
<div
|
||||
className="aspect-square border rounded-lg overflow-hidden flex"
|
||||
className="aspect-square border rounded-lg overflow-hidden flex relative"
|
||||
style={{
|
||||
borderBottomColor: `#${audio.md5.substr(0, 6)}`,
|
||||
borderBottomColor: `#${audio.md5.slice(0, 6)}`,
|
||||
borderBottomWidth: 3,
|
||||
}}
|
||||
>
|
||||
@@ -25,7 +26,11 @@ export const AudioCard = (props: {
|
||||
className="hover:scale-105 object-cover w-full h-full"
|
||||
/>
|
||||
) : (
|
||||
<AudioLinesIcon className="hover:scale-105 object-cover w-1/2 h-1/2 m-auto" />
|
||||
<AudioLinesIcon className="hover:scale-105 object-cover w-1/2 h-1/2 m-auto" />
|
||||
)}
|
||||
|
||||
{audio.language && (
|
||||
<Badge className="absolute left-2 top-2">{audio.language}</Badge>
|
||||
)}
|
||||
</div>
|
||||
</Link>
|
||||
|
||||
@@ -10,6 +10,7 @@ export * from "./preferences";
|
||||
export * from "./pronunciation-assessments";
|
||||
export * from "./recordings";
|
||||
export * from "./stories";
|
||||
export * from "./transcriptions";
|
||||
export * from "./users";
|
||||
export * from "./videos";
|
||||
export * from "./widgets";
|
||||
|
||||
@@ -6,7 +6,6 @@ export * from "./media-current-recording";
|
||||
export * from "./media-recorder";
|
||||
export * from "./media-transcription";
|
||||
export * from "./media-transcription-read-button";
|
||||
export * from "./media-transcription-form";
|
||||
export * from "./media-transcription-generate-button";
|
||||
export * from "./media-player";
|
||||
export * from "./media-provider";
|
||||
|
||||
@@ -37,9 +37,10 @@ export const MediaCaption = () => {
|
||||
editingRegion,
|
||||
setEditingRegion,
|
||||
setTranscriptionDraft,
|
||||
ipaMappings,
|
||||
} = useContext(MediaPlayerProviderContext);
|
||||
const { EnjoyApp, learningLanguage } = useContext(AppSettingsProviderContext);
|
||||
const { EnjoyApp, learningLanguage, ipaMappings } = useContext(
|
||||
AppSettingsProviderContext
|
||||
);
|
||||
const [activeIndex, setActiveIndex] = useState<number>(0);
|
||||
const [selectedIndices, setSelectedIndices] = useState<number[]>([]);
|
||||
const [multiSelecting, setMultiSelecting] = useState<boolean>(false);
|
||||
@@ -366,6 +367,7 @@ export const MediaCaption = () => {
|
||||
>
|
||||
<Caption
|
||||
caption={caption}
|
||||
language={transcription.language}
|
||||
selectedIndices={selectedIndices}
|
||||
currentSegmentIndex={currentSegmentIndex}
|
||||
activeIndex={activeIndex}
|
||||
@@ -428,7 +430,9 @@ export const MediaCaption = () => {
|
||||
t.timeline.map((s) => s.text).join("")
|
||||
);
|
||||
return `${word.text}(${
|
||||
learningLanguage.startsWith("en")
|
||||
(transcription.language || learningLanguage).startsWith(
|
||||
"en"
|
||||
)
|
||||
? convertWordIpaToNormal(ipas, {
|
||||
mappings: ipaMappings,
|
||||
}).join("")
|
||||
@@ -475,6 +479,7 @@ export const MediaCaption = () => {
|
||||
|
||||
export const Caption = (props: {
|
||||
caption: TimelineEntry;
|
||||
language?: string;
|
||||
selectedIndices?: number[];
|
||||
currentSegmentIndex: number;
|
||||
activeIndex?: number;
|
||||
@@ -482,6 +487,11 @@ export const Caption = (props: {
|
||||
displayNotes?: boolean;
|
||||
onClick?: (index: number) => void;
|
||||
}) => {
|
||||
const { currentNotes } = useContext(MediaPlayerProviderContext);
|
||||
const { learningLanguage, ipaMappings } = useContext(
|
||||
AppSettingsProviderContext
|
||||
);
|
||||
const notes = currentNotes.filter((note) => note.parameters?.quoteIndices);
|
||||
const {
|
||||
caption,
|
||||
selectedIndices = [],
|
||||
@@ -491,16 +501,14 @@ export const Caption = (props: {
|
||||
displayNotes,
|
||||
onClick,
|
||||
} = props;
|
||||
const language = props.language || learningLanguage;
|
||||
|
||||
const { currentNotes, ipaMappings } = useContext(MediaPlayerProviderContext);
|
||||
const { learningLanguage } = useContext(AppSettingsProviderContext);
|
||||
const notes = currentNotes.filter((note) => note.parameters?.quoteIndices);
|
||||
const [notedquoteIndices, setNotedquoteIndices] = useState<number[]>([]);
|
||||
|
||||
let words = caption.text.split(" ");
|
||||
const ipas = caption.timeline.map((w) =>
|
||||
w.timeline.map((t) =>
|
||||
learningLanguage.startsWith("en")
|
||||
language.startsWith("en")
|
||||
? convertWordIpaToNormal(
|
||||
t.timeline.map((s) => s.text),
|
||||
{ mappings: ipaMappings }
|
||||
|
||||
@@ -40,8 +40,8 @@ const SelectedWords = (props: {
|
||||
}) => {
|
||||
const { selectedIndices, caption } = props;
|
||||
|
||||
const { transcription, ipaMappings } = useContext(MediaPlayerProviderContext);
|
||||
const { learningLanguage } = useContext(AppSettingsProviderContext);
|
||||
const { transcription } = useContext(MediaPlayerProviderContext);
|
||||
const { learningLanguage, ipaMappings } = useContext(AppSettingsProviderContext);
|
||||
|
||||
const word = selectedIndices
|
||||
.map((index) => caption.timeline[index]?.text || "")
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
import { CheckCircleIcon, LoaderIcon, XCircleIcon } from "lucide-react";
|
||||
import { t } from "i18next";
|
||||
import { useNavigate } from "react-router-dom";
|
||||
import { TranscriptionCreateForm } from "../transcriptions";
|
||||
|
||||
export const MediaLoadingModal = () => {
|
||||
const navigate = useNavigate();
|
||||
@@ -35,7 +36,7 @@ export const MediaLoadingModal = () => {
|
||||
return (
|
||||
<AlertDialog open={!decoded || !Boolean(transcription?.result?.timeline)}>
|
||||
<AlertDialogOverlay className="" />
|
||||
<AlertDialogContent className="z-[100]">
|
||||
<AlertDialogContent className="">
|
||||
<AlertDialogHeader>
|
||||
<AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
|
||||
<AlertDialogDescription>
|
||||
@@ -43,81 +44,54 @@ export const MediaLoadingModal = () => {
|
||||
</AlertDialogDescription>
|
||||
</AlertDialogHeader>
|
||||
|
||||
<div className="py-4">
|
||||
{decoded ? (
|
||||
<div className="mb-4 flex items-center space-x-4">
|
||||
<CheckCircleIcon className="w-4 h-4 text-green-500" />
|
||||
<span>{t("waveformIsDecoded")}</span>
|
||||
</div>
|
||||
) : decodeError ? (
|
||||
<div className="mb-4 flex items-center space-x-4">
|
||||
<div className="w-4 h-4">
|
||||
<XCircleIcon className="w-4 h-4 text-destructive" />
|
||||
</div>
|
||||
<div className="select-text">
|
||||
<div className="mb-2">{decodeError}</div>
|
||||
<div className="text-sm text-muted-foreground">
|
||||
{t("failedToDecodeWaveform")}:{" "}
|
||||
<span className="break-all ">{media?.src}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="mb-4 flex items-center space-x-4">
|
||||
<LoaderIcon className="w-4 h-4 animate-spin" />
|
||||
<span>{t("decodingWaveform")}</span>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!transcription ? (
|
||||
<div className="flex items-center space-x-4">
|
||||
<LoaderIcon className="w-4 h-4 animate-spin" />
|
||||
<span>{t("loadingTranscription")}</span>
|
||||
</div>
|
||||
) : transcription.result?.timeline ? (
|
||||
{decoded ? (
|
||||
transcription?.result?.timeline ? (
|
||||
<div className="flex items-center space-x-4">
|
||||
<CheckCircleIcon className="w-4 h-4 text-green-500" />
|
||||
<span>{t("transcribedSuccessfully")}</span>
|
||||
</div>
|
||||
) : transcribing ? (
|
||||
<div className="">
|
||||
<div className="flex items-center space-x-4 mb-2">
|
||||
<PingPoint colorClassName="bg-yellow-500" />
|
||||
<span>{t("transcribing")}</span>
|
||||
</div>
|
||||
{whisperConfig.service === "local" && (
|
||||
<Progress value={transcribingProgress} />
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex items-center space-x-4">
|
||||
<PingPoint colorClassName="bg-muted" />
|
||||
<div className="inline">
|
||||
<span>{t("notTranscribedYet")}</span>
|
||||
{decoded && (
|
||||
<Button asChild className="ml-4" size="sm">
|
||||
<a
|
||||
className="cursor-pointer"
|
||||
onClick={() =>
|
||||
generateTranscription({
|
||||
originalText: "",
|
||||
})
|
||||
}
|
||||
>
|
||||
{t("regenerate")}
|
||||
</a>
|
||||
</Button>
|
||||
)}
|
||||
<TranscriptionCreateForm
|
||||
onSubmit={(data) => {
|
||||
generateTranscription({
|
||||
originalText: data.text,
|
||||
language: data.language,
|
||||
service: data.service as WhisperConfigType["service"],
|
||||
});
|
||||
}}
|
||||
onCancel={() => navigate(-1)}
|
||||
transcribing={transcribing}
|
||||
transcribingProgress={transcribingProgress}
|
||||
/>
|
||||
)
|
||||
) : (
|
||||
<>
|
||||
{decodeError ? (
|
||||
<div className="mb-4 flex items-center space-x-4">
|
||||
<div className="w-4 h-4">
|
||||
<XCircleIcon className="w-4 h-4 text-destructive" />
|
||||
</div>
|
||||
<div className="select-text">
|
||||
<div className="mb-2">{decodeError}</div>
|
||||
<div className="text-sm text-muted-foreground">
|
||||
{t("failedToDecodeWaveform")}:{" "}
|
||||
<span className="break-all ">{media?.src}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<AlertDialogFooter>
|
||||
<Button variant="secondary" onClick={() => navigate(-1)}>
|
||||
{t("cancel")}
|
||||
</Button>
|
||||
</AlertDialogFooter>
|
||||
) : (
|
||||
<div className="mb-4 flex items-center space-x-4">
|
||||
<LoaderIcon className="w-4 h-4 animate-spin" />
|
||||
<span>{t("decodingWaveform")}</span>
|
||||
</div>
|
||||
)}
|
||||
<AlertDialogFooter>
|
||||
<Button variant="secondary" onClick={() => navigate(-1)}>
|
||||
{t("cancel")}
|
||||
</Button>
|
||||
</AlertDialogFooter>
|
||||
</>
|
||||
)}
|
||||
</AlertDialogContent>
|
||||
</AlertDialog>
|
||||
);
|
||||
|
||||
@@ -1,127 +0,0 @@
|
||||
import { MediaPlayerProviderContext } from "@renderer/context";
|
||||
import {
|
||||
AlertDialog,
|
||||
AlertDialogAction,
|
||||
AlertDialogCancel,
|
||||
AlertDialogContent,
|
||||
AlertDialogDescription,
|
||||
AlertDialogFooter,
|
||||
AlertDialogHeader,
|
||||
AlertDialogTitle,
|
||||
AlertDialogTrigger,
|
||||
Button,
|
||||
Dialog,
|
||||
DialogClose,
|
||||
DialogContent,
|
||||
DialogFooter,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
DialogTrigger,
|
||||
Textarea,
|
||||
toast,
|
||||
} from "@renderer/components/ui";
|
||||
import { TimelineEntry } from "echogarden/dist/utilities/Timeline";
|
||||
import { t } from "i18next";
|
||||
import { useContext, useState } from "react";
|
||||
import { LoaderIcon } from "lucide-react";
|
||||
|
||||
export const MediaTranscriptionForm = (props: {
|
||||
children?: React.ReactNode;
|
||||
}) => {
|
||||
const [open, setOpen] = useState(false);
|
||||
|
||||
return (
|
||||
<Dialog open={open} onOpenChange={setOpen}>
|
||||
<DialogTrigger asChild>
|
||||
{props.children ? (
|
||||
props.children
|
||||
) : (
|
||||
<Button variant="outline" size="sm">
|
||||
<span className="capitalize">{t("edit")}</span>
|
||||
</Button>
|
||||
)}
|
||||
</DialogTrigger>
|
||||
<DialogContent className="max-w-screen-sm xl:max-w-screen-md">
|
||||
<TranscriptionForm setOpen={setOpen} />
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
);
|
||||
};
|
||||
|
||||
export const TranscriptionForm = (props: {
|
||||
setOpen: (value: boolean) => void;
|
||||
}) => {
|
||||
const { setOpen } = props;
|
||||
const [submiting, setSubmiting] = useState(false);
|
||||
const { transcription, generateTranscription } = useContext(
|
||||
MediaPlayerProviderContext
|
||||
);
|
||||
const [content, setContent] = useState<string>(
|
||||
transcription.result.timeline.map((t: TimelineEntry) => t.text).join("\n\n")
|
||||
);
|
||||
|
||||
const handleSave = async () => {
|
||||
setSubmiting(true);
|
||||
try {
|
||||
await generateTranscription({ originalText: content });
|
||||
setOpen(false);
|
||||
} catch (e) {
|
||||
toast.error(e.message);
|
||||
}
|
||||
|
||||
setSubmiting(false);
|
||||
};
|
||||
|
||||
return (
|
||||
<>
|
||||
<DialogHeader>
|
||||
<DialogTitle>{t("editTranscription")}</DialogTitle>
|
||||
</DialogHeader>
|
||||
<div>
|
||||
<Textarea
|
||||
disabled={submiting}
|
||||
className="h-96 text-lg font-serif resize-none"
|
||||
value={content}
|
||||
onChange={(e) => setContent(e.target.value)}
|
||||
/>
|
||||
</div>
|
||||
<DialogFooter>
|
||||
<DialogClose asChild>
|
||||
<Button disabled={submiting} variant="secondary">
|
||||
{t("cancel")}
|
||||
</Button>
|
||||
</DialogClose>
|
||||
|
||||
<AlertDialog>
|
||||
<AlertDialogTrigger asChild>
|
||||
<Button disabled={submiting}>
|
||||
{submiting && <LoaderIcon className="animate-spin w-4 mr-2" />}
|
||||
{t("save")}
|
||||
</Button>
|
||||
</AlertDialogTrigger>
|
||||
<AlertDialogContent>
|
||||
<AlertDialogHeader>
|
||||
<AlertDialogTitle>{t("saveTranscription")}</AlertDialogTitle>
|
||||
<AlertDialogDescription>
|
||||
{t("areYouSureToSaveTranscription")}
|
||||
</AlertDialogDescription>
|
||||
</AlertDialogHeader>
|
||||
<AlertDialogFooter>
|
||||
<AlertDialogCancel disabled={submiting}>
|
||||
{t("cancel")}
|
||||
</AlertDialogCancel>
|
||||
<AlertDialogAction asChild>
|
||||
<Button disabled={submiting} onClick={handleSave}>
|
||||
{submiting && (
|
||||
<LoaderIcon className="animate-spin w-4 mr-2" />
|
||||
)}
|
||||
{t("save")}
|
||||
</Button>
|
||||
</AlertDialogAction>
|
||||
</AlertDialogFooter>
|
||||
</AlertDialogContent>
|
||||
</AlertDialog>
|
||||
</DialogFooter>
|
||||
</>
|
||||
);
|
||||
};
|
||||
@@ -1,28 +1,32 @@
|
||||
import { useContext, useRef, useState } from "react";
|
||||
import { useContext, useState } from "react";
|
||||
import { MediaPlayerProviderContext } from "@renderer/context";
|
||||
import { t } from "i18next";
|
||||
import {
|
||||
Button,
|
||||
AlertDialog,
|
||||
AlertDialogTrigger,
|
||||
AlertDialogFooter,
|
||||
AlertDialogHeader,
|
||||
AlertDialogContent,
|
||||
AlertDialogTitle,
|
||||
AlertDialogDescription,
|
||||
AlertDialogCancel,
|
||||
AlertDialogAction,
|
||||
} from "@renderer/components/ui";
|
||||
import { LoaderIcon } from "lucide-react";
|
||||
import { TranscriptionCreateForm } from "../transcriptions";
|
||||
|
||||
export const MediaTranscriptionGenerateButton = (props: {
|
||||
children: React.ReactNode;
|
||||
}) => {
|
||||
const { media, generateTranscription, transcribing, transcription } =
|
||||
useContext(MediaPlayerProviderContext);
|
||||
const {
|
||||
media,
|
||||
generateTranscription,
|
||||
transcribing,
|
||||
transcription,
|
||||
transcribingProgress,
|
||||
} = useContext(MediaPlayerProviderContext);
|
||||
const [open, setOpen] = useState(false);
|
||||
|
||||
return (
|
||||
<AlertDialog>
|
||||
<AlertDialog open={open} onOpenChange={setOpen}>
|
||||
<AlertDialogTrigger disabled={transcribing} asChild>
|
||||
{props.children ? (
|
||||
props.children
|
||||
@@ -50,18 +54,20 @@ export const MediaTranscriptionGenerateButton = (props: {
|
||||
})}
|
||||
</AlertDialogDescription>
|
||||
</AlertDialogHeader>
|
||||
<AlertDialogFooter>
|
||||
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
|
||||
<AlertDialogAction
|
||||
onClick={() =>
|
||||
generateTranscription({
|
||||
originalText: "",
|
||||
})
|
||||
}
|
||||
>
|
||||
{t("transcribe")}
|
||||
</AlertDialogAction>
|
||||
</AlertDialogFooter>
|
||||
|
||||
<TranscriptionCreateForm
|
||||
onCancel={() => setOpen(false)}
|
||||
onSubmit={(data) => {
|
||||
generateTranscription({
|
||||
originalText: data.text,
|
||||
language: data.language,
|
||||
service: data.service as WhisperConfigType["service"],
|
||||
});
|
||||
setOpen(false);
|
||||
}}
|
||||
transcribing={transcribing}
|
||||
transcribingProgress={transcribingProgress}
|
||||
/>
|
||||
</AlertDialogContent>
|
||||
</AlertDialog>
|
||||
);
|
||||
|
||||
@@ -24,9 +24,9 @@ import {
|
||||
import { AlignmentResult } from "echogarden/dist/api/API.d.js";
|
||||
import { formatDuration } from "@renderer/lib/utils";
|
||||
import {
|
||||
MediaTranscriptionForm,
|
||||
MediaTranscriptionReadButton,
|
||||
MediaTranscriptionGenerateButton,
|
||||
TranscriptionEditButton,
|
||||
} from "@renderer/components";
|
||||
|
||||
export const MediaTranscription = (props: { display?: boolean }) => {
|
||||
@@ -157,11 +157,11 @@ export const MediaTranscription = (props: { display?: boolean }) => {
|
||||
</MediaTranscriptionGenerateButton>
|
||||
</DropdownMenuItem>
|
||||
<DropdownMenuItem asChild>
|
||||
<MediaTranscriptionForm>
|
||||
<TranscriptionEditButton>
|
||||
<Button variant="ghost" className="block w-full">
|
||||
{t("edit")}
|
||||
</Button>
|
||||
</MediaTranscriptionForm>
|
||||
</TranscriptionEditButton>
|
||||
</DropdownMenuItem>
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { TimelineEntry } from "echogarden/dist/utilities/Timeline";
|
||||
import { useState } from "react";
|
||||
import { useContext, useState } from "react";
|
||||
import { WavesurferPlayer } from "@/renderer/components/misc";
|
||||
import { AppSettingsProviderContext } from "@/renderer/context";
|
||||
import { convertWordIpaToNormal } from "@/utils";
|
||||
|
||||
export const NoteSemgent = (props: {
|
||||
segment: SegmentType;
|
||||
@@ -8,12 +10,23 @@ export const NoteSemgent = (props: {
|
||||
}) => {
|
||||
const { segment, notes } = props;
|
||||
const caption: TimelineEntry = segment.caption;
|
||||
const { learningLanguage, ipaMappings } = useContext(
|
||||
AppSettingsProviderContext
|
||||
);
|
||||
|
||||
const [notedquoteIndices, setNotedquoteIndices] = useState<number[]>([]);
|
||||
|
||||
let words = caption.text.split(" ");
|
||||
const language = segment.target?.language || learningLanguage;
|
||||
const ipas = caption.timeline.map((w) =>
|
||||
w.timeline.map((t) => t.timeline.map((s) => s.text))
|
||||
w.timeline.map((t) =>
|
||||
language.startsWith("en")
|
||||
? convertWordIpaToNormal(
|
||||
t.timeline.map((s) => s.text),
|
||||
{ mappings: ipaMappings }
|
||||
).join("")
|
||||
: t.text
|
||||
)
|
||||
);
|
||||
|
||||
if (words.length !== caption.timeline.length) {
|
||||
|
||||
2
enjoy/src/renderer/components/transcriptions/index.ts
Normal file
2
enjoy/src/renderer/components/transcriptions/index.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
export * from "./transcription-create-form";
|
||||
export * from "./transcription-edit-button";
|
||||
@@ -0,0 +1,233 @@
|
||||
import {
|
||||
AISettingsProviderContext,
|
||||
AppSettingsProviderContext,
|
||||
} from "@renderer/context";
|
||||
import { zodResolver } from "@hookform/resolvers/zod";
|
||||
import { useContext } from "react";
|
||||
import { useForm } from "react-hook-form";
|
||||
import { z } from "zod";
|
||||
import {
|
||||
Button,
|
||||
Form,
|
||||
FormField,
|
||||
FormItem,
|
||||
FormLabel,
|
||||
FormMessage,
|
||||
Input,
|
||||
PingPoint,
|
||||
Progress,
|
||||
Select,
|
||||
SelectContent,
|
||||
SelectItem,
|
||||
SelectTrigger,
|
||||
SelectValue,
|
||||
Textarea,
|
||||
toast,
|
||||
} from "@renderer/components/ui";
|
||||
import { t } from "i18next";
|
||||
import { LANGUAGES } from "@/constants";
|
||||
import { LoaderIcon } from "lucide-react";
|
||||
import { parseText } from "media-captions";
|
||||
|
||||
const transcriptionSchema = z.object({
|
||||
language: z.string(),
|
||||
service: z.string(),
|
||||
text: z.string().optional(),
|
||||
});
|
||||
|
||||
export const TranscriptionCreateForm = (props: {
|
||||
onSubmit: (data: z.infer<typeof transcriptionSchema>) => void;
|
||||
onCancel?: () => void;
|
||||
transcribing?: boolean;
|
||||
transcribingProgress?: number;
|
||||
}) => {
|
||||
const {
|
||||
transcribing = false,
|
||||
transcribingProgress = 0,
|
||||
onSubmit,
|
||||
onCancel,
|
||||
} = props;
|
||||
const { learningLanguage } = useContext(AppSettingsProviderContext);
|
||||
const { whisperConfig } = useContext(AISettingsProviderContext);
|
||||
|
||||
const form = useForm<z.infer<typeof transcriptionSchema>>({
|
||||
resolver: zodResolver(transcriptionSchema),
|
||||
values: {
|
||||
language: learningLanguage,
|
||||
service: whisperConfig.service,
|
||||
text: "",
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Reads an uploaded transcript file and extracts its plain text.
 *
 * The file is read as text and handed to `parseText` with the (lower-cased)
 * file extension as the caption type. When the parser yields cues, their text
 * is joined with newlines; when it yields none, the raw content is passed
 * through `cleanSubtitleText` instead. Finally everything inside `()` is
 * removed and the result is trimmed.
 *
 * @param file The file chosen by the user.
 * @returns A promise resolving to the extracted text.
 * @throws (rejects) when the file cannot be read, when parsing throws, or when
 *   no text is left after cleanup. The rejection value is always an `Error`,
 *   so callers can safely display `error.message`.
 */
const parseSubtitle = (file: File) => {
  // Lower-cased so that `.SRT` / `.VTT` select the same parser as `.srt` / `.vtt`.
  const fileType = file.name.split(".").pop()?.toLowerCase();

  return new Promise<string>((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = async () => {
      // The handler is async: without this try/catch a failure inside
      // `parseText` would become an unhandled rejection and the outer promise
      // would never settle.
      try {
        const raw = reader.result;
        if (typeof raw !== "string") {
          reject(new Error("Failed to read file"));
          // Stop here; continuing would feed a non-string to the parser.
          return;
        }

        const caption = await parseText(raw, {
          strict: false,
          type: fileType as "srt" | "vtt",
        });

        const extracted =
          caption.cues.length === 0
            ? cleanSubtitleText(raw)
            : caption.cues.map((cue) => cue.text).join("\n");

        // Remove all content inside `()`
        const text = extracted.replace(/\(.*?\)/g, "").trim();

        // Checked after the cleanup so that a file containing only
        // parenthesised content is reported instead of resolving to "".
        if (text.length === 0) {
          reject(new Error("No text found in the file"));
          return;
        }

        resolve(text);
      } catch (error) {
        reject(error instanceof Error ? error : new Error(String(error)));
      }
    };

    reader.onerror = () => {
      // The event passed to `onerror` has no `message`; the reader's own
      // error does.
      reject(reader.error ?? new Error("Failed to read file"));
    };

    reader.readAsText(file);
  });
};
|
||||
|
||||
/**
 * Strips subtitle noise from raw transcript text.
 *
 * Applied in order:
 * 1. Removes timestamps such as `00:00:00,000` or
 *    `00:00:00.000 --> 00:00:00.000`, together with the whitespace that
 *    follows them.
 * 2. Removes every line that starts with `#`.
 * 3. Removes empty lines.
 * 4. Removes lines that contain only whitespace.
 *
 * @param text Raw file content.
 * @returns The text with the noise above removed.
 */
const cleanSubtitleText = (text: string) => {
  return (
    text
      .replace(
        /(\d{2}:\d{2}:\d{2}[,\.]\d{3}(\s+-->\s+\d{2}:\d{2}:\d{2}[,\.]\d{3})?)\s+/g,
        ""
      )
      // Anchored to the line start (`m` flag) so that a `#` in the middle of a
      // sentence is kept, and `$` also covers a final line without a newline.
      .replace(/^#.*(?:\r?\n|$)/gm, "")
      .replace(/^\s*[\r\n]/gm, "")
      .replace(/^\s+$/gm, "")
  );
};
|
||||
|
||||
return (
|
||||
<Form {...form}>
|
||||
<form
|
||||
onSubmit={form.handleSubmit(onSubmit)}
|
||||
className="gap-4 grid w-full"
|
||||
>
|
||||
<FormField
|
||||
control={form.control}
|
||||
name="service"
|
||||
render={({ field }) => (
|
||||
<FormItem className="grid w-full items-center gap-1.5">
|
||||
<FormLabel>{t("sttAiService")}</FormLabel>
|
||||
<Select
|
||||
disabled={transcribing}
|
||||
value={field.value}
|
||||
onValueChange={field.onChange}
|
||||
>
|
||||
<SelectTrigger>
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
<SelectItem value="local">{t("local")}</SelectItem>
|
||||
<SelectItem value="azure">{t("azureAi")}</SelectItem>
|
||||
<SelectItem value="cloudflare">
|
||||
{t("cloudflareAi")}
|
||||
</SelectItem>
|
||||
<SelectItem value="openai">OpenAI</SelectItem>
|
||||
</SelectContent>
|
||||
</Select>
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
<FormField
|
||||
control={form.control}
|
||||
name="language"
|
||||
render={({ field }) => (
|
||||
<FormItem className="grid w-full items-center gap-1.5">
|
||||
<FormLabel>{t("language")}</FormLabel>
|
||||
<Select
|
||||
disabled={transcribing}
|
||||
value={field.value}
|
||||
onValueChange={field.onChange}
|
||||
>
|
||||
<SelectTrigger>
|
||||
<SelectValue />
|
||||
</SelectTrigger>
|
||||
<SelectContent>
|
||||
{LANGUAGES.map((language) => (
|
||||
<SelectItem key={language.code} value={language.code}>
|
||||
{language.name}
|
||||
</SelectItem>
|
||||
))}
|
||||
</SelectContent>
|
||||
</Select>
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
<FormField
|
||||
control={form.control}
|
||||
name="text"
|
||||
render={({ field }) => (
|
||||
<FormItem className="grid w-full items-center gap-1.5">
|
||||
<FormLabel>
|
||||
{t("uploadTranscriptFile")}({t("optinal")})
|
||||
</FormLabel>
|
||||
<Input
|
||||
disabled={transcribing}
|
||||
type="file"
|
||||
accept=".txt,.srt,.vtt"
|
||||
onChange={async (event) => {
|
||||
const file = event.target.files[0];
|
||||
|
||||
if (file) {
|
||||
parseSubtitle(file)
|
||||
.then((text) => {
|
||||
field.onChange(text);
|
||||
})
|
||||
.catch((error) => {
|
||||
toast.error(error.message);
|
||||
});
|
||||
} else {
|
||||
field.onChange("");
|
||||
}
|
||||
}}
|
||||
/>
|
||||
{field.value && (
|
||||
<Textarea className="h-96" {...field} disabled={transcribing} />
|
||||
)}
|
||||
<FormMessage />
|
||||
</FormItem>
|
||||
)}
|
||||
/>
|
||||
{transcribing && (
|
||||
<div className="mb-4">
|
||||
<div className="flex items-center space-x-4 mb-2">
|
||||
<PingPoint colorClassName="bg-yellow-500" />
|
||||
<span>{t("transcribing")}</span>
|
||||
</div>
|
||||
{whisperConfig.service === "local" && (
|
||||
<Progress value={transcribingProgress} />
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="flex justify-end space-x-4">
|
||||
{onCancel && (
|
||||
<Button type="reset" variant="outline" onClick={onCancel}>
|
||||
{t("cancel")}
|
||||
</Button>
|
||||
)}
|
||||
<Button disabled={transcribing} type="submit" variant="default">
|
||||
{transcribing && <LoaderIcon className="animate-spin w-4 mr-2" />}
|
||||
{t("transcribe")}
|
||||
</Button>
|
||||
</div>
|
||||
</form>
|
||||
</Form>
|
||||
);
|
||||
};
|
||||
@@ -0,0 +1,115 @@
|
||||
import { MediaPlayerProviderContext } from "@renderer/context";
|
||||
import {
|
||||
AlertDialog,
|
||||
AlertDialogAction,
|
||||
AlertDialogCancel,
|
||||
AlertDialogContent,
|
||||
AlertDialogDescription,
|
||||
AlertDialogFooter,
|
||||
AlertDialogHeader,
|
||||
AlertDialogTitle,
|
||||
AlertDialogTrigger,
|
||||
Button,
|
||||
Dialog,
|
||||
DialogClose,
|
||||
DialogContent,
|
||||
DialogFooter,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
DialogTrigger,
|
||||
Textarea,
|
||||
toast,
|
||||
} from "@renderer/components/ui";
|
||||
import { TimelineEntry } from "echogarden/dist/utilities/Timeline";
|
||||
import { t } from "i18next";
|
||||
import { useContext, useState } from "react";
|
||||
import { LoaderIcon } from "lucide-react";
|
||||
|
||||
/**
 * Button that opens a dialog for editing the current transcription by hand.
 *
 * The dialog shows the transcription's timeline entries joined by blank lines
 * in a textarea. Saving asks for confirmation and then calls
 * `generateTranscription` with the edited text as `originalText`.
 *
 * @param props.children Optional custom trigger element; a small outlined
 *   "edit" button is rendered when omitted.
 */
export const TranscriptionEditButton = (props: {
  children?: React.ReactNode;
}) => {
  const [open, setOpen] = useState(false);
  const [submiting, setSubmiting] = useState(false);
  const { transcription, generateTranscription } = useContext(
    MediaPlayerProviderContext
  );
  // Guarded because the transcription (or its result) may not be loaded yet
  // when this component first renders.
  const [content, setContent] = useState<string>(() =>
    (transcription?.result?.timeline ?? [])
      .map((entry: TimelineEntry) => entry.text)
      .join("\n\n")
  );

  const handleSave = async () => {
    setSubmiting(true);
    try {
      await generateTranscription({
        originalText: content,
        // Keep the language the transcription was created with; when omitted,
        // generateTranscription falls back to the learning language.
        language: transcription?.language,
      });
      setOpen(false);
    } catch (e) {
      toast.error(e instanceof Error ? e.message : String(e));
    } finally {
      setSubmiting(false);
    }
  };

  return (
    <Dialog open={open} onOpenChange={setOpen}>
      <DialogTrigger asChild>
        {props.children ? (
          props.children
        ) : (
          <Button variant="outline" size="sm">
            <span className="capitalize">{t("edit")}</span>
          </Button>
        )}
      </DialogTrigger>
      <DialogContent className="max-w-screen-sm xl:max-w-screen-md">
        <DialogHeader>
          <DialogTitle>{t("editTranscription")}</DialogTitle>
        </DialogHeader>
        <div>
          <Textarea
            disabled={submiting}
            className="h-96 text-lg font-serif resize-none"
            value={content}
            onChange={(e) => setContent(e.target.value)}
          />
        </div>
        <DialogFooter>
          <DialogClose asChild>
            <Button disabled={submiting} variant="secondary">
              {t("cancel")}
            </Button>
          </DialogClose>

          {/* Saving regenerates the transcription, so confirm first. */}
          <AlertDialog>
            <AlertDialogTrigger asChild>
              <Button disabled={submiting}>
                {submiting && <LoaderIcon className="animate-spin w-4 mr-2" />}
                {t("save")}
              </Button>
            </AlertDialogTrigger>
            <AlertDialogContent>
              <AlertDialogHeader>
                <AlertDialogTitle>{t("saveTranscription")}</AlertDialogTitle>
                <AlertDialogDescription>
                  {t("areYouSureToSaveTranscription")}
                </AlertDialogDescription>
              </AlertDialogHeader>
              <AlertDialogFooter>
                <AlertDialogCancel disabled={submiting}>
                  {t("cancel")}
                </AlertDialogCancel>
                <AlertDialogAction asChild>
                  <Button disabled={submiting} onClick={handleSave}>
                    {submiting && (
                      <LoaderIcon className="animate-spin w-4 mr-2" />
                    )}
                    {t("save")}
                  </Button>
                </AlertDialogAction>
              </AlertDialogFooter>
            </AlertDialogContent>
          </AlertDialog>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
};
|
||||
@@ -1,5 +1,5 @@
|
||||
import { createContext, useEffect, useState } from "react";
|
||||
import { WEB_API_URL, LANGUAGES } from "@/constants";
|
||||
import { WEB_API_URL, LANGUAGES, IPA_MAPPINGS } from "@/constants";
|
||||
import { Client } from "@/api";
|
||||
import i18n from "@renderer/i18n";
|
||||
import ahoy from "ahoy.js";
|
||||
@@ -26,6 +26,8 @@ type AppSettingsProviderState = {
|
||||
setProxy?: (config: ProxyConfigType) => Promise<void>;
|
||||
cable?: Consumer;
|
||||
ahoy?: typeof ahoy;
|
||||
// remote config
|
||||
ipaMappings?: { [key: string]: string };
|
||||
};
|
||||
|
||||
const initialState: AppSettingsProviderState = {
|
||||
@@ -53,6 +55,9 @@ export const AppSettingsProvider = ({
|
||||
const [learningLanguage, setLearningLanguage] = useState<string>("en-US");
|
||||
const [proxy, setProxy] = useState<ProxyConfigType>();
|
||||
const EnjoyApp = window.__ENJOY_APP__;
|
||||
const [ipaMappings, setIpaMappings] = useState<{ [key: string]: string }>(
|
||||
IPA_MAPPINGS
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
fetchVersion();
|
||||
@@ -82,6 +87,14 @@ export const AppSettingsProvider = ({
|
||||
});
|
||||
}, [apiUrl]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!webApi) return;
|
||||
|
||||
webApi.config("ipa_mappings").then((mappings) => {
|
||||
if (mappings) setIpaMappings(mappings);
|
||||
});
|
||||
}, [webApi]);
|
||||
|
||||
const fetchLanguages = async () => {
|
||||
const language = await EnjoyApp.settings.getLanguage();
|
||||
setLanguage(language as "en" | "zh-CN");
|
||||
@@ -206,6 +219,7 @@ export const AppSettingsProvider = ({
|
||||
initialized: Boolean(user && libraryPath),
|
||||
ahoy,
|
||||
cable,
|
||||
ipaMappings,
|
||||
}}
|
||||
>
|
||||
{children}
|
||||
|
||||
@@ -16,7 +16,6 @@ import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";
|
||||
import { toast } from "@renderer/components/ui";
|
||||
import { Tooltip } from "react-tooltip";
|
||||
import { debounce } from "lodash";
|
||||
import { IPA_MAPPINGS } from "@/constants";
|
||||
|
||||
type MediaPlayerContextType = {
|
||||
layout: {
|
||||
@@ -69,6 +68,7 @@ type MediaPlayerContextType = {
|
||||
generateTranscription: (params?: {
|
||||
originalText?: string;
|
||||
language?: string;
|
||||
service?: WhisperConfigType["service"];
|
||||
}) => void;
|
||||
transcribing: boolean;
|
||||
transcribingProgress: number;
|
||||
@@ -89,8 +89,6 @@ type MediaPlayerContextType = {
|
||||
// Segments
|
||||
currentSegment: SegmentType;
|
||||
createSegment: () => Promise<SegmentType | void>;
|
||||
// remote config
|
||||
ipaMappings: { [key: string]: string };
|
||||
getCachedSegmentIndex: () => Promise<number>;
|
||||
setCachedSegmentIndex: (index: number) => void;
|
||||
};
|
||||
@@ -169,10 +167,6 @@ export const MediaPlayerProvider = ({
|
||||
const [transcriptionDraft, setTranscriptionDraft] =
|
||||
useState<TranscriptionType["result"]>();
|
||||
|
||||
const [ipaMappings, setIpaMappings] = useState<{ [key: string]: string }>(
|
||||
IPA_MAPPINGS
|
||||
);
|
||||
|
||||
const {
|
||||
transcription,
|
||||
generateTranscription,
|
||||
@@ -364,7 +358,7 @@ export const MediaPlayerProvider = ({
|
||||
);
|
||||
labels[index] = [
|
||||
labels[index] || "",
|
||||
learningLanguage.startsWith("en")
|
||||
(media?.language || learningLanguage).startsWith("en")
|
||||
? convertIpaToNormal(phone.text.trim())
|
||||
: phone.text.trim(),
|
||||
].join("");
|
||||
@@ -575,10 +569,6 @@ export const MediaPlayerProvider = ({
|
||||
useEffect(() => {
|
||||
calculateHeight();
|
||||
|
||||
webApi.config("ipa_mappings").then((mappings) => {
|
||||
if (mappings) setIpaMappings(mappings);
|
||||
});
|
||||
|
||||
EnjoyApp.window.onResize(() => {
|
||||
deboundeCalculateHeight();
|
||||
});
|
||||
@@ -635,7 +625,6 @@ export const MediaPlayerProvider = ({
|
||||
createNote,
|
||||
currentSegment: segment,
|
||||
createSegment,
|
||||
ipaMappings,
|
||||
getCachedSegmentIndex,
|
||||
setCachedSegmentIndex,
|
||||
}}
|
||||
|
||||
@@ -12,10 +12,8 @@ import { AlignmentResult } from "echogarden/dist/api/API.d.js";
|
||||
import { useAiCommand } from "./use-ai-command";
|
||||
|
||||
export const useTranscribe = () => {
|
||||
const { EnjoyApp, user, webApi, learningLanguage } = useContext(
|
||||
AppSettingsProviderContext
|
||||
);
|
||||
const { whisperConfig, openai } = useContext(AISettingsProviderContext);
|
||||
const { EnjoyApp, user, webApi } = useContext(AppSettingsProviderContext);
|
||||
const { openai } = useContext(AISettingsProviderContext);
|
||||
const { punctuateText } = useAiCommand();
|
||||
|
||||
const transcode = async (src: string | Blob): Promise<string> => {
|
||||
@@ -36,7 +34,8 @@ export const useTranscribe = () => {
|
||||
targetId?: string;
|
||||
targetType?: string;
|
||||
originalText?: string;
|
||||
language?: string;
|
||||
language: string;
|
||||
service: WhisperConfigType["service"];
|
||||
}
|
||||
): Promise<{
|
||||
engine: string;
|
||||
@@ -45,12 +44,8 @@ export const useTranscribe = () => {
|
||||
originalText?: string;
|
||||
}> => {
|
||||
const url = await transcode(mediaSrc);
|
||||
const {
|
||||
targetId,
|
||||
targetType,
|
||||
originalText,
|
||||
language = learningLanguage.split("-")[0],
|
||||
} = params || {};
|
||||
const { targetId, targetType, originalText, language, service } =
|
||||
params || {};
|
||||
const blob = await (await fetch(url)).blob();
|
||||
|
||||
let result;
|
||||
@@ -59,19 +54,30 @@ export const useTranscribe = () => {
|
||||
engine: "original",
|
||||
model: "original",
|
||||
};
|
||||
} else if (whisperConfig.service === "local") {
|
||||
result = await transcribeByLocal(url);
|
||||
} else if (whisperConfig.service === "cloudflare") {
|
||||
} else if (service === "local") {
|
||||
result = await transcribeByLocal(url, language);
|
||||
} else if (service === "cloudflare") {
|
||||
result = await transcribeByCloudflareAi(blob);
|
||||
} else if (whisperConfig.service === "openai") {
|
||||
} else if (service === "openai") {
|
||||
result = await transcribeByOpenAi(blob);
|
||||
} else if (whisperConfig.service === "azure") {
|
||||
result = await transcribeByAzureAi(blob, { targetId, targetType });
|
||||
} else if (service === "azure") {
|
||||
result = await transcribeByAzureAi(blob, language, {
|
||||
targetId,
|
||||
targetType,
|
||||
});
|
||||
} else {
|
||||
throw new Error(t("whisperServiceNotSupported"));
|
||||
}
|
||||
|
||||
let transcript = originalText || result.text;
|
||||
|
||||
// Remove all content inside `()`, `[]`, `{}` and trim the text
|
||||
transcript = transcript
|
||||
.replace(/\(.*?\)/g, "")
|
||||
.replace(/\[.*?\]/g, "")
|
||||
.replace(/\{.*?\}/g, "")
|
||||
.trim();
|
||||
|
||||
// if the transcript does not contain any punctuation, use AI command to add punctuation
|
||||
if (!transcript.match(/\w[.,!?](\s|$)/)) {
|
||||
try {
|
||||
@@ -96,12 +102,13 @@ export const useTranscribe = () => {
|
||||
};
|
||||
};
|
||||
|
||||
const transcribeByLocal = async (url: string) => {
|
||||
const transcribeByLocal = async (url: string, language?: string) => {
|
||||
const res = await EnjoyApp.whisper.transcribe(
|
||||
{
|
||||
file: url,
|
||||
},
|
||||
{
|
||||
language,
|
||||
force: true,
|
||||
extra: ["--prompt", `"Hello! Welcome to listen to this audio."`],
|
||||
}
|
||||
@@ -157,6 +164,7 @@ export const useTranscribe = () => {
|
||||
|
||||
const transcribeByAzureAi = async (
|
||||
blob: Blob,
|
||||
language: string,
|
||||
params?: {
|
||||
targetId?: string;
|
||||
targetType?: string;
|
||||
@@ -172,7 +180,7 @@ export const useTranscribe = () => {
|
||||
new File([blob], "audio.wav")
|
||||
);
|
||||
// set the recognition language to the requested language, such as 'en-US'.
|
||||
config.speechRecognitionLanguage = learningLanguage;
|
||||
config.speechRecognitionLanguage = language;
|
||||
config.requestWordLevelTimestamps();
|
||||
config.outputFormat = sdk.OutputFormat.Detailed;
|
||||
|
||||
|
||||
@@ -11,7 +11,9 @@ import { MAGIC_TOKEN_REGEX, END_OF_SENTENCE_REGEX } from "@/constants";
|
||||
|
||||
export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
const { whisperConfig } = useContext(AISettingsProviderContext);
|
||||
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
|
||||
const { EnjoyApp, webApi, learningLanguage } = useContext(
|
||||
AppSettingsProviderContext
|
||||
);
|
||||
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
|
||||
const [transcription, setTranscription] = useState<TranscriptionType>(null);
|
||||
const { transcribe } = useTranscribe();
|
||||
@@ -55,8 +57,13 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
const generateTranscription = async (params?: {
|
||||
originalText?: string;
|
||||
language?: string;
|
||||
service?: WhisperConfigType["service"];
|
||||
}) => {
|
||||
let { originalText, language } = params || {};
|
||||
let {
|
||||
originalText,
|
||||
language = learningLanguage,
|
||||
service = whisperConfig.service,
|
||||
} = params || {};
|
||||
if (originalText === undefined) {
|
||||
if (transcription?.targetId === media.id) {
|
||||
originalText = transcription.result?.originalText;
|
||||
@@ -76,6 +83,7 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
targetType: media.mediaType,
|
||||
originalText,
|
||||
language,
|
||||
service,
|
||||
});
|
||||
|
||||
let timeline: TimelineEntry[] = [];
|
||||
@@ -169,7 +177,20 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
},
|
||||
engine,
|
||||
model,
|
||||
language,
|
||||
});
|
||||
|
||||
if (media.language !== language) {
|
||||
if (media.mediaType === "Video") {
|
||||
await EnjoyApp.videos.update(media.id, {
|
||||
language,
|
||||
});
|
||||
} else {
|
||||
await EnjoyApp.audios.update(media.id, {
|
||||
language,
|
||||
});
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
toast.error(err.message);
|
||||
}
|
||||
@@ -234,12 +255,12 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
|
||||
addDblistener(onTransactionUpdate);
|
||||
|
||||
if (
|
||||
transcription.state == "pending" ||
|
||||
!transcription.result?.["timeline"]
|
||||
) {
|
||||
findOrGenerateTranscription();
|
||||
}
|
||||
// if (
|
||||
// transcription.state == "pending" ||
|
||||
// !transcription.result?.["timeline"]
|
||||
// ) {
|
||||
// findOrGenerateTranscription();
|
||||
// }
|
||||
|
||||
if (whisperConfig.service === "local") {
|
||||
EnjoyApp.whisper.onProgress((_, p: number) => {
|
||||
|
||||
1
enjoy/src/types/audio.d.ts
vendored
1
enjoy/src/types/audio.d.ts
vendored
@@ -4,6 +4,7 @@ type AudioType = {
|
||||
source: string;
|
||||
name: string;
|
||||
filename: string;
|
||||
language?: string;
|
||||
description?: string;
|
||||
src?: string;
|
||||
coverUrl?: string;
|
||||
|
||||
1
enjoy/src/types/enjoy-app.d.ts
vendored
1
enjoy/src/types/enjoy-app.d.ts
vendored
@@ -264,6 +264,7 @@ type EnjoyAppType = {
|
||||
blob?: { type: string; arrayBuffer: ArrayBuffer };
|
||||
},
|
||||
options?: {
|
||||
language?: string;
|
||||
force?: boolean;
|
||||
extra?: string[];
|
||||
}
|
||||
|
||||
9
enjoy/src/types/segment.d.ts
vendored
9
enjoy/src/types/segment.d.ts
vendored
@@ -2,6 +2,7 @@ type SegmentType = {
|
||||
id: string;
|
||||
targetId: string;
|
||||
targetType: string;
|
||||
target: AudioType | VideoType;
|
||||
caption: TimelineEntry;
|
||||
audio?: AudioType;
|
||||
video?: VideoType;
|
||||
@@ -14,7 +15,7 @@ type SegmentType = {
|
||||
isSynced?: boolean;
|
||||
isUploaded?: boolean;
|
||||
syncedAt?: Date;
|
||||
uploadedAt?: Date
|
||||
updatedAt: Date
|
||||
createdAt: Date
|
||||
};
|
||||
uploadedAt?: Date;
|
||||
updatedAt: Date;
|
||||
createdAt: Date;
|
||||
};
|
||||
|
||||
1
enjoy/src/types/transcription.d.ts
vendored
1
enjoy/src/types/transcription.d.ts
vendored
@@ -6,6 +6,7 @@ type TranscriptionType = {
|
||||
state: "pending" | "processing" | "finished";
|
||||
engine: string;
|
||||
model: string;
|
||||
language?: string;
|
||||
result: AlignmentResult & { original?: string };
|
||||
};
|
||||
|
||||
|
||||
8
enjoy/src/types/video.d.ts
vendored
8
enjoy/src/types/video.d.ts
vendored
@@ -4,15 +4,15 @@ type VideoType = {
|
||||
source: string;
|
||||
name: string;
|
||||
filename: string;
|
||||
language?: string;
|
||||
description?: string;
|
||||
filename?: string;
|
||||
src?: string;
|
||||
coverUrl?: string;
|
||||
md5: string;
|
||||
src?: string;
|
||||
metadata?: Ffmpeg.FfprobeData;
|
||||
duration?: number;
|
||||
transcribed: boolean;
|
||||
transcribing: boolean;
|
||||
transcribed?: boolean;
|
||||
transcribing?: boolean;
|
||||
recordingsCount?: number;
|
||||
recordingsDuration?: number;
|
||||
isUploaded?: boolean;
|
||||
|
||||
Reference in New Issue
Block a user