Feat: denoise recording & clean code (#473)

* denoise recording before it is saved

* Refactor audio processing and recording logic

* Remove unused code

* use echogarden to transcode

* remove ffmpeg-wasm

* add echogarden decode

* remove deprecated code

* ensure use posix path

* refactor echogarden transcode

* refactor recording denoise

* clean code

* expose align error in toast

* remove unused code
This commit is contained in:
an-lee
2024-04-02 11:10:19 +08:00
committed by GitHub
parent 69258e0e7d
commit 265429a24e
21 changed files with 185 additions and 777 deletions

View File

@@ -20,9 +20,8 @@
"test:renderer": "yarn run playwright test e2e/renderer.spec.ts",
"create-migration": "zx ./src/main/db/create-migration.mjs",
"download-whisper-model": "zx ./scripts/download-whisper-model.mjs",
"download-ffmpeg-wasm": "zx ./scripts/download-ffmpeg-wasm.mjs",
"download-dictionaries": "zx ./scripts/download-dictionaries.mjs",
"download": "yarn run download-whisper-model && yarn run download-ffmpeg-wasm && yarn run download-dictionaries"
"download": "yarn run download-whisper-model && yarn run download-dictionaries"
},
"keywords": [],
"author": {
@@ -82,8 +81,6 @@
},
"dependencies": {
"@andrkrn/ffprobe-static": "^5.2.0",
"@ffmpeg/ffmpeg": "^0.12.10",
"@ffmpeg/util": "^0.12.1",
"@hookform/resolvers": "^3.3.4",
"@langchain/community": "^0.0.43",
"@langchain/google-genai": "^0.0.10",

View File

@@ -1,162 +0,0 @@
#!/usr/bin/env zx
import axios from "axios";
import { createHash } from "crypto";
import { HttpsProxyAgent } from "https-proxy-agent";
console.info(chalk.blue("=> Download ffmpeg wasm files"));
const files = [
{
name: "ffmpeg-core.wasm",
md5: "ff1676d6a417d1162dba70dbe8dfd354",
},
{
name: "ffmpeg-core.worker.js",
md5: "09dc7f1cd71bb52bd9afc22afdf1f6da",
},
{
name: "ffmpeg-core.js",
md5: "30296628fd78e4ef1c939f36c1d31527",
},
];
const pendingFiles = [];
const dir = path.join(process.cwd(), "assets/libs");
fs.ensureDirSync(dir);
await Promise.all(
files.map(async (file) => {
try {
if (fs.statSync(path.join(dir, file.name)).isFile()) {
console.info(chalk.green(`✅ File ${file.name} already exists`));
const hash = await hashFile(path.join(dir, file.name), { algo: "md5" });
if (hash === file.md5) {
console.info(chalk.green(`✅ File ${file.name} valid`));
} else {
console.warn(
chalk.yellow(`❌ File ${file.name} not valid, start to redownload`)
);
fs.removeSync(path.join(dir, file.name));
pendingFiles.push(file);
}
} else {
pendingFiles.push(file);
}
} catch (err) {
if (err && err.code !== "ENOENT") {
console.error(chalk.red(`❌ Error: ${err}`));
process.exit(1);
}
pendingFiles.push(file);
}
})
);
if (pendingFiles.length === 0) {
console.info(chalk.green("✅ All files already exist"));
process.exit(0);
} else {
console.info(chalk.blue(`=> Start to download ${pendingFiles.length} files`));
}
const proxyUrl =
process.env.HTTPS_PROXY ||
process.env.https_proxy ||
process.env.HTTP_PROXY ||
process.env.http_proxy;
if (proxyUrl) {
const { hostname, port, protocol } = new URL(proxyUrl);
const httpsAgent = new HttpsProxyAgent(proxyUrl);
axios.defaults.proxy = {
host: hostname,
port: port,
protocol: protocol,
};
axios.defaults.httpsAgent = httpsAgent;
console.info(chalk.blue(`=> Use proxy: ${proxyUrl}`));
}
const download = async (url, dest, md5) => {
console.info(chalk.blue(`=> Start to download ${url} to ${dest}`));
return spinner(async () => {
console.info(chalk.blue(`=> Start to download file ${url}`));
await axios
.get(url, {
responseType: "arraybuffer",
})
.then(async (response) => {
const data = Buffer.from(response.data, "binary");
console.info(chalk.green(`${dest} downloaded successfully`));
fs.writeFileSync(dest, data);
const hash = await hashFile(dest, { algo: "md5" });
if (hash === md5) {
console.info(chalk.green(`${dest} valid`));
} else {
console.error(
chalk.red(
`❌ Error: ${dest} not valid. \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
process.exit(1);
}
})
.catch((err) => {
console.error(
chalk.red(
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
process.exit(1);
});
});
};
function hashFile(file, options) {
const algo = options.algo || "md5";
return new Promise((resolve, reject) => {
const hash = createHash(algo);
const stream = fs.createReadStream(file);
stream.on("error", reject);
stream.on("data", (chunk) => hash.update(chunk));
stream.on("end", () => resolve(hash.digest("hex")));
});
}
const cleanup = () => {
files.forEach((file) => {
try {
fs.removeSync(path.join(dir, file.name));
} catch (err) {
console.error(
chalk.red(
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
}
});
};
// const baseURL = "https://unpkg.com/@ffmpeg/core-mt@0.12.6/dist/esm";
const baseURL = "https://enjoy-storage.baizhiheizi.com";
try {
await Promise.all(
pendingFiles.map((file) =>
download(`${baseURL}/${file.name}`, path.join(dir, file.name), file.md5)
)
);
} catch (err) {
console.error(
chalk.red(
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
cleanup();
process.exit(1);
}
console.info(chalk.green("✅ All files downloaded successfully"));
process.exit(0);

View File

@@ -25,7 +25,7 @@ import storage from "@main/storage";
import { Client } from "@/api";
import { WEB_API_URL } from "@/constants";
import { AzureSpeechSdk } from "@main/azure-speech-sdk";
import Ffmpeg from "@main/ffmpeg";
import echogarden from "@main/echogarden";
import camelcaseKeys from "camelcase-keys";
const logger = log.scope("db/models/recording");
@@ -307,32 +307,38 @@ export class Recording extends Model<Recording> {
throw new Error("Empty recording");
}
const format = blob.type.split("/")[1]?.split(";")?.[0];
if (!format) {
throw new Error("Unknown recording format");
}
const file = path.join(
settings.userDataPath(),
"recordings",
`${Date.now()}.${format}`
// denoise audio
const { denoisedAudio } = await echogarden.denoise(
Buffer.from(blob.arrayBuffer),
{}
);
await fs.outputFile(file, Buffer.from(blob.arrayBuffer));
try {
const ffmpeg = new Ffmpeg();
const metadata = await ffmpeg.generateMetadata(file);
duration = Math.floor(metadata.format.duration * 1000);
} catch (err) {
logger.error(err);
}
// trim audio
let trimmedSamples = echogarden.trimAudioStart(
denoisedAudio.audioChannels[0]
);
trimmedSamples = echogarden.trimAudioEnd(trimmedSamples);
denoisedAudio.audioChannels[0] = trimmedSamples;
duration = Math.round(echogarden.getRawAudioDuration(denoisedAudio) * 1000);
if (duration === 0) {
throw new Error("Failed to get duration of the recording");
}
// save recording to file
const file = path.join(
settings.userDataPath(),
"recordings",
`${Date.now()}.wav`
);
await fs.outputFile(file, echogarden.encodeWaveBuffer(denoisedAudio));
// hash file
const md5 = await hashFile(file, { algo: "md5" });
const filename = `${md5}.${format}`;
// rename file
const filename = `${md5}.wav`;
fs.renameSync(file, path.join(path.dirname(file), filename));
return this.create(

View File

@@ -2,12 +2,23 @@ import { ipcMain } from "electron";
import * as Echogarden from "echogarden/dist/api/API.js";
import { AlignmentOptions } from "echogarden/dist/api/API";
import { AudioSourceParam } from "echogarden/dist/audio/AudioUtilities";
import {
encodeWaveBuffer,
decodeWaveBuffer,
ensureRawAudio,
getRawAudioDuration,
trimAudioStart,
trimAudioEnd,
} from "echogarden/dist/audio/AudioUtilities.js";
import path from "path";
import log from "@main/logger";
import url from "url";
import settings from "@main/settings";
import fs from "fs-extra";
import ffmpegPath from "ffmpeg-static";
import { enjoyUrlToPath, hashFile, pathToEnjoyUrl } from "./utils";
import { extractFrequencies } from "@/utils";
import waveform from "./waveform";
Echogarden.setGlobalOption(
"ffmpegPath",
@@ -25,9 +36,23 @@ const __dirname = path
const logger = log.scope("echogarden");
class EchogardenWrapper {
public align: typeof Echogarden.align;
public denoise: typeof Echogarden.denoise;
public encodeWaveBuffer: typeof encodeWaveBuffer;
public decodeWaveBuffer: typeof decodeWaveBuffer;
public ensureRawAudio: typeof ensureRawAudio;
public getRawAudioDuration: typeof getRawAudioDuration;
public trimAudioStart: typeof trimAudioStart;
public trimAudioEnd: typeof trimAudioEnd;
constructor() {
this.align = Echogarden.align;
this.denoise = Echogarden.denoise;
this.encodeWaveBuffer = encodeWaveBuffer;
this.decodeWaveBuffer = decodeWaveBuffer;
this.ensureRawAudio = ensureRawAudio;
this.getRawAudioDuration = getRawAudioDuration;
this.trimAudioStart = trimAudioStart;
this.trimAudioEnd = trimAudioEnd;
}
async check() {
@@ -52,11 +77,27 @@ class EchogardenWrapper {
}
}
/**
* Transcodes the audio file at the enjoy:// protocol URL into a WAV format.
* @param url - The URL of the audio file to transcode.
* @returns A promise that resolves to the enjoy:// protocol URL of the transcoded WAV file.
*/
async transcode(url: string, sampleRate = 16000): Promise<string> {
const filePath = enjoyUrlToPath(url);
const rawAudio = await this.ensureRawAudio(filePath, sampleRate);
const audioBuffer = this.encodeWaveBuffer(rawAudio);
const outputFilePath = path.join(settings.cachePath(), `${Date.now()}.wav`);
fs.writeFileSync(outputFilePath, audioBuffer);
return pathToEnjoyUrl(outputFilePath);
}
registerIpcHandlers() {
ipcMain.handle(
"echogarden-align",
async (
event,
_event,
input: AudioSourceParam,
transcript: string,
options: AlignmentOptions
@@ -65,10 +106,19 @@ class EchogardenWrapper {
return await this.align(input, transcript, options);
} catch (err) {
logger.error(err);
event.sender.send("on-notification", {
type: "error",
message: err.message,
});
throw err;
}
}
);
ipcMain.handle(
"echogarden-transcode",
async (_event, url: string, sampleRate?: number) => {
try {
return await this.transcode(url, sampleRate);
} catch (err) {
logger.error(err);
throw err;
}
}
);

View File

@@ -5,9 +5,10 @@ import Ffmpeg from "fluent-ffmpeg";
import log from "@main/logger";
import path from "path";
import fs from "fs-extra";
import settings from "./settings";
import settings from "@main/settings";
import url from "url";
import { FFMPEG_CONVERT_WAV_OPTIONS } from "@/constants";
import { enjoyUrlToPath, pathToEnjoyUrl } from "@main/utils";
/*
* ffmpeg and ffprobe bin file will be in /app.asar.unpacked instead of /app.asar
@@ -189,27 +190,12 @@ export default class FfmpegWrapper {
output?: string,
options?: string[]
): Promise<string> {
if (input.match(/enjoy:\/\/library\/(audios|videos|recordings)/g)) {
input = path.join(
settings.userDataPath(),
input.replace("enjoy://library/", "")
);
} else if (input.startsWith("enjoy://library/")) {
input = path.join(
settings.libraryPath(),
input.replace("enjoy://library/", "")
);
}
input = enjoyUrlToPath(input);
if (!output) {
output = path.join(settings.cachePath(), `${path.basename(input)}.wav`);
}
if (output.startsWith("enjoy://library/")) {
output = path.join(
settings.libraryPath(),
output.replace("enjoy://library/", "")
);
} else {
output = enjoyUrlToPath(output);
}
options = options || FFMPEG_CONVERT_WAV_OPTIONS;
@@ -234,7 +220,7 @@ export default class FfmpegWrapper {
}
if (fs.existsSync(output)) {
resolve(output);
resolve(pathToEnjoyUrl(output));
} else {
reject(new Error("FFmpeg command failed"));
}

View File

@@ -1,5 +1,7 @@
import { createHash } from "crypto";
import { createReadStream } from "fs";
import settings from "./settings";
import path from "path";
export function hashFile(
path: string,
@@ -36,3 +38,53 @@ export function hashBlob(
reader.readAsArrayBuffer(blob);
});
}
/*
* Convert enjoy url to file path
*
* @param {string} enjoyUrl - enjoy url
* @returns {string} file path
*/
export function enjoyUrlToPath(enjoyUrl: string): string {
let filePath = enjoyUrl;
if (
enjoyUrl.match(/enjoy:\/\/library\/(audios|videos|recordings|speeches)/g)
) {
filePath = path.posix.join(
settings.userDataPath(),
enjoyUrl.replace("enjoy://library/", "")
);
} else if (enjoyUrl.startsWith("enjoy://library/")) {
filePath = path.posix.join(
settings.libraryPath(),
filePath.replace("enjoy://library/", "")
);
}
return filePath;
}
/*
* Convert file path to enjoy url
*
* @param {string} filePath - file path
* @returns {string} enjoy url
*/
export function pathToEnjoyUrl(filePath: string): string {
let enjoyUrl = filePath;
if (filePath.startsWith(settings.userDataPath())) {
enjoyUrl = `enjoy://library/${filePath.replace(
settings.userDataPath(),
""
)}`;
} else if (filePath.startsWith(settings.libraryPath())) {
enjoyUrl = `enjoy://library/${filePath.replace(
settings.libraryPath(),
""
)}`;
}
return enjoyUrl;
}

View File

@@ -36,3 +36,5 @@ export class Waveform {
});
}
}
export default new Waveform();

View File

@@ -6,6 +6,7 @@ import { exec, spawn } from "child_process";
import fs from "fs-extra";
import log from "@main/logger";
import url from "url";
import { enjoyUrlToPath } from "./utils";
const __filename = url.fileURLToPath(import.meta.url);
/*
@@ -150,23 +151,23 @@ class Whipser {
const { blob } = params;
let { file } = params;
if (!file && !blob) {
throw new Error("No file or blob provided");
}
const model = this.currentModel();
if (blob) {
if (file) {
file = enjoyUrlToPath(file);
} else if (blob) {
const format = blob.type.split("/")[1];
if (format !== "wav") {
throw new Error("Only wav format is supported");
}
file = path.join(settings.cachePath(), `${Date.now()}.${format}`);
await fs.outputFile(file, Buffer.from(blob.arrayBuffer));
} else {
throw new Error("No file or blob provided");
}
const model = this.currentModel();
const { force = false, extra = [], onProgress } = options || {};
const filename = path.basename(file, path.extname(file));
const tmpDir = settings.cachePath();

View File

@@ -370,6 +370,9 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", {
align: (input: string, transcript: string, options: any) => {
return ipcRenderer.invoke("echogarden-align", input, transcript, options);
},
transcode: (input: string) => {
return ipcRenderer.invoke("echogarden-transcode", input);
},
check: () => {
return ipcRenderer.invoke("echogarden-check");
},

View File

@@ -6,10 +6,8 @@ import {
import RecordPlugin from "wavesurfer.js/dist/plugins/record";
import WaveSurfer from "wavesurfer.js";
import { t } from "i18next";
import { useTranscribe } from "@renderer/hooks";
import { toast } from "@renderer/components/ui";
import { MediaRecordButton } from "@renderer/components";
import { FFMPEG_CONVERT_WAV_OPTIONS } from "@/constants";
export const MediaRecorder = () => {
const {
@@ -23,7 +21,6 @@ export const MediaRecorder = () => {
const [access, setAccess] = useState<boolean>(false);
const [duration, setDuration] = useState<number>(0);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const { transcode } = useTranscribe();
const ref = useRef(null);
@@ -45,12 +42,6 @@ export const MediaRecorder = () => {
toast.promise(
async () => {
let output: Blob;
output = await transcode(blob, [
// ...FFMPEG_TRIM_SILENCE_OPTIONS,
...FFMPEG_CONVERT_WAV_OPTIONS,
]);
const currentSegment =
transcription?.result?.timeline?.[currentSegmentIndex];
if (!currentSegment) return;
@@ -59,8 +50,8 @@ export const MediaRecorder = () => {
targetId: media.id,
targetType: media.mediaType,
blob: {
type: output.type.split(";")[0],
arrayBuffer: await output.arrayBuffer(),
type: blob.type.split(";")[0],
arrayBuffer: await blob.arrayBuffer(),
},
referenceId: currentSegmentIndex,
referenceText: currentSegment.text,

View File

@@ -252,7 +252,7 @@ export const AssistantMessageComponent = (props: {
<DownloadIcon
data-tooltip-id="global-tooltip"
data-tooltip-content={t("download")}
data-testid="message-download"
data-testid="message-download-speech"
onClick={handleDownload}
className="w-3 h-3 cursor-pointer"
/>

View File

@@ -1,5 +1,3 @@
export * from "./recordings-list";
export * from "./recording-card";
export * from "./recording-player";
export * from "./recording-calendar";
export * from "./recording-activities";

View File

@@ -1,186 +0,0 @@
import { useState, useContext } from "react";
import { AppSettingsProviderContext } from "@/renderer/context";
import { RecordingPlayer } from "@renderer/components";
import {
AlertDialog,
AlertDialogHeader,
AlertDialogTrigger,
AlertDialogDescription,
AlertDialogTitle,
AlertDialogContent,
AlertDialogFooter,
AlertDialogCancel,
AlertDialogAction,
Button,
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
toast,
} from "@renderer/components/ui";
import {
MoreHorizontalIcon,
Trash2Icon,
Share2Icon,
GaugeCircleIcon,
} from "lucide-react";
import { formatDateTime, secondsToTimestamp } from "@renderer/lib/utils";
import { t } from "i18next";
export const RecordingCard = (props: {
recording: RecordingType;
id?: string;
onSelect?: () => void;
}) => {
const { recording, id, onSelect } = props;
const [isDeleteDialogOpen, setIsDeleteDialogOpen] = useState(false);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const [isPlaying, setIsPlaying] = useState(false);
const handleDelete = () => {
EnjoyApp.recordings.destroy(recording.id);
};
const handleShare = async () => {
if (!recording.uploadedAt) {
try {
await EnjoyApp.recordings.upload(recording.id);
} catch (error) {
toast.error(t("shareFailed"), { description: error.message });
return;
}
}
webApi
.createPost({
targetId: recording.id,
targetType: "Recording",
})
.then(() => {
toast.success(t("sharedSuccessfully"), {
description: t("sharedRecording"),
});
})
.catch((error) => {
toast.error(t("shareFailed"), {
description: error.message,
});
});
};
return (
<div id={id} className="flex items-center justify-end px-4 transition-all">
<div className="w-full">
<div className="bg-background rounded-lg py-2 px-4 relative mb-1">
<div className="flex items-center justify-end space-x-2">
<span className="text-xs text-muted-foreground">
{secondsToTimestamp(recording.duration / 1000)}
</span>
</div>
<RecordingPlayer
recording={recording}
isPlaying={isPlaying}
setIsPlaying={setIsPlaying}
/>
<div className="flex items-center justify-end space-x-2">
<Button
data-tooltip-id="global-tooltip"
data-tooltip-content={t("pronunciationAssessment")}
data-tooltip-place="bottom"
onClick={onSelect}
variant="ghost"
size="sm"
className="p-1 h-6"
>
<GaugeCircleIcon
className={`w-4 h-4
${
recording.pronunciationAssessment
? recording.pronunciationAssessment
.pronunciationScore >= 80
? "text-green-500"
: recording.pronunciationAssessment
.pronunciationScore >= 60
? "text-yellow-600"
: "text-red-500"
: "text-muted-foreground"
}
`}
/>
</Button>
<AlertDialog>
<AlertDialogTrigger asChild>
<Button
data-tooltip-id="global-tooltip"
data-tooltip-content={t("share")}
data-tooltip-place="bottom"
variant="ghost"
size="sm"
className="p-1 h-6"
>
<Share2Icon className="w-4 h-4 text-muted-foreground" />
</Button>
</AlertDialogTrigger>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("shareRecording")}</AlertDialogTitle>
<AlertDialogDescription>
{t("areYouSureToShareThisRecordingToCommunity")}
</AlertDialogDescription>
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<AlertDialogAction asChild>
<Button onClick={handleShare}>{t("share")}</Button>
</AlertDialogAction>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
<DropdownMenu>
<DropdownMenuTrigger>
<MoreHorizontalIcon className="w-4 h-4 text-muted-foreground" />
</DropdownMenuTrigger>
<DropdownMenuContent>
<DropdownMenuItem onClick={() => setIsDeleteDialogOpen(true)}>
<span className="mr-auto text-destructive capitalize">
{t("delete")}
</span>
<Trash2Icon className="w-4 h-4 text-destructive" />
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</div>
</div>
<div className="flex justify-end">
<span className="text-xs text-muted-foreground">
{formatDateTime(recording.createdAt)}
</span>
</div>
</div>
<AlertDialog
open={isDeleteDialogOpen}
onOpenChange={(value) => setIsDeleteDialogOpen(value)}
>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("deleteRecording")}</AlertDialogTitle>
<AlertDialogDescription>
{t("deleteRecordingConfirmation")}
</AlertDialogDescription>
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<Button variant="destructive" onClick={handleDelete}>
{t("delete")}
</Button>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
</div>
);
};

View File

@@ -1,203 +0,0 @@
import {
RecordButton,
RecordingCard,
RecordingDetail,
} from "@renderer/components";
import {
Button,
Sheet,
SheetContent,
SheetHeader,
SheetClose,
} from "@renderer/components/ui";
import { useEffect, useState, useRef, useContext, useReducer } from "react";
import { LoaderIcon, ChevronDownIcon } from "lucide-react";
import { t } from "i18next";
import {
DbProviderContext,
AppSettingsProviderContext,
} from "@renderer/context";
import { recordingsReducer } from "@renderer/reducers";
export const RecordingsList = (props: {
targetId: string;
targetType: "Audio" | "Video";
referenceId: number;
referenceText: string;
}) => {
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const { targetId, targetType, referenceId, referenceText } = props;
const containerRef = useRef<HTMLDivElement>();
const [recordings, dispatchRecordings] = useReducer(recordingsReducer, []);
const [selected, setSelected] = useState<RecordingType | null>(null);
const [loading, setLoading] = useState(false);
const [offset, setOffest] = useState(0);
const scrollToRecording = (recording: RecordingType) => {
if (!containerRef.current) return;
if (!recording) return;
setTimeout(() => {
containerRef.current
.querySelector(`#recording-${recording.id}`)
?.scrollIntoView({
behavior: "smooth",
} as ScrollIntoViewOptions);
}, 500);
};
const onRecordingsUpdate = (event: CustomEvent) => {
const { model, action, record } = event.detail || {};
if (model === "PronunciationAssessment" && action === "create") {
const recording = recordings.find((r) => r.id === record.targetId);
if (!recording) return;
recording.pronunciationAssessment = record;
dispatchRecordings({
type: "update",
record: recording,
});
}
if (model != "Recording") return;
if (action === "destroy") {
dispatchRecordings({
type: "destroy",
record,
});
} else if (action === "create") {
if ((record as RecordingType).targetId !== targetId) return;
dispatchRecordings({
type: "create",
record,
});
scrollToRecording(record);
}
};
const createRecording = async (blob: Blob, duration: number) => {
if (typeof referenceId !== "number") return;
EnjoyApp.recordings.create({
targetId,
targetType,
blob: {
type: blob.type.split(";")[0],
arrayBuffer: await blob.arrayBuffer(),
},
referenceId,
referenceText,
duration,
});
};
useEffect(() => {
addDblistener(onRecordingsUpdate);
return () => {
removeDbListener(onRecordingsUpdate);
};
}, [recordings]);
useEffect(() => {
fetchRecordings();
}, [targetId, targetType, referenceId]);
const fetchRecordings = async () => {
setLoading(true);
const limit = 10;
EnjoyApp.recordings
.findAll({
limit,
offset,
where: { targetId, targetType, referenceId },
})
.then((_recordings) => {
if (_recordings.length === 0) {
setOffest(-1);
return;
}
if (_recordings.length < limit) {
setOffest(-1);
} else {
setOffest(offset + _recordings.length);
}
dispatchRecordings({
type: "append",
records: _recordings,
});
scrollToRecording(_recordings[0]);
})
.finally(() => {
setLoading(false);
});
};
return (
<>
<div ref={containerRef} className="">
{offset > -1 && (
<div className="flex items-center justify-center my-4">
<Button variant="ghost" onClick={fetchRecordings}>
{t("loadMore")}
{loading && (
<LoaderIcon className="w-6 h-6 animate-spin text-muted-foreground" />
)}
</Button>
</div>
)}
<div className="flex flex-col-reverse space-y-4">
<div className="w-full h-24"></div>
{recordings.map((recording) => (
<RecordingCard
id={`recording-${recording.id}`}
key={recording.id}
recording={recording}
onSelect={() => setSelected(recording)}
/>
))}
</div>
<div className="z-50 bottom-16 left-1/2 w-0 h-0 absolute flex items-center justify-center">
{referenceId !== undefined && Boolean(referenceText) && (
<RecordButton
disabled={referenceId == undefined || !referenceText}
onRecordEnd={createRecording}
/>
)}
</div>
</div>
<Sheet
open={!!selected}
onOpenChange={(value) => {
if (!value) setSelected(null);
}}
>
<SheetContent
side="bottom"
className="rounded-t-2xl shadow-lg"
displayClose={false}
>
<SheetHeader className="flex items-center justify-center -mt-4 mb-2">
<SheetClose>
<ChevronDownIcon />
</SheetClose>
</SheetHeader>
<RecordingDetail recording={selected} />
</SheetContent>
</Sheet>
</>
);
};

View File

@@ -1,10 +1,7 @@
import { createContext, useEffect, useState, useRef } from "react";
import { toast } from "@renderer/components/ui";
import { createContext, useEffect, useState } from "react";
import { WEB_API_URL } from "@/constants";
import { Client } from "@/api";
import i18n from "@renderer/i18n";
import { FFmpeg } from "@ffmpeg/ffmpeg";
import { toBlobURL } from "@ffmpeg/util";
import ahoy from "ahoy.js";
type AppSettingsProviderState = {
@@ -17,8 +14,6 @@ type AppSettingsProviderState = {
login?: (user: UserType) => void;
logout?: () => void;
setLibraryPath?: (path: string) => Promise<void>;
ffmpegWasm?: FFmpeg;
ffmpegValid?: boolean;
EnjoyApp?: EnjoyAppType;
language?: "en" | "zh-CN";
switchLanguage?: (language: "en" | "zh-CN") => void;
@@ -46,20 +41,15 @@ export const AppSettingsProvider = ({
const [webApi, setWebApi] = useState<Client>(null);
const [user, setUser] = useState<UserType | null>(null);
const [libraryPath, setLibraryPath] = useState("");
const [ffmpegWasm, setFfmpegWasm] = useState<FFmpeg>(null);
const [ffmpegValid, setFfmpegValid] = useState<boolean>(false);
const [language, setLanguage] = useState<"en" | "zh-CN">();
const [proxy, setProxy] = useState<ProxyConfigType>();
const EnjoyApp = window.__ENJOY_APP__;
const ffmpegRef = useRef(new FFmpeg());
useEffect(() => {
fetchVersion();
fetchUser();
fetchLibraryPath();
fetchLanguage();
prepareFfmpeg();
fetchProxyConfig();
}, []);
@@ -83,50 +73,6 @@ export const AppSettingsProvider = ({
});
}, [apiUrl]);
const prepareFfmpeg = async () => {
try {
const valid = await EnjoyApp.ffmpeg.check();
setFfmpegValid(valid);
} catch (err) {
console.error(err);
toast.error(err.message);
}
loadFfmpegWASM();
};
const loadFfmpegWASM = async () => {
const baseURL = "assets/libs";
ffmpegRef.current.on("log", ({ message }) => {
console.log(message);
});
const coreURL = await toBlobURL(
`${baseURL}/ffmpeg-core.js`,
"text/javascript"
);
const wasmURL = await toBlobURL(
`${baseURL}/ffmpeg-core.wasm`,
"application/wasm"
);
const workerURL = await toBlobURL(
`${baseURL}/ffmpeg-core.worker.js`,
"text/javascript"
);
try {
await ffmpegRef.current.load({
coreURL,
wasmURL,
workerURL,
});
setFfmpegWasm(ffmpegRef.current);
(window as any).ffmpeg = ffmpegRef.current;
} catch (err) {
toast.error(err.message);
}
};
const fetchLanguage = async () => {
const language = await EnjoyApp.settings.getLanguage();
setLanguage(language as "en" | "zh-CN");
@@ -211,8 +157,6 @@ export const AppSettingsProvider = ({
logout,
libraryPath,
setLibraryPath: setLibraryPathHandler,
ffmpegValid,
ffmpegWasm,
proxy,
setProxy: setProxyConfigHandler,
initialized: Boolean(user && libraryPath),

View File

@@ -4,9 +4,7 @@ import {
} from "@renderer/context";
import OpenAI from "openai";
import { useContext } from "react";
import { toast } from "@renderer/components/ui";
import { t } from "i18next";
import { fetchFile } from "@ffmpeg/util";
import { AI_WORKER_ENDPOINT } from "@/constants";
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import axios from "axios";
@@ -15,63 +13,21 @@ import sortedUniqBy from "lodash/sortedUniqBy";
import { groupTranscription, milisecondsToTimestamp } from "@/utils";
import { END_OF_SENTENCE_REGEX } from "@/constants";
import { AlignmentResult } from "echogarden/dist/api/API.d.js";
import { FFMPEG_CONVERT_WAV_OPTIONS } from "@/constants";
export const useTranscribe = () => {
const { EnjoyApp, ffmpegWasm, ffmpegValid, user, webApi } = useContext(
AppSettingsProviderContext
);
const { EnjoyApp, user, webApi } = useContext(AppSettingsProviderContext);
const { whisperConfig, openai } = useContext(AISettingsProviderContext);
const transcode = async (src: string | Blob, options?: string[]) => {
if (ffmpegValid) {
if (src instanceof Blob) {
src = await EnjoyApp.cacheObjects.writeFile(
`${Date.now()}.${src.type.split("/")[1].split(";")[0]}`,
await src.arrayBuffer()
);
}
const output = `enjoy://library/cache/${src
.split("/")
.pop()
.split(";")
.shift()}.wav`;
await EnjoyApp.ffmpeg.transcode(src, output, options);
const data = await fetchFile(output);
return new Blob([data], { type: "audio/wav" });
} else {
return transcodeUsingWasm(src, options);
const transcode = async (src: string | Blob): Promise<string> => {
if (src instanceof Blob) {
src = await EnjoyApp.cacheObjects.writeFile(
`${Date.now()}.${src.type.split("/")[1].split(";")[0]}`,
await src.arrayBuffer()
);
}
};
const transcodeUsingWasm = async (src: string | Blob, options?: string[]) => {
if (!ffmpegWasm?.loaded) return;
options = options || FFMPEG_CONVERT_WAV_OPTIONS;
try {
let uri: URL;
if (src instanceof Blob) {
uri = new URL(URL.createObjectURL(src));
} else {
uri = new URL(src);
}
const input = uri.pathname.split("/").pop();
let output: string;
if (src instanceof Blob) {
output = input + ".wav";
} else {
output = input.replace(/\.[^/.]+$/, ".wav");
}
await ffmpegWasm.writeFile(input, await fetchFile(src));
await ffmpegWasm.exec(["-i", input, ...options, output]);
const data = await ffmpegWasm.readFile(output);
return new Blob([data], { type: "audio/wav" });
} catch (e) {
toast.error(t("transcodeError"));
}
const output = await EnjoyApp.echogarden.transcode(src);
return output;
};
const transcribe = async (
@@ -87,8 +43,9 @@ export const useTranscribe = () => {
alignmentResult: AlignmentResult;
originalText?: string;
}> => {
const blob = await transcode(mediaSrc);
const url = await transcode(mediaSrc);
const { targetId, targetType, originalText } = params || {};
const blob = await (await fetch(url)).blob();
let result;
if (originalText) {
@@ -97,7 +54,7 @@ export const useTranscribe = () => {
model: "original",
};
} else if (whisperConfig.service === "local") {
result = await transcribeByLocal(blob);
result = await transcribeByLocal(url);
} else if (whisperConfig.service === "cloudflare") {
result = await transcribeByCloudflareAi(blob);
} else if (whisperConfig.service === "openai") {
@@ -120,13 +77,10 @@ export const useTranscribe = () => {
};
};
const transcribeByLocal = async (blob: Blob) => {
const transcribeByLocal = async (url: string) => {
const res = await EnjoyApp.whisper.transcribe(
{
blob: {
type: blob.type.split(";")[0],
arrayBuffer: await blob.arrayBuffer(),
},
file: url,
},
{
force: true,

View File

@@ -8,7 +8,6 @@ import {
import { toast } from "@renderer/components/ui";
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";
import { MAGIC_TOKEN_REGEX, END_OF_SENTENCE_REGEX } from "@/constants";
import { t } from "i18next";
export const useTranscriptions = (media: AudioType | VideoType) => {
const { whisperConfig } = useContext(AISettingsProviderContext);
@@ -74,19 +73,15 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
});
let timeline: TimelineEntry[] = [];
if (alignmentResult) {
alignmentResult.timeline.forEach((t) => {
if (t.type === "sentence") {
timeline.push(t);
} else {
t.timeline.forEach((st) => {
timeline.push(st);
});
}
});
} else {
throw new Error(t("forceAlignmentFailed"));
}
alignmentResult.timeline.forEach((t) => {
if (t.type === "sentence") {
timeline.push(t);
} else {
t.timeline.forEach((st) => {
timeline.push(st);
});
}
});
/*
* Pre-process

View File

@@ -219,6 +219,7 @@ type EnjoyAppType = {
transcript: string,
options?: any
) => Promise<AlignmentResult>;
transcode: (input: string) => Promise<string>;
check: () => Promise<boolean>;
};
whisper: {

View File

@@ -23,7 +23,11 @@ export default defineConfig((env) => {
formats: ["es"],
},
rollupOptions: {
external: [...external, "echogarden/dist/api/API.js"],
external: [
...external,
"echogarden/dist/api/API.js",
"echogarden/dist/audio/AudioUtilities.js",
],
output: {
strict: false,
},

View File

@@ -2043,29 +2043,6 @@ __metadata:
languageName: node
linkType: hard
"@ffmpeg/ffmpeg@npm:^0.12.10":
version: 0.12.10
resolution: "@ffmpeg/ffmpeg@npm:0.12.10"
dependencies:
"@ffmpeg/types": "npm:^0.12.2"
checksum: 10c0/224185b24b4fe9d3d1d6d17e741205793f74b29cce04435d6c25e5d17b7e12b608e876a64ad33d3e92114d4f0079f4e2e4d809632b354ad1a5f775f5bdb1b8e6
languageName: node
linkType: hard
"@ffmpeg/types@npm:^0.12.2":
version: 0.12.2
resolution: "@ffmpeg/types@npm:0.12.2"
checksum: 10c0/5c3f250c7ed828a3f66073504e3e92ee9f7cd73ddf3bbdf777263710beb6e757da3d7178b2bae29bef6d602abbc21b561bcc659455420d3aed1b30a535ca1d0a
languageName: node
linkType: hard
"@ffmpeg/util@npm:^0.12.1":
version: 0.12.1
resolution: "@ffmpeg/util@npm:0.12.1"
checksum: 10c0/943cc8b886cdfd3c448d3618acc3fe02abda46472c11aefb9e90a4a814962142632e90ab475374357fddb6ba8176757dbbe434147e0cd1a8fcd5133f95bee0a6
languageName: node
linkType: hard
"@floating-ui/core@npm:^1.0.0":
version: 1.6.0
resolution: "@floating-ui/core@npm:1.6.0"
@@ -9007,8 +8984,6 @@ __metadata:
"@electron-forge/plugin-vite": "npm:^7.3.1"
"@electron-forge/publisher-github": "npm:^7.3.1"
"@electron/fuses": "npm:^1.8.0"
"@ffmpeg/ffmpeg": "npm:^0.12.10"
"@ffmpeg/util": "npm:^0.12.1"
"@hookform/resolvers": "npm:^3.3.4"
"@langchain/community": "npm:^0.0.43"
"@langchain/google-genai": "npm:^0.0.10"