From 265429a24e7aba1888c610064e419fae239fce05 Mon Sep 17 00:00:00 2001 From: an-lee Date: Tue, 2 Apr 2024 11:10:19 +0800 Subject: [PATCH] Feat: denoise recording & clean code (#473) * denoise recording before saved * Refactor audio processing and recording logic * Remove unused code * use echogarden to transcode * remove ffmpeg-wasm * add echogarden decode * remove deprecated code * ensure use posix path * refactor echogarden transcode * refactor recording denoise * clean code * expose align error in toast * remove unused code --- enjoy/assets/libs/.keep | 0 enjoy/package.json | 5 +- enjoy/scripts/download-ffmpeg-wasm.mjs | 162 -------------- enjoy/src/main/db/models/recording.ts | 44 ++-- enjoy/src/main/echogarden.ts | 60 +++++- enjoy/src/main/ffmpeg.ts | 26 +-- enjoy/src/main/utils.ts | 52 +++++ enjoy/src/main/waveform.ts | 2 + enjoy/src/main/whisper.ts | 15 +- enjoy/src/preload.ts | 3 + .../components/medias/media-recorder.tsx | 13 +- .../components/messages/assistant-message.tsx | 2 +- .../renderer/components/recordings/index.ts | 2 - .../components/recordings/recording-card.tsx | 186 ---------------- .../components/recordings/recordings-list.tsx | 203 ------------------ .../context/app-settings-provider.tsx | 58 +---- enjoy/src/renderer/hooks/use-transcribe.tsx | 74 ++----- .../src/renderer/hooks/use-transcriptions.tsx | 23 +- enjoy/src/types/enjoy-app.d.ts | 1 + enjoy/vite.main.config.ts | 6 +- yarn.lock | 25 --- 21 files changed, 185 insertions(+), 777 deletions(-) delete mode 100644 enjoy/assets/libs/.keep delete mode 100755 enjoy/scripts/download-ffmpeg-wasm.mjs delete mode 100644 enjoy/src/renderer/components/recordings/recording-card.tsx delete mode 100644 enjoy/src/renderer/components/recordings/recordings-list.tsx diff --git a/enjoy/assets/libs/.keep b/enjoy/assets/libs/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/enjoy/package.json b/enjoy/package.json index a563831e..7735bffa 100644 --- a/enjoy/package.json +++ b/enjoy/package.json @@ -20,9 +20,8 @@ "test:renderer": "yarn run playwright test e2e/renderer.spec.ts", "create-migration": "zx ./src/main/db/create-migration.mjs", "download-whisper-model": "zx ./scripts/download-whisper-model.mjs", - "download-ffmpeg-wasm": "zx ./scripts/download-ffmpeg-wasm.mjs", "download-dictionaries": "zx ./scripts/download-dictionaries.mjs", - "download": "yarn run download-whisper-model && yarn run download-ffmpeg-wasm && yarn run download-dictionaries" + "download": "yarn run download-whisper-model && yarn run download-dictionaries" }, "keywords": [], "author": { @@ -82,8 +81,6 @@ }, "dependencies": { "@andrkrn/ffprobe-static": "^5.2.0", - "@ffmpeg/ffmpeg": "^0.12.10", - "@ffmpeg/util": "^0.12.1", "@hookform/resolvers": "^3.3.4", "@langchain/community": "^0.0.43", "@langchain/google-genai": "^0.0.10", diff --git a/enjoy/scripts/download-ffmpeg-wasm.mjs b/enjoy/scripts/download-ffmpeg-wasm.mjs deleted file mode 100755 index 8eb38195..00000000 --- a/enjoy/scripts/download-ffmpeg-wasm.mjs +++ /dev/null @@ -1,162 +0,0 @@ -#!/usr/bin/env zx - -import axios from "axios"; -import { createHash } from "crypto"; -import { HttpsProxyAgent } from "https-proxy-agent"; - -console.info(chalk.blue("=> Download ffmpeg wasm files")); - -const files = [ - { - name: "ffmpeg-core.wasm", - md5: "ff1676d6a417d1162dba70dbe8dfd354", - }, - { - name: "ffmpeg-core.worker.js", - md5: "09dc7f1cd71bb52bd9afc22afdf1f6da", - }, - { - name: "ffmpeg-core.js", - md5: "30296628fd78e4ef1c939f36c1d31527", - }, -]; -const pendingFiles = []; -const dir = path.join(process.cwd(), "assets/libs"); -fs.ensureDirSync(dir); - -await Promise.all( - files.map(async (file) => { - try { - if (fs.statSync(path.join(dir, file.name)).isFile()) { - console.info(chalk.green(`✅ File ${file.name} already exists`)); - - const hash = await hashFile(path.join(dir, file.name), { algo: "md5" }); - if (hash === file.md5) { - console.info(chalk.green(`✅ File ${file.name} valid`)); - } else { - console.warn( - chalk.yellow(`❌ File ${file.name} not valid, start to redownload`) - ); - fs.removeSync(path.join(dir, file.name)); - pendingFiles.push(file); - } - } else { - pendingFiles.push(file); - } - } catch (err) { - if (err && err.code !== "ENOENT") { - console.error(chalk.red(`❌ Error: ${err}`)); - process.exit(1); - } - pendingFiles.push(file); - } - }) -); - -if (pendingFiles.length === 0) { - console.info(chalk.green("✅ All files already exist")); - process.exit(0); -} else { - console.info(chalk.blue(`=> Start to download ${pendingFiles.length} files`)); -} - -const proxyUrl = - process.env.HTTPS_PROXY || - process.env.https_proxy || - process.env.HTTP_PROXY || - process.env.http_proxy; - -if (proxyUrl) { - const { hostname, port, protocol } = new URL(proxyUrl); - const httpsAgent = new HttpsProxyAgent(proxyUrl); - axios.defaults.proxy = { - host: hostname, - port: port, - protocol: protocol, - }; - axios.defaults.httpsAgent = httpsAgent; - console.info(chalk.blue(`=> Use proxy: ${proxyUrl}`)); -} - -const download = async (url, dest, md5) => { - console.info(chalk.blue(`=> Start to download ${url} to ${dest}`)); - - return spinner(async () => { - console.info(chalk.blue(`=> Start to download file ${url}`)); - await axios - .get(url, { - responseType: "arraybuffer", - }) - .then(async (response) => { - const data = Buffer.from(response.data, "binary"); - console.info(chalk.green(`✅ ${dest} downloaded successfully`)); - - fs.writeFileSync(dest, data); - const hash = await hashFile(dest, { algo: "md5" }); - if (hash === md5) { - console.info(chalk.green(`✅ ${dest} valid`)); - } else { - console.error( - chalk.red( - `❌ Error: ${dest} not valid. \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"` - ) - ); - process.exit(1); - } - }) - .catch((err) => { - console.error( - chalk.red( - `❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"` - ) - ); - process.exit(1); - }); - }); -}; - -function hashFile(file, options) { - const algo = options.algo || "md5"; - return new Promise((resolve, reject) => { - const hash = createHash(algo); - const stream = fs.createReadStream(file); - stream.on("error", reject); - stream.on("data", (chunk) => hash.update(chunk)); - stream.on("end", () => resolve(hash.digest("hex"))); - }); -} - -const cleanup = () => { - files.forEach((file) => { - try { - fs.removeSync(path.join(dir, file.name)); - } catch (err) { - console.error( - chalk.red( - `❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"` - ) - ); - } - }); -}; - -// const baseURL = "https://unpkg.com/@ffmpeg/core-mt@0.12.6/dist/esm"; -const baseURL = "https://enjoy-storage.baizhiheizi.com"; -try { - await Promise.all( - pendingFiles.map((file) => - download(`${baseURL}/${file.name}`, path.join(dir, file.name), file.md5) - ) - ); -} catch (err) { - console.error( - chalk.red( - `❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"` - ) - ); - cleanup(); - process.exit(1); -} - -console.info(chalk.green("✅ All files downloaded successfully")); -process.exit(0); diff --git a/enjoy/src/main/db/models/recording.ts b/enjoy/src/main/db/models/recording.ts index 849fc983..9bc598df 100644 --- a/enjoy/src/main/db/models/recording.ts +++ b/enjoy/src/main/db/models/recording.ts @@ -25,7 +25,7 @@ import storage from "@main/storage"; import { Client } from "@/api"; import { WEB_API_URL } from "@/constants"; import { AzureSpeechSdk } from "@main/azure-speech-sdk"; -import Ffmpeg from "@main/ffmpeg"; +import echogarden from "@main/echogarden"; import camelcaseKeys from "camelcase-keys"; const logger = log.scope("db/models/recording"); @@ -307,32 +307,38 @@ export class Recording extends Model { throw new Error("Empty recording"); } - const format = blob.type.split("/")[1]?.split(";")?.[0]; - if (!format) { - throw new Error("Unknown recording format"); - } - - const file = path.join( - settings.userDataPath(), - "recordings", - `${Date.now()}.${format}` + // denoise audio + const { denoisedAudio } = await echogarden.denoise( + Buffer.from(blob.arrayBuffer), + {} ); - await fs.outputFile(file, Buffer.from(blob.arrayBuffer)); - try { - const ffmpeg = new Ffmpeg(); - const metadata = await ffmpeg.generateMetadata(file); - duration = Math.floor(metadata.format.duration * 1000); - } catch (err) { - logger.error(err); - } + // trim audio + let trimmedSamples = echogarden.trimAudioStart( + denoisedAudio.audioChannels[0] + ); + trimmedSamples = echogarden.trimAudioEnd(trimmedSamples); + denoisedAudio.audioChannels[0] = trimmedSamples; + + duration = Math.round(echogarden.getRawAudioDuration(denoisedAudio) * 1000); if (duration === 0) { throw new Error("Failed to get duration of the recording"); } + // save recording to file + const file = path.join( + settings.userDataPath(), + "recordings", + `${Date.now()}.wav` + ); + await fs.outputFile(file, echogarden.encodeWaveBuffer(denoisedAudio)); + + // hash file const md5 = await hashFile(file, { algo: "md5" }); - const filename = `${md5}.${format}`; + + // rename file + const filename = `${md5}.wav`; fs.renameSync(file, path.join(path.dirname(file), filename)); return this.create( diff --git a/enjoy/src/main/echogarden.ts b/enjoy/src/main/echogarden.ts index 1b15af67..063f2391 100644 --- a/enjoy/src/main/echogarden.ts +++ b/enjoy/src/main/echogarden.ts @@ -2,12 +2,23 @@ import { ipcMain } from "electron"; import * as Echogarden from "echogarden/dist/api/API.js"; import { AlignmentOptions } from "echogarden/dist/api/API"; import { AudioSourceParam } from "echogarden/dist/audio/AudioUtilities"; +import { + encodeWaveBuffer, + decodeWaveBuffer, + ensureRawAudio, + getRawAudioDuration, + trimAudioStart, + trimAudioEnd, +} from "echogarden/dist/audio/AudioUtilities.js"; import path from "path"; import log from "@main/logger"; import url from "url"; import settings from "@main/settings"; import fs from "fs-extra"; import ffmpegPath from "ffmpeg-static"; +import { enjoyUrlToPath, hashFile, pathToEnjoyUrl } from "./utils"; +import { extractFrequencies } from "@/utils"; +import waveform from "./waveform"; Echogarden.setGlobalOption( "ffmpegPath", @@ -25,9 +36,23 @@ const __dirname = path const logger = log.scope("echogarden"); class EchogardenWrapper { public align: typeof Echogarden.align; + public denoise: typeof Echogarden.denoise; + public encodeWaveBuffer: typeof encodeWaveBuffer; + public decodeWaveBuffer: typeof decodeWaveBuffer; + public ensureRawAudio: typeof ensureRawAudio; + public getRawAudioDuration: typeof getRawAudioDuration; + public trimAudioStart: typeof trimAudioStart; + public trimAudioEnd: typeof trimAudioEnd; constructor() { this.align = Echogarden.align; + this.denoise = Echogarden.denoise; + this.encodeWaveBuffer = encodeWaveBuffer; + this.decodeWaveBuffer = decodeWaveBuffer; + this.ensureRawAudio = ensureRawAudio; + this.getRawAudioDuration = getRawAudioDuration; + this.trimAudioStart = trimAudioStart; + this.trimAudioEnd = trimAudioEnd; } async check() { @@ -52,11 +77,27 @@ class EchogardenWrapper { } } + /** + * Transcodes the audio file at the enjoy:// protocol URL into a WAV format. + * @param url - The URL of the audio file to transcode. + * @returns A promise that resolves to the enjoy:// protocal URL of the transcoded WAV file. + */ + async transcode(url: string, sampleRate = 16000): Promise { + const filePath = enjoyUrlToPath(url); + const rawAudio = await this.ensureRawAudio(filePath, sampleRate); + const audioBuffer = this.encodeWaveBuffer(rawAudio); + + const outputFilePath = path.join(settings.cachePath(), `${Date.now()}.wav`); + fs.writeFileSync(outputFilePath, audioBuffer); + + return pathToEnjoyUrl(outputFilePath); + } + registerIpcHandlers() { ipcMain.handle( "echogarden-align", async ( - event, + _event, input: AudioSourceParam, transcript: string, options: AlignmentOptions @@ -65,10 +106,19 @@ class EchogardenWrapper { return await this.align(input, transcript, options); } catch (err) { logger.error(err); - event.sender.send("on-notification", { - type: "error", - message: err.message, - }); + throw err; + } + } + ); + + ipcMain.handle( + "echogarden-transcode", + async (_event, url: string, sampleRate?: number) => { + try { + return await this.transcode(url, sampleRate); + } catch (err) { + logger.error(err); + throw err; } } ); diff --git a/enjoy/src/main/ffmpeg.ts b/enjoy/src/main/ffmpeg.ts index 1401d83f..d3f6efd4 100644 --- a/enjoy/src/main/ffmpeg.ts +++ b/enjoy/src/main/ffmpeg.ts @@ -5,9 +5,10 @@ import Ffmpeg from "fluent-ffmpeg"; import log from "@main/logger"; import path from "path"; import fs from "fs-extra"; -import settings from "./settings"; +import settings from "@main/settings"; import url from "url"; import { FFMPEG_CONVERT_WAV_OPTIONS } from "@/constants"; +import { enjoyUrlToPath, pathToEnjoyUrl } from "@main/utils"; /* * ffmpeg and ffprobe bin file will be in /app.asar.unpacked instead of /app.asar @@ -189,27 +190,12 @@ export default class FfmpegWrapper { output?: string, options?: string[] ): Promise { - if (input.match(/enjoy:\/\/library\/(audios|videos|recordings)/g)) { - input = path.join( - settings.userDataPath(), - input.replace("enjoy://library/", "") - ); - } else if (input.startsWith("enjoy://library/")) { - input = path.join( - settings.libraryPath(), - input.replace("enjoy://library/", "") - ); - } + input = enjoyUrlToPath(input); if (!output) { output = path.join(settings.cachePath(), `${path.basename(input)}.wav`); - } - - if (output.startsWith("enjoy://library/")) { - output = path.join( - settings.libraryPath(), - output.replace("enjoy://library/", "") - ); + } else { + output = enjoyUrlToPath(output); } options = options || FFMPEG_CONVERT_WAV_OPTIONS; @@ -234,7 +220,7 @@ export default class FfmpegWrapper { } if (fs.existsSync(output)) { - resolve(output); + resolve(pathToEnjoyUrl(output)); } else { reject(new Error("FFmpeg command failed")); } diff --git a/enjoy/src/main/utils.ts b/enjoy/src/main/utils.ts index 32787b26..5e472d6d 100644 --- a/enjoy/src/main/utils.ts +++ b/enjoy/src/main/utils.ts @@ -1,5 +1,7 @@ import { createHash } from "crypto"; import { createReadStream } from "fs"; +import settings from "./settings"; +import path from "path"; export function hashFile( path: string, @@ -36,3 +38,53 @@ export function hashBlob( reader.readAsArrayBuffer(blob); }); } + +/* + * Convert enjoy url to file path + * + * @param {string} enjoyUrl - enjoy url + * @returns {string} file path + */ +export function enjoyUrlToPath(enjoyUrl: string): string { + let filePath = enjoyUrl; + + if ( + enjoyUrl.match(/enjoy:\/\/library\/(audios|videos|recordings|speeches)/g) + ) { + filePath = path.posix.join( + settings.userDataPath(), + enjoyUrl.replace("enjoy://library/", "") + ); + } else if (enjoyUrl.startsWith("enjoy://library/")) { + filePath = path.posix.join( + settings.libraryPath(), + filePath.replace("enjoy://library/", "") + ); + } + + return filePath; +} + +/* + * Convert file path to enjoy url + * + * @param {string} filePath - file path + * @returns {string} enjoy url + */ +export function pathToEnjoyUrl(filePath: string): string { + let enjoyUrl = filePath; + + if (filePath.startsWith(settings.userDataPath())) { + enjoyUrl = `enjoy://library/${filePath.replace( + settings.userDataPath(), + "" + )}`; + } else if (filePath.startsWith(settings.libraryPath())) { + enjoyUrl = `enjoy://library/${filePath.replace( + settings.libraryPath(), + "" + )}`; + } + + return enjoyUrl; +} diff --git a/enjoy/src/main/waveform.ts b/enjoy/src/main/waveform.ts index e4b6a6a9..de0ac8fa 100644 --- a/enjoy/src/main/waveform.ts +++ b/enjoy/src/main/waveform.ts @@ -36,3 +36,5 @@ export class Waveform { }); } } + +export default new Waveform(); \ No newline at end of file diff --git a/enjoy/src/main/whisper.ts b/enjoy/src/main/whisper.ts index 90145734..94974541 100644 --- a/enjoy/src/main/whisper.ts +++ b/enjoy/src/main/whisper.ts @@ -6,6 +6,7 @@ import { exec, spawn } from "child_process"; import fs from "fs-extra"; import log from "@main/logger"; import url from "url"; +import { enjoyUrlToPath } from "./utils"; const __filename = url.fileURLToPath(import.meta.url); /* @@ -150,23 +151,23 @@ class Whipser { const { blob } = params; let { file } = params; - if (!file && !blob) { - throw new Error("No file or blob provided"); - } - const model = this.currentModel(); - - if (blob) { + if (file) { + file = enjoyUrlToPath(file); + } else if (blob) { const format = blob.type.split("/")[1]; - if (format !== "wav") { throw new Error("Only wav format is supported"); } file = path.join(settings.cachePath(), `${Date.now()}.${format}`); await fs.outputFile(file, Buffer.from(blob.arrayBuffer)); + } else { + throw new Error("No file or blob provided"); } + const model = this.currentModel(); + const { force = false, extra = [], onProgress } = options || {}; const filename = path.basename(file, path.extname(file)); const tmpDir = settings.cachePath(); diff --git a/enjoy/src/preload.ts b/enjoy/src/preload.ts index 71275da5..962a2f49 100644 --- a/enjoy/src/preload.ts +++ b/enjoy/src/preload.ts @@ -370,6 +370,9 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", { align: (input: string, transcript: string, options: any) => { return ipcRenderer.invoke("echogarden-align", input, transcript, options); }, + transcode: (input: string) => { + return ipcRenderer.invoke("echogarden-transcode", input); + }, check: () => { return ipcRenderer.invoke("echogarden-check"); }, diff --git a/enjoy/src/renderer/components/medias/media-recorder.tsx b/enjoy/src/renderer/components/medias/media-recorder.tsx index 8e557d25..83d45f68 100644 --- a/enjoy/src/renderer/components/medias/media-recorder.tsx +++ b/enjoy/src/renderer/components/medias/media-recorder.tsx @@ -6,10 +6,8 @@ import { import RecordPlugin from "wavesurfer.js/dist/plugins/record"; import WaveSurfer from "wavesurfer.js"; import { t } from "i18next"; -import { useTranscribe } from "@renderer/hooks"; import { toast } from "@renderer/components/ui"; import { MediaRecordButton } from "@renderer/components"; -import { FFMPEG_CONVERT_WAV_OPTIONS } from "@/constants"; export const MediaRecorder = () => { const { @@ -23,7 +21,6 @@ export const MediaRecorder = () => { const [access, setAccess] = useState(false); const [duration, setDuration] = useState(0); const { EnjoyApp } = useContext(AppSettingsProviderContext); - const { transcode } = useTranscribe(); const ref = useRef(null); @@ -45,12 +42,6 @@ export const MediaRecorder = () => { toast.promise( async () => { - let output: Blob; - output = await transcode(blob, [ - // ...FFMPEG_TRIM_SILENCE_OPTIONS, - ...FFMPEG_CONVERT_WAV_OPTIONS, - ]); - const currentSegment = transcription?.result?.timeline?.[currentSegmentIndex]; if (!currentSegment) return; @@ -59,8 +50,8 @@ export const MediaRecorder = () => { targetId: media.id, targetType: media.mediaType, blob: { - type: output.type.split(";")[0], - arrayBuffer: await output.arrayBuffer(), + type: blob.type.split(";")[0], + arrayBuffer: await blob.arrayBuffer(), }, referenceId: currentSegmentIndex, referenceText: currentSegment.text, diff --git a/enjoy/src/renderer/components/messages/assistant-message.tsx b/enjoy/src/renderer/components/messages/assistant-message.tsx index fea5d287..d68aa7d8 100644 --- a/enjoy/src/renderer/components/messages/assistant-message.tsx +++ b/enjoy/src/renderer/components/messages/assistant-message.tsx @@ -252,7 +252,7 @@ export const AssistantMessageComponent = (props: { diff --git a/enjoy/src/renderer/components/recordings/index.ts b/enjoy/src/renderer/components/recordings/index.ts index 4e1a9171..d470d0b9 100644 --- a/enjoy/src/renderer/components/recordings/index.ts +++ b/enjoy/src/renderer/components/recordings/index.ts @@ -1,5 +1,3 @@ -export * from "./recordings-list"; -export * from "./recording-card"; export * from "./recording-player"; export * from "./recording-calendar"; export * from "./recording-activities"; diff --git a/enjoy/src/renderer/components/recordings/recording-card.tsx b/enjoy/src/renderer/components/recordings/recording-card.tsx deleted file mode 100644 index 93a4c56a..00000000 --- a/enjoy/src/renderer/components/recordings/recording-card.tsx +++ /dev/null @@ -1,186 +0,0 @@ -import { useState, useContext } from "react"; -import { AppSettingsProviderContext } from "@/renderer/context"; -import { RecordingPlayer } from "@renderer/components"; -import { - AlertDialog, - AlertDialogHeader, - AlertDialogTrigger, - AlertDialogDescription, - AlertDialogTitle, - AlertDialogContent, - AlertDialogFooter, - AlertDialogCancel, - AlertDialogAction, - Button, - DropdownMenu, - DropdownMenuContent, - DropdownMenuItem, - DropdownMenuTrigger, - toast, -} from "@renderer/components/ui"; -import { - MoreHorizontalIcon, - Trash2Icon, - Share2Icon, - GaugeCircleIcon, -} from "lucide-react"; -import { formatDateTime, secondsToTimestamp } from "@renderer/lib/utils"; -import { t } from "i18next"; - -export const RecordingCard = (props: { - recording: RecordingType; - id?: string; - onSelect?: () => void; -}) => { - const { recording, id, onSelect } = props; - const [isDeleteDialogOpen, setIsDeleteDialogOpen] = useState(false); - const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext); - const [isPlaying, setIsPlaying] = useState(false); - - const handleDelete = () => { - EnjoyApp.recordings.destroy(recording.id); - }; - const handleShare = async () => { - if (!recording.uploadedAt) { - try { - await EnjoyApp.recordings.upload(recording.id); - } catch (error) { - toast.error(t("shareFailed"), { description: error.message }); - return; - } - } - - webApi - .createPost({ - targetId: recording.id, - targetType: "Recording", - }) - .then(() => { - toast.success(t("sharedSuccessfully"), { - description: t("sharedRecording"), - }); - }) - .catch((error) => { - toast.error(t("shareFailed"), { - description: error.message, - }); - }); - }; - - return ( -
-
-
-
- - {secondsToTimestamp(recording.duration / 1000)} - -
- - - -
- - - - - - - - - {t("shareRecording")} - - {t("areYouSureToShareThisRecordingToCommunity")} - - - - {t("cancel")} - - - - - - - - - - - - - - setIsDeleteDialogOpen(true)}> - - {t("delete")} - - - - - -
-
-
- - {formatDateTime(recording.createdAt)} - -
-
- - setIsDeleteDialogOpen(value)} - > - - - {t("deleteRecording")} - - {t("deleteRecordingConfirmation")} - - - - {t("cancel")} - - - - -
- ); -}; diff --git a/enjoy/src/renderer/components/recordings/recordings-list.tsx b/enjoy/src/renderer/components/recordings/recordings-list.tsx deleted file mode 100644 index 99e5f0d5..00000000 --- a/enjoy/src/renderer/components/recordings/recordings-list.tsx +++ /dev/null @@ -1,203 +0,0 @@ -import { - RecordButton, - RecordingCard, - RecordingDetail, -} from "@renderer/components"; -import { - Button, - Sheet, - SheetContent, - SheetHeader, - SheetClose, -} from "@renderer/components/ui"; -import { useEffect, useState, useRef, useContext, useReducer } from "react"; -import { LoaderIcon, ChevronDownIcon } from "lucide-react"; -import { t } from "i18next"; -import { - DbProviderContext, - AppSettingsProviderContext, -} from "@renderer/context"; -import { recordingsReducer } from "@renderer/reducers"; - -export const RecordingsList = (props: { - targetId: string; - targetType: "Audio" | "Video"; - referenceId: number; - referenceText: string; -}) => { - const { addDblistener, removeDbListener } = useContext(DbProviderContext); - const { EnjoyApp } = useContext(AppSettingsProviderContext); - const { targetId, targetType, referenceId, referenceText } = props; - const containerRef = useRef(); - - const [recordings, dispatchRecordings] = useReducer(recordingsReducer, []); - const [selected, setSelected] = useState(null); - const [loading, setLoading] = useState(false); - const [offset, setOffest] = useState(0); - - const scrollToRecording = (recording: RecordingType) => { - if (!containerRef.current) return; - if (!recording) return; - - setTimeout(() => { - containerRef.current - .querySelector(`#recording-${recording.id}`) - ?.scrollIntoView({ - behavior: "smooth", - } as ScrollIntoViewOptions); - }, 500); - }; - - const onRecordingsUpdate = (event: CustomEvent) => { - const { model, action, record } = event.detail || {}; - - if (model === "PronunciationAssessment" && action === "create") { - const recording = recordings.find((r) => r.id === record.targetId); - if (!recording) return; - - recording.pronunciationAssessment = record; - dispatchRecordings({ - type: "update", - record: recording, - }); - } - - if (model != "Recording") return; - - if (action === "destroy") { - dispatchRecordings({ - type: "destroy", - record, - }); - } else if (action === "create") { - if ((record as RecordingType).targetId !== targetId) return; - dispatchRecordings({ - type: "create", - record, - }); - - scrollToRecording(record); - } - }; - - const createRecording = async (blob: Blob, duration: number) => { - if (typeof referenceId !== "number") return; - - EnjoyApp.recordings.create({ - targetId, - targetType, - blob: { - type: blob.type.split(";")[0], - arrayBuffer: await blob.arrayBuffer(), - }, - referenceId, - referenceText, - duration, - }); - }; - - useEffect(() => { - addDblistener(onRecordingsUpdate); - - return () => { - removeDbListener(onRecordingsUpdate); - }; - }, [recordings]); - - useEffect(() => { - fetchRecordings(); - }, [targetId, targetType, referenceId]); - - const fetchRecordings = async () => { - setLoading(true); - - const limit = 10; - EnjoyApp.recordings - .findAll({ - limit, - offset, - where: { targetId, targetType, referenceId }, - }) - .then((_recordings) => { - if (_recordings.length === 0) { - setOffest(-1); - return; - } - - if (_recordings.length < limit) { - setOffest(-1); - } else { - setOffest(offset + _recordings.length); - } - - dispatchRecordings({ - type: "append", - records: _recordings, - }); - - scrollToRecording(_recordings[0]); - }) - .finally(() => { - setLoading(false); - }); - }; - - return ( - <> -
- {offset > -1 && ( -
- -
- )} - -
-
- {recordings.map((recording) => ( - setSelected(recording)} - /> - ))} -
- -
- {referenceId !== undefined && Boolean(referenceText) && ( - - )} -
-
- - { - if (!value) setSelected(null); - }} - > - - - - - - - - - - - - ); -}; diff --git a/enjoy/src/renderer/context/app-settings-provider.tsx b/enjoy/src/renderer/context/app-settings-provider.tsx index 9712b2b5..643c3735 100644 --- a/enjoy/src/renderer/context/app-settings-provider.tsx +++ b/enjoy/src/renderer/context/app-settings-provider.tsx @@ -1,10 +1,7 @@ -import { createContext, useEffect, useState, useRef } from "react"; -import { toast } from "@renderer/components/ui"; +import { createContext, useEffect, useState } from "react"; import { WEB_API_URL } from "@/constants"; import { Client } from "@/api"; import i18n from "@renderer/i18n"; -import { FFmpeg } from "@ffmpeg/ffmpeg"; -import { toBlobURL } from "@ffmpeg/util"; import ahoy from "ahoy.js"; type AppSettingsProviderState = { @@ -17,8 +14,6 @@ type AppSettingsProviderState = { login?: (user: UserType) => void; logout?: () => void; setLibraryPath?: (path: string) => Promise; - ffmpegWasm?: FFmpeg; - ffmpegValid?: boolean; EnjoyApp?: EnjoyAppType; language?: "en" | "zh-CN"; switchLanguage?: (language: "en" | "zh-CN") => void; @@ -46,20 +41,15 @@ export const AppSettingsProvider = ({ const [webApi, setWebApi] = useState(null); const [user, setUser] = useState(null); const [libraryPath, setLibraryPath] = useState(""); - const [ffmpegWasm, setFfmpegWasm] = useState(null); - const [ffmpegValid, setFfmpegValid] = useState(false); const [language, setLanguage] = useState<"en" | "zh-CN">(); const [proxy, setProxy] = useState(); const EnjoyApp = window.__ENJOY_APP__; - const ffmpegRef = useRef(new FFmpeg()); - useEffect(() => { fetchVersion(); fetchUser(); fetchLibraryPath(); fetchLanguage(); - prepareFfmpeg(); fetchProxyConfig(); }, []); @@ -83,50 +73,6 @@ export const AppSettingsProvider = ({ }); }, [apiUrl]); - const prepareFfmpeg = async () => { - try { - const valid = await EnjoyApp.ffmpeg.check(); - setFfmpegValid(valid); - } catch (err) { - console.error(err); - toast.error(err.message); - } - - loadFfmpegWASM(); - }; - - const loadFfmpegWASM = async () => { - const baseURL = "assets/libs"; - ffmpegRef.current.on("log", ({ message }) => { - console.log(message); - }); - - const coreURL = await toBlobURL( - `${baseURL}/ffmpeg-core.js`, - "text/javascript" - ); - const wasmURL = await toBlobURL( - `${baseURL}/ffmpeg-core.wasm`, - "application/wasm" - ); - const workerURL = await toBlobURL( - `${baseURL}/ffmpeg-core.worker.js`, - "text/javascript" - ); - - try { - await ffmpegRef.current.load({ - coreURL, - wasmURL, - workerURL, - }); - setFfmpegWasm(ffmpegRef.current); - (window as any).ffmpeg = ffmpegRef.current; - } catch (err) { - toast.error(err.message); - } - }; - const fetchLanguage = async () => { const language = await EnjoyApp.settings.getLanguage(); setLanguage(language as "en" | "zh-CN"); @@ -211,8 +157,6 @@ export const AppSettingsProvider = ({ logout, libraryPath, setLibraryPath: setLibraryPathHandler, - ffmpegValid, - ffmpegWasm, proxy, setProxy: setProxyConfigHandler, initialized: Boolean(user && libraryPath), diff --git a/enjoy/src/renderer/hooks/use-transcribe.tsx b/enjoy/src/renderer/hooks/use-transcribe.tsx index c2ff68c4..cffde52f 100644 --- a/enjoy/src/renderer/hooks/use-transcribe.tsx +++ b/enjoy/src/renderer/hooks/use-transcribe.tsx @@ -4,9 +4,7 @@ import { } from "@renderer/context"; import OpenAI from "openai"; import { useContext } from "react"; -import { toast } from "@renderer/components/ui"; import { t } from "i18next"; -import { fetchFile } from "@ffmpeg/util"; import { AI_WORKER_ENDPOINT } from "@/constants"; import * as sdk from "microsoft-cognitiveservices-speech-sdk"; import axios from "axios"; @@ -15,63 +13,21 @@ import sortedUniqBy from "lodash/sortedUniqBy"; import { groupTranscription, milisecondsToTimestamp } from "@/utils"; import { END_OF_SENTENCE_REGEX } from "@/constants"; import { AlignmentResult } from "echogarden/dist/api/API.d.js"; -import { FFMPEG_CONVERT_WAV_OPTIONS } from "@/constants"; export const useTranscribe = () => { - const { EnjoyApp, ffmpegWasm, ffmpegValid, user, webApi } = useContext( - AppSettingsProviderContext - ); + const { EnjoyApp, user, webApi } = useContext(AppSettingsProviderContext); const { whisperConfig, openai } = useContext(AISettingsProviderContext); - const transcode = async (src: string | Blob, options?: string[]) => { - if (ffmpegValid) { - if (src instanceof Blob) { - src = await EnjoyApp.cacheObjects.writeFile( - `${Date.now()}.${src.type.split("/")[1].split(";")[0]}`, - await src.arrayBuffer() - ); - } - - const output = `enjoy://library/cache/${src - .split("/") - .pop() - .split(";") - .shift()}.wav`; - await EnjoyApp.ffmpeg.transcode(src, output, options); - const data = await fetchFile(output); - return new Blob([data], { type: "audio/wav" }); - } else { - return transcodeUsingWasm(src, options); + const transcode = async (src: string | Blob): Promise => { + if (src instanceof Blob) { + src = await EnjoyApp.cacheObjects.writeFile( + `${Date.now()}.${src.type.split("/")[1].split(";")[0]}`, + await src.arrayBuffer() + ); } - }; - const transcodeUsingWasm = async (src: string | Blob, options?: string[]) => { - if (!ffmpegWasm?.loaded) return; - - options = options || FFMPEG_CONVERT_WAV_OPTIONS; - - try { - let uri: URL; - if (src instanceof Blob) { - uri = new URL(URL.createObjectURL(src)); - } else { - uri = new URL(src); - } - - const input = uri.pathname.split("/").pop(); - let output: string; - if (src instanceof Blob) { - output = input + ".wav"; - } else { - output = input.replace(/\.[^/.]+$/, ".wav"); - } - await ffmpegWasm.writeFile(input, await fetchFile(src)); - await ffmpegWasm.exec(["-i", input, ...options, output]); - const data = await ffmpegWasm.readFile(output); - return new Blob([data], { type: "audio/wav" }); - } catch (e) { - toast.error(t("transcodeError")); - } + const output = await EnjoyApp.echogarden.transcode(src); + return output; }; const transcribe = async ( @@ -87,8 +43,9 @@ export const useTranscribe = () => { alignmentResult: AlignmentResult; originalText?: string; }> => { - const blob = await transcode(mediaSrc); + const url = await transcode(mediaSrc); const { targetId, targetType, originalText } = params || {}; + const blob = await (await fetch(url)).blob(); let result; if (originalText) { @@ -97,7 +54,7 @@ export const useTranscribe = () => { model: "original", }; } else if (whisperConfig.service === "local") { - result = await transcribeByLocal(blob); + result = await transcribeByLocal(url); } else if (whisperConfig.service === "cloudflare") { result = await transcribeByCloudflareAi(blob); } else if (whisperConfig.service === "openai") { @@ -120,13 +77,10 @@ export const useTranscribe = () => { }; }; - const transcribeByLocal = async (blob: Blob) => { + const transcribeByLocal = async (url: string) => { const res = await EnjoyApp.whisper.transcribe( { - blob: { - type: blob.type.split(";")[0], - arrayBuffer: await blob.arrayBuffer(), - }, + file: url, }, { force: true, diff --git a/enjoy/src/renderer/hooks/use-transcriptions.tsx b/enjoy/src/renderer/hooks/use-transcriptions.tsx index 2e3f1ac8..5d53926a 100644 --- a/enjoy/src/renderer/hooks/use-transcriptions.tsx +++ b/enjoy/src/renderer/hooks/use-transcriptions.tsx @@ -8,7 +8,6 @@ import { import { toast } from "@renderer/components/ui"; import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js"; import { MAGIC_TOKEN_REGEX, END_OF_SENTENCE_REGEX } from "@/constants"; -import { t } from "i18next"; export const useTranscriptions = (media: AudioType | VideoType) => { const { whisperConfig } = useContext(AISettingsProviderContext); @@ -74,19 +73,15 @@ export const useTranscriptions = (media: AudioType | VideoType) => { }); let timeline: TimelineEntry[] = []; - if (alignmentResult) { - alignmentResult.timeline.forEach((t) => { - if (t.type === "sentence") { - timeline.push(t); - } else { - t.timeline.forEach((st) => { - timeline.push(st); - }); - } - }); - } else { - throw new Error(t("forceAlignmentFailed")); - } + alignmentResult.timeline.forEach((t) => { + if (t.type === "sentence") { + timeline.push(t); + } else { + t.timeline.forEach((st) => { + timeline.push(st); + }); + } + }); /* * Pre-process diff --git a/enjoy/src/types/enjoy-app.d.ts b/enjoy/src/types/enjoy-app.d.ts index a97d1843..027900e5 100644 --- a/enjoy/src/types/enjoy-app.d.ts +++ b/enjoy/src/types/enjoy-app.d.ts @@ -219,6 +219,7 @@ type EnjoyAppType = { transcript: string, options?: any ) => Promise; + transcode: (input: string) => Promise; check: () => Promise; }; whisper: { diff --git a/enjoy/vite.main.config.ts b/enjoy/vite.main.config.ts index 9c71a32a..b2249ee0 100644 --- a/enjoy/vite.main.config.ts +++ b/enjoy/vite.main.config.ts @@ -23,7 +23,11 @@ export default defineConfig((env) => { formats: ["es"], }, rollupOptions: { - external: [...external, "echogarden/dist/api/API.js"], + external: [ + ...external, + "echogarden/dist/api/API.js", + "echogarden/dist/audio/AudioUtilities.js", + ], output: { strict: false, }, diff --git a/yarn.lock b/yarn.lock index 15e1e7b7..749cdaad 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2043,29 +2043,6 @@ __metadata: languageName: node linkType: hard -"@ffmpeg/ffmpeg@npm:^0.12.10": - version: 0.12.10 - resolution: "@ffmpeg/ffmpeg@npm:0.12.10" - dependencies: - "@ffmpeg/types": "npm:^0.12.2" - checksum: 10c0/224185b24b4fe9d3d1d6d17e741205793f74b29cce04435d6c25e5d17b7e12b608e876a64ad33d3e92114d4f0079f4e2e4d809632b354ad1a5f775f5bdb1b8e6 - languageName: node - linkType: hard - -"@ffmpeg/types@npm:^0.12.2": - version: 0.12.2 - resolution: "@ffmpeg/types@npm:0.12.2" - checksum: 10c0/5c3f250c7ed828a3f66073504e3e92ee9f7cd73ddf3bbdf777263710beb6e757da3d7178b2bae29bef6d602abbc21b561bcc659455420d3aed1b30a535ca1d0a - languageName: node - linkType: hard - -"@ffmpeg/util@npm:^0.12.1": - version: 0.12.1 - resolution: "@ffmpeg/util@npm:0.12.1" - checksum: 10c0/943cc8b886cdfd3c448d3618acc3fe02abda46472c11aefb9e90a4a814962142632e90ab475374357fddb6ba8176757dbbe434147e0cd1a8fcd5133f95bee0a6 - languageName: node - linkType: hard - "@floating-ui/core@npm:^1.0.0": version: 1.6.0 resolution: "@floating-ui/core@npm:1.6.0" @@ -9007,8 +8984,6 @@ __metadata: "@electron-forge/plugin-vite": "npm:^7.3.1" "@electron-forge/publisher-github": "npm:^7.3.1" "@electron/fuses": "npm:^1.8.0" - "@ffmpeg/ffmpeg": "npm:^0.12.10" - "@ffmpeg/util": "npm:^0.12.1" "@hookform/resolvers": "npm:^3.3.4" "@langchain/community": "npm:^0.0.43" "@langchain/google-genai": "npm:^0.0.10"