Feat use ffmpeg static (#327)

* use ffmpeg static if valid

* transcribe after decoding
This commit is contained in:
an-lee
2024-02-19 14:12:06 +08:00
committed by GitHub
parent 27e6cd85db
commit f75912ff8b
10 changed files with 143 additions and 45 deletions

View File

@@ -43,9 +43,12 @@ protocol.registerSchemesAsPrivileged([
app.on("ready", async () => {
protocol.handle("enjoy", (request) => {
let url = request.url.replace("enjoy://", "");
if (url.startsWith("library")) {
if (url.match(/library\/(audios|videos|recordings)/g)) {
url = url.replace("library/", "");
url = path.join(settings.userDataPath(), url);
} else if (url.startsWith("library")) {
url = url.replace("library/", "");
url = path.join(settings.libraryPath(), url);
}
return net.fetch(`file:///${url}`);

View File

@@ -5,24 +5,18 @@ import Ffmpeg from "fluent-ffmpeg";
import log from "electron-log/main";
import path from "path";
import fs from "fs-extra";
import settings from "./settings";
Ffmpeg.setFfmpegPath(ffmpegPath);
Ffmpeg.setFfprobePath(ffprobePath);
const logger = log.scope("ffmpeg");
export default class FfmpegWrapper {
public ffmpeg: Ffmpeg.FfmpegCommand;
constructor() {
const ff = Ffmpeg();
logger.debug("Using ffmpeg path:", ffmpegPath);
logger.debug("Using ffprobe path:", ffprobePath);
ff.setFfmpegPath(ffmpegPath);
ff.setFfprobePath(ffprobePath);
this.ffmpeg = ff;
}
checkCommand(): Promise<boolean> {
const ffmpeg = Ffmpeg();
const sampleFile = path.join(__dirname, "samples", "jfk.wav");
return new Promise((resolve, _reject) => {
this.ffmpeg.input(sampleFile).getAvailableFormats((err, _formats) => {
ffmpeg.input(sampleFile).getAvailableFormats((err, _formats) => {
if (err) {
logger.error("Command not valid:", err);
resolve(false);
@@ -35,8 +29,9 @@ export default class FfmpegWrapper {
}
generateMetadata(input: string): Promise<Ffmpeg.FfprobeData> {
const ffmpeg = Ffmpeg();
return new Promise((resolve, reject) => {
this.ffmpeg
ffmpeg
.input(input)
.on("start", (commandLine) => {
logger.info("Spawned FFmpeg with command: " + commandLine);
@@ -57,8 +52,9 @@ export default class FfmpegWrapper {
}
generateCover(input: string, output: string): Promise<string> {
const ffmpeg = Ffmpeg();
return new Promise((resolve, reject) => {
this.ffmpeg
ffmpeg
.input(input)
.thumbnail({
count: 1,
@@ -91,8 +87,9 @@ export default class FfmpegWrapper {
fs.removeSync(output);
}
const ffmpeg = Ffmpeg();
return new Promise((resolve, reject) => {
this.ffmpeg
ffmpeg
.input(input)
.outputOptions("-ar", `${sampleRate}`)
.on("error", (err) => {
@@ -112,8 +109,9 @@ export default class FfmpegWrapper {
output: string,
options: string[] = []
): Promise<string> {
const ffmpeg = Ffmpeg();
return new Promise((resolve, reject) => {
this.ffmpeg
ffmpeg
.input(input)
.outputOptions(
"-ar",
@@ -135,7 +133,7 @@ export default class FfmpegWrapper {
}
if (stderr) {
logger.error(stderr);
logger.info(stderr);
}
if (fs.existsSync(output)) {
@@ -176,9 +174,79 @@ export default class FfmpegWrapper {
return this.convertToWav(input, output);
}
/**
 * Map an `enjoy://library/...` URL to an absolute path on disk.
 *
 * URLs whose first segment starts with `audios`, `videos` or `recordings`
 * resolve against `settings.userDataPath()`; every other `enjoy://library/`
 * URL resolves against `settings.libraryPath()`. A value that does not start
 * with `enjoy://library/` is returned unchanged.
 *
 * @param target A file path or an `enjoy://library/...` URL.
 * @returns The path to hand to ffmpeg.
 */
private resolveLibraryUrl(target: string): string {
  const prefix = "enjoy://library/";
  if (!target.startsWith(prefix)) {
    return target;
  }

  const relative = target.slice(prefix.length);
  const base = /^(audios|videos|recordings)/.test(relative)
    ? settings.userDataPath()
    : settings.libraryPath();

  return path.join(base, relative);
}

/**
 * Transcode a media file with ffmpeg and resolve with the output path.
 *
 * Both `input` and `output` may be plain paths or `enjoy://library/...` URLs;
 * they go through the same resolver so that a URL used as output points at
 * the same file that URL would name as input.
 *
 * @param input Source file path or `enjoy://library/...` URL.
 * @param output Destination path or URL. Defaults to
 *   `<basename of input>.wav` inside `settings.cachePath()`.
 * @param options ffmpeg output options. Defaults to
 *   `-ar 16000 -ac 1 -c:a pcm_s16le`.
 * @returns Resolves with the resolved output path once ffmpeg has finished
 *   and the file exists.
 * @throws Rejects when the output directory cannot be created, when ffmpeg
 *   emits an error, or when ffmpeg ends without producing the output file.
 */
async transcode(
  input: string,
  output?: string,
  options?: string[]
): Promise<string> {
  const source = this.resolveLibraryUrl(input);
  const target = output
    ? this.resolveLibraryUrl(output)
    : path.join(settings.cachePath(), `${path.basename(source)}.wav`);
  const outputOptions = options || [
    "-ar",
    "16000",
    "-ac",
    "1",
    "-c:a",
    "pcm_s16le",
  ];

  // Create the destination directory before ffmpeg is spawned instead of in
  // the "start" event, which is only emitted once the process is running.
  fs.ensureDirSync(path.dirname(target));

  return new Promise((resolve, reject) => {
    Ffmpeg()
      .input(source)
      .outputOptions(...outputOptions)
      .on("start", (commandLine) => {
        logger.debug(`Trying to convert ${source} to ${target}`);
        logger.info("Spawned FFmpeg with command: " + commandLine);
      })
      .on("end", (stdout, stderr) => {
        if (stdout) {
          logger.debug(stdout);
        }

        // Logged at info level: stderr output is not treated as a failure here.
        if (stderr) {
          logger.info(stderr);
        }

        if (fs.existsSync(target)) {
          resolve(target);
        } else {
          reject(new Error("FFmpeg command failed"));
        }
      })
      .on("error", (err: Error) => {
        logger.error(err);
        reject(err);
      })
      .save(target);
  });
}
/**
 * Register the IPC channels that expose this wrapper over `ipcMain`.
 *
 * - `ffmpeg-check-command` resolves with the result of `checkCommand()`.
 * - `ffmpeg-transcode` forwards `(input, output, options)` to `transcode()`
 *   and resolves with its result.
 */
registerIpcHandlers() {
  ipcMain.handle("ffmpeg-check-command", async (_event) =>
    this.checkCommand()
  );

  ipcMain.handle("ffmpeg-transcode", async (_event, input, output, options) =>
    this.transcode(input, output, options)
  );
}
}

View File

@@ -104,7 +104,7 @@ class Whipser {
}
if (stderr) {
logger.error("stderr", stderr);
logger.info("stderr", stderr);
}
if (stdout) {
@@ -199,7 +199,7 @@ class Whipser {
command.stderr.on("data", (data) => {
const output = data.toString();
logger.error(`stderr: ${output}`);
logger.info(`stderr: ${output}`);
if (output.startsWith("whisper_print_progress_callback")) {
const progress = parseInt(output.match(/\d+%/)?.[0] || "0");
if (typeof progress === "number") onProgress(progress);

View File

@@ -389,6 +389,9 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", {
check: () => {
return ipcRenderer.invoke("ffmpeg-check-command");
},
transcode: (input: string, output: string, options: string[]) => {
return ipcRenderer.invoke("ffmpeg-transcode", input, output, options);
},
},
download: {
onState: (

View File

@@ -173,6 +173,7 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
}, [audio]);
useEffect(() => {
if (!initialized) return;
if (!transcription) return;
addDblistener(onTransactionUpdate);
@@ -192,7 +193,7 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
removeDbListener(onTransactionUpdate);
EnjoyApp.whisper.removeProgressListeners();
};
}, [md5, transcription]);
}, [md5, transcription, initialized]);
if (!audio) {
return <LoaderSpin />;
@@ -324,7 +325,7 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
{!transcription ? (
<div className="flex items-center space-x-4">
<PingPoint colorClassName="bg-muted" />
<LoaderIcon className="w-4 h-4 animate-spin" />
<span>{t("loadingTranscription")}</span>
</div>
) : transcription.result ? (

View File

@@ -179,6 +179,7 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
}, [video]);
useEffect(() => {
if (!initialized) return;
if (!transcription) return;
addDblistener(onTransactionUpdate);
@@ -198,7 +199,7 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
removeDbListener(onTransactionUpdate);
EnjoyApp.whisper.removeProgressListeners();
};
}, [md5, transcription]);
}, [md5, transcription, initialized]);
if (!video) {
return <LoaderSpin />;
@@ -337,7 +338,7 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
{!transcription ? (
<div className="flex items-center space-x-4">
<PingPoint colorClassName="bg-muted" />
<LoaderIcon className="w-4 h-4 animate-spin" />
<span>{t("loadingTranscription")}</span>
</div>
) : transcription.result ? (

View File

@@ -16,7 +16,8 @@ type AppSettingsProviderState = {
login?: (user: UserType) => void;
logout?: () => void;
setLibraryPath?: (path: string) => Promise<void>;
ffmpeg?: FFmpeg;
ffmpegWasm?: FFmpeg;
ffmpegValid?: boolean;
EnjoyApp?: EnjoyAppType;
language?: "en" | "zh-CN";
switchLanguage?: (language: "en" | "zh-CN") => void;
@@ -44,7 +45,8 @@ export const AppSettingsProvider = ({
const [webApi, setWebApi] = useState<Client>(null);
const [user, setUser] = useState<UserType | null>(null);
const [libraryPath, setLibraryPath] = useState("");
const [ffmpeg, setFfmpeg] = useState<FFmpeg>(null);
const [ffmpegWasm, setFfmpegWasm] = useState<FFmpeg>(null);
const [ffmpegValid, setFfmpegValid] = useState<boolean>(false);
const [language, setLanguage] = useState<"en" | "zh-CN">();
const [proxy, setProxy] = useState<ProxyConfigType>();
const EnjoyApp = window.__ENJOY_APP__;
@@ -56,7 +58,7 @@ export const AppSettingsProvider = ({
fetchUser();
fetchLibraryPath();
fetchLanguage();
loadFfmpegWASM();
prepareFfmpeg();
fetchProxyConfig();
}, []);
@@ -76,6 +78,14 @@ export const AppSettingsProvider = ({
);
}, [user, apiUrl, language]);
/**
 * Pick the ffmpeg backend: ask the main process whether its ffmpeg command
 * works, publish the answer through `setFfmpegValid`, and load the WASM
 * build only when it does not.
 *
 * A rejected check is treated the same as an invalid command, so the WASM
 * fallback is still loaded instead of leaving the app with no ffmpeg at all.
 */
const prepareFfmpeg = async () => {
  let valid = false;

  try {
    valid = await EnjoyApp.ffmpeg.check();
  } catch (err) {
    // Not fatal: fall through with `valid === false` and use the WASM build.
    console.error("ffmpeg check failed, falling back to WASM:", err);
  }

  setFfmpegValid(valid);

  if (!valid) {
    loadFfmpegWASM();
  }
};
const loadFfmpegWASM = async () => {
const baseURL = "assets/libs";
ffmpegRef.current.on("log", ({ message }) => {
@@ -101,7 +111,7 @@ export const AppSettingsProvider = ({
wasmURL,
workerURL,
});
setFfmpeg(ffmpegRef.current);
setFfmpegWasm(ffmpegRef.current);
} catch (err) {
toast.error(err.message);
}
@@ -195,7 +205,8 @@ export const AppSettingsProvider = ({
logout,
libraryPath,
setLibraryPath: setLibraryPathHandler,
ffmpeg,
ffmpegValid,
ffmpegWasm,
proxy,
setProxy: setProxyConfigHandler,
initialized,

View File

@@ -12,16 +12,32 @@ import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import axios from "axios";
import take from "lodash/take";
import sortedUniqBy from "lodash/sortedUniqBy";
import { groupTranscription, END_OF_WORD_REGEX, milisecondsToTimestamp } from "@/utils";
import {
groupTranscription,
END_OF_WORD_REGEX,
milisecondsToTimestamp,
} from "@/utils";
export const useTranscribe = () => {
const { EnjoyApp, ffmpeg, user, webApi } = useContext(
const { EnjoyApp, ffmpegWasm, ffmpegValid, user, webApi } = useContext(
AppSettingsProviderContext
);
const { whisperConfig, openai } = useContext(AISettingsProviderContext);
const transcode = async (src: string, options?: string[]) => {
if (!ffmpeg?.loaded) return;
if (ffmpegValid) {
const output = `enjoy://library/cache/${src.split("/").pop()}.wav`;
const res = await EnjoyApp.ffmpeg.transcode(src, output, options);
console.log(res);
const data = await fetchFile(output);
return new Blob([data], { type: "audio/wav" });
} else {
return transcodeUsingWasm(src, options);
}
};
const transcodeUsingWasm = async (src: string, options?: string[]) => {
if (!ffmpegWasm?.loaded) return;
options = options || ["-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le"];
@@ -29,9 +45,9 @@ export const useTranscribe = () => {
const uri = new URL(src);
const input = uri.pathname.split("/").pop();
const output = input.replace(/\.[^/.]+$/, ".wav");
await ffmpeg.writeFile(input, await fetchFile(src));
await ffmpeg.exec(["-i", input, ...options, output]);
const data = await ffmpeg.readFile(output);
await ffmpegWasm.writeFile(input, await fetchFile(src));
await ffmpegWasm.exec(["-i", input, ...options, output]);
const data = await ffmpegWasm.readFile(output);
return new Blob([data], { type: "audio/wav" });
} catch (e) {
toast.error(t("transcodeError"));

View File

@@ -228,6 +228,11 @@ type EnjoyAppType = {
};
ffmpeg: {
check: () => Promise<boolean>;
transcode: (
input: string,
output: string,
options?: string[]
) => Promise<string>;
};
download: {
onState: (callback: (event, state) => void) => void;

View File

@@ -90,16 +90,6 @@ type TransactionStateType = {
record?: AudioType | UserType | RecordingType;
};
type FfmpegConfigType = {
os: string;
arch: string;
commandExists: boolean;
ffmpegPath?: string;
ffprobePath?: string;
scanDirs: string[];
ready: boolean;
};
type LookupType = {
id: string;
word: string;