Fix: record without ffmpeg & others (#201)
* remove ffmpeg transcode when saving a recording
* fix model download
* transcode recorded audio in the renderer
* fix transcribe dead loop when whisper is not working
* force selection of a model
* check the model before transcribing
@@ -295,6 +295,7 @@
   "language": "Language",
   "sttAiModel": "STT AI model",
   "checkingWhisper": "Checking whisper status",
+  "pleaseDownloadWhisperModelFirst": "Please download whisper model first",
   "whisperIsWorkingGood": "Whisper is working good",
   "whisperIsNotWorking": "Whisper is not working",
   "checkingWhisperModel": "Checking whisper model",

@@ -294,6 +294,7 @@
   "language": "语言",
   "sttAiModel": "语音转文本 AI 模型",
   "checkingWhisper": "正在检查 Whisper",
+  "pleaseDownloadWhisperModelFirst": "请先下载 Whisper 模型",
   "whisperIsWorkingGood": "Whisper 正常工作",
   "whisperIsNotWorking": "Whisper 无法正常工作,请尝试更换模型后重试,或联系开发者",
   "checkingWhisperModel": "正在检查 Whisper 模型",

@@ -22,7 +22,6 @@ import settings from "@main/settings";
 import { hashFile } from "@/utils";
 import log from "electron-log/main";
 import storage from "@main/storage";
-import Ffmpeg from "@main/ffmpeg";
 import { Client } from "@/api";
 import { WEB_API_URL } from "@/constants";
 import { AzureSpeechSdk } from "@main/azure-speech-sdk";

@@ -283,20 +282,16 @@ export class Recording extends Model<Recording> {
       params;

     const format = blob.type.split("/")[1];
-    const tempfile = path.join(settings.cachePath(), `${Date.now()}.${format}`);
-    await fs.outputFile(tempfile, Buffer.from(blob.arrayBuffer));
-    const wavFile = path.join(
+    const file = path.join(
       settings.userDataPath(),
       "recordings",
-      `${Date.now()}.wav`
+      `${Date.now()}.${format}`
     );
+    await fs.outputFile(file, Buffer.from(blob.arrayBuffer));

-    const ffmpeg = new Ffmpeg();
-    await ffmpeg.convertToWav(tempfile, wavFile);
-
-    const md5 = await hashFile(wavFile, { algo: "md5" });
-    const filename = `${md5}.wav`;
-    fs.renameSync(wavFile, path.join(path.dirname(wavFile), filename));
+    const md5 = await hashFile(file, { algo: "md5" });
+    const filename = `${md5}.${format}`;
+    fs.renameSync(file, path.join(path.dirname(file), filename));

     return this.create(
       {

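For orientation, here is a consolidated sketch of the new save path: the main process writes the blob in whatever container the renderer produced (audio/wav after the renderer-side transcode), then only hashes and renames it, with no ffmpeg step. The standalone helper name and the blob shape below are assumptions for illustration, not the actual model method.

```ts
import path from "path";
import fs from "fs-extra";
import settings from "@main/settings";
import { hashFile } from "@/utils";

// Hypothetical helper mirroring the updated Recording save logic (sketch only).
export async function persistRecordingBlob(blob: {
  type: string;
  arrayBuffer: ArrayBuffer;
}): Promise<string> {
  const format = blob.type.split("/")[1];
  const file = path.join(
    settings.userDataPath(),
    "recordings",
    `${Date.now()}.${format}`
  );
  await fs.outputFile(file, Buffer.from(blob.arrayBuffer));

  // Content-addressed filename; no wav conversion in the main process anymore.
  const md5 = await hashFile(file, { algo: "md5" });
  const filename = `${md5}.${format}`;
  fs.renameSync(file, path.join(path.dirname(file), filename));
  return filename;
}
```
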
@@ -24,7 +24,11 @@ class Downloader {
     webContents.downloadURL(url);
     webContents.session.on("will-download", (_event, item, _webContents) => {
       if (savePath) {
-        item.setSavePath(savePath);
+        if (fs.statSync(savePath).isDirectory()) {
+          item.setSavePath(path.join(savePath, item.getFilename()));
+        } else {
+          item.setSavePath(savePath);
+        }
       } else {
         item.setSavePath(
           path.join(app.getPath("downloads"), item.getFilename())

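The model-download fix comes down to how a savePath hint is interpreted. A self-contained sketch of that decision; the helper below is hypothetical, extracted from the handler above purely for illustration:

```ts
import fs from "fs-extra";
import path from "path";

// Hypothetical mirror of the new will-download logic: where should the
// Electron DownloadItem be written, given an optional savePath hint?
function resolveSavePath(
  savePath: string | undefined,
  filename: string,
  downloadsDir: string
): string {
  if (savePath) {
    // New in this commit: a directory hint gets the server-side filename
    // appended, instead of the directory itself being treated as the target file.
    if (fs.statSync(savePath).isDirectory()) {
      return path.join(savePath, filename);
    }
    return savePath;
  }
  return path.join(downloadsDir, filename);
}
```
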
@@ -5,6 +5,7 @@ import { WHISPER_MODELS_OPTIONS, PROCESS_TIMEOUT } from "@/constants";
 import { exec } from "child_process";
 import fs from "fs-extra";
 import log from "electron-log/main";
+import { t } from "i18next";

 const logger = log.scope("whisper");
 const MAGIC_TOKENS = ["Mrs.", "Ms.", "Mr.", "Dr.", "Prof.", "St."];

@@ -18,6 +19,13 @@ class Whipser {
   }

   currentModel() {
+    if (!this.config.availableModels) return;
+    if (!this.config.model) {
+      const model = this.config.availableModels[0];
+      settings.setSync("whisper.model", this.config.availableModels[0].name);
+      return model.savePath;
+    }
+
     return (this.config.availableModels || []).find(
       (m) => m.name === this.config.model
     )?.savePath;

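Together with the setup-wizard change further down, this is the "force selection of a model" part of the commit: if no model is picked yet, the first available one is selected and persisted rather than returning nothing. A simplified, standalone mirror of that behavior (the types and the persist callback are assumptions, not the actual class method):

```ts
interface WhisperModel {
  name: string;
  savePath: string;
}

// Hypothetical, simplified mirror of Whipser.currentModel() for illustration.
function pickModelPath(
  availableModels: WhisperModel[] | undefined,
  selected: string | undefined,
  persist: (name: string) => void
): string | undefined {
  if (!availableModels || availableModels.length === 0) return undefined;
  if (!selected) {
    // Force-select the first downloaded model so transcription can proceed.
    persist(availableModels[0].name);
    return availableModels[0].savePath;
  }
  return availableModels.find((m) => m.name === selected)?.savePath;
}
```
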
@@ -177,6 +185,10 @@ class Whipser {
       return fs.readJson(outputFile);
     }

+    if (!this.currentModel()) {
+      throw new Error(t("pleaseDownloadWhisperModelFirst"));
+    }
+
     const command = [
       `"${this.binMain}"`,
       `--file "${file}"`,

@@ -112,6 +112,10 @@ export const MediaTranscription = (props: {
     addDblistener(fetchSegmentStats);
     fetchSegmentStats();

+    if (transcription?.state == "pending") {
+      generate();
+    }
+
     return () => {
       removeDbListener(fetchSegmentStats);
     };

@@ -126,12 +130,6 @@ export const MediaTranscription = (props: {
     } as ScrollIntoViewOptions);
   }, [currentSegmentIndex, transcription]);

-  useEffect(() => {
-    if (transcription?.state !== "pending") return;
-
-    generate();
-  }, [transcription]);
-
   if (!transcription)
     return (
       <div className="p-4 w-full">

@@ -7,6 +7,7 @@ import WaveSurfer from "wavesurfer.js";
 import { cn } from "@renderer/lib/utils";
 import { RadialProgress, toast } from "@renderer/components/ui";
 import { useHotkeys } from "react-hotkeys-hook";
+import { fetchFile } from "@ffmpeg/util";

 export const RecordButton = (props: {
   className?: string;

@@ -117,6 +118,26 @@ const RecordButtonPopover = (props: {
   onRecordEnd: (blob: Blob, duration: number) => void;
 }) => {
   const containerRef = useRef<HTMLDivElement>();
+  const { ffmpeg } = useContext(AppSettingsProviderContext);

+  const transcode = async (blob: Blob) => {
+    const input = `input.${blob.type.split("/")[1]}`;
+    const output = input.replace(/\.[^/.]+$/, ".wav");
+    await ffmpeg.writeFile(input, await fetchFile(blob));
+    await ffmpeg.exec([
+      "-i",
+      input,
+      "-ar",
+      "16000",
+      "-ac",
+      "1",
+      "-c:a",
+      "pcm_s16le",
+      output,
+    ]);
+    const data = await ffmpeg.readFile(output);
+    return new Blob([data], { type: "audio/wav" });
+  };
+
   useEffect(() => {
     if (!containerRef.current) return;
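The ffmpeg instance consumed from AppSettingsProviderContext is not part of this diff. With @ffmpeg/ffmpeg 0.12.x, which matches the writeFile/exec/readFile calls above, it would typically be created and loaded roughly as sketched below; the loader function and CDN URL are assumptions, not code from this commit. The transcode then yields 16 kHz mono PCM WAV, which is what whisper.cpp expects.

```ts
import { FFmpeg } from "@ffmpeg/ffmpeg";
import { toBlobURL } from "@ffmpeg/util";

// Hypothetical loader for the ffmpeg.wasm instance the popover consumes;
// the app's actual provider may fetch the core assets differently.
export async function loadFfmpeg(): Promise<FFmpeg> {
  const ffmpeg = new FFmpeg();
  const baseURL = "https://unpkg.com/@ffmpeg/core@0.12.6/dist/umd"; // assumed location
  await ffmpeg.load({
    coreURL: await toBlobURL(`${baseURL}/ffmpeg-core.js`, "text/javascript"),
    wasmURL: await toBlobURL(`${baseURL}/ffmpeg-core.wasm`, "application/wasm"),
  });
  return ffmpeg;
}
```
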
@@ -136,9 +157,10 @@ const RecordButtonPopover = (props: {
       startAt = Date.now();
     });

-    record.on("record-end", (blob: Blob) => {
+    record.on("record-end", async (blob: Blob) => {
       const duration = Date.now() - startAt;
-      props.onRecordEnd(blob, duration);
+      const output = await transcode(blob);
+      props.onRecordEnd(output, duration);
     });

     RecordPlugin.getAvailableAudioDevices()

@@ -132,7 +132,7 @@ export const WhisperModelOptions = () => {
     if (state.state === "completed") {
       model.downloaded = true;
       setWhisperModel(model.name);
-    } else if (state.state === "cancelled") {
+    } else if (state.state === "cancelled" || state.state === "interrupted") {
       model.downloaded = false;
       model.downloadState = null;
     }

@@ -32,7 +32,7 @@ export default () => {
       setCurrentStepValid(!!libraryPath);
       break;
     case 3:
-      setCurrentStepValid(true);
+      setCurrentStepValid(Boolean(whisperConfig.model));
       break;
     case 4:
       setCurrentStepValid(initialized);