Fix: record without ffmpeg & others (#201)

* remove ffmpeg transcode when save recording

* fix model download

* transcode record audio in renderer

* fix transcribe dead loop when whisper not working

* force to select a model

* check model before transcribe
This commit is contained in:
an-lee
2024-01-24 15:05:56 +08:00
committed by GitHub
parent e9fece9280
commit ff14e1d04a
9 changed files with 55 additions and 22 deletions

View File

@@ -295,6 +295,7 @@
"language": "Language",
"sttAiModel": "STT AI model",
"checkingWhisper": "Checking whisper status",
"pleaseDownloadWhisperModelFirst": "Please download whisper model first",
"whisperIsWorkingGood": "Whisper is working good",
"whisperIsNotWorking": "Whisper is not working",
"checkingWhisperModel": "Checking whisper model",

View File

@@ -294,6 +294,7 @@
"language": "语言",
"sttAiModel": "语音转文本 AI 模型",
"checkingWhisper": "正在检查 Whisper",
"pleaseDownloadWhisperModelFirst": "请先下载 Whisper 模型",
"whisperIsWorkingGood": "Whisper 正常工作",
"whisperIsNotWorking": "Whisper 无法正常工作,请尝试更换模型后重试,或联系开发者",
"checkingWhisperModel": "正在检查 Whisper 模型",

View File

@@ -22,7 +22,6 @@ import settings from "@main/settings";
import { hashFile } from "@/utils";
import log from "electron-log/main";
import storage from "@main/storage";
import Ffmpeg from "@main/ffmpeg";
import { Client } from "@/api";
import { WEB_API_URL } from "@/constants";
import { AzureSpeechSdk } from "@main/azure-speech-sdk";
@@ -283,20 +282,16 @@ export class Recording extends Model<Recording> {
params;
const format = blob.type.split("/")[1];
const tempfile = path.join(settings.cachePath(), `${Date.now()}.${format}`);
await fs.outputFile(tempfile, Buffer.from(blob.arrayBuffer));
const wavFile = path.join(
const file = path.join(
settings.userDataPath(),
"recordings",
`${Date.now()}.wav`
`${Date.now()}.${format}`
);
await fs.outputFile(file, Buffer.from(blob.arrayBuffer));
const ffmpeg = new Ffmpeg();
await ffmpeg.convertToWav(tempfile, wavFile);
const md5 = await hashFile(wavFile, { algo: "md5" });
const filename = `${md5}.wav`;
fs.renameSync(wavFile, path.join(path.dirname(wavFile), filename));
const md5 = await hashFile(file, { algo: "md5" });
const filename = `${md5}.${format}`;
fs.renameSync(file, path.join(path.dirname(file), filename));
return this.create(
{

View File

@@ -24,7 +24,11 @@ class Downloader {
webContents.downloadURL(url);
webContents.session.on("will-download", (_event, item, _webContents) => {
if (savePath) {
item.setSavePath(savePath);
if (fs.statSync(savePath).isDirectory()) {
item.setSavePath(path.join(savePath, item.getFilename()));
} else {
item.setSavePath(savePath);
}
} else {
item.setSavePath(
path.join(app.getPath("downloads"), item.getFilename())

View File

@@ -5,6 +5,7 @@ import { WHISPER_MODELS_OPTIONS, PROCESS_TIMEOUT } from "@/constants";
import { exec } from "child_process";
import fs from "fs-extra";
import log from "electron-log/main";
import { t } from "i18next";
const logger = log.scope("whisper");
const MAGIC_TOKENS = ["Mrs.", "Ms.", "Mr.", "Dr.", "Prof.", "St."];
@@ -18,6 +19,13 @@ class Whipser {
}
currentModel() {
if (!this.config.availableModels) return;
if (!this.config.model) {
const model = this.config.availableModels[0];
settings.setSync("whisper.model", this.config.availableModels[0].name);
return model.savePath;
}
return (this.config.availableModels || []).find(
(m) => m.name === this.config.model
)?.savePath;
@@ -177,6 +185,10 @@ class Whipser {
return fs.readJson(outputFile);
}
if (!this.currentModel()) {
throw new Error(t("pleaseDownloadWhisperModelFirst"));
}
const command = [
`"${this.binMain}"`,
`--file "${file}"`,

View File

@@ -112,6 +112,10 @@ export const MediaTranscription = (props: {
addDblistener(fetchSegmentStats);
fetchSegmentStats();
if (transcription?.state == "pending") {
generate();
}
return () => {
removeDbListener(fetchSegmentStats);
};
@@ -126,12 +130,6 @@ export const MediaTranscription = (props: {
} as ScrollIntoViewOptions);
}, [currentSegmentIndex, transcription]);
useEffect(() => {
if (transcription?.state !== "pending") return;
generate();
}, [transcription]);
if (!transcription)
return (
<div className="p-4 w-full">

View File

@@ -7,6 +7,7 @@ import WaveSurfer from "wavesurfer.js";
import { cn } from "@renderer/lib/utils";
import { RadialProgress, toast } from "@renderer/components/ui";
import { useHotkeys } from "react-hotkeys-hook";
import { fetchFile } from "@ffmpeg/util";
export const RecordButton = (props: {
className?: string;
@@ -117,6 +118,26 @@ const RecordButtonPopover = (props: {
onRecordEnd: (blob: Blob, duration: number) => void;
}) => {
const containerRef = useRef<HTMLDivElement>();
const { ffmpeg } = useContext(AppSettingsProviderContext);
const transcode = async (blob: Blob) => {
const input = `input.${blob.type.split("/")[1]}`;
const output = input.replace(/\.[^/.]+$/, ".wav");
await ffmpeg.writeFile(input, await fetchFile(blob));
await ffmpeg.exec([
"-i",
input,
"-ar",
"16000",
"-ac",
"1",
"-c:a",
"pcm_s16le",
output,
]);
const data = await ffmpeg.readFile(output);
return new Blob([data], { type: "audio/wav" });
};
useEffect(() => {
if (!containerRef.current) return;
@@ -136,9 +157,10 @@ const RecordButtonPopover = (props: {
startAt = Date.now();
});
record.on("record-end", (blob: Blob) => {
record.on("record-end", async (blob: Blob) => {
const duration = Date.now() - startAt;
props.onRecordEnd(blob, duration);
const output = await transcode(blob);
props.onRecordEnd(output, duration);
});
RecordPlugin.getAvailableAudioDevices()

View File

@@ -132,7 +132,7 @@ export const WhisperModelOptions = () => {
if (state.state === "completed") {
model.downloaded = true;
setWhisperModel(model.name);
} else if (state.state === "cancelled") {
} else if (state.state === "cancelled" || state.state === "interrupted") {
model.downloaded = false;
model.downloadState = null;
}

View File

@@ -32,7 +32,7 @@ export default () => {
setCurrentStepValid(!!libraryPath);
break;
case 3:
setCurrentStepValid(true);
setCurrentStepValid(Boolean(whisperConfig.model));
break;
case 4:
setCurrentStepValid(initialized);