Feat: transcribe from web (#204)
* add transcribe from web
* transcribe from web
* add azure speech ai
* fix azure speech output
* may select stt service
* fix UI
* remove debug code
* lint
* fix default stt service
* tweak
* fix secondsToTimestamp
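In short: transcription can now run against three STT backends — the local whisper.cpp binary, Azure AI Speech, or a Cloudflare AI worker — selected via `whisperConfig.service`. A minimal sketch of the new call site, following the `Transcription` model change below (`engine` falls back to "whisper" for results from older local runs):

const {
  engine = "whisper",
  model,
  transcription,
} = await whisper.transcribe(wavFile, { force, extra: ["--split-on-word"] });
const result = whisper.groupTranscription(transcription);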
@@ -2,9 +2,10 @@ export const DATABASE_NAME = "enjoy_database";
 export const LIBRARY_PATH_SUFFIX = "EnjoyLibrary";
 
 export const STORAGE_WORKER_ENDPOINT = "https://enjoy-storage.baizhiheizi.com";
+export const AI_WORKER_ENDPOINT = "https://enjoy-ai.baizhiheizi.com";
 export const WEB_API_URL = "https://enjoy-web.fly.dev";
 
-export const REPO_URL = "https://github.com/xiaolai/everyone-can-use-english"
+export const REPO_URL = "https://github.com/xiaolai/everyone-can-use-english";
 
 // https://huggingface.co/ggerganov/whisper.cpp/tree/main
 export const WHISPER_MODELS_OPTIONS = [
@@ -156,7 +156,7 @@
   "autoCenter": "auto center",
   "inlineCaption": "inline caption",
   "autoScroll": "auto scroll",
-  "translate:": "translate",
+  "translate": "translate",
   "displayIpa": "display IPA",
   "detail": "detail",
   "remove": "remove",
@@ -295,7 +295,13 @@
   "advancedSettings": "Advanced settings",
   "advanced": "Advanced",
   "language": "Language",
-  "sttAiModel": "STT AI model",
+  "sttAiService": "STT AI service",
+  "local": "Local",
+  "localSpeechToTextDescription": "Use local whisper model to transcribe.",
+  "azureAi": "Azure AI",
+  "azureSpeechToTextDescription": "Use Azure AI Speech to transcribe.",
+  "cloudflareAi": "Cloudflare AI",
+  "cloudflareSpeechToTextDescription": "Use Cloudflare AI Worker to transcribe.",
   "checkingWhisper": "Checking whisper status",
   "pleaseDownloadWhisperModelFirst": "Please download whisper model first",
   "whisperIsWorkingGood": "Whisper is working good",
@@ -156,7 +156,7 @@
   "autoCenter": "自动居中",
   "inlineCaption": "内联字幕",
   "autoScroll": "自动滚动",
-  "translate:": "翻译",
+  "translate": "翻译",
   "displayIpa": "标注音标",
   "detail": "详情",
   "remove": "删除",
@@ -294,7 +294,13 @@
   "advancedSettingsShort": "高级设置",
   "advancedSettings": "高级设置",
   "language": "语言",
-  "sttAiModel": "语音转文本 AI 模型",
+  "sttAiService": "语音转文本服务",
+  "local": "本地",
+  "localSpeechToTextDescription": "使用本地 whisper 模型进行语音转文本",
+  "azureAi": "Azure AI",
+  "azureSpeechToTextDescription": "使用 Azure AI Speech 进行语音转文本",
+  "cloudflareAi": "Cloudflare AI",
+  "cloudflareSpeechToTextDescription": "使用 Cloudflare AI 进行语音转文本",
   "checkingWhisper": "正在检查 Whisper",
   "pleaseDownloadWhisperModelFirst": "请先下载 Whisper 模型",
   "whisperIsWorkingGood": "Whisper 正常工作",
@@ -71,4 +71,58 @@ export class AzureSpeechSdk {
       });
     });
   }
+
+  async transcribe(params: {
+    filePath: string;
+    language?: string;
+  }): Promise<SpeechRecognitionResultType[]> {
+    const { filePath, language = "en-US" } = params;
+
+    const audioConfig = sdk.AudioConfig.fromWavFileInput(
+      fs.readFileSync(filePath)
+    );
+
+    // setting the recognition language.
+    this.config.speechRecognitionLanguage = language;
+    this.config.requestWordLevelTimestamps();
+    this.config.outputFormat = sdk.OutputFormat.Detailed;
+
+    // create the speech recognizer.
+    const reco = new sdk.SpeechRecognizer(this.config, audioConfig);
+
+    logger.debug("Start transcribe.");
+
+    let results: SpeechRecognitionResultType[] = [];
+    return new Promise((resolve, reject) => {
+      reco.recognizing = (_s, e) => {
+        logger.debug("Intermediate result received: ", e.result.text);
+      };
+      reco.recognized = (_s, e) => {
+        logger.debug("Got final result", e.result.text);
+        const json = e.result.properties.getProperty(
+          sdk.PropertyId.SpeechServiceResponse_JsonResult
+        );
+        const result = JSON.parse(json);
+        results = results.concat(result);
+      };
+      reco.canceled = (_s, e) => {
+        logger.debug("CANCELED: Reason=" + e.reason);
+
+        if (e.reason === sdk.CancellationReason.Error) {
+          logger.debug(`CANCELED: ErrorCode=${e.errorCode}`);
+          logger.debug("CANCELED: ErrorDetails=" + e.errorDetails);
+          return reject(new Error(e.errorDetails));
+        }
+
+        reco.stopContinuousRecognitionAsync();
+      };
+      reco.sessionStopped = (_s, _e) => {
+        logger.debug("\n Session stopped event.");
+        reco.stopContinuousRecognitionAsync();
+        return resolve(results);
+      };
+
+      reco.startContinuousRecognitionAsync();
+    });
+  }
 }
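The new `AzureSpeechSdk#transcribe` above is consumed by `transcribeFromAzure` further down; a minimal usage sketch, assuming a short-lived token issued by the web API (the file path is illustrative, and the input must already be WAV since the SDK reads it with `AudioConfig.fromWavFileInput`):

const { token, region } = await webApi.generateSpeechToken();
const azure = new AzureSpeechSdk(token, region);
// hypothetical path; pass language to override the "en-US" default
const results = await azure.transcribe({ filePath: "/tmp/example.wav" });
// results: SpeechRecognitionResultType[], one entry per recognized utterance,
// each carrying NBest candidates with word-level Offset/Duration.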
@@ -148,7 +148,11 @@ export class Transcription extends Model<Transcription> {
     await this.update({
       state: "processing",
     });
-    const { model, transcription } = await whisper.transcribe(wavFile, {
+    const {
+      engine = "whisper",
+      model,
+      transcription,
+    } = await whisper.transcribe(wavFile, {
       force,
       extra: [
         "--split-on-word",
@@ -158,7 +162,7 @@ export class Transcription extends Model<Transcription> {
     });
     const result = whisper.groupTranscription(transcription);
     this.update({
-      engine: "whisper",
+      engine,
       model: model?.type,
       result,
       state: "finished",
@@ -58,12 +58,29 @@ const dbPath = () => {
 };
 
 const whisperConfig = (): WhisperConfigType => {
+  const model = settings.getSync("whisper.model") as string;
+
+  let service = settings.getSync(
+    "whisper.service"
+  ) as WhisperConfigType["service"];
+
+  if (!service) {
+    if (model) {
+      settings.setSync("whisper.service", "local");
+      service = "local";
+    } else {
+      settings.setSync("whisper.service", "azure");
+      service = "azure";
+    }
+  }
+
   return {
+    service,
     availableModels: settings.getSync(
      "whisper.availableModels"
    ) as WhisperConfigType["availableModels"],
     modelsPath: settings.getSync("whisper.modelsPath") as string,
-    model: settings.getSync("whisper.model") as string,
+    model,
   };
 };
 
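The effect of the defaulting above, sketched with hypothetical settings states (the keys are the real `whisper.*` settings; the states are illustrative):

// "whisper.model" already set (a model was downloaded earlier) -> service defaults to "local"
// "whisper.model" unset (fresh install)                        -> service defaults to "azure"
// Either way the choice is persisted with settings.setSync("whisper.service", ...).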
@@ -1,13 +1,30 @@
 import { ipcMain } from "electron";
 import settings from "@main/settings";
 import path from "path";
-import { WHISPER_MODELS_OPTIONS, PROCESS_TIMEOUT } from "@/constants";
+import {
+  WHISPER_MODELS_OPTIONS,
+  PROCESS_TIMEOUT,
+  AI_WORKER_ENDPOINT,
+} from "@/constants";
 import { exec } from "child_process";
 import fs from "fs-extra";
 import log from "electron-log/main";
 import { t } from "i18next";
+import axios from "axios";
+import { milisecondsToTimestamp } from "@/utils";
+import { AzureSpeechSdk } from "@main/azure-speech-sdk";
+import { Client } from "@/api";
+import { WEB_API_URL } from "@/constants";
+import { sortedUniqBy, take } from "lodash";
 
 const logger = log.scope("whisper");
 
+const webApi = new Client({
+  baseUrl: process.env.WEB_API_URL || WEB_API_URL,
+  accessToken: settings.getSync("user.accessToken") as string,
+  logger: log.scope("api/client"),
+});
+
+const MAGIC_TOKENS = ["Mrs.", "Ms.", "Mr.", "Dr.", "Prof.", "St."];
+const END_OF_WORD_REGEX = /[^\.!,\?][\.!\?]/g;
 class Whipser {
@@ -135,7 +152,7 @@
       group?: boolean;
     }
   ): Promise<
-    TranscriptionSegmentType[] | TranscriptionResultSegmentGroupType[]
+    TranscriptionResultSegmentType[] | TranscriptionResultSegmentGroupType[]
   > {
     const { prompt, group = false } = options || {};
 
@@ -164,17 +181,123 @@
     }
   }
 
+  async transcribe(
+    file: string,
+    options?: {
+      force?: boolean;
+      extra?: string[];
+    }
+  ): Promise<Partial<WhisperOutputType>> {
+    if (this.config.service === "local") {
+      return this.transcribeFromLocal(file, options);
+    } else if (this.config.service === "azure") {
+      return this.transcribeFromAzure(file);
+    } else if (this.config.service === "cloudflare") {
+      return this.transcribeFromCloudflare(file);
+    } else {
+      throw new Error("Unknown service");
+    }
+  }
+
+  async transcribeFromAzure(file: string): Promise<Partial<WhisperOutputType>> {
+    const { token, region } = await webApi.generateSpeechToken();
+    const sdk = new AzureSpeechSdk(token, region);
+
+    const results = await sdk.transcribe({
+      filePath: file,
+    });
+
+    const transcription: TranscriptionResultSegmentType[] = [];
+    results.forEach((result) => {
+      logger.debug(result);
+      const best = take(sortedUniqBy(result.NBest, "Confidence"), 1)[0];
+      const words = best.Display.trim().split(" ");
+
+      best.Words.map((word, index) => {
+        let text = word.Word;
+        if (words.length === best.Words.length) {
+          text = words[index];
+        }
+
+        if (
+          index === best.Words.length - 1 &&
+          !text.trim().match(END_OF_WORD_REGEX)
+        ) {
+          text = text + ".";
+        }
+
+        transcription.push({
+          offsets: {
+            from: word.Offset / 1e4,
+            to: (word.Offset + word.Duration) / 1e4,
+          },
+          timestamps: {
+            from: milisecondsToTimestamp(word.Offset / 1e4),
+            to: milisecondsToTimestamp((word.Offset + word.Duration) * 1e4),
+          },
+          text,
+        });
+      });
+    });
+
+    return {
+      engine: "azure",
+      model: {
+        type: "Azure AI Speech",
+      },
+      transcription,
+    };
+  }
+
+  async transcribeFromCloudflare(
+    file: string
+  ): Promise<Partial<WhisperOutputType>> {
+    logger.debug("transcribing from CloudFlare");
+
+    const data = fs.readFileSync(file);
+    const res: CfWhipserOutputType = (
+      await axios.postForm(`${AI_WORKER_ENDPOINT}/audio/transcriptions`, data)
+    ).data;
+    logger.debug("transcription from Web,", res);
+
+    const transcription: TranscriptionResultSegmentType[] = res.words.map(
+      (word) => {
+        return {
+          offsets: {
+            from: word.start * 1000,
+            to: word.end * 1000,
+          },
+          timestamps: {
+            from: milisecondsToTimestamp(word.start * 1000),
+            to: milisecondsToTimestamp(word.end * 1000),
+          },
+          text: word.word,
+        };
+      }
+    );
+    logger.debug("converted transcription,", transcription);
+
+    return {
+      engine: "cloudflare",
+      model: {
+        type: "@cf/openai/whisper",
+      },
+      transcription,
+    };
+  }
+
   /* Ensure the file is in wav format
    * and 16kHz sample rate
    */
-  async transcribe(
+  async transcribeFromLocal(
     file: string,
-    options: {
+    options?: {
       force?: boolean;
       extra?: string[];
-    } = {}
-  ) {
-    const { force = false, extra = [] } = options;
+    }
+  ): Promise<Partial<WhisperOutputType>> {
+    logger.debug("transcribing from local");
+    const { force = false, extra = [] } = options || {};
     const filename = path.basename(file, path.extname(file));
     const tmpDir = settings.cachePath();
     const outputFile = path.join(tmpDir, filename + ".json");
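Two time units meet in the methods above: Azure reports `Offset`/`Duration` in 100-nanosecond ticks, while the Cloudflare whisper output uses seconds; both are normalized to milliseconds before formatting. A quick check of the arithmetic:

// Azure: 15_000_000 ticks / 1e4 = 1_500 ms = 1.5 s
// Cloudflare: 1.5 s * 1000 = 1_500 ms
// (Note that the Azure `to` timestamp above multiplies by 1e4 where its `from`
// divides; dividing in both places would be the consistent conversion.)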
@@ -232,9 +355,9 @@
   }
 
   groupTranscription(
-    transcription: TranscriptionSegmentType[]
+    transcription: TranscriptionResultSegmentType[]
   ): TranscriptionResultSegmentGroupType[] {
-    const generateGroup = (group?: TranscriptionSegmentType[]) => {
+    const generateGroup = (group?: TranscriptionResultSegmentType[]) => {
       if (!group || group.length === 0) return;
 
       const firstWord = group[0];
@@ -255,7 +378,7 @@
     };
 
     const groups: TranscriptionResultSegmentGroupType[] = [];
-    let group: TranscriptionSegmentType[] = [];
+    let group: TranscriptionResultSegmentType[] = [];
 
     transcription.forEach((segment) => {
       const text = segment.text.trim();
@@ -310,6 +433,31 @@
       });
     });
 
+    ipcMain.handle("whisper-set-service", async (event, service) => {
+      if (service === "local") {
+        try {
+          await this.initialize();
+          settings.setSync("whisper.service", service);
+          this.config.service = service;
+          return this.config;
+        } catch (err) {
+          event.sender.send("on-notification", {
+            type: "error",
+            message: err.message,
+          });
+        }
+      } else if (["cloudflare", "azure"].includes(service)) {
+        settings.setSync("whisper.service", service);
+        this.config.service = service;
+        return this.config;
+      } else {
+        event.sender.send("on-notification", {
+          type: "error",
+          message: "Unknown service",
+        });
+      }
+    });
+
     ipcMain.handle("whisper-check", async (_event) => {
       return await this.check();
     });
@@ -326,6 +326,9 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", {
     setModel: (model: string) => {
       return ipcRenderer.invoke("whisper-set-model", model);
     },
+    setService: (service: string) => {
+      return ipcRenderer.invoke("whisper-set-service", service);
+    },
     check: () => {
       return ipcRenderer.invoke("whisper-check");
    },
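From the renderer, the new bridge method pairs with the IPC handler registered earlier; a minimal sketch ("azure" stands for any of the three accepted service ids, and the bridge is the `__ENJOY_APP__` object exposed above, seen as `EnjoyApp.whisper` in the settings provider below):

const config = await window.__ENJOY_APP__.whisper.setService("azure");
console.log(config.service); // "azure"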
@@ -220,7 +220,11 @@ export const MediaCaption = (props: {
         </Button>
       </DropdownMenuTrigger>
       <DropdownMenuContent>
-        <DropdownMenuItem disabled={translating} onClick={translate}>
+        <DropdownMenuItem
+          className="cursor-pointer capitalize"
+          disabled={translating}
+          onClick={translate}
+        >
           {translating ? (
             <LoaderIcon className="w-4 h-4 mr-2 animate-spin" />
           ) : (
@@ -228,7 +232,11 @@
           )}
           <span>{t("translate")}</span>
         </DropdownMenuItem>
-        <DropdownMenuItem disabled={ipaGenerating} onClick={toogleIPA}>
+        <DropdownMenuItem
+          className="cursor-pointer capitalize"
+          disabled={ipaGenerating}
+          onClick={toogleIPA}
+        >
           {ipaGenerating ? (
             <LoaderIcon className="w-4 h-4 mr-2 animate-spin" />
           ) : (
@@ -8,6 +8,11 @@ import {
   DialogDescription,
   DialogFooter,
   toast,
+  Select,
+  SelectTrigger,
+  SelectContent,
+  SelectItem,
+  SelectValue,
 } from "@renderer/components/ui";
 import { WhisperModelOptions } from "@renderer/components";
 import { AppSettingsProviderContext } from "@renderer/context";
@@ -15,9 +20,8 @@ import { useContext, useEffect, useState } from "react";
 import { InfoIcon, AlertCircleIcon } from "lucide-react";
 
 export const WhisperSettings = () => {
-  const { whisperConfig, refreshWhisperConfig, EnjoyApp } = useContext(
-    AppSettingsProviderContext
-  );
+  const { whisperConfig, refreshWhisperConfig, EnjoyApp, setWhisperService } =
+    useContext(AppSettingsProviderContext);
   const [stderr, setStderr] = useState("");
 
   useEffect(() => {
@@ -48,7 +52,7 @@
       <div className="flex items-start justify-between py-4">
         <div className="">
           <div className="flex items-center mb-2">
-            <span>{t("sttAiModel")}</span>
+            <span>{t("sttAiService")}</span>
             {stderr && (
               <Button
                 variant="ghost"
@@ -62,49 +66,74 @@
             )}
           </div>
           <div className="text-sm text-muted-foreground">
-            {whisperConfig.model}
+            {whisperConfig?.service === "local" &&
+              t("localSpeechToTextDescription")}
+            {whisperConfig?.service === "azure" &&
+              t("azureSpeechToTextDescription")}
+            {whisperConfig?.service === "cloudflare" &&
+              t("cloudflareSpeechToTextDescription")}
           </div>
         </div>
 
         <div className="flex items-center space-x-2">
-          <Button onClick={handleCheck} variant="secondary" size="sm">
-            {t("check")}
-          </Button>
-          <Dialog>
-            <DialogTrigger asChild>
-              <Button variant="secondary" size="sm">
-                {t("edit")}
-              </Button>
-            </DialogTrigger>
-            <DialogContent>
-              <DialogHeader>{t("sttAiModel")}</DialogHeader>
-              <DialogDescription>
-                {t("chooseAIModelDependingOnYourHardware")}
-              </DialogDescription>
-
-              <WhisperModelOptions />
-
-              <DialogFooter>
-                <div className="text-xs flex items-start space-x-2">
-                  <InfoIcon className="mr-1.5 w-4 h-4" />
-                  <span className="flex-1 opacity-70">
-                    {t("yourModelsWillBeDownloadedTo", {
-                      path: whisperConfig.modelsPath,
-                    })}
-                  </span>
-                  <Button
-                    onClick={() => {
-                      EnjoyApp.shell.openPath(whisperConfig.modelsPath);
-                    }}
-                    variant="default"
-                    size="sm"
-                  >
-                    {t("open")}
-                  </Button>
-                </div>
-              </DialogFooter>
-            </DialogContent>
-          </Dialog>
+          <Select
+            value={whisperConfig.service}
+            onValueChange={(value) => {
+              setWhisperService(value);
+            }}
+          >
+            <SelectTrigger className="min-w-fit">
+              <SelectValue placeholder="service"></SelectValue>
+            </SelectTrigger>
+            <SelectContent>
+              <SelectItem value="local">{t("local")}</SelectItem>
+              <SelectItem value="azure">{t("azureAi")}</SelectItem>
+              <SelectItem value="cloudflare">{t("cloudflareAi")}</SelectItem>
+            </SelectContent>
+          </Select>
+
+          {whisperConfig.service === "local" && (
+            <>
+              <Button onClick={handleCheck} variant="secondary" size="sm">
+                {t("check")}
+              </Button>
+              <Dialog>
+                <DialogTrigger asChild>
+                  <Button variant="secondary" size="sm">
+                    {t("model")}
+                  </Button>
+                </DialogTrigger>
+                <DialogContent>
+                  <DialogHeader>{t("sttAiService")}</DialogHeader>
+                  <DialogDescription>
+                    {t("chooseAIModelDependingOnYourHardware")}
+                  </DialogDescription>
+
+                  <WhisperModelOptions />
+
+                  <DialogFooter>
+                    <div className="text-xs flex items-start space-x-2">
+                      <InfoIcon className="mr-1.5 w-4 h-4" />
+                      <span className="flex-1 opacity-70">
+                        {t("yourModelsWillBeDownloadedTo", {
+                          path: whisperConfig.modelsPath,
+                        })}
+                      </span>
+                      <Button
+                        onClick={() => {
+                          EnjoyApp.shell.openPath(whisperConfig.modelsPath);
+                        }}
+                        variant="outline"
+                        size="sm"
+                      >
+                        {t("open")}
+                      </Button>
+                    </div>
+                  </DialogFooter>
+                </DialogContent>
+              </Dialog>
+            </>
+          )}
         </div>
       </div>
     );
@@ -5,7 +5,7 @@ import { cva, type VariantProps } from "class-variance-authority"
 import { cn } from "@renderer/lib/utils"
 
 const buttonVariants = cva(
-  "capitalize inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50",
+  "capitalize inline-flex items-center justify-center rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50 min-w-fit",
   {
     variants: {
       variant: {
@@ -68,7 +68,7 @@ export const WhisperModelOptionsPanel = () => {
           onClick={() => {
             EnjoyApp.shell.openPath(whisperConfig.modelsPath);
           }}
-          variant="default"
+          variant="outline"
           size="sm"
         >
           {t("open")}
@@ -16,6 +16,7 @@ type AppSettingsProviderState = {
   logout?: () => void;
   setLibraryPath?: (path: string) => Promise<void>;
   setWhisperModel?: (name: string) => Promise<void>;
+  setWhisperService?: (name: string) => Promise<void>;
   ffmpegConfig?: FfmpegConfigType;
   ffmpeg?: FFmpeg;
   whisperConfig?: WhisperConfigType;
@@ -190,6 +191,13 @@ export const AppSettingsProvider = ({
     });
   };
 
+  const setWhisperService = async (name: WhisperConfigType["service"]) => {
+    return EnjoyApp.whisper.setService(name).then((config) => {
+      if (!config) return;
+      setWhisperConfig(config);
+    });
+  };
+
   const validate = async () => {
     setInitialized(Boolean(user && libraryPath));
   };
@@ -208,6 +216,7 @@
         libraryPath,
         setLibraryPath: setLibraryPathHandler,
         setWhisperModel,
+        setWhisperService,
         ffmpegConfig,
         ffmpeg,
         whisperConfig,
@@ -16,8 +16,11 @@ export function cn(...inputs: ClassValue[]) {
 }
 
 export function secondsToTimestamp(seconds: number) {
-  const date = new Date(seconds * 1000);
-  return date.toISOString().substr(11, 8);
+  const h = Math.floor(seconds / 3600).toString();
+  const m = Math.floor((seconds % 3600) / 60).toString();
+  const s = Math.floor((seconds % 3600) % 60).toString();
+
+  return `${h.padStart(2, "0")}:${m.padStart(2, "0")}:${s.padStart(2, "0")}`;
 }
 
 export function humanizeDuration(
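The rewrite matters for durations of 24 hours and up, where the Date-based version wraps around; a quick comparison in plain Node (90000 s = 25 h):

// before: new Date(90000 * 1000).toISOString().substr(11, 8) // "01:00:00" (wrapped)
// after:  secondsToTimestamp(90000)                          // "25:00:00"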
enjoy/src/types/enjoy-app.d.ts (vendored)
@@ -186,6 +186,9 @@ type EnjoyAppType = {
     config: () => Promise<WhisperConfigType>;
     check: () => Promise<{ success: boolean; log: string }>;
     setModel: (model: string) => Promise<WhisperConfigType>;
+    setService: (
+      service: WhisperConfigType["service"]
+    ) => Promise<WhisperConfigType>;
     transcribeBlob: (
       blob: { type: string; arrayBuffer: ArrayBuffer },
       prompt?: string
enjoy/src/types/index.d.ts (vendored)
@@ -26,6 +26,7 @@ type NotificationType = {
 };
 
 type WhisperConfigType = {
+  service: "local" | "azure" | "cloudflare";
   availableModels: {
     type: string;
     name: string;
@@ -39,24 +40,25 @@
 };
 
 type WhisperOutputType = {
+  engine?: string;
   model: {
-    audio: {
+    audio?: {
       cts: number;
       head: number;
       layer: number;
       state: number;
     };
-    ftype: number;
-    mels: number;
-    multilingual: number;
-    text: {
+    ftype?: number;
+    mels?: number;
+    multilingual?: number;
+    text?: {
       cts: number;
       head: number;
       layer: number;
       state: number;
     };
     type: string;
-    vocab: number;
+    vocab?: number;
   };
   params: {
     language: string;
@@ -67,19 +69,17 @@
     languate: string;
   };
   systeminfo: string;
-  transcription: TranscriptionSegmentType[];
+  transcription: TranscriptionResultSegmentType[];
 };
 
-type TranscriptionSegmentType = {
-  offsets: {
-    from: number;
-    to: number;
-  };
-  timestamps: {
-    from: string;
-    to: string;
-  };
-  text: string;
-};
+type CfWhipserOutputType = {
+  text: string;
+  words_count: number;
+  words: {
+    word: string;
+    start: number;
+    end: number;
+  }[];
+};
 
 type TransactionStateType = {
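A hypothetical Cloudflare worker response matching `CfWhipserOutputType` above (all values invented for illustration; `start`/`end` are in seconds):

// {
//   "text": "Hello world.",
//   "words_count": 2,
//   "words": [
//     { "word": "Hello", "start": 0, "end": 0.42 },
//     { "word": "world.", "start": 0.42, "end": 0.9 }
//   ]
// }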
enjoy/src/types/pronunciation-assessment.d.ts (vendored)
@@ -62,3 +62,24 @@ type PronunciationAssessmentWordResultType = {
     };
   };
 };
+
+type SpeechRecognitionResultType = {
+  Id: string;
+  RecognitionStatus: string;
+  Offset: number;
+  Duration: number;
+  Channel: number;
+  DisplayText: string;
+  NBest: {
+    Confidence: number;
+    Lexical: string;
+    ITN: string;
+    MaskedITN: string;
+    Display: string;
+    Words: {
+      Word: string;
+      Offset: number;
+      Duration: number;
+    }[];
+  }[];
+};
@@ -66,3 +66,14 @@ export function generatePitch(peaks: Float32Array, sampleRate: number) {
 
   return { frequencies, baseFrequency };
 }
+
+export function milisecondsToTimestamp(ms: number) {
+  const hours = Math.floor(ms / 3600000).toString();
+  const minutes = Math.floor((ms % 3600000) / 60000).toString();
+  const seconds = Math.floor(((ms % 360000) % 60000) / 1000).toString();
+  const milliseconds = Math.floor(((ms % 360000) % 60000) % 1000).toString();
+  return `${hours.padStart(2, "0")}:${minutes.padStart(
+    2,
+    "0"
+  )}:${seconds.padStart(2, "0")},${milliseconds}`;
+}
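A worked example of the new helper (note the SRT-style comma before the milliseconds, and that the trailing milliseconds are not zero-padded):

// milisecondsToTimestamp(3725500) // "01:02:05,500"
// milisecondsToTimestamp(61005)   // "00:01:01,5"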