Transcription force alignment & more (#416)

* add wavesurfer-provider

* brand new layout for player

* refactor pitch contour

* clean up

* update style

* refactor

* update layout

* use new layout for video

* refactor

* may select word

* may edit word timestamp

* may toggle multiselect words

* clean code

* improve word region update

* improve layout

* update layout

* add echogarden

* fix test

* use aligned transcription

* fix ipa

* some refactor

* improve code

* implement ipa & translate & lookup

* recording play & share

* fix

* fix post audio

* improve layout

* may delete recording

* may record

* fix video player layout

* fix player in conversation

* render recording along with original audio

* may create custom region in recording

* fix float issue when seekTo

* fix recording player

* fix load more recordings

* fix seekTo

* clean up

* refactor pitch contour

* fix some warnings

* upgrade deps

* fix group transcription sentence

* zoom to fit when segment update

* add more hotkeys

* update player layout

* improve style

* play recording over the original audio when comparing

* update echogarden dep

* add recorded mark on transcription

* fix recording pitch contour rendering

* improve recording

* adjust pitch finder params
an-lee
2024-03-16 19:42:37 +08:00
committed by GitHub
parent fe43755e02
commit 90f38e9226
67 changed files with 6898 additions and 2643 deletions


@@ -7,9 +7,9 @@
"markdown-it-mathjax3": "^4.3.2",
"markdown-it-sub": "^2.0.0",
"markdown-it-sup": "^2.0.0",
"mermaid": "^10.8.0",
"sass": "^1.71.1",
"vitepress": "^1.0.0-rc.42",
"mermaid": "^10.9.0",
"sass": "^1.72.0",
"vitepress": "^1.0.0-rc.45",
"vitepress-plugin-mermaid": "^2.0.16",
"vue": "^3.4.21"
},


@@ -78,6 +78,16 @@ test("valid ffmpeg command", async () => {
expect(res).toBeTruthy();
});
test("validate echogarden align command", async () => {
const res = await page.evaluate(() => {
return window.__ENJOY_APP__.echogarden.check();
});
expect(res).toBeTruthy();
const settings = fs.readJsonSync(path.join(resultDir, "settings.json"));
expect(settings.whisper.service).toBe("local");
});
test("should setup default library path", async () => {
const settings = fs.readJsonSync(path.join(resultDir, "settings.json"));
expect(settings.library).not.toBeNull();


@@ -122,9 +122,39 @@ test.describe("with login", async () => {
},
});
});
});
/*
* steps:
* 1. create a tts conversation
* 2. submit a message to the conversation
* 3. the speech should auto create
*/
test("tts conversation", async () => {
// navigate to the conversations page
await page.getByTestId("sidebar-conversations").click();
// trigger new conversation modal
await page.getByTestId("conversation-new-button").click();
// create a tts conversation
await page.click("[data-testid=conversation-preset-tts]");
await page.getByTestId("conversation-form").waitFor();
await page.click("[data-testid=conversation-form-submit]");
// wait for the conversation to be created
await page.getByTestId("conversation-page").waitFor();
// submit a message to the conversation
await page.getByTestId("conversation-page-input").fill("How are you?");
await page.getByTestId("conversation-page-submit").click();
await page.locator(".ai-message").waitFor();
const player = page
.locator(".ai-message")
.getByTestId("wavesurfer-container");
await player.waitFor();
expect(await player.isVisible()).toBeTruthy();
});
/*
@@ -136,6 +166,9 @@ test.describe("with login", async () => {
* 5. audio waveform player should be visible and transcription should be generated
*/
test("gpt conversation", async () => {
// navigate to the conversations page
await page.getByTestId("sidebar-conversations").click();
// trigger new conversation modal
await page.getByTestId("conversation-new-button").click();
@@ -166,43 +199,12 @@ test.describe("with login", async () => {
// add to library
await page.getByTestId("message-start-shadow").click();
await page.getByTestId("audio-detail").waitFor();
await page.getByTestId("audio-player").waitFor();
await page.getByTestId("media-player-container").waitFor();
await page.getByTestId("media-transcription").waitFor();
await page.getByTestId("media-transcription-result").waitFor();
expect(
await page.getByTestId("media-transcription-result").isVisible()
).toBeTruthy();
});
/*
* steps:
* 1. create a tts conversation
* 2. submit a message to the conversation
* 3. the speech should auto create
*/
test("tts conversation", async () => {
// trigger new conversation modal
await page.getByTestId("conversation-new-button").click();
// create a tts conversation
await page.click("[data-testid=conversation-preset-tts]");
await page.getByTestId("conversation-form").waitFor();
await page.click("[data-testid=conversation-form-submit]");
// wait for the conversation to be created
await page.getByTestId("conversation-page").waitFor();
// submit a message to the conversation
await page.getByTestId("conversation-page-input").fill("How are you?");
await page.getByTestId("conversation-page-submit").click();
await page.locator(".ai-message").waitFor();
const player = page
.locator(".ai-message")
.getByTestId("wavesurfer-container");
await player.waitFor();
expect(await player.isVisible()).toBeTruthy();
});
});
});


@@ -12,7 +12,7 @@ const config = {
asar: {
// Binary files won't work in asar, so we need to unpack them
unpackDir:
"{.vite/build/lib,.vite/build/samples,node_modules/ffmpeg-static,node_modules/@andrkrn/ffprobe-static}",
"{.vite/build/lib,.vite/build/samples,node_modules/ffmpeg-static,node_modules/@andrkrn/ffprobe-static,node_modules/onnxruntime-node/bin}",
},
icon: "./assets/icon",
name: "Enjoy",


@@ -47,18 +47,18 @@
"@types/fluent-ffmpeg": "^2.1.24",
"@types/html-to-text": "^9.0.4",
"@types/intl-tel-input": "^18.1.4",
"@types/lodash": "^4.14.202",
"@types/lodash": "^4.17.0",
"@types/mark.js": "^8.11.12",
"@types/node": "^20.11.24",
"@types/react": "^18.2.62",
"@types/react-dom": "^18.2.19",
"@types/node": "^20.11.27",
"@types/react": "^18.2.66",
"@types/react-dom": "^18.2.22",
"@types/validator": "^13.11.9",
"@types/wavesurfer.js": "^6.0.12",
"@typescript-eslint/eslint-plugin": "^7.1.1",
"@typescript-eslint/parser": "^7.1.1",
"@typescript-eslint/eslint-plugin": "^7.2.0",
"@typescript-eslint/parser": "^7.2.0",
"@vitejs/plugin-react": "^4.2.1",
"autoprefixer": "^10.4.18",
"electron": "^29.1.0",
"electron": "^29.1.4",
"electron-playwright-helpers": "^1.7.1",
"eslint": "^8.57.0",
"eslint-import-resolver-typescript": "^3.6.1",
@@ -67,12 +67,13 @@
"octokit": "^3.1.2",
"progress": "^2.0.3",
"tailwind-merge": "^2.2.1",
"tailwind-scrollbar": "^3.1.0",
"tailwindcss": "^3.4.1",
"tailwindcss-animate": "^1.0.7",
"ts-node": "^10.9.2",
"tslib": "^2.6.2",
"typescript": "^5.3.3",
"vite": "^5.1.5",
"typescript": "^5.4.2",
"vite": "^5.1.6",
"vite-plugin-static-copy": "^1.0.1",
"zx": "^7.2.3"
},
@@ -81,7 +82,7 @@
"@ffmpeg/ffmpeg": "^0.12.10",
"@ffmpeg/util": "^0.12.1",
"@hookform/resolvers": "^3.3.4",
"@langchain/community": "^0.0.34",
"@langchain/community": "^0.0.39",
"@langchain/google-genai": "^0.0.10",
"@mozilla/readability": "^0.5.0",
"@radix-ui/react-accordion": "^1.1.2",
@@ -112,6 +113,7 @@
"axios": "^1.6.7",
"camelcase": "^8.0.0",
"camelcase-keys": "^9.1.3",
"chart.js": "^4.4.2",
"cheerio": "^1.0.0-rc.12",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.0",
@@ -122,7 +124,8 @@
"dayjs": "^1.11.10",
"decamelize": "^6.0.0",
"decamelize-keys": "^2.0.1",
"electron-log": "^5.1.1",
"echogarden": "https://github.com/an-lee/echogarden",
"electron-log": "^5.1.2",
"electron-settings": "^4.0.2",
"electron-squirrel-startup": "^1.0.0",
"ffmpeg-static": "^5.2.0",
@@ -130,27 +133,27 @@
"fs-extra": "^11.2.0",
"html-to-text": "^9.0.5",
"https-proxy-agent": "^7.0.4",
"i18next": "^23.10.0",
"intl-tel-input": "^19.5.5",
"i18next": "^23.10.1",
"intl-tel-input": "^19.5.7",
"js-md5": "^0.8.3",
"langchain": "^0.1.25",
"langchain": "^0.1.28",
"lodash": "^4.17.21",
"lucide-react": "^0.344.0",
"lucide-react": "^0.358.0",
"mark.js": "^8.11.1",
"microsoft-cognitiveservices-speech-sdk": "^1.35.0",
"next-themes": "^0.2.1",
"openai": "^4.28.4",
"microsoft-cognitiveservices-speech-sdk": "^1.36.0",
"next-themes": "^0.3.0",
"openai": "^4.29.0",
"pitchfinder": "^2.3.2",
"postcss": "^8.4.35",
"proxy-agent": "^6.4.0",
"react": "^18.2.0",
"react-activity-calendar": "^2.2.7",
"react-activity-calendar": "^2.2.8",
"react-dom": "^18.2.0",
"react-hook-form": "^7.51.0",
"react-hotkeys-hook": "^4.5.0",
"react-i18next": "^14.0.5",
"react-i18next": "^14.1.0",
"react-markdown": "^9.0.1",
"react-router-dom": "^6.22.2",
"react-router-dom": "^6.22.3",
"react-tooltip": "^5.26.3",
"reflect-metadata": "^0.2.1",
"rimraf": "^5.0.5",
@@ -160,7 +163,7 @@
"sqlite3": "^5.1.7",
"tailwind-scrollbar-hide": "^1.1.7",
"umzug": "^3.7.0",
"wavesurfer.js": "^7.7.3",
"wavesurfer.js": "^7.7.5",
"zod": "^3.22.4"
}
}


@@ -18,7 +18,7 @@ export default defineConfig({
/* Retry on CI only */
retries: process.env.CI ? 2 : 0,
/* Opt out of parallel tests on CI. */
workers: process.env.CI ? 1 : undefined,
workers: 1,
/* Reporter to use. See https://playwright.dev/docs/test-reporters */
reporter: "html",
/* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */


@@ -7,6 +7,24 @@ export const WEB_API_URL = "https://enjoy-web.fly.dev";
export const REPO_URL = "https://github.com/xiaolai/everyone-can-use-english";
export const MAGIC_TOKEN_REGEX =
/\b(Mrs|Ms|Mr|Dr|Prof|St|[a-zA-Z]{1,2}|\d{1,2})\.\b/g;
export const END_OF_SENTENCE_REGEX = /[^\.!,\?][\.!\?]/g;
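// Trim silence quieter than -50 dB from both ends: strip leading silence, reverse, strip leading silence again, reverse back.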
export const FFMPEG_TRIM_SILENCE_OPTIONS = [
"-af",
"silenceremove=1:start_duration=1:start_threshold=-50dB:detection=peak,aformat=dblp,areverse,silenceremove=start_periods=1:start_duration=1:start_threshold=-50dB:detection=peak,aformat=dblp,areverse",
];
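// Convert audio to 16 kHz mono 16-bit PCM WAV (the input format whisper.cpp expects).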
export const FFMPEG_CONVERT_WAV_OPTIONS = [
"-ar",
"16000",
"-ac",
"1",
"-c:a",
"pcm_s16le",
];
// https://huggingface.co/ggerganov/whisper.cpp/tree/main
export const WHISPER_MODELS_OPTIONS = [
{
@@ -344,3 +362,133 @@ export const CONVERSATION_PRESETS = [
},
},
];
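// Maps raw IPA phones (such as those produced by forced alignment) to a simplified set of common English phoneme symbols for display.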
export const IPA_MAPPING = {
p: "p",
b: "b",
t: "t",
d: "d",
ʈ: "t",
ɖ: "d",
c: "k",
ɟ: "g",
k: "k",
g: "g",
q: "k",
ɢ: "g",
ʔ: "",
ɡ: "g",
m: "m",
ɱ: "m",
n: "n",
ɳ: "n",
ɲ: "j",
ŋ: "ŋ",
ɴ: "ŋ",
: "n",
ʙ: "r",
r: "r",
ʀ: "r",
: "",
ɾ: "r",
ɽ: "r",
ɸ: "f",
β: "v",
f: "f",
v: "v",
θ: "θ",
ð: "ð",
s: "s",
z: "z",
ʃ: "ʃ",
ʒ: "ʒ",
ʂ: "s",
ʐ: "z",
ç: "",
ʝ: "j",
x: "h",
ɣ: "g",
χ: "h",
ʁ: "r",
ħ: "h",
ʕ: "",
h: "h",
ɦ: "h",
ɬ: "",
ɮ: "",
: "tʃ",
ʈʃ: "tʃ",
: "dʒ",
ʋ: "v",
ɹ: "r",
ɻ: "r",
j: "j",
ɰ: "w",
w: "w",
l: "l",
ɭ: "l",
ʎ: "j",
ʟ: "l",
i: "iː",
: "iː",
ʉɯ: "uː",
u: "uː",
iː: "iː",
ɪ: "ɪ",
ʏ: "ɪ",
ʊ: "ʊ",
ɨ: "ɪ",
: "ɪ",
e: "e",
ø: "e",
ɘ: "ə",
ɵ: "ə",
ɤ: "ɒ",
o: "ɔː",
ə: "ə",
oː: "ɔː",
ɛ: "æ",
œ: "æ",
ɜ: "əː",
ɞ: "əː",
ʌ: "ʌ",
ɔ: "ɔː",
ɜː: "əː",
uː: "uː",
ɔː: "ɔː",
ɛː: "æ",
æ: "æ",
a: "ɑː",
ɶ: "ɑː",
ɐ: "ɑː",
ɑ: "ɑː",
ɒ: "ɒ",
ɑː: "ɑː",
"◌˞": "",
ɚ: "ɪə",
ɝ: "ɪə",
ɹ̩: "r",
eɪ: "eɪ",
əʊ: "əʊ",
: "əʊ",
aɪ: "aɪ",
ɔɪ: "ɔɪ",
: "aʊ",
: "ɪə",
ɜr: "ɪə(r)",
ɑr: "ɑː(r)",
ɔr: "ɔː(r)",
oʊr: "əʊ(r)",
oːɹ: "ɔː(r)",
ir: "iː(r)",
ɪɹ: "ɪ(r)",
ɔːɹ: "ɔː(r)",
ɑːɹ: "ɑː(r)",
ʊɹ: "ʊ(r)",
ʊr: "ʊ(r)",
ɛr: "æ(r)",
ɛɹ: "æ(r)",
əl: "ə",
aɪɚ: "aɪ",
aɪə: "aɪ",
};


@@ -151,6 +151,7 @@
"yesterday": "yesterday",
"play": "play",
"pause": "pause",
"switchPlayMode": "switch play mode",
"playSingleSegment": "play single segment",
"playAllSegments": "play all segments",
"playInLoop": "play in loop",
@@ -241,9 +242,13 @@
"logoutAndRemoveAllPersonalData": "Logout and remove all personal data",
"logoutAndRemoveAllPersonalSettings": "Logout and remove all personal settings",
"hotkeys": "Hotkeys",
"system": "System",
"player": "Player",
"quitApp": "Quit APP",
"openPreferences": "Open preferences",
"playOrPause": "Play or pause",
"playOrPauseRecording": "Play or pause recording",
"startOrStopRecording": "start or stop recording",
"about": "About",
"currentVersion": "Current version",
"checkUpdate": "Check update",
@@ -268,8 +273,7 @@
"editResource": "edit resource",
"deleteResource": "delete resource",
"deleteResourceConfirmation": "Are you sure to delete {{name}}?",
"transcribeAudioConfirmation": "It will remove the old transcription. Are you sure to transcribe {{name}}",
"transcribeVideoConfirmation": "It will remove the old transcription. Are you sure to transcribe {{name}}",
"transcribeMediaConfirmation": "It will remove the old transcription. Are you sure to transcribe {{name}}",
"localFile": "local file",
"resourcesYouAddedRecently": "resources you added recently",
"recentlyAdded": "recently added",
@@ -291,6 +295,7 @@
"deleteRecording": "delete recording",
"deleteRecordingConfirmation": "Are you sure to delete this recording?",
"myRecordings": "my recordings",
"noRecordingForThisSegmentYet": "No recordings for this segment yet. Press <kbd>R</kbd> to start recording.",
"lastYear": "last year",
"less": "less",
"more": "more",
@@ -474,7 +479,19 @@
"itMayTakeAWhileToPrepareForTheFirstLoad": "It may take a while to prepare for the first load. Please be patient.",
"loadingTranscription": "Loading transcription",
"cannotFindMicrophone": "Cannot find microphone",
"savingRecording": "Saving recording",
"recordingSaved": "Recording saved",
"failedToSaveRecording": "Failed to save recording",
"speechNotCreatedYet": "Speech not created yet",
"goToConversation": "Go to conversation"
"goToConversation": "Go to conversation",
"mediaInfo": "Media Info",
"editRegion": "edit region",
"dragRegionBorderToEdit": "Drag region border to edit",
"startRecording": "start recording",
"stopRecording": "stop recording",
"playRecording": "play recording",
"clickAnyWordToSelect": "Click any words to select. Press shift to select multiple words.",
"currentRegionIsBeingEdited": "Current region is being edited",
"compare": "compare",
"selectRegion": "select region"
}


@@ -151,6 +151,7 @@
"yesterday": "昨天",
"play": "播放",
"pause": "暂停",
"switchPlayMode": "切换播放模式",
"playSingleSegment": "播放单句",
"playAllSegments": "播放所有",
"playInLoop": "单句循环",
@@ -241,9 +242,13 @@
"logoutAndRemoveAllPersonalData": "退出登录并删除所有个人数据",
"logoutAndRemoveAllPersonalSettings": "退出登录并删除所有个人设置选项",
"hotkeys": "快捷键",
"system": "系统",
"player": "播放器",
"quitApp": "退出应用",
"openPreferences": "打开设置",
"playOrPause": "播放/暂停",
"playOrPauseRecording": "播放/暂停录音",
"startOrStopRecording": "开始/结束录音",
"about": "关于",
"currentVersion": "当前版本",
"checkUpdate": "检查更新",
@@ -269,7 +274,6 @@
"deleteResource": "删除资源",
"deleteResourceConfirmation": "您确定要删除资源 {{name}} 吗?",
"transcribeAudioConfirmation": "这将删除原来的语音文本,您确定要重新对 {{name}} 进行语音转文本吗?",
"transcribeVideoConfirmation": "这将删除原来的语音文本,您确定要重新对 {{name}} 进行语音转文本吗?",
"localFile": "本地文件",
"recentlyAdded": "最近添加",
"resourcesYouAddedRecently": "最近添加的资源",
@@ -291,6 +295,7 @@
"deleteRecording": "删除录音",
"deleteRecordingConfirmation": "您确定要删除录音吗?",
"myRecordings": "我的练习",
"noRecordingForThisSegmentYet": "当前句子还没有练习过。按 <kbd>R</kbd> 键开始录音。",
"lastYear": "过去一年",
"less": "更少",
"more": "更多",
@@ -473,7 +478,19 @@
"itMayTakeAWhileToPrepareForTheFirstLoad": "首次加载可能需要一些时间,请耐心等候",
"loadingTranscription": "正在加载语音文本",
"cannotFindMicrophone": "无法找到麦克风",
"savingRecording": "正在保存录音",
"recordingSaved": "录音已保存",
"failedToSaveRecording": "保存录音失败",
"speechNotCreatedYet": "尚未生成语音",
"goToConversation": "前往对话"
"goToConversation": "前往对话",
"mediaInfo": "资源信息",
"editRegion": "修改当前区域",
"dragRegionBorderToEdit": "拖动区域边界以修改",
"startRecording": "开始录音",
"stopRecording": "结束录音",
"playRecording": "播放录音",
"clickAnyWordToSelect": "点击任意单词可以选中,同时按下 Shift 键可以多选",
"currentRegionIsBeingEdited": "当前区域正在编辑中",
"compare": "对比",
"selectRegion": "选取区域"
}


@@ -81,6 +81,12 @@
}
}
@layer components {
.scroll {
@apply scrollbar-thin scrollbar-thumb-primary scrollbar-track-secondary;
}
}
body {
user-select: none;
}


@@ -129,6 +129,11 @@ export class Audio extends Model<Audio> {
return this.getDataValue("metadata").duration;
}
@Column(DataType.VIRTUAL)
get mediaType(): string {
return "Audio";
}
get extname(): string {
return (
this.getDataValue("metadata").extname ||


@@ -25,6 +25,7 @@ import storage from "@main/storage";
import { Client } from "@/api";
import { WEB_API_URL } from "@/constants";
import { AzureSpeechSdk } from "@main/azure-speech-sdk";
import Ffmpeg from "@main/ffmpeg";
import camelcaseKeys from "camelcase-keys";
const logger = log.scope("db/models/recording");
@@ -299,10 +300,18 @@ export class Recording extends Model<Recording> {
referenceText?: string;
}
) {
const { targetId, targetType, referenceId, referenceText, duration } =
params;
const { targetId, targetType, referenceId, referenceText } = params;
let { duration } = params;
if (blob.arrayBuffer.byteLength === 0) {
throw new Error("Empty recording");
}
const format = blob.type.split("/")[1]?.split(";")?.[0];
if (!format) {
throw new Error("Unknown recording format");
}
const format = blob.type.split("/")[1];
const file = path.join(
settings.userDataPath(),
"recordings",
@@ -310,6 +319,18 @@ export class Recording extends Model<Recording> {
);
await fs.outputFile(file, Buffer.from(blob.arrayBuffer));
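// Recompute the duration from the saved file's metadata; presumably the duration reported by the recorder is not always reliable.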
try {
const ffmpeg = new Ffmpeg();
const metadata = await ffmpeg.generateMetadata(file);
duration = Math.floor(metadata.format.duration * 1000);
} catch (err) {
logger.error(err);
}
if (duration === 0) {
throw new Error("Failed to get duration of the recording");
}
const md5 = await hashFile(file, { algo: "md5" });
const filename = `${md5}.${format}`;
fs.renameSync(file, path.join(path.dirname(file), filename));


@@ -129,6 +129,11 @@ export class Video extends Model<Video> {
return this.getDataValue("metadata").duration;
}
@Column(DataType.VIRTUAL)
get mediaType(): string {
return "Video";
}
get extname(): string {
return (
this.getDataValue("metadata").extname ||


@@ -0,0 +1,68 @@
import { ipcMain } from "electron";
import { align } from "echogarden/dist/api/API.js";
import { AlignmentOptions } from "echogarden/dist/api/API";
import { AudioSourceParam } from "echogarden/dist/audio/AudioUtilities";
import path from "path";
import log from "@main/logger";
import url from "url";
import settings from "@main/settings";
import fs from "fs-extra";
const __filename = url.fileURLToPath(import.meta.url);
/*
* sample files will be in /app.asar.unpacked instead of /app.asar
*/
const __dirname = path
.dirname(__filename)
.replace("app.asar", "app.asar.unpacked");
const logger = log.scope("echogarden");
class EchogardenWrapper {
public align: typeof align;
constructor() {
this.align = align;
}
async check() {
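// Sanity-check echogarden by force-aligning a bundled sample clip, caching the resulting timeline for inspection.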
const sampleFile = path.join(__dirname, "samples", "jfk.wav");
try {
const result = await this.align(
sampleFile,
"And so my fellow Americans ask not what your country can do for you",
{}
);
logger.info(result);
fs.writeJsonSync(
path.join(settings.cachePath(), "echogarden-check.json"),
result,
{ spaces: 2 }
);
return true;
} catch (e) {
logger.error(e);
return false;
}
}
registerIpcHandlers() {
ipcMain.handle(
"echogarden-align",
async (
_event,
input: AudioSourceParam,
transcript: string,
options: AlignmentOptions
) => {
return this.align(input, transcript, options);
}
);
ipcMain.handle("echogarden-check", async (_event) => {
return this.check();
});
}
}
export default new EchogardenWrapper();
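
For reference, a minimal sketch of calling this wrapper from the renderer once the preload bridge below is in place (the option value and the exact shape of the returned timeline are assumptions based on echogarden's API):

// renderer side (hypothetical usage)
const alignment = await window.__ENJOY_APP__.echogarden.align(
  audioFilePath, // an AudioSourceParam; a file path works
  transcriptText, // the transcript to align against
  { language: "en" } // AlignmentOptions (assumed value)
);
// alignment.timeline nests sentence → word → token → phone entries,
// which MediaCaption below reads as caption.timeline[i].timeline[...].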


@@ -7,6 +7,7 @@ import path from "path";
import fs from "fs-extra";
import settings from "./settings";
import url from "url";
import { FFMPEG_CONVERT_WAV_OPTIONS } from "@/constants";
/*
* ffmpeg and ffprobe bin file will be in /app.asar.unpacked instead of /app.asar
@@ -19,6 +20,8 @@ const __dirname = path
.dirname(__filename)
.replace("app.asar", "app.asar.unpacked");
process.env.FFMPEG_PATH = ffmpegPath;
const logger = log.scope("ffmpeg");
export default class FfmpegWrapper {
checkCommand(): Promise<boolean> {
@@ -211,7 +214,7 @@ export default class FfmpegWrapper {
);
}
options = options || ["-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le"];
options = options || FFMPEG_CONVERT_WAV_OPTIONS;
const ffmpeg = Ffmpeg();
return new Promise((resolve, reject) => {


@@ -21,6 +21,7 @@ import { AudibleProvider, TedProvider } from "@main/providers";
import Ffmpeg from "@main/ffmpeg";
import { Waveform } from "./waveform";
import url from "url";
import echogarden from "./echogarden";
const __filename = url.fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
@@ -49,6 +50,9 @@ main.init = () => {
// Prepare Settings
settings.registerIpcHandlers();
// echogarden
echogarden.registerIpcHandlers();
// Whisper
whisper.registerIpcHandlers();
@@ -433,10 +437,11 @@ ${log}
// Create the browser window.
const mainWindow = new BrowserWindow({
icon: "./assets/icon.png",
width: 1600,
height: 1200,
minWidth: 1024,
minHeight: 768,
width: 1920,
height: 1080,
minWidth: 1440,
minHeight: 900,
fullscreen: true,
webPreferences: {
preload: path.join(__dirname, "preload.js"),
},


@@ -350,6 +350,14 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", {
return ipcRenderer.invoke("audiowaveform-frequencies", file);
},
},
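// Bridge the echogarden align/check IPC handlers to the renderer.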
echogarden: {
align: (input: string, transcript: string, options: any) => {
return ipcRenderer.invoke("echogarden-align", input, transcript, options);
},
check: () => {
return ipcRenderer.invoke("echogarden-check");
},
},
whisper: {
config: () => {
return ipcRenderer.invoke("whisper-config");


@@ -1,394 +0,0 @@
import { useEffect, useState, useContext } from "react";
import {
DbProviderContext,
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
import {
LoaderSpin,
RecordingsList,
PagePlaceholder,
MediaPlayer,
MediaTranscription,
} from "@renderer/components";
import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import {
AlertDialog,
AlertDialogHeader,
AlertDialogDescription,
AlertDialogTitle,
AlertDialogContent,
AlertDialogFooter,
AlertDialogCancel,
Button,
PingPoint,
Progress,
ScrollArea,
toast,
} from "@renderer/components/ui";
import { t } from "i18next";
import { useTranscribe } from "@renderer/hooks";
import { useNavigate } from "react-router-dom";
export const AudioDetail = (props: { id?: string; md5?: string }) => {
const navigate = useNavigate();
const { id, md5 } = props;
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const { whisperConfig } = useContext(AISettingsProviderContext);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const [audio, setAudio] = useState<AudioType | null>(null);
const [transcription, setTranscription] = useState<TranscriptionType>(null);
const [sharing, setSharing] = useState<boolean>(false);
// Transcription controls
const [transcribing, setTranscribing] = useState<boolean>(false);
const { transcribe } = useTranscribe();
const [transcribingProgress, setTranscribingProgress] = useState<number>(0);
// Player controls
const [initialized, setInitialized] = useState<boolean>(false);
const [currentTime, setCurrentTime] = useState<number>(0);
const [seek, setSeek] = useState<{
seekTo: number;
timestamp: number;
}>();
const [currentSegmentIndex, setCurrentSegmentIndex] = useState<number>(0);
const [zoomRatio, setZoomRatio] = useState<number>(1.0);
const [isPlaying, setIsPlaying] = useState(false);
const [playMode, setPlayMode] = useState<"loop" | "single" | "all">("all");
const [playBackRate, setPlaybackRate] = useState<number>(1);
const [displayInlineCaption, setDisplayInlineCaption] =
useState<boolean>(true);
const onTransactionUpdate = (event: CustomEvent) => {
const { model, action, record } = event.detail || {};
if (model === "Transcription" && action === "update") {
setTranscription(record);
}
};
const findOrCreateTranscription = async () => {
if (!audio) return;
if (transcription) return;
return EnjoyApp.transcriptions
.findOrCreate({
targetId: audio.id,
targetType: "Audio",
})
.then((transcription) => {
setTranscription(transcription);
})
.catch((err) => {
toast.error(err.message);
});
};
const generateTranscription = async () => {
if (transcribing) return;
if (!transcription) {
await findOrCreateTranscription();
}
setTranscribing(true);
setTranscribingProgress(0);
try {
const { engine, model, result } = await transcribe(audio.src, {
targetId: audio.id,
targetType: "Audio",
});
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result,
engine,
model,
});
} catch (err) {
toast.error(err.message);
}
setTranscribing(false);
};
const findTranscriptionFromWebApi = async () => {
if (!transcription) {
await findOrCreateTranscription();
}
const res = await webApi.transcriptions({
targetMd5: audio.md5,
});
const transcript = (res?.transcriptions || []).filter((t) =>
["base", "small", "medium", "large", "whisper-1"].includes(t.model)
)?.[0];
if (!transcript) {
throw new Error("Transcription not found");
}
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result: transcript.result,
engine: transcript.engine,
model: transcript.model,
});
};
const findOrGenerateTranscription = async () => {
try {
await findTranscriptionFromWebApi();
} catch (err) {
console.error(err);
await generateTranscription();
}
};
const handleShare = async () => {
if (!audio.source && !audio.isUploaded) {
try {
await EnjoyApp.audios.upload(audio.id);
} catch (err) {
toast.error(t("shareFailed"), {
description: err.message,
});
return;
}
}
webApi
.createPost({
targetType: "Audio",
targetId: audio.id,
})
.then(() => {
toast.success(t("sharedSuccessfully"), {
description: t("sharedAudio"),
});
})
.catch((err) => {
toast.error(t("shareFailed"), {
description: err.message,
});
});
setSharing(false);
};
useEffect(() => {
const where = id ? { id } : { md5 };
EnjoyApp.audios.findOne(where).then((audio) => {
if (audio) {
setAudio(audio);
} else {
toast.error(t("models.audio.notFound"));
}
});
}, [id, md5]);
useEffect(() => {
if (!audio) return;
findOrCreateTranscription();
}, [audio]);
useEffect(() => {
if (!initialized) return;
if (!transcription) return;
addDblistener(onTransactionUpdate);
if (transcription?.state == "pending") {
findOrGenerateTranscription();
}
if (whisperConfig.service === "local") {
EnjoyApp.whisper.onProgress((_, p: number) => {
if (p > 100) p = 100;
setTranscribingProgress(p);
});
}
return () => {
removeDbListener(onTransactionUpdate);
EnjoyApp.whisper.removeProgressListeners();
};
}, [md5, transcription, initialized]);
if (!audio) {
return <LoaderSpin />;
}
if (!audio.src) {
return (
<PagePlaceholder placeholder="invalid" extra="cannot find play source" />
);
}
return (
<div className="relative" data-testid="audio-detail">
<div className={`grid grid-cols-7 gap-4 ${initialized ? "" : "blur-sm"}`}>
<div className="col-span-5 h-[calc(100vh-6.5rem)] flex flex-col">
<MediaPlayer
mediaId={audio.id}
mediaType="Audio"
mediaUrl={audio.src}
mediaMd5={audio.md5}
transcription={transcription}
currentTime={currentTime}
setCurrentTime={setCurrentTime}
currentSegmentIndex={currentSegmentIndex}
setCurrentSegmentIndex={setCurrentSegmentIndex}
recordButtonVisible={true}
seek={seek}
initialized={initialized}
setInitialized={setInitialized}
zoomRatio={zoomRatio}
setZoomRatio={setZoomRatio}
isPlaying={isPlaying}
setIsPlaying={setIsPlaying}
playMode={playMode}
setPlayMode={setPlayMode}
playBackRate={playBackRate}
setPlaybackRate={setPlaybackRate}
displayInlineCaption={displayInlineCaption}
setDisplayInlineCaption={setDisplayInlineCaption}
onShare={() => setSharing(true)}
onDecoded={({ duration, sampleRate }) => {
if (audio.duration) return;
EnjoyApp.audios.update(audio.id, {
metadata: Object.assign({}, audio.metadata, {
duration,
sampleRate,
}),
});
}}
/>
<ScrollArea className={`flex-1 relative bg-muted`}>
<RecordingsList
key={`recordings-list-${audio.id}-${currentSegmentIndex}`}
targetId={audio.id}
targetType="Audio"
referenceText={transcription?.result?.[currentSegmentIndex]?.text}
referenceId={currentSegmentIndex}
/>
</ScrollArea>
</div>
<div className="col-span-2 h-[calc(100vh-6.5rem)]">
<MediaTranscription
mediaId={audio.id}
mediaType="Audio"
mediaName={audio.name}
transcription={transcription}
transcribing={transcribing}
progress={transcribingProgress}
transcribe={generateTranscription}
currentSegmentIndex={currentSegmentIndex}
onSelectSegment={(index) => {
if (currentSegmentIndex === index) return;
const segment = transcription?.result?.[index];
if (!segment) return;
if (playMode === "loop" && isPlaying) setIsPlaying(false);
setSeek({
seekTo: segment.offsets.from / 1000,
timestamp: Date.now(),
});
}}
/>
</div>
</div>
<AlertDialog open={sharing} onOpenChange={(value) => setSharing(value)}>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("shareAudio")}</AlertDialogTitle>
<AlertDialogDescription>
{t("areYouSureToShareThisAudioToCommunity")}
</AlertDialogDescription>
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<Button variant="default" onClick={handleShare}>
{t("share")}
</Button>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
{/* Show loading progress until waveform is decoded & transcribed */}
<AlertDialog open={!initialized || !Boolean(transcription?.result)}>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
<AlertDialogDescription>
{t("itMayTakeAWhileToPrepareForTheFirstLoad")}
</AlertDialogDescription>
</AlertDialogHeader>
<div className="py-4">
{initialized ? (
<div className="mb-4 flex items-center space-x-4">
<CheckCircleIcon className="w-4 h-4 text-green-500" />
<span>{t("waveformIsDecoded")}</span>
</div>
) : (
<div className="mb-4 flex items-center space-x-4">
<LoaderIcon className="w-4 h-4 animate-spin" />
<span>{t("decodingWaveform")}</span>
</div>
)}
{!transcription ? (
<div className="flex items-center space-x-4">
<LoaderIcon className="w-4 h-4 animate-spin" />
<span>{t("loadingTranscription")}</span>
</div>
) : transcription.result ? (
<div className="flex items-center space-x-4">
<CheckCircleIcon className="w-4 h-4 text-green-500" />
<span>{t("transcribedSuccessfully")}</span>
</div>
) : transcribing ? (
<div className="">
<div className="flex items-center space-x-4 mb-2">
<PingPoint colorClassName="bg-yellow-500" />
<span>{t("transcribing")}</span>
</div>
{whisperConfig.service === "local" && (
<Progress value={transcribingProgress} />
)}
</div>
) : (
<div className="flex items-center space-x-4">
<PingPoint colorClassName="bg-muted" />
<div className="inline">
<span>{t("notTranscribedYet")}</span>
{initialized && (
<Button
onClick={generateTranscription}
className="ml-4"
size="sm"
>
{t("transcribe")}
</Button>
)}
</div>
</div>
)}
</div>
<AlertDialogFooter>
<Button variant="secondary" onClick={() => navigate(-1)}>
{t("cancel")}
</Button>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
</div>
);
};


@@ -0,0 +1,72 @@
import { useEffect, useContext, useRef } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import {
MediaLoadingModal,
MediaCaption,
MediaPlayerControls,
MediaTabs,
MediaCurrentRecording,
} from "@renderer/components";
import { formatDuration } from "@renderer/lib/utils";
import { useAudio } from "@renderer/hooks";
export const AudioPlayer = (props: { id?: string; md5?: string }) => {
const { id, md5 } = props;
const { media, currentTime, setMedia, setRef } = useContext(
MediaPlayerProviderContext
);
const { audio } = useAudio({ id, md5 });
const ref = useRef(null);
useEffect(() => {
if (!audio) return;
setMedia(audio);
}, [audio]);
useEffect(() => {
setRef(ref);
}, [ref]);
return (
<div data-testid="audio-player">
<div className="h-[calc(100vh-37.5rem)] mb-4">
<div className="grid grid-cols-3 gap-4 px-6 h-full">
<div className="col-span-1 rounded-lg border shadow-lg h-[calc(100vh-37.5rem)]">
<MediaTabs />
</div>
<div className="col-span-2 h-[calc(100vh-37.5rem)]">
<MediaCaption />
</div>
</div>
</div>
<div className="h-[33rem] flex flex-col">
<div className="h-[13rem] py-2 px-6 mb-4">
<MediaCurrentRecording />
</div>
<div className="w-full h-[13rem] px-6 py-2 mb-4">
<div className="border rounded-xl shadow-lg relative">
<div data-testid="media-player-container" ref={ref} />
<div className="absolute right-2 top-1">
<span className="text-sm">
{formatDuration(currentTime || 0)}
</span>
<span className="mx-1">/</span>
<span className="text-sm">
{formatDuration(media?.duration || 0)}
</span>
</div>
</div>
</div>
<div className="w-full bg-background z-10 shadow-xl">
<MediaPlayerControls />
</div>
</div>
<MediaLoadingModal />
</div>
);
};


@@ -1,8 +1,9 @@
export * from "./audios-table";
export * from "./audio-edit-form";
export * from "./audio-detail";
export * from "./audios-component";
export * from "./audible-books-segment";
export * from "./audios-segment";
export * from "./audio-card";
export * from "./audio-player";


@@ -1,5 +1,6 @@
import { useEffect, useState, useRef, useCallback } from "react";
import { PitchContour } from "@renderer/components";
import { renderPitchContour } from "@renderer/lib/utils";
import { extractFrequencies } from "@/utils";
import WaveSurfer from "wavesurfer.js";
import { Button, Skeleton } from "@renderer/components/ui";
import { PlayIcon, PauseIcon } from "lucide-react";
@@ -59,17 +60,25 @@ export const SpeechPlayer = (props: {
wavesurfer.on("pause", () => {
setIsPlaying(false);
}),
wavesurfer.on("decode", () => {
wavesurfer.on("ready", () => {
setDuration(wavesurfer.getDuration());
const peaks = wavesurfer.getDecodedData().getChannelData(0);
const sampleRate = wavesurfer.options.sampleRate;
wavesurfer.renderer.getWrapper().appendChild(
PitchContour({
peaks,
sampleRate,
height,
})
);
const data = extractFrequencies({ peaks, sampleRate });
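// Delay rendering so the waveform finishes drawing before the pitch contour canvas is overlaid (the 1s delay looks like a heuristic).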
setTimeout(() => {
renderPitchContour({
wrapper: wavesurfer.getWrapper(),
canvasId: `pitch-contour-${speech.id}-canvas`,
labels: new Array(data.length).fill(""),
datasets: [
{
data,
cubicInterpolationMode: "monotone",
pointRadius: 1,
},
],
});
}, 1000);
setInitialized(true);
}),
];


@@ -25,7 +25,6 @@ export * from "./login-form";
export * from "./choose-library-path-input";
export * from "./whisper-model-options";
export * from "./pitch-contour";
export * from "./reset-all-button";
export * from "./loader-spin";


@@ -1,5 +1,11 @@
export * from "./add-media-button";
export * from "./media-player";
export * from "./media-player-controls";
export * from "./media-caption";
export * from "./media-info-panel";
export * from "./media-recordings";
export * from "./media-current-recording";
export * from "./media-recorder";
export * from "./media-transcription";
export * from "./media-player";
export * from "./media-tabs";
export * from "./media-loading-modal";
export * from "./add-media-button";


@@ -1,91 +1,75 @@
import { useState, useEffect } from "react";
import { cn } from "@renderer/lib/utils";
import { useEffect, useState, useContext } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import cloneDeep from "lodash/cloneDeep";
import {
Button,
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuTrigger,
Popover,
PopoverContent,
PopoverAnchor,
toast,
ScrollArea,
Separator,
} from "@renderer/components/ui";
import { LookupResult } from "@renderer/components";
import {
ChevronDownIcon,
LanguagesIcon,
PlayIcon,
LoaderIcon,
SpeechIcon,
} from "lucide-react";
import { t } from "i18next";
import { LanguagesIcon, SpeechIcon } from "lucide-react";
import { Timeline } from "echogarden/dist/utilities/Timeline.d.js";
import { IPA_MAPPING } from "@/constants";
import { useAiCommand } from "@renderer/hooks";
import { LoaderIcon } from "lucide-react";
export const MediaCaption = (props: {
mediaId: string;
mediaType: string;
currentTime: number;
transcription: TranscriptionResultSegmentGroupType;
onSeek?: (time: number) => void;
className?: string;
isPlaying: boolean;
setIsPlaying: (isPlaying: boolean) => void;
}) => {
export const MediaCaption = () => {
const {
transcription,
wavesurfer,
currentSegmentIndex,
currentTime,
onSeek,
className,
isPlaying,
setIsPlaying,
} = props;
transcription,
regions,
activeRegion,
setActiveRegion,
editingRegion,
setEditingRegion,
setTranscriptionDraft,
} = useContext(MediaPlayerProviderContext);
const [activeIndex, setActiveIndex] = useState<number>(0);
const [selected, setSelected] = useState<{
index: number;
word: string;
position?: {
top: number;
left: number;
};
}>();
const [selectedIndices, setSelectedIndices] = useState<number[]>([]);
const [multiSelecting, setMultiSelecting] = useState<boolean>(false);
const [displayIpa, setDisplayIpa] = useState<boolean>(true);
const [translation, setTranslation] = useState<string>();
const [translating, setTranslating] = useState<boolean>(false);
const [displayTranslation, setDisplayTranslation] = useState<boolean>(false);
const [ipa, setIpa] = useState<{ word?: string; ipa?: string }[]>([]);
const [ipaGenerating, setIpaGenerating] = useState<boolean>(false);
const [displayIpa, setDisplayIpa] = useState<boolean>(false);
const [lookingUp, setLookingUp] = useState<boolean>(false);
const [lookupResult, setLookupResult] = useState<LookupType>();
const { translate, pronounce } = useAiCommand();
const caption = (transcription?.result?.timeline as Timeline)?.[
currentSegmentIndex
];
const toggleIpa = async () => {
if (ipaGenerating) return;
const { translate, lookupWord } = useAiCommand();
if (ipa.length > 0) {
setDisplayIpa(!displayIpa);
return;
}
const lookup = () => {
if (selectedIndices.length === 0) return;
setIpaGenerating(true);
toast.promise(
pronounce(transcription.text)
.then((words) => {
if (words?.length > 0) {
setIpa(words);
setDisplayIpa(true);
}
})
.finally(() => {
setIpaGenerating(false);
}),
{
loading: t("generatingIpa"),
success: t("generatedIpaSuccessfully"),
error: (err) => t("generatingIpaFailed", { error: err.message }),
position: "bottom-right",
}
);
const word = selectedIndices
.map((index) => caption.timeline[index].text)
.join(" ");
setLookingUp(true);
lookupWord({
word,
context: caption.text,
sourceId: transcription.targetId,
sourceType: transcription.targetType,
})
.then((lookup) => {
if (lookup?.meaning) {
setLookupResult(lookup);
}
})
.catch((error) => {
toast.error(error.message);
})
.finally(() => {
setLookingUp(false);
});
};
const toggleTranslation = async () => {
@@ -97,7 +81,7 @@ export const MediaCaption = (props: {
}
toast.promise(
translate(transcription.text)
translate(caption.text)
.then((result) => {
if (result) {
setTranslation(result);
@@ -116,177 +100,370 @@ export const MediaCaption = (props: {
);
};
const toggleMultiSelect = (event: KeyboardEvent) => {
setMultiSelecting(event.shiftKey && event.type === "keydown");
};
const toggleRegion = (index: number) => {
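// Clicking a word toggles a word-level region: a plain click replaces the region, shift-click (multiSelecting) extends it, and clicking inside the current word region falls back to the segment region.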
if (!activeRegion) return;
if (editingRegion) {
toast.warning(t("currentRegionIsBeingEdited"));
return;
}
const word = caption.timeline[index];
if (!word) return;
const start = word.startTime;
const end = word.endTime;
const regionStart = activeRegion.start;
const regionEnd = activeRegion.end;
if (activeRegion.id.startsWith("word-region")) {
if (start >= regionStart && end <= regionEnd) {
setActiveRegion(
regions.getRegions().find((r) => r.id.startsWith("segment-region"))
);
} else if (multiSelecting) {
const region = regions.addRegion({
id: `word-region-${index}`,
start: Math.min(start, regionStart),
end: Math.max(end, regionEnd),
color: "#fb6f9233",
drag: false,
resize: editingRegion,
});
setActiveRegion(region);
} else {
const region = regions.addRegion({
id: `word-region-${index}`,
start,
end,
color: "#fb6f9233",
drag: false,
resize: editingRegion,
});
setActiveRegion(region);
}
activeRegion.remove();
} else {
const region = regions.addRegion({
id: `word-region-${index}`,
start,
end,
color: "#fb6f9233",
drag: false,
resize: false,
});
setActiveRegion(region);
}
};
const markPhoneRegions = () => {
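// Toggle phone-level regions inside the selected words; clicking a phone region plays just that phone.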
const phoneRegions = regions
.getRegions()
.filter((r) => r.id.startsWith("phone-region"));
if (phoneRegions.length > 0) {
phoneRegions.forEach((r) => {
r.remove();
r.unAll();
});
return;
}
if (!activeRegion) return;
if (!activeRegion.id.startsWith("word-region")) return;
if (!selectedIndices) return;
selectedIndices.forEach((index) => {
const word = caption.timeline[index];
word.timeline.forEach((token) => {
token.timeline.forEach((phone) => {
const region = regions.addRegion({
id: `phone-region-${index}`,
start: phone.startTime,
end: phone.endTime,
color: "#efefefef",
drag: false,
resize: editingRegion,
});
region.on("click", () => {
region.play();
});
});
});
});
};
useEffect(() => {
if (!transcription) return;
const time = Math.round(currentTime * 1000);
const index = transcription.segments.findIndex(
(w) => time >= w.offsets.from && time < w.offsets.to
if (!caption) return;
const index = caption.timeline.findIndex(
(w) => currentTime >= w.startTime && currentTime < w.endTime
);
if (index !== activeIndex) {
setActiveIndex(index);
}
}, [currentTime, transcription]);
}, [currentTime, caption]);
if (!transcription) return null;
if (Math.round(currentTime * 1000) < transcription.offsets.from) return null;
useEffect(() => {
if (!caption?.timeline) return;
if (!activeRegion) return;
if (!activeRegion.id.startsWith("word-region")) {
setSelectedIndices([]);
return;
}
const indices: number[] = [];
caption.timeline.forEach((w, index) => {
if (
w.startTime >= activeRegion.start &&
(w.endTime <= activeRegion.end ||
// The last word's end time may somehow be slightly greater than the audio duration.
w.endTime > wavesurfer.getDuration())
) {
indices.push(index);
}
});
if (indices.length > 0) {
const el = document.getElementById(
`word-${currentSegmentIndex}-${indices[0]}`
);
}
setSelectedIndices(indices);
setLookupResult(undefined);
}, [caption, activeRegion]);
useEffect(() => {
if (!activeRegion) return;
if (!activeRegion.id.startsWith("word-region")) return;
const region = regions.addRegion({
id: `word-region-${selectedIndices.join("-")}`,
start: activeRegion.start,
end: activeRegion.end,
color: "#fb6f9233",
drag: false,
resize: editingRegion,
});
activeRegion.remove();
setActiveRegion(region);
const subscriptions = [
regions.on("region-updated", (region) => {
if (!region.id.startsWith("word-region")) return;
const draft = cloneDeep(transcription.result);
const draftCaption = draft.timeline[currentSegmentIndex];
const firstIndex = selectedIndices[0];
const lastIndex = selectedIndices[selectedIndices.length - 1];
const firstWord = draftCaption.timeline[firstIndex];
const lastWord = draftCaption.timeline[lastIndex];
// If no word is selected somehow, then ignore the update.
if (!firstWord || !lastWord) {
setEditingRegion(false);
return;
}
firstWord.startTime = region.start;
lastWord.endTime = region.end;
/* Update the timestamps of the previous and next words.
* This happens only when the resized region overlaps the previous or next word;
* words the region does not touch keep their original timestamps.
*/
const prevWord = draftCaption.timeline[firstIndex - 1];
const nextWord = draftCaption.timeline[lastIndex + 1];
if (
prevWord &&
prevWord.endTime > region.start &&
prevWord.startTime < region.start
) {
prevWord.endTime = region.start;
}
if (
nextWord &&
nextWord.startTime < region.end &&
nextWord.endTime > region.end
) {
nextWord.startTime = region.end;
}
/*
* If the last word is the last word of the segment, then update the segment's end time.
*/
if (lastIndex === draftCaption.timeline.length - 1) {
draftCaption.endTime = region.end;
}
setTranscriptionDraft(draft);
}),
];
return () => {
subscriptions.forEach((unsub) => unsub());
};
}, [editingRegion]);
useEffect(() => {
setTranslation(undefined);
setDisplayTranslation(false);
}, [caption]);
useEffect(() => {
document.addEventListener("keydown", (event: KeyboardEvent) =>
toggleMultiSelect(event)
);
document.addEventListener("keyup", (event: KeyboardEvent) =>
toggleMultiSelect(event)
);
return () => {
document.removeEventListener("keydown", toggleMultiSelect);
document.removeEventListener("keyup", toggleMultiSelect);
};
}, []);
if (!caption) return null;
return (
<div className={cn("relative px-4 py-2 text-lg", className)}>
<div className="flex items-start space-x-4">
<div className="flex-1">
<div className="flex flex-wrap">
{(transcription.segments || []).map((w, index) => (
<div
key={index}
className={`mr-1 cursor-pointer hover:bg-red-500/10 ${
index === activeIndex ? "text-red-500" : ""
}`}
onClick={(event) => {
setSelected({
index,
word: w.text,
position: {
top:
event.currentTarget.offsetTop +
event.currentTarget.offsetHeight,
left: event.currentTarget.offsetLeft,
},
});
setIsPlaying(false);
if (onSeek) onSeek(w.offsets.from / 1000);
}}
>
<div>{w.text}</div>
{displayIpa &&
ipa.find(
(i) =>
i.word.trim() === w.text.replace(/[\.",?!]/g, "").trim()
)?.ipa && (
<div className="text-sm text-foreground/70 font-serif">
{
ipa.find(
(i) =>
i.word.trim() ===
w.text.replace(/[\.",?!]/g, "").trim()
)?.ipa
}
</div>
)}
</div>
))}
</div>
{displayTranslation && translation && (
<div className="select-text py-2 text-sm text-foreground/70">
{translation}
</div>
)}
<div className="h-full flex justify-between space-x-4">
<ScrollArea className="flex-1 px-6 py-4 font-serif h-full border shadow-lg rounded-lg">
<div className="flex flex-wrap mb-4">
{/* Use the words split from the caption text when their count matches the timeline length; otherwise fall back to the timeline. */}
{caption.text.split(" ").length === caption.timeline.length
? caption.text.split(" ").map((word, index) => (
<div
key={index}
id={`word-${currentSegmentIndex}-${index}`}
className={`pr-2 pb-2 cursor-pointer hover:bg-red-500/10 ${
index === activeIndex ? "text-red-500" : ""
} ${selectedIndices.includes(index) ? "bg-red-500/10" : ""}`}
onClick={() => toggleRegion(index)}
>
<div className="">
<div className="text-2xl">{word}</div>
{displayIpa && (
<div className="text-muted-foreground">
{caption.timeline[index].timeline
.map((t) => t.timeline.map((s) => s.text).join(""))
.join(" · ")}
</div>
)}
</div>
</div>
))
: (caption.timeline || []).map((w, index) => (
<div
key={index}
id={`word-${currentSegmentIndex}-${index}`}
className={`pr-2 pb-2 cursor-pointer hover:bg-red-500/10 ${
index === activeIndex ? "text-red-500" : ""
} ${
selectedIndices.includes(index)
? "bg-red-500/10 selected"
: ""
}`}
onClick={() => toggleRegion(index)}
>
<div className="">
<div className="text-2xl">{w.text}</div>
{displayIpa && (
<div className="text-muted-foreground">
{w.timeline
.map((t) => t.timeline.map((s) => s.text).join(""))
.join(" · ")}
</div>
)}
</div>
</div>
))}
</div>
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button variant="ghost" size="icon">
<ChevronDownIcon className="w-4 h-4" />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent>
<DropdownMenuItem
className="cursor-pointer capitalize"
disabled={translating}
onClick={toggleTranslation}
>
{translating ? (
<LoaderIcon className="w-4 h-4 mr-2 animate-spin" />
) : (
<LanguagesIcon className="w-4 h-4 mr-2" />
)}
<span>{t("translate")}</span>
</DropdownMenuItem>
<DropdownMenuItem
className="cursor-pointer capitalize"
disabled={ipaGenerating}
onClick={toggleIpa}
>
{ipaGenerating ? (
<LoaderIcon className="w-4 h-4 mr-2 animate-spin" />
) : (
<SpeechIcon className="w-4 h-4 mr-2" />
)}
<span>{t("displayIpa")}</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</div>
{displayTranslation && translation && (
<>
<Separator className="my-2" />
<div className="text-sm font-semibold py-2">{t("translation")}</div>
<div className="select-text py-2 text-sm text-foreground">
{translation}
</div>
</>
)}
<Popover
open={Boolean(selected) && !isPlaying}
onOpenChange={(value) => {
if (!value) setSelected(null);
}}
>
<PopoverAnchor
className="absolute w-0 h-0"
style={{
top: selected?.position?.top,
left: selected?.position?.left,
}}
></PopoverAnchor>
<PopoverContent
className="w-full max-w-md p-0"
updatePositionStrategy="always"
{selectedIndices.length > 0 && (
<>
<Separator className="my-2" />
<div className="flex flex-wrap items-center space-x-2 select-text mb-4">
{selectedIndices.map((index) => {
const word = caption.timeline[index];
if (!word) return;
return (
<div key={index}>
<div className="font-serif text-lg font-semibold tracking-tight">
{word.text}
</div>
<div className="text-sm text-serif text-muted-foreground">
{word.timeline
.map((t) => t.timeline.map((s) => s.text).join(""))
.join(" · ")}
</div>
</div>
);
})}
</div>
{lookupResult ? (
<div className="py-2 select-text">
<div className="text-serif">
{lookupResult.meaning.translation}
</div>
<div className="text-serif">
{lookupResult.meaning.definition}
</div>
</div>
) : (
<div className="flex items-center py-2">
<Button size="sm" disabled={lookingUp} onClick={lookup}>
{lookingUp && (
<LoaderIcon className="animate-spin w-4 h-4 mr-2" />
)}
<span>{t("translate")}</span>
</Button>
</div>
)}
</>
)}
</ScrollArea>
<div className="flex flex-col space-y-2">
<Button
variant={displayTranslation ? "secondary" : "outline"}
size="icon"
className="rounded-full w-8 h-8 p-0"
disabled={translating}
onClick={toggleTranslation}
>
{selected?.word && (
<ResourceCaptionSelectionMenu
word={selected.word}
context={transcription.segments
.map((w) => w.text)
.join(" ")
.trim()}
mediaId={props.mediaId}
mediaType={props.mediaType}
onPlay={() => {
setIsPlaying(true);
}}
/>
)}
</PopoverContent>
</Popover>
</div>
);
};
const ResourceCaptionSelectionMenu = (props: {
word: string;
context: string;
mediaId: string;
mediaType: string;
onPlay: () => void;
}) => {
const { word, context, mediaId, mediaType, onPlay } = props;
const [translating, setTranslating] = useState<boolean>(false);
if (!word) return null;
if (translating) {
return (
<LookupResult
word={word}
context={context}
sourceId={mediaId}
sourceType={mediaType}
/>
);
}
return (
<div className="flex items-center p-1">
<Button onClick={onPlay} variant="ghost" size="icon">
<PlayIcon size={16} />
</Button>
<Button onClick={() => setTranslating(true)} variant="ghost" size="icon">
<LanguagesIcon size={16} />
</Button>
<LanguagesIcon className="w-4 h-4" />
</Button>
<Button
variant={displayIpa ? "secondary" : "outline"}
size="icon"
className="rounded-full w-8 h-8 p-0"
onClick={() => setDisplayIpa(!displayIpa)}
>
<SpeechIcon className="w-4 h-4" />
</Button>
</div>
</div>
);
};


@@ -0,0 +1,511 @@
import { useEffect, useContext, useRef, useState } from "react";
import {
AppSettingsProviderContext,
MediaPlayerProviderContext,
} from "@renderer/context";
import { MediaRecorder, RecordingDetail } from "@renderer/components";
import { renderPitchContour } from "@renderer/lib/utils";
import { extractFrequencies } from "@/utils";
import WaveSurfer from "wavesurfer.js";
import Regions from "wavesurfer.js/dist/plugins/regions";
import {
AlertDialog,
AlertDialogContent,
AlertDialogDescription,
AlertDialogFooter,
AlertDialogHeader,
AlertDialogTitle,
AlertDialogCancel,
AlertDialogAction,
Button,
DropdownMenu,
DropdownMenuItem,
DropdownMenuTrigger,
DropdownMenuContent,
toast,
Sheet,
SheetContent,
SheetHeader,
SheetClose,
} from "@renderer/components/ui";
import {
GitCompareIcon,
PauseIcon,
PlayIcon,
Share2Icon,
GaugeCircleIcon,
ChevronDownIcon,
MoreVerticalIcon,
TextCursorInputIcon,
} from "lucide-react";
import { t } from "i18next";
import { formatDuration } from "@renderer/lib/utils";
import { useHotkeys } from "react-hotkeys-hook";
export const MediaCurrentRecording = (props: { height?: number }) => {
const { height = 192 } = props;
const {
isRecording,
currentRecording,
renderPitchContour: renderMediaPitchContour,
regions: mediaRegions,
activeRegion: mediaActiveRegion,
wavesurfer,
zoomRatio,
editingRegion,
currentTime: mediaCurrentTime,
} = useContext(MediaPlayerProviderContext);
const { webApi, EnjoyApp } = useContext(AppSettingsProviderContext);
const [player, setPlayer] = useState(null);
const [regions, setRegions] = useState<Regions | null>(null);
const [currentTime, setCurrentTime] = useState(0);
const [detailIsOpen, setDetailIsOpen] = useState(false);
const [isComparing, setIsComparing] = useState(false);
const [isSharing, setIsSharing] = useState(false);
const [isSelectingRegion, setIsSelectingRegion] = useState(false);
const [frequencies, setFrequencies] = useState<number[]>([]);
const [peaks, setPeaks] = useState<number[]>([]);
const ref = useRef(null);
const removeComparingPitchContour = () => {
if (!wavesurfer) return;
regions
.getRegions()
.find((r) => r.id.startsWith("recording-voice-region"))
?.remove();
const wrapper = (wavesurfer as any).renderer.getWrapper();
wrapper
.querySelectorAll(".pitch-contour-recording")
.forEach((el: HTMLDivElement) => el.remove());
};
/*
* Render the recording's pitch contour over the original audio waveform,
* alongside the original pitch contour, for comparison.
*/
const renderComparingPitchContour = () => {
const region = mediaRegions
.getRegions()
.find((r) => r.id.startsWith("segment-region"));
if (!region) return;
if (!frequencies || !peaks) return;
// Trim silence from both ends of the peaks so only the voiced part of the recording is rendered.
const minValue = 0.01;
let voiceStartIndex = 0;
let voiceEndIndex = peaks.length - 1;
for (let i = 1; i < voiceEndIndex; i++) {
if (peaks[i] >= minValue) {
voiceStartIndex = i;
break;
}
}
for (let i = voiceEndIndex; i > voiceStartIndex; i--) {
if (peaks[i] >= minValue) {
voiceEndIndex = i;
break;
}
}
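// Map the voiced sample range onto the corresponding slice of the (coarser) frequency series.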
const voiceStartFrequenciesIndex = Math.round(
((1.0 * voiceStartIndex) / peaks.length) * frequencies.length
);
const voiceEndFrequenciesIndex = Math.round(
((1.0 * voiceEndIndex) / peaks.length) * frequencies.length
);
regions.clearRegions();
regions.addRegion({
id: `recording-voice-region-${currentRecording.id}`,
start: (voiceStartIndex / peaks.length) * player.getDuration(),
end: (voiceEndIndex / peaks.length) * player.getDuration(),
color: "#fb6f9211",
drag: false,
resize: false,
});
const data = frequencies.slice(
voiceStartFrequenciesIndex,
voiceEndFrequenciesIndex
);
renderMediaPitchContour(region, {
repaint: false,
canvasId: `pitch-contour-${currentRecording.id}-canvas`,
containerClassNames: ["pitch-contour-recording"],
data: {
labels: new Array(data.length).fill(""),
datasets: [
{
data,
cubicInterpolationMode: "monotone",
borderColor: "#fb6f92",
pointBorderColor: "#fb6f92",
pointBackgroundColor: "#ff8fab",
},
],
},
});
};
const toggleCompare = () => {
if (isComparing) {
removeComparingPitchContour();
setIsComparing(false);
} else {
setIsComparing(true);
renderComparingPitchContour();
}
};
const handleShare = async () => {
if (!currentRecording.uploadedAt) {
try {
await EnjoyApp.recordings.upload(currentRecording.id);
} catch (error) {
toast.error(t("shareFailed"), { description: error.message });
return;
}
}
webApi
.createPost({
targetId: currentRecording.id,
targetType: "Recording",
})
.then(() => {
toast.success(t("sharedSuccessfully"), {
description: t("sharedRecording"),
});
})
.catch((error) => {
toast.error(t("shareFailed"), {
description: error.message,
});
});
};
useEffect(() => {
if (!ref.current) return;
if (isRecording) return;
if (!currentRecording?.src) return;
const ws = WaveSurfer.create({
container: ref.current,
url: currentRecording.src,
height,
barWidth: 2,
cursorWidth: 1,
autoCenter: true,
autoScroll: true,
minPxPerSec: 150,
waveColor: "#efefef",
normalize: false,
progressColor: "rgba(0, 0, 0, 0.1)",
});
setPlayer(ws);
const regions = ws.registerPlugin(Regions.create());
setRegions(regions);
ws.on("timeupdate", (time: number) => setCurrentTime(time));
ws.on("finish", () => ws.seekTo(0));
ws.on("ready", () => {
const peaks: Float32Array = ws.getDecodedData().getChannelData(0);
const sampleRate = ws.options.sampleRate;
const data = extractFrequencies({ peaks, sampleRate });
setFrequencies(data);
setPeaks(Array.from(peaks));
renderPitchContour({
wrapper: ws.getWrapper(),
canvasId: `pitch-contour-${currentRecording.id}-canvas`,
labels: new Array(data.length).fill(""),
datasets: [
{
data,
cubicInterpolationMode: "monotone",
borderColor: "#fb6f92",
pointBorderColor: "#fb6f92",
pointBackgroundColor: "#ff8fab",
},
],
});
});
return () => {
ws.destroy();
};
}, [ref, currentRecording, isRecording]);
useEffect(() => {
setIsComparing(false);
removeComparingPitchContour();
}, [currentRecording]);
useEffect(() => {
if (!isComparing) return;
if (editingRegion) {
setIsComparing(false);
} else {
setTimeout(() => {
renderComparingPitchContour();
}, 100);
}
}, [zoomRatio, editingRegion]);
useEffect(() => {
if (!regions) return;
let disableSelectingRegion: () => void | undefined;
if (isSelectingRegion) {
regions.clearRegions();
disableSelectingRegion = regions.enableDragSelection({
color: "rgba(76, 201, 240, 0.2)",
drag: false,
});
}
const subscriptions = [
regions.on("region-created", () => {}),
regions.on("region-clicked", (region, e) => {
e.stopPropagation();
region.play();
}),
regions.on("region-out", () => {
player.pause();
}),
];
return () => {
disableSelectingRegion && disableSelectingRegion();
regions.clearRegions();
subscriptions.forEach((unsub) => unsub());
};
}, [regions, isSelectingRegion, player]);
/*
* Update player styles
*/
useEffect(() => {
if (!ref?.current || !player) return;
const scrollContainer = player.getWrapper()?.closest(".scroll");
if (!scrollContainer) return;
scrollContainer.style.width = `${
ref.current.getBoundingClientRect().width
}px`;
scrollContainer.style.scrollbarWidth = "thin";
}, [ref, player]);
/*
* play recording along with the media when isComparing is true
* only when the media is playing and the active region is the segment region
*/
useEffect(() => {
if (!regions) return;
if (!isComparing) return;
if (!wavesurfer?.isPlaying()) return;
if (player?.isPlaying()) return;
if (!mediaActiveRegion?.id?.startsWith("segment-region")) return;
regions
.getRegions()
.find((r) => r.id.startsWith("recording-voice-region"))
?.play();
}, [
wavesurfer,
player,
regions,
isComparing,
mediaCurrentTime,
mediaActiveRegion,
]);
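// Ctrl+R (Cmd+R on macOS) toggles play/pause of the current recording.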
useHotkeys(
["Ctrl+R", "Meta+R"],
(keyboardEvent, hotkeyEvent) => {
if (!player) return;
keyboardEvent.preventDefault();
if (
(navigator.platform.includes("Mac") && hotkeyEvent.meta) ||
hotkeyEvent.ctrl
) {
document.getElementById("recording-play-or-pause-button")?.click();
}
},
[player]
);
if (isRecording) return <MediaRecorder />;
if (!currentRecording?.src)
return (
<div className="h-full w-full border rounded-xl shadow-lg flex items-center justify-center">
<div
className="m-auto"
dangerouslySetInnerHTML={{
__html: t("noRecordingForThisSegmentYet"),
}}
></div>
</div>
);
return (
<div className="flex space-x-4">
<div className="border rounded-xl shadow-lg flex-1 relative">
<div ref={ref}></div>
<div className="absolute right-2 top-1">
<span className="text-sm">{formatDuration(currentTime || 0)}</span>
<span className="mx-1">/</span>
<span className="text-sm">
{formatDuration(
player?.getDuration() || currentRecording.duration / 1000.0 || 0
)}
</span>
</div>
</div>
<div className="flex flex-col space-y-1.5">
<Button
variant="default"
size="icon"
id="recording-play-or-pause-button"
data-tooltip-id="media-player-controls-tooltip"
data-tooltip-content={t("playRecording")}
className="rounded-full w-8 h-8 p-0"
onClick={() => {
const region = regions
?.getRegions()
?.find((r) => r.id.startsWith("recording-voice-region"));
if (region) {
region.play();
} else {
player?.playPause();
}
}}
>
{player?.isPlaying() ? (
<PauseIcon className="w-4 h-4" />
) : (
<PlayIcon className="w-4 h-4" />
)}
</Button>
<Button
variant={isComparing ? "secondary" : "outline"}
size="icon"
data-tooltip-id="media-player-controls-tooltip"
data-tooltip-content={t("compare")}
className="rounded-full w-8 h-8 p-0"
onClick={toggleCompare}
>
<GitCompareIcon className="w-4 h-4" />
</Button>
<Button
variant={isSelectingRegion ? "secondary" : "outline"}
size="icon"
data-tooltip-id="media-player-controls-tooltip"
data-tooltip-content={t("selectRegion")}
className="rounded-full w-8 h-8 p-0"
onClick={() => setIsSelectingRegion(!isSelectingRegion)}
>
<TextCursorInputIcon className="w-4 h-4" />
</Button>
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button
variant="outline"
size="icon"
data-tooltip-id="media-player-controls-tooltip"
data-tooltip-content={t("more")}
className="rounded-full w-8 h-8 p-0"
>
<MoreVerticalIcon className="w-4 h-4" />
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent>
<DropdownMenuItem
className="cursor-pointer"
onClick={() => setDetailIsOpen(true)}
>
<GaugeCircleIcon
className={`w-4 h-4 mr-4
${
currentRecording.pronunciationAssessment
? currentRecording.pronunciationAssessment
.pronunciationScore >= 80
? "text-green-500"
: currentRecording.pronunciationAssessment
.pronunciationScore >= 60
? "text-yellow-600"
: "text-red-500"
: ""
}
`}
/>
<span>{t("pronunciationAssessment")}</span>
</DropdownMenuItem>
<DropdownMenuItem
className="cursor-pointer"
onClick={() => setIsSharing(true)}
>
<Share2Icon className="w-4 h-4 mr-4" />
<span>{t("share")}</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</div>
<AlertDialog open={isSharing} onOpenChange={setIsSharing}>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("shareRecording")}</AlertDialogTitle>
<AlertDialogDescription>
{t("areYouSureToShareThisRecordingToCommunity")}
</AlertDialogDescription>
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<AlertDialogAction asChild>
<Button onClick={handleShare}>{t("share")}</Button>
</AlertDialogAction>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
<Sheet open={detailIsOpen} onOpenChange={(open) => setDetailIsOpen(open)}>
<SheetContent
side="bottom"
className="rounded-t-2xl shadow-lg"
displayClose={false}
>
<SheetHeader className="flex items-center justify-center -mt-4 mb-2">
<SheetClose>
<ChevronDownIcon />
</SheetClose>
</SheetHeader>
<RecordingDetail recording={currentRecording} />
</SheetContent>
</Sheet>
</div>
);
};

View File

@@ -0,0 +1,40 @@
import { useContext } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import { formatDuration, formatDateTime } from "@renderer/lib/utils";
import { t } from "i18next";
export const MediaInfoPanel = () => {
const { media } = useContext(MediaPlayerProviderContext);
if (!media) return null;
return (
<div className="px-4" data-testid="media-info-panel">
{[
{ label: t("models.audio.name"), value: media.name },
{
label: t("models.audio.duration"),
value: formatDuration(media.duration),
},
{
label: t("models.audio.recordingsCount"),
value: media.recordingsCount || 0,
},
{
label: t("models.audio.recordingsDuration"),
value: formatDuration(media.recordingsDuration, "ms"),
},
{
label: t("models.audio.createdAt"),
value: formatDateTime(media.createdAt),
},
].map((item, index) => (
<div key={`media-info-item-${index}`} className="mb-2">
<div className="capitalize text-sm text-muted-foreground mb-1">
{item.label}
</div>
<div className="">{item.value}</div>
</div>
))}
</div>
);
};

View File

@@ -0,0 +1,104 @@
import { useContext } from "react";
import {
MediaPlayerProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
import {
AlertDialog,
AlertDialogHeader,
AlertDialogDescription,
AlertDialogTitle,
AlertDialogContent,
AlertDialogFooter,
AlertDialogOverlay,
Button,
PingPoint,
Progress,
} from "@renderer/components/ui";
import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import { t } from "i18next";
import { useNavigate } from "react-router-dom";
export const MediaLoadingModal = () => {
const navigate = useNavigate();
const { whisperConfig } = useContext(AISettingsProviderContext);
const {
decoded,
transcription,
transcribing,
transcribingProgress,
generateTranscription,
} = useContext(MediaPlayerProviderContext);
return (
<AlertDialog open={!decoded || !Boolean(transcription?.result)}>
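{/* Keep the modal open until the waveform is decoded and a transcription result exists. */}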
<AlertDialogOverlay className="z-[100]" />
<AlertDialogContent className="z-[100]">
<AlertDialogHeader>
<AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
<AlertDialogDescription>
{t("itMayTakeAWhileToPrepareForTheFirstLoad")}
</AlertDialogDescription>
</AlertDialogHeader>
<div className="py-4">
{decoded ? (
<div className="mb-4 flex items-center space-x-4">
<CheckCircleIcon className="w-4 h-4 text-green-500" />
<span>{t("waveformIsDecoded")}</span>
</div>
) : (
<div className="mb-4 flex items-center space-x-4">
<LoaderIcon className="w-4 h-4 animate-spin" />
<span>{t("decodingWaveform")}</span>
</div>
)}
{!transcription ? (
<div className="flex items-center space-x-4">
<LoaderIcon className="w-4 h-4 animate-spin" />
<span>{t("loadingTranscription")}</span>
</div>
) : transcription.result ? (
<div className="flex items-center space-x-4">
<CheckCircleIcon className="w-4 h-4 text-green-500" />
<span>{t("transcribedSuccessfully")}</span>
</div>
) : transcribing ? (
<div className="">
<div className="flex items-center space-x-4 mb-2">
<PingPoint colorClassName="bg-yellow-500" />
<span>{t("transcribing")}</span>
</div>
{whisperConfig.service === "local" && (
<Progress value={transcribingProgress} />
)}
</div>
) : (
<div className="flex items-center space-x-4">
<PingPoint colorClassName="bg-muted" />
<div className="inline">
<span>{t("notTranscribedYet")}</span>
{decoded && (
<Button
onClick={generateTranscription}
className="ml-4"
size="sm"
>
{t("transcribe")}
</Button>
)}
</div>
</div>
)}
</div>
<AlertDialogFooter>
<Button variant="secondary" onClick={() => navigate(-1)}>
{t("cancel")}
</Button>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
);
};

File diff suppressed because it is too large Load Diff

View File

@@ -1,19 +1,5 @@
import { useEffect, useState, useCallback, useRef, useContext } from "react";
import {
extractFrequencies,
PitchContour,
MediaPlayerControls,
MediaCaption,
} from "@renderer/components";
import Regions, {
Region,
type Region as RegionType,
} from "wavesurfer.js/dist/plugins/regions";
import { secondsToTimestamp } from "@renderer/lib/utils";
import WaveSurfer from "wavesurfer.js";
import { useDebounce } from "@uidotdev/usehooks";
import { AppSettingsProviderContext } from "@renderer/context";
import cloneDeep from "lodash/cloneDeep";
import { useContext } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import {
MediaPlayer as VidstackMediaPlayer,
MediaProvider,
@@ -23,626 +9,32 @@ import {
} from "@vidstack/react";
import {
DefaultAudioLayout,
DefaultVideoLayout,
defaultLayoutIcons,
} from "@vidstack/react/player/layouts/default";
import { useHotkeys } from "react-hotkeys-hook";
const minPxPerSecBase = 150;
export const MediaPlayer = (props: {
mediaId: string;
mediaType: "Audio" | "Video";
mediaUrl: string;
mediaMd5?: string;
transcription: TranscriptionType;
// player controls
currentTime: number;
setCurrentTime: (time: number) => void;
currentSegmentIndex: number;
setCurrentSegmentIndex: (index: number) => void;
initialized: boolean;
setInitialized: (value: boolean) => void;
recordButtonVisible?: boolean;
setRecordButtonVisible?: (value: boolean) => void;
seek?: {
seekTo: number;
timestamp: number;
};
height?: number;
zoomRatio: number;
setZoomRatio: (value: number) => void;
isPlaying: boolean;
setIsPlaying: (value: boolean) => void;
playMode?: "loop" | "single" | "all";
setPlayMode?: (value: "loop" | "single" | "all") => void;
playBackRate: number;
setPlaybackRate: (value: number) => void;
displayInlineCaption?: boolean;
setDisplayInlineCaption?: (value: boolean) => void;
onShare?: () => void;
onDecoded?: (data: { duration: number; sampleRate: number }) => void;
}) => {
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const {
mediaId,
mediaType,
mediaUrl,
mediaMd5,
transcription,
height = 200,
currentTime,
setCurrentTime,
currentSegmentIndex,
setCurrentSegmentIndex,
initialized,
setInitialized,
recordButtonVisible,
setRecordButtonVisible,
seek,
zoomRatio,
setZoomRatio,
isPlaying,
setIsPlaying,
playMode,
setPlayMode,
playBackRate,
setPlaybackRate,
displayInlineCaption,
setDisplayInlineCaption,
onShare,
onDecoded,
} = props;
if (!mediaUrl) return;
const [wavesurfer, setWavesurfer] = useState(null);
const [waveform, setWaveForm] = useState<WaveFormDataType>(null);
const containerRef = useRef<HTMLDivElement>();
const [mediaProvider, setMediaProvider] = useState<
HTMLAudioElement | HTMLVideoElement
>(null);
export const MediaPlayer = () => {
const { media, setMediaProvider } = useContext(MediaPlayerProviderContext);
const mediaRemote = useMediaRemote();
const [transcriptionResult, setTranscriptionResult] = useState<
TranscriptionResultSegmentGroupType[] | null
>(null);
const [transcriptionDirty, setTranscriptionDirty] = useState<boolean>(false);
const [regions, setRegions] = useState<Regions | null>(null);
const debouncedTranscription = useDebounce(transcriptionResult, 500);
const resetTranscription = () => {
if (!transcriptionDirty) return;
if (!transcription?.result) return;
setTranscriptionResult(cloneDeep(transcription.result));
setTranscriptionDirty(false);
};
const saveTranscription = () => {
if (!transcriptionDirty) return;
if (!debouncedTranscription) return;
EnjoyApp.transcriptions.update(transcription.id, {
result: debouncedTranscription,
});
};
const onPlayClick = useCallback(() => {
wavesurfer.isPlaying() ? wavesurfer.pause() : wavesurfer.play();
}, [wavesurfer]);
const handlePlaybackRateChange = useCallback(
(rate: number) => {
wavesurfer.setPlaybackRate(rate);
setPlaybackRate(wavesurfer.getPlaybackRate());
},
[initialized]
);
const findCurrentSegment = (time: number) => {
if (!transcription) return;
if (isPlaying && playMode === "loop") return;
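// Transcription offsets are stored in milliseconds, so convert the player's time from seconds first.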
time = Math.round(time * 1000);
const index = transcriptionResult.findIndex(
(t) => time >= t.offsets.from && time < t.offsets.to
);
if (index === -1) return;
setCurrentSegmentIndex(index);
};
const addSegmentRegion = (from: number, to: number) => {
if (!initialized) return;
const span = document.createElement("span");
span.innerText = secondsToTimestamp(from) + ` (${(to - from).toFixed(2)}s)`;
span.style.padding = "1rem";
span.style.fontSize = "0.9rem";
if (regions) {
regions.clearRegions();
const region = regions.addRegion({
start: from,
end: to,
color: "rgba(255, 0, 0, 0.03)",
drag: false,
resize: true,
content: span,
});
renderPitchContour(region);
}
};
const renderPitchContour = (region: RegionType) => {
if (!region) return;
if (!waveform?.frequencies?.length) return;
if (!wavesurfer) return;
const duration = wavesurfer.getDuration();
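// Map the region's time range onto indices of the frequency array.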
const fromIndex = Math.round(
(region.start / duration) * waveform.frequencies.length
);
const toIndex = Math.round(
(region.end / duration) * waveform.frequencies.length
);
const containerId = `pitch-contour-${mediaId}-${currentSegmentIndex}`;
const wrapper = wavesurfer.renderer.getWrapper();
const wrapperWidth = wrapper.getBoundingClientRect().width;
const canvas = PitchContour({
frequencies: waveform.frequencies.slice(fromIndex, toIndex),
height,
});
const offsetLeft = (region.start / duration) * wrapperWidth;
const width = ((region.end - region.start) / duration) * wrapperWidth;
const pitchContourWidthContainer = document.createElement("div");
pitchContourWidthContainer.appendChild(canvas);
pitchContourWidthContainer.style.position = "absolute";
pitchContourWidthContainer.style.top = "0";
pitchContourWidthContainer.style.left = "0";
canvas.style.width = `${width}px`;
pitchContourWidthContainer.style.height = `${height}px`;
pitchContourWidthContainer.style.marginLeft = `${offsetLeft}px`;
pitchContourWidthContainer.className = "pitch-contour";
pitchContourWidthContainer.id = containerId;
const regionDuration = region.end - region.start;
if (displayInlineCaption) {
const captionContainer = document.createElement("div");
captionContainer.style.position = "absolute";
captionContainer.style.bottom = "0";
captionContainer.style.width = `${width}px`;
captionContainer.style.fontSize = "0.75rem";
captionContainer.style.opacity = "0.75";
transcriptionResult?.[currentSegmentIndex]?.segments?.forEach(
(segment, index) => {
const span = document.createElement("span");
span.innerText = segment.text;
span.style.position = "absolute";
span.style.bottom = "0";
span.style.left = `${
((segment.offsets.from / 1000 - region.start) / regionDuration) *
width
}px`;
if (index % 2 === 1) {
span.style.paddingBottom = "0.75rem";
}
captionContainer.appendChild(span);
}
);
pitchContourWidthContainer.appendChild(captionContainer);
}
wrapper.querySelector("#" + containerId)?.remove();
wrapper.appendChild(pitchContourWidthContainer);
};
const reRenderPitchContour = () => {
if (!wavesurfer) return;
const wrapper = wavesurfer.renderer.getWrapper();
wrapper
.querySelectorAll(".pitch-contour")
.forEach((canvas: HTMLCanvasElement) => {
canvas.remove();
});
if (!regions) return;
const region = regions.getRegions()[0];
if (!region) return;
renderPitchContour(region);
};
useEffect(() => {
if (!transcription) return;
setTranscriptionDirty(false);
setTranscriptionResult(cloneDeep(transcription.result));
}, [transcription]);
// Initialize wavesurfer
const initializeWavesurfer = async () => {
if (!mediaProvider) return;
if (!containerRef.current) return;
const ws = WaveSurfer.create({
container: containerRef.current,
height,
waveColor: "#ddd",
progressColor: "rgba(0, 0, 0, 0.25)",
cursorColor: "#dc143c",
barWidth: 1,
autoScroll: true,
minPxPerSec: 150,
autoCenter: false,
dragToSeek: false,
media: mediaProvider,
peaks: waveform ? [waveform.peaks] : undefined,
duration: waveform ? waveform.duration : undefined,
});
const blob = await fetch(mediaUrl).then((res) => res.blob());
if (waveform) {
ws.loadBlob(blob, [waveform.peaks], waveform.duration);
setInitialized(true);
} else {
ws.loadBlob(blob);
}
setRegions(ws.registerPlugin(Regions.create()));
setWavesurfer(ws);
};
useEffect(() => {
initializeWavesurfer();
return () => {
wavesurfer?.destroy();
};
}, [mediaUrl, height, mediaProvider]);
// Install listeners for wavesurfer
useEffect(() => {
if (!wavesurfer) return;
setCurrentTime(0);
setIsPlaying(false);
const subscriptions = [
wavesurfer.on("play", () => setIsPlaying(true)),
wavesurfer.on("pause", () => setIsPlaying(false)),
wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
wavesurfer.on("timeupdate", (time: number) => setCurrentTime(time)),
wavesurfer.on("decode", () => {
if (waveform?.frequencies) return;
const peaks: Float32Array = wavesurfer
.getDecodedData()
.getChannelData(0);
const duration: number = wavesurfer.getDuration();
const sampleRate = wavesurfer.options.sampleRate;
const _frequencies = extractFrequencies({ peaks, sampleRate });
const _waveform = {
peaks: Array.from(peaks),
duration,
sampleRate,
frequencies: _frequencies,
};
EnjoyApp.waveforms.save(mediaMd5, _waveform);
setWaveForm(_waveform);
onDecoded &&
onDecoded({
duration,
sampleRate,
});
}),
wavesurfer.on("ready", () => {
setInitialized(true);
}),
];
return () => {
subscriptions.forEach((unsub) => unsub());
};
}, [wavesurfer]);
useEffect(() => {
if (!transcriptionResult) return;
if (transcriptionDirty) return;
const currentSegment = transcriptionResult[currentSegmentIndex];
if (!currentSegment) return;
addSegmentRegion(
currentSegment.offsets.from / 1000.0,
currentSegment.offsets.to / 1000.0
);
// set zoom ratio to fit the current segment
if (!isPlaying) {
setZoomRatio(calcFitZoomRatio());
}
}, [
currentSegmentIndex,
initialized,
transcriptionDirty,
transcriptionResult,
]);
useEffect(() => {
if (!transcriptionResult) return;
findCurrentSegment(currentTime);
}, [currentTime, transcriptionResult]);
useEffect(() => {
if (!regions) return;
const subscriptions = [
wavesurfer.on("finish", () => {
if (playMode !== "loop") return;
regions?.getRegions()[0]?.play();
}),
regions.on("region-updated", (region) => {
const from = region.start;
const to = region.end;
const offsets = {
from: Math.round(from * 1000),
to: Math.round(to * 1000),
};
const timestamps = {
from: [
secondsToTimestamp(from),
Math.round((from * 1000) % 1000),
].join(","),
to: [secondsToTimestamp(to), Math.round((to * 1000) % 1000)].join(
","
),
};
const _transcription = cloneDeep(transcriptionResult);
_transcription[currentSegmentIndex].offsets = offsets;
_transcription[currentSegmentIndex].timestamps = timestamps;
// ensure that the previous segment ends before the current segment
if (
currentSegmentIndex > 0 &&
_transcription[currentSegmentIndex - 1].offsets.to > offsets.from
) {
_transcription[currentSegmentIndex - 1].offsets.to = offsets.from;
}
// ensure that the next segment starts after the current segment
if (
currentSegmentIndex < _transcription.length - 1 &&
_transcription[currentSegmentIndex + 1].offsets.from < offsets.to
) {
_transcription[currentSegmentIndex + 1].offsets.from = offsets.to;
}
setTranscriptionResult(_transcription);
setTranscriptionDirty(true);
renderPitchContour(region);
}),
regions.on("region-out", (region: Region) => {
if (isPlaying && playMode === "loop") {
region.play();
} else if (isPlaying && playMode === "single") {
wavesurfer.pause();
wavesurfer.seekTo(region.start / wavesurfer.getDuration());
} else {
resetTranscription();
}
}),
];
return () => {
subscriptions.forEach((unsub) => unsub());
};
}, [regions, isPlaying, playMode, currentSegmentIndex, transcriptionDirty]);
useEffect(() => {
if (!wavesurfer) return;
if (!initialized) return;
wavesurfer.zoom(zoomRatio * minPxPerSecBase);
reRenderPitchContour();
}, [zoomRatio, wavesurfer, initialized, displayInlineCaption]);
useEffect(() => {
if (typeof seek?.seekTo !== "number") return;
if (!wavesurfer) return;
if (!initialized) return;
wavesurfer.seekTo(seek?.seekTo / wavesurfer.getDuration());
wavesurfer.setScrollTime(seek?.seekTo);
}, [seek, wavesurfer, initialized]);
// Handle media provider
useEffect(() => {
if (!mediaRemote) return;
if (!mediaProvider) return;
if (mediaType !== "Video") return;
if (recordButtonVisible) {
mediaRemote.togglePictureInPicture();
} else {
mediaRemote.exitPictureInPicture();
}
}, [mediaRemote, mediaProvider, recordButtonVisible]);
useEffect(() => {
if (!wavesurfer) return;
if (isPlaying) {
wavesurfer.play();
} else {
wavesurfer.pause();
}
}, [wavesurfer, isPlaying]);
useEffect(() => {
EnjoyApp.waveforms.find(mediaMd5).then((waveform) => {
if (!waveform) return;
setWaveForm(waveform);
onDecoded &&
onDecoded({
duration: waveform.duration,
sampleRate: waveform.sampleRate,
});
});
}, []);
const calcFitZoomRatio = () => {
if (!containerRef.current) return;
if (!wavesurfer) return;
const currentSegment = transcriptionResult?.[currentSegmentIndex];
if (!currentSegment) return;
const containerWidth = containerRef.current.getBoundingClientRect().width;
const duration =
currentSegment.offsets.to / 1000.0 - currentSegment.offsets.from / 1000.0;
const fitZoomRatio = containerWidth / duration / minPxPerSecBase;
return fitZoomRatio;
};
useHotkeys(
"Space",
(keyboardEvent, _hotkeyEvent) => {
if (!wavesurfer) return;
keyboardEvent.preventDefault();
onPlayClick();
},
[wavesurfer]
);
if (!media?.src) return null;
return (
<>
<div
className="mb-2"
ref={containerRef}
data-testid="media-player-container"
/>
<div className="mb-2 flex justify-center">
<MediaPlayerControls
isPlaying={isPlaying}
onPlayOrPause={onPlayClick}
onNext={() => {
if (!transcription) return;
const segment = transcription?.result?.[currentSegmentIndex + 1];
if (!segment) return;
wavesurfer.seekTo(
segment.offsets.from / 1000 / wavesurfer.getDuration()
);
}}
onPrev={() => {
if (!transcription) return;
const segment = transcription?.result?.[currentSegmentIndex - 1];
if (!segment) return;
wavesurfer.seekTo(
segment.offsets.from / 1000 / wavesurfer.getDuration()
);
}}
playMode={playMode}
setPlayMode={setPlayMode}
playbackRate={playBackRate}
setPlaybackRate={handlePlaybackRateChange}
zoomRatio={zoomRatio}
setZoomRatio={setZoomRatio}
fitZoomRatio={calcFitZoomRatio()}
recordButtonVisible={recordButtonVisible}
setRecordButtonVisible={setRecordButtonVisible}
transcriptionDirty={transcriptionDirty}
resetTranscription={resetTranscription}
saveTranscription={saveTranscription}
wavesurferOptions={wavesurfer?.options}
setWavesurferOptions={(options) => wavesurfer?.setOptions(options)}
displayInlineCaption={displayInlineCaption}
setDisplayInlineCaption={setDisplayInlineCaption}
onShare={onShare}
/>
</div>
{initialized && (
<div className={recordButtonVisible && mediaProvider ? "" : "hidden"}>
<MediaCaption
key={`${mediaId}-${currentSegmentIndex}`}
mediaId={mediaId}
mediaType={mediaType}
currentTime={currentTime}
transcription={transcriptionResult?.[currentSegmentIndex]}
onSeek={(time) => {
wavesurfer.seekTo(time / wavesurfer.getDuration());
}}
isPlaying={isPlaying}
setIsPlaying={setIsPlaying}
/>
</div>
)}
<div
className={recordButtonVisible && mediaProvider ? "hidden" : "flex-1"}
<div className="px-4" data-testid="media-player">
<VidstackMediaPlayer
controls
src={media.src}
onCanPlayThrough={(detail, nativeEvent) => {
mediaRemote.setTarget(nativeEvent.target);
const { provider } = detail;
if (isAudioProvider(provider)) {
setMediaProvider(provider.audio);
} else if (isVideoProvider(provider)) {
setMediaProvider(provider.video);
}
}}
>
<VidstackMediaPlayer
src={mediaUrl}
onCanPlayThrough={(detail, nativeEvent) => {
mediaRemote.setTarget(nativeEvent.target);
const { provider } = detail;
if (isAudioProvider(provider)) {
setMediaProvider(provider.audio);
} else if (isVideoProvider(provider)) {
setMediaProvider(provider.video);
}
}}
>
<MediaProvider />
{mediaType === "Audio" && (
<DefaultAudioLayout icons={defaultLayoutIcons} />
)}
{mediaType === "Video" && (
<>
<DefaultVideoLayout icons={defaultLayoutIcons} />
<div className="vds-captions">
<div className="absolute mx-auto bottom-[15%] flex items-center justify-center w-full">
<div className="flex">
<MediaCaption
mediaId={mediaId}
mediaType={mediaType}
className="mx-auto w-5/6 text-center bg-primary/70 text-xl text-white"
transcription={transcriptionResult?.[currentSegmentIndex]}
currentTime={currentTime}
isPlaying={isPlaying}
setIsPlaying={setIsPlaying}
/>
</div>
</div>
</div>
</>
)}
</VidstackMediaPlayer>
</div>
</>
<MediaProvider />
<DefaultAudioLayout icons={defaultLayoutIcons} />
</VidstackMediaPlayer>
</div>
);
};

View File

@@ -0,0 +1,145 @@
import { useEffect, useState, useContext, useRef } from "react";
import {
MediaPlayerProviderContext,
AppSettingsProviderContext,
} from "@renderer/context";
import RecordPlugin from "wavesurfer.js/dist/plugins/record";
import WaveSurfer from "wavesurfer.js";
import { t } from "i18next";
import { useTranscribe } from "@renderer/hooks";
import { toast } from "@renderer/components/ui";
import {
FFMPEG_TRIM_SILENCE_OPTIONS,
FFMPEG_CONVERT_WAV_OPTIONS,
} from "@/constants";
export const MediaRecorder = (props: { height?: number }) => {
const { height = 192 } = props;
const {
media,
isRecording,
setIsRecording,
transcription,
currentSegmentIndex,
} = useContext(MediaPlayerProviderContext);
const [access, setAccess] = useState<boolean>(false);
const [duration, setDuration] = useState<number>(0);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const { transcode } = useTranscribe();
const ref = useRef(null);
const askForMediaAccess = () => {
EnjoyApp.system.preferences.mediaAccess("microphone").then((access) => {
if (access) {
setAccess(true);
} else {
setAccess(false);
toast.warning(t("noMicrophoneAccess"));
}
});
};
const createRecording = async (params: { blob: Blob; duration: number }) => {
if (!media) return;
const { blob, duration } = params;
toast.promise(
async () => {
let output: Blob;
output = await transcode(blob, [
// ...FFMPEG_TRIM_SILENCE_OPTIONS,
...FFMPEG_CONVERT_WAV_OPTIONS,
]);
const currentSegment =
transcription?.result?.timeline?.[currentSegmentIndex];
if (!currentSegment) return;
return EnjoyApp.recordings.create({
targetId: media.id,
targetType: media.mediaType,
blob: {
type: output.type.split(";")[0],
arrayBuffer: await output.arrayBuffer(),
},
referenceId: currentSegmentIndex,
referenceText: currentSegment.text,
duration,
});
},
{
loading: t("savingRecording"),
success: t("recordingSaved"),
error: (e) => t("failedToSaveRecording") + " : " + e.message,
position: "bottom-right",
},
);
};
useEffect(() => {
if (!access) return;
if (!isRecording) return;
if (!ref.current) return;
const ws = WaveSurfer.create({
container: ref.current,
fillParent: true,
height,
autoCenter: false,
normalize: false,
});
const record = ws.registerPlugin(RecordPlugin.create());
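// Track wall-clock time to compute the real recording duration in milliseconds.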
let startAt = 0;
record.on("record-start", () => {
startAt = Date.now();
});
record.on("record-end", async (blob: Blob) => {
createRecording({ blob, duration: Date.now() - startAt });
});
let interval: NodeJS.Timeout;
RecordPlugin.getAvailableAudioDevices()
.then((devices) => devices.find((d) => d.kind === "audioinput"))
.then((device) => {
if (device) {
record.startRecording({ deviceId: device.deviceId });
setDuration(0);
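// Tick every 100 ms: duration counts tenths of a second and recording stops automatically after 30 seconds.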
interval = setInterval(() => {
setDuration((duration) => {
if (duration >= 300) {
setIsRecording(false);
}
return duration + 1;
});
}, 100);
} else {
toast.error(t("cannotFindMicrophone"));
}
});
return () => {
clearInterval(interval);
record.stopRecording();
ws.destroy();
};
}, [ref, isRecording, access]);
useEffect(() => {
askForMediaAccess();
}, []);
return (
<div className="border rounded-xl shadow-lg relative">
<span className="absolute bottom-2 right-2 serif">
{duration / 10}
<span className="text-xs"> / 300</span>
</span>
<div className="h-full" ref={ref}></div>
</div>
);
};

View File

@@ -0,0 +1,149 @@
import { useContext, useRef, useEffect, useState } from "react";
import {
AlertDialog,
AlertDialogHeader,
AlertDialogDescription,
AlertDialogTitle,
AlertDialogContent,
AlertDialogFooter,
AlertDialogCancel,
AlertDialogAction,
Button,
DropdownMenu,
DropdownMenuItem,
DropdownMenuTrigger,
DropdownMenuContent,
ScrollArea,
} from "@renderer/components/ui";
import {
AppSettingsProviderContext,
MediaPlayerProviderContext,
} from "@renderer/context";
import {
LoaderIcon,
MicIcon,
MoreHorizontalIcon,
Trash2Icon,
} from "lucide-react";
import { t } from "i18next";
import { formatDateTime, formatDuration } from "@renderer/lib/utils";
export const MediaRecordings = () => {
const containerRef = useRef<HTMLDivElement>();
const {
recordings = [],
hasMoreRecordings,
loadingRecordings,
fetchRecordings,
currentRecording,
setCurrentRecording,
currentSegmentIndex,
} = useContext(MediaPlayerProviderContext);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const [selectedRecording, setSelectedRecording] = useState(null);
const handleDelete = () => {
if (!selectedRecording) return;
EnjoyApp.recordings.destroy(selectedRecording.id);
};
useEffect(() => {
setCurrentRecording(recordings[0]);
}, [currentSegmentIndex, recordings]);
return (
<div ref={containerRef} data-testid="media-recordings-result">
{recordings.length == 0 && (
<div
className="text-center px-6 py-8 text-sm text-muted-foreground"
dangerouslySetInnerHTML={{
__html: t("noRecordingForThisSegmentYet"),
}}
></div>
)}
{recordings.map((recording) => (
<div
key={recording.id}
className={`flex items-center justify-between px-4 py-2 cursor-pointer ${
recording.id === currentRecording?.id ? "bg-muted" : ""
}`}
style={{
borderLeftColor: `#${recording.md5.substr(0, 6)}`,
borderLeftWidth: 3,
}}
onClick={() => {
setCurrentRecording(recording);
}}
>
<div className="flex items-center space-x-2">
<MicIcon className="w-4 h-4" />
<span>{formatDuration(recording.duration, "ms")}</span>
</div>
<div className="flex items-center space-x-2">
<span className="text-sm text-muted-foreground">
{formatDateTime(recording.createdAt)}
</span>
<DropdownMenu>
<DropdownMenuTrigger>
<MoreHorizontalIcon className="w-4 h-4" />
</DropdownMenuTrigger>
<DropdownMenuContent>
<DropdownMenuItem
className="text-destructive cursor-pointer"
onClick={() => setSelectedRecording(recording)}
>
<Trash2Icon className="w-4 h-4 mr-2" />
<span>{t("delete")}</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</div>
</div>
))}
{hasMoreRecordings && (
<div className="py-2 flex items-center justify-center">
<Button
variant="outline"
size="sm"
disabled={loadingRecordings}
onClick={() => fetchRecordings(recordings.length)}
>
{loadingRecordings && (
<LoaderIcon className="w-4 h-4 animate-spin mr-2" />
)}
{t("loadMore")}
</Button>
</div>
)}
<AlertDialog
open={Boolean(selectedRecording)}
onOpenChange={(value) => {
if (value) return;
setSelectedRecording(null);
}}
>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("deleteRecording")}</AlertDialogTitle>
<AlertDialogDescription>
{t("deleteRecordingConfirmation")}
</AlertDialogDescription>
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<AlertDialogAction asChild>
<Button onClick={handleDelete}>{t("delete")}</Button>
</AlertDialogAction>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
</div>
);
};

View File

@@ -0,0 +1,78 @@
import { useEffect, useContext, useState } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import {
MediaPlayer,
MediaTranscription,
MediaInfoPanel,
MediaRecordings,
} from "@renderer/components";
import { ScrollArea } from "@renderer/components/ui";
import { t } from "i18next";
export const MediaTabs = () => {
const { media, decoded } = useContext(MediaPlayerProviderContext);
const [tab, setTab] = useState("player");
useEffect(() => {
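// Switch to the transcription tab once the waveform has been decoded.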
if (!decoded) return;
setTab("transcription");
}, [decoded]);
if (!media) return null;
return (
<ScrollArea className="h-full">
<div className="flex items-center space-x-2 justify-between p-1 bg-muted rounded-t-lg mb-2 text-sm sticky top-0 z-10">
{media.mediaType === "Video" && (
<div
className={`rounded cursor-pointer px-2 py-1 text-sm text-center capitalize ${
tab === "player" ? "bg-background" : ""
}`}
onClick={() => setTab("player")}
>
{t("player")}
</div>
)}
<div
className={`rounded cursor-pointer px-2 py-1 text-sm text-center capitalize ${
tab === "transcription" ? "bg-background" : ""
}`}
onClick={() => setTab("transcription")}
>
{t("transcription")}
</div>
<div
className={`rounded cursor-pointer px-2 py-1 text-sm text-center capitalize ${
tab === "recordings" ? "bg-background" : ""
}`}
onClick={() => setTab("recordings")}
>
{t("myRecordings")}
</div>
<div
className={`rounded cursor-pointer px-2 py-1 text-sm text-center capitalize ${
tab === "info" ? "bg-background" : ""
}`}
onClick={() => setTab("info")}
>
{t("mediaInfo")}
</div>
</div>
<div className={tab === "player" ? "" : "hidden"}>
<MediaPlayer />
</div>
<div className={tab === "recordings" ? "" : "hidden"}>
<MediaRecordings />
</div>
<div className={tab === "transcription" ? "" : "hidden"}>
<MediaTranscription />
</div>
<div className={tab === "info" ? "" : "hidden"}>
<MediaInfoPanel />
</div>
</ScrollArea>
);
};

View File

@@ -1,4 +1,12 @@
import { useEffect, useContext, useRef, useState } from "react";
import {
AppSettingsProviderContext,
DbProviderContext,
MediaPlayerProviderContext,
} from "@renderer/context";
import { t } from "i18next";
import {
Button,
AlertDialog,
AlertDialogTrigger,
AlertDialogFooter,
@@ -8,182 +16,150 @@ import {
AlertDialogDescription,
AlertDialogCancel,
AlertDialogAction,
Skeleton,
ScrollArea,
Button,
PingPoint,
} from "@renderer/components/ui";
import React, { useEffect, useContext, useState } from "react";
import { t } from "i18next";
import { LoaderIcon, CheckCircleIcon, MicIcon } from "lucide-react";
import {
DbProviderContext,
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
import { AlignmentResult } from "echogarden/dist/api/API.d.js";
import { formatDuration } from "@renderer/lib/utils";
export const MediaTranscription = (props: {
transcription: TranscriptionType;
progress: number;
transcribe: () => void;
transcribing: boolean;
mediaId: string;
mediaType: "Audio" | "Video";
mediaName?: string;
currentSegmentIndex?: number;
onSelectSegment?: (index: number) => void;
}) => {
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const { whisperConfig } = useContext(AISettingsProviderContext);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
export const MediaTranscription = () => {
const containerRef = useRef<HTMLDivElement>();
const {
transcription,
transcribing,
progress,
transcribe,
mediaId,
mediaType,
mediaName,
media,
currentSegmentIndex,
onSelectSegment,
} = props;
const containerRef = React.createRef<HTMLDivElement>();
wavesurfer,
setCurrentSegmentIndex,
transcription,
generateTranscription,
transcribing,
transcribingProgress,
} = useContext(MediaPlayerProviderContext);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const [recordingStats, setRecordingStats] =
useState<SegementRecordingStatsType>([]);
const fetchSegmentStats = async () => {
if (!mediaId) return;
if (!media) return;
EnjoyApp.recordings.groupBySegment(mediaId, mediaType).then((stats) => {
setRecordingStats(stats);
});
EnjoyApp.recordings
.groupBySegment(media.id, media.mediaType)
.then((stats) => {
setRecordingStats(stats);
});
};
useEffect(() => {
if (!transcription?.result) return;
addDblistener(fetchSegmentStats);
fetchSegmentStats();
return () => {
removeDbListener(fetchSegmentStats);
};
}, [transcription]);
}, [transcription?.result]);
useEffect(() => {
if (!containerRef?.current) return;
containerRef.current
?.querySelector(`#segment-${currentSegmentIndex}`)
?.scrollIntoView({
block: "center",
inline: "center",
} as ScrollIntoViewOptions);
}, [currentSegmentIndex, transcription]);
}, [currentSegmentIndex, transcription, containerRef]);
if (!transcription)
return (
<div className="p-4 w-full">
<TranscriptionPlaceholder />
</div>
);
if (!transcription?.result) {
return null;
}
return (
<div
className="w-full h-full flex flex-col"
data-testid="media-transcription"
>
<div className="mb-4 flex items-cener justify-between">
<div className="flex items-center space-x-2">
{transcribing || transcription.state === "processing" ? (
<>
<PingPoint colorClassName="bg-yellow-500" />
<div className="text-sm">
{whisperConfig.service === "local" && `${progress}%`}
</div>
</>
) : transcription.state === "finished" ? (
<CheckCircleIcon className="text-green-500 w-4 h-4" />
) : (
<PingPoint colorClassName="bg-mute" />
)}
<span className="capitalize">{t("transcript")}</span>
<div ref={containerRef} data-testid="media-transcription-result">
<div className="px-4 py-1 bg-background">
<div className="flex items-cener justify-between">
<div className="flex items-center space-x-2">
{transcribing || transcription.state === "processing" ? (
<>
<PingPoint colorClassName="bg-yellow-500" />
<div className="text-sm">
{transcribingProgress > 0 && `${transcribingProgress}%`}
</div>
</>
) : transcription.state === "finished" ? (
<CheckCircleIcon className="text-green-500 w-4 h-4" />
) : (
<PingPoint colorClassName="bg-mute" />
)}
<span className="capitalize">{t("transcript")}</span>
</div>
<AlertDialog>
<AlertDialogTrigger asChild>
<Button
variant="outline"
size="sm"
disabled={transcribing || transcription.state === "processing"}
className="capitalize"
>
{(transcribing || transcription.state === "processing") && (
<LoaderIcon className="animate-spin w-4 mr-2" />
)}
{transcription.result ? t("regenerate") : t("transcribe")}
</Button>
</AlertDialogTrigger>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("transcribe")}</AlertDialogTitle>
<AlertDialogDescription>
{t("transcribeMediaConfirmation", {
name: media.name,
})}
</AlertDialogDescription>
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<AlertDialogAction onClick={generateTranscription}>
{t("transcribe")}
</AlertDialogAction>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
</div>
<AlertDialog>
<AlertDialogTrigger asChild>
<Button
disabled={transcribing || transcription.state === "processing"}
className="capitalize"
>
{(transcribing || transcription.state === "processing") && (
<LoaderIcon className="animate-spin w-4 mr-2" />
)}
{transcription.result ? t("regenerate") : t("transcribe")}
</Button>
</AlertDialogTrigger>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("transcribe")}</AlertDialogTitle>
<AlertDialogDescription>
{t("transcribeAudioConfirmation", {
name: mediaName,
})}
</AlertDialogDescription>
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<AlertDialogAction onClick={transcribe}>
{t("transcribe")}
</AlertDialogAction>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
</div>
{transcription?.result ? (
<ScrollArea
ref={containerRef}
className="flex-1 px-2"
data-testid="media-transcription-result"
>
{transcription.result.map((t, index) => (
<div
key={index}
id={`segment-${index}`}
className={`py-1 px-2 mb-2 cursor-pointer hover:bg-yellow-400/25 ${
currentSegmentIndex === index ? "bg-yellow-400/25" : ""
}`}
onClick={() => {
onSelectSegment?.(index);
}}
>
<div className="flex items-center justify-between">
<span className="text-xs opacity-50">#{index + 1}</span>
<div className="flex items-center space-x-2">
{(recordingStats || []).findIndex(
(s) => s.referenceId === index
) !== -1 && <MicIcon className="w-3 h-3 text-sky-500" />}
<span className="text-xs opacity-50">
{t.timestamps.from.split(",")[0]}
</span>
</div>
{(transcription.result as AlignmentResult).timeline.map(
(sentence, index) => (
<div
key={index}
id={`segment-${index}`}
className={`py-2 px-4 cursor-pointer hover:bg-yellow-400/10 ${
currentSegmentIndex === index ? "bg-yellow-400/25" : ""
}`}
onClick={() => {
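// Truncate to 8 decimal places so floating-point error cannot push the seek ratio past 1.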
wavesurfer.seekTo(
Math.floor((sentence.startTime / media.duration) * 1e8) / 1e8
);
wavesurfer.setScrollTime(sentence.startTime);
setCurrentSegmentIndex(index);
}}
>
<div className="flex items-center justify-between">
<span className="text-xs opacity-50">#{index + 1}</span>
<div className="flex items-center space-x-2">
{(recordingStats || []).findIndex(
(s) => s.referenceId === index
) !== -1 && <MicIcon className="w-3 h-3 text-sky-500" />}
<span className="text-xs opacity-50">
{formatDuration(sentence.startTime, "s")}
</span>
</div>
<p className="">{t.text}</p>
</div>
))}
</ScrollArea>
) : (
<TranscriptionPlaceholder />
<p className="">{sentence.text}</p>
</div>
)
)}
</div>
);
};
export const TranscriptionPlaceholder = () => {
return (
<div className="p-4">
{Array.from({ length: 5 }).map((_, i) => (
<Skeleton key={i} className="h-4 w-full mb-4" />
))}
<Skeleton className="h-4 w-3/5" />
</div>
);
};

View File

@@ -14,7 +14,7 @@ import {
} from "@renderer/components/ui";
import {
SpeechPlayer,
AudioDetail,
AudioPlayer,
ConversationShortcuts,
} from "@renderer/components";
import { useState, useEffect, useContext } from "react";
@@ -242,16 +242,16 @@ export const AssistantMessageComponent = (props: {
<Sheet open={shadowing} onOpenChange={(value) => setShadowing(value)}>
<SheetContent
side="bottom"
className="rounded-t-2xl shadow-lg"
className="h-100vh p-0"
displayClose={false}
>
<SheetHeader className="flex items-center justify-center -mt-4 mb-2">
<SheetHeader className="flex items-center justify-center h-14">
<SheetClose>
<ChevronDownIcon />
</SheetClose>
</SheetHeader>
{Boolean(speech) && <AudioDetail md5={speech.md5} />}
{Boolean(speech) && <AudioPlayer md5={speech.md5} />}
</SheetContent>
</Sheet>
</div>

View File

@@ -1,79 +0,0 @@
import Pitchfinder from "pitchfinder";
export const extractFrequencies = (props: {
peaks: Float32Array;
sampleRate: number;
}): number[] => {
const { peaks, sampleRate } = props;
const detectPitch = Pitchfinder.AMDF({ sampleRate });
const duration = peaks.length / sampleRate;
const bpm = peaks.length / duration / 60;
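// peaks.length / duration equals the sample rate, so bpm here is sampleRate / 60; it sets both tempo and quantization for the pitch detector.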
const frequencies = Pitchfinder.frequencies(detectPitch, peaks, {
tempo: bpm,
quantization: bpm,
});
return frequencies;
};
export const PitchContour = (props: {
peaks?: Float32Array;
sampleRate?: number;
frequencies?: number[];
height: number;
id?: string;
}) => {
const { peaks, sampleRate, height, id } = props;
let { frequencies } = props;
if (!frequencies) {
frequencies = extractFrequencies({ peaks, sampleRate });
}
// Find the baseline frequency (the value that appears most often)
const frequencyMap: any = {};
let maxAmount = 0;
let baseFrequency = 0;
frequencies.forEach((frequency) => {
if (!frequency) return;
const tolerance = 10;
frequency = Math.round(frequency * tolerance) / tolerance;
if (!frequencyMap[frequency]) frequencyMap[frequency] = 0;
frequencyMap[frequency] += 1;
if (frequencyMap[frequency] > maxAmount) {
maxAmount = frequencyMap[frequency];
baseFrequency = frequency;
}
});
const pitchUpColor = "#385587";
// const pitchDownColor = "#C26351";
const pitchDownColor = "#385587";
const canvas = document.createElement("canvas");
const ctx = canvas.getContext("2d");
canvas.width = frequencies.length;
canvas.height = height;
canvas.style.width = "100%";
canvas.style.height = "100%";
// Each frequency is a point whose Y position is the frequency and X position is the time
let prevY = 0;
frequencies.forEach((frequency, index) => {
if (!frequency) return;
const hratio = 0.5; // the bigger the narrower the pitch contour drawn on canvas.
const marginTop = height * 0.4; // the bigger the lower the pitch contour positioned.
const y =
Math.round(height - (frequency / (baseFrequency * 2)) * height) * hratio +
marginTop;
ctx.fillStyle = y > prevY ? pitchDownColor : pitchUpColor;
ctx.fillRect(index, y, 1, 2);
prevY = y;
});
canvas.id = id;
return canvas;
};

View File

@@ -1,6 +1,7 @@
import { useEffect, useState, useRef, useCallback, useContext } from "react";
import { AppSettingsProviderContext } from "@renderer/context";
import { PitchContour } from "@renderer/components";
import { renderPitchContour } from "@renderer/lib/utils";
import { extractFrequencies } from "@/utils";
import WaveSurfer from "wavesurfer.js";
import { Button, Skeleton } from "@renderer/components/ui";
import { PlayIcon, PauseIcon } from "lucide-react";
@@ -12,6 +13,7 @@ import {
defaultLayoutIcons,
} from "@vidstack/react/player/layouts/default";
export const STORAGE_WORKER_ENDPOINT = "https://enjoy-storage.baizhiheizi.com";
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";
export const PostAudio = (props: {
audio: Partial<MediumType>;
@@ -22,11 +24,16 @@ export const PostAudio = (props: {
const { webApi } = useContext(AppSettingsProviderContext);
const [transcription, setTranscription] = useState<TranscriptionType>();
const currentTranscription = (transcription?.result || []).find(
(s) =>
currentTime >= s.offsets.from / 1000.0 &&
currentTime <= s.offsets.to / 1000.0
);
const currentTranscription = transcription?.result?.["transcript"]
? (transcription.result?.timeline || []).find(
(s: TimelineEntry) =>
currentTime >= s.startTime && currentTime <= s.endTime
)
: (transcription?.result || []).find(
(s: TranscriptionResultSegmentType) =>
currentTime >= s.offsets.from / 1000.0 &&
currentTime <= s.offsets.to / 1000.0
);
useEffect(() => {
webApi
@@ -134,17 +141,25 @@ const WavesurferPlayer = (props: {
wavesurfer.on("timeupdate", (time: number) => {
setCurrentTime(time);
}),
wavesurfer.on("decode", () => {
wavesurfer.on("ready", () => {
setDuration(wavesurfer.getDuration());
const peaks = wavesurfer.getDecodedData().getChannelData(0);
const sampleRate = wavesurfer.options.sampleRate;
wavesurfer.renderer.getWrapper().appendChild(
PitchContour({
peaks,
sampleRate,
height,
})
);
const data = extractFrequencies({ peaks, sampleRate });
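// Defer rendering briefly so wavesurfer has finished laying out its wrapper before the contour canvas is attached.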
setTimeout(() => {
renderPitchContour({
wrapper: wavesurfer.getWrapper(),
canvasId: `pitch-contour-${audio.id}-canvas`,
labels: new Array(data.length).fill(""),
datasets: [
{
data,
cubicInterpolationMode: "monotone",
pointRadius: 1,
},
],
});
}, 1000);
setInitialized(true);
}),
];

View File

@@ -1,5 +1,6 @@
import { useEffect, useState, useRef, useCallback } from "react";
import { PitchContour } from "@renderer/components";
import { renderPitchContour } from "@renderer/lib/utils";
import { extractFrequencies } from "@/utils";
import WaveSurfer from "wavesurfer.js";
import { Button, Skeleton } from "@renderer/components/ui";
import { PlayIcon, PauseIcon } from "lucide-react";
@@ -59,17 +60,28 @@ export const PostRecording = (props: {
wavesurfer.on("pause", () => {
setIsPlaying(false);
}),
wavesurfer.on("decode", () => {
wavesurfer.on("ready", () => {
setDuration(wavesurfer.getDuration());
const peaks = wavesurfer.getDecodedData().getChannelData(0);
const sampleRate = wavesurfer.options.sampleRate;
wavesurfer.renderer.getWrapper().appendChild(
PitchContour({
peaks,
sampleRate,
height,
})
);
const data = extractFrequencies({ peaks, sampleRate });
setTimeout(() => {
renderPitchContour({
wrapper: wavesurfer.getWrapper(),
canvasId: `pitch-contour-${recording.id}-canvas`,
labels: new Array(data.length).fill(""),
datasets: [
{
data,
cubicInterpolationMode: "monotone",
pointRadius: 1,
borderColor: "#fb6f92",
pointBorderColor: "#fb6f92",
pointBackgroundColor: "#ff8fab",
},
],
});
}, 1000);
setInitialized(true);
}),
];
@@ -119,15 +131,13 @@ export const PostRecording = (props: {
></div>
</div>
{
recording.referenceText && (
<div className="mt-2 bg-muted px-4 py-2 rounded">
<div className="text-muted-foreground text-center font-serif">
{recording.referenceText}
</div>
{recording.referenceText && (
<div className="mt-2 bg-muted px-4 py-2 rounded">
<div className="text-muted-foreground text-center font-serif">
{recording.referenceText}
</div>
)
}
</div>
)}
</div>
);
};

View File

@@ -8,29 +8,84 @@ export const Hotkeys = () => {
<>
<div className="font-semibold mb-4 capitilized">{t("hotkeys")}</div>
<div className="flex items-center justify-between py-4">
<div className="flex items-center space-x-2">{t("quitApp")}</div>
<kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
{commandOrCtrl} + Q
</kbd>
</div>
<Separator />
<div className="mb-6">
<div className="text-sm text-muted-foreground">{t("system")}</div>
<div className="flex items-center justify-between py-4">
<div className="flex items-center space-x-2">{t("openPreferences")}</div>
<kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
{commandOrCtrl} + ,
</kbd>
</div>
<Separator />
<div className="flex items-center justify-between py-4">
<div className="flex items-center space-x-2">{t("quitApp")}</div>
<kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
{commandOrCtrl} + Q
</kbd>
</div>
<div className="flex items-center justify-between py-4">
<div className="flex items-center space-x-2">{t("playOrPause")}</div>
<kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
Space
</kbd>
<Separator />
<div className="flex items-center justify-between py-4">
<div className="flex items-center space-x-2">
{t("openPreferences")}
</div>
<kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
{commandOrCtrl} + ,
</kbd>
</div>
<Separator />
</div>
<div className="mb-6">
<div className="text-sm text-muted-foreground">{t("player")}</div>
<div className="flex items-center justify-between py-4">
<div className="flex items-center space-x-2">{t("playOrPause")}</div>
<kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
Space
</kbd>
</div>
<Separator />
<div className="flex items-center justify-between py-4">
<div className="flex items-center space-x-2 capitalize">
{t("startOrStopRecording")}
</div>
<kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
r
</kbd>
</div>
<Separator />
<div className="flex items-center justify-between py-4">
<div className="flex items-center space-x-2">
{t("playOrPauseRecording")}
</div>
<kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
{commandOrCtrl} + r
</kbd>
</div>
<Separator />
<div className="flex items-center justify-between py-4">
<div className="flex items-center space-x-2 capitalize">
{t("playPreviousSegment")}
</div>
<kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
p
</kbd>
</div>
<Separator />
<div className="flex items-center justify-between py-4">
<div className="flex items-center space-x-2 capitalize">
{t("playNextSegment")}
</div>
<kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
n
</kbd>
</div>
<Separator />
</div>
<Separator />
</>
);
};

View File

@@ -117,7 +117,6 @@ export const OpenaiSettings = () => {
<Input
disabled={!editing}
placeholder={t("leaveEmptyToUseDefault")}
defaultValue=""
value={field.value}
onChange={field.onChange}
/>

View File

@@ -1,6 +1,7 @@
import { useEffect, useState, useRef, useCallback } from "react";
import WaveSurfer from "wavesurfer.js";
import { PitchContour } from "@renderer/components";
import { renderPitchContour } from "@renderer/lib/utils";
import { extractFrequencies } from "@/utils";
import { Button, Skeleton } from "@renderer/components/ui";
import { PlayIcon, PauseIcon } from "lucide-react";
import { useIntersectionObserver } from "@uidotdev/usehooks";
@@ -70,16 +71,23 @@ export const RecordingPlayer = (props: {
wavesurfer.on("timeupdate", (time: number) => {
onCurrentTimeChange?.(time);
}),
wavesurfer.on("decode", () => {
wavesurfer.on("ready", () => {
const peaks = wavesurfer.getDecodedData().getChannelData(0);
const sampleRate = wavesurfer.options.sampleRate;
wavesurfer.renderer.getWrapper().appendChild(
PitchContour({
peaks,
sampleRate,
height,
})
);
const data = extractFrequencies({ peaks, sampleRate });
setTimeout(() => {
renderPitchContour({
wrapper: wavesurfer.getWrapper(),
canvasId: `pitch-contour-${recording.id}-canvas`,
labels: new Array(data.length).fill(""),
datasets: [
{
data,
cubicInterpolationMode: "monotone",
},
],
});
}, 1000);
setInitialized(true);
}),
];

View File

@@ -1,6 +1,6 @@
export * from "./videos-table";
export * from "./video-edit-form";
export * from "./video-detail";
export * from "./video-player";
export * from "./videos-component";

View File

@@ -1,407 +0,0 @@
import { useEffect, useState, useContext } from "react";
import {
DbProviderContext,
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
import {
LoaderSpin,
RecordingsList,
PagePlaceholder,
MediaPlayer,
MediaTranscription,
} from "@renderer/components";
import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import {
AlertDialog,
AlertDialogHeader,
AlertDialogDescription,
AlertDialogTitle,
AlertDialogContent,
AlertDialogFooter,
AlertDialogCancel,
Button,
PingPoint,
Progress,
ScrollArea,
toast,
} from "@renderer/components/ui";
import { t } from "i18next";
import { useTranscribe } from "@renderer/hooks";
import { useNavigate } from "react-router-dom";
export const VideoDetail = (props: { id?: string; md5?: string }) => {
const navigate = useNavigate();
const { id, md5 } = props;
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const { whisperConfig } = useContext(AISettingsProviderContext);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const [video, setVideo] = useState<VideoType | null>(null);
const [transcription, setTranscription] = useState<TranscriptionType>(null);
const [sharing, setSharing] = useState<boolean>(false);
// Transcription controls
const [transcribing, setTranscribing] = useState<boolean>(false);
const { transcribe } = useTranscribe();
const [transcribingProgress, setTranscribingProgress] = useState<number>(0);
// Player controls
const [initialized, setInitialized] = useState<boolean>(false);
const [currentTime, setCurrentTime] = useState<number>(0);
const [seek, setSeek] = useState<{
seekTo: number;
timestamp: number;
}>();
const [currentSegmentIndex, setCurrentSegmentIndex] = useState<number>(0);
const [recordButtonVisible, setRecordButtonVisible] =
useState<boolean>(false);
const [zoomRatio, setZoomRatio] = useState<number>(1.0);
const [isPlaying, setIsPlaying] = useState(false);
const [playMode, setPlayMode] = useState<"loop" | "single" | "all">("all");
const [playBackRate, setPlaybackRate] = useState<number>(1);
const [displayInlineCaption, setDisplayInlineCaption] =
useState<boolean>(true);
const onTransactionUpdate = (event: CustomEvent) => {
const { model, action, record } = event.detail || {};
if (model === "Transcription" && action === "update") {
setTranscription(record);
}
};
const findOrCreateTranscription = async () => {
return EnjoyApp.transcriptions
.findOrCreate({
targetId: video.id,
targetType: "Video",
})
.then((transcription) => {
setTranscription(transcription);
});
};
const generateTranscription = async () => {
if (transcribing) return;
if (!transcription) {
await findOrCreateTranscription();
}
setTranscribing(true);
setTranscribingProgress(0);
try {
const { engine, model, result } = await transcribe(video.src, {
targetId: video.id,
targetType: "Video",
});
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result,
engine,
model,
});
} catch (err) {
toast.error(err.message);
}
setTranscribing(false);
};
const findTranscriptionFromWebApi = async () => {
if (!transcription) {
await findOrCreateTranscription();
}
const res = await webApi.transcriptions({
targetMd5: video.md5,
});
const transcript = (res?.transcriptions || []).filter((t) =>
["base", "small", "medium", "large", "whisper-1"].includes(t.model)
)?.[0];
if (!transcript) {
throw new Error("Transcription not found");
}
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result: transcript.result,
engine: transcript.engine,
model: transcript.model,
});
};
const findOrGenerateTranscription = async () => {
try {
await findTranscriptionFromWebApi();
} catch (err) {
console.error(err);
await generateTranscription();
}
};
const handleShare = async () => {
if (!video.source.startsWith("http")) {
toast.error(t("shareFailed"), {
description: t("cannotShareLocalVideo"),
});
return;
}
if (!video.source && !video.isUploaded) {
try {
await EnjoyApp.videos.upload(video.id);
} catch (err) {
toast.error(t("shareFailed"), { description: err.message });
return;
}
}
webApi
.createPost({
targetType: "Video",
targetId: video.id,
})
.then(() => {
toast.success(t("sharedSuccessfully"), {
description: t("sharedVideo"),
});
})
.catch((err) => {
toast.error(t("shareFailed"), { description: err.message });
});
setSharing(false);
};
useEffect(() => {
const where = id ? { id } : { md5 };
EnjoyApp.videos.findOne(where).then((video) => {
if (video) {
setVideo(video);
} else {
toast.error(t("models.video.notFound"));
}
});
}, [id, md5]);
useEffect(() => {
if (!video) return;
findOrCreateTranscription();
}, [video]);
useEffect(() => {
if (!initialized) return;
if (!transcription) return;
addDblistener(onTranscriptionUpdate);
if (transcription?.state == "pending") {
findOrGenerateTranscription();
}
if (whisperConfig.service === "local") {
EnjoyApp.whisper.onProgress((_, p: number) => {
if (p > 100) p = 100;
setTranscribingProgress(p);
});
}
return () => {
removeDbListener(onTranscriptionUpdate);
EnjoyApp.whisper.removeProgressListeners();
};
}, [md5, transcription, initialized]);
if (!video) {
return <LoaderSpin />;
}
if (!video.src) {
return (
<PagePlaceholder placeholder="invalid" extra="cannot find play source" />
);
}
return (
<div className="relative">
<div className={`grid grid-cols-7 gap-4 ${initialized ? "" : "blur-sm"}`}>
<div className="col-span-5 h-[calc(100vh-6.5rem)] flex flex-col">
<MediaPlayer
mediaId={video.id}
mediaType="Video"
mediaUrl={video.src}
mediaMd5={video.md5}
transcription={transcription}
currentTime={currentTime}
setCurrentTime={setCurrentTime}
currentSegmentIndex={currentSegmentIndex}
setCurrentSegmentIndex={setCurrentSegmentIndex}
recordButtonVisible={recordButtonVisible}
setRecordButtonVisible={setRecordButtonVisible}
seek={seek}
initialized={initialized}
setInitialized={setInitialized}
zoomRatio={zoomRatio}
setZoomRatio={setZoomRatio}
isPlaying={isPlaying}
setIsPlaying={setIsPlaying}
playMode={playMode}
setPlayMode={setPlayMode}
playBackRate={playbackRate}
setPlaybackRate={setPlaybackRate}
displayInlineCaption={displayInlineCaption}
setDisplayInlineCaption={setDisplayInlineCaption}
onShare={() => setSharing(true)}
onDecoded={({ duration, sampleRate }) => {
if (video.duration) return;
EnjoyApp.videos.update(video.id, {
metadata: Object.assign({}, video.metadata, {
duration,
sampleRate,
}),
});
}}
/>
<ScrollArea
className={`flex-1 relative ${
recordButtonVisible ? "bg-muted" : "hidden"
}`}
>
<RecordingsList
key={`recordings-list-${video.id}-${currentSegmentIndex}`}
targetId={video.id}
targetType="Video"
referenceText={transcription?.result?.[currentSegmentIndex]?.text}
referenceId={currentSegmentIndex}
/>
</ScrollArea>
</div>
<div className="col-span-2 h-[calc(100vh-6.5rem)]">
<MediaTranscription
mediaId={video.id}
mediaType="Video"
mediaName={video.name}
transcription={transcription}
transcribing={transcribing}
progress={transcribingProgress}
transcribe={generateTranscription}
currentSegmentIndex={currentSegmentIndex}
onSelectSegment={(index) => {
if (currentSegmentIndex === index) return;
const segment = transcription?.result?.[index];
if (!segment) return;
if (playMode === "loop" && isPlaying) {
setIsPlaying(false);
}
setSeek({
seekTo: segment.offsets.from / 1000,
timestamp: Date.now(),
});
}}
/>
</div>
</div>
<AlertDialog open={sharing} onOpenChange={(value) => setSharing(value)}>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("shareAudio")}</AlertDialogTitle>
<AlertDialogDescription>
{t("areYouSureToShareThisAudioToCommunity")}
</AlertDialogDescription>
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
<Button variant="default" onClick={handleShare}>
{t("share")}
</Button>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
{/* Show loading progress until waveform is decoded & transcribed */}
<AlertDialog open={!initialized || !Boolean(transcription?.result)}>
<AlertDialogContent>
<AlertDialogHeader>
<AlertDialogTitle>{t("preparingVideo")}</AlertDialogTitle>
<AlertDialogDescription>
{t("itMayTakeAWhileToPrepareForTheFirstLoad")}
</AlertDialogDescription>
</AlertDialogHeader>
<div className="py-4">
{initialized ? (
<div className="mb-4 flex items-center space-x-4">
<CheckCircleIcon className="w-4 h-4 text-green-500" />
<span>{t("waveformIsDecoded")}</span>
</div>
) : (
<div className="mb-4 flex items-center space-x-4">
<LoaderIcon className="w-4 h-4 animate-spin" />
<span>{t("decodingWaveform")}</span>
</div>
)}
{!transcription ? (
<div className="flex items-center space-x-4">
<LoaderIcon className="w-4 h-4 animate-spin" />
<span>{t("loadingTranscription")}</span>
</div>
) : transcription.result ? (
<div className="flex items-center space-x-4">
<CheckCircleIcon className="w-4 h-4 text-green-500" />
<span>{t("transcribedSuccessfully")}</span>
</div>
) : transcribing ? (
<div className="">
<div className="flex items-center space-x-4 mb-2">
<PingPoint colorClassName="bg-yellow-500" />
<span>{t("transcribing")}</span>
</div>
{whisperConfig.service === "local" && (
<Progress value={transcribingProgress} />
)}
</div>
) : (
<div className="flex items-center space-x-4">
<PingPoint colorClassName="bg-muted" />
<div className="inline">
<span>{t("notTranscribedYet")}</span>
{initialized && (
<Button
onClick={generateTranscription}
className="ml-4"
size="sm"
>
{t("transcribe")}
</Button>
)}
</div>
</div>
)}
</div>
<AlertDialogFooter>
<Button variant="secondary" onClick={() => navigate(-1)}>
{t("cancel")}
</Button>
</AlertDialogFooter>
</AlertDialogContent>
</AlertDialog>
{!initialized && (
<div className="top-0 w-full h-full absolute z-30 bg-background/10 flex items-center justify-center">
<LoaderIcon className="text-muted-foreground animate-spin w-8 h-8" />
</div>
)}
</div>
);
};

View File

@@ -0,0 +1,72 @@
import { useEffect, useContext, useRef } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import {
MediaLoadingModal,
MediaCaption,
MediaPlayerControls,
MediaTabs,
MediaCurrentRecording,
} from "@renderer/components";
import { formatDuration } from "@renderer/lib/utils";
import { useVideo } from "@renderer/hooks";
export const VideoPlayer = (props: { id?: string; md5?: string }) => {
const { id, md5 } = props;
const { media, currentTime, setMedia, setRef } = useContext(
MediaPlayerProviderContext
);
const { video } = useVideo({ id, md5 });
const ref = useRef(null);
useEffect(() => {
if (!video) return;
setMedia(video);
}, [video]);
useEffect(() => {
setRef(ref);
}, [ref]);
return (
<div data-testid="video-player">
<div className="h-[calc(100vh-37.5rem)] mb-4">
<div className="grid grid-cols-3 gap-4 px-6 h-full">
<div className="col-span-1 rounded-lg border shadow-lg h-[calc(100vh-37.5rem)]">
<MediaTabs />
</div>
<div className="col-span-2 h-[calc(100vh-37.5rem)]">
<MediaCaption />
</div>
</div>
</div>
<div className="h-[33rem] flex flex-col">
<div className="h-[13rem] py-2 px-6 mb-4">
<MediaCurrentRecording />
</div>
<div className="w-full h-[13rem] px-6 py-2 mb-4">
<div className="border rounded-xl shadow-lg relative">
<div data-testid="media-player-container" ref={ref} />
<div className="absolute right-2 top-1">
<span className="text-sm">
{formatDuration(currentTime || 0)}
</span>
<span className="mx-1">/</span>
<span className="text-sm">
{formatDuration(media?.duration || 0)}
</span>
</div>
</div>
</div>
<div className="w-full bg-background z-10 shadow-xl">
<MediaPlayerControls />
</div>
</div>
<MediaLoadingModal />
</div>
);
};

View File

@@ -239,7 +239,7 @@ export const VideosComponent = () => {
<AlertDialogTitle>{t("transcribe")}</AlertDialogTitle>
<AlertDialogDescription>
<p className="break-all">
{t("transcribeVideoConfirmation", {
{t("transcribeMediaConfirmation", {
name: transcribing?.name || "",
})}
</p>

View File

@@ -25,7 +25,7 @@ export const AISettingsProvider = ({
}: {
children: React.ReactNode;
}) => {
const [defaultEngine, setDefaultEngine] = useState<string>(null);
const [defaultEngine, setDefaultEngine] = useState<string>("openai");
const [openai, setOpenai] = useState<LlmProviderType>(null);
const [googleGenerativeAi, setGoogleGenerativeAi] =
useState<LlmProviderType>(null);

View File

@@ -2,3 +2,5 @@ export * from "./ai-settings-provider";
export * from "./app-settings-provider";
export * from "./db-provider";
export * from "./theme-provider";
export * from "./wavesurfer-provider";
export * from "./media-player-provider";

View File

@@ -0,0 +1,454 @@
import { createContext, useEffect, useState, useContext } from "react";
import { extractFrequencies } from "@/utils";
import { AppSettingsProviderContext } from "@renderer/context";
import { useTranscriptions, useRecordings } from "@renderer/hooks";
import WaveSurfer from "wavesurfer.js";
import Regions, {
type Region as RegionType,
} from "wavesurfer.js/dist/plugins/regions";
import Chart from "chart.js/auto";
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";
import { IPA_MAPPING } from "@/constants";
type MediaPlayerContextType = {
media: AudioType | VideoType;
setMedia: (media: AudioType | VideoType) => void;
setMediaProvider: (mediaProvider: HTMLAudioElement | null) => void;
waveform: WaveFormDataType;
// wavesurfer
wavesurfer: WaveSurfer;
setRef: (ref: any) => void;
decoded: boolean;
// player state
currentTime: number;
currentSegmentIndex: number;
setCurrentSegmentIndex: (index: number) => void;
zoomRatio: number;
setZoomRatio: (zoomRation: number) => void;
fitZoomRatio: number;
minPxPerSec: number;
// regions
regions: Regions | null;
activeRegion: RegionType;
setActiveRegion: (region: RegionType) => void;
editingRegion: boolean;
setEditingRegion: (editing: boolean) => void;
renderPitchContour: (
region: RegionType,
options?: {
repaint?: boolean;
canvasId?: string;
containerClassNames?: string[];
data?: Chart["data"];
}
) => void;
pitchChart: Chart;
// Transcription
transcription: TranscriptionType;
generateTranscription: () => void;
transcribing: boolean;
transcribingProgress: number;
transcriptionDraft: TranscriptionType["result"];
setTranscriptionDraft: (result: TranscriptionType["result"]) => void;
// Recordings
isRecording: boolean;
setIsRecording: (isRecording: boolean) => void;
currentRecording: RecordingType;
setCurrentRecording: (recording: RecordingType) => void;
recordings: RecordingType[];
fetchRecordings: (offset: number) => void;
loadingRecordings: boolean;
hasMoreRecordings: boolean;
};
export const MediaPlayerProviderContext =
createContext<MediaPlayerContextType>(null);
export const MediaPlayerProvider = ({
children,
}: {
children: React.ReactNode;
}) => {
const height = 192;
const minPxPerSec = 150;
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const [media, setMedia] = useState<AudioType | VideoType>(null);
const [mediaProvider, setMediaProvider] = useState<HTMLAudioElement | null>(
null
);
const [waveform, setWaveForm] = useState<WaveFormDataType>(null);
const [wavesurfer, setWavesurfer] = useState(null);
const [regions, setRegions] = useState<Regions | null>(null);
const [activeRegion, setActiveRegion] = useState<RegionType>(null);
const [editingRegion, setEditingRegion] = useState<boolean>(false);
const [pitchChart, setPitchChart] = useState<Chart>(null);
const [ref, setRef] = useState(null);
// Player state
const [decoded, setDecoded] = useState<boolean>(false);
const [currentTime, setCurrentTime] = useState<number>(0);
const [currentSegmentIndex, setCurrentSegmentIndex] = useState<number>(0);
const [fitZoomRatio, setFitZoomRatio] = useState<number>(1.0);
const [zoomRatio, setZoomRatio] = useState<number>(1.0);
const [isRecording, setIsRecording] = useState<boolean>(false);
const [currentRecording, setCurrentRecording] = useState<RecordingType>(null);
const [transcriptionDraft, setTranscriptionDraft] =
useState<TranscriptionType["result"]>();
const {
transcription,
generateTranscription,
transcribing,
transcribingProgress,
} = useTranscriptions(media);
const {
recordings,
fetchRecordings,
loading: loadingRecordings,
hasMore: hasMoreRecordings,
} = useRecordings(media, currentSegmentIndex);
const initializeWavesurfer = async () => {
if (!media) return;
if (!mediaProvider) return;
if (!ref.current) return;
const ws = WaveSurfer.create({
container: ref.current,
height,
waveColor: "#eaeaea",
progressColor: "#c0d6df",
cursorColor: "#ff0054",
barWidth: 2,
autoScroll: true,
minPxPerSec,
autoCenter: false,
dragToSeek: false,
fillParent: true,
media: mediaProvider,
peaks: waveform ? [waveform.peaks] : undefined,
duration: waveform ? waveform.duration : undefined,
});
const blob = await fetch(media.src).then((res) => res.blob());
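// With cached peaks and duration, wavesurfer can render immediately without re-decoding the audio.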
if (waveform) {
ws.loadBlob(blob, [waveform.peaks], waveform.duration);
setDecoded(true);
} else {
ws.loadBlob(blob);
}
setWavesurfer(ws);
};
const renderPitchContour = (
region: RegionType,
options?: {
repaint?: boolean;
canvasId?: string;
containerClassNames?: string[];
data?: Chart["data"];
}
) => {
if (!region) return;
if (!waveform?.frequencies?.length) return;
if (!wavesurfer) return;
const { repaint = true, containerClassNames = [] } = options || {};
const duration = wavesurfer.getDuration();
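// Map the region's time range onto index positions in the frequencies array.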
const fromIndex = Math.round(
(region.start / duration) * waveform.frequencies.length
);
const toIndex = Math.round(
(region.end / duration) * waveform.frequencies.length
);
const wrapper = (wavesurfer as any).renderer.getWrapper();
// remove existing pitch contour
if (repaint) {
wrapper
.querySelectorAll(".pitch-contour")
.forEach((element: HTMLDivElement) => {
element.remove();
});
}
// calculate offset and width
const wrapperWidth = wrapper.getBoundingClientRect().width;
const offsetLeft = (region.start / duration) * wrapperWidth;
const width = ((region.end - region.start) / duration) * wrapperWidth;
// create container and canvas
const pitchContourWidthContainer = document.createElement("div");
const canvas = document.createElement("canvas");
const canvasId = options?.canvasId || `pitch-contour-${region.id}-canvas`;
canvas.id = canvasId;
canvas.style.width = `${width}px`;
canvas.style.height = `${height}px`;
pitchContourWidthContainer.appendChild(canvas);
pitchContourWidthContainer.style.position = "absolute";
pitchContourWidthContainer.style.top = "0";
pitchContourWidthContainer.style.left = "0";
pitchContourWidthContainer.style.width = `${width}px`;
pitchContourWidthContainer.style.height = `${height}px`;
pitchContourWidthContainer.style.marginLeft = `${offsetLeft}px`;
pitchContourWidthContainer.classList.add(
"pitch-contour",
...containerClassNames
);
// pitchContourWidthContainer.style.zIndex = "3";
wrapper.appendChild(pitchContourWidthContainer);
// prepare chart data
let chartData: Chart["data"] = options?.data;
if (!chartData) {
const data = waveform.frequencies.slice(fromIndex, toIndex);
const regionDuration = region.end - region.start;
const labels = new Array(data.length).fill("");
const caption = transcription?.result?.timeline?.[currentSegmentIndex];
if (caption && region.id.startsWith("segment-region")) {
caption.timeline.forEach((segment: TimelineEntry) => {
const index = Math.round(
((segment.startTime - region.start) / regionDuration) * data.length
);
labels[index] = segment.text.trim();
});
} else if (region.id.startsWith("word-region")) {
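// Word region: collect the phones of every word in the region and label them with IPA symbols.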
const words = caption.timeline.filter(
(w: TimelineEntry) =>
w.startTime >= region.start &&
w.endTime <= region.end &&
w.type === "word"
);
let phones: TimelineEntry[] = [];
words.forEach((word: TimelineEntry) => {
word.timeline.forEach((token: TimelineEntry) => {
phones = phones.concat(token.timeline);
});
});
phones.forEach((phone: TimelineEntry) => {
const index = Math.round(
((phone.startTime - region.start) / regionDuration) * data.length
);
labels[index] = [
labels[index] || "",
(IPA_MAPPING as any)[phone.text.trim()] || phone.text.trim(),
].join("");
});
}
chartData = {
labels,
datasets: [
{
data,
cubicInterpolationMode: "monotone",
},
],
};
}
setPitchChart(
new Chart(canvas, {
type: "line",
data: chartData,
options: {
plugins: {
legend: {
display: false,
},
title: {
display: false,
},
},
scales: {
x: {
beginAtZero: true,
ticks: {
autoSkip: false,
},
display: true,
grid: {
display: false,
},
border: {
display: false,
},
},
y: {
beginAtZero: true,
display: false,
},
},
},
})
);
};
useEffect(() => {
if (!media) return;
EnjoyApp.waveforms.find(media.md5).then((waveform) => {
setWaveForm(waveform);
});
}, [media]);
/*
* Initialize wavesurfer when container ref is available
* and mediaProvider is available
*/
useEffect(() => {
initializeWavesurfer();
}, [media, ref, mediaProvider]);
/*
* When wavesurfer is decoded,
* set up event listeners for wavesurfer
* and clean up when component is unmounted
*/
useEffect(() => {
if (!wavesurfer) return;
setRegions(wavesurfer.registerPlugin(Regions.create()));
setCurrentTime(0);
const subscriptions = [
wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
wavesurfer.on("timeupdate", (time: number) => setCurrentTime(time)),
wavesurfer.on("decode", () => {
const peaks: Float32Array = wavesurfer
.getDecodedData()
.getChannelData(0);
const duration: number = wavesurfer.getDuration();
const sampleRate = wavesurfer.options.sampleRate;
const _frequencies = extractFrequencies({ peaks, sampleRate });
const _waveform = {
peaks: Array.from(peaks),
duration,
sampleRate,
frequencies: _frequencies,
};
EnjoyApp.waveforms.save(media.md5, _waveform);
setWaveForm(_waveform);
}),
wavesurfer.on("ready", () => {
setDecoded(true);
}),
];
return () => {
subscriptions.forEach((unsub) => unsub());
};
}, [wavesurfer]);
/*
* Update fitZoomRatio when the active region changes
*/
useEffect(() => {
if (!ref?.current) return;
if (!wavesurfer) return;
if (!activeRegion) return;
const containerWidth = ref.current.getBoundingClientRect().width;
const duration = activeRegion.end - activeRegion.start;
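// Fit the region to the container: a sentence fills the full width, a word gets roughly a third of it.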
if (activeRegion.id.startsWith("segment-region")) {
setFitZoomRatio(containerWidth / duration / minPxPerSec);
} else if (activeRegion.id.startsWith("word-region")) {
setFitZoomRatio(containerWidth / 3 / duration / minPxPerSec);
}
}, [ref, wavesurfer, activeRegion]);
/*
* Zoom the waveform and re-render the pitch contour when zoomRatio updates
*/
useEffect(() => {
if (!wavesurfer) return;
if (!decoded) return;
wavesurfer.zoom(zoomRatio * minPxPerSec);
if (!activeRegion) return;
renderPitchContour(activeRegion);
wavesurfer.setScrollTime(activeRegion.start);
}, [zoomRatio, wavesurfer, decoded]);
/*
* Re-render pitch contour when the active region changes
*/
useEffect(() => {
if (!activeRegion) return;
renderPitchContour(activeRegion);
}, [activeRegion]);
/*
* Update player styles
*/
useEffect(() => {
if (!wavesurfer) return;
if (!decoded) return;
const scrollContainer = wavesurfer.getWrapper().closest(".scroll");
scrollContainer.style.scrollbarWidth = "thin";
}, [decoded, wavesurfer]);
return (
<MediaPlayerProviderContext.Provider
value={{
media,
setMedia,
setMediaProvider,
wavesurfer,
setRef,
decoded,
currentTime,
currentSegmentIndex,
setCurrentSegmentIndex,
waveform,
zoomRatio,
setZoomRatio,
fitZoomRatio,
minPxPerSec,
transcription,
regions,
renderPitchContour,
pitchChart,
activeRegion,
setActiveRegion,
editingRegion,
setEditingRegion,
generateTranscription,
transcribing,
transcribingProgress,
transcriptionDraft,
setTranscriptionDraft,
isRecording,
setIsRecording,
currentRecording,
setCurrentRecording,
recordings,
fetchRecordings,
loadingRecordings,
hasMoreRecordings,
}}
>
{children}
</MediaPlayerProviderContext.Provider>
);
};

View File

@@ -0,0 +1,185 @@
import { createContext, useEffect, useState, useContext } from "react";
import { extractFrequencies } from "@/utils";
import { AppSettingsProviderContext } from "@renderer/context";
import WaveSurfer from "wavesurfer.js";
import Regions, {
type Region as RegionType,
} from "wavesurfer.js/dist/plugins/regions";
type WavesurferContextType = {
media: AudioType | VideoType;
setMedia: (media: AudioType | VideoType) => void;
setMediaProvider: (mediaProvider: HTMLAudioElement | null) => void;
wavesurfer: WaveSurfer;
setRef: (ref: any) => void;
initialized: boolean;
currentTime: number;
currentSegmentIndex: number;
setCurrentSegmentIndex: (index: number) => void;
zoomRatio: number;
};
export const WavesurferContext = createContext<WavesurferContextType>(null);
export const WavesurferProvider = ({
children,
}: {
children: React.ReactNode;
}) => {
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const [media, setMedia] = useState<AudioType | VideoType>(null);
const [mediaProvider, setMediaProvider] = useState<HTMLAudioElement | null>(
null
);
const [wavesurfer, setWavesurfer] = useState(null);
const [regions, setRegions] = useState<Regions | null>(null);
const [ref, setRef] = useState(null);
// Player state
const [initialized, setInitialized] = useState<boolean>(false);
const [currentTime, setCurrentTime] = useState<number>(0);
const [seek, setSeek] = useState<{
seekTo: number;
timestamp: number;
}>();
const [currentSegmentIndex, setCurrentSegmentIndex] = useState<number>(0);
const [zoomRatio, setZoomRatio] = useState<number>(1.0);
const [isPlaying, setIsPlaying] = useState(false);
const [playMode, setPlayMode] = useState<"loop" | "single" | "all">("all");
const [playbackRate, setPlaybackRate] = useState<number>(1);
const [displayInlineCaption, setDisplayInlineCaption] =
useState<boolean>(true);
const initializeWavesurfer = async () => {
if (!media) return;
if (!mediaProvider) return;
if (!ref.current) return;
const waveform = await EnjoyApp.waveforms.find(media.md5);
const ws = WaveSurfer.create({
container: ref.current,
height: 250,
waveColor: "#eee",
progressColor: "rgba(0, 0, 0, 0.15)",
cursorColor: "#aaa",
barWidth: 2,
autoScroll: true,
minPxPerSec: 150,
autoCenter: false,
dragToSeek: false,
media: mediaProvider,
peaks: waveform ? [waveform.peaks] : undefined,
duration: waveform ? waveform.duration : undefined,
});
const blob = await fetch(media.src).then((res) => res.blob());
if (waveform) {
ws.loadBlob(blob, [waveform.peaks], waveform.duration);
setInitialized(true);
} else {
ws.loadBlob(blob);
}
// Set up region plugin
setRegions(ws.registerPlugin(Regions.create()));
setWavesurfer(ws);
};
/*
* Initialize wavesurfer when container ref is available
* and mediaProvider is available
*/
useEffect(() => {
initializeWavesurfer();
}, [media, ref, mediaProvider]);
/*
* When wavesurfer is initialized,
* set up event listeners for wavesurfer
* and clean up when component is unmounted
*/
useEffect(() => {
if (!wavesurfer) return;
setCurrentTime(0);
setIsPlaying(false);
const subscriptions = [
wavesurfer.on("play", () => setIsPlaying(true)),
wavesurfer.on("pause", () => setIsPlaying(false)),
wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
wavesurfer.on("timeupdate", (time: number) => setCurrentTime(time)),
wavesurfer.on("decode", () => {
const peaks: Float32Array = wavesurfer
.getDecodedData()
.getChannelData(0);
const duration: number = wavesurfer.getDuration();
const sampleRate = wavesurfer.options.sampleRate;
const _frequencies = extractFrequencies({ peaks, sampleRate });
const _waveform = {
peaks: Array.from(peaks),
duration,
sampleRate,
frequencies: _frequencies,
};
EnjoyApp.waveforms.save(media.md5, _waveform);
}),
wavesurfer.on("ready", () => {
setInitialized(true);
}),
];
return () => {
subscriptions.forEach((unsub) => unsub());
};
}, [wavesurfer]);
/*
* When regions are available,
* set up event listeners for regions
* and clean up when component is unmounted
*/
useEffect(() => {
if (!regions) return;
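// In loop mode, replay the first region when playback finishes; clicking any region plays just that range.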
const subscriptions = [
wavesurfer.on("finish", () => {
if (playMode !== "loop") return;
regions?.getRegions()[0]?.play();
}),
regions.on("region-created", (region: RegionType) => {
region.on("click", () => {
wavesurfer.play(region.start, region.end);
});
}),
];
return () => {
subscriptions.forEach((unsub) => unsub());
};
}, [wavesurfer, regions, playMode]);
return (
<WavesurferContext.Provider
value={{
media,
setMedia,
setMediaProvider,
wavesurfer,
setRef,
initialized,
currentTime,
currentSegmentIndex,
setCurrentSegmentIndex,
zoomRatio,
}}
>
{children}
</WavesurferContext.Provider>
);
};

View File

@@ -1,3 +1,10 @@
export * from './use-recordings';
export * from './use-transcribe';
export * from './use-transcriptions';
export * from './use-ai-command';
export * from './use-conversation';
export * from './use-audio';
export * from './use-video';

View File

@@ -0,0 +1,43 @@
import { useEffect, useContext, useState } from "react";
import {
DbProviderContext,
AppSettingsProviderContext,
} from "@renderer/context";
import { toast } from "@renderer/components/ui";
import { t } from "i18next";
export const useAudio = (options: { id?: string; md5?: string }) => {
const { id, md5 } = options;
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const [audio, setAudio] = useState<AudioType>(null);
const onAudioUpdate = (event: CustomEvent) => {
const { model, action, record } = event.detail || {};
if (model !== "Audio") return;
if (record?.id != audio?.id) return;
if (action !== "update") return;
setAudio(record);
};
useEffect(() => {
const where = id ? { id } : { md5 };
EnjoyApp.audios.findOne(where).then((audio) => {
if (audio) {
setAudio(audio);
} else {
toast.error(t("models.audio.notFound"));
}
});
addDblistener(onAudioUpdate);
return () => {
removeDbListener(onAudioUpdate);
};
}, [id, md5]);
return {
audio,
};
};

View File

@@ -0,0 +1,101 @@
import { useState, useContext, useEffect, useReducer } from "react";
import {
AppSettingsProviderContext,
DbProviderContext,
} from "@renderer/context";
import { recordingsReducer } from "@renderer/reducers";
export const useRecordings = (
media: AudioType | VideoType,
referenceId: number
) => {
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const [recordings, dispatchRecordings] = useReducer(recordingsReducer, []);
const [loading, setLoading] = useState(false);
const [hasMore, setHasMore] = useState(true);
const fetchRecordings = async (offset = 0) => {
setLoading(true);
const limit = 10;
EnjoyApp.recordings
.findAll({
limit,
offset,
where: {
targetId: media.id,
targetType: media.mediaType,
referenceId,
},
})
.then((_recordings) => {
if (_recordings.length < limit) {
setHasMore(false);
} else {
setHasMore(true);
}
dispatchRecordings({
type: offset === 0 ? "set" : "append",
records: _recordings,
});
})
.finally(() => {
setLoading(false);
});
};
const onRecordingsUpdate = (event: CustomEvent) => {
const { model, action, record } = event.detail || {};
if (model === "PronunciationAssessment" && action === "create") {
const recording = recordings.find((r) => r.id === record.targetId);
if (!recording) return;
dispatchRecordings({
type: "update",
record: { ...recording, pronunciationAssessment: record },
});
}
if (model != "Recording") return;
if (action === "destroy") {
dispatchRecordings({
type: "destroy",
record,
});
} else if (action === "create") {
if ((record as RecordingType).targetId !== media.id) return;
if ((record as RecordingType).referenceId !== referenceId) return;
dispatchRecordings({
type: "create",
record,
});
}
};
useEffect(() => {
addDblistener(onRecordingsUpdate);
return () => {
removeDbListener(onRecordingsUpdate);
};
}, [recordings]);
useEffect(() => {
if (!media) return;
fetchRecordings(0);
}, [media, referenceId]);
return {
recordings,
hasMore,
fetchRecordings,
loading,
};
};

View File

@@ -12,11 +12,10 @@ import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import axios from "axios";
import take from "lodash/take";
import sortedUniqBy from "lodash/sortedUniqBy";
import {
groupTranscription,
END_OF_WORD_REGEX,
milisecondsToTimestamp,
} from "@/utils";
import { groupTranscription, milisecondsToTimestamp } from "@/utils";
import { END_OF_SENTENCE_REGEX } from "@/constants";
import { AlignmentResult } from "echogarden/dist/api/API.d.js";
import { FFMPEG_CONVERT_WAV_OPTIONS } from "@/constants";
export const useTranscribe = () => {
const { EnjoyApp, ffmpegWasm, ffmpegValid, user, webApi } = useContext(
@@ -28,12 +27,16 @@ export const useTranscribe = () => {
if (ffmpegValid) {
if (src instanceof Blob) {
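// A Blob's MIME type may carry a codec suffix (e.g. "audio/webm;codecs=opus"); keep only the bare extension.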
src = await EnjoyApp.cacheObjects.writeFile(
`${Date.now()}.${src.type.split("/")[1]}`,
`${Date.now()}.${src.type.split("/")[1].split(";")[0]}`,
await src.arrayBuffer()
);
}
const output = `enjoy://library/cache/${src.split("/").pop()}.wav`;
const output = `enjoy://library/cache/${src
.split("/")
.pop()
.split(";")
.shift()}.wav`;
await EnjoyApp.ffmpeg.transcode(src, output, options);
const data = await fetchFile(output);
return new Blob([data], { type: "audio/wav" });
@@ -45,7 +48,7 @@ export const useTranscribe = () => {
const transcodeUsingWasm = async (src: string | Blob, options?: string[]) => {
if (!ffmpegWasm?.loaded) return;
options = options || ["-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le"];
options = options || FFMPEG_CONVERT_WAV_OPTIONS;
try {
let uri: URL;
@@ -80,21 +83,32 @@ export const useTranscribe = () => {
): Promise<{
engine: string;
model: string;
result: TranscriptionResultSegmentGroupType[];
alignmentResult: AlignmentResult;
}> => {
const blob = await transcode(mediaSrc);
let result;
if (whisperConfig.service === "local") {
return transcribeByLocal(blob);
result = await transcribeByLocal(blob);
} else if (whisperConfig.service === "cloudflare") {
return transcribeByCloudflareAi(blob);
result = await transcribeByCloudflareAi(blob);
} else if (whisperConfig.service === "openai") {
return transcribeByOpenAi(blob);
result = await transcribeByOpenAi(blob);
} else if (whisperConfig.service === "azure") {
return transcribeByAzureAi(blob, params);
result = await transcribeByAzureAi(blob, params);
} else {
throw new Error(t("whisperServiceNotSupported"));
}
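// Force-align the recognized text against the audio with echogarden to get a word/phone-level timeline.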
const alignmentResult = await EnjoyApp.echogarden.align(
new Uint8Array(await blob.arrayBuffer()),
result.result.map((segment) => segment.text).join(" ")
);
return {
...result,
alignmentResult,
};
};
const transcribeByLocal = async (blob: Blob) => {
@@ -267,7 +281,7 @@ export const useTranscribe = () => {
if (
index === best.Words.length - 1 &&
!text.trim().match(END_OF_WORD_REGEX)
!text.trim().match(END_OF_SENTENCE_REGEX)
) {
text = text + ".";
}

View File

@@ -0,0 +1,192 @@
import { useState, useContext, useEffect } from "react";
import { useTranscribe } from "@renderer/hooks";
import {
AISettingsProviderContext,
AppSettingsProviderContext,
DbProviderContext,
} from "@renderer/context";
import { toast } from "@renderer/components/ui";
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";
import { MAGIC_TOKEN_REGEX, END_OF_SENTENCE_REGEX } from "@/constants";
export const useTranscriptions = (media: AudioType | VideoType) => {
const { whisperConfig } = useContext(AISettingsProviderContext);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const [transcription, setTranscription] = useState<TranscriptionType>(null);
const { transcribe } = useTranscribe();
const [transcribingProgress, setTranscribingProgress] = useState<number>(0);
const [transcribing, setTranscribing] = useState<boolean>(false);
const onTranscriptionUpdate = (event: CustomEvent) => {
const { model, action, record } = event.detail || {};
if (
model === "Transcription" &&
record.id === transcription.id &&
action === "update"
) {
setTranscription(record);
}
};
const findOrCreateTranscription = async () => {
if (!media) return;
if (transcription) return;
return EnjoyApp.transcriptions
.findOrCreate({
targetId: media.id,
targetType: media.mediaType,
})
.then((t) => {
if (t.result && !t.result["transcript"]) {
t.result = null;
}
setTranscription(t);
})
.catch((err) => {
toast.error(err.message);
});
};
const generateTranscription = async () => {
if (transcribing) return;
if (!transcription) {
await findOrCreateTranscription();
}
setTranscribing(true);
setTranscribingProgress(0);
try {
const { engine, model, alignmentResult } = await transcribe(media.src, {
targetId: media.id,
targetType: media.mediaType,
});
let timeline: TimelineEntry[] = [];
if (alignmentResult) {
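// Flatten the aligned timeline: keep sentence entries as-is and expand any other entry into its children.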
alignmentResult.timeline.forEach((t) => {
if (t.type === "sentence") {
timeline.push(t);
} else {
t.timeline.forEach((st) => {
timeline.push(st);
});
}
});
}
/*
* Pre-process
* A word ending with a period, like Mr./Ms./Dr., should not close a
* sentence, so merge such fragments into the following entry.
* Iterate with an explicit index so the merged entry is re-checked
* after a splice (forEach would skip past it).
*/
let i = 0;
while (i < timeline.length - 1) {
const sentence = timeline[i];
const nextSentence = timeline[i + 1];
if (
!sentence.text
.replaceAll(MAGIC_TOKEN_REGEX, "")
.match(END_OF_SENTENCE_REGEX) &&
nextSentence.text
) {
nextSentence.text = [sentence.text, nextSentence.text].join(" ");
nextSentence.timeline = [
...sentence.timeline,
...nextSentence.timeline,
];
nextSentence.startTime = sentence.startTime;
timeline.splice(i, 1);
} else {
i++;
}
}
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result: {
timeline: timeline,
transcript: alignmentResult.transcript,
},
engine,
model,
});
} catch (err) {
toast.error(err.message);
}
setTranscribing(false);
};
const findTranscriptionFromWebApi = async () => {
if (!transcription) {
await findOrCreateTranscription();
}
const res = await webApi.transcriptions({
targetMd5: media.md5,
});
const transcript = (res?.transcriptions || []).find((t) =>
["base", "small", "medium", "large", "whisper-1"].includes(t.model)
);
if (!transcript) {
return Promise.reject("Transcription not found");
}
if (!transcript.result["transcript"]) {
return Promise.reject("Transcription not aligned");
}
return EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result: transcript.result,
engine: transcript.engine,
model: transcript.model,
});
};
const findOrGenerateTranscription = async () => {
try {
await findTranscriptionFromWebApi();
} catch (err) {
console.error(err);
await generateTranscription();
}
};
useEffect(() => {
if (!media) return;
findOrCreateTranscription();
}, [media]);
useEffect(() => {
if (!transcription) return;
addDblistener(onTranscriptionUpdate);
if (
transcription.state == "pending" ||
!transcription.result?.["transcript"]
) {
findOrGenerateTranscription();
}
if (whisperConfig.service === "local") {
EnjoyApp.whisper.onProgress((_, p: number) => {
if (p > 100) p = 100;
setTranscribingProgress(p);
});
}
return () => {
removeDbListener(onTranscriptionUpdate);
EnjoyApp.whisper.removeProgressListeners();
};
}, [transcription, media]);
return {
transcription,
transcribingProgress,
transcribing,
generateTranscription,
};
};

View File

@@ -0,0 +1,43 @@
import { useEffect, useContext, useState } from "react";
import {
DbProviderContext,
AppSettingsProviderContext,
} from "@renderer/context";
import { toast } from "@renderer/components/ui";
import { t } from "i18next";
export const useVideo = (options: { id?: string; md5?: string }) => {
const { id, md5 } = options;
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
const [video, setVideo] = useState<VideoType>(null);
const onVideoUpdate = (event: CustomEvent) => {
const { model, action, record } = event.detail || {};
if (model !== "Video") return;
if (record?.id != video?.id) return;
if (action !== "update") return;
setVideo(record);
};
useEffect(() => {
const where = id ? { id } : { md5 };
EnjoyApp.videos.findOne(where).then((video) => {
if (video) {
setVideo(video);
} else {
toast.error(t("models.video.notFound"));
}
});
addDblistener(onVideoUpdate);
return () => {
removeDbListener(onVideoUpdate);
};
}, [id, md5]);
return {
video,
};
};

View File

@@ -10,6 +10,7 @@ import i18next, { t } from "i18next";
dayjs.extend(localizedFormat);
dayjs.extend(duration);
dayjs.extend(relativeTime);
import Chart from "chart.js/auto";
export function cn(...inputs: ClassValue[]) {
return twMerge(clsx(inputs));
@@ -37,7 +38,8 @@ export function formatDuration(
format = "HH:mm:ss"
) {
dayjs.locale(i18next.resolvedLanguage?.toLowerCase() || "en");
return dayjs.duration(duration, unit).format(format);
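// Drop the leading "00:" hours segment for durations under an hour.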
const display = dayjs.duration(duration, unit).format(format);
return display.replace(/^00:/, "");
}
export function bytesToSize(bytes: number) {
@@ -78,3 +80,60 @@ export function formatDate(date: string | Date) {
return then.fromNow();
}
}
export function renderPitchContour(options: {
wrapper: HTMLElement;
canvasId: string;
labels: string[];
datasets: Chart["data"]["datasets"];
}) {
const { wrapper, datasets, labels, canvasId } = options;
const width = wrapper.getBoundingClientRect().width;
const height = wrapper.getBoundingClientRect().height;
const canvas = document.createElement("canvas");
canvas.id = canvasId;
canvas.style.position = "absolute";
canvas.style.width = `${width}px`;
canvas.style.height = `${height}px`;
canvas.style.top = "0";
canvas.style.left = "0";
wrapper.appendChild(canvas);
new Chart(canvas, {
type: "line",
data: {
labels,
datasets,
},
options: {
plugins: {
legend: {
display: false,
},
title: {
display: false,
},
},
scales: {
x: {
beginAtZero: true,
ticks: {
autoSkip: false,
},
display: false,
grid: {
display: false,
},
border: {
display: false,
},
},
y: {
display: false,
},
},
},
});
}

View File

@@ -1,8 +1,9 @@
import { useParams , useNavigate } from "react-router-dom";
import { AudioDetail } from "@renderer/components";
import { useParams, useNavigate } from "react-router-dom";
import { AudioPlayer } from "@renderer/components";
import { Button } from "@renderer/components/ui";
import { ChevronLeftIcon } from "lucide-react";
import { t } from "i18next";
import { MediaPlayerProvider } from "@renderer/context";
export default () => {
const navigate = useNavigate();
@@ -10,15 +11,17 @@ export default () => {
return (
<>
<div className="h-full px-4 py-6 xl:px-8">
<div className="flex space-x-1 items-center mb-4">
<div className="h-full relative">
<div className="flex space-x-1 items-center h-14 px-4 xl:px-8">
<Button variant="ghost" size="icon" onClick={() => navigate(-1)}>
<ChevronLeftIcon className="w-5 h-5" />
</Button>
<span>{t("shadowingAudio")}</span>
</div>
<AudioDetail id={id} />
<MediaPlayerProvider>
<AudioPlayer id={id} />
</MediaPlayerProvider>
</div>
</>
);

View File

@@ -15,6 +15,7 @@ import { t } from "i18next";
import {
DbProviderContext,
AppSettingsProviderContext,
MediaPlayerProvider,
} from "@renderer/context";
import { messagesReducer } from "@renderer/reducers";
import { v4 as uuidv4 } from "uuid";
@@ -249,52 +250,54 @@ export default () => {
</Sheet>
</div>
<ScrollArea ref={containerRef} className="px-4 flex-1">
<div className="messages flex flex-col-reverse gap-6 my-6">
<div className="w-full h-16"></div>
{messages.map((message) => (
<MessageComponent
key={message.id}
message={message}
configuration={{
type: conversation.type,
...conversation.configuration,
}}
onResend={() => {
if (message.status === "error") {
dispatchMessages({ type: "destroy", record: message });
}
<MediaPlayerProvider>
<ScrollArea ref={containerRef} className="px-4 flex-1">
<div className="messages flex flex-col-reverse gap-6 my-6">
<div className="w-full h-16"></div>
{messages.map((message) => (
<MessageComponent
key={message.id}
message={message}
configuration={{
type: conversation.type,
...conversation.configuration,
}}
onResend={() => {
if (message.status === "error") {
dispatchMessages({ type: "destroy", record: message });
}
handleSubmit(message.content);
}}
onRemove={() => {
if (message.status === "error") {
dispatchMessages({ type: "destroy", record: message });
} else {
EnjoyApp.messages.destroy(message.id).catch((err) => {
toast.error(err.message);
});
}
}}
/>
))}
{offset > -1 && (
<div className="flex justify-center">
<Button
variant="ghost"
onClick={() => fetchMessages()}
disabled={loading || offset === -1}
className="px-4 py-2"
>
{t("loadMore")}
{loading && (
<LoaderIcon className="h-4 w-4 animate-spin ml-2" />
)}
</Button>
</div>
)}
</div>
</ScrollArea>
handleSubmit(message.content);
}}
onRemove={() => {
if (message.status === "error") {
dispatchMessages({ type: "destroy", record: message });
} else {
EnjoyApp.messages.destroy(message.id).catch((err) => {
toast.error(err.message);
});
}
}}
/>
))}
{offset > -1 && (
<div className="flex justify-center">
<Button
variant="ghost"
onClick={() => fetchMessages()}
disabled={loading || offset === -1}
className="px-4 py-2"
>
{t("loadMore")}
{loading && (
<LoaderIcon className="h-4 w-4 animate-spin ml-2" />
)}
</Button>
</div>
)}
</div>
</ScrollArea>
</MediaPlayerProvider>
<div className="px-4 absolute w-full bottom-0 left-0 h-14 bg-muted z-50">
<div className="focus-within:bg-background px-4 py-2 flex items-center space-x-4 rounded-lg border">

View File

@@ -64,12 +64,12 @@ export default () => {
const presets = CONVERSATION_PRESETS.map((preset) =>
Object.assign({}, preset, {
engine: currentEngine.name,
engine: currentEngine?.name,
configuration: {
...preset.configuration,
tts: {
...preset.configuration.tts,
engine: currentEngine.name,
engine: currentEngine?.name,
},
},
})
@@ -78,7 +78,7 @@ export default () => {
const customPreset = {
key: "custom",
name: t("custom"),
engine: currentEngine.name,
engine: currentEngine?.name,
configuration: {
type: "gpt",
model: "gpt-4-turbo-preview",
@@ -92,7 +92,7 @@ export default () => {
historyBufferSize: 0,
tts: {
baseUrl: "",
engine: currentEngine.name,
engine: currentEngine?.name,
model: "tts-1",
voice: "alloy",
},
@@ -107,7 +107,7 @@ export default () => {
type: "tts",
tts: {
baseUrl: "",
engine: currentEngine.name,
engine: currentEngine?.name,
model: "tts-1",
voice: "alloy",
},

View File

@@ -1,8 +1,9 @@
import { useParams , useNavigate } from "react-router-dom";
import { VideoDetail } from "@renderer/components";
import { useParams, useNavigate } from "react-router-dom";
import { VideoPlayer } from "@renderer/components";
import { Button } from "@renderer/components/ui";
import { ChevronLeftIcon } from "lucide-react";
import { t } from "i18next";
import { MediaPlayerProvider } from "@renderer/context";
export default () => {
const navigate = useNavigate();
@@ -10,15 +11,17 @@ export default () => {
return (
<>
<div className="h-full px-4 py-6 xl:px-8">
<div className="flex space-x-1 items-center mb-4">
<div className="h-full relative">
<div className="flex space-x-1 items-center h-14 px-4 xl:px-8">
<Button variant="ghost" size="icon" onClick={() => navigate(-1)}>
<ChevronLeftIcon className="w-5 h-5" />
</Button>
<span>{t("shadowingVideo")}</span>
</div>
<VideoDetail id={id} />
<MediaPlayerProvider>
<VideoPlayer id={id} />
</MediaPlayerProvider>
</div>
</>
);

View File

@@ -1,4 +1,5 @@
type AudioType = {
mediaType: string;
id: string;
source: string;
name: string;

View File

@@ -206,6 +206,14 @@ type EnjoyAppType = {
}
) => Promise<SpeechType>;
};
echogarden: {
align: (
input: string | Uint8Array,
transcript: string,
options?: any
) => Promise<AlignmentResult>;
check: () => Promise<boolean>;
};
whisper: {
config: () => Promise<WhisperConfigType>;
check: () => Promise<{ success: boolean; log: string }>;

View File

@@ -5,7 +5,7 @@ type TranscriptionType = {
state: "pending" | "processing" | "finished";
engine: string;
model: string;
result: TranscriptionResultSegmentGroupType[];
result: AlignmentResult;
};
type TranscriptionResultSegmentType = {

View File

@@ -1,4 +1,5 @@
type VideoType = {
mediaType: string;
id: string;
source: string;
name: string;

View File

@@ -1,7 +1,19 @@
import Pitchfinder from "pitchfinder";
import { END_OF_SENTENCE_REGEX, MAGIC_TOKEN_REGEX } from "./constants";
export function generatePitch(peaks: Float32Array, sampleRate: number) {
const detectPitch = Pitchfinder.YIN({ sampleRate });
export const extractFrequencies = (props: {
peaks: Float32Array;
sampleRate: number;
}): number[] => {
const { peaks, sampleRate } = props;
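// AMDF pitch detection tuned for speech: track only 100–1000 Hz.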
const detectPitch = Pitchfinder.AMDF({
sampleRate,
sensitivity: 0.05,
minFrequency: 100,
maxFrequency: 1000,
ratio: 5,
});
const duration = peaks.length / sampleRate;
const bpm = peaks.length / duration / 60;
@@ -10,24 +22,8 @@ export function generatePitch(peaks: Float32Array, sampleRate: number) {
quantization: bpm,
});
// Find the baseline frequency (the value that appears most often)
const frequencyMap: any = {};
let maxAmount = 0;
let baseFrequency = 0;
frequencies.forEach((frequency) => {
if (!frequency) return;
const tolerance = 10;
frequency = Math.round(frequency * tolerance) / tolerance;
if (!frequencyMap[frequency]) frequencyMap[frequency] = 0;
frequencyMap[frequency] += 1;
if (frequencyMap[frequency] > maxAmount) {
maxAmount = frequencyMap[frequency];
baseFrequency = frequency;
}
});
return { frequencies, baseFrequency };
}
return frequencies;
};
export function milisecondsToTimestamp(ms: number) {
const hours = Math.floor(ms / 3600000).toString();
@@ -40,8 +36,6 @@ export function milisecondsToTimestamp(ms: number) {
)}:${seconds.padStart(2, "0")},${milliseconds}`;
}
export const MAGIC_TOKENS = ["Mrs.", "Ms.", "Mr.", "Dr.", "Prof.", "St."];
export const END_OF_WORD_REGEX = /[^\.!,\?][\.!\?]/g;
export const groupTranscription = (
transcription: TranscriptionResultSegmentType[]
): TranscriptionResultSegmentGroupType[] => {
@@ -75,8 +69,8 @@ export const groupTranscription = (
group.push(segment);
if (
!MAGIC_TOKENS.includes(text) &&
segment.text.trim().match(END_OF_WORD_REGEX)
!text.match(MAGIC_TOKEN_REGEX) &&
segment.text.trim().match(END_OF_SENTENCE_REGEX)
) {
// Group a complete sentence;
groups.push(generateGroup(group));

View File

@@ -70,6 +70,7 @@ module.exports = {
plugins: [
require("tailwindcss-animate"),
require("@tailwindcss/typography"),
require("tailwind-scrollbar"),
require("tailwind-scrollbar-hide"),
require("@vidstack/react/tailwind.cjs"),
],

View File

@@ -23,21 +23,11 @@ export default defineConfig((env) => {
formats: ["es"],
},
rollupOptions: {
external,
// external: [
// "axios",
// "child_process",
// "crypto",
// "fs-extra",
// "fs",
// "path",
// "sequelize",
// "umzug",
// "sqlite3",
// "fluent-ffmpeg",
// "ffmpeg-static",
// "@andrkrn/ffprobe-static",
// ],
external: [...external, "echogarden/dist/api/API.js"],
output: {
strict: false,
},
plugins: [],
},
commonjsOptions: {
transformMixedEsModules: true,

2858
yarn.lock

File diff suppressed because it is too large