Transcription forced alignment & more (#416)
* add wavesurfer-provider
* brand new layout for player
* refactor pitch contour
* clean up
* update style
* refactor
* update layout
* use new layout for video
* refactor
* may select word
* may edit word timestamp
* may toggle multi-select words
* clean code
* improve word region update
* improve layout
* update layout
* add echogarden
* fix test
* use aligned transcription
* fix ipa
* some refactor
* improve code
* implement ipa & translate & lookup
* recording play & share
* fix
* fix post audio
* improve layout
* may delete recording
* may record
* fix video player layout
* fix player in conversation
* render recording along with original audio
* may create custom region in recording
* fix float issue when seekTo
* fix recording player
* fix load more recordings
* fix seekTo
* clean up
* refactor pitch contour
* fix some warnings
* upgrade deps
* fix group transcription sentence
* zoom to fit when segment updates
* add more hotkeys
* update player layout
* improve style
* play recording overlapping the audio when comparing
* update echogarden dep
* add recorded mark on transcription
* fix recording pitch contour rendering
* improve recording
* adjust pitch finder params
@@ -7,9 +7,9 @@
     "markdown-it-mathjax3": "^4.3.2",
     "markdown-it-sub": "^2.0.0",
     "markdown-it-sup": "^2.0.0",
-    "mermaid": "^10.8.0",
-    "sass": "^1.71.1",
-    "vitepress": "^1.0.0-rc.42",
+    "mermaid": "^10.9.0",
+    "sass": "^1.72.0",
+    "vitepress": "^1.0.0-rc.45",
     "vitepress-plugin-mermaid": "^2.0.16",
     "vue": "^3.4.21"
   },
@@ -78,6 +78,16 @@ test("valid ffmpeg command", async () => {
   expect(res).toBeTruthy();
 });
 
+test("validate echogarden align command", async () => {
+  const res = await page.evaluate(() => {
+    return window.__ENJOY_APP__.echogarden.check();
+  });
+  expect(res).toBeTruthy();
+
+  const settings = fs.readJsonSync(path.join(resultDir, "settings.json"));
+  expect(settings.whisper.service).toBe("local");
+});
+
 test("should setup default library path", async () => {
   const settings = fs.readJsonSync(path.join(resultDir, "settings.json"));
   expect(settings.library).not.toBeNull();
@@ -122,9 +122,39 @@ test.describe("with login", async () => {
       },
     });
   });
 });
 
+/*
+ * steps:
+ * 1. create a tts conversation
+ * 2. submit a message to the conversation
+ * 3. the speech should auto create
+ */
+test("tts conversation", async () => {
+  // navigate to the conversations page
+  await page.getByTestId("sidebar-conversations").click();
+
+  // trigger new conversation modal
+  await page.getByTestId("conversation-new-button").click();
+
+  // create a tts conversation
+  await page.click("[data-testid=conversation-preset-tts]");
+  await page.getByTestId("conversation-form").waitFor();
+  await page.click("[data-testid=conversation-form-submit]");
+
+  // wait for the conversation to be created
+  await page.getByTestId("conversation-page").waitFor();
+
+  // submit a message to the conversation
+  await page.getByTestId("conversation-page-input").fill("How are you?");
+  await page.getByTestId("conversation-page-submit").click();
+  await page.locator(".ai-message").waitFor();
+  const player = page
+    .locator(".ai-message")
+    .getByTestId("wavesurfer-container");
+  await player.waitFor();
+
+  expect(await player.isVisible()).toBeTruthy();
+});
+
 /*
@@ -136,6 +166,9 @@ test.describe("with login", async () => {
  * 5. audio waveform player should be visible and transcription should be generated
  */
 test("gpt conversation", async () => {
+  // navigate to the conversations page
+  await page.getByTestId("sidebar-conversations").click();
+
   // trigger new conversation modal
   await page.getByTestId("conversation-new-button").click();
 
@@ -166,43 +199,12 @@ test.describe("with login", async () => {
 
     // add to library
     await page.getByTestId("message-start-shadow").click();
-    await page.getByTestId("audio-detail").waitFor();
+    await page.getByTestId("audio-player").waitFor();
+    await page.getByTestId("media-player-container").waitFor();
     await page.getByTestId("media-transcription").waitFor();
     await page.getByTestId("media-transcription-result").waitFor();
     expect(
       await page.getByTestId("media-transcription-result").isVisible()
     ).toBeTruthy();
   });
-
-  /*
-   * steps:
-   * 1. create a tts conversation
-   * 2. submit a message to the conversation
-   * 3. the speech should auto create
-   */
-  test("tts conversation", async () => {
-    // trigger new conversation modal
-    await page.getByTestId("conversation-new-button").click();
-
-    // create a tts conversation
-    await page.click("[data-testid=conversation-preset-tts]");
-    await page.getByTestId("conversation-form").waitFor();
-    await page.click("[data-testid=conversation-form-submit]");
-
-    // wait for the conversation to be created
-    await page.getByTestId("conversation-page").waitFor();
-
-    // submit a message to the conversation
-    await page.getByTestId("conversation-page-input").fill("How are you?");
-    await page.getByTestId("conversation-page-submit").click();
-    await page.locator(".ai-message").waitFor();
-    const player = page
-      .locator(".ai-message")
-      .getByTestId("wavesurfer-container");
-    await player.waitFor();
-
-    expect(await player.isVisible()).toBeTruthy();
-  });
 });
 });
@@ -12,7 +12,7 @@ const config = {
   asar: {
     // Binary files won't work in asar, so we need to unpack them
     unpackDir:
-      "{.vite/build/lib,.vite/build/samples,node_modules/ffmpeg-static,node_modules/@andrkrn/ffprobe-static}",
+      "{.vite/build/lib,.vite/build/samples,node_modules/ffmpeg-static,node_modules/@andrkrn/ffprobe-static,node_modules/onnxruntime-node/bin}",
   },
   icon: "./assets/icon",
   name: "Enjoy",
@@ -47,18 +47,18 @@
     "@types/fluent-ffmpeg": "^2.1.24",
     "@types/html-to-text": "^9.0.4",
     "@types/intl-tel-input": "^18.1.4",
-    "@types/lodash": "^4.14.202",
+    "@types/lodash": "^4.17.0",
     "@types/mark.js": "^8.11.12",
-    "@types/node": "^20.11.24",
-    "@types/react": "^18.2.62",
-    "@types/react-dom": "^18.2.19",
+    "@types/node": "^20.11.27",
+    "@types/react": "^18.2.66",
+    "@types/react-dom": "^18.2.22",
     "@types/validator": "^13.11.9",
     "@types/wavesurfer.js": "^6.0.12",
-    "@typescript-eslint/eslint-plugin": "^7.1.1",
-    "@typescript-eslint/parser": "^7.1.1",
+    "@typescript-eslint/eslint-plugin": "^7.2.0",
+    "@typescript-eslint/parser": "^7.2.0",
     "@vitejs/plugin-react": "^4.2.1",
     "autoprefixer": "^10.4.18",
-    "electron": "^29.1.0",
+    "electron": "^29.1.4",
     "electron-playwright-helpers": "^1.7.1",
     "eslint": "^8.57.0",
     "eslint-import-resolver-typescript": "^3.6.1",
@@ -67,12 +67,13 @@
     "octokit": "^3.1.2",
     "progress": "^2.0.3",
     "tailwind-merge": "^2.2.1",
     "tailwind-scrollbar": "^3.1.0",
     "tailwindcss": "^3.4.1",
     "tailwindcss-animate": "^1.0.7",
     "ts-node": "^10.9.2",
     "tslib": "^2.6.2",
-    "typescript": "^5.3.3",
-    "vite": "^5.1.5",
+    "typescript": "^5.4.2",
+    "vite": "^5.1.6",
+    "vite-plugin-static-copy": "^1.0.1",
     "zx": "^7.2.3"
   },
@@ -81,7 +82,7 @@
     "@ffmpeg/ffmpeg": "^0.12.10",
     "@ffmpeg/util": "^0.12.1",
     "@hookform/resolvers": "^3.3.4",
-    "@langchain/community": "^0.0.34",
+    "@langchain/community": "^0.0.39",
     "@langchain/google-genai": "^0.0.10",
     "@mozilla/readability": "^0.5.0",
     "@radix-ui/react-accordion": "^1.1.2",
@@ -112,6 +113,7 @@
     "axios": "^1.6.7",
     "camelcase": "^8.0.0",
     "camelcase-keys": "^9.1.3",
+    "chart.js": "^4.4.2",
     "cheerio": "^1.0.0-rc.12",
     "class-variance-authority": "^0.7.0",
     "clsx": "^2.1.0",
@@ -122,7 +124,8 @@
     "dayjs": "^1.11.10",
     "decamelize": "^6.0.0",
     "decamelize-keys": "^2.0.1",
-    "electron-log": "^5.1.1",
+    "echogarden": "https://github.com/an-lee/echogarden",
+    "electron-log": "^5.1.2",
     "electron-settings": "^4.0.2",
     "electron-squirrel-startup": "^1.0.0",
     "ffmpeg-static": "^5.2.0",
@@ -130,27 +133,27 @@
     "fs-extra": "^11.2.0",
     "html-to-text": "^9.0.5",
     "https-proxy-agent": "^7.0.4",
-    "i18next": "^23.10.0",
-    "intl-tel-input": "^19.5.5",
+    "i18next": "^23.10.1",
+    "intl-tel-input": "^19.5.7",
     "js-md5": "^0.8.3",
-    "langchain": "^0.1.25",
+    "langchain": "^0.1.28",
     "lodash": "^4.17.21",
-    "lucide-react": "^0.344.0",
+    "lucide-react": "^0.358.0",
     "mark.js": "^8.11.1",
-    "microsoft-cognitiveservices-speech-sdk": "^1.35.0",
-    "next-themes": "^0.2.1",
-    "openai": "^4.28.4",
+    "microsoft-cognitiveservices-speech-sdk": "^1.36.0",
+    "next-themes": "^0.3.0",
+    "openai": "^4.29.0",
     "pitchfinder": "^2.3.2",
     "postcss": "^8.4.35",
     "proxy-agent": "^6.4.0",
     "react": "^18.2.0",
-    "react-activity-calendar": "^2.2.7",
+    "react-activity-calendar": "^2.2.8",
     "react-dom": "^18.2.0",
     "react-hook-form": "^7.51.0",
     "react-hotkeys-hook": "^4.5.0",
-    "react-i18next": "^14.0.5",
+    "react-i18next": "^14.1.0",
     "react-markdown": "^9.0.1",
-    "react-router-dom": "^6.22.2",
+    "react-router-dom": "^6.22.3",
     "react-tooltip": "^5.26.3",
     "reflect-metadata": "^0.2.1",
     "rimraf": "^5.0.5",
@@ -160,7 +163,7 @@
     "sqlite3": "^5.1.7",
     "tailwind-scrollbar-hide": "^1.1.7",
     "umzug": "^3.7.0",
-    "wavesurfer.js": "^7.7.3",
+    "wavesurfer.js": "^7.7.5",
     "zod": "^3.22.4"
   }
 }
@@ -18,7 +18,7 @@ export default defineConfig({
   /* Retry on CI only */
   retries: process.env.CI ? 2 : 0,
   /* Opt out of parallel tests on CI. */
-  workers: process.env.CI ? 1 : undefined,
+  workers: 1,
   /* Reporter to use. See https://playwright.dev/docs/test-reporters */
   reporter: "html",
   /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
@@ -7,6 +7,24 @@ export const WEB_API_URL = "https://enjoy-web.fly.dev";
 
 export const REPO_URL = "https://github.com/xiaolai/everyone-can-use-english";
 
+export const MAGIC_TOKEN_REGEX =
+  /\b(Mrs|Ms|Mr|Dr|Prof|St|[a-zA-Z]{1,2}|\d{1,2})\.\b/g;
+export const END_OF_SENTENCE_REGEX = /[^\.!,\?][\.!\?]/g;
+
+export const FFMPEG_TRIM_SILENCE_OPTIONS = [
+  "-af",
+  "silenceremove=1:start_duration=1:start_threshold=-50dB:detection=peak,aformat=dblp,areverse,silenceremove=start_periods=1:start_duration=1:start_threshold=-50dB:detection=peak,aformat=dblp,areverse",
+];
+
+export const FFMPEG_CONVERT_WAV_OPTIONS = [
+  "-ar",
+  "16000",
+  "-ac",
+  "1",
+  "-c:a",
+  "pcm_s16le",
+];
+
 // https://huggingface.co/ggerganov/whisper.cpp/tree/main
 export const WHISPER_MODELS_OPTIONS = [
   {
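
Reviewer note: FFMPEG_CONVERT_WAV_OPTIONS pins the input contract shared by whisper.cpp and echogarden — 16 kHz sample rate, mono, signed 16-bit PCM. A minimal sketch of how a wrapper might apply it via fluent-ffmpeg (illustrative only; the actual FfmpegWrapper code lives in @main/ffmpeg and is patched further below):

import Ffmpeg from "fluent-ffmpeg";

const FFMPEG_CONVERT_WAV_OPTIONS = ["-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le"];

// Convert any supported media file into the 16 kHz mono s16le wav
// expected by the local transcription/alignment pipeline.
function convertToWav(input: string, output: string): Promise<void> {
  return new Promise((resolve, reject) => {
    Ffmpeg(input)
      .outputOptions(FFMPEG_CONVERT_WAV_OPTIONS)
      .on("end", () => resolve())
      .on("error", reject)
      .save(output);
  });
}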
@@ -344,3 +362,133 @@ export const CONVERSATION_PRESETS = [
     },
   },
 ];
+
+export const IPA_MAPPING = {
+  p: "p",
+  b: "b",
+  t: "t",
+  d: "d",
+  ʈ: "t",
+  ɖ: "d",
+  c: "k",
+  ɟ: "g",
+  k: "k",
+  g: "g",
+  q: "k",
+  ɢ: "g",
+  ʔ: "",
+  ɡ: "g",
+  m: "m",
+  ɱ: "m",
+  n: "n",
+  ɳ: "n",
+  ɲ: "j",
+  ŋ: "ŋ",
+  ɴ: "ŋ",
+  n̩: "n",
+  ʙ: "r",
+  r: "r",
+  ʀ: "r",
+  ⱱ: "",
+  ɾ: "r",
+  ɽ: "r",
+  ɸ: "f",
+  β: "v",
+  f: "f",
+  v: "v",
+  θ: "θ",
+  ð: "ð",
+  s: "s",
+  z: "z",
+  ʃ: "ʃ",
+  ʒ: "ʒ",
+  ʂ: "s",
+  ʐ: "z",
+  ç: "",
+  ʝ: "j",
+  x: "h",
+  ɣ: "g",
+  χ: "h",
+  ʁ: "r",
+  ħ: "h",
+  ʕ: "",
+  h: "h",
+  ɦ: "h",
+  ɬ: "",
+  ɮ: "",
+  tʃ: "tʃ",
+  ʈʃ: "tʃ",
+  dʒ: "dʒ",
+  ʋ: "v",
+  ɹ: "r",
+  ɻ: "r",
+  j: "j",
+  ɰ: "w",
+  w: "w",
+  l: "l",
+  ɭ: "l",
+  ʎ: "j",
+  ʟ: "l",
+  i: "iː",
+  yɨ: "iː",
+  ʉɯ: "uː",
+  u: "uː",
+  iː: "iː",
+  ɪ: "ɪ",
+  ʏ: "ɪ",
+  ʊ: "ʊ",
+  ɨ: "ɪ",
+  ᵻ: "ɪ",
+  e: "e",
+  ø: "e",
+  ɘ: "ə",
+  ɵ: "ə",
+  ɤ: "ɒ",
+  o: "ɔː",
+  ə: "ə",
+  oː: "ɔː",
+  ɛ: "æ",
+  œ: "æ",
+  ɜ: "əː",
+  ɞ: "əː",
+  ʌ: "ʌ",
+  ɔ: "ɔː",
+  ɜː: "əː",
+  uː: "uː",
+  ɔː: "ɔː",
+  ɛː: "æ",
+  æ: "æ",
+  a: "ɑː",
+  ɶ: "ɑː",
+  ɐ: "ɑː",
+  ɑ: "ɑː",
+  ɒ: "ɒ",
+  ɑː: "ɑː",
+  "◌˞": "",
+  ɚ: "ɪə",
+  ɝ: "ɪə",
+  ɹ̩: "r",
+  eɪ: "eɪ",
+  əʊ: "əʊ",
+  oʊ: "əʊ",
+  aɪ: "aɪ",
+  ɔɪ: "ɔɪ",
+  aʊ: "aʊ",
+  iə: "ɪə",
+  ɜr: "ɪə(r)",
+  ɑr: "ɑː(r)",
+  ɔr: "ɔː(r)",
+  oʊr: "əʊ(r)",
+  oːɹ: "ɔː(r)",
+  ir: "iː(r)",
+  ɪɹ: "ɪ(r)",
+  ɔːɹ: "ɔː(r)",
+  ɑːɹ: "ɑː(r)",
+  ʊɹ: "ʊ(r)",
+  ʊr: "ʊ(r)",
+  ɛr: "æ(r)",
+  ɛɹ: "æ(r)",
+  əl: "ə",
+  aɪɚ: "aɪ",
+  aɪə: "aɪ",
+};
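
Reviewer note: IPA_MAPPING collapses the narrow IPA symbols that echogarden's phone-level timeline can emit into the smaller, dictionary-style set shown to learners (an empty string drops the phone). A minimal sketch of the intended lookup, assuming a phones array taken from an aligned word's timeline — the actual call sites are in the renderer components:

import { IPA_MAPPING } from "@/constants";

// Normalize a raw phone sequence for display, dropping phones that
// map to the empty string and passing unknown symbols through.
function toDisplayIpa(phones: string[]): string {
  return phones
    .map((p) => IPA_MAPPING[p as keyof typeof IPA_MAPPING] ?? p)
    .filter((p) => p !== "")
    .join("");
}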
@@ -151,6 +151,7 @@
     "yesterday": "yesterday",
     "play": "play",
     "pause": "pause",
+    "switchPlayMode": "switch play mode",
     "playSingleSegment": "play single segment",
     "playAllSegments": "play all segments",
     "playInLoop": "play in loop",
@@ -241,9 +242,13 @@
     "logoutAndRemoveAllPersonalData": "Logout and remove all personal data",
     "logoutAndRemoveAllPersonalSettings": "Logout and remove all personal settings",
     "hotkeys": "Hotkeys",
+    "system": "System",
+    "player": "Player",
     "quitApp": "Quit APP",
     "openPreferences": "Open preferences",
     "playOrPause": "Play or pause",
+    "playOrPauseRecording": "Play or pause recording",
+    "startOrStopRecording": "start or stop recording",
     "about": "About",
     "currentVersion": "Current version",
     "checkUpdate": "Check update",
@@ -268,8 +273,7 @@
     "editResource": "edit resource",
     "deleteResource": "delete resource",
     "deleteResourceConfirmation": "Are you sure to delete {{name}}?",
-    "transcribeAudioConfirmation": "It will remove the old transcription. Are you sure to transcribe {{name}}",
-    "transcribeVideoConfirmation": "It will remove the old transcription. Are you sure to transcribe {{name}}",
+    "transcribeMediaConfirmation": "It will remove the old transcription. Are you sure to transcribe {{name}}",
     "localFile": "local file",
     "resourcesYouAddedRecently": "resources you added recently",
     "recentlyAdded": "recently added",
@@ -291,6 +295,7 @@
     "deleteRecording": "delete recording",
     "deleteRecordingConfirmation": "Are you sure to delete this recording?",
     "myRecordings": "my recordings",
+    "noRecordingForThisSegmentYet": "No recordings for this segment yet. Press <kbd>R</kbd> to start recording.",
     "lastYear": "last year",
     "less": "less",
     "more": "more",
@@ -474,7 +479,19 @@
     "itMayTakeAWhileToPrepareForTheFirstLoad": "It may take a while to prepare for the first load. Please be patient.",
     "loadingTranscription": "Loading transcription",
     "cannotFindMicrophone": "Cannot find microphone",
+    "savingRecording": "Saving recording",
+    "recordingSaved": "Recording saved",
+    "failedToSaveRecording": "Failed to save recording",
     "speechNotCreatedYet": "Speech not created yet",
-    "goToConversation": "Go to conversation"
+    "goToConversation": "Go to conversation",
+    "mediaInfo": "Media Info",
+    "editRegion": "edit region",
+    "dragRegionBorderToEdit": "Drag region border to edit",
+    "startRecording": "start recording",
+    "stopRecording": "stop recording",
+    "playRecording": "play recording",
+    "clickAnyWordToSelect": "Click any words to select. Press shift to select multiple words.",
+    "currentRegionIsBeingEdited": "Current region is being edited",
+    "compare": "compare",
+    "selectRegion": "select region"
 }
@@ -151,6 +151,7 @@
     "yesterday": "昨天",
     "play": "播放",
     "pause": "暂停",
+    "switchPlayMode": "切换播放模式",
     "playSingleSegment": "播放单句",
     "playAllSegments": "播放所有",
     "playInLoop": "单句循环",
@@ -241,9 +242,13 @@
     "logoutAndRemoveAllPersonalData": "退出登录并删除所有个人数据",
     "logoutAndRemoveAllPersonalSettings": "退出登录并删除所有个人设置选项",
     "hotkeys": "快捷键",
+    "system": "系统",
+    "player": "播放器",
     "quitApp": "退出应用",
     "openPreferences": "打开设置",
     "playOrPause": "播放/暂停",
+    "playOrPauseRecording": "播放/暂停录音",
+    "startOrStopRecording": "开始/结束录音",
     "about": "关于",
     "currentVersion": "当前版本",
     "checkUpdate": "检查更新",
@@ -269,7 +274,6 @@
     "deleteResource": "删除资源",
     "deleteResourceConfirmation": "您确定要删除资源 {{name}} 吗?",
-    "transcribeAudioConfirmation": "这将删除原来的语音文本,您确定要重新对 {{name}} 进行语音转文本吗?",
-    "transcribeVideoConfirmation": "这将删除原来的语音文本,您确定要重新对 {{name}} 进行语音转文本吗?",
+    "transcribeMediaConfirmation": "这将删除原来的语音文本,您确定要重新对 {{name}} 进行语音转文本吗?",
     "localFile": "本地文件",
     "recentlyAdded": "最近添加",
     "resourcesYouAddedRecently": "最近添加的资源",
@@ -291,6 +295,7 @@
     "deleteRecording": "删除录音",
     "deleteRecordingConfirmation": "您确定要删除录音吗?",
     "myRecordings": "我的练习",
+    "noRecordingForThisSegmentYet": "当前句子还没有练习过。按 <kbd>R</kbd> 键开始录音。",
     "lastYear": "过去一年",
     "less": "更少",
     "more": "更多",
@@ -473,7 +478,19 @@
     "itMayTakeAWhileToPrepareForTheFirstLoad": "首次加载可能需要一些时间,请耐心等候",
     "loadingTranscription": "正在加载语音文本",
     "cannotFindMicrophone": "无法找到麦克风",
+    "savingRecording": "正在保存录音",
+    "recordingSaved": "录音已保存",
+    "failedToSaveRecording": "保存录音失败",
     "speechNotCreatedYet": "尚未生成语音",
-    "goToConversation": "前往对话"
+    "goToConversation": "前往对话",
+    "mediaInfo": "资源信息",
+    "editRegion": "修改当前区域",
+    "dragRegionBorderToEdit": "拖动区域边界以修改",
+    "startRecording": "开始录音",
+    "stopRecording": "结束录音",
+    "playRecording": "播放录音",
+    "clickAnyWordToSelect": "点击任意单词可以选中,同时按下 Shift 键可以多选",
+    "currentRegionIsBeingEdited": "当前区域正在编辑中",
+    "compare": "对比",
+    "selectRegion": "选取区域"
 }
@@ -81,6 +81,12 @@
   }
 }
 
+@layer components {
+  .scroll {
+    @apply scrollbar-thin scrollbar-thumb-primary scrollbar-track-secondary;
+  }
+}
+
 body {
   user-select: none;
 }
@@ -129,6 +129,11 @@ export class Audio extends Model<Audio> {
     return this.getDataValue("metadata").duration;
   }
 
+  @Column(DataType.VIRTUAL)
+  get mediaType(): string {
+    return "Audio";
+  }
+
   get extname(): string {
     return (
       this.getDataValue("metadata").extname ||
@@ -25,6 +25,7 @@ import storage from "@main/storage";
 import { Client } from "@/api";
 import { WEB_API_URL } from "@/constants";
 import { AzureSpeechSdk } from "@main/azure-speech-sdk";
+import Ffmpeg from "@main/ffmpeg";
 import camelcaseKeys from "camelcase-keys";
 
 const logger = log.scope("db/models/recording");
@@ -299,10 +300,18 @@ export class Recording extends Model<Recording> {
       referenceText?: string;
     }
   ) {
-    const { targetId, targetType, referenceId, referenceText, duration } =
-      params;
+    const { targetId, targetType, referenceId, referenceText } = params;
+    let { duration } = params;
 
     if (blob.arrayBuffer.byteLength === 0) {
       throw new Error("Empty recording");
     }
 
-    const format = blob.type.split("/")[1];
+    const format = blob.type.split("/")[1]?.split(";")?.[0];
+    if (!format) {
+      throw new Error("Unknown recording format");
+    }
+
     const file = path.join(
       settings.userDataPath(),
       "recordings",
@@ -310,6 +319,18 @@ export class Recording extends Model<Recording> {
     );
     await fs.outputFile(file, Buffer.from(blob.arrayBuffer));
 
+    try {
+      const ffmpeg = new Ffmpeg();
+      const metadata = await ffmpeg.generateMetadata(file);
+      duration = Math.floor(metadata.format.duration * 1000);
+    } catch (err) {
+      logger.error(err);
+    }
+
+    if (duration === 0) {
+      throw new Error("Failed to get duration of the recording");
+    }
+
     const md5 = await hashFile(file, { algo: "md5" });
     const filename = `${md5}.${format}`;
     fs.renameSync(file, path.join(path.dirname(file), filename));
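
Reviewer note: the recording's duration is now re-derived from the written file instead of trusting the value passed in from the renderer, and an empty blob is rejected up front. A sketch of what generateMetadata is assumed to wrap — fluent-ffmpeg's ffprobe; the actual implementation lives in @main/ffmpeg:

import Ffmpeg from "fluent-ffmpeg";

// Probe a media file and resolve with its container metadata;
// format.duration (seconds) is what Recording.createFromBlob consumes.
function generateMetadata(file: string): Promise<Ffmpeg.FfprobeData> {
  return new Promise((resolve, reject) => {
    Ffmpeg.ffprobe(file, (err, metadata) => {
      if (err) reject(err);
      else resolve(metadata);
    });
  });
}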
@@ -129,6 +129,11 @@ export class Video extends Model<Video> {
     return this.getDataValue("metadata").duration;
   }
 
+  @Column(DataType.VIRTUAL)
+  get mediaType(): string {
+    return "Video";
+  }
+
   get extname(): string {
     return (
       this.getDataValue("metadata").extname ||
enjoy/src/main/echogarden.ts (new file, 68 lines)
@@ -0,0 +1,68 @@
import { ipcMain } from "electron";
import { align } from "echogarden/dist/api/API.js";
import { AlignmentOptions } from "echogarden/dist/api/API";
import { AudioSourceParam } from "echogarden/dist/audio/AudioUtilities";
import path from "path";
import log from "@main/logger";
import url from "url";
import settings from "@main/settings";
import fs from "fs-extra";

const __filename = url.fileURLToPath(import.meta.url);
/*
 * sample files will be in /app.asar.unpacked instead of /app.asar
 */
const __dirname = path
  .dirname(__filename)
  .replace("app.asar", "app.asar.unpacked");

const logger = log.scope("echogarden");
class EchogardenWrapper {
  public align: typeof align;

  constructor() {
    this.align = align;
  }

  async check() {
    const sampleFile = path.join(__dirname, "samples", "jfk.wav");
    try {
      const result = await this.align(
        sampleFile,
        "And so my fellow Americans ask not what your country can do for you",
        {}
      );
      logger.info(result);
      fs.writeJsonSync(
        path.join(settings.cachePath(), "echogarden-check.json"),
        result,
        { spaces: 2 }
      );

      return true;
    } catch (e) {
      logger.error(e);
      return false;
    }
  }

  registerIpcHandlers() {
    ipcMain.handle(
      "echogarden-align",
      async (
        _event,
        input: AudioSourceParam,
        transcript: string,
        options: AlignmentOptions
      ) => {
        return this.align(input, transcript, options);
      }
    );

    ipcMain.handle("echogarden-check", async (_event) => {
      return this.check();
    });
  }
}

export default new EchogardenWrapper();
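
Reviewer note: together with the preload bridge added later in this diff, this wrapper lets the renderer run forced alignment without touching the main process directly. A minimal usage sketch — it assumes an absolute path to a wav produced by the convert step and that the align result carries the timeline consumed by MediaCaption:

// Hypothetical renderer-side helper; window.__ENJOY_APP__ is the
// contextBridge surface exposed in preload.js.
async function alignSegment(wavPath: string, transcript: string) {
  const result = await window.__ENJOY_APP__.echogarden.align(
    wavPath,
    transcript,
    {} // AlignmentOptions; defaults are assumed adequate here
  );
  return result.timeline; // word -> token -> phone entries with start/end times
}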
@@ -7,6 +7,7 @@ import path from "path";
 import fs from "fs-extra";
 import settings from "./settings";
 import url from "url";
+import { FFMPEG_CONVERT_WAV_OPTIONS } from "@/constants";
 
 /*
  * ffmpeg and ffprobe bin file will be in /app.asar.unpacked instead of /app.asar
@@ -19,6 +20,8 @@ const __dirname = path
   .dirname(__filename)
   .replace("app.asar", "app.asar.unpacked");
 
+process.env.FFMPEG_PATH = ffmpegPath;
+
 const logger = log.scope("ffmpeg");
 export default class FfmpegWrapper {
   checkCommand(): Promise<boolean> {
@@ -211,7 +214,7 @@ export default class FfmpegWrapper {
       );
     }
 
-    options = options || ["-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le"];
+    options = options || FFMPEG_CONVERT_WAV_OPTIONS;
 
     const ffmpeg = Ffmpeg();
     return new Promise((resolve, reject) => {
@@ -21,6 +21,7 @@ import { AudibleProvider, TedProvider } from "@main/providers";
 import Ffmpeg from "@main/ffmpeg";
 import { Waveform } from "./waveform";
 import url from "url";
+import echogarden from "./echogarden";
 
 const __filename = url.fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
@@ -49,6 +50,9 @@ main.init = () => {
   // Prepare Settings
   settings.registerIpcHandlers();
 
+  // echogarden
+  echogarden.registerIpcHandlers();
+
   // Whisper
   whisper.registerIpcHandlers();
 
@@ -433,10 +437,11 @@ ${log}
   // Create the browser window.
   const mainWindow = new BrowserWindow({
     icon: "./assets/icon.png",
-    width: 1600,
-    height: 1200,
-    minWidth: 1024,
-    minHeight: 768,
+    width: 1920,
+    height: 1080,
+    minWidth: 1440,
+    minHeight: 900,
+    fullscreen: true,
     webPreferences: {
       preload: path.join(__dirname, "preload.js"),
     },
@@ -350,6 +350,14 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", {
       return ipcRenderer.invoke("audiowaveform-frequencies", file);
     },
   },
+  echogarden: {
+    align: (input: string, transcript: string, options: any) => {
+      return ipcRenderer.invoke("echogarden-align", input, transcript, options);
+    },
+    check: () => {
+      return ipcRenderer.invoke("echogarden-check");
+    },
+  },
   whisper: {
     config: () => {
       return ipcRenderer.invoke("whisper-config");
@@ -1,394 +0,0 @@ (file deleted)
import { useEffect, useState, useContext } from "react";
import {
  DbProviderContext,
  AppSettingsProviderContext,
  AISettingsProviderContext,
} from "@renderer/context";
import {
  LoaderSpin,
  RecordingsList,
  PagePlaceholder,
  MediaPlayer,
  MediaTranscription,
} from "@renderer/components";
import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import {
  AlertDialog,
  AlertDialogHeader,
  AlertDialogDescription,
  AlertDialogTitle,
  AlertDialogContent,
  AlertDialogFooter,
  AlertDialogCancel,
  Button,
  PingPoint,
  Progress,
  ScrollArea,
  toast,
} from "@renderer/components/ui";
import { t } from "i18next";
import { useTranscribe } from "@renderer/hooks";
import { useNavigate } from "react-router-dom";

export const AudioDetail = (props: { id?: string; md5?: string }) => {
  const navigate = useNavigate();

  const { id, md5 } = props;
  const { addDblistener, removeDbListener } = useContext(DbProviderContext);
  const { whisperConfig } = useContext(AISettingsProviderContext);
  const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);

  const [audio, setAudio] = useState<AudioType | null>(null);
  const [transcription, setTranscription] = useState<TranscriptionType>(null);
  const [sharing, setSharing] = useState<boolean>(false);

  // Transcription controls
  const [transcribing, setTranscribing] = useState<boolean>(false);
  const { transcribe } = useTranscribe();
  const [transcribingProgress, setTranscribingProgress] = useState<number>(0);

  // Player controls
  const [initialized, setInitialized] = useState<boolean>(false);
  const [currentTime, setCurrentTime] = useState<number>(0);
  const [seek, setSeek] = useState<{
    seekTo: number;
    timestamp: number;
  }>();
  const [currentSegmentIndex, setCurrentSegmentIndex] = useState<number>(0);
  const [zoomRatio, setZoomRatio] = useState<number>(1.0);
  const [isPlaying, setIsPlaying] = useState(false);
  const [playMode, setPlayMode] = useState<"loop" | "single" | "all">("all");
  const [playBackRate, setPlaybackRate] = useState<number>(1);
  const [displayInlineCaption, setDisplayInlineCaption] =
    useState<boolean>(true);

  const onTransactionUpdate = (event: CustomEvent) => {
    const { model, action, record } = event.detail || {};
    if (model === "Transcription" && action === "update") {
      setTranscription(record);
    }
  };

  const findOrCreateTranscription = async () => {
    if (!audio) return;
    if (transcription) return;

    return EnjoyApp.transcriptions
      .findOrCreate({
        targetId: audio.id,
        targetType: "Audio",
      })
      .then((transcription) => {
        setTranscription(transcription);
      })
      .catch((err) => {
        toast.error(err.message);
      });
  };

  const generateTranscription = async () => {
    if (transcribing) return;
    if (!transcription) {
      await findOrCreateTranscription();
    }

    setTranscribing(true);
    setTranscribingProgress(0);
    try {
      const { engine, model, result } = await transcribe(audio.src, {
        targetId: audio.id,
        targetType: "Audio",
      });
      await EnjoyApp.transcriptions.update(transcription.id, {
        state: "finished",
        result,
        engine,
        model,
      });
    } catch (err) {
      toast.error(err.message);
    }

    setTranscribing(false);
  };

  const findTranscriptionFromWebApi = async () => {
    if (!transcription) {
      await findOrCreateTranscription();
    }

    const res = await webApi.transcriptions({
      targetMd5: audio.md5,
    });

    const transcript = (res?.transcriptions || []).filter((t) =>
      ["base", "small", "medium", "large", "whisper-1"].includes(t.model)
    )?.[0];

    if (!transcript) {
      throw new Error("Transcription not found");
    }

    await EnjoyApp.transcriptions.update(transcription.id, {
      state: "finished",
      result: transcript.result,
      engine: transcript.engine,
      model: transcript.model,
    });
  };

  const findOrGenerateTranscription = async () => {
    try {
      await findTranscriptionFromWebApi();
    } catch (err) {
      console.error(err);
      await generateTranscription();
    }
  };

  const handleShare = async () => {
    if (!audio.source && !audio.isUploaded) {
      try {
        await EnjoyApp.audios.upload(audio.id);
      } catch (err) {
        toast.error(t("shareFailed"), {
          description: err.message,
        });
        return;
      }
    }
    webApi
      .createPost({
        targetType: "Audio",
        targetId: audio.id,
      })
      .then(() => {
        toast.success(t("sharedSuccessfully"), {
          description: t("sharedAudio"),
        });
      })
      .catch((err) => {
        toast.error(t("shareFailed"), {
          description: err.message,
        });
      });
    setSharing(false);
  };

  useEffect(() => {
    const where = id ? { id } : { md5 };
    EnjoyApp.audios.findOne(where).then((audio) => {
      if (audio) {
        setAudio(audio);
      } else {
        toast.error(t("models.audio.notFound"));
      }
    });
  }, [id, md5]);

  useEffect(() => {
    if (!audio) return;

    findOrCreateTranscription();
  }, [audio]);

  useEffect(() => {
    if (!initialized) return;
    if (!transcription) return;

    addDblistener(onTransactionUpdate);

    if (transcription?.state == "pending") {
      findOrGenerateTranscription();
    }

    if (whisperConfig.service === "local") {
      EnjoyApp.whisper.onProgress((_, p: number) => {
        if (p > 100) p = 100;
        setTranscribingProgress(p);
      });
    }

    return () => {
      removeDbListener(onTransactionUpdate);
      EnjoyApp.whisper.removeProgressListeners();
    };
  }, [md5, transcription, initialized]);

  if (!audio) {
    return <LoaderSpin />;
  }

  if (!audio.src) {
    return (
      <PagePlaceholder placeholder="invalid" extra="cannot find play source" />
    );
  }

  return (
    <div className="relative" data-testid="audio-detail">
      <div className={`grid grid-cols-7 gap-4 ${initialized ? "" : "blur-sm"}`}>
        <div className="col-span-5 h-[calc(100vh-6.5rem)] flex flex-col">
          <MediaPlayer
            mediaId={audio.id}
            mediaType="Audio"
            mediaUrl={audio.src}
            mediaMd5={audio.md5}
            transcription={transcription}
            currentTime={currentTime}
            setCurrentTime={setCurrentTime}
            currentSegmentIndex={currentSegmentIndex}
            setCurrentSegmentIndex={setCurrentSegmentIndex}
            recordButtonVisible={true}
            seek={seek}
            initialized={initialized}
            setInitialized={setInitialized}
            zoomRatio={zoomRatio}
            setZoomRatio={setZoomRatio}
            isPlaying={isPlaying}
            setIsPlaying={setIsPlaying}
            playMode={playMode}
            setPlayMode={setPlayMode}
            playBackRate={playBackRate}
            setPlaybackRate={setPlaybackRate}
            displayInlineCaption={displayInlineCaption}
            setDisplayInlineCaption={setDisplayInlineCaption}
            onShare={() => setSharing(true)}
            onDecoded={({ duration, sampleRate }) => {
              if (audio.duration) return;

              EnjoyApp.audios.update(audio.id, {
                metadata: Object.assign({}, audio.metadata, {
                  duration,
                  sampleRate,
                }),
              });
            }}
          />

          <ScrollArea className={`flex-1 relative bg-muted`}>
            <RecordingsList
              key={`recordings-list-${audio.id}-${currentSegmentIndex}`}
              targetId={audio.id}
              targetType="Audio"
              referenceText={transcription?.result?.[currentSegmentIndex]?.text}
              referenceId={currentSegmentIndex}
            />
          </ScrollArea>
        </div>

        <div className="col-span-2 h-[calc(100vh-6.5rem)]">
          <MediaTranscription
            mediaId={audio.id}
            mediaType="Audio"
            mediaName={audio.name}
            transcription={transcription}
            transcribing={transcribing}
            progress={transcribingProgress}
            transcribe={generateTranscription}
            currentSegmentIndex={currentSegmentIndex}
            onSelectSegment={(index) => {
              if (currentSegmentIndex === index) return;

              const segment = transcription?.result?.[index];
              if (!segment) return;

              if (playMode === "loop" && isPlaying) setIsPlaying(false);
              setSeek({
                seekTo: segment.offsets.from / 1000,
                timestamp: Date.now(),
              });
            }}
          />
        </div>
      </div>

      <AlertDialog open={sharing} onOpenChange={(value) => setSharing(value)}>
        <AlertDialogContent>
          <AlertDialogHeader>
            <AlertDialogTitle>{t("shareAudio")}</AlertDialogTitle>
            <AlertDialogDescription>
              {t("areYouSureToShareThisAudioToCommunity")}
            </AlertDialogDescription>
          </AlertDialogHeader>
          <AlertDialogFooter>
            <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
            <Button variant="default" onClick={handleShare}>
              {t("share")}
            </Button>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>

      {/* Show loading progress until waveform is decoded & transcribed */}
      <AlertDialog open={!initialized || !Boolean(transcription?.result)}>
        <AlertDialogContent>
          <AlertDialogHeader>
            <AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
            <AlertDialogDescription>
              {t("itMayTakeAWhileToPrepareForTheFirstLoad")}
            </AlertDialogDescription>
          </AlertDialogHeader>

          <div className="py-4">
            {initialized ? (
              <div className="mb-4 flex items-center space-x-4">
                <CheckCircleIcon className="w-4 h-4 text-green-500" />
                <span>{t("waveformIsDecoded")}</span>
              </div>
            ) : (
              <div className="mb-4 flex items-center space-x-4">
                <LoaderIcon className="w-4 h-4 animate-spin" />
                <span>{t("decodingWaveform")}</span>
              </div>
            )}

            {!transcription ? (
              <div className="flex items-center space-x-4">
                <LoaderIcon className="w-4 h-4 animate-spin" />
                <span>{t("loadingTranscription")}</span>
              </div>
            ) : transcription.result ? (
              <div className="flex items-center space-x-4">
                <CheckCircleIcon className="w-4 h-4 text-green-500" />
                <span>{t("transcribedSuccessfully")}</span>
              </div>
            ) : transcribing ? (
              <div className="">
                <div className="flex items-center space-x-4 mb-2">
                  <PingPoint colorClassName="bg-yellow-500" />
                  <span>{t("transcribing")}</span>
                </div>
                {whisperConfig.service === "local" && (
                  <Progress value={transcribingProgress} />
                )}
              </div>
            ) : (
              <div className="flex items-center space-x-4">
                <PingPoint colorClassName="bg-muted" />
                <div className="inline">
                  <span>{t("notTranscribedYet")}</span>
                  {initialized && (
                    <Button
                      onClick={generateTranscription}
                      className="ml-4"
                      size="sm"
                    >
                      {t("transcribe")}
                    </Button>
                  )}
                </div>
              </div>
            )}
          </div>

          <AlertDialogFooter>
            <Button variant="secondary" onClick={() => navigate(-1)}>
              {t("cancel")}
            </Button>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>
    </div>
  );
};
enjoy/src/renderer/components/audios/audio-player.tsx (new file, 72 lines)
@@ -0,0 +1,72 @@
import { useEffect, useContext, useRef } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import {
  MediaLoadingModal,
  MediaCaption,
  MediaPlayerControls,
  MediaTabs,
  MediaCurrentRecording,
} from "@renderer/components";
import { formatDuration } from "@renderer/lib/utils";
import { useAudio } from "@renderer/hooks";

export const AudioPlayer = (props: { id?: string; md5?: string }) => {
  const { id, md5 } = props;
  const { media, currentTime, setMedia, setRef } = useContext(
    MediaPlayerProviderContext
  );
  const { audio } = useAudio({ id, md5 });
  const ref = useRef(null);

  useEffect(() => {
    if (!audio) return;

    setMedia(audio);
  }, [audio]);

  useEffect(() => {
    setRef(ref);
  }, [ref]);

  return (
    <div data-testid="audio-player">
      <div className="h-[calc(100vh-37.5rem)] mb-4">
        <div className="grid grid-cols-3 gap-4 px-6 h-full">
          <div className="col-span-1 rounded-lg border shadow-lg h-[calc(100vh-37.5rem)]">
            <MediaTabs />
          </div>
          <div className="col-span-2 h-[calc(100vh-37.5rem)]">
            <MediaCaption />
          </div>
        </div>
      </div>

      <div className="h-[33rem] flex flex-col">
        <div className="h-[13rem] py-2 px-6 mb-4">
          <MediaCurrentRecording />
        </div>

        <div className="w-full h-[13rem] px-6 py-2 mb-4">
          <div className="border rounded-xl shadow-lg relative">
            <div data-testid="media-player-container" ref={ref} />
            <div className="absolute right-2 top-1">
              <span className="text-sm">
                {formatDuration(currentTime || 0)}
              </span>
              <span className="mx-1">/</span>
              <span className="text-sm">
                {formatDuration(media?.duration || 0)}
              </span>
            </div>
          </div>
        </div>

        <div className="w-full bg-background z-10 shadow-xl">
          <MediaPlayerControls />
        </div>
      </div>

      <MediaLoadingModal />
    </div>
  );
};
@@ -1,8 +1,9 @@
 export * from "./audios-table";
 export * from "./audio-edit-form";
-export * from "./audio-detail";
 
 export * from "./audios-component";
 export * from "./audible-books-segment";
 export * from "./audios-segment";
 export * from "./audio-card";
+
+export * from "./audio-player";
@@ -1,5 +1,6 @@
 import { useEffect, useState, useRef, useCallback } from "react";
-import { PitchContour } from "@renderer/components";
+import { renderPitchContour } from "@renderer/lib/utils";
+import { extractFrequencies } from "@/utils";
 import WaveSurfer from "wavesurfer.js";
 import { Button, Skeleton } from "@renderer/components/ui";
 import { PlayIcon, PauseIcon } from "lucide-react";
@@ -59,17 +60,25 @@ export const SpeechPlayer = (props: {
       wavesurfer.on("pause", () => {
         setIsPlaying(false);
       }),
-      wavesurfer.on("decode", () => {
+      wavesurfer.on("ready", () => {
         setDuration(wavesurfer.getDuration());
         const peaks = wavesurfer.getDecodedData().getChannelData(0);
         const sampleRate = wavesurfer.options.sampleRate;
-        wavesurfer.renderer.getWrapper().appendChild(
-          PitchContour({
-            peaks,
-            sampleRate,
-            height,
-          })
-        );
+        const data = extractFrequencies({ peaks, sampleRate });
+        setTimeout(() => {
+          renderPitchContour({
+            wrapper: wavesurfer.getWrapper(),
+            canvasId: `pitch-contour-${speech.id}-canvas`,
+            labels: new Array(data.length).fill(""),
+            datasets: [
+              {
+                data,
+                cubicInterpolationMode: "monotone",
+                pointRadius: 1,
+              },
+            ],
+          });
+        }, 1000);
         setInitialized(true);
       }),
     ];
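
Reviewer note: extractFrequencies is not shown in this diff; given the pitchfinder dependency and the "adjust pitch finder params" commit above, a plausible shape is the sketch below — an assumption, not the actual implementation:

import Pitchfinder from "pitchfinder";

// Run a YIN pitch detector over decoded channel data and return one
// frequency (or null) per analysis window for the contour chart.
function extractFrequencies(params: {
  peaks: Float32Array;
  sampleRate: number;
}): (number | null)[] {
  const { peaks, sampleRate } = params;
  const detectPitch = Pitchfinder.YIN({ sampleRate });
  return Pitchfinder.frequencies(detectPitch, peaks, {
    tempo: 60, // with quantization: 60 this yields ~60 windows per second
    quantization: 60,
  });
}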
@@ -25,7 +25,6 @@ export * from "./login-form";
 export * from "./choose-library-path-input";
 export * from "./whisper-model-options";
 
-export * from "./pitch-contour";
 export * from "./reset-all-button";
 
 export * from "./loader-spin";
@@ -1,5 +1,11 @@
-export * from "./add-media-button";
-export * from "./media-player";
 export * from "./media-player-controls";
 export * from "./media-caption";
+export * from "./media-info-panel";
+export * from "./media-recordings";
+export * from "./media-current-recording";
+export * from "./media-recorder";
 export * from "./media-transcription";
+export * from "./media-player";
+export * from "./media-tabs";
+export * from "./media-loading-modal";
+export * from "./add-media-button";
@@ -1,91 +1,75 @@
-import { useState, useEffect } from "react";
-import { cn } from "@renderer/lib/utils";
+import { useEffect, useState, useContext } from "react";
+import { MediaPlayerProviderContext } from "@renderer/context";
+import cloneDeep from "lodash/cloneDeep";
 import {
   Button,
   DropdownMenu,
   DropdownMenuContent,
   DropdownMenuItem,
   DropdownMenuTrigger,
   Popover,
   PopoverContent,
   PopoverAnchor,
   toast,
+  ScrollArea,
+  Separator,
 } from "@renderer/components/ui";
 import { LookupResult } from "@renderer/components";
-import {
-  ChevronDownIcon,
-  LanguagesIcon,
-  PlayIcon,
-  LoaderIcon,
-  SpeechIcon,
-} from "lucide-react";
 import { t } from "i18next";
+import { LanguagesIcon, SpeechIcon } from "lucide-react";
+import { Timeline } from "echogarden/dist/utilities/Timeline.d.js";
+import { IPA_MAPPING } from "@/constants";
 import { useAiCommand } from "@renderer/hooks";
+import { LoaderIcon } from "lucide-react";
 
-export const MediaCaption = (props: {
-  mediaId: string;
-  mediaType: string;
-  currentTime: number;
-  transcription: TranscriptionResultSegmentGroupType;
-  onSeek?: (time: number) => void;
-  className?: string;
-  isPlaying: boolean;
-  setIsPlaying: (isPlaying: boolean) => void;
-}) => {
-  const {
-    transcription,
-    currentTime,
-    onSeek,
-    className,
-    isPlaying,
-    setIsPlaying,
-  } = props;
+export const MediaCaption = () => {
+  const {
+    wavesurfer,
+    currentSegmentIndex,
+    currentTime,
+    transcription,
+    regions,
+    activeRegion,
+    setActiveRegion,
+    editingRegion,
+    setEditingRegion,
+    setTranscriptionDraft,
+  } = useContext(MediaPlayerProviderContext);
   const [activeIndex, setActiveIndex] = useState<number>(0);
-  const [selected, setSelected] = useState<{
-    index: number;
-    word: string;
-    position?: {
-      top: number;
-      left: number;
-    };
-  }>();
+  const [selectedIndices, setSelectedIndices] = useState<number[]>([]);
+  const [multiSelecting, setMultiSelecting] = useState<boolean>(false);
 
-  const [displayIpa, setDisplayIpa] = useState<boolean>(true);
-
   const [translation, setTranslation] = useState<string>();
   const [translating, setTranslating] = useState<boolean>(false);
   const [displayTranslation, setDisplayTranslation] = useState<boolean>(false);
 
-  const [ipa, setIpa] = useState<{ word?: string; ipa?: string }[]>([]);
-  const [ipaGenerating, setIpaGenerating] = useState<boolean>(false);
+  const [displayIpa, setDisplayIpa] = useState<boolean>(false);
+  const [lookingUp, setLookingUp] = useState<boolean>(false);
+  const [lookupResult, setLookupResult] = useState<LookupType>();
 
-  const { translate, pronounce } = useAiCommand();
+  const { translate, lookupWord } = useAiCommand();
+  const caption = (transcription?.result?.timeline as Timeline)?.[
+    currentSegmentIndex
+  ];
 
-  const toggleIpa = async () => {
-    if (ipaGenerating) return;
-
-    if (ipa.length > 0) {
-      setDisplayIpa(!displayIpa);
-      return;
-    }
-
-    setIpaGenerating(true);
-    toast.promise(
-      pronounce(transcription.text)
-        .then((words) => {
-          if (words?.length > 0) {
-            setIpa(words);
-            setDisplayIpa(true);
-          }
-        })
-        .finally(() => {
-          setIpaGenerating(false);
-        }),
-      {
-        loading: t("generatingIpa"),
-        success: t("generatedIpaSuccessfully"),
-        error: (err) => t("generatingIpaFailed", { error: err.message }),
-        position: "bottom-right",
-      }
-    );
+  const lookup = () => {
+    if (selectedIndices.length === 0) return;
+
+    const word = selectedIndices
+      .map((index) => caption.timeline[index].text)
+      .join(" ");
+    setLookingUp(true);
+    lookupWord({
+      word,
+      context: caption.text,
+      sourceId: transcription.targetId,
+      sourceType: transcription.targetType,
+    })
+      .then((lookup) => {
+        if (lookup?.meaning) {
+          setLookupResult(lookup);
+        }
+      })
+      .catch((error) => {
+        toast.error(error.message);
+      })
+      .finally(() => {
+        setLookingUp(false);
+      });
   };
 
   const toggleTranslation = async () => {
@@ -97,7 +81,7 @@ export const MediaCaption = () => {
     }
 
     toast.promise(
-      translate(transcription.text)
+      translate(caption.text)
         .then((result) => {
           if (result) {
             setTranslation(result);
@@ -116,177 +100,370 @@ export const MediaCaption = (props: {
|
||||
);
|
||||
};
|
||||
|
||||
const toggleMultiSelect = (event: KeyboardEvent) => {
|
||||
setMultiSelecting(event.shiftKey && event.type === "keydown");
|
||||
};
|
||||
|
||||
const toggleRegion = (index: number) => {
|
||||
if (!activeRegion) return;
|
||||
if (editingRegion) {
|
||||
toast.warning(t("currentRegionIsBeingEdited"));
|
||||
return;
|
||||
}
|
||||
|
||||
const word = caption.timeline[index];
|
||||
if (!word) return;
|
||||
|
||||
const start = word.startTime;
|
||||
const end = word.endTime;
|
||||
const regionStart = activeRegion.start;
|
||||
const regionEnd = activeRegion.end;
|
||||
|
||||
if (activeRegion.id.startsWith("word-region")) {
|
||||
if (start >= regionStart && end <= regionEnd) {
|
||||
setActiveRegion(
|
||||
regions.getRegions().find((r) => r.id.startsWith("segment-region"))
|
||||
);
|
||||
} else if (multiSelecting) {
|
||||
const region = regions.addRegion({
|
||||
id: `word-region-${index}`,
|
||||
start: Math.min(start, regionStart),
|
||||
end: Math.max(end, regionEnd),
|
||||
color: "#fb6f9233",
|
||||
drag: false,
|
||||
resize: editingRegion,
|
||||
});
|
||||
|
||||
setActiveRegion(region);
|
||||
} else {
|
||||
const region = regions.addRegion({
|
||||
id: `word-region-${index}`,
|
||||
start,
|
||||
end,
|
||||
color: "#fb6f9233",
|
||||
drag: false,
|
||||
resize: editingRegion,
|
||||
});
|
||||
|
||||
setActiveRegion(region);
|
||||
}
|
||||
activeRegion.remove();
|
||||
} else {
|
||||
const region = regions.addRegion({
|
||||
id: `word-region-${index}`,
|
||||
start,
|
||||
end,
|
||||
color: "#fb6f9233",
|
||||
drag: false,
|
||||
resize: false,
|
||||
});
|
||||
|
||||
setActiveRegion(region);
|
||||
}
|
||||
};
|
||||
|
||||
const markPhoneRegions = () => {
|
||||
const phoneRegions = regions
|
||||
.getRegions()
|
||||
.filter((r) => r.id.startsWith("phone-region"));
|
||||
if (phoneRegions.length > 0) {
|
||||
phoneRegions.forEach((r) => {
|
||||
r.remove();
|
||||
r.unAll();
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (!activeRegion) return;
|
||||
if (!activeRegion.id.startsWith("word-region")) return;
|
||||
if (!selectedIndices) return;
|
||||
|
||||
selectedIndices.forEach((index) => {
|
||||
const word = caption.timeline[index];
|
||||
|
||||
word.timeline.forEach((token) => {
|
||||
token.timeline.forEach((phone) => {
|
||||
const region = regions.addRegion({
|
||||
id: `phone-region-${index}`,
|
||||
start: phone.startTime,
|
||||
end: phone.endTime,
|
||||
color: "#efefefef",
|
||||
drag: false,
|
||||
resize: editingRegion,
|
||||
});
|
||||
region.on("click", () => {
|
||||
region.play();
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
if (!transcription) return;
|
||||
const time = Math.round(currentTime * 1000);
|
||||
const index = transcription.segments.findIndex(
|
||||
(w) => time >= w.offsets.from && time < w.offsets.to
|
||||
if (!caption) return;
|
||||
|
||||
const index = caption.timeline.findIndex(
|
||||
(w) => currentTime >= w.startTime && currentTime < w.endTime
|
||||
);
|
||||
|
||||
if (index !== activeIndex) {
|
||||
setActiveIndex(index);
|
||||
}
|
||||
}, [currentTime, transcription]);
|
||||
}, [currentTime, caption]);
|
||||
|
||||
if (!transcription) return null;
|
||||
if (Math.round(currentTime * 1000) < transcription.offsets.from) return null;
|
||||
useEffect(() => {
|
||||
if (!caption?.timeline) return;
|
||||
if (!activeRegion) return;
|
||||
|
||||
if (!activeRegion.id.startsWith("word-region")) {
|
||||
setSelectedIndices([]);
|
||||
return;
|
||||
}
|
||||
|
||||
const indices: number[] = [];
|
||||
caption.timeline.forEach((w, index) => {
|
||||
if (
|
||||
w.startTime >= activeRegion.start &&
|
||||
(w.endTime <= activeRegion.end ||
|
||||
// The last word's end time may be a little greater than the duration of the audio in somehow.
|
||||
w.endTime > wavesurfer.getDuration())
|
||||
) {
|
||||
indices.push(index);
|
||||
}
|
||||
});
|
||||
|
||||
if (indices.length > 0) {
|
||||
const el = document.getElementById(
|
||||
`word-${currentSegmentIndex}-${indices[0]}`
|
||||
);
|
||||
}
|
||||
setSelectedIndices(indices);
|
||||
setLookupResult(undefined);
|
||||
}, [caption, activeRegion]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!activeRegion) return;
|
||||
if (!activeRegion.id.startsWith("word-region")) return;
|
||||
|
||||
const region = regions.addRegion({
|
||||
id: `word-region-${selectedIndices.join("-")}`,
|
||||
start: activeRegion.start,
|
||||
end: activeRegion.end,
|
||||
color: "#fb6f9233",
|
||||
drag: false,
|
||||
resize: editingRegion,
|
||||
});
|
||||
|
||||
activeRegion.remove();
|
||||
setActiveRegion(region);
|
||||
|
||||
const subscriptions = [
|
||||
regions.on("region-updated", (region) => {
|
||||
if (!region.id.startsWith("word-region")) return;
|
||||
|
||||
const draft = cloneDeep(transcription.result);
|
||||
const draftCaption = draft.timeline[currentSegmentIndex];
|
||||
|
||||
const firstIndex = selectedIndices[0];
|
||||
const lastIndex = selectedIndices[selectedIndices.length - 1];
|
||||
const firstWord = draftCaption.timeline[firstIndex];
|
||||
const lastWord = draftCaption.timeline[lastIndex];
|
||||
|
||||
// If no word is selected somehow, then ignore the update.
|
||||
if (!firstWord || !lastWord) {
|
||||
setEditingRegion(false);
|
||||
return;
|
||||
}
|
||||
|
||||
firstWord.startTime = region.start;
|
||||
lastWord.endTime = region.end;
|
||||
|
||||
/* Update the timeline of the previous and next words
|
||||
* It happens only when regions are intersecting with the previous or next word.
|
||||
* It will ignore if the previous/next word's position changed in timestamps.
|
||||
*/
|
||||
const prevWord = draftCaption.timeline[firstIndex - 1];
|
||||
const nextWord = draftCaption.timeline[lastIndex + 1];
|
||||
if (
|
||||
prevWord &&
|
||||
prevWord.endTime > region.start &&
|
||||
prevWord.startTime < region.start
|
||||
) {
|
||||
prevWord.endTime = region.start;
|
||||
}
|
||||
if (
|
||||
nextWord &&
|
||||
nextWord.startTime < region.end &&
|
||||
nextWord.endTime > region.end
|
||||
) {
|
||||
nextWord.startTime = region.end;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the last word is the last word of the segment, then update the segment's end time.
|
||||
*/
|
||||
if (lastIndex === draftCaption.timeline.length - 1) {
|
||||
draftCaption.endTime = region.end;
|
||||
}
|
||||
|
||||
setTranscriptionDraft(draft);
|
||||
}),
|
||||
];
|
||||
|
||||
return () => {
|
||||
subscriptions.forEach((unsub) => unsub());
|
||||
};
|
||||
}, [editingRegion]);

  useEffect(() => {
    setTranslation(undefined);
    setDisplayTranslation(false);
  }, [caption]);

  useEffect(() => {
    document.addEventListener("keydown", (event: KeyboardEvent) =>
      toggleMultiSelect(event)
    );
    document.addEventListener("keyup", (event: KeyboardEvent) =>
      toggleMultiSelect(event)
    );

    return () => {
      document.removeEventListener("keydown", toggleMultiSelect);
      document.removeEventListener("keyup", toggleMultiSelect);
    };
  }, []);

  if (!caption) return null;

  return (
    <div className={cn("relative px-4 py-2 text-lg", className)}>
      <div className="flex items-start space-x-4">
        <div className="flex-1">
          <div className="flex flex-wrap">
            {(transcription.segments || []).map((w, index) => (
              <div
                key={index}
                className={`mr-1 cursor-pointer hover:bg-red-500/10 ${
                  index === activeIndex ? "text-red-500" : ""
                }`}
                onClick={(event) => {
                  setSelected({
                    index,
                    word: w.text,
                    position: {
                      top:
                        event.currentTarget.offsetTop +
                        event.currentTarget.offsetHeight,
                      left: event.currentTarget.offsetLeft,
                    },
                  });

                  setIsPlaying(false);
                  if (onSeek) onSeek(w.offsets.from / 1000);
                }}
              >
                <div>{w.text}</div>
                {displayIpa &&
                  ipa.find(
                    (i) =>
                      i.word.trim() === w.text.replace(/[\.",?!]/g, "").trim()
                  )?.ipa && (
                    <div className="text-sm text-foreground/70 font-serif">
                      {
                        ipa.find(
                          (i) =>
                            i.word.trim() ===
                            w.text.replace(/[\.",?!]/g, "").trim()
                        )?.ipa
                      }
                    </div>
                  )}
              </div>
            ))}
          </div>
          {displayTranslation && translation && (
            <div className="select-text py-2 text-sm text-foreground/70">
              {translation}
            </div>
          )}
      <div className="h-full flex justify-between space-x-4">
        <ScrollArea className="flex-1 px-6 py-4 font-serif h-full border shadow-lg rounded-lg">
          <div className="flex flex-wrap mb-4">
            {/* use the words split from the caption text when the count matches the timeline length; otherwise fall back to the timeline */}
            {caption.text.split(" ").length === caption.timeline.length
              ? caption.text.split(" ").map((word, index) => (
                  <div
                    key={index}
                    id={`word-${currentSegmentIndex}-${index}`}
                    className={`pr-2 pb-2 cursor-pointer hover:bg-red-500/10 ${
                      index === activeIndex ? "text-red-500" : ""
                    } ${selectedIndices.includes(index) ? "bg-red-500/10" : ""}`}
                    onClick={() => toggleRegion(index)}
                  >
                    <div className="">
                      <div className="text-2xl">{word}</div>
                      {displayIpa && (
                        <div className="text-muted-foreground">
                          {caption.timeline[index].timeline
                            .map((t) => t.timeline.map((s) => s.text).join(""))
                            .join(" · ")}
                        </div>
                      )}
                    </div>
                  </div>
                ))
              : (caption.timeline || []).map((w, index) => (
                  <div
                    key={index}
                    id={`word-${currentSegmentIndex}-${index}`}
                    className={`pr-2 pb-2 cursor-pointer hover:bg-red-500/10 ${
                      index === activeIndex ? "text-red-500" : ""
                    } ${
                      selectedIndices.includes(index)
                        ? "bg-red-500/10 selected"
                        : ""
                    }`}
                    onClick={() => toggleRegion(index)}
                  >
                    <div className="">
                      <div className="text-2xl">{w.text}</div>
                      {displayIpa && (
                        <div className="text-muted-foreground">
                          {w.timeline
                            .map((t) => t.timeline.map((s) => s.text).join(""))
                            .join(" · ")}
                        </div>
                      )}
                    </div>
                  </div>
                ))}
          </div>

          <DropdownMenu>
            <DropdownMenuTrigger asChild>
              <Button variant="ghost" size="icon">
                <ChevronDownIcon className="w-4 h-4" />
              </Button>
            </DropdownMenuTrigger>
            <DropdownMenuContent>
              <DropdownMenuItem
                className="cursor-pointer capitalize"
                disabled={translating}
                onClick={toggleTranslation}
              >
                {translating ? (
                  <LoaderIcon className="w-4 h-4 mr-2 animate-spin" />
                ) : (
                  <LanguagesIcon className="w-4 h-4 mr-2" />
                )}
                <span>{t("translate")}</span>
              </DropdownMenuItem>
              <DropdownMenuItem
                className="cursor-pointer capitalize"
                disabled={ipaGenerating}
                onClick={toggleIpa}
              >
                {ipaGenerating ? (
                  <LoaderIcon className="w-4 h-4 mr-2 animate-spin" />
                ) : (
                  <SpeechIcon className="w-4 h-4 mr-2" />
                )}
                <span>{t("displayIpa")}</span>
              </DropdownMenuItem>
            </DropdownMenuContent>
          </DropdownMenu>
        </div>
        {displayTranslation && translation && (
          <>
            <Separator className="my-2" />
            <div className="text-sm font-semibold py-2">{t("translation")}</div>
            <div className="select-text py-2 text-sm text-foreground">
              {translation}
            </div>
          </>
        )}

        <Popover
          open={Boolean(selected) && !isPlaying}
          onOpenChange={(value) => {
            if (!value) setSelected(null);
          }}
        >
          <PopoverAnchor
            className="absolute w-0 h-0"
            style={{
              top: selected?.position?.top,
              left: selected?.position?.left,
            }}
          ></PopoverAnchor>
          <PopoverContent
            className="w-full max-w-md p-0"
            updatePositionStrategy="always"
            {selectedIndices.length > 0 && (
              <>
                <Separator className="my-2" />
                <div className="flex flex-wrap items-center space-x-2 select-text mb-4">
                  {selectedIndices.map((index) => {
                    const word = caption.timeline[index];
                    if (!word) return;
                    return (
                      <div key={index}>
                        <div className="font-serif text-lg font-semibold tracking-tight">
                          {word.text}
                        </div>
                        <div className="text-sm text-serif text-muted-foreground">
                          {word.timeline
                            .map((t) => t.timeline.map((s) => s.text).join(""))
                            .join(" · ")}
                        </div>
                      </div>
                    );
                  })}
                </div>

                {lookupResult ? (
                  <div className="py-2 select-text">
                    <div className="text-serif">
                      {lookupResult.meaning.translation}
                    </div>
                    <div className="text-serif">
                      {lookupResult.meaning.definition}
                    </div>
                  </div>
                ) : (
                  <div className="flex items-center py-2">
                    <Button size="sm" disabled={lookingUp} onClick={lookup}>
                      {lookingUp && (
                        <LoaderIcon className="animate-spin w-4 h-4 mr-2" />
                      )}
                      <span>{t("translate")}</span>
                    </Button>
                  </div>
                )}
              </>
            )}
        </ScrollArea>

        <div className="flex flex-col space-y-2">
          <Button
            variant={displayTranslation ? "secondary" : "outline"}
            size="icon"
            className="rounded-full w-8 h-8 p-0"
            disabled={translating}
            onClick={toggleTranslation}
          >
          {selected?.word && (
            <ResourceCaptionSelectionMenu
              word={selected.word}
              context={transcription.segments
                .map((w) => w.text)
                .join(" ")
                .trim()}
              mediaId={props.mediaId}
              mediaType={props.mediaType}
              onPlay={() => {
                setIsPlaying(true);
              }}
            />
          )}
        </PopoverContent>
      </Popover>
    </div>
  );
};

const ResourceCaptionSelectionMenu = (props: {
  word: string;
  context: string;
  mediaId: string;
  mediaType: string;
  onPlay: () => void;
}) => {
  const { word, context, mediaId, mediaType, onPlay } = props;
  const [translating, setTranslating] = useState<boolean>(false);

  if (!word) return null;

  if (translating) {
    return (
      <LookupResult
        word={word}
        context={context}
        sourceId={mediaId}
        sourceType={mediaType}
      />
    );
  }

  return (
    <div className="flex items-center p-1">
      <Button onClick={onPlay} variant="ghost" size="icon">
        <PlayIcon size={16} />
      </Button>
      <Button onClick={() => setTranslating(true)} variant="ghost" size="icon">
        <LanguagesIcon size={16} />
      </Button>
          <LanguagesIcon className="w-4 h-4" />
        </Button>
        <Button
          variant={displayIpa ? "secondary" : "outline"}
          size="icon"
          className="rounded-full w-8 h-8 p-0"
          onClick={() => setDisplayIpa(!displayIpa)}
        >
          <SpeechIcon className="w-4 h-4" />
        </Button>
      </div>
    </div>
  );
};
enjoy/src/renderer/components/medias/media-current-recording.tsx (new file, 511 lines)
@@ -0,0 +1,511 @@
import { useEffect, useContext, useRef, useState } from "react";
import {
  AppSettingsProviderContext,
  MediaPlayerProviderContext,
} from "@renderer/context";
import { MediaRecorder, RecordingDetail } from "@renderer/components";
import { renderPitchContour } from "@renderer/lib/utils";
import { extractFrequencies } from "@/utils";
import WaveSurfer from "wavesurfer.js";
import Regions from "wavesurfer.js/dist/plugins/regions";
import {
  AlertDialog,
  AlertDialogContent,
  AlertDialogDescription,
  AlertDialogFooter,
  AlertDialogHeader,
  AlertDialogTitle,
  AlertDialogCancel,
  AlertDialogAction,
  Button,
  DropdownMenu,
  DropdownMenuItem,
  DropdownMenuTrigger,
  DropdownMenuContent,
  toast,
  Sheet,
  SheetContent,
  SheetHeader,
  SheetClose,
} from "@renderer/components/ui";
import {
  GitCompareIcon,
  PauseIcon,
  PlayIcon,
  Share2Icon,
  GaugeCircleIcon,
  ChevronDownIcon,
  MoreVerticalIcon,
  TextCursorInputIcon,
} from "lucide-react";
import { t } from "i18next";
import { formatDuration } from "@renderer/lib/utils";
import { useHotkeys } from "react-hotkeys-hook";

export const MediaCurrentRecording = (props: { height?: number }) => {
  const { height = 192 } = props;
  const {
    isRecording,
    currentRecording,
    renderPitchContour: renderMediaPitchContour,
    regions: mediaRegions,
    activeRegion: mediaActiveRegion,
    wavesurfer,
    zoomRatio,
    editingRegion,
    currentTime: mediaCurrentTime,
  } = useContext(MediaPlayerProviderContext);
  const { webApi, EnjoyApp } = useContext(AppSettingsProviderContext);
  const [player, setPlayer] = useState(null);
  const [regions, setRegions] = useState<Regions | null>(null);
  const [currentTime, setCurrentTime] = useState(0);

  const [detailIsOpen, setDetailIsOpen] = useState(false);
  const [isComparing, setIsComparing] = useState(false);
  const [isSharing, setIsSharing] = useState(false);
  const [isSelectingRegion, setIsSelectingRegion] = useState(false);

  const [frequencies, setFrequencies] = useState<number[]>([]);
  const [peaks, setPeaks] = useState<number[]>([]);

  const ref = useRef(null);

  const removeComparingPitchContour = () => {
    if (!wavesurfer) return;

    regions
      .getRegions()
      .find((r) => r.id.startsWith("recording-voice-region"))
      ?.remove();

    const wrapper = (wavesurfer as any).renderer.getWrapper();
    wrapper
      .querySelectorAll(".pitch-contour-recording")
      .forEach((el: HTMLDivElement) => el.remove());
  };

  /*
   * Render the recording's pitch contour on the original audio waveform,
   * alongside the original pitch contour.
   */
  const renderComparingPitchContour = () => {
    const region = mediaRegions
      .getRegions()
      .find((r) => r.id.startsWith("segment-region"));
    if (!region) return;

    if (!frequencies || !peaks) return;

    // Trim the peaks from start to end so that only the voiced part of the recording is rendered
    const minValue = 0.01;
    let voiceStartIndex = 0;
    let voiceEndIndex = peaks.length - 1;

    for (let i = 1; i < voiceEndIndex; i++) {
      if (peaks[i] >= minValue) {
        voiceStartIndex = i;
        break;
      }
    }
    for (let i = voiceEndIndex; i > voiceStartIndex; i--) {
      if (peaks[i] >= minValue) {
        voiceEndIndex = i;
        break;
      }
    }
    const voiceStartFrequenciesIndex = Math.round(
      ((1.0 * voiceStartIndex) / peaks.length) * frequencies.length
    );
    const voiceEndFrequenciesIndex = Math.round(
      ((1.0 * voiceEndIndex) / peaks.length) * frequencies.length
    );

    regions.clearRegions();
    regions.addRegion({
      id: `recording-voice-region-${currentRecording.id}`,
      start: (voiceStartIndex / peaks.length) * player.getDuration(),
      end: (voiceEndIndex / peaks.length) * player.getDuration(),
      color: "#fb6f9211",
      drag: false,
      resize: false,
    });

    const data = frequencies.slice(
      voiceStartFrequenciesIndex,
      voiceEndFrequenciesIndex
    );
    renderMediaPitchContour(region, {
      repaint: false,
      canvasId: `pitch-contour-${currentRecording.id}-canvas`,
      containerClassNames: ["pitch-contour-recording"],
      data: {
        labels: new Array(data.length).fill(""),
        datasets: [
          {
            data,
            cubicInterpolationMode: "monotone",
            borderColor: "#fb6f92",
            pointBorderColor: "#fb6f92",
            pointBackgroundColor: "#ff8fab",
          },
        ],
      },
    });
  };
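  // The silence trim above as a standalone sketch (illustrative only; the
  // 0.01 threshold mirrors `minValue` in this component):
  //
  //   const findVoicedRange = (peaks: number[], minValue = 0.01) => {
  //     let start = 0;
  //     let end = peaks.length - 1;
  //     while (start < end && peaks[start] < minValue) start++;
  //     while (end > start && peaks[end] < minValue) end--;
  //     return [start, end]; // first and last voiced sample indices
  //   };
  //
  // The voiced sample range is then mapped proportionally onto the frequency
  // array: frequencyIndex ≈ round(peakIndex / peaks.length * frequencies.length).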

  const toggleCompare = () => {
    if (isComparing) {
      removeComparingPitchContour();
      setIsComparing(false);
    } else {
      setIsComparing(true);
      renderComparingPitchContour();
    }
  };

  const handleShare = async () => {
    if (!currentRecording.uploadedAt) {
      try {
        await EnjoyApp.recordings.upload(currentRecording.id);
      } catch (error) {
        toast.error(t("shareFailed"), { description: error.message });
        return;
      }
    }

    webApi
      .createPost({
        targetId: currentRecording.id,
        targetType: "Recording",
      })
      .then(() => {
        toast.success(t("sharedSuccessfully"), {
          description: t("sharedRecording"),
        });
      })
      .catch((error) => {
        toast.error(t("shareFailed"), {
          description: error.message,
        });
      });
  };

  useEffect(() => {
    if (!ref.current) return;
    if (isRecording) return;
    if (!currentRecording?.src) return;

    const ws = WaveSurfer.create({
      container: ref.current,
      url: currentRecording.src,
      height,
      barWidth: 2,
      cursorWidth: 1,
      autoCenter: true,
      autoScroll: true,
      minPxPerSec: 150,
      waveColor: "#efefef",
      normalize: false,
      progressColor: "rgba(0, 0, 0, 0.1)",
    });

    setPlayer(ws);

    const regions = ws.registerPlugin(Regions.create());
    setRegions(regions);

    ws.on("timeupdate", (time: number) => setCurrentTime(time));

    ws.on("finish", () => ws.seekTo(0));

    ws.on("ready", () => {
      const peaks: Float32Array = ws.getDecodedData().getChannelData(0);
      const sampleRate = ws.options.sampleRate;
      const data = extractFrequencies({ peaks, sampleRate });
      setFrequencies(data);
      setPeaks(Array.from(peaks));

      renderPitchContour({
        wrapper: ws.getWrapper(),
        canvasId: `pitch-contour-${currentRecording.id}-canvas`,
        labels: new Array(data.length).fill(""),
        datasets: [
          {
            data,
            cubicInterpolationMode: "monotone",
            borderColor: "#fb6f92",
            pointBorderColor: "#fb6f92",
            pointBackgroundColor: "#ff8fab",
          },
        ],
      });
    });

    return () => {
      ws.destroy();
    };
  }, [ref, currentRecording, isRecording]);

  useEffect(() => {
    setIsComparing(false);
    removeComparingPitchContour();
  }, [currentRecording]);

  useEffect(() => {
    if (!isComparing) return;

    if (editingRegion) {
      setIsComparing(false);
    } else {
      setTimeout(() => {
        renderComparingPitchContour();
      }, 100);
    }
  }, [zoomRatio, editingRegion]);

  useEffect(() => {
    if (!regions) return;

    let disableSelectingRegion: () => void | undefined;
    if (isSelectingRegion) {
      regions.clearRegions();
      disableSelectingRegion = regions.enableDragSelection({
        color: "rgba(76, 201, 240, 0.2)",
        drag: false,
      });
    }

    const subscriptions = [
      regions.on("region-created", () => {}),

      regions.on("region-clicked", (region, e) => {
        e.stopPropagation();
        region.play();
      }),

      regions.on("region-out", () => {
        player.pause();
      }),
    ];

    return () => {
      disableSelectingRegion && disableSelectingRegion();
      regions.clearRegions();
      subscriptions.forEach((unsub) => unsub());
    };
  }, [regions, isSelectingRegion, player]);

  /*
   * Update player styles
   */
  useEffect(() => {
    if (!ref?.current || !player) return;

    const scrollContainer = player.getWrapper()?.closest(".scroll");
    if (!scrollContainer) return;

    scrollContainer.style.width = `${
      ref.current.getBoundingClientRect().width
    }px`;
    scrollContainer.style.scrollbarWidth = "thin";
  }, [ref, player]);

  /*
   * Play the recording along with the media when isComparing is true,
   * but only while the media is playing and the active region is the segment region.
   */
  useEffect(() => {
    if (!regions) return;
    if (!isComparing) return;
    if (!wavesurfer?.isPlaying()) return;
    if (player?.isPlaying()) return;
    if (!mediaActiveRegion?.id?.startsWith("segment-region")) return;

    regions
      .getRegions()
      .find((r) => r.id.startsWith("recording-voice-region"))
      ?.play();
  }, [
    wavesurfer,
    player,
    regions,
    isComparing,
    mediaCurrentTime,
    mediaActiveRegion,
  ]);

  useHotkeys(
    ["Ctrl+R", "Meta+R"],
    (keyboardEvent, hotkeyEvent) => {
      if (!player) return;
      keyboardEvent.preventDefault();

      if (
        (navigator.platform.includes("Mac") && hotkeyEvent.meta) ||
        hotkeyEvent.ctrl
      ) {
        document.getElementById("recording-play-or-pause-button").click();
      }
    },
    [player]
  );

  if (isRecording) return <MediaRecorder />;
  if (!currentRecording?.src)
    return (
      <div className="h-full w-full border rounded-xl shadow-lg flex items-center justify-center">
        <div
          className="m-auto"
          dangerouslySetInnerHTML={{
            __html: t("noRecordingForThisSegmentYet"),
          }}
        ></div>
      </div>
    );

  return (
    <div className="flex space-x-4">
      <div className="border rounded-xl shadow-lg flex-1 relative">
        <div ref={ref}></div>

        <div className="absolute right-2 top-1">
          <span className="text-sm">{formatDuration(currentTime || 0)}</span>
          <span className="mx-1">/</span>
          <span className="text-sm">
            {formatDuration(
              player?.getDuration() || currentRecording.duration / 1000.0 || 0
            )}
          </span>
        </div>
      </div>

      <div className="flex flex-col space-y-1.5">
        <Button
          variant="default"
          size="icon"
          id="recording-play-or-pause-button"
          data-tooltip-id="media-player-controls-tooltip"
          data-tooltip-content={t("playRecording")}
          className="rounded-full w-8 h-8 p-0"
          onClick={() => {
            const region = regions
              ?.getRegions()
              ?.find((r) => r.id.startsWith("recording-voice-region"));

            if (region) {
              region.play();
            } else {
              player?.playPause();
            }
          }}
        >
          {player?.isPlaying() ? (
            <PauseIcon className="w-4 h-4" />
          ) : (
            <PlayIcon className="w-4 h-4" />
          )}
        </Button>

        <Button
          variant={isComparing ? "secondary" : "outline"}
          size="icon"
          data-tooltip-id="media-player-controls-tooltip"
          data-tooltip-content={t("compare")}
          className="rounded-full w-8 h-8 p-0"
          onClick={toggleCompare}
        >
          <GitCompareIcon className="w-4 h-4" />
        </Button>

        <Button
          variant={isSelectingRegion ? "secondary" : "outline"}
          size="icon"
          data-tooltip-id="media-player-controls-tooltip"
          data-tooltip-content={t("selectRegion")}
          className="rounded-full w-8 h-8 p-0"
          onClick={() => setIsSelectingRegion(!isSelectingRegion)}
        >
          <TextCursorInputIcon className="w-4 h-4" />
        </Button>

        <DropdownMenu>
          <DropdownMenuTrigger asChild>
            <Button
              variant="outline"
              size="icon"
              data-tooltip-id="media-player-controls-tooltip"
              data-tooltip-content={t("more")}
              className="rounded-full w-8 h-8 p-0"
            >
              <MoreVerticalIcon className="w-4 h-4" />
            </Button>
          </DropdownMenuTrigger>

          <DropdownMenuContent>
            <DropdownMenuItem
              className="cursor-pointer"
              onClick={() => setDetailIsOpen(true)}
            >
              <GaugeCircleIcon
                className={`w-4 h-4 mr-4
                  ${
                    currentRecording.pronunciationAssessment
                      ? currentRecording.pronunciationAssessment
                          .pronunciationScore >= 80
                        ? "text-green-500"
                        : currentRecording.pronunciationAssessment
                            .pronunciationScore >= 60
                        ? "text-yellow-600"
                        : "text-red-500"
                      : ""
                  }
                `}
              />
              <span>{t("pronunciationAssessment")}</span>
            </DropdownMenuItem>

            <DropdownMenuItem
              className="cursor-pointer"
              onClick={() => setIsSharing(true)}
            >
              <Share2Icon className="w-4 h-4 mr-4" />
              <span>{t("share")}</span>
            </DropdownMenuItem>
          </DropdownMenuContent>
        </DropdownMenu>
      </div>

      <AlertDialog open={isSharing} onOpenChange={setIsSharing}>
        <AlertDialogContent>
          <AlertDialogHeader>
            <AlertDialogTitle>{t("shareRecording")}</AlertDialogTitle>
            <AlertDialogDescription>
              {t("areYouSureToShareThisRecordingToCommunity")}
            </AlertDialogDescription>
          </AlertDialogHeader>
          <AlertDialogFooter>
            <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
            <AlertDialogAction asChild>
              <Button onClick={handleShare}>{t("share")}</Button>
            </AlertDialogAction>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>
      <Sheet open={detailIsOpen} onOpenChange={(open) => setDetailIsOpen(open)}>
        <SheetContent
          side="bottom"
          className="rounded-t-2xl shadow-lg"
          displayClose={false}
        >
          <SheetHeader className="flex items-center justify-center -mt-4 mb-2">
            <SheetClose>
              <ChevronDownIcon />
            </SheetClose>
          </SheetHeader>

          <RecordingDetail recording={currentRecording} />
        </SheetContent>
      </Sheet>
    </div>
  );
};
enjoy/src/renderer/components/medias/media-info-panel.tsx (new file, 40 lines)
@@ -0,0 +1,40 @@
import { useContext } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import { formatDuration, formatDateTime } from "@renderer/lib/utils";
import { t } from "i18next";

export const MediaInfoPanel = () => {
  const { media } = useContext(MediaPlayerProviderContext);
  if (!media) return null;

  return (
    <div className="px-4" data-testid="media-info-panel">
      {[
        { label: t("models.audio.name"), value: media.name },
        {
          label: t("models.audio.duration"),
          value: formatDuration(media.duration),
        },
        {
          label: t("models.audio.recordingsCount"),
          value: media.recordingsCount ? media.recordingsCount : 0,
        },
        {
          label: t("models.audio.recordingsDuration"),
          value: formatDuration(media.recordingsDuration, "ms"),
        },
        {
          label: t("models.audio.createdAt"),
          value: formatDateTime(media.createdAt),
        },
      ].map((item, index) => (
        <div key={`media-info-item-${index}`} className="mb-2">
          <div className="capitalize text-sm text-muted-foreground mb-1">
            {item.label}
          </div>
          <div className="">{item.value}</div>
        </div>
      ))}
    </div>
  );
};
enjoy/src/renderer/components/medias/media-loading-modal.tsx (new file, 104 lines)
@@ -0,0 +1,104 @@
import { useContext } from "react";
import {
  MediaPlayerProviderContext,
  AISettingsProviderContext,
} from "@renderer/context";
import {
  AlertDialog,
  AlertDialogHeader,
  AlertDialogDescription,
  AlertDialogTitle,
  AlertDialogContent,
  AlertDialogFooter,
  AlertDialogOverlay,
  Button,
  PingPoint,
  Progress,
} from "@renderer/components/ui";
import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import { t } from "i18next";
import { useNavigate } from "react-router-dom";

export const MediaLoadingModal = () => {
  const navigate = useNavigate();
  const { whisperConfig } = useContext(AISettingsProviderContext);
  const {
    decoded,
    transcription,
    transcribing,
    transcribingProgress,
    generateTranscription,
  } = useContext(MediaPlayerProviderContext);

  return (
    <AlertDialog open={!decoded || !Boolean(transcription?.result)}>
      <AlertDialogOverlay className="z-[100]" />
      <AlertDialogContent className="z-[100]">
        <AlertDialogHeader>
          <AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
          <AlertDialogDescription>
            {t("itMayTakeAWhileToPrepareForTheFirstLoad")}
          </AlertDialogDescription>
        </AlertDialogHeader>

        <div className="py-4">
          {decoded ? (
            <div className="mb-4 flex items-center space-x-4">
              <CheckCircleIcon className="w-4 h-4 text-green-500" />
              <span>{t("waveformIsDecoded")}</span>
            </div>
          ) : (
            <div className="mb-4 flex items-center space-x-4">
              <LoaderIcon className="w-4 h-4 animate-spin" />
              <span>{t("decodingWaveform")}</span>
            </div>
          )}

          {!transcription ? (
            <div className="flex items-center space-x-4">
              <LoaderIcon className="w-4 h-4 animate-spin" />
              <span>{t("loadingTranscription")}</span>
            </div>
          ) : transcription.result ? (
            <div className="flex items-center space-x-4">
              <CheckCircleIcon className="w-4 h-4 text-green-500" />
              <span>{t("transcribedSuccessfully")}</span>
            </div>
          ) : transcribing ? (
            <div className="">
              <div className="flex items-center space-x-4 mb-2">
                <PingPoint colorClassName="bg-yellow-500" />
                <span>{t("transcribing")}</span>
              </div>
              {whisperConfig.service === "local" && (
                <Progress value={transcribingProgress} />
              )}
            </div>
          ) : (
            <div className="flex items-center space-x-4">
              <PingPoint colorClassName="bg-muted" />
              <div className="inline">
                <span>{t("notTranscribedYet")}</span>
                {decoded && (
                  <Button
                    onClick={generateTranscription}
                    className="ml-4"
                    size="sm"
                  >
                    {t("transcribe")}
                  </Button>
                )}
              </div>
            </div>
          )}
        </div>

        <AlertDialogFooter>
          <Button variant="secondary" onClick={() => navigate(-1)}>
            {t("cancel")}
          </Button>
        </AlertDialogFooter>
      </AlertDialogContent>
    </AlertDialog>
  );
};
File diff suppressed because it is too large
@@ -1,19 +1,5 @@
import { useEffect, useState, useCallback, useRef, useContext } from "react";
import {
  extractFrequencies,
  PitchContour,
  MediaPlayerControls,
  MediaCaption,
} from "@renderer/components";
import Regions, {
  Region,
  type Region as RegionType,
} from "wavesurfer.js/dist/plugins/regions";
import { secondsToTimestamp } from "@renderer/lib/utils";
import WaveSurfer from "wavesurfer.js";
import { useDebounce } from "@uidotdev/usehooks";
import { AppSettingsProviderContext } from "@renderer/context";
import cloneDeep from "lodash/cloneDeep";
import { useContext } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import {
  MediaPlayer as VidstackMediaPlayer,
  MediaProvider,
@@ -23,626 +9,32 @@ import {
} from "@vidstack/react";
import {
  DefaultAudioLayout,
  DefaultVideoLayout,
  defaultLayoutIcons,
} from "@vidstack/react/player/layouts/default";
import { useHotkeys } from "react-hotkeys-hook";

const minPxPerSecBase = 150;

export const MediaPlayer = (props: {
  mediaId: string;
  mediaType: "Audio" | "Video";
  mediaUrl: string;
  mediaMd5?: string;
  transcription: TranscriptionType;
  // player controls
  currentTime: number;
  setCurrentTime: (time: number) => void;
  currentSegmentIndex: number;
  setCurrentSegmentIndex: (index: number) => void;
  initialized: boolean;
  setInitialized: (value: boolean) => void;
  recordButtonVisible?: boolean;
  setRecordButtonVisible?: (value: boolean) => void;
  seek?: {
    seekTo: number;
    timestamp: number;
  };
  height?: number;
  zoomRatio: number;
  setZoomRatio: (value: number) => void;
  isPlaying: boolean;
  setIsPlaying: (value: boolean) => void;
  playMode?: "loop" | "single" | "all";
  setPlayMode?: (value: "loop" | "single" | "all") => void;
  playBackRate: number;
  setPlaybackRate: (value: number) => void;
  displayInlineCaption?: boolean;
  setDisplayInlineCaption?: (value: boolean) => void;
  onShare?: () => void;
  onDecoded?: (data: { duration: number; sampleRate: number }) => void;
}) => {
  const { EnjoyApp } = useContext(AppSettingsProviderContext);
  const {
    mediaId,
    mediaType,
    mediaUrl,
    mediaMd5,
    transcription,
    height = 200,
    currentTime,
    setCurrentTime,
    currentSegmentIndex,
    setCurrentSegmentIndex,
    initialized,
    setInitialized,
    recordButtonVisible,
    setRecordButtonVisible,
    seek,
    zoomRatio,
    setZoomRatio,
    isPlaying,
    setIsPlaying,
    playMode,
    setPlayMode,
    playBackRate,
    setPlaybackRate,
    displayInlineCaption,
    setDisplayInlineCaption,
    onShare,
    onDecoded,
  } = props;
  if (!mediaUrl) return;

  const [wavesurfer, setWavesurfer] = useState(null);
  const [waveform, setWaveForm] = useState<WaveFormDataType>(null);
  const containerRef = useRef<HTMLDivElement>();
  const [mediaProvider, setMediaProvider] = useState<
    HTMLAudioElement | HTMLVideoElement
  >(null);
export const MediaPlayer = () => {
  const { media, setMediaProvider } = useContext(MediaPlayerProviderContext);
  const mediaRemote = useMediaRemote();

  const [transcriptionResult, setTranscriptionResult] = useState<
    TranscriptionResultSegmentGroupType[] | null
  >(null);

  const [transcriptionDirty, setTranscriptionDirty] = useState<boolean>(false);
  const [regions, setRegions] = useState<Regions | null>(null);

  const debouncedTRanscription = useDebounce(transcriptionResult, 500);

  const resetTranscription = () => {
    if (!transcriptionDirty) return;
    if (!transcription?.result) return;

    setTranscriptionResult(cloneDeep(transcription.result));
    setTranscriptionDirty(false);
  };

  const saveTranscription = () => {
    if (!transcriptionDirty) return;
    if (!debouncedTRanscription) return;

    EnjoyApp.transcriptions.update(transcription.id, {
      result: debouncedTRanscription,
    });
  };

  const onPlayClick = useCallback(() => {
    wavesurfer.isPlaying() ? wavesurfer.pause() : wavesurfer.play();
  }, [wavesurfer]);

  const handlePlaybackRateChange = useCallback(
    (rate: number) => {
      wavesurfer.setPlaybackRate(rate);
      setPlaybackRate(wavesurfer.getPlaybackRate());
    },
    [initialized]
  );

  const findCurrentSegment = (time: number) => {
    if (!transcription) return;
    if (isPlaying && playMode === "loop") return;

    time = Math.round(time * 1000);
    const index = transcriptionResult.findIndex(
      (t) => time >= t.offsets.from && time < t.offsets.to
    );
    if (index === -1) return;
    setCurrentSegmentIndex(index);
  };

  const addSegmentRegion = (from: number, to: number) => {
    if (!initialized) return;

    const span = document.createElement("span");
    span.innerText = secondsToTimestamp(from) + ` (${(to - from).toFixed(2)}s)`;
    span.style.padding = "1rem";
    span.style.fontSize = "0.9rem";

    if (regions) {
      regions.clearRegions();
      const region = regions.addRegion({
        start: from,
        end: to,
        color: "rgba(255, 0, 0, 0.03)",
        drag: false,
        resize: true,
        content: span,
      });
      renderPitchContour(region);
    }
  };
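  // e.g. a segment spanning 12.5s–15.5s gets the region label
  // secondsToTimestamp(12.5) + " (3.00s)" (illustrative values).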

  const renderPitchContour = (region: RegionType) => {
    if (!region) return;
    if (!waveform?.frequencies?.length) return;
    if (!wavesurfer) return;

    const duration = wavesurfer.getDuration();
    const fromIndex = Math.round(
      (region.start / duration) * waveform.frequencies.length
    );
    const toIndex = Math.round(
      (region.end / duration) * waveform.frequencies.length
    );

    const containerId = `pitch-contour-${mediaId}-${currentSegmentIndex}`;
    const wrapper = wavesurfer.renderer.getWrapper();

    const wrapperWidth = wrapper.getBoundingClientRect().width;
    const canvas = PitchContour({
      frequencies: waveform.frequencies.slice(fromIndex, toIndex),
      height,
    });
    const offsetLeft = (region.start / duration) * wrapperWidth;
    const width = ((region.end - region.start) / duration) * wrapperWidth;
    const pitchContourWidthContainer = document.createElement("div");
    pitchContourWidthContainer.appendChild(canvas);

    pitchContourWidthContainer.style.position = "absolute";
    pitchContourWidthContainer.style.top = "0";
    pitchContourWidthContainer.style.left = "0";

    canvas.style.width = `${width}px`;
    pitchContourWidthContainer.style.height = `${height}px`;
    pitchContourWidthContainer.style.marginLeft = `${offsetLeft}px`;
    pitchContourWidthContainer.className = "pitch-contour";
    pitchContourWidthContainer.id = containerId;

    const regionDuration = region.end - region.start;

    if (displayInlineCaption) {
      const captionContainer = document.createElement("div");
      captionContainer.style.position = "absolute";
      captionContainer.style.bottom = "0";
      captionContainer.style.width = `${width}px`;
      captionContainer.style.fontSize = "0.75rem";
      captionContainer.style.opacity = "0.75";
      transcriptionResult?.[currentSegmentIndex]?.segments?.forEach(
        (segment, index) => {
          const span = document.createElement("span");
          span.innerText = segment.text;
          span.style.position = "absolute";
          span.style.bottom = "0";
          span.style.left = `${
            ((segment.offsets.from / 1000 - region.start) / regionDuration) *
            width
          }px`;
          if (index % 2 === 1) {
            span.style.paddingBottom = "0.75rem";
          }

          captionContainer.appendChild(span);
        }
      );
      pitchContourWidthContainer.appendChild(captionContainer);
    }

    wrapper.querySelector("#" + containerId)?.remove();
    wrapper.appendChild(pitchContourWidthContainer);
  };
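  // Example of the region-to-pixel mapping above (illustrative numbers): with
  // duration = 60s, wrapperWidth = 9000px and a region of [12s, 15s],
  // offsetLeft = 12 / 60 * 9000 = 1800px and width = 3 / 60 * 9000 = 450px,
  // so the contour canvas is laid exactly over the region's slice of the waveform.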

  const reRenderPitchContour = () => {
    if (!wavesurfer) return;
    const wrapper = wavesurfer.renderer.getWrapper();
    wrapper
      .querySelectorAll(".pitch-contour")
      .forEach((canvas: HTMLCanvasElement) => {
        canvas.remove();
      });

    if (!regions) return;

    const region = regions.getRegions()[0];
    if (!region) return;

    renderPitchContour(region);
  };

  useEffect(() => {
    if (!transcription) return;
    setTranscriptionDirty(false);

    setTranscriptionResult(cloneDeep(transcription.result));
  }, [transcription]);

  // Initialize wavesurfer
  const initializeWavesurfer = async () => {
    if (!mediaProvider) return;
    if (!containerRef.current) return;

    const ws = WaveSurfer.create({
      container: containerRef.current,
      height,
      waveColor: "#ddd",
      progressColor: "rgba(0, 0, 0, 0.25)",
      cursorColor: "#dc143c",
      barWidth: 1,
      autoScroll: true,
      minPxPerSec: 150,
      autoCenter: false,
      dragToSeek: false,
      media: mediaProvider,
      peaks: waveform ? [waveform.peaks] : undefined,
      duration: waveform ? waveform.duration : undefined,
    });

    const blob = await fetch(mediaUrl).then((res) => res.blob());

    if (waveform) {
      ws.loadBlob(blob, [waveform.peaks], waveform.duration);
      setInitialized(true);
    } else {
      ws.loadBlob(blob);
    }

    setRegions(ws.registerPlugin(Regions.create()));
    setWavesurfer(ws);
  };

  useEffect(() => {
    initializeWavesurfer();

    return () => {
      wavesurfer?.destroy();
    };
  }, [mediaUrl, height, mediaProvider]);

  // Install listeners for wavesurfer
  useEffect(() => {
    if (!wavesurfer) return;
    setCurrentTime(0);
    setIsPlaying(false);

    const subscriptions = [
      wavesurfer.on("play", () => setIsPlaying(true)),
      wavesurfer.on("pause", () => setIsPlaying(false)),
      wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
      wavesurfer.on("timeupdate", (time: number) => setCurrentTime(time)),
      wavesurfer.on("decode", () => {
        if (waveform?.frequencies) return;

        const peaks: Float32Array = wavesurfer
          .getDecodedData()
          .getChannelData(0);
        const duration: number = wavesurfer.getDuration();
        const sampleRate = wavesurfer.options.sampleRate;
        const _frequencies = extractFrequencies({ peaks, sampleRate });
        const _waveform = {
          peaks: Array.from(peaks),
          duration,
          sampleRate,
          frequencies: _frequencies,
        };
        EnjoyApp.waveforms.save(mediaMd5, _waveform);
        setWaveForm(_waveform);
        onDecoded &&
          onDecoded({
            duration,
            sampleRate,
          });
      }),
      wavesurfer.on("ready", () => {
        setInitialized(true);
      }),
    ];

    return () => {
      subscriptions.forEach((unsub) => unsub());
    };
  }, [wavesurfer]);

  useEffect(() => {
    if (!transcriptionResult) return;
    if (transcriptionDirty) return;

    const currentSegment = transcriptionResult[currentSegmentIndex];
    if (!currentSegment) return;

    addSegmentRegion(
      currentSegment.offsets.from / 1000.0,
      currentSegment.offsets.to / 1000.0
    );

    // set zoom ratio to fit the current segment
    if (!isPlaying) {
      setZoomRatio(calcFitZoomRatio());
    }
  }, [
    currentSegmentIndex,
    initialized,
    transcriptionDirty,
    transcriptionResult,
  ]);

  useEffect(() => {
    if (!transcriptionResult) return;

    findCurrentSegment(currentTime);
  }, [currentTime, transcriptionResult]);

  useEffect(() => {
    if (!regions) return;

    const subscriptions = [
      wavesurfer.on("finish", () => {
        if (playMode !== "loop") return;

        regions?.getRegions()[0]?.play();
      }),

      regions.on("region-updated", (region) => {
        const from = region.start;
        const to = region.end;

        const offsets = {
          from: Math.round(from * 1000),
          to: Math.round(to * 1000),
        };

        const timestamps = {
          from: [
            secondsToTimestamp(from),
            Math.round((from * 1000) % 1000),
          ].join(","),
          to: [secondsToTimestamp(to), Math.round((to * 1000) % 1000)].join(
            ","
          ),
        };

        const _transcription = cloneDeep(transcriptionResult);
        _transcription[currentSegmentIndex].offsets = offsets;
        _transcription[currentSegmentIndex].timestamps = timestamps;

        // ensure that the previous segment ends before the current segment
        if (
          currentSegmentIndex > 0 &&
          _transcription[currentSegmentIndex - 1].offsets.to > offsets.from
        ) {
          _transcription[currentSegmentIndex - 1].offsets.to = offsets.from;
        }

        // ensure that the next segment starts after the current segment
        if (
          currentSegmentIndex < _transcription.length - 1 &&
          _transcription[currentSegmentIndex + 1].offsets.from < offsets.to
        ) {
          _transcription[currentSegmentIndex + 1].offsets.from = offsets.to;
        }

        setTranscriptionResult(_transcription);
        setTranscriptionDirty(true);

        renderPitchContour(region);
      }),
      regions.on("region-out", (region: Region) => {
        if (isPlaying && playMode === "loop") {
          region.play();
        } else if (isPlaying && playMode === "single") {
          wavesurfer.pause();
          wavesurfer.seekTo(region.start / wavesurfer.getDuration());
        } else {
          resetTranscription();
        }
      }),
    ];

    return () => {
      subscriptions.forEach((unsub) => unsub());
    };
  }, [regions, isPlaying, playMode, currentSegmentIndex, transcriptionDirty]);
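  // Worked example of the conversion above (illustrative): region.start = 1.234s
  // gives offsets.from = Math.round(1.234 * 1000) = 1234 and a millisecond
  // remainder of 234, so timestamps.from becomes "00:00:01,234" assuming
  // secondsToTimestamp renders "00:00:01" (the SRT-style timestamp format).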

  useEffect(() => {
    if (!wavesurfer) return;
    if (!initialized) return;

    wavesurfer.zoom(zoomRatio * minPxPerSecBase);
    reRenderPitchContour();
  }, [zoomRatio, wavesurfer, initialized, displayInlineCaption]);

  useEffect(() => {
    if (typeof seek?.seekTo !== "number") return;
    if (!wavesurfer) return;
    if (!initialized) return;

    wavesurfer.seekTo(seek?.seekTo / wavesurfer.getDuration());
    wavesurfer.setScrollTime(seek?.seekTo);
  }, [seek, wavesurfer, initialized]);

  // Handle media provider
  useEffect(() => {
    if (!mediaRemote) return;
    if (!mediaProvider) return;

    if (mediaType !== "Video") return;
    if (recordButtonVisible) {
      mediaRemote.togglePictureInPicture();
    } else {
      mediaRemote.exitPictureInPicture();
    }
  }, [mediaRemote, mediaProvider, recordButtonVisible]);

  useEffect(() => {
    if (!wavesurfer) return;

    if (isPlaying) {
      wavesurfer.play();
    } else {
      wavesurfer.pause();
    }
  }, [wavesurfer, isPlaying]);

  useEffect(() => {
    EnjoyApp.waveforms.find(mediaMd5).then((waveform) => {
      setWaveForm(waveform);
      onDecoded &&
        onDecoded({
          duration: waveform.duration,
          sampleRate: waveform.sampleRate,
        });
    });
  }, []);

  const calcFitZoomRatio = () => {
    if (!containerRef.current) return;
    if (!wavesurfer) return;

    const currentSegment = transcriptionResult?.[currentSegmentIndex];
    if (!currentSegment) return;

    const containerWidth = containerRef.current.getBoundingClientRect().width;

    const duration =
      currentSegment.offsets.to / 1000.0 - currentSegment.offsets.from / 1000.0;
    const fitZoomRatio = containerWidth / duration / minPxPerSecBase;

    return fitZoomRatio;
  };
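  // Numeric example (illustrative): a 900px-wide container and a 3s segment give
  // fitZoomRatio = 900 / 3 / 150 = 2, so wavesurfer.zoom(2 * minPxPerSecBase)
  // renders 300 px/s and the segment spans the full container width.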

  useHotkeys(
    "Space",
    (keyboardEvent, _hotkeyEvent) => {
      if (!wavesurfer) return;

      keyboardEvent.preventDefault();
      onPlayClick();
    },
    [wavesurfer]
  );
  if (!media?.src) return null;

  return (
    <>
      <div
        className="mb-2"
        ref={containerRef}
        data-testid="media-player-container"
      />
      <div className="mb-2 flex justify-center">
        <MediaPlayerControls
          isPlaying={isPlaying}
          onPlayOrPause={onPlayClick}
          onNext={() => {
            if (!transcription) return;

            const segment = transcription?.result?.[currentSegmentIndex + 1];
            if (!segment) return;

            wavesurfer.seekTo(
              segment.offsets.from / 1000 / wavesurfer.getDuration()
            );
          }}
          onPrev={() => {
            if (!transcription) return;

            const segment = transcription?.result?.[currentSegmentIndex - 1];
            if (!segment) return;

            wavesurfer.seekTo(
              segment.offsets.from / 1000 / wavesurfer.getDuration()
            );
          }}
          playMode={playMode}
          setPlayMode={setPlayMode}
          playbackRate={playBackRate}
          setPlaybackRate={handlePlaybackRateChange}
          zoomRatio={zoomRatio}
          setZoomRatio={setZoomRatio}
          fitZoomRatio={calcFitZoomRatio()}
          recordButtonVisible={recordButtonVisible}
          setRecordButtonVisible={setRecordButtonVisible}
          transcriptionDirty={transcriptionDirty}
          resetTranscription={resetTranscription}
          saveTranscription={saveTranscription}
          wavesurferOptions={wavesurfer?.options}
          setWavesurferOptions={(options) => wavesurfer?.setOptions(options)}
          displayInlineCaption={displayInlineCaption}
          setDisplayInlineCaption={setDisplayInlineCaption}
          onShare={onShare}
        />
      </div>

      {initialized && (
        <div className={recordButtonVisible && mediaProvider ? "" : "hidden"}>
          <MediaCaption
            key={`${mediaId}-${currentSegmentIndex}`}
            mediaId={mediaId}
            mediaType={mediaType}
            currentTime={currentTime}
            transcription={transcriptionResult?.[currentSegmentIndex]}
            onSeek={(time) => {
              wavesurfer.seekTo(time / wavesurfer.getDuration());
            }}
            isPlaying={isPlaying}
            setIsPlaying={setIsPlaying}
          />
        </div>
      )}

      <div
        className={recordButtonVisible && mediaProvider ? "hidden" : "flex-1"}
    <div className="px-4" data-testid="media-player">
      <VidstackMediaPlayer
        controls
        src={media.src}
        onCanPlayThrough={(detail, nativeEvent) => {
          mediaRemote.setTarget(nativeEvent.target);
          const { provider } = detail;
          if (isAudioProvider(provider)) {
            setMediaProvider(provider.audio);
          } else if (isVideoProvider(provider)) {
            setMediaProvider(provider.video);
          }
        }}
      >
        <VidstackMediaPlayer
          src={mediaUrl}
          onCanPlayThrough={(detail, nativeEvent) => {
            mediaRemote.setTarget(nativeEvent.target);
            const { provider } = detail;
            if (isAudioProvider(provider)) {
              setMediaProvider(provider.audio);
            } else if (isVideoProvider(provider)) {
              setMediaProvider(provider.video);
            }
          }}
        >
          <MediaProvider />

          {mediaType === "Audio" && (
            <DefaultAudioLayout icons={defaultLayoutIcons} />
          )}

          {mediaType === "Video" && (
            <>
              <DefaultVideoLayout icons={defaultLayoutIcons} />
              <div className="vds-captions">
                <div className="absolute mx-auto bottom-[15%] flex items-center justify-center w-full">
                  <div className="flex">
                    <MediaCaption
                      mediaId={mediaId}
                      mediaType={mediaType}
                      className="mx-auto w-5/6 text-center bg-primary/70 text-xl text-white"
                      transcription={transcriptionResult?.[currentSegmentIndex]}
                      currentTime={currentTime}
                      isPlaying={isPlaying}
                      setIsPlaying={setIsPlaying}
                    />
                  </div>
                </div>
              </div>
            </>
          )}
        </VidstackMediaPlayer>
      </div>
    </>
      <MediaProvider />
      <DefaultAudioLayout icons={defaultLayoutIcons} />
    </VidstackMediaPlayer>
  </div>
  );
};
enjoy/src/renderer/components/medias/media-recorder.tsx (new file, 145 lines)
@@ -0,0 +1,145 @@
import { useEffect, useState, useContext, useRef } from "react";
import {
  MediaPlayerProviderContext,
  AppSettingsProviderContext,
} from "@renderer/context";
import RecordPlugin from "wavesurfer.js/dist/plugins/record";
import WaveSurfer from "wavesurfer.js";
import { t } from "i18next";
import { useTranscribe } from "@renderer/hooks";
import { toast } from "@renderer/components/ui";
import {
  FFMPEG_TRIM_SILENCE_OPTIONS,
  FFMPEG_CONVERT_WAV_OPTIONS,
} from "@/constants";

export const MediaRecorder = (props: { height?: number }) => {
  const { height = 192 } = props;
  const {
    media,
    isRecording,
    setIsRecording,
    transcription,
    currentSegmentIndex,
  } = useContext(MediaPlayerProviderContext);
  const [access, setAccess] = useState<boolean>(false);
  const [duration, setDuration] = useState<number>(0);
  const { EnjoyApp } = useContext(AppSettingsProviderContext);
  const { transcode } = useTranscribe();

  const ref = useRef(null);

  const askForMediaAccess = () => {
    EnjoyApp.system.preferences.mediaAccess("microphone").then((access) => {
      if (access) {
        setAccess(true);
      } else {
        setAccess(false);
        toast.warning(t("noMicrophoneAccess"));
      }
    });
  };

  const createRecording = async (params: { blob: Blob; duration: number }) => {
    if (!media) return;

    const { blob, duration } = params;

    toast.promise(
      async () => {
        let output: Blob;
        output = await transcode(blob, [
          // ...FFMPEG_TRIM_SILENCE_OPTIONS,
          ...FFMPEG_CONVERT_WAV_OPTIONS,
        ]);

        const currentSegment =
          transcription?.result?.timeline?.[currentSegmentIndex];
        if (!currentSegment) return;

        return EnjoyApp.recordings.create({
          targetId: media.id,
          targetType: media.mediaType,
          blob: {
            type: output.type.split(";")[0],
            arrayBuffer: await output.arrayBuffer(),
          },
          referenceId: currentSegmentIndex,
          referenceText: currentSegment.text,
          duration,
        });
      },
      {
        loading: t("savingRecording"),
        success: t("recordingSaved"),
        error: (e) => t("failedToSaveRecording") + " : " + e.message,
        position: "bottom-right",
      },
    );
  };

  useEffect(() => {
    if (!access) return;
    if (!isRecording) return;
    if (!ref.current) return;

    const ws = WaveSurfer.create({
      container: ref.current,
      fillParent: true,
      height,
      autoCenter: false,
      normalize: false,
    });

    const record = ws.registerPlugin(RecordPlugin.create());
    let startAt = 0;

    record.on("record-start", () => {
      startAt = Date.now();
    });

    record.on("record-end", async (blob: Blob) => {
      createRecording({ blob, duration: Date.now() - startAt });
    });
    let interval: NodeJS.Timeout;

    RecordPlugin.getAvailableAudioDevices()
      .then((devices) => devices.find((d) => d.kind === "audioinput"))
      .then((device) => {
        if (device) {
          record.startRecording({ deviceId: device.deviceId });
          setDuration(0);
          interval = setInterval(() => {
            setDuration((duration) => {
              if (duration >= 300) {
                setIsRecording(false);
              }
              return duration + 1;
            });
          }, 100);
        } else {
          toast.error(t("cannotFindMicrophone"));
        }
      });

    return () => {
      clearInterval(interval);
      record.stopRecording();
      ws.destroy();
    };
  }, [ref, isRecording, access]);
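  // Note: the interval above fires every 100ms, so `duration` counts in tenths
  // of a second; the 300-tick cap stops recording after 30 seconds, and the UI
  // below renders it as `duration / 10` seconds.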

  useEffect(() => {
    askForMediaAccess();
  }, []);

  return (
    <div className="border rounded-xl shadow-lg relative">
      <span className="absolute bottom-2 right-2 serif">
        {duration / 10}
        <span className="text-xs"> / 300</span>
      </span>
      <div className="h-full" ref={ref}></div>
    </div>
  );
};
enjoy/src/renderer/components/medias/media-recordings.tsx (new file, 149 lines)
@@ -0,0 +1,149 @@
|
||||
import { useContext, useRef, useEffect, useState } from "react";
|
||||
import {
|
||||
AlertDialog,
|
||||
AlertDialogHeader,
|
||||
AlertDialogDescription,
|
||||
AlertDialogTitle,
|
||||
AlertDialogContent,
|
||||
AlertDialogFooter,
|
||||
  AlertDialogCancel,
  AlertDialogAction,
  Button,
  DropdownMenu,
  DropdownMenuItem,
  DropdownMenuTrigger,
  DropdownMenuContent,
  ScrollArea,
} from "@renderer/components/ui";
import {
  AppSettingsProviderContext,
  MediaPlayerProviderContext,
} from "@renderer/context";
import {
  LoaderIcon,
  MicIcon,
  MoreHorizontalIcon,
  Trash2Icon,
} from "lucide-react";
import { t } from "i18next";
import { formatDateTime, formatDuration } from "@renderer/lib/utils";

export const MediaRecordings = () => {
  const containerRef = useRef<HTMLDivElement>();
  const {
    recordings = [],
    hasMoreRecordings,
    loadingRecordings,
    fetchRecordings,
    currentRecording,
    setCurrentRecording,
    currentSegmentIndex,
  } = useContext(MediaPlayerProviderContext);

  const { EnjoyApp } = useContext(AppSettingsProviderContext);
  const [selectedRecording, setSelectedRecording] = useState(null);

  const handleDelete = () => {
    if (!selectedRecording) return;

    EnjoyApp.recordings.destroy(selectedRecording.id);
  };

  useEffect(() => {
    setCurrentRecording(recordings[0]);
  }, [currentSegmentIndex, recordings]);

  return (
    <div ref={containerRef} data-testid="media-recordings-result">
      {recordings.length === 0 && (
        <div
          className="text-center px-6 py-8 text-sm text-muted-foreground"
          dangerouslySetInnerHTML={{
            __html: t("noRecordingForThisSegmentYet"),
          }}
        ></div>
      )}

      {recordings.map((recording) => (
        <div
          key={recording.id}
          className={`flex items-center justify-between px-4 py-2 cursor-pointer ${
            recording.id === currentRecording?.id ? "bg-muted" : ""
          }`}
          style={{
            borderLeftColor: `#${recording.md5.substr(0, 6)}`,
            borderLeftWidth: 3,
          }}
          onClick={() => {
            setCurrentRecording(recording);
          }}
        >
          <div className="flex items-center space-x-2">
            <MicIcon className="w-4 h-4" />
            <span>{formatDuration(recording.duration, "ms")}</span>
          </div>
          <div className="flex items-center space-x-2">
            <span className="text-sm text-muted-foreground">
              {formatDateTime(recording.createdAt)}
            </span>

            <DropdownMenu>
              <DropdownMenuTrigger>
                <MoreHorizontalIcon className="w-4 h-4" />
              </DropdownMenuTrigger>

              <DropdownMenuContent>
                <DropdownMenuItem
                  className="text-destructive cursor-pointer"
                  onClick={() => setSelectedRecording(recording)}
                >
                  <Trash2Icon className="w-4 h-4 mr-2" />
                  <span>{t("delete")}</span>
                </DropdownMenuItem>
              </DropdownMenuContent>
            </DropdownMenu>
          </div>
        </div>
      ))}

      {hasMoreRecordings && (
        <div className="py-2 flex items-center justify-center">
          <Button
            variant="outline"
            size="sm"
            disabled={loadingRecordings}
            onClick={() => fetchRecordings(recordings.length)}
          >
            {loadingRecordings && (
              <LoaderIcon className="w-4 h-4 animate-spin mr-2" />
            )}
            {t("loadMore")}
          </Button>
        </div>
      )}

      <AlertDialog
        open={Boolean(selectedRecording)}
        onOpenChange={(value) => {
          if (value) return;
          setSelectedRecording(null);
        }}
      >
        <AlertDialogContent>
          <AlertDialogHeader>
            <AlertDialogTitle>{t("deleteRecording")}</AlertDialogTitle>
            <AlertDialogDescription>
              {t("deleteRecordingConfirmation")}
            </AlertDialogDescription>
          </AlertDialogHeader>
          <AlertDialogFooter>
            <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
            <AlertDialogAction asChild>
              <Button onClick={handleDelete}>{t("delete")}</Button>
            </AlertDialogAction>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>
    </div>
  );
};
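// Editor's note: the "load more" button above pages by passing the current list
// length as the offset — fetchRecordings(recordings.length). A minimal sketch of
// that offset-based pagination, assuming a query API that accepts { limit, offset }
// (the fetchPage signature here is illustrative, not the app's actual IPC call):
const PAGE_SIZE = 10;

async function loadMore(
  current: RecordingType[],
  fetchPage: (opts: { limit: number; offset: number }) => Promise<RecordingType[]>
) {
  // The next page starts right after the items already loaded.
  const next = await fetchPage({ limit: PAGE_SIZE, offset: current.length });
  return {
    recordings: [...current, ...next],
    // Fewer than PAGE_SIZE rows means the end of the list was reached.
    hasMore: next.length === PAGE_SIZE,
  };
}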
78 enjoy/src/renderer/components/medias/media-tabs.tsx Normal file
@@ -0,0 +1,78 @@
import { useEffect, useContext, useState } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import {
  MediaPlayer,
  MediaTranscription,
  MediaInfoPanel,
  MediaRecordings,
} from "@renderer/components";
import { ScrollArea } from "@renderer/components/ui";
import { t } from "i18next";

export const MediaTabs = () => {
  const { media, decoded } = useContext(MediaPlayerProviderContext);
  const [tab, setTab] = useState("player");

  useEffect(() => {
    if (!decoded) return;

    setTab("transcription");
  }, [decoded]);

  if (!media) return null;

  return (
    <ScrollArea className="h-full">
      <div className="flex items-center space-x-2 justify-between p-1 bg-muted rounded-t-lg mb-2 text-sm sticky top-0 z-10">
        {media.mediaType === "Video" && (
          <div
            className={`rounded cursor-pointer px-2 py-1 text-sm text-center capitalize ${
              tab === "player" ? "bg-background" : ""
            }`}
            onClick={() => setTab("player")}
          >
            {t("player")}
          </div>
        )}

        <div
          className={`rounded cursor-pointer px-2 py-1 text-sm text-center capitalize ${
            tab === "transcription" ? "bg-background" : ""
          }`}
          onClick={() => setTab("transcription")}
        >
          {t("transcription")}
        </div>
        <div
          className={`rounded cursor-pointer px-2 py-1 text-sm text-center capitalize ${
            tab === "recordings" ? "bg-background" : ""
          }`}
          onClick={() => setTab("recordings")}
        >
          {t("myRecordings")}
        </div>
        <div
          className={`rounded cursor-pointer px-2 py-1 text-sm text-center capitalize ${
            tab === "info" ? "bg-background" : ""
          }`}
          onClick={() => setTab("info")}
        >
          {t("mediaInfo")}
        </div>
      </div>

      <div className={tab === "player" ? "" : "hidden"}>
        <MediaPlayer />
      </div>
      <div className={tab === "recordings" ? "" : "hidden"}>
        <MediaRecordings />
      </div>
      <div className={tab === "transcription" ? "" : "hidden"}>
        <MediaTranscription />
      </div>
      <div className={tab === "info" ? "" : "hidden"}>
        <MediaInfoPanel />
      </div>
    </ScrollArea>
  );
};
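// Editor's note (an assumption based on the markup above, not stated in the diff):
// the four panels stay mounted and are toggled with the `hidden` class rather than
// conditionally rendered, e.g.
//
//   {tab === "player" && <MediaPlayer />}              // would unmount the player
//   <div className={tab === "player" ? "" : "hidden"}> // keeps wavesurfer alive
//     <MediaPlayer />
//   </div>
//
// so switching tabs does not tear down the wavesurfer instance or playback state.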
@@ -1,4 +1,12 @@
import { useEffect, useContext, useRef, useState } from "react";
import {
  AppSettingsProviderContext,
  DbProviderContext,
  MediaPlayerProviderContext,
} from "@renderer/context";
import { t } from "i18next";
import {
  Button,
  AlertDialog,
  AlertDialogTrigger,
  AlertDialogFooter,
@@ -8,182 +16,150 @@ import {
  AlertDialogDescription,
  AlertDialogCancel,
  AlertDialogAction,
  Skeleton,
  ScrollArea,
  Button,
  PingPoint,
} from "@renderer/components/ui";
import React, { useEffect, useContext, useState } from "react";
import { t } from "i18next";
import { LoaderIcon, CheckCircleIcon, MicIcon } from "lucide-react";
import {
  DbProviderContext,
  AppSettingsProviderContext,
  AISettingsProviderContext,
} from "@renderer/context";
import { AlignmentResult } from "echogarden/dist/api/API.d.js";
import { formatDuration } from "@renderer/lib/utils";

export const MediaTranscription = (props: {
  transcription: TranscriptionType;
  progress: number;
  transcribe: () => void;
  transcribing: boolean;
  mediaId: string;
  mediaType: "Audio" | "Video";
  mediaName?: string;
  currentSegmentIndex?: number;
  onSelectSegment?: (index: number) => void;
}) => {
  const { addDblistener, removeDbListener } = useContext(DbProviderContext);
  const { whisperConfig } = useContext(AISettingsProviderContext);
  const { EnjoyApp } = useContext(AppSettingsProviderContext);
export const MediaTranscription = () => {
  const containerRef = useRef<HTMLDivElement>();
  const {
    transcription,
    transcribing,
    progress,
    transcribe,
    mediaId,
    mediaType,
    mediaName,
    media,
    currentSegmentIndex,
    onSelectSegment,
  } = props;
  const containerRef = React.createRef<HTMLDivElement>();
    wavesurfer,
    setCurrentSegmentIndex,
    transcription,
    generateTranscription,
    transcribing,
    transcribingProgress,
  } = useContext(MediaPlayerProviderContext);
  const { EnjoyApp } = useContext(AppSettingsProviderContext);
  const { addDblistener, removeDbListener } = useContext(DbProviderContext);

  const [recordingStats, setRecordingStats] =
    useState<SegementRecordingStatsType>([]);

  const fetchSegmentStats = async () => {
    if (!mediaId) return;
    if (!media) return;

    EnjoyApp.recordings.groupBySegment(mediaId, mediaType).then((stats) => {
      setRecordingStats(stats);
    });
    EnjoyApp.recordings
      .groupBySegment(media.id, media.mediaType)
      .then((stats) => {
        setRecordingStats(stats);
      });
  };

  useEffect(() => {
    if (!transcription?.result) return;

    addDblistener(fetchSegmentStats);
    fetchSegmentStats();

    return () => {
      removeDbListener(fetchSegmentStats);
    };
  }, [transcription]);
  }, [transcription?.result]);

  useEffect(() => {
    if (!containerRef?.current) return;

    containerRef.current
      ?.querySelector(`#segment-${currentSegmentIndex}`)
      ?.scrollIntoView({
        block: "center",
        inline: "center",
      } as ScrollIntoViewOptions);
  }, [currentSegmentIndex, transcription]);
  }, [currentSegmentIndex, transcription, containerRef]);

  if (!transcription)
    return (
      <div className="p-4 w-full">
        <TranscriptionPlaceholder />
      </div>
    );
  if (!transcription?.result) {
    return null;
  }

  return (
    <div
      className="w-full h-full flex flex-col"
      data-testid="media-transcription"
    >
      <div className="mb-4 flex items-center justify-between">
        <div className="flex items-center space-x-2">
          {transcribing || transcription.state === "processing" ? (
            <>
              <PingPoint colorClassName="bg-yellow-500" />
              <div className="text-sm">
                {whisperConfig.service === "local" && `${progress}%`}
              </div>
            </>
          ) : transcription.state === "finished" ? (
            <CheckCircleIcon className="text-green-500 w-4 h-4" />
          ) : (
            <PingPoint colorClassName="bg-mute" />
          )}
          <span className="capitalize">{t("transcript")}</span>
    <div ref={containerRef} data-testid="media-transcription-result">
      <div className="px-4 py-1 bg-background">
        <div className="flex items-center justify-between">
          <div className="flex items-center space-x-2">
            {transcribing || transcription.state === "processing" ? (
              <>
                <PingPoint colorClassName="bg-yellow-500" />
                <div className="text-sm">
                  {transcribingProgress > 0 && `${transcribingProgress}%`}
                </div>
              </>
            ) : transcription.state === "finished" ? (
              <CheckCircleIcon className="text-green-500 w-4 h-4" />
            ) : (
              <PingPoint colorClassName="bg-mute" />
            )}
            <span className="capitalize">{t("transcript")}</span>
          </div>
          <AlertDialog>
            <AlertDialogTrigger asChild>
              <Button
                variant="outline"
                size="sm"
                disabled={transcribing || transcription.state === "processing"}
                className="capitalize"
              >
                {(transcribing || transcription.state === "processing") && (
                  <LoaderIcon className="animate-spin w-4 mr-2" />
                )}
                {transcription.result ? t("regenerate") : t("transcribe")}
              </Button>
            </AlertDialogTrigger>
            <AlertDialogContent>
              <AlertDialogHeader>
                <AlertDialogTitle>{t("transcribe")}</AlertDialogTitle>
                <AlertDialogDescription>
                  {t("transcribeMediaConfirmation", {
                    name: media.name,
                  })}
                </AlertDialogDescription>
              </AlertDialogHeader>
              <AlertDialogFooter>
                <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
                <AlertDialogAction onClick={generateTranscription}>
                  {t("transcribe")}
                </AlertDialogAction>
              </AlertDialogFooter>
            </AlertDialogContent>
          </AlertDialog>
        </div>
        <AlertDialog>
          <AlertDialogTrigger asChild>
            <Button
              disabled={transcribing || transcription.state === "processing"}
              className="capitalize"
            >
              {(transcribing || transcription.state === "processing") && (
                <LoaderIcon className="animate-spin w-4 mr-2" />
              )}
              {transcription.result ? t("regenerate") : t("transcribe")}
            </Button>
          </AlertDialogTrigger>
          <AlertDialogContent>
            <AlertDialogHeader>
              <AlertDialogTitle>{t("transcribe")}</AlertDialogTitle>
              <AlertDialogDescription>
                {t("transcribeAudioConfirmation", {
                  name: mediaName,
                })}
              </AlertDialogDescription>
            </AlertDialogHeader>
            <AlertDialogFooter>
              <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
              <AlertDialogAction onClick={transcribe}>
                {t("transcribe")}
              </AlertDialogAction>
            </AlertDialogFooter>
          </AlertDialogContent>
        </AlertDialog>
      </div>

      {transcription?.result ? (
        <ScrollArea
          ref={containerRef}
          className="flex-1 px-2"
          data-testid="media-transcription-result"
        >
          {transcription.result.map((t, index) => (
            <div
              key={index}
              id={`segment-${index}`}
              className={`py-1 px-2 mb-2 cursor-pointer hover:bg-yellow-400/25 ${
                currentSegmentIndex === index ? "bg-yellow-400/25" : ""
              }`}
              onClick={() => {
                onSelectSegment?.(index);
              }}
            >
              <div className="flex items-center justify-between">
                <span className="text-xs opacity-50">#{index + 1}</span>

                <div className="flex items-center space-x-2">
                  {(recordingStats || []).findIndex(
                    (s) => s.referenceId === index
                  ) !== -1 && <MicIcon className="w-3 h-3 text-sky-500" />}
                  <span className="text-xs opacity-50">
                    {t.timestamps.from.split(",")[0]}
                  </span>
                </div>
      {(transcription.result as AlignmentResult).timeline.map(
        (sentence, index) => (
          <div
            key={index}
            id={`segment-${index}`}
            className={`py-2 px-4 cursor-pointer hover:bg-yellow-400/10 ${
              currentSegmentIndex === index ? "bg-yellow-400/25" : ""
            }`}
            onClick={() => {
              wavesurfer.seekTo(
                Math.floor((sentence.startTime / media.duration) * 1e8) / 1e8
              );
              wavesurfer.setScrollTime(sentence.startTime);
              setCurrentSegmentIndex(index);
            }}
          >
            <div className="flex items-center justify-between">
              <span className="text-xs opacity-50">#{index + 1}</span>
              <div className="flex items-center space-x-2">
                {(recordingStats || []).findIndex(
                  (s) => s.referenceId === index
                ) !== -1 && <MicIcon className="w-3 h-3 text-sky-500" />}
                <span className="text-xs opacity-50">
                  {formatDuration(sentence.startTime, "s")}
                </span>
              </div>
              <p className="">{t.text}</p>
            </div>
          ))}
        </ScrollArea>
      ) : (
        <TranscriptionPlaceholder />
            <p className="">{sentence.text}</p>
          </div>
        )
      )}
    </div>
  );
};

export const TranscriptionPlaceholder = () => {
  return (
    <div className="p-4">
      {Array.from({ length: 5 }).map((_, i) => (
        <Skeleton key={i} className="h-4 w-full mb-4" />
      ))}
      <Skeleton className="h-4 w-3/5" />
    </div>
  );
};
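// Editor's note on the onClick above: wavesurfer.seekTo() takes a progress ratio
// in [0, 1], and (startTime / duration) can overshoot 1 through floating-point
// rounding — the "fix float issue when seekTo" change in this commit. A minimal
// sketch of the same truncation trick, assuming a WaveSurfer-like seekTo(progress):
const seekToTime = (ws: WaveSurfer, time: number, duration: number) => {
  // Flooring at 8 decimal places keeps the ratio from exceeding 1
  // (e.g. a raw ratio of 1.0000000000000002 becomes exactly 1).
  const progress = Math.floor((time / duration) * 1e8) / 1e8;
  ws.seekTo(progress);
};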
@@ -14,7 +14,7 @@ import {
} from "@renderer/components/ui";
import {
  SpeechPlayer,
  AudioDetail,
  AudioPlayer,
  ConversationShortcuts,
} from "@renderer/components";
import { useState, useEffect, useContext } from "react";
@@ -242,16 +242,16 @@ export const AssistantMessageComponent = (props: {
      <Sheet open={shadowing} onOpenChange={(value) => setShadowing(value)}>
        <SheetContent
          side="bottom"
          className="rounded-t-2xl shadow-lg"
          className="h-100vh p-0"
          displayClose={false}
        >
          <SheetHeader className="flex items-center justify-center -mt-4 mb-2">
          <SheetHeader className="flex items-center justify-center h-14">
            <SheetClose>
              <ChevronDownIcon />
            </SheetClose>
          </SheetHeader>

          {Boolean(speech) && <AudioDetail md5={speech.md5} />}
          {Boolean(speech) && <AudioPlayer md5={speech.md5} />}
        </SheetContent>
      </Sheet>
    </div>
@@ -1,79 +0,0 @@
import Pitchfinder from "pitchfinder";

export const extractFrequencies = (props: {
  peaks: Float32Array;
  sampleRate: number;
}): number[] => {
  const { peaks, sampleRate } = props;

  const detectPitch = Pitchfinder.AMDF({ sampleRate });
  const duration = peaks.length / sampleRate;
  const bpm = peaks.length / duration / 60;

  const frequencies = Pitchfinder.frequencies(detectPitch, peaks, {
    tempo: bpm,
    quantization: bpm,
  });

  return frequencies;
};

export const PitchContour = (props: {
  peaks?: Float32Array;
  sampleRate?: number;
  frequencies?: number[];
  height: number;
  id?: string;
}) => {
  const { peaks, sampleRate, height, id } = props;
  let { frequencies } = props;

  if (!frequencies) {
    frequencies = extractFrequencies({ peaks, sampleRate });
  }

  // Find the baseline frequency (the value that appears most often)
  const frequencyMap: any = {};
  let maxAmount = 0;
  let baseFrequency = 0;
  frequencies.forEach((frequency) => {
    if (!frequency) return;
    const tolerance = 10;
    frequency = Math.round(frequency * tolerance) / tolerance;
    if (!frequencyMap[frequency]) frequencyMap[frequency] = 0;
    frequencyMap[frequency] += 1;
    if (frequencyMap[frequency] > maxAmount) {
      maxAmount = frequencyMap[frequency];
      baseFrequency = frequency;
    }
  });

  const pitchUpColor = "#385587";
  // const pitchDownColor = "#C26351";
  const pitchDownColor = "#385587";

  const canvas = document.createElement("canvas");
  const ctx = canvas.getContext("2d");
  canvas.width = frequencies.length;
  canvas.height = height;
  canvas.style.width = "100%";
  canvas.style.height = "100%";

  // Each frequency is a point whose Y position is the frequency and X position is the time
  let prevY = 0;
  frequencies.forEach((frequency, index) => {
    if (!frequency) return;
    const hratio = 0.5; // the bigger the narrower the pitch contour drawn on canvas.
    const marginTop = height * 0.4; // the bigger the lower the pitch contour positioned.
    const y =
      Math.round(height - (frequency / (baseFrequency * 2)) * height) * hratio +
      marginTop;
    ctx.fillStyle = y > prevY ? pitchDownColor : pitchUpColor;
    ctx.fillRect(index, y, 1, 2);
    prevY = y;
  });

  canvas.id = id;

  return canvas;
};
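// Editor's note: the canvas renderer deleted above is superseded by the Chart.js
// based renderPitchContour (see media-player-provider.tsx later in this diff);
// extractFrequencies itself survives in "@/utils". A hedged usage sketch, with
// all names taken from elsewhere in this commit:
const peaks = wavesurfer.getDecodedData().getChannelData(0);
const sampleRate = wavesurfer.options.sampleRate;
// One pitch estimate per quantization step; unvoiced frames come back null.
const frequencies = extractFrequencies({ peaks, sampleRate });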
@@ -1,6 +1,7 @@
import { useEffect, useState, useRef, useCallback, useContext } from "react";
import { AppSettingsProviderContext } from "@renderer/context";
import { PitchContour } from "@renderer/components";
import { renderPitchContour } from "@renderer/lib/utils";
import { extractFrequencies } from "@/utils";
import WaveSurfer from "wavesurfer.js";
import { Button, Skeleton } from "@renderer/components/ui";
import { PlayIcon, PauseIcon } from "lucide-react";
@@ -12,6 +13,7 @@ import {
  defaultLayoutIcons,
} from "@vidstack/react/player/layouts/default";
export const STORAGE_WORKER_ENDPOINT = "https://enjoy-storage.baizhiheizi.com";
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";

export const PostAudio = (props: {
  audio: Partial<MediumType>;
@@ -22,11 +24,16 @@ export const PostAudio = (props: {
  const { webApi } = useContext(AppSettingsProviderContext);
  const [transcription, setTranscription] = useState<TranscriptionType>();

  const currentTranscription = (transcription?.result || []).find(
    (s) =>
      currentTime >= s.offsets.from / 1000.0 &&
      currentTime <= s.offsets.to / 1000.0
  );
  const currentTranscription = transcription?.result["transcript"]
    ? (transcription.result?.timeline || []).find(
        (s: TimelineEntry) =>
          currentTime >= s.startTime && currentTime <= s.endTime
      )
    : (transcription?.result || []).find(
        (s: TranscriptionResultSegmentType) =>
          currentTime >= s.offsets.from / 1000.0 &&
          currentTime <= s.offsets.to / 1000.0
      );

  useEffect(() => {
    webApi
@@ -134,17 +141,25 @@ const WavesurferPlayer = (props: {
      wavesurfer.on("timeupdate", (time: number) => {
        setCurrentTime(time);
      }),
      wavesurfer.on("decode", () => {
      wavesurfer.on("ready", () => {
        setDuration(wavesurfer.getDuration());
        const peaks = wavesurfer.getDecodedData().getChannelData(0);
        const sampleRate = wavesurfer.options.sampleRate;
        wavesurfer.renderer.getWrapper().appendChild(
          PitchContour({
            peaks,
            sampleRate,
            height,
          })
        );
        const data = extractFrequencies({ peaks, sampleRate });
        setTimeout(() => {
          renderPitchContour({
            wrapper: wavesurfer.getWrapper(),
            canvasId: `pitch-contour-${audio.id}-canvas`,
            labels: new Array(data.length).fill(""),
            datasets: [
              {
                data,
                cubicInterpolationMode: "monotone",
                pointRadius: 1,
              },
            ],
          });
        }, 1000);
        setInitialized(true);
      }),
    ];
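// Editor's note on currentTranscription above: echogarden alignment results carry
// a timeline with startTime/endTime in seconds, while legacy whisper segments
// store millisecond offsets — hence the two branches. A sketch of the
// seconds-based branch:
const activeEntry = (timeline: TimelineEntry[], time: number) =>
  timeline.find((s) => time >= s.startTime && time <= s.endTime);
// A binary search over startTime would avoid the linear scan, but find() is
// cheap at sentence granularity.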
@@ -1,5 +1,6 @@
import { useEffect, useState, useRef, useCallback } from "react";
import { PitchContour } from "@renderer/components";
import { renderPitchContour } from "@renderer/lib/utils";
import { extractFrequencies } from "@/utils";
import WaveSurfer from "wavesurfer.js";
import { Button, Skeleton } from "@renderer/components/ui";
import { PlayIcon, PauseIcon } from "lucide-react";
@@ -59,17 +60,28 @@ export const PostRecording = (props: {
      wavesurfer.on("pause", () => {
        setIsPlaying(false);
      }),
      wavesurfer.on("decode", () => {
      wavesurfer.on("ready", () => {
        setDuration(wavesurfer.getDuration());
        const peaks = wavesurfer.getDecodedData().getChannelData(0);
        const sampleRate = wavesurfer.options.sampleRate;
        wavesurfer.renderer.getWrapper().appendChild(
          PitchContour({
            peaks,
            sampleRate,
            height,
          })
        );
        const data = extractFrequencies({ peaks, sampleRate });
        setTimeout(() => {
          renderPitchContour({
            wrapper: wavesurfer.getWrapper(),
            canvasId: `pitch-contour-${recording.id}-canvas`,
            labels: new Array(data.length).fill(""),
            datasets: [
              {
                data,
                cubicInterpolationMode: "monotone",
                pointRadius: 1,
                borderColor: "#fb6f92",
                pointBorderColor: "#fb6f92",
                pointBackgroundColor: "#ff8fab",
              },
            ],
          });
        }, 1000);
        setInitialized(true);
      }),
    ];
@@ -119,15 +131,13 @@ export const PostRecording = (props: {
        ></div>
      </div>

      {
        recording.referenceText && (
          <div className="mt-2 bg-muted px-4 py-2 rounded">
            <div className="text-muted-foreground text-center font-serif">
              {recording.referenceText}
            </div>
      {recording.referenceText && (
        <div className="mt-2 bg-muted px-4 py-2 rounded">
          <div className="text-muted-foreground text-center font-serif">
            {recording.referenceText}
          </div>
          )
      }
        </div>
      )}
    </div>
  );
};
@@ -8,29 +8,84 @@ export const Hotkeys = () => {
    <>
      <div className="font-semibold mb-4 capitilized">{t("hotkeys")}</div>

      <div className="flex items-center justify-between py-4">
        <div className="flex items-center space-x-2">{t("quitApp")}</div>
        <kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
          {commandOrCtrl} + Q
        </kbd>
      </div>
      <Separator />
      <div className="mb-6">
        <div className="text-sm text-muted-foreground">{t("system")}</div>

        <div className="flex items-center justify-between py-4">
          <div className="flex items-center space-x-2">{t("openPreferences")}</div>
          <kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
            {commandOrCtrl} + ,
          </kbd>
        </div>
        <Separator />
        <div className="flex items-center justify-between py-4">
          <div className="flex items-center space-x-2">{t("quitApp")}</div>
          <kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
            {commandOrCtrl} + Q
          </kbd>
        </div>

      <div className="flex items-center justify-between py-4">
        <div className="flex items-center space-x-2">{t("playOrPause")}</div>
        <kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
          Space
        </kbd>
        <Separator />

        <div className="flex items-center justify-between py-4">
          <div className="flex items-center space-x-2">
            {t("openPreferences")}
          </div>
          <kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
            {commandOrCtrl} + ,
          </kbd>
        </div>
        <Separator />
      </div>

      <div className="mb-6">
        <div className="text-sm text-muted-foreground">{t("player")}</div>

        <div className="flex items-center justify-between py-4">
          <div className="flex items-center space-x-2">{t("playOrPause")}</div>
          <kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
            Space
          </kbd>
        </div>

        <Separator />

        <div className="flex items-center justify-between py-4">
          <div className="flex items-center space-x-2 capitalize">
            {t("startOrStopRecording")}
          </div>
          <kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
            r
          </kbd>
        </div>

        <Separator />

        <div className="flex items-center justify-between py-4">
          <div className="flex items-center space-x-2">
            {t("playOrPauseRecording")}
          </div>
          <kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
            {commandOrCtrl} + r
          </kbd>
        </div>

        <Separator />

        <div className="flex items-center justify-between py-4">
          <div className="flex items-center space-x-2 capitalize">
            {t("playPreviousSegment")}
          </div>
          <kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
            p
          </kbd>
        </div>

        <Separator />

        <div className="flex items-center justify-between py-4">
          <div className="flex items-center space-x-2 capitalize">
            {t("playNextSegment")}
          </div>
          <kbd className="bg-muted px-2 py-1 rounded-md text-sm text-muted-foreground">
            n
          </kbd>
        </div>
        <Separator />
      </div>
      <Separator />
    </>
  );
};
@@ -117,7 +117,6 @@ export const OpenaiSettings = () => {
          <Input
            disabled={!editing}
            placeholder={t("leaveEmptyToUseDefault")}
            defaultValue=""
            value={field.value}
            onChange={field.onChange}
          />
@@ -1,6 +1,7 @@
import { useEffect, useState, useRef, useCallback } from "react";
import WaveSurfer from "wavesurfer.js";
import { PitchContour } from "@renderer/components";
import { renderPitchContour } from "@renderer/lib/utils";
import { extractFrequencies } from "@/utils";
import { Button, Skeleton } from "@renderer/components/ui";
import { PlayIcon, PauseIcon } from "lucide-react";
import { useIntersectionObserver } from "@uidotdev/usehooks";
@@ -70,16 +71,23 @@ export const RecordingPlayer = (props: {
      wavesurfer.on("timeupdate", (time: number) => {
        onCurrentTimeChange?.(time);
      }),
      wavesurfer.on("decode", () => {
      wavesurfer.on("ready", () => {
        const peaks = wavesurfer.getDecodedData().getChannelData(0);
        const sampleRate = wavesurfer.options.sampleRate;
        wavesurfer.renderer.getWrapper().appendChild(
          PitchContour({
            peaks,
            sampleRate,
            height,
          })
        );
        const data = extractFrequencies({ peaks, sampleRate });
        setTimeout(() => {
          renderPitchContour({
            wrapper: wavesurfer.getWrapper(),
            canvasId: `pitch-contour-${recording.id}-canvas`,
            labels: new Array(data.length).fill(""),
            datasets: [
              {
                data,
                cubicInterpolationMode: "monotone",
              },
            ],
          });
        }, 1000);
        setInitialized(true);
      }),
    ];
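// Editor's note: across PostAudio, PostRecording, and RecordingPlayer this commit
// moves pitch-contour setup from the "decode" event to "ready", and defers
// renderPitchContour by one second so the waveform wrapper is laid out before the
// chart canvas is positioned over it. The subscribe/unsubscribe shape all three
// use (wavesurfer.js v7's on() returns an unsubscribe function) looks like:
useEffect(() => {
  if (!wavesurfer) return;
  const subscriptions = [
    wavesurfer.on("ready", () => {
      // draw the pitch contour once the waveform has rendered
    }),
  ];
  return () => subscriptions.forEach((unsub) => unsub());
}, [wavesurfer]);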
@@ -1,6 +1,6 @@
export * from "./videos-table";
export * from "./video-edit-form";
export * from "./video-detail";
export * from "./video-player";

export * from "./videos-component";
@@ -1,407 +0,0 @@
import { useEffect, useState, useContext } from "react";
import {
  DbProviderContext,
  AppSettingsProviderContext,
  AISettingsProviderContext,
} from "@renderer/context";
import {
  LoaderSpin,
  RecordingsList,
  PagePlaceholder,
  MediaPlayer,
  MediaTranscription,
} from "@renderer/components";
import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import {
  AlertDialog,
  AlertDialogHeader,
  AlertDialogDescription,
  AlertDialogTitle,
  AlertDialogContent,
  AlertDialogFooter,
  AlertDialogCancel,
  Button,
  PingPoint,
  Progress,
  ScrollArea,
  toast,
} from "@renderer/components/ui";
import { t } from "i18next";
import { useTranscribe } from "@renderer/hooks";
import { useNavigate } from "react-router-dom";

export const VideoDetail = (props: { id?: string; md5?: string }) => {
  const navigate = useNavigate();

  const { id, md5 } = props;
  const { addDblistener, removeDbListener } = useContext(DbProviderContext);
  const { whisperConfig } = useContext(AISettingsProviderContext);
  const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);

  const [video, setVideo] = useState<VideoType | null>(null);
  const [transcription, setTranscription] = useState<TranscriptionType>(null);
  const [sharing, setSharing] = useState<boolean>(false);

  // Transcription controls
  const [transcribing, setTranscribing] = useState<boolean>(false);
  const { transcribe } = useTranscribe();
  const [transcribingProgress, setTranscribingProgress] = useState<number>(0);

  // Player controls
  const [initialized, setInitialized] = useState<boolean>(false);
  const [currentTime, setCurrentTime] = useState<number>(0);
  const [seek, setSeek] = useState<{
    seekTo: number;
    timestamp: number;
  }>();
  const [currentSegmentIndex, setCurrentSegmentIndex] = useState<number>(0);
  const [recordButtonVisible, setRecordButtonVisible] =
    useState<boolean>(false);
  const [zoomRatio, setZoomRatio] = useState<number>(1.0);
  const [isPlaying, setIsPlaying] = useState(false);
  const [playMode, setPlayMode] = useState<"loop" | "single" | "all">("all");
  const [playBackRate, setPlaybackRate] = useState<number>(1);
  const [displayInlineCaption, setDisplayInlineCaption] =
    useState<boolean>(true);

  const onTransactionUpdate = (event: CustomEvent) => {
    const { model, action, record } = event.detail || {};
    if (model === "Transcription" && action === "update") {
      setTranscription(record);
    }
  };

  const findOrCreateTranscription = async () => {
    return EnjoyApp.transcriptions
      .findOrCreate({
        targetId: video.id,
        targetType: "Video",
      })
      .then((transcription) => {
        setTranscription(transcription);
      });
  };

  const generateTranscription = async () => {
    if (transcribing) return;
    if (!transcription) {
      await findOrCreateTranscription();
    }

    setTranscribing(true);
    setTranscribingProgress(0);
    try {
      const { engine, model, result } = await transcribe(video.src, {
        targetId: video.id,
        targetType: "Video",
      });
      await EnjoyApp.transcriptions.update(transcription.id, {
        state: "finished",
        result,
        engine,
        model,
      });
    } catch (err) {
      toast.error(err.message);
    }

    setTranscribing(false);
  };

  const findTranscriptionFromWebApi = async () => {
    if (!transcription) {
      await findOrCreateTranscription();
    }

    const res = await webApi.transcriptions({
      targetMd5: video.md5,
    });

    const transcript = (res?.transcriptions || []).filter((t) =>
      ["base", "small", "medium", "large", "whisper-1"].includes(t.model)
    )?.[0];

    if (!transcript) {
      throw new Error("Transcription not found");
    }

    await EnjoyApp.transcriptions.update(transcription.id, {
      state: "finished",
      result: transcript.result,
      engine: transcript.engine,
      model: transcript.model,
    });
  };

  const findOrGenerateTranscription = async () => {
    try {
      await findTranscriptionFromWebApi();
    } catch (err) {
      console.error(err);
      await generateTranscription();
    }
  };

  const handleShare = async () => {
    if (!video.source.startsWith("http")) {
      toast.error(t("shareFailed"), {
        description: t("cannotShareLocalVideo"),
      });
      return;
    }

    if (!video.source && !video.isUploaded) {
      try {
        await EnjoyApp.videos.upload(video.id);
      } catch (err) {
        toast.error(t("shareFailed"), { description: err.message });
        return;
      }
    }

    webApi
      .createPost({
        targetType: "Video",
        targetId: video.id,
      })
      .then(() => {
        toast.success(t("sharedSuccessfully"), {
          description: t("sharedVideo"),
        });
      })
      .catch((err) => {
        toast.error(t("shareFailed"), { description: err.message });
      });
    setSharing(false);
  };

  useEffect(() => {
    const where = id ? { id } : { md5 };
    EnjoyApp.videos.findOne(where).then((video) => {
      if (video) {
        setVideo(video);
      } else {
        toast.error(t("models.video.notFound"));
      }
    });
  }, [id, md5]);

  useEffect(() => {
    if (!video) return;

    findOrCreateTranscription();
  }, [video]);

  useEffect(() => {
    if (!initialized) return;
    if (!transcription) return;

    addDblistener(onTransactionUpdate);

    if (transcription?.state == "pending") {
      findOrGenerateTranscription();
    }

    if (whisperConfig.service === "local") {
      EnjoyApp.whisper.onProgress((_, p: number) => {
        if (p > 100) p = 100;
        setTranscribingProgress(p);
      });
    }

    return () => {
      removeDbListener(onTransactionUpdate);
      EnjoyApp.whisper.removeProgressListeners();
    };
  }, [md5, transcription, initialized]);

  if (!video) {
    return <LoaderSpin />;
  }

  if (!video.src) {
    return (
      <PagePlaceholder placeholder="invalid" extra="cannot find play source" />
    );
  }

  return (
    <div className="relative">
      <div className={`grid grid-cols-7 gap-4 ${initialized ? "" : "blur-sm"}`}>
        <div className="col-span-5 h-[calc(100vh-6.5rem)] flex flex-col">
          <MediaPlayer
            mediaId={video.id}
            mediaType="Video"
            mediaUrl={video.src}
            mediaMd5={video.md5}
            transcription={transcription}
            currentTime={currentTime}
            setCurrentTime={setCurrentTime}
            currentSegmentIndex={currentSegmentIndex}
            setCurrentSegmentIndex={setCurrentSegmentIndex}
            recordButtonVisible={recordButtonVisible}
            setRecordButtonVisible={setRecordButtonVisible}
            seek={seek}
            initialized={initialized}
            setInitialized={setInitialized}
            zoomRatio={zoomRatio}
            setZoomRatio={setZoomRatio}
            isPlaying={isPlaying}
            setIsPlaying={setIsPlaying}
            playMode={playMode}
            setPlayMode={setPlayMode}
            playBackRate={playBackRate}
            setPlaybackRate={setPlaybackRate}
            displayInlineCaption={displayInlineCaption}
            setDisplayInlineCaption={setDisplayInlineCaption}
            onShare={() => setSharing(true)}
            onDecoded={({ duration, sampleRate }) => {
              if (video.duration) return;

              EnjoyApp.videos.update(video.id, {
                metadata: Object.assign({}, video.metadata, {
                  duration,
                  sampleRate,
                }),
              });
            }}
          />

          <ScrollArea
            className={`flex-1 relative ${
              recordButtonVisible ? "bg-muted" : "hidden"
            }`}
          >
            <RecordingsList
              key={`recordings-list-${video.id}-${currentSegmentIndex}`}
              targetId={video.id}
              targetType="Video"
              referenceText={transcription?.result?.[currentSegmentIndex]?.text}
              referenceId={currentSegmentIndex}
            />
          </ScrollArea>
        </div>

        <div className="col-span-2 h-[calc(100vh-6.5rem)]">
          <MediaTranscription
            mediaId={video.id}
            mediaType="Video"
            mediaName={video.name}
            transcription={transcription}
            transcribing={transcribing}
            progress={transcribingProgress}
            transcribe={generateTranscription}
            currentSegmentIndex={currentSegmentIndex}
            onSelectSegment={(index) => {
              if (currentSegmentIndex === index) return;

              const segment = transcription?.result?.[index];
              if (!segment) return;

              if (playMode === "loop" && isPlaying) {
                setIsPlaying(false);
              }
              setSeek({
                seekTo: segment.offsets.from / 1000,
                timestamp: Date.now(),
              });
            }}
          />
        </div>
      </div>

      <AlertDialog open={sharing} onOpenChange={(value) => setSharing(value)}>
        <AlertDialogContent>
          <AlertDialogHeader>
            <AlertDialogTitle>{t("shareAudio")}</AlertDialogTitle>
            <AlertDialogDescription>
              {t("areYouSureToShareThisAudioToCommunity")}
            </AlertDialogDescription>
          </AlertDialogHeader>
          <AlertDialogFooter>
            <AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
            <Button variant="default" onClick={handleShare}>
              {t("share")}
            </Button>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>

      {/* Show loading progress until waveform is decoded & transcribed */}
      <AlertDialog open={!initialized || !Boolean(transcription?.result)}>
        <AlertDialogContent>
          <AlertDialogHeader>
            <AlertDialogTitle>{t("preparingVideo")}</AlertDialogTitle>
            <AlertDialogDescription>
              {t("itMayTakeAWhileToPrepareForTheFirstLoad")}
            </AlertDialogDescription>
          </AlertDialogHeader>

          <div className="py-4">
            {initialized ? (
              <div className="mb-4 flex items-center space-x-4">
                <CheckCircleIcon className="w-4 h-4 text-green-500" />
                <span>{t("waveformIsDecoded")}</span>
              </div>
            ) : (
              <div className="mb-4 flex items-center space-x-4">
                <LoaderIcon className="w-4 h-4 animate-spin" />
                <span>{t("decodingWaveform")}</span>
              </div>
            )}

            {!transcription ? (
              <div className="flex items-center space-x-4">
                <LoaderIcon className="w-4 h-4 animate-spin" />
                <span>{t("loadingTranscription")}</span>
              </div>
            ) : transcription.result ? (
              <div className="flex items-center space-x-4">
                <CheckCircleIcon className="w-4 h-4 text-green-500" />
                <span>{t("transcribedSuccessfully")}</span>
              </div>
            ) : transcribing ? (
              <div className="">
                <div className="flex items-center space-x-4 mb-2">
                  <PingPoint colorClassName="bg-yellow-500" />
                  <span>{t("transcribing")}</span>
                </div>
                {whisperConfig.service === "local" && (
                  <Progress value={transcribingProgress} />
                )}
              </div>
            ) : (
              <div className="flex items-center space-x-4">
                <PingPoint colorClassName="bg-muted" />
                <div className="inline">
                  <span>{t("notTranscribedYet")}</span>
                  {initialized && (
                    <Button
                      onClick={generateTranscription}
                      className="ml-4"
                      size="sm"
                    >
                      {t("transcribe")}
                    </Button>
                  )}
                </div>
              </div>
            )}
          </div>

          <AlertDialogFooter>
            <Button variant="secondary" onClick={() => navigate(-1)}>
              {t("cancel")}
            </Button>
          </AlertDialogFooter>
        </AlertDialogContent>
      </AlertDialog>

      {!initialized && (
        <div className="top-0 w-full h-full absolute z-30 bg-background/10 flex items-center justify-center">
          <LoaderIcon className="text-muted-foreground animate-spin w-8 h-8" />
        </div>
      )}
    </div>
  );
};
72 enjoy/src/renderer/components/videos/video-player.tsx Normal file
@@ -0,0 +1,72 @@
import { useEffect, useContext, useRef } from "react";
import { MediaPlayerProviderContext } from "@renderer/context";
import {
  MediaLoadingModal,
  MediaCaption,
  MediaPlayerControls,
  MediaTabs,
  MediaCurrentRecording,
} from "@renderer/components";
import { formatDuration } from "@renderer/lib/utils";
import { useVideo } from "@renderer/hooks";

export const VideoPlayer = (props: { id?: string; md5?: string }) => {
  const { id, md5 } = props;
  const { media, currentTime, setMedia, setRef } = useContext(
    MediaPlayerProviderContext
  );
  const { video } = useVideo({ id, md5 });
  const ref = useRef(null);

  useEffect(() => {
    if (!video) return;

    setMedia(video);
  }, [video]);

  useEffect(() => {
    setRef(ref);
  }, [ref]);

  return (
    <div data-testid="video-player">
      <div className="h-[calc(100vh-37.5rem)] mb-4">
        <div className="grid grid-cols-3 gap-4 px-6 h-full">
          <div className="col-span-1 rounded-lg border shadow-lg h-[calc(100vh-37.5rem)]">
            <MediaTabs />
          </div>
          <div className="col-span-2 h-[calc(100vh-37.5rem)]">
            <MediaCaption />
          </div>
        </div>
      </div>

      <div className="h-[33rem] flex flex-col">
        <div className="h-[13rem] py-2 px-6 mb-4">
          <MediaCurrentRecording />
        </div>

        <div className="w-full h-[13rem] px-6 py-2 mb-4">
          <div className="border rounded-xl shadow-lg relative">
            <div data-testid="media-player-container" ref={ref} />
            <div className="absolute right-2 top-1">
              <span className="text-sm">
                {formatDuration(currentTime || 0)}
              </span>
              <span className="mx-1">/</span>
              <span className="text-sm">
                {formatDuration(media?.duration || 0)}
              </span>
            </div>
          </div>
        </div>

        <div className="w-full bg-background z-10 shadow-xl">
          <MediaPlayerControls />
        </div>
      </div>

      <MediaLoadingModal />
    </div>
  );
};
@@ -239,7 +239,7 @@ export const VideosComponent = () => {
            <AlertDialogTitle>{t("transcribe")}</AlertDialogTitle>
            <AlertDialogDescription>
              <p className="break-all">
                {t("transcribeVideoConfirmation", {
                {t("transcribeMediaConfirmation", {
                  name: transcribing?.name || "",
                })}
              </p>
@@ -25,7 +25,7 @@ export const AISettingsProvider = ({
}: {
  children: React.ReactNode;
}) => {
  const [defaultEngine, setDefaultEngine] = useState<string>(null);
  const [defaultEngine, setDefaultEngine] = useState<string>("openai");
  const [openai, setOpenai] = useState<LlmProviderType>(null);
  const [googleGenerativeAi, setGoogleGenerativeAi] =
    useState<LlmProviderType>(null);
@@ -2,3 +2,5 @@ export * from "./ai-settings-provider";
export * from "./app-settings-provider";
export * from "./db-provider";
export * from "./theme-provider";
export * from "./wavesurfer-provider";
export * from "./media-player-provider";
454 enjoy/src/renderer/context/media-player-provider.tsx Normal file
@@ -0,0 +1,454 @@
|
||||
import { createContext, useEffect, useState, useContext } from "react";
|
||||
import { extractFrequencies } from "@/utils";
|
||||
import { AppSettingsProviderContext } from "@renderer/context";
|
||||
import { useTranscriptions, useRecordings } from "@renderer/hooks";
|
||||
import WaveSurfer from "wavesurfer.js";
|
||||
import Regions, {
|
||||
type Region as RegionType,
|
||||
} from "wavesurfer.js/dist/plugins/regions";
|
||||
import Chart from "chart.js/auto";
|
||||
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";
|
||||
import { IPA_MAPPING } from "@/constants";
|
||||
|
||||
type MediaPlayerContextType = {
|
||||
media: AudioType | VideoType;
|
||||
setMedia: (media: AudioType | VideoType) => void;
|
||||
setMediaProvider: (mediaProvider: HTMLAudioElement | null) => void;
|
||||
waveform: WaveFormDataType;
|
||||
// wavesurfer
|
||||
wavesurfer: WaveSurfer;
|
||||
setRef: (ref: any) => void;
|
||||
decoded: boolean;
|
||||
// player state
|
||||
currentTime: number;
|
||||
currentSegmentIndex: number;
|
||||
setCurrentSegmentIndex: (index: number) => void;
|
||||
zoomRatio: number;
|
||||
setZoomRatio: (zoomRation: number) => void;
|
||||
fitZoomRatio: number;
|
||||
minPxPerSec: number;
|
||||
// regions
|
||||
regions: Regions | null;
|
||||
activeRegion: RegionType;
|
||||
setActiveRegion: (region: RegionType) => void;
|
||||
editingRegion: boolean;
|
||||
setEditingRegion: (editing: boolean) => void;
|
||||
renderPitchContour: (
|
||||
region: RegionType,
|
||||
options?: {
|
||||
repaint?: boolean;
|
||||
canvasId?: string;
|
||||
containerClassNames?: string[];
|
||||
data?: Chart["data"];
|
||||
}
|
||||
) => void;
|
||||
pitchChart: Chart;
|
||||
// Transcription
|
||||
transcription: TranscriptionType;
|
||||
generateTranscription: () => void;
|
||||
transcribing: boolean;
|
||||
transcribingProgress: number;
|
||||
transcriptionDraft: TranscriptionType["result"];
|
||||
setTranscriptionDraft: (result: TranscriptionType["result"]) => void;
|
||||
// Recordings
|
||||
isRecording: boolean;
|
||||
setIsRecording: (isRecording: boolean) => void;
|
||||
currentRecording: RecordingType;
|
||||
setCurrentRecording: (recording: RecordingType) => void;
|
||||
recordings: RecordingType[];
|
||||
fetchRecordings: (offset: number) => void;
|
||||
loadingRecordings: boolean;
|
||||
hasMoreRecordings: boolean;
|
||||
};
|
||||
|
||||
export const MediaPlayerProviderContext =
|
||||
createContext<MediaPlayerContextType>(null);
|
||||
|
||||
export const MediaPlayerProvider = ({
|
||||
children,
|
||||
}: {
|
||||
children: React.ReactNode;
|
||||
}) => {
|
||||
const height = 192;
|
||||
const minPxPerSec = 150;
|
||||
const { EnjoyApp } = useContext(AppSettingsProviderContext);
|
||||
|
||||
const [media, setMedia] = useState<AudioType | VideoType>(null);
|
||||
const [mediaProvider, setMediaProvider] = useState<HTMLAudioElement | null>(
|
||||
null
|
||||
);
|
||||
const [waveform, setWaveForm] = useState<WaveFormDataType>(null);
|
||||
const [wavesurfer, setWavesurfer] = useState(null);
|
||||
|
||||
const [regions, setRegions] = useState<Regions | null>(null);
|
||||
const [activeRegion, setActiveRegion] = useState<RegionType>(null);
|
||||
const [editingRegion, setEditingRegion] = useState<boolean>(false);
|
||||
const [pitchChart, setPitchChart] = useState<Chart>(null);
|
||||
|
||||
const [ref, setRef] = useState(null);
|
||||
|
||||
// Player state
|
||||
const [decoded, setDecoded] = useState<boolean>(false);
|
||||
const [currentTime, setCurrentTime] = useState<number>(0);
|
||||
const [currentSegmentIndex, setCurrentSegmentIndex] = useState<number>(0);
|
||||
const [fitZoomRatio, setFitZoomRatio] = useState<number>(1.0);
|
||||
const [zoomRatio, setZoomRatio] = useState<number>(1.0);
|
||||
|
||||
const [isRecording, setIsRecording] = useState<boolean>(false);
|
||||
const [currentRecording, setCurrentRecording] = useState<RecordingType>(null);
|
||||
|
||||
const [transcriptionDraft, setTranscriptionDraft] =
|
||||
useState<TranscriptionType["result"]>();
|
||||
|
||||
const {
|
||||
transcription,
|
||||
generateTranscription,
|
||||
transcribing,
|
||||
transcribingProgress,
|
||||
} = useTranscriptions(media);
|
||||
|
||||
const {
|
||||
recordings,
|
||||
fetchRecordings,
|
||||
loading: loadingRecordings,
|
||||
hasMore: hasMoreRecordings,
|
||||
} = useRecordings(media, currentSegmentIndex);
|
||||
|
||||
const initializeWavesurfer = async () => {
|
||||
if (!media) return;
|
||||
if (!mediaProvider) return;
|
||||
if (!ref.current) return;
|
||||
|
||||
const ws = WaveSurfer.create({
|
||||
container: ref.current,
|
||||
height,
|
||||
waveColor: "#eaeaea",
|
||||
progressColor: "#c0d6df",
|
||||
cursorColor: "#ff0054",
|
||||
barWidth: 2,
|
||||
autoScroll: true,
|
||||
minPxPerSec,
|
||||
autoCenter: false,
|
||||
dragToSeek: false,
|
||||
fillParent: true,
|
||||
media: mediaProvider,
|
||||
peaks: waveform ? [waveform.peaks] : undefined,
|
||||
duration: waveform ? waveform.duration : undefined,
|
||||
});
|
||||
|
||||
const blob = await fetch(media.src).then((res) => res.blob());
|
||||
|
||||
if (waveform) {
|
||||
ws.loadBlob(blob, [waveform.peaks], waveform.duration);
|
||||
setDecoded(true);
|
||||
} else {
|
||||
ws.loadBlob(blob);
|
||||
}
|
||||
|
||||
setWavesurfer(ws);
|
||||
};
|
||||
|
||||
const renderPitchContour = (
|
||||
region: RegionType,
|
||||
options?: {
|
||||
repaint?: boolean;
|
||||
canvasId?: string;
|
||||
containerClassNames?: string[];
|
||||
data?: Chart["data"];
|
||||
}
|
||||
) => {
|
||||
if (!region) return;
|
||||
if (!waveform?.frequencies?.length) return;
|
||||
if (!wavesurfer) return;
|
||||
|
||||
const { repaint = true, containerClassNames = [] } = options || {};
|
||||
const duration = wavesurfer.getDuration();
|
||||
const fromIndex = Math.round(
|
||||
(region.start / duration) * waveform.frequencies.length
|
||||
);
|
||||
const toIndex = Math.round(
|
||||
(region.end / duration) * waveform.frequencies.length
|
||||
);
|
||||
|
||||
const wrapper = (wavesurfer as any).renderer.getWrapper();
|
||||
// remove existing pitch contour
|
||||
if (repaint) {
|
||||
wrapper
|
||||
.querySelectorAll(".pitch-contour")
|
||||
.forEach((element: HTMLDivElement) => {
|
||||
element.remove();
|
||||
});
|
||||
}
|
||||
|
||||
// calculate offset and width
|
||||
const wrapperWidth = wrapper.getBoundingClientRect().width;
|
||||
const offsetLeft = (region.start / duration) * wrapperWidth;
|
||||
const width = ((region.end - region.start) / duration) * wrapperWidth;
|
||||
|
||||
// create container and canvas
|
||||
const pitchContourWidthContainer = document.createElement("div");
|
||||
const canvas = document.createElement("canvas");
|
||||
const canvasId = options?.canvasId || `pitch-contour-${region.id}-canvas`;
|
||||
canvas.id = canvasId;
|
||||
canvas.style.width = `${width}px`;
|
||||
canvas.style.height = `${height}px`;
|
||||
pitchContourWidthContainer.appendChild(canvas);
|
||||
|
||||
pitchContourWidthContainer.style.position = "absolute";
|
||||
pitchContourWidthContainer.style.top = "0";
|
||||
pitchContourWidthContainer.style.left = "0";
|
||||
|
||||
pitchContourWidthContainer.style.width = `${width}px`;
|
||||
pitchContourWidthContainer.style.height = `${height}px`;
|
||||
pitchContourWidthContainer.style.marginLeft = `${offsetLeft}px`;
|
||||
pitchContourWidthContainer.classList.add(
|
||||
"pitch-contour",
|
||||
...containerClassNames
|
||||
);
|
||||
// pitchContourWidthContainer.style.zIndex = "3";
|
||||
|
||||
wrapper.appendChild(pitchContourWidthContainer);
|
||||
|
||||
// prepare chart data
|
||||
let chartData: Chart["data"] = options?.data;
|
||||
|
||||
if (!chartData) {
|
||||
const data = waveform.frequencies.slice(fromIndex, toIndex);
|
||||
const regionDuration = region.end - region.start;
|
||||
|
||||
const labels = new Array(data.length).fill("");
|
||||
const caption = transcription?.result?.timeline?.[currentSegmentIndex];
|
||||
if (region.id.startsWith("segment-region")) {
|
||||
caption.timeline.forEach((segment: TimelineEntry) => {
|
||||
const index = Math.round(
|
||||
((segment.startTime - region.start) / regionDuration) * data.length
|
||||
);
|
||||
labels[index] = segment.text.trim();
|
||||
});
|
||||
} else if (region.id.startsWith("word-region")) {
|
||||
const words = caption.timeline.filter(
|
||||
(w: TimelineEntry) =>
|
||||
w.startTime >= region.start &&
|
||||
w.endTime <= region.end &&
|
||||
w.type === "word"
|
||||
);
|
||||
|
||||
let phones: TimelineEntry[] = [];
|
||||
words.forEach((word: TimelineEntry) => {
|
||||
word.timeline.forEach((token: TimelineEntry) => {
|
||||
phones = phones.concat(token.timeline);
|
||||
});
|
||||
});
|
||||
|
||||
phones.forEach((phone: TimelineEntry) => {
|
||||
const index = Math.round(
|
||||
((phone.startTime - region.start) / regionDuration) * data.length
|
||||
);
|
||||
labels[index] = [
|
||||
labels[index] || "",
|
||||
(IPA_MAPPING as any)[phone.text.trim()] || phone.text.trim(),
|
||||
].join("");
|
||||
});
|
||||
}
|
||||
|
||||
chartData = {
|
||||
labels,
|
||||
datasets: [
|
||||
{
|
||||
data,
|
||||
cubicInterpolationMode: "monotone",
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
setPitchChart(
|
||||
new Chart(canvas, {
|
||||
type: "line",
|
||||
data: chartData,
|
||||
options: {
|
||||
plugins: {
|
||||
legend: {
|
||||
display: false,
|
||||
},
|
||||
title: {
|
||||
display: false,
|
||||
},
|
||||
},
|
||||
scales: {
|
||||
x: {
|
||||
beginAtZero: true,
|
||||
ticks: {
|
||||
autoSkip: false,
|
||||
},
|
||||
display: true,
|
||||
grid: {
|
||||
display: false,
|
||||
},
|
||||
border: {
|
||||
display: false,
|
||||
},
|
||||
},
|
||||
y: {
|
||||
beginAtZero: true,
|
||||
display: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
);
|
||||
};
|
||||
|
||||
  useEffect(() => {
    if (!media) return;

    EnjoyApp.waveforms.find(media.md5).then((waveform) => {
      setWaveForm(waveform);
    });
  }, [media]);

  /*
   * Initialize wavesurfer when container ref is available
   * and mediaProvider is available
   */
  useEffect(() => {
    initializeWavesurfer();
  }, [media, ref, mediaProvider]);

  /*
   * When wavesurfer is decoded,
   * set up event listeners for wavesurfer
   * and clean up when component is unmounted
   */
  useEffect(() => {
    if (!wavesurfer) return;

    setRegions(wavesurfer.registerPlugin(Regions.create()));

    setCurrentTime(0);

    const subscriptions = [
      wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
      wavesurfer.on("timeupdate", (time: number) => setCurrentTime(time)),
      wavesurfer.on("decode", () => {
        const peaks: Float32Array = wavesurfer.getDecodedData().getChannelData(0);
        const duration: number = wavesurfer.getDuration();
        const sampleRate = wavesurfer.options.sampleRate;
        const _frequencies = extractFrequencies({ peaks, sampleRate });
        const _waveform = {
          peaks: Array.from(peaks),
          duration,
          sampleRate,
          frequencies: _frequencies,
        };
        EnjoyApp.waveforms.save(media.md5, _waveform);
        setWaveForm(_waveform);
      }),
      wavesurfer.on("ready", () => {
        setDecoded(true);
      }),
    ];

    return () => {
      subscriptions.forEach((unsub) => unsub());
    };
  }, [wavesurfer]);

  /*
   * Update fitZoomRatio when activeRegion is updated
   */
  useEffect(() => {
    if (!ref?.current) return;
    if (!wavesurfer) return;
    if (!activeRegion) return;

    const containerWidth = ref.current.getBoundingClientRect().width;
    const duration = activeRegion.end - activeRegion.start;
    if (activeRegion.id.startsWith("segment-region")) {
      setFitZoomRatio(containerWidth / duration / minPxPerSec);
    } else if (activeRegion.id.startsWith("word-region")) {
      setFitZoomRatio(containerWidth / 3 / duration / minPxPerSec);
    }
  }, [ref, wavesurfer, activeRegion]);

  /*
   * Zoom chart when zoomRatio updates
   */
  useEffect(() => {
    if (!wavesurfer) return;
    if (!decoded) return;

    wavesurfer.zoom(zoomRatio * minPxPerSec);
    if (!activeRegion) return;

    renderPitchContour(activeRegion);
    wavesurfer.setScrollTime(activeRegion.start);
  }, [zoomRatio, wavesurfer, decoded]);

  /*
   * Re-render pitch contour when active region changes
   */
  useEffect(() => {
    if (!activeRegion) return;

    renderPitchContour(activeRegion);
  }, [activeRegion]);

  /*
   * Update player styles
   */
  useEffect(() => {
    if (!wavesurfer) return;
    if (!decoded) return;

    const scrollContainer = wavesurfer.getWrapper().closest(".scroll");
    scrollContainer.style.scrollbarWidth = "thin";
  }, [decoded, wavesurfer]);

  return (
    <MediaPlayerProviderContext.Provider
      value={{
        media,
        setMedia,
        setMediaProvider,
        wavesurfer,
        setRef,
        decoded,
        currentTime,
        currentSegmentIndex,
        setCurrentSegmentIndex,
        waveform,
        zoomRatio,
        setZoomRatio,
        fitZoomRatio,
        minPxPerSec,
        transcription,
        regions,
        renderPitchContour,
        pitchChart,
        activeRegion,
        setActiveRegion,
        editingRegion,
        setEditingRegion,
        generateTranscription,
        transcribing,
        transcribingProgress,
        transcriptionDraft,
        setTranscriptionDraft,
        isRecording,
        setIsRecording,
        currentRecording,
        setCurrentRecording,
        recordings,
        fetchRecordings,
        loadingRecordings,
        hasMoreRecordings,
      }}
    >
      {children}
    </MediaPlayerProviderContext.Provider>
  );
};
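Note: the `decode` handler above persists everything the player needs to skip re-decoding on the next load, keyed by the media's md5. A hedged sketch of the cached record's shape, inferred from this handler (the type name is hypothetical):

```ts
// Hypothetical type for the record passed to EnjoyApp.waveforms.save(md5, _waveform)
// and returned by EnjoyApp.waveforms.find(md5); inferred from the decode handler.
type WaveFormDataType = {
  peaks: number[]; // first-channel samples, serialized from Float32Array
  duration: number; // seconds, from wavesurfer.getDuration()
  sampleRate: number; // from wavesurfer.options.sampleRate
  frequencies: number[]; // pitch estimates from extractFrequencies()
};
```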
185 enjoy/src/renderer/context/wavesurfer-provider.tsx Normal file
@@ -0,0 +1,185 @@
import { createContext, useEffect, useState, useContext } from "react";
import { extractFrequencies } from "@/utils";
import { AppSettingsProviderContext } from "@renderer/context";
import WaveSurfer from "wavesurfer.js";
import Regions, {
  type Region as RegionType,
} from "wavesurfer.js/dist/plugins/regions";

type WavesurferContextType = {
  media: AudioType | VideoType;
  setMedia: (media: AudioType | VideoType) => void;
  setMediaProvider: (mediaProvider: HTMLAudioElement | null) => void;
  wavesurfer: WaveSurfer;
  setRef: (ref: any) => void;
  initialized: boolean;
  currentTime: number;
  currentSegmentIndex: number;
  setCurrentSegmentIndex: (index: number) => void;
  zoomRatio: number;
};

export const WavesurferContext = createContext<WavesurferContextType>(null);

export const WavesurferProvider = ({
  children,
}: {
  children: React.ReactNode;
}) => {
  const { EnjoyApp } = useContext(AppSettingsProviderContext);

  const [media, setMedia] = useState<AudioType | VideoType>(null);
  const [mediaProvider, setMediaProvider] = useState<HTMLAudioElement | null>(
    null
  );
  const [wavesurfer, setWavesurfer] = useState(null);
  const [regions, setRegions] = useState<Regions | null>(null);
  const [ref, setRef] = useState(null);

  // Player state
  const [initialized, setInitialized] = useState<boolean>(false);
  const [currentTime, setCurrentTime] = useState<number>(0);
  const [seek, setSeek] = useState<{
    seekTo: number;
    timestamp: number;
  }>();
  const [currentSegmentIndex, setCurrentSegmentIndex] = useState<number>(0);
  const [zoomRatio, setZoomRatio] = useState<number>(1.0);
  const [isPlaying, setIsPlaying] = useState(false);
  const [playMode, setPlayMode] = useState<"loop" | "single" | "all">("all");
  const [playBackRate, setPlaybackRate] = useState<number>(1);
  const [displayInlineCaption, setDisplayInlineCaption] =
    useState<boolean>(true);

  const initializeWavesurfer = async () => {
    if (!media) return;
    if (!mediaProvider) return;
    if (!ref.current) return;

    const waveform = await EnjoyApp.waveforms.find(media.md5);
    const ws = WaveSurfer.create({
      container: ref.current,
      height: 250,
      waveColor: "#eee",
      progressColor: "rgba(0, 0, 0, 0.15)",
      cursorColor: "#aaa",
      barWidth: 2,
      autoScroll: true,
      minPxPerSec: 150,
      autoCenter: false,
      dragToSeek: false,
      media: mediaProvider,
      peaks: waveform ? [waveform.peaks] : undefined,
      duration: waveform ? waveform.duration : undefined,
    });

    const blob = await fetch(media.src).then((res) => res.blob());

    if (waveform) {
      ws.loadBlob(blob, [waveform.peaks], waveform.duration);
      setInitialized(true);
    } else {
      ws.loadBlob(blob);
    }

    // Set up region plugin
    setRegions(ws.registerPlugin(Regions.create()));

    setWavesurfer(ws);
  };

  /*
   * Initialize wavesurfer when container ref is available
   * and mediaProvider is available
   */
  useEffect(() => {
    initializeWavesurfer();
  }, [media, ref, mediaProvider]);

  /*
   * When wavesurfer is initialized,
   * set up event listeners for wavesurfer
   * and clean up when component is unmounted
   */
  useEffect(() => {
    if (!wavesurfer) return;

    setCurrentTime(0);
    setIsPlaying(false);

    const subscriptions = [
      wavesurfer.on("play", () => setIsPlaying(true)),
      wavesurfer.on("pause", () => setIsPlaying(false)),
      wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
      wavesurfer.on("timeupdate", (time: number) => setCurrentTime(time)),
      wavesurfer.on("decode", () => {
        const peaks: Float32Array = wavesurfer.getDecodedData().getChannelData(0);
        const duration: number = wavesurfer.getDuration();
        const sampleRate = wavesurfer.options.sampleRate;
        const _frequencies = extractFrequencies({ peaks, sampleRate });
        const _waveform = {
          peaks: Array.from(peaks),
          duration,
          sampleRate,
          frequencies: _frequencies,
        };
        EnjoyApp.waveforms.save(media.md5, _waveform);
      }),
      wavesurfer.on("ready", () => {
        setInitialized(true);
      }),
    ];

    return () => {
      subscriptions.forEach((unsub) => unsub());
    };
  }, [wavesurfer]);

  /*
   * When regions are available,
   * set up event listeners for regions
   * and clean up when component is unmounted
   */
  useEffect(() => {
    if (!regions) return;

    const subscriptions = [
      wavesurfer.on("finish", () => {
        if (playMode !== "loop") return;

        regions?.getRegions()[0]?.play();
      }),

      regions.on("region-created", (region: RegionType) => {
        region.on("click", () => {
          wavesurfer.play(region.start, region.end);
        });
      }),
    ];

    return () => {
      subscriptions.forEach((unsub) => unsub());
    };
    // re-subscribe when the plugin instance or play mode changes
  }, [regions, playMode]);

  return (
    <WavesurferContext.Provider
      value={{
        media,
        setMedia,
        setMediaProvider,
        wavesurfer,
        setRef,
        initialized,
        currentTime,
        currentSegmentIndex,
        setCurrentSegmentIndex,
        zoomRatio,
      }}
    >
      {children}
    </WavesurferContext.Provider>
  );
};
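Note: components reach the shared player through this context. A minimal consumer sketch, assuming the context is re-exported from `@renderer/context` (the component name is hypothetical):

```tsx
import { useContext, useEffect, useRef } from "react";
import { WavesurferContext } from "@renderer/context";

// Hypothetical consumer: hands its container div to the provider,
// which creates the WaveSurfer instance inside it, then shows playback time.
export const PlayerTimecode = () => {
  const { setRef, currentTime, initialized } = useContext(WavesurferContext);
  const containerRef = useRef<HTMLDivElement>(null);

  useEffect(() => {
    setRef(containerRef);
  }, []);

  return (
    <div ref={containerRef}>
      {initialized ? `${currentTime.toFixed(2)}s` : "loading..."}
    </div>
  );
};
```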
@@ -1,3 +1,10 @@
export * from './use-recordings';

export * from './use-transcribe';
export * from './use-transcriptions';

export * from './use-ai-command';
export * from './use-conversation';

export * from './use-audio';
export * from './use-video';

43 enjoy/src/renderer/hooks/use-audio.tsx Normal file
@@ -0,0 +1,43 @@
|
||||
import { useEffect, useContext, useState } from "react";
|
||||
import {
|
||||
DbProviderContext,
|
||||
AppSettingsProviderContext,
|
||||
} from "@renderer/context";
|
||||
import { toast } from "@renderer/components/ui";
|
||||
import { t } from "i18next";
|
||||
|
||||
export const useAudio = (options: { id?: string; md5?: string }) => {
|
||||
const { id, md5 } = options;
|
||||
const { EnjoyApp } = useContext(AppSettingsProviderContext);
|
||||
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
|
||||
const [audio, setAudio] = useState<AudioType>(null);
|
||||
|
||||
const onAudioUpdate = (event: CustomEvent) => {
|
||||
const { model, action, record } = event.detail || {};
|
||||
if (model !== "Audio") return;
|
||||
if (record?.id != audio?.id) return;
|
||||
if (action !== "update") return;
|
||||
|
||||
setAudio(record);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
const where = id ? { id } : { md5 };
|
||||
EnjoyApp.audios.findOne(where).then((audio) => {
|
||||
if (audio) {
|
||||
setAudio(audio);
|
||||
} else {
|
||||
toast.error(t("models.audio.notFound"));
|
||||
}
|
||||
});
|
||||
|
||||
addDblistener(onAudioUpdate);
|
||||
return () => {
|
||||
removeDbListener(onAudioUpdate);
|
||||
};
|
||||
}, [id, md5]);
|
||||
|
||||
return {
|
||||
audio,
|
||||
};
|
||||
};
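Note: a minimal usage sketch for the hook above (the component name is hypothetical):

```tsx
import { useAudio } from "@renderer/hooks";

// Hypothetical consumer: looks up an audio record by md5 and renders its
// name; it re-renders automatically when the record is updated in the DB.
export const AudioTitle = (props: { md5: string }) => {
  const { audio } = useAudio({ md5: props.md5 });

  if (!audio) return null;
  return <span>{audio.name}</span>;
};
```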
101 enjoy/src/renderer/hooks/use-recordings.tsx Normal file
@@ -0,0 +1,101 @@
|
||||
import { useState, useContext, useEffect, useRef, useReducer } from "react";
|
||||
import {
|
||||
AppSettingsProviderContext,
|
||||
DbProviderContext,
|
||||
} from "@renderer/context";
|
||||
import { recordingsReducer } from "@renderer/reducers";
|
||||
|
||||
export const useRecordings = (
|
||||
media: AudioType | VideoType,
|
||||
referenceId: number
|
||||
) => {
|
||||
const { EnjoyApp } = useContext(AppSettingsProviderContext);
|
||||
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
|
||||
const [recordings, dispatchRecordings] = useReducer(recordingsReducer, []);
|
||||
const [loading, setLoading] = useState(false);
|
||||
const [hasMore, setHasMore] = useState(true);
|
||||
|
||||
const fetchRecordings = async (offset = 0) => {
|
||||
setLoading(true);
|
||||
|
||||
const limit = 10;
|
||||
EnjoyApp.recordings
|
||||
.findAll({
|
||||
limit,
|
||||
offset,
|
||||
where: {
|
||||
targetId: media.id,
|
||||
targetType: media.mediaType,
|
||||
referenceId,
|
||||
},
|
||||
})
|
||||
.then((_recordings) => {
|
||||
if (_recordings.length < limit) {
|
||||
setHasMore(false);
|
||||
} else {
|
||||
setHasMore(true);
|
||||
}
|
||||
|
||||
dispatchRecordings({
|
||||
type: offset === 0 ? "set" : "append",
|
||||
records: _recordings,
|
||||
});
|
||||
})
|
||||
.finally(() => {
|
||||
setLoading(false);
|
||||
});
|
||||
};
|
||||
|
||||
const onRecordingsUpdate = (event: CustomEvent) => {
|
||||
const { model, action, record } = event.detail || {};
|
||||
|
||||
if (model === "PronunciationAssessment" && action === "create") {
|
||||
const recording = recordings.find((r) => r.id === record.targetId);
|
||||
if (!recording) return;
|
||||
|
||||
recording.pronunciationAssessment = record;
|
||||
dispatchRecordings({
|
||||
type: "update",
|
||||
record: recording,
|
||||
});
|
||||
}
|
||||
|
||||
if (model != "Recording") return;
|
||||
|
||||
if (action === "destroy") {
|
||||
dispatchRecordings({
|
||||
type: "destroy",
|
||||
record,
|
||||
});
|
||||
} else if (action === "create") {
|
||||
if ((record as RecordingType).targetId !== media.id) return;
|
||||
if ((record as RecordingType).referenceId !== referenceId) return;
|
||||
|
||||
dispatchRecordings({
|
||||
type: "create",
|
||||
record,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
addDblistener(onRecordingsUpdate);
|
||||
|
||||
return () => {
|
||||
removeDbListener(onRecordingsUpdate);
|
||||
};
|
||||
}, [recordings]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!media) return;
|
||||
|
||||
fetchRecordings(0);
|
||||
}, [media, referenceId]);
|
||||
|
||||
return {
|
||||
recordings,
|
||||
hasMore,
|
||||
fetchRecordings,
|
||||
loading,
|
||||
};
|
||||
};
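Note: the hook drives a reducer with `set`/`append`/`create`/`update`/`destroy` actions. A hedged sketch of a reducer compatible with those dispatches; the actual `recordingsReducer` in `@renderer/reducers` may differ:

```ts
// Sketch only; not the shipped reducer.
type RecordingsAction =
  | { type: "set" | "append"; records: RecordingType[] }
  | { type: "create" | "update" | "destroy"; record: RecordingType };

export const recordingsReducerSketch = (
  state: RecordingType[],
  action: RecordingsAction
): RecordingType[] => {
  switch (action.type) {
    case "set":
      return [...action.records];
    case "append":
      return [...state, ...action.records];
    case "create":
      return [action.record, ...state];
    case "update":
      return state.map((r) => (r.id === action.record.id ? action.record : r));
    case "destroy":
      return state.filter((r) => r.id !== action.record.id);
  }
};
```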
@@ -12,11 +12,10 @@ import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import axios from "axios";
import take from "lodash/take";
import sortedUniqBy from "lodash/sortedUniqBy";
import {
  groupTranscription,
  END_OF_WORD_REGEX,
  milisecondsToTimestamp,
} from "@/utils";
import { groupTranscription, milisecondsToTimestamp } from "@/utils";
import { END_OF_SENTENCE_REGEX } from "@/constants";
import { AlignmentResult } from "echogarden/dist/api/API.d.js";
import { FFMPEG_CONVERT_WAV_OPTIONS } from "@/constants";

export const useTranscribe = () => {
  const { EnjoyApp, ffmpegWasm, ffmpegValid, user, webApi } = useContext(
@@ -28,12 +27,16 @@ export const useTranscribe = () => {
    if (ffmpegValid) {
      if (src instanceof Blob) {
        src = await EnjoyApp.cacheObjects.writeFile(
          `${Date.now()}.${src.type.split("/")[1]}`,
          `${Date.now()}.${src.type.split("/")[1].split(";")[0]}`,
          await src.arrayBuffer()
        );
      }

      const output = `enjoy://library/cache/${src.split("/").pop()}.wav`;
      const output = `enjoy://library/cache/${src
        .split("/")
        .pop()
        .split(";")
        .shift()}.wav`;
      await EnjoyApp.ffmpeg.transcode(src, output, options);
      const data = await fetchFile(output);
      return new Blob([data], { type: "audio/wav" });
@@ -45,7 +48,7 @@ export const useTranscribe = () => {
  const transcodeUsingWasm = async (src: string | Blob, options?: string[]) => {
    if (!ffmpegWasm?.loaded) return;

    options = options || ["-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le"];
    options = options || FFMPEG_CONVERT_WAV_OPTIONS;

    try {
      let uri: URL;
@@ -80,21 +83,32 @@ export const useTranscribe = () => {
  ): Promise<{
    engine: string;
    model: string;
    result: TranscriptionResultSegmentGroupType[];
    alignmentResult: AlignmentResult;
  }> => {
    const blob = await transcode(mediaSrc);

    let result;
    if (whisperConfig.service === "local") {
      return transcribeByLocal(blob);
      result = await transcribeByLocal(blob);
    } else if (whisperConfig.service === "cloudflare") {
      return transcribeByCloudflareAi(blob);
      result = await transcribeByCloudflareAi(blob);
    } else if (whisperConfig.service === "openai") {
      return transcribeByOpenAi(blob);
      result = await transcribeByOpenAi(blob);
    } else if (whisperConfig.service === "azure") {
      return transcribeByAzureAi(blob, params);
      result = await transcribeByAzureAi(blob, params);
    } else {
      throw new Error(t("whisperServiceNotSupported"));
    }

    const alignmentResult = await EnjoyApp.echogarden.align(
      new Uint8Array(await blob.arrayBuffer()),
      result.result.map((segment) => segment.text).join(" ")
    );

    return {
      ...result,
      alignmentResult,
    };
  };

  const transcribeByLocal = async (blob: Blob) => {
@@ -267,7 +281,7 @@ export const useTranscribe = () => {

    if (
      index === best.Words.length - 1 &&
      !text.trim().match(END_OF_WORD_REGEX)
      !text.trim().match(END_OF_SENTENCE_REGEX)
    ) {
      text = text + ".";
    }
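Note: `transcribe()` now always force-aligns the recognized text against the audio, whichever STT service produced it. A hedged sketch of what the `EnjoyApp.echogarden.align` bridge presumably wraps in the main process, using echogarden's public `align` API (the `language` option here is an assumption):

```ts
import * as Echogarden from "echogarden";

// Sketch: run echogarden's forced aligner over a 16kHz mono WAV buffer
// and the recognized transcript; the result carries a sentence/word timeline.
async function alignTranscript(wav: Uint8Array, transcript: string) {
  const result = await Echogarden.align(wav, transcript, {
    language: "en", // assumption: English learning material
  });
  return result; // { timeline, transcript, ... }
}
```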
192 enjoy/src/renderer/hooks/use-transcriptions.tsx Normal file
@@ -0,0 +1,192 @@
|
||||
import { useState, useContext, useEffect } from "react";
|
||||
import { useTranscribe } from "@renderer/hooks";
|
||||
import {
|
||||
AISettingsProviderContext,
|
||||
AppSettingsProviderContext,
|
||||
DbProviderContext,
|
||||
} from "@renderer/context";
|
||||
import { toast } from "@renderer/components/ui";
|
||||
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";
|
||||
import { MAGIC_TOKEN_REGEX, END_OF_SENTENCE_REGEX } from "@/constants";
|
||||
|
||||
export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
const { whisperConfig } = useContext(AISettingsProviderContext);
|
||||
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
|
||||
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
|
||||
const [transcription, setTranscription] = useState<TranscriptionType>(null);
|
||||
const { transcribe } = useTranscribe();
|
||||
const [transcribingProgress, setTranscribingProgress] = useState<number>(0);
|
||||
const [transcribing, setTranscribing] = useState<boolean>(false);
|
||||
|
||||
const onTransactionUpdate = (event: CustomEvent) => {
|
||||
const { model, action, record } = event.detail || {};
|
||||
if (
|
||||
model === "Transcription" &&
|
||||
record.id === transcription.id &&
|
||||
action === "update"
|
||||
) {
|
||||
setTranscription(record);
|
||||
}
|
||||
};
|
||||
const findOrCreateTranscription = async () => {
|
||||
if (!media) return;
|
||||
if (transcription) return;
|
||||
|
||||
return EnjoyApp.transcriptions
|
||||
.findOrCreate({
|
||||
targetId: media.id,
|
||||
targetType: media.mediaType,
|
||||
})
|
||||
.then((t) => {
|
||||
if (t.result && !t.result["transcript"]) {
|
||||
t.result = null;
|
||||
}
|
||||
setTranscription(t);
|
||||
})
|
||||
.catch((err) => {
|
||||
toast.error(err.message);
|
||||
});
|
||||
};
|
||||
|
||||
const generateTranscription = async () => {
|
||||
if (transcribing) return;
|
||||
if (!transcription) {
|
||||
await findOrCreateTranscription();
|
||||
}
|
||||
|
||||
setTranscribing(true);
|
||||
setTranscribingProgress(0);
|
||||
try {
|
||||
const { engine, model, alignmentResult } = await transcribe(media.src, {
|
||||
targetId: media.id,
|
||||
targetType: media.mediaType,
|
||||
});
|
||||
|
||||
let timeline: TimelineEntry[] = [];
|
||||
if (alignmentResult) {
|
||||
alignmentResult.timeline.forEach((t) => {
|
||||
if (t.type === "sentence") {
|
||||
timeline.push(t);
|
||||
} else {
|
||||
t.timeline.forEach((st) => {
|
||||
timeline.push(st);
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/*
|
||||
* Pre-process
|
||||
* Some words end with period should not be a single sentence, like Mr./Ms./Dr. etc
|
||||
*/
|
||||
timeline.forEach((sentence, i) => {
|
||||
const nextSentence = timeline[i + 1];
|
||||
if (
|
||||
!sentence.text
|
||||
.replaceAll(MAGIC_TOKEN_REGEX, "")
|
||||
.match(END_OF_SENTENCE_REGEX) &&
|
||||
nextSentence?.text
|
||||
) {
|
||||
console.log(sentence.text);
|
||||
nextSentence.text = [sentence.text, nextSentence.text].join(" ");
|
||||
nextSentence.timeline = [
|
||||
...sentence.timeline,
|
||||
...nextSentence.timeline,
|
||||
];
|
||||
nextSentence.startTime = sentence.startTime;
|
||||
timeline.splice(i, 1);
|
||||
}
|
||||
});
|
||||
|
||||
await EnjoyApp.transcriptions.update(transcription.id, {
|
||||
state: "finished",
|
||||
result: {
|
||||
timeline: timeline,
|
||||
transcript: alignmentResult.transcript,
|
||||
},
|
||||
engine,
|
||||
model,
|
||||
});
|
||||
} catch (err) {
|
||||
toast.error(err.message);
|
||||
}
|
||||
|
||||
setTranscribing(false);
|
||||
};
|
||||
|
||||
const findTranscriptionFromWebApi = async () => {
|
||||
if (!transcription) {
|
||||
await findOrCreateTranscription();
|
||||
}
|
||||
|
||||
const res = await webApi.transcriptions({
|
||||
targetMd5: media.md5,
|
||||
});
|
||||
|
||||
const transcript = (res?.transcriptions || []).filter((t) =>
|
||||
["base", "small", "medium", "large", "whisper-1"].includes(t.model)
|
||||
)?.[0];
|
||||
|
||||
if (!transcript) {
|
||||
return Promise.reject("Transcription not found");
|
||||
}
|
||||
|
||||
if (!transcript.result["transcript"]) {
|
||||
return Promise.reject("Transcription not aligned");
|
||||
}
|
||||
|
||||
return EnjoyApp.transcriptions.update(transcription.id, {
|
||||
state: "finished",
|
||||
result: transcript.result,
|
||||
engine: transcript.engine,
|
||||
model: transcript.model,
|
||||
});
|
||||
};
|
||||
|
||||
const findOrGenerateTranscription = async () => {
|
||||
try {
|
||||
await findTranscriptionFromWebApi();
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
await generateTranscription();
|
||||
}
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
if (!media) return;
|
||||
|
||||
findOrCreateTranscription();
|
||||
}, [media]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!transcription) return;
|
||||
|
||||
addDblistener(onTransactionUpdate);
|
||||
|
||||
if (
|
||||
transcription.state == "pending" ||
|
||||
!transcription.result?.["transcript"]
|
||||
) {
|
||||
findOrGenerateTranscription();
|
||||
}
|
||||
|
||||
if (whisperConfig.service === "local") {
|
||||
EnjoyApp.whisper.onProgress((_, p: number) => {
|
||||
if (p > 100) p = 100;
|
||||
setTranscribingProgress(p);
|
||||
});
|
||||
}
|
||||
|
||||
return () => {
|
||||
removeDbListener(onTransactionUpdate);
|
||||
EnjoyApp.whisper.removeProgressListeners();
|
||||
};
|
||||
}, [transcription, media]);
|
||||
|
||||
return {
|
||||
transcription,
|
||||
transcribingProgress,
|
||||
transcribing,
|
||||
generateTranscription,
|
||||
};
|
||||
};
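Note: the hook flattens echogarden's nested alignment timeline to sentence-level entries before persisting. A hedged sketch of the entry shape as consumed here; the authoritative type is `TimelineEntry` in echogarden's `Timeline.d.ts`:

```ts
// Inferred from this hook's usage (type, text, startTime, nested timeline);
// see echogarden for the real definition.
type SentenceEntrySketch = {
  type: "sentence" | "word" | string;
  text: string;
  startTime: number; // seconds
  endTime: number; // seconds
  timeline?: SentenceEntrySketch[]; // word entries under a sentence
};
```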
43 enjoy/src/renderer/hooks/use-video.tsx Normal file
@@ -0,0 +1,43 @@
|
||||
import { useEffect, useContext, useState } from "react";
|
||||
import {
|
||||
DbProviderContext,
|
||||
AppSettingsProviderContext,
|
||||
} from "@renderer/context";
|
||||
import { toast } from "@renderer/components/ui";
|
||||
import { t } from "i18next";
|
||||
|
||||
export const useVideo = (options: { id?: string; md5?: string }) => {
|
||||
const { id, md5 } = options;
|
||||
const { EnjoyApp } = useContext(AppSettingsProviderContext);
|
||||
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
|
||||
const [video, setVideo] = useState<VideoType>(null);
|
||||
|
||||
const onAudioUpdate = (event: CustomEvent) => {
|
||||
const { model, action, record } = event.detail || {};
|
||||
if (model !== "Audio") return;
|
||||
if (record?.id != video?.id) return;
|
||||
if (action !== "update") return;
|
||||
|
||||
setVideo(record);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
const where = id ? { id } : { md5 };
|
||||
EnjoyApp.videos.findOne(where).then((video) => {
|
||||
if (video) {
|
||||
setVideo(video);
|
||||
} else {
|
||||
toast.error(t("models.video.notFound"));
|
||||
}
|
||||
});
|
||||
|
||||
addDblistener(onAudioUpdate);
|
||||
return () => {
|
||||
removeDbListener(onAudioUpdate);
|
||||
};
|
||||
}, [id, md5]);
|
||||
|
||||
return {
|
||||
video,
|
||||
};
|
||||
};
@@ -10,6 +10,7 @@ import i18next, { t } from "i18next";
dayjs.extend(localizedFormat);
dayjs.extend(duration);
dayjs.extend(relativeTime);
import Chart from "chart.js/auto";

export function cn(...inputs: ClassValue[]) {
  return twMerge(clsx(inputs));
@@ -37,7 +38,8 @@ export function formatDuration(
  format = "HH:mm:ss"
) {
  dayjs.locale(i18next.resolvedLanguage?.toLowerCase() || "en");
  return dayjs.duration(duration, unit).format(format);
  const display = dayjs.duration(duration, unit).format(format);
  return display.replace(/^00:/, "");
}

export function bytesToSize(bytes: number) {
@@ -78,3 +80,60 @@ export function formatDate(date: string | Date) {
    return then.fromNow();
  }
}

export function renderPitchContour(options: {
  wrapper: HTMLElement;
  canvasId: string;
  labels: string[];
  datasets: Chart["data"]["datasets"];
}) {
  const { wrapper, datasets, labels, canvasId } = options;

  const width = wrapper.getBoundingClientRect().width;
  const height = wrapper.getBoundingClientRect().height;
  const canvas = document.createElement("canvas");
  canvas.id = canvasId;
  canvas.style.position = "absolute";
  canvas.style.width = `${width}px`;
  canvas.style.height = `${height}px`;
  canvas.style.top = "0";
  canvas.style.left = "0";

  wrapper.appendChild(canvas);

  new Chart(canvas, {
    type: "line",
    data: {
      labels,
      datasets,
    },
    options: {
      plugins: {
        legend: {
          display: false,
        },
        title: {
          display: false,
        },
      },
      scales: {
        x: {
          beginAtZero: true,
          ticks: {
            autoSkip: false,
          },
          display: false,
          grid: {
            display: false,
          },
          border: {
            display: false,
          },
        },
        y: {
          display: false,
        },
      },
    },
  });
}
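Note: `renderPitchContour` simply stacks an absolutely positioned Chart.js canvas on top of whatever wrapper it is given. A hedged usage sketch over the wavesurfer wrapper, reusing the cached frequencies (the slice bounds and canvas id are hypothetical):

```ts
const wrapper = wavesurfer.getWrapper() as HTMLElement;
// Hypothetical: pick the frequency samples covering the active region.
const frequencies = waveform.frequencies.slice(100, 200);

renderPitchContour({
  wrapper,
  canvasId: "pitch-contour-demo", // hypothetical id
  labels: new Array(frequencies.length).fill(""),
  datasets: [
    {
      data: frequencies,
      cubicInterpolationMode: "monotone", // smooth the contour
      pointRadius: 0,
    },
  ],
});
```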
@@ -1,8 +1,9 @@
import { useParams , useNavigate } from "react-router-dom";
import { AudioDetail } from "@renderer/components";
import { useParams, useNavigate } from "react-router-dom";
import { AudioPlayer } from "@renderer/components";
import { Button } from "@renderer/components/ui";
import { ChevronLeftIcon } from "lucide-react";
import { t } from "i18next";
import { MediaPlayerProvider } from "@renderer/context";

export default () => {
  const navigate = useNavigate();
@@ -10,15 +11,17 @@ export default () => {

  return (
    <>
      <div className="h-full px-4 py-6 xl:px-8">
        <div className="flex space-x-1 items-center mb-4">
      <div className="h-full relative">
        <div className="flex space-x-1 items-center h-14 px-4 xl:px-8">
          <Button variant="ghost" size="icon" onClick={() => navigate(-1)}>
            <ChevronLeftIcon className="w-5 h-5" />
          </Button>
          <span>{t("shadowingAudio")}</span>
        </div>

        <AudioDetail id={id} />
        <MediaPlayerProvider>
          <AudioPlayer id={id} />
        </MediaPlayerProvider>
      </div>
    </>
  );
@@ -15,6 +15,7 @@ import { t } from "i18next";
import {
  DbProviderContext,
  AppSettingsProviderContext,
  MediaPlayerProvider,
} from "@renderer/context";
import { messagesReducer } from "@renderer/reducers";
import { v4 as uuidv4 } from "uuid";
@@ -249,52 +250,54 @@ export default () => {
        </Sheet>
      </div>

      <ScrollArea ref={containerRef} className="px-4 flex-1">
        <div className="messages flex flex-col-reverse gap-6 my-6">
          <div className="w-full h-16"></div>
          {messages.map((message) => (
            <MessageComponent
              key={message.id}
              message={message}
              configuration={{
                type: conversation.type,
                ...conversation.configuration,
              }}
              onResend={() => {
                if (message.status === "error") {
                  dispatchMessages({ type: "destroy", record: message });
                }
      <MediaPlayerProvider>
        <ScrollArea ref={containerRef} className="px-4 flex-1">
          <div className="messages flex flex-col-reverse gap-6 my-6">
            <div className="w-full h-16"></div>
            {messages.map((message) => (
              <MessageComponent
                key={message.id}
                message={message}
                configuration={{
                  type: conversation.type,
                  ...conversation.configuration,
                }}
                onResend={() => {
                  if (message.status === "error") {
                    dispatchMessages({ type: "destroy", record: message });
                  }

                handleSubmit(message.content);
              }}
              onRemove={() => {
                if (message.status === "error") {
                  dispatchMessages({ type: "destroy", record: message });
                } else {
                  EnjoyApp.messages.destroy(message.id).catch((err) => {
                    toast.error(err.message);
                  });
                }
              }}
            />
          ))}
          {offset > -1 && (
            <div className="flex justify-center">
              <Button
                variant="ghost"
                onClick={() => fetchMessages()}
                disabled={loading || offset === -1}
                className="px-4 py-2"
              >
                {t("loadMore")}
                {loading && (
                  <LoaderIcon className="h-4 w-4 animate-spin ml-2" />
                )}
              </Button>
            </div>
          )}
        </div>
      </ScrollArea>
                  handleSubmit(message.content);
                }}
                onRemove={() => {
                  if (message.status === "error") {
                    dispatchMessages({ type: "destroy", record: message });
                  } else {
                    EnjoyApp.messages.destroy(message.id).catch((err) => {
                      toast.error(err.message);
                    });
                  }
                }}
              />
            ))}
            {offset > -1 && (
              <div className="flex justify-center">
                <Button
                  variant="ghost"
                  onClick={() => fetchMessages()}
                  disabled={loading || offset === -1}
                  className="px-4 py-2"
                >
                  {t("loadMore")}
                  {loading && (
                    <LoaderIcon className="h-4 w-4 animate-spin ml-2" />
                  )}
                </Button>
              </div>
            )}
          </div>
        </ScrollArea>
      </MediaPlayerProvider>

      <div className="px-4 absolute w-full bottom-0 left-0 h-14 bg-muted z-50">
        <div className="focus-within:bg-background px-4 py-2 flex items-center space-x-4 rounded-lg border">
@@ -64,12 +64,12 @@ export default () => {

  const presets = CONVERSATION_PRESETS.map((preset) =>
    Object.assign({}, preset, {
      engine: currentEngine.name,
      engine: currentEngine?.name,
      configuration: {
        ...preset.configuration,
        tts: {
          ...preset.configuration.tts,
          engine: currentEngine.name,
          engine: currentEngine?.name,
        },
      },
    })
@@ -78,7 +78,7 @@ export default () => {
  const customPreset = {
    key: "custom",
    name: t("custom"),
    engine: currentEngine.name,
    engine: currentEngine?.name,
    configuration: {
      type: "gpt",
      model: "gpt-4-turbo-preview",
@@ -92,7 +92,7 @@ export default () => {
      historyBufferSize: 0,
      tts: {
        baseUrl: "",
        engine: currentEngine.name,
        engine: currentEngine?.name,
        model: "tts-1",
        voice: "alloy",
      },
@@ -107,7 +107,7 @@ export default () => {
      type: "tts",
      tts: {
        baseUrl: "",
        engine: currentEngine.name,
        engine: currentEngine?.name,
        model: "tts-1",
        voice: "alloy",
      },
@@ -1,8 +1,9 @@
import { useParams , useNavigate } from "react-router-dom";
import { VideoDetail } from "@renderer/components";
import { useParams, useNavigate } from "react-router-dom";
import { VideoPlayer } from "@renderer/components";
import { Button } from "@renderer/components/ui";
import { ChevronLeftIcon } from "lucide-react";
import { t } from "i18next";
import { MediaPlayerProvider } from "@renderer/context";

export default () => {
  const navigate = useNavigate();
@@ -10,15 +11,17 @@ export default () => {

  return (
    <>
      <div className="h-full px-4 py-6 xl:px-8">
        <div className="flex space-x-1 items-center mb-4">
      <div className="h-full relative">
        <div className="flex space-x-1 items-center h-14 px-4 xl:px-8">
          <Button variant="ghost" size="icon" onClick={() => navigate(-1)}>
            <ChevronLeftIcon className="w-5 h-5" />
          </Button>
          <span>{t("shadowingVideo")}</span>
        </div>

        <VideoDetail id={id} />
        <MediaPlayerProvider>
          <VideoPlayer id={id} />
        </MediaPlayerProvider>
      </div>
    </>
  );
1 enjoy/src/types/audio.d.ts vendored
@@ -1,4 +1,5 @@
|
||||
type AudioType = {
|
||||
mediaType: string,
|
||||
id: string;
|
||||
source: string;
|
||||
name: string;
|
||||
|
||||
8 enjoy/src/types/enjoy-app.d.ts vendored
@@ -206,6 +206,14 @@ type EnjoyAppType = {
|
||||
}
|
||||
) => Promise<SpeechType>;
|
||||
};
|
||||
echogarden: {
|
||||
align: (
|
||||
input: string | Uint8Array,
|
||||
transcript: string,
|
||||
options?: any
|
||||
) => Promise<AlignmentResult>;
|
||||
check: () => Promise<boolean>;
|
||||
};
|
||||
whisper: {
|
||||
config: () => Promise<WhisperConfigType>;
|
||||
check: () => Promise<{ success: boolean; log: string }>;
|
||||
|
||||
2 enjoy/src/types/transcription.d.ts vendored
@@ -5,7 +5,7 @@ type TranscriptionType = {
|
||||
state: "pending" | "processing" | "finished";
|
||||
engine: string;
|
||||
model: string;
|
||||
result: TranscriptionResultSegmentGroupType[];
|
||||
result: AlignmentResult;
|
||||
};
|
||||
|
||||
type TranscriptionResultSegmentType = {
|
||||
|
||||
1 enjoy/src/types/video.d.ts vendored
@@ -1,4 +1,5 @@
|
||||
type VideoType = {
|
||||
mediaType: string,
|
||||
id: string;
|
||||
source: string;
|
||||
name: string;
|
||||
|
||||
@@ -1,7 +1,19 @@
import Pitchfinder from "pitchfinder";
import { END_OF_SENTENCE_REGEX, MAGIC_TOKEN_REGEX } from "./constants";

export function generatePitch(peaks: Float32Array, sampleRate: number) {
  const detectPitch = Pitchfinder.YIN({ sampleRate });
export const extractFrequencies = (props: {
  peaks: Float32Array;
  sampleRate: number;
}): number[] => {
  const { peaks, sampleRate } = props;

  const detectPitch = Pitchfinder.AMDF({
    sampleRate,
    sensitivity: 0.05,
    minFrequency: 100,
    maxFrequency: 1000,
    ratio: 5,
  });
  const duration = peaks.length / sampleRate;
  const bpm = peaks.length / duration / 60;

@@ -10,24 +22,8 @@ export function generatePitch(peaks: Float32Array, sampleRate: number) {
    quantization: bpm,
  });

  // Find the baseline frequency (the value that appears most often)
  const frequencyMap: any = {};
  let maxAmount = 0;
  let baseFrequency = 0;
  frequencies.forEach((frequency) => {
    if (!frequency) return;
    const tolerance = 10;
    frequency = Math.round(frequency * tolerance) / tolerance;
    if (!frequencyMap[frequency]) frequencyMap[frequency] = 0;
    frequencyMap[frequency] += 1;
    if (frequencyMap[frequency] > maxAmount) {
      maxAmount = frequencyMap[frequency];
      baseFrequency = frequency;
    }
  });

  return { frequencies, baseFrequency };
}
  return frequencies;
};

export function milisecondsToTimestamp(ms: number) {
  const hours = Math.floor(ms / 3600000).toString();
@@ -40,8 +36,6 @@ export function milisecondsToTimestamp(ms: number) {
  )}:${seconds.padStart(2, "0")},${milliseconds}`;
}

export const MAGIC_TOKENS = ["Mrs.", "Ms.", "Mr.", "Dr.", "Prof.", "St."];
export const END_OF_WORD_REGEX = /[^\.!,\?][\.!\?]/g;
export const groupTranscription = (
  transcription: TranscriptionResultSegmentType[]
): TranscriptionResultSegmentGroupType[] => {
@@ -75,8 +69,8 @@ export const groupTranscription = (
    group.push(segment);

    if (
      !MAGIC_TOKENS.includes(text) &&
      segment.text.trim().match(END_OF_WORD_REGEX)
      !text.match(MAGIC_TOKEN_REGEX) &&
      segment.text.trim().match(END_OF_SENTENCE_REGEX)
    ) {
      // Group a complete sentence;
      groups.push(generateGroup(group));
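Note: `extractFrequencies` now returns the raw AMDF pitch track (one estimate per analysis frame, null where unvoiced) instead of the old `{ frequencies, baseFrequency }` pair, with the detector constrained to the 100-1000 Hz speech band. A hedged usage sketch, assuming the samples come from a decoded first channel as in the providers above:

```ts
import { extractFrequencies } from "@/utils";

// `audioBuffer` is an assumed Web Audio AudioBuffer decoded elsewhere.
const frequencies = extractFrequencies({
  peaks: audioBuffer.getChannelData(0),
  sampleRate: audioBuffer.sampleRate,
});

// Feed straight into renderPitchContour() as a line dataset;
// null entries leave gaps where no pitch was detected.
```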
@@ -70,6 +70,7 @@ module.exports = {
  plugins: [
    require("tailwindcss-animate"),
    require("@tailwindcss/typography"),
    require("tailwind-scrollbar"),
    require("tailwind-scrollbar-hide"),
    require("@vidstack/react/tailwind.cjs"),
  ],
@@ -23,21 +23,11 @@ export default defineConfig((env) => {
      formats: ["es"],
    },
    rollupOptions: {
      external,
      // external: [
      //   "axios",
      //   "child_process",
      //   "crypto",
      //   "fs-extra",
      //   "fs",
      //   "path",
      //   "sequelize",
      //   "umzug",
      //   "sqlite3",
      //   "fluent-ffmpeg",
      //   "ffmpeg-static",
      //   "@andrkrn/ffprobe-static",
      // ],
      external: [...external, "echogarden/dist/api/API.js"],
      output: {
        strict: false,
      },
      plugins: [],
    },
    commonjsOptions: {
      transformMixedEsModules: true,