diff --git a/enjoy/src/api/client.ts b/enjoy/src/api/client.ts index cd796397..3c1b41ff 100644 --- a/enjoy/src/api/client.ts +++ b/enjoy/src/api/client.ts @@ -166,8 +166,11 @@ export class Client { return this.api.delete(`/api/mine/recordings/${id}`); } - generateSpeechToken(): Promise<{ token: string; region: string }> { - return this.api.post("/api/speech/tokens"); + generateSpeechToken(params?: { + targetType?: string; + targetId?: string; + }): Promise<{ token: string; region: string }> { + return this.api.post("/api/speech/tokens", decamelizeKeys(params || {})); } syncPronunciationAssessment( diff --git a/enjoy/src/main/db/models/recording.ts b/enjoy/src/main/db/models/recording.ts index 9c33dc10..34d50aa6 100644 --- a/enjoy/src/main/db/models/recording.ts +++ b/enjoy/src/main/db/models/recording.ts @@ -162,7 +162,10 @@ export class Recording extends Model { logger, }); - const { token, region } = await webApi.generateSpeechToken(); + const { token, region } = await webApi.generateSpeechToken({ + targetId: this.id, + targetType: "Recording", + }); const sdk = new AzureSpeechSdk(token, region); const result = await sdk.pronunciationAssessment({ diff --git a/enjoy/src/renderer/components/audios/audio-detail.tsx b/enjoy/src/renderer/components/audios/audio-detail.tsx index a1508329..78fd3746 100644 --- a/enjoy/src/renderer/components/audios/audio-detail.tsx +++ b/enjoy/src/renderer/components/audios/audio-detail.tsx @@ -95,7 +95,10 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => { setTranscribing(true); setTranscribingProgress(0); try { - const { engine, model, result } = await transcribe(audio.src); + const { engine, model, result } = await transcribe(audio.src, { + targetId: audio.id, + targetType: "Audio", + }); await EnjoyApp.transcriptions.update(transcription.id, { state: "finished", result, diff --git a/enjoy/src/renderer/components/audios/audios-component.tsx b/enjoy/src/renderer/components/audios/audios-component.tsx index a4e68c01..6feaf78d 100644 --- a/enjoy/src/renderer/components/audios/audios-component.tsx +++ b/enjoy/src/renderer/components/audios/audios-component.tsx @@ -252,10 +252,9 @@ export const AudiosComponent = () => { onClick={async () => { if (!transcribing) return; - transcribe({ - mediaId: transcribing.id, - mediaSrc: transcribing.src, - mediaType: "Audio", + transcribe(transcribing.src, { + targetId: transcribing.id, + targetType: "Audio", }).finally(() => { setTranscribing(null); }); diff --git a/enjoy/src/renderer/components/videos/video-detail.tsx b/enjoy/src/renderer/components/videos/video-detail.tsx index f980934f..e226813d 100644 --- a/enjoy/src/renderer/components/videos/video-detail.tsx +++ b/enjoy/src/renderer/components/videos/video-detail.tsx @@ -91,7 +91,10 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => { setTranscribing(true); setTranscribingProgress(0); try { - const { engine, model, result } = await transcribe(video.src); + const { engine, model, result } = await transcribe(video.src, { + targetId: video.id, + targetType: "Video", + }); await EnjoyApp.transcriptions.update(transcription.id, { state: "finished", result, diff --git a/enjoy/src/renderer/components/videos/videos-component.tsx b/enjoy/src/renderer/components/videos/videos-component.tsx index 26a6e591..d3cf7f1c 100644 --- a/enjoy/src/renderer/components/videos/videos-component.tsx +++ b/enjoy/src/renderer/components/videos/videos-component.tsx @@ -252,10 +252,9 @@ export const VideosComponent = () => { onClick={async () => { if (!transcribing) return; - transcribe({ - mediaId: transcribing.id, - mediaSrc: transcribing.src, - mediaType: "Video", + transcribe(transcribing.src, { + targetId: transcribing.id, + targetType: "Video", }).finally(() => { setTranscribing(null); }); diff --git a/enjoy/src/renderer/hooks/use-transcribe.tsx b/enjoy/src/renderer/hooks/use-transcribe.tsx index 3441099c..8f6495fc 100644 --- a/enjoy/src/renderer/hooks/use-transcribe.tsx +++ b/enjoy/src/renderer/hooks/use-transcribe.tsx @@ -72,7 +72,11 @@ export const useTranscribe = () => { }; const transcribe = async ( - mediaSrc: string + mediaSrc: string, + params?: { + targetId?: string; + targetType?: string; + } ): Promise<{ engine: string; model: string; @@ -87,7 +91,7 @@ export const useTranscribe = () => { } else if (whisperConfig.service === "openai") { return transcribeByOpenAi(blob); } else if (whisperConfig.service === "azure") { - return transcribeByAzureAi(blob); + return transcribeByAzureAi(blob, params); } else { throw new Error(t("whisperServiceNotSupported")); } @@ -200,13 +204,17 @@ export const useTranscribe = () => { }; const transcribeByAzureAi = async ( - blob: Blob + blob: Blob, + params?: { + targetId?: string; + targetType?: string; + } ): Promise<{ engine: string; model: string; result: TranscriptionResultSegmentGroupType[]; }> => { - const { token, region } = await webApi.generateSpeechToken(); + const { token, region } = await webApi.generateSpeechToken(params); const config = sdk.SpeechConfig.fromAuthorizationToken(token, region); const audioConfig = sdk.AudioConfig.fromWavFileInput( new File([blob], "audio.wav")