Specify target when generating Azure speech token

This commit is contained in:
an-lee
2024-03-04 09:26:44 +08:00
parent 747b406c41
commit e8bea7cb0f
7 changed files with 35 additions and 17 deletions

View File

@@ -166,8 +166,11 @@ export class Client {
return this.api.delete(`/api/mine/recordings/${id}`);
}
generateSpeechToken(): Promise<{ token: string; region: string }> {
return this.api.post("/api/speech/tokens");
generateSpeechToken(params?: {
targetType?: string;
targetId?: string;
}): Promise<{ token: string; region: string }> {
return this.api.post("/api/speech/tokens", decamelizeKeys(params || {}));
}
syncPronunciationAssessment(

View File

@@ -162,7 +162,10 @@ export class Recording extends Model<Recording> {
logger,
});
const { token, region } = await webApi.generateSpeechToken();
const { token, region } = await webApi.generateSpeechToken({
targetId: this.id,
targetType: "Recording",
});
const sdk = new AzureSpeechSdk(token, region);
const result = await sdk.pronunciationAssessment({

View File

@@ -95,7 +95,10 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
setTranscribing(true);
setTranscribingProgress(0);
try {
const { engine, model, result } = await transcribe(audio.src);
const { engine, model, result } = await transcribe(audio.src, {
targetId: audio.id,
targetType: "Audio",
});
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result,

View File

@@ -252,10 +252,9 @@ export const AudiosComponent = () => {
onClick={async () => {
if (!transcribing) return;
transcribe({
mediaId: transcribing.id,
mediaSrc: transcribing.src,
mediaType: "Audio",
transcribe(transcribing.src, {
targetId: transcribing.id,
targetType: "Audio",
}).finally(() => {
setTranscribing(null);
});

View File

@@ -91,7 +91,10 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
setTranscribing(true);
setTranscribingProgress(0);
try {
const { engine, model, result } = await transcribe(video.src);
const { engine, model, result } = await transcribe(video.src, {
targetId: video.id,
targetType: "Video",
});
await EnjoyApp.transcriptions.update(transcription.id, {
state: "finished",
result,

View File

@@ -252,10 +252,9 @@ export const VideosComponent = () => {
onClick={async () => {
if (!transcribing) return;
transcribe({
mediaId: transcribing.id,
mediaSrc: transcribing.src,
mediaType: "Video",
transcribe(transcribing.src, {
targetId: transcribing.id,
targetType: "Video",
}).finally(() => {
setTranscribing(null);
});

View File

@@ -72,7 +72,11 @@ export const useTranscribe = () => {
};
const transcribe = async (
mediaSrc: string
mediaSrc: string,
params?: {
targetId?: string;
targetType?: string;
}
): Promise<{
engine: string;
model: string;
@@ -87,7 +91,7 @@ export const useTranscribe = () => {
} else if (whisperConfig.service === "openai") {
return transcribeByOpenAi(blob);
} else if (whisperConfig.service === "azure") {
return transcribeByAzureAi(blob);
return transcribeByAzureAi(blob, params);
} else {
throw new Error(t("whisperServiceNotSupported"));
}
@@ -200,13 +204,17 @@ export const useTranscribe = () => {
};
const transcribeByAzureAi = async (
blob: Blob
blob: Blob,
params?: {
targetId?: string;
targetType?: string;
}
): Promise<{
engine: string;
model: string;
result: TranscriptionResultSegmentGroupType[];
}> => {
const { token, region } = await webApi.generateSpeechToken();
const { token, region } = await webApi.generateSpeechToken(params);
const config = sdk.SpeechConfig.fromAuthorizationToken(token, region);
const audioConfig = sdk.AudioConfig.fromWavFileInput(
new File([blob], "audio.wav")