diff --git a/enjoy/src/api/client.ts b/enjoy/src/api/client.ts
index cd796397..3c1b41ff 100644
--- a/enjoy/src/api/client.ts
+++ b/enjoy/src/api/client.ts
@@ -166,8 +166,11 @@ export class Client {
     return this.api.delete(`/api/mine/recordings/${id}`);
   }
 
-  generateSpeechToken(): Promise<{ token: string; region: string }> {
-    return this.api.post("/api/speech/tokens");
+  generateSpeechToken(params?: {
+    targetType?: string; // kind of record the token is requested for; call sites in this patch pass "Recording", "Audio" or "Video"
+    targetId?: string; // id of that record; call sites in this patch pass the record's `id`
+  }): Promise<{ token: string; region: string }> {
+    return this.api.post("/api/speech/tokens", decamelizeKeys(params || {})); // `|| {}` keeps the zero-argument call sending an object body; NOTE(review): decamelizeKeys presumably converts keys to snake_case — confirm
   }
 
   syncPronunciationAssessment(
diff --git a/enjoy/src/main/db/models/recording.ts b/enjoy/src/main/db/models/recording.ts
index 9c33dc10..34d50aa6 100644
--- a/enjoy/src/main/db/models/recording.ts
+++ b/enjoy/src/main/db/models/recording.ts
@@ -162,7 +162,10 @@ export class Recording extends Model<Recording> {
       logger,
     });
 
-    const { token, region } = await webApi.generateSpeechToken();
+    const { token, region } = await webApi.generateSpeechToken({
+      targetId: this.id,
+      targetType: "Recording",
+    });
     const sdk = new AzureSpeechSdk(token, region);
 
     const result = await sdk.pronunciationAssessment({
diff --git a/enjoy/src/renderer/components/audios/audio-detail.tsx b/enjoy/src/renderer/components/audios/audio-detail.tsx
index a1508329..78fd3746 100644
--- a/enjoy/src/renderer/components/audios/audio-detail.tsx
+++ b/enjoy/src/renderer/components/audios/audio-detail.tsx
@@ -95,7 +95,10 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
     setTranscribing(true);
     setTranscribingProgress(0);
     try {
-      const { engine, model, result } = await transcribe(audio.src);
+      const { engine, model, result } = await transcribe(audio.src, {
+        targetId: audio.id,
+        targetType: "Audio",
+      });
       await EnjoyApp.transcriptions.update(transcription.id, {
         state: "finished",
         result,
diff --git a/enjoy/src/renderer/components/audios/audios-component.tsx b/enjoy/src/renderer/components/audios/audios-component.tsx
index a4e68c01..6feaf78d 100644
--- a/enjoy/src/renderer/components/audios/audios-component.tsx
+++ b/enjoy/src/renderer/components/audios/audios-component.tsx
@@ -252,10 +252,9 @@ export const AudiosComponent = () => {
               onClick={async () => {
                 if (!transcribing) return;
 
-                transcribe({
-                  mediaId: transcribing.id,
-                  mediaSrc: transcribing.src,
-                  mediaType: "Audio",
+                transcribe(transcribing.src, {
+                  targetId: transcribing.id,
+                  targetType: "Audio",
                 }).finally(() => {
                   setTranscribing(null);
                 });
diff --git a/enjoy/src/renderer/components/videos/video-detail.tsx b/enjoy/src/renderer/components/videos/video-detail.tsx
index f980934f..e226813d 100644
--- a/enjoy/src/renderer/components/videos/video-detail.tsx
+++ b/enjoy/src/renderer/components/videos/video-detail.tsx
@@ -91,7 +91,10 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
     setTranscribing(true);
     setTranscribingProgress(0);
     try {
-      const { engine, model, result } = await transcribe(video.src);
+      const { engine, model, result } = await transcribe(video.src, {
+        targetId: video.id,
+        targetType: "Video",
+      });
       await EnjoyApp.transcriptions.update(transcription.id, {
         state: "finished",
         result,
diff --git a/enjoy/src/renderer/components/videos/videos-component.tsx b/enjoy/src/renderer/components/videos/videos-component.tsx
index 26a6e591..d3cf7f1c 100644
--- a/enjoy/src/renderer/components/videos/videos-component.tsx
+++ b/enjoy/src/renderer/components/videos/videos-component.tsx
@@ -252,10 +252,9 @@ export const VideosComponent = () => {
               onClick={async () => {
                 if (!transcribing) return;
 
-                transcribe({
-                  mediaId: transcribing.id,
-                  mediaSrc: transcribing.src,
-                  mediaType: "Video",
+                transcribe(transcribing.src, {
+                  targetId: transcribing.id,
+                  targetType: "Video",
                 }).finally(() => {
                   setTranscribing(null);
                 });
diff --git a/enjoy/src/renderer/hooks/use-transcribe.tsx b/enjoy/src/renderer/hooks/use-transcribe.tsx
index 3441099c..8f6495fc 100644
--- a/enjoy/src/renderer/hooks/use-transcribe.tsx
+++ b/enjoy/src/renderer/hooks/use-transcribe.tsx
@@ -72,7 +72,11 @@ export const useTranscribe = () => {
   };
 
   const transcribe = async (
-    mediaSrc: string
+    mediaSrc: string,
+    params?: {
+      targetId?: string;
+      targetType?: string;
+    }
   ): Promise<{
     engine: string;
     model: string;
@@ -87,7 +91,7 @@ export const useTranscribe = () => {
     } else if (whisperConfig.service === "openai") {
       return transcribeByOpenAi(blob);
     } else if (whisperConfig.service === "azure") {
-      return transcribeByAzureAi(blob);
+      return transcribeByAzureAi(blob, params);
     } else {
       throw new Error(t("whisperServiceNotSupported"));
     }
@@ -200,13 +204,17 @@ export const useTranscribe = () => {
   };
 
   const transcribeByAzureAi = async (
-    blob: Blob
+    blob: Blob,
+    params?: {
+      targetId?: string;
+      targetType?: string;
+    }
   ): Promise<{
     engine: string;
     model: string;
     result: TranscriptionResultSegmentGroupType[];
   }> => {
-    const { token, region } = await webApi.generateSpeechToken();
+    const { token, region } = await webApi.generateSpeechToken(params);
     const config = sdk.SpeechConfig.fromAuthorizationToken(token, region);
     const audioConfig = sdk.AudioConfig.fromWavFileInput(
       new File([blob], "audio.wav")