Refactor azure config (#729)

* save token id

* save token id in assessment result

* add use-pronunciation-assessment hook

Author: an-lee
Date: 2024-06-27 18:02:16 +08:00
Committed by: GitHub
Parent: 54872bb449
Commit: 63fa482363
6 changed files with 105 additions and 10 deletions


@@ -172,7 +172,11 @@ export class Recording extends Model<Recording> {
       logger,
     });
-    const { token, region } = await webApi.generateSpeechToken({
+    const {
+      id: tokenId,
+      token,
+      region,
+    } = await webApi.generateSpeechToken({
       targetId: this.id,
       targetType: "Recording",
     });
@@ -191,6 +195,7 @@
       }
     );
     resultJson.duration = this.duration;
+    resultJson.tokenId = tokenId;
     const _pronunciationAssessment = await PronunciationAssessment.create(
       {
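
For context, both hunks assume that webApi.generateSpeechToken now returns the persisted token record's id alongside the credentials. A minimal sketch of the assumed API surface, inferred from this commit's call sites rather than taken from a definitive signature:

```ts
// Assumed response shape of webApi.generateSpeechToken (sketch, not from this commit).
interface SpeechTokenResponse {
  id: string | number; // persisted token record id, saved onto results as tokenId
  token: string;       // short-lived Azure Speech authorization token
  region: string;      // Azure region the token was issued for
}

// Assumed client surface, inferred from the call sites in this commit.
interface WebApiClient {
  generateSpeechToken(params?: {
    targetId?: string;
    targetType?: string; // e.g. "Recording", or media.mediaType in useTranscribe
  }): Promise<SpeechTokenResponse>;
}
```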


@@ -56,7 +56,10 @@ export class Transcription extends Model<Transcription> {
   model: string;
 
   @Column(DataType.JSON)
-  result: Partial<AlignmentResult> & { originalText?: string };
+  result: Partial<AlignmentResult> & {
+    originalText?: string;
+    tokenId?: string | number;
+  };
 
   @Column(DataType.DATE)
   syncedAt: Date;
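
Because the result column is DataType.JSON, the new optional field round-trips without a schema migration. A hypothetical read-side helper (illustrative only, not part of the commit):

```ts
// Illustrative only; the type mirrors the widened column above.
type StoredTranscriptionResult = {
  originalText?: string;
  tokenId?: string | number;
};

// Correlate a stored result with the server-issued speech token, if any.
function speechTokenIdOf(
  result?: StoredTranscriptionResult
): string | number | undefined {
  return result?.tokenId;
}
```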


@@ -4,6 +4,7 @@ export * from "./use-camdict";
 export * from "./use-conversation";
 export * from "./use-notes";
 export * from "./use-recordings";
+export * from "./use-pronunciation-assessments";
 export * from "./use-segments";
 export * from "./use-transcribe";
 export * from "./use-transcriptions";


@@ -0,0 +1,79 @@
+import * as sdk from "microsoft-cognitiveservices-speech-sdk";
+import { useContext } from "react";
+import { AppSettingsProviderContext } from "@renderer/context";
+
+export const usePronunciationAssessments = () => {
+  const { webApi } = useContext(AppSettingsProviderContext);
+
+  const assess = async (
+    params: {
+      blob: Blob;
+      language: string;
+      reference?: string;
+    },
+    options?: {
+      targetId?: string;
+      targetType?: string;
+    }
+  ) => {
+    const { blob, language, reference } = params;
+    const { id, token, region } = await webApi.generateSpeechToken(options);
+
+    const config = sdk.SpeechConfig.fromAuthorizationToken(token, region);
+    const audioConfig = sdk.AudioConfig.fromWavFileInput(
+      new File([blob], "audio.wav")
+    );
+    const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig(
+      reference,
+      sdk.PronunciationAssessmentGradingSystem.HundredMark,
+      sdk.PronunciationAssessmentGranularity.Phoneme,
+      true
+    );
+    pronunciationAssessmentConfig.phonemeAlphabet = "IPA";
+
+    // setting the recognition language
+    config.speechRecognitionLanguage = language;
+
+    // create the speech recognizer.
+    const reco = new sdk.SpeechRecognizer(config, audioConfig);
+    pronunciationAssessmentConfig.applyTo(reco);
+
+    return new Promise((resolve, reject) => {
+      reco.recognizeOnceAsync((result) => {
+        reco.close();
+
+        switch (result.reason) {
+          case sdk.ResultReason.RecognizedSpeech:
+            const pronunciationResult =
+              sdk.PronunciationAssessmentResult.fromResult(result);
+            console.debug(
+              "Received pronunciation assessment result.",
+              pronunciationResult.detailResult
+            );
+            resolve(pronunciationResult);
+            break;
+          case sdk.ResultReason.NoMatch:
+            reject(new Error("No speech could be recognized."));
+            break;
+          case sdk.ResultReason.Canceled:
+            const cancellationDetails =
+              sdk.CancellationDetails.fromResult(result);
+            console.debug(
+              "CANCELED: Reason=" +
+                cancellationDetails.reason +
+                " ErrorDetails=" +
+                cancellationDetails.errorDetails
+            );
+            reject(new Error(cancellationDetails.errorDetails));
+            break;
+          default:
+            reject(result);
+        }
+      });
+    });
+  };
+
+  return {
+    assess,
+  };
+};
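
A hypothetical caller of the new hook might look like the sketch below; the component name, its inputs, and the locale are illustrative, and only usePronunciationAssessments and its parameter shapes come from the commit:

```ts
import { usePronunciationAssessments } from "@renderer/hooks";

// Illustrative component; recordingId, recordingBlob, and referenceText are assumed inputs.
export const PronunciationScoreButton = (props: {
  recordingId: string;
  recordingBlob: Blob;
  referenceText: string;
}) => {
  const { assess } = usePronunciationAssessments();

  const handleAssess = async () => {
    try {
      const result = await assess(
        {
          blob: props.recordingBlob,
          language: "en-US",
          reference: props.referenceText,
        },
        { targetId: props.recordingId, targetType: "Recording" }
      );
      // Resolves with sdk.PronunciationAssessmentResult on success.
      console.log(result);
    } catch (err) {
      console.error(err);
    }
  };

  // Render omitted; wire handleAssess to a button's onClick.
  return null;
};
```

On success the hook resolves with the SDK's PronunciationAssessmentResult, whose accuracyScore, fluencyScore, and completenessScore properties carry the hundred-mark scores.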


@@ -42,6 +42,7 @@ export const useTranscribe = () => {
     model: string;
     alignmentResult: AlignmentResult;
     originalText?: string;
+    tokenId?: number;
   }> => {
     const url = await transcode(mediaSrc);
     const { targetId, targetType, originalText, language, service } =
@@ -173,8 +174,9 @@
     engine: string;
     model: string;
     text: string;
+    tokenId: number;
   }> => {
-    const { token, region } = await webApi.generateSpeechToken(params);
+    const { id, token, region } = await webApi.generateSpeechToken(params);
     const config = sdk.SpeechConfig.fromAuthorizationToken(token, region);
     const audioConfig = sdk.AudioConfig.fromWavFileInput(
       new File([blob], "audio.wav")
@@ -217,6 +219,7 @@
       engine: "azure",
       model: "whisper",
       text: results.map((result) => result.DisplayText).join(" "),
+      tokenId: id,
     });
   };


@@ -78,13 +78,16 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
     setTranscribing(true);
     setTranscribingProgress(0);
     try {
-      const { engine, model, alignmentResult } = await transcribe(media.src, {
-        targetId: media.id,
-        targetType: media.mediaType,
-        originalText,
-        language,
-        service,
-      });
+      const { engine, model, alignmentResult, tokenId } = await transcribe(
+        media.src,
+        {
+          targetId: media.id,
+          targetType: media.mediaType,
+          originalText,
+          language,
+          service,
+        }
+      );
 
       let timeline: TimelineEntry[] = [];
       alignmentResult.timeline.forEach((t) => {
@@ -174,6 +177,7 @@
         timeline: timeline,
         transcript: alignmentResult.transcript,
         originalText,
+        tokenId,
       },
       engine,
       model,
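
Taken together, the commit threads the speech-token id end to end: generateSpeechToken returns it, the Azure branch of useTranscribe forwards it, and useTranscriptions persists it inside the result JSON. A sketch of the assumed persisted shape, with field names taken from the hunks above and the types an approximation:

```ts
// TimelineEntry comes from the app's shared types; treated as opaque here.
type TimelineEntry = unknown;

// Assumed shape of the persisted transcription.result after this commit
// (a sketch, not the definitive type).
type PersistedTranscriptionResult = {
  timeline: TimelineEntry[];  // built from alignmentResult.timeline
  transcript: string;         // alignmentResult.transcript
  originalText?: string;      // optional caller-supplied reference text
  tokenId?: string | number;  // links the result to the server-issued speech token
};
```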