Refactor azure config (#729)
* save token id
* save token id in assessment result
* add use-pronunciation-assessments hook
This commit is contained in:
@@ -172,7 +172,11 @@ export class Recording extends Model<Recording> {
|
||||
logger,
|
||||
});
|
||||
|
||||
const { token, region } = await webApi.generateSpeechToken({
|
||||
const {
|
||||
id: tokenId,
|
||||
token,
|
||||
region,
|
||||
} = await webApi.generateSpeechToken({
|
||||
targetId: this.id,
|
||||
targetType: "Recording",
|
||||
});
|
||||
@@ -191,6 +195,7 @@ export class Recording extends Model<Recording> {
|
||||
}
|
||||
);
|
||||
resultJson.duration = this.duration;
|
||||
resultJson.tokenId = tokenId;
|
||||
|
||||
const _pronunciationAssessment = await PronunciationAssessment.create(
|
||||
{
|
||||
|
||||
@@ -56,7 +56,10 @@ export class Transcription extends Model<Transcription> {
|
||||
model: string;
|
||||
|
||||
@Column(DataType.JSON)
|
||||
result: Partial<AlignmentResult> & { originalText?: string };
|
||||
result: Partial<AlignmentResult> & {
|
||||
originalText?: string;
|
||||
tokenId?: string | number;
|
||||
};
|
||||
|
||||
@Column(DataType.DATE)
|
||||
syncedAt: Date;
|
||||
|
||||
@@ -4,6 +4,7 @@ export * from "./use-camdict";
|
||||
export * from "./use-conversation";
|
||||
export * from "./use-notes";
|
||||
export * from "./use-recordings";
|
||||
export * from "./use-pronunciation-assessments";
|
||||
export * from "./use-segments";
|
||||
export * from "./use-transcribe";
|
||||
export * from "./use-transcriptions";
|
||||
|
||||
79
enjoy/src/renderer/hooks/use-pronunciation-assessments.tsx
Normal file
79
enjoy/src/renderer/hooks/use-pronunciation-assessments.tsx
Normal file
@@ -0,0 +1,79 @@
|
||||
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
|
||||
import { useContext } from "react";
|
||||
import { AppSettingsProviderContext } from "@renderer/context";
|
||||
|
||||
export const usePronunciationAssessments = () => {
|
||||
const { webApi } = useContext(AppSettingsProviderContext);
|
||||
|
||||
const assess = async (
|
||||
params: {
|
||||
blob: Blob;
|
||||
language: string;
|
||||
reference?: string;
|
||||
},
|
||||
options?: {
|
||||
targetId?: string;
|
||||
targetType?: string;
|
||||
}
|
||||
) => {
|
||||
const { blob, language, reference } = params;
|
||||
const { id, token, region } = await webApi.generateSpeechToken(options);
|
||||
const config = sdk.SpeechConfig.fromAuthorizationToken(token, region);
|
||||
const audioConfig = sdk.AudioConfig.fromWavFileInput(
|
||||
new File([blob], "audio.wav")
|
||||
);
|
||||
|
||||
const pronunciationAssessmentConfig = new sdk.PronunciationAssessmentConfig(
|
||||
reference,
|
||||
sdk.PronunciationAssessmentGradingSystem.HundredMark,
|
||||
sdk.PronunciationAssessmentGranularity.Phoneme,
|
||||
true
|
||||
);
|
||||
pronunciationAssessmentConfig.phonemeAlphabet = "IPA";
|
||||
|
||||
// setting the recognition language
|
||||
config.speechRecognitionLanguage = language;
|
||||
|
||||
// create the speech recognizer.
|
||||
const reco = new sdk.SpeechRecognizer(config, audioConfig);
|
||||
pronunciationAssessmentConfig.applyTo(reco);
|
||||
|
||||
return new Promise((resolve, reject) => {
|
||||
reco.recognizeOnceAsync((result) => {
|
||||
reco.close();
|
||||
|
||||
switch (result.reason) {
|
||||
case sdk.ResultReason.RecognizedSpeech:
|
||||
const pronunciationResult =
|
||||
sdk.PronunciationAssessmentResult.fromResult(result);
|
||||
console.debug(
|
||||
"Received pronunciation assessment result.",
|
||||
pronunciationResult.detailResult
|
||||
);
|
||||
resolve(pronunciationResult);
|
||||
break;
|
||||
case sdk.ResultReason.NoMatch:
|
||||
reject(new Error("No speech could be recognized."));
|
||||
break;
|
||||
case sdk.ResultReason.Canceled:
|
||||
const cancellationDetails =
|
||||
sdk.CancellationDetails.fromResult(result);
|
||||
console.debug(
|
||||
"CANCELED: Reason=" +
|
||||
cancellationDetails.reason +
|
||||
" ErrorDetails=" +
|
||||
cancellationDetails.errorDetails
|
||||
);
|
||||
reject(new Error(cancellationDetails.errorDetails));
|
||||
break;
|
||||
default:
|
||||
reject(result);
|
||||
}
|
||||
});
|
||||
});
|
||||
};
|
||||
|
||||
return {
|
||||
assess,
|
||||
};
|
||||
};
|
||||
@@ -42,6 +42,7 @@ export const useTranscribe = () => {
|
||||
model: string;
|
||||
alignmentResult: AlignmentResult;
|
||||
originalText?: string;
|
||||
tokenId?: number;
|
||||
}> => {
|
||||
const url = await transcode(mediaSrc);
|
||||
const { targetId, targetType, originalText, language, service } =
|
||||
@@ -173,8 +174,9 @@ export const useTranscribe = () => {
|
||||
engine: string;
|
||||
model: string;
|
||||
text: string;
|
||||
tokenId: number;
|
||||
}> => {
|
||||
const { token, region } = await webApi.generateSpeechToken(params);
|
||||
const { id, token, region } = await webApi.generateSpeechToken(params);
|
||||
const config = sdk.SpeechConfig.fromAuthorizationToken(token, region);
|
||||
const audioConfig = sdk.AudioConfig.fromWavFileInput(
|
||||
new File([blob], "audio.wav")
|
||||
@@ -217,6 +219,7 @@ export const useTranscribe = () => {
|
||||
engine: "azure",
|
||||
model: "whisper",
|
||||
text: results.map((result) => result.DisplayText).join(" "),
|
||||
tokenId: id,
|
||||
});
|
||||
};
|
||||
|
||||
|
||||
@@ -78,13 +78,16 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
setTranscribing(true);
|
||||
setTranscribingProgress(0);
|
||||
try {
|
||||
const { engine, model, alignmentResult } = await transcribe(media.src, {
|
||||
targetId: media.id,
|
||||
targetType: media.mediaType,
|
||||
originalText,
|
||||
language,
|
||||
service,
|
||||
});
|
||||
const { engine, model, alignmentResult, tokenId } = await transcribe(
|
||||
media.src,
|
||||
{
|
||||
targetId: media.id,
|
||||
targetType: media.mediaType,
|
||||
originalText,
|
||||
language,
|
||||
service,
|
||||
}
|
||||
);
|
||||
|
||||
let timeline: TimelineEntry[] = [];
|
||||
alignmentResult.timeline.forEach((t) => {
|
||||
@@ -174,6 +177,7 @@ export const useTranscriptions = (media: AudioType | VideoType) => {
|
||||
timeline: timeline,
|
||||
transcript: alignmentResult.transcript,
|
||||
originalText,
|
||||
tokenId,
|
||||
},
|
||||
engine,
|
||||
model,
|
||||
|
||||
Reference in New Issue
Block a user