Files
everyone-can-use-english/enjoy/src/utils.ts
an-lee bc22a5e2b4 Feat: refactor STT service (#294)
* add stt hook interface

* fix crypto exported to browser

* refactor use-transcribe

* may use openai stt

* refactor: remove deprecated code

* fix undefined method
2024-02-10 19:55:07 +08:00

95 lines
2.7 KiB
TypeScript

import Pitchfinder from "pitchfinder";
/**
 * Runs YIN pitch detection over `peaks` and reports the detected frequencies
 * together with the one that occurs most often.
 *
 * @param peaks - Sample buffer handed to `Pitchfinder.frequencies`.
 * @param sampleRate - Sample rate used to configure the YIN detector and to
 *   derive the tempo/quantization passed to `Pitchfinder.frequencies`.
 * @returns `frequencies`, the array returned by `Pitchfinder.frequencies`, and
 *   `baseFrequency`, the most common detected frequency after rounding to one
 *   decimal place (`0` when no frequency was detected).
 */
export function generatePitch(peaks: Float32Array, sampleRate: number) {
  const detectPitch = Pitchfinder.YIN({ sampleRate });
  const duration = peaks.length / sampleRate;
  const bpm = peaks.length / duration / 60;
  const frequencies = Pitchfinder.frequencies(detectPitch, peaks, {
    tempo: bpm,
    quantization: bpm,
  });

  // Find the baseline frequency (the value that appears most often).
  // Frequencies are bucketed to 1/precision (one decimal place) so that
  // near-identical readings are counted together.
  const precision = 10;
  const occurrences = new Map<number, number>();
  let maxAmount = 0;
  let baseFrequency = 0;

  for (const frequency of frequencies) {
    // Skip chunks where no pitch was detected (null / 0 / NaN).
    if (!frequency) continue;

    const rounded = Math.round(frequency * precision) / precision;
    const amount = (occurrences.get(rounded) ?? 0) + 1;
    occurrences.set(rounded, amount);

    // Strictly greater: on a tie the value that reached the count first wins.
    if (amount > maxAmount) {
      maxAmount = amount;
      baseFrequency = rounded;
    }
  }

  return { frequencies, baseFrequency };
}
/**
 * Formats a duration in milliseconds as an `HH:MM:SS,mmm` timestamp.
 *
 * Hours, minutes and seconds are zero-padded to two digits and milliseconds
 * to three, so `5` becomes `00:00:00,005`. Hours are not wrapped: durations
 * of 100 hours or more simply produce more than two hour digits. Fractional
 * milliseconds are truncated.
 *
 * @param ms - Duration in milliseconds.
 * @returns The formatted timestamp string.
 */
export function milisecondsToTimestamp(ms: number) {
  const hours = Math.floor(ms / 3600000).toString();
  const minutes = Math.floor((ms % 3600000) / 60000).toString();
  const seconds = Math.floor((ms % 60000) / 1000).toString();
  const milliseconds = Math.floor(ms % 1000).toString();

  return [
    hours.padStart(2, "0"),
    ":",
    minutes.padStart(2, "0"),
    ":",
    seconds.padStart(2, "0"),
    ",",
    // Without padding, 5 ms would render as ",5" and read as half a second.
    milliseconds.padStart(3, "0"),
  ].join("");
}
// Period-terminated tokens (honorifics and similar abbreviations) that
// groupTranscription does not treat as the end of a sentence.
export const MAGIC_TOKENS = ["Mrs.", "Ms.", "Mr.", "Dr.", "Prof.", "St."];
// Matches a `.`, `!` or `?` that directly follows a character other than
// `.`, `!`, `,` or `?`. The pattern is not anchored, so it can match anywhere
// in the string. NOTE: because of the `g` flag, `test`/`exec` on this shared
// object are stateful (they advance `lastIndex`).
export const END_OF_WORD_REGEX = /[^\.!,\?][\.!\?]/g;
/**
 * Merges consecutive transcription segments into sentence-level groups.
 *
 * Segments whose trimmed text is empty are dropped. A group is closed as soon
 * as a segment's trimmed text matches `END_OF_WORD_REGEX` and is not one of
 * `MAGIC_TOKENS`; any segments still pending at the end form a final group.
 *
 * @param transcription - Segments, in the order they should be grouped.
 * @returns The groups. Each carries its member segments, their trimmed texts
 *   joined with single spaces, and offsets/timestamps running from the first
 *   member's `from` to the last member's `to`.
 */
export const groupTranscription = (
  transcription: TranscriptionResultSegmentType[]
): TranscriptionResultSegmentGroupType[] => {
  // Collapse a run of segments into one group; yields nothing for an empty run.
  const buildGroup = (members: TranscriptionResultSegmentType[]) => {
    if (members.length === 0) return;

    const head = members[0];
    const tail = members[members.length - 1];

    return {
      offsets: { from: head.offsets.from, to: tail.offsets.to },
      text: members.map((member) => member.text.trim()).join(" "),
      timestamps: { from: head.timestamps.from, to: tail.timestamps.to },
      segments: members,
    };
  };

  const sentences: TranscriptionResultSegmentGroupType[] = [];
  let pending: TranscriptionResultSegmentType[] = [];

  for (const segment of transcription) {
    const token = segment.text.trim();
    if (token === "") continue;

    pending.push(segment);

    // Abbreviations such as "Mr." end in a period but do not end a sentence.
    if (MAGIC_TOKENS.includes(token)) continue;
    // No sentence-ending punctuation: keep accumulating.
    if (token.match(END_OF_WORD_REGEX) === null) continue;

    // A complete sentence: flush it and start a new run.
    sentences.push(buildGroup(pending));
    pending = [];
  }

  // Flush whatever is left after the last sentence boundary.
  const trailing = buildGroup(pending);
  if (trailing) sentences.push(trailing);

  return sentences;
};