Refactor transcription (#476)

* word-level timestamps are not needed for alignment * remove deprecated code * fix error when stopping recording
2024-04-02 14:03:02 +08:00
parent f4d1d2a730
commit f0f4319044
4 changed files with 13 additions and 147 deletions
--- a/enjoy/src/utils.ts
+++ b/enjoy/src/utils.ts
@@ -1,5 +1,4 @@
 import Pitchfinder from "pitchfinder";
-import { END_OF_SENTENCE_REGEX, MAGIC_TOKEN_REGEX } from "./constants";
 import { IPA_MAPPING } from "./constants";

 export const extractFrequencies = (props: {
@@ -34,57 +33,6 @@ export function milisecondsToTimestamp(ms: number) {
  )}:${seconds.padStart(2, "0")},${milliseconds}`;
 }

-export const groupTranscription = (
-  transcription: TranscriptionResultSegmentType[]
-): TranscriptionResultSegmentGroupType[] => {
-  const generateGroup = (group?: TranscriptionResultSegmentType[]) => {
-    if (!group || group.length === 0) return;
-
-    const firstWord = group[0];
-    const lastWord = group[group.length - 1];
-
-    return {
-      offsets: {
-        from: firstWord.offsets.from,
-        to: lastWord.offsets.to,
-      },
-      text: group.map((w) => w.text.trim()).join(" "),
-      timestamps: {
-        from: firstWord.timestamps.from,
-        to: lastWord.timestamps.to,
-      },
-      segments: group,
-    };
-  };
-
-  const groups: TranscriptionResultSegmentGroupType[] = [];
-  let group: TranscriptionResultSegmentType[] = [];
-
-  transcription.forEach((segment) => {
-    const text = segment.text.trim();
-    if (!text) return;
-
-    group.push(segment);
-
-    if (
-      !text.match(MAGIC_TOKEN_REGEX) &&
-      segment.text.trim().match(END_OF_SENTENCE_REGEX)
-    ) {
-      // Group a complete sentence;
-      groups.push(generateGroup(group));
-
-      // init a new group
-      group = [];
-    }
-  });
-
-  // Group the last group
-  const lastSentence = generateGroup(group);
-  if (lastSentence) groups.push(lastSentence);
-
-  return groups;
-};
-
 export const convertIpaToNormal = (ipa: string) => {
  const mark = ipa.match(/(\ˈ|ˌ)/);
  const cleanIpa = ipa.replace(mark ? mark[0] : "", "");