diff --git a/enjoy/src/main/whisper.ts b/enjoy/src/main/whisper.ts
index 94974541..f38f8767 100644
--- a/enjoy/src/main/whisper.ts
+++ b/enjoy/src/main/whisper.ts
@@ -188,9 +188,6 @@ class Whipser {
       "--output-file",
       path.join(tmpDir, filename),
       "-pp",
-      "--split-on-word",
-      "--max-len",
-      "1",
       ...extra,
     ];
 
diff --git a/enjoy/src/renderer/components/medias/media-recorder.tsx b/enjoy/src/renderer/components/medias/media-recorder.tsx
index 83d45f68..f2759f62 100644
--- a/enjoy/src/renderer/components/medias/media-recorder.tsx
+++ b/enjoy/src/renderer/components/medias/media-recorder.tsx
@@ -18,6 +18,7 @@ export const MediaRecorder = () => {
     transcription,
     currentSegmentIndex,
   } = useContext(MediaPlayerProviderContext);
+  const [player, setPlayer] = useState<WaveSurfer>();
   const [access, setAccess] = useState<boolean>(false);
   const [duration, setDuration] = useState<number>(0);
   const { EnjoyApp } = useContext(AppSettingsProviderContext);
@@ -80,6 +81,7 @@ export const MediaRecorder = () => {
       autoCenter: false,
       normalize: false,
     });
+    setPlayer(ws);
 
     const record = ws.registerPlugin(RecordPlugin.create());
     let startAt = 0;
@@ -113,9 +115,9 @@ export const MediaRecorder = () => {
       });
 
     return () => {
-      clearInterval(interval);
-      record.stopRecording();
-      ws?.destroy();
+      if (interval) clearInterval(interval);
+      record?.stopRecording();
+      ws?.destroy(); // destroy the instance created by THIS effect run; `player` state captured here is stale (undefined or a previous instance)
     };
   }, [ref, isRecording, access, layout?.playerHeight]);
 
diff --git a/enjoy/src/renderer/hooks/use-transcribe.tsx b/enjoy/src/renderer/hooks/use-transcribe.tsx
index cffde52f..669f8466 100644
--- a/enjoy/src/renderer/hooks/use-transcribe.tsx
+++ b/enjoy/src/renderer/hooks/use-transcribe.tsx
@@ -8,10 +8,6 @@ import { t } from "i18next";
 import { AI_WORKER_ENDPOINT } from "@/constants";
 import * as sdk from "microsoft-cognitiveservices-speech-sdk";
 import axios from "axios";
-import take from "lodash/take";
-import sortedUniqBy from "lodash/sortedUniqBy";
-import { groupTranscription, milisecondsToTimestamp } from "@/utils";
-import { END_OF_SENTENCE_REGEX } from "@/constants";
 import { AlignmentResult } from "echogarden/dist/api/API.d.js";
 
 export const useTranscribe = () => {
@@ -67,7 +63,7 @@ export const useTranscribe = () => {
 
     const alignmentResult = await EnjoyApp.echogarden.align(
       new Uint8Array(await blob.arrayBuffer()),
-      originalText || result.result.map((segment) => segment.text).join(" ")
+      originalText || result.text
     );
 
     return {
@@ -88,12 +84,10 @@ export const useTranscribe = () => {
       }
     );
 
-    const result = groupTranscription(res.transcription);
-
     return {
       engine: "whisper",
       model: res.model.type,
-      result,
+      text: res.transcription.map((segment) => segment.text).join(" "),
     };
   };
 
@@ -108,41 +102,16 @@ export const useTranscribe = () => {
       dangerouslyAllowBrowser: true,
     });
 
-    const res: {
-      words: {
-        word: string;
-        start: number;
-        end: number;
-      }[];
-    } = (await client.audio.transcriptions.create({
+    const res: { text: string } = (await client.audio.transcriptions.create({
       file: new File([blob], "audio.wav"),
       model: "whisper-1",
-      response_format: "verbose_json",
-      timestamp_granularities: ["word"],
+      response_format: "json",
     })) as any;
 
-    const transcription: TranscriptionResultSegmentType[] = res.words.map(
-      (word) => {
-        return {
-          offsets: {
-            from: word.start * 1000,
-            to: word.end * 1000,
-          },
-          timestamps: {
-            from: milisecondsToTimestamp(word.start * 1000),
-            to: milisecondsToTimestamp(word.end * 1000),
-          },
-          text: word.word,
-        };
-      }
-    );
-
-    const result = groupTranscription(transcription);
-
     return {
       engine: "openai",
       model: "whisper-1",
-      result,
+      text: res.text,
     };
   };
 
@@ -155,28 +124,11 @@ export const useTranscribe = () => {
         timeout: 1000 * 60 * 5,
       })
     ).data;
-    const transcription: TranscriptionResultSegmentType[] = res.words.map(
-      (word) => {
-        return {
-          offsets: {
-            from: word.start * 1000,
-            to: word.end * 1000,
-          },
-          timestamps: {
-            from: milisecondsToTimestamp(word.start * 1000),
-            to: milisecondsToTimestamp(word.end * 1000),
-          },
-          text: word.word,
-        };
-      }
-    );
-
-    const result = groupTranscription(transcription);
 
     return {
       engine: "cloudflare",
       model: "@cf/openai/whisper",
-      result,
+      text: res.text,
     };
   };
 
@@ -189,7 +141,7 @@ export const useTranscribe = () => {
   ): Promise<{
     engine: string;
     model: string;
-    result: TranscriptionResultSegmentGroupType[];
+    text: string;
   }> => {
     const { token, region } = await webApi.generateSpeechToken(params);
     const config = sdk.SpeechConfig.fromAuthorizationToken(token, region);
@@ -230,43 +182,10 @@ export const useTranscribe = () => {
       reco.sessionStopped = (_s, _e) => {
         reco.stopContinuousRecognitionAsync();
 
-        const transcription: TranscriptionResultSegmentType[] = [];
-
-        results.forEach((result) => {
-          const best = take(sortedUniqBy(result.NBest, "Confidence"), 1)[0];
-          const words = best.Display.trim().split(" ");
-
-          best.Words.map((word, index) => {
-            let text = word.Word;
-            if (words.length === best.Words.length) {
-              text = words[index];
-            }
-
-            if (
-              index === best.Words.length - 1 &&
-              !text.trim().match(END_OF_SENTENCE_REGEX)
-            ) {
-              text = text + ".";
-            }
-
-            transcription.push({
-              offsets: {
-                from: word.Offset / 1e4,
-                to: (word.Offset + word.Duration) / 1e4,
-              },
-              timestamps: {
-                from: milisecondsToTimestamp(word.Offset / 1e4),
-                to: milisecondsToTimestamp((word.Offset + word.Duration) * 1e4),
-              },
-              text,
-            });
-          });
-        });
-
         resolve({
           engine: "azure",
           model: "whisper",
-          result: groupTranscription(transcription),
+          text: results.map((result) => result.DisplayText).join(' '),
         });
       };
 
diff --git a/enjoy/src/utils.ts b/enjoy/src/utils.ts
index 3306a388..36c16319 100644
--- a/enjoy/src/utils.ts
+++ b/enjoy/src/utils.ts
@@ -1,5 +1,4 @@
 import Pitchfinder from "pitchfinder";
-import { END_OF_SENTENCE_REGEX, MAGIC_TOKEN_REGEX } from "./constants";
 import { IPA_MAPPING } from "./constants";
 
 export const extractFrequencies = (props: {
@@ -34,57 +33,6 @@ export function milisecondsToTimestamp(ms: number) {
   )}:${seconds.padStart(2, "0")},${milliseconds}`;
 }
 
-export const groupTranscription = (
-  transcription: TranscriptionResultSegmentType[]
-): TranscriptionResultSegmentGroupType[] => {
-  const generateGroup = (group?: TranscriptionResultSegmentType[]) => {
-    if (!group || group.length === 0) return;
-
-    const firstWord = group[0];
-    const lastWord = group[group.length - 1];
-
-    return {
-      offsets: {
-        from: firstWord.offsets.from,
-        to: lastWord.offsets.to,
-      },
-      text: group.map((w) => w.text.trim()).join(" "),
-      timestamps: {
-        from: firstWord.timestamps.from,
-        to: lastWord.timestamps.to,
-      },
-      segments: group,
-    };
-  };
-
-  const groups: TranscriptionResultSegmentGroupType[] = [];
-  let group: TranscriptionResultSegmentType[] = [];
-
-  transcription.forEach((segment) => {
-    const text = segment.text.trim();
-    if (!text) return;
-
-    group.push(segment);
-
-    if (
-      !text.match(MAGIC_TOKEN_REGEX) &&
-      segment.text.trim().match(END_OF_SENTENCE_REGEX)
-    ) {
-      // Group a complete sentence;
-      groups.push(generateGroup(group));
-
-      // init a new group
-      group = [];
-    }
-  });
-
-  // Group the last group
-  const lastSentence = generateGroup(group);
-  if (lastSentence) groups.push(lastSentence);
-
-  return groups;
-};
-
 export const convertIpaToNormal = (ipa: string) => {
   const mark = ipa.match(/(\ˈ|ˌ)/);
   const cleanIpa = ipa.replace(mark ? mark[0] : "", "");