Feat improve media player page (#320)

* download transcription from api when transcribed

* add friendly loading panel on audio page

* add loading panel for video page

* fix locale

* remove ubuntu 20.04 for e2e test

* do not auto upload recording
Author: an-lee
Date: 2024-02-19 11:01:52 +08:00 (committed by GitHub)
Parent: 9a605b9f39
Commit: 06f8d32169
15 changed files with 347 additions and 75 deletions

View File

@@ -23,7 +23,6 @@ jobs:
macos-14,
windows-2019,
windows-latest,
- ubuntu-20.04,
ubuntu-latest,
]
steps:

View File

@@ -8,7 +8,7 @@
"types": "./src/types.d.ts",
"scripts": {
"predev": "yarn run download",
"dev": "rimraf .vite && yarn run download && WEB_API_URL=http://localhost:3000 SETTINGS_PATH=./tmp LIBRARY_PATH=./tmp electron-forge start",
"dev": "rimraf .vite && yarn run download && WEB_API_URL=http://localhost:3000 SETTINGS_PATH=${PWD}/enjoy/tmp LIBRARY_PATH=${PWD}/enjoy/tmp electron-forge start",
"start": "rimraf .vite && yarn run download && electron-forge start",
"package": "rimraf .vite && yarn run download && electron-forge package",
"make": "rimraf .vite && yarn run download && electron-forge make",

View File

@@ -450,5 +450,14 @@
"syncingRecordings": "Syncing {{count}} recordings",
"failedToSyncRecordings": "Syncing recordings failed",
"downloadUrlNotResolved": "Download URL not resolved",
"resolvingDownloadUrl": "Resolving download URL"
"resolvingDownloadUrl": "Resolving download URL",
"waveformIsDecoded": "Waveform is decoded",
"decodingWaveform": "Decoding waveform",
"transcribedSuccessfully": "Transcribed successfully",
"transcribing": "Transcribing",
"notTranscribedYet": "Not transcribed yet",
"preparingAudio": "Preparing audio",
"preparingVideo": "Preparing video",
"itMayTakeAWhileToPrepareForTheFirstLoad": "It may take a while to prepare for the first load. Please be patient.",
"loadingTranscription": "Loading transcription"
}

View File

@@ -449,5 +449,14 @@
"syncingRecordings": "{{count}} 条录音正在同步",
"failedToSyncRecordings": "同步录音失败",
"downloadUrlNotResolved": "无法解析下载地址",
"resolvingDownloadUrl": "正在解析下载地址"
"resolvingDownloadUrl": "正在解析下载地址",
"waveformIsDecoded": "波形已解码",
"decodingWaveform": "正在解码波形",
"transcribedSuccessfully": "语音转文本成功",
"transcribing": "正在语音转文本",
"notTranscribedYet": "尚未语音转文本",
"preparingAudio": "正在准备音频",
"preparingVideo": "正在准备视频",
"itMayTakeAWhileToPrepareForTheFirstLoad": "首次加载可能需要一些时间,请耐心等候",
"loadingTranscription": "正在加载语音文本"
}

View File

@@ -174,7 +174,8 @@ export class Audio extends Model<Audio> {
});
return webApi.syncAudio(this.toJSON()).then(() => {
- this.update({ syncedAt: new Date() });
+ const now = new Date();
+ this.update({ syncedAt: now, updatedAt: now });
});
}

View File

@@ -134,12 +134,10 @@ export class Recording extends Model<Recording> {
}
async sync() {
- this.upload().catch(() => {});
const webApi = new Client({
baseUrl: process.env.WEB_API_URL || WEB_API_URL,
accessToken: settings.getSync("user.accessToken") as string,
- logger: log.scope("recording/sync"),
+ logger,
});
return webApi.syncRecording(this.toJSON()).then(() => {
@@ -156,11 +154,12 @@ export class Recording extends Model<Recording> {
return assessment;
}
await this.upload();
+ await this.sync();
const webApi = new Client({
baseUrl: process.env.WEB_API_URL || WEB_API_URL,
accessToken: settings.getSync("user.accessToken") as string,
- logger: log.scope("recording/assess"),
+ logger,
});
const { token, region } = await webApi.generateSpeechToken();
@@ -221,7 +220,7 @@ export class Recording extends Model<Recording> {
@AfterCreate
static autoSync(recording: Recording) {
- // auto upload should not block the main thread
+ // auto sync should not block the main thread
recording.sync().catch(() => {});
}

View File

@@ -69,6 +69,7 @@ export class Transcription extends Model<Transcription> {
}
async sync() {
if (this.isSynced) return;
+ if (this.getDataValue("state") !== "finished") return;
const webApi = new Client({
@@ -77,7 +78,8 @@ export class Transcription extends Model<Transcription> {
logger,
});
return webApi.syncTranscription(this.toJSON()).then(() => {
- this.update({ syncedAt: new Date() });
+ const now = new Date();
+ this.update({ syncedAt: now, updatedAt: now });
});
}
@@ -86,6 +88,13 @@ export class Transcription extends Model<Transcription> {
this.notify(transcription, "update");
}
+ @AfterUpdate
+ static syncAfterUpdate(transcription: Transcription) {
+ transcription.sync().catch((err) => {
+ logger.error("sync error", err);
+ });
+ }
@AfterDestroy
static notifyForDestroy(transcription: Transcription) {
this.notify(transcription, "destroy");
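
The new @AfterUpdate hook and the two guards in sync() work together as a loop breaker: a successful sync writes syncedAt/updatedAt, which fires syncAfterUpdate again, and that second pass returns early because the record is already synced. Stamping both fields with the same Date is what keeps the synced check true after the write itself. A minimal sketch of the interplay (not the project's actual model; isSynced is assumed to compare the two timestamps):

class TranscriptionSketch {
  state: "pending" | "processing" | "finished" = "pending";
  syncedAt?: Date;
  updatedAt: Date = new Date();

  get isSynced(): boolean {
    return !!this.syncedAt && this.syncedAt >= this.updatedAt;
  }

  async sync(): Promise<void> {
    if (this.isSynced) return; // breaks the update -> sync -> update cycle
    if (this.state !== "finished") return; // never push unfinished results
    // ...push to the web API here...
    const now = new Date();
    this.syncedAt = now;
    this.updatedAt = now; // same instant, so isSynced holds afterwards
  }

  // what the @AfterUpdate hook delegates to
  afterUpdate(): void {
    this.sync().catch((err) => console.error("sync error", err));
  }
}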

View File

@@ -191,11 +191,12 @@ export class Video extends Model<Video> {
const webApi = new Client({
baseUrl: process.env.WEB_API_URL || WEB_API_URL,
accessToken: settings.getSync("user.accessToken") as string,
- logger: log.scope("video/sync"),
+ logger,
});
return webApi.syncVideo(this.toJSON()).then(() => {
- this.update({ syncedAt: new Date() });
+ const now = new Date();
+ this.update({ syncedAt: now, updatedAt: now });
});
}

View File

@@ -2,6 +2,7 @@ import { useEffect, useState, useContext } from "react";
import {
DbProviderContext,
AppSettingsProviderContext,
+ AISettingsProviderContext,
} from "@renderer/context";
import {
LoaderSpin,
@@ -10,7 +11,7 @@ import {
MediaPlayer,
MediaTranscription,
} from "@renderer/components";
- import { LoaderIcon } from "lucide-react";
+ import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import {
AlertDialog,
AlertDialogHeader,
@@ -20,22 +21,34 @@ import {
AlertDialogFooter,
AlertDialogCancel,
Button,
+ PingPoint,
+ Progress,
ScrollArea,
toast,
} from "@renderer/components/ui";
import { t } from "i18next";
+ import { useTranscribe } from "@renderer/hooks";
+ import { useNavigate } from "react-router-dom";
export const AudioDetail = (props: { id?: string; md5?: string }) => {
+ const navigate = useNavigate();
const { id, md5 } = props;
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
+ const { whisperConfig } = useContext(AISettingsProviderContext);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const [audio, setAudio] = useState<AudioType | null>(null);
const [transcription, setTranscription] = useState<TranscriptionType>(null);
- const [initialized, setInitialized] = useState<boolean>(false);
const [sharing, setSharing] = useState<boolean>(false);
+ // Transcription controls
+ const [transcribing, setTranscribing] = useState<boolean>(false);
+ const { transcribe } = useTranscribe();
+ const [transcribingProgress, setTranscribingProgress] = useState<number>(0);
+ // Player controls
+ const [initialized, setInitialized] = useState<boolean>(false);
const [currentTime, setCurrentTime] = useState<number>(0);
const [seek, setSeek] = useState<{
seekTo: number;
@@ -56,6 +69,56 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
}
};
+ const generateTranscription = async () => {
+ if (transcribing) return;
+ setTranscribing(true);
+ setTranscribingProgress(0);
+ try {
+ const { engine, model, result } = await transcribe(audio.src);
+ await EnjoyApp.transcriptions.update(transcription.id, {
+ state: "finished",
+ result,
+ engine,
+ model,
+ });
+ } catch (err) {
+ toast.error(err.message);
+ }
+ setTranscribing(false);
+ };
+ const findTranscriptionFromWebApi = async () => {
+ const res = await webApi.transcriptions({
+ targetMd5: audio.md5,
+ });
+ const transcript = (res?.transcriptions || []).filter((t) =>
+ ["base", "small", "medium", "large", "whisper-1"].includes(t.model)
+ )?.[0];
+ if (!transcript) {
+ throw new Error("Transcription not found");
+ }
+ await EnjoyApp.transcriptions.update(transcription.id, {
+ state: "finished",
+ result: transcript.result,
+ engine: transcript.engine,
+ model: transcript.model,
+ });
+ };
+ const findOrGenerateTranscription = async () => {
+ try {
+ await findTranscriptionFromWebApi();
+ } catch (err) {
+ console.error(err);
+ await generateTranscription();
+ }
+ };
const handleShare = async () => {
if (!audio.source && !audio.isUploaded) {
try {
@@ -110,11 +173,26 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
}, [audio]);
useEffect(() => {
+ if (!transcription) return;
addDblistener(onTransactionUpdate);
+ if (transcription?.state == "pending") {
+ findOrGenerateTranscription();
+ }
+ if (whisperConfig.service === "local") {
+ EnjoyApp.whisper.onProgress((_, p: number) => {
+ if (p > 100) p = 100;
+ setTranscribingProgress(p);
+ });
+ }
return () => {
removeDbListener(onTransactionUpdate);
+ EnjoyApp.whisper.removeProgressListeners();
};
- }, [transcription]);
+ }, [md5, transcription]);
if (!audio) {
return <LoaderSpin />;
@@ -183,8 +261,10 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
mediaId={audio.id}
mediaType="Audio"
mediaName={audio.name}
- mediaUrl={audio.src}
transcription={transcription}
+ transcribing={transcribing}
+ progress={transcribingProgress}
+ transcribe={generateTranscription}
currentSegmentIndex={currentSegmentIndex}
onSelectSegment={(index) => {
if (currentSegmentIndex === index) return;
@@ -219,11 +299,69 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => {
</AlertDialogContent>
</AlertDialog>
- {!initialized && (
- <div className="top-0 w-full h-full absolute z-30 bg-background/10 flex items-center justify-center">
- <LoaderIcon className="text-muted-foreground animate-spin w-8 h-8" />
- </div>
- )}
+ {/* Show loading progress until waveform is decoded & transcribed */}
+ <AlertDialog open={!initialized || !Boolean(transcription?.result)}>
+ <AlertDialogContent>
+ <AlertDialogHeader>
+ <AlertDialogTitle>{t("preparingAudio")}</AlertDialogTitle>
+ <AlertDialogDescription>
+ {t("itMayTakeAWhileToPrepareForTheFirstLoad")}
+ </AlertDialogDescription>
+ </AlertDialogHeader>
+ <div className="py-4">
+ {initialized ? (
+ <div className="mb-4 flex items-center space-x-4">
+ <CheckCircleIcon className="w-4 h-4 text-green-500" />
+ <span>{t("waveformIsDecoded")}</span>
+ </div>
+ ) : (
+ <div className="mb-4 flex items-center space-x-4">
+ <LoaderIcon className="w-4 h-4 animate-spin" />
+ <span>{t("decodingWaveform")}</span>
+ </div>
+ )}
+ {!transcription ? (
+ <div className="flex items-center space-x-4">
+ <PingPoint colorClassName="bg-muted" />
+ <span>{t("loadingTranscription")}</span>
+ </div>
+ ) : transcription.result ? (
+ <div className="flex items-center space-x-4">
+ <CheckCircleIcon className="w-4 h-4 text-green-500" />
+ <span>{t("transcribedSuccessfully")}</span>
+ </div>
+ ) : transcribing ? (
+ <div className="">
+ <div className="flex items-center space-x-4 mb-2">
+ <PingPoint colorClassName="bg-yellow-500" />
+ <span>{t("transcribing")}</span>
+ </div>
+ {whisperConfig.service === "local" && (
+ <Progress value={transcribingProgress} />
+ )}
+ </div>
+ ) : (
+ <div className="flex items-center space-x-4">
+ <PingPoint colorClassName="bg-muted" />
+ <div className="inline">
+ <span>{t("notTranscribedYet")}</span>
+ <Button className="ml-4" size="sm">
+ {t("transcribe")}
+ </Button>
+ </div>
+ </div>
+ )}
+ </div>
+ <AlertDialogFooter>
+ <Button variant="secondary" onClick={() => navigate(-1)}>
+ {t("cancel")}
+ </Button>
+ </AlertDialogFooter>
+ </AlertDialogContent>
+ </AlertDialog>
</div>
);
};
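
The page now resolves a transcription in two steps: it first asks the web API for an existing transcription with the same md5 (accepting only known model names) and only falls back to generating one locally. While that runs, the loading dialog is driven by two independent readiness flags, one per status row. A sketch of its open condition as derived state (names mirror the diff):

// The dialog closes only when both preparation steps have finished.
const waveformReady = initialized; // set once wavesurfer decodes the audio
const transcriptReady = Boolean(transcription?.result); // set once a result exists
const showPreparingDialog = !waveformReady || !transcriptReady;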

View File

@@ -319,7 +319,7 @@ export const MediaPlayer = (props: {
const subscriptions = [
wavesurfer.on("play", () => setIsPlaying(true)),
wavesurfer.on("pause", () => setIsPlaying(false)),
- wavesurfer.on("loading", (percent: number) => console.log(percent)),
+ wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
wavesurfer.on("timeupdate", (time: number) => setCurrentTime(time)),
wavesurfer.on("decode", () => {
if (waveform?.frequencies) return;
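
A hedged note on the event wiring: this assumes wavesurfer.js v7, where on() returns an unsubscribe function, so collecting the return values lets one cleanup call detach every handler at once:

// Sketch of the subscription/cleanup pattern, inside the component's effect;
// wavesurfer and setIsPlaying are the names used in the diff above.
const subscriptions = [
  wavesurfer.on("play", () => setIsPlaying(true)),
  wavesurfer.on("pause", () => setIsPlaying(false)),
  wavesurfer.on("loading", (percent: number) => console.log(`${percent}%`)),
];
return () => subscriptions.forEach((unsubscribe) => unsubscribe());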

View File

@@ -12,7 +12,6 @@ import {
ScrollArea,
Button,
PingPoint,
- toast,
} from "@renderer/components/ui";
import React, { useEffect, useContext, useState } from "react";
import { t } from "i18next";
@@ -22,14 +21,15 @@ import {
AppSettingsProviderContext,
AISettingsProviderContext,
} from "@renderer/context";
- import { useTranscribe } from "@renderer/hooks";
export const MediaTranscription = (props: {
transcription: TranscriptionType;
+ progress: number;
+ transcribe: () => void;
+ transcribing: boolean;
mediaId: string;
mediaType: "Audio" | "Video";
mediaName?: string;
- mediaUrl: string;
currentSegmentIndex?: number;
onSelectSegment?: (index: number) => void;
}) => {
@@ -38,41 +38,20 @@ export const MediaTranscription = (props: {
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const {
transcription,
+ transcribing,
+ progress,
+ transcribe,
mediaId,
mediaType,
mediaName,
- mediaUrl,
currentSegmentIndex,
onSelectSegment,
} = props;
const containerRef = React.createRef<HTMLDivElement>();
- const [transcribing, setTranscribing] = useState<boolean>(false);
- const { transcribe } = useTranscribe();
- const [progress, setProgress] = useState<number>(0);
const [recordingStats, setRecordingStats] =
useState<SegementRecordingStatsType>([]);
- const generate = async () => {
- if (transcribing) return;
- setTranscribing(true);
- setProgress(0);
- try {
- const { engine, model, result } = await transcribe(mediaUrl);
- await EnjoyApp.transcriptions.update(transcription.id, {
- state: "finished",
- result,
- engine,
- model,
- });
- } catch (err) {
- toast.error(err.message);
- }
- setTranscribing(false);
- };
const fetchSegmentStats = async () => {
if (!mediaId) return;
@@ -85,22 +64,10 @@ export const MediaTranscription = (props: {
addDblistener(fetchSegmentStats);
fetchSegmentStats();
- if (transcription?.state == "pending") {
- generate();
- }
- if (whisperConfig.service === "local") {
- EnjoyApp.whisper.onProgress((_, p: number) => {
- if (p > 100) p = 100;
- setProgress(p);
- });
- }
return () => {
removeDbListener(fetchSegmentStats);
- EnjoyApp.whisper.removeProgressListeners();
};
- }, [mediaId, mediaType, transcription]);
+ }, [transcription]);
useEffect(() => {
containerRef.current
@@ -159,7 +126,7 @@ export const MediaTranscription = (props: {
</AlertDialogHeader>
<AlertDialogFooter>
<AlertDialogCancel>{t("cancel")}</AlertDialogCancel>
- <AlertDialogAction onClick={generate}>
+ <AlertDialogAction onClick={transcribe}>
{t("transcribe")}
</AlertDialogAction>
</AlertDialogFooter>
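
MediaTranscription is now a controlled component: it no longer owns transcribing state or calls useTranscribe itself, and the dialog's action simply invokes the transcribe prop. Its props contract after this change, consolidated from the diff as a sketch:

type MediaTranscriptionProps = {
  transcription: TranscriptionType;
  transcribing: boolean; // owned by the parent page
  progress: number; // 0-100, fed by the parent's whisper listener
  transcribe: () => void; // parent-supplied callback
  mediaId: string;
  mediaType: "Audio" | "Video";
  mediaName?: string;
  currentSegmentIndex?: number;
  onSelectSegment?: (index: number) => void;
};

Lifting this state up lets the audio and video pages share one transcription flow and render its progress in their own loading panels.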

View File

@@ -38,7 +38,7 @@ export const PostActions = (props: { post: PostType }) => {
const [copied, setCopied] = useState<boolean>(false);
const { EnjoyApp } = useContext(AppSettingsProviderContext);
const [asking, setAsking] = useState<boolean>(false);
- const [aiReplies, setAiReplies] = useState<MessageType[]>([]);
+ const [aiReplies, setAiReplies] = useState<Partial<MessageType>[]>([]);
const handleAddMedium = async () => {
if (post.targetType !== "Medium") return;
@@ -185,7 +185,7 @@ export const PostActions = (props: { post: PostType }) => {
);
};
- const AIReplies = (props: { replies: MessageType[] }) => {
+ const AIReplies = (props: { replies: Partial<MessageType>[] }) => {
return (
<div>
<div className="space-y-2">
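
Widening MessageType[] to Partial<MessageType>[] presumably reflects that AI replies are rendered before they are persisted, so they lack server-assigned fields. An illustrative sketch (field names are hypothetical, not the project's exact type):

type MessageTypeSketch = { id: string; content: string; createdAt: string };

// A reply fresh from the model has content but no id/createdAt yet,
// so only a Partial element type accepts it.
const aiReplies: Partial<MessageTypeSketch>[] = [];
aiReplies.push({ content: "Here is a suggested answer." }); // OK
// A plain MessageTypeSketch[] would reject this push: missing id, createdAt.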

View File

@@ -51,11 +51,6 @@ export const TedTalksSegment = () => {
coverUrl: selectedTalk?.primaryImageSet[0].url,
})
.then((record) => {
- if (!record) {
- toast.error(t("failedToDownload"));
- return;
- }
if (type === "video") {
navigate(`/videos/${record.id}`);
} else {

View File

@@ -2,6 +2,7 @@ import { useEffect, useState, useContext } from "react";
import {
DbProviderContext,
AppSettingsProviderContext,
+ AISettingsProviderContext,
} from "@renderer/context";
import {
LoaderSpin,
@@ -10,7 +11,7 @@ import {
MediaPlayer,
MediaTranscription,
} from "@renderer/components";
- import { LoaderIcon } from "lucide-react";
+ import { CheckCircleIcon, LoaderIcon } from "lucide-react";
import {
AlertDialog,
AlertDialogHeader,
@@ -20,22 +21,34 @@ import {
AlertDialogFooter,
AlertDialogCancel,
Button,
+ PingPoint,
+ Progress,
ScrollArea,
toast,
} from "@renderer/components/ui";
import { t } from "i18next";
+ import { useTranscribe } from "@renderer/hooks";
+ import { useNavigate } from "react-router-dom";
export const VideoDetail = (props: { id?: string; md5?: string }) => {
+ const navigate = useNavigate();
const { id, md5 } = props;
const { addDblistener, removeDbListener } = useContext(DbProviderContext);
+ const { whisperConfig } = useContext(AISettingsProviderContext);
const { EnjoyApp, webApi } = useContext(AppSettingsProviderContext);
const [video, setVideo] = useState<VideoType | null>(null);
const [transcription, setTranscription] = useState<TranscriptionType>(null);
- const [initialized, setInitialized] = useState<boolean>(false);
const [sharing, setSharing] = useState<boolean>(false);
+ // Transcription controls
+ const [transcribing, setTranscribing] = useState<boolean>(false);
+ const { transcribe } = useTranscribe();
+ const [transcribingProgress, setTranscribingProgress] = useState<number>(0);
+ // Player controls
+ const [initialized, setInitialized] = useState<boolean>(false);
const [currentTime, setCurrentTime] = useState<number>(0);
const [seek, setSeek] = useState<{
seekTo: number;
@@ -58,6 +71,56 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
}
};
+ const generateTranscription = async () => {
+ if (transcribing) return;
+ setTranscribing(true);
+ setTranscribingProgress(0);
+ try {
+ const { engine, model, result } = await transcribe(video.src);
+ await EnjoyApp.transcriptions.update(transcription.id, {
+ state: "finished",
+ result,
+ engine,
+ model,
+ });
+ } catch (err) {
+ toast.error(err.message);
+ }
+ setTranscribing(false);
+ };
+ const findTranscriptionFromWebApi = async () => {
+ const res = await webApi.transcriptions({
+ targetMd5: video.md5,
+ });
+ const transcript = (res?.transcriptions || []).filter((t) =>
+ ["base", "small", "medium", "large", "whisper-1"].includes(t.model)
+ )?.[0];
+ if (!transcript) {
+ throw new Error("Transcription not found");
+ }
+ await EnjoyApp.transcriptions.update(transcription.id, {
+ state: "finished",
+ result: transcript.result,
+ engine: transcript.engine,
+ model: transcript.model,
+ });
+ };
+ const findOrGenerateTranscription = async () => {
+ try {
+ await findTranscriptionFromWebApi();
+ } catch (err) {
+ console.error(err);
+ await generateTranscription();
+ }
+ };
const handleShare = async () => {
if (!video.source.startsWith("http")) {
toast.error(t("shareFailed"), {
@@ -116,11 +179,26 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
}, [video]);
useEffect(() => {
+ if (!transcription) return;
addDblistener(onTransactionUpdate);
+ if (transcription?.state == "pending") {
+ findOrGenerateTranscription();
+ }
+ if (whisperConfig.service === "local") {
+ EnjoyApp.whisper.onProgress((_, p: number) => {
+ if (p > 100) p = 100;
+ setTranscribingProgress(p);
+ });
+ }
return () => {
removeDbListener(onTransactionUpdate);
+ EnjoyApp.whisper.removeProgressListeners();
};
- }, [transcription]);
+ }, [md5, transcription]);
if (!video) {
return <LoaderSpin />;
@@ -193,9 +271,11 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
<MediaTranscription
mediaId={video.id}
mediaType="Video"
- mediaUrl={video.src}
mediaName={video.name}
transcription={transcription}
+ transcribing={transcribing}
+ progress={transcribingProgress}
+ transcribe={generateTranscription}
currentSegmentIndex={currentSegmentIndex}
onSelectSegment={(index) => {
if (currentSegmentIndex === index) return;
@@ -232,6 +312,70 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => {
</AlertDialogContent>
</AlertDialog>
+ {/* Show loading progress until waveform is decoded & transcribed */}
+ <AlertDialog open={!initialized || !Boolean(transcription?.result)}>
+ <AlertDialogContent>
+ <AlertDialogHeader>
+ <AlertDialogTitle>{t("preparingVideo")}</AlertDialogTitle>
+ <AlertDialogDescription>
+ {t("itMayTakeAWhileToPrepareForTheFirstLoad")}
+ </AlertDialogDescription>
+ </AlertDialogHeader>
+ <div className="py-4">
+ {initialized ? (
+ <div className="mb-4 flex items-center space-x-4">
+ <CheckCircleIcon className="w-4 h-4 text-green-500" />
+ <span>{t("waveformIsDecoded")}</span>
+ </div>
+ ) : (
+ <div className="mb-4 flex items-center space-x-4">
+ <LoaderIcon className="w-4 h-4 animate-spin" />
+ <span>{t("decodingWaveform")}</span>
+ </div>
+ )}
+ {!transcription ? (
+ <div className="flex items-center space-x-4">
+ <PingPoint colorClassName="bg-muted" />
+ <span>{t("loadingTranscription")}</span>
+ </div>
+ ) : transcription.result ? (
+ <div className="flex items-center space-x-4">
+ <CheckCircleIcon className="w-4 h-4 text-green-500" />
+ <span>{t("transcribedSuccessfully")}</span>
+ </div>
+ ) : transcribing ? (
+ <div className="">
+ <div className="flex items-center space-x-4 mb-2">
+ <PingPoint colorClassName="bg-yellow-500" />
+ <span>{t("transcribing")}</span>
+ </div>
+ {whisperConfig.service === "local" && (
+ <Progress value={transcribingProgress} />
+ )}
+ </div>
+ ) : (
+ <div className="flex items-center space-x-4">
+ <PingPoint colorClassName="bg-muted" />
+ <div className="inline">
+ <span>{t("notTranscribedYet")}</span>
+ <Button className="ml-4" size="sm">
+ {t("transcribe")}
+ </Button>
+ </div>
+ </div>
+ )}
+ </div>
+ <AlertDialogFooter>
+ <Button variant="secondary" onClick={() => navigate(-1)}>
+ {t("cancel")}
+ </Button>
+ </AlertDialogFooter>
+ </AlertDialogContent>
+ </AlertDialog>
{!initialized && (
<div className="top-0 w-full h-full absolute z-30 bg-background/10 flex items-center justify-center">
<LoaderIcon className="text-muted-foreground animate-spin w-8 h-8" />

View File

@@ -3,6 +3,7 @@ type TranscriptionType = {
targetId: string;
targetType: string;
state: "pending" | "processing" | "finished";
+ engine: string;
model: string;
result: TranscriptionResultSegmentGroupType[];
};
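
With engine now recorded alongside model, consumers can report where a result came from and can narrow on the state union before touching result. A hedged usage sketch:

// Narrow on state first: result is only meaningful once finished.
function describeTranscription(t: TranscriptionType): string {
  if (t.state !== "finished") return `transcription is ${t.state}`;
  return `${t.result.length} segment groups via ${t.engine}/${t.model}`;
}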