Fix download script (#317)
* bundle tiny.en as whisper default model
* improve download-whisper-model script
* improve download-ffmpeg-wasm script
This commit is contained in:
@@ -35,9 +35,7 @@ await Promise.all(
|
||||
console.info(chalk.green(`✅ File ${file.name} valid`));
|
||||
} else {
|
||||
console.warn(
|
||||
chalk.yellow(
|
||||
`❌ File ${file.name} not valid, start to redownload`
|
||||
)
|
||||
chalk.yellow(`❌ File ${file.name} not valid, start to redownload`)
|
||||
);
|
||||
fs.removeSync(path.join(dir, file.name));
|
||||
pendingFiles.push(file);
|
||||
@@ -81,6 +79,8 @@ if (proxyUrl) {
|
||||
}
|
||||
|
||||
const download = async (url, dest, md5) => {
|
||||
console.info(chalk.blue(`=> Start to download ${url} to ${dest}`));
|
||||
|
||||
return spinner(async () => {
|
||||
console.info(chalk.blue(`=> Start to download file ${url}`));
|
||||
await axios
|
||||
@@ -89,22 +89,27 @@ const download = async (url, dest, md5) => {
|
||||
})
|
||||
.then(async (response) => {
|
||||
const data = Buffer.from(response.data, "binary");
|
||||
console.info(chalk.green(`✅ ${dest} downloaded successfully`));
|
||||
|
||||
fs.writeFileSync(dest, data);
|
||||
const hash = await hashFile(dest, { algo: "md5" });
|
||||
if (hash === md5) {
|
||||
console.info(chalk.green(`✅ ${dest} downloaded successfully`));
|
||||
console.info(chalk.green(`✅ ${dest} valid`));
|
||||
} else {
|
||||
console.error(
|
||||
chalk.red(
|
||||
`❌ Error: ${dest} MD5 not match, ${hash} should be ${md5}`
|
||||
`❌ Error: ${dest} not valid. \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
|
||||
)
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
})
|
||||
.catch((err) => {
|
||||
console.error(chalk.red(`❌ Error: ${err}`));
|
||||
console.error(
|
||||
chalk.red(
|
||||
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
|
||||
)
|
||||
);
|
||||
process.exit(1);
|
||||
});
|
||||
});
|
||||
@@ -126,12 +131,17 @@ const cleanup = () => {
|
||||
try {
|
||||
fs.removeSync(path.join(dir, file.name));
|
||||
} catch (err) {
|
||||
console.error(chalk.red(`❌ Error: ${err}`));
|
||||
console.error(
|
||||
chalk.red(
|
||||
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
|
||||
)
|
||||
);
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
const baseURL = "https://unpkg.com/@ffmpeg/core-mt@0.12.6/dist/esm";
|
||||
// const baseURL = "https://unpkg.com/@ffmpeg/core-mt@0.12.6/dist/esm";
|
||||
const baseURL = "https://enjoy-storage.baizhiheizi.com";
|
||||
try {
|
||||
await Promise.all(
|
||||
pendingFiles.map((file) =>
|
||||
@@ -139,7 +149,11 @@ try {
|
||||
)
|
||||
);
|
||||
} catch (err) {
|
||||
console.error(chalk.red(`❌ Error: ${err}`));
|
||||
console.error(
|
||||
chalk.red(
|
||||
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
|
||||
)
|
||||
);
|
||||
cleanup();
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
@@ -4,8 +4,8 @@ import axios from "axios";
|
||||
import progress from "progress";
|
||||
import { createHash } from "crypto";
|
||||
|
||||
const model = "ggml-base.en-q5_1.bin";
|
||||
const md5 = "55309cc6613788f07ac7988985210734";
|
||||
const model = "ggml-tiny.en.bin";
|
||||
const sha = "c78c86eb1a8faa21b369bcd33207cc90d64ae9df";
|
||||
|
||||
const dir = path.join(process.cwd(), "lib/whisper.cpp/models");
|
||||
|
||||
@@ -15,8 +15,8 @@ fs.ensureDirSync(dir);
|
||||
try {
|
||||
if (fs.statSync(path.join(dir, model)).isFile()) {
|
||||
console.info(chalk.green(`✅ Model ${model} already exists`));
|
||||
const hash = await hashFile(path.join(dir, model), { algo: "md5" });
|
||||
if (hash === md5) {
|
||||
const hash = await hashFile(path.join(dir, model), { algo: "sha1" });
|
||||
if (hash === sha) {
|
||||
console.info(chalk.green(`✅ Model ${model} valid`));
|
||||
process.exit(0);
|
||||
} else {
|
||||
@@ -50,11 +50,12 @@ if (proxyUrl) {
|
||||
};
|
||||
}
|
||||
|
||||
const modelUrlPrefix =
|
||||
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
|
||||
// const modelUrlPrefix =
|
||||
// "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
|
||||
const modelUrlPrefix = "https://enjoy-storage.baizhiheizi.com";
|
||||
|
||||
function hashFile(path, options) {
|
||||
const algo = options.algo || "md5";
|
||||
const algo = options.algo || "sha1";
|
||||
return new Promise((resolve, reject) => {
|
||||
const hash = createHash(algo);
|
||||
const stream = fs.createReadStream(path);
|
||||
@@ -65,6 +66,7 @@ function hashFile(path, options) {
|
||||
}
|
||||
|
||||
const download = async (url, dest) => {
|
||||
console.info(chalk.blue(`=> Start to download from ${url} to ${dest}`));
|
||||
return axios
|
||||
.get(url, { responseType: "stream" })
|
||||
.then((response) => {
|
||||
@@ -82,13 +84,28 @@ const download = async (url, dest) => {
|
||||
progressBar.tick(chunk.length);
|
||||
});
|
||||
|
||||
response.data.pipe(fs.createWriteStream(dest)).on("close", () => {
|
||||
response.data.pipe(fs.createWriteStream(dest)).on("close", async () => {
|
||||
console.info(chalk.green(`✅ Model ${model} downloaded successfully`));
|
||||
process.exit(0);
|
||||
const hash = await hashFile(path.join(dir, model), { algo: "sha1" });
|
||||
if (hash === sha) {
|
||||
console.info(chalk.green(`✅ Model ${model} valid`));
|
||||
process.exit(0);
|
||||
} else {
|
||||
console.error(
|
||||
chalk.red(
|
||||
`❌ Model ${model} not valid, please try again using command \`yarn workspace enjoy download-whisper-model\``
|
||||
)
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
})
|
||||
.catch((err) => {
|
||||
console.error(chalk.red(`❌ Error: ${err}`));
|
||||
console.error(
|
||||
chalk.red(
|
||||
`❌ Failed to download ${url}: ${err}.\nPlease try again using command \`yarn workspace enjoy download-whisper-model\``
|
||||
)
|
||||
);
|
||||
process.exit(1);
|
||||
});
|
||||
};
|
||||
|
||||
@@ -12,31 +12,36 @@ export const WHISPER_MODELS_OPTIONS = [
|
||||
{
|
||||
type: "tiny",
|
||||
name: "ggml-tiny.en.bin",
|
||||
size: "77.7 MB",
|
||||
size: "75 MB",
|
||||
sha: "c78c86eb1a8faa21b369bcd33207cc90d64ae9df",
|
||||
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin",
|
||||
},
|
||||
{
|
||||
type: "base",
|
||||
name: "ggml-base.en.bin",
|
||||
size: "148 MB",
|
||||
size: "142 MB",
|
||||
sha: "137c40403d78fd54d454da0f9bd998f78703390c",
|
||||
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin",
|
||||
},
|
||||
{
|
||||
type: "small",
|
||||
name: "ggml-small.en.bin",
|
||||
size: "488 MB",
|
||||
size: "466 MB",
|
||||
sha: "db8a495a91d927739e50b3fc1cc4c6b8f6c2d022",
|
||||
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin",
|
||||
},
|
||||
{
|
||||
type: "medium",
|
||||
name: "ggml-medium.en.bin",
|
||||
size: "1.53 GB",
|
||||
size: "1.5 GB",
|
||||
sha: "8c30f0e44ce9560643ebd10bbe50cd20eafd3723",
|
||||
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en.bin",
|
||||
},
|
||||
{
|
||||
type: "large",
|
||||
name: "ggml-large-v3.bin",
|
||||
size: "3.09 GB",
|
||||
size: "2.9 GB",
|
||||
sha: "ad82bf6a9043ceed055076d0fd39f5f186ff8062",
|
||||
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin",
|
||||
},
|
||||
];
|
||||
|
||||
@@ -10,7 +10,7 @@ const logger = log.scope("whisper");
|
||||
|
||||
class Whipser {
|
||||
private binMain: string;
|
||||
private defaultModel: string;
|
||||
private bundledModelsDir: string;
|
||||
public config: WhisperConfigType;
|
||||
|
||||
constructor(config?: WhisperConfigType) {
|
||||
@@ -20,13 +20,7 @@ class Whipser {
|
||||
"whisper",
|
||||
"main"
|
||||
);
|
||||
this.defaultModel = path.join(
|
||||
__dirname,
|
||||
"lib",
|
||||
"whisper",
|
||||
"models",
|
||||
"ggml-base.en-q5_1.bin"
|
||||
);
|
||||
this.bundledModelsDir = path.join(__dirname, "lib", "whisper", "models");
|
||||
if (fs.existsSync(customWhisperPath)) {
|
||||
this.binMain = customWhisperPath;
|
||||
} else {
|
||||
@@ -36,23 +30,32 @@ class Whipser {
|
||||
|
||||
currentModel() {
|
||||
if (!this.config.availableModels) return;
|
||||
if (!this.config.model) {
|
||||
const model = this.config.availableModels[0];
|
||||
settings.setSync("whisper.model", this.config.availableModels[0].name);
|
||||
return model.savePath;
|
||||
|
||||
let model: WhisperConfigType["availableModels"][0];
|
||||
if (this.config.model) {
|
||||
model = (this.config.availableModels || []).find(
|
||||
(m) => m.name === this.config.model
|
||||
);
|
||||
}
|
||||
if (!model) {
|
||||
model = this.config.availableModels[0];
|
||||
}
|
||||
|
||||
return (this.config.availableModels || []).find(
|
||||
(m) => m.name === this.config.model
|
||||
)?.savePath;
|
||||
settings.setSync("whisper.model", model.name);
|
||||
return model.savePath;
|
||||
}
|
||||
|
||||
async initialize() {
|
||||
const bundleModels = fs.readdirSync(this.bundledModelsDir);
|
||||
|
||||
const dir = path.join(settings.libraryPath(), "whisper", "models");
|
||||
fs.ensureDirSync(dir);
|
||||
const files = fs.readdirSync(dir);
|
||||
|
||||
const availableModelFiles = bundleModels.concat(files);
|
||||
|
||||
const models = [];
|
||||
for (const file of files) {
|
||||
for (const file of availableModelFiles) {
|
||||
const model = WHISPER_MODELS_OPTIONS.find((m) => m.name == file);
|
||||
if (!model) continue;
|
||||
|
||||
@@ -102,7 +105,7 @@ class Whipser {
|
||||
async check() {
|
||||
await this.initialize();
|
||||
|
||||
const model = this.currentModel() || this.defaultModel;
|
||||
const model = this.currentModel();
|
||||
|
||||
const sampleFile = path.join(__dirname, "samples", "jfk.wav");
|
||||
const tmpDir = settings.cachePath();
|
||||
@@ -169,7 +172,7 @@ class Whipser {
|
||||
throw new Error("No file or blob provided");
|
||||
}
|
||||
|
||||
const model = this.currentModel() || this.defaultModel;
|
||||
const model = this.currentModel();
|
||||
|
||||
if (blob) {
|
||||
const format = blob.type.split("/")[1];
|
||||
|
||||
Reference in New Issue
Block a user