# dt_audio/bitfree/videoParse.py

import requests
from tqdm import tqdm
import urllib.parse
import os

def extract_total_size(content_range):
    """Return the total resource size from a ``Content-Range`` header value.

    The value is expected to look like ``bytes 0-1/12345``; the number after
    the single ``/`` is the complete length of the resource.

    Args:
        content_range: Raw header value, or ``None`` when the response did
            not carry the header.

    Returns:
        The total size as an ``int``, or ``None`` when the header is missing
        or empty, does not contain exactly one ``/``, or the part after the
        slash is not an integer (for example ``*`` for an unknown length).
    """
    # headers.get() yields None when the server sent no Content-Range.
    if not content_range:
        return None
    parts = content_range.split('/')
    if len(parts) != 2:
        return None
    try:
        return int(parts[1])
    except ValueError:
        # e.g. "bytes 0-1/*": the server does not know the total length.
        return None

def download_video(url, headers, timeout=(10, 60)):
    """Download the video at *url* into the current working directory.

    The local file name is the last ``/``-separated segment of the URL,
    percent-decoded. A first request (sent with *headers* unchanged) is used
    only to read the total size from ``Content-Range``; a second request then
    asks for the whole byte range and streams it to disk behind a progress
    bar. Data is written to ``<name>.part`` and renamed only after the
    transfer finished, so an interrupted download is never mistaken for a
    finished file on the next run.

    Outcomes are reported with ``print``; nothing is returned.

    Args:
        url: Address of the video file.
        headers: Request headers as a dict; copied, never mutated.
        timeout: Passed to ``requests.get`` for both requests; a
            ``(connect, read)`` tuple in seconds, or a single number.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the target file cannot be written or renamed.
    """
    # Derive the local file name from the last URL segment.
    encoded_filename = url.split('/')[-1]
    # basename() drops any directory part that a decoded "%2F" would add,
    # keeping the download inside the current directory.
    decoded_filename = os.path.basename(urllib.parse.unquote(encoded_filename))
    if not decoded_filename:
        # e.g. a URL ending in "/": there is nothing to name the file after.
        print(f"无法从 {url} 解析出文件名，已跳过。")
        return

    # Skip files that were already downloaded.
    if os.path.exists(decoded_filename):
        print(f"视频 {decoded_filename} 已存在，无需下载。")
        return

    # Probe request: only the status and the Content-Range header are needed,
    # so the connection is released as soon as they have been read.
    with requests.get(url, headers=headers, stream=True,
                      timeout=timeout) as first_response:
        first_status = first_response.status_code
        content_range = first_response.headers.get('Content-Range')
    if first_status not in (200, 206):
        print(f"第一次请求 {url} 失败，状态码：{first_status}")
        return

    try:
        total_size = extract_total_size(content_range) if content_range else None
    except ValueError:
        # Non-numeric total such as "bytes 0-1/*".
        total_size = None
    if total_size is None:
        print(f"在第一次请求 {url} 时未正确获取总大小。")
        return

    # Same headers, but asking for the complete byte range this time.
    new_headers = headers.copy()
    new_headers['Range'] = f'bytes=0-{total_size - 1}'

    part_path = decoded_filename + '.part'
    written = 0
    complete = False
    try:
        with requests.get(url, headers=new_headers, stream=True,
                          timeout=timeout) as response:
            if response.status_code != 206:
                print(f"第二次请求 {url} 失败，状态码：{response.status_code}")
                return
            with open(part_path, 'wb') as f:
                with tqdm(total=total_size, unit='iB',
                          unit_scale=True) as progress_bar:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)
                        written += len(chunk)
                        progress_bar.update(len(chunk))
        # Fewer bytes than announced means the stream was cut short.
        complete = written >= total_size
    finally:
        # Never leave a truncated file behind, whatever went wrong.
        if not complete and os.path.exists(part_path):
            os.remove(part_path)

    if not complete:
        print(f"视频 {decoded_filename} 下载不完整（{written}/{total_size} 字节），已丢弃。")
        return

    os.replace(part_path, decoded_filename)
    print(f"视频 {decoded_filename} 下载成功！")

# Custom request headers, passed to download_video() for every link.
# They are browser-like: the User-Agent is a Safari/macOS string and the
# Referer points at bitfree.cn.
headers = {
    'Accept': '*/*',
    'Sec-Fetch-Site': 'cross-site',
    'Accept-Language': 'en-US,en;q=0.9',
    # 'Accept-Encoding': 'identity',
    'Sec-Fetch-Mode': 'no-cors',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0.1 Safari/605.1.15',
    'Referer': 'https://bitfree.cn/',
    'Connection': 'keep-alive',
    # Two-byte range used by download_video()'s first request, whose only job
    # is to read Content-Range; the function replaces it with the full range
    # for the real download.
    'Range': 'bytes=0-1',
    'Sec-Fetch-Dest': 'video',
    'Priority': 'u=3, i',
    'X-Playback-Session-Id': '6657BD92-3E02-4F9D-B03E-D0DFC26D02F7',
}

# Read the text file holding the video links (one URL per line; lines that
# start with "#" are comments). UTF-8 is requested explicitly because the
# decoded file names written below may contain non-ASCII characters.
with open('video_links.txt', 'r', encoding='utf-8') as original_file:
    lines = original_file.readlines()

# Annotate the list: every URL line that is not directly preceded by a
# comment gets one holding its decoded file name without the extension.
new_lines = []
for index, line in enumerate(lines):
    if line.startswith('#') or line.strip() == "":
        new_lines.append(line)
        continue
    # The first line has no predecessor, so it always needs a comment.
    if index == 0 or not lines[index - 1].startswith('#'):
        encoded_filename = line.strip().split('/')[-1]
        # Strip the extension whatever its case (.mp4, .MP4, ...), then
        # percent-decode what is left.
        filename_no_ext = encoded_filename.rsplit('.', 1)[0]
        decoded_filename = urllib.parse.unquote(filename_no_ext)
        new_lines.append(f"# {decoded_filename}\n")
    new_lines.append(line)

with open('video_links.txt', 'w', encoding='utf-8') as new_file:
    new_file.writelines(new_lines)

# Download every link. The annotated lines are already in memory, so the
# file does not need to be read a second time.
for line in new_lines:
    if line.startswith('#') or line.strip() == "":
        continue
    video_url = line.strip()
    download_video(video_url, headers)