add other parse code

This commit is contained in:
alsesa
2024-10-14 17:20:03 +08:00
parent 0a69ccf73d
commit 23576dd582
47 changed files with 1319 additions and 33903 deletions

95
bitfree/videoParse.py Normal file
View File

@@ -0,0 +1,95 @@
import requests
from tqdm import tqdm
import urllib.parse
import os
def extract_total_size(content_range):
    """Return the total resource size from a ``Content-Range`` header value.

    The value is split on ``/`` and the part after the slash is parsed as the
    size, e.g. ``"bytes 0-1/12345"`` yields ``12345``.

    Args:
        content_range: Raw header value, or ``None`` when the response carried
            no ``Content-Range`` header.

    Returns:
        The total size as an ``int``, or ``None`` when the header is missing
        or empty, does not contain exactly one ``/``, or its total part is not
        an integer (for example the ``*`` placeholder for an unknown length).
    """
    # A response without the header yields None from headers.get(); treat it
    # like any other unusable value instead of failing on None.split().
    if not content_range:
        return None
    parts = content_range.split('/')
    if len(parts) != 2:
        return None
    try:
        return int(parts[1])
    except ValueError:
        # Non-numeric total such as "*": the size is simply unknown.
        return None
def download_video(url, headers, timeout=30):
    """Download the video at *url* into the current working directory.

    The local file name is the percent-decoded last ``/``-separated segment of
    *url*. A first request, sent with *headers* unchanged, is used only to
    read the ``Content-Range`` header and learn the total size. A second
    request then asks for the full byte range and streams it to disk while a
    progress bar is updated. Outcomes are reported with ``print``; nothing is
    returned.

    Args:
        url: Direct URL of the video file.
        headers: Request headers for the first request. A copy with its
            ``Range`` entry overwritten is used for the download request.
        timeout: Value passed to ``requests.get`` as ``timeout`` for both
            requests, so a stalled server cannot block the caller forever.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the target file cannot be written or renamed.
    """
    # Derive the local file name from the last segment of the URL.
    encoded_filename = url.split('/')[-1]
    # basename() keeps a percent-encoded "/" (%2F) in the segment from
    # steering the write into another directory.
    decoded_filename = os.path.basename(urllib.parse.unquote(encoded_filename))

    # Skip files that are already present locally.
    if os.path.exists(decoded_filename):
        print(f"视频 {decoded_filename} 已存在,无需下载。")
        return

    # First request: only the status code and Content-Range are needed, so
    # the streamed response is closed as soon as they have been read.
    with requests.get(url, headers=headers, stream=True,
                      timeout=timeout) as first_response:
        first_status = first_response.status_code
        content_range = first_response.headers.get('Content-Range')

    if first_status not in (200, 206):
        print(f"第一次请求 {url} 失败,状态码:{first_status}")
        return

    # A missing or unparsable header means the size is unknown; report it
    # through the existing message instead of raising.
    try:
        total_size = extract_total_size(content_range) if content_range else None
    except ValueError:
        total_size = None
    if total_size is None or total_size <= 0:
        print(f"在第一次请求 {url} 时未正确获取总大小。")
        return

    # Second request: same headers, but asking for the whole byte range.
    new_headers = headers.copy()
    new_headers['Range'] = f'bytes=0-{total_size - 1}'

    # Stream into a temporary name so that an interrupted run never leaves a
    # truncated file that the existence check above would later accept.
    partial_path = decoded_filename + '.part'
    with requests.get(url, headers=new_headers, stream=True,
                      timeout=timeout) as response:
        if response.status_code != 206:
            print(f"第二次请求 {url} 失败,状态码:{response.status_code}")
            return

        finished = False
        try:
            with open(partial_path, 'wb') as f:
                with tqdm(total=total_size, unit='iB',
                          unit_scale=True) as progress_bar:
                    for chunk in response.iter_content(chunk_size=1024):
                        f.write(chunk)
                        progress_bar.update(len(chunk))
            os.replace(partial_path, decoded_filename)
            finished = True
        finally:
            # Remove the incomplete temporary file on any failure.
            if not finished and os.path.exists(partial_path):
                os.remove(partial_path)

    print(f"视频 {decoded_filename} 下载成功!")
# Custom request headers sent with every download. The values imitate a
# Safari video request. 'Range: bytes=0-1' makes the first request in
# download_video() ask for only two bytes; that request is used solely to
# read the Content-Range header.
headers = {
    "Accept": "*/*",
    "Sec-Fetch-Site": "cross-site",
    "Accept-Language": "en-US,en;q=0.9",
    # 'Accept-Encoding': 'identity',  # disabled in the original; reason not recorded
    "Sec-Fetch-Mode": "no-cors",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) "
        "Version/18.0.1 Safari/605.1.15"
    ),
    "Referer": "https://bitfree.cn/",
    "Connection": "keep-alive",
    "Range": "bytes=0-1",
    "Sec-Fetch-Dest": "video",
    "Priority": "u=3, i",
    "X-Playback-Session-Id": "6657BD92-3E02-4F9D-B03E-D0DFC26D02F7",
}
# Read the text file that lists the video links, one URL per line.
# UTF-8 is requested explicitly because the generated comment lines hold
# percent-decoded file names, which may contain non-ASCII characters.
with open('video_links.txt', 'r', encoding='utf-8') as original_file:
    lines = original_file.readlines()

# Build the annotated list: every URL line that is not already preceded by a
# '#' line gets a "# <decoded name without extension>" comment above it.
new_lines = []
for index, line in enumerate(lines):
    # Comment lines and blank lines are carried over untouched.
    if line.startswith('#') or line.strip() == "":
        new_lines.append(line)
        continue
    # The first line has no predecessor, so it always needs a comment.
    if index == 0 or not lines[index - 1].startswith('#'):
        encoded_filename = line.strip().split('/')[-1]
        # Drop the file extension (whatever its case, e.g. .mp4 or .MP4)
        # before decoding the name.
        decoded_filename = urllib.parse.unquote(
            encoded_filename.rsplit('.', 1)[0]
        )
        new_lines.append(f"# {decoded_filename}\n")
    new_lines.append(line)

# Rewrite the file only when an annotation was actually added.
if new_lines != lines:
    with open('video_links.txt', 'w', encoding='utf-8') as new_file:
        new_file.writelines(new_lines)

# Download every URL line; new_lines mirrors the file content just written,
# so the file does not need to be read a second time.
for line in new_lines:
    if line.startswith('#') or line.strip() == "":
        continue
    download_video(line.strip(), headers)