initial commit
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
/.venv/
|
||||
/.idea/
|
||||
5
Dockerfile
Normal file
5
Dockerfile
Normal file
@@ -0,0 +1,5 @@
|
||||
FROM python:3.9-slim
WORKDIR /app
# Install dependencies before copying the source tree so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Copy the application code last; editing code no longer forces a reinstall.
COPY . /app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
|
||||
131
app.py
Normal file
131
app.py
Normal file
@@ -0,0 +1,131 @@
|
||||
# from docx import Document
|
||||
import os
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
import thulac
|
||||
from fastapi import FastAPI, Form, UploadFile, File, Request
|
||||
from fastapi.responses import FileResponse
|
||||
from fastapi.responses import HTMLResponse
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from pydantic import BaseModel
|
||||
from reportlab.lib.pagesizes import letter
|
||||
from reportlab.pdfgen import canvas
|
||||
|
||||
|
||||
# JSON body accepted by the /export endpoint.
class ExportRequest(BaseModel):
    format: str  # export format (html, pdf, rtf)
    content: str  # content to export
|
||||
|
||||
# Initialise the THULAC word-segmentation tool once, at import time.
thu = thulac.thulac()

# Create the FastAPI application and the template loader ("templates" directory).
app = FastAPI()
templates = Jinja2Templates(directory="templates")
|
||||
|
||||
|
||||
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    # Landing page: render index.html with only the request in the context.
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
|
||||
|
||||
|
||||
@app.post("/process", response_class=HTMLResponse)
async def process(request: Request, file: UploadFile = File(None), text: str = Form(None)):
    """Handle both uploaded files and pasted text and render the tagged result page.

    Args:
        request: Incoming request, passed through to the template context.
        file: Optional uploaded file; its bytes are decoded as UTF-8.
        text: Optional pasted text, used when no file was uploaded.

    Returns:
        The rendered ``result.html`` page on success, or ``error.html`` when
        no input was supplied or the upload is not valid UTF-8.
    """
    # An empty file field arrives without a filename; treat it as "no file"
    # so the request can still fall through to the pasted text.
    if file is not None and file.filename:
        raw = await file.read()
        try:
            # "utf-8-sig" also accepts plain UTF-8 and drops a leading BOM.
            text_content = raw.decode("utf-8-sig")
        except UnicodeDecodeError:
            return templates.TemplateResponse(
                "error.html", {"request": request, "message": "文件不是有效的 UTF-8 文本"}
            )
    elif text:
        text_content = text
    else:
        return templates.TemplateResponse(
            "error.html", {"request": request, "message": "未提供文件或文本"}
        )

    # Tag whichever input was selected above (not the raw ``text`` parameter,
    # which is None for file uploads).
    processed_text = process_text(text_content)

    return templates.TemplateResponse("result.html", {"request": request, "content": processed_text})
|
||||
|
||||
|
||||
def process_text(text: str) -> str:
    """Segment ``text`` and wrap each tagged word in a span carrying a CSS class.

    Every word returned by ``thu.cut`` is HTML-escaped before being placed in
    the markup, because the result is rendered unescaped by the result page.
    Words with a tag become ``<span class='word X'>…</span>`` where ``X`` is
    the first character of the tag; words without a tag are emitted as plain
    (escaped) text. Newlines are converted to ``<br>`` so paragraph breaks
    survive.

    Args:
        text: Raw user-supplied text.

    Returns:
        An HTML fragment as a string.
    """
    from html import escape  # function-scope import keeps this block self-contained

    pieces = []
    for word, tag in thu.cut(text):
        safe_word = escape(word)
        if tag:  # make sure the tag is not empty
            pieces.append(f"<span class='word {tag[0]}'>{safe_word}</span>")
        else:
            pieces.append(safe_word)  # no part-of-speech tag: emit the word as-is

    # Replace newlines with <br>
    return "".join(pieces).replace("\n", "<br>")
|
||||
|
||||
|
||||
|
||||
@app.post("/export")
async def export_file(request: ExportRequest):
    """Export the submitted content as an HTML, PDF or RTF download.

    Args:
        request: Body carrying ``format`` ("html", "pdf" or "rtf") and the
            ``content`` to export.

    Returns:
        A download response for the requested format.

    Raises:
        HTTPException: 400 when ``format`` is not one of the supported values,
            so clients checking the HTTP status see the failure.
    """
    from fastapi import HTTPException
    from fastapi.responses import Response

    export_format = request.format
    content = request.content

    if export_format == "html":
        # Declare the encoding so the saved file opens correctly from disk.
        file_content = f'<html><head><meta charset="utf-8"></head><body>{content}</body></html>'
        return create_file_response("exported_result.html", file_content.encode("utf-8"), "text/html")

    if export_format == "pdf":
        # Reserve a path, then close the handle before the PDF writer opens it.
        with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            pdf_path = temp_file.name
        try:
            generate_pdf(pdf_path, content)
            with open(pdf_path, "rb") as pdf_file:
                pdf_bytes = pdf_file.read()
        finally:
            # The bytes are served from memory, so the temp file can always go.
            os.remove(pdf_path)
        return Response(
            content=pdf_bytes,
            media_type="application/pdf",
            headers={"Content-Disposition": 'attachment; filename="exported_result.pdf"'},
        )

    if export_format == "rtf":
        file_content = generate_rtf(content)
        return create_file_response("exported_result.rtf", file_content.encode("utf-8"), "application/rtf")

    raise HTTPException(status_code=400, detail="Unsupported format")
|
||||
|
||||
|
||||
def create_file_response(file_name, file_content, media_type):
    """Build a download response for in-memory content.

    The payload is served straight from memory, so nothing is written to disk
    and no temporary file is left behind.

    Args:
        file_name: Name suggested to the client for the downloaded file.
        file_content: Response body (the callers in this file pass bytes).
        media_type: Value for the Content-Type header.

    Returns:
        A response carrying ``file_content`` with an attachment
        Content-Disposition header.
    """
    from urllib.parse import quote

    from fastapi.responses import Response

    # Plain names use the simple quoted form; anything that needs
    # percent-encoding (e.g. non-ASCII) uses the RFC 5987 ``filename*`` form.
    quoted_name = quote(file_name)
    if quoted_name == file_name:
        disposition = f'attachment; filename="{file_name}"'
    else:
        disposition = f"attachment; filename*=utf-8''{quoted_name}"

    return Response(
        content=file_content,
        media_type=media_type,
        headers={"Content-Disposition": disposition},
    )
|
||||
|
||||
|
||||
def generate_pdf(file_path, content):
    """Write ``content`` to a PDF file, one output line per ``<br>``-separated segment.

    HTML tags other than line breaks are removed and character entities are
    decoded, so only the visible text is drawn. A CJK-capable font is used
    because Helvetica has no glyphs for Chinese text.

    Args:
        file_path: Destination handed to ``canvas.Canvas``.
        content: HTML fragment in which lines are separated by ``<br>``.
    """
    import html
    import re

    from reportlab.pdfbase import pdfmetrics
    from reportlab.pdfbase.cidfonts import UnicodeCIDFont

    font_name = "STSong-Light"
    font_size = 12
    margin = 40
    line_height = 20

    # NOTE(review): CID fonts are referenced rather than embedded, so the PDF
    # viewer must supply a matching CJK font - confirm this is acceptable,
    # otherwise register a bundled TrueType font instead.
    pdfmetrics.registerFont(UnicodeCIDFont(font_name))

    c = canvas.Canvas(file_path, pagesize=letter)
    c.setFont(font_name, font_size)
    _, height = letter
    y = height - margin

    for segment in re.split(r"<br\s*/?>", content, flags=re.IGNORECASE):
        # Break before drawing, so a full last page is not followed by a blank one.
        if y < margin:
            c.showPage()
            c.setFont(font_name, font_size)  # font state is reset on each new page
            y = height - margin
        visible_text = html.unescape(re.sub(r"<[^>]*>", "", segment)).strip()
        c.drawString(margin, y, visible_text)
        y -= line_height

    c.save()
|
||||
|
||||
|
||||
def _rtf_escape(text):
    """Escape plain text for use inside an RTF document body.

    Backslashes and braces are backslash-escaped; every non-ASCII character is
    written as one ``\\uN?`` escape per UTF-16 code unit, with N as a signed
    16-bit decimal, so the result is pure ASCII.
    """
    out = []
    for ch in text:
        if ch in "\\{}":
            out.append("\\" + ch)
        elif ord(ch) < 128:
            out.append(ch)
        else:
            encoded = ch.encode("utf-16-le", "surrogatepass")
            for i in range(0, len(encoded), 2):
                unit = int.from_bytes(encoded[i:i + 2], "little")
                if unit > 32767:  # RTF expects a signed 16-bit value
                    unit -= 65536
                out.append(f"\\u{unit}?")
    return "".join(out)


def generate_rtf(content):
    """Convert an HTML fragment to a minimal RTF document.

    ``<br>`` tags become RTF line breaks, all other tags are dropped, HTML
    entities are decoded, and the remaining text is escaped for RTF.

    Args:
        content: HTML fragment in which lines are separated by ``<br>``.

    Returns:
        The RTF document as an ASCII-only string.
    """
    import html
    import re

    segments = re.split(r"<br\s*/?>", content, flags=re.IGNORECASE)
    lines = [
        _rtf_escape(html.unescape(re.sub(r"<[^>]*>", "", segment)))
        for segment in segments
    ]
    # The space after the header ends the \deff0 control word, so body text
    # starting with a digit cannot be read as part of its parameter.
    return "{\\rtf1\\ansi\\deff0 " + "\\line ".join(lines) + "}"
|
||||
|
||||
2
build.sh
Normal file
2
build.sh
Normal file
@@ -0,0 +1,2 @@
|
||||
#!/bin/sh
# Build the image, then start the container; stop at the first failing step
# so a stale image is never started after a failed build.
set -e
docker build -t fastapi-nlp .
docker run -d -p 8008:80 fastapi-nlp
|
||||
13
main.py
Normal file
13
main.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from fastapi import FastAPI
|
||||
|
||||
# Application instance; the route decorators below register handlers on it.
app = FastAPI()
|
||||
|
||||
|
||||
@app.get("/")
async def root():
    # Fixed greeting payload for the index route.
    greeting = {"message": "Hello World"}
    return greeting
|
||||
|
||||
|
||||
@app.get("/hello/{name}")
async def say_hello(name: str):
    # Greet whoever is named in the path parameter.
    message = f"Hello {name}"
    return {"message": message}
|
||||
4
requirements.txt
Normal file
4
requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
thulac~=0.2.2
fastapi~=0.115.5
pydantic~=2.9.2
reportlab~=4.2.5
# ASGI server launched by the Dockerfile CMD.
uvicorn
# Needed by fastapi.templating.Jinja2Templates.
jinja2
# Needed by FastAPI to parse Form(...) / File(...) parameters.
python-multipart
|
||||
0
templates/error.html
Normal file
0
templates/error.html
Normal file
150
templates/index.html
Normal file
150
templates/index.html
Normal file
@@ -0,0 +1,150 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>词性标注工具</title>
|
||||
<style>
|
||||
/* 全局样式 */
|
||||
body {
|
||||
font-family: "PingFang SC", "Microsoft YaHei", "Source Han Sans", Arial, sans-serif;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
background-color: #f9f9f9;
|
||||
}
|
||||
|
||||
/* 页面容器 */
|
||||
.container {
|
||||
max-width: 800px;
|
||||
margin: 50px auto;
|
||||
padding: 20px;
|
||||
background-color: #fff;
|
||||
border-radius: 10px;
|
||||
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
/* 标题样式 */
|
||||
h1 {
|
||||
text-align: center;
|
||||
color: #333;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
/* 切换按钮样式 */
|
||||
.switch-buttons {
|
||||
text-align: center;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.switch-buttons button {
|
||||
background-color: #007bff;
|
||||
color: #fff;
|
||||
border: none;
|
||||
padding: 10px 20px;
|
||||
font-size: 14px;
|
||||
cursor: pointer;
|
||||
margin: 0 10px;
|
||||
border-radius: 5px;
|
||||
transition: background-color 0.3s ease;
|
||||
}
|
||||
|
||||
.switch-buttons button:hover {
|
||||
background-color: #0056b3;
|
||||
}
|
||||
|
||||
/* 输入框容器 */
|
||||
.input-container {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.input-container.active {
|
||||
display: block !important;
|
||||
}
|
||||
|
||||
/* 表单元素 */
|
||||
label {
|
||||
font-size: 14px;
|
||||
color: #555;
|
||||
margin-bottom: 8px;
|
||||
display: block;
|
||||
}
|
||||
|
||||
input[type="file"],
|
||||
textarea {
|
||||
width: 97%;
|
||||
padding: 10px;
|
||||
font-size: 14px;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 5px;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
/* 提交按钮右对齐 */
|
||||
.form-actions {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
button[type="submit"] {
|
||||
background-color: #28a745;
|
||||
color: #fff;
|
||||
border: none;
|
||||
padding: 10px 20px;
|
||||
font-size: 14px;
|
||||
cursor: pointer;
|
||||
border-radius: 5px;
|
||||
transition: background-color 0.3s ease;
|
||||
}
|
||||
|
||||
button[type="submit"]:hover {
|
||||
background-color: #218838;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>词性标注工具</h1>
|
||||
|
||||
<!-- 切换方式 -->
|
||||
<div class="switch-buttons">
|
||||
<button onclick="toggleInputMethod('file')">上传文件</button>
|
||||
<button onclick="toggleInputMethod('text')">粘贴文本</button>
|
||||
</div>
|
||||
|
||||
<!-- 文件上传 -->
|
||||
<div id="file-upload-container" class="input-container active">
|
||||
<form action="/process" method="post" enctype="multipart/form-data">
|
||||
<label for="file">选择文件:</label>
|
||||
<input type="file" name="file" id="file">
|
||||
<div class="form-actions">
|
||||
<button type="submit">提交文本</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<!-- 粘贴文本 -->
|
||||
<div id="text-paste-container" class="input-container">
|
||||
<form action="/process" method="post">
|
||||
<label for="text">粘贴文本:</label>
|
||||
<textarea name="text" id="text" rows="10" placeholder="在这里粘贴要处理的文本"></textarea>
|
||||
<div class="form-actions">
|
||||
<button type="submit">提交文本</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Show the panel for the chosen input method ("file", otherwise text) and hide the other.
function toggleInputMethod(method) {
    const showFile = method === "file";
    document.getElementById("file-upload-container").classList.toggle("active", showFile);
    document.getElementById("text-paste-container").classList.toggle("active", !showFile);
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
143
templates/result.html
Normal file
143
templates/result.html
Normal file
@@ -0,0 +1,143 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>标注结果</title>
|
||||
<style>
|
||||
body {
|
||||
background-color: #fafafa;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
#result {
|
||||
font-family: "PingFang SC", "Microsoft YaHei", "Source Han Sans", "Noto Sans CJK SC", Arial, sans-serif;
|
||||
font-size: 16px;
|
||||
color: #333;
|
||||
line-height: 1.8;
|
||||
background-color: #f9f9f9;
|
||||
border: 1px solid #ddd;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
||||
max-width: 800px;
|
||||
width: 90%;
|
||||
margin-top: 20px;
|
||||
}
|
||||
|
||||
#return {
|
||||
position: absolute; /* 固定在页面的左上角 */
|
||||
top: 10px;
|
||||
left: 10px;
|
||||
text-decoration: none;
|
||||
font-size: 14px;
|
||||
color: #007bff; /* 蓝色字体 */
|
||||
background: #f1f1f1; /* 浅灰背景 */
|
||||
padding: 8px 12px;
|
||||
border-radius: 5px;
|
||||
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
||||
}
|
||||
|
||||
#return:hover {
|
||||
background: #e2e6ea; /* 鼠标悬停时背景变化 */
|
||||
}
|
||||
|
||||
p {
|
||||
margin: 10px 0;
|
||||
text-align: justify;
|
||||
}
|
||||
|
||||
.word.n { color: red; }
|
||||
.word.v { color: blue; }
|
||||
.word.a { color: green; }
|
||||
.word.d { color: orange; }
|
||||
|
||||
.word.hidden { color: inherit !important; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>标注结果</h1>
|
||||
|
||||
<!-- 返回按钮 -->
|
||||
<a href="/" id="return">返回</a>
|
||||
|
||||
<!-- 词性说明与控制 -->
|
||||
<div style="margin-bottom: 10px;">
|
||||
<strong>词性显示控制:</strong>
|
||||
<label><input type="checkbox" id="toggle-n" checked> <span style="color: red;">名词</span></label>
|
||||
<label><input type="checkbox" id="toggle-a" checked> <span style="color: green;">形容词</span></label>
|
||||
|
||||
<label><input type="checkbox" id="toggle-v"> <span style="color: blue;">动词</span></label>
|
||||
<label><input type="checkbox" id="toggle-d"> <span style="color: orange;">副词</span></label>
|
||||
</div>
|
||||
|
||||
<!-- 标注结果 -->
|
||||
<div id="result">
|
||||
{{ content | safe }}
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<strong>导出结果:</strong>
|
||||
<button onclick="exportFile('html')">导出为 HTML</button>
|
||||
<button onclick="exportFile('pdf')">导出为 PDF</button>
|
||||
<button onclick="exportFile('rtf')">导出为富文本(RTF)</button>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// POST the current result markup to /export and download the returned file.
// `format` is one of "html", "pdf" or "rtf"; failures are shown with alert().
function exportFile(format) {
    const content = document.getElementById("result").innerHTML; // current result markup
    fetch("/export", {
        method: "POST",
        headers: {
            "Content-Type": "application/json"
        },
        body: JSON.stringify({ format: format, content: content }) // keys match the server-side model
    })
        .then(response => {
            if (!response.ok) throw new Error("导出失败");
            return response.blob();
        })
        .then(blob => {
            const url = window.URL.createObjectURL(blob);
            const a = document.createElement("a");
            a.style.display = "none";
            a.href = url;
            a.download = `exported_result.${format}`;
            document.body.appendChild(a);
            a.click();
            a.remove(); // do not leave one hidden link in the page per export
            window.URL.revokeObjectURL(url);
        })
        .catch(err => alert(err.message));
}
|
||||
</script>
|
||||
|
||||
<script>
|
||||
// Look up the part-of-speech checkboxes and the result area.
const toggles = {
    n: document.getElementById("toggle-n"),
    v: document.getElementById("toggle-v"),
    a: document.getElementById("toggle-a"),
    d: document.getElementById("toggle-d"),
};
const resultDiv = document.getElementById("result");

// Colour (checked) or un-colour (unchecked) every word carrying the given tag class.
function applyToggle(tag, checkbox) {
    resultDiv.querySelectorAll(`.word.${tag}`).forEach(word => {
        word.classList.toggle("hidden", !checkbox.checked);
    });
}

Object.entries(toggles).forEach(([tag, checkbox]) => {
    // Apply the initial checkbox state, so boxes that start unchecked
    // do not show coloured words until first toggled.
    applyToggle(tag, checkbox);
    // Keep the words in sync with later changes.
    checkbox.addEventListener("change", () => applyToggle(tag, checkbox));
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
11
test_main.http
Normal file
11
test_main.http
Normal file
@@ -0,0 +1,11 @@
|
||||
# Test your FastAPI endpoints
|
||||
|
||||
GET http://127.0.0.1:8000/
|
||||
Accept: application/json
|
||||
|
||||
###
|
||||
|
||||
GET http://127.0.0.1:8000/hello/User
|
||||
Accept: application/json
|
||||
|
||||
###
|
||||
Reference in New Issue
Block a user