diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7a3a3f1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/.venv/
+/.idea/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..5c804f4
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,5 @@
+FROM python:3.9-slim
+WORKDIR /app
+COPY . /app
+RUN pip install -r requirements.txt
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..4e9616f
--- /dev/null
+++ b/app.py
@@ -0,0 +1,219 @@
+# from docx import Document
+import html
+import os
+import re
+from tempfile import NamedTemporaryFile
+
+import thulac
+from fastapi import FastAPI, Form, UploadFile, File, Request
+from fastapi.responses import FileResponse
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+from pydantic import BaseModel
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfbase import pdfmetrics
+from reportlab.pdfbase.cidfonts import UnicodeCIDFont
+from reportlab.pdfgen import canvas
+from starlette.background import BackgroundTask
+
+# Line-break markup produced by process_text(). The exporters also accept the
+# self-closing spellings in case the client sends those back.
+LINE_BREAK = "<br>"
+_LINE_BREAK_RE = re.compile(r"<br\s*/?>", re.IGNORECASE)
+
+# Helvetica has no CJK glyphs, so PDFs use one of ReportLab's built-in CID
+# fonts for Simplified Chinese instead.
+PDF_FONT = "STSong-Light"
+pdfmetrics.registerFont(UnicodeCIDFont(PDF_FONT))
+
+
+class ExportRequest(BaseModel):
+    """JSON body accepted by POST /export."""
+
+    format: str  # export format: "html", "pdf" or "rtf"
+    content: str  # content to export
+
+
+# Initialise the THULAC segmenter once, at import time.
+thu = thulac.thulac()
+
+# Initialise the FastAPI application and its templates.
+app = FastAPI()
+templates = Jinja2Templates(directory="templates")
+
+
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    """Render the landing page (index.html)."""
+    return templates.TemplateResponse("index.html", {"request": request})
+
+
+@app.post("/process", response_class=HTMLResponse)
+async def process(request: Request, file: UploadFile = File(None), text: str = Form(None)):
+    """Tag an uploaded file or pasted text and render the result page.
+
+    An uploaded file takes precedence over pasted text. error.html is
+    rendered when neither is supplied or the upload is not valid UTF-8.
+    """
+    # An empty file input may still arrive as an UploadFile without a
+    # filename; treat that as "no file" so pasted text is not ignored.
+    if file is not None and file.filename:
+        raw = await file.read()
+        try:
+            # utf-8-sig also reads plain UTF-8 and drops a leading BOM.
+            text_content = raw.decode("utf-8-sig")
+        except UnicodeDecodeError:
+            return templates.TemplateResponse(
+                "error.html", {"request": request, "message": "上传的文件不是有效的 UTF-8 文本"}
+            )
+    elif text:
+        text_content = text
+    else:
+        return templates.TemplateResponse(
+            "error.html", {"request": request, "message": "未提供文件或文本"}
+        )
+
+    # Tag whichever input was selected above. Passing `text` here broke file
+    # uploads, where `text` is None.
+    processed_text = process_text(text_content)
+
+    return templates.TemplateResponse("result.html", {"request": request, "content": processed_text})
+
+
+def process_text(text: str) -> str:
+    """Tag *text* with THULAC and return it as an HTML fragment.
+
+    Each tagged word is wrapped in a span whose CSS class is its
+    part-of-speech tag; untagged words are emitted bare. Words are
+    HTML-escaped because result.html renders the fragment with "| safe".
+    Newlines become LINE_BREAK so paragraph breaks are kept.
+    """
+    # NOTE(review): the span markup was not visible in the reviewed copy of
+    # this patch (its HTML tags had been stripped); confirm the class naming
+    # against the CSS/JS in result.html.
+    pieces = []
+    for word, tag in thu.cut(text):
+        safe_word = html.escape(word)
+        if tag:  # make sure the tag is not empty
+            pieces.append(f'<span class="{tag}">{safe_word}</span>')
+        else:
+            pieces.append(safe_word)  # no tag: emit the word on its own
+
+    return "".join(pieces).replace("\n", LINE_BREAK)
+
+
+@app.post("/export")
+async def export_file(request: ExportRequest):
+    """Return the submitted content as an HTML, PDF or RTF download.
+
+    Any other format yields the JSON body {"error": "Unsupported format"}.
+    """
+    export_format = request.format  # local name avoids shadowing format()
+    content = request.content
+
+    if export_format == "html":
+        file_name = "exported_result.html"
+        # NOTE(review): the wrapper markup was not visible in the reviewed
+        # copy of this patch; the charset declaration keeps the saved file
+        # readable when it is opened from disk.
+        file_content = (
+            '<!DOCTYPE html><html><head><meta charset="utf-8"></head>'
+            f"<body>{content}</body></html>"
+        )
+        return create_file_response(file_name, file_content.encode("utf-8"), "text/html")
+
+    elif export_format == "pdf":
+        # Close the handle before ReportLab reopens the path by name.
+        with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+            pdf_path = temp_file.name
+        try:
+            generate_pdf(pdf_path, content)
+        except Exception:
+            os.remove(pdf_path)  # do not leave a half-written file behind
+            raise
+        return _temp_file_response(pdf_path, "exported_result.pdf", "application/pdf")
+
+    elif export_format == "rtf":
+        file_name = "exported_result.rtf"
+        file_content = generate_rtf(content)
+        return create_file_response(file_name, file_content.encode("utf-8"), "application/rtf")
+
+    return {"error": "Unsupported format"}
+
+
+def _temp_file_response(path, file_name, media_type):
+    """Serve the file at *path* as a download, deleting it after sending."""
+    return FileResponse(
+        path,
+        filename=file_name,
+        media_type=media_type,
+        background=BackgroundTask(os.remove, path),
+    )
+
+
+def create_file_response(file_name, file_content, media_type):
+    """Write *file_content* (bytes) to a temp file and return it as a download.
+
+    The temp file keeps the extension of *file_name* and is removed once the
+    response has been sent.
+    """
+    suffix = os.path.splitext(file_name)[1]
+    with NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
+        temp_file.write(file_content)
+        temp_path = temp_file.name
+    return _temp_file_response(temp_path, file_name, media_type)
+
+
+def generate_pdf(file_path, content):
+    """Write *content* to a letter-size PDF at *file_path*, one line per break.
+
+    Lines are drawn 20 pt apart from 40 pt below the top edge; a new page is
+    started when the cursor drops under the 40 pt bottom margin.
+    """
+    c = canvas.Canvas(file_path, pagesize=letter)
+    c.setFont(PDF_FONT, 12)
+    _, height = letter
+    y = height - 40
+
+    for line in _LINE_BREAK_RE.split(content):
+        c.drawString(40, y, line.strip())
+        y -= 20
+        if y < 40:  # page-break condition
+            c.showPage()
+            c.setFont(PDF_FONT, 12)  # re-apply the font on the new page
+            y = height - 40
+    c.save()
+
+
+def _rtf_escape(text):
+    """Escape *text* for RTF: control characters and non-ASCII code points."""
+    out = []
+    for ch in text:
+        if ch in "\\{}":
+            out.append("\\" + ch)
+        elif ord(ch) < 128:
+            out.append(ch)
+        else:
+            # \uN takes a signed 16-bit value per UTF-16 code unit, followed
+            # by a fallback character for readers without Unicode support.
+            units = ch.encode("utf-16-le")
+            for i in range(0, len(units), 2):
+                unit = int.from_bytes(units[i:i + 2], "little")
+                if unit > 32767:
+                    unit -= 65536
+                out.append(f"\\u{unit}?")
+    return "".join(out)
+
+
+def generate_rtf(content):
+    """Return *content* as an RTF document string.
+
+    Line-break markup becomes RTF line breaks. The text is escaped to pure
+    ASCII, so encoding the result as UTF-8 cannot garble non-Latin text.
+    """
+    # The trailing space terminates \deff0, so a body starting with a digit
+    # is not read as part of its numeric parameter.
+    rtf_header = r"{\rtf1\ansi\deff0 "
+    rtf_footer = r"}"
+    rtf_body = r"\line ".join(_rtf_escape(part) for part in _LINE_BREAK_RE.split(content))
+    return rtf_header + rtf_body + rtf_footer
diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..e77a149
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,2 @@
+docker build -t fastapi-nlp .
+docker run -d -p 8008:80 fastapi-nlp
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..6d7c6d9
--- /dev/null
+++ b/main.py
@@ -0,0 +1,13 @@
+from fastapi import FastAPI
+
+app = FastAPI()
+
+
+@app.get("/")
+async def root():
+    return {"message": "Hello World"}
+
+
+@app.get("/hello/{name}")
+async def say_hello(name: str):
+    return {"message": f"Hello {name}"}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a2b8def
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+thulac~=0.2.2
+fastapi~=0.115.5
+pydantic~=2.9.2
+reportlab~=4.2.5
+uvicorn~=0.32.0
+jinja2~=3.1.4
+python-multipart~=0.0.17
\ No newline at end of file
diff --git a/templates/error.html b/templates/error.html
new file mode 100644
index 0000000..e69de29
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..97d067c
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,150 @@
+ + + + + + 词性标注工具 + + + +
+

词性标注工具

+ + +
+ + +
+ + +
+
+ + +
+ +
+
+
+ + +
+
+ + +
+ +
+
+
+
+ + + + diff --git a/templates/result.html b/templates/result.html new file mode 100644 index 0000000..15a5c1d --- /dev/null +++ b/templates/result.html @@ -0,0 +1,143 @@ + + + + + + 标注结果 + + + +

标注结果

+ + + 返回 + + +
+ 词性显示控制: + + + + + +
+ + +
+ {{ content | safe }} +
+ +
+ 导出结果: + + + +
+ + + + + + diff --git a/test_main.http b/test_main.http new file mode 100644 index 0000000..a2d81a9 --- /dev/null +++ b/test_main.http @@ -0,0 +1,11 @@ +# Test your FastAPI endpoints + +GET http://127.0.0.1:8000/ +Accept: application/json + +### + +GET http://127.0.0.1:8000/hello/User +Accept: application/json + +###