From 946f4eac224a9b2ca9d81e6415c3149d5cd3c198 Mon Sep 17 00:00:00 2001
From: YuanHui <31339626+alsesa@users.noreply.github.com>
Date: Tue, 19 Nov 2024 13:48:29 +0800
Subject: [PATCH] initial commit
---
.gitignore | 2 +
Dockerfile | 5 ++
app.py | 131 ++++++++++++++++++++++++++++++++++++
build.sh | 2 +
main.py | 13 ++++
requirements.txt | 4 ++
templates/error.html | 0
templates/index.html | 150 ++++++++++++++++++++++++++++++++++++++++++
templates/result.html | 143 ++++++++++++++++++++++++++++++++++++++++
test_main.http | 11 ++++
10 files changed, 461 insertions(+)
create mode 100644 .gitignore
create mode 100644 Dockerfile
create mode 100644 app.py
create mode 100644 build.sh
create mode 100644 main.py
create mode 100644 requirements.txt
create mode 100644 templates/error.html
create mode 100644 templates/index.html
create mode 100644 templates/result.html
create mode 100644 test_main.http
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7a3a3f1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/.venv/
+/.idea/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..5c804f4
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.9-slim
+WORKDIR /app
+# Install dependencies before copying the source so this layer stays cached
+# until requirements.txt itself changes; skip pip's cache to keep the image small.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . /app
+# Serve the `app` object from app.py with uvicorn on all interfaces, port 80.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..4e9616f
--- /dev/null
+++ b/app.py
@@ -0,0 +1,131 @@
+# from docx import Document
+import os
+from tempfile import NamedTemporaryFile
+
+import thulac
+from fastapi import FastAPI, Form, UploadFile, File, Request
+from fastapi.responses import FileResponse
+from fastapi.responses import HTMLResponse
+from fastapi.templating import Jinja2Templates
+from pydantic import BaseModel
+from reportlab.lib.pagesizes import letter
+from reportlab.pdfgen import canvas
+
+
+class ExportRequest(BaseModel): # Request body accepted by POST /export
+ format: str # Export format (html, pdf, rtf)
+ content: str # Content to export
+
+# Initialize the THULAC word segmenter (created once, at import time)
+thu = thulac.thulac()
+
+# Initialize the FastAPI application and its template loader
+app = FastAPI()
+templates = Jinja2Templates(directory="templates")
+
+
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request): # Landing page: renders the "index.html" template
+ return templates.TemplateResponse("index.html", {"request": request})
+
+
+@app.post("/process", response_class=HTMLResponse)
+async def process(request: Request, file: UploadFile = File(None), text: str = Form(None)):
+    """Annotate an uploaded file or pasted text and render the result page.
+
+    An uploaded file takes precedence over `text`; its bytes are decoded as UTF-8.
+    If neither is provided, "error.html" is rendered with an explanatory message.
+    """
+    if file:
+        raw = await file.read()
+        text_content = raw.decode("utf-8")
+    elif text:
+        text_content = text
+    else:
+        return templates.TemplateResponse(
+            "error.html", {"request": request, "message": "未提供文件或文本"}
+        )
+
+    # Annotate whichever source was selected above. Passing `text` here would
+    # hand None to the tokenizer whenever the input came from an uploaded file.
+    processed_text = process_text(text_content)
+    return templates.TemplateResponse("result.html", {"request": request, "content": processed_text})
+
+
+def process_text(text: str) -> str:
+    """Wrap each segmented word in a <span> whose CSS class is its part-of-speech tag.
+
+    Newlines become <br> so paragraph breaks survive in the rendered HTML.
+    """
+    from html import escape  # the text is user-supplied and is emitted as HTML markup
+
+    # NOTE(review): span/<br> markup restored from the docstring's intent — confirm class names against result.html.
+    pieces = [
+        f'<span class="{escape(tag)}">{escape(word)}</span>' if tag else escape(word)
+        for word, tag in thu.cut(text)
+    ]
+    return "".join(pieces).replace("\n", "<br>")
+
+
+
+@app.post("/export")
+async def export_file(request: ExportRequest):
+ format = request.format
+ content = request.content
+
+ if format == "html":
+ file_name = "exported_result.html"
+ file_content = f"