- FastAPI backend with SQLAlchemy ORM and SQLite - AI chatbot with OpenAI-compatible LLM integration (SSE streaming) - Admin panel for content management, LLM config, token management - Anonymous access with 3-question limit, token-based access control - Recruiter intent detection with admin notification - Resume generator (JD-based, Markdown to Word export) - Chinese localized public interface Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
65 lines
1.9 KiB
Python
65 lines
1.9 KiB
Python
import base64
|
|
import io
|
|
|
|
from fastapi import UploadFile
|
|
|
|
|
|
async def parse_file(file: UploadFile) -> dict:
    """Parse an uploaded file into plain text or base64-encoded image data.

    The handler is chosen from the filename extension (case-insensitive):
    ``txt``, ``docx`` and ``pdf`` yield text; ``png``, ``jpg``, ``jpeg``,
    ``gif``, ``bmp`` and ``webp`` yield base64 image data.

    Args:
        file: The uploaded file. Only its ``filename`` attribute and its
            ``read()`` coroutine are used.

    Returns:
        A dict that always contains a ``"text"`` key. Image uploads also
        carry ``"base64_image"`` and ``"mime_type"``. Oversized, unsupported
        or unparseable uploads carry an ``"error"`` message and an empty
        ``"text"``.
    """
    max_bytes = 10 * 1024 * 1024

    # Request one byte more than the limit: that is enough to detect an
    # oversized upload without pulling the entire payload into memory.
    content = await file.read(max_bytes + 1)
    if len(content) > max_bytes:
        return {"text": "", "error": "文件大小超过10MB限制"}

    filename = file.filename or ""
    ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""

    try:
        if ext == "txt":
            return {"text": _decode_text(content)}

        if ext == "docx":
            return {"text": _parse_docx(content)}

        if ext == "pdf":
            return {"text": _parse_pdf(content)}

        if ext in ("png", "jpg", "jpeg", "gif", "bmp", "webp"):
            b64 = base64.b64encode(content).decode("utf-8")
            # "jpg" is the only accepted extension that differs from its
            # MIME subtype.
            mime = f"image/{ext}" if ext != "jpg" else "image/jpeg"
            return {"text": "", "base64_image": b64, "mime_type": mime}

        return {"text": "", "error": f"不支持的文件格式: .{ext}"}

    except Exception as e:
        # Boundary handler: any parser failure is reported to the caller as
        # an error payload instead of propagating.
        return {"text": "", "error": f"文件解析失败: {str(e)}"}
|
|
|
|
|
|
def _decode_text(content: bytes) -> str:
|
|
for encoding in ("utf-8", "gbk", "gb2312", "utf-16"):
|
|
try:
|
|
return content.decode(encoding)
|
|
except (UnicodeDecodeError, LookupError):
|
|
continue
|
|
return content.decode("utf-8", errors="replace")
|
|
|
|
|
|
def _parse_docx(content: bytes) -> str:
    """Return the text of a .docx file, one non-blank paragraph per line.

    Only ``Document.paragraphs`` is read. Paragraphs whose text is empty or
    whitespace-only are skipped; the rest are joined with newlines.
    """
    from docx import Document

    document = Document(io.BytesIO(content))
    return "\n".join(
        para.text for para in document.paragraphs if para.text.strip()
    )
|
|
|
|
|
|
def _parse_pdf(content: bytes) -> str:
    """Return the extracted text of a PDF, pages joined by newlines.

    Pages for which ``extract_text()`` returns nothing (``None`` or an empty
    string) are left out of the result.
    """
    from PyPDF2 import PdfReader

    pdf = PdfReader(io.BytesIO(content))
    page_texts = (page.extract_text() for page in pdf.pages)
    # filter(None, ...) keeps only the truthy extraction results.
    return "\n".join(filter(None, page_texts))
|