chore: initial commit — existing single-user document scanner codebase

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-05-22 08:53:28 +02:00
parent 6fed5ba531
commit 7a34807fa0
71 changed files with 16408 additions and 0 deletions
+101
View File
@@ -0,0 +1,101 @@
from datetime import datetime, timezone
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Query
from services import storage, extractor, classifier
# Router for the document endpoints: every route registered on it is served
# under the "/api/documents" prefix and tagged "documents".
router = APIRouter(prefix="/api/documents", tags=["documents"])
# MIME types recognised for uploaded documents, grouped by family.
ALLOWED_MIME_TYPES = {
    # Plain-text formats
    "text/plain",
    "text/markdown",
    # Office / portable documents
    "application/pdf",
    "application/msword",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    # Raster images ("image/jpg" is non-standard but kept alongside "image/jpeg")
    "image/jpeg",
    "image/jpg",
    "image/png",
    "image/tiff",
    "image/webp",
}
@router.post("/upload")
async def upload_document(
    file: UploadFile = File(...),
    auto_classify: bool = Form(True),
):
    """Store an uploaded file, extract its text and optionally classify it.

    Args:
        file: The multipart file upload.
        auto_classify: When true, run the classifier on the document right
            after its metadata has been saved.

    Returns:
        The metadata dict built for the document. After a successful
        classification it also carries ``topics`` and ``classified_at``;
        after a failed one it carries ``classification_error`` instead.

    Raises:
        HTTPException: 400 when the upload is empty, 415 when its MIME type
            is not listed in ``ALLOWED_MIME_TYPES``.
    """
    # Local import: only needed for the filename-based fallback below.
    import mimetypes

    content = await file.read()
    if not content:
        raise HTTPException(400, "Empty file")

    original_name = file.filename or "upload"

    # Drop parameters such as "; charset=utf-8" and normalise case so the
    # client-declared type can be compared against the allow-list.
    declared = (file.content_type or "").split(";", 1)[0].strip().lower()
    if declared and declared != "application/octet-stream":
        mime = declared
    else:
        # Clients often send no type (or the generic one) for files such as
        # ".md"; fall back to the filename extension before rejecting.
        guessed, encoding = mimetypes.guess_type(original_name)
        # A non-None encoding means a compressed wrapper (e.g. ".pdf.gz"),
        # which is not the bare document type.
        mime = guessed if guessed and encoding is None else "application/octet-stream"

    if mime not in ALLOWED_MIME_TYPES:
        raise HTTPException(415, f"Unsupported file type: {mime}")

    saved = storage.save_upload(content, original_name, mime)
    text = extractor.extract_text(saved["path"], mime)
    now = datetime.now(timezone.utc).isoformat()
    meta = {
        "id": saved["id"],
        "original_name": original_name,
        "filename": saved["filename"],
        "mime_type": mime,
        "size_bytes": len(content),
        "extracted_text": text,
        "topics": [],
        "created_at": now,
        "classified_at": None,
    }
    storage.save_metadata(meta)

    if auto_classify:
        # NOTE(review): the updates below only change the response payload;
        # this function does not call save_metadata again. Presumably the
        # classifier persists its own result - confirm.
        try:
            topics = await classifier.classify_document(saved["id"])
            meta["topics"] = topics
            meta["classified_at"] = datetime.now(timezone.utc).isoformat()
        except Exception as e:
            # Classification failure is non-fatal; document is still saved
            meta["classification_error"] = str(e)
    return meta
@router.get("")
async def list_documents(
    topic: str | None = Query(None),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Return one page of document metadata, optionally filtered by topic.

    Args:
        topic: Passed through to ``storage.list_metadata`` as the filter.
        page: 1-based page number.
        per_page: Page size (1 to 100).

    Returns:
        A dict with the page's ``items``, the ``total`` number of matching
        documents, and the echoed ``page`` / ``per_page`` values.
    """
    matching = storage.list_metadata(topic=topic)
    total_count = len(matching)
    offset = (page - 1) * per_page
    window = matching[offset : offset + per_page]
    return {
        "items": window,
        "total": total_count,
        "page": page,
        "per_page": per_page,
    }
@router.get("/{doc_id}")
async def get_document(doc_id: str):
    """Return the stored metadata for ``doc_id``.

    Raises:
        HTTPException: 404 when storage has no metadata for the id.
    """
    record = storage.get_metadata(doc_id)
    if record is not None:
        return record
    raise HTTPException(404, "Document not found")
@router.delete("/{doc_id}")
async def delete_document(doc_id: str):
    """Delete the document ``doc_id`` via the storage service.

    Returns:
        ``{"success": True}`` when storage reports the deletion succeeded.

    Raises:
        HTTPException: 404 when storage reports a falsy result.
    """
    if storage.delete_document(doc_id):
        return {"success": True}
    raise HTTPException(404, "Document not found")
@router.post("/{doc_id}/classify")
async def classify_document(doc_id: str, body: dict | None = None):
    """(Re)classify an existing document, optionally against given topics.

    Args:
        doc_id: Identifier of the document to classify.
        body: Optional JSON object; its ``"topics"`` entry, when present, is
            forwarded to the classifier as the candidate topic names.

    Returns:
        ``{"topics": ...}`` with whatever the classifier returned.

    Raises:
        HTTPException: 404 when the document does not exist, 500 when the
            classifier raises.
    """
    meta = storage.get_metadata(doc_id)
    if meta is None:
        raise HTTPException(404, "Document not found")

    # A missing or empty body means "no explicit topic list".
    topic_names = body.get("topics") if body else None
    try:
        topics = await classifier.classify_document(doc_id, topic_names)
    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(500, f"Classification failed: {e}") from e
    return {"topics": topics}