feat(01-05): wire main.py lifespan+health and rewrite documents+topics to async session

- Rewrite main.py lifespan: MinIO client created at startup, docuvault bucket
  auto-created if missing, stored on app.state.minio; engine.dispose() on shutdown
- Extend /health endpoint: probes PostgreSQL (SELECT 1) and MinIO (bucket_exists)
  returning {"status": "ok"|"degraded", "checks": {"postgres": ..., "minio": ...}}
- Rewrite api/documents.py: all routes inject session: AsyncSession = Depends(get_db);
  save_upload/save_metadata/list_metadata/get_metadata/delete_document all async;
  upload handler queues extract_and_classify.delay() instead of inline classification;
  /classify endpoint retains synchronous await classifier.classify_document() for
  backward-compatible immediate response
- Rewrite api/topics.py: all routes inject session dependency; all storage calls
  are async with session parameter; Pydantic models TopicCreate/TopicUpdate/
  SuggestRequest preserved verbatim
This commit is contained in:
curo1305
2026-05-22 09:47:00 +02:00
parent 32d67de1ca
commit c1931fd566
3 changed files with 120 additions and 42 deletions
+61 -7
View File
@@ -1,31 +1,85 @@
import asyncio
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from config import ensure_data_dirs
from minio import Minio
from sqlalchemy import text
from api.documents import router as documents_router
from api.topics import router as topics_router
from api.settings import router as settings_router
from api.topics import router as topics_router
from config import settings
from db.session import AsyncSessionLocal, engine
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan: create the MinIO bucket at startup, dispose the engine at shutdown.

    D-07: bucket auto-create ensures the configured bucket exists on every reboot.
    The MinIO client is stored on ``app.state.minio`` so the /health endpoint
    can reuse it.

    Args:
        app: The FastAPI application whose ``state`` receives the MinIO client.

    Yields:
        None. Control returns to the server once startup work has finished.
    """
    # MinIO bucket initialization (RESEARCH.md Pattern 4)
    minio_client = Minio(
        settings.minio_endpoint,
        access_key=settings.minio_access_key,
        secret_key=settings.minio_secret_key,
        secure=False,
    )

    # The MinIO SDK is synchronous; run its calls in a worker thread so the
    # event loop is not blocked during startup.
    exists = await asyncio.to_thread(minio_client.bucket_exists, settings.minio_bucket)
    if not exists:
        await asyncio.to_thread(minio_client.make_bucket, settings.minio_bucket)

    app.state.minio = minio_client

    try:
        yield
    finally:
        # Shutdown: close all pooled connections. Done in ``finally`` so the
        # pool is released even when an exception or cancellation is thrown
        # into the generator at the ``yield`` point.
        await engine.dispose()
# Application instance; startup/shutdown work is delegated to ``lifespan``.
app = FastAPI(title="Document Scanner API", version="1.0.0", lifespan=lifespan)

# CORS is intentionally permissive for now. ``allow_origins`` must be passed
# exactly once: repeating a keyword argument is a SyntaxError.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Phase 1: locked down in Phase 2 after auth lands
    allow_methods=["*"],
    allow_headers=["*"],
)
# Upper bound (seconds) for each dependency probe, so a hung backend cannot
# stall the health endpoint itself.
_HEALTH_PROBE_TIMEOUT_S = 5.0


async def _probe_postgres() -> str:
    """Run ``SELECT 1`` on a fresh session; return "ok" or raise on failure."""
    async with AsyncSessionLocal() as session:
        await session.execute(text("SELECT 1"))
    return "ok"


async def _probe_minio(request: Request) -> str:
    """Check that the configured MinIO bucket exists; return a status string or raise."""
    # The client is looked up here (not in the caller) so a missing
    # ``app.state.minio`` is reported as a probe error rather than a 500.
    # bucket_exists is a blocking SDK call, hence the worker thread.
    found = await asyncio.to_thread(
        request.app.state.minio.bucket_exists, settings.minio_bucket
    )
    return "ok" if found else "error: bucket missing"


async def _guarded_probe(probe) -> str:
    """Await *probe* with a time limit, mapping any failure to an "error: ..." string."""
    try:
        return await asyncio.wait_for(probe, timeout=_HEALTH_PROBE_TIMEOUT_S)
    except Exception as e:
        # Includes the timeout raised by wait_for. Note that a timed-out
        # thread-based probe keeps running in its worker thread until the
        # underlying call returns; only the wait is abandoned.
        return f"error: {type(e).__name__}: {e}"


@app.get("/health")
async def health(request: Request):
    """Extended health probe: reports PostgreSQL and MinIO connectivity (D-07).

    Always returns HTTP 200 — 'degraded' status signals a partial outage without
    causing load-balancer retries. Both probes run concurrently and each is
    bounded by ``_HEALTH_PROBE_TIMEOUT_S`` so an unresponsive dependency is
    reported as an error instead of hanging the request.

    Note (T-01-05-03): error strings expose Python exception class names — acceptable
    for an internal/dev endpoint in Phase 1. Phase 2 will trim to 'error' or
    'unhealthy' once the endpoint is internet-facing.

    Args:
        request: Incoming request; used to reach ``app.state.minio``.

    Returns:
        ``{"status": "ok" | "degraded", "checks": {"postgres": ..., "minio": ...}}``
        where each check is "ok" or a string starting with "error: ".
    """
    postgres_status, minio_status = await asyncio.gather(
        _guarded_probe(_probe_postgres()),
        _guarded_probe(_probe_minio(request)),
    )
    checks: dict = {"postgres": postgres_status, "minio": minio_status}

    status = "ok" if all(v == "ok" for v in checks.values()) else "degraded"
    return {"status": status, "checks": checks}
app.include_router(documents_router)