Files
Business-Management/backend/app/routers/documents_proxy.py
T
curo1305 0d34867a69 Add PDF document service with AI extraction and per-app settings
- New `features/doc-service` FastAPI microservice: PDF upload, async
  text extraction (pdfplumber), AI classification via Anthropic/Ollama/
  LM Studio, per-user categories, file download
- Alembic migration isolated with `alembic_version_doc_service` table
- Main backend: httpx proxy routers for /api/documents/* and
  /api/documents/categories/*, admin settings API at /api/settings/*
- Runtime config in /config/doc_service_config.json (shared Docker
  volume); api_key masking on reads; atomic write with os.replace()
- Frontend: DocumentsPage, DocumentAdminSettingsPage, updated AppsPage
  launcher hub, simplified Nav (removed Settings link), new routes
- docker-compose: doc-service service, doc_data + app_config volumes,
  removed internal:true from backend-net for outbound AI API calls
- Fix pre-commit hook: probe Docker socket path so git subprocess picks
  up Docker Desktop on macOS
- Fix security_check.py: use sys.executable for bandit so venv python
  is used instead of system python

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-14 05:28:11 +02:00

85 lines
2.3 KiB
Python

"""
Proxy all /api/documents/* requests to doc-service:8001/documents/*.
Uses a module-level AsyncClient for connection pooling.
Strips hop-by-hop headers that must not be forwarded.
File downloads (/file endpoint) are streamed.
"""
import os
import httpx
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import StreamingResponse
from app.deps import get_current_user
from app.models.user import User
DOC_SERVICE_URL = os.environ.get("DOC_SERVICE_URL", "http://doc-service:8001")
# Module-level client — reused across requests for connection pooling
_client = httpx.AsyncClient(base_url=DOC_SERVICE_URL, timeout=120.0)
router = APIRouter()
_HOP_BY_HOP = frozenset(
[
"connection",
"keep-alive",
"proxy-authenticate",
"proxy-authorization",
"te",
"trailers",
"transfer-encoding",
"upgrade",
"host",
]
)
def _forward_headers(request: Request, user_id: str) -> dict:
headers = {
k: v
for k, v in request.headers.items()
if k.lower() not in _HOP_BY_HOP
}
headers["x-user-id"] = user_id
return headers
@router.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE"])
async def proxy_documents(
path: str,
request: Request,
current_user: User = Depends(get_current_user),
) -> StreamingResponse:
url = f"/documents/{path}" if path else "/documents"
headers = _forward_headers(request, str(current_user.id))
# For multipart uploads, stream the body directly
body = await request.body()
try:
response = await _client.request(
method=request.method,
url=url,
headers=headers,
content=body,
params=dict(request.query_params),
)
except httpx.RequestError as exc:
raise HTTPException(status_code=502, detail=f"doc-service unreachable: {exc}")
# Strip hop-by-hop from response headers
resp_headers = {
k: v
for k, v in response.headers.items()
if k.lower() not in _HOP_BY_HOP
}
return StreamingResponse(
content=iter([response.content]),
status_code=response.status_code,
headers=resp_headers,
media_type=response.headers.get("content-type"),
)