Add PDF document service with AI extraction and per-app settings
- New `features/doc-service` FastAPI microservice: PDF upload, async text extraction (pdfplumber), AI classification via Anthropic/Ollama/LM Studio, per-user categories, file download
- Alembic migration isolated with `alembic_version_doc_service` table
- Main backend: httpx proxy routers for /api/documents/* and /api/documents/categories/*, admin settings API at /api/settings/*
- Runtime config in /config/doc_service_config.json (shared Docker volume); api_key masking on reads; atomic write with os.replace()
- Frontend: DocumentsPage, DocumentAdminSettingsPage, updated AppsPage launcher hub, simplified Nav (removed Settings link), new routes
- docker-compose: doc-service service, doc_data + app_config volumes, removed internal:true from backend-net for outbound AI API calls
- Fix pre-commit hook: probe Docker socket path so git subprocess picks up Docker Desktop on macOS
- Fix security_check.py: use sys.executable for bandit so venv python is used instead of system python

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,84 @@
|
||||
"""
|
||||
Proxy all /api/documents/* requests to doc-service:8001/documents/*.
|
||||
|
||||
Uses a module-level AsyncClient for connection pooling.
|
||||
Strips hop-by-hop headers that must not be forwarded.
|
||||
File downloads (/file endpoint) are streamed.
|
||||
"""
|
||||
import os
|
||||
|
||||
import httpx
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
|
||||
from app.deps import get_current_user
|
||||
from app.models.user import User
|
||||
|
||||
# Base URL of doc-service; the DOC_SERVICE_URL environment variable overrides
# the built-in default.
DOC_SERVICE_URL = os.getenv("DOC_SERVICE_URL", "http://doc-service:8001")

# A single shared client instance, created once at import time so that every
# proxied request reuses the same connection pool.
_client = httpx.AsyncClient(timeout=120.0, base_url=DOC_SERVICE_URL)

router = APIRouter()
|
||||
|
||||
# Lower-case names of headers that are dropped instead of being copied
# between the client-facing connection and the doc-service connection.
_HOP_BY_HOP = frozenset(
    {
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        # Non-standard but widely sent; RFC 9110 section 7.6.1 lists it among
        # the fields an intermediary should remove.
        "proxy-connection",
        "te",
        # The actual field name is "Trailer". "trailers" (the spelling used in
        # the old RFC 2616 list) is kept so nothing previously filtered starts
        # leaking through.
        "trailer",
        "trailers",
        "transfer-encoding",
        "upgrade",
        # Not a hop-by-hop header. Listed so the caller's Host value is not
        # copied upstream -- presumably so httpx derives Host from the
        # doc-service URL instead; confirm if this entry is ever removed.
        "host",
    }
)
|
||||
|
||||
|
||||
def _forward_headers(request: Request, user_id: str) -> dict:
    """Build the header mapping that is sent upstream.

    Every header of *request* is copied unless its lower-cased name appears in
    ``_HOP_BY_HOP``. Afterwards ``x-user-id`` is set to *user_id*, replacing
    any value already stored under that exact key.
    """
    outgoing = {}
    for name, value in request.headers.items():
        if name.lower() in _HOP_BY_HOP:
            continue
        outgoing[name] = value

    # Set last so the value chosen by this service always wins.
    outgoing["x-user-id"] = user_id
    return outgoing
|
||||
|
||||
|
||||
@router.api_route("/{path:path}", methods=["GET", "POST", "PUT", "PATCH", "DELETE"])
async def proxy_documents(
    path: str,
    request: Request,
    current_user: User = Depends(get_current_user),
) -> StreamingResponse:
    """Relay an authenticated request to doc-service and stream back its reply.

    Args:
        path: Path segment captured by the route; appended to ``/documents``
            on the upstream side (an empty value maps to ``/documents``).
        request: Incoming request whose method, headers, query string and
            body are forwarded.
        current_user: Authenticated caller; ``str(current_user.id)`` is sent
            upstream in the ``x-user-id`` header.

    Returns:
        A ``StreamingResponse`` carrying the upstream status code, the
        upstream headers minus those named in ``_HOP_BY_HOP``, and the
        upstream body bytes exactly as received.

    Raises:
        HTTPException: 502 when sending the upstream request fails with an
            ``httpx.RequestError``.
    """
    url = f"/documents/{path}" if path else "/documents"
    headers = _forward_headers(request, str(current_user.id))

    # The incoming body (including multipart uploads) is read fully into
    # memory here and forwarded as a single bytes payload.
    body = await request.body()

    upstream_request = _client.build_request(
        method=request.method,
        url=url,
        headers=headers,
        content=body,
        # multi_items() preserves repeated keys (?tag=a&tag=b); converting the
        # query params to a dict would keep only one value per key.
        params=request.query_params.multi_items(),
    )

    try:
        # stream=True returns once the upstream headers have arrived, so the
        # body is not buffered in this process.
        response = await _client.send(upstream_request, stream=True)
    except httpx.RequestError as exc:
        raise HTTPException(
            status_code=502, detail=f"doc-service unreachable: {exc}"
        ) from exc

    # Strip hop-by-hop from response headers
    resp_headers = {
        k: v
        for k, v in response.headers.items()
        if k.lower() not in _HOP_BY_HOP
    }

    async def _relay_body():
        """Yield the upstream body undecoded, then release the connection."""
        try:
            # aiter_raw() yields the bytes as sent by doc-service, so the
            # forwarded content-encoding / content-length headers stay
            # consistent with what the caller actually receives.
            async for chunk in response.aiter_raw():
                yield chunk
        finally:
            # A streamed httpx response must be closed explicitly to return
            # its connection to the pool.
            await response.aclose()

    return StreamingResponse(
        content=_relay_body(),
        status_code=response.status_code,
        headers=resp_headers,
        media_type=response.headers.get("content-type"),
    )
|
||||
Reference in New Issue
Block a user