b1a136b5be
CR-01: add `except HTTPException: raise` before broad except in stream_document_content — prevents 503 (reconnect prompt) from being swallowed and replaced with misleading 502 CR-02: move pre-flight credential checks BEFORE Redis setex in oauth_initiate — no orphan state tokens written for unconfigured providers; also adds onedrive_tenant_id to OneDrive pre-flight condition (WR-02) CR-03: add CLOUD_CREDS_KEY to celery-worker environment in docker-compose.yml — worker cannot decrypt cloud credentials without this key; every cloud document task was silently failing at runtime WR-03: assert Redis store empty after 400 pre-flight responses in both new tests — confirms no token leak on misconfigured-provider requests Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
792 lines
30 KiB
Python
792 lines
30 KiB
Python
"""
|
|
Document API endpoints for DocuVault — Phase 3 Wave 2 / Phase 5 Plan 06.
|
|
|
|
Implements the presigned PUT upload flow (D-04, D-05):
|
|
POST /api/documents/upload-url — create pending Document row, return presigned URL
|
|
POST /api/documents/{id}/confirm — stat MinIO for authoritative size, atomic quota UPDATE
|
|
|
|
Cloud upload path (D-10, D-14, D-15 — Phase 5 Plan 06):
|
|
POST /api/documents/upload — multipart upload with target_backend parameter;
|
|
cloud backends bypass presigned URL and use direct put_object()
|
|
|
|
Preserved endpoints (auth guards added in Plan 03-03):
|
|
GET /api/documents — list documents
|
|
GET /api/documents/{id} — get document metadata
|
|
DELETE /api/documents/{id} — delete document (decrements quota atomically)
|
|
POST /api/documents/{id}/classify — reclassify document topics
|
|
GET /api/documents/{id}/content — stream document bytes (all backends, Phase 5 Plan 06)
|
|
|
|
NOTE: the handlers below all depend on get_regular_user (added in Plan 03-03), and
/confirm returns 404 for any document the caller does not own.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import uuid
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request, UploadFile, File, status
|
|
from fastapi.responses import StreamingResponse
|
|
from pydantic import BaseModel, Field, field_validator
|
|
from sqlalchemy import select, text, func
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from config import settings
|
|
from db.models import CloudConnection, Document, Folder, Quota, Share, User
|
|
from deps.auth import get_regular_user
|
|
from deps.db import get_db
|
|
from services import classifier, storage
|
|
from services.audit import write_audit_log
|
|
from storage import get_storage_backend, get_storage_backend_for_document
|
|
from storage.cloud_utils import decrypt_credentials
|
|
from tasks.document_tasks import extract_and_classify
|
|
|
|
try:
|
|
from minio.error import S3Error
|
|
except ImportError:
|
|
# Fallback for test environments where minio is not installed
|
|
S3Error = Exception # type: ignore[assignment,misc]
|
|
|
|
try:
    from storage.google_drive_backend import CloudConnectionError
except ImportError:
    # Fallback: define a stub so the except clause compiles even if google deps absent
    class CloudConnectionError(Exception):  # type: ignore[no-redef]
        """Stand-in exception used only when storage.google_drive_backend cannot be imported.

        It exists so that the ``except CloudConnectionError`` clauses in this module
        still resolve to a real class.
        """

        def __init__(self, msg: str = "", *, reason: str = "") -> None:
            super().__init__(msg)
            # Keyword-only reason kept on the instance; presumably mirrors the real
            # exception's attribute — TODO confirm against google_drive_backend.
            self.reason = reason
|
|
|
|
# Valid cloud backend slugs (T-05-06-01: validated against allowlist, not user-supplied string)
# "minio" is deliberately absent: upload_document handles it before consulting this set.
_CLOUD_PROVIDERS = frozenset({"google_drive", "onedrive", "nextcloud", "webdav"})

# Every route declared in this module is registered under the /api/documents prefix.
router = APIRouter(prefix="/api/documents", tags=["documents"])
|
|
|
|
|
|
# ── Request models ────────────────────────────────────────────────────────────
|
|
|
|
class UploadUrlRequest(BaseModel):
    """Request body for POST /api/documents/upload-url."""

    # Client-supplied name: stored on the Document row; only its suffix is used in the object key.
    filename: str
    # Client-supplied MIME type, stored on the Document row as-is.
    content_type: str
|
|
|
|
|
|
class DocumentPatch(BaseModel):
    """Request body for PATCH /api/documents/{doc_id}.

    Both fields are optional; ``model_fields_set`` lets the handler tell a field that
    was omitted apart from one explicitly sent as null. The handler (not this model)
    enforces that at least one field was provided.

    T-05-09-01: explicit field declaration prevents mass assignment.
    T-05-09-02: only filename and folder_id are accepted — no other fields can be set.
    """

    filename: Optional[str] = Field(None, min_length=1, max_length=255)
    folder_id: Optional[uuid.UUID] = None

    @field_validator("filename")
    @classmethod
    def filename_no_path_separators(cls, v: Optional[str]) -> Optional[str]:
        """Reject filenames containing a forward slash or a backslash; None passes through."""
        if v is None:
            return v
        if any(separator in v for separator in ("/", "\\")):
            raise ValueError("filename must not contain path separators")
        return v
|
|
|
|
|
|
# ── POST /api/documents/upload-url ───────────────────────────────────────────
|
|
|
|
@router.post("/upload-url")
async def request_upload_url(
    body: UploadUrlRequest,
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """Persist a pending Document row and hand back a presigned PUT URL for it.

    D-05 step 1: the row is created with status='pending' and size_bytes=0, then a
    presigned PUT URL valid for 15 minutes is generated for its object key. The
    response is {upload_url, document_id}. No quota is touched here — quota
    enforcement happens at /confirm.

    T-03-04: the object key is built server-side from str(current_user.id), the new
             document id and a random UUID; only the lower-cased file suffix of the
             client filename enters the key. The filename itself lives in the DB only
             (CLAUDE.md MinIO key schema).
    T-03-15: the key prefix is always the authenticated user's id — never user-supplied.
    """
    new_id = uuid.uuid4()
    extension = Path(body.filename).suffix.lower()
    key = f"{current_user.id}/{new_id}/{uuid.uuid4()}{extension}"

    pending = Document(
        id=new_id,
        user_id=current_user.id,
        filename=body.filename,
        content_type=body.content_type,
        size_bytes=0,
        storage_backend="minio",
        status="pending",
        object_key=key,
    )
    session.add(pending)
    await session.commit()

    # The row is committed before the URL is generated, as in the original flow.
    backend = get_storage_backend()
    presigned = await backend.generate_presigned_put_url(key, expires_minutes=15)
    return {"upload_url": presigned, "document_id": str(new_id)}
|
|
|
|
|
|
# ── POST /api/documents/upload ────────────────────────────────────────────────
|
|
|
|
@router.post("/upload")
async def upload_document(
    file: UploadFile = File(...),
    target_backend: str = Form("minio"),
    cloud_folder_path: str = Form(None),
    request: Request = None,
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """Multipart upload endpoint that can target MinIO or a cloud backend (D-10, D-14, D-15).

    target_backend == "minio":
        Creates a pending Document row and returns {upload_url, document_id} with a
        15-minute presigned PUT URL. The multipart file body itself is not read on
        this path.

    target_backend in ("google_drive", "onedrive", "nextcloud", "webdav"):
        1. Looks up the caller's ACTIVE CloudConnection for that provider (404 if none)
        2. Decrypts the stored credentials
        3. Reads the uploaded bytes
        4. Instantiates the matching backend class (imported lazily)
        5. Uploads via cloud_backend.put_object()
        6. Creates a Document row with storage_backend=target_backend, status="uploaded"
        7. Writes a "document.uploaded" audit entry, commits, enqueues extract_and_classify
        8. Returns {document_id, storage_backend} — no upload_url (the upload is synchronous)

    No quota UPDATE is issued on the cloud path (D-11: cloud storage quota is
    provider-side).

    Security:
      T-05-06-01: target_backend is checked against the _CLOUD_PROVIDERS allowlist → 422
      T-05-06-02: the 503 raised for CloudConnectionError carries a fixed message, never
                  the provider's error detail
    """
    if target_backend == "minio":
        # MinIO: create the pending row and return a presigned URL for a client-side PUT.
        new_id = uuid.uuid4()
        ext = Path(file.filename or "file").suffix.lower()
        key = f"{current_user.id}/{new_id}/{uuid.uuid4()}{ext}"

        pending = Document(
            id=new_id,
            user_id=current_user.id,
            filename=file.filename or "upload",
            content_type=file.content_type or "application/octet-stream",
            size_bytes=0,
            storage_backend="minio",
            status="pending",
            object_key=key,
        )
        session.add(pending)
        await session.commit()

        presigned = await get_storage_backend().generate_presigned_put_url(
            key, expires_minutes=15
        )
        return {"upload_url": presigned, "document_id": str(new_id)}

    # ── Cloud backend path ────────────────────────────────────────────────────
    if target_backend not in _CLOUD_PROVIDERS:
        allowed = ", ".join(sorted(_CLOUD_PROVIDERS))
        raise HTTPException(
            status_code=422,
            detail=f"Invalid target_backend '{target_backend}'. Valid values: minio, {allowed}",
        )

    # User-scoped lookup of the active connection for this provider (T-05-06-01).
    connection_query = select(CloudConnection).where(
        CloudConnection.user_id == current_user.id,
        CloudConnection.provider == target_backend,
        CloudConnection.status == "ACTIVE",
    )
    conn = (await session.execute(connection_query)).scalar_one_or_none()
    if conn is None:
        raise HTTPException(
            status_code=404,
            detail=f"No active {target_backend} connection found. Please connect in Settings.",
        )

    # Per-user credentials are decrypted with the configured master key.
    master_key = settings.cloud_creds_key.encode()
    credentials = decrypt_credentials(master_key, str(current_user.id), conn.credentials_enc)

    # The whole upload is buffered in memory before being handed to the backend.
    file_bytes = await file.read()
    payload_size = len(file_bytes)
    filename = file.filename or "upload"
    content_type = file.content_type or "application/octet-stream"
    extension = Path(filename).suffix.lower()

    doc_id = uuid.uuid4()

    # Backend classes are imported lazily, only for the provider actually requested.
    if target_backend == "google_drive":
        from storage.google_drive_backend import GoogleDriveBackend  # lazy import

        cloud_backend = GoogleDriveBackend(credentials)
    elif target_backend == "onedrive":
        from storage.onedrive_backend import OneDriveBackend  # lazy import

        cloud_backend = OneDriveBackend(credentials)
    elif target_backend == "nextcloud":
        from storage.nextcloud_backend import NextcloudBackend  # lazy import

        cloud_backend = NextcloudBackend(
            credentials["server_url"],
            credentials["username"],
            credentials["password"],
        )
    elif target_backend == "webdav":
        from storage.webdav_backend import WebDAVBackend  # lazy import

        cloud_backend = WebDAVBackend(
            credentials["server_url"],
            credentials["username"],
            credentials["password"],
        )

    try:
        object_key = await cloud_backend.put_object(
            str(current_user.id),
            str(doc_id),
            file_bytes,
            extension,
            content_type,
            cloud_folder=cloud_folder_path or None,
            # The original filename is only forwarded when a target folder was given.
            original_filename=filename if cloud_folder_path else None,
        )
    except CloudConnectionError as exc:
        raise HTTPException(
            status_code=503,
            detail="Cloud connection requires re-authentication. Please reconnect in Settings.",
        ) from exc

    # Bust the folder listing cache so the next GET /folders reflects the new file.
    if cloud_folder_path:
        from services.cloud_cache import invalidate_provider_cache  # lazy import

        invalidate_provider_cache(str(current_user.id), target_backend)

    doc = Document(
        id=doc_id,
        user_id=current_user.id,
        filename=filename,
        content_type=content_type,
        size_bytes=payload_size,
        storage_backend=target_backend,
        status="uploaded",
        object_key=object_key,
    )
    session.add(doc)

    # Audit IP: X-Forwarded-For header if present, otherwise the socket peer address.
    if request:
        _ip = request.headers.get("X-Forwarded-For") or (
            request.client.host if request.client else None
        )
    else:
        _ip = None

    await write_audit_log(
        session,
        event_type="document.uploaded",
        user_id=current_user.id,
        actor_id=current_user.id,
        resource_id=doc.id,
        ip_address=_ip,
        metadata_={"size_bytes": payload_size, "storage_backend": target_backend},
    )
    await session.commit()

    # The background task is enqueued only after the row has been committed.
    extract_and_classify.delay(str(doc.id))

    return {"document_id": str(doc.id), "storage_backend": target_backend}
|
|
|
|
|
|
# ── POST /api/documents/{doc_id}/confirm ─────────────────────────────────────
|
|
|
|
@router.post("/{doc_id}/confirm")
async def confirm_upload(
    doc_id: str,
    request: Request,
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """Confirm a presigned PUT upload: stat storage for size, enforce quota atomically.

    D-05 step 3: reads the authoritative object size via stat_object (never from the
    client), runs the atomic quota UPDATE, sets status='uploaded', writes a
    "document.uploaded" audit entry, commits, and enqueues the Celery task.

    Responses:
      200 — {"id", "size_bytes", "used_bytes", "status": "uploaded"}
      404 — malformed id, unknown document, or document owned by another user
      409 — the document is no longer in status 'pending' (already confirmed)
      413 — quota exceeded; detail is {"used_bytes", "limit_bytes", "rejected_bytes"}
            and the pending row is deleted (object removal is best-effort)
      422 — object not found in storage (presigned URL may have expired)
      502 — any other storage error from stat_object

    T-03-05: size always comes from backend.stat_object(doc.object_key) — never client.
    T-03-06: atomic SQL UPDATE prevents concurrent over-quota uploads (STORE-03 SC2).
    T-03-11: ownership assertion — cross-user access returns 404 (D-16).
    """
    try:
        uid = uuid.UUID(doc_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Document not found")

    doc = await session.get(Document, uid)
    if doc is None or doc.user_id != current_user.id:
        raise HTTPException(status_code=404, detail="Document not found")

    # Only a pending row may be confirmed. Without this guard a repeated call would add
    # the object's size to used_bytes a second time and enqueue the task again.
    # NOTE(review): two truly concurrent confirms can still both observe 'pending';
    # closing that race would need a row lock or a conditional UPDATE on status.
    if doc.status != "pending":
        raise HTTPException(status_code=409, detail="Upload already confirmed")

    # Get authoritative file size from storage (T-03-05 — never trust client-supplied size)
    try:
        size = await get_storage_backend().stat_object(doc.object_key)
    except Exception as exc:
        code = getattr(exc, "code", "")
        if code == "NoSuchKey":
            raise HTTPException(
                status_code=422,
                detail="Upload not found — presigned URL may have expired",
            ) from exc
        # NOTE(review): this detail echoes the raw storage exception text to the client;
        # consider a fixed message as used by the /content endpoint.
        raise HTTPException(status_code=502, detail=f"Storage error: {exc}") from exc

    doc.size_bytes = size
    await session.flush()

    # Atomic quota enforcement: the UPDATE only matches when the new total fits, so
    # "no row returned" means the quota would be exceeded (or no quota row exists).
    result = await session.execute(
        text(
            "UPDATE quotas "
            "SET used_bytes = used_bytes + :delta "
            "WHERE user_id = :uid "
            "  AND (used_bytes + :delta) <= limit_bytes "
            "RETURNING used_bytes, limit_bytes"
        ),
        {"delta": size, "uid": str(doc.user_id)},
    )
    row = result.fetchone()

    if row is None:
        # Quota exceeded — fetch current quota state for the 413 body
        quota_result = await session.execute(
            text("SELECT used_bytes, limit_bytes FROM quotas WHERE user_id = :uid"),
            {"uid": str(doc.user_id)},
        )
        q = quota_result.fetchone()
        # Delete the pending Document row and best-effort remove the stored object
        await session.delete(doc)
        try:
            await get_storage_backend().delete_object(doc.object_key)
        except Exception:
            pass  # cleanup is deliberately best-effort; the 413 must still be returned
        await session.commit()
        raise HTTPException(
            status_code=413,
            detail={
                "used_bytes": q.used_bytes if q else 0,
                "limit_bytes": q.limit_bytes if q else 0,
                "rejected_bytes": size,
            },
        )

    used_bytes = row.used_bytes

    doc.status = "uploaded"
    # D-13: document uploaded event — size_bytes + storage_backend only, NO filename,
    # NO extracted_text (T-04-07-02)
    _ip = request.headers.get("X-Forwarded-For") or (
        request.client.host if request.client else None
    )
    await write_audit_log(
        session,
        event_type="document.uploaded",
        user_id=current_user.id,
        actor_id=current_user.id,
        resource_id=doc.id,
        ip_address=_ip,
        metadata_={"size_bytes": size, "storage_backend": "minio"},
    )
    await session.commit()
    # Enqueued after the commit, so the row is already persisted when the task is queued.
    extract_and_classify.delay(str(doc.id))

    return {
        "id": str(doc.id),
        "size_bytes": size,
        "used_bytes": used_bytes,
        "status": "uploaded",
    }
|
|
|
|
|
|
# ── GET /api/documents ────────────────────────────────────────────────────────
|
|
|
|
@router.get("")
async def list_documents(
    topic: Optional[str] = Query(None),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
    sort: str = Query("date"),
    order: str = Query("desc"),
    folder_id: Optional[str] = Query(None),
    q: Optional[str] = Query(None),
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """List the current user's documents with optional sort, folder filter and search.

    D-16: requires authenticated regular user (get_regular_user rejects admins).
    Only documents whose user_id equals current_user.id are returned (T-04-03-02).

    Query parameters:
      topic     — restrict to documents tagged with this topic name
      page      — 1-based page number
      per_page  — page size (1..100)
      sort      — "name" | "size"; any other value sorts by created_at
      order     — "asc"; any other value sorts descending
      folder_id — restrict to a folder; a malformed UUID yields 404
      q         — full-text query via plainto_tsquery; ignored when shorter than 2 chars

    Returns {"items", "total", "page", "per_page"}; every item carries an
    ``is_shared`` flag (true when the user has shared that document).

    Backward-compat: when sort/order/folder_id/q are all at their defaults the
    legacy storage.list_metadata path is used.

    NOTE(review): the try/except around the FTS filter only guards construction of the
    SQL expression. A database without to_tsvector/plainto_tsquery (e.g. SQLite) would
    presumably fail later, at execute time — confirm before relying on a fallback.
    """
    # If no new params used, fall through to the legacy storage.list_metadata path
    # to preserve full backward compatibility with topic filtering.
    if folder_id is None and q is None and sort == "date" and order == "desc":
        docs = await storage.list_metadata(session, user_id=current_user.id, topic=topic)
        total = len(docs)
        start = (page - 1) * per_page
        # Add is_shared field (Phase 4 addition)
        shared_result = await session.execute(
            select(Share.document_id).where(Share.owner_id == current_user.id)
        )
        shared_ids = {row[0] for row in shared_result.fetchall()}
        items = []
        for d in docs[start : start + per_page]:
            doc_id_str = d.get("id", "")
            try:
                doc_uuid = uuid.UUID(doc_id_str)
            except (ValueError, AttributeError):
                # Unparseable / non-string id: treat the entry as not shared.
                doc_uuid = None
            d["is_shared"] = doc_uuid in shared_ids if doc_uuid else False
            items.append(d)
        return {"items": items, "total": total, "page": page, "per_page": per_page}

    # New path: direct ORM query with sort/filter/FTS.
    # Imports are function-local (avoids a circular import at module top) and are
    # resolved once here rather than on every loop iteration.
    from db.models import DocumentTopic, Topic  # noqa: PLC0415
    from services.storage import _doc_to_dict, _load_topic_names  # noqa: PLC0415

    stmt = select(Document).where(Document.user_id == current_user.id)

    # Topic filter (join-based, same as list_metadata)
    if topic is not None:
        stmt = (
            stmt.join(DocumentTopic, DocumentTopic.document_id == Document.id)
            .join(Topic, Topic.id == DocumentTopic.topic_id)
            .where(Topic.name == topic)
        )

    # Folder filter
    if folder_id is not None:
        try:
            folder_uuid = uuid.UUID(folder_id)
        except ValueError:
            raise HTTPException(status_code=404, detail="Folder not found")
        stmt = stmt.where(Document.folder_id == folder_uuid)

    # Full-text search — plainto_tsquery on extracted_text (PostgreSQL functions).
    fts_requested = q is not None and len(q) >= 2
    if fts_requested:
        try:
            stmt = stmt.where(
                func.to_tsvector("english", func.coalesce(Document.extracted_text, "")).op("@@")(
                    func.plainto_tsquery("english", q)
                )
            )
        except Exception:
            pass  # filter expression could not be built — return unfiltered results

    # Sort column: created_at unless "name" or "size" was requested.
    sort_col = Document.created_at
    if sort == "name":
        sort_col = Document.filename
    elif sort == "size":
        sort_col = Document.size_bytes

    if order == "asc":
        stmt = stmt.order_by(sort_col.asc())
    else:
        stmt = stmt.order_by(sort_col.desc())

    result = await session.execute(stmt)
    docs_orm = result.scalars().all()

    # Ids of documents the current user has shared, used for the is_shared flag.
    shared_result = await session.execute(
        select(Share.document_id).where(Share.owner_id == current_user.id)
    )
    shared_ids = {row[0] for row in shared_result.fetchall()}

    # Serialize every matching row; pagination is applied in Python afterwards.
    all_items = []
    for doc in docs_orm:
        topic_names = await _load_topic_names(session, doc.id)
        d = _doc_to_dict(doc, topic_names)
        d["is_shared"] = doc.id in shared_ids
        all_items.append(d)

    total = len(all_items)
    start = (page - 1) * per_page
    return {
        "items": all_items[start : start + per_page],
        "total": total,
        "page": page,
        "per_page": per_page,
    }
|
|
|
|
|
|
# ── GET /api/documents/{doc_id} ───────────────────────────────────────────────
|
|
|
|
@router.get("/{doc_id}")
async def get_document(
    doc_id: str,
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """Return the metadata of one document owned by the caller.

    D-16: requires authenticated regular user. A malformed id, an unknown id and a
    document owned by someone else all produce the same 404 (never 403), so the
    response does not reveal whether the id exists (T-03-11).
    """
    try:
        parsed_id = uuid.UUID(doc_id)
    except ValueError:
        raise HTTPException(404, "Document not found")

    record = await session.get(Document, parsed_id)
    if record is None:
        raise HTTPException(404, "Document not found")
    if record.user_id != current_user.id:
        raise HTTPException(404, "Document not found")

    # The response is built by storage.get_metadata, which is given the original id string.
    metadata = await storage.get_metadata(session, doc_id)
    if metadata is None:
        raise HTTPException(404, "Document not found")
    return metadata
|
|
|
|
|
|
# ── PATCH /api/documents/{doc_id} ────────────────────────────────────────────
|
|
|
|
@router.patch("/{doc_id}")
async def patch_document(
    doc_id: str,
    body: DocumentPatch,
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """Rename a document and/or move it to another folder.

    T-05-09-01: get_regular_user dep rejects admins (403) and unauthenticated (401).
    T-05-09-01: ownership check — a non-owner gets 404 so document ids are not leaked (D-16).
    T-05-09-02: the response comes from storage.get_metadata() (per the original note,
                its _doc_to_dict whitelist excludes credentials_enc and password_hash).

    Rules:
      * an empty body (no field provided) → 422
      * filename is applied only when provided and non-null
      * folder_id=null moves the document to the root (no folder)
      * folder_id=<uuid> must name a folder owned by the caller, otherwise 404
    """
    try:
        parsed_id = uuid.UUID(doc_id)
    except ValueError:
        raise HTTPException(404, "Document not found")

    doc = await session.get(Document, parsed_id)
    if doc is None or doc.user_id != current_user.id:
        raise HTTPException(404, "Document not found")

    # model_fields_set holds only the fields the client actually sent.
    provided = body.model_fields_set
    if not provided:
        raise HTTPException(422, "At least one field (filename, folder_id) must be provided")

    if body.filename is not None and "filename" in provided:
        doc.filename = body.filename

    if "folder_id" in provided:
        destination_id = body.folder_id
        if destination_id is not None:
            # Moving into a folder: it must exist and belong to the caller.
            destination = await session.get(Folder, destination_id)
            if destination is None or destination.user_id != current_user.id:
                raise HTTPException(404, "Folder not found")
        # None here means "move to root".
        doc.folder_id = destination_id

    await session.commit()

    metadata = await storage.get_metadata(session, doc_id)
    if metadata is None:
        raise HTTPException(404, "Document not found")
    return metadata
|
|
|
|
|
|
# ── DELETE /api/documents/{doc_id} ───────────────────────────────────────────
|
|
|
|
@router.delete("/{doc_id}")
async def delete_document(
    doc_id: str,
    request: Request,
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """Delete one of the caller's documents and record the deletion in the audit log.

    The deletion itself is delegated to services.storage.delete_document, which
    (per STORE-06, D-07) also performs the atomic quota decrement via
    GREATEST(0, used_bytes - delta) SQL.

    D-16: requires authenticated regular user. A malformed id, an unknown id and a
    document owned by someone else all return 404 (not 403) to avoid information
    leakage (T-03-11).
    """
    try:
        parsed_id = uuid.UUID(doc_id)
    except ValueError:
        raise HTTPException(404, "Document not found")

    doc = await session.get(Document, parsed_id)
    if doc is None or doc.user_id != current_user.id:
        raise HTTPException(404, "Document not found")

    # Snapshot everything the audit entry needs while the row still exists.
    deleted_size = doc.size_bytes
    deleted_id = doc.id
    forwarded_for = request.headers.get("X-Forwarded-For")
    if forwarded_for:
        client_ip = forwarded_for
    else:
        client_ip = request.client.host if request.client else None

    deleted = await storage.delete_document(session, doc_id)
    if not deleted:
        raise HTTPException(404, "Document not found")

    # D-13: document deleted event — written AFTER successful delete, size_bytes only (T-04-07-02)
    await write_audit_log(
        session,
        event_type="document.deleted",
        user_id=current_user.id,
        actor_id=current_user.id,
        resource_id=deleted_id,
        ip_address=client_ip,
        metadata_={"size_bytes": deleted_size},
    )
    await session.commit()

    return {"success": True}
|
|
|
|
|
|
# ── POST /api/documents/{doc_id}/classify ────────────────────────────────────
|
|
|
|
@router.post("/{doc_id}/classify")
async def classify_document(
    doc_id: str,
    body: Optional[dict] = None,
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """Reclassify a document's topics on demand.

    The optional JSON body may carry a "topics" key; its value (or None when the body
    is absent or empty) is forwarded to classifier.classify_document.

    D-16: requires authenticated regular user. Asserts ownership — cross-user
    classify returns 404 (not 403) to avoid information leakage (T-03-11).

    Returns {"topics": <classifier result>}.
    Raises 500 "Classification failed: …" when the classifier raises anything other
    than an HTTPException; HTTPExceptions pass through unchanged.
    """
    try:
        uid = uuid.UUID(doc_id)
    except ValueError:
        raise HTTPException(404, "Document not found")

    doc = await session.get(Document, uid)
    if doc is None or doc.user_id != current_user.id:
        raise HTTPException(404, "Document not found")

    # The default is None instead of a shared mutable {}; an absent or empty body
    # means "no explicit topic list".
    topic_names = body.get("topics") if body else None
    try:
        topics = await classifier.classify_document(session, doc_id, topic_names)
    except HTTPException:
        # Keep a deliberate HTTP status from the classifier instead of masking it as 500.
        raise
    except Exception as e:
        raise HTTPException(500, f"Classification failed: {e}") from e

    return {"topics": topics}
|
|
|
|
|
|
# ── Range header parsing helper ───────────────────────────────────────────────
|
|
|
|
def _parse_range(range_header: str, file_size: int) -> tuple:
|
|
"""Parse a 'bytes=X-Y' Range header and return (start, end).
|
|
|
|
Returns (start, end) where both are inclusive byte offsets.
|
|
Raises HTTP 416 on any invalid or out-of-bounds range.
|
|
|
|
T-04-05-03: validates start <= end, start >= 0, end < file_size.
|
|
"""
|
|
try:
|
|
h = range_header.replace("bytes=", "").split("-")
|
|
start = int(h[0]) if h[0] != "" else 0
|
|
end = int(h[1]) if h[1] != "" else file_size - 1
|
|
except (ValueError, IndexError):
|
|
raise HTTPException(status.HTTP_416_RANGE_NOT_SATISFIABLE)
|
|
if start > end or start < 0 or end >= file_size:
|
|
raise HTTPException(status.HTTP_416_RANGE_NOT_SATISFIABLE)
|
|
return start, end
|
|
|
|
|
|
# ── GET /api/documents/{doc_id}/content ──────────────────────────────────────
|
|
|
|
def _inline_content_disposition(filename: str) -> str:
    """Build an 'inline' Content-Disposition value that is safe for any filename.

    Characters outside printable ASCII, plus the double quote and backslash (which
    would break the quoted-string), are replaced by '_' in the plain ``filename``
    parameter. When that changes the name, an RFC 5987 ``filename*`` parameter with
    the percent-encoded UTF-8 original is appended. For a plain ASCII filename the
    result is exactly ``inline; filename="<filename>"``.
    """
    from urllib.parse import quote  # local import keeps this block self-contained

    fallback = "".join(
        ch if (" " <= ch <= "~" and ch not in '"\\') else "_" for ch in filename
    )
    header = f'inline; filename="{fallback}"'
    if fallback != filename:
        header += f"; filename*=UTF-8''{quote(filename, safe='')}"
    return header


@router.get("/{doc_id}/content")
async def stream_document_content(
    doc_id: str,
    request: Request,
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """Return a document's bytes from whichever storage backend holds it (DOC-02).

    T-04-05-01: uses get_regular_user — admin role → 403 (critical security invariant).
    T-04-05-02: bytes fetched via get_object() ONLY — presigned_get_url() never called.
    T-04-05-03: Range header validated via _parse_range(); invalid range → 416.
    T-04-05-04: access gated on ownership OR a Share row naming the caller as recipient.

    Returns 200 (or 206 for Range requests) with:
      Content-Type: doc.content_type
      Content-Disposition: inline; filename="<filename>" (plus filename*=UTF-8''… when
                           the name contains characters unsafe for a quoted-string)
      Accept-Ranges: bytes
      Content-Length: <size of the returned body>
      Content-Range: bytes <start>-<end>/<total> (206 only)

    Errors: 404 unknown/inaccessible document, 416 bad range, 503 cloud connection
    needs re-authentication, 502 any other backend failure.
    """
    try:
        uid = uuid.UUID(doc_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Document not found")

    doc = await session.get(Document, uid)
    if doc is None:
        raise HTTPException(status_code=404, detail="Document not found")

    # Access control: owner OR share recipient (T-04-05-04)
    if doc.user_id != current_user.id:
        result = await session.execute(
            select(Share).where(
                Share.document_id == doc.id,
                Share.recipient_id == current_user.id,
            )
        )
        share = result.scalar_one_or_none()
        if share is None:
            raise HTTPException(status_code=404, detail="Document not found")

    # Fetch bytes from the correct backend — get_storage_backend_for_document handles
    # all backends (MinIO, Google Drive, OneDrive, Nextcloud, WebDAV) transparently
    # (D-15, T-04-05-02). NEVER via presigned URL for cloud backends (D-14).
    try:
        storage_backend = await get_storage_backend_for_document(doc, current_user, session)
        file_bytes = await storage_backend.get_object(doc.object_key)
    except CloudConnectionError as exc:
        raise HTTPException(
            status_code=503,
            detail="Cloud connection requires re-authentication. Please reconnect in Settings.",
        ) from exc
    except HTTPException:
        # Must precede the broad handler so a deliberate status (e.g. 503) is not
        # replaced by the generic 502 below.
        raise
    except Exception as exc:
        raise HTTPException(
            status_code=502,
            detail="Cloud backend unreachable. Please try again or reconnect in Settings.",
        ) from exc
    file_size = len(file_bytes)

    headers = {
        "content-type": doc.content_type,
        # Built by the helper: raw interpolation lets a quote or backslash in the stored
        # filename corrupt the header, and non-latin-1 characters cannot be encoded.
        "content-disposition": _inline_content_disposition(str(doc.filename)),
        "accept-ranges": "bytes",
        "content-length": str(file_size),
    }

    range_header = request.headers.get("range")
    if range_header:
        start, end = _parse_range(range_header, file_size)
        chunk = file_bytes[start : end + 1]  # end is inclusive
        headers["content-range"] = f"bytes {start}-{end}/{file_size}"
        headers["content-length"] = str(len(chunk))
        return StreamingResponse(
            iter([chunk]),
            status_code=206,
            headers=headers,
        )

    return StreamingResponse(
        iter([file_bytes]),
        status_code=200,
        headers=headers,
    )
|