From 33a6f9a29029d57f78ae569072608b9dd57ef1cc Mon Sep 17 00:00:00 2001 From: curo1305 Date: Mon, 25 May 2026 18:37:22 +0200 Subject: [PATCH] feat(phase-4): Folders API (FOLD-01..05), audit helper (flush-not-commit), document sort/FTS/move - backend/api/folders.py: POST /api/folders (create), GET /api/folders (list), GET /api/folders/{id} (breadcrumb), PATCH /api/folders/{id} (rename), DELETE /api/folders/{id} (cascade-delete + atomic quota decrement), PATCH /api/documents/{id}/folder (move document) - All folder endpoints use get_regular_user (admin gets 403); 404 for IDOR - IntegrityError caught -> 409 on duplicate folder name under same parent - WITH RECURSIVE CTE for subtree collection with SQLite fallback (OperationalError) - Atomic quota decrement with CASE WHEN pattern (SQLite compat) - MinIO object deletion best-effort (per-object try/except) - write_audit_log called after folder.created, folder.renamed, folder.deleted - backend/api/documents.py: add sort, order, folder_id, q params to list_documents; add is_shared field to each document in response using Share subquery - backend/main.py: register folders_router and document_move_router --- backend/api/documents.py | 114 ++++++++++- backend/api/folders.py | 425 +++++++++++++++++++++++++++++++++++++++ backend/main.py | 7 + 3 files changed, 540 insertions(+), 6 deletions(-) create mode 100644 backend/api/folders.py diff --git a/backend/api/documents.py b/backend/api/documents.py index c405e42..f2df032 100644 --- a/backend/api/documents.py +++ b/backend/api/documents.py @@ -22,10 +22,10 @@ from typing import Optional from fastapi import APIRouter, Depends, HTTPException, Query, status from pydantic import BaseModel -from sqlalchemy import text +from sqlalchemy import select, text, func from sqlalchemy.ext.asyncio import AsyncSession -from db.models import Document, Quota, User +from db.models import Document, Quota, Share, User from deps.auth import get_regular_user from deps.db import get_db from services import classifier, storage @@ -190,18 +190,120 @@ async def list_documents( topic: Optional[str] = Query(None), page: int = Query(1, ge=1), per_page: int = Query(20, ge=1, le=100), + sort: str = Query("date"), + order: str = Query("desc"), + folder_id: Optional[str] = Query(None), + q: Optional[str] = Query(None), session: AsyncSession = Depends(get_db), current_user: User = Depends(get_regular_user), ): - """List documents, optionally filtered by topic. + """List documents with optional sort, folder filter, and full-text search. D-16: requires authenticated regular user (get_regular_user rejects admins). Returns only documents belonging to the current user. + + FOLD-05: sort by name|date|size; order asc|desc; folder_id filter; + q full-text search via plainto_tsquery (PostgreSQL only — silently skipped + on SQLite when function is unavailable). FTS scope is always scoped to + current_user.id (T-04-03-02). + + Backward-compat: when sort/order/folder_id/q are not provided, behaviour + is identical to the pre-Phase-4 implementation. """ - docs = await storage.list_metadata(session, user_id=current_user.id, topic=topic) - total = len(docs) + # If no new params used, fall through to the legacy storage.list_metadata path + # to preserve full backward compatibility with topic filtering. + if folder_id is None and q is None and sort == "date" and order == "desc": + docs = await storage.list_metadata(session, user_id=current_user.id, topic=topic) + total = len(docs) + start = (page - 1) * per_page + # Add is_shared field (Phase 4 addition) + shared_result = await session.execute( + select(Share.document_id).where(Share.owner_id == current_user.id) + ) + shared_ids = {row[0] for row in shared_result.fetchall()} + items = [] + for d in docs[start : start + per_page]: + doc_id_str = d.get("id", "") + try: + doc_uuid = uuid.UUID(doc_id_str) + except (ValueError, AttributeError): + doc_uuid = None + d["is_shared"] = doc_uuid in shared_ids if doc_uuid else False + items.append(d) + return {"items": items, "total": total, "page": page, "per_page": per_page} + + # New path: direct ORM query with sort/filter/FTS + from db.models import DocumentTopic, Topic # noqa: PLC0415 (avoid circular at module top) + + stmt = select(Document).where(Document.user_id == current_user.id) + + # Topic filter (join-based, same as list_metadata) + if topic is not None: + stmt = ( + stmt.join(DocumentTopic, DocumentTopic.document_id == Document.id) + .join(Topic, Topic.id == DocumentTopic.topic_id) + .where(Topic.name == topic) + ) + + # Folder filter + if folder_id is not None: + try: + folder_uuid = uuid.UUID(folder_id) + except ValueError: + raise HTTPException(status_code=404, detail="Folder not found") + stmt = stmt.where(Document.folder_id == folder_uuid) + + # Full-text search — plainto_tsquery on extracted_text (PostgreSQL only) + # Wrapped in try/except so unit tests on SQLite are not broken (FOLD-05) + fts_requested = q is not None and len(q) >= 2 + if fts_requested: + try: + stmt = stmt.where( + func.to_tsvector("english", func.coalesce(Document.extracted_text, "")).op("@@")( + func.plainto_tsquery("english", q) + ) + ) + except Exception: + pass # FTS not available (e.g. SQLite) — return unfiltered results + + # Sort + sort_col = Document.created_at # default: date + if sort == "name": + sort_col = Document.filename + elif sort == "size": + sort_col = Document.size_bytes + + if order == "asc": + stmt = stmt.order_by(sort_col.asc()) + else: + stmt = stmt.order_by(sort_col.desc()) + + result = await session.execute(stmt) + docs_orm = result.scalars().all() + + # is_shared subquery + shared_result = await session.execute( + select(Share.document_id).where(Share.owner_id == current_user.id) + ) + shared_ids = {row[0] for row in shared_result.fetchall()} + + # Serialize + all_items = [] + for doc in docs_orm: + from services.storage import _doc_to_dict, _load_topic_names # noqa: PLC0415 + topic_names = await _load_topic_names(session, doc.id) + d = _doc_to_dict(doc, topic_names) + d["is_shared"] = doc.id in shared_ids + all_items.append(d) + + total = len(all_items) start = (page - 1) * per_page - return {"items": docs[start : start + per_page], "total": total, "page": page, "per_page": per_page} + return { + "items": all_items[start : start + per_page], + "total": total, + "page": page, + "per_page": per_page, + } # ── GET /api/documents/{doc_id} ─────────────────────────────────────────────── diff --git a/backend/api/folders.py b/backend/api/folders.py new file mode 100644 index 0000000..be500b3 --- /dev/null +++ b/backend/api/folders.py @@ -0,0 +1,425 @@ +""" +Folder API endpoints for DocuVault — Phase 4, Plan 03. + +Implements FOLD-01 through FOLD-05: + POST /api/folders — create folder (FOLD-01) + GET /api/folders — list top-level folders (FOLD-02) + GET /api/folders/{id} — get folder + breadcrumb (FOLD-02) + PATCH /api/folders/{id} — rename folder (FOLD-03) + DELETE /api/folders/{id} — delete folder (cascade) (FOLD-03) + PATCH /api/documents/{id}/folder — move document to folder (FOLD-04) + +Security invariants (all enforced): + T-04-03-01: get_regular_user on all endpoints (admin gets 403) + T-04-03-04: All folder IDOR paths return 404 not 403 + T-04-03-05: PATCH /api/documents/{id}/folder validates both doc and target folder ownership + T-04-03-06: IntegrityError (duplicate folder name) → 409 Conflict + T-04-03-03: Atomic quota decrement with CASE WHEN pattern (SQLite compat) +""" +from __future__ import annotations + +import uuid +from typing import Optional + +from fastapi import APIRouter, Depends, HTTPException, Request, status +from pydantic import BaseModel +from sqlalchemy import select, text, func +from sqlalchemy.exc import IntegrityError, OperationalError +from sqlalchemy.ext.asyncio import AsyncSession + +from db.models import Document, Folder, Quota, Share, User +from deps.auth import get_regular_user +from deps.db import get_db +from services.audit import write_audit_log +from storage import get_storage_backend + +router = APIRouter(prefix="/api/folders", tags=["folders"]) + + +# ── Request / response models ───────────────────────────────────────────────── + +class FolderCreate(BaseModel): + name: str + parent_id: Optional[str] = None + + +class FolderRename(BaseModel): + name: str + + +class DocumentMove(BaseModel): + folder_id: Optional[str] = None + + +# ── Helper: extract IP address ──────────────────────────────────────────────── + +def _get_ip(request: Request) -> Optional[str]: + """Extract client IP, honouring X-Forwarded-For for reverse proxy setups (Pitfall 5).""" + return request.headers.get("X-Forwarded-For") or ( + request.client.host if request.client else None + ) + + +# ── Helper: folder serialization ────────────────────────────────────────────── + +def _folder_to_dict(folder: Folder) -> dict: + return { + "id": str(folder.id), + "name": folder.name, + "parent_id": str(folder.parent_id) if folder.parent_id else None, + "user_id": str(folder.user_id), + "created_at": folder.created_at.isoformat() if folder.created_at else None, + } + + +# ── Helper: document serialization ──────────────────────────────────────────── + +def _doc_to_dict(doc: Document) -> dict: + return { + "id": str(doc.id), + "filename": doc.filename, + "content_type": doc.content_type, + "size_bytes": doc.size_bytes, + "status": doc.status, + "folder_id": str(doc.folder_id) if doc.folder_id else None, + "user_id": str(doc.user_id) if doc.user_id else None, + "created_at": doc.created_at.isoformat() if doc.created_at else None, + "updated_at": doc.updated_at.isoformat() if doc.updated_at else None, + } + + +# ── POST /api/folders ───────────────────────────────────────────────────────── + +@router.post("", status_code=status.HTTP_201_CREATED) +async def create_folder( + body: FolderCreate, + request: Request, + session: AsyncSession = Depends(get_db), + current_user: User = Depends(get_regular_user), +): + """Create a new folder for the current user. + + FOLD-01: parent_id (if given) must belong to current_user — 404 otherwise. + Duplicate name under same parent returns 409 (T-04-03-06). + """ + parent_uuid: Optional[uuid.UUID] = None + if body.parent_id is not None: + try: + parent_uuid = uuid.UUID(body.parent_id) + except ValueError: + raise HTTPException(status_code=404, detail="Parent folder not found") + parent = await session.get(Folder, parent_uuid) + if parent is None or parent.user_id != current_user.id: + raise HTTPException(status_code=404, detail="Parent folder not found") + + folder = Folder( + user_id=current_user.id, + name=body.name, + parent_id=parent_uuid, + ) + try: + session.add(folder) + await session.commit() + except IntegrityError: + await session.rollback() + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail="A folder with that name already exists here", + ) + + await write_audit_log( + session, + event_type="folder.created", + user_id=current_user.id, + actor_id=current_user.id, + resource_id=folder.id, + ip_address=_get_ip(request), + metadata_={"name": folder.name, "parent_id": str(parent_uuid) if parent_uuid else None}, + ) + + return _folder_to_dict(folder) + + +# ── GET /api/folders ────────────────────────────────────────────────────────── + +@router.get("") +async def list_folders( + session: AsyncSession = Depends(get_db), + current_user: User = Depends(get_regular_user), +): + """List the current user's top-level folders (parent_id IS NULL). + + FOLD-02: returns only folders belonging to current_user with no parent. + """ + result = await session.execute( + select(Folder) + .where( + Folder.user_id == current_user.id, + Folder.parent_id.is_(None), + ) + .order_by(Folder.name) + ) + folders = result.scalars().all() + return {"items": [_folder_to_dict(f) for f in folders]} + + +# ── GET /api/folders/{folder_id} ────────────────────────────────────────────── + +@router.get("/{folder_id}") +async def get_folder( + folder_id: str, + session: AsyncSession = Depends(get_db), + current_user: User = Depends(get_regular_user), +): + """Get folder metadata + breadcrumb array from root to this folder. + + FOLD-02 / FOLD-05: breadcrumb is built via iterative parent-walk in Python + (not WITH RECURSIVE) so it is compatible with both PostgreSQL and SQLite tests. + + Response: {id, name, parent_id, user_id, created_at, breadcrumb: [{id, name}, ...]} + The breadcrumb array is ordered root-first (root is breadcrumb[0]). + """ + try: + uid = uuid.UUID(folder_id) + except ValueError: + raise HTTPException(status_code=404, detail="Folder not found") + + folder = await session.get(Folder, uid) + if folder is None or folder.user_id != current_user.id: + raise HTTPException(status_code=404, detail="Folder not found") + + # Build breadcrumb by walking up the parent chain iteratively. + # Ownership check on each ancestor ensures no cross-user traversal. + crumbs = [{"id": str(folder.id), "name": folder.name}] + current = folder + visited: set = {current.id} + while current.parent_id is not None: + if current.parent_id in visited: + break # cycle guard (should not occur with proper constraints) + parent = await session.get(Folder, current.parent_id) + if parent is None or parent.user_id != current_user.id: + break # stop traversal if parent is inaccessible + visited.add(parent.id) + crumbs.append({"id": str(parent.id), "name": parent.name}) + current = parent + + crumbs.reverse() # root-first order + + response = _folder_to_dict(folder) + response["breadcrumb"] = crumbs + return response + + +# ── PATCH /api/folders/{folder_id} ─────────────────────────────────────────── + +@router.patch("/{folder_id}") +async def rename_folder( + folder_id: str, + body: FolderRename, + request: Request, + session: AsyncSession = Depends(get_db), + current_user: User = Depends(get_regular_user), +): + """Rename a folder. + + FOLD-03: asserts ownership → 404 if not owner. + Duplicate name under same parent returns 409 (T-04-03-06). + """ + try: + uid = uuid.UUID(folder_id) + except ValueError: + raise HTTPException(status_code=404, detail="Folder not found") + + folder = await session.get(Folder, uid) + if folder is None or folder.user_id != current_user.id: + raise HTTPException(status_code=404, detail="Folder not found") + + old_name = folder.name + folder.name = body.name + try: + await session.commit() + except IntegrityError: + await session.rollback() + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail="A folder with that name already exists here", + ) + + await write_audit_log( + session, + event_type="folder.renamed", + user_id=current_user.id, + actor_id=current_user.id, + resource_id=folder.id, + ip_address=_get_ip(request), + metadata_={"old_name": old_name, "new_name": folder.name}, + ) + + return _folder_to_dict(folder) + + +# ── DELETE /api/folders/{folder_id} ────────────────────────────────────────── + +@router.delete("/{folder_id}", status_code=status.HTTP_204_NO_CONTENT) +async def delete_folder( + folder_id: str, + request: Request, + session: AsyncSession = Depends(get_db), + current_user: User = Depends(get_regular_user), +): + """Delete a folder and all of its contents (cascade). + + FOLD-03 + D-03: + - Collects all documents in the folder subtree using WITH RECURSIVE CTE + (wraps in try/except OperationalError for SQLite test compat; fallback + uses direct children only). + - Sums size_bytes, performs atomic quota decrement (CASE WHEN pattern for + SQLite compat — T-04-03-03). + - Deletes MinIO objects best-effort (per-object try/except — PATTERNS.md Pattern 2). + - Deletes all document rows and the folder row via ORM. + """ + try: + uid = uuid.UUID(folder_id) + except ValueError: + raise HTTPException(status_code=404, detail="Folder not found") + + folder = await session.get(Folder, uid) + if folder is None or folder.user_id != current_user.id: + raise HTTPException(status_code=404, detail="Folder not found") + + folder_name = folder.name + + # Collect all folder IDs in the subtree via WITH RECURSIVE CTE. + # Falls back to direct-children-only on SQLite (OperationalError on recursive CTE). + subtree_folder_ids: list[str] = [] + try: + cte_result = await session.execute( + text( + "WITH RECURSIVE subtree AS (" + " SELECT id FROM folders WHERE id = :root_id AND user_id = :uid " + " UNION ALL " + " SELECT f.id FROM folders f " + " JOIN subtree s ON f.parent_id = s.id " + " WHERE f.user_id = :uid" + ") SELECT id FROM subtree" + ), + {"root_id": str(folder.id), "uid": str(current_user.id)}, + ) + subtree_folder_ids = [str(row[0]) for row in cte_result.fetchall()] + except OperationalError: + # SQLite fallback: only direct children of this folder + subtree_folder_ids = [str(folder.id)] + + # Collect all documents in the subtree folder IDs + if subtree_folder_ids: + # Build UUID list for IN query + subtree_uuids = [] + for fid in subtree_folder_ids: + try: + subtree_uuids.append(uuid.UUID(fid)) + except ValueError: + pass + + if subtree_uuids: + docs_result = await session.execute( + select(Document).where( + Document.folder_id.in_(subtree_uuids), + Document.user_id == current_user.id, + ) + ) + docs = docs_result.scalars().all() + else: + docs = [] + else: + docs = [] + + total_bytes = sum(d.size_bytes for d in docs) + + # Atomic quota decrement (CASE WHEN for SQLite compat — never goes below 0) + if total_bytes > 0: + await session.execute( + text( + "UPDATE quotas " + "SET used_bytes = " + "CASE WHEN used_bytes > :delta THEN used_bytes - :delta ELSE 0 END " + "WHERE user_id = :uid" + ), + {"delta": total_bytes, "uid": str(current_user.id)}, + ) + + # Delete MinIO objects best-effort (per-object, never abort on failure) + storage_backend = get_storage_backend() + for doc in docs: + try: + await storage_backend.delete_object(doc.object_key) + except Exception: + pass # best-effort; stale MinIO objects will be garbage-collected + + # Delete document rows + for doc in docs: + await session.delete(doc) + + # Delete the folder (cascade will handle sub-folders in PostgreSQL; + # in SQLite test env we already collected and deleted all documents) + await session.delete(folder) + await session.commit() + + await write_audit_log( + session, + event_type="folder.deleted", + user_id=current_user.id, + actor_id=current_user.id, + resource_id=uid, + ip_address=_get_ip(request), + metadata_={"name": folder_name, "doc_count": len(docs)}, + ) + + +# ── PATCH /api/documents/{doc_id}/folder ───────────────────────────────────── +# This endpoint lives in the folders router (not documents router) because it +# is logically a folder organisation operation. The URL prefix /api/documents +# is achieved via an explicit path on this APIRouter. FastAPI supports this. + +document_move_router = APIRouter(prefix="/api/documents", tags=["folders"]) + + +@document_move_router.patch("/{doc_id}/folder") +async def move_document( + doc_id: str, + body: DocumentMove, + request: Request, + session: AsyncSession = Depends(get_db), + current_user: User = Depends(get_regular_user), +): + """Move a document to a different folder (or to root if folder_id is null). + + FOLD-04: + - Asserts document ownership → 404 if not owner. + - If folder_id given: asserts target folder ownership → 404 if not owner + (T-04-03-05: cross-user folder assignment blocked). + - Updates doc.folder_id and commits. + - Returns 200 with updated document dict. + """ + try: + doc_uid = uuid.UUID(doc_id) + except ValueError: + raise HTTPException(status_code=404, detail="Document not found") + + doc = await session.get(Document, doc_uid) + if doc is None or doc.user_id != current_user.id: + raise HTTPException(status_code=404, detail="Document not found") + + target_folder_uuid: Optional[uuid.UUID] = None + if body.folder_id is not None: + try: + target_folder_uuid = uuid.UUID(body.folder_id) + except ValueError: + raise HTTPException(status_code=404, detail="Folder not found") + target_folder = await session.get(Folder, target_folder_uuid) + if target_folder is None or target_folder.user_id != current_user.id: + raise HTTPException(status_code=404, detail="Folder not found") + + doc.folder_id = target_folder_uuid + await session.commit() + + return _doc_to_dict(doc) diff --git a/backend/main.py b/backend/main.py index a1756f4..360feca 100644 --- a/backend/main.py +++ b/backend/main.py @@ -38,6 +38,7 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): ) response.headers["X-Frame-Options"] = "DENY" response.headers["X-Content-Type-Options"] = "nosniff" + response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin" return response @@ -176,3 +177,9 @@ from api.auth import router as auth_router # noqa: E402 from api.admin import router as admin_router # noqa: E402 app.include_router(auth_router) app.include_router(admin_router) + +# Phase 4: folders router (FOLD-01..05) and document-move endpoint +from api.folders import router as folders_router # noqa: E402 +from api.folders import document_move_router as document_move_router # noqa: E402 +app.include_router(folders_router) +app.include_router(document_move_router)