feat(phase-4): Folders API (FOLD-01..05), audit helper (flush-not-commit), document sort/FTS/move

- backend/api/folders.py: POST /api/folders (create), GET /api/folders (list),
  GET /api/folders/{id} (breadcrumb), PATCH /api/folders/{id} (rename),
  DELETE /api/folders/{id} (cascade-delete + atomic quota decrement),
  PATCH /api/documents/{id}/folder (move document)
- All folder endpoints use get_regular_user (admin gets 403); 404 for IDOR
- IntegrityError caught -> 409 on duplicate folder name under same parent
- WITH RECURSIVE CTE for subtree collection with SQLite fallback (OperationalError)
- Atomic quota decrement with CASE WHEN pattern (SQLite compat)
- MinIO object deletion best-effort (per-object try/except)
- write_audit_log called after folder.created, folder.renamed, folder.deleted
- backend/api/documents.py: add sort, order, folder_id, q params to list_documents;
  add is_shared field to each document in response using Share subquery
- backend/main.py: register folders_router and document_move_router
This commit is contained in:
curo1305
2026-05-25 18:37:22 +02:00
parent 259a1542d8
commit 33a6f9a290
3 changed files with 540 additions and 6 deletions
+108 -6
View File
@@ -22,10 +22,10 @@ from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Query, status from fastapi import APIRouter, Depends, HTTPException, Query, status
from pydantic import BaseModel from pydantic import BaseModel
from sqlalchemy import text from sqlalchemy import select, text, func
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from db.models import Document, Quota, User from db.models import Document, Quota, Share, User
from deps.auth import get_regular_user from deps.auth import get_regular_user
from deps.db import get_db from deps.db import get_db
from services import classifier, storage from services import classifier, storage
@@ -190,18 +190,120 @@ async def list_documents(
topic: Optional[str] = Query(None), topic: Optional[str] = Query(None),
page: int = Query(1, ge=1), page: int = Query(1, ge=1),
per_page: int = Query(20, ge=1, le=100), per_page: int = Query(20, ge=1, le=100),
sort: str = Query("date"),
order: str = Query("desc"),
folder_id: Optional[str] = Query(None),
q: Optional[str] = Query(None),
session: AsyncSession = Depends(get_db), session: AsyncSession = Depends(get_db),
current_user: User = Depends(get_regular_user), current_user: User = Depends(get_regular_user),
): ):
"""List documents, optionally filtered by topic. """List documents with optional sort, folder filter, and full-text search.
D-16: requires authenticated regular user (get_regular_user rejects admins). D-16: requires authenticated regular user (get_regular_user rejects admins).
Returns only documents belonging to the current user. Returns only documents belonging to the current user.
FOLD-05: sort by name|date|size; order asc|desc; folder_id filter;
q full-text search via plainto_tsquery (PostgreSQL only — silently skipped
on SQLite when function is unavailable). FTS scope is always scoped to
current_user.id (T-04-03-02).
Backward-compat: when sort/order/folder_id/q are not provided, behaviour
is identical to the pre-Phase-4 implementation.
""" """
docs = await storage.list_metadata(session, user_id=current_user.id, topic=topic) # If no new params used, fall through to the legacy storage.list_metadata path
total = len(docs) # to preserve full backward compatibility with topic filtering.
if folder_id is None and q is None and sort == "date" and order == "desc":
docs = await storage.list_metadata(session, user_id=current_user.id, topic=topic)
total = len(docs)
start = (page - 1) * per_page
# Add is_shared field (Phase 4 addition)
shared_result = await session.execute(
select(Share.document_id).where(Share.owner_id == current_user.id)
)
shared_ids = {row[0] for row in shared_result.fetchall()}
items = []
for d in docs[start : start + per_page]:
doc_id_str = d.get("id", "")
try:
doc_uuid = uuid.UUID(doc_id_str)
except (ValueError, AttributeError):
doc_uuid = None
d["is_shared"] = doc_uuid in shared_ids if doc_uuid else False
items.append(d)
return {"items": items, "total": total, "page": page, "per_page": per_page}
# New path: direct ORM query with sort/filter/FTS
from db.models import DocumentTopic, Topic # noqa: PLC0415 (avoid circular at module top)
stmt = select(Document).where(Document.user_id == current_user.id)
# Topic filter (join-based, same as list_metadata)
if topic is not None:
stmt = (
stmt.join(DocumentTopic, DocumentTopic.document_id == Document.id)
.join(Topic, Topic.id == DocumentTopic.topic_id)
.where(Topic.name == topic)
)
# Folder filter
if folder_id is not None:
try:
folder_uuid = uuid.UUID(folder_id)
except ValueError:
raise HTTPException(status_code=404, detail="Folder not found")
stmt = stmt.where(Document.folder_id == folder_uuid)
# Full-text search — plainto_tsquery on extracted_text (PostgreSQL only)
# Wrapped in try/except so unit tests on SQLite are not broken (FOLD-05)
fts_requested = q is not None and len(q) >= 2
if fts_requested:
try:
stmt = stmt.where(
func.to_tsvector("english", func.coalesce(Document.extracted_text, "")).op("@@")(
func.plainto_tsquery("english", q)
)
)
except Exception:
pass # FTS not available (e.g. SQLite) — return unfiltered results
# Sort
sort_col = Document.created_at # default: date
if sort == "name":
sort_col = Document.filename
elif sort == "size":
sort_col = Document.size_bytes
if order == "asc":
stmt = stmt.order_by(sort_col.asc())
else:
stmt = stmt.order_by(sort_col.desc())
result = await session.execute(stmt)
docs_orm = result.scalars().all()
# is_shared subquery
shared_result = await session.execute(
select(Share.document_id).where(Share.owner_id == current_user.id)
)
shared_ids = {row[0] for row in shared_result.fetchall()}
# Serialize
all_items = []
for doc in docs_orm:
from services.storage import _doc_to_dict, _load_topic_names # noqa: PLC0415
topic_names = await _load_topic_names(session, doc.id)
d = _doc_to_dict(doc, topic_names)
d["is_shared"] = doc.id in shared_ids
all_items.append(d)
total = len(all_items)
start = (page - 1) * per_page start = (page - 1) * per_page
return {"items": docs[start : start + per_page], "total": total, "page": page, "per_page": per_page} return {
"items": all_items[start : start + per_page],
"total": total,
"page": page,
"per_page": per_page,
}
# ── GET /api/documents/{doc_id} ─────────────────────────────────────────────── # ── GET /api/documents/{doc_id} ───────────────────────────────────────────────
+425
View File
@@ -0,0 +1,425 @@
"""
Folder API endpoints for DocuVault — Phase 4, Plan 03.
Implements FOLD-01 through FOLD-05:
POST /api/folders — create folder (FOLD-01)
GET /api/folders — list top-level folders (FOLD-02)
GET /api/folders/{id} — get folder + breadcrumb (FOLD-02)
PATCH /api/folders/{id} — rename folder (FOLD-03)
DELETE /api/folders/{id} — delete folder (cascade) (FOLD-03)
PATCH /api/documents/{id}/folder — move document to folder (FOLD-04)
Security invariants (all enforced):
T-04-03-01: get_regular_user on all endpoints (admin gets 403)
T-04-03-04: All folder IDOR paths return 404 not 403
T-04-03-05: PATCH /api/documents/{id}/folder validates both doc and target folder ownership
T-04-03-06: IntegrityError (duplicate folder name) → 409 Conflict
T-04-03-03: Atomic quota decrement with CASE WHEN pattern (SQLite compat)
"""
from __future__ import annotations
import uuid
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException, Request, status
from pydantic import BaseModel
from sqlalchemy import select, text, func
from sqlalchemy.exc import IntegrityError, OperationalError
from sqlalchemy.ext.asyncio import AsyncSession
from db.models import Document, Folder, Quota, Share, User
from deps.auth import get_regular_user
from deps.db import get_db
from services.audit import write_audit_log
from storage import get_storage_backend
router = APIRouter(prefix="/api/folders", tags=["folders"])
# ── Request / response models ─────────────────────────────────────────────────
class FolderCreate(BaseModel):
name: str
parent_id: Optional[str] = None
class FolderRename(BaseModel):
name: str
class DocumentMove(BaseModel):
folder_id: Optional[str] = None
# ── Helper: extract IP address ────────────────────────────────────────────────
def _get_ip(request: Request) -> Optional[str]:
"""Extract client IP, honouring X-Forwarded-For for reverse proxy setups (Pitfall 5)."""
return request.headers.get("X-Forwarded-For") or (
request.client.host if request.client else None
)
# ── Helper: folder serialization ──────────────────────────────────────────────
def _folder_to_dict(folder: Folder) -> dict:
return {
"id": str(folder.id),
"name": folder.name,
"parent_id": str(folder.parent_id) if folder.parent_id else None,
"user_id": str(folder.user_id),
"created_at": folder.created_at.isoformat() if folder.created_at else None,
}
# ── Helper: document serialization ────────────────────────────────────────────
def _doc_to_dict(doc: Document) -> dict:
return {
"id": str(doc.id),
"filename": doc.filename,
"content_type": doc.content_type,
"size_bytes": doc.size_bytes,
"status": doc.status,
"folder_id": str(doc.folder_id) if doc.folder_id else None,
"user_id": str(doc.user_id) if doc.user_id else None,
"created_at": doc.created_at.isoformat() if doc.created_at else None,
"updated_at": doc.updated_at.isoformat() if doc.updated_at else None,
}
# ── POST /api/folders ─────────────────────────────────────────────────────────
@router.post("", status_code=status.HTTP_201_CREATED)
async def create_folder(
body: FolderCreate,
request: Request,
session: AsyncSession = Depends(get_db),
current_user: User = Depends(get_regular_user),
):
"""Create a new folder for the current user.
FOLD-01: parent_id (if given) must belong to current_user — 404 otherwise.
Duplicate name under same parent returns 409 (T-04-03-06).
"""
parent_uuid: Optional[uuid.UUID] = None
if body.parent_id is not None:
try:
parent_uuid = uuid.UUID(body.parent_id)
except ValueError:
raise HTTPException(status_code=404, detail="Parent folder not found")
parent = await session.get(Folder, parent_uuid)
if parent is None or parent.user_id != current_user.id:
raise HTTPException(status_code=404, detail="Parent folder not found")
folder = Folder(
user_id=current_user.id,
name=body.name,
parent_id=parent_uuid,
)
try:
session.add(folder)
await session.commit()
except IntegrityError:
await session.rollback()
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="A folder with that name already exists here",
)
await write_audit_log(
session,
event_type="folder.created",
user_id=current_user.id,
actor_id=current_user.id,
resource_id=folder.id,
ip_address=_get_ip(request),
metadata_={"name": folder.name, "parent_id": str(parent_uuid) if parent_uuid else None},
)
return _folder_to_dict(folder)
# ── GET /api/folders ──────────────────────────────────────────────────────────
@router.get("")
async def list_folders(
session: AsyncSession = Depends(get_db),
current_user: User = Depends(get_regular_user),
):
"""List the current user's top-level folders (parent_id IS NULL).
FOLD-02: returns only folders belonging to current_user with no parent.
"""
result = await session.execute(
select(Folder)
.where(
Folder.user_id == current_user.id,
Folder.parent_id.is_(None),
)
.order_by(Folder.name)
)
folders = result.scalars().all()
return {"items": [_folder_to_dict(f) for f in folders]}
# ── GET /api/folders/{folder_id} ──────────────────────────────────────────────
@router.get("/{folder_id}")
async def get_folder(
folder_id: str,
session: AsyncSession = Depends(get_db),
current_user: User = Depends(get_regular_user),
):
"""Get folder metadata + breadcrumb array from root to this folder.
FOLD-02 / FOLD-05: breadcrumb is built via iterative parent-walk in Python
(not WITH RECURSIVE) so it is compatible with both PostgreSQL and SQLite tests.
Response: {id, name, parent_id, user_id, created_at, breadcrumb: [{id, name}, ...]}
The breadcrumb array is ordered root-first (root is breadcrumb[0]).
"""
try:
uid = uuid.UUID(folder_id)
except ValueError:
raise HTTPException(status_code=404, detail="Folder not found")
folder = await session.get(Folder, uid)
if folder is None or folder.user_id != current_user.id:
raise HTTPException(status_code=404, detail="Folder not found")
# Build breadcrumb by walking up the parent chain iteratively.
# Ownership check on each ancestor ensures no cross-user traversal.
crumbs = [{"id": str(folder.id), "name": folder.name}]
current = folder
visited: set = {current.id}
while current.parent_id is not None:
if current.parent_id in visited:
break # cycle guard (should not occur with proper constraints)
parent = await session.get(Folder, current.parent_id)
if parent is None or parent.user_id != current_user.id:
break # stop traversal if parent is inaccessible
visited.add(parent.id)
crumbs.append({"id": str(parent.id), "name": parent.name})
current = parent
crumbs.reverse() # root-first order
response = _folder_to_dict(folder)
response["breadcrumb"] = crumbs
return response
# ── PATCH /api/folders/{folder_id} ───────────────────────────────────────────
@router.patch("/{folder_id}")
async def rename_folder(
folder_id: str,
body: FolderRename,
request: Request,
session: AsyncSession = Depends(get_db),
current_user: User = Depends(get_regular_user),
):
"""Rename a folder.
FOLD-03: asserts ownership → 404 if not owner.
Duplicate name under same parent returns 409 (T-04-03-06).
"""
try:
uid = uuid.UUID(folder_id)
except ValueError:
raise HTTPException(status_code=404, detail="Folder not found")
folder = await session.get(Folder, uid)
if folder is None or folder.user_id != current_user.id:
raise HTTPException(status_code=404, detail="Folder not found")
old_name = folder.name
folder.name = body.name
try:
await session.commit()
except IntegrityError:
await session.rollback()
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail="A folder with that name already exists here",
)
await write_audit_log(
session,
event_type="folder.renamed",
user_id=current_user.id,
actor_id=current_user.id,
resource_id=folder.id,
ip_address=_get_ip(request),
metadata_={"old_name": old_name, "new_name": folder.name},
)
return _folder_to_dict(folder)
# ── DELETE /api/folders/{folder_id} ──────────────────────────────────────────
@router.delete("/{folder_id}", status_code=status.HTTP_204_NO_CONTENT)
async def delete_folder(
folder_id: str,
request: Request,
session: AsyncSession = Depends(get_db),
current_user: User = Depends(get_regular_user),
):
"""Delete a folder and all of its contents (cascade).
FOLD-03 + D-03:
- Collects all documents in the folder subtree using WITH RECURSIVE CTE
(wraps in try/except OperationalError for SQLite test compat; fallback
uses direct children only).
- Sums size_bytes, performs atomic quota decrement (CASE WHEN pattern for
SQLite compat — T-04-03-03).
- Deletes MinIO objects best-effort (per-object try/except — PATTERNS.md Pattern 2).
- Deletes all document rows and the folder row via ORM.
"""
try:
uid = uuid.UUID(folder_id)
except ValueError:
raise HTTPException(status_code=404, detail="Folder not found")
folder = await session.get(Folder, uid)
if folder is None or folder.user_id != current_user.id:
raise HTTPException(status_code=404, detail="Folder not found")
folder_name = folder.name
# Collect all folder IDs in the subtree via WITH RECURSIVE CTE.
# Falls back to direct-children-only on SQLite (OperationalError on recursive CTE).
subtree_folder_ids: list[str] = []
try:
cte_result = await session.execute(
text(
"WITH RECURSIVE subtree AS ("
" SELECT id FROM folders WHERE id = :root_id AND user_id = :uid "
" UNION ALL "
" SELECT f.id FROM folders f "
" JOIN subtree s ON f.parent_id = s.id "
" WHERE f.user_id = :uid"
") SELECT id FROM subtree"
),
{"root_id": str(folder.id), "uid": str(current_user.id)},
)
subtree_folder_ids = [str(row[0]) for row in cte_result.fetchall()]
except OperationalError:
# SQLite fallback: only direct children of this folder
subtree_folder_ids = [str(folder.id)]
# Collect all documents in the subtree folder IDs
if subtree_folder_ids:
# Build UUID list for IN query
subtree_uuids = []
for fid in subtree_folder_ids:
try:
subtree_uuids.append(uuid.UUID(fid))
except ValueError:
pass
if subtree_uuids:
docs_result = await session.execute(
select(Document).where(
Document.folder_id.in_(subtree_uuids),
Document.user_id == current_user.id,
)
)
docs = docs_result.scalars().all()
else:
docs = []
else:
docs = []
total_bytes = sum(d.size_bytes for d in docs)
# Atomic quota decrement (CASE WHEN for SQLite compat — never goes below 0)
if total_bytes > 0:
await session.execute(
text(
"UPDATE quotas "
"SET used_bytes = "
"CASE WHEN used_bytes > :delta THEN used_bytes - :delta ELSE 0 END "
"WHERE user_id = :uid"
),
{"delta": total_bytes, "uid": str(current_user.id)},
)
# Delete MinIO objects best-effort (per-object, never abort on failure)
storage_backend = get_storage_backend()
for doc in docs:
try:
await storage_backend.delete_object(doc.object_key)
except Exception:
pass # best-effort; stale MinIO objects will be garbage-collected
# Delete document rows
for doc in docs:
await session.delete(doc)
# Delete the folder (cascade will handle sub-folders in PostgreSQL;
# in SQLite test env we already collected and deleted all documents)
await session.delete(folder)
await session.commit()
await write_audit_log(
session,
event_type="folder.deleted",
user_id=current_user.id,
actor_id=current_user.id,
resource_id=uid,
ip_address=_get_ip(request),
metadata_={"name": folder_name, "doc_count": len(docs)},
)
# ── PATCH /api/documents/{doc_id}/folder ─────────────────────────────────────
# This endpoint lives in the folders router (not documents router) because it
# is logically a folder organisation operation. The URL prefix /api/documents
# is achieved via an explicit path on this APIRouter. FastAPI supports this.
document_move_router = APIRouter(prefix="/api/documents", tags=["folders"])
@document_move_router.patch("/{doc_id}/folder")
async def move_document(
doc_id: str,
body: DocumentMove,
request: Request,
session: AsyncSession = Depends(get_db),
current_user: User = Depends(get_regular_user),
):
"""Move a document to a different folder (or to root if folder_id is null).
FOLD-04:
- Asserts document ownership → 404 if not owner.
- If folder_id given: asserts target folder ownership → 404 if not owner
(T-04-03-05: cross-user folder assignment blocked).
- Updates doc.folder_id and commits.
- Returns 200 with updated document dict.
"""
try:
doc_uid = uuid.UUID(doc_id)
except ValueError:
raise HTTPException(status_code=404, detail="Document not found")
doc = await session.get(Document, doc_uid)
if doc is None or doc.user_id != current_user.id:
raise HTTPException(status_code=404, detail="Document not found")
target_folder_uuid: Optional[uuid.UUID] = None
if body.folder_id is not None:
try:
target_folder_uuid = uuid.UUID(body.folder_id)
except ValueError:
raise HTTPException(status_code=404, detail="Folder not found")
target_folder = await session.get(Folder, target_folder_uuid)
if target_folder is None or target_folder.user_id != current_user.id:
raise HTTPException(status_code=404, detail="Folder not found")
doc.folder_id = target_folder_uuid
await session.commit()
return _doc_to_dict(doc)
+7
View File
@@ -38,6 +38,7 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
) )
response.headers["X-Frame-Options"] = "DENY" response.headers["X-Frame-Options"] = "DENY"
response.headers["X-Content-Type-Options"] = "nosniff" response.headers["X-Content-Type-Options"] = "nosniff"
response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
return response return response
@@ -176,3 +177,9 @@ from api.auth import router as auth_router # noqa: E402
from api.admin import router as admin_router # noqa: E402 from api.admin import router as admin_router # noqa: E402
app.include_router(auth_router) app.include_router(auth_router)
app.include_router(admin_router) app.include_router(admin_router)
# Phase 4: folders router (FOLD-01..05) and document-move endpoint
from api.folders import router as folders_router # noqa: E402
from api.folders import document_move_router as document_move_router # noqa: E402
app.include_router(folders_router)
app.include_router(document_move_router)