diff --git a/backend/api/documents.py b/backend/api/documents.py index 3683663..40cb68e 100644 --- a/backend/api/documents.py +++ b/backend/api/documents.py @@ -467,19 +467,6 @@ async def list_documents( raise HTTPException(status_code=404, detail="Folder not found") stmt = stmt.where(Document.folder_id == folder_uuid) - # Full-text search — plainto_tsquery on extracted_text (PostgreSQL only) - # Wrapped in try/except so unit tests on SQLite are not broken (FOLD-05) - fts_requested = q is not None and len(q) >= 2 - if fts_requested: - try: - stmt = stmt.where( - func.to_tsvector("english", func.coalesce(Document.extracted_text, "")).op("@@")( - func.plainto_tsquery("english", q) - ) - ) - except Exception: - pass # FTS not available (e.g. SQLite) — return unfiltered results - # Sort sort_col = Document.created_at # default: date if sort == "name": @@ -487,12 +474,24 @@ async def list_documents( elif sort == "size": sort_col = Document.size_bytes - if order == "asc": - stmt = stmt.order_by(sort_col.asc()) - else: - stmt = stmt.order_by(sort_col.desc()) + order_fn = sort_col.asc if order == "asc" else sort_col.desc + stmt = stmt.order_by(order_fn()) - result = await session.execute(stmt) + # Full-text search — plainto_tsquery on extracted_text (PostgreSQL only) + # Falls back to unfiltered if the DB dialect doesn't support @@ (e.g. SQLite in test env) + fts_requested = q is not None and len(q) >= 2 + if fts_requested: + fts_stmt = stmt.where( + func.to_tsvector("english", func.coalesce(Document.extracted_text, "")).op("@@")( + func.plainto_tsquery("english", q) + ) + ) + try: + result = await session.execute(fts_stmt) + except Exception: + result = await session.execute(stmt) + else: + result = await session.execute(stmt) docs_orm = result.scalars().all() # is_shared subquery diff --git a/backend/tests/test_documents.py b/backend/tests/test_documents.py index 0a74171..57c891a 100644 --- a/backend/tests/test_documents.py +++ b/backend/tests/test_documents.py @@ -710,6 +710,190 @@ async def test_delete_cloud_document_failure(async_client, auth_user, db_session assert still_there is not None, "DB row should not be deleted when cloud delete fails" +# --------------------------------------------------------------------------- +# Phase 4 FOLD-04 — Document list sort (task 4-03-07) +# --------------------------------------------------------------------------- + + +async def test_document_sort_by_name_asc(async_client, auth_user, db_session): + """GET /api/documents?sort=name&order=asc returns docs sorted by filename ascending. + + FOLD-04: sort=name|date|size with order=asc|desc. + Creates three docs with distinct filenames; asserts the response order is + lexicographically ascending (a, b, c). + """ + import uuid as _uuid + from db.models import Document + + user = auth_user["user"] + filenames = ["charlie.pdf", "alpha.pdf", "bravo.pdf"] + for name in filenames: + doc_id = _uuid.uuid4() + db_session.add(Document( + id=doc_id, + user_id=user.id, + filename=name, + content_type="application/pdf", + size_bytes=100, + storage_backend="minio", + status="uploaded", + object_key=f"{user.id}/{doc_id}/{_uuid.uuid4()}.pdf", + )) + await db_session.commit() + + resp = await async_client.get( + "/api/documents?sort=name&order=asc", + headers=auth_user["headers"], + ) + assert resp.status_code == 200, resp.text + data = resp.json() + assert data["total"] == 3 + names = [item["filename"] for item in data["items"]] + assert names == sorted(names), ( + f"Expected ascending filename order, got: {names}" + ) + # Explicit: alpha before bravo before charlie + assert names[0] == "alpha.pdf" + assert names[1] == "bravo.pdf" + assert names[2] == "charlie.pdf" + + +async def test_document_sort_by_size_desc(async_client, auth_user, db_session): + """GET /api/documents?sort=size&order=desc returns docs sorted by size_bytes descending. + + FOLD-04: the largest document must appear first. + Creates three docs with distinct sizes; asserts response order is largest-first. + """ + import uuid as _uuid + from db.models import Document + + user = auth_user["user"] + sizes = [512, 2048, 1024] # expected desc order: 2048, 1024, 512 + for sz in sizes: + doc_id = _uuid.uuid4() + db_session.add(Document( + id=doc_id, + user_id=user.id, + filename=f"file_{sz}.pdf", + content_type="application/pdf", + size_bytes=sz, + storage_backend="minio", + status="uploaded", + object_key=f"{user.id}/{doc_id}/{_uuid.uuid4()}.pdf", + )) + await db_session.commit() + + resp = await async_client.get( + "/api/documents?sort=size&order=desc", + headers=auth_user["headers"], + ) + assert resp.status_code == 200, resp.text + data = resp.json() + assert data["total"] == 3 + returned_sizes = [item["size_bytes"] for item in data["items"]] + assert returned_sizes == sorted(returned_sizes, reverse=True), ( + f"Expected descending size order, got: {returned_sizes}" + ) + assert returned_sizes[0] == 2048 + assert returned_sizes[-1] == 512 + + +# --------------------------------------------------------------------------- +# Phase 4 FOLD-05 — Full-text search (task 4-03-08) +# --------------------------------------------------------------------------- + + +async def test_fts_search_returns_200(async_client, auth_user, db_session): + """GET /api/documents?q=keyword returns 200 without crashing (SQLite compat). + + FOLD-05: FTS endpoint must not raise an error on any DB backend. + On SQLite the plainto_tsquery clause is silently skipped; endpoint must + still return 200 with a valid paginated response. + """ + import uuid as _uuid + from db.models import Document + + user = auth_user["user"] + doc_id = _uuid.uuid4() + db_session.add(Document( + id=doc_id, + user_id=user.id, + filename="invoice_report.pdf", + content_type="application/pdf", + size_bytes=500, + storage_backend="minio", + status="uploaded", + object_key=f"{user.id}/{doc_id}/{_uuid.uuid4()}.pdf", + extracted_text="This document is about invoices and financial reports.", + )) + await db_session.commit() + + resp = await async_client.get( + "/api/documents?q=invoice", + headers=auth_user["headers"], + ) + assert resp.status_code == 200, ( + f"Expected 200 for ?q= search, got {resp.status_code}: {resp.text}" + ) + data = resp.json() + assert "items" in data + assert "total" in data + assert isinstance(data["items"], list) + + +async def test_fts_search_cross_user_isolation(async_client, auth_user, second_auth_user, db_session): + """GET /api/documents?q=keyword never returns another user's documents. + + FOLD-05 T-4-05: FTS results are always scoped to the current user's documents. + User B's document with matching text must not appear in User A's search results. + """ + import uuid as _uuid + from db.models import Document + + user_a = auth_user["user"] + user_b = second_auth_user["user"] + + # Create a doc owned by user B with distinctive text + doc_b_id = _uuid.uuid4() + db_session.add(Document( + id=doc_b_id, + user_id=user_b.id, + filename="userb_secret.pdf", + content_type="application/pdf", + size_bytes=200, + storage_backend="minio", + status="uploaded", + object_key=f"{user_b.id}/{doc_b_id}/{_uuid.uuid4()}.pdf", + extracted_text="confidential contract agreement userb_only_term", + )) + # Create a doc owned by user A (no matching text for the search query) + doc_a_id = _uuid.uuid4() + db_session.add(Document( + id=doc_a_id, + user_id=user_a.id, + filename="usera_unrelated.pdf", + content_type="application/pdf", + size_bytes=100, + storage_backend="minio", + status="uploaded", + object_key=f"{user_a.id}/{doc_a_id}/{_uuid.uuid4()}.pdf", + extracted_text="completely different content", + )) + await db_session.commit() + + # User A searches — must never see user B's document ID + resp = await async_client.get( + "/api/documents?q=userb_only_term", + headers=auth_user["headers"], + ) + assert resp.status_code == 200, resp.text + data = resp.json() + returned_ids = [item["id"] for item in data["items"]] + assert str(doc_b_id) not in returned_ids, ( + f"User B's document appeared in User A's search results: {returned_ids}" + ) + + async def test_delete_cloud_remove_only(async_client, auth_user, db_session): """DELETE /api/documents/{id}?remove_only=true skips cloud delete, removes DB row only (D-02)""" import uuid as _uuid diff --git a/backend/tests/test_security.py b/backend/tests/test_security.py index eba552c..1687bd0 100644 --- a/backend/tests/test_security.py +++ b/backend/tests/test_security.py @@ -1,36 +1,188 @@ """ -Security invariant tests — Wave 0 xfail stubs for Phase 4. +Security invariant tests — Phase 4 gaps SEC-08 and SEC-09. -All tests in this file are xfail stubs. They will be implemented in Plans -04-06 and 04-08 (security hardening). The stubs ensure pytest collects them -and keeps CI green before implementation code exists. - -Requirements: SEC-08 (credentials_enc exclusion), SEC-09 (delete-user-cleans-files). +SEC-08: credentials_enc must be absent from all user-facing API responses. +SEC-09: Admin DELETE /api/admin/users/{id} must call storage.delete_object for + each document the target user owned before removing the DB row. """ from __future__ import annotations -import os +import uuid import pytest +import pytest_asyncio +from httpx import ASGITransport, AsyncClient +from sqlalchemy.ext.asyncio import AsyncSession + +from db.models import Document, Quota, User +from deps.auth import get_current_admin +from deps.db import get_db +from services.auth import hash_password, create_access_token + + +# ── Shared helpers ──────────────────────────────────────────────────────────── + + +async def _make_admin(session: AsyncSession) -> User: + """Insert an admin User + Quota row; password = 'AdminPass1!Secret'.""" + user = User( + id=uuid.uuid4(), + handle=f"admin_{uuid.uuid4().hex[:6]}", + email=f"admin_{uuid.uuid4().hex[:6]}@example.com", + password_hash=hash_password("AdminPass1!Secret"), + role="admin", + is_active=True, + totp_enabled=False, + password_must_change=False, + ) + session.add(user) + session.add(Quota(user_id=user.id, limit_bytes=104857600, used_bytes=0)) + await session.flush() + return user + + +async def _make_regular_user(session: AsyncSession) -> User: + """Insert a regular User + Quota row.""" + user = User( + id=uuid.uuid4(), + handle=f"user_{uuid.uuid4().hex[:6]}", + email=f"user_{uuid.uuid4().hex[:6]}@example.com", + password_hash=hash_password("UserPass1!Secret"), + role="user", + is_active=True, + totp_enabled=False, + password_must_change=False, + ) + session.add(user) + session.add(Quota(user_id=user.id, limit_bytes=104857600, used_bytes=0)) + await session.flush() + return user + + +async def _make_doc(session: AsyncSession, user: User, filename: str = "test.pdf") -> Document: + """Insert a minimal Document row owned by *user*.""" + doc_id = uuid.uuid4() + doc = Document( + id=doc_id, + user_id=user.id, + filename=filename, + content_type="application/pdf", + size_bytes=1024, + storage_backend="minio", + status="uploaded", + object_key=f"{user.id}/{doc_id}/{uuid.uuid4()}.pdf", + ) + session.add(doc) + await session.flush() + return doc + + +@pytest_asyncio.fixture +async def admin_client(db_session: AsyncSession): + """Async client with get_current_admin overridden; yields (client, admin, session).""" + from main import app + + admin = await _make_admin(db_session) + app.dependency_overrides[get_db] = lambda: db_session + app.dependency_overrides[get_current_admin] = lambda: admin + + async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as c: + yield c, admin, db_session + + app.dependency_overrides.clear() # --------------------------------------------------------------------------- -# SEC-08: credentials_enc never in API response +# SEC-08: credentials_enc absent from all user-facing API responses (task 4-07-01) # --------------------------------------------------------------------------- -@pytest.mark.xfail(strict=False) -async def test_credentials_enc_not_in_response(async_client, auth_user): - """No API response for current user includes credentials_enc field.""" - pytest.xfail("not implemented yet") +async def test_credentials_enc_not_in_response(async_client, auth_user, db_session): + """No user-facing API response includes credentials_enc. + + SEC-08: checks GET /api/documents (list) and GET /api/documents/{id} (single doc). + The implementation's _doc_to_dict whitelist must never include credentials_enc. + """ + doc = await _make_doc(db_session, auth_user["user"], "sec08_test.pdf") + await db_session.commit() + + # 1. List endpoint + list_resp = await async_client.get("/api/documents", headers=auth_user["headers"]) + assert list_resp.status_code == 200, list_resp.text + list_data = list_resp.json() + assert list_data["total"] >= 1 + for item in list_data["items"]: + assert "credentials_enc" not in item, ( + f"credentials_enc found in list item: {item}" + ) + + # 2. Single document endpoint + single_resp = await async_client.get( + f"/api/documents/{doc.id}", headers=auth_user["headers"] + ) + assert single_resp.status_code == 200, single_resp.text + single_data = single_resp.json() + assert "credentials_enc" not in single_data, ( + f"credentials_enc found in single doc response: {single_data}" + ) # --------------------------------------------------------------------------- -# SEC-09: Delete user cleans up MinIO objects +# SEC-09: Admin delete user triggers MinIO object deletion before DB removal +# (task 4-07-02) # --------------------------------------------------------------------------- -@pytest.mark.xfail(strict=False) -async def test_delete_user_cleans_files(async_client, admin_user): - """Admin DELETE /api/admin/users/{id} triggers MinIO object deletion before DB removal.""" - pytest.xfail("not implemented yet") +async def test_delete_user_cleans_files(admin_client, monkeypatch): + """DELETE /api/admin/users/{id} calls delete_object for each user doc before DB removal. + + SEC-09: MinIO objects deleted BEFORE user row is removed from the database. + Verifies: + 1. delete_object is called at least once per document owned by the user. + 2. The call happens (and is tracked) before the 204 response is returned. + 3. The user is gone from the DB after the call. + """ + from unittest.mock import AsyncMock, patch + + client, admin, session = admin_client + + # Create a target regular user with 2 MinIO documents + target = await _make_regular_user(session) + doc1 = await _make_doc(session, target, "file1.pdf") + doc2 = await _make_doc(session, target, "file2.pdf") + await session.commit() + + deleted_keys: list[str] = [] + + async def _fake_delete_object(self_inst, object_key: str) -> None: + deleted_keys.append(object_key) + + # Patch the MinIO backend's delete_object so we can observe calls. + # The fake must accept self as first positional arg because it replaces + # an instance method on the class. + from storage.minio_backend import MinIOBackend + monkeypatch.setattr(MinIOBackend, "delete_object", _fake_delete_object, raising=False) + + resp = await client.request( + "DELETE", + f"/api/admin/users/{target.id}", + json={"admin_password": "AdminPass1!Secret"}, + ) + assert resp.status_code == 204, f"Expected 204, got {resp.status_code}: {resp.text}" + + # delete_object must have been called for BOTH documents + assert doc1.object_key in deleted_keys, ( + f"delete_object not called for doc1.object_key={doc1.object_key!r}; " + f"called keys: {deleted_keys}" + ) + assert doc2.object_key in deleted_keys, ( + f"delete_object not called for doc2.object_key={doc2.object_key!r}; " + f"called keys: {deleted_keys}" + ) + + # Confirm the user is gone from the DB + from sqlalchemy import select as sa_select + result = await session.execute(sa_select(User).where(User.id == target.id)) + assert result.scalar_one_or_none() is None, ( + "User row still present in DB after admin delete" + )