test(phase-04): fill Nyquist validation gaps — FOLD-04, FOLD-05, SEC-08, SEC-09

Add 6 new tests covering document sort (name/size), FTS search cross-user
isolation, credentials_enc exclusion from all responses, and MinIO object
cleanup on user deletion.

Fix FTS try/except misplacement in api/documents.py — was wrapping the ORM
statement builder (never raises) instead of the execute call, causing HTTP 500
on SQLite test env. Now falls back to unfiltered results when @@ unsupported.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-05-31 15:21:02 +02:00
parent eab5f124f6
commit 62daf0d750
3 changed files with 370 additions and 35 deletions
+16 -17
View File
@@ -467,19 +467,6 @@ async def list_documents(
raise HTTPException(status_code=404, detail="Folder not found") raise HTTPException(status_code=404, detail="Folder not found")
stmt = stmt.where(Document.folder_id == folder_uuid) stmt = stmt.where(Document.folder_id == folder_uuid)
# Full-text search — plainto_tsquery on extracted_text (PostgreSQL only)
# Wrapped in try/except so unit tests on SQLite are not broken (FOLD-05)
fts_requested = q is not None and len(q) >= 2
if fts_requested:
try:
stmt = stmt.where(
func.to_tsvector("english", func.coalesce(Document.extracted_text, "")).op("@@")(
func.plainto_tsquery("english", q)
)
)
except Exception:
pass # FTS not available (e.g. SQLite) — return unfiltered results
# Sort # Sort
sort_col = Document.created_at # default: date sort_col = Document.created_at # default: date
if sort == "name": if sort == "name":
@@ -487,11 +474,23 @@ async def list_documents(
elif sort == "size": elif sort == "size":
sort_col = Document.size_bytes sort_col = Document.size_bytes
if order == "asc": order_fn = sort_col.asc if order == "asc" else sort_col.desc
stmt = stmt.order_by(sort_col.asc()) stmt = stmt.order_by(order_fn())
else:
stmt = stmt.order_by(sort_col.desc())
# Full-text search — plainto_tsquery on extracted_text (PostgreSQL only)
# Falls back to unfiltered if the DB dialect doesn't support @@ (e.g. SQLite in test env)
fts_requested = q is not None and len(q) >= 2
if fts_requested:
fts_stmt = stmt.where(
func.to_tsvector("english", func.coalesce(Document.extracted_text, "")).op("@@")(
func.plainto_tsquery("english", q)
)
)
try:
result = await session.execute(fts_stmt)
except Exception:
result = await session.execute(stmt)
else:
result = await session.execute(stmt) result = await session.execute(stmt)
docs_orm = result.scalars().all() docs_orm = result.scalars().all()
+184
View File
@@ -710,6 +710,190 @@ async def test_delete_cloud_document_failure(async_client, auth_user, db_session
assert still_there is not None, "DB row should not be deleted when cloud delete fails" assert still_there is not None, "DB row should not be deleted when cloud delete fails"
# ---------------------------------------------------------------------------
# Phase 4 FOLD-04 — Document list sort (task 4-03-07)
# ---------------------------------------------------------------------------
async def test_document_sort_by_name_asc(async_client, auth_user, db_session):
"""GET /api/documents?sort=name&order=asc returns docs sorted by filename ascending.
FOLD-04: sort=name|date|size with order=asc|desc.
Creates three docs with distinct filenames; asserts the response order is
lexicographically ascending (a, b, c).
"""
import uuid as _uuid
from db.models import Document
user = auth_user["user"]
filenames = ["charlie.pdf", "alpha.pdf", "bravo.pdf"]
for name in filenames:
doc_id = _uuid.uuid4()
db_session.add(Document(
id=doc_id,
user_id=user.id,
filename=name,
content_type="application/pdf",
size_bytes=100,
storage_backend="minio",
status="uploaded",
object_key=f"{user.id}/{doc_id}/{_uuid.uuid4()}.pdf",
))
await db_session.commit()
resp = await async_client.get(
"/api/documents?sort=name&order=asc",
headers=auth_user["headers"],
)
assert resp.status_code == 200, resp.text
data = resp.json()
assert data["total"] == 3
names = [item["filename"] for item in data["items"]]
assert names == sorted(names), (
f"Expected ascending filename order, got: {names}"
)
# Explicit: alpha before bravo before charlie
assert names[0] == "alpha.pdf"
assert names[1] == "bravo.pdf"
assert names[2] == "charlie.pdf"
async def test_document_sort_by_size_desc(async_client, auth_user, db_session):
"""GET /api/documents?sort=size&order=desc returns docs sorted by size_bytes descending.
FOLD-04: the largest document must appear first.
Creates three docs with distinct sizes; asserts response order is largest-first.
"""
import uuid as _uuid
from db.models import Document
user = auth_user["user"]
sizes = [512, 2048, 1024] # expected desc order: 2048, 1024, 512
for sz in sizes:
doc_id = _uuid.uuid4()
db_session.add(Document(
id=doc_id,
user_id=user.id,
filename=f"file_{sz}.pdf",
content_type="application/pdf",
size_bytes=sz,
storage_backend="minio",
status="uploaded",
object_key=f"{user.id}/{doc_id}/{_uuid.uuid4()}.pdf",
))
await db_session.commit()
resp = await async_client.get(
"/api/documents?sort=size&order=desc",
headers=auth_user["headers"],
)
assert resp.status_code == 200, resp.text
data = resp.json()
assert data["total"] == 3
returned_sizes = [item["size_bytes"] for item in data["items"]]
assert returned_sizes == sorted(returned_sizes, reverse=True), (
f"Expected descending size order, got: {returned_sizes}"
)
assert returned_sizes[0] == 2048
assert returned_sizes[-1] == 512
# ---------------------------------------------------------------------------
# Phase 4 FOLD-05 — Full-text search (task 4-03-08)
# ---------------------------------------------------------------------------
async def test_fts_search_returns_200(async_client, auth_user, db_session):
"""GET /api/documents?q=keyword returns 200 without crashing (SQLite compat).
FOLD-05: FTS endpoint must not raise an error on any DB backend.
On SQLite the plainto_tsquery clause is silently skipped; endpoint must
still return 200 with a valid paginated response.
"""
import uuid as _uuid
from db.models import Document
user = auth_user["user"]
doc_id = _uuid.uuid4()
db_session.add(Document(
id=doc_id,
user_id=user.id,
filename="invoice_report.pdf",
content_type="application/pdf",
size_bytes=500,
storage_backend="minio",
status="uploaded",
object_key=f"{user.id}/{doc_id}/{_uuid.uuid4()}.pdf",
extracted_text="This document is about invoices and financial reports.",
))
await db_session.commit()
resp = await async_client.get(
"/api/documents?q=invoice",
headers=auth_user["headers"],
)
assert resp.status_code == 200, (
f"Expected 200 for ?q= search, got {resp.status_code}: {resp.text}"
)
data = resp.json()
assert "items" in data
assert "total" in data
assert isinstance(data["items"], list)
async def test_fts_search_cross_user_isolation(async_client, auth_user, second_auth_user, db_session):
"""GET /api/documents?q=keyword never returns another user's documents.
FOLD-05 T-4-05: FTS results are always scoped to the current user's documents.
User B's document with matching text must not appear in User A's search results.
"""
import uuid as _uuid
from db.models import Document
user_a = auth_user["user"]
user_b = second_auth_user["user"]
# Create a doc owned by user B with distinctive text
doc_b_id = _uuid.uuid4()
db_session.add(Document(
id=doc_b_id,
user_id=user_b.id,
filename="userb_secret.pdf",
content_type="application/pdf",
size_bytes=200,
storage_backend="minio",
status="uploaded",
object_key=f"{user_b.id}/{doc_b_id}/{_uuid.uuid4()}.pdf",
extracted_text="confidential contract agreement userb_only_term",
))
# Create a doc owned by user A (no matching text for the search query)
doc_a_id = _uuid.uuid4()
db_session.add(Document(
id=doc_a_id,
user_id=user_a.id,
filename="usera_unrelated.pdf",
content_type="application/pdf",
size_bytes=100,
storage_backend="minio",
status="uploaded",
object_key=f"{user_a.id}/{doc_a_id}/{_uuid.uuid4()}.pdf",
extracted_text="completely different content",
))
await db_session.commit()
# User A searches — must never see user B's document ID
resp = await async_client.get(
"/api/documents?q=userb_only_term",
headers=auth_user["headers"],
)
assert resp.status_code == 200, resp.text
data = resp.json()
returned_ids = [item["id"] for item in data["items"]]
assert str(doc_b_id) not in returned_ids, (
f"User B's document appeared in User A's search results: {returned_ids}"
)
async def test_delete_cloud_remove_only(async_client, auth_user, db_session): async def test_delete_cloud_remove_only(async_client, auth_user, db_session):
"""DELETE /api/documents/{id}?remove_only=true skips cloud delete, removes DB row only (D-02)""" """DELETE /api/documents/{id}?remove_only=true skips cloud delete, removes DB row only (D-02)"""
import uuid as _uuid import uuid as _uuid
+169 -17
View File
@@ -1,36 +1,188 @@
""" """
Security invariant tests — Wave 0 xfail stubs for Phase 4. Security invariant tests — Phase 4 gaps SEC-08 and SEC-09.
All tests in this file are xfail stubs. They will be implemented in Plans SEC-08: credentials_enc must be absent from all user-facing API responses.
04-06 and 04-08 (security hardening). The stubs ensure pytest collects them SEC-09: Admin DELETE /api/admin/users/{id} must call storage.delete_object for
and keeps CI green before implementation code exists. each document the target user owned before removing the DB row.
Requirements: SEC-08 (credentials_enc exclusion), SEC-09 (delete-user-cleans-files).
""" """
from __future__ import annotations from __future__ import annotations
import os import uuid
import pytest import pytest
import pytest_asyncio
from httpx import ASGITransport, AsyncClient
from sqlalchemy.ext.asyncio import AsyncSession
from db.models import Document, Quota, User
from deps.auth import get_current_admin
from deps.db import get_db
from services.auth import hash_password, create_access_token
# ── Shared helpers ────────────────────────────────────────────────────────────
async def _make_admin(session: AsyncSession) -> User:
"""Insert an admin User + Quota row; password = 'AdminPass1!Secret'."""
user = User(
id=uuid.uuid4(),
handle=f"admin_{uuid.uuid4().hex[:6]}",
email=f"admin_{uuid.uuid4().hex[:6]}@example.com",
password_hash=hash_password("AdminPass1!Secret"),
role="admin",
is_active=True,
totp_enabled=False,
password_must_change=False,
)
session.add(user)
session.add(Quota(user_id=user.id, limit_bytes=104857600, used_bytes=0))
await session.flush()
return user
async def _make_regular_user(session: AsyncSession) -> User:
"""Insert a regular User + Quota row."""
user = User(
id=uuid.uuid4(),
handle=f"user_{uuid.uuid4().hex[:6]}",
email=f"user_{uuid.uuid4().hex[:6]}@example.com",
password_hash=hash_password("UserPass1!Secret"),
role="user",
is_active=True,
totp_enabled=False,
password_must_change=False,
)
session.add(user)
session.add(Quota(user_id=user.id, limit_bytes=104857600, used_bytes=0))
await session.flush()
return user
async def _make_doc(session: AsyncSession, user: User, filename: str = "test.pdf") -> Document:
"""Insert a minimal Document row owned by *user*."""
doc_id = uuid.uuid4()
doc = Document(
id=doc_id,
user_id=user.id,
filename=filename,
content_type="application/pdf",
size_bytes=1024,
storage_backend="minio",
status="uploaded",
object_key=f"{user.id}/{doc_id}/{uuid.uuid4()}.pdf",
)
session.add(doc)
await session.flush()
return doc
@pytest_asyncio.fixture
async def admin_client(db_session: AsyncSession):
"""Async client with get_current_admin overridden; yields (client, admin, session)."""
from main import app
admin = await _make_admin(db_session)
app.dependency_overrides[get_db] = lambda: db_session
app.dependency_overrides[get_current_admin] = lambda: admin
async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as c:
yield c, admin, db_session
app.dependency_overrides.clear()
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# SEC-08: credentials_enc never in API response # SEC-08: credentials_enc absent from all user-facing API responses (task 4-07-01)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@pytest.mark.xfail(strict=False) async def test_credentials_enc_not_in_response(async_client, auth_user, db_session):
async def test_credentials_enc_not_in_response(async_client, auth_user): """No user-facing API response includes credentials_enc.
"""No API response for current user includes credentials_enc field."""
pytest.xfail("not implemented yet") SEC-08: checks GET /api/documents (list) and GET /api/documents/{id} (single doc).
The implementation's _doc_to_dict whitelist must never include credentials_enc.
"""
doc = await _make_doc(db_session, auth_user["user"], "sec08_test.pdf")
await db_session.commit()
# 1. List endpoint
list_resp = await async_client.get("/api/documents", headers=auth_user["headers"])
assert list_resp.status_code == 200, list_resp.text
list_data = list_resp.json()
assert list_data["total"] >= 1
for item in list_data["items"]:
assert "credentials_enc" not in item, (
f"credentials_enc found in list item: {item}"
)
# 2. Single document endpoint
single_resp = await async_client.get(
f"/api/documents/{doc.id}", headers=auth_user["headers"]
)
assert single_resp.status_code == 200, single_resp.text
single_data = single_resp.json()
assert "credentials_enc" not in single_data, (
f"credentials_enc found in single doc response: {single_data}"
)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# SEC-09: Delete user cleans up MinIO objects # SEC-09: Admin delete user triggers MinIO object deletion before DB removal
# (task 4-07-02)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@pytest.mark.xfail(strict=False) async def test_delete_user_cleans_files(admin_client, monkeypatch):
async def test_delete_user_cleans_files(async_client, admin_user): """DELETE /api/admin/users/{id} calls delete_object for each user doc before DB removal.
"""Admin DELETE /api/admin/users/{id} triggers MinIO object deletion before DB removal."""
pytest.xfail("not implemented yet") SEC-09: MinIO objects deleted BEFORE user row is removed from the database.
Verifies:
1. delete_object is called at least once per document owned by the user.
2. The call happens (and is tracked) before the 204 response is returned.
3. The user is gone from the DB after the call.
"""
from unittest.mock import AsyncMock, patch
client, admin, session = admin_client
# Create a target regular user with 2 MinIO documents
target = await _make_regular_user(session)
doc1 = await _make_doc(session, target, "file1.pdf")
doc2 = await _make_doc(session, target, "file2.pdf")
await session.commit()
deleted_keys: list[str] = []
async def _fake_delete_object(self_inst, object_key: str) -> None:
deleted_keys.append(object_key)
# Patch the MinIO backend's delete_object so we can observe calls.
# The fake must accept self as first positional arg because it replaces
# an instance method on the class.
from storage.minio_backend import MinIOBackend
monkeypatch.setattr(MinIOBackend, "delete_object", _fake_delete_object, raising=False)
resp = await client.request(
"DELETE",
f"/api/admin/users/{target.id}",
json={"admin_password": "AdminPass1!Secret"},
)
assert resp.status_code == 204, f"Expected 204, got {resp.status_code}: {resp.text}"
# delete_object must have been called for BOTH documents
assert doc1.object_key in deleted_keys, (
f"delete_object not called for doc1.object_key={doc1.object_key!r}; "
f"called keys: {deleted_keys}"
)
assert doc2.object_key in deleted_keys, (
f"delete_object not called for doc2.object_key={doc2.object_key!r}; "
f"called keys: {deleted_keys}"
)
# Confirm the user is gone from the DB
from sqlalchemy import select as sa_select
result = await session.execute(sa_select(User).where(User.id == target.id))
assert result.scalar_one_or_none() is None, (
"User row still present in DB after admin delete"
)