From 21ec9cb4c3378c50c40b0ca2255602f50663cff4 Mon Sep 17 00:00:00 2001 From: curo1305 Date: Sat, 23 May 2026 13:42:37 +0200 Subject: [PATCH] test(03-01): add Wave 0 xfail stubs and shared fixtures for Phase 3 - Add auth_user, admin_user, mock_minio_presigned, mock_minio_stat fixtures to conftest.py - Create test_quota.py with 4 xfail stubs (STORE-03, STORE-05, STORE-06, SC2 race) - Append test_migration_0003 to test_alembic.py (full pre-seed + post-migration assertions) - Append 3 classifier xfail stubs (DOC-03, DOC-05, D-15) - Append 6 document xfail stubs (D-05, STORE-04, SEC-04, D-16) - Append 4 topic xfail stubs (DOC-04, D-09, D-17) - Append test_settings_endpoint_removed stub (D-12) - All 19 new test IDs collect cleanly with xfail(strict=False) --- backend/tests/conftest.py | 126 ++++++++++++++++++++++++++++++ backend/tests/test_alembic.py | 130 +++++++++++++++++++++++++++++++ backend/tests/test_classifier.py | 38 +++++++++ backend/tests/test_documents.py | 69 ++++++++++++++++ backend/tests/test_quota.py | 61 +++++++++++++++ backend/tests/test_settings.py | 17 ++++ backend/tests/test_topics.py | 51 ++++++++++++ 7 files changed, 492 insertions(+) create mode 100644 backend/tests/test_quota.py diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index e8c5ffe..8137b5c 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -176,3 +176,129 @@ def sample_pdf(tmp_path): doc.save(str(pdf_path)) doc.close() return pdf_path + + +# ── Phase 3 shared fixtures ─────────────────────────────────────────────────── +# These fixtures are used by test_quota.py, test_documents.py, test_topics.py, +# and test_classifier.py in Plans 03-02 through 03-04. + + +@pytest_asyncio.fixture +async def auth_user(db_session: AsyncSession): + """Create a regular user with a Quota row and return auth context. 
+ + Returns dict with keys: + - user: User ORM instance + - token: signed JWT access token + - headers: {"Authorization": "Bearer <token>"} + + The fixture issues a valid access token via services.auth.create_access_token + so that get_current_user accepts it in downstream endpoint tests. + """ + import uuid as _uuid + from db.models import User, Quota + from services.auth import hash_password, create_access_token + + user_id = _uuid.uuid4() + user = User( + id=user_id, + handle=f"testuser_{user_id.hex[:8]}", + email=f"testuser_{user_id.hex[:8]}@example.com", + password_hash=hash_password("Testpassword123!"), + role="user", + is_active=True, + password_must_change=False, + ) + quota = Quota( + user_id=user_id, + limit_bytes=104857600, # 100 MB + used_bytes=0, + ) + db_session.add(user) + db_session.add(quota) + await db_session.commit() + + token = create_access_token(str(user_id), "user") + return { + "user": user, + "token": token, + "headers": {"Authorization": f"Bearer {token}"}, + } + + +@pytest_asyncio.fixture +async def admin_user(db_session: AsyncSession): + """Create an admin user with a Quota row and return auth context. + + Returns the same dict shape as auth_user but with role="admin". 
+ """ + import uuid as _uuid + from db.models import User, Quota + from services.auth import hash_password, create_access_token + + user_id = _uuid.uuid4() + user = User( + id=user_id, + handle=f"adminuser_{user_id.hex[:8]}", + email=f"adminuser_{user_id.hex[:8]}@example.com", + password_hash=hash_password("Testpassword123!"), + role="admin", + is_active=True, + password_must_change=False, + ) + quota = Quota( + user_id=user_id, + limit_bytes=104857600, + used_bytes=0, + ) + db_session.add(user) + db_session.add(quota) + await db_session.commit() + + token = create_access_token(str(user_id), "admin") + return { + "user": user, + "token": token, + "headers": {"Authorization": f"Bearer {token}"}, + } + + +@pytest.fixture +def mock_minio_presigned(monkeypatch): + """Patch MinIOBackend.generate_presigned_put_url with an AsyncMock. + + The patched method does not exist yet — it is added in Plan 03-02. + Using raising=False ensures the patch installs before the attribute exists. + + Yields the AsyncMock so tests can assert call counts and args. + """ + from unittest.mock import AsyncMock + + mock = AsyncMock(return_value="http://localhost:9000/docuvault/test-presigned-url") + try: + from storage.minio_backend import MinIOBackend + monkeypatch.setattr(MinIOBackend, "generate_presigned_put_url", mock, raising=False) + except ImportError: + pass # storage module not yet available — patch is best-effort + yield mock + + +@pytest.fixture +def mock_minio_stat(monkeypatch): + """Patch MinIOBackend.stat_object with an AsyncMock returning 1024 bytes. + + The patched method does not exist yet — it is added in Plan 03-02. + Using raising=False ensures the patch installs before the attribute exists. 
+ + Yields the AsyncMock for per-test customization: + mock_minio_stat.return_value = 50_000_000 + """ + from unittest.mock import AsyncMock + + mock = AsyncMock(return_value=1024) + try: + from storage.minio_backend import MinIOBackend + monkeypatch.setattr(MinIOBackend, "stat_object", mock, raising=False) + except ImportError: + pass # storage module not yet available — patch is best-effort + yield mock diff --git a/backend/tests/test_alembic.py b/backend/tests/test_alembic.py index 601a6bb..9062216 100644 --- a/backend/tests/test_alembic.py +++ b/backend/tests/test_alembic.py @@ -114,3 +114,133 @@ def test_documents_user_id_nullable(tmp_path, monkeypatch): assert columns["user_id"]["notnull"] == 0, ( "documents.user_id is NOT NULL but D-03 requires it to be nullable in Phase 1" ) + + +# --------------------------------------------------------------------------- +# Test 3: migration 0003 — null-user cleanup + NOT NULL + quota reconciliation +# --------------------------------------------------------------------------- + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-01 migration step") +def test_migration_0003(tmp_path, monkeypatch): + """After alembic upgrade head applying 0003: + + - The documents row with user_id=None is deleted (D-01, D-02). + - The documents row with a real user_id is preserved. + - PRAGMA table_info shows documents.user_id notnull=1. + - All topics rows are deleted (D-10). + - ix_topics_user_id exists in sqlite_master. + - quotas.used_bytes for the populated user equals SUM(size_bytes). 
+ """ + try: + import alembic.command + from alembic.config import Config + except ImportError as exc: + pytest.skip(f"alembic not installed: {exc}") + + import sqlite3 + import uuid + + db_path = tmp_path / "test_0003.db" + db_url = f"sqlite+aiosqlite:///{db_path}" + + monkeypatch.setenv("DATABASE_MIGRATE_URL", db_url) + + alembic_cfg = Config("alembic.ini") + alembic_cfg.set_main_option("sqlalchemy.url", db_url) + + # Apply migrations up to 0002 so the schema is in the pre-0003 state + try: + alembic.command.upgrade(alembic_cfg, "0002") + except Exception as exc: + pytest.skip(f"alembic upgrade to 0002 failed: {exc}") + + # Pre-seed test data using raw sqlite3 (synchronous) + user_id = str(uuid.uuid4()) + doc_id_null = str(uuid.uuid4()) + doc_id_user = str(uuid.uuid4()) + topic_id = str(uuid.uuid4()) + + conn = sqlite3.connect(str(db_path)) + try: + # Insert a user so we can seed a quota and a user-owned document + conn.execute( + "INSERT INTO users (id, handle, email, password_hash, role, is_active, " + "password_must_change, default_storage_backend) " + "VALUES (?, ?, ?, ?, ?, 1, 0, 'minio')", + (user_id, "testuser", "test@example.com", "hash", "user"), + ) + conn.execute( + "INSERT INTO quotas (user_id, limit_bytes, used_bytes) VALUES (?, ?, ?)", + (user_id, 104857600, 0), + ) + # Null-user document (to be deleted) + conn.execute( + "INSERT INTO documents (id, user_id, filename, object_key, content_type, " + "size_bytes, storage_backend, status) VALUES (?, NULL, ?, ?, ?, ?, 'minio', 'uploaded')", + (doc_id_null, "null_doc.txt", "null/key.txt", "text/plain", 1000), + ) + # User-owned document (to be preserved) + conn.execute( + "INSERT INTO documents (id, user_id, filename, object_key, content_type, " + "size_bytes, storage_backend, status) VALUES (?, ?, ?, ?, ?, ?, 'minio', 'uploaded')", + (doc_id_user, user_id, "user_doc.txt", "user/key.txt", "text/plain", 2048), + ) + # A topic row (all topics deleted in 0003) + conn.execute( + "INSERT INTO topics (id, 
user_id, name, description, color) " + "VALUES (?, NULL, ?, '', ?)", + (topic_id, "Finance", "#6366f1"), + ) + conn.commit() + finally: + conn.close() + + # Apply migration 0003 (no MinIO env set — MinIO step is skipped safely) + try: + alembic.command.upgrade(alembic_cfg, "0003") + except Exception as exc: + pytest.fail(f"alembic upgrade to 0003 failed: {exc}") + + conn = sqlite3.connect(str(db_path)) + try: + # 1. Null-user document must be gone + cursor = conn.execute( + "SELECT id FROM documents WHERE id = ?", (doc_id_null,) + ) + assert cursor.fetchone() is None, "Null-user document was not deleted by migration 0003" + + # 2. User-owned document must be preserved + cursor = conn.execute( + "SELECT id FROM documents WHERE id = ?", (doc_id_user,) + ) + assert cursor.fetchone() is not None, "User-owned document was incorrectly deleted" + + # 3. documents.user_id must now be NOT NULL + cursor = conn.execute("PRAGMA table_info(documents)") + columns = {row[1]: {"notnull": row[3]} for row in cursor.fetchall()} + assert columns["user_id"]["notnull"] == 1, ( + "documents.user_id is still nullable after migration 0003" + ) + + # 4. All topics rows must be deleted + cursor = conn.execute("SELECT COUNT(*) FROM topics") + count = cursor.fetchone()[0] + assert count == 0, f"Expected 0 topics after migration 0003, found {count}" + + # 5. ix_topics_user_id must exist + cursor = conn.execute( + "SELECT name FROM sqlite_master WHERE type='index' AND name='ix_topics_user_id'" + ) + assert cursor.fetchone() is not None, "ix_topics_user_id index not created by migration 0003" + + # 6. 
quotas.used_bytes must equal SUM(size_bytes) for the user + cursor = conn.execute( + "SELECT used_bytes FROM quotas WHERE user_id = ?", (user_id,) + ) + row = cursor.fetchone() + assert row is not None, "Quota row not found for test user" + assert row[0] == 2048, ( + f"quotas.used_bytes should be 2048 (SUM of user docs) but is {row[0]}" + ) + finally: + conn.close() diff --git a/backend/tests/test_classifier.py b/backend/tests/test_classifier.py index 67dbdbe..19f5e36 100644 --- a/backend/tests/test_classifier.py +++ b/backend/tests/test_classifier.py @@ -108,3 +108,41 @@ async def test_classifier_with_mock_provider(isolated_data_dir): # Verify document was updated meta = st.get_metadata(doc_id) assert "Finance" in meta["topics"] + + +# --------------------------------------------------------------------------- +# Wave 0 xfail stubs for per-user AI provider resolution — Plan 03-04 +# --------------------------------------------------------------------------- + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-04") +async def test_per_user_provider(db_session): + """When user.ai_provider='openai' and user.ai_model='gpt-4o', the classifier + resolves _settings['active_provider'] == 'openai'. + + DOC-03: AI provider/model comes from the user's DB record, not from global + config or the retired load_settings() flat file (CONTEXT.md D-14). + """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-04") +async def test_celery_task_uses_user_provider(db_session): + """Calling _run(document_id) for a Document owned by user.ai_provider='anthropic' + calls classifier with ai_provider='anthropic'. + + DOC-05: the Celery extract_and_classify task resolves per-user AI config via + a second DB lookup (doc.user_id → user.ai_provider/ai_model) and passes it + to the classifier (CONTEXT.md D-14). 
+ """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-04") +async def test_default_provider_fallback(db_session): + """When user.ai_provider is None, the classifier receives config.settings.default_ai_provider. + + D-15: fallback chain is user.ai_provider → DEFAULT_AI_PROVIDER env var → + code default 'ollama' (CONTEXT.md D-15). + """ + assert True # scaffold diff --git a/backend/tests/test_documents.py b/backend/tests/test_documents.py index 0203429..dd116ab 100644 --- a/backend/tests/test_documents.py +++ b/backend/tests/test_documents.py @@ -152,3 +152,72 @@ async def test_upload_persists_to_postgres_and_minio(async_client, sample_txt): assert get_resp.status_code == 200 get_data = get_resp.json() assert get_data["original_name"] == "sample.txt" + + +# --------------------------------------------------------------------------- +# Wave 0 xfail stubs for Phase 3 document endpoint tests — Plans 03-02 / 03-03 +# --------------------------------------------------------------------------- + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") +async def test_upload_url_endpoint(async_client, auth_user, mock_minio_presigned): + """POST /api/documents/upload-url returns {upload_url, document_id} and creates + a Document row with status='pending'. + + D-05: two-step upload flow — step 1 creates the pending Document row and + returns the presigned PUT URL (15-min TTL). Quota is NOT reserved here. + """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") +async def test_confirm_endpoint(async_client, auth_user, mock_minio_presigned, mock_minio_stat): + """POST /api/documents/{id}/confirm calls stat_object once, updates Document.size_bytes + from the stat return value, and sets Document.status='uploaded'. + + D-05: step 3 of the presigned upload flow. stat_object provides the authoritative + file size (D-07). The atomic quota UPDATE runs here (STORE-03). 
+ """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") +async def test_get_quota(async_client, auth_user): + """GET /api/auth/me/quota returns {used_bytes: 0, limit_bytes: 104857600}. + + STORE-04: quota usage bar endpoint. Returns current usage and limit for the + authenticated user. Newly created users start at used_bytes=0. + """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-03") +async def test_cross_user_access_404(async_client, auth_user, db_session): + """User B's request for GET /api/documents/{A_doc_id} returns 404. + + SEC-04: cross-user access returns 404 (not 403) to avoid information leakage + (CONTEXT.md D-16). An attacker cannot distinguish between 'document does not + exist' and 'document belongs to someone else'. + """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-03") +async def test_admin_cannot_access_documents(async_client, admin_user): + """GET /api/documents using admin_user.headers returns 403. + + SEC-04 SC4: admin accounts cannot access document content (CLAUDE.md + + CONTEXT.md D-16). The get_regular_user dependency enforces this for all + /api/documents/* handlers. + """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") +async def test_documents_require_auth(async_client): + """Anonymous GET /api/documents (no Authorization header) returns 401 or 403. + + D-16: all /api/documents/* endpoints require authentication via + get_current_user (Phase 2 D-07 fulfilled in Phase 3). + """ + assert True # scaffold diff --git a/backend/tests/test_quota.py b/backend/tests/test_quota.py new file mode 100644 index 0000000..aee3662 --- /dev/null +++ b/backend/tests/test_quota.py @@ -0,0 +1,61 @@ +""" +Wave 0 xfail stubs for quota enforcement tests — Plan 03-02 implements these. 
+ +Requirements covered: + STORE-03 — Atomic quota enforcement at upload (no double-spend) + STORE-03 SC2 — Two concurrent uploads at quota limit → exactly one 413 + STORE-05 — Confirm endpoint returns 413 with {used_bytes, limit_bytes, rejected_bytes} + STORE-06 — Document delete atomically decrements quota +""" +from __future__ import annotations + +import pytest + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") +async def test_quota_increment_atomic( + async_client, auth_user, mock_minio_presigned, mock_minio_stat +): + """After one confirmed upload of 50 MB, GET /api/auth/me/quota returns used_bytes == 50_000_000. + + STORE-03: atomic quota enforcement at the /confirm endpoint. + stat_object returns the authoritative file size (D-07). + """ + mock_minio_stat.return_value = 50_000_000 + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") +async def test_concurrent_quota_race( + async_client, auth_user, mock_minio_presigned, mock_minio_stat +): + """Two concurrent /confirm POSTs for documents totaling 110 MB against a 100 MB quota. + + STORE-03 SC2: exactly one request returns 200 and the other returns 413. + Uses asyncio.gather to fire both confirm requests concurrently — verifies that + PostgreSQL's row-level locking on the atomic UPDATE prevents double-spend. + """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") +async def test_quota_exceeded_response( + async_client, auth_user, mock_minio_presigned, mock_minio_stat +): + """When quota is exceeded, /confirm returns 413 with the expected body shape. + + STORE-05: body must be {"detail": {"used_bytes": N, "limit_bytes": M, "rejected_bytes": K}}. 
+ """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") +async def test_delete_decrements_quota( + async_client, auth_user, mock_minio_presigned, mock_minio_stat +): + """Upload + confirm a document, then DELETE it; GET /api/auth/me/quota returns used_bytes == 0. + + STORE-06: document delete atomically decrements quota. + Uses GREATEST(0, used_bytes - delta) to prevent underflow (CONTEXT.md D-07). + """ + assert True # scaffold diff --git a/backend/tests/test_settings.py b/backend/tests/test_settings.py index cec9d95..05c6966 100644 --- a/backend/tests/test_settings.py +++ b/backend/tests/test_settings.py @@ -104,3 +104,20 @@ async def test_get_default_prompt(async_client): assert resp.status_code == 200 assert "system_prompt" in resp.json() assert len(resp.json()["system_prompt"]) > 0 + + +# --------------------------------------------------------------------------- +# Wave 0 xfail stub — D-12: /api/settings endpoint removed in Plan 03-04 +# --------------------------------------------------------------------------- + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-04") +async def test_settings_endpoint_removed(async_client): + """GET /api/settings returns 404 after the flat-file settings system is retired. + + D-12: the /api/settings endpoint is removed entirely in Phase 3. All AI config + comes from the database (users.ai_provider / users.ai_model set by admin). + The flat-file services/storage.py load_settings()/save_settings() functions + are also deleted (CONTEXT.md D-12). + """ + assert True # scaffold diff --git a/backend/tests/test_topics.py b/backend/tests/test_topics.py index d39641e..acec8ba 100644 --- a/backend/tests/test_topics.py +++ b/backend/tests/test_topics.py @@ -6,6 +6,8 @@ updated to async in Plan 05 to match the new session-injected API routes. 
""" from __future__ import annotations +import pytest + async def test_list_topics_empty(async_client): resp = await async_client.get("/api/topics") @@ -88,3 +90,52 @@ async def test_delete_topic_cascades_to_documents(async_client, db_session, samp async def test_delete_topic_not_found(async_client): resp = await async_client.delete("/api/topics/nonexistent") assert resp.status_code == 404 + + +# --------------------------------------------------------------------------- +# Wave 0 xfail stubs for Phase 3 topic namespace tests — Plan 03-03 +# --------------------------------------------------------------------------- + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-03") +async def test_topic_namespace(async_client, auth_user, db_session): + """GET /api/topics returns only system topics (user_id=NULL) + auth_user-owned topics. + + DOC-04: layered topic namespace — system topics (user_id=NULL) are visible to + all users; per-user topics (user_id=current_user.id) are visible only to that + user. A different user's topics must not appear (CONTEXT.md D-08, D-17). + + Test setup: seed one system topic, one auth_user-owned topic, one topic owned + by a different user. GET /api/topics must return exactly the first two. + """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-03") +async def test_admin_create_system_topic(async_client, admin_user): + """POST /api/admin/topics returns 201 and creates a Topic with user_id=NULL. + + D-09: only admin can create system topics via POST /api/admin/topics. + The created topic has user_id=NULL and is visible to all users. + """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-03") +async def test_regular_user_cannot_create_system_topic(async_client, auth_user): + """POST /api/admin/topics with auth_user.headers returns 403. + + D-09: the admin topics endpoint requires get_current_admin; regular users + receive 403 Forbidden. 
+ """ + assert True # scaffold + + +@pytest.mark.xfail(strict=False, reason="implemented in plan 03-03") +async def test_topics_require_auth(async_client): + """Anonymous GET /api/topics (no Authorization header) returns 401 or 403. + + D-17: /api/topics/* gains get_current_user in Phase 3 — anonymous access + must be rejected. + """ + assert True # scaffold