""" Document API tests — async only (Plan 05 cutover). Legacy sync tests (using the flat-file storage layer) were deleted in Plan 05. All tests here use async_client (httpx.AsyncClient + ASGITransport + in-memory SQLite). """ from __future__ import annotations import re import pytest async def test_upload_txt_no_classify(async_client, sample_txt): with open(sample_txt, "rb") as f: resp = await async_client.post( "/api/documents/upload", files={"file": ("sample.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) assert resp.status_code == 200 data = resp.json() assert data["original_name"] == "sample.txt" assert "extracted_text" in data assert "invoices" in data["extracted_text"].lower() or len(data["extracted_text"]) > 0 assert data["topics"] == [] assert "id" in data async def test_upload_pdf_no_classify(async_client, sample_pdf): with open(sample_pdf, "rb") as f: resp = await async_client.post( "/api/documents/upload", files={"file": ("sample.pdf", f, "application/pdf")}, data={"auto_classify": "false"}, ) assert resp.status_code == 200 data = resp.json() assert data["mime_type"] == "application/pdf" assert len(data["extracted_text"]) > 0 async def test_list_documents(async_client, sample_txt): with open(sample_txt, "rb") as f: await async_client.post( "/api/documents/upload", files={"file": ("a.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) resp = await async_client.get("/api/documents") assert resp.status_code == 200 data = resp.json() assert data["total"] == 1 assert len(data["items"]) == 1 async def test_list_documents_filter_by_topic(async_client, db_session, sample_txt): with open(sample_txt, "rb") as f: upload = ( await async_client.post( "/api/documents/upload", files={"file": ("a.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) ).json() # Wire a topic via the storage service directly (replaces old flat-file call) from services import storage await storage.update_document_topics(db_session, upload["id"], ["finance"]) resp = await async_client.get("/api/documents?topic=finance") assert resp.json()["total"] == 1 resp2 = await async_client.get("/api/documents?topic=legal") assert resp2.json()["total"] == 0 async def test_get_document(async_client, sample_txt): with open(sample_txt, "rb") as f: upload = ( await async_client.post( "/api/documents/upload", files={"file": ("a.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) ).json() resp = await async_client.get(f"/api/documents/{upload['id']}") assert resp.status_code == 200 assert resp.json()["id"] == upload["id"] async def test_get_document_not_found(async_client): resp = await async_client.get("/api/documents/nonexistent") assert resp.status_code == 404 async def test_delete_document(async_client, sample_txt): with open(sample_txt, "rb") as f: upload = ( await async_client.post( "/api/documents/upload", files={"file": ("a.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) ).json() resp = await async_client.delete(f"/api/documents/{upload['id']}") assert resp.status_code == 200 assert resp.json()["success"] is True resp2 = await async_client.get(f"/api/documents/{upload['id']}") assert resp2.status_code == 404 async def test_delete_document_not_found(async_client): resp = await async_client.delete("/api/documents/nonexistent") assert resp.status_code == 404 async def test_upload_empty_file(async_client): resp = await async_client.post( "/api/documents/upload", files={"file": ("empty.txt", b"", "text/plain")}, data={"auto_classify": "false"}, ) assert resp.status_code == 400 async def test_upload_persists_to_postgres_and_minio(async_client, sample_txt): """After a successful upload, document is persisted and queryable via GET (STORE-01, STORE-02).""" with open(sample_txt, "rb") as f: resp = await async_client.post( "/api/documents/upload", files={"file": ("sample.txt", f, "text/plain")}, data={"auto_classify": "false"}, ) assert resp.status_code == 200 data = resp.json() # Response must include a UUID-format id uuid_pattern = re.compile( r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" ) assert "id" in data, "Upload response missing 'id'" assert uuid_pattern.match(data["id"]), f"id '{data['id']}' is not a UUID" # Metadata round-trips via GET doc_id = data["id"] get_resp = await async_client.get(f"/api/documents/{doc_id}") assert get_resp.status_code == 200 get_data = get_resp.json() assert get_data["original_name"] == "sample.txt" # --------------------------------------------------------------------------- # Wave 0 xfail stubs for Phase 3 document endpoint tests — Plans 03-02 / 03-03 # --------------------------------------------------------------------------- @pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") async def test_upload_url_endpoint(async_client, auth_user, mock_minio_presigned): """POST /api/documents/upload-url returns {upload_url, document_id} and creates a Document row with status='pending'. D-05: two-step upload flow — step 1 creates the pending Document row and returns the presigned PUT URL (15-min TTL). Quota is NOT reserved here. """ assert True # scaffold @pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") async def test_confirm_endpoint(async_client, auth_user, mock_minio_presigned, mock_minio_stat): """POST /api/documents/{id}/confirm calls stat_object once, updates Document.size_bytes from the stat return value, and sets Document.status='uploaded'. D-05: step 3 of the presigned upload flow. stat_object provides the authoritative file size (D-07). The atomic quota UPDATE runs here (STORE-03). """ assert True # scaffold @pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") async def test_get_quota(async_client, auth_user): """GET /api/auth/me/quota returns {used_bytes: 0, limit_bytes: 104857600}. STORE-04: quota usage bar endpoint. Returns current usage and limit for the authenticated user. Newly created users start at used_bytes=0. """ assert True # scaffold @pytest.mark.xfail(strict=False, reason="implemented in plan 03-03") async def test_cross_user_access_404(async_client, auth_user, db_session): """User B's request for GET /api/documents/{A_doc_id} returns 404. SEC-04: cross-user access returns 404 (not 403) to avoid information leakage (CONTEXT.md D-16). An attacker cannot distinguish between 'document does not exist' and 'document belongs to someone else'. """ assert True # scaffold @pytest.mark.xfail(strict=False, reason="implemented in plan 03-03") async def test_admin_cannot_access_documents(async_client, admin_user): """GET /api/documents using admin_user.headers returns 403. SEC-04 SC4: admin accounts cannot access document content (CLAUDE.md + CONTEXT.md D-16). The get_regular_user dependency enforces this for all /api/documents/* handlers. """ assert True # scaffold @pytest.mark.xfail(strict=False, reason="implemented in plan 03-02") async def test_documents_require_auth(async_client): """Anonymous GET /api/documents (no Authorization header) returns 401 or 403. D-16: all /api/documents/* endpoints require authentication via get_current_user (Phase 2 D-07 fulfilled in Phase 3). """ assert True # scaffold