21ec9cb4c3
- Add auth_user, admin_user, mock_minio_presigned, mock_minio_stat fixtures to conftest.py - Create test_quota.py with 4 xfail stubs (STORE-03, STORE-05, STORE-06, SC2 race) - Append test_migration_0003 to test_alembic.py (full pre-seed + post-migration assertions) - Append 3 classifier xfail stubs (DOC-03, DOC-05, D-15) - Append 6 document xfail stubs (D-05, STORE-04, SEC-04, D-16) - Append 4 topic xfail stubs (DOC-04, D-09, D-17) - Append test_settings_endpoint_removed stub (D-12) - All 19 new test IDs collect cleanly with xfail(strict=False)
224 lines
7.9 KiB
Python
224 lines
7.9 KiB
Python
"""
|
|
Document API tests — async only (Plan 05 cutover).
|
|
|
|
Legacy sync tests (using the flat-file storage layer) were deleted in Plan 05.
|
|
All tests here use async_client (httpx.AsyncClient + ASGITransport + in-memory SQLite).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
import pytest
|
|
|
|
|
|
async def test_upload_txt_no_classify(async_client, sample_txt):
    """Upload a plain-text file with auto-classification switched off.

    Expects HTTP 200 and a JSON body that echoes the original file name,
    carries an ``extracted_text`` field, has an empty ``topics`` list and
    includes an ``id``.
    """
    form_fields = {"auto_classify": "false"}
    with open(sample_txt, "rb") as handle:
        response = await async_client.post(
            "/api/documents/upload",
            files={"file": ("sample.txt", handle, "text/plain")},
            data=form_fields,
        )

    assert response.status_code == 200
    body = response.json()

    assert body["original_name"] == "sample.txt"
    assert "extracted_text" in body
    extracted = body["extracted_text"]
    # Passes when the keyword is present, or otherwise when any text at all
    # was extracted.
    assert "invoices" in extracted.lower() or len(extracted) > 0
    assert body["topics"] == []
    assert "id" in body
|
|
|
|
|
|
async def test_upload_pdf_no_classify(async_client, sample_pdf):
    """Upload a PDF with auto-classification switched off.

    Expects HTTP 200, a ``mime_type`` of ``application/pdf`` and a
    non-empty ``extracted_text`` in the JSON response.
    """
    form_fields = {"auto_classify": "false"}
    with open(sample_pdf, "rb") as handle:
        response = await async_client.post(
            "/api/documents/upload",
            files={"file": ("sample.pdf", handle, "application/pdf")},
            data=form_fields,
        )

    assert response.status_code == 200
    body = response.json()
    assert body["mime_type"] == "application/pdf"
    assert len(body["extracted_text"]) > 0
|
|
|
|
|
|
async def test_list_documents(async_client, sample_txt):
    """List documents after a single upload.

    Uploads one text file (auto-classification off), then checks that
    ``GET /api/documents`` answers HTTP 200 with ``total == 1`` and exactly
    one entry in ``items``.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Fail at the setup step if the upload itself broke, instead of surfacing
    # later as a misleading count mismatch.
    assert upload_resp.status_code == 200

    resp = await async_client.get("/api/documents")
    assert resp.status_code == 200
    data = resp.json()
    assert data["total"] == 1
    assert len(data["items"]) == 1
|
|
|
|
|
|
async def test_list_documents_filter_by_topic(async_client, db_session, sample_txt):
    """Filter the document listing by topic.

    Uploads one text file, attaches the ``finance`` topic to it through the
    storage service, then checks that ``?topic=finance`` reports one
    document and ``?topic=legal`` reports none.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Check the setup upload explicitly so a failure is reported here rather
    # than as a KeyError on upload["id"] below.
    assert upload_resp.status_code == 200
    upload = upload_resp.json()

    # Wire a topic via the storage service directly (replaces old flat-file call)
    from services import storage

    await storage.update_document_topics(db_session, upload["id"], ["finance"])

    resp = await async_client.get("/api/documents?topic=finance")
    assert resp.status_code == 200
    assert resp.json()["total"] == 1

    resp2 = await async_client.get("/api/documents?topic=legal")
    assert resp2.status_code == 200
    assert resp2.json()["total"] == 0
|
|
|
|
|
|
async def test_get_document(async_client, sample_txt):
    """Fetch a single document by id after uploading it.

    Uploads one text file, then checks that ``GET /api/documents/{id}``
    answers HTTP 200 and returns the same ``id``.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Check the setup upload explicitly so a failure is reported here rather
    # than as a KeyError on upload["id"] below.
    assert upload_resp.status_code == 200
    upload = upload_resp.json()

    resp = await async_client.get(f"/api/documents/{upload['id']}")
    assert resp.status_code == 200
    assert resp.json()["id"] == upload["id"]
|
|
|
|
|
|
async def test_get_document_not_found(async_client):
    """Requesting the id ``nonexistent`` must answer HTTP 404."""
    missing = await async_client.get("/api/documents/nonexistent")
    assert missing.status_code == 404
|
|
|
|
|
|
async def test_delete_document(async_client, sample_txt):
    """Delete an uploaded document and confirm it is gone.

    Uploads one text file, deletes it (expects HTTP 200 with
    ``success is True``), then checks that a follow-up GET for the same id
    answers HTTP 404.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Check the setup upload explicitly so a failure is reported here rather
    # than as a KeyError on upload["id"] below.
    assert upload_resp.status_code == 200
    upload = upload_resp.json()

    resp = await async_client.delete(f"/api/documents/{upload['id']}")
    assert resp.status_code == 200
    assert resp.json()["success"] is True

    resp2 = await async_client.get(f"/api/documents/{upload['id']}")
    assert resp2.status_code == 404
|
|
|
|
|
|
async def test_delete_document_not_found(async_client):
    """Deleting the id ``nonexistent`` must answer HTTP 404."""
    missing = await async_client.delete("/api/documents/nonexistent")
    assert missing.status_code == 404
|
|
|
|
|
|
async def test_upload_empty_file(async_client):
    """Uploading a zero-byte text file must be rejected with HTTP 400."""
    empty_upload = {"file": ("empty.txt", b"", "text/plain")}
    form_fields = {"auto_classify": "false"}

    response = await async_client.post(
        "/api/documents/upload",
        files=empty_upload,
        data=form_fields,
    )

    assert response.status_code == 400
|
|
|
|
|
|
async def test_upload_persists_to_postgres_and_minio(async_client, sample_txt):
    """Check that an uploaded document is persisted and readable back via GET.

    Covers STORE-01 and STORE-02: the upload response must carry an ``id``
    in lowercase hyphenated UUID form, and fetching that id must return the
    original file name.
    """
    with open(sample_txt, "rb") as handle:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("sample.txt", handle, "text/plain")},
            data={"auto_classify": "false"},
        )
    assert upload_resp.status_code == 200
    data = upload_resp.json()

    # The response has to expose an id, and that id has to look like a UUID.
    assert "id" in data, "Upload response missing 'id'"
    looks_like_uuid = re.match(
        r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
        data["id"],
    )
    assert looks_like_uuid, f"id '{data['id']}' is not a UUID"

    # The stored metadata must come back unchanged from the detail endpoint.
    doc_id = data["id"]
    fetched = await async_client.get(f"/api/documents/{doc_id}")
    assert fetched.status_code == 200
    assert fetched.json()["original_name"] == "sample.txt"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Wave 0 xfail stubs for Phase 3 document endpoint tests — Plans 03-02 / 03-03
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02")
async def test_upload_url_endpoint(async_client, auth_user, mock_minio_presigned):
    """Scaffold for step 1 of the two-step upload flow (D-05).

    Intended check: POST /api/documents/upload-url responds with
    {upload_url, document_id} and creates a Document row whose status is
    'pending'. The URL is a presigned PUT URL with a 15-minute TTL, and no
    quota is reserved at this step.

    The body is a placeholder until plan 03-02 supplies the real assertions.
    """
    assert True  # scaffold
|
|
|
|
|
|
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02")
async def test_confirm_endpoint(async_client, auth_user, mock_minio_presigned, mock_minio_stat):
    """Scaffold for step 3 of the presigned upload flow (D-05).

    Intended check: POST /api/documents/{id}/confirm calls stat_object
    exactly once, copies the size it reports into Document.size_bytes, and
    sets Document.status to 'uploaded'. stat_object is the authoritative
    source for the file size (D-07), and the atomic quota UPDATE runs in
    this step (STORE-03).

    The body is a placeholder until plan 03-02 supplies the real assertions.
    """
    assert True  # scaffold
|
|
|
|
|
|
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02")
async def test_get_quota(async_client, auth_user):
    """Scaffold for the quota usage endpoint (STORE-04).

    Intended check: GET /api/auth/me/quota returns
    {used_bytes: 0, limit_bytes: 104857600} for the authenticated user.
    The endpoint feeds the quota usage bar with current usage and limit;
    a newly created user starts at used_bytes=0.

    The body is a placeholder until plan 03-02 supplies the real assertions.
    """
    assert True  # scaffold
|
|
|
|
|
|
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-03")
async def test_cross_user_access_404(async_client, auth_user, db_session):
    """Scaffold for cross-user document isolation (SEC-04).

    Intended check: when user B requests GET /api/documents/{A_doc_id},
    the response is 404 rather than 403 (CONTEXT.md D-16). Answering 404
    avoids leaking information: an attacker cannot tell 'document does not
    exist' apart from 'document belongs to someone else'.

    The body is a placeholder until plan 03-03 supplies the real assertions.
    """
    assert True  # scaffold
|
|
|
|
|
|
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-03")
async def test_admin_cannot_access_documents(async_client, admin_user):
    """Scaffold for the admin document-access restriction (SEC-04 SC4).

    Intended check: GET /api/documents sent with admin_user.headers
    returns 403. Admin accounts cannot access document content (CLAUDE.md +
    CONTEXT.md D-16); the get_regular_user dependency enforces this for
    all /api/documents/* handlers.

    The body is a placeholder until plan 03-03 supplies the real assertions.
    """
    assert True  # scaffold
|
|
|
|
|
|
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02")
async def test_documents_require_auth(async_client):
    """Scaffold for the authentication requirement on document endpoints (D-16).

    Intended check: an anonymous GET /api/documents, sent without an
    Authorization header, returns 401 or 403. All /api/documents/*
    endpoints require authentication via get_current_user (Phase 2 D-07
    fulfilled in Phase 3).

    The body is a placeholder until plan 03-02 supplies the real assertions.
    """
    assert True  # scaffold
|