Files
kite/backend/tests/test_documents.py
T
curo1305 21ec9cb4c3 test(03-01): add Wave 0 xfail stubs and shared fixtures for Phase 3
- Add auth_user, admin_user, mock_minio_presigned, mock_minio_stat fixtures to conftest.py
- Create test_quota.py with 4 xfail stubs (STORE-03, STORE-05, STORE-06, SC2 race)
- Append test_migration_0003 to test_alembic.py (full pre-seed + post-migration assertions)
- Append 3 classifier xfail stubs (DOC-03, DOC-05, D-15)
- Append 6 document xfail stubs (D-05, STORE-04, SEC-04, D-16)
- Append 4 topic xfail stubs (DOC-04, D-09, D-17)
- Append test_settings_endpoint_removed stub (D-12)
- All 19 new test IDs collect cleanly with xfail(strict=False)
2026-05-23 13:42:37 +02:00

224 lines
7.9 KiB
Python

"""
Document API tests — async only (Plan 05 cutover).
Legacy sync tests (using the flat-file storage layer) were deleted in Plan 05.
All tests here use async_client (httpx.AsyncClient + ASGITransport + in-memory SQLite).
"""
from __future__ import annotations
import re
import pytest
async def test_upload_txt_no_classify(async_client, sample_txt):
    """Upload a plain-text file with auto-classification off and check the payload.

    Posts the ``sample_txt`` fixture file as ``sample.txt`` and verifies the
    response carries the original name, extracted text, an empty topic list,
    and an ``id``.
    """
    with open(sample_txt, "rb") as handle:
        response = await async_client.post(
            "/api/documents/upload",
            files={"file": ("sample.txt", handle, "text/plain")},
            data={"auto_classify": "false"},
        )

    assert response.status_code == 200
    payload = response.json()
    assert payload["original_name"] == "sample.txt"
    assert "extracted_text" in payload
    text = payload["extracted_text"]
    # Because of the ``or``, this effectively only requires non-empty text:
    # any text containing "invoices" is necessarily non-empty.
    assert "invoices" in text.lower() or len(text) > 0
    assert payload["topics"] == []
    assert "id" in payload
async def test_upload_pdf_no_classify(async_client, sample_pdf):
    """Upload a PDF with auto-classification off and check MIME type and text.

    Posts the ``sample_pdf`` fixture file as ``sample.pdf`` and verifies the
    response reports ``application/pdf`` and non-empty extracted text.
    """
    with open(sample_pdf, "rb") as handle:
        response = await async_client.post(
            "/api/documents/upload",
            files={"file": ("sample.pdf", handle, "application/pdf")},
            data={"auto_classify": "false"},
        )

    assert response.status_code == 200
    payload = response.json()
    assert payload["mime_type"] == "application/pdf"
    assert len(payload["extracted_text"]) > 0
async def test_list_documents(async_client, sample_txt):
    """Listing documents after a single upload reports exactly one document.

    Uploads the ``sample_txt`` fixture file as ``a.txt`` with
    auto-classification disabled, then checks that ``GET /api/documents``
    returns ``total == 1`` and a single entry in ``items``.
    """
    with open(sample_txt, "rb") as f:
        upload = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Fail here, with a clear cause, if the setup upload did not succeed,
    # instead of later with a misleading count mismatch.
    assert upload.status_code == 200, f"setup upload failed: {upload.status_code}"

    resp = await async_client.get("/api/documents")
    assert resp.status_code == 200
    data = resp.json()
    assert data["total"] == 1
    assert len(data["items"]) == 1
async def test_list_documents_filter_by_topic(async_client, db_session, sample_txt):
    """The ``topic`` query parameter filters the document listing.

    Uploads one document, assigns it the ``finance`` topic through the storage
    service, then checks that filtering by ``finance`` yields one document and
    filtering by ``legal`` yields none.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Check the setup upload before reading its body, so a failed upload is
    # reported as such rather than as a KeyError on upload["id"].
    assert upload_resp.status_code == 200, (
        f"setup upload failed: {upload_resp.status_code}"
    )
    upload = upload_resp.json()

    # Wire a topic via the storage service directly (replaces old flat-file call)
    from services import storage

    await storage.update_document_topics(db_session, upload["id"], ["finance"])

    resp = await async_client.get("/api/documents?topic=finance")
    assert resp.status_code == 200
    assert resp.json()["total"] == 1

    resp2 = await async_client.get("/api/documents?topic=legal")
    assert resp2.status_code == 200
    assert resp2.json()["total"] == 0
async def test_get_document(async_client, sample_txt):
    """A freshly uploaded document can be fetched by its id.

    Uploads the ``sample_txt`` fixture file, then checks that
    ``GET /api/documents/{id}`` returns 200 and echoes the same ``id``.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Check the setup upload before reading its body, so a failed upload is
    # reported as such rather than as a KeyError on upload["id"].
    assert upload_resp.status_code == 200, (
        f"setup upload failed: {upload_resp.status_code}"
    )
    upload = upload_resp.json()

    resp = await async_client.get(f"/api/documents/{upload['id']}")
    assert resp.status_code == 200
    assert resp.json()["id"] == upload["id"]
async def test_get_document_not_found(async_client):
    """Fetching an id that was never uploaded yields HTTP 404."""
    response = await async_client.get("/api/documents/nonexistent")
    assert response.status_code == 404
async def test_delete_document(async_client, sample_txt):
    """Deleting an uploaded document succeeds and makes it unreachable.

    Uploads the ``sample_txt`` fixture file, deletes it via
    ``DELETE /api/documents/{id}``, and checks that the delete response reports
    ``success`` and that a follow-up GET for the same id returns 404.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Check the setup upload before reading its body, so a failed upload is
    # reported as such rather than as a KeyError on upload["id"].
    assert upload_resp.status_code == 200, (
        f"setup upload failed: {upload_resp.status_code}"
    )
    upload = upload_resp.json()

    resp = await async_client.delete(f"/api/documents/{upload['id']}")
    assert resp.status_code == 200
    assert resp.json()["success"] is True

    resp2 = await async_client.get(f"/api/documents/{upload['id']}")
    assert resp2.status_code == 404
async def test_delete_document_not_found(async_client):
    """Deleting an id that was never uploaded yields HTTP 404."""
    response = await async_client.delete("/api/documents/nonexistent")
    assert response.status_code == 404
async def test_upload_empty_file(async_client):
    """Uploading a zero-byte file is rejected with HTTP 400."""
    empty_upload = {"file": ("empty.txt", b"", "text/plain")}
    form_fields = {"auto_classify": "false"}

    response = await async_client.post(
        "/api/documents/upload",
        files=empty_upload,
        data=form_fields,
    )

    assert response.status_code == 400
async def test_upload_persists_to_postgres_and_minio(async_client, sample_txt):
    """An uploaded document gets a UUID id and can be read back via GET (STORE-01, STORE-02)."""
    with open(sample_txt, "rb") as handle:
        upload_response = await async_client.post(
            "/api/documents/upload",
            files={"file": ("sample.txt", handle, "text/plain")},
            data={"auto_classify": "false"},
        )
    assert upload_response.status_code == 200
    payload = upload_response.json()

    # The response must carry an id in lowercase, hyphenated UUID form.
    assert "id" in payload, "Upload response missing 'id'"
    doc_id = payload["id"]
    is_uuid = re.match(
        r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
        doc_id,
    )
    assert is_uuid, f"id '{doc_id}' is not a UUID"

    # The stored metadata must round-trip through the GET endpoint.
    fetched = await async_client.get(f"/api/documents/{doc_id}")
    assert fetched.status_code == 200
    fetched_payload = fetched.json()
    assert fetched_payload["original_name"] == "sample.txt"
# ---------------------------------------------------------------------------
# Wave 0 xfail stubs for Phase 3 document endpoint tests — Plans 03-02 / 03-03
# ---------------------------------------------------------------------------
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02")
async def test_upload_url_endpoint(async_client, auth_user, mock_minio_presigned):
    """Scaffold for the presigned upload-URL endpoint (D-05).

    Intended behavior: POST /api/documents/upload-url returns
    {upload_url, document_id} and creates a Document row with status='pending'.

    This is step 1 of the two-step upload flow: it creates the pending
    Document row and returns the presigned PUT URL (15-min TTL). Quota is NOT
    reserved at this step.

    The body is a placeholder until plan 03-02 lands.
    """
    assert True  # scaffold
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02")
async def test_confirm_endpoint(async_client, auth_user, mock_minio_presigned, mock_minio_stat):
    """Scaffold for the upload confirmation endpoint (D-05).

    Intended behavior: POST /api/documents/{id}/confirm calls stat_object
    once, copies the returned size into Document.size_bytes, and sets
    Document.status='uploaded'.

    This is step 3 of the presigned upload flow. stat_object is the
    authoritative source for the file size (D-07), and the atomic quota UPDATE
    runs at this step (STORE-03).

    The body is a placeholder until plan 03-02 lands.
    """
    assert True  # scaffold
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02")
async def test_get_quota(async_client, auth_user):
    """Scaffold for the quota usage endpoint (STORE-04).

    Intended behavior: GET /api/auth/me/quota returns
    {used_bytes: 0, limit_bytes: 104857600}.

    The endpoint backs the quota usage bar and reports the authenticated
    user's current usage and limit. Newly created users start at used_bytes=0.

    The body is a placeholder until plan 03-02 lands.
    """
    assert True  # scaffold
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-03")
async def test_cross_user_access_404(async_client, auth_user, db_session):
    """Scaffold for cross-user document isolation (SEC-04).

    Intended behavior: when user B requests GET /api/documents/{A_doc_id},
    the response is 404.

    Cross-user access returns 404 rather than 403 to avoid information leakage
    (CONTEXT.md D-16): an attacker cannot tell 'document does not exist' apart
    from 'document belongs to someone else'.

    The body is a placeholder until plan 03-03 lands.
    """
    assert True  # scaffold
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-03")
async def test_admin_cannot_access_documents(async_client, admin_user):
    """Scaffold for the admin document-access restriction (SEC-04 SC4).

    Intended behavior: GET /api/documents sent with admin_user.headers
    returns 403.

    Admin accounts cannot access document content (CLAUDE.md + CONTEXT.md
    D-16). The get_regular_user dependency enforces this for all
    /api/documents/* handlers.

    The body is a placeholder until plan 03-03 lands.
    """
    assert True  # scaffold
@pytest.mark.xfail(strict=False, reason="implemented in plan 03-02")
async def test_documents_require_auth(async_client):
    """Scaffold for the authentication requirement on document endpoints (D-16).

    Intended behavior: an anonymous GET /api/documents (no Authorization
    header) returns 401 or 403.

    All /api/documents/* endpoints require authentication via get_current_user
    (Phase 2 D-07 fulfilled in Phase 3).

    The body is a placeholder until plan 03-02 lands.
    """
    assert True  # scaffold