Files
kite/backend/tests/test_documents.py
T
curo1305 970c8e4e44 feat(01-05): final cutover — delete data/, prune config.py, async-only tests
- Delete backend/data/ tracked files (D-04): flat-file metadata, settings.json,
  topics.json, and uploaded files removed from git; backend/data/ added to
  .gitignore (empty dir remains on macOS due to ACL — no tracked files remain)
- Prune backend/config.py: remove DATA_DIR, UPLOADS_DIR, METADATA_DIR,
  TOPICS_FILE, ensure_data_dirs(); rebase SETTINGS_FILE as derived path from
  settings.data_dir (Phase 1 flat-file settings kept per plan decision)
- Prune backend/tests/conftest.py: remove isolated_data_dir autouse fixture
  and sync TestClient client fixture; add SQLite type compatibility shim
  (visit_INET/JSONB) so in-memory db_session can create tables with
  PostgreSQL-specific column types; add live_services_available fixture
- Rewrite backend/tests/test_documents.py: delete all legacy sync tests,
  remove all @pytest.mark.xfail markers; async-only document tests now
  use async_client + storage service directly for topic wiring
- Rewrite backend/tests/test_health.py: delete legacy sync test_health(client);
  remove @pytest.mark.xfail from test_health_checks_postgres_and_minio
- Port backend/tests/test_topics.py to async_client (sync client removed)
- Port backend/tests/test_settings.py to async_client with monkeypatch for
  SETTINGS_FILE isolation (settings remain flat-file in Phase 1)
2026-05-22 09:53:39 +02:00

155 lines
5.1 KiB
Python

"""
Document API tests — async only (Plan 05 cutover).
Legacy sync tests (using the flat-file storage layer) were deleted in Plan 05.
All tests here use async_client (httpx.AsyncClient + ASGITransport + in-memory SQLite).
"""
from __future__ import annotations
import re
import pytest
async def test_upload_txt_no_classify(async_client, sample_txt):
    """Upload a text file with ``auto_classify`` set to "false" and check the payload.

    Expects HTTP 200 and a JSON body that echoes the original file name,
    contains an ``extracted_text`` field, has an empty ``topics`` list and
    includes an ``id``.
    """
    form_fields = {"auto_classify": "false"}
    with open(sample_txt, "rb") as handle:
        response = await async_client.post(
            "/api/documents/upload",
            files={"file": ("sample.txt", handle, "text/plain")},
            data=form_fields,
        )

    assert response.status_code == 200

    payload = response.json()
    assert payload["original_name"] == "sample.txt"
    assert "extracted_text" in payload

    # NOTE(review): because of the `or`, any non-empty text satisfies this
    # check; the "invoices" substring is not actually required.
    text = payload["extracted_text"]
    assert "invoices" in text.lower() or len(text) > 0

    assert payload["topics"] == []
    assert "id" in payload
async def test_upload_pdf_no_classify(async_client, sample_pdf):
    """Upload a PDF with ``auto_classify`` set to "false" and check the payload.

    Expects HTTP 200, a ``mime_type`` of ``application/pdf`` and a non-empty
    ``extracted_text`` field.
    """
    with open(sample_pdf, "rb") as handle:
        pdf_part = ("sample.pdf", handle, "application/pdf")
        response = await async_client.post(
            "/api/documents/upload",
            files={"file": pdf_part},
            data={"auto_classify": "false"},
        )

    assert response.status_code == 200

    payload = response.json()
    assert payload["mime_type"] == "application/pdf"
    assert len(payload["extracted_text"]) > 0
async def test_list_documents(async_client, sample_txt):
    """After one upload, the document listing reports exactly one item.

    Uploads a single text file, then checks that ``GET /api/documents``
    answers 200 with ``total == 1`` and a one-element ``items`` list.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Check the setup step explicitly: otherwise a rejected upload only shows
    # up later as a misleading "total == 1" failure.
    assert upload_resp.status_code == 200, "setup upload failed"

    resp = await async_client.get("/api/documents")
    assert resp.status_code == 200

    data = resp.json()
    assert data["total"] == 1
    assert len(data["items"]) == 1
async def test_list_documents_filter_by_topic(async_client, db_session, sample_txt):
    """Listing with ``?topic=`` only counts documents tagged with that topic.

    Uploads one document, tags it "finance" through the storage service, then
    checks that the "finance" filter reports one document and the "legal"
    filter reports none.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Check the setup step explicitly so a rejected upload is reported as
    # such, not as a KeyError on "id" below.
    assert upload_resp.status_code == 200, "setup upload failed"
    upload = upload_resp.json()

    # Wire a topic via the storage service directly (replaces old flat-file call)
    from services import storage

    await storage.update_document_topics(db_session, upload["id"], ["finance"])

    finance_resp = await async_client.get("/api/documents?topic=finance")
    assert finance_resp.status_code == 200
    assert finance_resp.json()["total"] == 1

    legal_resp = await async_client.get("/api/documents?topic=legal")
    assert legal_resp.status_code == 200
    assert legal_resp.json()["total"] == 0
async def test_get_document(async_client, sample_txt):
    """A freshly uploaded document can be fetched by its id.

    Uploads one text file, then checks that ``GET /api/documents/{id}``
    answers 200 and returns the same id.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Check the setup step explicitly so a rejected upload is reported as
    # such, not as a KeyError on "id" below.
    assert upload_resp.status_code == 200, "setup upload failed"
    upload = upload_resp.json()

    resp = await async_client.get(f"/api/documents/{upload['id']}")
    assert resp.status_code == 200
    assert resp.json()["id"] == upload["id"]
async def test_get_document_not_found(async_client):
    """Fetching an id that does not exist answers 404."""
    missing_url = "/api/documents/nonexistent"
    response = await async_client.get(missing_url)
    assert response.status_code == 404
async def test_delete_document(async_client, sample_txt):
    """Deleting an uploaded document succeeds and makes it unfetchable.

    Uploads one text file, deletes it by id (expects 200 and
    ``success is True``), then checks that a follow-up GET answers 404.
    """
    with open(sample_txt, "rb") as f:
        upload_resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("a.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    # Check the setup step explicitly so a rejected upload is reported as
    # such, not as a KeyError on "id" below.
    assert upload_resp.status_code == 200, "setup upload failed"
    upload = upload_resp.json()

    resp = await async_client.delete(f"/api/documents/{upload['id']}")
    assert resp.status_code == 200
    assert resp.json()["success"] is True

    # The document must no longer be retrievable after deletion.
    resp2 = await async_client.get(f"/api/documents/{upload['id']}")
    assert resp2.status_code == 404
async def test_delete_document_not_found(async_client):
    """Deleting an id that does not exist answers 404."""
    missing_url = "/api/documents/nonexistent"
    response = await async_client.delete(missing_url)
    assert response.status_code == 404
async def test_upload_empty_file(async_client):
    """Uploading a zero-byte file is rejected with HTTP 400."""
    empty_part = ("empty.txt", b"", "text/plain")
    form_fields = {"auto_classify": "false"}
    response = await async_client.post(
        "/api/documents/upload",
        files={"file": empty_part},
        data=form_fields,
    )
    assert response.status_code == 400
async def test_upload_persists_to_postgres_and_minio(async_client, sample_txt):
    """After a successful upload, document is persisted and queryable via GET (STORE-01, STORE-02).

    Checks that the upload response carries a lowercase, hyphenated UUID as
    ``id`` and that fetching that id returns the same ``original_name``.
    """
    with open(sample_txt, "rb") as f:
        resp = await async_client.post(
            "/api/documents/upload",
            files={"file": ("sample.txt", f, "text/plain")},
            data={"auto_classify": "false"},
        )
    assert resp.status_code == 200
    data = resp.json()

    # Response must include a UUID-format id
    uuid_pattern = re.compile(
        r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
    )
    assert "id" in data, "Upload response missing 'id'"
    # fullmatch rather than match: with match(), "$" also matches just before
    # a trailing newline, so an id like "<uuid>\n" would wrongly be accepted.
    assert uuid_pattern.fullmatch(data["id"]), f"id '{data['id']}' is not a UUID"

    # Metadata round-trips via GET
    doc_id = data["id"]
    get_resp = await async_client.get(f"/api/documents/{doc_id}")
    assert get_resp.status_code == 200
    get_data = get_resp.json()
    assert get_data["original_name"] == "sample.txt"