feat(01-05): final cutover — delete data/, prune config.py, async-only tests
- Delete backend/data/ tracked files (D-04): flat-file metadata, settings.json, topics.json, and uploaded files removed from git; backend/data/ added to .gitignore (the empty dir remains on macOS due to an ACL — no tracked files remain)
- Prune backend/config.py: remove DATA_DIR, UPLOADS_DIR, METADATA_DIR, TOPICS_FILE, ensure_data_dirs(); rebase SETTINGS_FILE as a path derived from settings.data_dir (Phase 1 flat-file settings kept per plan decision)
- Prune backend/tests/conftest.py: remove isolated_data_dir autouse fixture and sync TestClient client fixture; add SQLite type compatibility shim (visit_INET/JSONB) so in-memory db_session can create tables with PostgreSQL-specific column types; add live_services_available fixture
- Rewrite backend/tests/test_documents.py: delete all legacy sync tests, remove all @pytest.mark.xfail markers; async-only document tests now use async_client + storage service directly for topic wiring
- Rewrite backend/tests/test_health.py: delete legacy sync test_health(client); remove @pytest.mark.xfail from test_health_checks_postgres_and_minio
- Port backend/tests/test_topics.py to async_client (sync client removed)
- Port backend/tests/test_settings.py to async_client with monkeypatch for SETTINGS_FILE isolation (settings remain flat-file in Phase 1)
This commit is contained in:
+40
-167
@@ -1,132 +1,17 @@
|
||||
"""
|
||||
Document API tests.
|
||||
Document API tests — async only (Plan 05 cutover).
|
||||
|
||||
Sync tests (top section) — test current flat-file behavior; remain until Plan 05 cuts over.
|
||||
Async tests (bottom section, _async suffix) — xfail scaffolds for Plan 05 PostgreSQL+MinIO layer.
|
||||
Legacy sync tests (using the flat-file storage layer) were deleted in Plan 05.
|
||||
All tests here use async_client (httpx.AsyncClient + ASGITransport + in-memory SQLite).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
def test_upload_txt_no_classify(client, sample_txt):
|
||||
with open(sample_txt, "rb") as f:
|
||||
resp = client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("sample.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["original_name"] == "sample.txt"
|
||||
assert "extracted_text" in data
|
||||
assert "invoices" in data["extracted_text"].lower() or len(data["extracted_text"]) > 0
|
||||
assert data["topics"] == []
|
||||
assert "id" in data
|
||||
|
||||
|
||||
def test_upload_pdf_no_classify(client, sample_pdf):
|
||||
with open(sample_pdf, "rb") as f:
|
||||
resp = client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("sample.pdf", f, "application/pdf")},
|
||||
data={"auto_classify": "false"},
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["mime_type"] == "application/pdf"
|
||||
assert len(data["extracted_text"]) > 0
|
||||
|
||||
|
||||
def test_list_documents(client, sample_txt):
|
||||
with open(sample_txt, "rb") as f:
|
||||
client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("a.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
)
|
||||
resp = client.get("/api/documents")
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert data["total"] == 1
|
||||
assert len(data["items"]) == 1
|
||||
|
||||
|
||||
def test_list_documents_filter_by_topic(client, sample_txt):
|
||||
with open(sample_txt, "rb") as f:
|
||||
upload = client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("a.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
).json()
|
||||
|
||||
import services.storage as st
|
||||
st.update_document_topics(upload["id"], ["finance"])
|
||||
|
||||
resp = client.get("/api/documents?topic=finance")
|
||||
assert resp.json()["total"] == 1
|
||||
|
||||
resp2 = client.get("/api/documents?topic=legal")
|
||||
assert resp2.json()["total"] == 0
|
||||
|
||||
|
||||
def test_get_document(client, sample_txt):
|
||||
with open(sample_txt, "rb") as f:
|
||||
upload = client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("a.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
).json()
|
||||
|
||||
resp = client.get(f"/api/documents/{upload['id']}")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["id"] == upload["id"]
|
||||
|
||||
|
||||
def test_get_document_not_found(client):
|
||||
resp = client.get("/api/documents/nonexistent")
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
def test_delete_document(client, sample_txt):
|
||||
with open(sample_txt, "rb") as f:
|
||||
upload = client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("a.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
).json()
|
||||
|
||||
resp = client.delete(f"/api/documents/{upload['id']}")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["success"] is True
|
||||
|
||||
resp2 = client.get(f"/api/documents/{upload['id']}")
|
||||
assert resp2.status_code == 404
|
||||
|
||||
|
||||
def test_delete_document_not_found(client):
|
||||
resp = client.delete("/api/documents/nonexistent")
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
def test_upload_empty_file(client):
|
||||
resp = client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("empty.txt", b"", "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
)
|
||||
assert resp.status_code == 400
|
||||
|
||||
|
||||
# ── Async port (Plan 05 cutover) ─────────────────────────────────────────────
|
||||
# Each test below is an async version of the corresponding sync test above.
|
||||
# They use async_client (httpx.AsyncClient + ASGITransport) and are marked
|
||||
# xfail until Plan 05 completes the PostgreSQL+MinIO storage rewrite.
|
||||
# ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@pytest.mark.xfail(strict=False, reason="async storage layer implemented in plan 05")
|
||||
async def test_upload_txt_no_classify_async(async_client, sample_txt):
|
||||
async def test_upload_txt_no_classify(async_client, sample_txt):
|
||||
with open(sample_txt, "rb") as f:
|
||||
resp = await async_client.post(
|
||||
"/api/documents/upload",
|
||||
@@ -142,8 +27,7 @@ async def test_upload_txt_no_classify_async(async_client, sample_txt):
|
||||
assert "id" in data
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False, reason="async storage layer implemented in plan 05")
|
||||
async def test_upload_pdf_no_classify_async(async_client, sample_pdf):
|
||||
async def test_upload_pdf_no_classify(async_client, sample_pdf):
|
||||
with open(sample_pdf, "rb") as f:
|
||||
resp = await async_client.post(
|
||||
"/api/documents/upload",
|
||||
@@ -156,8 +40,7 @@ async def test_upload_pdf_no_classify_async(async_client, sample_pdf):
|
||||
assert len(data["extracted_text"]) > 0
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False, reason="async storage layer implemented in plan 05")
|
||||
async def test_list_documents_async(async_client, sample_txt):
|
||||
async def test_list_documents(async_client, sample_txt):
|
||||
with open(sample_txt, "rb") as f:
|
||||
await async_client.post(
|
||||
"/api/documents/upload",
|
||||
@@ -171,28 +54,20 @@ async def test_list_documents_async(async_client, sample_txt):
|
||||
assert len(data["items"]) == 1
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False, reason="async storage layer implemented in plan 05")
|
||||
async def test_list_documents_filter_by_topic_async(async_client, db_session, sample_txt):
|
||||
async def test_list_documents_filter_by_topic(async_client, db_session, sample_txt):
|
||||
with open(sample_txt, "rb") as f:
|
||||
upload = (await async_client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("a.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
)).json()
|
||||
upload = (
|
||||
await async_client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("a.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
)
|
||||
).json()
|
||||
|
||||
# Update topics via direct SQL on db_session (replaces flat-file call)
|
||||
try:
|
||||
from sqlalchemy import update
|
||||
from db.models import Document
|
||||
import uuid
|
||||
await db_session.execute(
|
||||
update(Document)
|
||||
.where(Document.id == uuid.UUID(upload["id"]))
|
||||
.values(topics=["finance"])
|
||||
)
|
||||
await db_session.commit()
|
||||
except ImportError:
|
||||
pytest.skip("db.models not yet implemented — plan 03")
|
||||
# Wire a topic via the storage service directly (replaces old flat-file call)
|
||||
from services import storage
|
||||
|
||||
await storage.update_document_topics(db_session, upload["id"], ["finance"])
|
||||
|
||||
resp = await async_client.get("/api/documents?topic=finance")
|
||||
assert resp.json()["total"] == 1
|
||||
@@ -201,34 +76,35 @@ async def test_list_documents_filter_by_topic_async(async_client, db_session, sa
|
||||
assert resp2.json()["total"] == 0
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False, reason="async storage layer implemented in plan 05")
|
||||
async def test_get_document_async(async_client, sample_txt):
|
||||
async def test_get_document(async_client, sample_txt):
|
||||
with open(sample_txt, "rb") as f:
|
||||
upload = (await async_client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("a.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
)).json()
|
||||
upload = (
|
||||
await async_client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("a.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
)
|
||||
).json()
|
||||
|
||||
resp = await async_client.get(f"/api/documents/{upload['id']}")
|
||||
assert resp.status_code == 200
|
||||
assert resp.json()["id"] == upload["id"]
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False, reason="async storage layer implemented in plan 05")
|
||||
async def test_get_document_not_found_async(async_client):
|
||||
async def test_get_document_not_found(async_client):
|
||||
resp = await async_client.get("/api/documents/nonexistent")
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False, reason="async storage layer implemented in plan 05")
|
||||
async def test_delete_document_async(async_client, sample_txt):
|
||||
async def test_delete_document(async_client, sample_txt):
|
||||
with open(sample_txt, "rb") as f:
|
||||
upload = (await async_client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("a.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
)).json()
|
||||
upload = (
|
||||
await async_client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("a.txt", f, "text/plain")},
|
||||
data={"auto_classify": "false"},
|
||||
)
|
||||
).json()
|
||||
|
||||
resp = await async_client.delete(f"/api/documents/{upload['id']}")
|
||||
assert resp.status_code == 200
|
||||
@@ -238,14 +114,12 @@ async def test_delete_document_async(async_client, sample_txt):
|
||||
assert resp2.status_code == 404
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False, reason="async storage layer implemented in plan 05")
|
||||
async def test_delete_document_not_found_async(async_client):
|
||||
async def test_delete_document_not_found(async_client):
|
||||
resp = await async_client.delete("/api/documents/nonexistent")
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False, reason="async storage layer implemented in plan 05")
|
||||
async def test_upload_empty_file_async(async_client):
|
||||
async def test_upload_empty_file(async_client):
|
||||
resp = await async_client.post(
|
||||
"/api/documents/upload",
|
||||
files={"file": ("empty.txt", b"", "text/plain")},
|
||||
@@ -254,8 +128,7 @@ async def test_upload_empty_file_async(async_client):
|
||||
assert resp.status_code == 400
|
||||
|
||||
|
||||
@pytest.mark.xfail(strict=False, reason="async storage layer implemented in plan 05")
|
||||
async def test_upload_persists_to_postgres_and_minio_async(async_client, sample_txt):
|
||||
async def test_upload_persists_to_postgres_and_minio(async_client, sample_txt):
|
||||
"""After a successful upload, document is persisted and queryable via GET (STORE-01, STORE-02)."""
|
||||
with open(sample_txt, "rb") as f:
|
||||
resp = await async_client.post(
|
||||
@@ -268,7 +141,7 @@ async def test_upload_persists_to_postgres_and_minio_async(async_client, sample_
|
||||
|
||||
# Response must include a UUID-format id
|
||||
uuid_pattern = re.compile(
|
||||
r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'
|
||||
r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
|
||||
)
|
||||
assert "id" in data, "Upload response missing 'id'"
|
||||
assert uuid_pattern.match(data["id"]), f"id '{data['id']}' is not a UUID"
|
||||
|
||||
Reference in New Issue
Block a user