Add doc-service tests, AI category suggestions, LM Studio default
- pytest suite for doc-service: 20+ tests covering category CRUD, document upload/get/delete/patch, ownership isolation, category assignment, AI processing (mock), and live PDF tests (auto-skipped when tests/pdfs/ is empty)
- Minimal in-memory PDF builder in conftest so tests run without any fixture files; real PDFs can be dropped into tests/pdfs/ to activate live extraction tests
- AI prompt updated to return suggested_categories (2–5 short names)
- Frontend: SuggestionChip component in DocumentRow shows AI-suggested categories after processing; "Assign" links to an existing category, "Create & Assign" creates it first, ✕ dismisses locally
- Default AI provider changed to LM Studio at http://host.docker.internal:1234/v1 (host.docker.internal resolves to the macOS host from inside Docker Desktop)
- tests/pdfs/ directory tracked via .gitkeep; *.pdf excluded by .gitignore

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,244 @@
|
||||
"""
Tests for the /documents endpoints.

Synthetic (minimal_pdf / invoice_pdf) tests always run.
Live tests that use real PDFs from tests/pdfs/ are skipped when that
directory is empty — drop any PDF there to activate them.
"""
|
||||
import io
|
||||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# Module-level marker: applies the ``asyncio`` mark to every test in this file
# so the ``async def`` tests below are collected as asyncio tests.
pytestmark = pytest.mark.asyncio
|
||||
|
||||
|
||||
# ── Helpers ────────────────────────────────────────────────────────────────────
|
||||
|
||||
def _pdf_upload(filename: str, data: bytes):
    """Build the ``files=`` mapping for a multipart PDF upload.

    Args:
        filename: Name to report for the uploaded file.
        data: Raw bytes to send as the file body.

    Returns:
        A dict with a single ``"file"`` entry holding a
        ``(filename, file-like object, content type)`` tuple, where the
        content type is always ``"application/pdf"``.
    """
    # A fresh BytesIO per call so each upload reads from position 0.
    stream = io.BytesIO(data)
    return {"file": (filename, stream, "application/pdf")}
|
||||
|
||||
|
||||
# ── List / empty state ─────────────────────────────────────────────────────────
|
||||
|
||||
async def test_list_documents_empty(client):
    """GET /documents returns 200 with an empty list.

    Assumes the ``client`` fixture starts with no documents — confirm in
    conftest.
    """
    r = await client.get("/documents")
    assert r.status_code == 200
    assert r.json() == []
|
||||
|
||||
|
||||
# ── Upload ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
async def test_upload_returns_202(client, minimal_pdf):
    """Uploading a PDF responds 202 with the filename, a pending status and an id."""
    # process_document is replaced with a mock for the duration of the upload;
    # presumably this keeps the document in its initial state — see the router.
    with patch("app.routers.documents.process_document"):
        r = await client.post(
            "/documents/upload", files=_pdf_upload("test.pdf", minimal_pdf)
        )

    assert r.status_code == 202
    data = r.json()
    assert data["filename"] == "test.pdf"
    assert data["status"] == "pending"
    assert "id" in data
|
||||
|
||||
|
||||
async def test_upload_non_pdf_rejected(client):
    """Uploading a ``text/plain`` file is rejected with 415."""
    r = await client.post(
        "/documents/upload",
        files={"file": ("note.txt", io.BytesIO(b"hello"), "text/plain")},
    )
    assert r.status_code == 415
|
||||
|
||||
|
||||
async def test_upload_file_too_large(client):
    """Uploading an oversized PDF is rejected with 413."""
    # A PDF header followed by 21 MiB of filler. The actual size limit is
    # defined by the service, not here (presumably 20 MiB — TODO confirm).
    big = b"%PDF-1.4\n" + b"x" * (21 * 1024 * 1024)
    with patch("app.routers.documents.process_document"):
        r = await client.post("/documents/upload", files=_pdf_upload("big.pdf", big))

    assert r.status_code == 413
|
||||
|
||||
|
||||
# ── Get / status ───────────────────────────────────────────────────────────────
|
||||
|
||||
async def test_get_document(client, minimal_pdf):
    """A freshly uploaded document can be fetched by its id."""
    with patch("app.routers.documents.process_document"):
        up = await client.post(
            "/documents/upload", files=_pdf_upload("get.pdf", minimal_pdf)
        )
    # Fail here with the response body rather than with a KeyError on "id".
    assert up.status_code == 202, up.text
    doc_id = up.json()["id"]

    r = await client.get(f"/documents/{doc_id}")
    assert r.status_code == 200
    assert r.json()["id"] == doc_id
|
||||
|
||||
|
||||
async def test_get_document_not_found(client):
    """Fetching an id that was never created returns 404."""
    r = await client.get("/documents/nonexistent-id")
    assert r.status_code == 404
|
||||
|
||||
|
||||
async def test_get_document_status(client, minimal_pdf):
    """The status endpoint reports ``pending`` for an unprocessed upload."""
    # Processing is mocked out for the upload, so the status is expected to
    # still be "pending" when queried.
    with patch("app.routers.documents.process_document"):
        up = await client.post(
            "/documents/upload", files=_pdf_upload("status.pdf", minimal_pdf)
        )
    # Fail here with the response body rather than with a KeyError on "id".
    assert up.status_code == 202, up.text
    doc_id = up.json()["id"]

    r = await client.get(f"/documents/{doc_id}/status")
    assert r.status_code == 200
    assert r.json()["status"] == "pending"
|
||||
|
||||
|
||||
async def test_other_user_cannot_see_document(client, other_client, minimal_pdf):
    """A document uploaded via ``client`` is a 404 for ``other_client``.

    ``other_client`` presumably authenticates as a different user — see
    conftest.
    """
    with patch("app.routers.documents.process_document"):
        up = await client.post(
            "/documents/upload", files=_pdf_upload("private.pdf", minimal_pdf)
        )
    # Fail here with the response body rather than with a KeyError on "id".
    assert up.status_code == 202, up.text
    doc_id = up.json()["id"]

    r = await other_client.get(f"/documents/{doc_id}")
    assert r.status_code == 404
|
||||
|
||||
|
||||
# ── Patch document type ────────────────────────────────────────────────────────
|
||||
|
||||
async def test_patch_document_type(client, minimal_pdf):
    """PATCH /documents/{id}/type updates and echoes the new document_type."""
    with patch("app.routers.documents.process_document"):
        up = await client.post(
            "/documents/upload", files=_pdf_upload("patch.pdf", minimal_pdf)
        )
    # Fail here with the response body rather than with a KeyError on "id".
    assert up.status_code == 202, up.text
    doc_id = up.json()["id"]

    r = await client.patch(
        f"/documents/{doc_id}/type", json={"document_type": "receipt"}
    )
    assert r.status_code == 200
    assert r.json()["document_type"] == "receipt"
|
||||
|
||||
|
||||
# ── Delete ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
async def test_delete_document(client, minimal_pdf):
    """Deleting a document returns 204 and a later GET returns 404."""
    with patch("app.routers.documents.process_document"):
        up = await client.post(
            "/documents/upload", files=_pdf_upload("del.pdf", minimal_pdf)
        )
    # Fail here with the response body rather than with a KeyError on "id".
    assert up.status_code == 202, up.text
    doc_id = up.json()["id"]

    r = await client.delete(f"/documents/{doc_id}")
    assert r.status_code == 204

    # The document must no longer be retrievable.
    r2 = await client.get(f"/documents/{doc_id}")
    assert r2.status_code == 404
|
||||
|
||||
|
||||
async def test_delete_document_not_found(client):
    """Deleting an id that was never created returns 404."""
    r = await client.delete("/documents/nonexistent-id")
    assert r.status_code == 404
|
||||
|
||||
|
||||
async def test_other_user_cannot_delete_document(client, other_client, minimal_pdf):
    """``other_client`` gets 404 on delete and the document remains available."""
    with patch("app.routers.documents.process_document"):
        up = await client.post(
            "/documents/upload", files=_pdf_upload("owned.pdf", minimal_pdf)
        )
    # Fail here with the response body rather than with a KeyError on "id".
    assert up.status_code == 202, up.text
    doc_id = up.json()["id"]

    r = await other_client.delete(f"/documents/{doc_id}")
    assert r.status_code == 404

    # Original owner can still get it
    r2 = await client.get(f"/documents/{doc_id}")
    assert r2.status_code == 200
|
||||
|
||||
|
||||
# ── Category assignment ────────────────────────────────────────────────────────
|
||||
|
||||
async def test_assign_and_remove_category(client, minimal_pdf):
    """A category can be attached to a document and then detached again."""
    with patch("app.routers.documents.process_document"):
        up = await client.post(
            "/documents/upload", files=_pdf_upload("cat.pdf", minimal_pdf)
        )
    # Fail here with the response body rather than with a KeyError on "id".
    assert up.status_code == 202, up.text
    doc_id = up.json()["id"]

    cat = await client.post("/categories", json={"name": "Ops"})
    cat_id = cat.json()["id"]

    # Attach: 204, and the category shows up on the document.
    r = await client.post(f"/documents/{doc_id}/categories/{cat_id}")
    assert r.status_code == 204

    doc = await client.get(f"/documents/{doc_id}")
    cat_names = [c["name"] for c in doc.json()["categories"]]
    assert "Ops" in cat_names

    # Detach: 204, and the document has no categories left.
    r2 = await client.delete(f"/documents/{doc_id}/categories/{cat_id}")
    assert r2.status_code == 204

    doc2 = await client.get(f"/documents/{doc_id}")
    assert doc2.json()["categories"] == []
|
||||
|
||||
|
||||
async def test_assign_category_idempotent(client, minimal_pdf):
    """Assigning the same category twice should not error."""
    with patch("app.routers.documents.process_document"):
        up = await client.post(
            "/documents/upload", files=_pdf_upload("idem.pdf", minimal_pdf)
        )
    # Fail here with the response body rather than with a KeyError on "id".
    assert up.status_code == 202, up.text
    doc_id = up.json()["id"]
    cat_id = (await client.post("/categories", json={"name": "Idem"})).json()["id"]

    # Check the first assignment too, so the second call is a real duplicate.
    first = await client.post(f"/documents/{doc_id}/categories/{cat_id}")
    assert first.status_code == 204

    r = await client.post(f"/documents/{doc_id}/categories/{cat_id}")
    assert r.status_code == 204  # no error on duplicate
|
||||
|
||||
|
||||
async def test_cannot_assign_other_users_category(client, other_client, minimal_pdf):
    """Assigning a category created via ``other_client`` returns 404."""
    with patch("app.routers.documents.process_document"):
        up = await client.post(
            "/documents/upload", files=_pdf_upload("x.pdf", minimal_pdf)
        )
    # Fail here with the response body rather than with a KeyError on "id".
    assert up.status_code == 202, up.text
    doc_id = up.json()["id"]

    # other_client creates a category
    other_cat = await other_client.post("/categories", json={"name": "Foreign"})
    other_cat_id = other_cat.json()["id"]

    # original user tries to assign it
    r = await client.post(f"/documents/{doc_id}/categories/{other_cat_id}")
    assert r.status_code == 404
|
||||
|
||||
|
||||
# ── AI processing integration (with mock AI) ──────────────────────────────────
|
||||
|
||||
async def test_processing_sets_extracted_data(client, invoice_pdf, mock_ai):
    """Upload + wait for background processing; verify extracted_data is populated.

    ``mock_ai`` is requested only as a fixture; it presumably stubs the AI
    backend and supplies the values asserted below — see conftest.
    """
    import asyncio

    r = await client.post(
        "/documents/upload", files=_pdf_upload("invoice.pdf", invoice_pdf)
    )
    assert r.status_code == 202
    doc_id = r.json()["id"]

    # Background tasks run synchronously in test context once response is sent.
    # Poll the status endpoint briefly (up to 20 tries, 0.1 s apart).
    for _ in range(20):
        status_r = await client.get(f"/documents/{doc_id}/status")
        if status_r.json()["status"] in ("done", "failed"):
            break
        await asyncio.sleep(0.1)

    doc_r = await client.get(f"/documents/{doc_id}")
    doc = doc_r.json()
    # Include the recorded error so a failed or stuck run is diagnosable.
    assert doc["status"] == "done", (
        f"processing ended with status '{doc['status']}': "
        f"{doc.get('error_message')}"
    )
    assert doc["document_type"] == "invoice"
    assert doc["extracted_data"] is not None

    # extracted_data is a JSON-encoded string in the response.
    extracted = json.loads(doc["extracted_data"])
    assert extracted["vendor_name"] == "ACME Corp"
    assert "suggested_categories" in extracted
    assert isinstance(extracted["suggested_categories"], list)
    assert len(extracted["suggested_categories"]) > 0
|
||||
|
||||
|
||||
# ── Live tests (require real PDFs in tests/pdfs/) ─────────────────────────────
|
||||
|
||||
async def test_live_upload_real_pdf(client, real_pdfs, mock_ai):
    """Upload each real PDF from tests/pdfs/ and verify it reaches 'done'.

    ``real_pdfs`` is iterated as a collection of path objects (``read_bytes``
    and ``name`` are used); how it is built and when the test is skipped is
    decided in conftest.
    """
    import asyncio

    for pdf_path in real_pdfs:
        data = pdf_path.read_bytes()
        r = await client.post(
            "/documents/upload",
            files=_pdf_upload(pdf_path.name, data),
        )
        assert r.status_code == 202, f"Upload failed for {pdf_path.name}: {r.text}"
        doc_id = r.json()["id"]

        # Poll until processing finishes (up to 30 tries, 0.2 s apart).
        for _ in range(30):
            status_r = await client.get(f"/documents/{doc_id}/status")
            if status_r.json()["status"] in ("done", "failed"):
                break
            await asyncio.sleep(0.2)

        final = await client.get(f"/documents/{doc_id}")
        result = final.json()  # parse once; reused in the failure message
        assert result["status"] == "done", (
            f"{pdf_path.name} ended with status '{result['status']}': "
            f"{result.get('error_message')}"
        )
|
||||
Reference in New Issue
Block a user