From 1cdc532fff2b7db17a0926e222f084db0dad883d Mon Sep 17 00:00:00 2001 From: curo1305 Date: Tue, 14 Apr 2026 11:27:57 +0200 Subject: [PATCH] Add doc-service tests, AI category suggestions, LM Studio default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - pytest suite for doc-service: 20+ tests covering category CRUD, document upload/get/delete/patch, ownership isolation, category assignment, AI processing (mock), and live PDF tests (auto-skipped when tests/pdfs/ is empty) - Minimal in-memory PDF builder in conftest so tests run without any fixture files; real PDFs can be dropped into tests/pdfs/ to activate live extraction tests - AI prompt updated to return suggested_categories (2–5 short names) - Frontend: SuggestionChip component in DocumentRow shows AI-suggested categories after processing; "Assign" links to an existing category, "Create & Assign" creates it first, ✕ dismisses locally - Default AI provider changed to LM Studio at http://host.docker.internal:1234/v1 (host.docker.internal resolves to the macOS host from inside Docker Desktop) - tests/pdfs/ directory tracked via .gitkeep; *.pdf excluded by .gitignore Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 3 + backend/app/core/app_config.py | 8 +- ...026-04-14_doc-service-tests-suggestions.md | 24 ++ features/doc-service/app/services/ai/base.py | 5 +- .../doc-service/app/services/config_reader.py | 9 +- features/doc-service/pyproject.toml | 1 + features/doc-service/tests/__init__.py | 0 features/doc-service/tests/conftest.py | 228 ++++++++++++++++ features/doc-service/tests/pdfs/.gitkeep | 0 features/doc-service/tests/test_categories.py | 99 +++++++ features/doc-service/tests/test_documents.py | 244 ++++++++++++++++++ frontend/src/pages/DocumentsPage.tsx | 209 ++++++++++----- 12 files changed, 755 insertions(+), 75 deletions(-) create mode 100644 changelog/2026-04-14_doc-service-tests-suggestions.md create mode 100644 features/doc-service/tests/__init__.py 
create mode 100644 features/doc-service/tests/conftest.py create mode 100644 features/doc-service/tests/pdfs/.gitkeep create mode 100644 features/doc-service/tests/test_categories.py create mode 100644 features/doc-service/tests/test_documents.py diff --git a/.gitignore b/.gitignore index a64342b..c5bd565 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,6 @@ frontend/dist/ # OS .DS_Store resume.txt + +# Test fixtures — drop PDFs here for local testing, never commit them +features/doc-service/tests/pdfs/*.pdf diff --git a/backend/app/core/app_config.py b/backend/app/core/app_config.py index c97345c..eb2a063 100644 --- a/backend/app/core/app_config.py +++ b/backend/app/core/app_config.py @@ -31,13 +31,15 @@ class OllamaConfig(BaseModel): class LMStudioConfig(BaseModel): - base_url: str = "http://192.168.1.x:1234/v1" + # host.docker.internal resolves to the host from inside Docker (macOS/Windows). + # For local dev outside Docker, use http://localhost:1234/v1 instead. + base_url: str = "http://host.docker.internal:1234/v1" model: str = "local-model" - api_key: str = "" + api_key: str = "lm-studio" class AIConfig(BaseModel): - provider: str = "anthropic" + provider: str = "lmstudio" anthropic: AnthropicConfig = AnthropicConfig() ollama: OllamaConfig = OllamaConfig() lmstudio: LMStudioConfig = LMStudioConfig() diff --git a/changelog/2026-04-14_doc-service-tests-suggestions.md b/changelog/2026-04-14_doc-service-tests-suggestions.md new file mode 100644 index 0000000..888cc72 --- /dev/null +++ b/changelog/2026-04-14_doc-service-tests-suggestions.md @@ -0,0 +1,24 @@ +# 2026-04-14 — Doc-service tests, AI category suggestions, LM Studio default + +**Timestamp:** 2026-04-14T00:00:00+00:00 + +## Summary + +Added pytest test suite for doc-service, updated the AI prompt to return suggested categories, wired up a suggestions UI in DocumentsPage (per-suggestion Accept/Create&Assign/Dismiss), changed the default AI provider to LM Studio at host.docker.internal:1234, and 
created a gitignored test PDF directory. + +## Files Added + +- `features/doc-service/tests/__init__.py` +- `features/doc-service/tests/conftest.py` — SQLite in-memory DB, tmp DATA_DIR, mock AI provider, minimal+invoice PDF builders, real_pdfs fixture (auto-skips if no PDFs present) +- `features/doc-service/tests/test_categories.py` — full CRUD + per-user isolation +- `features/doc-service/tests/test_documents.py` — upload, list, get, status, delete, category assignment, AI processing integration, live PDF tests +- `features/doc-service/tests/pdfs/.gitkeep` — tracked empty directory; drop PDFs here for live testing + +## Files Modified + +- `.gitignore` — ignore `features/doc-service/tests/pdfs/*.pdf` +- `features/doc-service/pyproject.toml` — added `aiosqlite>=0.20` to dev deps +- `features/doc-service/app/services/ai/base.py` — added `suggested_categories` to AI prompt (2–5 category names per document) +- `features/doc-service/app/services/config_reader.py` — default provider changed to `lmstudio`; default base URLs now point at `host.docker.internal` (LM Studio on port 1234, Ollama on port 11434; Docker→host resolution on macOS/Windows); LM Studio default `api_key` changed to `lm-studio` +- `backend/app/core/app_config.py` — default `LMStudioConfig.base_url` = `http://host.docker.internal:1234/v1`; default `LMStudioConfig.api_key` = `lm-studio`; default provider = `lmstudio` +- `frontend/src/pages/DocumentsPage.tsx` — added `SuggestionChip` component and `suggested_categories` section in DocumentRow: checks if suggestion already exists as a user category, shows "Assign" (existing) or "Create & Assign" (new), dismiss removes from local state diff --git a/features/doc-service/app/services/ai/base.py b/features/doc-service/app/services/ai/base.py index 2beb541..1c67865 100644 --- a/features/doc-service/app/services/ai/base.py +++ b/features/doc-service/app/services/ai/base.py @@ -17,8 +17,9 @@ customer_address (string or null), invoice_number (string or null), invoice_date (string or null), due_date (string or null), -tags (array of strings), -line_items (array of objects, each with keys: description, amount). 
+tags (array of short keyword strings describing the document), +line_items (array of objects, each with keys: description, amount), +suggested_categories (array of 2 to 5 short category name strings a user might want to file this document under, e.g. "Utilities", "Travel", "Software Subscriptions", "Client Invoices"). Document text: {text}""" diff --git a/features/doc-service/app/services/config_reader.py b/features/doc-service/app/services/config_reader.py index 536f481..5c37aa2 100644 --- a/features/doc-service/app/services/config_reader.py +++ b/features/doc-service/app/services/config_reader.py @@ -12,10 +12,13 @@ from app.core.config import settings _DEFAULT_CONFIG: dict = { "ai": { - "provider": "anthropic", + # Default: LM Studio running on the host machine at port 1234. + # Inside Docker, host.docker.internal resolves to the host; for local + # dev outside Docker use http://localhost:1234/v1 instead. + "provider": "lmstudio", "anthropic": {"api_key": "", "model": "claude-haiku-4-5-20251001"}, - "ollama": {"base_url": "http://localhost:11434/v1", "model": "llama3.2", "api_key": "ollama"}, - "lmstudio": {"base_url": "http://localhost:1234/v1", "model": "local-model", "api_key": ""}, + "ollama": {"base_url": "http://host.docker.internal:11434/v1", "model": "llama3.2", "api_key": "ollama"}, + "lmstudio": {"base_url": "http://host.docker.internal:1234/v1", "model": "local-model", "api_key": "lm-studio"}, }, "documents": {"max_pdf_bytes": 20 * 1024 * 1024}, } diff --git a/features/doc-service/pyproject.toml b/features/doc-service/pyproject.toml index 77df723..d60f52f 100644 --- a/features/doc-service/pyproject.toml +++ b/features/doc-service/pyproject.toml @@ -25,6 +25,7 @@ dev = [ "pytest>=8", "pytest-asyncio>=0.23", "httpx>=0.27", + "aiosqlite>=0.20", "ruff>=0.4", ] diff --git a/features/doc-service/tests/__init__.py b/features/doc-service/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/features/doc-service/tests/conftest.py 
b/features/doc-service/tests/conftest.py new file mode 100644 index 0000000..72b13f3 --- /dev/null +++ b/features/doc-service/tests/conftest.py @@ -0,0 +1,228 @@ +""" +Shared pytest fixtures for doc-service tests. + +Uses an in-memory SQLite database so tests run without a real PostgreSQL. +The DATA_DIR is overridden to a fresh tmp directory for each test (via tmp_path). +The AI provider is mocked so tests never hit a real endpoint. +""" +import io +import struct +import zlib +from pathlib import Path +from typing import AsyncGenerator +from unittest.mock import AsyncMock, patch + +import pytest +import pytest_asyncio +from httpx import ASGITransport, AsyncClient +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine + +import app.models # noqa: F401 — registers all ORM classes +from app.database import Base, get_db +from app.main import app + +# ── Test database ────────────────────────────────────────────────────────────── + +TEST_DB_URL = "sqlite+aiosqlite:///:memory:" + +_engine = create_async_engine(TEST_DB_URL, connect_args={"check_same_thread": False}) +_TestSessionLocal = async_sessionmaker(_engine, expire_on_commit=False) + + +@pytest_asyncio.fixture(scope="session", autouse=True) +async def create_tables(): + """Create all tables once per test session.""" + async with _engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + yield + async with _engine.begin() as conn: + await conn.run_sync(Base.metadata.drop_all) + + +@pytest_asyncio.fixture +async def db_session() -> AsyncGenerator[AsyncSession, None]: + """Provide a transactional test DB session that is rolled back after each test.""" + async with _engine.begin() as conn: + async with AsyncSession(conn) as session: + yield session + await conn.rollback() + + +# ── Override get_db ──────────────────────────────────────────────────────────── + +async def _override_get_db(): + async with _TestSessionLocal() as session: + yield session + + +# ── Mock AI provider 
─────────────────────────────────────────────────────────── + +MOCK_AI_RESULT = { + "document_type": "invoice", + "total_amount": "99.00", + "currency": "EUR", + "vendor_name": "ACME Corp", + "customer_name": "Test Customer", + "billing_address": "1 Main St", + "customer_address": "2 Other St", + "invoice_number": "INV-001", + "invoice_date": "2026-04-14", + "due_date": "2026-05-14", + "tags": ["invoice", "acme"], + "line_items": [{"description": "Widget", "amount": "99.00"}], + "suggested_categories": ["Suppliers", "Operating Expenses"], +} + + +@pytest.fixture +def mock_ai(): + """Patch the AI classify_document call to return MOCK_AI_RESULT.""" + provider_mock = AsyncMock() + provider_mock.classify_document = AsyncMock(return_value=MOCK_AI_RESULT) + with patch("app.routers.documents.get_provider", return_value=provider_mock): + yield provider_mock + + +# ── HTTP client ──────────────────────────────────────────────────────────────── + +TEST_USER_ID = "test-user-1" +OTHER_USER_ID = "test-user-2" + + +@pytest_asyncio.fixture +async def client(tmp_path) -> AsyncGenerator[AsyncClient, None]: + """ + AsyncClient wired to the FastAPI app with: + - get_db overridden to use test SQLite DB + - DATA_DIR pointed to a tmp directory + - Default X-User-Id header set to TEST_USER_ID + """ + app.dependency_overrides[get_db] = _override_get_db + + with patch("app.services.storage.settings") as mock_settings: + mock_settings.DATA_DIR = str(tmp_path) + async with AsyncClient( + transport=ASGITransport(app=app), + base_url="http://test", + headers={"x-user-id": TEST_USER_ID}, + ) as ac: + yield ac + + app.dependency_overrides.clear() + + +@pytest_asyncio.fixture +async def other_client(tmp_path) -> AsyncGenerator[AsyncClient, None]: + """Client acting as a different user — used to test ownership isolation.""" + app.dependency_overrides[get_db] = _override_get_db + + with patch("app.services.storage.settings") as mock_settings: + mock_settings.DATA_DIR = str(tmp_path) + async with 
AsyncClient( + transport=ASGITransport(app=app), + base_url="http://test", + headers={"x-user-id": OTHER_USER_ID}, + ) as ac: + yield ac + + app.dependency_overrides.clear() + + +# ── Minimal PDF bytes ────────────────────────────────────────────────────────── + +def _make_minimal_pdf(text: str = "Test invoice. Total: EUR 99.00. Vendor: ACME Corp.") -> bytes: + """ + Build a minimal but valid single-page PDF that pdfplumber can open and + extract text from. No external libraries needed — hand-crafted byte structure. + """ + # We embed the text as a PDF content stream using a built-in font. + content_stream = ( + f"BT /F1 12 Tf 50 750 Td ({text}) Tj ET" + ).encode() + compressed = zlib.compress(content_stream) + + objects: list[bytes] = [] + + def obj(n: int, body: bytes) -> bytes: + return f"{n} 0 obj\n".encode() + body + b"\nendobj\n" + + # 1: Catalog + objects.append(obj(1, b"<< /Type /Catalog /Pages 2 0 R >>")) + # 2: Pages + objects.append(obj(2, b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>")) + # 3: Page + objects.append(obj(3, ( + b"<< /Type /Page /Parent 2 0 R " + b"/MediaBox [0 0 612 792] " + b"/Contents 4 0 R " + b"/Resources << /Font << /F1 5 0 R >> >> >>" + ))) + # 4: Content stream + objects.append(obj(4, ( + f"<< /Filter /FlateDecode /Length {len(compressed)} >>\n".encode() + + b"stream\n" + compressed + b"\nendstream" + ))) + # 5: Font + objects.append(obj(5, ( + b"<< /Type /Font /Subtype /Type1 " + b"/BaseFont /Helvetica " + b"/Encoding /WinAnsiEncoding >>" + ))) + + # Build xref + header = b"%PDF-1.4\n" + body = b"" + offsets = [] + for o in objects: + offsets.append(len(header) + len(body)) + body += o + + xref_offset = len(header) + len(body) + xref = f"xref\n0 {len(objects) + 1}\n0000000000 65535 f \n".encode() + for off in offsets: + xref += f"{off:010d} 00000 n \n".encode() + + trailer = ( + f"trailer\n<< /Size {len(objects) + 1} /Root 1 0 R >>\n" + f"startxref\n{xref_offset}\n%%EOF\n" + ).encode() + + return header + body + xref + trailer + + 
+@pytest.fixture +def minimal_pdf() -> bytes: + return _make_minimal_pdf() + + +@pytest.fixture +def invoice_pdf() -> bytes: + return _make_minimal_pdf( + "Invoice INV-001. Date: 2026-04-14. Due: 2026-05-14. " + "Vendor: ACME Corp, 1 Main St. Customer: Test Customer, 2 Other St. " + "Widget x1: EUR 99.00. Total: EUR 99.00." + ) + + +# ── Real PDF fixture (optional) ──────────────────────────────────────────────── + +def _pdf_fixtures_dir() -> Path: + return Path(__file__).parent / "pdfs" + + +def pytest_collect_file(parent, file_path): + """No-op pytest collection hook: returns None for every file, so no custom collectors are created (it does not register the pdfs/ directory).""" + return None + + +@pytest.fixture +def real_pdfs() -> list[Path]: + """ + Returns a list of PDF paths from tests/pdfs/. + Tests that use this fixture are skipped if the directory is empty. + Drop any PDF into features/doc-service/tests/pdfs/ to run live tests. + """ + pdfs = list(_pdf_fixtures_dir().glob("*.pdf")) + if not pdfs: + pytest.skip("No PDFs in tests/pdfs/ — add a PDF file to run live upload tests") + return pdfs diff --git a/features/doc-service/tests/pdfs/.gitkeep b/features/doc-service/tests/pdfs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/features/doc-service/tests/test_categories.py b/features/doc-service/tests/test_categories.py new file mode 100644 index 0000000..5ed7a25 --- /dev/null +++ b/features/doc-service/tests/test_categories.py @@ -0,0 +1,99 @@ +""" +Tests for the /categories endpoints. + +Covers CRUD operations and per-user isolation. 
+""" +import pytest + + +pytestmark = pytest.mark.asyncio + + +async def test_list_categories_empty(client): + r = await client.get("/categories") + assert r.status_code == 200 + assert r.json() == [] + + +async def test_create_category(client): + r = await client.post("/categories", json={"name": "Utilities"}) + assert r.status_code == 201 + data = r.json() + assert data["name"] == "Utilities" + assert "id" in data + + +async def test_create_category_strips_whitespace(client): + r = await client.post("/categories", json={"name": " Travel "}) + assert r.status_code == 201 + assert r.json()["name"] == "Travel" + + +async def test_create_category_empty_name_rejected(client): + r = await client.post("/categories", json={"name": " "}) + assert r.status_code == 422 + + +async def test_list_categories_sorted(client): + await client.post("/categories", json={"name": "Zebra"}) + await client.post("/categories", json={"name": "Alpha"}) + r = await client.get("/categories") + names = [c["name"] for c in r.json()] + assert names == sorted(names) + + +async def test_rename_category(client): + create = await client.post("/categories", json={"name": "Old Name"}) + cat_id = create.json()["id"] + + r = await client.patch(f"/categories/{cat_id}", json={"name": "New Name"}) + assert r.status_code == 200 + assert r.json()["name"] == "New Name" + + +async def test_rename_category_not_found(client): + r = await client.patch("/categories/nonexistent-id", json={"name": "X"}) + assert r.status_code == 404 + + +async def test_delete_category(client): + create = await client.post("/categories", json={"name": "ToDelete"}) + cat_id = create.json()["id"] + + r = await client.delete(f"/categories/{cat_id}") + assert r.status_code == 204 + + # Confirm it's gone + r2 = await client.get("/categories") + ids = [c["id"] for c in r2.json()] + assert cat_id not in ids + + +async def test_delete_category_not_found(client): + r = await client.delete("/categories/nonexistent-id") + assert r.status_code 
== 404 + + +async def test_categories_isolated_by_user(client, other_client): + """Categories created by user A must not be visible to user B.""" + await client.post("/categories", json={"name": "UserA-Only"}) + + r = await other_client.get("/categories") + names = [c["name"] for c in r.json()] + assert "UserA-Only" not in names + + +async def test_cannot_rename_other_users_category(client, other_client): + create = await client.post("/categories", json={"name": "Owned"}) + cat_id = create.json()["id"] + + r = await other_client.patch(f"/categories/{cat_id}", json={"name": "Hacked"}) + assert r.status_code == 404 + + +async def test_cannot_delete_other_users_category(client, other_client): + create = await client.post("/categories", json={"name": "Owned"}) + cat_id = create.json()["id"] + + r = await other_client.delete(f"/categories/{cat_id}") + assert r.status_code == 404 diff --git a/features/doc-service/tests/test_documents.py b/features/doc-service/tests/test_documents.py new file mode 100644 index 0000000..bed2915 --- /dev/null +++ b/features/doc-service/tests/test_documents.py @@ -0,0 +1,244 @@ +""" +Tests for the /documents endpoints. + +Synthetic (minimal_pdf / invoice_pdf) tests run always. +Live tests that use real PDFs from tests/pdfs/ are skipped when that +directory is empty — drop any PDF there to activate them. 
+""" +import io +import json +from pathlib import Path +from unittest.mock import patch + +import pytest + + +pytestmark = pytest.mark.asyncio + + +# ── Helpers ──────────────────────────────────────────────────────────────────── + +def _pdf_upload(filename: str, data: bytes): + return {"file": (filename, io.BytesIO(data), "application/pdf")} + + +# ── List / empty state ───────────────────────────────────────────────────────── + +async def test_list_documents_empty(client): + r = await client.get("/documents") + assert r.status_code == 200 + assert r.json() == [] + + +# ── Upload ───────────────────────────────────────────────────────────────────── + +async def test_upload_returns_202(client, minimal_pdf): + with patch("app.routers.documents.process_document"): + r = await client.post("/documents/upload", files=_pdf_upload("test.pdf", minimal_pdf)) + assert r.status_code == 202 + data = r.json() + assert data["filename"] == "test.pdf" + assert data["status"] == "pending" + assert "id" in data + + +async def test_upload_non_pdf_rejected(client): + r = await client.post( + "/documents/upload", + files={"file": ("note.txt", io.BytesIO(b"hello"), "text/plain")}, + ) + assert r.status_code == 415 + + +async def test_upload_file_too_large(client): + big = b"%PDF-1.4\n" + b"x" * (21 * 1024 * 1024) + with patch("app.routers.documents.process_document"): + r = await client.post("/documents/upload", files=_pdf_upload("big.pdf", big)) + assert r.status_code == 413 + + +# ── Get / status ─────────────────────────────────────────────────────────────── + +async def test_get_document(client, minimal_pdf): + with patch("app.routers.documents.process_document"): + up = await client.post("/documents/upload", files=_pdf_upload("get.pdf", minimal_pdf)) + doc_id = up.json()["id"] + + r = await client.get(f"/documents/{doc_id}") + assert r.status_code == 200 + assert r.json()["id"] == doc_id + + +async def test_get_document_not_found(client): + r = await 
client.get("/documents/nonexistent-id") + assert r.status_code == 404 + + +async def test_get_document_status(client, minimal_pdf): + with patch("app.routers.documents.process_document"): + up = await client.post("/documents/upload", files=_pdf_upload("status.pdf", minimal_pdf)) + doc_id = up.json()["id"] + + r = await client.get(f"/documents/{doc_id}/status") + assert r.status_code == 200 + assert r.json()["status"] == "pending" + + +async def test_other_user_cannot_see_document(client, other_client, minimal_pdf): + with patch("app.routers.documents.process_document"): + up = await client.post("/documents/upload", files=_pdf_upload("private.pdf", minimal_pdf)) + doc_id = up.json()["id"] + + r = await other_client.get(f"/documents/{doc_id}") + assert r.status_code == 404 + + +# ── Patch document type ──────────────────────────────────────────────────────── + +async def test_patch_document_type(client, minimal_pdf): + with patch("app.routers.documents.process_document"): + up = await client.post("/documents/upload", files=_pdf_upload("patch.pdf", minimal_pdf)) + doc_id = up.json()["id"] + + r = await client.patch(f"/documents/{doc_id}/type", json={"document_type": "receipt"}) + assert r.status_code == 200 + assert r.json()["document_type"] == "receipt" + + +# ── Delete ───────────────────────────────────────────────────────────────────── + +async def test_delete_document(client, minimal_pdf): + with patch("app.routers.documents.process_document"): + up = await client.post("/documents/upload", files=_pdf_upload("del.pdf", minimal_pdf)) + doc_id = up.json()["id"] + + r = await client.delete(f"/documents/{doc_id}") + assert r.status_code == 204 + + r2 = await client.get(f"/documents/{doc_id}") + assert r2.status_code == 404 + + +async def test_delete_document_not_found(client): + r = await client.delete("/documents/nonexistent-id") + assert r.status_code == 404 + + +async def test_other_user_cannot_delete_document(client, other_client, minimal_pdf): + with 
patch("app.routers.documents.process_document"): + up = await client.post("/documents/upload", files=_pdf_upload("owned.pdf", minimal_pdf)) + doc_id = up.json()["id"] + + r = await other_client.delete(f"/documents/{doc_id}") + assert r.status_code == 404 + + # Original owner can still get it + r2 = await client.get(f"/documents/{doc_id}") + assert r2.status_code == 200 + + +# ── Category assignment ──────────────────────────────────────────────────────── + +async def test_assign_and_remove_category(client, minimal_pdf): + with patch("app.routers.documents.process_document"): + up = await client.post("/documents/upload", files=_pdf_upload("cat.pdf", minimal_pdf)) + doc_id = up.json()["id"] + + cat = await client.post("/categories", json={"name": "Ops"}) + cat_id = cat.json()["id"] + + r = await client.post(f"/documents/{doc_id}/categories/{cat_id}") + assert r.status_code == 204 + + doc = await client.get(f"/documents/{doc_id}") + cat_names = [c["name"] for c in doc.json()["categories"]] + assert "Ops" in cat_names + + r2 = await client.delete(f"/documents/{doc_id}/categories/{cat_id}") + assert r2.status_code == 204 + + doc2 = await client.get(f"/documents/{doc_id}") + assert doc2.json()["categories"] == [] + + +async def test_assign_category_idempotent(client, minimal_pdf): + """Assigning the same category twice should not error.""" + with patch("app.routers.documents.process_document"): + up = await client.post("/documents/upload", files=_pdf_upload("idem.pdf", minimal_pdf)) + doc_id = up.json()["id"] + cat_id = (await client.post("/categories", json={"name": "Idem"})).json()["id"] + + await client.post(f"/documents/{doc_id}/categories/{cat_id}") + r = await client.post(f"/documents/{doc_id}/categories/{cat_id}") + assert r.status_code == 204 # no error on duplicate + + +async def test_cannot_assign_other_users_category(client, other_client, minimal_pdf): + with patch("app.routers.documents.process_document"): + up = await client.post("/documents/upload", 
files=_pdf_upload("x.pdf", minimal_pdf)) + doc_id = up.json()["id"] + + # other_client creates a category + other_cat = await other_client.post("/categories", json={"name": "Foreign"}) + other_cat_id = other_cat.json()["id"] + + # original user tries to assign it + r = await client.post(f"/documents/{doc_id}/categories/{other_cat_id}") + assert r.status_code == 404 + + +# ── AI processing integration (with mock AI) ────────────────────────────────── + +async def test_processing_sets_extracted_data(client, invoice_pdf, mock_ai): + """Upload + wait for background processing; verify extracted_data is populated.""" + r = await client.post("/documents/upload", files=_pdf_upload("invoice.pdf", invoice_pdf)) + assert r.status_code == 202 + doc_id = r.json()["id"] + + # Background tasks run synchronously in test context once response is sent. + # Poll the status endpoint briefly. + import asyncio + for _ in range(20): + status_r = await client.get(f"/documents/{doc_id}/status") + if status_r.json()["status"] in ("done", "failed"): + break + await asyncio.sleep(0.1) + + doc_r = await client.get(f"/documents/{doc_id}") + doc = doc_r.json() + assert doc["status"] == "done" + assert doc["document_type"] == "invoice" + assert doc["extracted_data"] is not None + + extracted = json.loads(doc["extracted_data"]) + assert extracted["vendor_name"] == "ACME Corp" + assert "suggested_categories" in extracted + assert isinstance(extracted["suggested_categories"], list) + assert len(extracted["suggested_categories"]) > 0 + + +# ── Live tests (require real PDFs in tests/pdfs/) ───────────────────────────── + +async def test_live_upload_real_pdf(client, real_pdfs, mock_ai): + """Upload each real PDF from tests/pdfs/ and verify it reaches 'done'.""" + import asyncio + for pdf_path in real_pdfs: + data = pdf_path.read_bytes() + r = await client.post( + "/documents/upload", + files=_pdf_upload(pdf_path.name, data), + ) + assert r.status_code == 202, f"Upload failed for {pdf_path.name}: 
{r.text}" + doc_id = r.json()["id"] + + for _ in range(30): + status_r = await client.get(f"/documents/{doc_id}/status") + if status_r.json()["status"] in ("done", "failed"): + break + await asyncio.sleep(0.2) + + final = await client.get(f"/documents/{doc_id}") + assert final.json()["status"] == "done", ( + f"{pdf_path.name} ended with status '{final.json()['status']}': " + f"{final.json().get('error_message')}" + ) diff --git a/frontend/src/pages/DocumentsPage.tsx b/frontend/src/pages/DocumentsPage.tsx index ca68c8b..19ea90c 100644 --- a/frontend/src/pages/DocumentsPage.tsx +++ b/frontend/src/pages/DocumentsPage.tsx @@ -36,6 +36,57 @@ function StatusBadge({ status }: { status: DocumentOut["status"] }) { ); } +// ── Category suggestions ──────────────────────────────────────────────────── + +interface SuggestionChipProps { + name: string; + existing: CategoryOut | undefined; + onAccept: (name: string, existing: CategoryOut | undefined) => void; + onDismiss: (name: string) => void; +} + +function SuggestionChip({ name, existing, onAccept, onDismiss }: SuggestionChipProps) { + return ( + + {name} + + + + ); +} + +// ── Document row ──────────────────────────────────────────────────────────── + function DocumentRow({ doc, categories, @@ -48,11 +99,36 @@ function DocumentRow({ const [expanded, setExpanded] = useState(false); const qc = useQueryClient(); + // Parse extracted_data once + let extractedData: Record | null = null; + if (doc.extracted_data) { + try { extractedData = JSON.parse(doc.extracted_data); } catch { /* ignore */ } + } + + const tags: string[] = []; + if (doc.tags) { + try { + const parsed = JSON.parse(doc.tags); + if (Array.isArray(parsed)) tags.push(...parsed); + } catch { /* ignore */ } + } + + // Suggested categories from AI — dismissed ones are tracked locally + const allSuggestions: string[] = Array.isArray(extractedData?.suggested_categories) + ? 
(extractedData!.suggested_categories as string[]) + : []; + const assignedNames = new Set(doc.categories.map((c) => c.name)); + const [dismissed, setDismissed] = useState>(new Set()); + + // Only show suggestions that haven't been assigned yet and haven't been dismissed + const pendingSuggestions = allSuggestions.filter( + (s) => !assignedNames.has(s) && !dismissed.has(s) + ); + // Poll status while pending/processing const { data: liveStatus } = useQuery({ queryKey: ["docStatus", doc.id], queryFn: () => getDocumentStatus(doc.id), - // v5: refetchInterval receives the Query object; data lives in query.state.data refetchInterval: (query) => { const s = query.state.data?.status; return s === "pending" || s === "processing" ? 3000 : false; @@ -76,38 +152,36 @@ function DocumentRow({ onSuccess: () => qc.invalidateQueries({ queryKey: ["documents"] }), }); + const createAndAssignMut = useMutation({ + mutationFn: async (name: string) => { + const cat = await createCategory(name); + await assignCategory(doc.id, cat.id); + return cat; + }, + onSuccess: (_cat, name) => { + setDismissed((prev) => new Set([...prev, name])); + qc.invalidateQueries({ queryKey: ["documents"] }); + qc.invalidateQueries({ queryKey: ["categories"] }); + }, + }); + + const handleAcceptSuggestion = (name: string, existing: CategoryOut | undefined) => { + if (existing) { + assignMut.mutate({ catId: existing.id }); + setDismissed((prev) => new Set([...prev, name])); + } else { + createAndAssignMut.mutate(name); + } + }; + const assignedIds = new Set(doc.categories.map((c) => c.id)); const unassigned = categories.filter((c) => !assignedIds.has(c.id)); - let extractedData: Record | null = null; - if (doc.extracted_data) { - try { - extractedData = JSON.parse(doc.extracted_data); - } catch { - // ignore - } - } - - const tags: string[] = []; - if (doc.tags) { - try { - const parsed = JSON.parse(doc.tags); - if (Array.isArray(parsed)) tags.push(...parsed); - } catch { - // ignore - } - } - return (
+ {/* Row header */}
setExpanded((e) => !e)} > {doc.filename} @@ -119,10 +193,7 @@ function DocumentRow({ {(doc.file_size / 1024).toFixed(0)} KB
+ {/* Expanded detail */} {expanded && (
+ + {/* Tags */} {tags.length > 0 && (
Tags:{" "} {tags.map((t) => ( - + {t} ))}
)} + {/* Extracted fields (excluding internal-only keys) */} {extractedData && (
Extracted data: {Object.entries(extractedData) - .filter(([k]) => k !== "tags") + .filter(([k]) => k !== "tags" && k !== "suggested_categories") .map(([k, v]) => ( ))} @@ -186,25 +248,18 @@ function DocumentRow({ )} + {/* Error */} {doc.error_message && (
Error: {doc.error_message}
)} + {/* Assigned categories */}
Categories:{" "} {doc.categories.map((c) => ( - + {c.name}{" "}
+ + {/* AI-suggested categories */} + {pendingSuggestions.length > 0 && ( +
+ Suggested by AI: +
+ {pendingSuggestions.map((name) => { + const existing = categories.find( + (c) => c.name.toLowerCase() === name.toLowerCase() + ); + return ( + setDismissed((prev) => new Set([...prev, n]))} + /> + ); + })} +
+

+ "Assign" links an existing category · "Create & Assign" creates it first · ✕ dismisses the suggestion +

+
+ )} )} ); } +// ── Page ──────────────────────────────────────────────────────────────────── + export default function DocumentsPage() { const qc = useQueryClient(); const fileRef = useRef(null); @@ -317,15 +400,7 @@ export default function DocumentsPage() { Manage categories
{categories.map((c) => ( - + {c.name} ))}
{k} {Array.isArray(v) - ? v.length === 0 - ? "—" - : JSON.stringify(v, null, 2) - : v !== null && v !== undefined && v !== "" - ? String(v) - : "—"} + ? v.length === 0 ? "—" : JSON.stringify(v, null, 2) + : v !== null && v !== undefined && v !== "" ? String(v) : "—"}