Add doc-service tests, AI category suggestions, LM Studio default

- pytest suite for doc-service: 20+ tests covering category CRUD,
  document upload/get/delete/patch, ownership isolation, category
  assignment, AI processing (mock), and live PDF tests (auto-skipped
  when tests/pdfs/ is empty)
- Minimal in-memory PDF builder in conftest so tests run without any
  fixture files; real PDFs can be dropped into tests/pdfs/ to activate
  live extraction tests
- AI prompt updated to return suggested_categories (2–5 short names)
- Frontend: SuggestionChip component in DocumentRow shows AI-suggested
  categories after processing; "Assign" links to an existing category,
  "Create & Assign" creates it first, ✕ dismisses locally
- Default AI provider changed to LM Studio at
  http://host.docker.internal:1234/v1 (host.docker.internal resolves
  to the macOS host from inside Docker Desktop)
- tests/pdfs/ directory tracked via .gitkeep; *.pdf excluded by .gitignore

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
curo1305
2026-04-14 11:27:57 +02:00
parent b8238e03ea
commit 1cdc532fff
12 changed files with 755 additions and 75 deletions
+228
View File
@@ -0,0 +1,228 @@
"""
Shared pytest fixtures for doc-service tests.
Uses an in-memory SQLite database so tests run without a real PostgreSQL.
DATA_DIR is overridden to a fresh tmp directory for each test that uses a client fixture.
The AI provider is mocked by the ``mock_ai`` fixture so tests that request it never hit a real endpoint.
"""
import io
import struct
import zlib
from pathlib import Path
from typing import AsyncGenerator
from unittest.mock import AsyncMock, patch
import pytest
import pytest_asyncio
from httpx import ASGITransport, AsyncClient
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
import app.models # noqa: F401 — registers all ORM classes
from app.database import Base, get_db
from app.main import app
# ── Test database ──────────────────────────────────────────────────────────────
# In-memory SQLite (aiosqlite driver) used in place of PostgreSQL for tests.
TEST_DB_URL = "sqlite+aiosqlite:///:memory:"
# Single module-level engine shared by every fixture below; check_same_thread=False
# is forwarded to the driver's connect call.
_engine = create_async_engine(TEST_DB_URL, connect_args={"check_same_thread": False})
# Session factory bound to the test engine; expire_on_commit=False keeps loaded
# attributes readable after a commit.
_TestSessionLocal = async_sessionmaker(_engine, expire_on_commit=False)
@pytest_asyncio.fixture(scope="session", autouse=True)
async def create_tables():
    """
    Session-wide schema setup/teardown for the test database.

    Before the first test, every table registered on ``Base.metadata`` is
    created on the shared test engine; after the last test they are dropped.
    """
    schema = Base.metadata

    # Setup: build the schema inside its own engine-level transaction.
    async with _engine.begin() as setup_conn:
        await setup_conn.run_sync(schema.create_all)

    yield

    # Teardown: drop the schema again once the session is over.
    async with _engine.begin() as teardown_conn:
        await teardown_conn.run_sync(schema.drop_all)
@pytest_asyncio.fixture
async def db_session() -> AsyncGenerator[AsyncSession, None]:
    """
    Yield an ``AsyncSession`` bound to a connection whose transaction is
    rolled back once the test has finished.

    The session is bound directly to the connection opened by
    ``_engine.begin()``, and that connection is rolled back after the yield.
    """
    # NOTE(review): the rollback is issued while the session context is still
    # open — confirm this matches the intended nesting of the original.
    async with _engine.begin() as connection, AsyncSession(connection) as session:
        yield session
        await connection.rollback()
# ── Override get_db ────────────────────────────────────────────────────────────
async def _override_get_db():
    """
    Replacement for the app's ``get_db`` dependency.

    Opens a new session from the test session factory, yields it, and closes
    it when the consumer is done.
    """
    session = _TestSessionLocal()
    async with session:
        yield session
# ── Mock AI provider ───────────────────────────────────────────────────────────
# Canned payload returned by the mocked ``classify_document`` call (see mock_ai).
MOCK_AI_RESULT = {
    # Classification and totals
    "document_type": "invoice",
    "total_amount": "99.00",
    "currency": "EUR",
    # Parties and addresses
    "vendor_name": "ACME Corp",
    "customer_name": "Test Customer",
    "billing_address": "1 Main St",
    "customer_address": "2 Other St",
    # Invoice identifiers and dates
    "invoice_number": "INV-001",
    "invoice_date": "2026-04-14",
    "due_date": "2026-05-14",
    # Free-form extras
    "tags": ["invoice", "acme"],
    "line_items": [
        {"description": "Widget", "amount": "99.00"},
    ],
    "suggested_categories": ["Suppliers", "Operating Expenses"],
}
@pytest.fixture
def mock_ai():
    """
    Swap the AI provider lookup in the documents router for a mock.

    While the fixture is active, ``app.routers.documents.get_provider`` returns
    a mock provider whose awaitable ``classify_document`` resolves to
    ``MOCK_AI_RESULT``. The mock provider is yielded so tests can inspect calls.
    """
    classify = AsyncMock(return_value=MOCK_AI_RESULT)
    fake_provider = AsyncMock()
    fake_provider.classify_document = classify

    provider_patch = patch("app.routers.documents.get_provider", return_value=fake_provider)
    with provider_patch:
        yield fake_provider
# ── HTTP client ────────────────────────────────────────────────────────────────
# User id sent in the x-user-id header by the default ``client`` fixture.
TEST_USER_ID = "test-user-1"
# Second user id, sent by ``other_client`` for ownership-isolation tests.
OTHER_USER_ID = "test-user-2"
async def _client_as(user_id: str, data_dir) -> AsyncGenerator[AsyncClient, None]:
    """
    Yield an ``AsyncClient`` wired to the FastAPI app, acting as *user_id*.

    Shared implementation behind the ``client`` and ``other_client`` fixtures:
    - ``get_db`` is overridden to use the test SQLite DB
    - ``app.services.storage.settings`` is patched and its ``DATA_DIR`` pointed
      at *data_dir*
    - every request carries ``x-user-id: <user_id>``

    On exit only the ``get_db`` override installed here is removed, so
    overrides registered elsewhere are left alone.
    """
    app.dependency_overrides[get_db] = _override_get_db
    try:
        # NOTE(review): this replaces the whole settings object seen by the
        # storage module with a mock; only DATA_DIR is given a real value.
        with patch("app.services.storage.settings") as mock_settings:
            mock_settings.DATA_DIR = str(data_dir)
            async with AsyncClient(
                transport=ASGITransport(app=app),
                base_url="http://test",
                headers={"x-user-id": user_id},
            ) as ac:
                yield ac
    finally:
        # Runs even if building the client fails, so the override never leaks.
        app.dependency_overrides.pop(get_db, None)


@pytest_asyncio.fixture
async def client(tmp_path) -> AsyncGenerator[AsyncClient, None]:
    """
    AsyncClient wired to the FastAPI app with:
    - get_db overridden to use test SQLite DB
    - DATA_DIR pointed to a tmp directory
    - Default X-User-Id header set to TEST_USER_ID
    """
    async for ac in _client_as(TEST_USER_ID, tmp_path):
        yield ac


@pytest_asyncio.fixture
async def other_client(tmp_path) -> AsyncGenerator[AsyncClient, None]:
    """Client acting as a different user — used to test ownership isolation."""
    async for ac in _client_as(OTHER_USER_ID, tmp_path):
        yield ac
# ── Minimal PDF bytes ──────────────────────────────────────────────────────────
def _make_minimal_pdf(text: str = "Test invoice. Total: EUR 99.00. Vendor: ACME Corp.") -> bytes:
    """
    Build a minimal single-page PDF containing *text*, by hand.

    The document has five objects (catalog, page tree, page, Flate-compressed
    content stream, Helvetica font) followed by an xref table and trailer.
    It is intended to be openable by pdfplumber for text extraction; no
    external libraries are needed to build it.

    Args:
        text: The text drawn on the page. Parentheses and backslashes are
            escaped so they cannot terminate or corrupt the PDF literal
            string. Characters outside Windows-1252 are replaced with ``?``
            because the font is declared with ``/WinAnsiEncoding``.

    Returns:
        The complete PDF file as bytes.
    """
    # "(", ")" and "\" are special inside a PDF literal string and must be
    # backslash-escaped; a single translate pass avoids double-escaping.
    escaped = text.translate(str.maketrans({"\\": "\\\\", "(": "\\(", ")": "\\)"}))

    # The font uses WinAnsiEncoding (single-byte), so encode to match it
    # rather than emitting multi-byte UTF-8 sequences.
    content_stream = f"BT /F1 12 Tf 50 750 Td ({escaped}) Tj ET".encode(
        "cp1252", errors="replace"
    )
    compressed = zlib.compress(content_stream)

    def obj(n: int, body: bytes) -> bytes:
        """Wrap *body* as indirect object number *n*, generation 0."""
        return f"{n} 0 obj\n".encode() + body + b"\nendobj\n"

    objects = [
        # 1: Catalog
        obj(1, b"<< /Type /Catalog /Pages 2 0 R >>"),
        # 2: Pages
        obj(2, b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>"),
        # 3: Page
        obj(3, (
            b"<< /Type /Page /Parent 2 0 R "
            b"/MediaBox [0 0 612 792] "
            b"/Contents 4 0 R "
            b"/Resources << /Font << /F1 5 0 R >> >> >>"
        )),
        # 4: Content stream
        obj(4, (
            f"<< /Filter /FlateDecode /Length {len(compressed)} >>\n".encode()
            + b"stream\n" + compressed + b"\nendstream"
        )),
        # 5: Font
        obj(5, (
            b"<< /Type /Font /Subtype /Type1 "
            b"/BaseFont /Helvetica "
            b"/Encoding /WinAnsiEncoding >>"
        )),
    ]

    header = b"%PDF-1.4\n"

    # Record the byte offset of each object; the xref table needs them.
    offsets = []
    position = len(header)
    for chunk in objects:
        offsets.append(position)
        position += len(chunk)
    xref_offset = position  # the xref table starts right after the last object

    # Entry 0 is the mandatory free-list head; every entry is exactly 20 bytes.
    xref = b"".join(
        [f"xref\n0 {len(objects) + 1}\n0000000000 65535 f \n".encode()]
        + [f"{off:010d} 00000 n \n".encode() for off in offsets]
    )
    trailer = (
        f"trailer\n<< /Size {len(objects) + 1} /Root 1 0 R >>\n"
        f"startxref\n{xref_offset}\n%%EOF\n"
    ).encode()

    return b"".join([header, *objects, xref, trailer])
@pytest.fixture
def minimal_pdf() -> bytes:
    """Bytes of a generated single-page PDF carrying the builder's default text."""
    pdf_bytes = _make_minimal_pdf()
    return pdf_bytes
@pytest.fixture
def invoice_pdf() -> bytes:
    """Bytes of a generated single-page PDF whose text reads like a small invoice."""
    invoice_text = (
        "Invoice INV-001. Date: 2026-04-14. Due: 2026-05-14. "
        "Vendor: ACME Corp, 1 Main St. Customer: Test Customer, 2 Other St. "
        "Widget x1: EUR 99.00. Total: EUR 99.00."
    )
    return _make_minimal_pdf(invoice_text)
# ── Real PDF fixture (optional) ────────────────────────────────────────────────
def _pdf_fixtures_dir() -> Path:
    """Return the ``pdfs`` directory located next to this file."""
    here = Path(__file__).parent
    return here.joinpath("pdfs")
def pytest_collect_file(parent, file_path):
    """
    Collection hook that deliberately contributes nothing.

    Always returns ``None`` regardless of *parent* and *file_path*, i.e. no
    custom collector is produced for any file.
    """
    # NOTE(review): the function has no other effect; the pdfs/ directory is
    # located by _pdf_fixtures_dir(), not by this hook — confirm it is needed.
    return None
@pytest.fixture
def real_pdfs() -> list[Path]:
    """
    Returns a list of PDF paths from tests/pdfs/, sorted by path.

    Tests that use this fixture are skipped if the directory contains no
    ``*.pdf`` files.
    Drop any PDF into features/doc-service/tests/pdfs/ to run live tests.
    """
    # Sort so the order does not depend on filesystem enumeration order,
    # keeping live-test runs reproducible across machines.
    pdfs = sorted(_pdf_fixtures_dir().glob("*.pdf"))
    if not pdfs:
        pytest.skip("No PDFs in tests/pdfs/ — add a PDF file to run live upload tests")
    return pdfs