1cdc532fff
- pytest suite for doc-service: 20+ tests covering category CRUD, document upload/get/delete/patch, ownership isolation, category assignment, AI processing (mock), and live PDF tests (auto-skipped when tests/pdfs/ is empty) - Minimal in-memory PDF builder in conftest so tests run without any fixture files; real PDFs can be dropped into tests/pdfs/ to activate live extraction tests - AI prompt updated to return suggested_categories (2–5 short names) - Frontend: SuggestionChip component in DocumentRow shows AI-suggested categories after processing; "Assign" links to an existing category, "Create & Assign" creates it first, ✕ dismisses locally - Default AI provider changed to LM Studio at http://host.docker.internal:1234/v1 (host.docker.internal resolves to the macOS host from inside Docker Desktop) - tests/pdfs/ directory tracked via .gitkeep; *.pdf excluded by .gitignore Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
229 lines
7.8 KiB
Python
229 lines
7.8 KiB
Python
"""
|
|
Shared pytest fixtures for doc-service tests.

Uses an in-memory SQLite database so tests run without a real PostgreSQL.
DATA_DIR is overridden to a fresh tmp directory for each test (via tmp_path).
The AI provider is mocked through the mock_ai fixture so tests that request it
never hit a real endpoint.
|
|
"""
|
|
import io
|
|
import struct
|
|
import zlib
|
|
from pathlib import Path
|
|
from typing import AsyncGenerator
|
|
from unittest.mock import AsyncMock, patch
|
|
|
|
import pytest
|
|
import pytest_asyncio
|
|
from httpx import ASGITransport, AsyncClient
|
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
|
|
|
import app.models # noqa: F401 — registers all ORM classes
|
|
from app.database import Base, get_db
|
|
from app.main import app
|
|
|
|
# ── Test database ──────────────────────────────────────────────────────────────
|
|
|
|
# In-memory SQLite reached through the aiosqlite async driver.
TEST_DB_URL = "sqlite+aiosqlite:///:memory:"

# One engine shared by every fixture in this module; ``check_same_thread`` is
# forwarded to the SQLite driver's connect call via ``connect_args``.
_engine = create_async_engine(
    TEST_DB_URL,
    connect_args={"check_same_thread": False},
)

# Session factory used by the ``get_db`` override. ``expire_on_commit=False``
# keeps already-loaded attributes readable after a commit.
_TestSessionLocal = async_sessionmaker(_engine, expire_on_commit=False)
|
|
|
|
|
|
@pytest_asyncio.fixture(scope="session", autouse=True)
async def create_tables():
    """Set up the schema before the first test and tear it down after the last.

    Session-scoped and autouse: every table registered on ``Base.metadata`` is
    created once up front, and dropped again once the session finishes.
    """
    metadata = Base.metadata

    # Setup: emit CREATE TABLE for all mapped models.
    async with _engine.begin() as setup_conn:
        await setup_conn.run_sync(metadata.create_all)

    yield

    # Teardown: remove the schema again.
    async with _engine.begin() as teardown_conn:
        await teardown_conn.run_sync(metadata.drop_all)
|
|
|
|
|
|
@pytest_asyncio.fixture
async def db_session() -> AsyncGenerator[AsyncSession, None]:
    """Yield an ``AsyncSession`` whose work is discarded when the test ends.

    The session is bound to a connection on which a transaction has already
    been started; that transaction is rolled back once the test is done.
    """
    async with _engine.begin() as connection:
        async with AsyncSession(connection) as test_session:
            yield test_session
        # Throw away whatever the test wrote through this connection.
        await connection.rollback()
|
|
|
|
|
|
# ── Override get_db ────────────────────────────────────────────────────────────
|
|
|
|
async def _override_get_db():
    """Stand-in for the app's ``get_db`` dependency.

    Opens a session from the test session factory, hands it to the caller,
    and closes it when the generator is resumed.
    """
    async with _TestSessionLocal() as test_session:
        yield test_session
|
|
|
|
|
|
# ── Mock AI provider ───────────────────────────────────────────────────────────
|
|
|
|
# Canned classification payload returned by the mocked AI provider.
MOCK_AI_RESULT = {
    # Classification and totals
    "document_type": "invoice",
    "total_amount": "99.00",
    "currency": "EUR",
    # Parties
    "vendor_name": "ACME Corp",
    "customer_name": "Test Customer",
    "billing_address": "1 Main St",
    "customer_address": "2 Other St",
    # Invoice identity and dates
    "invoice_number": "INV-001",
    "invoice_date": "2026-04-14",
    "due_date": "2026-05-14",
    # Collections
    "tags": ["invoice", "acme"],
    "line_items": [{"description": "Widget", "amount": "99.00"}],
    "suggested_categories": ["Suppliers", "Operating Expenses"],
}
|
|
|
|
|
|
@pytest.fixture
def mock_ai():
    """Replace the documents router's provider lookup with a canned mock.

    While the fixture is active, ``app.routers.documents.get_provider``
    returns a mock provider whose awaitable ``classify_document`` resolves to
    ``MOCK_AI_RESULT``. The provider mock is yielded so tests can inspect
    how it was called.
    """
    classify_stub = AsyncMock(return_value=MOCK_AI_RESULT)
    provider_mock = AsyncMock()
    provider_mock.classify_document = classify_stub

    provider_patch = patch(
        "app.routers.documents.get_provider",
        return_value=provider_mock,
    )
    with provider_patch:
        yield provider_mock
|
|
|
|
|
|
# ── HTTP client ────────────────────────────────────────────────────────────────
|
|
|
|
# Identity sent by the default ``client`` fixture in the x-user-id header.
TEST_USER_ID = "test-user-1"
# Second identity, sent by ``other_client`` for ownership-isolation tests.
OTHER_USER_ID = "test-user-2"
|
|
|
|
|
|
@pytest_asyncio.fixture
async def client(tmp_path) -> AsyncGenerator[AsyncClient, None]:
    """
    HTTP client that talks to the FastAPI app in-process as TEST_USER_ID.

    While the fixture is active:
    - ``get_db`` is overridden with the test SQLite session provider
    - ``app.services.storage.settings`` is patched and its DATA_DIR points
      at this test's ``tmp_path``
    - every request carries ``x-user-id: TEST_USER_ID`` by default

    All dependency overrides are cleared on teardown.
    """
    app.dependency_overrides[get_db] = _override_get_db

    settings_patch = patch("app.services.storage.settings")
    with settings_patch as fake_settings:
        fake_settings.DATA_DIR = str(tmp_path)

        transport = ASGITransport(app=app)
        default_headers = {"x-user-id": TEST_USER_ID}
        async with AsyncClient(
            transport=transport,
            base_url="http://test",
            headers=default_headers,
        ) as http_client:
            yield http_client

    app.dependency_overrides.clear()
|
|
|
|
|
|
@pytest_asyncio.fixture
async def other_client(tmp_path) -> AsyncGenerator[AsyncClient, None]:
    """
    HTTP client that talks to the FastAPI app in-process as OTHER_USER_ID.

    Wired the same way as a TEST_USER_ID client (test DB override, patched
    storage settings with DATA_DIR at ``tmp_path``) but sends
    ``x-user-id: OTHER_USER_ID``, so tests can check ownership isolation.
    All dependency overrides are cleared on teardown.
    """
    app.dependency_overrides[get_db] = _override_get_db

    settings_patch = patch("app.services.storage.settings")
    with settings_patch as fake_settings:
        fake_settings.DATA_DIR = str(tmp_path)

        transport = ASGITransport(app=app)
        default_headers = {"x-user-id": OTHER_USER_ID}
        async with AsyncClient(
            transport=transport,
            base_url="http://test",
            headers=default_headers,
        ) as http_client:
            yield http_client

    app.dependency_overrides.clear()
|
|
|
|
|
|
# ── Minimal PDF bytes ──────────────────────────────────────────────────────────
|
|
|
|
def _make_minimal_pdf(text: str = "Test invoice. Total: EUR 99.00. Vendor: ACME Corp.") -> bytes:
    """
    Build a minimal single-page PDF whose only content is ``text``.

    The file is assembled by hand (header, five indirect objects, xref table,
    trailer) so no PDF library or fixture file is needed.

    Args:
        text: Text drawn on the page with the built-in Helvetica font.
            Backslashes and parentheses are escaped so they cannot break the
            PDF literal string. Characters that cannot be represented in
            cp1252 (the code page behind /WinAnsiEncoding) are replaced
            with ``?``.

    Returns:
        The complete PDF file as bytes.
    """
    # Backslash and parentheses are syntax inside a PDF literal string "(...)".
    # The backslash must be escaped first so the escapes added for the
    # parentheses are not themselves doubled.
    escaped = text.replace("\\", "\\\\").replace("(", "\\(").replace(")", "\\)")

    # The font object below declares /WinAnsiEncoding, so the string bytes
    # must be cp1252 character codes rather than UTF-8 sequences.
    content_stream = f"BT /F1 12 Tf 50 750 Td ({escaped}) Tj ET".encode(
        "cp1252", errors="replace"
    )
    compressed = zlib.compress(content_stream)

    def obj(n: int, body: bytes) -> bytes:
        """Wrap ``body`` as indirect object ``n`` (generation 0)."""
        return f"{n} 0 obj\n".encode() + body + b"\nendobj\n"

    objects: list[bytes] = [
        # 1: Catalog
        obj(1, b"<< /Type /Catalog /Pages 2 0 R >>"),
        # 2: Pages
        obj(2, b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>"),
        # 3: Page
        obj(3, (
            b"<< /Type /Page /Parent 2 0 R "
            b"/MediaBox [0 0 612 792] "
            b"/Contents 4 0 R "
            b"/Resources << /Font << /F1 5 0 R >> >> >>"
        )),
        # 4: Content stream (Flate-compressed; /Length is the compressed size)
        obj(4, (
            f"<< /Filter /FlateDecode /Length {len(compressed)} >>\n".encode()
            + b"stream\n" + compressed + b"\nendstream"
        )),
        # 5: Font
        obj(5, (
            b"<< /Type /Font /Subtype /Type1 "
            b"/BaseFont /Helvetica "
            b"/Encoding /WinAnsiEncoding >>"
        )),
    ]

    # Record the byte offset of every object; the xref table needs them.
    header = b"%PDF-1.4\n"
    offsets: list[int] = []
    position = len(header)
    for chunk in objects:
        offsets.append(position)
        position += len(chunk)
    body = b"".join(objects)
    xref_offset = position

    # Entry 0 is the mandatory free-list head; every entry is exactly
    # 20 bytes long, including the trailing space + newline.
    xref = b"".join(
        [f"xref\n0 {len(objects) + 1}\n0000000000 65535 f \n".encode()]
        + [f"{off:010d} 00000 n \n".encode() for off in offsets]
    )

    trailer = (
        f"trailer\n<< /Size {len(objects) + 1} /Root 1 0 R >>\n"
        f"startxref\n{xref_offset}\n%%EOF\n"
    ).encode()

    return header + body + xref + trailer
|
|
|
|
|
|
@pytest.fixture
def minimal_pdf() -> bytes:
    """Bytes of a minimal PDF built with the builder's default text."""
    pdf_bytes = _make_minimal_pdf()
    return pdf_bytes
|
|
|
|
|
|
@pytest.fixture
def invoice_pdf() -> bytes:
    """Bytes of a minimal PDF carrying invoice-like text.

    The text contains an invoice number, dates, vendor, customer, one line
    item and a total.
    """
    invoice_text = (
        "Invoice INV-001. Date: 2026-04-14. Due: 2026-05-14. "
        "Vendor: ACME Corp, 1 Main St. Customer: Test Customer, 2 Other St. "
        "Widget x1: EUR 99.00. Total: EUR 99.00."
    )
    return _make_minimal_pdf(invoice_text)
|
|
|
|
|
|
# ── Real PDF fixture (optional) ────────────────────────────────────────────────
|
|
|
|
def _pdf_fixtures_dir() -> Path:
    """Return the ``pdfs`` directory that sits next to this file."""
    here = Path(__file__).parent
    return here.joinpath("pdfs")
|
|
|
|
|
|
def pytest_collect_file(parent, file_path):
    """No-op collection hook: always returns None, whatever the path.

    Kept as a placeholder; it contributes no custom collector for any file,
    including those under the pdfs/ directory.
    """
    return None
|
|
|
|
|
|
@pytest.fixture
def real_pdfs() -> list[Path]:
    """
    Returns the PDF paths found in tests/pdfs/, sorted by path.

    Tests that use this fixture are skipped if the directory contains no
    ``*.pdf`` files.
    Drop any PDF into features/doc-service/tests/pdfs/ to run live tests.

    Returns:
        A non-empty, sorted list of PDF paths.
    """
    # Path.glob yields entries in arbitrary, filesystem-dependent order;
    # sorting keeps live-test ordering reproducible across machines.
    pdfs = sorted(_pdf_fixtures_dir().glob("*.pdf"))
    if not pdfs:
        pytest.skip("No PDFs in tests/pdfs/ — add a PDF file to run live upload tests")
    return pdfs
|