""" Shared pytest fixtures for doc-service tests. Uses an in-memory SQLite database so tests run without a real PostgreSQL. The DATA_DIR is overridden to a tmp directory per test session. The AI provider is mocked so tests never hit a real endpoint. """ import io import struct import zlib from pathlib import Path from typing import AsyncGenerator from unittest.mock import AsyncMock, patch import pytest import pytest_asyncio from httpx import ASGITransport, AsyncClient from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine import app.models # noqa: F401 — registers all ORM classes from app.database import Base, get_db from app.main import app # ── Test database ────────────────────────────────────────────────────────────── TEST_DB_URL = "sqlite+aiosqlite:///:memory:" _engine = create_async_engine(TEST_DB_URL, connect_args={"check_same_thread": False}) _TestSessionLocal = async_sessionmaker(_engine, expire_on_commit=False) @pytest_asyncio.fixture(scope="session", autouse=True) async def create_tables(): """Create all tables once per test session.""" async with _engine.begin() as conn: await conn.run_sync(Base.metadata.create_all) yield async with _engine.begin() as conn: await conn.run_sync(Base.metadata.drop_all) @pytest_asyncio.fixture async def db_session() -> AsyncGenerator[AsyncSession, None]: """Provide a transactional test DB session that is rolled back after each test.""" async with _engine.begin() as conn: async with AsyncSession(conn) as session: yield session await conn.rollback() # ── Override get_db ──────────────────────────────────────────────────────────── async def _override_get_db(): async with _TestSessionLocal() as session: yield session # ── Mock AI provider ─────────────────────────────────────────────────────────── MOCK_AI_RESULT = { "document_type": "invoice", "total_amount": "99.00", "currency": "EUR", "vendor_name": "ACME Corp", "customer_name": "Test Customer", "billing_address": "1 Main St", "customer_address": "2 Other St", "invoice_number": "INV-001", "invoice_date": "2026-04-14", "due_date": "2026-05-14", "tags": ["invoice", "acme"], "line_items": [{"description": "Widget", "amount": "99.00"}], "suggested_categories": ["Suppliers", "Operating Expenses"], } @pytest.fixture def mock_ai_service(): """Patch classify_document to return MOCK_AI_RESULT without hitting ai-service.""" with patch( "app.services.ai_client.classify_document", new=AsyncMock(return_value=MOCK_AI_RESULT), ) as mock: yield mock # ── HTTP client ──────────────────────────────────────────────────────────────── TEST_USER_ID = "test-user-1" OTHER_USER_ID = "test-user-2" @pytest_asyncio.fixture async def client(tmp_path) -> AsyncGenerator[AsyncClient, None]: """ AsyncClient wired to the FastAPI app with: - get_db overridden to use test SQLite DB - DATA_DIR pointed to a tmp directory - Default X-User-Id header set to TEST_USER_ID """ app.dependency_overrides[get_db] = _override_get_db with patch("app.services.storage.settings") as mock_settings: mock_settings.DATA_DIR = str(tmp_path) async with AsyncClient( transport=ASGITransport(app=app), base_url="http://test", headers={"x-user-id": TEST_USER_ID}, ) as ac: yield ac app.dependency_overrides.clear() @pytest_asyncio.fixture async def other_client(tmp_path) -> AsyncGenerator[AsyncClient, None]: """Client acting as a different user — used to test ownership isolation.""" app.dependency_overrides[get_db] = _override_get_db with patch("app.services.storage.settings") as mock_settings: mock_settings.DATA_DIR = str(tmp_path) async with AsyncClient( transport=ASGITransport(app=app), base_url="http://test", headers={"x-user-id": OTHER_USER_ID}, ) as ac: yield ac app.dependency_overrides.clear() # ── Minimal PDF bytes ────────────────────────────────────────────────────────── def _make_minimal_pdf(text: str = "Test invoice. Total: EUR 99.00. Vendor: ACME Corp.") -> bytes: """ Build a minimal but valid single-page PDF that pdfplumber can open and extract text from. No external libraries needed — hand-crafted byte structure. """ # We embed the text as a PDF content stream using a built-in font. content_stream = ( f"BT /F1 12 Tf 50 750 Td ({text}) Tj ET" ).encode() compressed = zlib.compress(content_stream) objects: list[bytes] = [] def obj(n: int, body: bytes) -> bytes: return f"{n} 0 obj\n".encode() + body + b"\nendobj\n" # 1: Catalog objects.append(obj(1, b"<< /Type /Catalog /Pages 2 0 R >>")) # 2: Pages objects.append(obj(2, b"<< /Type /Pages /Kids [3 0 R] /Count 1 >>")) # 3: Page objects.append(obj(3, ( b"<< /Type /Page /Parent 2 0 R " b"/MediaBox [0 0 612 792] " b"/Contents 4 0 R " b"/Resources << /Font << /F1 5 0 R >> >> >>" ))) # 4: Content stream objects.append(obj(4, ( f"<< /Filter /FlateDecode /Length {len(compressed)} >>\n".encode() + b"stream\n" + compressed + b"\nendstream" ))) # 5: Font objects.append(obj(5, ( b"<< /Type /Font /Subtype /Type1 " b"/BaseFont /Helvetica " b"/Encoding /WinAnsiEncoding >>" ))) # Build xref header = b"%PDF-1.4\n" body = b"" offsets = [] for o in objects: offsets.append(len(header) + len(body)) body += o xref_offset = len(header) + len(body) xref = f"xref\n0 {len(objects) + 1}\n0000000000 65535 f \n".encode() for off in offsets: xref += f"{off:010d} 00000 n \n".encode() trailer = ( f"trailer\n<< /Size {len(objects) + 1} /Root 1 0 R >>\n" f"startxref\n{xref_offset}\n%%EOF\n" ).encode() return header + body + xref + trailer @pytest.fixture def minimal_pdf() -> bytes: return _make_minimal_pdf() @pytest.fixture def invoice_pdf() -> bytes: return _make_minimal_pdf( "Invoice INV-001. Date: 2026-04-14. Due: 2026-05-14. " "Vendor: ACME Corp, 1 Main St. Customer: Test Customer, 2 Other St. " "Widget x1: EUR 99.00. Total: EUR 99.00." ) # ── Real PDF fixture (optional) ──────────────────────────────────────────────── def _pdf_fixtures_dir() -> Path: return Path(__file__).parent / "pdfs" def pytest_collect_file(parent, file_path): """Not used — just a marker so pytest knows about the pdfs/ directory.""" return None @pytest.fixture def real_pdfs() -> list[Path]: """ Returns a list of PDF paths from tests/pdfs/. Tests that use this fixture are skipped if the directory is empty. Drop any PDF into features/doc-service/tests/pdfs/ to run live tests. """ pdfs = list(_pdf_fixtures_dir().glob("*.pdf")) if not pdfs: pytest.skip("No PDFs in tests/pdfs/ — add a PDF file to run live upload tests") return pdfs