feat(01-05): final cutover — delete data/, prune config.py, async-only tests

- Delete backend/data/ tracked files (D-04): flat-file metadata, settings.json,
  topics.json, and uploaded files removed from git; backend/data/ added to
  .gitignore (empty dir remains on macOS due to ACL — no tracked files remain)
- Prune backend/config.py: remove DATA_DIR, UPLOADS_DIR, METADATA_DIR,
  TOPICS_FILE, ensure_data_dirs(); rebase SETTINGS_FILE as derived path from
  settings.data_dir (Phase 1 flat-file settings kept per plan decision)
- Prune backend/tests/conftest.py: remove isolated_data_dir autouse fixture
  and sync TestClient client fixture; add SQLite type compatibility shim
  (visit_INET/JSONB) so in-memory db_session can create tables with
  PostgreSQL-specific column types; add live_services_available fixture
- Rewrite backend/tests/test_documents.py: delete all legacy sync tests,
  remove all @pytest.mark.xfail markers; async-only document tests now
  use async_client + storage service directly for topic wiring
- Rewrite backend/tests/test_health.py: delete legacy sync test_health(client);
  remove @pytest.mark.xfail from test_health_checks_postgres_and_minio
- Port backend/tests/test_topics.py to async_client (sync client removed)
- Port backend/tests/test_settings.py to async_client with monkeypatch for
  SETTINGS_FILE isolation (settings remain flat-file in Phase 1)
This commit is contained in:
curo1305
2026-05-22 09:53:39 +02:00
parent c1931fd566
commit 970c8e4e44
17 changed files with 327 additions and 13135 deletions
+142 -93
View File
@@ -1,61 +1,161 @@
"""
pytest configuration: isolate each test with a temporary data directory.
pytest configuration for DocuVault backend tests.
Async fixtures (db_session, async_client) are added for Phase 1 — sync fixtures remain until Plan 05 cuts over.
Plan 05 cutover: all sync flat-file fixtures (isolated_data_dir, sync client)
removed. Tests use async fixtures only.
Service availability detection:
- INTEGRATION=1 env var: assume live Docker services are available
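- Default (no INTEGRATION): live_services_available probes localhost ports
5432, 9000 and 6379; unit tests use in-memory SQLite, and tests requiring real
PostgreSQL/MinIO/Redis are expected to skip when the probe fails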
SQLite compatibility note:
The ORM models use PostgreSQL-specific types (UUID, INET, JSONB). SQLite does
not understand these. The db_session fixture patches them before creating
tables so the in-memory engine can build the schema successfully.
"""
from __future__ import annotations
import os
import json
import socket
import pytest
import pytest_asyncio
import tempfile
import shutil
from pathlib import Path
from fastapi.testclient import TestClient
from httpx import AsyncClient, ASGITransport
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
from httpx import ASGITransport, AsyncClient
from sqlalchemy import String, Text
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.pool import StaticPool
# ── Sync fixtures (legacy — retained until Plan 05 cuts over) ──────────────────
# ── Service availability ──────────────────────────────────────────────────────
@pytest.fixture(autouse=True)
def isolated_data_dir(monkeypatch, tmp_path):
    """Give every test a private, pre-seeded data directory.

    Builds ``tmp_path / "data"`` containing ``uploads/`` and ``metadata/``
    sub-directories, an empty ``topics.json`` and a ``settings.json`` holding
    ``config.DEFAULT_SETTINGS``. The ``DATA_DIR`` environment variable and the
    module-level path constants of ``config`` (plus ``SETTINGS_FILE`` in
    ``services.storage``) are then pointed at it through ``monkeypatch``.

    Yields:
        The temporary data directory.
    """
    root = tmp_path / "data"
    for subdir in ("uploads", "metadata"):
        (root / subdir).mkdir(parents=True)

    topics_path = root / "topics.json"
    settings_path = root / "settings.json"
    topics_path.write_text(json.dumps({"topics": []}))

    from config import DEFAULT_SETTINGS

    settings_path.write_text(json.dumps(DEFAULT_SETTINGS))

    monkeypatch.setenv("DATA_DIR", str(root))

    # Repoint the module-level path constants so the running app sees the
    # temporary directory.
    import config

    redirected = {
        "DATA_DIR": root,
        "UPLOADS_DIR": root / "uploads",
        "METADATA_DIR": root / "metadata",
        "TOPICS_FILE": topics_path,
        "SETTINGS_FILE": settings_path,
    }
    for constant_name, target in redirected.items():
        monkeypatch.setattr(config, constant_name, target)

    # Plan 04: services.storage is now async (PostgreSQL + MinIO).
    # The flat-file _topics_lock / _settings_lock attributes no longer exist.
    # Only SETTINGS_FILE is still used by the sync load_settings/save_settings.
    # Imported here, after config has been patched, to keep the original order.
    import services.storage as st

    monkeypatch.setattr(st, "SETTINGS_FILE", settings_path)

    yield root
def _port_open(host: str, port: int, timeout: float = 1.0) -> bool:
    """Report whether a TCP connection to ``host:port`` can be established.

    Args:
        host: Host name or address to connect to.
        port: TCP port number.
        timeout: Seconds passed to ``socket.create_connection`` as its timeout.

    Returns:
        True when the connection attempt succeeds, False when it raises
        ``OSError`` (refused, unreachable, timed out, ...).
    """
    try:
        # The connection is only a probe; the context manager closes it again.
        with socket.create_connection((host, port), timeout=timeout):
            reachable = True
    except OSError:
        reachable = False
    return reachable
@pytest.fixture
def client(isolated_data_dir):
@pytest.fixture(scope="session")
def live_services_available():
    """Tell tests whether the live backing services can be used.

    Returns:
        True when the ``INTEGRATION`` environment variable equals ``"1"``
        (no probing is done in that case), otherwise True only if every
        probed localhost port accepts a TCP connection.
    """
    forced = os.environ.get("INTEGRATION") == "1"
    if forced:
        return True

    # Presumably the default PostgreSQL, MinIO and Redis ports — confirm
    # against the Docker Compose file.
    required_ports = (5432, 9000, 6379)
    return all(_port_open("localhost", port) for port in required_ports)
# ── Core async fixtures ───────────────────────────────────────────────────────
def _patch_pg_types_for_sqlite():
    """Best-effort patch of PostgreSQL-only column types for the SQLite engine.

    Only ``INET`` and ``JSONB`` from ``sqlalchemy.dialects.postgresql`` are
    touched: each class gets a permissive ``__class_getitem__`` and, the first
    time this function runs, an ``impl`` attribute (``String`` for ``INET``,
    ``Text`` for ``JSONB``) together with a ``_sqlite_patched`` marker so the
    ``impl`` assignment is not repeated. ``UUID`` is left alone and the model
    definitions themselves are not modified.

    The function never raises. Any failure is logged at WARNING level instead
    of being swallowed silently, so a later schema-creation error can be
    traced back to the patch that did not apply.
    """
    try:
        from sqlalchemy.dialects.postgresql import INET, JSONB

        # Allow subscripted use of the types (``INET[...]``) to yield an instance.
        for pg_type in (INET, JSONB):
            pg_type.__class_getitem__ = classmethod(lambda cls, item: cls())

        # Point ``impl`` at a plain text type once per process.
        for pg_type, fallback in ((INET, String), (JSONB, Text)):
            if not hasattr(pg_type, "_sqlite_patched"):
                pg_type.impl = fallback
                pg_type._sqlite_patched = True
    except Exception:
        # Deliberately non-fatal: if patching fails, schema creation is
        # expected to surface the problem itself. Record why it failed.
        import logging

        logging.getLogger(__name__).warning(
            "Could not patch PostgreSQL column types for SQLite",
            exc_info=True,
        )
@pytest_asyncio.fixture
async def db_session():
    """In-memory async SQLite session for unit tests.

    The ORM models use PostgreSQL-specific column types (INET, JSONB) that the
    SQLite type compiler cannot render. For the lifetime of the fixture,
    ``visit_INET`` / ``visit_JSONB`` methods that emit ``TEXT`` are installed
    on ``SQLiteTypeCompiler`` so ``Base.metadata.create_all`` succeeds.

    Yields:
        An ``AsyncSession`` bound to a fresh ``sqlite+aiosqlite`` in-memory
        engine whose schema has been created from ``db.models.Base``.

    Teardown disposes the engine and restores ``SQLiteTypeCompiler`` to its
    previous state. This also happens when engine creation, schema creation
    or engine disposal raises.
    """
    from sqlalchemy.dialects.sqlite.base import SQLiteTypeCompiler

    from db.models import Base

    def _render_as_text(self, type_, **kw):
        # Both PostgreSQL-only types are stored as plain TEXT in SQLite.
        return "TEXT"

    shim_names = ("visit_INET", "visit_JSONB")
    # Remember whatever was there before so teardown can put it back.
    originals = {name: getattr(SQLiteTypeCompiler, name, None) for name in shim_names}

    # UUID(as_uuid=True) renders as CHAR(32) in SQLite — already handled by
    # SQLAlchemy's built-in UUID type mapping — no patch needed.
    engine = None
    try:
        for name in shim_names:
            setattr(SQLiteTypeCompiler, name, _render_as_text)

        engine = create_async_engine(
            "sqlite+aiosqlite:///:memory:",
            connect_args={"check_same_thread": False},
            poolclass=StaticPool,
        )
        async with engine.begin() as conn:
            await conn.run_sync(Base.metadata.create_all)

        AsyncTestSession = async_sessionmaker(engine, expire_on_commit=False)
        async with AsyncTestSession() as session:
            yield session
    finally:
        try:
            if engine is not None:
                await engine.dispose()
        finally:
            # Restore compiler methods to leave no side effects on other tests.
            for name, original in originals.items():
                if original is not None:
                    setattr(SQLiteTypeCompiler, name, original)
                    continue
                try:
                    delattr(SQLiteTypeCompiler, name)
                except AttributeError:
                    pass
@pytest_asyncio.fixture
async def async_client(db_session: AsyncSession):
"""Async HTTP test client with the DB dependency overridden to use in-memory SQLite.

Yields an httpx ``AsyncClient`` that reaches ``main.app`` through
``ASGITransport`` with base URL ``http://test``. While the client is open,
``get_db`` is overridden to return the ``db_session`` fixture value.
"""
# Project modules are imported inside the fixture, not at module import time.
from deps.db import get_db
from main import app
# NOTE(review): this sync ``TestClient`` line looks like residue of the removed
# ``client`` fixture interleaved by the diff view — confirm against the
# checked-out file before relying on it.
with TestClient(app) as c:
# Every request handled by the app receives the same test session object.
app.dependency_overrides[get_db] = lambda: db_session
async with AsyncClient(transport=ASGITransport(app=app), base_url="http://test") as c:
yield c
# Drops every dependency override registered on the app, not only get_db.
app.dependency_overrides.clear()
# ── File fixtures ─────────────────────────────────────────────────────────────
@pytest.fixture
def sample_txt(tmp_path):
@@ -68,6 +168,7 @@ def sample_txt(tmp_path):
def sample_pdf(tmp_path):
"""Create a minimal valid PDF for testing."""
import fitz
doc = fitz.open()
page = doc.new_page()
page.insert_text((50, 50), "Test PDF document about contracts and legal matters.")
@@ -75,55 +176,3 @@ def sample_pdf(tmp_path):
doc.save(str(pdf_path))
doc.close()
return pdf_path
# ── Async fixtures (Phase 1 additions — Plan 03+ tests use these) ──────────────
@pytest_asyncio.fixture
async def db_session():
    """Yield an ``AsyncSession`` backed by a fresh in-memory SQLite database.

    The schema is created from ``db.models.Base``. When an ``ImportError`` is
    raised while importing the models or creating the schema, the engine is
    disposed and the requesting test is skipped, not failed, which keeps
    the suite green while the models do not exist yet (Wave 1).

    Yields:
        A session created with ``expire_on_commit=False``. The engine is
        disposed once the test has finished with it.
    """
    memory_engine = create_async_engine(
        "sqlite+aiosqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )

    try:
        from db.models import Base

        async with memory_engine.begin() as connection:
            await connection.run_sync(Base.metadata.create_all)
    except ImportError:
        await memory_engine.dispose()
        pytest.skip("db.models not yet implemented — plan 03")

    session_factory = async_sessionmaker(memory_engine, expire_on_commit=False)
    async with session_factory() as active_session:
        yield active_session

    await memory_engine.dispose()
@pytest_asyncio.fixture
async def async_client(db_session):
    """Async HTTP test client with the DB dependency overridden.

    ``deps.db.get_db`` and ``main.app`` are imported lazily; if either import
    raises ``ImportError`` the requesting test is skipped, not failed.

    Args:
        db_session: Session returned by the overridden ``get_db`` dependency
            for every request made through the client.

    Yields:
        An httpx ``AsyncClient`` wired to the app through ``ASGITransport``
        with base URL ``http://test``.

    The dependency overrides are cleared in a ``finally`` block, so they are
    removed even when opening the client raises.
    """
    try:
        from deps.db import get_db
        from main import app
    except ImportError as exc:
        pytest.skip(f"deps.db.get_db not yet implemented — plan 03: {exc}")

    app.dependency_overrides[get_db] = lambda: db_session
    try:
        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as c:
            yield c
    finally:
        # Clears every override on the app, matching the original teardown.
        app.dependency_overrides.clear()