From 3e1fcd69b5d5ceb42f912718bb45d7c1b6558853 Mon Sep 17 00:00:00 2001
From: curo1305
Date: Fri, 22 May 2026 09:16:21 +0200
Subject: [PATCH] feat(01-03): add full v1 ORM schema, async session factory,
 and DB dependency

- backend/db/models.py: 11 SQLAlchemy 2.0 ORM models (User, Quota, RefreshToken,
  Folder, Document, Topic, DocumentTopic, Share, AuditLog, CloudConnection, Group)
- Document.user_id declared nullable=True per D-03 (Phase 2 adds NOT NULL)
- AuditLog.metadata_ uses mapped_column("metadata", JSONB) to avoid the
  DeclarativeBase reserved-attribute conflict
- Group table stub for D-02 (v2 feature, seeded per PROJECT.md)
- Uses Optional[X] instead of X | None for Python < 3.10 compatibility
- backend/db/session.py: async engine (pool_pre_ping=True) and async session
  factory (expire_on_commit=False)
- backend/deps/db.py: async get_db() FastAPI dependency yielding AsyncSession
---
 backend/db/__init__.py   |   0
 backend/db/models.py     | 307 +++++++++++++++++++++++++++++++++++++++
 backend/db/session.py    |  26 ++++
 backend/deps/__init__.py |   0
 backend/deps/db.py       |  26 ++++
 5 files changed, 359 insertions(+)
 create mode 100644 backend/db/__init__.py
 create mode 100644 backend/db/models.py
 create mode 100644 backend/db/session.py
 create mode 100644 backend/deps/__init__.py
 create mode 100644 backend/deps/db.py

diff --git a/backend/db/__init__.py b/backend/db/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/db/models.py b/backend/db/models.py
new file mode 100644
index 0000000..6abd199
--- /dev/null
+++ b/backend/db/models.py
@@ -0,0 +1,307 @@
+"""
+Full v1 SQLAlchemy 2.0 ORM schema for DocuVault.
+
+All 11 tables declared here: users, quotas, refresh_tokens, folders, documents,
+topics, document_topics, shares, audit_log, cloud_connections, groups.
+
+Key decisions:
+  D-01: Full v1 skeleton in Phase 1 migration
+  D-02: groups table stub (v2 feature, seeded for schema completeness per PROJECT.md)
+  D-03: documents.user_id is nullable in Phase 1 (no auth yet); Phase 2 adds NOT NULL
+
+AuditLog note: The metadata column is declared as `metadata_` (ORM attribute name)
+with `name="metadata"` (DB column name). This is required because `metadata` is a
+reserved attribute on SQLAlchemy's DeclarativeBase: declaring it as a mapped
+attribute raises InvalidRequestError when the class is defined.
+
+Python compat note: `Optional[X]` is used instead of `X | None` union syntax
+because the host environment may be Python < 3.10. Both are equivalent.
+"""
+from __future__ import annotations
+
+import uuid
+from datetime import datetime
+from typing import Optional
+from sqlalchemy import (
+    Boolean,
+    BigInteger,
+    ForeignKey,
+    Index,
+    String,
+    Text,
+    TIMESTAMP,
+    UniqueConstraint,
+    Integer,
+)
+from sqlalchemy.dialects.postgresql import UUID, INET, JSONB
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+from sqlalchemy.sql import func
+
+
+class Base(DeclarativeBase):
+    pass
+
+
+class User(Base):
+    __tablename__ = "users"
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
+    )
+    handle: Mapped[str] = mapped_column(String, unique=True, nullable=False)
+    email: Mapped[str] = mapped_column(String, unique=True, nullable=False)
+    password_hash: Mapped[str] = mapped_column(Text, nullable=False)
+    totp_secret: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+    totp_enabled: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    role: Mapped[str] = mapped_column(String, nullable=False, default="user")
+    is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
+    ai_provider: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+    ai_model: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+    default_storage_backend: Mapped[str] = mapped_column(
+        String, nullable=False, default="minio"
+    )
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
+    )
+
+
+class Quota(Base):
+    __tablename__ = "quotas"
+
+    user_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="CASCADE"),
+        primary_key=True,
+    )
+    # 100 MB default free-tier quota (STORE-01); admin can override limit_bytes per user
+    limit_bytes: Mapped[int] = mapped_column(
+        BigInteger, nullable=False, default=104857600
+    )
+    used_bytes: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)
+
+
+class RefreshToken(Base):
+    __tablename__ = "refresh_tokens"
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
+    )
+    user_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="CASCADE"),
+        nullable=False,
+    )
+    token_hash: Mapped[str] = mapped_column(Text, unique=True, nullable=False)
+    expires_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=True), nullable=False
+    )
+    revoked: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
+    )
+
+    __table_args__ = (Index("ix_refresh_tokens_user_revoked", "user_id", "revoked"),)
+
+
+class Folder(Base):
+    __tablename__ = "folders"
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
+    )
+    user_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="CASCADE"),
+        nullable=False,
+    )
+    parent_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("folders.id", ondelete="CASCADE"),
+        nullable=True,
+    )
+    name: Mapped[str] = mapped_column(Text, nullable=False)
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
+    )
+
+    __table_args__ = (
+        UniqueConstraint("user_id", "parent_id", "name", name="uq_folders_user_parent_name"),
+    )
+
+
+class Document(Base):
+    __tablename__ = "documents"
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
+    )
+    # D-03: user_id is NULLABLE in Phase 1 — no auth system yet.
+    # Phase 2 migration adds NOT NULL constraint after users/auth are live.
+    user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="CASCADE"),
+        nullable=True,
+    )
+    folder_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("folders.id", ondelete="SET NULL"),
+        nullable=True,
+    )
+    # original human-readable filename — stored in DB only, never in the MinIO key
+    filename: Mapped[str] = mapped_column(Text, nullable=False)
+    # MinIO object key: {user_id}/{document_id}/{uuid4()}{ext}
+    object_key: Mapped[str] = mapped_column(Text, nullable=False)
+    content_type: Mapped[str] = mapped_column(Text, nullable=False)
+    size_bytes: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)
+    storage_backend: Mapped[str] = mapped_column(String, nullable=False, default="minio")
+    extracted_text: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+    status: Mapped[str] = mapped_column(String, nullable=False, default="pending")
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=True), nullable=False, server_default=func.now(), onupdate=func.now()
+    )
+
+    __table_args__ = (
+        Index("ix_documents_user_folder", "user_id", "folder_id"),
+        Index("ix_documents_user_created", "user_id", "created_at"),
+    )
+
+
+class Topic(Base):
+    __tablename__ = "topics"
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
+    )
+    user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="CASCADE"),
+        nullable=True,
+    )
+    name: Mapped[str] = mapped_column(Text, nullable=False)
+    description: Mapped[str] = mapped_column(Text, nullable=False, default="")
+    color: Mapped[str] = mapped_column(String(7), nullable=False, default="#6366f1")
+
+    __table_args__ = (UniqueConstraint("user_id", "name", name="uq_topics_user_name"),)
+
+
+class DocumentTopic(Base):
+    __tablename__ = "document_topics"
+
+    document_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("documents.id", ondelete="CASCADE"),
+        primary_key=True,
+    )
+    topic_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("topics.id", ondelete="CASCADE"),
+        primary_key=True,
+    )
+
+
+class Share(Base):
+    __tablename__ = "shares"
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
+    )
+    document_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("documents.id", ondelete="CASCADE"),
+        nullable=False,
+    )
+    owner_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="CASCADE"),
+        nullable=False,
+    )
+    recipient_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="CASCADE"),
+        nullable=False,
+    )
+    permission: Mapped[str] = mapped_column(String, nullable=False, default="view")
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
+    )
+
+    __table_args__ = (
+        UniqueConstraint("document_id", "recipient_id", name="uq_shares_document_recipient"),
+        Index("ix_shares_recipient", "recipient_id"),
+    )
+
+
+class AuditLog(Base):
+    __tablename__ = "audit_log"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="SET NULL"),
+        nullable=True,
+    )
+    actor_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="SET NULL"),
+        nullable=True,
+    )
+    event_type: Mapped[str] = mapped_column(Text, nullable=False)
+    resource_id: Mapped[Optional[uuid.UUID]] = mapped_column(
+        UUID(as_uuid=True), nullable=True
+    )
+    ip_address: Mapped[Optional[str]] = mapped_column(INET, nullable=True)
+    # ORM attribute is `metadata_` to avoid collision with DeclarativeBase.metadata.
+    # The DB column is named "metadata" via the mapped_column name= kwarg.
+    metadata_: Mapped[Optional[dict]] = mapped_column("metadata", JSONB, nullable=True)
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
+    )
+
+    __table_args__ = (
+        Index("ix_audit_user_created", "user_id", "created_at"),
+        Index("ix_audit_event_created", "event_type", "created_at"),
+    )
+
+
+class CloudConnection(Base):
+    __tablename__ = "cloud_connections"
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
+    )
+    user_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True),
+        ForeignKey("users.id", ondelete="CASCADE"),
+        nullable=False,
+    )
+    provider: Mapped[str] = mapped_column(String, nullable=False)
+    display_name: Mapped[str] = mapped_column(Text, nullable=False)
+    credentials_enc: Mapped[str] = mapped_column(Text, nullable=False)
+    status: Mapped[str] = mapped_column(String, nullable=False, default="ACTIVE")
+    connected_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
+    )
+
+    __table_args__ = (Index("ix_cloud_connections_user", "user_id"),)
+
+
+class Group(Base):
+    """v2 stub — empty table, seeded for schema completeness (PROJECT.md D-02).
+
+    Groups are a v2 feature; the table is created in Phase 1 so the schema is
+    complete and future migrations don't need to alter the dependency ordering.
+    No rows will be inserted until Phase 2 or later.
+    """
+
+    __tablename__ = "groups"
+
+    id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), primary_key=True, default=uuid.uuid4
+    )
+    name: Mapped[str] = mapped_column(Text, unique=True, nullable=False)
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
+    )
diff --git a/backend/db/session.py b/backend/db/session.py
new file mode 100644
index 0000000..3b572f1
--- /dev/null
+++ b/backend/db/session.py
@@ -0,0 +1,26 @@
+"""
+Async SQLAlchemy engine and session factory for DocuVault.
+
+The engine reads DATABASE_URL (restricted docuvault_app user — DML only).
+The Alembic migration runner uses DATABASE_MIGRATE_URL (DDL user) separately.
+
+Key settings:
+    pool_pre_ping=True — detect stale connections before use
+    expire_on_commit=False — prevent MissingGreenlet errors after commit in
+                             async context (RESEARCH.md Pitfall 1)
+"""
+from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
+
+from config import settings
+
+engine = create_async_engine(
+    settings.database_url,  # postgresql+psycopg://docuvault_app:...@postgres/docuvault
+    pool_pre_ping=True,  # detect stale connections before use
+    echo=False,
+)
+
+AsyncSessionLocal = async_sessionmaker(
+    engine,
+    class_=AsyncSession,
+    expire_on_commit=False,  # prevent MissingGreenlet errors after commit (Pitfall 1)
+)
diff --git a/backend/deps/__init__.py b/backend/deps/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/backend/deps/db.py b/backend/deps/db.py
new file mode 100644
index 0000000..7780111
--- /dev/null
+++ b/backend/deps/db.py
@@ -0,0 +1,26 @@
+"""
+FastAPI dependency that yields an async SQLAlchemy session per request.
+
+Usage in route handlers:
+    from deps.db import get_db
+    from sqlalchemy.ext.asyncio import AsyncSession
+    from fastapi import Depends
+
+    @router.get("/items")
+    async def list_items(session: AsyncSession = Depends(get_db)):
+        ...
+"""
+from typing import AsyncGenerator
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from db.session import AsyncSessionLocal
+
+
+async def get_db() -> AsyncGenerator[AsyncSession, None]:
+    """Yield a per-request AsyncSession; it is closed when the request is done."""
+    # The session's async context manager closes it on exit, on both the
+    # success and the error path, so an explicit try/finally that calls
+    # close() a second time is unnecessary.
+    async with AsyncSessionLocal() as session:
+        yield session