feat(01-03): add full v1 ORM schema, async session factory, and DB dependency
- backend/db/models.py: 11 SQLAlchemy 2.0 ORM models (User, Quota, RefreshToken,
Folder, Document, Topic, DocumentTopic, Share, AuditLog, CloudConnection, Group)
- Document.user_id declared nullable=True per D-03 (Phase 2 adds NOT NULL)
- AuditLog.metadata_ uses mapped_column("metadata", JSONB) to avoid DeclarativeBase
reserved-attribute conflict
- Group table stub for D-02 (v2 feature, seeded per PROJECT.md)
- Uses Optional[X] instead of X | None for Python < 3.10 compatibility
- backend/db/session.py: async engine (pool_pre_ping=True, expire_on_commit=False)
- backend/deps/db.py: async get_db() FastAPI dependency yielding AsyncSession
This commit is contained in:
@@ -0,0 +1,307 @@
|
||||
"""
|
||||
Full v1 SQLAlchemy 2.0 ORM schema for DocuVault.
|
||||
|
||||
All 11 tables declared here: users, quotas, refresh_tokens, folders, documents,
|
||||
topics, document_topics, shares, audit_log, cloud_connections, groups.
|
||||
|
||||
Key decisions:
|
||||
D-01: Full v1 skeleton in Phase 1 migration
|
||||
D-02: groups table stub (v2 feature, seeded for schema completeness per PROJECT.md)
|
||||
D-03: documents.user_id is nullable in Phase 1 (no auth yet); Phase 2 adds NOT NULL
|
||||
|
||||
AuditLog note: The metadata column is declared as `metadata_` (ORM attribute name)
|
||||
with `name="metadata"` (DB column name). This is required because `metadata` is a
|
||||
reserved attribute on SQLAlchemy's DeclarativeBase and raises InvalidRequestError
|
||||
if used as an attribute name directly.
|
||||
|
||||
Python compat note: `Optional[X]` is used instead of `X | None` union syntax
|
||||
because the host environment may be Python < 3.10. Both are equivalent.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from sqlalchemy import (
|
||||
Boolean,
|
||||
BigInteger,
|
||||
ForeignKey,
|
||||
Index,
|
||||
String,
|
||||
Text,
|
||||
TIMESTAMP,
|
||||
UniqueConstraint,
|
||||
Integer,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import UUID, INET, JSONB
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
    """Declarative base class shared by every ORM model in this module."""
|
||||
|
||||
|
||||
class User(Base):
    """ORM mapping for the ``users`` table.

    Holds login identity (handle, email, password hash), optional TOTP
    settings, a role string, an active flag, and per-user AI / storage
    preferences.
    """

    __tablename__ = "users"

    # Primary key; uuid4 is generated on the ORM side when no id is supplied.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Both identifiers carry a UNIQUE constraint.
    handle: Mapped[str] = mapped_column(
        String,
        unique=True,
        nullable=False,
    )
    email: Mapped[str] = mapped_column(
        String,
        unique=True,
        nullable=False,
    )

    password_hash: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )

    # TOTP: the secret is optional, the flag defaults to disabled.
    totp_secret: Mapped[Optional[str]] = mapped_column(
        Text,
        nullable=True,
    )
    totp_enabled: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )

    role: Mapped[str] = mapped_column(
        String,
        nullable=False,
        default="user",
    )
    is_active: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=True,
    )

    # Optional per-user AI settings; NULL when unset.
    ai_provider: Mapped[Optional[str]] = mapped_column(
        Text,
        nullable=True,
    )
    ai_model: Mapped[Optional[str]] = mapped_column(
        Text,
        nullable=True,
    )

    default_storage_backend: Mapped[str] = mapped_column(
        String,
        nullable=False,
        default="minio",
    )

    # Filled in by the database (server-side now()) at insert time.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
|
||||
|
||||
|
||||
class Quota(Base):
    """ORM mapping for the ``quotas`` table (one row per user).

    The primary key is also the foreign key to ``users.id``, and the row is
    removed with its user (ON DELETE CASCADE).
    """

    __tablename__ = "quotas"

    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        primary_key=True,
    )

    # Default free-tier quota is 100 MB = 104,857,600 bytes (STORE-01).
    # An admin can override limit_bytes per user.
    limit_bytes: Mapped[int] = mapped_column(
        BigInteger,
        nullable=False,
        default=104_857_600,
    )

    # Bytes currently counted against the limit; starts at zero.
    used_bytes: Mapped[int] = mapped_column(
        BigInteger,
        nullable=False,
        default=0,
    )
|
||||
|
||||
|
||||
class RefreshToken(Base):
    """ORM mapping for the ``refresh_tokens`` table.

    Each row stores a unique token hash for one user, with an expiry
    timestamp and a revocation flag.
    """

    __tablename__ = "refresh_tokens"

    # Composite index over (user_id, revoked).
    __table_args__ = (
        Index("ix_refresh_tokens_user_revoked", "user_id", "revoked"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Owning user; tokens are deleted with the user (ON DELETE CASCADE).
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )

    token_hash: Mapped[str] = mapped_column(
        Text,
        unique=True,
        nullable=False,
    )

    # No default: the caller must supply the expiry.
    expires_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
    )

    revoked: Mapped[bool] = mapped_column(
        Boolean,
        nullable=False,
        default=False,
    )

    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
|
||||
|
||||
|
||||
class Folder(Base):
    """ORM mapping for the ``folders`` table.

    Folders belong to a user and may reference a parent folder in the same
    table; a NULL ``parent_id`` means the folder has no parent.
    """

    __tablename__ = "folders"

    # NOTE(review): parent_id is nullable, and PostgreSQL unique constraints
    # treat NULLs as distinct by default, so this constraint may not stop two
    # parent-less folders with the same name for one user. Confirm whether
    # that is intended.
    __table_args__ = (
        UniqueConstraint(
            "user_id",
            "parent_id",
            "name",
            name="uq_folders_user_parent_name",
        ),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Owning user; folders are deleted with the user.
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Self-referential FK; deleting a parent cascades to its children.
    parent_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("folders.id", ondelete="CASCADE"),
        nullable=True,
    )

    name: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )

    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
|
||||
|
||||
|
||||
class Document(Base):
    """ORM mapping for the ``documents`` table.

    Stores per-document metadata: owner, optional folder, original filename,
    object-store key, content type, size, storage backend, extracted text,
    a status string, and timestamps.
    """

    __tablename__ = "documents"

    # Two composite indexes, both led by user_id.
    __table_args__ = (
        Index("ix_documents_user_folder", "user_id", "folder_id"),
        Index("ix_documents_user_created", "user_id", "created_at"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # D-03: user_id is deliberately NULLABLE in Phase 1 because there is no
    # auth system yet. The Phase 2 migration adds the NOT NULL constraint
    # once users/auth are live.
    user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=True,
    )

    # Deleting a folder does not delete its documents; folder_id is set to NULL.
    folder_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("folders.id", ondelete="SET NULL"),
        nullable=True,
    )

    # Original human-readable filename: kept in the DB only, never placed in
    # the MinIO key.
    filename: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )

    # MinIO object key, laid out as {user_id}/{document_id}/{uuid4()}{ext}.
    object_key: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )

    content_type: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )
    size_bytes: Mapped[int] = mapped_column(
        BigInteger,
        nullable=False,
        default=0,
    )
    storage_backend: Mapped[str] = mapped_column(
        String,
        nullable=False,
        default="minio",
    )

    # NULL when no text has been stored for the document.
    extracted_text: Mapped[Optional[str]] = mapped_column(
        Text,
        nullable=True,
    )

    status: Mapped[str] = mapped_column(
        String,
        nullable=False,
        default="pending",
    )

    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
        server_default=func.now(),
    )

    # NOTE(review): only a server-side insert default is configured here and
    # there is no onupdate, so the ORM will not refresh this value on UPDATE.
    # Confirm that application code or a DB trigger maintains it.
    updated_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
|
||||
|
||||
|
||||
class Topic(Base):
    """ORM mapping for the ``topics`` table.

    A topic has a name, a description, and a 7-character colour string, and
    may optionally belong to a user.
    """

    __tablename__ = "topics"

    # NOTE(review): user_id is nullable, and PostgreSQL unique constraints
    # treat NULLs as distinct by default, so topics with no user may repeat
    # a name. Confirm whether that is intended.
    __table_args__ = (
        UniqueConstraint("user_id", "name", name="uq_topics_user_name"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Optional owner; topics are deleted with their user.
    user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=True,
    )

    name: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )

    # Never NULL; an empty string is the default.
    description: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        default="",
    )

    # Column width 7 matches the "#rrggbb" shape of the default value.
    color: Mapped[str] = mapped_column(
        String(7),
        nullable=False,
        default="#6366f1",
    )
|
||||
|
||||
|
||||
class DocumentTopic(Base):
    """Association table (``document_topics``) linking documents to topics.

    The primary key is the composite (document_id, topic_id). A link row is
    removed when either the document or the topic is deleted.
    """

    __tablename__ = "document_topics"

    document_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("documents.id", ondelete="CASCADE"),
        primary_key=True,
    )

    topic_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("topics.id", ondelete="CASCADE"),
        primary_key=True,
    )
|
||||
|
||||
|
||||
class Share(Base):
    """ORM mapping for the ``shares`` table.

    Records that an owner has shared a document with a recipient under a
    permission string (default ``"view"``).
    """

    __tablename__ = "shares"

    __table_args__ = (
        # At most one share row per (document, recipient) pair.
        UniqueConstraint(
            "document_id",
            "recipient_id",
            name="uq_shares_document_recipient",
        ),
        # Index on recipient_id alone.
        Index("ix_shares_recipient", "recipient_id"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # All three foreign keys cascade, so a share disappears with its
    # document, its owner, or its recipient.
    document_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("documents.id", ondelete="CASCADE"),
        nullable=False,
    )
    owner_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )
    recipient_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )

    permission: Mapped[str] = mapped_column(
        String,
        nullable=False,
        default="view",
    )

    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
|
||||
|
||||
|
||||
class AuditLog(Base):
    """ORM mapping for the ``audit_log`` table.

    Each row records an event type with optional user, actor, resource id,
    IP address and JSON metadata. The user and actor references are set to
    NULL (not cascaded) when the referenced user is deleted, so the audit
    rows remain.
    """

    __tablename__ = "audit_log"

    __table_args__ = (
        Index("ix_audit_user_created", "user_id", "created_at"),
        Index("ix_audit_event_created", "event_type", "created_at"),
    )

    # Auto-incrementing integer key (the only non-UUID key in this module).
    # NOTE(review): Integer is 32-bit in PostgreSQL; confirm this is large
    # enough for the expected audit volume.
    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )

    user_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )
    actor_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="SET NULL"),
        nullable=True,
    )

    event_type: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )

    # Plain UUID with no foreign key.
    resource_id: Mapped[Optional[uuid.UUID]] = mapped_column(
        UUID(as_uuid=True),
        nullable=True,
    )

    # NOTE(review): annotated as str, but some PostgreSQL drivers return
    # ipaddress objects for INET columns. Confirm against the driver in use.
    ip_address: Mapped[Optional[str]] = mapped_column(
        INET,
        nullable=True,
    )

    # The ORM attribute is `metadata_` because `metadata` is reserved on
    # DeclarativeBase. The first positional argument sets the actual DB
    # column name to "metadata".
    metadata_: Mapped[Optional[dict]] = mapped_column(
        "metadata",
        JSONB,
        nullable=True,
    )

    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
|
||||
|
||||
|
||||
class CloudConnection(Base):
    """ORM mapping for the ``cloud_connections`` table.

    One row per connection a user has to an external provider, with a
    display name, stored credentials, and a status string.
    """

    __tablename__ = "cloud_connections"

    # Index on user_id alone.
    __table_args__ = (
        Index("ix_cloud_connections_user", "user_id"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Owning user; connections are deleted with the user.
    user_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )

    provider: Mapped[str] = mapped_column(
        String,
        nullable=False,
    )
    display_name: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )

    # NOTE(review): the name suggests the credentials are encrypted before
    # being stored; nothing in this model enforces that. Verify at the
    # write site.
    credentials_enc: Mapped[str] = mapped_column(
        Text,
        nullable=False,
    )

    status: Mapped[str] = mapped_column(
        String,
        nullable=False,
        default="ACTIVE",
    )

    connected_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
|
||||
|
||||
|
||||
class Group(Base):
    """Placeholder mapping for the ``groups`` table (v2 feature, PROJECT.md D-02).

    Groups are not used in v1. The table is created in Phase 1 so that the
    schema is complete and later migrations do not have to change the
    dependency ordering. No rows are expected before Phase 2 or later.
    """

    __tablename__ = "groups"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )

    # Group names carry a UNIQUE constraint.
    name: Mapped[str] = mapped_column(
        Text,
        unique=True,
        nullable=False,
    )

    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
|
||||
@@ -0,0 +1,26 @@
|
||||
"""
|
||||
Async SQLAlchemy engine and session factory for DocuVault.
|
||||
|
||||
The engine reads DATABASE_URL (restricted docuvault_app user — DML only).
|
||||
The Alembic migration runner uses DATABASE_MIGRATE_URL (DDL user) separately.
|
||||
|
||||
Key settings:
|
||||
pool_pre_ping=True — detect stale connections before use
|
||||
expire_on_commit=False — prevent MissingGreenlet errors after commit in
|
||||
async context (RESEARCH.md Pitfall 1)
|
||||
"""
|
||||
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
|
||||
|
||||
from config import settings
|
||||
|
||||
# Application-wide async engine. The URL comes from settings.database_url and
# has the form postgresql+psycopg://docuvault_app:...@postgres/docuvault.
engine = create_async_engine(
    settings.database_url,
    echo=False,
    # Detect stale connections before use.
    pool_pre_ping=True,
)

# Session factory bound to the engine above.
AsyncSessionLocal = async_sessionmaker(
    bind=engine,
    class_=AsyncSession,
    # Keep loaded attributes usable after commit; prevents MissingGreenlet
    # errors in async code (Pitfall 1).
    expire_on_commit=False,
)
|
||||
Reference in New Issue
Block a user