feat(01-03): scaffold Alembic async config and author 0001_initial_schema migration

- backend/alembic.ini: script_location=migrations, sqlalchemy.url=%(DATABASE_MIGRATE_URL)s
- backend/migrations/env.py: async_engine_from_config + Base.metadata wiring;
  runtime os.environ.get("DATABASE_MIGRATE_URL") injection (alembic.ini interpolation
  does not read OS env directly)
- backend/migrations/versions/0001_initial_schema.py: creates all 11 tables in
  dependency order with correct FKs, indexes, and named constraints
- documents.user_id is nullable=True per D-03; Phase 2 adds NOT NULL
- Ends with GRANT + ALTER DEFAULT PRIVILEGES for docuvault_app (Pitfall 4)
- Also grants USAGE/SELECT on sequences (audit_log.id autoincrement)
- downgrade() drops all tables in reverse dependency order
This commit is contained in:
curo1305
2026-05-22 09:20:49 +02:00
parent 3e1fcd69b5
commit 75ea7ef106
5 changed files with 536 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
Generic single-database configuration with an async dbapi.
+103
View File
@@ -0,0 +1,103 @@
import asyncio
import os
from logging.config import fileConfig
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config
from alembic import context
# This is the Alembic Config object, which provides access to the values
# within the .ini file in use.
config = context.config

# Inject the runtime DSN from the OS environment.
# %(DATABASE_MIGRATE_URL)s interpolation in alembic.ini only works when the variable
# is defined inside the .ini file — it does NOT read from the OS environment.
# Reading sqlalchemy.url back from the config before it has been overridden
# therefore fails on the unresolved interpolation key, so the .ini value must
# not be used as the fallback argument of os.environ.get(): that argument is
# evaluated eagerly, even when the environment variable is set.
_migrate_url = os.environ.get("DATABASE_MIGRATE_URL")
if _migrate_url:
    # set_main_option() stores the value through configparser, which treats "%"
    # as interpolation syntax. A literal "%" (e.g. a URL-encoded password such
    # as "p%40ss") has to be escaped as "%%" to survive the round trip.
    config.set_main_option("sqlalchemy.url", _migrate_url.replace("%", "%%"))
# Set up Python logging from the .ini file, but only when Alembic was
# actually started with a config file.
_ini_path = config.config_file_name
if _ini_path is not None:
    fileConfig(_ini_path)
# Import the declarative Base so that Base.metadata is populated for autogenerate support.
# This MUST happen before target_metadata is assigned — Alembic won't see tables
# that haven't been imported (RESEARCH.md Pitfall 2).
# NOTE(review): this relies on db.models importing/defining every model — confirm.
from db.models import Base # noqa: E402 — deliberately imported after the config setup above (Pitfall 2)
target_metadata = Base.metadata
# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured from the ``sqlalchemy.url`` option alone; no
    Engine is created in this function. Bound parameters are rendered inline
    (``literal_binds=True``) using the "named" paramstyle.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def do_run_migrations(connection: Connection) -> None:
    """Bind the Alembic context to *connection* and run the migrations.

    Synchronous on purpose: run_async_migrations() invokes it through
    ``connection.run_sync``.
    """
    context.configure(
        target_metadata=target_metadata,
        connection=connection,
    )

    migration_txn = context.begin_transaction()
    with migration_txn:
        context.run_migrations()
async def run_async_migrations() -> None:
    """Create an async Engine and run the migrations over one connection.

    The engine is built from the main .ini section (options prefixed with
    ``sqlalchemy.``) with ``NullPool``. The synchronous migration runner is
    invoked through ``connection.run_sync``.
    """
    connectable = async_engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    try:
        async with connectable.connect() as connection:
            await connection.run_sync(do_run_migrations)
    finally:
        # Dispose of the engine even when a migration raises, so a failed run
        # does not skip the engine cleanup.
        await connectable.dispose()
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Drives the async migration coroutine to completion with ``asyncio.run``.
    """
    migration_coro = run_async_migrations()
    asyncio.run(migration_coro)
# Module entry point: Alembic executes env.py, and the mode reported by the
# context decides which runner is used.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
+28
View File
@@ -0,0 +1,28 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}
def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}
@@ -0,0 +1,257 @@
"""Initial v1 schema — all 11 DocuVault tables.
Revision ID: 0001
Revises:
Create Date: 2026-05-22
Creates tables in dependency order:
1. users (no FK dependencies)
2. groups (no FK dependencies — D-02 stub)
3. quotas (FK -> users)
4. refresh_tokens (FK -> users)
5. folders (FK -> users, self-referential FK -> folders)
6. topics (FK -> users)
7. documents (FK -> users [nullable, D-03], FK -> folders)
8. document_topics (FK -> documents, topics)
9. shares (FK -> documents, users x2)
10. audit_log (FK -> users x2)
11. cloud_connections (FK -> users)
Pitfall 4 note: ALTER DEFAULT PRIVILEGES is required so future migrations
(applied by docuvault_migrate) automatically grant access to docuvault_app.
The docuvault_app user is created in docker/postgres/initdb.d/01-init-users.sql
with CONNECT but no table privileges — this migration grants them.
"""
from __future__ import annotations
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from alembic import op
# revision identifiers, used by Alembic.
revision = "0001"
# No parent revision: this is the first migration in the history.
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the 11 v1 tables in dependency order, then grant app privileges.

    Tables are created parents-first so every foreign key target already
    exists. The function ends by granting DML on tables and USAGE/SELECT on
    sequences to ``docuvault_app``, both for existing objects and (through
    ALTER DEFAULT PRIVILEGES) for objects created later by the same role.
    """
    # ── 1. users ──────────────────────────────────────────────────────────────
    op.create_table(
        "users",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("handle", sa.String(), nullable=False),
        sa.Column("email", sa.String(), nullable=False),
        sa.Column("password_hash", sa.Text(), nullable=False),
        sa.Column("totp_secret", sa.Text(), nullable=True),
        sa.Column("totp_enabled", sa.Boolean(), nullable=False, server_default="false"),
        sa.Column("role", sa.String(), nullable=False, server_default="user"),
        sa.Column("is_active", sa.Boolean(), nullable=False, server_default="true"),
        sa.Column("ai_provider", sa.Text(), nullable=True),
        sa.Column("ai_model", sa.Text(), nullable=True),
        sa.Column("default_storage_backend", sa.String(), nullable=False, server_default="minio"),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("handle"),
        sa.UniqueConstraint("email"),
    )

    # ── 2. groups (D-02 stub — v2 feature) ───────────────────────────────────
    op.create_table(
        "groups",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("name", sa.Text(), nullable=False),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    )

    # ── 3. quotas ─────────────────────────────────────────────────────────────
    op.create_table(
        "quotas",
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        # 100 MB default free-tier quota
        sa.Column("limit_bytes", sa.BigInteger(), nullable=False, server_default="104857600"),
        sa.Column("used_bytes", sa.BigInteger(), nullable=False, server_default="0"),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("user_id"),
    )

    # ── 4. refresh_tokens ─────────────────────────────────────────────────────
    op.create_table(
        "refresh_tokens",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("token_hash", sa.Text(), nullable=False),
        sa.Column("expires_at", sa.TIMESTAMP(timezone=True), nullable=False),
        sa.Column("revoked", sa.Boolean(), nullable=False, server_default="false"),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("token_hash"),
    )
    op.create_index("ix_refresh_tokens_user_revoked", "refresh_tokens", ["user_id", "revoked"])

    # ── 5. folders ────────────────────────────────────────────────────────────
    op.create_table(
        "folders",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("parent_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("name", sa.Text(), nullable=False),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["parent_id"], ["folders.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        # NOTE(review): PostgreSQL treats NULLs as distinct in unique constraints
        # by default, so rows with parent_id IS NULL (root folders) are not
        # deduplicated by name — confirm whether that is intended.
        sa.UniqueConstraint("user_id", "parent_id", "name", name="uq_folders_user_parent_name"),
    )

    # ── 6. topics ─────────────────────────────────────────────────────────────
    op.create_table(
        "topics",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("name", sa.Text(), nullable=False),
        sa.Column("description", sa.Text(), nullable=False, server_default=""),
        sa.Column("color", sa.String(7), nullable=False, server_default="#6366f1"),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        # NOTE(review): as with folders, rows with user_id IS NULL are not
        # deduplicated by this constraint under default NULL semantics.
        sa.UniqueConstraint("user_id", "name", name="uq_topics_user_name"),
    )

    # ── 7. documents ──────────────────────────────────────────────────────────
    op.create_table(
        "documents",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        # D-03: user_id is nullable in Phase 1 — no auth yet.
        # Phase 2 migration adds NOT NULL constraint.
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("folder_id", postgresql.UUID(as_uuid=True), nullable=True),
        # original human-readable filename — stored in DB only, never in the MinIO key
        sa.Column("filename", sa.Text(), nullable=False),
        # MinIO object key: {user_id}/{document_id}/{uuid4()}{ext}
        sa.Column("object_key", sa.Text(), nullable=False),
        sa.Column("content_type", sa.Text(), nullable=False),
        sa.Column("size_bytes", sa.BigInteger(), nullable=False, server_default="0"),
        sa.Column("storage_backend", sa.String(), nullable=False, server_default="minio"),
        sa.Column("extracted_text", sa.Text(), nullable=True),
        sa.Column("status", sa.String(), nullable=False, server_default="pending"),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.Column("updated_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["folder_id"], ["folders.id"], ondelete="SET NULL"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_documents_user_folder", "documents", ["user_id", "folder_id"])
    op.create_index("ix_documents_user_created", "documents", ["user_id", "created_at"])

    # ── 8. document_topics ────────────────────────────────────────────────────
    op.create_table(
        "document_topics",
        sa.Column("document_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("topic_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.ForeignKeyConstraint(["document_id"], ["documents.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["topic_id"], ["topics.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("document_id", "topic_id"),
    )

    # ── 9. shares ─────────────────────────────────────────────────────────────
    op.create_table(
        "shares",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("document_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("owner_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("recipient_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("permission", sa.String(), nullable=False, server_default="view"),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["document_id"], ["documents.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["owner_id"], ["users.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["recipient_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("document_id", "recipient_id", name="uq_shares_document_recipient"),
    )
    op.create_index("ix_shares_recipient", "shares", ["recipient_id"])

    # ── 10. audit_log ─────────────────────────────────────────────────────────
    op.create_table(
        "audit_log",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("actor_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("event_type", sa.Text(), nullable=False),
        sa.Column("resource_id", postgresql.UUID(as_uuid=True), nullable=True),
        # Use the imported ``postgresql`` module like every other column here,
        # rather than reaching it through ``sa.dialects``.
        sa.Column("ip_address", postgresql.INET(), nullable=True),
        # DB column name is "metadata"; ORM uses "metadata_" to avoid reserved-attr conflict
        sa.Column("metadata", postgresql.JSONB(), nullable=True),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="SET NULL"),
        sa.ForeignKeyConstraint(["actor_id"], ["users.id"], ondelete="SET NULL"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_audit_user_created", "audit_log", ["user_id", "created_at"])
    op.create_index("ix_audit_event_created", "audit_log", ["event_type", "created_at"])

    # ── 11. cloud_connections ─────────────────────────────────────────────────
    op.create_table(
        "cloud_connections",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("provider", sa.String(), nullable=False),
        sa.Column("display_name", sa.Text(), nullable=False),
        sa.Column("credentials_enc", sa.Text(), nullable=False),
        sa.Column("status", sa.String(), nullable=False, server_default="ACTIVE"),
        sa.Column("connected_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_cloud_connections_user", "cloud_connections", ["user_id"])

    # ── Privilege grants ───────────────────────────────────────────────────────
    # Pitfall 4: ALTER DEFAULT PRIVILEGES is required so future migrations
    # (applied by docuvault_migrate user) automatically grant DML access to
    # docuvault_app on any tables created afterward.
    #
    # The docuvault_app user is created in docker/postgres/initdb.d/01-init-users.sql
    # with CONNECT privilege only — no table-level access. These two grants establish:
    #   (a) immediate access to all tables/sequences created by THIS migration
    #   (b) automatic access to all tables/sequences created by FUTURE migrations
    #       run by the same docuvault_migrate user
    #
    # NOTE(review): "ALL TABLES IN SCHEMA public" covers every table present in
    # the schema at this point, which presumably includes Alembic's own version
    # table — confirm that DML access to it is acceptable for docuvault_app.
    op.execute("GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO docuvault_app;")
    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO docuvault_app;")
    # Sequences grant is required because audit_log.id uses a sequence (autoincrement).
    # docuvault_app must be able to call nextval() on any sequence.
    op.execute("GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO docuvault_app;")
    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT USAGE, SELECT ON SEQUENCES TO docuvault_app;")
def downgrade() -> None:
    """Revert the initial schema: revoke default privileges, then drop everything.

    Tables are dropped children-first (reverse of the creation order in
    upgrade()) so no foreign key ever points at a table that is already gone.
    """
    # Undo the ALTER DEFAULT PRIVILEGES issued by upgrade(). Grants on the
    # individual tables/sequences disappear with the objects themselves, but
    # default privileges are not tied to any table and would otherwise
    # outlive a downgrade to base.
    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public REVOKE USAGE, SELECT ON SEQUENCES FROM docuvault_app;")
    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public REVOKE SELECT, INSERT, UPDATE, DELETE ON TABLES FROM docuvault_app;")

    # Drop indexes first, then tables in reverse dependency order.
    # cloud_connections
    op.drop_index("ix_cloud_connections_user", table_name="cloud_connections")
    op.drop_table("cloud_connections")

    # audit_log
    op.drop_index("ix_audit_event_created", table_name="audit_log")
    op.drop_index("ix_audit_user_created", table_name="audit_log")
    op.drop_table("audit_log")

    # shares
    op.drop_index("ix_shares_recipient", table_name="shares")
    op.drop_table("shares")

    # document_topics
    op.drop_table("document_topics")

    # documents
    op.drop_index("ix_documents_user_created", table_name="documents")
    op.drop_index("ix_documents_user_folder", table_name="documents")
    op.drop_table("documents")

    # topics
    op.drop_table("topics")

    # folders
    op.drop_table("folders")

    # refresh_tokens
    op.drop_index("ix_refresh_tokens_user_revoked", table_name="refresh_tokens")
    op.drop_table("refresh_tokens")

    # quotas
    op.drop_table("quotas")

    # groups (D-02 stub)
    op.drop_table("groups")

    # users (last — all other tables depend on it)
    op.drop_table("users")