feat(01-03): scaffold Alembic async config and author 0001_initial_schema migration

- backend/alembic.ini: script_location=migrations, sqlalchemy.url=%(DATABASE_MIGRATE_URL)s
- backend/migrations/env.py: async_engine_from_config + Base.metadata wiring;
  runtime os.environ.get("DATABASE_MIGRATE_URL") injection (alembic.ini interpolation
  does not read OS env directly)
- backend/migrations/versions/0001_initial_schema.py: creates all 11 tables in
  dependency order with correct FKs, indexes, and named constraints
- documents.user_id is nullable=True per D-03; Phase 2 adds NOT NULL
- Ends with GRANT + ALTER DEFAULT PRIVILEGES for docuvault_app (Pitfall 4)
- Also grants USAGE/SELECT on sequences (audit_log.id autoincrement)
- downgrade() drops all tables in reverse dependency order
This commit is contained in:
curo1305
2026-05-22 09:20:49 +02:00
parent 3e1fcd69b5
commit 75ea7ef106
5 changed files with 536 additions and 0 deletions
+147
View File
@@ -0,0 +1,147 @@
# A generic, single database configuration.
[alembic]
# path to migration scripts.
# this is typically a path given in POSIX (e.g. forward slashes)
# format, relative to the token %(here)s which refers to the location of this
# ini file
script_location = migrations
# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory. for multiple paths, the path separator
# is defined by "path_separator" below.
prepend_sys_path = .
# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
# Any required deps can be installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =
# max length of characters to apply to the "slug" field
# truncate_slug_length = 40
# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false
# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false
# version location specification; This defaults
# to <script_location>/versions. When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "path_separator"
# below.
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
# path_separator; This indicates what character is used to split lists of file
# paths, including version_locations and prepend_sys_path within configparser
# files such as alembic.ini.
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
# to provide os-dependent path splitting.
#
# Note that in order to support legacy alembic.ini files, this default does NOT
# take place if path_separator is not present in alembic.ini. If this
# option is omitted entirely, fallback logic is as follows:
#
# 1. Parsing of the version_locations option falls back to using the legacy
# "version_path_separator" key, which if absent then falls back to the legacy
# behavior of splitting on spaces and/or commas.
# 2. Parsing of the prepend_sys_path option falls back to the legacy
# behavior of splitting on spaces, commas, or colons.
#
# Valid values for path_separator are:
#
# path_separator = :
# path_separator = ;
# path_separator = space
# path_separator = newline
#
# Use os.pathsep. Default configuration used for new projects.
path_separator = os
# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false
# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8
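# database URL. This is consumed by the user-maintained env.py script only.
# NOTE: the %(DATABASE_MIGRATE_URL)s placeholder below is NOT filled in from the
# OS environment — configparser interpolation only resolves names defined in this
# file (or passed to the Alembic Config). migrations/env.py reads the
# DATABASE_MIGRATE_URL environment variable and overrides sqlalchemy.url at runtime.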
sqlalchemy.url = %(DATABASE_MIGRATE_URL)s
[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts. See the documentation for further
# detail and examples
# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME
# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
# hooks = ruff
# ruff.type = module
# ruff.module = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
# Alternatively, use the exec runner to execute a binary found on your PATH
# hooks = ruff
# ruff.type = exec
# ruff.executable = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME
# Logging configuration. This is also consumed by the user-maintained
# env.py script only.
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARNING
handlers = console
qualname =
[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
+1
View File
@@ -0,0 +1 @@
Generic single-database configuration with an async dbapi.
+103
View File
@@ -0,0 +1,103 @@
import asyncio
import os
from logging.config import fileConfig
from sqlalchemy import pool
from sqlalchemy.engine import Connection
from sqlalchemy.ext.asyncio import async_engine_from_config
from alembic import context
# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Inject the runtime DSN from the OS environment.
#
# alembic.ini declares sqlalchemy.url = %(DATABASE_MIGRATE_URL)s, but configparser
# interpolation only resolves names defined in the ini file / Config defaults — it
# does NOT read the OS environment. Two consequences drive the code below:
#   * The ini value must not be read while the placeholder is unresolved: doing so
#     raises configparser.InterpolationMissingOptionError. The environment variable
#     is therefore looked up on its own, without using the ini value as a default.
#   * set_main_option() passes the value through configparser interpolation, so a
#     raw "%" (e.g. a URL-encoded password such as "p%40ss") must be doubled to
#     "%%" or it is rejected as invalid interpolation syntax. Reading the option
#     back yields the original single "%".
_migrate_url = os.environ.get("DATABASE_MIGRATE_URL")
if _migrate_url is not None:
    config.set_main_option("sqlalchemy.url", _migrate_url.replace("%", "%%"))
# When the variable is unset, sqlalchemy.url is left exactly as configured, so a
# value supplied another way (e.g. Config(config_args=...)) still applies.

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Import all models so that Base.metadata is populated for autogenerate support.
# This MUST happen before target_metadata is assigned — Alembic won't see tables
# that haven't been imported (RESEARCH.md Pitfall 2).
from db.models import Base  # noqa: E402 — deliberately imported after config setup (Pitfall 2)

target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured from the ``sqlalchemy.url`` option alone; no
    Engine is created here, so no DBAPI needs to be available. Calls to
    context.execute() emit the given string to the script output.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        # Render bound parameter values inline in the emitted SQL.
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def do_run_migrations(connection: Connection) -> None:
    """Configure the migration context on ``connection`` and run the migrations.

    This is the synchronous callable handed to ``run_sync`` by
    run_async_migrations().
    """
    context.configure(target_metadata=target_metadata, connection=connection)

    migration_txn = context.begin_transaction()
    with migration_txn:
        context.run_migrations()
async def run_async_migrations() -> None:
    """Create an async Engine, run the migrations over one connection, dispose.

    The Engine is built from the main ini section (options prefixed with
    ``sqlalchemy.``) using NullPool. It is disposed in a ``finally`` block so
    that it is released even when a migration raises.
    """
    connectable = async_engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    try:
        async with connectable.connect() as connection:
            # do_run_migrations is a plain synchronous function; run_sync
            # invokes it with the connection as its argument.
            await connection.run_sync(do_run_migrations)
    finally:
        # Always release the engine, including on the error path.
        await connectable.dispose()
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Drives the async migration coroutine to completion with asyncio.run().
    """
    migration_coro = run_async_migrations()
    asyncio.run(migration_coro)
# Pick the runner matching how Alembic was invoked (offline vs. online), then run it.
run_selected_mode = (
    run_migrations_offline if context.is_offline_mode() else run_migrations_online
)
run_selected_mode()
+28
View File
@@ -0,0 +1,28 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
"""Upgrade schema."""
${upgrades if upgrades else "pass"}
def downgrade() -> None:
"""Downgrade schema."""
${downgrades if downgrades else "pass"}
@@ -0,0 +1,257 @@
"""Initial v1 schema — all 11 DocuVault tables.
Revision ID: 0001
Revises:
Create Date: 2026-05-22
Creates tables in dependency order:
1. users (no FK dependencies)
2. groups (no FK dependencies — D-02 stub)
3. quotas (FK -> users)
4. refresh_tokens (FK -> users)
5. folders (FK -> users, self-referential FK -> folders)
6. topics (FK -> users)
7. documents (FK -> users [nullable, D-03], FK -> folders)
8. document_topics (FK -> documents, topics)
9. shares (FK -> documents, users x2)
10. audit_log (FK -> users x2)
11. cloud_connections (FK -> users)
Pitfall 4 note: ALTER DEFAULT PRIVILEGES is required so future migrations
(applied by docuvault_migrate) automatically grant access to docuvault_app.
The docuvault_app user is created in docker/postgres/initdb.d/01-init-users.sql
with CONNECT but no table privileges — this migration grants them.
"""
from __future__ import annotations
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from alembic import op
# revision identifiers, used by Alembic.
revision = "0001"
down_revision = None
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the 11 v1 tables in FK-dependency order, then grant access to docuvault_app.

    Tables are created so that every foreign key target already exists. The
    final statements grant DML on tables and USAGE/SELECT on sequences to
    docuvault_app, both for the objects created here and (via ALTER DEFAULT
    PRIVILEGES) for objects created afterwards.
    """
    # ── 1. users ──────────────────────────────────────────────────────────────
    op.create_table(
        "users",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("handle", sa.String(), nullable=False),
        sa.Column("email", sa.String(), nullable=False),
        sa.Column("password_hash", sa.Text(), nullable=False),
        sa.Column("totp_secret", sa.Text(), nullable=True),
        sa.Column("totp_enabled", sa.Boolean(), nullable=False, server_default="false"),
        sa.Column("role", sa.String(), nullable=False, server_default="user"),
        sa.Column("is_active", sa.Boolean(), nullable=False, server_default="true"),
        sa.Column("ai_provider", sa.Text(), nullable=True),
        sa.Column("ai_model", sa.Text(), nullable=True),
        sa.Column("default_storage_backend", sa.String(), nullable=False, server_default="minio"),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("handle"),
        sa.UniqueConstraint("email"),
    )

    # ── 2. groups (D-02 stub — v2 feature) ───────────────────────────────────
    op.create_table(
        "groups",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("name", sa.Text(), nullable=False),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("name"),
    )

    # ── 3. quotas ─────────────────────────────────────────────────────────────
    op.create_table(
        "quotas",
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        # 100 MB default free-tier quota
        sa.Column("limit_bytes", sa.BigInteger(), nullable=False, server_default="104857600"),
        sa.Column("used_bytes", sa.BigInteger(), nullable=False, server_default="0"),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("user_id"),
    )

    # ── 4. refresh_tokens ─────────────────────────────────────────────────────
    op.create_table(
        "refresh_tokens",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("token_hash", sa.Text(), nullable=False),
        sa.Column("expires_at", sa.TIMESTAMP(timezone=True), nullable=False),
        sa.Column("revoked", sa.Boolean(), nullable=False, server_default="false"),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("token_hash"),
    )
    op.create_index("ix_refresh_tokens_user_revoked", "refresh_tokens", ["user_id", "revoked"])

    # ── 5. folders ────────────────────────────────────────────────────────────
    op.create_table(
        "folders",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("parent_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("name", sa.Text(), nullable=False),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["parent_id"], ["folders.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        # NOTE(review): parent_id is nullable and PostgreSQL UNIQUE treats NULLs as
        # distinct by default, so this does not stop one user from having two
        # root-level folders (parent_id IS NULL) with the same name — confirm intent.
        sa.UniqueConstraint("user_id", "parent_id", "name", name="uq_folders_user_parent_name"),
    )

    # ── 6. topics ─────────────────────────────────────────────────────────────
    op.create_table(
        "topics",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("name", sa.Text(), nullable=False),
        sa.Column("description", sa.Text(), nullable=False, server_default=""),
        sa.Column("color", sa.String(7), nullable=False, server_default="#6366f1"),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        # NOTE(review): user_id is nullable, so rows with user_id IS NULL are not
        # deduplicated by name under this constraint (NULLs are distinct) — confirm intent.
        sa.UniqueConstraint("user_id", "name", name="uq_topics_user_name"),
    )

    # ── 7. documents ──────────────────────────────────────────────────────────
    op.create_table(
        "documents",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        # D-03: user_id is nullable in Phase 1 — no auth yet.
        # Phase 2 migration adds NOT NULL constraint.
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("folder_id", postgresql.UUID(as_uuid=True), nullable=True),
        # original human-readable filename — stored in DB only, never in the MinIO key
        sa.Column("filename", sa.Text(), nullable=False),
        # MinIO object key: {user_id}/{document_id}/{uuid4()}{ext}
        sa.Column("object_key", sa.Text(), nullable=False),
        sa.Column("content_type", sa.Text(), nullable=False),
        sa.Column("size_bytes", sa.BigInteger(), nullable=False, server_default="0"),
        sa.Column("storage_backend", sa.String(), nullable=False, server_default="minio"),
        sa.Column("extracted_text", sa.Text(), nullable=True),
        sa.Column("status", sa.String(), nullable=False, server_default="pending"),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.Column("updated_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["folder_id"], ["folders.id"], ondelete="SET NULL"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_documents_user_folder", "documents", ["user_id", "folder_id"])
    op.create_index("ix_documents_user_created", "documents", ["user_id", "created_at"])

    # ── 8. document_topics ────────────────────────────────────────────────────
    op.create_table(
        "document_topics",
        sa.Column("document_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("topic_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.ForeignKeyConstraint(["document_id"], ["documents.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["topic_id"], ["topics.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("document_id", "topic_id"),
    )

    # ── 9. shares ─────────────────────────────────────────────────────────────
    op.create_table(
        "shares",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("document_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("owner_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("recipient_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("permission", sa.String(), nullable=False, server_default="view"),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["document_id"], ["documents.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["owner_id"], ["users.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["recipient_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
        sa.UniqueConstraint("document_id", "recipient_id", name="uq_shares_document_recipient"),
    )
    op.create_index("ix_shares_recipient", "shares", ["recipient_id"])

    # ── 10. audit_log ─────────────────────────────────────────────────────────
    op.create_table(
        "audit_log",
        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("actor_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("event_type", sa.Text(), nullable=False),
        sa.Column("resource_id", postgresql.UUID(as_uuid=True), nullable=True),
        # Use the imported ``postgresql`` module, consistent with every other column here.
        sa.Column("ip_address", postgresql.INET(), nullable=True),
        # DB column name is "metadata"; ORM uses "metadata_" to avoid reserved-attr conflict
        sa.Column("metadata", postgresql.JSONB(), nullable=True),
        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        # SET NULL keeps audit rows when the referenced user is deleted.
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="SET NULL"),
        sa.ForeignKeyConstraint(["actor_id"], ["users.id"], ondelete="SET NULL"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_audit_user_created", "audit_log", ["user_id", "created_at"])
    op.create_index("ix_audit_event_created", "audit_log", ["event_type", "created_at"])

    # ── 11. cloud_connections ─────────────────────────────────────────────────
    op.create_table(
        "cloud_connections",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("provider", sa.String(), nullable=False),
        sa.Column("display_name", sa.Text(), nullable=False),
        sa.Column("credentials_enc", sa.Text(), nullable=False),
        sa.Column("status", sa.String(), nullable=False, server_default="ACTIVE"),
        sa.Column("connected_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
        sa.PrimaryKeyConstraint("id"),
    )
    op.create_index("ix_cloud_connections_user", "cloud_connections", ["user_id"])

    # ── Privilege grants ───────────────────────────────────────────────────────
    # Pitfall 4: ALTER DEFAULT PRIVILEGES is required so future migrations
    # (applied by docuvault_migrate user) automatically grant DML access to
    # docuvault_app on any tables created afterward.
    #
    # The docuvault_app user is created in docker/postgres/initdb.d/01-init-users.sql
    # with CONNECT privilege only — no table-level access. These two grants establish:
    #   (a) immediate access to all tables/sequences created by THIS migration
    #   (b) automatic access to all tables/sequences created by FUTURE migrations
    #       run by the same docuvault_migrate user
    op.execute("GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO docuvault_app;")
    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO docuvault_app;")
    # Sequences grant is required because audit_log.id uses a sequence (autoincrement).
    # docuvault_app must be able to call nextval() on any sequence.
    op.execute("GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO docuvault_app;")
    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT USAGE, SELECT ON SEQUENCES TO docuvault_app;")
def downgrade() -> None:
    """Undo upgrade(): revoke the default privileges, then drop every table.

    Tables are dropped in reverse dependency order, each preceded by its
    explicitly created indexes. Privileges granted directly on the tables and
    sequences disappear with the objects themselves, but the ALTER DEFAULT
    PRIVILEGES entries installed by upgrade() are not tied to any table, so
    they are revoked explicitly here.
    """
    # Mirror of the ALTER DEFAULT PRIVILEGES ... GRANT statements in upgrade().
    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public REVOKE USAGE, SELECT ON SEQUENCES FROM docuvault_app;")
    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public REVOKE SELECT, INSERT, UPDATE, DELETE ON TABLES FROM docuvault_app;")

    # (table, indexes to drop first), listed in reverse dependency order.
    drop_plan = (
        ("cloud_connections", ("ix_cloud_connections_user",)),
        ("audit_log", ("ix_audit_event_created", "ix_audit_user_created")),
        ("shares", ("ix_shares_recipient",)),
        ("document_topics", ()),
        ("documents", ("ix_documents_user_created", "ix_documents_user_folder")),
        ("topics", ()),
        ("folders", ()),
        ("refresh_tokens", ("ix_refresh_tokens_user_revoked",)),
        ("quotas", ()),
        # groups is the D-02 stub
        ("groups", ()),
        # users goes last — all other tables depend on it
        ("users", ()),
    )
    for table_name, index_names in drop_plan:
        for index_name in index_names:
            op.drop_index(index_name, table_name=table_name)
        op.drop_table(table_name)