diff --git a/backend/alembic.ini b/backend/alembic.ini new file mode 100644 index 0000000..fc9ea7b --- /dev/null +++ b/backend/alembic.ini @@ -0,0 +1,147 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts. +# this is typically a path given in POSIX (e.g. forward slashes) +# format, relative to the token %(here)s which refers to the location of this +# ini file +script_location = migrations + +# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s +# Uncomment the line below if you want the files to be prepended with date and time +# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file +# for all available tokens +# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s + +# sys.path path, will be prepended to sys.path if present. +# defaults to the current working directory. for multiple paths, the path separator +# is defined by "path_separator" below. +prepend_sys_path = . + +# timezone to use when rendering the date within the migration file +# as well as the filename. +# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library. +# Any required deps can be installed by adding `alembic[tz]` to the pip requirements +# string value is passed to ZoneInfo() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the "slug" field +# truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; This defaults +# to script_location/versions. When using multiple version +# directories, initial revisions must be specified with --version-path.
+# The path separator used here should be the separator specified by "path_separator" +# below. +# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions + +# path_separator; This indicates what character is used to split lists of file +# paths, including version_locations and prepend_sys_path within configparser +# files such as alembic.ini. +# The default rendered in new alembic.ini files is "os", which uses os.pathsep +# to provide os-dependent path splitting. +# +# Note that in order to support legacy alembic.ini files, this default does NOT +# take place if path_separator is not present in alembic.ini. If this +# option is omitted entirely, fallback logic is as follows: +# +# 1. Parsing of the version_locations option falls back to using the legacy +# "version_path_separator" key, which if absent then falls back to the legacy +# behavior of splitting on spaces and/or commas. +# 2. Parsing of the prepend_sys_path option falls back to the legacy +# behavior of splitting on spaces, commas, or colons. +# +# Valid values for path_separator are: +# +# path_separator = : +# path_separator = ; +# path_separator = space +# path_separator = newline +# +# Use os.pathsep. Default configuration used for new projects. +path_separator = os + + +# set to 'true' to search source files recursively +# in each "version_locations" directory +# new in Alembic version 1.10 +# recursive_version_locations = false + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +# database URL. This is consumed by the user-maintained env.py script only. +# other means of configuring database URLs may be customized within the env.py +# file. +sqlalchemy.url = %(DATABASE_MIGRATE_URL)s + + +[post_write_hooks] +# post_write_hooks defines scripts or Python functions that are run +# on newly generated revision scripts. 
See the documentation for further +# detail and examples + +# format using "black" - use the console_scripts runner, against the "black" entrypoint +# hooks = black +# black.type = console_scripts +# black.entrypoint = black +# black.options = -l 79 REVISION_SCRIPT_FILENAME + +# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module +# hooks = ruff +# ruff.type = module +# ruff.module = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Alternatively, use the exec runner to execute a binary found on your PATH +# hooks = ruff +# ruff.type = exec +# ruff.executable = ruff +# ruff.options = check --fix REVISION_SCRIPT_FILENAME + +# Logging configuration. This is also consumed by the user-maintained +# env.py script only. +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARNING +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARNING +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/backend/migrations/README b/backend/migrations/README new file mode 100644 index 0000000..e0d0858 --- /dev/null +++ b/backend/migrations/README @@ -0,0 +1 @@ +Generic single-database configuration with an async dbapi. 
\ No newline at end of file
diff --git a/backend/migrations/env.py b/backend/migrations/env.py
new file mode 100644
index 0000000..0036b4e
--- /dev/null
+++ b/backend/migrations/env.py
@@ -0,0 +1,134 @@
+import asyncio
+import configparser
+import os
+from logging.config import fileConfig
+
+from sqlalchemy import pool
+from sqlalchemy.engine import Connection
+from sqlalchemy.ext.asyncio import async_engine_from_config
+
+from alembic import context
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+
+def _resolve_database_url() -> str:
+    """Return the DSN to migrate against, preferring DATABASE_MIGRATE_URL.
+
+    configparser resolves %(DATABASE_MIGRATE_URL)s only against keys defined in
+    the ini file, never against the OS environment, so reading sqlalchemy.url
+    from alembic.ini raises InterpolationMissingOptionError while the
+    placeholder is unresolved. The ini value is therefore read lazily, only
+    when the environment variable is absent, and an unresolvable placeholder
+    is reported as a configuration error instead of a configparser traceback.
+
+    Raises:
+        RuntimeError: if neither the environment nor alembic.ini yields a URL.
+    """
+    env_url = os.environ.get("DATABASE_MIGRATE_URL")
+    if env_url:
+        return env_url
+    try:
+        ini_url = config.get_main_option("sqlalchemy.url")
+    except configparser.InterpolationError:
+        ini_url = None
+    if not ini_url:
+        raise RuntimeError(
+            "DATABASE_MIGRATE_URL is not set and alembic.ini does not define "
+            "a usable sqlalchemy.url"
+        )
+    return ini_url
+
+
+# Inject the runtime DSN so the rest of this script (and Alembic itself) reads
+# it through the normal sqlalchemy.url option. set_main_option() runs the
+# value through configparser interpolation, so literal "%" characters (for
+# example a percent-encoded password) must be doubled.
+config.set_main_option(
+    "sqlalchemy.url", _resolve_database_url().replace("%", "%%")
+)
+
+# Interpret the config file for Python logging.
+# This line sets up loggers basically.
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# Import all models so that Base.metadata is populated for autogenerate support.
+# This MUST happen before target_metadata is assigned — Alembic won't see tables
+# that haven't been imported (RESEARCH.md Pitfall 2).
+from db.models import Base  # noqa: F401 — must import to register all models (Pitfall 2)
+
+target_metadata = Base.metadata
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well. By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def do_run_migrations(connection: Connection) -> None:
+    """Configure the context with a live connection and run the migrations.
+
+    Passed to connection.run_sync() by run_async_migrations(), which supplies
+    the connection argument.
+    """
+    context.configure(connection=connection, target_metadata=target_metadata)
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+async def run_async_migrations() -> None:
+    """In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+
+    connectable = async_engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    async with connectable.connect() as connection:
+        await connection.run_sync(do_run_migrations)
+
+    await connectable.dispose()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode."""
+
+    asyncio.run(run_async_migrations())
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/backend/migrations/script.py.mako b/backend/migrations/script.py.mako
new file mode 100644
index 0000000..1101630
--- /dev/null
+++ b/backend/migrations/script.py.mako
@@ -0,0 +1,28 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    """Upgrade schema."""
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    """Downgrade schema."""
+    ${downgrades if downgrades else "pass"}
diff --git a/backend/migrations/versions/0001_initial_schema.py b/backend/migrations/versions/0001_initial_schema.py
new file mode 100644
index 0000000..b418fc0
--- /dev/null
+++ b/backend/migrations/versions/0001_initial_schema.py
@@ -0,0 +1,273 @@
+"""Initial v1 schema — all 11 DocuVault tables.
+
+Revision ID: 0001
+Revises:
+Create Date: 2026-05-22
+
+Creates tables in dependency order:
+  1. users (no FK dependencies)
+  2. groups (no FK dependencies — D-02 stub)
+  3. quotas (FK -> users)
+  4. refresh_tokens (FK -> users)
+  5. folders (FK -> users, self-referential FK -> folders)
+  6. topics (FK -> users)
+  7. documents (FK -> users [nullable, D-03], FK -> folders)
+  8. document_topics (FK -> documents, topics)
+  9. shares (FK -> documents, users x2)
+  10. audit_log (FK -> users x2)
+  11. cloud_connections (FK -> users)
+
+Pitfall 4 note: ALTER DEFAULT PRIVILEGES is required so future migrations
+(applied by docuvault_migrate) automatically grant access to docuvault_app.
+The docuvault_app user is created in docker/postgres/initdb.d/01-init-users.sql
+with CONNECT but no table privileges — this migration grants them.
+"""
+from __future__ import annotations
+
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "0001"
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create the v1 tables and indexes, then grant docuvault_app access to them."""
+    # ── 1. users ──────────────────────────────────────────────────────────────
+    op.create_table("users",
+        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("handle", sa.String(), nullable=False),
+        sa.Column("email", sa.String(), nullable=False),
+        sa.Column("password_hash", sa.Text(), nullable=False),
+        sa.Column("totp_secret", sa.Text(), nullable=True),
+        sa.Column("totp_enabled", sa.Boolean(), nullable=False, server_default="false"),
+        sa.Column("role", sa.String(), nullable=False, server_default="user"),
+        sa.Column("is_active", sa.Boolean(), nullable=False, server_default="true"),
+        sa.Column("ai_provider", sa.Text(), nullable=True),
+        sa.Column("ai_model", sa.Text(), nullable=True),
+        sa.Column("default_storage_backend", sa.String(), nullable=False, server_default="minio"),
+        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("handle"),
+        sa.UniqueConstraint("email"),
+    )
+
+    # ── 2. groups (D-02 stub — v2 feature) ───────────────────────────────────
+    op.create_table("groups",
+        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("name", sa.Text(), nullable=False),
+        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("name"),
+    )
+
+    # ── 3. quotas ─────────────────────────────────────────────────────────────
+    op.create_table("quotas",
+        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
+        # 100 MB default free-tier quota
+        sa.Column("limit_bytes", sa.BigInteger(), nullable=False, server_default="104857600"),
+        sa.Column("used_bytes", sa.BigInteger(), nullable=False, server_default="0"),
+        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("user_id"),
+    )
+
+    # ── 4. refresh_tokens ─────────────────────────────────────────────────────
+    op.create_table("refresh_tokens",
+        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("token_hash", sa.Text(), nullable=False),
+        sa.Column("expires_at", sa.TIMESTAMP(timezone=True), nullable=False),
+        sa.Column("revoked", sa.Boolean(), nullable=False, server_default="false"),
+        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
+        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("token_hash"),
+    )
+    op.create_index("ix_refresh_tokens_user_revoked", "refresh_tokens", ["user_id", "revoked"])
+
+    # ── 5. folders ────────────────────────────────────────────────────────────
+    op.create_table("folders",
+        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("parent_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("name", sa.Text(), nullable=False),
+        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
+        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(["parent_id"], ["folders.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+        # NOTE(review): parent_id is nullable and PostgreSQL treats NULLs as distinct
+        # in UNIQUE constraints by default, so this does not stop duplicate names
+        # among a user's root folders — confirm whether that is intended.
+        sa.UniqueConstraint("user_id", "parent_id", "name", name="uq_folders_user_parent_name"),
+    )
+
+    # ── 6. topics ─────────────────────────────────────────────────────────────
+    op.create_table("topics",
+        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("name", sa.Text(), nullable=False),
+        sa.Column("description", sa.Text(), nullable=False, server_default=""),
+        sa.Column("color", sa.String(7), nullable=False, server_default="#6366f1"),
+        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+        # NOTE(review): user_id is nullable and NULLs are distinct in a PostgreSQL
+        # UNIQUE constraint, so topics with user_id NULL are not deduplicated by
+        # name — confirm whether that is intended.
+        sa.UniqueConstraint("user_id", "name", name="uq_topics_user_name"),
+    )
+
+    # ── 7. documents ──────────────────────────────────────────────────────────
+    op.create_table("documents",
+        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+        # D-03: user_id is nullable in Phase 1 — no auth yet.
+        # Phase 2 migration adds NOT NULL constraint.
+        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("folder_id", postgresql.UUID(as_uuid=True), nullable=True),
+        # original human-readable filename — stored in DB only, never in the MinIO key
+        sa.Column("filename", sa.Text(), nullable=False),
+        # MinIO object key: {user_id}/{document_id}/{uuid4()}{ext}
+        sa.Column("object_key", sa.Text(), nullable=False),
+        sa.Column("content_type", sa.Text(), nullable=False),
+        sa.Column("size_bytes", sa.BigInteger(), nullable=False, server_default="0"),
+        sa.Column("storage_backend", sa.String(), nullable=False, server_default="minio"),
+        sa.Column("extracted_text", sa.Text(), nullable=True),
+        sa.Column("status", sa.String(), nullable=False, server_default="pending"),
+        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
+        sa.Column("updated_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
+        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(["folder_id"], ["folders.id"], ondelete="SET NULL"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index("ix_documents_user_folder", "documents", ["user_id", "folder_id"])
+    op.create_index("ix_documents_user_created", "documents", ["user_id", "created_at"])
+
+    # ── 8. document_topics ────────────────────────────────────────────────────
+    op.create_table("document_topics",
+        sa.Column("document_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("topic_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.ForeignKeyConstraint(["document_id"], ["documents.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(["topic_id"], ["topics.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("document_id", "topic_id"),
+    )
+
+    # ── 9. shares ─────────────────────────────────────────────────────────────
+    op.create_table("shares",
+        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("document_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("owner_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("recipient_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("permission", sa.String(), nullable=False, server_default="view"),
+        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
+        sa.ForeignKeyConstraint(["document_id"], ["documents.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(["owner_id"], ["users.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(["recipient_id"], ["users.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+        sa.UniqueConstraint("document_id", "recipient_id", name="uq_shares_document_recipient"),
+    )
+    op.create_index("ix_shares_recipient", "shares", ["recipient_id"])
+
+    # ── 10. audit_log ─────────────────────────────────────────────────────────
+    op.create_table("audit_log",
+        sa.Column("id", sa.Integer(), autoincrement=True, nullable=False),
+        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("actor_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("event_type", sa.Text(), nullable=False),
+        sa.Column("resource_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("ip_address", postgresql.INET(), nullable=True),
+        # DB column name is "metadata"; ORM uses "metadata_" to avoid reserved-attr conflict
+        sa.Column("metadata", postgresql.JSONB(), nullable=True),
+        sa.Column("created_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
+        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="SET NULL"),
+        sa.ForeignKeyConstraint(["actor_id"], ["users.id"], ondelete="SET NULL"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index("ix_audit_user_created", "audit_log", ["user_id", "created_at"])
+    op.create_index("ix_audit_event_created", "audit_log", ["event_type", "created_at"])
+
+    # ── 11. cloud_connections ─────────────────────────────────────────────────
+    op.create_table("cloud_connections",
+        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("user_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("provider", sa.String(), nullable=False),
+        sa.Column("display_name", sa.Text(), nullable=False),
+        sa.Column("credentials_enc", sa.Text(), nullable=False),
+        sa.Column("status", sa.String(), nullable=False, server_default="ACTIVE"),
+        sa.Column("connected_at", sa.TIMESTAMP(timezone=True), nullable=False, server_default=sa.text("now()")),
+        sa.ForeignKeyConstraint(["user_id"], ["users.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index("ix_cloud_connections_user", "cloud_connections", ["user_id"])
+
+    # ── Privilege grants ───────────────────────────────────────────────────────
+    # Pitfall 4: ALTER DEFAULT PRIVILEGES is required so future migrations
+    # (applied by docuvault_migrate user) automatically grant DML access to
+    # docuvault_app on any tables created afterward.
+    #
+    # The docuvault_app user is created in docker/postgres/initdb.d/01-init-users.sql
+    # with CONNECT privilege only — no table-level access. The grants below establish:
+    #   (a) immediate access to all tables/sequences created by THIS migration
+    #   (b) automatic access to all tables/sequences created by FUTURE migrations
+    #       run by the same docuvault_migrate user
+    op.execute("GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO docuvault_app;")
+    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO docuvault_app;")
+    # Sequences grant is required because audit_log.id uses a sequence (autoincrement).
+    # docuvault_app must be able to call nextval() on any sequence.
+    op.execute("GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO docuvault_app;")
+    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT USAGE, SELECT ON SEQUENCES TO docuvault_app;")
+
+
+def downgrade() -> None:
+    """Revoke the default privileges, then drop every table created by upgrade().
+
+    Indexes are dropped before their tables, and tables go in reverse
+    dependency order.
+    """
+    # Dropping the tables removes the table- and sequence-level grants with them,
+    # but the ALTER DEFAULT PRIVILEGES entries added by upgrade() would otherwise
+    # outlive the schema, so they are revoked explicitly.
+    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public REVOKE USAGE, SELECT ON SEQUENCES FROM docuvault_app;")
+    op.execute("ALTER DEFAULT PRIVILEGES IN SCHEMA public REVOKE SELECT, INSERT, UPDATE, DELETE ON TABLES FROM docuvault_app;")
+
+    # cloud_connections
+    op.drop_index("ix_cloud_connections_user", table_name="cloud_connections")
+    op.drop_table("cloud_connections")
+
+    # audit_log
+    op.drop_index("ix_audit_event_created", table_name="audit_log")
+    op.drop_index("ix_audit_user_created", table_name="audit_log")
+    op.drop_table("audit_log")
+
+    # shares
+    op.drop_index("ix_shares_recipient", table_name="shares")
+    op.drop_table("shares")
+
+    # document_topics
+    op.drop_table("document_topics")
+
+    # documents
+    op.drop_index("ix_documents_user_created", table_name="documents")
+    op.drop_index("ix_documents_user_folder", table_name="documents")
+    op.drop_table("documents")
+
+    # topics
+    op.drop_table("topics")
+
+    # folders
+    op.drop_table("folders")
+
+    # refresh_tokens
+    op.drop_index("ix_refresh_tokens_user_revoked", table_name="refresh_tokens")
+    op.drop_table("refresh_tokens")
+
+    # quotas
+    op.drop_table("quotas")
+
+    # groups (D-02 stub)
+    op.drop_table("groups")
+
+    # users (last — all other tables depend on it)
+    op.drop_table("users")