diff --git a/backend/config.py b/backend/config.py index 3dca512..0b7a54b 100644 --- a/backend/config.py +++ b/backend/config.py @@ -25,6 +25,9 @@ class Settings(BaseSettings): minio_access_key: str = "docuvault_app" minio_secret_key: str = "changeme_minio_app" minio_bucket: str = "docuvault" + # RESEARCH.md Finding 3 — browser-resolvable hostname for presigned URLs. + # Empty string means fall back to minio_endpoint inside MinIOBackend. + minio_public_endpoint: str = "" # Redis / Celery redis_url: str = "redis://:changeme_redis@redis:6379/0" diff --git a/backend/storage/__init__.py b/backend/storage/__init__.py index 7ef87b2..a724cb4 100644 --- a/backend/storage/__init__.py +++ b/backend/storage/__init__.py @@ -17,11 +17,17 @@ def get_storage_backend() -> StorageBackend: secure=False is correct for Docker internal HTTP traffic between containers (RESEARCH.md Pattern 3). + + public_endpoint is the browser-resolvable hostname for presigned PUT URLs. + RESEARCH.md Finding 3 — dual-client pattern: internal endpoint for all + server-side operations; public endpoint for generate_presigned_put_url only. """ + public_ep = settings.minio_public_endpoint or None return MinIOBackend( endpoint=settings.minio_endpoint, access_key=settings.minio_access_key, secret_key=settings.minio_secret_key, bucket=settings.minio_bucket, secure=False, + public_endpoint=public_ep, ) diff --git a/backend/storage/base.py b/backend/storage/base.py index 91b4eba..e4f2389 100644 --- a/backend/storage/base.py +++ b/backend/storage/base.py @@ -4,12 +4,14 @@ StorageBackend ABC for DocuVault. Mirrors backend/ai/base.py — declares the abstract interface that all storage backends (MinIO, OneDrive, Google Drive, Nextcloud, WebDAV) must implement. -Five abstract methods define the contract: - put_object — store bytes, return object key - get_object — fetch bytes by key - delete_object — remove object by key - presigned_get_url — generate a time-limited download URL - health_check — verify backend connectivity +Seven abstract methods define the contract: + put_object — store bytes, return object key + get_object — fetch bytes by key + delete_object — remove object by key + presigned_get_url — generate a time-limited download URL + health_check — verify backend connectivity + generate_presigned_put_url — generate a presigned PUT URL for direct browser upload + stat_object — return authoritative file size from object storage """ from abc import ABC, abstractmethod @@ -52,3 +54,28 @@ class StorageBackend(ABC): async def health_check(self) -> bool: """Return True if the backend is reachable and operational.""" ... + + @abstractmethod + async def generate_presigned_put_url( + self, object_key: str, expires_minutes: int = 15 + ) -> str: + """Return a presigned PUT URL for direct browser-to-storage upload. + + RESEARCH.md Finding 3 — public client requirement: the returned URL must + use a browser-resolvable hostname (not the internal Docker hostname). + The presigned URL is tied to the exact object_key and expires after + expires_minutes (default 15 — D-05). + """ + ... + + @abstractmethod + async def stat_object(self, object_key: str) -> int: + """Return the authoritative file size in bytes for the given object. + + RESEARCH.md Finding 5 — returns the size (.size attribute) from the + object storage stat call. This is the only trusted source of file size; + never use client-supplied size values (D-07, T-03-05). + + Raises S3Error(code='NoSuchKey') if the object does not exist. + """ + ... diff --git a/backend/storage/minio_backend.py b/backend/storage/minio_backend.py index d6c45cf..f57e032 100644 --- a/backend/storage/minio_backend.py +++ b/backend/storage/minio_backend.py @@ -11,10 +11,13 @@ The human-readable filename is NEVER passed into this module — only the file extension (derived by the caller from Path(original_name).suffix.lower()) reaches here. """ +from __future__ import annotations + import asyncio import io import uuid from datetime import timedelta +from typing import Optional from minio import Minio @@ -35,6 +38,7 @@ class MinIOBackend(StorageBackend): secret_key: str, bucket: str, secure: bool = False, + public_endpoint: Optional[str] = None, ) -> None: self._bucket = bucket self._client = Minio( @@ -43,6 +47,15 @@ class MinIOBackend(StorageBackend): secret_key=secret_key, secure=secure, # False for Docker internal HTTP traffic between containers ) + # Second client for presigned URL generation — uses browser-accessible hostname. + # Falls back to internal client endpoint if not configured. + # RESEARCH.md Finding 3 — dual-client pattern to avoid Docker hostname pitfall (T-03-10). + self._public_client = Minio( + endpoint=(public_endpoint or endpoint), + access_key=access_key, + secret_key=secret_key, + secure=secure, + ) async def put_object( self, @@ -104,3 +117,32 @@ class MinIOBackend(StorageBackend): return await asyncio.to_thread(self._client.bucket_exists, self._bucket) except Exception: return False + + async def generate_presigned_put_url( + self, object_key: str, expires_minutes: int = 15 + ) -> str: + """Return a presigned PUT URL using the public-endpoint client. + + Uses self._public_client so the returned URL contains a browser-resolvable + hostname (not the Docker-internal 'minio:9000' address). + RESEARCH.md Finding 2: presigned_put_object(bucket, key, expires=timedelta). + RESEARCH.md Finding 3: dual-client pattern for Docker hostname pitfall (T-03-10). + """ + return await asyncio.to_thread( + self._public_client.presigned_put_object, + self._bucket, + object_key, + timedelta(minutes=expires_minutes), + ) + + async def stat_object(self, object_key: str) -> int: + """Return the authoritative file size in bytes from MinIO stat. + + Calls self._client.stat_object (internal endpoint) and returns .size. + RESEARCH.md Finding 5: stat_object returns .size as int (authoritative). + Raises minio.error.S3Error(code='NoSuchKey') if the object does not exist. + """ + result = await asyncio.to_thread( + self._client.stat_object, self._bucket, object_key + ) + return result.size diff --git a/docker-compose.yml b/docker-compose.yml index 8b30631..301b0ef 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -21,6 +21,8 @@ services: environment: MINIO_ROOT_USER: ${MINIO_ROOT_USER} MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD} + # RESEARCH.md Finding 3, T-03-09: allow browser CORS preflight for direct PUT uploads. + MINIO_API_CORS_ALLOW_ORIGIN: ${CORS_ORIGINS:-http://localhost:5173} ports: - "9000:9000" - "9001:9001" @@ -55,6 +57,7 @@ services: - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY} - MINIO_SECRET_KEY=${MINIO_SECRET_KEY} - MINIO_BUCKET=${MINIO_BUCKET} + - MINIO_PUBLIC_ENDPOINT=${MINIO_PUBLIC_ENDPOINT:-localhost:9000} - REDIS_URL=${REDIS_URL} - PYTHONDONTWRITEBYTECODE=1 extra_hosts: @@ -89,6 +92,27 @@ services: redis: condition: service_healthy + celery-beat: + build: ./backend + environment: + - DATABASE_URL=${DATABASE_URL} + - MINIO_ENDPOINT=${MINIO_ENDPOINT} + - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY} + - MINIO_SECRET_KEY=${MINIO_SECRET_KEY} + - MINIO_BUCKET=${MINIO_BUCKET} + - REDIS_URL=${REDIS_URL} + - PYTHONDONTWRITEBYTECODE=1 + extra_hosts: + - "host.docker.internal:host-gateway" + command: celery -A celery_app beat --loglevel=info + depends_on: + postgres: + condition: service_healthy + minio: + condition: service_healthy + redis: + condition: service_healthy + frontend: build: ./frontend ports: