feat(03-02): extend StorageBackend ABC and MinIOBackend with presigned PUT and stat_object

- Add generate_presigned_put_url and stat_object abstract methods to StorageBackend ABC
- Extend MinIOBackend with dual client (self._client internal + self._public_client public)
- MinIOBackend.__init__ accepts optional public_endpoint param (RESEARCH.md Finding 3)
- generate_presigned_put_url uses self._public_client for browser-resolvable URLs
- stat_object uses self._client.stat_object and returns .size (authoritative, T-03-05)
- get_storage_backend() passes public_endpoint=settings.minio_public_endpoint
- config.py adds minio_public_endpoint field (RESEARCH.md Finding 3)
- docker-compose.yml: MINIO_API_CORS_ALLOW_ORIGIN on minio service (T-03-09)
- docker-compose.yml: MINIO_PUBLIC_ENDPOINT on backend service
- docker-compose.yml: new celery-beat service (RESEARCH.md Finding 10)
This commit is contained in:
curo1305
2026-05-23 13:52:16 +02:00
parent 4e9b586ec4
commit 3ed6dd494f
5 changed files with 108 additions and 6 deletions
+3
View File
@@ -25,6 +25,9 @@ class Settings(BaseSettings):
minio_access_key: str = "docuvault_app" minio_access_key: str = "docuvault_app"
minio_secret_key: str = "changeme_minio_app" minio_secret_key: str = "changeme_minio_app"
minio_bucket: str = "docuvault" minio_bucket: str = "docuvault"
# RESEARCH.md Finding 3 — browser-resolvable hostname for presigned URLs.
# Empty string means fall back to minio_endpoint inside MinIOBackend.
minio_public_endpoint: str = ""
# Redis / Celery # Redis / Celery
redis_url: str = "redis://:changeme_redis@redis:6379/0" redis_url: str = "redis://:changeme_redis@redis:6379/0"
+6
View File
@@ -17,11 +17,17 @@ def get_storage_backend() -> StorageBackend:
secure=False is correct for Docker internal HTTP traffic between containers secure=False is correct for Docker internal HTTP traffic between containers
(RESEARCH.md Pattern 3). (RESEARCH.md Pattern 3).
public_endpoint is the browser-resolvable hostname for presigned PUT URLs.
RESEARCH.md Finding 3 — dual-client pattern: internal endpoint for all
server-side operations; public endpoint for generate_presigned_put_url only.
""" """
public_ep = settings.minio_public_endpoint or None
return MinIOBackend( return MinIOBackend(
endpoint=settings.minio_endpoint, endpoint=settings.minio_endpoint,
access_key=settings.minio_access_key, access_key=settings.minio_access_key,
secret_key=settings.minio_secret_key, secret_key=settings.minio_secret_key,
bucket=settings.minio_bucket, bucket=settings.minio_bucket,
secure=False, secure=False,
public_endpoint=public_ep,
) )
+33 -6
View File
@@ -4,12 +4,14 @@ StorageBackend ABC for DocuVault.
Mirrors backend/ai/base.py — declares the abstract interface that all storage Mirrors backend/ai/base.py — declares the abstract interface that all storage
backends (MinIO, OneDrive, Google Drive, Nextcloud, WebDAV) must implement. backends (MinIO, OneDrive, Google Drive, Nextcloud, WebDAV) must implement.
Five abstract methods define the contract: Seven abstract methods define the contract:
put_object — store bytes, return object key put_object — store bytes, return object key
get_object — fetch bytes by key get_object — fetch bytes by key
delete_object — remove object by key delete_object — remove object by key
presigned_get_url — generate a time-limited download URL presigned_get_url — generate a time-limited download URL
health_check — verify backend connectivity health_check — verify backend connectivity
generate_presigned_put_url — generate a presigned PUT URL for direct browser upload
stat_object — return authoritative file size from object storage
""" """
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
@@ -52,3 +54,28 @@ class StorageBackend(ABC):
async def health_check(self) -> bool: async def health_check(self) -> bool:
"""Return True if the backend is reachable and operational.""" """Return True if the backend is reachable and operational."""
... ...
@abstractmethod
async def generate_presigned_put_url(
self, object_key: str, expires_minutes: int = 15
) -> str:
"""Return a presigned PUT URL for direct browser-to-storage upload.

RESEARCH.md Finding 3 — public client requirement: the returned URL must
use a browser-resolvable hostname (not the internal Docker hostname).
The presigned URL is tied to the exact object_key and expires after
expires_minutes (default 15 — D-05).

object_key — key of the object the URL authorises an upload to
expires_minutes — lifetime of the URL in minutes
Returns the presigned URL as a string.
"""
...
@abstractmethod
async def stat_object(self, object_key: str) -> int:
"""Return the authoritative file size in bytes for the given object.

RESEARCH.md Finding 5 — returns the size (.size attribute) from the
object storage stat call. This is the only trusted source of file size;
never use client-supplied size values (D-07, T-03-05).

object_key — key of the object to inspect
Returns the object size in bytes.

Raises S3Error(code='NoSuchKey') if the object does not exist.
NOTE(review): S3Error is the MinIO client's exception, while this interface
is also meant for non-S3 backends (OneDrive, Google Drive, Nextcloud, WebDAV);
confirm whether a backend-neutral "not found" error should be documented here.
"""
...
+42
View File
@@ -11,10 +11,13 @@ The human-readable filename is NEVER passed into this module — only the
file extension (derived by the caller from Path(original_name).suffix.lower()) file extension (derived by the caller from Path(original_name).suffix.lower())
reaches here. reaches here.
""" """
from __future__ import annotations
import asyncio import asyncio
import io import io
import uuid import uuid
from datetime import timedelta from datetime import timedelta
from typing import Optional
from minio import Minio from minio import Minio
@@ -35,6 +38,7 @@ class MinIOBackend(StorageBackend):
secret_key: str, secret_key: str,
bucket: str, bucket: str,
secure: bool = False, secure: bool = False,
public_endpoint: Optional[str] = None,
) -> None: ) -> None:
self._bucket = bucket self._bucket = bucket
self._client = Minio( self._client = Minio(
@@ -43,6 +47,15 @@ class MinIOBackend(StorageBackend):
secret_key=secret_key, secret_key=secret_key,
secure=secure, # False for Docker internal HTTP traffic between containers secure=secure, # False for Docker internal HTTP traffic between containers
) )
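# Second client, used to sign presigned URLs so that they carry the browser-accessible hostname.
# Falls back to the internal endpoint when public_endpoint is not configured.
# RESEARCH.md Finding 3 — dual-client pattern to avoid Docker hostname pitfall (T-03-10).
# NOTE(review): no region is passed to this client, so minio-py may look up the bucket
# location over the network through this endpoint when signing. If the public endpoint
# (e.g. localhost:9000) is not reachable from inside this process, signing would fail —
# confirm, and consider passing an explicit region.
# NOTE(review): secure= is shared with the internal client; confirm the public endpoint
# really uses the same scheme (HTTP vs HTTPS).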
self._public_client = Minio(
endpoint=(public_endpoint or endpoint),
access_key=access_key,
secret_key=secret_key,
secure=secure,
)
async def put_object( async def put_object(
self, self,
@@ -104,3 +117,32 @@ class MinIOBackend(StorageBackend):
return await asyncio.to_thread(self._client.bucket_exists, self._bucket) return await asyncio.to_thread(self._client.bucket_exists, self._bucket)
except Exception: except Exception:
return False return False
async def generate_presigned_put_url(
    self, object_key: str, expires_minutes: int = 15
) -> str:
    """Sign a PUT upload URL for object_key with the public-endpoint client.

    Signing goes through self._public_client so that the URL handed to the
    browser carries a browser-resolvable hostname (not the Docker-internal
    'minio:9000' address).
    RESEARCH.md Finding 2: presigned_put_object(bucket, key, expires=timedelta).
    RESEARCH.md Finding 3: dual-client pattern for Docker hostname pitfall (T-03-10).

    object_key — key inside the configured bucket that the URL allows uploading to
    expires_minutes — URL lifetime in minutes (default 15)
    Returns the URL string produced by the client.

    NOTE(review): depending on how self._public_client was constructed, the
    client may need to resolve the bucket region over the network before
    signing — confirm the public endpoint is reachable from this process.
    """
    sign_put = self._public_client.presigned_put_object
    lifetime = timedelta(minutes=expires_minutes)
    # The client call is synchronous; run it in a worker thread so the
    # event loop is not blocked while it executes.
    upload_url = await asyncio.to_thread(
        sign_put, self._bucket, object_key, lifetime
    )
    return upload_url
async def stat_object(self, object_key: str) -> int:
    """Look up object_key in the bucket and return its stored size in bytes.

    The lookup uses self._client (internal endpoint) and the value returned
    is the .size attribute of the stat result.
    RESEARCH.md Finding 5: stat_object returns .size as int (authoritative).

    object_key — key of the object inside the configured bucket
    Raises minio.error.S3Error(code='NoSuchKey') if the object does not exist.
    """
    stat_call = self._client.stat_object
    # The client call is synchronous; run it in a worker thread so the
    # event loop is not blocked while it executes.
    object_info = await asyncio.to_thread(stat_call, self._bucket, object_key)
    return object_info.size
+24
View File
@@ -21,6 +21,8 @@ services:
environment: environment:
MINIO_ROOT_USER: ${MINIO_ROOT_USER} MINIO_ROOT_USER: ${MINIO_ROOT_USER}
MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD} MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD}
# RESEARCH.md Finding 3, T-03-09: allow browser CORS preflight for direct PUT uploads.
MINIO_API_CORS_ALLOW_ORIGIN: ${CORS_ORIGINS:-http://localhost:5173}
ports: ports:
- "9000:9000" - "9000:9000"
- "9001:9001" - "9001:9001"
@@ -55,6 +57,7 @@ services:
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY} - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY} - MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
- MINIO_BUCKET=${MINIO_BUCKET} - MINIO_BUCKET=${MINIO_BUCKET}
- MINIO_PUBLIC_ENDPOINT=${MINIO_PUBLIC_ENDPOINT:-localhost:9000}
- REDIS_URL=${REDIS_URL} - REDIS_URL=${REDIS_URL}
- PYTHONDONTWRITEBYTECODE=1 - PYTHONDONTWRITEBYTECODE=1
extra_hosts: extra_hosts:
@@ -89,6 +92,27 @@ services:
redis: redis:
condition: service_healthy condition: service_healthy
# Celery beat service (RESEARCH.md Finding 10), built from the same ./backend
# build context and started with the `beat` sub-command of celery_app.
celery-beat:
build: ./backend
environment:
- DATABASE_URL=${DATABASE_URL}
- MINIO_ENDPOINT=${MINIO_ENDPOINT}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
- MINIO_BUCKET=${MINIO_BUCKET}
# NOTE(review): MINIO_PUBLIC_ENDPOINT is not set for this service (the backend
# service sets it), so the application default presumably applies in this
# container — confirm beat never needs browser-facing presigned URLs.
- REDIS_URL=${REDIS_URL}
- PYTHONDONTWRITEBYTECODE=1
extra_hosts:
- "host.docker.internal:host-gateway"
command: celery -A celery_app beat --loglevel=info
# Start only after postgres, minio and redis report healthy.
depends_on:
postgres:
condition: service_healthy
minio:
condition: service_healthy
redis:
condition: service_healthy
frontend: frontend:
build: ./frontend build: ./frontend
ports: ports: