feat(03-02): extend StorageBackend ABC and MinIOBackend with presigned PUT and stat_object
- Add generate_presigned_put_url and stat_object abstract methods to StorageBackend ABC
- Extend MinIOBackend with dual client (self._client internal + self._public_client public)
- MinIOBackend.__init__ accepts optional public_endpoint param (RESEARCH.md Finding 3)
- generate_presigned_put_url uses self._public_client for browser-resolvable URLs
- stat_object uses self._client.stat_object and returns .size (authoritative, T-03-05)
- get_storage_backend() passes public_endpoint=settings.minio_public_endpoint
- config.py adds minio_public_endpoint field (RESEARCH.md Finding 3)
- docker-compose.yml: MINIO_API_CORS_ALLOW_ORIGIN on minio service (T-03-09)
- docker-compose.yml: MINIO_PUBLIC_ENDPOINT on backend service
- docker-compose.yml: new celery-beat service (RESEARCH.md Finding 10)
This commit is contained in:
@@ -25,6 +25,9 @@ class Settings(BaseSettings):
|
|||||||
minio_access_key: str = "docuvault_app"
|
minio_access_key: str = "docuvault_app"
|
||||||
minio_secret_key: str = "changeme_minio_app"
|
minio_secret_key: str = "changeme_minio_app"
|
||||||
minio_bucket: str = "docuvault"
|
minio_bucket: str = "docuvault"
|
||||||
|
# RESEARCH.md Finding 3 — browser-resolvable hostname for presigned URLs.
|
||||||
|
# Empty string means fall back to minio_endpoint inside MinIOBackend.
|
||||||
|
minio_public_endpoint: str = ""
|
||||||
|
|
||||||
# Redis / Celery
|
# Redis / Celery
|
||||||
redis_url: str = "redis://:changeme_redis@redis:6379/0"
|
redis_url: str = "redis://:changeme_redis@redis:6379/0"
|
||||||
|
|||||||
@@ -17,11 +17,17 @@ def get_storage_backend() -> StorageBackend:
|
|||||||
|
|
||||||
secure=False is correct for Docker internal HTTP traffic between containers
|
secure=False is correct for Docker internal HTTP traffic between containers
|
||||||
(RESEARCH.md Pattern 3).
|
(RESEARCH.md Pattern 3).
|
||||||
|
|
||||||
|
public_endpoint is the browser-resolvable hostname for presigned PUT URLs.
|
||||||
|
RESEARCH.md Finding 3 — dual-client pattern: internal endpoint for all
|
||||||
|
server-side operations; public endpoint for generate_presigned_put_url only.
|
||||||
"""
|
"""
|
||||||
|
public_ep = settings.minio_public_endpoint or None
|
||||||
return MinIOBackend(
|
return MinIOBackend(
|
||||||
endpoint=settings.minio_endpoint,
|
endpoint=settings.minio_endpoint,
|
||||||
access_key=settings.minio_access_key,
|
access_key=settings.minio_access_key,
|
||||||
secret_key=settings.minio_secret_key,
|
secret_key=settings.minio_secret_key,
|
||||||
bucket=settings.minio_bucket,
|
bucket=settings.minio_bucket,
|
||||||
secure=False,
|
secure=False,
|
||||||
|
public_endpoint=public_ep,
|
||||||
)
|
)
|
||||||
|
|||||||
+28
-1
@@ -4,12 +4,14 @@ StorageBackend ABC for DocuVault.
|
|||||||
Mirrors backend/ai/base.py — declares the abstract interface that all storage
|
Mirrors backend/ai/base.py — declares the abstract interface that all storage
|
||||||
backends (MinIO, OneDrive, Google Drive, Nextcloud, WebDAV) must implement.
|
backends (MinIO, OneDrive, Google Drive, Nextcloud, WebDAV) must implement.
|
||||||
|
|
||||||
Five abstract methods define the contract:
|
Seven abstract methods define the contract:
|
||||||
put_object — store bytes, return object key
|
put_object — store bytes, return object key
|
||||||
get_object — fetch bytes by key
|
get_object — fetch bytes by key
|
||||||
delete_object — remove object by key
|
delete_object — remove object by key
|
||||||
presigned_get_url — generate a time-limited download URL
|
presigned_get_url — generate a time-limited download URL
|
||||||
health_check — verify backend connectivity
|
health_check — verify backend connectivity
|
||||||
|
generate_presigned_put_url — generate a presigned PUT URL for direct browser upload
|
||||||
|
stat_object — return authoritative file size from object storage
|
||||||
"""
|
"""
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
@@ -52,3 +54,28 @@ class StorageBackend(ABC):
|
|||||||
async def health_check(self) -> bool:
|
async def health_check(self) -> bool:
|
||||||
"""Return True if the backend is reachable and operational."""
|
"""Return True if the backend is reachable and operational."""
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def generate_presigned_put_url(
|
||||||
|
self, object_key: str, expires_minutes: int = 15
|
||||||
|
) -> str:
|
||||||
|
"""Return a presigned PUT URL for direct browser-to-storage upload.
|
||||||
|
|
||||||
|
RESEARCH.md Finding 3 — public client requirement: the returned URL must
|
||||||
|
use a browser-resolvable hostname (not the internal Docker hostname).
|
||||||
|
The presigned URL is tied to the exact object_key and expires after
|
||||||
|
expires_minutes (default 15 — D-05).
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
async def stat_object(self, object_key: str) -> int:
|
||||||
|
"""Return the authoritative file size in bytes for the given object.
|
||||||
|
|
||||||
|
RESEARCH.md Finding 5 — returns the size (.size attribute) from the
|
||||||
|
object storage stat call. This is the only trusted source of file size;
|
||||||
|
never use client-supplied size values (D-07, T-03-05).
|
||||||
|
|
||||||
|
Raises S3Error(code='NoSuchKey') if the object does not exist.
|
||||||
|
"""
|
||||||
|
...
|
||||||
|
|||||||
@@ -11,10 +11,13 @@ The human-readable filename is NEVER passed into this module — only the
|
|||||||
file extension (derived by the caller from Path(original_name).suffix.lower())
|
file extension (derived by the caller from Path(original_name).suffix.lower())
|
||||||
reaches here.
|
reaches here.
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import io
|
import io
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from minio import Minio
|
from minio import Minio
|
||||||
|
|
||||||
@@ -35,6 +38,7 @@ class MinIOBackend(StorageBackend):
|
|||||||
secret_key: str,
|
secret_key: str,
|
||||||
bucket: str,
|
bucket: str,
|
||||||
secure: bool = False,
|
secure: bool = False,
|
||||||
|
public_endpoint: Optional[str] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._bucket = bucket
|
self._bucket = bucket
|
||||||
self._client = Minio(
|
self._client = Minio(
|
||||||
@@ -43,6 +47,15 @@ class MinIOBackend(StorageBackend):
|
|||||||
secret_key=secret_key,
|
secret_key=secret_key,
|
||||||
secure=secure, # False for Docker internal HTTP traffic between containers
|
secure=secure, # False for Docker internal HTTP traffic between containers
|
||||||
)
|
)
|
||||||
|
# Second client for presigned URL generation — uses browser-accessible hostname.
|
||||||
|
# Falls back to internal client endpoint if not configured.
|
||||||
|
# RESEARCH.md Finding 3 — dual-client pattern to avoid Docker hostname pitfall (T-03-10).
|
||||||
|
self._public_client = Minio(
|
||||||
|
endpoint=(public_endpoint or endpoint),
|
||||||
|
access_key=access_key,
|
||||||
|
secret_key=secret_key,
|
||||||
|
secure=secure,
|
||||||
|
)
|
||||||
|
|
||||||
async def put_object(
|
async def put_object(
|
||||||
self,
|
self,
|
||||||
@@ -104,3 +117,32 @@ class MinIOBackend(StorageBackend):
|
|||||||
return await asyncio.to_thread(self._client.bucket_exists, self._bucket)
|
return await asyncio.to_thread(self._client.bucket_exists, self._bucket)
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
async def generate_presigned_put_url(
|
||||||
|
self, object_key: str, expires_minutes: int = 15
|
||||||
|
) -> str:
|
||||||
|
"""Return a presigned PUT URL using the public-endpoint client.
|
||||||
|
|
||||||
|
Uses self._public_client so the returned URL contains a browser-resolvable
|
||||||
|
hostname (not the Docker-internal 'minio:9000' address).
|
||||||
|
RESEARCH.md Finding 2: presigned_put_object(bucket, key, expires=timedelta).
|
||||||
|
RESEARCH.md Finding 3: dual-client pattern for Docker hostname pitfall (T-03-10).
|
||||||
|
"""
|
||||||
|
return await asyncio.to_thread(
|
||||||
|
self._public_client.presigned_put_object,
|
||||||
|
self._bucket,
|
||||||
|
object_key,
|
||||||
|
timedelta(minutes=expires_minutes),
|
||||||
|
)
|
||||||
|
|
||||||
|
async def stat_object(self, object_key: str) -> int:
|
||||||
|
"""Return the authoritative file size in bytes from MinIO stat.
|
||||||
|
|
||||||
|
Calls self._client.stat_object (internal endpoint) and returns .size.
|
||||||
|
RESEARCH.md Finding 5: stat_object returns .size as int (authoritative).
|
||||||
|
Raises minio.error.S3Error(code='NoSuchKey') if the object does not exist.
|
||||||
|
"""
|
||||||
|
result = await asyncio.to_thread(
|
||||||
|
self._client.stat_object, self._bucket, object_key
|
||||||
|
)
|
||||||
|
return result.size
|
||||||
|
|||||||
@@ -21,6 +21,8 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
MINIO_ROOT_USER: ${MINIO_ROOT_USER}
|
MINIO_ROOT_USER: ${MINIO_ROOT_USER}
|
||||||
MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD}
|
MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD}
|
||||||
|
# RESEARCH.md Finding 3, T-03-09: allow browser CORS preflight for direct PUT uploads.
|
||||||
|
MINIO_API_CORS_ALLOW_ORIGIN: ${CORS_ORIGINS:-http://localhost:5173}
|
||||||
ports:
|
ports:
|
||||||
- "9000:9000"
|
- "9000:9000"
|
||||||
- "9001:9001"
|
- "9001:9001"
|
||||||
@@ -55,6 +57,7 @@ services:
|
|||||||
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY}
|
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY}
|
||||||
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
|
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
|
||||||
- MINIO_BUCKET=${MINIO_BUCKET}
|
- MINIO_BUCKET=${MINIO_BUCKET}
|
||||||
|
- MINIO_PUBLIC_ENDPOINT=${MINIO_PUBLIC_ENDPOINT:-localhost:9000}
|
||||||
- REDIS_URL=${REDIS_URL}
|
- REDIS_URL=${REDIS_URL}
|
||||||
- PYTHONDONTWRITEBYTECODE=1
|
- PYTHONDONTWRITEBYTECODE=1
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
@@ -89,6 +92,27 @@ services:
|
|||||||
redis:
|
redis:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
||||||
|
celery-beat:
|
||||||
|
build: ./backend
|
||||||
|
environment:
|
||||||
|
- DATABASE_URL=${DATABASE_URL}
|
||||||
|
- MINIO_ENDPOINT=${MINIO_ENDPOINT}
|
||||||
|
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY}
|
||||||
|
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
|
||||||
|
- MINIO_BUCKET=${MINIO_BUCKET}
|
||||||
|
- REDIS_URL=${REDIS_URL}
|
||||||
|
- PYTHONDONTWRITEBYTECODE=1
|
||||||
|
extra_hosts:
|
||||||
|
- "host.docker.internal:host-gateway"
|
||||||
|
command: celery -A celery_app beat --loglevel=info
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
minio:
|
||||||
|
condition: service_healthy
|
||||||
|
redis:
|
||||||
|
condition: service_healthy
|
||||||
|
|
||||||
frontend:
|
frontend:
|
||||||
build: ./frontend
|
build: ./frontend
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
Reference in New Issue
Block a user