3ed6dd494f
- Add generate_presigned_put_url and stat_object abstract methods to StorageBackend ABC - Extend MinIOBackend with dual client (self._client internal + self._public_client public) - MinIOBackend.__init__ accepts optional public_endpoint param (RESEARCH.md Finding 3) - generate_presigned_put_url uses self._public_client for browser-resolvable URLs - stat_object uses self._client.stat_object and returns .size (authoritative, T-03-05) - get_storage_backend() passes public_endpoint=settings.minio_public_endpoint - config.py adds minio_public_endpoint field (RESEARCH.md Finding 3) - docker-compose.yml: MINIO_API_CORS_ALLOW_ORIGIN on minio service (T-03-09) - docker-compose.yml: MINIO_PUBLIC_ENDPOINT on backend service - docker-compose.yml: new celery-beat service (RESEARCH.md Finding 10)
149 lines
5.1 KiB
Python
149 lines
5.1 KiB
Python
"""
|
|
MinIOBackend — synchronous Minio SDK wrapped in asyncio.to_thread().
|
|
|
|
Every call to the synchronous Minio SDK is offloaded to a thread pool via
|
|
asyncio.to_thread() so the FastAPI event loop is never blocked.
|
|
|
|
Object key schema (STORE-02 / D-06):
|
|
{user_id}/{document_id}/{uuid4()}{ext}
|
|
|
|
The human-readable filename is NEVER passed into this module — only the
|
|
file extension (derived by the caller from Path(original_name).suffix.lower())
|
|
reaches here.
|
|
"""
|
|
from __future__ import annotations

import asyncio
import io
import logging
import uuid
from datetime import timedelta
from typing import Optional

from minio import Minio

from storage.base import StorageBackend
|
|
|
|
|
|
class MinIOBackend(StorageBackend):
    """MinIO implementation of StorageBackend.

    All synchronous Minio SDK calls are wrapped in asyncio.to_thread() to
    avoid blocking the FastAPI event loop (RESEARCH.md Pattern 3).

    Two SDK clients are held:

    * ``self._client`` is built from ``endpoint`` and performs every
      data-plane call (put/get/delete/stat/bucket_exists) and signs GET URLs.
    * ``self._public_client`` is built from ``public_endpoint`` (falling back
      to ``endpoint``) and signs PUT URLs.
    """

    # Shared by all instances; used to report swallowed health-check failures.
    _logger = logging.getLogger(__name__)

    def __init__(
        self,
        endpoint: str,
        access_key: str,
        secret_key: str,
        bucket: str,
        secure: bool = False,
        public_endpoint: Optional[str] = None,
        region: Optional[str] = None,
    ) -> None:
        """Create the internal and public Minio clients.

        Args:
            endpoint: ``host:port`` used for all data-plane calls.
            access_key: Access key passed to both clients.
            secret_key: Secret key passed to both clients.
            bucket: Bucket every operation of this backend targets.
            secure: Passed to both clients as the SDK's ``secure`` flag.
            public_endpoint: ``host:port`` to embed in presigned PUT URLs.
                When falsy, ``endpoint`` is used instead.
            region: Optional bucket region passed to both clients. ``None``
                keeps the SDK default behaviour.

        NOTE(review): per the MinIO Python SDK, presigning resolves the bucket
        region first, and without a configured region that may involve a
        request to the client's own endpoint. If ``public_endpoint`` is only
        resolvable from the browser, set ``region`` (MinIO's default is
        presumably "us-east-1" — confirm for this deployment) so signing stays
        offline.
        """
        self._bucket = bucket
        self._client = Minio(
            endpoint=endpoint,
            access_key=access_key,
            secret_key=secret_key,
            secure=secure,  # False for Docker internal HTTP traffic between containers
            region=region,
        )
        # Second client for presigned URL generation — uses browser-accessible hostname.
        # Falls back to internal client endpoint if not configured.
        # RESEARCH.md Finding 3 — dual-client pattern to avoid Docker hostname pitfall (T-03-10).
        self._public_client = Minio(
            endpoint=(public_endpoint or endpoint),
            access_key=access_key,
            secret_key=secret_key,
            secure=secure,
            region=region,
        )

    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Store bytes in MinIO and return the generated object key.

        Key schema: {user_id}/{document_id}/{uuid4()}{extension}
        The filename is NOT a parameter — STORE-02 compliance.

        Args:
            user_id: First path segment of the object key.
            document_id: Second path segment of the object key.
            file_bytes: Payload to upload.
            extension: Appended verbatim after the generated UUID.
            content_type: Content type recorded for the object.

        Returns:
            The object key the payload was stored under.
        """
        object_key = f"{user_id}/{document_id}/{uuid.uuid4()}{extension}"
        # A freshly constructed BytesIO is already positioned at offset 0.
        data = io.BytesIO(file_bytes)
        await asyncio.to_thread(
            self._client.put_object,
            self._bucket,
            object_key,
            data,
            length=len(file_bytes),
            content_type=content_type,
        )
        return object_key

    async def get_object(self, object_key: str) -> bytes:
        """Fetch object bytes from MinIO by key."""

        def _fetch() -> bytes:
            response = self._client.get_object(self._bucket, object_key)
            try:
                return response.read()
            finally:
                # Always hand the connection back, even if read() raises.
                response.close()
                response.release_conn()

        return await asyncio.to_thread(_fetch)

    async def delete_object(self, object_key: str) -> None:
        """Delete an object from MinIO by key."""
        await asyncio.to_thread(self._client.remove_object, self._bucket, object_key)

    async def presigned_get_url(
        self, object_key: str, expires_minutes: int = 60
    ) -> str:
        """Return a time-limited pre-signed download URL.

        NOTE(review): this signs with the internal client, so the URL carries
        the internal endpoint's host rather than ``public_endpoint``. Confirm
        whether any caller hands this URL to a browser before switching it to
        the public client.
        """
        return await asyncio.to_thread(
            self._client.presigned_get_object,
            self._bucket,
            object_key,
            expires=timedelta(minutes=expires_minutes),
        )

    async def health_check(self) -> bool:
        """Return True when the configured bucket is reachable; False on any exception."""
        try:
            return await asyncio.to_thread(self._client.bucket_exists, self._bucket)
        except Exception:
            # Deliberate best-effort: report "unhealthy" instead of raising,
            # but leave a trace so the cause of the failure is not lost.
            self._logger.warning(
                "MinIO health check failed for bucket %r",
                self._bucket,
                exc_info=True,
            )
            return False

    async def generate_presigned_put_url(
        self, object_key: str, expires_minutes: int = 15
    ) -> str:
        """Return a presigned PUT URL using the public-endpoint client.

        Uses self._public_client so the returned URL contains a browser-resolvable
        hostname (not the Docker-internal 'minio:9000' address).
        RESEARCH.md Finding 2: presigned_put_object(bucket, key, expires=timedelta).
        RESEARCH.md Finding 3: dual-client pattern for Docker hostname pitfall (T-03-10).
        """
        return await asyncio.to_thread(
            self._public_client.presigned_put_object,
            self._bucket,
            object_key,
            expires=timedelta(minutes=expires_minutes),
        )

    async def stat_object(self, object_key: str) -> int:
        """Return the authoritative file size in bytes from MinIO stat.

        Calls self._client.stat_object (internal endpoint) and returns .size.
        RESEARCH.md Finding 5: stat_object returns .size as int (authoritative).
        Raises minio.error.S3Error(code='NoSuchKey') if the object does not exist.
        """
        result = await asyncio.to_thread(
            self._client.stat_object, self._bucket, object_key
        )
        return result.size
|