feat(01-04): add StorageBackend ABC + MinIOBackend + factory

- backend/storage/base.py: StorageBackend ABC with 5 abstract methods mirroring ai/base.py
- backend/storage/minio_backend.py: MinIOBackend wrapping all sync Minio SDK calls in asyncio.to_thread(); STORE-02 key schema: {user_id}/{document_id}/{uuid4()}{ext}
- backend/storage/__init__.py: get_storage_backend() factory mirroring ai/__init__.py
- backend/tests/test_storage.py: remove xfail markers (plan 04 implements the module)
This commit is contained in:
curo1305
2026-05-22 09:36:24 +02:00
parent e822a8f4b1
commit eaf86a832a
4 changed files with 187 additions and 6 deletions
+27
View File
@@ -0,0 +1,27 @@
"""
Storage backend factory for DocuVault.
Mirrors backend/ai/__init__.py — exposes a get_storage_backend() factory
that returns the configured StorageBackend implementation.
Phase 1 always returns MinIOBackend. Phase 5 will extend this factory to
support OneDrive, Google Drive, Nextcloud, and WebDAV backends.
"""
from storage.base import StorageBackend
from storage.minio_backend import MinIOBackend
from config import settings
def get_storage_backend() -> StorageBackend:
    """Build the storage backend described by ``config.settings``.

    Currently this always constructs a ``MinIOBackend`` from the
    ``minio_endpoint``, ``minio_access_key``, ``minio_secret_key`` and
    ``minio_bucket`` settings.

    TLS is disabled (``secure=False``) because traffic between containers on
    the internal Docker network is plain HTTP (RESEARCH.md Pattern 3).

    Returns:
        A newly constructed ``MinIOBackend`` instance.
    """
    minio_options = {
        "endpoint": settings.minio_endpoint,
        "access_key": settings.minio_access_key,
        "secret_key": settings.minio_secret_key,
        "bucket": settings.minio_bucket,
        "secure": False,
    }
    return MinIOBackend(**minio_options)
+54
View File
@@ -0,0 +1,54 @@
"""
StorageBackend ABC for DocuVault.
Mirrors backend/ai/base.py — declares the abstract interface that all storage
backends (MinIO, OneDrive, Google Drive, Nextcloud, WebDAV) must implement.
Five abstract methods define the contract:
put_object — store bytes, return object key
get_object — fetch bytes by key
delete_object — remove object by key
presigned_get_url — generate a time-limited download URL
health_check — verify backend connectivity
"""
from abc import ABC, abstractmethod
class StorageBackend(ABC):
    """Interface every DocuVault object-storage backend has to implement.

    All five operations are coroutines and all are abstract: a subclass that
    leaves any of them undefined cannot be instantiated.
    """

    @abstractmethod
    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Persist ``file_bytes`` and return the object key generated for it.

        Implementations MUST build the key according to the STORE-02 schema,
        ``{user_id}/{document_id}/{uuid4()}{ext}``, and MUST NOT let the
        human-readable filename leak into the returned key.

        Args:
            user_id: First path segment of the key.
            document_id: Second path segment of the key.
            file_bytes: Raw content to store.
            extension: Suffix appended after the generated UUID.
            content_type: Content type to record for the stored object.

        Returns:
            The generated object key.
        """

    @abstractmethod
    async def get_object(self, object_key: str) -> bytes:
        """Return the bytes stored under ``object_key``.

        Implementations raise when the key does not exist.
        """

    @abstractmethod
    async def delete_object(self, object_key: str) -> None:
        """Remove the object stored under ``object_key``.

        Deleting a key that does not exist is a no-op.
        """

    @abstractmethod
    async def presigned_get_url(self, object_key: str, expires_minutes: int = 60) -> str:
        """Return a pre-signed download URL for ``object_key``.

        Args:
            object_key: Key of the object to expose.
            expires_minutes: Lifetime of the URL in minutes (default 60).
        """

    @abstractmethod
    async def health_check(self) -> bool:
        """Report whether the backend is reachable and operational."""
+106
View File
@@ -0,0 +1,106 @@
"""
MinIOBackend — synchronous Minio SDK wrapped in asyncio.to_thread().
Every call to the synchronous Minio SDK is offloaded to a thread pool via
asyncio.to_thread() so the FastAPI event loop is never blocked.
Object key schema (STORE-02 / D-06):
{user_id}/{document_id}/{uuid4()}{ext}
The human-readable filename is NEVER passed into this module — only the
file extension (derived by the caller from Path(original_name).suffix.lower())
reaches here.
"""
import asyncio
import io
import uuid
from datetime import timedelta
from minio import Minio
from storage.base import StorageBackend
class MinIOBackend(StorageBackend):
    """MinIO implementation of StorageBackend.

    The Minio SDK client is synchronous, so every SDK call made here is
    dispatched through asyncio.to_thread() to keep the FastAPI event loop
    unblocked (RESEARCH.md Pattern 3).
    """

    def __init__(
        self,
        endpoint: str,
        access_key: str,
        secret_key: str,
        bucket: str,
        secure: bool = False,
    ) -> None:
        """Create the underlying Minio client and remember the target bucket.

        Args:
            endpoint: MinIO endpoint handed to the Minio client.
            access_key: Access key handed to the Minio client.
            secret_key: Secret key handed to the Minio client.
            bucket: Bucket used by every operation on this backend.
            secure: Whether the client should use TLS.
        """
        self._bucket = bucket
        self._client = Minio(
            endpoint=endpoint,
            access_key=access_key,
            secret_key=secret_key,
            secure=secure,  # False for Docker internal HTTP traffic between containers
        )

    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Store bytes in MinIO and return the generated object key.

        Key schema: {user_id}/{document_id}/{uuid4()}{extension}
        The filename is NOT a parameter — STORE-02 compliance.

        Args:
            user_id: First path segment of the key.
            document_id: Second path segment of the key.
            file_bytes: Raw content to upload.
            extension: Suffix appended verbatim after the generated UUID.
            content_type: Content type passed to the SDK for the object.

        Returns:
            The object key under which the bytes were stored.
        """
        object_key = f"{user_id}/{document_id}/{uuid.uuid4()}{extension}"
        # A freshly constructed BytesIO is already positioned at offset 0,
        # so no explicit seek is needed before handing it to the SDK.
        data = io.BytesIO(file_bytes)
        await asyncio.to_thread(
            self._client.put_object,
            self._bucket,
            object_key,
            data,
            length=len(file_bytes),
            content_type=content_type,
        )
        return object_key

    async def get_object(self, object_key: str) -> bytes:
        """Fetch object bytes from MinIO by key.

        Errors raised by the Minio client propagate to the caller.
        """

        def _fetch() -> bytes:
            # The open, read and cleanup all run inside one worker thread.
            response = self._client.get_object(self._bucket, object_key)
            try:
                return response.read()
            finally:
                # Nested so the connection is still released to the pool
                # even if close() itself raises.
                try:
                    response.close()
                finally:
                    response.release_conn()

        return await asyncio.to_thread(_fetch)

    async def delete_object(self, object_key: str) -> None:
        """Delete an object from MinIO by key."""
        await asyncio.to_thread(self._client.remove_object, self._bucket, object_key)

    async def presigned_get_url(
        self, object_key: str, expires_minutes: int = 60
    ) -> str:
        """Return a time-limited pre-signed download URL.

        Args:
            object_key: Key of the object to expose.
            expires_minutes: Lifetime of the URL in minutes (default 60).
        """
        return await asyncio.to_thread(
            self._client.presigned_get_object,
            self._bucket,
            object_key,
            timedelta(minutes=expires_minutes),
        )

    async def health_check(self) -> bool:
        """Return the result of the bucket-existence probe; False on any exception.

        The probe is deliberately best-effort and never raises, but the
        swallowed exception is logged so the reason for an unhealthy backend
        is not lost.
        """
        try:
            return await asyncio.to_thread(self._client.bucket_exists, self._bucket)
        except Exception:
            # Local import keeps this block self-contained without touching
            # the module's import section.
            import logging

            logging.getLogger(__name__).warning(
                "MinIO health check failed for bucket %r",
                self._bucket,
                exc_info=True,
            )
            return False
-6
View File
@@ -20,7 +20,6 @@ import pytest
# Test 1: object key matches STORE-02 regex # Test 1: object key matches STORE-02 regex
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
async def test_object_key_schema(db_session): async def test_object_key_schema(db_session):
"""STORE-02: put_object must return a key matching {user_id}/{doc_id}/{uuid4}{ext}.""" """STORE-02: put_object must return a key matching {user_id}/{doc_id}/{uuid4}{ext}."""
try: try:
@@ -69,7 +68,6 @@ async def test_object_key_schema(db_session):
# Test 2: human filename never in object key # Test 2: human filename never in object key
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
async def test_filename_not_in_object_key(): async def test_filename_not_in_object_key():
"""STORE-02: The human-readable filename must never appear in the MinIO object key.""" """STORE-02: The human-readable filename must never appear in the MinIO object key."""
try: try:
@@ -102,7 +100,6 @@ async def test_filename_not_in_object_key():
# Test 3: StorageBackend ABC enforcement # Test 3: StorageBackend ABC enforcement
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
def test_storage_backend_abc_methods(): def test_storage_backend_abc_methods():
"""StorageBackend is abstract — concrete subclass missing all 5 methods raises TypeError.""" """StorageBackend is abstract — concrete subclass missing all 5 methods raises TypeError."""
try: try:
@@ -121,7 +118,6 @@ def test_storage_backend_abc_methods():
# Test 4: factory returns MinIOBackend instance # Test 4: factory returns MinIOBackend instance
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
def test_get_storage_backend_returns_minio(): def test_get_storage_backend_returns_minio():
"""get_storage_backend() factory must return a MinIOBackend instance.""" """get_storage_backend() factory must return a MinIOBackend instance."""
try: try:
@@ -138,7 +134,6 @@ def test_get_storage_backend_returns_minio():
# Test 5: put_object wraps sync SDK call in asyncio.to_thread # Test 5: put_object wraps sync SDK call in asyncio.to_thread
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
async def test_put_object_uses_asyncio_to_thread(monkeypatch): async def test_put_object_uses_asyncio_to_thread(monkeypatch):
"""MinIOBackend.put_object must delegate the blocking SDK call via asyncio.to_thread.""" """MinIOBackend.put_object must delegate the blocking SDK call via asyncio.to_thread."""
try: try:
@@ -182,7 +177,6 @@ async def test_put_object_uses_asyncio_to_thread(monkeypatch):
# Test 6: health_check returns bool # Test 6: health_check returns bool
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
async def test_minio_backend_health_check_returns_bool(): async def test_minio_backend_health_check_returns_bool():
"""MinIOBackend.health_check() returns True when bucket exists, False on exception.""" """MinIOBackend.health_check() returns True when bucket exists, False on exception."""
try: try: