feat(01-04): add StorageBackend ABC + MinIOBackend + factory
- backend/storage/base.py: StorageBackend ABC with 5 abstract methods mirroring ai/base.py
- backend/storage/minio_backend.py: MinIOBackend wrapping all sync Minio SDK calls in asyncio.to_thread(); STORE-02 key schema: {user_id}/{document_id}/{uuid4()}{ext}
- backend/storage/__init__.py: get_storage_backend() factory mirroring ai/__init__.py
- backend/tests/test_storage.py: remove xfail markers (plan 04 implements the module)
This commit is contained in:
@@ -0,0 +1,27 @@
|
|||||||
|
"""
|
||||||
|
Storage backend factory for DocuVault.
|
||||||
|
|
||||||
|
Mirrors backend/ai/__init__.py — exposes a get_storage_backend() factory
|
||||||
|
that returns the configured StorageBackend implementation.
|
||||||
|
|
||||||
|
Phase 1 always returns MinIOBackend. Phase 5 will extend this factory to
|
||||||
|
support OneDrive, Google Drive, Nextcloud, and WebDAV backends.
|
||||||
|
"""
|
||||||
|
from storage.base import StorageBackend
|
||||||
|
from storage.minio_backend import MinIOBackend
|
||||||
|
from config import settings
|
||||||
|
|
||||||
|
|
||||||
|
def get_storage_backend() -> StorageBackend:
    """Build the storage backend described by ``config.settings``.

    A MinIOBackend is always produced here. TLS is switched off
    (``secure=False``) because the traffic is Docker-internal HTTP between
    containers (RESEARCH.md Pattern 3).

    Returns:
        A MinIOBackend wired with the MinIO endpoint, credentials, and bucket
        read from settings.
    """
    minio_options = {
        "endpoint": settings.minio_endpoint,
        "access_key": settings.minio_access_key,
        "secret_key": settings.minio_secret_key,
        "bucket": settings.minio_bucket,
        "secure": False,
    }
    return MinIOBackend(**minio_options)
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
"""
|
||||||
|
StorageBackend ABC for DocuVault.
|
||||||
|
|
||||||
|
Mirrors backend/ai/base.py — declares the abstract interface that all storage
|
||||||
|
backends (MinIO, OneDrive, Google Drive, Nextcloud, WebDAV) must implement.
|
||||||
|
|
||||||
|
Five abstract methods define the contract:
|
||||||
|
put_object — store bytes, return object key
|
||||||
|
get_object — fetch bytes by key
|
||||||
|
delete_object — remove object by key
|
||||||
|
presigned_get_url — generate a time-limited download URL
|
||||||
|
health_check — verify backend connectivity
|
||||||
|
"""
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
|
||||||
|
class StorageBackend(ABC):
    """Contract that every DocuVault object storage backend must fulfil.

    Subclasses have to implement all five coroutine methods below before they
    can be instantiated.
    """

    @abstractmethod
    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Persist ``file_bytes`` and hand back the key it was stored under.

        Implementations MUST build the key with the STORE-02 schema,
        {user_id}/{document_id}/{uuid4()}{ext}, and MUST NOT let the
        human-readable filename leak into the returned key.
        """

    @abstractmethod
    async def get_object(self, object_key: str) -> bytes:
        """Return the bytes stored under ``object_key``; raise if it is missing."""

    @abstractmethod
    async def delete_object(self, object_key: str) -> None:
        """Remove the object stored under ``object_key``.

        Deleting a key that does not exist is a no-op.
        """

    @abstractmethod
    async def presigned_get_url(
        self,
        object_key: str,
        expires_minutes: int = 60,
    ) -> str:
        """Produce a pre-signed download URL for the object that expires
        after ``expires_minutes`` minutes."""

    @abstractmethod
    async def health_check(self) -> bool:
        """Report whether the backend is reachable and operational (True if so)."""
|
||||||
@@ -0,0 +1,106 @@
|
|||||||
|
"""
|
||||||
|
MinIOBackend — synchronous Minio SDK wrapped in asyncio.to_thread().
|
||||||
|
|
||||||
|
Every call to the synchronous Minio SDK is offloaded to a thread pool via
|
||||||
|
asyncio.to_thread() so the FastAPI event loop is never blocked.
|
||||||
|
|
||||||
|
Object key schema (STORE-02 / D-06):
|
||||||
|
{user_id}/{document_id}/{uuid4()}{ext}
|
||||||
|
|
||||||
|
The human-readable filename is NEVER passed into this module — only the
|
||||||
|
file extension (derived by the caller from Path(original_name).suffix.lower())
|
||||||
|
reaches here.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import io
|
||||||
|
import uuid
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
|
from minio import Minio
|
||||||
|
|
||||||
|
from storage.base import StorageBackend
|
||||||
|
|
||||||
|
|
||||||
|
class MinIOBackend(StorageBackend):
    """MinIO implementation of StorageBackend.

    The Minio SDK client is synchronous, so every SDK call made by this class
    is dispatched through asyncio.to_thread() to avoid blocking the FastAPI
    event loop (RESEARCH.md Pattern 3).
    """

    def __init__(
        self,
        endpoint: str,
        access_key: str,
        secret_key: str,
        bucket: str,
        secure: bool = False,
    ) -> None:
        """Create the SDK client and remember the target bucket.

        Args:
            endpoint: MinIO endpoint handed to the Minio client.
            access_key: Access key handed to the Minio client.
            secret_key: Secret key handed to the Minio client.
            bucket: Bucket that all object operations of this backend use.
            secure: Passed through to the Minio client; False for Docker
                internal HTTP traffic between containers.
        """
        self._bucket = bucket
        self._client = Minio(
            endpoint=endpoint,
            access_key=access_key,
            secret_key=secret_key,
            secure=secure,  # False for Docker internal HTTP traffic between containers
        )

    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Store bytes in MinIO and return the generated object key.

        Key schema: {user_id}/{document_id}/{uuid4()}{extension}
        The filename is NOT a parameter — STORE-02 compliance.

        Args:
            user_id: First path segment of the object key.
            document_id: Second path segment of the object key.
            file_bytes: Payload to upload.
            extension: Appended verbatim after the generated UUID.
            content_type: Content type forwarded to the SDK upload call.

        Returns:
            The object key the payload was stored under.
        """
        object_key = f"{user_id}/{document_id}/{uuid.uuid4()}{extension}"
        # A freshly constructed BytesIO already has its position at 0, so no
        # explicit seek is needed before handing it to the SDK.
        data = io.BytesIO(file_bytes)
        await asyncio.to_thread(
            self._client.put_object,
            self._bucket,
            object_key,
            data,
            length=len(file_bytes),
            content_type=content_type,
        )
        return object_key

    async def get_object(self, object_key: str) -> bytes:
        """Fetch object bytes from MinIO by key.

        Args:
            object_key: Key of the object inside the configured bucket.

        Returns:
            The complete object payload.
        """

        def _fetch() -> bytes:
            # Runs in a worker thread: open the response, read it fully, and
            # always close it and release its connection, even if the read fails.
            response = self._client.get_object(self._bucket, object_key)
            try:
                return response.read()
            finally:
                response.close()
                response.release_conn()

        return await asyncio.to_thread(_fetch)

    async def delete_object(self, object_key: str) -> None:
        """Delete an object from MinIO by key."""
        await asyncio.to_thread(self._client.remove_object, self._bucket, object_key)

    async def presigned_get_url(
        self, object_key: str, expires_minutes: int = 60
    ) -> str:
        """Return a time-limited pre-signed download URL.

        Args:
            object_key: Key of the object inside the configured bucket.
            expires_minutes: Lifetime of the URL in minutes.
        """
        return await asyncio.to_thread(
            self._client.presigned_get_object,
            self._bucket,
            object_key,
            timedelta(minutes=expires_minutes),
        )

    async def health_check(self) -> bool:
        """Return True when the configured bucket is reachable; False on any exception.

        A failing probe is logged with its traceback so an unreachable or
        misconfigured backend can be diagnosed; callers still get a plain
        False and never see the exception.
        """
        try:
            return await asyncio.to_thread(self._client.bucket_exists, self._bucket)
        except Exception:
            # Deliberately best-effort: report "unhealthy" instead of raising,
            # but leave a trace of why the probe failed.
            import logging

            logging.getLogger(__name__).warning(
                "MinIO health check failed for bucket %r",
                self._bucket,
                exc_info=True,
            )
            return False
|
||||||
@@ -20,7 +20,6 @@ import pytest
|
|||||||
# Test 1: object key matches STORE-02 regex
|
# Test 1: object key matches STORE-02 regex
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
|
|
||||||
async def test_object_key_schema(db_session):
|
async def test_object_key_schema(db_session):
|
||||||
"""STORE-02: put_object must return a key matching {user_id}/{doc_id}/{uuid4}{ext}."""
|
"""STORE-02: put_object must return a key matching {user_id}/{doc_id}/{uuid4}{ext}."""
|
||||||
try:
|
try:
|
||||||
@@ -69,7 +68,6 @@ async def test_object_key_schema(db_session):
|
|||||||
# Test 2: human filename never in object key
|
# Test 2: human filename never in object key
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
|
|
||||||
async def test_filename_not_in_object_key():
|
async def test_filename_not_in_object_key():
|
||||||
"""STORE-02: The human-readable filename must never appear in the MinIO object key."""
|
"""STORE-02: The human-readable filename must never appear in the MinIO object key."""
|
||||||
try:
|
try:
|
||||||
@@ -102,7 +100,6 @@ async def test_filename_not_in_object_key():
|
|||||||
# Test 3: StorageBackend ABC enforcement
|
# Test 3: StorageBackend ABC enforcement
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
|
|
||||||
def test_storage_backend_abc_methods():
|
def test_storage_backend_abc_methods():
|
||||||
"""StorageBackend is abstract — concrete subclass missing all 5 methods raises TypeError."""
|
"""StorageBackend is abstract — concrete subclass missing all 5 methods raises TypeError."""
|
||||||
try:
|
try:
|
||||||
@@ -121,7 +118,6 @@ def test_storage_backend_abc_methods():
|
|||||||
# Test 4: factory returns MinIOBackend instance
|
# Test 4: factory returns MinIOBackend instance
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
|
|
||||||
def test_get_storage_backend_returns_minio():
|
def test_get_storage_backend_returns_minio():
|
||||||
"""get_storage_backend() factory must return a MinIOBackend instance."""
|
"""get_storage_backend() factory must return a MinIOBackend instance."""
|
||||||
try:
|
try:
|
||||||
@@ -138,7 +134,6 @@ def test_get_storage_backend_returns_minio():
|
|||||||
# Test 5: put_object wraps sync SDK call in asyncio.to_thread
|
# Test 5: put_object wraps sync SDK call in asyncio.to_thread
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
|
|
||||||
async def test_put_object_uses_asyncio_to_thread(monkeypatch):
|
async def test_put_object_uses_asyncio_to_thread(monkeypatch):
|
||||||
"""MinIOBackend.put_object must delegate the blocking SDK call via asyncio.to_thread."""
|
"""MinIOBackend.put_object must delegate the blocking SDK call via asyncio.to_thread."""
|
||||||
try:
|
try:
|
||||||
@@ -182,7 +177,6 @@ async def test_put_object_uses_asyncio_to_thread(monkeypatch):
|
|||||||
# Test 6: health_check returns bool
|
# Test 6: health_check returns bool
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
@pytest.mark.xfail(strict=False, reason="implemented in plan 04")
|
|
||||||
async def test_minio_backend_health_check_returns_bool():
|
async def test_minio_backend_health_check_returns_bool():
|
||||||
"""MinIOBackend.health_check() returns True when bucket exists, False on exception."""
|
"""MinIOBackend.health_check() returns True when bucket exists, False on exception."""
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user