def get_storage_backend() -> StorageBackend:
    """Build the storage backend described by ``config.settings``.

    Only MinIO is wired up here: endpoint, credentials and bucket are read
    from ``settings``. TLS is switched off (``secure=False``) because the
    connection is plain HTTP between Docker containers
    (RESEARCH.md Pattern 3).

    Returns:
        A newly constructed ``MinIOBackend``; every call builds a new one.
    """
    minio_options = {
        "endpoint": settings.minio_endpoint,
        "access_key": settings.minio_access_key,
        "secret_key": settings.minio_secret_key,
        "bucket": settings.minio_bucket,
        "secure": False,
    }
    return MinIOBackend(**minio_options)
class StorageBackend(ABC):
    """Contract that every DocuVault object-storage backend implements.

    A concrete backend must override all five coroutine methods below;
    a subclass that leaves any of them out cannot be instantiated.
    """

    @abstractmethod
    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Persist ``file_bytes`` and return the key it was stored under.

        Implementations MUST build the key with the STORE-02 schema,
        ``{user_id}/{document_id}/{uuid4()}{ext}``, and MUST NOT let the
        human-readable filename leak into it.
        """

    @abstractmethod
    async def get_object(self, object_key: str) -> bytes:
        """Return the stored bytes for ``object_key``; raise if it is missing."""

    @abstractmethod
    async def delete_object(self, object_key: str) -> None:
        """Remove ``object_key``; a key that does not exist is a no-op."""

    @abstractmethod
    async def presigned_get_url(
        self, object_key: str, expires_minutes: int = 60
    ) -> str:
        """Return a pre-signed download URL valid for ``expires_minutes``."""

    @abstractmethod
    async def health_check(self) -> bool:
        """Return True when the backend is reachable and operational."""
class MinIOBackend(StorageBackend):
    """MinIO implementation of StorageBackend.

    The Minio SDK client is synchronous, so every SDK call is dispatched
    through asyncio.to_thread() to avoid blocking the FastAPI event loop
    (RESEARCH.md Pattern 3).
    """

    def __init__(
        self,
        endpoint: str,
        access_key: str,
        secret_key: str,
        bucket: str,
        secure: bool = False,
    ) -> None:
        """Create the SDK client and remember the target bucket.

        Args:
            endpoint: MinIO endpoint handed straight to the SDK client.
            access_key: MinIO access key.
            secret_key: MinIO secret key.
            bucket: Bucket that every operation on this backend targets.
            secure: Passed to the SDK client; False for Docker internal
                HTTP traffic between containers.
        """
        self._bucket = bucket
        self._client = Minio(
            endpoint=endpoint,
            access_key=access_key,
            secret_key=secret_key,
            secure=secure,
        )

    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Store bytes in MinIO and return the generated object key.

        Key schema: {user_id}/{document_id}/{uuid4()}{extension}
        The filename is NOT a parameter — STORE-02 compliance.

        Args:
            user_id: First path segment of the key.
            document_id: Second path segment of the key.
            file_bytes: Payload to upload.
            extension: Appended verbatim after the UUID; callers are
                expected to pass it with its leading dot (or "").
            content_type: Content type recorded with the object.

        Returns:
            The object key the bytes were stored under.
        """
        object_key = f"{user_id}/{document_id}/{uuid.uuid4()}{extension}"
        # A freshly constructed BytesIO is already positioned at offset 0,
        # so no explicit seek is needed before handing it to the SDK.
        await asyncio.to_thread(
            self._client.put_object,
            self._bucket,
            object_key,
            io.BytesIO(file_bytes),
            length=len(file_bytes),
            content_type=content_type,
        )
        return object_key

    async def get_object(self, object_key: str) -> bytes:
        """Fetch the full object body from MinIO by key.

        Exceptions raised by the SDK call propagate to the caller.
        """

        def _fetch() -> bytes:
            response = self._client.get_object(self._bucket, object_key)
            try:
                return response.read()
            finally:
                # Always close the response and release its connection,
                # even if reading the body fails part-way.
                response.close()
                response.release_conn()

        return await asyncio.to_thread(_fetch)

    async def delete_object(self, object_key: str) -> None:
        """Delete an object from MinIO by key."""
        await asyncio.to_thread(self._client.remove_object, self._bucket, object_key)

    async def presigned_get_url(
        self, object_key: str, expires_minutes: int = 60
    ) -> str:
        """Return a pre-signed download URL valid for ``expires_minutes``.

        NOTE(review): the URL is presumably built from the endpoint this
        client was configured with — confirm that host is reachable by
        whoever receives the link.
        """
        return await asyncio.to_thread(
            self._client.presigned_get_object,
            self._bucket,
            object_key,
            timedelta(minutes=expires_minutes),
        )

    async def health_check(self) -> bool:
        """Report whether the configured bucket is reachable.

        Returns:
            The result of the SDK's bucket_exists() call, or False if the
            call raised. Failures are logged with a traceback so that a
            wrong endpoint or bad credentials can be told apart from a
            bucket that simply does not exist.
        """
        import logging  # local import: this module has no file-level logger

        try:
            return await asyncio.to_thread(self._client.bucket_exists, self._bucket)
        except Exception:
            # Deliberately best-effort: a health probe must not raise.
            # Task cancellation is not an Exception subclass, so it still
            # propagates.
            logging.getLogger(__name__).warning(
                "MinIO health check failed for bucket %r",
                self._bucket,
                exc_info=True,
            )
            return False
test_object_key_schema(db_session): # Test 2: human filename never in object key # --------------------------------------------------------------------------- -@pytest.mark.xfail(strict=False, reason="implemented in plan 04") async def test_filename_not_in_object_key(): """STORE-02: The human-readable filename must never appear in the MinIO object key.""" try: @@ -102,7 +100,6 @@ async def test_filename_not_in_object_key(): # Test 3: StorageBackend ABC enforcement # --------------------------------------------------------------------------- -@pytest.mark.xfail(strict=False, reason="implemented in plan 04") def test_storage_backend_abc_methods(): """StorageBackend is abstract — concrete subclass missing all 5 methods raises TypeError.""" try: @@ -121,7 +118,6 @@ def test_storage_backend_abc_methods(): # Test 4: factory returns MinIOBackend instance # --------------------------------------------------------------------------- -@pytest.mark.xfail(strict=False, reason="implemented in plan 04") def test_get_storage_backend_returns_minio(): """get_storage_backend() factory must return a MinIOBackend instance.""" try: @@ -138,7 +134,6 @@ def test_get_storage_backend_returns_minio(): # Test 5: put_object wraps sync SDK call in asyncio.to_thread # --------------------------------------------------------------------------- -@pytest.mark.xfail(strict=False, reason="implemented in plan 04") async def test_put_object_uses_asyncio_to_thread(monkeypatch): """MinIOBackend.put_object must delegate the blocking SDK call via asyncio.to_thread.""" try: @@ -182,7 +177,6 @@ async def test_put_object_uses_asyncio_to_thread(monkeypatch): # Test 6: health_check returns bool # --------------------------------------------------------------------------- -@pytest.mark.xfail(strict=False, reason="implemented in plan 04") async def test_minio_backend_health_check_returns_bool(): """MinIOBackend.health_check() returns True when bucket exists, False on exception.""" try: