""" Wave 0 unit tests for Plan 04 (storage layer). All tests are marked xfail(strict=False) because the modules they reference (storage.base, storage.minio_backend, storage.__init__) are implemented in Plan 04. The xfail markers will be removed once Plan 04 lands and the tests are expected to pass. Requirements covered: STORE-02 — MinIO object key schema: {user_id}/{document_id}/{uuid4()}{ext} STORE-02 — Human filename never appears in the object key """ from __future__ import annotations import re import pytest # --------------------------------------------------------------------------- # Test 1: object key matches STORE-02 regex # --------------------------------------------------------------------------- async def test_object_key_schema(): """STORE-02: put_object must return a key matching {user_id}/{doc_id}/{uuid4}{ext}.""" try: from storage.minio_backend import MinIOBackend except ImportError as exc: pytest.skip(f"{exc}") import asyncio from unittest.mock import MagicMock, AsyncMock backend = MinIOBackend.__new__(MinIOBackend) backend._client = MagicMock() backend._bucket = "docuvault" # put_object is synchronous in the SDK — to_thread wraps it backend._client.put_object = MagicMock(return_value=None) user_id = "11111111-1111-1111-1111-111111111111" document_id = "22222222-2222-2222-2222-222222222222" key = await backend.put_object( user_id=user_id, document_id=document_id, file_bytes=b"x", extension=".pdf", content_type="application/pdf", ) pattern = re.compile( r'^[^/]+/[^/]+/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(\.[a-zA-Z0-9]+)?$' ) assert pattern.match(key), f"Key '{key}' does not match STORE-02 schema" # The middle UUID segment must NOT equal the user_id or document_id parts = key.split("/") assert len(parts) == 3 uuid_with_ext = parts[2] uuid_part = uuid_with_ext.rsplit(".", 1)[0] if "." 
in uuid_with_ext else uuid_with_ext assert uuid_part != user_id, "Key UUID segment must not be the user_id" assert uuid_part != document_id, "Key UUID segment must not be the document_id" # Extension must be preserved assert key.endswith(".pdf"), f"Extension not preserved in key: '{key}'" # --------------------------------------------------------------------------- # Test 2: human filename never in object key # --------------------------------------------------------------------------- async def test_filename_not_in_object_key(): """STORE-02: The human-readable filename must never appear in the MinIO object key.""" try: from storage.minio_backend import MinIOBackend except ImportError as exc: pytest.skip(f"{exc}") from unittest.mock import MagicMock backend = MinIOBackend.__new__(MinIOBackend) backend._client = MagicMock() backend._bucket = "docuvault" backend._client.put_object = MagicMock(return_value=None) # The original filename is NEVER passed to put_object — only extension is used key = await backend.put_object( user_id="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", document_id="bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", file_bytes=b"invoice content", extension=".pdf", content_type="application/pdf", ) assert "invoice" not in key, f"Human filename fragment 'invoice' found in key: '{key}'" assert "Q3" not in key, f"Human filename fragment 'Q3' found in key: '{key}'" assert "secret" not in key, f"Human filename fragment 'secret' found in key: '{key}'" # --------------------------------------------------------------------------- # Test 3: StorageBackend ABC enforcement # --------------------------------------------------------------------------- def test_storage_backend_abc_methods(): """StorageBackend is abstract — concrete subclass missing all 5 methods raises TypeError.""" try: from storage.base import StorageBackend except ImportError as exc: pytest.skip(f"{exc}") class Stub(StorageBackend): pass with pytest.raises(TypeError): Stub() # 
# ---------------------------------------------------------------------------
# Test 4: factory returns MinIOBackend instance
# ---------------------------------------------------------------------------
def test_get_storage_backend_returns_minio():
    """get_storage_backend() factory must return a MinIOBackend instance."""
    try:
        from storage import get_storage_backend
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    backend = get_storage_backend()
    assert isinstance(backend, MinIOBackend)


# ---------------------------------------------------------------------------
# Test 5: put_object wraps sync SDK call in asyncio.to_thread
# ---------------------------------------------------------------------------
async def test_put_object_uses_asyncio_to_thread(monkeypatch):
    """MinIOBackend.put_object must delegate the blocking SDK call via asyncio.to_thread.

    asyncio.to_thread is replaced with a recording wrapper that still forwards
    to the real implementation; the test then checks that the stubbed SDK
    put_object callable was handed to it directly.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    import asyncio
    from unittest.mock import MagicMock

    # Instance is created without __init__; collaborators are stubbed by hand.
    backend = MinIOBackend.__new__(MinIOBackend)
    backend._client = MagicMock()
    backend._bucket = "docuvault"
    backend._client.put_object = MagicMock(return_value=None)

    to_thread_calls: list = []
    original_to_thread = asyncio.to_thread

    async def tracking_to_thread(func, *args, **kwargs):
        # Record the callable, then delegate so the call still really runs.
        to_thread_calls.append(func)
        return await original_to_thread(func, *args, **kwargs)

    # NOTE(review): this patches the attribute on the asyncio module, so it is
    # only observed if the backend looks up `asyncio.to_thread` at call time
    # (not via `from asyncio import to_thread`) — confirm against the backend.
    monkeypatch.setattr(asyncio, "to_thread", tracking_to_thread)

    await backend.put_object(
        user_id="11111111-1111-1111-1111-111111111111",
        document_id="22222222-2222-2222-2222-222222222222",
        file_bytes=b"data",
        extension=".txt",
        content_type="text/plain",
    )

    assert to_thread_calls, "asyncio.to_thread was never called"
    # Membership means the SDK method itself must be the first argument to
    # to_thread; a lambda or functools.partial wrapper would not match.
    assert backend._client.put_object in to_thread_calls, (
        "asyncio.to_thread was not called with self._client.put_object"
    )


# ---------------------------------------------------------------------------
# Test 6: health_check returns bool
# ---------------------------------------------------------------------------
async def test_minio_backend_health_check_returns_bool():
    """MinIOBackend.health_check() returns True when bucket exists, False on exception."""
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    from unittest.mock import MagicMock

    # Case 1: bucket_exists returns True
    backend = MinIOBackend.__new__(MinIOBackend)
    backend._client = MagicMock()
    backend._bucket = "docuvault"
    backend._client.bucket_exists = MagicMock(return_value=True)

    result = await backend.health_check()
    # `is True` (not truthiness) pins the return type to a real bool.
    assert result is True, f"Expected True, got {result!r}"

    # Case 2: bucket_exists raises Exception
    backend2 = MinIOBackend.__new__(MinIOBackend)
    backend2._client = MagicMock()
    backend2._bucket = "docuvault"
    backend2._client.bucket_exists = MagicMock(side_effect=Exception("boom"))

    result2 = await backend2.health_check()
    assert result2 is False, f"Expected False on exception, got {result2!r}"


# ---------------------------------------------------------------------------
# Test 7: STORE-07 — no file locks; concurrent put_object calls both complete
# ---------------------------------------------------------------------------
async def test_concurrent_put_objects():
    """STORE-07: Two concurrent put_object calls must both complete without
    error and return distinct object keys.

    What this test verifies: when two put_object coroutines are scheduled
    together with asyncio.gather on the same backend instance, neither raises,
    each returns its own STORE-02 key, each key carries the document_id and
    extension of its own call (no argument cross-talk through shared mutable
    state), and the SDK is invoked exactly once per call.

    What it does not verify: the SDK call is a MagicMock that returns
    immediately, so an implementation that merely serializes the two calls
    behind a threading.Lock or asyncio.Lock would still pass. Detecting
    serialization would need a stub that blocks until both calls are in
    flight.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    import asyncio
    from unittest.mock import MagicMock

    # One shared instance on purpose: the point is two calls on the same backend.
    backend = MinIOBackend.__new__(MinIOBackend)
    backend._client = MagicMock()
    backend._bucket = "docuvault"
    backend._client.put_object = MagicMock(return_value=None)

    user_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
    document_id_1 = "11111111-1111-1111-1111-111111111111"
    document_id_2 = "22222222-2222-2222-2222-222222222222"

    # gather returns results in argument order, so key1/key2 line up with the
    # first/second call regardless of completion order.
    key1, key2 = await asyncio.gather(
        backend.put_object(
            user_id=user_id,
            document_id=document_id_1,
            file_bytes=b"first file content",
            extension=".txt",
            content_type="text/plain",
        ),
        backend.put_object(
            user_id=user_id,
            document_id=document_id_2,
            file_bytes=b"second file content",
            extension=".pdf",
            content_type="application/pdf",
        ),
    )

    # Both calls must have returned a non-empty string key
    assert key1 and isinstance(key1, str), f"First put_object returned invalid key: {key1!r}"
    assert key2 and isinstance(key2, str), f"Second put_object returned invalid key: {key2!r}"

    # Keys must be distinct — they embed a uuid4() per call
    assert key1 != key2, (
        f"Concurrent put_object calls returned the same key: {key1!r}. "
        "This indicates a shared mutable state bug (e.g., a global counter or lock)."
    )

    # Both keys must follow the STORE-02 schema
    pattern = re.compile(
        r'^[^/]+/[^/]+/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(\.[a-zA-Z0-9]+)?$'
    )
    assert pattern.match(key1), f"key1 '{key1}' does not match STORE-02 schema"
    assert pattern.match(key2), f"key2 '{key2}' does not match STORE-02 schema"

    # Each key must be built from its OWN call's arguments; a swap here would
    # mean per-call data leaked between the two coroutines.
    assert key1.startswith(f"{user_id}/{document_id_1}/"), (
        f"key1 '{key1}' is not prefixed with the first call's user_id/document_id"
    )
    assert key2.startswith(f"{user_id}/{document_id_2}/"), (
        f"key2 '{key2}' is not prefixed with the second call's user_id/document_id"
    )
    assert key1.endswith(".txt"), f"key1 '{key1}' lost the first call's extension"
    assert key2.endswith(".pdf"), f"key2 '{key2}' lost the second call's extension"

    # sdk put_object must have been called exactly twice (one per concurrent call)
    assert backend._client.put_object.call_count == 2, (
        f"Expected 2 put_object SDK calls for 2 concurrent uploads, "
        f"got {backend._client.put_object.call_count}"
    )