bd765f69bf
- Add test_concurrent_put_objects to test_storage.py (STORE-07: verifies no per-instance lock blocks concurrent MinIO workers via asyncio.gather) - Remove @pytest.mark.xfail from test_confirm_endpoint; test now passes on SQLite after uuid format fix in api/documents.py Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
278 lines
10 KiB
Python
278 lines
10 KiB
Python
"""
|
|
Wave 0 unit tests for Plan 04 (storage layer).

The modules under test (storage.base, storage.minio_backend,
storage.__init__) are implemented in Plan 04. Each test imports them inside
its own body and calls pytest.skip when that import fails, so no xfail
markers are needed.

Requirements covered:
  STORE-02 — MinIO object key schema: {user_id}/{document_id}/{uuid4()}{ext}
  STORE-02 — Human filename never appears in the object key
  STORE-07 — Concurrent put_object calls both complete and return distinct keys
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import pytest
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test 1: object key matches STORE-02 regex
|
|
# ---------------------------------------------------------------------------
|
|
|
|
async def test_object_key_schema():
    """STORE-02: put_object must return a key matching {user_id}/{doc_id}/{uuid4}{ext}.

    The backend is created with ``__new__`` so ``__init__`` never runs, and the
    SDK client is a mock; only the key returned by ``put_object`` is inspected.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    from unittest.mock import MagicMock

    backend = MinIOBackend.__new__(MinIOBackend)
    backend._client = MagicMock()
    backend._bucket = "docuvault"
    # put_object is synchronous in the SDK — to_thread wraps it
    backend._client.put_object = MagicMock(return_value=None)

    user_id = "11111111-1111-1111-1111-111111111111"
    document_id = "22222222-2222-2222-2222-222222222222"

    key = await backend.put_object(
        user_id=user_id,
        document_id=document_id,
        file_bytes=b"x",
        extension=".pdf",
        content_type="application/pdf",
    )

    pattern = re.compile(
        r'^[^/]+/[^/]+/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(\.[a-zA-Z0-9]+)?$'
    )
    assert pattern.match(key), f"Key '{key}' does not match STORE-02 schema"

    parts = key.split("/")
    assert len(parts) == 3
    owner_segment, document_segment, uuid_with_ext = parts

    # The regex only checks the shape of the first two segments; the schema
    # additionally pins them to the ids supplied by the caller.
    assert owner_segment == user_id, (
        f"First key segment must be the user_id, got '{owner_segment}'"
    )
    assert document_segment == document_id, (
        f"Second key segment must be the document_id, got '{document_segment}'"
    )

    # The final UUID segment must NOT equal the user_id or document_id
    uuid_part = uuid_with_ext.rsplit(".", 1)[0] if "." in uuid_with_ext else uuid_with_ext
    assert uuid_part != user_id, "Key UUID segment must not be the user_id"
    assert uuid_part != document_id, "Key UUID segment must not be the document_id"

    # Extension must be preserved
    assert key.endswith(".pdf"), f"Extension not preserved in key: '{key}'"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test 2: human filename never in object key
|
|
# ---------------------------------------------------------------------------
|
|
|
|
async def test_filename_not_in_object_key():
    """STORE-02: The human-readable filename must never appear in the MinIO object key.

    Uploads through a backend whose SDK client is mocked, then checks that
    the returned key contains none of the filename-like fragments below.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    from unittest.mock import MagicMock

    # Build the instance without running __init__ and attach a mocked client.
    store = MinIOBackend.__new__(MinIOBackend)
    store._client = MagicMock()
    store._bucket = "docuvault"
    store._client.put_object = MagicMock(return_value=None)

    # The original filename is NEVER passed to put_object — only extension is used
    upload_kwargs = dict(
        user_id="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
        document_id="bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
        file_bytes=b"invoice content",
        extension=".pdf",
        content_type="application/pdf",
    )
    key = await store.put_object(**upload_kwargs)

    assert "invoice" not in key, f"Human filename fragment 'invoice' found in key: '{key}'"
    assert "Q3" not in key, f"Human filename fragment 'Q3' found in key: '{key}'"
    assert "secret" not in key, f"Human filename fragment 'secret' found in key: '{key}'"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test 3: StorageBackend ABC enforcement
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_storage_backend_abc_methods():
    """StorageBackend is abstract: a subclass that overrides nothing cannot be instantiated."""
    try:
        from storage.base import StorageBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    class IncompleteBackend(StorageBackend):
        """Deliberately leaves every inherited method untouched."""

    # Instantiating the incomplete subclass is expected to raise TypeError.
    with pytest.raises(TypeError):
        IncompleteBackend()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test 4: factory returns MinIOBackend instance
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_get_storage_backend_returns_minio():
    """The get_storage_backend() factory must hand back a MinIOBackend instance."""
    try:
        from storage import get_storage_backend
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    # Call the factory with no arguments and check the concrete type.
    assert isinstance(get_storage_backend(), MinIOBackend)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test 5: put_object wraps sync SDK call in asyncio.to_thread
|
|
# ---------------------------------------------------------------------------
|
|
|
|
async def test_put_object_uses_asyncio_to_thread(monkeypatch):
    """MinIOBackend.put_object must delegate the blocking SDK call via asyncio.to_thread.

    ``asyncio.to_thread`` is swapped for a recording wrapper that still
    forwards to the real function, so the upload runs normally while every
    callable handed to ``to_thread`` is captured for inspection.

    Args:
        monkeypatch: pytest fixture used to replace ``asyncio.to_thread`` for
            the duration of this test.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    import asyncio
    from unittest.mock import MagicMock

    backend = MinIOBackend.__new__(MinIOBackend)
    backend._client = MagicMock()
    backend._bucket = "docuvault"
    backend._client.put_object = MagicMock(return_value=None)

    to_thread_calls: list = []

    # Keep a handle on the real function before it is patched out.
    original_to_thread = asyncio.to_thread

    async def tracking_to_thread(func, *args, **kwargs):
        to_thread_calls.append(func)
        return await original_to_thread(func, *args, **kwargs)

    monkeypatch.setattr(asyncio, "to_thread", tracking_to_thread)

    await backend.put_object(
        user_id="11111111-1111-1111-1111-111111111111",
        document_id="22222222-2222-2222-2222-222222222222",
        file_bytes=b"data",
        extension=".txt",
        content_type="text/plain",
    )

    assert to_thread_calls, "asyncio.to_thread was never called"
    assert backend._client.put_object in to_thread_calls, (
        "asyncio.to_thread was not called with self._client.put_object"
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test 6: health_check returns bool
|
|
# ---------------------------------------------------------------------------
|
|
|
|
async def test_minio_backend_health_check_returns_bool():
    """MinIOBackend.health_check() returns True when bucket exists, False on exception."""
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    from unittest.mock import MagicMock

    def make_backend(bucket_exists):
        """Build a backend without __init__, wired to the given bucket_exists mock."""
        instance = MinIOBackend.__new__(MinIOBackend)
        instance._client = MagicMock()
        instance._bucket = "docuvault"
        instance._client.bucket_exists = bucket_exists
        return instance

    # Case 1: bucket_exists returns True
    healthy = make_backend(MagicMock(return_value=True))
    result = await healthy.health_check()
    assert result is True, f"Expected True, got {result!r}"

    # Case 2: bucket_exists raises Exception
    failing = make_backend(MagicMock(side_effect=Exception("boom")))
    result2 = await failing.health_check()
    assert result2 is False, f"Expected False on exception, got {result2!r}"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Test 7: STORE-07 — no file locks; concurrent put_object calls both complete
|
|
# ---------------------------------------------------------------------------
|
|
|
|
async def test_concurrent_put_objects():
    """STORE-07: Two concurrent put_object calls must both complete without error
    and return distinct object keys.

    The mocked SDK ``put_object`` waits on a two-party ``threading.Barrier``,
    so it only returns once BOTH SDK calls are in flight at the same time. An
    implementation that serializes uploads (for example by holding a lock
    around the whole put_object body, or by calling the blocking SDK directly
    on the event loop) never gets the second call started while the first is
    waiting; the barrier then times out and the test fails.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")

    import asyncio
    import threading
    from unittest.mock import MagicMock

    # Released only when two SDK calls are waiting simultaneously.
    both_in_flight = threading.Barrier(2)

    def blocking_sdk_put(*args, **kwargs):
        """Stand-in for the SDK upload that requires a concurrent peer call."""
        try:
            # The timeout is only reached when the calls were serialized.
            both_in_flight.wait(timeout=5)
        except threading.BrokenBarrierError:
            raise AssertionError(
                "Concurrent put_object calls were serialized: the second SDK "
                "call did not start while the first was still in flight"
            ) from None
        return None

    backend = MinIOBackend.__new__(MinIOBackend)
    backend._client = MagicMock()
    backend._bucket = "docuvault"
    backend._client.put_object = MagicMock(side_effect=blocking_sdk_put)

    user_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
    document_id_1 = "11111111-1111-1111-1111-111111111111"
    document_id_2 = "22222222-2222-2222-2222-222222222222"

    key1, key2 = await asyncio.gather(
        backend.put_object(
            user_id=user_id,
            document_id=document_id_1,
            file_bytes=b"first file content",
            extension=".txt",
            content_type="text/plain",
        ),
        backend.put_object(
            user_id=user_id,
            document_id=document_id_2,
            file_bytes=b"second file content",
            extension=".pdf",
            content_type="application/pdf",
        ),
    )

    # Both calls must have returned a non-empty string key
    assert key1 and isinstance(key1, str), f"First put_object returned invalid key: {key1!r}"
    assert key2 and isinstance(key2, str), f"Second put_object returned invalid key: {key2!r}"

    # Keys must be distinct — they embed a uuid4() per call
    assert key1 != key2, (
        f"Concurrent put_object calls returned the same key: {key1!r}. "
        "This indicates a shared mutable state bug (e.g., a global counter or lock)."
    )

    # Both keys must follow the STORE-02 schema
    pattern = re.compile(
        r'^[^/]+/[^/]+/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(\.[a-zA-Z0-9]+)?$'
    )
    assert pattern.match(key1), f"key1 '{key1}' does not match STORE-02 schema"
    assert pattern.match(key2), f"key2 '{key2}' does not match STORE-02 schema"

    # sdk put_object must have been called exactly twice (one per concurrent call)
    assert backend._client.put_object.call_count == 2, (
        f"Expected 2 put_object SDK calls for 2 concurrent uploads, "
        f"got {backend._client.put_object.call_count}"
    )
|