Files
kite/backend/tests/test_storage.py
T
curo1305 bd765f69bf test(phase-1): add Nyquist validation tests — STORE-07 concurrent put, fix confirm UUID
- Add test_concurrent_put_objects to test_storage.py (STORE-07: verifies no
  per-instance lock blocks concurrent MinIO workers via asyncio.gather)
- Remove @pytest.mark.xfail from test_confirm_endpoint; test now passes on
  SQLite after uuid format fix in api/documents.py

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-30 18:56:58 +02:00

278 lines
10 KiB
Python

"""
Wave 0 unit tests for Plan 04 (storage layer).
All tests are marked xfail(strict=False) because the modules they reference
(storage.base, storage.minio_backend, storage.__init__) are implemented in
Plan 04. The xfail markers will be removed once Plan 04 lands and the tests
are expected to pass.
Requirements covered:
STORE-02 — MinIO object key schema: {user_id}/{document_id}/{uuid4()}{ext}
STORE-02 — Human filename never appears in the object key
"""
from __future__ import annotations
import re
import pytest
# ---------------------------------------------------------------------------
# Test 1: object key matches STORE-02 regex
# ---------------------------------------------------------------------------
async def test_object_key_schema():
    """STORE-02: put_object must return a key matching {user_id}/{doc_id}/{uuid4}{ext}.

    The backend is created with ``__new__`` so ``__init__`` never runs, and its
    SDK client is replaced by a ``MagicMock``; the test therefore needs no real
    MinIO client. It is skipped when ``storage.minio_backend`` cannot be
    imported.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")
    from unittest.mock import MagicMock

    backend = MinIOBackend.__new__(MinIOBackend)
    backend._client = MagicMock()
    backend._bucket = "docuvault"
    # put_object is synchronous in the SDK — to_thread wraps it
    backend._client.put_object = MagicMock(return_value=None)

    user_id = "11111111-1111-1111-1111-111111111111"
    document_id = "22222222-2222-2222-2222-222222222222"
    key = await backend.put_object(
        user_id=user_id,
        document_id=document_id,
        file_bytes=b"x",
        extension=".pdf",
        content_type="application/pdf",
    )

    # Two slash-free segments, then a lowercase hyphenated UUID with an
    # optional alphanumeric extension.
    pattern = re.compile(
        r'^[^/]+/[^/]+/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(\.[a-zA-Z0-9]+)?$'
    )
    assert pattern.match(key), f"Key '{key}' does not match STORE-02 schema"

    # The final (third) segment carries the freshly generated UUID; it must
    # NOT simply reuse the user_id or the document_id.
    parts = key.split("/")
    assert len(parts) == 3
    uuid_with_ext = parts[2]
    # rsplit returns the whole string unchanged when there is no "." in it,
    # so no separate "has an extension" check is needed.
    uuid_part = uuid_with_ext.rsplit(".", 1)[0]
    assert uuid_part != user_id, "Key UUID segment must not be the user_id"
    assert uuid_part != document_id, "Key UUID segment must not be the document_id"

    # Extension must be preserved
    assert key.endswith(".pdf"), f"Extension not preserved in key: '{key}'"
# ---------------------------------------------------------------------------
# Test 2: human filename never in object key
# ---------------------------------------------------------------------------
async def test_filename_not_in_object_key():
    """STORE-02: no fragment of a human-readable filename may show up in the object key.

    Skipped when ``storage.minio_backend`` cannot be imported.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as import_error:
        pytest.skip(str(import_error))
    from unittest.mock import MagicMock

    # Build the backend without running __init__ and wire in a mocked client.
    storage = MinIOBackend.__new__(MinIOBackend)
    storage._client = MagicMock()
    storage._bucket = "docuvault"
    storage._client.put_object = MagicMock(return_value=None)

    # put_object is only ever given the extension; the original filename is
    # not one of its arguments.
    upload_args = {
        "user_id": "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa",
        "document_id": "bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb",
        "file_bytes": b"invoice content",
        "extension": ".pdf",
        "content_type": "application/pdf",
    }
    object_key = await storage.put_object(**upload_args)

    for fragment in ("invoice", "Q3", "secret"):
        assert fragment not in object_key, (
            f"Human filename fragment '{fragment}' found in key: '{object_key}'"
        )
# ---------------------------------------------------------------------------
# Test 3: StorageBackend ABC enforcement
# ---------------------------------------------------------------------------
def test_storage_backend_abc_methods():
    """A StorageBackend subclass that overrides nothing must raise TypeError on instantiation.

    Skipped when ``storage.base`` cannot be imported.
    """
    try:
        from storage.base import StorageBackend
    except ImportError as import_error:
        pytest.skip(str(import_error))

    class EmptyBackend(StorageBackend):
        # Deliberately implements none of the base class's methods.
        pass

    with pytest.raises(TypeError):
        EmptyBackend()
# ---------------------------------------------------------------------------
# Test 4: factory returns MinIOBackend instance
# ---------------------------------------------------------------------------
def test_get_storage_backend_returns_minio():
    """The get_storage_backend() factory is expected to hand back a MinIOBackend.

    Skipped when either ``storage`` or ``storage.minio_backend`` cannot be
    imported.
    """
    try:
        from storage import get_storage_backend
        from storage.minio_backend import MinIOBackend
    except ImportError as import_error:
        pytest.skip(str(import_error))

    assert isinstance(get_storage_backend(), MinIOBackend)
# ---------------------------------------------------------------------------
# Test 5: put_object wraps sync SDK call in asyncio.to_thread
# ---------------------------------------------------------------------------
async def test_put_object_uses_asyncio_to_thread(monkeypatch):
    """MinIOBackend.put_object must delegate the blocking SDK call via asyncio.to_thread.

    ``asyncio.to_thread`` is replaced (through ``monkeypatch``, so it is
    restored after the test) by a wrapper that records the callable it is
    given and then forwards to the real implementation.

    NOTE(review): patching the attribute on the ``asyncio`` module is only
    observed if the backend looks it up as ``asyncio.to_thread`` at call time;
    confirm against the implementation.

    Skipped when ``storage.minio_backend`` cannot be imported.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")
    import asyncio
    from unittest.mock import MagicMock

    backend = MinIOBackend.__new__(MinIOBackend)
    backend._client = MagicMock()
    backend._bucket = "docuvault"
    backend._client.put_object = MagicMock(return_value=None)

    to_thread_calls: list = []
    # Capture the real function before patching so the wrapper can delegate to it.
    original_to_thread = asyncio.to_thread

    async def tracking_to_thread(func, *args, **kwargs):
        """Record which callable was offloaded, then run it via the real to_thread."""
        to_thread_calls.append(func)
        return await original_to_thread(func, *args, **kwargs)

    monkeypatch.setattr(asyncio, "to_thread", tracking_to_thread)

    await backend.put_object(
        user_id="11111111-1111-1111-1111-111111111111",
        document_id="22222222-2222-2222-2222-222222222222",
        file_bytes=b"data",
        extension=".txt",
        content_type="text/plain",
    )

    assert len(to_thread_calls) >= 1, "asyncio.to_thread was never called"
    assert backend._client.put_object in to_thread_calls, (
        "asyncio.to_thread was not called with self._client.put_object"
    )
# ---------------------------------------------------------------------------
# Test 6: health_check returns bool
# ---------------------------------------------------------------------------
async def test_minio_backend_health_check_returns_bool():
    """health_check() must give True when bucket_exists returns True and False when it raises.

    Skipped when ``storage.minio_backend`` cannot be imported.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as import_error:
        pytest.skip(str(import_error))
    from unittest.mock import MagicMock

    def backend_with(bucket_exists):
        """Build a MinIOBackend without running __init__, using the given bucket_exists mock."""
        instance = MinIOBackend.__new__(MinIOBackend)
        instance._client = MagicMock()
        instance._bucket = "docuvault"
        instance._client.bucket_exists = bucket_exists
        return instance

    # Case 1: bucket_exists returns True
    healthy = await backend_with(MagicMock(return_value=True)).health_check()
    assert healthy is True, f"Expected True, got {healthy!r}"

    # Case 2: bucket_exists raises Exception
    failing = backend_with(MagicMock(side_effect=Exception("boom")))
    unhealthy = await failing.health_check()
    assert unhealthy is False, f"Expected False on exception, got {unhealthy!r}"
# ---------------------------------------------------------------------------
# Test 7: STORE-07 — no file locks; concurrent put_object calls both complete
# ---------------------------------------------------------------------------
async def test_concurrent_put_objects():
    """STORE-07: two concurrent put_object calls must overlap, complete, and return distinct keys.

    The mocked SDK ``put_object`` waits on a two-party ``threading.Barrier``,
    so neither upload can return until the other has also reached the SDK
    call. An implementation that serializes uploads (for example by holding a
    ``threading.Lock`` or ``asyncio.Lock`` around the whole ``put_object``
    body) never has both calls in flight at once; the barrier then times out
    and the test fails. With a non-blocking mock, a serialized implementation
    would pass this test unnoticed.

    Skipped when ``storage.minio_backend`` cannot be imported.
    """
    try:
        from storage.minio_backend import MinIOBackend
    except ImportError as exc:
        pytest.skip(f"{exc}")
    import asyncio
    import threading
    from unittest.mock import MagicMock

    # Both SDK calls must be waiting here at the same time to get through.
    rendezvous = threading.Barrier(2)

    def blocking_sdk_put(*args, **kwargs):
        """Stand-in for the blocking SDK upload: wait for the sibling call, return None."""
        # The timeout only elapses on failure; the passing path returns at once.
        rendezvous.wait(timeout=5)

    backend = MinIOBackend.__new__(MinIOBackend)
    backend._client = MagicMock()
    backend._bucket = "docuvault"
    backend._client.put_object = MagicMock(side_effect=blocking_sdk_put)

    user_id = "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
    document_id_1 = "11111111-1111-1111-1111-111111111111"
    document_id_2 = "22222222-2222-2222-2222-222222222222"

    try:
        key1, key2 = await asyncio.gather(
            backend.put_object(
                user_id=user_id,
                document_id=document_id_1,
                file_bytes=b"first file content",
                extension=".txt",
                content_type="text/plain",
            ),
            backend.put_object(
                user_id=user_id,
                document_id=document_id_2,
                file_bytes=b"second file content",
                extension=".pdf",
                content_type="application/pdf",
            ),
        )
    except threading.BrokenBarrierError:
        pytest.fail(
            "Concurrent put_object calls never overlapped inside the SDK call; "
            "the uploads appear to be serialized (STORE-07)."
        )

    # Both calls must have returned a non-empty string key
    assert key1 and isinstance(key1, str), f"First put_object returned invalid key: {key1!r}"
    assert key2 and isinstance(key2, str), f"Second put_object returned invalid key: {key2!r}"

    # Keys must be distinct — they embed a uuid4() per call
    assert key1 != key2, (
        f"Concurrent put_object calls returned the same key: {key1!r}. "
        "This indicates a shared mutable state bug (e.g., a global counter or lock)."
    )

    # Both keys must follow the STORE-02 schema
    pattern = re.compile(
        r'^[^/]+/[^/]+/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}(\.[a-zA-Z0-9]+)?$'
    )
    assert pattern.match(key1), f"key1 '{key1}' does not match STORE-02 schema"
    assert pattern.match(key2), f"key2 '{key2}' does not match STORE-02 schema"

    # sdk put_object must have been called exactly twice (one per concurrent call)
    assert backend._client.put_object.call_count == 2, (
        f"Expected 2 put_object SDK calls for 2 concurrent uploads, "
        f"got {backend._client.put_object.call_count}"
    )