feat(05-02): implement cloud_cache.py and extend storage factory
- cloud_cache.py: module-level TTLCache(maxsize=1000, ttl=60) singleton with threading.Lock for concurrent access safety (RESEARCH.md Pattern 8 / D-16)
- get_cloud_folders_cached(): async function; calls fetch_fn OUTSIDE the lock to avoid blocking the event loop during cloud API calls
- invalidate_provider_cache(): removes all cache entries for a user+provider prefix
- storage/__init__.py: adds get_storage_backend_for_document() async factory — returns MinIOBackend for minio docs; for cloud docs it queries CloudConnection (scoped to user.id), decrypts credentials, and lazy-imports cloud backends to avoid circular imports; raises HTTPException(503) if the connection is missing or not ACTIVE (T-05-02-04)
This commit is contained in:
@@ -0,0 +1,94 @@
|
||||
"""
|
||||
Cloud folder listing cache for DocuVault.
|
||||
|
||||
Provides a module-level TTLCache singleton for caching cloud provider folder
|
||||
listings with a 60-second TTL (D-16: live API calls with 60-second in-memory TTL).
|
||||
|
||||
Thread-safety: cachetools.TTLCache is NOT thread-safe by itself. A threading.Lock
|
||||
is required for all reads and writes (RESEARCH.md Pattern 8). The fetch function
|
||||
is called OUTSIDE the lock to prevent blocking the asyncio event loop while an
|
||||
outbound cloud API call is in flight.
|
||||
|
||||
Cache key scheme: "{user_id}:{provider}:{folder_id}"
|
||||
- user_id ensures strict per-user isolation
|
||||
- provider namespace-separates entries from different cloud backends
|
||||
- folder_id identifies the specific folder whose listing is cached
|
||||
|
||||
References:
|
||||
RESEARCH.md Pattern 8 — TTLCache thread safety + asyncio integration
|
||||
D-16 — 60-second TTL, in-memory cache (not Redis)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import threading
|
||||
from typing import Any, Callable, Awaitable
|
||||
|
||||
from cachetools import TTLCache
|
||||
|
||||
|
||||
# Sizing for the shared folder-listing cache: 1000 entries comfortably covers
# roughly 50 users × 20 folders, and D-16 mandates a 60-second lifetime.
_FOLDER_CACHE_MAX_ENTRIES = 1000
_FOLDER_CACHE_TTL_SECONDS = 60

# Process-wide singleton holding cached folder listings.
_folder_cache: TTLCache = TTLCache(
    maxsize=_FOLDER_CACHE_MAX_ENTRIES,
    ttl=_FOLDER_CACHE_TTL_SECONDS,
)

# cachetools.TTLCache is not thread-safe, so every access to _folder_cache
# must be made while holding this lock.
_folder_cache_lock = threading.Lock()
|
||||
|
||||
|
||||
async def get_cloud_folders_cached(
    user_id: str,
    provider: str,
    folder_id: str,
    fetch_fn: Callable[[], Awaitable[list]],
) -> list:
    """Return the cached folder listing, fetching and caching it on a miss.

    Entries are stored under the key "{user_id}:{provider}:{folder_id}".

    The lock is held only for the brief cache read and the brief cache write.
    fetch_fn is awaited with the lock released, so a slow cloud API call never
    keeps other callers waiting on the lock.

    Two concurrent callers that both miss will both invoke fetch_fn. That is
    accepted: each caller returns the result of its own fetch, and whichever
    finishes last is the value left in the cache.

    The object stored in the cache is the same list object that is returned,
    so callers should treat the result as read-only.

    Args:
        user_id: The authenticated user's UUID string.
        provider: The cloud provider identifier (e.g. "google_drive").
        folder_id: The provider-native folder/directory identifier.
        fetch_fn: A zero-argument async callable producing the folder listing.
            Only invoked on a cache miss.

    Returns:
        The folder listing, either from the cache or fresh from fetch_fn.

    Raises:
        Whatever fetch_fn raises; in that case nothing is cached.
    """
    cache_key = f"{user_id}:{provider}:{folder_id}"

    with _folder_cache_lock:
        # Single lookup instead of "key in cache" followed by "cache[key]":
        # a TTL entry can expire between those two operations, which would
        # surface as a KeyError to the caller instead of a plain cache miss.
        try:
            return _folder_cache[cache_key]
        except KeyError:
            pass  # Missing or expired: fall through and fetch.

    # Cache miss: perform the (potentially slow) fetch with the lock released.
    result = await fetch_fn()

    with _folder_cache_lock:
        _folder_cache[cache_key] = result

    return result
|
||||
|
||||
|
||||
def invalidate_provider_cache(user_id: str, provider: str) -> None:
    """Drop every cached folder listing belonging to one user + provider.

    Intended for events that make cached listings stale, such as a cloud
    connection being disconnected or its credentials being updated.

    All keys starting with "{user_id}:{provider}:" are removed. Calling this
    when no matching entries exist is a no-op.

    Args:
        user_id: The authenticated user's UUID string.
        provider: The cloud provider identifier to invalidate.
    """
    prefix = f"{user_id}:{provider}:"
    with _folder_cache_lock:
        # Snapshot the matching keys first so the cache is not mutated while
        # it is being iterated.
        stale_keys = [key for key in _folder_cache if key.startswith(prefix)]
        for key in stale_keys:
            # An entry may reach its TTL between the snapshot and the delete,
            # in which case the cache raises KeyError. The entry is gone
            # either way, so keep going and clear the remaining keys.
            try:
                del _folder_cache[key]
            except KeyError:
                continue
|
||||
Reference in New Issue
Block a user