Files
kite/backend/storage/__init__.py
T
curo1305 a9ea33dd18 feat(05-04): fix storage factory to dispatch nextcloud to NextcloudBackend
- Previously both 'nextcloud' and 'webdav' providers were dispatched to WebDAVBackend
- Now 'nextcloud' uses NextcloudBackend (has list_folder); 'webdav' uses WebDAVBackend
- Both share identical constructor signature (server_url, username, password)
- Removes type: ignore[import] concern on nextcloud_backend — module now exists
2026-05-28 21:12:27 +02:00

133 lines
5.2 KiB
Python

"""
Storage backend factory for DocuVault.
Mirrors backend/ai/__init__.py — exposes a get_storage_backend() factory
that returns the configured StorageBackend implementation.
Phase 1: get_storage_backend() always returns MinIOBackend.
Phase 5: get_storage_backend_for_document() extends the factory to support
OneDrive, Google Drive, Nextcloud, and WebDAV backends based on the document's
storage_backend field and the user's active cloud connections.
Security notes:
- get_storage_backend_for_document() is the only function that decrypts cloud
credentials. It receives the user object from get_regular_user dep, ensuring
the CloudConnection query is always scoped to the authenticated user (T-05-02-04).
- Cloud backend modules are imported lazily inside the function body to avoid
circular imports at module load time (RESEARCH.md Pattern 9).
"""
from typing import Optional
from fastapi import HTTPException
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from config import settings
from db.models import CloudConnection, Document, User
from storage.base import StorageBackend
from storage.minio_backend import MinIOBackend
from storage.cloud_utils import decrypt_credentials
def get_storage_backend() -> StorageBackend:
    """Build a MinIOBackend from the values in ``config.settings``.

    A new backend object is constructed on every call.

    Notes:
        - ``secure=False`` is always passed. Per the project notes
          (RESEARCH.md Pattern 3) the internal endpoint is reached over
          plain HTTP between Docker containers.
        - ``settings.minio_public_endpoint`` is forwarded as
          ``public_endpoint``; any falsy value (such as an empty string)
          is normalised to ``None``. Per the project notes (RESEARCH.md
          Finding 3) this is the browser-resolvable host used for
          presigned PUT URLs, while the internal endpoint serves all
          server-side operations.

    Returns:
        A MinIOBackend configured with the endpoint, access key, secret key
        and bucket taken from settings.
    """
    # Read first, exactly as before, so settings are accessed in the same order.
    browser_endpoint = settings.minio_public_endpoint or None

    minio_kwargs = {
        "endpoint": settings.minio_endpoint,
        "access_key": settings.minio_access_key,
        "secret_key": settings.minio_secret_key,
        "bucket": settings.minio_bucket,
        "secure": False,
        "public_endpoint": browser_endpoint,
    }
    return MinIOBackend(**minio_kwargs)
async def get_storage_backend_for_document(
    document: Document,
    user: User,
    session: AsyncSession,
) -> StorageBackend:
    """Resolve the StorageBackend that serves ``document``.

    Documents whose ``storage_backend`` is ``'minio'`` are served by the
    backend built by get_storage_backend(); no database access happens on
    that path.

    For every other value the function:

    1. selects the CloudConnection row matching ``user.id``, the document's
       ``storage_backend`` and status ``'ACTIVE'``;
    2. decrypts ``credentials_enc`` via decrypt_credentials(), passing the
       encoded ``settings.cloud_creds_key`` and the user id (the module
       notes describe this as an HKDF-derived per-user key, D-18);
    3. instantiates the backend class for the provider. Google Drive and
       OneDrive receive the whole credentials mapping; Nextcloud and WebDAV
       receive ``server_url``, ``username`` and ``password`` positionally.

    Security:
        - The connection query always filters on ``user.id``, so only the
          given user's connections can be loaded (T-05-02-04, CLOUD-IDOR).
        - Backend modules are imported inside the function, and only for
          the provider actually used, to avoid circular imports at module
          load time (RESEARCH.md Pattern 9).
        - A missing or non-ACTIVE connection raises instead of falling back
          to MinIO.

    Args:
        document: Document ORM instance exposing ``storage_backend``.
        user: Authenticated User ORM instance (from the get_regular_user
            dependency, per the module notes).
        session: Active async SQLAlchemy session used for the lookup.

    Returns:
        A concrete StorageBackend for the document's provider.

    Raises:
        HTTPException(503): No ACTIVE connection exists for this user and
            provider. This is also the outcome for an unrecognised
            ``storage_backend`` value that has no connection row.
        ValueError: An ACTIVE connection was found, but its provider has no
            backend implementation in this factory.
    """
    provider = document.storage_backend

    # MinIO needs neither a per-user connection nor credential decryption.
    if provider == "minio":
        return get_storage_backend()

    # Find this user's ACTIVE connection for the document's provider.
    active_connection_query = select(CloudConnection).where(
        CloudConnection.user_id == user.id,
        CloudConnection.provider == provider,
        CloudConnection.status == "ACTIVE",
    )
    query_result = await session.execute(active_connection_query)
    # NOTE(review): scalar_one_or_none() raises if several rows match —
    # confirm ACTIVE connections are unique per (user, provider).
    connection = query_result.scalar_one_or_none()
    if connection is None:
        raise HTTPException(
            status_code=503,
            detail="Cloud connection not found or inactive",
        )

    # Credentials are decrypted per user: master key + user id + ciphertext.
    creds = decrypt_credentials(
        settings.cloud_creds_key.encode(),
        str(user.id),
        connection.credentials_enc,
    )

    # Lazy, per-provider imports keep module load free of circular imports.
    if provider == "google_drive":
        from storage.google_drive_backend import GoogleDriveBackend  # type: ignore[import]

        return GoogleDriveBackend(creds)

    if provider == "onedrive":
        from storage.onedrive_backend import OneDriveBackend  # type: ignore[import]

        return OneDriveBackend(creds)

    if provider == "nextcloud":
        from storage.nextcloud_backend import NextcloudBackend  # type: ignore[import]

        return NextcloudBackend(
            creds["server_url"],
            creds["username"],
            creds["password"],
        )

    if provider == "webdav":
        from storage.webdav_backend import WebDAVBackend  # type: ignore[import]

        return WebDAVBackend(
            creds["server_url"],
            creds["username"],
            creds["password"],
        )

    # Only reachable when an ACTIVE connection exists for an unknown provider.
    raise ValueError(f"Unknown storage backend: {document.storage_backend}")