a9ea33dd18
- Previously both 'nextcloud' and 'webdav' providers were dispatched to WebDAVBackend - Now 'nextcloud' uses NextcloudBackend (has list_folder); 'webdav' uses WebDAVBackend - Both share identical constructor signature (server_url, username, password) - Removes type: ignore[import] concern on nextcloud_backend — module now exists
133 lines
5.2 KiB
Python
133 lines
5.2 KiB
Python
"""
|
|
Storage backend factory for DocuVault.
|
|
|
|
Mirrors backend/ai/__init__.py — exposes a get_storage_backend() factory
|
|
that returns the configured StorageBackend implementation.
|
|
|
|
Phase 1: get_storage_backend() always returns MinIOBackend.
|
|
Phase 5: get_storage_backend_for_document() extends the factory to support
|
|
OneDrive, Google Drive, Nextcloud, and WebDAV backends based on the document's
|
|
storage_backend field and the user's active cloud connections.
|
|
|
|
Security notes:
|
|
- get_storage_backend_for_document() is the only function that decrypts cloud
|
|
credentials. It receives the user object from get_regular_user dep, ensuring
|
|
the CloudConnection query is always scoped to the authenticated user (T-05-02-04).
|
|
- Cloud backend modules are imported lazily inside the function body to avoid
|
|
circular imports at module load time (RESEARCH.md Pattern 9).
|
|
"""
|
|
from typing import Optional
|
|
|
|
from fastapi import HTTPException
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from config import settings
|
|
from db.models import CloudConnection, Document, User
|
|
from storage.base import StorageBackend
|
|
from storage.minio_backend import MinIOBackend
|
|
from storage.cloud_utils import decrypt_credentials
|
|
|
|
|
|
def get_storage_backend() -> StorageBackend:
    """Build the MinIO-backed StorageBackend from ``config.settings``.

    The connection is always created with ``secure=False``: traffic between
    containers on the Docker-internal network is plain HTTP
    (RESEARCH.md Pattern 3).

    ``public_endpoint`` carries the browser-resolvable hostname used for
    presigned PUT URLs. Per RESEARCH.md Finding 3 (dual-client pattern), the
    internal endpoint serves every server-side operation and the public
    endpoint is used only by generate_presigned_put_url.

    Returns:
        A MinIOBackend configured with the endpoint, credentials and bucket
        read from ``settings``.
    """
    # A falsy setting (e.g. an empty string) is normalised to None so the
    # backend receives "no public endpoint" rather than an empty hostname.
    browser_endpoint = settings.minio_public_endpoint
    if not browser_endpoint:
        browser_endpoint = None

    connection_kwargs = {
        "endpoint": settings.minio_endpoint,
        "access_key": settings.minio_access_key,
        "secret_key": settings.minio_secret_key,
        "bucket": settings.minio_bucket,
        "secure": False,
        "public_endpoint": browser_endpoint,
    }
    return MinIOBackend(**connection_kwargs)
|
|
|
|
|
|
async def get_storage_backend_for_document(
    document: Document,
    user: User,
    session: AsyncSession,
) -> StorageBackend:
    """Return the correct StorageBackend for the given document.

    For MinIO documents (storage_backend == 'minio'), returns the
    MinIOBackend produced by the existing get_storage_backend() factory.

    For cloud documents, loads the user's active CloudConnection from the DB,
    decrypts credentials using HKDF-derived per-user Fernet key (D-18), and
    instantiates the appropriate backend class.

    Security:
        - CloudConnection query is scoped to user.id — cross-user access is
          impossible via this function (T-05-02-04, CLOUD-IDOR).
        - Cloud backend classes are imported lazily to avoid circular imports
          (RESEARCH.md Pattern 9 anti-pattern note).
        - Raises HTTPException(503) if the connection is missing or not
          ACTIVE — callers receive a clear error, not a silent fallback to
          MinIO.
        - An unrecognised storage_backend is rejected before any database
          query or credential decryption takes place.

    Args:
        document: The Document ORM instance with a storage_backend field.
        user: The authenticated User ORM instance (from get_regular_user dep).
        session: An active async SQLAlchemy session.

    Returns:
        A concrete StorageBackend instance for the document's backend.

    Raises:
        HTTPException(503): If the cloud connection is not found or not ACTIVE.
        ValueError: If the storage_backend value is not recognised.
    """
    provider = document.storage_backend
    if provider == "minio":
        return get_storage_backend()

    # Validate the provider up front. The connection lookup below filters on
    # this same value, so without this guard an unknown backend would normally
    # surface as a misleading 503 ("connection not found") and the documented
    # ValueError would be effectively unreachable.
    supported_cloud_providers = ("google_drive", "onedrive", "nextcloud", "webdav")
    if provider not in supported_cloud_providers:
        raise ValueError(f"Unknown storage backend: {document.storage_backend}")

    # Load the active CloudConnection for this user + provider
    result = await session.execute(
        select(CloudConnection).where(
            CloudConnection.user_id == user.id,
            CloudConnection.provider == provider,
            CloudConnection.status == "ACTIVE",
        )
    )
    conn = result.scalar_one_or_none()
    if conn is None:
        raise HTTPException(
            status_code=503,
            detail="Cloud connection not found or inactive",
        )

    # Decrypt per-user credentials (HKDF key derived from master key + user_id)
    master_key = settings.cloud_creds_key.encode()
    credentials = decrypt_credentials(master_key, str(user.id), conn.credentials_enc)

    # Lazy imports to avoid circular dependency at module load time
    if provider == "google_drive":
        from storage.google_drive_backend import GoogleDriveBackend  # type: ignore[import]

        return GoogleDriveBackend(credentials)

    if provider == "onedrive":
        from storage.onedrive_backend import OneDriveBackend  # type: ignore[import]

        return OneDriveBackend(credentials)

    # Nextcloud and WebDAV backends are both constructed from the same three
    # positional values (server_url, username, password); only the class
    # differs, so pick the class first and build it once.
    if provider == "nextcloud":
        from storage.nextcloud_backend import NextcloudBackend  # type: ignore[import]

        dav_backend_cls = NextcloudBackend
    elif provider == "webdav":
        from storage.webdav_backend import WebDAVBackend  # type: ignore[import]

        dav_backend_cls = WebDAVBackend
    else:
        # Defensive: only reachable if supported_cloud_providers gains an
        # entry without a matching dispatch branch above.
        raise ValueError(f"Unknown storage backend: {document.storage_backend}")

    return dav_backend_cls(
        credentials["server_url"],
        credentials["username"],
        credentials["password"],
    )
|