Files
curo1305 54ef3357ba fix(05): cloud API path param, root sentinel, webdav creds in list, upload path
cloud.py: list_connections now decrypts and surfaces server_url +
connection_username for nextcloud/webdav providers; folder route uses
{folder_id:path} to handle slashes; translates "root" sentinel to "".
nextcloud_backend.py: skip parent directory entry in PROPFIND Depth:1 results.
webdav_backend.py: add cloud_folder + original_filename params to
upload_object so files land in the user's chosen folder with their real name.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-30 11:58:01 +02:00

223 lines
8.6 KiB
Python

"""
WebDAVBackend — Generic WebDAV StorageBackend implementation.
Security design (D-17 / T-05-04-01 / T-05-04-02):
validate_cloud_url() is called at two mandatory points:
1. In __init__, BEFORE constructing the webdavclient3 Client — blocks SSRF at
construction time so a private-IP URL never reaches the SDK.
2. Before EVERY asyncio.to_thread() call — defends against DNS-rebinding attacks
where a hostname passes the initial check but later resolves to an internal IP.
Path encoding (Pitfall 2 from RESEARCH.md):
_make_path() percent-encodes each path segment using urllib.parse.quote()
so non-ASCII characters and spaces in UUIDs (which don't occur in practice,
but are handled defensively) are properly escaped for Nextcloud/WebDAV.
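Object key scheme:
Default (no cloud_folder): "docuvault/{user_id}/{document_id}{extension}"
With cloud_folder: "{cloud_folder}/{original_filename}" (basename only; falls back
to "{document_id}{extension}" when no filename is supplied), so the file shows up
under its real name in the folder the user picked.
In both cases the key is the path relative to the WebDAV root, stored in the
database as document.object_key for later get/delete operations.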
WebDAV credentials dict shape:
{"server_url": str, "username": str, "password": str}
Not implemented (D-14):
presigned_get_url and generate_presigned_put_url raise NotImplementedError.
Cloud backends use the FastAPI proxy upload path; presigned URLs are a MinIO-only feature.
"""
from __future__ import annotations

import asyncio
import io
import logging
import urllib.parse
from pathlib import Path

from webdav3.client import Client

from storage.base import StorageBackend
from storage.cloud_utils import validate_cloud_url
class WebDAVBackend(StorageBackend):
    """Generic WebDAV storage backend implementing all 7 StorageBackend abstract methods.

    All synchronous webdavclient3 calls are wrapped in asyncio.to_thread() to avoid
    blocking the FastAPI event loop (mirrors MinIOBackend pattern — RESEARCH.md Pattern 5).

    The SSRF guard (validate_cloud_url) is called in __init__ and again at the start
    of every method that talks to the server, for defense-in-depth against DNS
    rebinding (D-17).
    """

    def __init__(self, server_url: str, username: str, password: str) -> None:
        """Construct a WebDAVBackend.

        Args:
            server_url: The WebDAV root URL (e.g. "https://dav.example.com/remote.php/dav/").
            username: HTTP Basic Auth username.
            password: HTTP Basic Auth password or app-specific password (D-07).

        Raises:
            ValueError: If server_url targets a private/internal address (SSRF guard, D-17).
        """
        # SSRF guard: validate before any SDK construction — T-05-04-01
        validate_cloud_url(server_url)
        # Kept so every later operation can re-validate the same URL.
        self._server_url = server_url
        options = {
            "webdav_hostname": server_url,
            "webdav_login": username,
            "webdav_password": password,
        }
        self._client = Client(options)

    def _make_path(self, user_id: str, document_id: str, extension: str) -> str:
        """Construct the default (DocuVault-managed) WebDAV path for a document.

        Path schema: "docuvault/{user_id}/{document_id}{extension}"

        The user_id and document_id segments are percent-encoded with safe=""
        (Pitfall 2 — Nextcloud/WebDAV compatibility with non-ASCII characters and
        spaces, even though UUIDs are alphanumeric). The extension is appended
        verbatim.

        Args:
            user_id: User UUID string.
            document_id: Document UUID string.
            extension: File extension including leading dot (e.g. ".pdf").

        Returns:
            WebDAV path string suitable for use as object_key.
        """
        encoded_uid = urllib.parse.quote(str(user_id), safe="")
        encoded_did = urllib.parse.quote(str(document_id), safe="")
        return f"docuvault/{encoded_uid}/{encoded_did}{extension}"

    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
        cloud_folder: str | None = None,
        original_filename: str | None = None,
    ) -> str:
        """Upload bytes to WebDAV and return the object_key (WebDAV path).

        When cloud_folder is provided the file is stored inside that folder
        (e.g. "Documents/") using the original filename so it appears naturally
        in the user's cloud folder browser. When omitted the default
        DocuVault-managed UUID path is used.

        Args:
            user_id: User UUID string (used only for the default path).
            document_id: Document UUID string; also the fallback file name.
            file_bytes: Raw file content to upload.
            extension: File extension including leading dot (e.g. ".pdf").
            content_type: Accepted for interface compatibility; not used here.
            cloud_folder: Optional target folder relative to the WebDAV root.
            original_filename: Optional user-facing file name; only its basename
                is used.

        Returns:
            The WebDAV path the bytes were uploaded to.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)

        if cloud_folder:
            parent_dir = cloud_folder.rstrip("/")
            # Use original filename (basename only — path traversal guard).
            safe_name = Path(original_filename).name if original_filename else ""
            # Path(...).name is "" for inputs like "/" and ".." for "..";
            # neither is a usable file name, so fall back to the UUID name.
            if safe_name in ("", ".", ".."):
                safe_name = f"{document_id}{extension}"
            object_key = f"{parent_dir}/{safe_name}" if parent_dir else safe_name
        else:
            object_key = self._make_path(user_id, document_id, extension)
            parent_dir = f"docuvault/{urllib.parse.quote(str(user_id), safe='')}"

        # An empty parent_dir means the WebDAV root (cloud_folder was only
        # slashes); there is nothing to create in that case.
        if parent_dir:
            # NOTE(review): the positional True is presumably webdavclient3's
            # `recursive` flag — confirm against the installed version.
            await asyncio.to_thread(self._client.mkdir, parent_dir, True)

        buf = io.BytesIO(file_bytes)
        await asyncio.to_thread(self._client.upload_to, buf, object_key)
        return object_key

    async def get_object(self, object_key: str) -> bytes:
        """Download bytes from WebDAV by object_key (WebDAV path).

        Args:
            object_key: WebDAV path returned by put_object.

        Returns:
            File content as bytes.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        buf = io.BytesIO()
        await asyncio.to_thread(self._client.download_from, buf, object_key)
        return buf.getvalue()

    async def delete_object(self, object_key: str) -> None:
        """Delete an object from WebDAV by object_key.

        Silently ignores missing files (no-op if object_key does not exist).
        Any exception raised by the WebDAV client during delete is swallowed —
        consistent with the StorageBackend ABC contract ("No-op if the key does
        not exist") — but is logged at WARNING level so that real failures
        (auth, network) are not invisible.

        Args:
            object_key: WebDAV path returned by put_object.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17). Deliberately outside
        # the try block: an SSRF rejection must propagate, not be swallowed.
        validate_cloud_url(self._server_url)
        try:
            await asyncio.to_thread(self._client.clean, object_key)
        except Exception as exc:
            # Covers FileNotFoundError, WebDavException, and any other exception
            # for missing or already-deleted files. Best-effort by contract, so
            # record the failure instead of raising.
            logging.getLogger(__name__).warning(
                "WebDAV delete of %r failed and was ignored: %s", object_key, exc
            )

    async def presigned_get_url(self, object_key: str, expires_minutes: int = 60) -> str:
        """Not supported for WebDAV backends.

        WebDAV does not have a concept of presigned URLs. Cloud document downloads
        use the FastAPI proxy endpoint (D-15 — same as all cloud backends).

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("WebDAV backend does not support presigned GET URLs")

    async def generate_presigned_put_url(
        self, object_key: str, expires_minutes: int = 15
    ) -> str:
        """Not supported for WebDAV backends.

        Cloud backends use the FastAPI intermediary upload path (D-14).
        Presigned PUT URLs are a MinIO/S3-only feature.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "WebDAV backend does not support presigned PUT URLs — use the upload proxy endpoint"
        )

    async def stat_object(self, object_key: str) -> int:
        """Return the file size in bytes from the WebDAV server.

        Uses client.info() which performs a PROPFIND request and returns a dict
        including a "size" key with the content length.

        Args:
            object_key: WebDAV path returned by put_object.

        Returns:
            File size in bytes. Returns 0 if the server does not report a size
            (key missing, None, or empty string).

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        info = await asyncio.to_thread(self._client.info, object_key)
        # `or 0` also covers a "size" key that is present but None/"" — a plain
        # .get("size", 0) default would pass that value to int() and raise.
        return int(info.get("size") or 0)

    async def health_check(self) -> bool:
        """Return True if the WebDAV server root ("/") is reachable.

        Uses a lightweight client.check("/") PROPFIND call — no file read/write.
        Per D-08: connection health is validated before credentials are stored.

        Returns:
            True if the server is reachable and the WebDAV root exists, False
            otherwise (including when the SSRF guard rejects the URL).
        """
        try:
            # Re-validate before every outbound request (D-17). Inside the try
            # so a rejected URL reports "unhealthy" rather than raising.
            validate_cloud_url(self._server_url)
            result = await asyncio.to_thread(self._client.check, "/")
            return bool(result)
        except Exception:
            return False