Files
kite/backend/storage/webdav_backend.py
T
curo1305 311dfa1513 feat(05-04): implement WebDAVBackend with SSRF guard and asyncio wrapping
- All 7 StorageBackend methods implemented as async coroutines
- validate_cloud_url() called in __init__ (SSRF at construct time) and before
  every asyncio.to_thread() call (D-17 defense-in-depth / T-05-04-01, T-05-04-02)
- _make_path() builds "docuvault/{user_id}/{document_id}{ext}" with urllib.parse.quote
  encoding on path segments (RESEARCH.md Pitfall 2)
- presigned_get_url and generate_presigned_put_url raise NotImplementedError (D-14)
- All webdavclient3 sync calls (upload_to, download_from, clean, info, check, mkdir)
  wrapped in asyncio.to_thread() per MinIOBackend pattern
- delete_object silently ignores missing file exceptions (StorageBackend ABC contract)
2026-05-28 21:09:25 +02:00

228 lines
8.7 KiB
Python

"""
WebDAVBackend — Generic WebDAV StorageBackend implementation.

Security design (D-17 / T-05-04-01 / T-05-04-02):
    validate_cloud_url() is called at two mandatory points:

    1. In __init__, BEFORE constructing the webdavclient3 Client — blocks SSRF at
       construction time so a private-IP URL never reaches the SDK.
    2. Before EVERY asyncio.to_thread() call — defends against DNS-rebinding attacks
       where a hostname passes the initial check but later resolves to an internal IP.

Path encoding (Pitfall 2 from RESEARCH.md):
    _make_path() percent-encodes each path segment using urllib.parse.quote()
    so non-ASCII characters and spaces in UUIDs (which don't occur in practice,
    but are handled defensively) are properly escaped for Nextcloud/WebDAV.

Object key scheme:
    object_key = WebDAV path: "docuvault/{user_id}/{document_id}{extension}"
    This is the path relative to the WebDAV root, stored in the database as
    document.object_key for later get/delete operations.

WebDAV credentials dict shape:
    {"server_url": str, "username": str, "password": str}

Not implemented (D-14):
    presigned_get_url and generate_presigned_put_url raise NotImplementedError.
    Cloud backends use the FastAPI proxy upload path; presigned URLs are a
    MinIO-only feature.
"""
from __future__ import annotations

import asyncio
import io
import logging
import urllib.parse

from webdav3.client import Client
from webdav3.exceptions import RemoteResourceNotFound

from storage.base import StorageBackend
from storage.cloud_utils import validate_cloud_url
class WebDAVBackend(StorageBackend):
    """Generic WebDAV storage backend implementing all 7 StorageBackend abstract methods.

    All synchronous webdavclient3 calls are wrapped in asyncio.to_thread() to avoid
    blocking the FastAPI event loop (mirrors MinIOBackend pattern — RESEARCH.md Pattern 5).

    The SSRF guard (validate_cloud_url) is called in __init__ and before every
    asyncio.to_thread() call for defense-in-depth against DNS rebinding (D-17).
    """

    def __init__(self, server_url: str, username: str, password: str) -> None:
        """Construct a WebDAVBackend.

        Args:
            server_url: The WebDAV root URL (e.g. "https://dav.example.com/remote.php/dav/").
            username: HTTP Basic Auth username.
            password: HTTP Basic Auth password or app-specific password (D-07).

        Raises:
            ValueError: If server_url targets a private/internal address (SSRF guard, D-17).
        """
        # SSRF guard: validate before any SDK construction — T-05-04-01
        validate_cloud_url(server_url)
        # Kept so every later request can re-run the guard against the same URL.
        self._server_url = server_url
        options = {
            "webdav_hostname": server_url,
            "webdav_login": username,
            "webdav_password": password,
        }
        self._client = Client(options)

    def _make_path(self, user_id: str, document_id: str, extension: str) -> str:
        """Construct a WebDAV path for a document.

        Path schema: "docuvault/{user_id}/{document_id}{extension}"

        Every caller-supplied component — including the extension — is
        percent-encoded with ``safe=""`` (Pitfall 2 — Nextcloud/WebDAV
        compatibility with non-ASCII characters and spaces, even though UUIDs
        are alphanumeric). Encoding the extension as well means a "/" inside
        it cannot introduce extra path segments; ordinary extensions such as
        ".pdf" are returned unchanged.

        Args:
            user_id: User UUID string.
            document_id: Document UUID string.
            extension: File extension including leading dot (e.g. ".pdf").

        Returns:
            WebDAV path string suitable for use as object_key.
        """
        encoded_uid = urllib.parse.quote(str(user_id), safe="")
        encoded_did = urllib.parse.quote(str(document_id), safe="")
        encoded_ext = urllib.parse.quote(str(extension), safe="")
        return f"docuvault/{encoded_uid}/{encoded_did}{encoded_ext}"

    async def put_object(
        self,
        user_id: str,
        document_id: str,
        file_bytes: bytes,
        extension: str,
        content_type: str,
    ) -> str:
        """Upload bytes to WebDAV and return the object_key (WebDAV path).

        Ensures the parent directory "docuvault/{user_id}/" exists before upload
        by calling client.mkdir() with recursive=True.

        Args:
            user_id: User UUID string.
            document_id: Document UUID string.
            file_bytes: Raw file content.
            extension: File extension with leading dot (e.g. ".pdf").
            content_type: MIME type (unused by WebDAV, kept for ABC compliance).

        Returns:
            object_key: The WebDAV path where the file was stored.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17 / T-05-04-02)
        validate_cloud_url(self._server_url)
        object_key = self._make_path(user_id, document_id, extension)

        # The file name segment contains no "/" (it is fully percent-encoded),
        # so everything before the last "/" is the per-user directory.
        parent_dir = object_key.rpartition("/")[0]
        # Second positional argument is mkdir's recursive flag.
        await asyncio.to_thread(self._client.mkdir, parent_dir, True)

        # mkdir and upload are separate outbound requests, so the guard runs
        # again here to honour the "before EVERY to_thread call" rule.
        validate_cloud_url(self._server_url)
        buf = io.BytesIO(file_bytes)
        await asyncio.to_thread(self._client.upload_to, buf, object_key)
        return object_key

    async def get_object(self, object_key: str) -> bytes:
        """Download bytes from WebDAV by object_key (WebDAV path).

        Args:
            object_key: WebDAV path returned by put_object.

        Returns:
            File content as bytes.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        buf = io.BytesIO()
        await asyncio.to_thread(self._client.download_from, buf, object_key)
        return buf.getvalue()

    async def delete_object(self, object_key: str) -> None:
        """Delete an object from WebDAV by object_key.

        A missing object is a silent no-op, consistent with the StorageBackend
        ABC contract ("No-op if the key does not exist"). Any other failure
        during the delete is still swallowed so callers keep the non-raising
        behaviour, but it is logged as a warning so that objects left behind
        on the server can be traced.

        Args:
            object_key: WebDAV path returned by put_object.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        try:
            await asyncio.to_thread(self._client.clean, object_key)
        except RemoteResourceNotFound:
            # Already gone — nothing to do.
            return
        except Exception:
            # Best-effort delete: do not raise, but do not hide the failure either.
            logging.getLogger(__name__).warning(
                "WebDAV delete failed for %s", object_key, exc_info=True
            )

    async def presigned_get_url(self, object_key: str, expires_minutes: int = 60) -> str:
        """Not supported for WebDAV backends.

        WebDAV does not have a concept of presigned URLs. Cloud document downloads
        use the FastAPI proxy endpoint (D-15 — same as all cloud backends).

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError("WebDAV backend does not support presigned GET URLs")

    async def generate_presigned_put_url(
        self, object_key: str, expires_minutes: int = 15
    ) -> str:
        """Not supported for WebDAV backends.

        Cloud backends use the FastAPI intermediary upload path (D-14).
        Presigned PUT URLs are a MinIO/S3-only feature.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "WebDAV backend does not support presigned PUT URLs — use the upload proxy endpoint"
        )

    async def stat_object(self, object_key: str) -> int:
        """Return the file size in bytes from the WebDAV server.

        Uses client.info() which performs a PROPFIND request and returns a dict
        including a "size" key with the content length.

        Args:
            object_key: WebDAV path returned by put_object.

        Returns:
            File size in bytes. Returns 0 if the server does not report a size.

        Raises:
            ValueError: If SSRF guard fires on re-validation (D-17).
        """
        # Re-validate before every outbound request (D-17)
        validate_cloud_url(self._server_url)
        info = await asyncio.to_thread(self._client.info, object_key)
        # An unreported size may show up as a missing key, None, or an empty
        # string; int() would raise on the latter two, so map them all to 0.
        raw_size = info.get("size")
        return int(raw_size) if raw_size else 0

    async def health_check(self) -> bool:
        """Return True if the WebDAV server root ("/") is reachable.

        Uses a lightweight client.check("/") PROPFIND call — no file read/write.
        Per D-08: connection health is validated before credentials are stored.

        Returns:
            True if the server is reachable and the WebDAV root exists, False otherwise
            (including when the SSRF guard rejects the URL).
        """
        try:
            # Re-validate before every outbound request (D-17)
            validate_cloud_url(self._server_url)
            result = await asyncio.to_thread(self._client.check, "/")
            return bool(result)
        except Exception:
            # A health probe reports failure as False rather than raising.
            return False