diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md
index 45f15c9..4d84cb8 100644
--- a/.planning/ROADMAP.md
+++ b/.planning/ROADMAP.md
@@ -193,8 +193,10 @@ Before any phase is marked complete, all three gates must pass:
**Wave 1** — Test scaffold + dependencies
- [ ] 05-01-PLAN.md — Wave 0 xfail stubs, conftest cloud fixtures, requirements.txt packages, config.py settings
-**Wave 2** — Shared utilities (parallel)
+**Wave 2** — Shared utilities
- [ ] 05-02-PLAN.md — cloud_utils.py (SSRF + HKDF), cloud_cache.py (TTLCache), storage factory extension
+
+**Wave 3** — Cloud backends (parallel, both blocked on Wave 2 / Plan 05-02)
- [ ] 05-03-PLAN.md — GoogleDriveBackend + OneDriveBackend (all 7 StorageBackend methods)
- [ ] 05-04-PLAN.md — NextcloudBackend + WebDAVBackend (all 7 StorageBackend methods)
diff --git a/.planning/phases/05-cloud-storage-backends/05-01-PLAN.md b/.planning/phases/05-cloud-storage-backends/05-01-PLAN.md
index 1cddfe0..c5e7ad5 100644
--- a/.planning/phases/05-cloud-storage-backends/05-01-PLAN.md
+++ b/.planning/phases/05-cloud-storage-backends/05-01-PLAN.md
@@ -24,7 +24,7 @@ must_haves:
- "All 15 Phase 5 test stubs exist in test_cloud.py and xfail with strict=False"
- "conftest.py has mock_google_drive_creds, mock_onedrive_creds, mock_webdav_client, cloud_connection_factory fixtures"
- "requirements.txt includes all 6 new packages with correct version pins"
- - "config.py has CLOUD_CREDS_KEY, GOOGLE_CLIENT_ID/SECRET, ONEDRIVE_CLIENT_ID/SECRET/TENANT_ID, BACKEND_URL settings"
+ - "config.py has CLOUD_CREDS_KEY, GOOGLE_CLIENT_ID/SECRET, ONEDRIVE_CLIENT_ID/SECRET/TENANT_ID, BACKEND_URL, FRONTEND_URL settings"
- "pytest -v passes with zero failures after Wave 0 (stubs xfail, not fail)"
artifacts:
- path: "backend/tests/test_cloud.py"
@@ -96,7 +96,7 @@ From backend/api/admin.py:
- requirements.txt contains all 6 new packages with their exact version pins
- - config.py Settings class has: cloud_creds_key (str, default "CHANGEME-32-bytes-padded!!"), google_client_id (str, default ""), google_client_secret (str, default ""), onedrive_client_id (str, default ""), onedrive_client_secret (str, default ""), onedrive_tenant_id (str, default "common"), backend_url (str, default "http://localhost:8000")
+ - config.py Settings class has: cloud_creds_key (str, default "CHANGEME-32-bytes-padded!!"), google_client_id (str, default ""), google_client_secret (str, default ""), onedrive_client_id (str, default ""), onedrive_client_secret (str, default ""), onedrive_tenant_id (str, default "common"), backend_url (str, default "http://localhost:8000"), frontend_url (str, default "http://localhost:5173")
- All new settings have empty-string or safe defaults so the app boots without cloud credentials configured
@@ -113,9 +113,10 @@ From backend/api/admin.py:
- onedrive_client_secret: str = "" (ONEDRIVE_CLIENT_SECRET)
- onedrive_tenant_id: str = "common" (ONEDRIVE_TENANT_ID — "common" works for personal + org accounts)
- backend_url: str = "http://localhost:8000" (BACKEND_URL — used to construct OAuth callback URLs)
+ - frontend_url: str = "http://localhost:5173" (FRONTEND_URL — used to construct OAuth success/error redirect to Vue app; per B4 fix)
.env.example should have the CLOUD_CREDS_KEY, GOOGLE_CLIENT_ID, GOOGLE_CLIENT_SECRET,
- ONEDRIVE_CLIENT_ID, ONEDRIVE_CLIENT_SECRET, ONEDRIVE_TENANT_ID, BACKEND_URL entries
+ ONEDRIVE_CLIENT_ID, ONEDRIVE_CLIENT_SECRET, ONEDRIVE_TENANT_ID, BACKEND_URL, FRONTEND_URL entries
(create .env.example if it doesn't exist, or append if it does).
@@ -123,10 +124,10 @@ From backend/api/admin.py:
- backend/requirements.txt contains lines matching: cryptography>=41.0.0, google-auth-oauthlib>=1.3.1, google-api-python-client>=2.196.0, msal>=1.36.0, webdavclient3>=3.14.7, cachetools>=5.3.0
- - backend/config.py contains `cloud_creds_key: str` and `google_client_id: str` and `backend_url: str`
+ - backend/config.py contains `cloud_creds_key: str` and `google_client_id: str` and `backend_url: str` and `frontend_url: str`
- `python -c "from config import settings; print(settings.cloud_creds_key)"` prints without ImportError
- requirements.txt has all 6 Phase 5 package lines; config.py imports and Settings loads without error; all 7 new cloud settings accessible via settings.{field_name}
+ requirements.txt has all 6 Phase 5 package lines; config.py imports and Settings loads without error; all 8 new cloud settings accessible via settings.{field_name}
@@ -278,7 +279,7 @@ cd /Users/nik/Documents/Progamming/document_scanner/backend && python -m pytest
- pytest tests/test_cloud.py exits 0; all 15 stubs show xfailed
- pytest -v (full suite) exits 0 with zero failures
- requirements.txt contains all 6 new package lines
-- config.py Settings loads without error; cloud_creds_key, google_client_id, backend_url all accessible
+- config.py Settings loads without error; cloud_creds_key, google_client_id, backend_url, frontend_url all accessible
- conftest.py has 4 new fixtures: mock_google_drive_creds, mock_onedrive_creds, mock_webdav_client, cloud_connection_factory
diff --git a/.planning/phases/05-cloud-storage-backends/05-03-PLAN.md b/.planning/phases/05-cloud-storage-backends/05-03-PLAN.md
index af1a59f..793ef52 100644
--- a/.planning/phases/05-cloud-storage-backends/05-03-PLAN.md
+++ b/.planning/phases/05-cloud-storage-backends/05-03-PLAN.md
@@ -20,7 +20,7 @@ must_haves:
- "OneDriveBackend implements all 7 StorageBackend abstract methods"
- "generate_presigned_put_url and presigned_get_url raise NotImplementedError on both cloud backends (D-14)"
- "All sync SDK calls wrapped in asyncio.to_thread() — event loop never blocked"
- - "On-demand token refresh: 401/token-expiry error triggers transparent refresh; invalid_grant sets REQUIRES_REAUTH"
+ - "Backends are stateless: raise CloudConnectionError(reason="token_expired") on expiry or CloudConnectionError(reason="invalid_grant") on revocation — DB update belongs to API layer (D-05/D-06, B2 design)"
- "Google OAuth Flow uses access_type='offline', prompt='consent' (Pitfall 1 prevention)"
- "OneDrive uses resumable upload sessions (createUploadSession) for all files (Pitfall 6 prevention)"
artifacts:
@@ -44,7 +44,7 @@ must_haves:
Implement GoogleDriveBackend and OneDriveBackend — the two OAuth-based cloud StorageBackend concrete classes.
-Purpose: These backends handle Google Drive v3 and Microsoft Graph file operations. Both use async-wrapped sync SDKs, on-demand token refresh, and handle the invalid_grant → REQUIRES_REAUTH transition per D-05/D-06.
+Purpose: These backends handle Google Drive v3 and Microsoft Graph file operations. Both use async-wrapped sync SDKs and raise CloudConnectionError(reason) for token expiry/revocation. The DB transition (REQUIRES_REAUTH) is handled by the API layer per B2 design — backends are stateless.
Output: google_drive_backend.py and onedrive_backend.py, each implementing all 7 StorageBackend methods.
@@ -91,11 +91,13 @@ Microsoft Graph: GET /me/drive/items/{item_id}/content — streams bytes
Microsoft Graph: DELETE /me/drive/items/{item_id}
OneDrive object_key = item_id from upload response
-
-Custom exception: CloudConnectionError (raised when invalid_grant detected)
-On 401 / token-expiry: refresh token, update credentials_enc in conn, retry once
-On invalid_grant: set conn.status = "REQUIRES_REAUTH", raise CloudConnectionError
-Both backends need session + conn parameters for the refresh/update path (passed by the API layer caller)
+
+Custom exception: CloudConnectionError raised with reason attribute:
+ - reason="token_expired": API layer will refresh the token, update DB, and retry
+ - reason="invalid_grant": API layer will set conn.status="REQUIRES_REAUTH" in DB and raise HTTPException(503)
+Backends are STATELESS — they raise CloudConnectionError but do NOT update DB or conn directly.
+DB updates happen in the _call_cloud_op() helper in cloud.py (Plan 05), which has the session.
+This keeps backends testable without DB fixtures.
@@ -119,8 +121,10 @@ Both backends need session + conn parameters for the refresh/update path (passed
- stat_object: calls service.files().get(fileId=key, fields="size") wrapped in asyncio.to_thread(); returns int(metadata.get("size", 0))
- health_check: tries files().list(pageSize=1) wrapped in asyncio.to_thread(); returns True/False
- All sync googleapiclient calls wrapped in asyncio.to_thread() (Pitfall 7)
- - On-demand token refresh: _is_token_expired(e) detects googleapiclient.errors.HttpError status 401; _refresh_google_creds(credentials) calls google.auth.transport.requests.Request() to refresh; returns updated credentials dict or None on invalid_grant
- - CloudConnectionError exception class defined in this module for invalid_grant signaling
+ - CloudConnectionError exception class defined in this module; raised with a reason attribute (the backend only raises it — it performs no DB operations itself)
+ - On HttpError 401 (token expired): raise CloudConnectionError(reason="token_expired") — the API layer in cloud.py handles the actual refresh and DB update per D-05 (B2 design)
+ - On invalid_grant detection (google.auth.exceptions.RefreshError mentioning invalid_grant, or googleapiclient.errors.HttpError with specific message or custom check): raise CloudConnectionError(reason="invalid_grant") — the API layer sets REQUIRES_REAUTH per D-06 (B2 design)
+ - Backends have NO session parameter and perform NO DB writes — they are stateless signal-raisers only
Create backend/storage/google_drive_backend.py with:
@@ -136,7 +140,10 @@ Both backends need session + conn parameters for the refresh/update path (passed
from google.auth.transport.requests import Request
from storage.base import StorageBackend
- class CloudConnectionError(Exception): pass
+ class CloudConnectionError(Exception):
+ def __init__(self, msg: str = "", *, reason: str = ""):
+ super().__init__(msg)
+ self.reason = reason # "token_expired" | "invalid_grant"
class GoogleDriveBackend(StorageBackend):
SCOPES = ["https://www.googleapis.com/auth/drive.file"]
@@ -243,8 +250,10 @@ print('All 7 methods are coroutines: OK')
- generate_presigned_put_url: raises NotImplementedError
- stat_object: GET /me/drive/items/{item_id}?$select=size; return int(response["size"])
- health_check: GET /me/drive?$select=id; return True/False
- - _refresh_token(credentials: dict) -> dict | None: calls msal.ConfidentialClientApplication.acquire_token_by_refresh_token(); returns new credentials dict or None if result.get("error") == "invalid_grant"
+ - _refresh_token() -> dict | None: calls msal.ConfidentialClientApplication.acquire_token_by_refresh_token(); returns new credentials dict or None if result.get("error") == "invalid_grant"
+ - _ensure_valid_token(): on expired token calls _refresh_token(); if None raises CloudConnectionError(reason="invalid_grant"); if success updates self._credentials
- All sync msal calls wrapped in asyncio.to_thread(); httpx calls are already async (use await httpx.AsyncClient)
+ - Backend performs no DB writes: it raises CloudConnectionError(reason="token_expired") or CloudConnectionError(reason="invalid_grant") and leaves persistence to the API layer (B2 design; DB updates handled by API layer _call_cloud_op helper in cloud.py)
- CHUNK_SIZE = 10 * 1024 * 1024 (10 MB, above Graph's 4 MB limit)
@@ -390,8 +399,8 @@ cd /Users/nik/Documents/Progamming/document_scanner/backend && python -m pytest
-- GoogleDriveBackend: all 7 methods async; presigned methods raise NotImplementedError; CloudConnectionError defined
-- OneDriveBackend: all 7 methods async; CHUNK_SIZE=10MB; presigned methods raise NotImplementedError; CloudConnectionError imported
+- GoogleDriveBackend: all 7 methods async; presigned methods raise NotImplementedError; CloudConnectionError(reason=) defined; backend raises errors, does NO DB writes
+- OneDriveBackend: all 7 methods async; CHUNK_SIZE=10MB; presigned methods raise NotImplementedError; CloudConnectionError imported; backend raises errors, does NO DB writes
- pytest -v exits 0, 0 failures; test_cloud.py still all xfailed
diff --git a/.planning/phases/05-cloud-storage-backends/05-05-PLAN.md b/.planning/phases/05-cloud-storage-backends/05-05-PLAN.md
index a461fa0..e9081a0 100644
--- a/.planning/phases/05-cloud-storage-backends/05-05-PLAN.md
+++ b/.planning/phases/05-cloud-storage-backends/05-05-PLAN.md
@@ -9,6 +9,7 @@ depends_on:
files_modified:
- backend/api/cloud.py
- backend/main.py
+ - backend/api/auth.py
autonomous: true
requirements:
- CLOUD-01
@@ -17,11 +18,12 @@ requirements:
- CLOUD-04
- CLOUD-05
- CLOUD-06
+ - SEC-09
must_haves:
truths:
- "GET /api/cloud/oauth/initiate/{provider} redirects to provider OAuth URL; state token in Redis with 30-min TTL"
- - "GET /api/cloud/oauth/callback/{provider} validates state, exchanges code, encrypts credentials, saves CloudConnection, redirects to /settings?cloud_connected={provider}"
+ - "GET /api/cloud/oauth/callback/{provider} validates state, exchanges code, encrypts credentials, saves CloudConnection, redirects to {settings.frontend_url}/settings?cloud_connected={provider}"
- "POST /api/cloud/connections/webdav validates URL (SSRF), tests connection (PROPFIND), encrypts + saves credentials"
- "GET /api/cloud/connections returns CloudConnectionOut list — no credentials_enc"
- "DELETE /api/cloud/connections/{id} deletes credentials_enc row; subsequent use returns 503"
@@ -30,6 +32,8 @@ must_haves:
- "All endpoints use get_regular_user dep — admin blocked (403)"
- "OAuth callback invalid state returns 400; invalid provider returns 400"
- "write_audit_log called on connect, disconnect, and REQUIRES_REAUTH transitions"
+ - "_call_cloud_op(conn, user, session, op_fn) helper in cloud.py wraps all cloud ops: retries once on token_expired (refresh+DB update), sets REQUIRES_REAUTH+HTTPException(503) on invalid_grant"
+ - "Account deletion purges all CloudConnection rows and calls delete_object on cloud-stored documents (SEC-09)"
artifacts:
- path: "backend/api/cloud.py"
provides: "All /api/cloud/* endpoints + /api/users/me/default-storage"
@@ -101,6 +105,7 @@ From backend/config.py (after Plan 01):
settings.google_client_id, google_client_secret: str
settings.onedrive_client_id, onedrive_client_secret, onedrive_tenant_id: str
settings.backend_url: str (used in OAuth callback redirect_uri)
+ settings.frontend_url: str (used in OAuth callback success/error redirect to Vue — per B4 fix)
From backend/storage/cloud_utils.py:
def encrypt_credentials(master_key: bytes, user_id: str, credentials: dict) -> str
@@ -160,6 +165,22 @@ From backend/services/cloud_cache.py: get_cloud_folders_cached(user_id, provider
router = APIRouter(prefix="/api/cloud", tags=["cloud"])
users_router = APIRouter(prefix="/api/users", tags=["users"])
+ _call_cloud_op helper (add as a module-level async function in cloud.py, per B2 design):
+ async def _call_cloud_op(conn: CloudConnection, user: User, session: AsyncSession, op_fn):
+ """Wraps a cloud operation with transparent token refresh (D-05) and invalid_grant handling (D-06).
+
+ 1. Calls op_fn() — a zero-argument async callable that performs the cloud operation.
+ 2. On CloudConnectionError(reason="token_expired"): decrypt current creds, refresh via provider,
+ encrypt new creds, update conn.credentials_enc in DB, rebuild backend, retry op_fn() once.
+ 3. On CloudConnectionError(reason="invalid_grant"): set conn.status="REQUIRES_REAUTH",
+ await session.commit(), call write_audit_log(event_type="cloud.requires_reauth"),
+ raise HTTPException(503, "Cloud connection requires re-authentication. Please reconnect in Settings.").
+ 4. Propagates all other exceptions unchanged.
+ """
+ All upload/download/list calls in cloud.py MUST go through _call_cloud_op.
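+ op_fn is a zero-argument async callable (a nested `async def`, or a lambda returning a coroutine — Python has no async lambda) that resolves the backend instance through its closure at call time, so the retry uses the rebuilt backend.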
+ The backend instance is rebuilt after refresh using the new credentials dict.
+
Pydantic request models:
class WebDAVConnectRequest(BaseModel): server_url: str; username: str; password: str; provider: str
class DefaultStorageRequest(BaseModel): backend: str
@@ -273,6 +294,84 @@ assert len(cloud_routes) >= 5, f'Expected 5+ cloud routes, got {len(cloud_routes
Both cloud routers registered in main.py; all cloud routes visible in app.routes; full pytest suite passes
+
+ Task 3: Cloud connection cleanup on account deletion (SEC-09)
+ backend/api/auth.py
+
+ - backend/api/auth.py — find the DELETE /api/users/me endpoint (account self-deletion), verify it exists from Phase 2; if it does not exist, check backend/api/admin.py for DELETE /api/admin/users/{id}
+ - backend/db/models.py — CloudConnection (user_id, provider, status), Document (user_id, storage_backend, object_key)
+ - backend/storage/__init__.py — get_storage_backend_for_document signature
+
+
+ - When a user deletes their account (DELETE /api/users/me or admin DELETE /api/admin/users/{id}):
+ 1. Query all CloudConnection rows for the user
+ 2. For each connection, query all Document rows for that user where storage_backend == connection.provider
+ 3. For each such document, call get_storage_backend_for_document(doc, user, session) and await backend.delete_object(doc.object_key) — catch and log exceptions but do NOT abort the deletion
+ 4. Delete all CloudConnection rows for the user (credentials_enc purged)
+ - This runs BEFORE the user row is deleted (FK cascade would remove connections anyway, but cloud-stored objects must be actively deleted from the cloud provider while the credentials needed to reach it still exist)
+ - Runs in the same DB transaction as user deletion — if user deletion succeeds, cloud cleanup has completed
+ - No orphaned credentials_enc rows after account deletion (SEC-09)
+
+
+ Read backend/api/auth.py to locate the account deletion endpoint. Also check backend/api/admin.py for admin-initiated user deletion.
+
+ In the account deletion handler (DELETE /api/users/me), add a cloud cleanup block BEFORE the user row deletion:
+
+ 1. Import at top of file (if not already present):
+ from db.models import CloudConnection, Document
+ from storage import get_storage_backend_for_document
+ from sqlalchemy import select
+
+ 2. Cloud cleanup block (insert before the DELETE user statement):
+ cloud_conns_result = await session.execute(
+ select(CloudConnection).where(CloudConnection.user_id == current_user.id)
+ )
+ cloud_conns = cloud_conns_result.scalars().all()
+ for conn in cloud_conns:
+ # Delete cloud objects for this provider
+ docs_result = await session.execute(
+ select(Document).where(
+ Document.user_id == current_user.id,
+ Document.storage_backend == conn.provider,
+ )
+ )
+ for doc in docs_result.scalars().all():
+ try:
+ backend = await get_storage_backend_for_document(doc, current_user, session)
+ await backend.delete_object(doc.object_key)
+ except Exception:
+ logger.exception("Cloud delete failed for object %s", doc.object_key) # Log via module-level logger; do not abort user deletion on cloud error
+ await session.delete(conn)
+ await session.flush() # Flush connection deletes before user delete
+
+ If DELETE /api/users/me does not exist in auth.py, check admin.py for the admin-delete endpoint and add the same cleanup block there. Document which file was modified in the summary.
+
+ write_audit_log call: add event_type="cloud.credentials_purged" after the cleanup loop,
+ with metadata_={"providers": [c.provider for c in cloud_conns]}.
+
+
+ cd /Users/nik/Documents/Progamming/document_scanner/backend && python -c "
+import ast
+import os
+for fname in ['api/auth.py', 'api/admin.py']:
+ if os.path.exists(fname):
+ with open(fname) as f:
+ src = f.read()
+ if 'cloud_conns' in src or 'CloudConnection' in src:
+ print(f'OK: cloud cleanup found in {fname}')
+" && python -m pytest -v --tb=short 2>&1 | tail -5
+
+
+ - Either backend/api/auth.py or backend/api/admin.py contains cloud connection cleanup logic before user deletion
+ - CloudConnection rows are deleted for the user as part of account deletion
+ - delete_object called for each cloud-stored document before credentials are purged
+ - write_audit_log called with event_type="cloud.credentials_purged"
+ - pytest -v exits 0 with 0 failures
+ - No orphaned credentials_enc rows after account deletion (SEC-09)
+
+ Cloud connection cleanup wired into account deletion; credentials_enc purged; SEC-09 satisfied
+
+
@@ -308,6 +407,7 @@ cd /Users/nik/Documents/Progamming/document_scanner/backend && python -m pytest
- main.py: both routers registered; all routes visible in app.routes
- pytest -v exits 0, 0 failures
- test_cloud.py stubs transition from xfail to green for test_credentials_enc_not_exposed, test_connection_status_display, test_disconnect_deletes_credentials, test_ssrf_validation, test_cross_user_idor, test_admin_cannot_see_credentials
+- SEC-09: account deletion endpoint purges CloudConnection rows and cloud-stored document objects before deleting user row