feat(03-02): implement presigned upload flow, quota enforcement, cleanup task

- Replace POST /api/documents/upload with POST /api/documents/upload-url + /{id}/confirm
- upload-url: create pending Document row with user_id=None (Wave 2), return presigned PUT URL
- confirm: stat MinIO for authoritative size (T-03-05), atomic quota UPDATE (T-03-06, STORE-03)
- Confirm returns 413 with {used_bytes, limit_bytes, rejected_bytes} on quota exceeded (STORE-05)
- Wave 2 guard: skip quota UPDATE when doc.user_id is None (Plan 03-03 removes this)
- Add GET /api/auth/me/quota to api/auth.py (STORE-04)
- services/storage.py: remove save_upload (D-04); add GREATEST(0, used_bytes-delta) quota decrement to delete_document (STORE-06)
- tasks/document_tasks.py: add cleanup_abandoned_uploads Celery beat task (D-06)
- celery_app.py: add beat_schedule for cleanup-abandoned-uploads every 30 minutes
- tests/test_documents.py: replace legacy /upload tests with xfail; add real test logic for upload-url/confirm/get-quota
- tests/test_quota.py: implement real test logic with xfail for PostgreSQL-specific SQL
This commit is contained in:
curo1305
2026-05-23 14:32:12 +02:00
parent 3ed6dd494f
commit 0d51d023ce
7 changed files with 626 additions and 196 deletions
+13 -55
View File
@@ -26,10 +26,9 @@ import json
import sys
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from sqlalchemy import select, delete
from sqlalchemy import select, delete, text
from sqlalchemy import func as sql_func
from sqlalchemy.ext.asyncio import AsyncSession
@@ -83,58 +82,6 @@ async def _load_topic_names(session: AsyncSession, doc_id: uuid.UUID) -> list:
# ── Documents ─────────────────────────────────────────────────────────────────
async def save_upload(
    session: AsyncSession,
    file_bytes: bytes,
    original_name: str,
    mime_type: str,
) -> dict:
    """Upload raw bytes to the storage backend and record a Document row.

    A ``Document`` is added to *session* with status ``"pending"`` and an
    empty ``object_key``, then flushed so the row exists before the upload
    starts. After the backend returns the final object key, the row is
    updated and the transaction is committed.

    D-03: ``user_id`` is ``None`` (no auth in Phase 1), and the backend is
    called with the ``"null-user"`` sentinel. Phase 2 is expected to replace
    the sentinel with ``str(current_user.id)``.

    Args:
        session: Async SQLAlchemy session used for the insert and commit.
        file_bytes: Complete file content to store.
        original_name: Client-supplied filename; its lower-cased suffix is
            passed to the backend as the extension.
        mime_type: Content type stored on the row and sent to the backend.

    Returns:
        A dict in the legacy shape expected by api/documents.py, with keys
        ``"id"``, ``"filename"``, ``"path"``, ``"object_key"`` and
        ``"user_id"``. ``"path"`` is kept for compatibility only and holds
        the object key rather than a filesystem path.
    """
    new_id = uuid.uuid4()
    extension = Path(original_name).suffix.lower()

    record = Document(
        id=new_id,
        user_id=None,  # D-03: nullable in Phase 1
        filename=original_name,
        content_type=mime_type,
        size_bytes=len(file_bytes),
        storage_backend="minio",
        status="pending",
        object_key="",  # placeholder until the backend reports the real key
    )
    session.add(record)
    # Flush (not commit) so the row is materialised before the upload runs.
    await session.flush()

    # D-03: "null-user" sentinel — Phase 2 replaces with str(current_user.id)
    backend = _backend()
    stored_key = await backend.put_object(
        user_id="null-user",
        document_id=str(new_id),
        file_bytes=file_bytes,
        extension=extension,
        content_type=mime_type,
    )

    record.object_key = stored_key
    await session.commit()

    result = {
        "id": str(new_id),
        "filename": original_name,
        "path": stored_key,
        "object_key": stored_key,
        "user_id": None,
    }
    return result
async def save_metadata(session: AsyncSession, meta: dict) -> None:
"""Update a Document row from the legacy metadata dict shape.
@@ -217,6 +164,18 @@ async def delete_document(session: AsyncSession, doc_id: str) -> bool:
except Exception as exc:
print(f"[storage] WARNING: MinIO delete_object failed for {doc.object_key!r}: {exc}", file=sys.stderr)
# Atomic quota decrement (STORE-06, D-07).
# The `user_id is not None` guard below is temporary; Plan 03-03 removes it.
if doc.user_id is not None:
await session.execute(
text(
"UPDATE quotas "
"SET used_bytes = GREATEST(0, used_bytes - :delta) "
"WHERE user_id = :uid"
),
{"delta": doc.size_bytes, "uid": str(doc.user_id)},
)
await session.delete(doc)
await session.commit()
return True
@@ -452,7 +411,6 @@ def settings_masked(settings: dict) -> dict:
# ── Public surface ─────────────────────────────────────────────────────────────
__all__ = [
"save_upload",
"save_metadata",
"get_metadata",
"list_metadata",