feat(phase-4-05): document streaming proxy GET /api/documents/{id}/content (DOC-02)
- Add _parse_range() helper: validates Range header bounds and raises 416 on an invalid range
- Add stream_document_content endpoint with the get_regular_user dependency (admin → 403)
- Access check: owner OR matching Share.recipient_id; neither → 404
- Bytes are fetched via get_object() only — presigned_get_url() is never called
- Range requests return 206 with a Content-Range header
- Add pdf_open_mode column to the User ORM model (migration 0004 already applied)
- Use HTTP_416_RANGE_NOT_SATISFIABLE (the non-deprecated constant)
This commit is contained in:
@@ -20,7 +20,8 @@ import uuid
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request, status
|
||||
from fastapi.responses import StreamingResponse
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import select, text, func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
@@ -395,3 +396,97 @@ async def classify_document(
|
||||
raise HTTPException(500, f"Classification failed: {e}")
|
||||
|
||||
return {"topics": topics}
|
||||
|
||||
|
||||
# ── Range header parsing helper ───────────────────────────────────────────────
|
||||
|
||||
def _parse_range(range_header: str, file_size: int) -> tuple:
|
||||
"""Parse a 'bytes=X-Y' Range header and return (start, end).
|
||||
|
||||
Returns (start, end) where both are inclusive byte offsets.
|
||||
Raises HTTP 416 on any invalid or out-of-bounds range.
|
||||
|
||||
T-04-05-03: validates start <= end, start >= 0, end < file_size.
|
||||
"""
|
||||
try:
|
||||
h = range_header.replace("bytes=", "").split("-")
|
||||
start = int(h[0]) if h[0] != "" else 0
|
||||
end = int(h[1]) if h[1] != "" else file_size - 1
|
||||
except (ValueError, IndexError):
|
||||
raise HTTPException(status.HTTP_416_RANGE_NOT_SATISFIABLE)
|
||||
if start > end or start < 0 or end >= file_size:
|
||||
raise HTTPException(status.HTTP_416_RANGE_NOT_SATISFIABLE)
|
||||
return start, end
|
||||
|
||||
|
||||
# ── GET /api/documents/{doc_id}/content ──────────────────────────────────────
|
||||
|
||||
@router.get("/{doc_id}/content")
async def stream_document_content(
    doc_id: str,
    request: Request,
    session: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_regular_user),
):
    """Serve a document's bytes through the API instead of a presigned URL (DOC-02).

    T-04-05-01: depends on get_regular_user; per the design note, admin
        accounts are expected to be rejected with 403 by that dependency.
    T-04-05-02: bytes are fetched via get_object() ONLY -- presigned_get_url()
        is never called.
    T-04-05-03: the Range header is validated via _parse_range(); an invalid
        range results in 416.
    T-04-05-04: access is granted to the owner or to a user with a Share row
        whose recipient_id matches.

    Args:
        doc_id: Document UUID taken from the path.
        request: Incoming request; only its ``Range`` header is read.
        session: Database session used for the document and share lookups.
        current_user: Authenticated caller.

    Returns:
        A 200 response with the whole object, or a 206 response with the
        requested slice when a ``Range`` header is present. Headers set:
        Content-Type, Content-Disposition (inline), Accept-Ranges,
        Content-Length and, for 206, Content-Range.

    Raises:
        HTTPException: 404 when doc_id is not a UUID, the document does not
            exist, or the caller is neither owner nor share recipient (the
            same 404 in every case, so existence is not leaked); 416 from
            _parse_range() for an invalid range.
    """
    try:
        uid = uuid.UUID(doc_id)
    except ValueError:
        raise HTTPException(status_code=404, detail="Document not found") from None

    doc = await session.get(Document, uid)
    if doc is None:
        raise HTTPException(status_code=404, detail="Document not found")

    # Access control: owner OR share recipient (T-04-05-04).
    # NOTE(review): the design note says "active" share, but no expiry or
    # revocation column is checked here -- confirm against the Share model.
    if doc.user_id != current_user.id:
        result = await session.execute(
            select(Share)
            .where(
                Share.document_id == doc.id,
                Share.recipient_id == current_user.id,
            )
            .limit(1)
        )
        # first() rather than scalar_one_or_none(): duplicate share rows must
        # not turn an authorised request into a 500 (MultipleResultsFound).
        if result.scalars().first() is None:
            raise HTTPException(status_code=404, detail="Document not found")

    # Fetch bytes directly from storage -- NEVER via presigned URL (T-04-05-02).
    # NOTE(review): the whole object is held in memory before responding;
    # consider a ranged/streaming read if the storage backend offers one.
    file_bytes = await get_storage_backend().get_object(doc.object_key)
    file_size = len(file_bytes)

    headers = {
        # Fall back to a generic type so a missing content_type cannot break
        # header encoding.
        "content-type": doc.content_type or "application/octet-stream",
        "content-disposition": _inline_content_disposition(doc.filename),
        "accept-ranges": "bytes",
        "content-length": str(file_size),
    }

    range_header = request.headers.get("range")
    if not range_header:
        return StreamingResponse(
            iter([file_bytes]),
            status_code=200,
            headers=headers,
        )

    start, end = _parse_range(range_header, file_size)
    chunk = file_bytes[start : end + 1]
    headers["content-range"] = f"bytes {start}-{end}/{file_size}"
    headers["content-length"] = str(len(chunk))
    return StreamingResponse(
        iter([chunk]),
        status_code=206,
        headers=headers,
    )


def _inline_content_disposition(filename: str) -> str:
    """Build a header-safe ``Content-Disposition: inline`` value for *filename*.

    Control characters (including CR/LF) are dropped, and quotes/backslashes
    in the quoted ASCII fallback are replaced, so a stored filename cannot
    break out of the header. Names containing non-ASCII or replaced characters
    additionally get an RFC 5987 ``filename*=UTF-8''...`` parameter carrying
    the exact name. A plain ASCII name yields ``inline; filename="<name>"``.
    """
    from urllib.parse import quote

    cleaned = "".join(ch for ch in filename if ch >= " " and ch != "\x7f")
    ascii_fallback = (
        cleaned.encode("ascii", "replace")
        .decode("ascii")
        .replace("\\", "_")
        .replace('"', "_")
    )
    value = f'inline; filename="{ascii_fallback}"'
    if ascii_fallback != cleaned:
        value += f"; filename*=UTF-8''{quote(cleaned, safe='')}"
    return value
|
||||
|
||||
@@ -61,6 +61,9 @@ class User(Base):
|
||||
default_storage_backend: Mapped[str] = mapped_column(
|
||||
String, nullable=False, default="minio"
|
||||
)
|
||||
pdf_open_mode: Mapped[str] = mapped_column(
|
||||
String, nullable=False, server_default="in_app"
|
||||
)
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
TIMESTAMP(timezone=True), nullable=False, server_default=func.now()
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user