""" Input sanitization utilities. Every string that originates from user input and is destined for the database MUST pass through these helpers before reaching a SQLAlchemy model or query. SQLAlchemy's ORM already uses bound parameters (no raw SQL), so these helpers address the layer above: ensuring data is well-formed, length-capped, and free of null bytes or control characters before it is stored. """ import re import unicodedata from datetime import date # ── Constants ───────────────────────────────────────────────────────────────── _PHONE_RE = re.compile(r"^\+?[\d\s\-()\[\]]{7,20}$") # ── Core helper ─────────────────────────────────────────────────────────────── def sanitize_str(value: str | None, max_len: int = 255) -> str | None: """Strip whitespace, reject null bytes and non-printable control characters, enforce a maximum length. Returns None unchanged so optional fields work naturally with ``Optional[str]`` annotations.""" if value is None: return None # Strip leading/trailing whitespace value = value.strip() # Reject null bytes (common injection vector) if "\x00" in value: raise ValueError("Input must not contain null bytes") # Reject ASCII control characters (0x01–0x1F, 0x7F) except tab/newline/CR # which may appear in multi-line fields. Use Unicode category 'Cc'. for ch in value: if unicodedata.category(ch) == "Cc" and ch not in ("\t", "\n", "\r"): raise ValueError("Input contains invalid control characters") if len(value) > max_len: raise ValueError(f"Input must not exceed {max_len} characters") return value if value != "" else None def normalize_email(value: str) -> str: """Lowercase and strip an email address.""" return value.strip().lower() def validate_phone(value: str | None) -> str | None: """Sanitize then validate phone number format.""" value = sanitize_str(value, max_len=20) if value is None: return None if not _PHONE_RE.match(value): raise ValueError( "Phone number may only contain digits, spaces, +, -, (, ) and [ ] " "and must be 7–20 characters" ) return value def validate_date_of_birth(value: date | None) -> date | None: """Reject obviously invalid birth dates (before 1900 or in the future).""" if value is None: return None if value.year < 1900: raise ValueError("Date of birth must be 1900 or later") if value > date.today(): raise ValueError("Date of birth must not be in the future") return value