""" Unit tests for AI provider JSON parsing robustness and classifier orchestration. Uses a mock provider — no real AI calls made. """ import json import pytest from ai.openai_provider import _parse_classification, _parse_suggestions, _strip_code_fences from ai.base import ClassificationResult def test_parse_clean_json(): raw = '{"assigned_topics": ["finance", "invoices"], "new_topic_suggestions": []}' result = _parse_classification(raw) assert result.topics == ["finance", "invoices"] assert result.suggested_new_topics == [] def test_parse_with_code_fence(): raw = '```json\n{"assigned_topics": ["legal"], "new_topic_suggestions": ["contracts"]}\n```' result = _parse_classification(raw) assert result.topics == ["legal"] assert result.suggested_new_topics == ["contracts"] def test_parse_with_preamble(): raw = 'Here is the classification:\n{"assigned_topics": ["hr"], "new_topic_suggestions": []}\nDone.' result = _parse_classification(raw) assert result.topics == ["hr"] def test_parse_malformed_returns_empty(): raw = "I cannot classify this document." result = _parse_classification(raw) assert result.topics == [] assert result.suggested_new_topics == [] def test_strip_code_fences(): raw = "```json\n{}\n```" assert _strip_code_fences(raw) == "{}" def test_parse_suggestions_clean(): raw = '{"suggested_topics": ["Human Resources", "Onboarding"]}' result = _parse_suggestions(raw) assert "Human Resources" in result assert "Onboarding" in result def test_parse_suggestions_with_fence(): raw = "```\n{\"suggested_topics\": [\"Finance\"]}\n```" result = _parse_suggestions(raw) assert result == ["Finance"] def test_parse_suggestions_malformed(): raw = "No suggestions available." result = _parse_suggestions(raw) assert result == [] @pytest.mark.asyncio async def test_classifier_with_mock_provider(isolated_data_dir): """Test classifier orchestration with a mock provider.""" from unittest.mock import AsyncMock, patch from ai.base import ClassificationResult import services.storage as st # Create a document doc_id = "test-doc-1" st.save_metadata({ "id": doc_id, "original_name": "test.txt", "filename": "test-doc-1.txt", "mime_type": "text/plain", "size_bytes": 50, "extracted_text": "Invoice for services rendered in March 2026.", "topics": [], "created_at": "2026-01-01T00:00:00Z", "classified_at": None, }) # Create some topics st.create_topic("Finance") st.create_topic("Legal") mock_result = ClassificationResult( topics=["Finance"], suggested_new_topics=["Invoices"], reasoning="Document is about financial invoicing.", ) with patch("services.classifier.get_provider") as mock_get_provider: mock_provider = AsyncMock() mock_provider.classify = AsyncMock(return_value=mock_result) mock_get_provider.return_value = mock_provider from services.classifier import classify_document topics = await classify_document(doc_id) assert "Finance" in topics assert "Invoices" in topics # Verify new topic was auto-created all_topics = st.load_topics() assert any(t["name"] == "Invoices" for t in all_topics) # Verify document was updated meta = st.get_metadata(doc_id) assert "Finance" in meta["topics"] # --------------------------------------------------------------------------- # Wave 0 xfail stubs for per-user AI provider resolution — Plan 03-04 # --------------------------------------------------------------------------- @pytest.mark.xfail(strict=False, reason="implemented in plan 03-04") async def test_per_user_provider(db_session): """When user.ai_provider='openai' and user.ai_model='gpt-4o', the classifier resolves _settings['active_provider'] == 'openai'. DOC-03: AI provider/model comes from the user's DB record, not from global config or the retired load_settings() flat file (CONTEXT.md D-14). """ assert True # scaffold @pytest.mark.xfail(strict=False, reason="implemented in plan 03-04") async def test_celery_task_uses_user_provider(db_session): """Calling _run(document_id) for a Document owned by user.ai_provider='anthropic' calls classifier with ai_provider='anthropic'. DOC-05: the Celery extract_and_classify task resolves per-user AI config via a second DB lookup (doc.user_id → user.ai_provider/ai_model) and passes it to the classifier (CONTEXT.md D-14). """ assert True # scaffold @pytest.mark.xfail(strict=False, reason="implemented in plan 03-04") async def test_default_provider_fallback(db_session): """When user.ai_provider is None, the classifier receives config.settings.default_ai_provider. D-15: fallback chain is user.ai_provider → DEFAULT_AI_PROVIDER env var → code default 'ollama' (CONTEXT.md D-15). """ assert True # scaffold