"""Tests for the AI provider's JSON parsing helpers and the classifier flow.

The parsing tests feed hand-written response strings to the private helpers
of ``ai.openai_provider``.  The classifier test swaps the provider for a mock,
so no real AI calls are made.
"""

import json

import pytest

from ai.openai_provider import _parse_classification, _parse_suggestions, _strip_code_fences
from ai.base import ClassificationResult


def test_parse_clean_json():
    """A bare JSON object yields its assigned topics and no suggestions."""
    payload = '{"assigned_topics": ["finance", "invoices"], "new_topic_suggestions": []}'

    parsed = _parse_classification(payload)

    assert parsed.topics == ["finance", "invoices"]
    assert parsed.suggested_new_topics == []


def test_parse_with_code_fence():
    """JSON wrapped in a ```json fence is parsed like bare JSON."""
    payload = '```json\n{"assigned_topics": ["legal"], "new_topic_suggestions": ["contracts"]}\n```'

    parsed = _parse_classification(payload)

    assert parsed.topics == ["legal"]
    assert parsed.suggested_new_topics == ["contracts"]


def test_parse_with_preamble():
    """Text before and after the JSON object does not prevent parsing."""
    payload = 'Here is the classification:\n{"assigned_topics": ["hr"], "new_topic_suggestions": []}\nDone.'

    parsed = _parse_classification(payload)

    assert parsed.topics == ["hr"]


def test_parse_malformed_returns_empty():
    """A response containing no JSON produces an empty classification."""
    payload = "I cannot classify this document."

    parsed = _parse_classification(payload)

    assert parsed.topics == []
    assert parsed.suggested_new_topics == []


def test_strip_code_fences():
    """Only the fenced content remains once the fence markers are stripped."""
    fenced = "```json\n{}\n```"

    stripped = _strip_code_fences(fenced)

    assert stripped == "{}"


def test_parse_suggestions_clean():
    """Every suggested topic in a bare JSON object is returned."""
    payload = '{"suggested_topics": ["Human Resources", "Onboarding"]}'

    suggestions = _parse_suggestions(payload)

    for expected_name in ("Human Resources", "Onboarding"):
        assert expected_name in suggestions


def test_parse_suggestions_with_fence():
    """A fence without a language tag is handled as well."""
    payload = "```\n{\"suggested_topics\": [\"Finance\"]}\n```"

    suggestions = _parse_suggestions(payload)

    assert suggestions == ["Finance"]


def test_parse_suggestions_malformed():
    """A response containing no JSON produces an empty suggestion list."""
    payload = "No suggestions available."

    suggestions = _parse_suggestions(payload)

    assert suggestions == []


@pytest.mark.asyncio
async def test_classifier_with_mock_provider(isolated_data_dir):
    """Run ``classify_document`` end to end against a mocked provider.

    Checks the returned topics, that the suggested topic ends up in the
    stored topic list, and that the document metadata carries the topic.
    """
    from unittest.mock import AsyncMock, patch
    from ai.base import ClassificationResult
    import services.storage as st

    # Store the document that will be classified.
    document_id = "test-doc-1"
    document_record = {
        "id": document_id,
        "original_name": "test.txt",
        "filename": "test-doc-1.txt",
        "mime_type": "text/plain",
        "size_bytes": 50,
        "extracted_text": "Invoice for services rendered in March 2026.",
        "topics": [],
        "created_at": "2026-01-01T00:00:00Z",
        "classified_at": None,
    }
    st.save_metadata(document_record)

    # Pre-existing topics; "Invoices" is deliberately not among them.
    for topic_name in ("Finance", "Legal"):
        st.create_topic(topic_name)

    canned_result = ClassificationResult(
        topics=["Finance"],
        suggested_new_topics=["Invoices"],
        reasoning="Document is about financial invoicing.",
    )

    with patch("services.classifier.get_provider") as get_provider_stub:
        provider_stub = AsyncMock()
        provider_stub.classify = AsyncMock(return_value=canned_result)
        get_provider_stub.return_value = provider_stub

        from services.classifier import classify_document

        assigned = await classify_document(document_id)

    assert "Finance" in assigned
    assert "Invoices" in assigned

    # The suggested topic must now exist in the stored topic list.
    stored_topics = st.load_topics()
    assert any(entry["name"] == "Invoices" for entry in stored_topics)

    # The document's own metadata must carry the assigned topic.
    stored_meta = st.get_metadata(document_id)
    assert "Finance" in stored_meta["topics"]