""" Live integration test against LM Studio at localhost:1234. Skipped automatically if LM Studio is not running. """ import pytest LMSTUDIO_MODEL = "gemma-4-e4b-uncensored-hauhaucs-aggressive" LMSTUDIO_BASE_URL = "http://localhost:1234/v1" @pytest.fixture(autouse=True) def require_lmstudio(): import httpx try: r = httpx.get(f"{LMSTUDIO_BASE_URL}/models", timeout=2.0) r.raise_for_status() except Exception: pytest.skip("LM Studio not reachable at localhost:1234") def test_basic_completion(): import litellm litellm.suppress_debug_info = True response = litellm.completion( model=f"openai/{LMSTUDIO_MODEL}", messages=[{"role": "user", "content": "Reply with exactly the word: PONG"}], api_base=LMSTUDIO_BASE_URL, api_key="lm-studio", max_tokens=20, stream=False, ) text = response.choices[0].message.content assert text and len(text) > 0 def test_streaming_completion(): import litellm litellm.suppress_debug_info = True stream = litellm.completion( model=f"openai/{LMSTUDIO_MODEL}", messages=[{"role": "user", "content": "Count from 1 to 3."}], api_base=LMSTUDIO_BASE_URL, api_key="lm-studio", max_tokens=50, stream=True, ) chunks = list(stream) assert len(chunks) > 0 full_text = "".join(c.choices[0].delta.content or "" for c in chunks) assert len(full_text) > 0 def test_injection_scan_on_live_response(tmp_pyra_home): """Verify injection scanner runs on real model output without false positives.""" import litellm from pyra.security.injection import scan_response litellm.suppress_debug_info = True response = litellm.completion( model=f"openai/{LMSTUDIO_MODEL}", messages=[{"role": "user", "content": "Explain what a list is in Python."}], api_base=LMSTUDIO_BASE_URL, api_key="lm-studio", max_tokens=200, stream=False, ) text = response.choices[0].message.content warnings = scan_response(text) # Normal responses about Python lists should not trigger injection warnings for w in warnings: print(f"[warning] {w.pattern_label}: {w.matched_text!r}") # Not asserting zero warnings — some models may have quirky phrasing — # but at least the scanner must not crash on real output def test_pyra_chat_session_with_lmstudio(tmp_pyra_home): """Full stack: config → vault → history → litellm → injection scan.""" from pyra.config.schema import PyraConfig, ProviderConfig from pyra.config.manager import save_config from pyra.chat.history import ConversationHistory import litellm litellm.suppress_debug_info = True cfg = PyraConfig( ai=ProviderConfig( provider_id="lmstudio", model=LMSTUDIO_MODEL, base_url=LMSTUDIO_BASE_URL, ) ) save_config(cfg) history = ConversationHistory(cfg) history.add_user("Say hello in one word.") messages = history.build_for_api() response = litellm.completion( model=f"openai/{LMSTUDIO_MODEL}", messages=messages, api_base=LMSTUDIO_BASE_URL, api_key="lm-studio", max_tokens=30, stream=False, ) text = response.choices[0].message.content assert text and len(text.strip()) > 0