feat(setup): dynamic model discovery for local providers in wizard

Replace the static model text prompt with live API queries:

- _fetch_local_models(): queries /v1/models (LM Studio, llama.cpp) or /api/tags (Ollama) and returns a questionary.select list
- _fetch_lmstudio_available_models(): queries LM Studio's beta /api/v0/models to list downloaded-but-not-loaded models
- _load_lmstudio_model(): tries /api/v0/models/load to load a model in-place; falls back to telling the user to load manually
- Cloud providers keep the existing text-input behaviour

Also replace the hardcoded LMSTUDIO_MODEL in integration tests with an lmstudio_model fixture that queries the API at runtime and uses whichever model is currently loaded (skips if none).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 10:53:15 +02:00
parent 9735a5559e
commit 5eb81404c2
2 changed files with 123 additions and 30 deletions
@@ -1,31 +1,41 @@
 """
 Live integration test against LM Studio at localhost:1234.
-Skipped automatically if LM Studio is not running.
+Skipped automatically if LM Studio is not running or no model is loaded.
 """
+import httpx
 import pytest

-LMSTUDIO_MODEL = "gemma-4-e4b-uncensored-hauhaucs-aggressive"
-LMSTUDIO_BASE_URL = "http://localhost:1234/v1"
+_LMSTUDIO_BASE_URL = "http://localhost:1234/v1"


-@pytest.fixture(autouse=True)
-def require_lmstudio():
-    import httpx
+def _get_loaded_model() -> str | None:
+    """Return the first currently loaded model ID from LM Studio, or None."""
    try:
-        r = httpx.get(f"{LMSTUDIO_BASE_URL}/models", timeout=2.0)
-        r.raise_for_status()
+        resp = httpx.get(f"{_LMSTUDIO_BASE_URL}/models", timeout=2.0)
+        resp.raise_for_status()
+        models = resp.json().get("data", [])
+        return models[0]["id"] if models else None
    except Exception:
-        pytest.skip("LM Studio not reachable at localhost:1234")
+        return None


-def test_basic_completion():
+@pytest.fixture()
+def lmstudio_model() -> str:
+    """Resolve the first loaded model in LM Studio; skip if none available."""
+    model = _get_loaded_model()
+    if model is None:
+        pytest.skip("LM Studio not reachable or no model currently loaded")
+    return model
+
+
+def test_basic_completion(lmstudio_model):
    import litellm
    litellm.suppress_debug_info = True

    response = litellm.completion(
-        model=f"openai/{LMSTUDIO_MODEL}",
+        model=f"openai/{lmstudio_model}",
        messages=[{"role": "user", "content": "Reply with exactly the word: PONG"}],
-        api_base=LMSTUDIO_BASE_URL,
+        api_base=_LMSTUDIO_BASE_URL,
        api_key="lm-studio",
        max_tokens=20,
        stream=False,
@@ -34,14 +44,14 @@ def test_basic_completion():
    assert text and len(text) > 0


-def test_streaming_completion():
+def test_streaming_completion(lmstudio_model):
    import litellm
    litellm.suppress_debug_info = True

    stream = litellm.completion(
-        model=f"openai/{LMSTUDIO_MODEL}",
+        model=f"openai/{lmstudio_model}",
        messages=[{"role": "user", "content": "Count from 1 to 3."}],
-        api_base=LMSTUDIO_BASE_URL,
+        api_base=_LMSTUDIO_BASE_URL,
        api_key="lm-studio",
        max_tokens=50,
        stream=True,
@@ -52,30 +62,29 @@ def test_streaming_completion():
    assert len(full_text) > 0


-def test_injection_scan_on_live_response(tmp_pyra_home):
+def test_injection_scan_on_live_response(tmp_pyra_home, lmstudio_model):
    """Verify injection scanner runs on real model output without false positives."""
    import litellm
    from pyra.security.injection import scan_response
    litellm.suppress_debug_info = True

    response = litellm.completion(
-        model=f"openai/{LMSTUDIO_MODEL}",
+        model=f"openai/{lmstudio_model}",
        messages=[{"role": "user", "content": "Explain what a list is in Python."}],
-        api_base=LMSTUDIO_BASE_URL,
+        api_base=_LMSTUDIO_BASE_URL,
        api_key="lm-studio",
        max_tokens=200,
        stream=False,
    )
    text = response.choices[0].message.content
    warnings = scan_response(text)
-    # Normal responses about Python lists should not trigger injection warnings
-    for w in warnings:
-        print(f"[warning] {w.pattern_label}: {w.matched_text!r}")
    # Not asserting zero warnings — some models may have quirky phrasing —
    # but at least the scanner must not crash on real output
+    for w in warnings:
+        print(f"[warning] {w.pattern_label}: {w.matched_text!r}")


-def test_pyra_chat_session_with_lmstudio(tmp_pyra_home):
+def test_pyra_chat_session_with_lmstudio(tmp_pyra_home, lmstudio_model):
    """Full stack: config → vault → history → litellm → injection scan."""
    from pyra.config.schema import PyraConfig, ProviderConfig
    from pyra.config.manager import save_config
@@ -87,8 +96,8 @@ def test_pyra_chat_session_with_lmstudio(tmp_pyra_home):
    cfg = PyraConfig(
        ai=ProviderConfig(
            provider_id="lmstudio",
-            model=LMSTUDIO_MODEL,
-            base_url=LMSTUDIO_BASE_URL,
+            model=lmstudio_model,
+            base_url=_LMSTUDIO_BASE_URL,
        )
    )
    save_config(cfg)
@@ -98,9 +107,9 @@ def test_pyra_chat_session_with_lmstudio(tmp_pyra_home):
    messages = history.build_for_api()

    response = litellm.completion(
-        model=f"openai/{LMSTUDIO_MODEL}",
+        model=f"openai/{lmstudio_model}",
        messages=messages,
-        api_base=LMSTUDIO_BASE_URL,
+        api_base=_LMSTUDIO_BASE_URL,
        api_key="lm-studio",
        max_tokens=30,
        stream=False,