fix(setup): filter LM Studio models by state == "loaded_instance"
LM Studio's /v1/models returns all downloaded models, not just loaded ones. Use /api/v0/models with state filtering in both fetch_loaded_models() and _fetch_local_models() so only RAM-resident models are shown as loaded. This also restores the _choose_model() fallback that offers downloaded-but-unloaded models when nothing is active in LM Studio. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -386,7 +386,14 @@ def fetch_loaded_models(provider: Provider) -> list[str]:
|
|||||||
resp = httpx.get(f"{provider.base_url}/api/ps", timeout=3.0)
|
resp = httpx.get(f"{provider.base_url}/api/ps", timeout=3.0)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return [m["name"] for m in resp.json().get("models", [])]
|
return [m["name"] for m in resp.json().get("models", [])]
|
||||||
else:
|
elif provider.id == "lmstudio":
|
||||||
|
resp = httpx.get("http://localhost:1234/api/v0/models", timeout=3.0)
|
||||||
|
resp.raise_for_status()
|
||||||
|
return [
|
||||||
|
m["id"] for m in resp.json().get("data", [])
|
||||||
|
if m.get("state") == "loaded_instance"
|
||||||
|
]
|
||||||
|
else: # llamacpp — /models returns only the active loaded model
|
||||||
resp = httpx.get(f"{provider.base_url}/models", timeout=3.0)
|
resp = httpx.get(f"{provider.base_url}/models", timeout=3.0)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return [m["id"] for m in resp.json().get("data", [])]
|
return [m["id"] for m in resp.json().get("data", [])]
|
||||||
@@ -407,7 +414,7 @@ def _show_local_model_status(provider: Provider) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def _fetch_local_models(provider: Provider) -> list[str]:
|
def _fetch_local_models(provider: Provider) -> list[str]:
|
||||||
"""Return currently loaded/available models from a local provider's API."""
|
"""Return currently loaded models from a local provider's API."""
|
||||||
if not provider.base_url:
|
if not provider.base_url:
|
||||||
return []
|
return []
|
||||||
try:
|
try:
|
||||||
@@ -415,6 +422,13 @@ def _fetch_local_models(provider: Provider) -> list[str]:
|
|||||||
resp = httpx.get(f"{provider.base_url}/api/tags", timeout=3.0)
|
resp = httpx.get(f"{provider.base_url}/api/tags", timeout=3.0)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
return [m["name"] for m in resp.json().get("models", [])]
|
return [m["name"] for m in resp.json().get("models", [])]
|
||||||
|
elif provider.id == "lmstudio":
|
||||||
|
resp = httpx.get("http://localhost:1234/api/v0/models", timeout=3.0)
|
||||||
|
resp.raise_for_status()
|
||||||
|
return [
|
||||||
|
m["id"] for m in resp.json().get("data", [])
|
||||||
|
if m.get("state") == "loaded_instance"
|
||||||
|
]
|
||||||
else:
|
else:
|
||||||
resp = httpx.get(f"{provider.base_url}/models", timeout=3.0)
|
resp = httpx.get(f"{provider.base_url}/models", timeout=3.0)
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
|||||||
@@ -96,14 +96,34 @@ def test_suggest_plugins_multiple_categories(monkeypatch):
|
|||||||
|
|
||||||
# ── _fetch_local_models ────────────────────────────────────────────────────────
|
# ── _fetch_local_models ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def test_fetch_local_models_lmstudio_returns_model_ids(monkeypatch):
|
def test_fetch_local_models_lmstudio_returns_loaded_model_ids(monkeypatch):
|
||||||
import pyra.setup.wizard as wiz
|
import pyra.setup.wizard as wiz
|
||||||
mock_resp = MagicMock()
|
mock_resp = MagicMock()
|
||||||
mock_resp.json.return_value = {"data": [{"id": "gemma-4b"}, {"id": "llama3"}]}
|
mock_resp.json.return_value = {
|
||||||
|
"data": [
|
||||||
|
{"id": "gemma-4b", "state": "loaded_instance"},
|
||||||
|
{"id": "llama3", "state": "not_loaded"},
|
||||||
|
]
|
||||||
|
}
|
||||||
mock_resp.raise_for_status = lambda: None
|
mock_resp.raise_for_status = lambda: None
|
||||||
monkeypatch.setattr(wiz.httpx, "get", lambda *a, **kw: mock_resp)
|
monkeypatch.setattr(wiz.httpx, "get", lambda *a, **kw: mock_resp)
|
||||||
from pyra.setup.providers import get_provider
|
from pyra.setup.providers import get_provider
|
||||||
assert wiz._fetch_local_models(get_provider("lmstudio")) == ["gemma-4b", "llama3"]
|
assert wiz._fetch_local_models(get_provider("lmstudio")) == ["gemma-4b"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_local_models_lmstudio_filters_unloaded(monkeypatch):
|
||||||
|
import pyra.setup.wizard as wiz
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.json.return_value = {
|
||||||
|
"data": [
|
||||||
|
{"id": "model-a", "state": "not_loaded"},
|
||||||
|
{"id": "model-b", "state": "not_loaded"},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
mock_resp.raise_for_status = lambda: None
|
||||||
|
monkeypatch.setattr(wiz.httpx, "get", lambda *a, **kw: mock_resp)
|
||||||
|
from pyra.setup.providers import get_provider
|
||||||
|
assert wiz._fetch_local_models(get_provider("lmstudio")) == []
|
||||||
|
|
||||||
|
|
||||||
def test_fetch_local_models_ollama_returns_model_names(monkeypatch):
|
def test_fetch_local_models_ollama_returns_model_names(monkeypatch):
|
||||||
@@ -392,17 +412,37 @@ def test_fetch_loaded_models_ollama_uses_api_ps(monkeypatch):
|
|||||||
assert any("/api/ps" in u for u in calls)
|
assert any("/api/ps" in u for u in calls)
|
||||||
|
|
||||||
|
|
||||||
def test_fetch_loaded_models_lmstudio_uses_models_endpoint(monkeypatch):
|
def test_fetch_loaded_models_lmstudio_uses_beta_api_and_filters(monkeypatch):
|
||||||
import pyra.setup.wizard as wiz
|
import pyra.setup.wizard as wiz
|
||||||
from pyra.setup.providers import get_provider
|
from pyra.setup.providers import get_provider
|
||||||
mock_resp = MagicMock()
|
mock_resp = MagicMock()
|
||||||
mock_resp.json.return_value = {"data": [{"id": "gemma-4b"}]}
|
mock_resp.json.return_value = {
|
||||||
|
"data": [
|
||||||
|
{"id": "gemma-4b", "state": "loaded_instance"},
|
||||||
|
{"id": "llama3", "state": "not_loaded"},
|
||||||
|
]
|
||||||
|
}
|
||||||
mock_resp.raise_for_status = lambda: None
|
mock_resp.raise_for_status = lambda: None
|
||||||
calls = []
|
calls = []
|
||||||
monkeypatch.setattr(wiz.httpx, "get", lambda url, **kw: (calls.append(url), mock_resp)[1])
|
monkeypatch.setattr(wiz.httpx, "get", lambda url, **kw: (calls.append(url), mock_resp)[1])
|
||||||
result = wiz.fetch_loaded_models(get_provider("lmstudio"))
|
result = wiz.fetch_loaded_models(get_provider("lmstudio"))
|
||||||
assert result == ["gemma-4b"]
|
assert result == ["gemma-4b"]
|
||||||
assert any("/models" in u for u in calls)
|
assert any("/api/v0/models" in u for u in calls)
|
||||||
|
|
||||||
|
|
||||||
|
def test_fetch_loaded_models_lmstudio_filters_unloaded(monkeypatch):
|
||||||
|
import pyra.setup.wizard as wiz
|
||||||
|
from pyra.setup.providers import get_provider
|
||||||
|
mock_resp = MagicMock()
|
||||||
|
mock_resp.json.return_value = {
|
||||||
|
"data": [
|
||||||
|
{"id": "model-a", "state": "not_loaded"},
|
||||||
|
{"id": "model-b", "state": "not_loaded"},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
mock_resp.raise_for_status = lambda: None
|
||||||
|
monkeypatch.setattr(wiz.httpx, "get", lambda url, **kw: mock_resp)
|
||||||
|
assert wiz.fetch_loaded_models(get_provider("lmstudio")) == []
|
||||||
|
|
||||||
|
|
||||||
def test_fetch_loaded_models_returns_empty_on_error(monkeypatch):
|
def test_fetch_loaded_models_returns_empty_on_error(monkeypatch):
|
||||||
|
|||||||
Reference in New Issue
Block a user