From 0b0cd07330e8226bd81cb09b6d4c8eb3f6ef44de Mon Sep 17 00:00:00 2001
From: curo1305 <curo1305@proton.me>
Date: Tue, 19 May 2026 00:03:14 +0200
Subject: [PATCH] fix(chat): fall back to streaming when provider rejects
 function calling

Some local models (e.g. Gemma on LM Studio) respond with HTTP 400 when
a request includes a tools spec. Wrap the tool-use loop in a try/except
for litellm.BadRequestError: on that error, tell the user that tools
are disabled and retry as a plain streaming call so the conversation
continues normally.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/pyra/chat/session.py | 42 +++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/src/pyra/chat/session.py b/src/pyra/chat/session.py
index aa24776..f378cab 100644
--- a/src/pyra/chat/session.py
+++ b/src/pyra/chat/session.py
@@ -282,25 +282,35 @@ def _call_ai(
         return render_streaming_response(stream)
 
     # Plugin tool-use loop (non-streaming for tool calls, renders final response)
-    for _iteration in range(10):
-        response = litellm.completion(
+    try:
+        for _iteration in range(10):
+            response = litellm.completion(
+                **base_kwargs,
+                messages=history.build_for_api(),
+                tools=tools_spec,
+                tool_choice="auto",
+                stream=False,
+            )
+            message = response.choices[0].message
+
+            if not message.tool_calls:
+                return render_text_response(message.content or "")
+
+            history.add_tool_call_message(message)
+            results = executor.execute_tool_call_batch(message.tool_calls)
+            for r in results:
+                history.add_tool_result(r["tool_call_id"], r["result"])
+
+        return render_text_response("Error: tool-use loop exceeded maximum iterations.")
+
+    except litellm.BadRequestError:
+        render_info("This model does not support function calling — tools disabled.")
+        stream = litellm.completion(
             **base_kwargs,
             messages=history.build_for_api(),
-            tools=tools_spec,
-            tool_choice="auto",
-            stream=False,
+            stream=True,
         )
-        message = response.choices[0].message
-
-        if not message.tool_calls:
-            return render_text_response(message.content or "")
-
-        history.add_tool_call_message(message)
-        results = executor.execute_tool_call_batch(message.tool_calls)
-        for r in results:
-            history.add_tool_result(r["tool_call_id"], r["result"])
-
-    return render_text_response("Error: tool-use loop exceeded maximum iterations.")
+        return render_streaming_response(stream)
 
 
 def _show_help(plugin_slash: dict) -> None: