Skip to main content

zeroclaw_providers/
ollama.rs

1use crate::multimodal;
2use crate::traits::{
3    ChatMessage, ChatResponse, ModelProvider, ProviderCapabilities, TokenUsage, ToolCall,
4    ToolsPayload,
5};
6use async_trait::async_trait;
7use reqwest::Client;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
/// Matches Ollama's upstream Modelfile default
/// (<https://docs.ollama.com/modelfile>): "Increasing the temperature will
/// make the model answer more creatively. (Default: 0.8)".
const TEMPERATURE_DEFAULT: f64 = 0.8;
/// Local inference is CPU/GPU-bound; give it more headroom than cloud calls.
//
// NOTE(review): `OllamaModelProvider::http_client` passes the literal `300`
// to the client builder instead of this constant — confirm which of the two
// values is the intended request timeout.
const TIMEOUT_SECS_DEFAULT: u64 = 600;
/// Ollama's standard localhost endpoint. Overridable via
/// `model_providers.<name>.base-url` for remote GPU boxes or non-default ports.
pub(crate) const BASE_URL: &str = "http://localhost:11434";
20
/// Default `num_ctx` (context window, in tokens) sent in every Ollama
/// `/api/chat` request when no operator override is supplied. Ollama's
/// server-side default is 2048, which silently truncates prompts; we set
/// 8192 so callers get useful context without per-call configuration.
///
/// This is the value `OllamaTuning::default()` stores in `num_ctx`.
pub const OLLAMA_DEFAULT_NUM_CTX: u32 = 8192;

/// Default `num_predict` (max output tokens) sent in every Ollama
/// `/api/chat` request when no operator override is supplied. Ollama's
/// server-side default is 128, which silently truncates responses.
///
/// This is the value `OllamaTuning::default()` stores in `num_predict`.
pub const OLLAMA_DEFAULT_NUM_PREDICT: i32 = 2048;
31
/// Per-deployment tuning knobs for the Ollama provider. Bundled into
/// every `/api/chat` request's `options` field so the wire payload is
/// explicit instead of relying on Ollama server defaults.
///
/// Note: temperature is intentionally NOT held as a default here.
/// `temperature_override` is `Some(v)` only when an operator explicitly
/// sets `ollama_temperature_override` in `config.toml`; otherwise the
/// per-call temperature passed through `ModelProvider::chat_with_system(..)`
/// wins (preserving backward compatibility with `TEMPERATURE_DEFAULT`).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct OllamaTuning {
    /// Context window size in tokens, sent as `options.num_ctx`.
    pub num_ctx: u32,
    /// Maximum number of output tokens, sent as `options.num_predict`.
    pub num_predict: i32,
    /// Operator-supplied override for the per-call temperature passed
    /// through `ModelProvider::chat_with_system(.., temperature)`. When
    /// `Some(v)`, every Ollama `/api/chat` request uses `v` regardless
    /// of the per-call argument — this is the wire knob behind the
    /// `ollama_temperature_override` config field. When `None`, the
    /// per-call temperature wins (full backward compatibility).
    //
    // Why only this field is an `Option`: `None` carries a real meaning for
    // temperature ("let the per-call value win"). `num_ctx` / `num_predict`
    // are optional only as arguments of `from_runtime_overrides`, which
    // resolves an unset value to the framework constant, so the stored
    // fields are always concrete — there is no "let the call decide" mode
    // for those two.
    pub temperature_override: Option<f64>,
}
60
/// Framework defaults: `OLLAMA_DEFAULT_NUM_CTX`, `OLLAMA_DEFAULT_NUM_PREDICT`
/// and no temperature override (the per-call temperature is used).
impl Default for OllamaTuning {
    fn default() -> Self {
        Self {
            num_ctx: OLLAMA_DEFAULT_NUM_CTX,
            num_predict: OLLAMA_DEFAULT_NUM_PREDICT,
            // `None` means "do not override the per-call temperature".
            temperature_override: None,
        }
    }
}
70
71impl OllamaTuning {
72    /// Build a tuning struct from the three optional `ModelProviderRuntimeOptions`
73    /// fields the `ollama` factory arm consumes. Unset `num_ctx` /
74    /// `num_predict` fall back to framework constants; unset
75    /// `temperature_override` stays `None` so the per-call temperature wins.
76    #[must_use]
77    pub fn from_runtime_overrides(
78        num_ctx: Option<u32>,
79        num_predict: Option<i32>,
80        temperature_override: Option<f64>,
81    ) -> Self {
82        let defaults = Self::default();
83        Self {
84            num_ctx: num_ctx.unwrap_or(defaults.num_ctx),
85            num_predict: num_predict.unwrap_or(defaults.num_predict),
86            temperature_override,
87        }
88    }
89}
90
/// Model provider that talks to an Ollama server's `/api/chat` endpoint.
pub struct OllamaModelProvider {
    /// `[providers.models.ollama.<alias>]` config-key alias.
    alias: String,
    /// Server root URL as produced by `normalize_base_url` (no trailing
    /// slash, no `/api` or `/api/chat` suffix).
    base_url: String,
    /// Bearer token; the constructor stores `None` for a missing or blank key.
    api_key: Option<String>,
    /// Value sent as the request's `think` field; `None` omits the field.
    reasoning_enabled: Option<bool>,
    /// `num_ctx` / `num_predict` / temperature override applied to every request.
    tuning: OllamaTuning,
}
99
100// ─── Request Structures ───────────────────────────────────────────────────────
101
/// JSON body serialized into the POST to `{base_url}/api/chat`.
#[derive(Debug, Serialize)]
struct ChatRequest {
    model: String,
    messages: Vec<Message>,
    /// The request builder in this file always sets this to `false`.
    stream: bool,
    options: Options,
    /// Omitted from the JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    think: Option<bool>,
    /// Native tool definitions; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<serde_json::Value>>,
}
113
/// One entry of the outgoing `messages` array. Every optional field is left
/// out of the serialized JSON when it is `None`.
#[derive(Debug, Clone, Serialize)]
struct Message {
    role: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    /// Image payload strings extracted from image markers in user messages.
    #[serde(skip_serializing_if = "Option::is_none")]
    images: Option<Vec<String>>,
    /// Structured tool calls replayed on assistant messages.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<OutgoingToolCall>>,
    /// Name of the tool a `tool`-role message is the result of.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_name: Option<String>,
}
126
/// A tool call attached to an outgoing assistant message.
#[derive(Debug, Clone, Serialize)]
struct OutgoingToolCall {
    /// Serialized under the JSON key `type`; `convert_messages` sets it to
    /// `"function"`.
    #[serde(rename = "type")]
    kind: String,
    function: OutgoingFunction,
}
133
/// Function name plus arguments of an outgoing tool call.
#[derive(Debug, Clone, Serialize)]
struct OutgoingFunction {
    name: String,
    /// Sent as a JSON value (not a JSON-encoded string).
    arguments: serde_json::Value,
}
139
/// The `options` object of a chat request. Fields that are `None` are not
/// serialized.
#[derive(Debug, Serialize)]
struct Options {
    /// Effective temperature: the tuning override if set, else the per-call value.
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    /// Context window in tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    num_ctx: Option<u32>,
    /// Maximum number of output tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    num_predict: Option<i32>,
}
149
150// ─── Response Structures ──────────────────────────────────────────────────────
151
/// Subset of the `/api/chat` response body that this provider reads.
#[derive(Debug, Deserialize)]
struct ApiChatResponse {
    message: ResponseMessage,
    /// Reported as `TokenUsage::input_tokens`; `None` when absent.
    #[serde(default)]
    prompt_eval_count: Option<u64>,
    /// Reported as `TokenUsage::output_tokens`; `None` when absent.
    #[serde(default)]
    eval_count: Option<u64>,
}
160
/// The assistant message inside a chat response. Missing fields fall back to
/// their `Default` value (empty string, empty list, `None`).
#[derive(Debug, Deserialize)]
struct ResponseMessage {
    #[serde(default)]
    content: String,
    #[serde(default)]
    tool_calls: Vec<OllamaToolCall>,
    /// Some models return a "thinking" field with internal reasoning
    #[serde(default)]
    thinking: Option<String>,
}
171
/// A tool call as returned by Ollama.
#[derive(Debug, Deserialize)]
struct OllamaToolCall {
    /// Call id if the server supplied one; `response_to_chat_response`
    /// generates a UUID when it is missing.
    id: Option<String>,
    function: OllamaFunction,
}
177
/// Function name and arguments of a returned tool call.
#[derive(Debug, Deserialize)]
struct OllamaFunction {
    name: String,
    /// Parsed through `deserialize_args`, which also accepts arguments that
    /// arrive as a JSON-encoded string; `Value::Null` when the field is absent.
    #[serde(default, deserialize_with = "deserialize_args")]
    arguments: serde_json::Value,
}
184
185// ─── serde Helpers ───────────────────────────────────────────────────────────
186fn deserialize_args<'de, D>(deserializer: D) -> Result<serde_json::Value, D::Error>
187where
188    D: serde::Deserializer<'de>,
189{
190    let value = serde_json::Value::deserialize(deserializer)?;
191
192    if let Some(s) = value.as_str() {
193        match serde_json::from_str::<serde_json::Value>(s) {
194            Ok(v) => Ok(v),
195            Err(_) => Ok(serde_json::json!({})),
196        }
197    } else {
198        Ok(value)
199    }
200}
201// ─── Implementation ───────────────────────────────────────────────────────────
202
203impl OllamaModelProvider {
204    fn normalize_base_url(raw_url: &str) -> String {
205        let trimmed = raw_url.trim().trim_end_matches('/');
206        if trimmed.is_empty() {
207            return String::new();
208        }
209
210        trimmed
211            .strip_suffix("/api/chat")
212            .or_else(|| trimmed.strip_suffix("/api"))
213            .unwrap_or(trimmed)
214            .trim_end_matches('/')
215            .to_string()
216    }
217
    /// Create a provider with `reasoning_enabled` left unset (`None`).
    ///
    /// Delegates to [`Self::new_with_reasoning`]; see there for how
    /// `base_url` and `api_key` are treated.
    pub fn new(alias: &str, base_url: Option<&str>, api_key: Option<&str>) -> Self {
        Self::new_with_reasoning(alias, base_url, api_key, None)
    }
221
222    pub fn new_with_reasoning(
223        alias: &str,
224        base_url: Option<&str>,
225        api_key: Option<&str>,
226        reasoning_enabled: Option<bool>,
227    ) -> Self {
228        let api_key = api_key.and_then(|value| {
229            let trimmed = value.trim();
230            (!trimmed.is_empty()).then(|| trimmed.to_string())
231        });
232
233        Self {
234            alias: alias.to_string(),
235            base_url: Self::normalize_base_url(base_url.unwrap_or(BASE_URL)),
236            api_key,
237            reasoning_enabled,
238            tuning: OllamaTuning::default(),
239        }
240    }
    /// Override the per-deployment tuning knobs (`num_ctx`, `num_predict`,
    /// `temperature_override`) on this provider. Returns `self` for
    /// chained construction.
    ///
    /// The whole `OllamaTuning` value is replaced, not merged field by field.
    #[must_use]
    pub fn with_tuning(mut self, tuning: OllamaTuning) -> Self {
        self.tuning = tuning;
        self
    }
249
    /// Test-only accessor returning a copy of the active tuning knobs.
    #[cfg(test)]
    pub(crate) fn tuning(&self) -> OllamaTuning {
        self.tuning
    }
254
255    fn is_local_endpoint(&self) -> bool {
256        reqwest::Url::parse(&self.base_url)
257            .ok()
258            .and_then(|url| url.host_str().map(|host| host.to_string()))
259            .is_some_and(|host| {
260                matches!(host.as_str(), "localhost" | "127.0.0.1" | "::1" | "0.0.0.0")
261            })
262    }
263
264    fn is_official_cloud_endpoint(&self) -> bool {
265        reqwest::Url::parse(&self.base_url)
266            .ok()
267            .and_then(|url| {
268                url.host_str().map(|host| {
269                    host.eq_ignore_ascii_case("ollama.com")
270                        || host.eq_ignore_ascii_case("api.ollama.com")
271                })
272            })
273            .unwrap_or(false)
274    }
275
    /// Obtain the HTTP client used for Ollama requests from the shared
    /// runtime-proxy client builder, keyed as `"model_provider.ollama"`.
    ///
    /// NOTE(review): the two numeric arguments are presumably the request
    /// timeout and the connect timeout in seconds — confirm against
    /// `build_runtime_proxy_client_with_timeouts`. The literal `300` also
    /// differs from `TIMEOUT_SECS_DEFAULT` (600) declared at the top of this
    /// file; verify which value is intended.
    fn http_client(&self) -> Client {
        zeroclaw_config::schema::build_runtime_proxy_client_with_timeouts(
            "model_provider.ollama",
            300,
            10,
        )
    }
283
284    fn resolve_request_details(&self, model: &str) -> anyhow::Result<(String, bool)> {
285        let requests_cloud = model.ends_with(":cloud");
286        let official_cloud_endpoint = self.is_official_cloud_endpoint();
287        let local_endpoint = self.is_local_endpoint();
288        let normalized_model = if requests_cloud && official_cloud_endpoint {
289            model.strip_suffix(":cloud").unwrap_or(model).to_string()
290        } else {
291            model.to_string()
292        };
293
294        if requests_cloud && local_endpoint {
295            anyhow::bail!(
296                "Model '{}' requested cloud routing, but Ollama endpoint is local. Configure api_url with a remote Ollama endpoint.",
297                model
298            );
299        }
300
301        if requests_cloud && official_cloud_endpoint && self.api_key.is_none() {
302            anyhow::bail!(
303                "Model '{}' requested cloud routing, but no API key is configured. Set api_key on [providers.models.ollama.<alias>] or via the schema-mirror grammar.",
304                model
305            );
306        }
307
308        let should_auth = self.api_key.is_some() && !local_endpoint;
309
310        Ok((normalized_model, should_auth))
311    }
312
313    fn parse_tool_arguments(arguments: &str) -> serde_json::Value {
314        serde_json::from_str(arguments).unwrap_or_else(|_| serde_json::json!({}))
315    }
316
317    fn normalize_response_text(content: String) -> Option<String> {
318        let stripped = Self::strip_think_tags(&content);
319        if stripped.trim().is_empty() {
320            None
321        } else {
322            Some(stripped)
323        }
324    }
325
326    /// Remove `<think>...</think>` blocks from model output.
327    /// Qwen and other reasoning models may embed chain-of-thought inline
328    /// in the `content` field using `<think>` tags.  These must be stripped
329    /// before returning text to the user or parsing for tool calls.
330    fn strip_think_tags(s: &str) -> String {
331        let mut result = String::with_capacity(s.len());
332        let mut rest = s;
333        loop {
334            if let Some(start) = rest.find("<think>") {
335                result.push_str(&rest[..start]);
336                if let Some(end) = rest[start..].find("</think>") {
337                    rest = &rest[start + end + "</think>".len()..];
338                } else {
339                    // Unclosed tag: drop the rest to avoid leaking partial reasoning.
340                    break;
341                }
342            } else {
343                result.push_str(rest);
344                break;
345            }
346        }
347        result.trim().to_string()
348    }
349
350    /// Derive the effective text content from a response, stripping `<think>` tags
351    /// and falling back to the `thinking` field when `content` is empty after
352    /// stripping.  This ensures that tool-call XML tags embedded alongside (or
353    /// after) thinking blocks are preserved for downstream parsing.
354    fn effective_content(content: &str, thinking: Option<&str>) -> Option<String> {
355        // First try the content field with think tags stripped.
356        let stripped = Self::strip_think_tags(content);
357        if !stripped.trim().is_empty() {
358            return Some(stripped);
359        }
360
361        // Content was empty or only thinking — check the thinking field.
362        // Some models (Qwen) put the full output including tool-call XML in
363        // the thinking field when `think: true` is set.
364        if let Some(thinking) = thinking.map(str::trim).filter(|t| !t.is_empty()) {
365            let stripped_thinking = Self::strip_think_tags(thinking);
366            if !stripped_thinking.trim().is_empty() {
367                ::zeroclaw_log::record!(
368                    DEBUG,
369                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
370                    &format!(
371                        "Ollama: using thinking field as effective content ({} chars)",
372                        stripped_thinking.len()
373                    )
374                );
375                return Some(stripped_thinking);
376            }
377        }
378
379        None
380    }
381
382    fn fallback_text_for_empty_content(model: &str, thinking: Option<&str>) -> String {
383        if let Some(thinking) = thinking.map(str::trim).filter(|value| !value.is_empty()) {
384            let thinking_log_excerpt: String = thinking.chars().take(100).collect();
385            let thinking_reply_excerpt: String = thinking.chars().take(200).collect();
386            ::zeroclaw_log::record!(
387                WARN,
388                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
389                    .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
390                &format!(
391                    "Ollama returned empty content with only thinking for model '{}': '{}'. Model may have stopped prematurely.",
392                    model, thinking_log_excerpt
393                )
394            );
395            return format!(
396                "I was thinking about this: {}... but I didn't complete my response. Could you try asking again?",
397                thinking_reply_excerpt
398            );
399        }
400
401        ::zeroclaw_log::record!(
402            WARN,
403            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
404                .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
405            &format!(
406                "Ollama returned empty or whitespace content with no tool calls for model '{}'",
407                model
408            )
409        );
410        "I couldn't get a complete response from Ollama. Please try again or switch to a different model."
411            .to_string()
412    }
413
    /// Build a chat request using the provider's configured
    /// `reasoning_enabled` value as the `think` field.
    ///
    /// Thin wrapper over `build_chat_request_with_think`; marked
    /// `allow(dead_code)` because nothing in the visible part of this file
    /// calls it (presumably kept for tests — confirm before removing).
    #[allow(dead_code)]
    fn build_chat_request(
        &self,
        messages: Vec<Message>,
        model: &str,
        temperature: Option<f64>,
        tools: Option<&[serde_json::Value]>,
    ) -> ChatRequest {
        self.build_chat_request_with_think(
            messages,
            model,
            temperature,
            tools,
            self.reasoning_enabled,
        )
    }
430
431    /// Build a chat request with an explicit `think` value.
432    fn build_chat_request_with_think(
433        &self,
434        messages: Vec<Message>,
435        model: &str,
436        temperature: Option<f64>,
437        tools: Option<&[serde_json::Value]>,
438        think: Option<bool>,
439    ) -> ChatRequest {
440        ChatRequest {
441            model: model.to_string(),
442            messages,
443            stream: false,
444            options: Options {
445                temperature: self.tuning.temperature_override.or(temperature),
446                num_ctx: Some(self.tuning.num_ctx),
447                num_predict: Some(self.tuning.num_predict),
448            },
449            think,
450            tools: tools.map(|t| t.to_vec()),
451        }
452    }
453
454    fn convert_user_message_content(&self, content: &str) -> (Option<String>, Option<Vec<String>>) {
455        let (cleaned, image_refs) = multimodal::parse_image_markers(content);
456        if image_refs.is_empty() {
457            return (Some(content.to_string()), None);
458        }
459
460        let images: Vec<String> = image_refs
461            .iter()
462            .filter_map(|reference| multimodal::extract_ollama_image_payload(reference))
463            .collect();
464
465        if images.is_empty() {
466            return (Some(content.to_string()), None);
467        }
468
469        let cleaned = cleaned.trim();
470        let content = if cleaned.is_empty() {
471            None
472        } else {
473            Some(cleaned.to_string())
474        };
475
476        (content, Some(images))
477    }
478
479    /// Convert internal chat history format to Ollama's native tool-call message schema.
480    ///
481    /// `run_tool_call_loop` stores native assistant/tool entries as JSON strings in
482    /// `ChatMessage.content`. We decode those payloads here so follow-up requests send
483    /// structured `assistant.tool_calls` and `tool.tool_name`, as expected by Ollama.
484    fn convert_messages(&self, messages: &[ChatMessage]) -> Vec<Message> {
485        let mut tool_name_by_id: HashMap<String, String> = HashMap::new();
486
487        messages
488            .iter()
489            .map(|message| {
490                if message.role == "assistant"
491                    && let Ok(value) = serde_json::from_str::<serde_json::Value>(&message.content)
492                    && let Some(tool_calls_value) = value.get("tool_calls")
493                    && let Ok(parsed_calls) =
494                        serde_json::from_value::<Vec<ToolCall>>(tool_calls_value.clone())
495                {
496                    let outgoing_calls: Vec<OutgoingToolCall> = parsed_calls
497                        .into_iter()
498                        .map(|call| {
499                            tool_name_by_id.insert(call.id.clone(), call.name.clone());
500                            OutgoingToolCall {
501                                kind: "function".to_string(),
502                                function: OutgoingFunction {
503                                    name: call.name,
504                                    arguments: Self::parse_tool_arguments(&call.arguments),
505                                },
506                            }
507                        })
508                        .collect();
509                    let content = value
510                        .get("content")
511                        .and_then(serde_json::Value::as_str)
512                        .map(ToString::to_string);
513                    return Message {
514                        role: "assistant".to_string(),
515                        content,
516                        images: None,
517                        tool_calls: Some(outgoing_calls),
518                        tool_name: None,
519                    };
520                }
521
522                if message.role == "tool"
523                    && let Ok(value) = serde_json::from_str::<serde_json::Value>(&message.content)
524                {
525                    let tool_name = value
526                        .get("tool_name")
527                        .and_then(serde_json::Value::as_str)
528                        .map(ToString::to_string)
529                        .or_else(|| {
530                            value
531                                .get("tool_call_id")
532                                .and_then(serde_json::Value::as_str)
533                                .and_then(|id| tool_name_by_id.get(id))
534                                .cloned()
535                        });
536                    let content = value
537                        .get("content")
538                        .and_then(serde_json::Value::as_str)
539                        .map(ToString::to_string)
540                        .or_else(|| {
541                            (!message.content.trim().is_empty()).then_some(message.content.clone())
542                        });
543
544                    return Message {
545                        role: "tool".to_string(),
546                        content,
547                        images: None,
548                        tool_calls: None,
549                        tool_name,
550                    };
551                }
552
553                if message.role == "user" {
554                    let (content, images) = self.convert_user_message_content(&message.content);
555                    return Message {
556                        role: "user".to_string(),
557                        content,
558                        images,
559                        tool_calls: None,
560                        tool_name: None,
561                    };
562                }
563
564                Message {
565                    role: message.role.clone(),
566                    content: Some(message.content.clone()),
567                    images: None,
568                    tool_calls: None,
569                    tool_name: None,
570                }
571            })
572            .collect()
573    }
574
575    fn with_prompt_guided_tool_instructions(
576        &self,
577        messages: &[ChatMessage],
578        tools: Option<&[zeroclaw_api::tool::ToolSpec]>,
579    ) -> anyhow::Result<Vec<ChatMessage>> {
580        let Some(tools) = tools.filter(|items| !items.is_empty()) else {
581            return Ok(messages.to_vec());
582        };
583
584        let ToolsPayload::PromptGuided { instructions } = self.convert_tools(tools) else {
585            anyhow::bail!(
586                "Ollama returned non-prompt-guided tools payload while native tools are disabled"
587            );
588        };
589        let mut modified_messages = messages.to_vec();
590
591        if let Some(system_message) = modified_messages.iter_mut().find(|m| m.role == "system") {
592            if !system_message.content.is_empty() {
593                system_message.content.push_str("\n\n");
594            }
595            system_message.content.push_str(&instructions);
596        } else {
597            modified_messages.insert(0, ChatMessage::system(instructions));
598        }
599
600        Ok(modified_messages)
601    }
602
603    fn response_to_chat_response(&self, response: ApiChatResponse, model: &str) -> ChatResponse {
604        let usage = if response.prompt_eval_count.is_some() || response.eval_count.is_some() {
605            Some(TokenUsage {
606                input_tokens: response.prompt_eval_count,
607                output_tokens: response.eval_count,
608                cached_input_tokens: None,
609            })
610        } else {
611            None
612        };
613
614        if !response.message.tool_calls.is_empty() {
615            let tool_calls: Vec<ToolCall> = response
616                .message
617                .tool_calls
618                .iter()
619                .map(|tc| {
620                    let (name, args) = self.extract_tool_name_and_args(tc);
621                    ToolCall {
622                        id: tc
623                            .id
624                            .clone()
625                            .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
626                        name,
627                        arguments: serde_json::to_string(&args)
628                            .unwrap_or_else(|_| "{}".to_string()),
629                        extra_content: None,
630                    }
631                })
632                .collect();
633            let text = Self::normalize_response_text(response.message.content);
634            return ChatResponse {
635                text,
636                tool_calls,
637                usage,
638                reasoning_content: None,
639            };
640        }
641
642        let text = Self::effective_content(
643            &response.message.content,
644            response.message.thinking.as_deref(),
645        )
646        .unwrap_or_else(|| {
647            Self::fallback_text_for_empty_content(model, response.message.thinking.as_deref())
648        });
649
650        ChatResponse {
651            text: Some(text),
652            tool_calls: vec![],
653            usage,
654            reasoning_content: None,
655        }
656    }
657
658    /// Send a single HTTP request to Ollama and parse the response.
659    async fn send_request_inner(
660        &self,
661        messages: &[Message],
662        model: &str,
663        temperature: Option<f64>,
664        should_auth: bool,
665        tools: Option<&[serde_json::Value]>,
666        think: Option<bool>,
667    ) -> anyhow::Result<ApiChatResponse> {
668        let request =
669            self.build_chat_request_with_think(messages.to_vec(), model, temperature, tools, think);
670
671        let url = format!("{}/api/chat", self.base_url);
672
673        ::zeroclaw_log::record!(
674            DEBUG,
675            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
676            &format!(
677                "Ollama request: url={} model={} message_count={} temperature={:?} think={:?} tool_count={}",
678                url,
679                model,
680                request.messages.len(),
681                temperature,
682                request.think,
683                request.tools.as_ref().map_or(0, |t| t.len())
684            )
685        );
686
687        let mut request_builder = self.http_client().post(&url).json(&request);
688
689        if should_auth && let Some(key) = self.api_key.as_ref() {
690            request_builder = request_builder.bearer_auth(key);
691        }
692
693        let response = request_builder.send().await?;
694        let status = response.status();
695        ::zeroclaw_log::record!(
696            DEBUG,
697            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
698            &format!("response status: {}", status)
699        );
700
701        let body = response.bytes().await?;
702        ::zeroclaw_log::record!(
703            DEBUG,
704            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
705            &format!("response body length: {} bytes", body.len())
706        );
707
708        if !status.is_success() {
709            let raw = String::from_utf8_lossy(&body);
710            let sanitized = super::sanitize_api_error(&raw);
711            ::zeroclaw_log::record!(
712                ERROR,
713                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
714                    .with_outcome(::zeroclaw_log::EventOutcome::Failure),
715                &format!(
716                    "Ollama error response: status={} body_excerpt={}",
717                    status, sanitized
718                )
719            );
720            anyhow::bail!(
721                "Ollama API error ({}): {}. Is Ollama running? (brew install ollama && ollama serve)",
722                status,
723                sanitized
724            );
725        }
726
727        let chat_response: ApiChatResponse = match serde_json::from_slice(&body) {
728            Ok(r) => r,
729            Err(e) => {
730                let raw = String::from_utf8_lossy(&body);
731                let sanitized = super::sanitize_api_error(&raw);
732                ::zeroclaw_log::record!(
733                    ERROR,
734                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
735                        .with_outcome(::zeroclaw_log::EventOutcome::Failure),
736                    &format!(
737                        "Ollama response deserialization failed: {e}. body_excerpt={}",
738                        sanitized
739                    )
740                );
741                anyhow::bail!("Failed to parse Ollama response: {e}");
742            }
743        };
744
745        Ok(chat_response)
746    }
747
748    /// Send a request to Ollama and get the parsed response.
749    /// Pass `tools` to enable native function-calling for models that support it.
750    ///
751    /// When `reasoning_enabled` (`think`) is set to `true`, the first request
752    /// includes `think: true`.  If that request fails (the model may not support
753    /// the `think` parameter), we automatically retry once with `think` omitted
754    /// so the call succeeds instead of entering an infinite retry loop.
755    async fn send_request(
756        &self,
757        messages: Vec<Message>,
758        model: &str,
759        temperature: Option<f64>,
760        should_auth: bool,
761        tools: Option<&[serde_json::Value]>,
762    ) -> anyhow::Result<ApiChatResponse> {
763        let result = self
764            .send_request_inner(
765                &messages,
766                model,
767                temperature,
768                should_auth,
769                tools,
770                self.reasoning_enabled,
771            )
772            .await;
773
774        match result {
775            Ok(resp) => Ok(resp),
776            Err(first_err) if self.reasoning_enabled == Some(true) => {
777                ::zeroclaw_log::record!(
778                    WARN,
779                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
780                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
781                        .with_attrs(
782                            ::serde_json::json!({"model": model, "error": format!("{}", first_err)})
783                        ),
784                    "Ollama request failed with think=true; retrying without reasoning \
785                     (model may not support it)"
786                );
787                // Retry with think omitted from the request entirely.
788                self.send_request_inner(&messages, model, temperature, should_auth, tools, None)
789                    .await
790                    .map_err(|retry_err| {
791                        // Both attempts failed — return the original error for clarity.
792                        ::zeroclaw_log::record!(ERROR, ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail).with_outcome(::zeroclaw_log::EventOutcome::Failure).with_attrs(::serde_json::json!({"model": model, "original_error": first_err.to_string(), "retry_error": retry_err.to_string()})), "Ollama request also failed without think; returning original error");
793                        first_err
794                    })
795            }
796            Err(e) => Err(e),
797        }
798    }
799
800    /// Convert Ollama tool calls to the JSON format expected by parse_tool_calls in loop_.rs
801    ///
802    /// Handles quirky model behavior where tool calls are wrapped:
803    /// - `{"name": "tool_call", "arguments": {"name": "shell", "arguments": {...}}}`
804    /// - `{"name": "tool.shell", "arguments": {...}}`
805    fn format_tool_calls_for_loop(&self, tool_calls: &[OllamaToolCall]) -> String {
806        let formatted_calls: Vec<serde_json::Value> = tool_calls
807            .iter()
808            .map(|tc| {
809                let (tool_name, tool_args) = self.extract_tool_name_and_args(tc);
810
811                // Arguments must be a JSON string for parse_tool_calls compatibility
812                let args_str =
813                    serde_json::to_string(&tool_args).unwrap_or_else(|_| "{}".to_string());
814
815                serde_json::json!({
816                    "id": tc.id,
817                    "type": "function",
818                    "function": {
819                        "name": tool_name,
820                        "arguments": args_str
821                    }
822                })
823            })
824            .collect();
825
826        serde_json::json!({
827            "content": "",
828            "tool_calls": formatted_calls
829        })
830        .to_string()
831    }
832
833    /// Extract the actual tool name and arguments from potentially nested structures
834    fn extract_tool_name_and_args(&self, tc: &OllamaToolCall) -> (String, serde_json::Value) {
835        let name = &tc.function.name;
836        let args = &tc.function.arguments;
837
838        // Pattern 1: Nested tool_call wrapper (various malformed versions)
839        // {"name": "tool_call", "arguments": {"name": "shell", "arguments": {"command": "date"}}}
840        // {"name": "tool_call><json", "arguments": {"name": "shell", ...}}
841        // {"name": "tool.call", "arguments": {"name": "shell", ...}}
842        if (name == "tool_call"
843            || name == "tool.call"
844            || name.starts_with("tool_call>")
845            || name.starts_with("tool_call<"))
846            && let Some(nested_name) = args.get("name").and_then(|v| v.as_str())
847        {
848            let nested_args = args
849                .get("arguments")
850                .cloned()
851                .unwrap_or(serde_json::json!({}));
852            ::zeroclaw_log::record!(
853                DEBUG,
854                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
855                &format!(
856                    "Unwrapped nested tool call: {} -> {} with args {:?}",
857                    name, nested_name, nested_args
858                )
859            );
860            return (nested_name.to_string(), nested_args);
861        }
862
863        // Pattern 2: Prefixed tool name (tool.shell, tool.file_read, etc.)
864        if let Some(stripped) = name.strip_prefix("tool.") {
865            return (stripped.to_string(), args.clone());
866        }
867
868        // Pattern 3: Normal tool call
869        (name.clone(), args.clone())
870    }
871}
872
873#[async_trait]
874impl ModelProvider for OllamaModelProvider {
875    // ── ModelProvider-family defaults ──
876    fn default_temperature(&self) -> f64 {
877        TEMPERATURE_DEFAULT
878    }
879
880    fn default_timeout_secs(&self) -> u64 {
881        TIMEOUT_SECS_DEFAULT
882    }
883
884    fn default_base_url(&self) -> Option<&str> {
885        Some(BASE_URL)
886    }
887
888    fn capabilities(&self) -> ProviderCapabilities {
889        ProviderCapabilities {
890            native_tool_calling: false,
891            vision: true,
892            prompt_caching: false,
893            extended_thinking: false,
894        }
895    }
896
897    async fn chat_with_system(
898        &self,
899        system_prompt: Option<&str>,
900        message: &str,
901        model: &str,
902        temperature: Option<f64>,
903    ) -> anyhow::Result<String> {
904        let (normalized_model, should_auth) = self.resolve_request_details(model)?;
905
906        let mut messages = Vec::new();
907
908        if let Some(sys) = system_prompt {
909            messages.push(Message {
910                role: "system".to_string(),
911                content: Some(sys.to_string()),
912                images: None,
913                tool_calls: None,
914                tool_name: None,
915            });
916        }
917
918        let (user_content, user_images) = self.convert_user_message_content(message);
919        messages.push(Message {
920            role: "user".to_string(),
921            content: user_content,
922            images: user_images,
923            tool_calls: None,
924            tool_name: None,
925        });
926
927        let response = self
928            .send_request(messages, &normalized_model, temperature, should_auth, None)
929            .await?;
930
931        // If model returned tool calls, format them for loop_.rs's parse_tool_calls
932        if !response.message.tool_calls.is_empty() {
933            ::zeroclaw_log::record!(
934                DEBUG,
935                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
936                &format!(
937                    "Ollama returned {} tool call(s), formatting for loop parser",
938                    response.message.tool_calls.len()
939                )
940            );
941            return Ok(self.format_tool_calls_for_loop(&response.message.tool_calls));
942        }
943
944        // Plain text response — strip <think> tags and fall back to thinking field.
945        if let Some(content) = Self::effective_content(
946            &response.message.content,
947            response.message.thinking.as_deref(),
948        ) {
949            return Ok(content);
950        }
951
952        Ok(Self::fallback_text_for_empty_content(
953            &normalized_model,
954            response.message.thinking.as_deref(),
955        ))
956    }
957
958    async fn chat_with_history(
959        &self,
960        messages: &[crate::traits::ChatMessage],
961        model: &str,
962        temperature: Option<f64>,
963    ) -> anyhow::Result<String> {
964        let (normalized_model, should_auth) = self.resolve_request_details(model)?;
965
966        let api_messages = self.convert_messages(messages);
967
968        let response = self
969            .send_request(
970                api_messages,
971                &normalized_model,
972                temperature,
973                should_auth,
974                None,
975            )
976            .await?;
977
978        // If model returned tool calls, format them for loop_.rs's parse_tool_calls
979        if !response.message.tool_calls.is_empty() {
980            ::zeroclaw_log::record!(
981                DEBUG,
982                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
983                &format!(
984                    "Ollama returned {} tool call(s), formatting for loop parser",
985                    response.message.tool_calls.len()
986                )
987            );
988            return Ok(self.format_tool_calls_for_loop(&response.message.tool_calls));
989        }
990
991        // Plain text response — strip <think> tags and fall back to thinking field.
992        if let Some(content) = Self::effective_content(
993            &response.message.content,
994            response.message.thinking.as_deref(),
995        ) {
996            return Ok(content);
997        }
998
999        Ok(Self::fallback_text_for_empty_content(
1000            &normalized_model,
1001            response.message.thinking.as_deref(),
1002        ))
1003    }
1004
1005    async fn chat_with_tools(
1006        &self,
1007        messages: &[ChatMessage],
1008        tools: &[serde_json::Value],
1009        model: &str,
1010        temperature: Option<f64>,
1011    ) -> anyhow::Result<ChatResponse> {
1012        let (normalized_model, should_auth) = self.resolve_request_details(model)?;
1013
1014        let api_messages = self.convert_messages(messages);
1015
1016        // Tools arrive pre-formatted in OpenAI/Ollama-compatible JSON from
1017        // tools_to_openai_format() in loop_.rs — pass them through directly.
1018        let tools_opt = if tools.is_empty() { None } else { Some(tools) };
1019
1020        let response = self
1021            .send_request(
1022                api_messages,
1023                &normalized_model,
1024                temperature,
1025                should_auth,
1026                tools_opt,
1027            )
1028            .await?;
1029
1030        Ok(self.response_to_chat_response(response, &normalized_model))
1031    }
1032
1033    fn supports_native_tools(&self) -> bool {
1034        // Default to prompt-guided tool calling (XML instructions in system prompt)
1035        // because many Ollama-served models do not support Ollama's native
1036        // /api/chat tool-calling parameter. Models that lack support silently
1037        // ignore the tools array and emit tool-call JSON as plain text, which the
1038        // agent loop cannot parse without the XML protocol instructions.
1039        // See: https://github.com/zeroclaw-labs/zeroclaw/issues/3999
1040        false
1041    }
1042
1043    async fn chat(
1044        &self,
1045        request: zeroclaw_api::model_provider::ChatRequest<'_>,
1046        model: &str,
1047        temperature: Option<f64>,
1048    ) -> anyhow::Result<ChatResponse> {
1049        let temperature = temperature.unwrap_or(self.default_temperature());
1050        let (normalized_model, should_auth) = self.resolve_request_details(model)?;
1051        let messages =
1052            self.with_prompt_guided_tool_instructions(request.messages, request.tools)?;
1053        let api_messages = self.convert_messages(&messages);
1054        let response = self
1055            .send_request(
1056                api_messages,
1057                &normalized_model,
1058                Some(temperature),
1059                should_auth,
1060                None,
1061            )
1062            .await?;
1063
1064        Ok(self.response_to_chat_response(response, &normalized_model))
1065    }
1066
1067    async fn list_models(&self) -> anyhow::Result<Vec<String>> {
1068        // Local Ollama's /api/tags lists installed models and requires no auth.
1069        // Remote Ollama endpoints attach the Bearer key; local ones don't.
1070        let url = format!("{}/api/tags", self.base_url.trim_end_matches('/'));
1071        let mut request = self.http_client().get(&url);
1072        if !self.is_local_endpoint()
1073            && let Some(key) = self.api_key.as_deref()
1074        {
1075            request = request.header("Authorization", format!("Bearer {key}"));
1076        }
1077        let response = request.send().await?.error_for_status()?;
1078
1079        #[derive(Deserialize)]
1080        struct Resp {
1081            models: Vec<Entry>,
1082        }
1083        #[derive(Deserialize)]
1084        struct Entry {
1085            name: String,
1086        }
1087
1088        let body: Resp = response.json().await?;
1089        Ok(body.models.into_iter().map(|e| e.name).collect())
1090    }
1091}
1092
// ─── Attribution & tests ──────────────────────────────────────────────────────
1094
1095impl ::zeroclaw_api::attribution::Attributable for OllamaModelProvider {
1096    fn role(&self) -> ::zeroclaw_api::attribution::Role {
1097        ::zeroclaw_api::attribution::Role::Provider(
1098            ::zeroclaw_api::attribution::ProviderKind::Model(
1099                ::zeroclaw_api::attribution::ModelProviderKind::Ollama,
1100            ),
1101        )
1102    }
1103    fn alias(&self) -> &str {
1104        &self.alias
1105    }
1106}
1107
1108#[cfg(test)]
1109mod tests {
1110    use super::*;
1111    use std::sync::{Arc, Mutex};
1112
1113    #[test]
1114    fn default_url() {
1115        let p = OllamaModelProvider::new("test", None, None);
1116        assert_eq!(p.base_url, "http://localhost:11434");
1117    }
1118
1119    #[test]
1120    fn custom_url_trailing_slash() {
1121        let p = OllamaModelProvider::new("test", Some("http://192.168.1.100:11434/"), None);
1122        assert_eq!(p.base_url, "http://192.168.1.100:11434");
1123    }
1124
1125    #[test]
1126    fn custom_url_no_trailing_slash() {
1127        let p = OllamaModelProvider::new("test", Some("http://myserver:11434"), None);
1128        assert_eq!(p.base_url, "http://myserver:11434");
1129    }
1130
1131    #[test]
1132    fn custom_url_strips_api_suffix() {
1133        let p = OllamaModelProvider::new("test", Some("https://ollama.com/api/"), None);
1134        assert_eq!(p.base_url, "https://ollama.com");
1135    }
1136
1137    #[test]
1138    fn custom_url_strips_api_chat_suffix() {
1139        let p = OllamaModelProvider::new("test", Some("http://172.30.30.50:11434/api/chat"), None);
1140        assert_eq!(p.base_url, "http://172.30.30.50:11434");
1141    }
1142
1143    #[test]
1144    fn empty_url_uses_empty() {
1145        let p = OllamaModelProvider::new("test", Some(""), None);
1146        assert_eq!(p.base_url, "");
1147    }
1148
1149    #[test]
1150    fn cloud_suffix_strips_model_name() {
1151        let p = OllamaModelProvider::new("test", Some("https://ollama.com"), Some("ollama-key"));
1152        let (model, should_auth) = p.resolve_request_details("qwen3:cloud").unwrap();
1153        assert_eq!(model, "qwen3");
1154        assert!(should_auth);
1155    }
1156
1157    #[test]
1158    fn cloud_suffix_with_local_endpoint_errors() {
1159        let p = OllamaModelProvider::new("test", None, Some("ollama-key"));
1160        let error = p
1161            .resolve_request_details("qwen3:cloud")
1162            .expect_err("cloud suffix should fail on local endpoint");
1163        assert!(
1164            error
1165                .to_string()
1166                .contains("requested cloud routing, but Ollama endpoint is local")
1167        );
1168    }
1169
1170    #[test]
1171    fn cloud_suffix_with_unspecified_local_endpoint_errors() {
1172        let p = OllamaModelProvider::new("test", Some("http://0.0.0.0:11434"), Some("ollama-key"));
1173        let error = p
1174            .resolve_request_details("qwen3:cloud")
1175            .expect_err("cloud suffix should fail on unspecified local endpoint");
1176        assert!(
1177            error
1178                .to_string()
1179                .contains("requested cloud routing, but Ollama endpoint is local")
1180        );
1181    }
1182
1183    #[test]
1184    fn cloud_suffix_without_api_key_errors() {
1185        let p = OllamaModelProvider::new("test", Some("https://ollama.com"), None);
1186        let error = p
1187            .resolve_request_details("qwen3:cloud")
1188            .expect_err("cloud suffix should require API key");
1189        assert!(
1190            error
1191                .to_string()
1192                .contains("Set api_key on [providers.models.ollama.<alias>]")
1193        );
1194    }
1195
1196    #[test]
1197    fn cloud_suffix_preserved_for_private_remote_without_api_key() {
1198        let p = OllamaModelProvider::new("test", Some("http://192.168.1.100:11434"), None);
1199        let (model, should_auth) = p.resolve_request_details("qwen3:cloud").unwrap();
1200        assert_eq!(model, "qwen3:cloud");
1201        assert!(!should_auth);
1202    }
1203
1204    #[test]
1205    fn cloud_suffix_preserved_for_private_remote_with_api_key() {
1206        let p = OllamaModelProvider::new(
1207            "test",
1208            Some("https://private-ollama.example.com"),
1209            Some("ollama-key"),
1210        );
1211        let (model, should_auth) = p.resolve_request_details("qwen3:cloud").unwrap();
1212        assert_eq!(model, "qwen3:cloud");
1213        assert!(should_auth);
1214    }
1215
1216    #[test]
1217    fn remote_endpoint_auth_enabled_when_key_present() {
1218        let p = OllamaModelProvider::new("test", Some("https://ollama.com"), Some("ollama-key"));
1219        let (_model, should_auth) = p.resolve_request_details("qwen3").unwrap();
1220        assert!(should_auth);
1221    }
1222
1223    #[test]
1224    fn remote_endpoint_with_api_suffix_still_allows_cloud_models() {
1225        let p =
1226            OllamaModelProvider::new("test", Some("https://ollama.com/api"), Some("ollama-key"));
1227        let (model, should_auth) = p.resolve_request_details("qwen3:cloud").unwrap();
1228        assert_eq!(model, "qwen3");
1229        assert!(should_auth);
1230    }
1231
1232    #[test]
1233    fn local_endpoint_auth_disabled_even_with_key() {
1234        let p = OllamaModelProvider::new("test", None, Some("ollama-key"));
1235        let (_model, should_auth) = p.resolve_request_details("llama3").unwrap();
1236        assert!(!should_auth);
1237    }
1238
1239    #[tokio::test]
1240    async fn chat_with_tool_specs_omits_native_tools_payload() {
1241        use axum::{Json, Router, extract::State, routing::post};
1242        use tokio::net::TcpListener;
1243        use zeroclaw_api::model_provider::ChatRequest;
1244        use zeroclaw_api::tool::ToolSpec;
1245
1246        type CapturedBody = Arc<Mutex<Option<serde_json::Value>>>;
1247
1248        async fn capture_request(
1249            State(captured): State<CapturedBody>,
1250            Json(body): Json<serde_json::Value>,
1251        ) -> Json<serde_json::Value> {
1252            *captured.lock().expect("capture mutex poisoned") = Some(body);
1253            Json(serde_json::json!({
1254                "message": {
1255                    "role": "assistant",
1256                    "content": "done"
1257                },
1258                "prompt_eval_count": 10,
1259                "eval_count": 3
1260            }))
1261        }
1262
1263        let captured: CapturedBody = Arc::new(Mutex::new(None));
1264        let app = Router::new()
1265            .route("/api/chat", post(capture_request))
1266            .with_state(captured.clone());
1267        let listener = TcpListener::bind("127.0.0.1:0")
1268            .await
1269            .expect("test listener should bind");
1270        let addr = listener.local_addr().expect("listener should have address");
1271        let server = zeroclaw_spawn::spawn!(async move {
1272            axum::serve(listener, app)
1273                .await
1274                .expect("test server should run");
1275        });
1276
1277        let provider = OllamaModelProvider::new("test", Some(&format!("http://{addr}")), None);
1278        let messages = vec![
1279            ChatMessage::system("You are helpful."),
1280            ChatMessage::user("read a file"),
1281        ];
1282        let tools = vec![ToolSpec {
1283            name: "file_read".to_string(),
1284            description: "Read a file".to_string(),
1285            parameters: serde_json::json!({
1286                "type": "object",
1287                "properties": {
1288                    "path": {"type": "string"}
1289                },
1290                "required": ["path"]
1291            }),
1292        }];
1293
1294        let response = provider
1295            .chat(
1296                ChatRequest {
1297                    messages: &messages,
1298                    tools: Some(&tools),
1299                    thinking: None,
1300                },
1301                "llama3",
1302                Some(0.2),
1303            )
1304            .await
1305            .expect("ollama chat request should succeed");
1306
1307        server.abort();
1308
1309        assert_eq!(response.text.as_deref(), Some("done"));
1310        assert_eq!(
1311            response.usage.as_ref().and_then(|usage| usage.input_tokens),
1312            Some(10)
1313        );
1314        assert_eq!(
1315            response
1316                .usage
1317                .as_ref()
1318                .and_then(|usage| usage.output_tokens),
1319            Some(3)
1320        );
1321        let body = captured
1322            .lock()
1323            .expect("capture mutex poisoned")
1324            .take()
1325            .expect("request body should be captured");
1326        assert!(
1327            body.get("tools").is_none(),
1328            "Ollama chat() must not serialize native tools while supports_native_tools() is false: {body}"
1329        );
1330        let request_messages = body
1331            .get("messages")
1332            .and_then(serde_json::Value::as_array)
1333            .expect("request messages should be serialized");
1334        assert!(
1335            request_messages.iter().any(|message| {
1336                message.get("role").and_then(serde_json::Value::as_str) == Some("system")
1337                    && message
1338                        .get("content")
1339                        .and_then(serde_json::Value::as_str)
1340                        .is_some_and(|content| {
1341                            content.contains("## Tool Use Protocol")
1342                                && content.contains("file_read")
1343                                && content.contains("\"path\"")
1344                        })
1345            }),
1346            "prompt-guided tool instructions should be generated from ToolSpec: {body}"
1347        );
1348    }
1349
1350    #[test]
1351    fn request_omits_think_when_reasoning_not_configured() {
1352        let model_provider = OllamaModelProvider::new("test", None, None);
1353        let request = model_provider.build_chat_request(
1354            vec![Message {
1355                role: "user".to_string(),
1356                content: Some("hello".to_string()),
1357                images: None,
1358                tool_calls: None,
1359                tool_name: None,
1360            }],
1361            "llama3",
1362            Some(0.7),
1363            None,
1364        );
1365
1366        let json = serde_json::to_value(request).unwrap();
1367        assert!(json.get("think").is_none());
1368        let options = json.get("options").expect("options present");
1369        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(8192)));
1370        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(2048)));
1371    }
1372
1373    #[test]
1374    fn request_includes_think_when_reasoning_configured() {
1375        let model_provider =
1376            OllamaModelProvider::new_with_reasoning("test", None, None, Some(false));
1377        let request = model_provider.build_chat_request(
1378            vec![Message {
1379                role: "user".to_string(),
1380                content: Some("hello".to_string()),
1381                images: None,
1382                tool_calls: None,
1383                tool_name: None,
1384            }],
1385            "llama3",
1386            Some(0.7),
1387            None,
1388        );
1389
1390        let json = serde_json::to_value(request).unwrap();
1391        assert_eq!(json.get("think"), Some(&serde_json::json!(false)));
1392        let options = json.get("options").expect("options present");
1393        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(8192)));
1394        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(2048)));
1395    }
1396
1397    #[test]
1398    fn request_includes_default_num_ctx_and_num_predict() {
1399        let provider = OllamaModelProvider::new("test", None, None);
1400        let request = provider.build_chat_request(
1401            vec![Message {
1402                role: "user".to_string(),
1403                content: Some("hello".to_string()),
1404                images: None,
1405                tool_calls: None,
1406                tool_name: None,
1407            }],
1408            "llama3",
1409            Some(0.2),
1410            None,
1411        );
1412
1413        let json = serde_json::to_value(request).unwrap();
1414        let options = json.get("options").expect("options present");
1415        assert_eq!(options.get("temperature"), Some(&serde_json::json!(0.2)));
1416        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(8192)));
1417        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(2048)));
1418    }
1419
1420    #[test]
1421    fn build_chat_request_with_think_emits_explicit_options() {
1422        // Wire-shape snapshot: when temperature is Some, the JSON body of
1423        // every Ollama /api/chat request must carry an `options` object
1424        // with `num_ctx` and `num_predict`, and a `temperature` matching
1425        // the value passed. None must omit the temperature key entirely.
1426        let provider = OllamaModelProvider::new("test", None, None);
1427        let request = provider.build_chat_request_with_think(
1428            vec![Message {
1429                role: "user".to_string(),
1430                content: Some("hello".to_string()),
1431                images: None,
1432                tool_calls: None,
1433                tool_name: None,
1434            }],
1435            "llama3",
1436            Some(0.3),
1437            None,
1438            Some(true),
1439        );
1440
1441        let json = serde_json::to_value(request).unwrap();
1442        let options = json
1443            .get("options")
1444            .expect("options object missing from request body");
1445
1446        assert_eq!(
1447            options.get("temperature"),
1448            Some(&serde_json::json!(0.3)),
1449            "options.temperature must match the value passed in"
1450        );
1451        assert!(
1452            options.get("num_ctx").is_some(),
1453            "options.num_ctx must be present on every wire request"
1454        );
1455        assert!(
1456            options.get("num_predict").is_some(),
1457            "options.num_predict must be present on every wire request"
1458        );
1459
1460        assert_eq!(options.get("temperature"), Some(&serde_json::json!(0.3)));
1461        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(8192)));
1462        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(2048)));
1463    }
1464
1465    #[test]
1466    fn request_includes_overridden_tuning() {
1467        let provider = OllamaModelProvider::new("test", None, None).with_tuning(OllamaTuning {
1468            num_ctx: 4096,
1469            num_predict: 1024,
1470            temperature_override: None,
1471        });
1472        let request = provider.build_chat_request(
1473            vec![Message {
1474                role: "user".to_string(),
1475                content: Some("hello".to_string()),
1476                images: None,
1477                tool_calls: None,
1478                tool_name: None,
1479            }],
1480            "llama3",
1481            Some(0.5),
1482            None,
1483        );
1484
1485        let json = serde_json::to_value(request).unwrap();
1486        let options = json.get("options").expect("options present");
1487        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(4096)));
1488        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(1024)));
1489    }
1490
1491    #[test]
1492    fn temperature_override_replaces_per_call_temperature() {
1493        let provider = OllamaModelProvider::new("test", None, None).with_tuning(OllamaTuning {
1494            num_ctx: 8192,
1495            num_predict: 2048,
1496            temperature_override: Some(0.1),
1497        });
1498        let request = provider.build_chat_request(
1499            vec![Message {
1500                role: "user".to_string(),
1501                content: Some("hello".to_string()),
1502                images: None,
1503                tool_calls: None,
1504                tool_name: None,
1505            }],
1506            "llama3",
1507            Some(0.9),
1508            None,
1509        );
1510
1511        let json = serde_json::to_value(request).unwrap();
1512        let options = json.get("options").expect("options present");
1513        assert_eq!(options.get("temperature"), Some(&serde_json::json!(0.1)));
1514    }
1515
1516    #[test]
1517    fn temperature_override_unset_passes_per_call_temperature() {
1518        let provider = OllamaModelProvider::new("test", None, None);
1519        let request = provider.build_chat_request(
1520            vec![Message {
1521                role: "user".to_string(),
1522                content: Some("hello".to_string()),
1523                images: None,
1524                tool_calls: None,
1525                tool_name: None,
1526            }],
1527            "llama3",
1528            Some(0.42),
1529            None,
1530        );
1531
1532        let json = serde_json::to_value(request).unwrap();
1533        let options = json.get("options").expect("options present");
1534        assert_eq!(options.get("temperature"), Some(&serde_json::json!(0.42)));
1535    }
1536
1537    #[test]
1538    fn retry_path_carries_options() {
1539        // The think=true → retry-without-think path in `send_request` uses the
1540        // same `build_chat_request_with_think` builder for both attempts; verify
1541        // the builder produces identical option fields when only `think` differs.
1542        let provider = OllamaModelProvider::new_with_reasoning("test", None, None, Some(true))
1543            .with_tuning(OllamaTuning {
1544                num_ctx: 16384,
1545                num_predict: 4096,
1546                temperature_override: None,
1547            });
1548
1549        let messages = vec![Message {
1550            role: "user".to_string(),
1551            content: Some("hello".to_string()),
1552            images: None,
1553            tool_calls: None,
1554            tool_name: None,
1555        }];
1556
1557        let first = provider.build_chat_request_with_think(
1558            messages.clone(),
1559            "llama3",
1560            Some(0.4),
1561            None,
1562            Some(true),
1563        );
1564        let retry =
1565            provider.build_chat_request_with_think(messages, "llama3", Some(0.4), None, None);
1566
1567        let first_json = serde_json::to_value(first).unwrap();
1568        let retry_json = serde_json::to_value(retry).unwrap();
1569        assert_eq!(
1570            first_json.get("options"),
1571            retry_json.get("options"),
1572            "retry must carry the same options as the first attempt"
1573        );
1574        assert_eq!(first_json.get("think"), Some(&serde_json::json!(true)));
1575        assert!(retry_json.get("think").is_none());
1576        let options = first_json.get("options").unwrap();
1577        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(16384)));
1578        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(4096)));
1579    }
1580
1581    #[test]
1582    fn response_deserializes() {
1583        let json = r#"{"message":{"role":"assistant","content":"Hello from Ollama!"}}"#;
1584        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1585        assert_eq!(resp.message.content, "Hello from Ollama!");
1586    }
1587
1588    #[test]
1589    fn response_with_empty_content() {
1590        let json = r#"{"message":{"role":"assistant","content":""}}"#;
1591        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1592        assert!(resp.message.content.is_empty());
1593    }
1594
1595    #[test]
1596    fn normalize_response_text_rejects_whitespace_only_content() {
1597        assert_eq!(
1598            OllamaModelProvider::normalize_response_text("\n \t".to_string()),
1599            None
1600        );
1601        assert_eq!(
1602            OllamaModelProvider::normalize_response_text(" hello ".to_string()),
1603            Some("hello".to_string())
1604        );
1605    }
1606
1607    #[test]
1608    fn normalize_response_text_strips_think_tags() {
1609        assert_eq!(
1610            OllamaModelProvider::normalize_response_text(
1611                "<think>reasoning</think> hello".to_string()
1612            ),
1613            Some("hello".to_string())
1614        );
1615    }
1616
1617    #[test]
1618    fn normalize_response_text_rejects_think_only_content() {
1619        assert_eq!(
1620            OllamaModelProvider::normalize_response_text(
1621                "<think>only thinking here</think>".to_string()
1622            ),
1623            None
1624        );
1625    }
1626
1627    #[test]
1628    fn fallback_text_for_empty_content_without_thinking_is_generic() {
1629        let text = OllamaModelProvider::fallback_text_for_empty_content("qwen3-coder", None);
1630        assert!(text.contains("couldn't get a complete response from Ollama"));
1631    }
1632
1633    #[test]
1634    fn response_with_missing_content_defaults_to_empty() {
1635        let json = r#"{"message":{"role":"assistant"}}"#;
1636        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1637        assert!(resp.message.content.is_empty());
1638    }
1639
1640    #[test]
1641    fn response_with_thinking_field_extracts_content() {
1642        let json =
1643            r#"{"message":{"role":"assistant","content":"hello","thinking":"internal reasoning"}}"#;
1644        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1645        assert_eq!(resp.message.content, "hello");
1646    }
1647
1648    #[test]
1649    fn response_with_tool_calls_parses_correctly() {
1650        let json = r#"{"message":{"role":"assistant","content":"","tool_calls":[{"id":"call_123","function":{"name":"shell","arguments":{"command":"date"}}}]}}"#;
1651        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1652        assert!(resp.message.content.is_empty());
1653        assert_eq!(resp.message.tool_calls.len(), 1);
1654        assert_eq!(resp.message.tool_calls[0].function.name, "shell");
1655    }
1656
1657    #[test]
1658    fn extract_tool_name_handles_nested_tool_call() {
1659        let model_provider = OllamaModelProvider::new("test", None, None);
1660        let tc = OllamaToolCall {
1661            id: Some("call_123".into()),
1662            function: OllamaFunction {
1663                name: "tool_call".into(),
1664                arguments: serde_json::json!({
1665                    "name": "shell",
1666                    "arguments": {"command": "date"}
1667                }),
1668            },
1669        };
1670        let (name, args) = model_provider.extract_tool_name_and_args(&tc);
1671        assert_eq!(name, "shell");
1672        assert_eq!(args.get("command").unwrap(), "date");
1673    }
1674
1675    #[test]
1676    fn extract_tool_name_handles_prefixed_name() {
1677        let model_provider = OllamaModelProvider::new("test", None, None);
1678        let tc = OllamaToolCall {
1679            id: Some("call_123".into()),
1680            function: OllamaFunction {
1681                name: "tool.shell".into(),
1682                arguments: serde_json::json!({"command": "ls"}),
1683            },
1684        };
1685        let (name, args) = model_provider.extract_tool_name_and_args(&tc);
1686        assert_eq!(name, "shell");
1687        assert_eq!(args.get("command").unwrap(), "ls");
1688    }
1689
1690    #[test]
1691    fn extract_tool_name_handles_normal_call() {
1692        let model_provider = OllamaModelProvider::new("test", None, None);
1693        let tc = OllamaToolCall {
1694            id: Some("call_123".into()),
1695            function: OllamaFunction {
1696                name: "file_read".into(),
1697                arguments: serde_json::json!({"path": "/tmp/test"}),
1698            },
1699        };
1700        let (name, args) = model_provider.extract_tool_name_and_args(&tc);
1701        assert_eq!(name, "file_read");
1702        assert_eq!(args.get("path").unwrap(), "/tmp/test");
1703    }
1704
1705    #[test]
1706    fn format_tool_calls_produces_valid_json() {
1707        let model_provider = OllamaModelProvider::new("test", None, None);
1708        let tool_calls = vec![OllamaToolCall {
1709            id: Some("call_abc".into()),
1710            function: OllamaFunction {
1711                name: "shell".into(),
1712                arguments: serde_json::json!({"command": "date"}),
1713            },
1714        }];
1715
1716        let formatted = model_provider.format_tool_calls_for_loop(&tool_calls);
1717        let parsed: serde_json::Value = serde_json::from_str(&formatted).unwrap();
1718
1719        assert!(parsed.get("tool_calls").is_some());
1720        let calls = parsed.get("tool_calls").unwrap().as_array().unwrap();
1721        assert_eq!(calls.len(), 1);
1722
1723        let func = calls[0].get("function").unwrap();
1724        assert_eq!(func.get("name").unwrap(), "shell");
1725        // arguments should be a string (JSON-encoded)
1726        assert!(func.get("arguments").unwrap().is_string());
1727    }
1728
1729    #[test]
1730    fn convert_messages_parses_native_assistant_tool_calls() {
1731        let model_provider = OllamaModelProvider::new("test", None, None);
1732        let messages = vec![ChatMessage {
1733            role: "assistant".into(),
1734            content: r#"{"content":null,"tool_calls":[{"id":"call_1","name":"shell","arguments":"{\"command\":\"ls\"}"}]}"#.into(),
1735        }];
1736
1737        let converted = model_provider.convert_messages(&messages);
1738
1739        assert_eq!(converted.len(), 1);
1740        assert_eq!(converted[0].role, "assistant");
1741        assert!(converted[0].content.is_none());
1742        let calls = converted[0]
1743            .tool_calls
1744            .as_ref()
1745            .expect("tool calls expected");
1746        assert_eq!(calls.len(), 1);
1747        assert_eq!(calls[0].kind, "function");
1748        assert_eq!(calls[0].function.name, "shell");
1749        assert_eq!(calls[0].function.arguments.get("command").unwrap(), "ls");
1750    }
1751
1752    #[test]
1753    fn convert_messages_maps_tool_result_call_id_to_tool_name() {
1754        let model_provider = OllamaModelProvider::new("test", None, None);
1755        let messages = vec![
1756            ChatMessage {
1757                role: "assistant".into(),
1758                content: r#"{"content":null,"tool_calls":[{"id":"call_7","name":"file_read","arguments":"{\"path\":\"README.md\"}"}]}"#.into(),
1759            },
1760            ChatMessage {
1761                role: "tool".into(),
1762                content: r#"{"tool_call_id":"call_7","content":"ok"}"#.into(),
1763            },
1764        ];
1765
1766        let converted = model_provider.convert_messages(&messages);
1767
1768        assert_eq!(converted.len(), 2);
1769        assert_eq!(converted[1].role, "tool");
1770        assert_eq!(converted[1].tool_name.as_deref(), Some("file_read"));
1771        assert_eq!(converted[1].content.as_deref(), Some("ok"));
1772        assert!(converted[1].tool_calls.is_none());
1773    }
1774
1775    #[test]
1776    fn convert_messages_extracts_images_from_user_marker() {
1777        let model_provider = OllamaModelProvider::new("test", None, None);
1778        let messages = vec![ChatMessage {
1779            role: "user".into(),
1780            content: "Inspect this screenshot [IMAGE:data:image/png;base64,abcd==]".into(),
1781        }];
1782
1783        let converted = model_provider.convert_messages(&messages);
1784        assert_eq!(converted.len(), 1);
1785        assert_eq!(converted[0].role, "user");
1786        assert_eq!(
1787            converted[0].content.as_deref(),
1788            Some("Inspect this screenshot")
1789        );
1790        let images = converted[0]
1791            .images
1792            .as_ref()
1793            .expect("images should be present");
1794        assert_eq!(images, &vec!["abcd==".to_string()]);
1795    }
1796
1797    #[test]
1798    fn capabilities_disable_native_tools_and_enable_vision() {
1799        let model_provider = OllamaModelProvider::new("test", None, None);
1800        let caps = <OllamaModelProvider as ModelProvider>::capabilities(&model_provider);
1801        assert!(
1802            !caps.native_tool_calling,
1803            "Ollama should default to prompt-guided tool calling"
1804        );
1805        assert!(caps.vision);
1806    }
1807
1808    #[test]
1809    fn api_response_parses_eval_counts() {
1810        let json = r#"{
1811            "message": {"content": "Hello", "tool_calls": []},
1812            "prompt_eval_count": 50,
1813            "eval_count": 25
1814        }"#;
1815        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1816        assert_eq!(resp.prompt_eval_count, Some(50));
1817        assert_eq!(resp.eval_count, Some(25));
1818    }
1819
1820    #[test]
1821    fn api_response_parses_without_eval_counts() {
1822        let json = r#"{"message": {"content": "Hello", "tool_calls": []}}"#;
1823        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1824        assert!(resp.prompt_eval_count.is_none());
1825        assert!(resp.eval_count.is_none());
1826    }
1827
1828    // ═══════════════════════════════════════════════════════════════════════
1829    // <think> tag stripping tests
1830    // ═══════════════════════════════════════════════════════════════════════
1831
1832    #[test]
1833    fn strip_think_tags_removes_single_block() {
1834        let input = "<think>internal reasoning</think>Hello world";
1835        assert_eq!(OllamaModelProvider::strip_think_tags(input), "Hello world");
1836    }
1837
1838    #[test]
1839    fn strip_think_tags_removes_multiple_blocks() {
1840        let input = "<think>first</think>A<think>second</think>B";
1841        assert_eq!(OllamaModelProvider::strip_think_tags(input), "AB");
1842    }
1843
1844    #[test]
1845    fn strip_think_tags_handles_unclosed_block() {
1846        let input = "visible<think>hidden tail";
1847        assert_eq!(OllamaModelProvider::strip_think_tags(input), "visible");
1848    }
1849
1850    #[test]
1851    fn strip_think_tags_preserves_text_without_tags() {
1852        let input = "plain text response";
1853        assert_eq!(
1854            OllamaModelProvider::strip_think_tags(input),
1855            "plain text response"
1856        );
1857    }
1858
1859    #[test]
1860    fn strip_think_tags_returns_empty_for_think_only() {
1861        let input = "<think>only thinking</think>";
1862        assert_eq!(OllamaModelProvider::strip_think_tags(input), "");
1863    }
1864
1865    // ═══════════════════════════════════════════════════════════════════════
1866    // effective_content tests
1867    // ═══════════════════════════════════════════════════════════════════════
1868
1869    #[test]
1870    fn effective_content_strips_think_and_returns_rest() {
1871        let result = OllamaModelProvider::effective_content(
1872            "<think>reasoning</think>\n<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}</tool_call>",
1873            None,
1874        );
1875        assert!(result.is_some());
1876        let text = result.unwrap();
1877        assert!(text.contains("<tool_call>"));
1878        assert!(!text.contains("<think>"));
1879    }
1880
1881    #[test]
1882    fn effective_content_falls_back_to_thinking_field() {
1883        let result = OllamaModelProvider::effective_content(
1884            "",
1885            Some(
1886                "<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}</tool_call>",
1887            ),
1888        );
1889        assert!(result.is_some());
1890        assert!(result.unwrap().contains("<tool_call>"));
1891    }
1892
1893    #[test]
1894    fn effective_content_returns_none_when_both_empty() {
1895        assert!(OllamaModelProvider::effective_content("", None).is_none());
1896        assert!(OllamaModelProvider::effective_content("", Some("")).is_none());
1897        assert!(
1898            OllamaModelProvider::effective_content(
1899                "<think>only thinking</think>",
1900                Some("<think>also only thinking</think>")
1901            )
1902            .is_none()
1903        );
1904    }
1905
1906    #[test]
1907    fn effective_content_prefers_content_over_thinking() {
1908        let result = OllamaModelProvider::effective_content("content text", Some("thinking text"));
1909        assert_eq!(result, Some("content text".to_string()));
1910    }
1911
1912    #[test]
1913    fn effective_content_uses_thinking_when_content_is_think_only() {
1914        let result = OllamaModelProvider::effective_content(
1915            "<think>just reasoning</think>",
1916            Some("actual useful text from thinking field"),
1917        );
1918        assert_eq!(
1919            result,
1920            Some("actual useful text from thinking field".to_string())
1921        );
1922    }
1923
1924    // ═══════════════════════════════════════════════════════════════════════
1925    // Qwen tool-call regression scenario tests
1926    // ═══════════════════════════════════════════════════════════════════════
1927
1928    #[test]
1929    fn qwen_think_with_tool_call_in_content_preserved() {
1930        // Qwen produces <think> tags followed by <tool_call> in content,
1931        // with no structured tool_calls. The <tool_call> tags must survive
1932        // for downstream parse_tool_calls to extract them.
1933        let content = "<think>I should list files</think>\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}\n</tool_call>";
1934        let result = OllamaModelProvider::effective_content(content, None);
1935        assert!(result.is_some());
1936        let text = result.unwrap();
1937        assert!(text.contains("<tool_call>"));
1938        assert!(text.contains("shell"));
1939        assert!(!text.contains("<think>"));
1940    }
1941
1942    #[test]
1943    fn qwen_thinking_field_with_tool_call_xml_extracted() {
1944        // When think=true, Ollama separates thinking, but Qwen may put tool
1945        // call XML in the thinking field with empty content.
1946        let content = "";
1947        let thinking = "I need to check the date\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}\n</tool_call>";
1948        let result = OllamaModelProvider::effective_content(content, Some(thinking));
1949        assert!(result.is_some());
1950        let text = result.unwrap();
1951        assert!(text.contains("<tool_call>"));
1952        assert!(text.contains("date"));
1953    }
1954}