Skip to main content

zeroclaw_providers/
ollama.rs

1use crate::multimodal;
2use crate::traits::{
3    ChatMessage, ChatResponse, ModelProvider, ProviderCapabilities, TokenUsage, ToolCall,
4};
5use async_trait::async_trait;
6use reqwest::Client;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// Sampling temperature used when a caller does not supply one. Matches
/// Ollama's upstream Modelfile default
/// (<https://docs.ollama.com/modelfile>): "Increasing the temperature will
/// make the model answer more creatively. (Default: 0.8)".
const TEMPERATURE_DEFAULT: f64 = 0.8;
/// Timeout, in seconds, reported by `default_timeout_secs`.
/// Local inference is CPU/GPU-bound; give it more headroom than cloud calls.
const TIMEOUT_SECS_DEFAULT: u64 = 600;
/// Ollama's standard localhost endpoint. Overridable via
/// `model_providers.<name>.base-url` for remote GPU boxes or non-default ports.
pub(crate) const BASE_URL: &str = "http://localhost:11434";
19
/// Default `num_ctx` (context window, in tokens) sent in every Ollama
/// `/api/chat` request when no operator override is supplied. Ollama's
/// server-side default is 2048, which silently truncates prompts; we set
/// 8192 so callers get useful context without per-call configuration.
///
/// This is the `num_ctx` value produced by `OllamaTuning::default()`.
pub const OLLAMA_DEFAULT_NUM_CTX: u32 = 8192;

/// Default `num_predict` (max output tokens) sent in every Ollama
/// `/api/chat` request when no operator override is supplied. Ollama's
/// server-side default is 128, which silently truncates responses.
///
/// This is the `num_predict` value produced by `OllamaTuning::default()`.
pub const OLLAMA_DEFAULT_NUM_PREDICT: i32 = 2048;
30
/// Per-deployment tuning knobs for the Ollama provider. Bundled into
/// every `/api/chat` request's `options` field so the wire payload is
/// explicit instead of relying on Ollama server defaults.
///
/// Note: temperature is intentionally NOT held as a default here.
/// `temperature_override` is `Some(v)` only when an operator explicitly
/// sets `ollama_temperature_override` in `config.toml`; otherwise the
/// per-call temperature passed through `ModelProvider::chat_with_system(..)`
/// wins (preserving backward compatibility with `TEMPERATURE_DEFAULT`).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct OllamaTuning {
    /// Context window, in tokens, sent as `options.num_ctx`.
    pub num_ctx: u32,
    /// Maximum number of output tokens, sent as `options.num_predict`.
    pub num_predict: i32,
    /// Operator-supplied override for the per-call temperature passed
    /// through `ModelProvider::chat_with_system(.., temperature)`. When
    /// `Some(v)`, every Ollama `/api/chat` request uses `v` regardless
    /// of the per-call argument — this is the wire knob behind the
    /// `ollama_temperature_override` config field. When `None`, the
    /// per-call temperature wins (full backward compatibility).
    //
    // Note: `Option<f64>` here, vs `Option<u32>`/`Option<i32>` on the
    // runtime-override constructor's first two args, because temperature
    // has fall-through semantics (None means "let the per-call temp win"),
    // whereas num_ctx/num_predict unset just falls back to framework
    // constants — there is no meaningful "let the call decide" mode for
    // those two.
    pub temperature_override: Option<f64>,
}
59
60impl Default for OllamaTuning {
61    fn default() -> Self {
62        Self {
63            num_ctx: OLLAMA_DEFAULT_NUM_CTX,
64            num_predict: OLLAMA_DEFAULT_NUM_PREDICT,
65            temperature_override: None,
66        }
67    }
68}
69
70impl OllamaTuning {
71    /// Build a tuning struct from the three optional `ModelProviderRuntimeOptions`
72    /// fields the `ollama` factory arm consumes. Unset `num_ctx` /
73    /// `num_predict` fall back to framework constants; unset
74    /// `temperature_override` stays `None` so the per-call temperature wins.
75    #[must_use]
76    pub fn from_runtime_overrides(
77        num_ctx: Option<u32>,
78        num_predict: Option<i32>,
79        temperature_override: Option<f64>,
80    ) -> Self {
81        let defaults = Self::default();
82        Self {
83            num_ctx: num_ctx.unwrap_or(defaults.num_ctx),
84            num_predict: num_predict.unwrap_or(defaults.num_predict),
85            temperature_override,
86        }
87    }
88}
89
/// Model provider that talks to an Ollama server's `/api/chat` endpoint.
pub struct OllamaModelProvider {
    /// `[model_providers.ollama.<alias>]` config-key alias.
    alias: String,
    /// Server root URL, stored after `normalize_base_url` has trimmed it and
    /// removed any trailing `/api` or `/api/chat`.
    base_url: String,
    /// Trimmed API key; `None` when no key (or only whitespace) was supplied.
    api_key: Option<String>,
    /// Value sent as the request's `think` field; `None` omits the field.
    reasoning_enabled: Option<bool>,
    /// `num_ctx` / `num_predict` / temperature-override knobs placed into the
    /// request's `options`.
    tuning: OllamaTuning,
}
98
99// ─── Request Structures ───────────────────────────────────────────────────────
100
/// JSON body posted to `{base_url}/api/chat`.
#[derive(Debug, Serialize)]
struct ChatRequest {
    /// Model name as sent to the server (after any `:cloud` normalization
    /// performed by the caller).
    model: String,
    /// Conversation history in Ollama's message schema.
    messages: Vec<Message>,
    /// Streaming flag; this provider always builds requests with `false`.
    stream: bool,
    /// Sampling and context options.
    options: Options,
    /// Reasoning toggle; left out of the payload entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    think: Option<bool>,
    /// Tool definitions for native function calling; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<serde_json::Value>>,
}
112
/// One outgoing chat message. Every optional field is dropped from the
/// serialized JSON when it is `None`.
#[derive(Debug, Clone, Serialize)]
struct Message {
    /// Message role, e.g. `"system"`, `"user"`, `"assistant"` or `"tool"`.
    role: String,
    /// Text content, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    /// Image payloads extracted from image markers in a user message.
    #[serde(skip_serializing_if = "Option::is_none")]
    images: Option<Vec<String>>,
    /// Structured tool calls carried by an assistant message.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<OutgoingToolCall>>,
    /// Name of the tool whose result a `"tool"` message carries.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_name: Option<String>,
}
125
/// Tool call attached to an outgoing assistant message.
#[derive(Debug, Clone, Serialize)]
struct OutgoingToolCall {
    /// Serialized as `type`; this provider always sets it to `"function"`.
    #[serde(rename = "type")]
    kind: String,
    /// The function being invoked and its arguments.
    function: OutgoingFunction,
}
132
/// Function name and arguments of an outgoing tool call.
#[derive(Debug, Clone, Serialize)]
struct OutgoingFunction {
    /// Tool name.
    name: String,
    /// Arguments as a structured JSON value (not a JSON-encoded string).
    arguments: serde_json::Value,
}
138
/// The `options` object of a chat request.
#[derive(Debug, Serialize)]
struct Options {
    /// Effective sampling temperature (the operator override if set,
    /// otherwise the per-call value).
    temperature: f64,
    /// Context window in tokens; omitted from the payload when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    num_ctx: Option<u32>,
    /// Maximum output tokens; omitted from the payload when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    num_predict: Option<i32>,
}
147
148// ─── Response Structures ──────────────────────────────────────────────────────
149
/// Subset of the `/api/chat` response body that this provider reads.
#[derive(Debug, Deserialize)]
struct ApiChatResponse {
    /// The assistant's reply.
    message: ResponseMessage,
    /// Optional counter reported by the server; `None` when absent.
    // NOTE(review): presumably the number of prompt tokens evaluated — confirm
    // against the Ollama API reference.
    #[serde(default)]
    prompt_eval_count: Option<u64>,
    /// Optional counter reported by the server; `None` when absent.
    // NOTE(review): presumably the number of generated tokens — confirm
    // against the Ollama API reference.
    #[serde(default)]
    eval_count: Option<u64>,
}
158
/// The `message` object of a chat response. Every field falls back to its
/// default (empty string, empty list, `None`) when missing from the JSON.
#[derive(Debug, Deserialize)]
struct ResponseMessage {
    /// Reply text; may contain inline `<think>` blocks.
    #[serde(default)]
    content: String,
    /// Native tool calls requested by the model.
    #[serde(default)]
    tool_calls: Vec<OllamaToolCall>,
    /// Some models return a "thinking" field with internal reasoning
    #[serde(default)]
    thinking: Option<String>,
}
169
/// A tool call as returned by Ollama.
#[derive(Debug, Deserialize)]
struct OllamaToolCall {
    /// Call identifier, when the server provides one.
    id: Option<String>,
    /// The function the model wants to invoke.
    function: OllamaFunction,
}
175
/// Function name and arguments inside a returned tool call.
#[derive(Debug, Deserialize)]
struct OllamaFunction {
    /// Tool name exactly as emitted by the model (may be wrapped or prefixed).
    name: String,
    /// Arguments. `deserialize_args` accepts either a JSON value or a string
    /// containing JSON; a missing field deserializes to the default value.
    #[serde(default, deserialize_with = "deserialize_args")]
    arguments: serde_json::Value,
}
182
183// ─── serde Helpers ───────────────────────────────────────────────────────────
184fn deserialize_args<'de, D>(deserializer: D) -> Result<serde_json::Value, D::Error>
185where
186    D: serde::Deserializer<'de>,
187{
188    let value = serde_json::Value::deserialize(deserializer)?;
189
190    if let Some(s) = value.as_str() {
191        match serde_json::from_str::<serde_json::Value>(s) {
192            Ok(v) => Ok(v),
193            Err(_) => Ok(serde_json::json!({})),
194        }
195    } else {
196        Ok(value)
197    }
198}
199// ─── Implementation ───────────────────────────────────────────────────────────
200
201impl OllamaModelProvider {
202    fn normalize_base_url(raw_url: &str) -> String {
203        let trimmed = raw_url.trim().trim_end_matches('/');
204        if trimmed.is_empty() {
205            return String::new();
206        }
207
208        trimmed
209            .strip_suffix("/api/chat")
210            .or_else(|| trimmed.strip_suffix("/api"))
211            .unwrap_or(trimmed)
212            .trim_end_matches('/')
213            .to_string()
214    }
215
    /// Create a provider with no explicit reasoning (`think`) setting.
    ///
    /// Equivalent to [`Self::new_with_reasoning`] with `reasoning_enabled`
    /// set to `None`.
    pub fn new(alias: &str, base_url: Option<&str>, api_key: Option<&str>) -> Self {
        Self::new_with_reasoning(alias, base_url, api_key, None)
    }
219
220    pub fn new_with_reasoning(
221        alias: &str,
222        base_url: Option<&str>,
223        api_key: Option<&str>,
224        reasoning_enabled: Option<bool>,
225    ) -> Self {
226        let api_key = api_key.and_then(|value| {
227            let trimmed = value.trim();
228            (!trimmed.is_empty()).then(|| trimmed.to_string())
229        });
230
231        Self {
232            alias: alias.to_string(),
233            base_url: Self::normalize_base_url(base_url.unwrap_or(BASE_URL)),
234            api_key,
235            reasoning_enabled,
236            tuning: OllamaTuning::default(),
237        }
238    }
239    /// Override the per-deployment tuning knobs (`num_ctx`, `num_predict`,
240    /// `temperature_override`) on this provider. Returns `self` for
241    /// chained construction.
242    #[must_use]
243    pub fn with_tuning(mut self, tuning: OllamaTuning) -> Self {
244        self.tuning = tuning;
245        self
246    }
247
    /// Test-only accessor returning a copy of the current tuning knobs.
    #[cfg(test)]
    pub(crate) fn tuning(&self) -> OllamaTuning {
        self.tuning
    }
252
253    fn is_local_endpoint(&self) -> bool {
254        reqwest::Url::parse(&self.base_url)
255            .ok()
256            .and_then(|url| url.host_str().map(|host| host.to_string()))
257            .is_some_and(|host| {
258                matches!(host.as_str(), "localhost" | "127.0.0.1" | "::1" | "0.0.0.0")
259            })
260    }
261
262    fn is_official_cloud_endpoint(&self) -> bool {
263        reqwest::Url::parse(&self.base_url)
264            .ok()
265            .and_then(|url| {
266                url.host_str().map(|host| {
267                    host.eq_ignore_ascii_case("ollama.com")
268                        || host.eq_ignore_ascii_case("api.ollama.com")
269                })
270            })
271            .unwrap_or(false)
272    }
273
    /// Obtain the HTTP client used for Ollama requests from the shared
    /// `build_runtime_proxy_client_with_timeouts` helper, registered under the
    /// key `"model_provider.ollama"`.
    // NOTE(review): the two numeric arguments look like request and connect
    // timeouts in seconds. 300 differs from `TIMEOUT_SECS_DEFAULT` (600) —
    // confirm against the helper's signature whether that is intentional.
    fn http_client(&self) -> Client {
        zeroclaw_config::schema::build_runtime_proxy_client_with_timeouts(
            "model_provider.ollama",
            300,
            10,
        )
    }
281
282    fn resolve_request_details(&self, model: &str) -> anyhow::Result<(String, bool)> {
283        let requests_cloud = model.ends_with(":cloud");
284        let official_cloud_endpoint = self.is_official_cloud_endpoint();
285        let local_endpoint = self.is_local_endpoint();
286        let normalized_model = if requests_cloud && official_cloud_endpoint {
287            model.strip_suffix(":cloud").unwrap_or(model).to_string()
288        } else {
289            model.to_string()
290        };
291
292        if requests_cloud && local_endpoint {
293            anyhow::bail!(
294                "Model '{}' requested cloud routing, but Ollama endpoint is local. Configure api_url with a remote Ollama endpoint.",
295                model
296            );
297        }
298
299        if requests_cloud && official_cloud_endpoint && self.api_key.is_none() {
300            anyhow::bail!(
301                "Model '{}' requested cloud routing, but no API key is configured. Set api_key on [providers.models.ollama.<alias>] or via the schema-mirror grammar.",
302                model
303            );
304        }
305
306        let should_auth = self.api_key.is_some() && !local_endpoint;
307
308        Ok((normalized_model, should_auth))
309    }
310
311    fn parse_tool_arguments(arguments: &str) -> serde_json::Value {
312        serde_json::from_str(arguments).unwrap_or_else(|_| serde_json::json!({}))
313    }
314
315    fn normalize_response_text(content: String) -> Option<String> {
316        let stripped = Self::strip_think_tags(&content);
317        if stripped.trim().is_empty() {
318            None
319        } else {
320            Some(stripped)
321        }
322    }
323
324    /// Remove `<think>...</think>` blocks from model output.
325    /// Qwen and other reasoning models may embed chain-of-thought inline
326    /// in the `content` field using `<think>` tags.  These must be stripped
327    /// before returning text to the user or parsing for tool calls.
328    fn strip_think_tags(s: &str) -> String {
329        let mut result = String::with_capacity(s.len());
330        let mut rest = s;
331        loop {
332            if let Some(start) = rest.find("<think>") {
333                result.push_str(&rest[..start]);
334                if let Some(end) = rest[start..].find("</think>") {
335                    rest = &rest[start + end + "</think>".len()..];
336                } else {
337                    // Unclosed tag: drop the rest to avoid leaking partial reasoning.
338                    break;
339                }
340            } else {
341                result.push_str(rest);
342                break;
343            }
344        }
345        result.trim().to_string()
346    }
347
348    /// Derive the effective text content from a response, stripping `<think>` tags
349    /// and falling back to the `thinking` field when `content` is empty after
350    /// stripping.  This ensures that tool-call XML tags embedded alongside (or
351    /// after) thinking blocks are preserved for downstream parsing.
352    fn effective_content(content: &str, thinking: Option<&str>) -> Option<String> {
353        // First try the content field with think tags stripped.
354        let stripped = Self::strip_think_tags(content);
355        if !stripped.trim().is_empty() {
356            return Some(stripped);
357        }
358
359        // Content was empty or only thinking — check the thinking field.
360        // Some models (Qwen) put the full output including tool-call XML in
361        // the thinking field when `think: true` is set.
362        if let Some(thinking) = thinking.map(str::trim).filter(|t| !t.is_empty()) {
363            let stripped_thinking = Self::strip_think_tags(thinking);
364            if !stripped_thinking.trim().is_empty() {
365                ::zeroclaw_log::record!(
366                    DEBUG,
367                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
368                    &format!(
369                        "Ollama: using thinking field as effective content ({} chars)",
370                        stripped_thinking.len()
371                    )
372                );
373                return Some(stripped_thinking);
374            }
375        }
376
377        None
378    }
379
380    fn fallback_text_for_empty_content(model: &str, thinking: Option<&str>) -> String {
381        if let Some(thinking) = thinking.map(str::trim).filter(|value| !value.is_empty()) {
382            let thinking_log_excerpt: String = thinking.chars().take(100).collect();
383            let thinking_reply_excerpt: String = thinking.chars().take(200).collect();
384            ::zeroclaw_log::record!(
385                WARN,
386                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
387                    .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
388                &format!(
389                    "Ollama returned empty content with only thinking for model '{}': '{}'. Model may have stopped prematurely.",
390                    model, thinking_log_excerpt
391                )
392            );
393            return format!(
394                "I was thinking about this: {}... but I didn't complete my response. Could you try asking again?",
395                thinking_reply_excerpt
396            );
397        }
398
399        ::zeroclaw_log::record!(
400            WARN,
401            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
402                .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
403            &format!(
404                "Ollama returned empty or whitespace content with no tool calls for model '{}'",
405                model
406            )
407        );
408        "I couldn't get a complete response from Ollama. Please try again or switch to a different model."
409            .to_string()
410    }
411
    /// Build a chat request using the provider's configured
    /// `reasoning_enabled` value as the `think` flag.
    ///
    /// Thin wrapper over `build_chat_request_with_think`; marked
    /// `allow(dead_code)` because nothing in the visible non-test code calls it.
    #[allow(dead_code)]
    fn build_chat_request(
        &self,
        messages: Vec<Message>,
        model: &str,
        temperature: f64,
        tools: Option<&[serde_json::Value]>,
    ) -> ChatRequest {
        self.build_chat_request_with_think(
            messages,
            model,
            temperature,
            tools,
            self.reasoning_enabled,
        )
    }
428
429    /// Build a chat request with an explicit `think` value.
430    fn build_chat_request_with_think(
431        &self,
432        messages: Vec<Message>,
433        model: &str,
434        temperature: f64,
435        tools: Option<&[serde_json::Value]>,
436        think: Option<bool>,
437    ) -> ChatRequest {
438        ChatRequest {
439            model: model.to_string(),
440            messages,
441            stream: false,
442            options: Options {
443                temperature: self.tuning.temperature_override.unwrap_or(temperature),
444                num_ctx: Some(self.tuning.num_ctx),
445                num_predict: Some(self.tuning.num_predict),
446            },
447            think,
448            tools: tools.map(|t| t.to_vec()),
449        }
450    }
451
452    fn convert_user_message_content(&self, content: &str) -> (Option<String>, Option<Vec<String>>) {
453        let (cleaned, image_refs) = multimodal::parse_image_markers(content);
454        if image_refs.is_empty() {
455            return (Some(content.to_string()), None);
456        }
457
458        let images: Vec<String> = image_refs
459            .iter()
460            .filter_map(|reference| multimodal::extract_ollama_image_payload(reference))
461            .collect();
462
463        if images.is_empty() {
464            return (Some(content.to_string()), None);
465        }
466
467        let cleaned = cleaned.trim();
468        let content = if cleaned.is_empty() {
469            None
470        } else {
471            Some(cleaned.to_string())
472        };
473
474        (content, Some(images))
475    }
476
    /// Convert internal chat history format to Ollama's native tool-call message schema.
    ///
    /// `run_tool_call_loop` stores native assistant/tool entries as JSON strings in
    /// `ChatMessage.content`. We decode those payloads here so follow-up requests send
    /// structured `assistant.tool_calls` and `tool.tool_name`, as expected by Ollama.
    ///
    /// Messages are processed in order; tool-call ids seen on earlier assistant
    /// messages are remembered so a later tool result that carries only a
    /// `tool_call_id` can still be labelled with its tool name.
    fn convert_messages(&self, messages: &[ChatMessage]) -> Vec<Message> {
        // tool-call id -> tool name, filled in as assistant messages are visited.
        let mut tool_name_by_id: HashMap<String, String> = HashMap::new();

        messages
            .iter()
            .map(|message| {
                // Assistant entry whose content is JSON carrying a parseable
                // `tool_calls` array: emit structured tool calls.
                if message.role == "assistant"
                    && let Ok(value) = serde_json::from_str::<serde_json::Value>(&message.content)
                    && let Some(tool_calls_value) = value.get("tool_calls")
                    && let Ok(parsed_calls) =
                        serde_json::from_value::<Vec<ToolCall>>(tool_calls_value.clone())
                {
                    let outgoing_calls: Vec<OutgoingToolCall> = parsed_calls
                        .into_iter()
                        .map(|call| {
                            // Remember the name so the matching tool result can find it.
                            tool_name_by_id.insert(call.id.clone(), call.name.clone());
                            OutgoingToolCall {
                                kind: "function".to_string(),
                                function: OutgoingFunction {
                                    name: call.name,
                                    // Stored as a JSON string; sent as a structured value.
                                    arguments: Self::parse_tool_arguments(&call.arguments),
                                },
                            }
                        })
                        .collect();
                    let content = value
                        .get("content")
                        .and_then(serde_json::Value::as_str)
                        .map(ToString::to_string);
                    return Message {
                        role: "assistant".to_string(),
                        content,
                        images: None,
                        tool_calls: Some(outgoing_calls),
                        tool_name: None,
                    };
                }

                // Tool result whose content is JSON: pull out the tool name and text.
                if message.role == "tool"
                    && let Ok(value) = serde_json::from_str::<serde_json::Value>(&message.content)
                {
                    // Prefer an explicit `tool_name`; otherwise resolve it from
                    // `tool_call_id` using the ids recorded above.
                    let tool_name = value
                        .get("tool_name")
                        .and_then(serde_json::Value::as_str)
                        .map(ToString::to_string)
                        .or_else(|| {
                            value
                                .get("tool_call_id")
                                .and_then(serde_json::Value::as_str)
                                .and_then(|id| tool_name_by_id.get(id))
                                .cloned()
                        });
                    // Use the JSON `content` string when present; otherwise fall
                    // back to the raw message text unless it is blank.
                    let content = value
                        .get("content")
                        .and_then(serde_json::Value::as_str)
                        .map(ToString::to_string)
                        .or_else(|| {
                            (!message.content.trim().is_empty()).then_some(message.content.clone())
                        });

                    return Message {
                        role: "tool".to_string(),
                        content,
                        images: None,
                        tool_calls: None,
                        tool_name,
                    };
                }

                // User messages may carry image markers that become `images`.
                if message.role == "user" {
                    let (content, images) = self.convert_user_message_content(&message.content);
                    return Message {
                        role: "user".to_string(),
                        content,
                        images,
                        tool_calls: None,
                        tool_name: None,
                    };
                }

                // Everything else (including assistant/tool entries that were
                // not decodable above) is passed through as plain text.
                Message {
                    role: message.role.clone(),
                    content: Some(message.content.clone()),
                    images: None,
                    tool_calls: None,
                    tool_name: None,
                }
            })
            .collect()
    }
572
573    /// Send a single HTTP request to Ollama and parse the response.
574    async fn send_request_inner(
575        &self,
576        messages: &[Message],
577        model: &str,
578        temperature: f64,
579        should_auth: bool,
580        tools: Option<&[serde_json::Value]>,
581        think: Option<bool>,
582    ) -> anyhow::Result<ApiChatResponse> {
583        let request =
584            self.build_chat_request_with_think(messages.to_vec(), model, temperature, tools, think);
585
586        let url = format!("{}/api/chat", self.base_url);
587
588        ::zeroclaw_log::record!(
589            DEBUG,
590            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
591            &format!(
592                "Ollama request: url={} model={} message_count={} temperature={} think={:?} tool_count={}",
593                url,
594                model,
595                request.messages.len(),
596                temperature,
597                request.think,
598                request.tools.as_ref().map_or(0, |t| t.len())
599            )
600        );
601
602        let mut request_builder = self.http_client().post(&url).json(&request);
603
604        if should_auth && let Some(key) = self.api_key.as_ref() {
605            request_builder = request_builder.bearer_auth(key);
606        }
607
608        let response = request_builder.send().await?;
609        let status = response.status();
610        ::zeroclaw_log::record!(
611            DEBUG,
612            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
613            &format!("response status: {}", status)
614        );
615
616        let body = response.bytes().await?;
617        ::zeroclaw_log::record!(
618            DEBUG,
619            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
620            &format!("response body length: {} bytes", body.len())
621        );
622
623        if !status.is_success() {
624            let raw = String::from_utf8_lossy(&body);
625            let sanitized = super::sanitize_api_error(&raw);
626            ::zeroclaw_log::record!(
627                ERROR,
628                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
629                    .with_outcome(::zeroclaw_log::EventOutcome::Failure),
630                &format!(
631                    "Ollama error response: status={} body_excerpt={}",
632                    status, sanitized
633                )
634            );
635            anyhow::bail!(
636                "Ollama API error ({}): {}. Is Ollama running? (brew install ollama && ollama serve)",
637                status,
638                sanitized
639            );
640        }
641
642        let chat_response: ApiChatResponse = match serde_json::from_slice(&body) {
643            Ok(r) => r,
644            Err(e) => {
645                let raw = String::from_utf8_lossy(&body);
646                let sanitized = super::sanitize_api_error(&raw);
647                ::zeroclaw_log::record!(
648                    ERROR,
649                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
650                        .with_outcome(::zeroclaw_log::EventOutcome::Failure),
651                    &format!(
652                        "Ollama response deserialization failed: {e}. body_excerpt={}",
653                        sanitized
654                    )
655                );
656                anyhow::bail!("Failed to parse Ollama response: {e}");
657            }
658        };
659
660        Ok(chat_response)
661    }
662
663    /// Send a request to Ollama and get the parsed response.
664    /// Pass `tools` to enable native function-calling for models that support it.
665    ///
666    /// When `reasoning_enabled` (`think`) is set to `true`, the first request
667    /// includes `think: true`.  If that request fails (the model may not support
668    /// the `think` parameter), we automatically retry once with `think` omitted
669    /// so the call succeeds instead of entering an infinite retry loop.
670    async fn send_request(
671        &self,
672        messages: Vec<Message>,
673        model: &str,
674        temperature: f64,
675        should_auth: bool,
676        tools: Option<&[serde_json::Value]>,
677    ) -> anyhow::Result<ApiChatResponse> {
678        let result = self
679            .send_request_inner(
680                &messages,
681                model,
682                temperature,
683                should_auth,
684                tools,
685                self.reasoning_enabled,
686            )
687            .await;
688
689        match result {
690            Ok(resp) => Ok(resp),
691            Err(first_err) if self.reasoning_enabled == Some(true) => {
692                ::zeroclaw_log::record!(
693                    WARN,
694                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
695                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
696                        .with_attrs(
697                            ::serde_json::json!({"model": model, "error": format!("{}", first_err)})
698                        ),
699                    "Ollama request failed with think=true; retrying without reasoning \
700                     (model may not support it)"
701                );
702                // Retry with think omitted from the request entirely.
703                self.send_request_inner(&messages, model, temperature, should_auth, tools, None)
704                    .await
705                    .map_err(|retry_err| {
706                        // Both attempts failed — return the original error for clarity.
707                        ::zeroclaw_log::record!(ERROR, ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail).with_outcome(::zeroclaw_log::EventOutcome::Failure).with_attrs(::serde_json::json!({"model": model, "original_error": first_err.to_string(), "retry_error": retry_err.to_string()})), "Ollama request also failed without think; returning original error");
708                        first_err
709                    })
710            }
711            Err(e) => Err(e),
712        }
713    }
714
715    /// Convert Ollama tool calls to the JSON format expected by parse_tool_calls in loop_.rs
716    ///
717    /// Handles quirky model behavior where tool calls are wrapped:
718    /// - `{"name": "tool_call", "arguments": {"name": "shell", "arguments": {...}}}`
719    /// - `{"name": "tool.shell", "arguments": {...}}`
720    fn format_tool_calls_for_loop(&self, tool_calls: &[OllamaToolCall]) -> String {
721        let formatted_calls: Vec<serde_json::Value> = tool_calls
722            .iter()
723            .map(|tc| {
724                let (tool_name, tool_args) = self.extract_tool_name_and_args(tc);
725
726                // Arguments must be a JSON string for parse_tool_calls compatibility
727                let args_str =
728                    serde_json::to_string(&tool_args).unwrap_or_else(|_| "{}".to_string());
729
730                serde_json::json!({
731                    "id": tc.id,
732                    "type": "function",
733                    "function": {
734                        "name": tool_name,
735                        "arguments": args_str
736                    }
737                })
738            })
739            .collect();
740
741        serde_json::json!({
742            "content": "",
743            "tool_calls": formatted_calls
744        })
745        .to_string()
746    }
747
748    /// Extract the actual tool name and arguments from potentially nested structures
749    fn extract_tool_name_and_args(&self, tc: &OllamaToolCall) -> (String, serde_json::Value) {
750        let name = &tc.function.name;
751        let args = &tc.function.arguments;
752
753        // Pattern 1: Nested tool_call wrapper (various malformed versions)
754        // {"name": "tool_call", "arguments": {"name": "shell", "arguments": {"command": "date"}}}
755        // {"name": "tool_call><json", "arguments": {"name": "shell", ...}}
756        // {"name": "tool.call", "arguments": {"name": "shell", ...}}
757        if (name == "tool_call"
758            || name == "tool.call"
759            || name.starts_with("tool_call>")
760            || name.starts_with("tool_call<"))
761            && let Some(nested_name) = args.get("name").and_then(|v| v.as_str())
762        {
763            let nested_args = args
764                .get("arguments")
765                .cloned()
766                .unwrap_or(serde_json::json!({}));
767            ::zeroclaw_log::record!(
768                DEBUG,
769                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
770                &format!(
771                    "Unwrapped nested tool call: {} -> {} with args {:?}",
772                    name, nested_name, nested_args
773                )
774            );
775            return (nested_name.to_string(), nested_args);
776        }
777
778        // Pattern 2: Prefixed tool name (tool.shell, tool.file_read, etc.)
779        if let Some(stripped) = name.strip_prefix("tool.") {
780            return (stripped.to_string(), args.clone());
781        }
782
783        // Pattern 3: Normal tool call
784        (name.clone(), args.clone())
785    }
786}
787
788#[async_trait]
789impl ModelProvider for OllamaModelProvider {
790    // ── ModelProvider-family defaults ──
791    fn default_temperature(&self) -> f64 {
792        TEMPERATURE_DEFAULT
793    }
794
795    fn default_timeout_secs(&self) -> u64 {
796        TIMEOUT_SECS_DEFAULT
797    }
798
799    fn default_base_url(&self) -> Option<&str> {
800        Some(BASE_URL)
801    }
802
803    fn capabilities(&self) -> ProviderCapabilities {
804        ProviderCapabilities {
805            native_tool_calling: false,
806            vision: true,
807            prompt_caching: false,
808            extended_thinking: false,
809        }
810    }
811
812    async fn chat_with_system(
813        &self,
814        system_prompt: Option<&str>,
815        message: &str,
816        model: &str,
817        temperature: Option<f64>,
818    ) -> anyhow::Result<String> {
819        let temperature = temperature.unwrap_or(self.default_temperature());
820        let (normalized_model, should_auth) = self.resolve_request_details(model)?;
821
822        let mut messages = Vec::new();
823
824        if let Some(sys) = system_prompt {
825            messages.push(Message {
826                role: "system".to_string(),
827                content: Some(sys.to_string()),
828                images: None,
829                tool_calls: None,
830                tool_name: None,
831            });
832        }
833
834        let (user_content, user_images) = self.convert_user_message_content(message);
835        messages.push(Message {
836            role: "user".to_string(),
837            content: user_content,
838            images: user_images,
839            tool_calls: None,
840            tool_name: None,
841        });
842
843        let response = self
844            .send_request(messages, &normalized_model, temperature, should_auth, None)
845            .await?;
846
847        // If model returned tool calls, format them for loop_.rs's parse_tool_calls
848        if !response.message.tool_calls.is_empty() {
849            ::zeroclaw_log::record!(
850                DEBUG,
851                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
852                &format!(
853                    "Ollama returned {} tool call(s), formatting for loop parser",
854                    response.message.tool_calls.len()
855                )
856            );
857            return Ok(self.format_tool_calls_for_loop(&response.message.tool_calls));
858        }
859
860        // Plain text response — strip <think> tags and fall back to thinking field.
861        if let Some(content) = Self::effective_content(
862            &response.message.content,
863            response.message.thinking.as_deref(),
864        ) {
865            return Ok(content);
866        }
867
868        Ok(Self::fallback_text_for_empty_content(
869            &normalized_model,
870            response.message.thinking.as_deref(),
871        ))
872    }
873
874    async fn chat_with_history(
875        &self,
876        messages: &[crate::traits::ChatMessage],
877        model: &str,
878        temperature: Option<f64>,
879    ) -> anyhow::Result<String> {
880        let temperature = temperature.unwrap_or(self.default_temperature());
881        let (normalized_model, should_auth) = self.resolve_request_details(model)?;
882
883        let api_messages = self.convert_messages(messages);
884
885        let response = self
886            .send_request(
887                api_messages,
888                &normalized_model,
889                temperature,
890                should_auth,
891                None,
892            )
893            .await?;
894
895        // If model returned tool calls, format them for loop_.rs's parse_tool_calls
896        if !response.message.tool_calls.is_empty() {
897            ::zeroclaw_log::record!(
898                DEBUG,
899                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
900                &format!(
901                    "Ollama returned {} tool call(s), formatting for loop parser",
902                    response.message.tool_calls.len()
903                )
904            );
905            return Ok(self.format_tool_calls_for_loop(&response.message.tool_calls));
906        }
907
908        // Plain text response — strip <think> tags and fall back to thinking field.
909        if let Some(content) = Self::effective_content(
910            &response.message.content,
911            response.message.thinking.as_deref(),
912        ) {
913            return Ok(content);
914        }
915
916        Ok(Self::fallback_text_for_empty_content(
917            &normalized_model,
918            response.message.thinking.as_deref(),
919        ))
920    }
921
922    async fn chat_with_tools(
923        &self,
924        messages: &[ChatMessage],
925        tools: &[serde_json::Value],
926        model: &str,
927        temperature: Option<f64>,
928    ) -> anyhow::Result<ChatResponse> {
929        let temperature = temperature.unwrap_or(self.default_temperature());
930        let (normalized_model, should_auth) = self.resolve_request_details(model)?;
931
932        let api_messages = self.convert_messages(messages);
933
934        // Tools arrive pre-formatted in OpenAI/Ollama-compatible JSON from
935        // tools_to_openai_format() in loop_.rs — pass them through directly.
936        let tools_opt = if tools.is_empty() { None } else { Some(tools) };
937
938        let response = self
939            .send_request(
940                api_messages,
941                &normalized_model,
942                temperature,
943                should_auth,
944                tools_opt,
945            )
946            .await?;
947
948        let usage = if response.prompt_eval_count.is_some() || response.eval_count.is_some() {
949            Some(TokenUsage {
950                input_tokens: response.prompt_eval_count,
951                output_tokens: response.eval_count,
952                cached_input_tokens: None,
953            })
954        } else {
955            None
956        };
957
958        // Native tool calls returned by the model.
959        if !response.message.tool_calls.is_empty() {
960            let tool_calls: Vec<ToolCall> = response
961                .message
962                .tool_calls
963                .iter()
964                .map(|tc| {
965                    let (name, args) = self.extract_tool_name_and_args(tc);
966                    ToolCall {
967                        id: tc
968                            .id
969                            .clone()
970                            .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
971                        name,
972                        arguments: serde_json::to_string(&args)
973                            .unwrap_or_else(|_| "{}".to_string()),
974                        extra_content: None,
975                    }
976                })
977                .collect();
978            let text = Self::normalize_response_text(response.message.content);
979            return Ok(ChatResponse {
980                text,
981                tool_calls,
982                usage,
983                reasoning_content: None,
984            });
985        }
986
987        // No native tool calls — use the effective content (content with
988        // `<think>` tags stripped, falling back to thinking field).
989        // The loop_.rs `parse_tool_calls` will extract any XML-style tool
990        // calls from the text, so preserve `<tool_call>` tags here.
991        let effective = Self::effective_content(
992            &response.message.content,
993            response.message.thinking.as_deref(),
994        );
995        let text = if let Some(content) = effective {
996            content
997        } else {
998            Self::fallback_text_for_empty_content(
999                &normalized_model,
1000                response.message.thinking.as_deref(),
1001            )
1002        };
1003        Ok(ChatResponse {
1004            text: Some(text),
1005            tool_calls: vec![],
1006            usage,
1007            reasoning_content: None,
1008        })
1009    }
1010
1011    fn supports_native_tools(&self) -> bool {
1012        // Default to prompt-guided tool calling (XML instructions in system prompt)
1013        // because many Ollama-served models do not support Ollama's native
1014        // /api/chat tool-calling parameter. Models that lack support silently
1015        // ignore the tools array and emit tool-call JSON as plain text, which the
1016        // agent loop cannot parse without the XML protocol instructions.
1017        // See: https://github.com/zeroclaw-labs/zeroclaw/issues/3999
1018        false
1019    }
1020
1021    async fn chat(
1022        &self,
1023        request: zeroclaw_api::model_provider::ChatRequest<'_>,
1024        model: &str,
1025        temperature: Option<f64>,
1026    ) -> anyhow::Result<ChatResponse> {
1027        // Convert ToolSpec to OpenAI-compatible JSON and delegate to chat_with_tools.
1028        if let Some(specs) = request.tools
1029            && !specs.is_empty()
1030        {
1031            let tools: Vec<serde_json::Value> = specs
1032                .iter()
1033                .map(|s| {
1034                    let params =
1035                        zeroclaw_api::schema::SchemaCleanr::clean_for_openai(s.parameters.clone());
1036                    serde_json::json!({
1037                        "type": "function",
1038                        "function": {
1039                            "name": s.name,
1040                            "description": s.description,
1041                            "parameters": params
1042                        }
1043                    })
1044                })
1045                .collect();
1046            return self
1047                .chat_with_tools(request.messages, &tools, model, temperature)
1048                .await;
1049        }
1050
1051        // No tools — fall back to plain text chat.
1052        let text = self
1053            .chat_with_history(request.messages, model, temperature)
1054            .await?;
1055        Ok(ChatResponse {
1056            text: Some(text),
1057            tool_calls: vec![],
1058            usage: None,
1059            reasoning_content: None,
1060        })
1061    }
1062
1063    async fn list_models(&self) -> anyhow::Result<Vec<String>> {
1064        // Local Ollama's /api/tags lists installed models and requires no auth.
1065        // Remote Ollama endpoints attach the Bearer key; local ones don't.
1066        let url = format!("{}/api/tags", self.base_url.trim_end_matches('/'));
1067        let mut request = self.http_client().get(&url);
1068        if !self.is_local_endpoint()
1069            && let Some(key) = self.api_key.as_deref()
1070        {
1071            request = request.header("Authorization", format!("Bearer {key}"));
1072        }
1073        let response = request.send().await?.error_for_status()?;
1074
1075        #[derive(Deserialize)]
1076        struct Resp {
1077            models: Vec<Entry>,
1078        }
1079        #[derive(Deserialize)]
1080        struct Entry {
1081            name: String,
1082        }
1083
1084        let body: Resp = response.json().await?;
1085        Ok(body.models.into_iter().map(|e| e.name).collect())
1086    }
1087}
1088
// ─── Attribution & tests ──────────────────────────────────────────────────────
1090
1091impl ::zeroclaw_api::attribution::Attributable for OllamaModelProvider {
1092    fn role(&self) -> ::zeroclaw_api::attribution::Role {
1093        ::zeroclaw_api::attribution::Role::Provider(
1094            ::zeroclaw_api::attribution::ProviderKind::Model(
1095                ::zeroclaw_api::attribution::ModelProviderKind::Ollama,
1096            ),
1097        )
1098    }
1099    fn alias(&self) -> &str {
1100        &self.alias
1101    }
1102}
1103
1104#[cfg(test)]
1105mod tests {
1106    use super::*;
1107
1108    #[test]
1109    fn default_url() {
1110        let p = OllamaModelProvider::new("test", None, None);
1111        assert_eq!(p.base_url, "http://localhost:11434");
1112    }
1113
1114    #[test]
1115    fn custom_url_trailing_slash() {
1116        let p = OllamaModelProvider::new("test", Some("http://192.168.1.100:11434/"), None);
1117        assert_eq!(p.base_url, "http://192.168.1.100:11434");
1118    }
1119
1120    #[test]
1121    fn custom_url_no_trailing_slash() {
1122        let p = OllamaModelProvider::new("test", Some("http://myserver:11434"), None);
1123        assert_eq!(p.base_url, "http://myserver:11434");
1124    }
1125
1126    #[test]
1127    fn custom_url_strips_api_suffix() {
1128        let p = OllamaModelProvider::new("test", Some("https://ollama.com/api/"), None);
1129        assert_eq!(p.base_url, "https://ollama.com");
1130    }
1131
1132    #[test]
1133    fn custom_url_strips_api_chat_suffix() {
1134        let p = OllamaModelProvider::new("test", Some("http://172.30.30.50:11434/api/chat"), None);
1135        assert_eq!(p.base_url, "http://172.30.30.50:11434");
1136    }
1137
1138    #[test]
1139    fn empty_url_uses_empty() {
1140        let p = OllamaModelProvider::new("test", Some(""), None);
1141        assert_eq!(p.base_url, "");
1142    }
1143
1144    #[test]
1145    fn cloud_suffix_strips_model_name() {
1146        let p = OllamaModelProvider::new("test", Some("https://ollama.com"), Some("ollama-key"));
1147        let (model, should_auth) = p.resolve_request_details("qwen3:cloud").unwrap();
1148        assert_eq!(model, "qwen3");
1149        assert!(should_auth);
1150    }
1151
1152    #[test]
1153    fn cloud_suffix_with_local_endpoint_errors() {
1154        let p = OllamaModelProvider::new("test", None, Some("ollama-key"));
1155        let error = p
1156            .resolve_request_details("qwen3:cloud")
1157            .expect_err("cloud suffix should fail on local endpoint");
1158        assert!(
1159            error
1160                .to_string()
1161                .contains("requested cloud routing, but Ollama endpoint is local")
1162        );
1163    }
1164
1165    #[test]
1166    fn cloud_suffix_with_unspecified_local_endpoint_errors() {
1167        let p = OllamaModelProvider::new("test", Some("http://0.0.0.0:11434"), Some("ollama-key"));
1168        let error = p
1169            .resolve_request_details("qwen3:cloud")
1170            .expect_err("cloud suffix should fail on unspecified local endpoint");
1171        assert!(
1172            error
1173                .to_string()
1174                .contains("requested cloud routing, but Ollama endpoint is local")
1175        );
1176    }
1177
1178    #[test]
1179    fn cloud_suffix_without_api_key_errors() {
1180        let p = OllamaModelProvider::new("test", Some("https://ollama.com"), None);
1181        let error = p
1182            .resolve_request_details("qwen3:cloud")
1183            .expect_err("cloud suffix should require API key");
1184        assert!(
1185            error
1186                .to_string()
1187                .contains("Set api_key on [providers.models.ollama.<alias>]")
1188        );
1189    }
1190
1191    #[test]
1192    fn cloud_suffix_preserved_for_private_remote_without_api_key() {
1193        let p = OllamaModelProvider::new("test", Some("http://192.168.1.100:11434"), None);
1194        let (model, should_auth) = p.resolve_request_details("qwen3:cloud").unwrap();
1195        assert_eq!(model, "qwen3:cloud");
1196        assert!(!should_auth);
1197    }
1198
1199    #[test]
1200    fn cloud_suffix_preserved_for_private_remote_with_api_key() {
1201        let p = OllamaModelProvider::new(
1202            "test",
1203            Some("https://private-ollama.example.com"),
1204            Some("ollama-key"),
1205        );
1206        let (model, should_auth) = p.resolve_request_details("qwen3:cloud").unwrap();
1207        assert_eq!(model, "qwen3:cloud");
1208        assert!(should_auth);
1209    }
1210
1211    #[test]
1212    fn remote_endpoint_auth_enabled_when_key_present() {
1213        let p = OllamaModelProvider::new("test", Some("https://ollama.com"), Some("ollama-key"));
1214        let (_model, should_auth) = p.resolve_request_details("qwen3").unwrap();
1215        assert!(should_auth);
1216    }
1217
1218    #[test]
1219    fn remote_endpoint_with_api_suffix_still_allows_cloud_models() {
1220        let p =
1221            OllamaModelProvider::new("test", Some("https://ollama.com/api"), Some("ollama-key"));
1222        let (model, should_auth) = p.resolve_request_details("qwen3:cloud").unwrap();
1223        assert_eq!(model, "qwen3");
1224        assert!(should_auth);
1225    }
1226
1227    #[test]
1228    fn local_endpoint_auth_disabled_even_with_key() {
1229        let p = OllamaModelProvider::new("test", None, Some("ollama-key"));
1230        let (_model, should_auth) = p.resolve_request_details("llama3").unwrap();
1231        assert!(!should_auth);
1232    }
1233
1234    #[test]
1235    fn request_omits_think_when_reasoning_not_configured() {
1236        let model_provider = OllamaModelProvider::new("test", None, None);
1237        let request = model_provider.build_chat_request(
1238            vec![Message {
1239                role: "user".to_string(),
1240                content: Some("hello".to_string()),
1241                images: None,
1242                tool_calls: None,
1243                tool_name: None,
1244            }],
1245            "llama3",
1246            0.7,
1247            None,
1248        );
1249
1250        let json = serde_json::to_value(request).unwrap();
1251        assert!(json.get("think").is_none());
1252        let options = json.get("options").expect("options present");
1253        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(8192)));
1254        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(2048)));
1255    }
1256
1257    #[test]
1258    fn request_includes_think_when_reasoning_configured() {
1259        let model_provider =
1260            OllamaModelProvider::new_with_reasoning("test", None, None, Some(false));
1261        let request = model_provider.build_chat_request(
1262            vec![Message {
1263                role: "user".to_string(),
1264                content: Some("hello".to_string()),
1265                images: None,
1266                tool_calls: None,
1267                tool_name: None,
1268            }],
1269            "llama3",
1270            0.7,
1271            None,
1272        );
1273
1274        let json = serde_json::to_value(request).unwrap();
1275        assert_eq!(json.get("think"), Some(&serde_json::json!(false)));
1276        let options = json.get("options").expect("options present");
1277        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(8192)));
1278        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(2048)));
1279    }
1280
1281    #[test]
1282    fn request_includes_default_num_ctx_and_num_predict() {
1283        let provider = OllamaModelProvider::new("test", None, None);
1284        let request = provider.build_chat_request(
1285            vec![Message {
1286                role: "user".to_string(),
1287                content: Some("hello".to_string()),
1288                images: None,
1289                tool_calls: None,
1290                tool_name: None,
1291            }],
1292            "llama3",
1293            0.2,
1294            None,
1295        );
1296
1297        let json = serde_json::to_value(request).unwrap();
1298        let options = json.get("options").expect("options present");
1299        assert_eq!(options.get("temperature"), Some(&serde_json::json!(0.2)));
1300        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(8192)));
1301        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(2048)));
1302    }
1303
1304    #[test]
1305    fn build_chat_request_with_think_emits_explicit_options() {
1306        // Wire-shape snapshot: the JSON body of every Ollama /api/chat
1307        // request MUST carry an `options` object with all three keys
1308        // (`temperature`, `num_ctx`, `num_predict`) populated. Older
1309        // tests cover individual fields piecemeal; this one locks the
1310        // full shape so a future refactor can't silently drop a field.
1311        let provider = OllamaModelProvider::new("test", None, None);
1312        let request = provider.build_chat_request_with_think(
1313            vec![Message {
1314                role: "user".to_string(),
1315                content: Some("hello".to_string()),
1316                images: None,
1317                tool_calls: None,
1318                tool_name: None,
1319            }],
1320            "llama3",
1321            0.3,
1322            None,
1323            Some(true),
1324        );
1325
1326        let json = serde_json::to_value(request).unwrap();
1327        let options = json
1328            .get("options")
1329            .expect("options object missing from request body");
1330
1331        assert!(
1332            options.get("temperature").is_some(),
1333            "options.temperature must be present on every wire request"
1334        );
1335        assert!(
1336            options.get("num_ctx").is_some(),
1337            "options.num_ctx must be present on every wire request"
1338        );
1339        assert!(
1340            options.get("num_predict").is_some(),
1341            "options.num_predict must be present on every wire request"
1342        );
1343
1344        assert_eq!(options.get("temperature"), Some(&serde_json::json!(0.3)));
1345        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(8192)));
1346        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(2048)));
1347    }
1348
1349    #[test]
1350    fn request_includes_overridden_tuning() {
1351        let provider = OllamaModelProvider::new("test", None, None).with_tuning(OllamaTuning {
1352            num_ctx: 4096,
1353            num_predict: 1024,
1354            temperature_override: None,
1355        });
1356        let request = provider.build_chat_request(
1357            vec![Message {
1358                role: "user".to_string(),
1359                content: Some("hello".to_string()),
1360                images: None,
1361                tool_calls: None,
1362                tool_name: None,
1363            }],
1364            "llama3",
1365            0.5,
1366            None,
1367        );
1368
1369        let json = serde_json::to_value(request).unwrap();
1370        let options = json.get("options").expect("options present");
1371        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(4096)));
1372        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(1024)));
1373    }
1374
1375    #[test]
1376    fn temperature_override_replaces_per_call_temperature() {
1377        let provider = OllamaModelProvider::new("test", None, None).with_tuning(OllamaTuning {
1378            num_ctx: 8192,
1379            num_predict: 2048,
1380            temperature_override: Some(0.1),
1381        });
1382        let request = provider.build_chat_request(
1383            vec![Message {
1384                role: "user".to_string(),
1385                content: Some("hello".to_string()),
1386                images: None,
1387                tool_calls: None,
1388                tool_name: None,
1389            }],
1390            "llama3",
1391            0.9,
1392            None,
1393        );
1394
1395        let json = serde_json::to_value(request).unwrap();
1396        let options = json.get("options").expect("options present");
1397        assert_eq!(options.get("temperature"), Some(&serde_json::json!(0.1)));
1398    }
1399
1400    #[test]
1401    fn temperature_override_unset_passes_per_call_temperature() {
1402        let provider = OllamaModelProvider::new("test", None, None);
1403        let request = provider.build_chat_request(
1404            vec![Message {
1405                role: "user".to_string(),
1406                content: Some("hello".to_string()),
1407                images: None,
1408                tool_calls: None,
1409                tool_name: None,
1410            }],
1411            "llama3",
1412            0.42,
1413            None,
1414        );
1415
1416        let json = serde_json::to_value(request).unwrap();
1417        let options = json.get("options").expect("options present");
1418        assert_eq!(options.get("temperature"), Some(&serde_json::json!(0.42)));
1419    }
1420
1421    #[test]
1422    fn retry_path_carries_options() {
1423        // The think=true → retry-without-think path in `send_request` uses the
1424        // same `build_chat_request_with_think` builder for both attempts; verify
1425        // the builder produces identical option fields when only `think` differs.
1426        let provider = OllamaModelProvider::new_with_reasoning("test", None, None, Some(true))
1427            .with_tuning(OllamaTuning {
1428                num_ctx: 16384,
1429                num_predict: 4096,
1430                temperature_override: None,
1431            });
1432
1433        let messages = vec![Message {
1434            role: "user".to_string(),
1435            content: Some("hello".to_string()),
1436            images: None,
1437            tool_calls: None,
1438            tool_name: None,
1439        }];
1440
1441        let first = provider.build_chat_request_with_think(
1442            messages.clone(),
1443            "llama3",
1444            0.4,
1445            None,
1446            Some(true),
1447        );
1448        let retry = provider.build_chat_request_with_think(messages, "llama3", 0.4, None, None);
1449
1450        let first_json = serde_json::to_value(first).unwrap();
1451        let retry_json = serde_json::to_value(retry).unwrap();
1452        assert_eq!(
1453            first_json.get("options"),
1454            retry_json.get("options"),
1455            "retry must carry the same options as the first attempt"
1456        );
1457        assert_eq!(first_json.get("think"), Some(&serde_json::json!(true)));
1458        assert!(retry_json.get("think").is_none());
1459        let options = first_json.get("options").unwrap();
1460        assert_eq!(options.get("num_ctx"), Some(&serde_json::json!(16384)));
1461        assert_eq!(options.get("num_predict"), Some(&serde_json::json!(4096)));
1462    }
1463
1464    #[test]
1465    fn response_deserializes() {
1466        let json = r#"{"message":{"role":"assistant","content":"Hello from Ollama!"}}"#;
1467        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1468        assert_eq!(resp.message.content, "Hello from Ollama!");
1469    }
1470
1471    #[test]
1472    fn response_with_empty_content() {
1473        let json = r#"{"message":{"role":"assistant","content":""}}"#;
1474        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1475        assert!(resp.message.content.is_empty());
1476    }
1477
1478    #[test]
1479    fn normalize_response_text_rejects_whitespace_only_content() {
1480        assert_eq!(
1481            OllamaModelProvider::normalize_response_text("\n \t".to_string()),
1482            None
1483        );
1484        assert_eq!(
1485            OllamaModelProvider::normalize_response_text(" hello ".to_string()),
1486            Some("hello".to_string())
1487        );
1488    }
1489
1490    #[test]
1491    fn normalize_response_text_strips_think_tags() {
1492        assert_eq!(
1493            OllamaModelProvider::normalize_response_text(
1494                "<think>reasoning</think> hello".to_string()
1495            ),
1496            Some("hello".to_string())
1497        );
1498    }
1499
1500    #[test]
1501    fn normalize_response_text_rejects_think_only_content() {
1502        assert_eq!(
1503            OllamaModelProvider::normalize_response_text(
1504                "<think>only thinking here</think>".to_string()
1505            ),
1506            None
1507        );
1508    }
1509
1510    #[test]
1511    fn fallback_text_for_empty_content_without_thinking_is_generic() {
1512        let text = OllamaModelProvider::fallback_text_for_empty_content("qwen3-coder", None);
1513        assert!(text.contains("couldn't get a complete response from Ollama"));
1514    }
1515
1516    #[test]
1517    fn response_with_missing_content_defaults_to_empty() {
1518        let json = r#"{"message":{"role":"assistant"}}"#;
1519        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1520        assert!(resp.message.content.is_empty());
1521    }
1522
1523    #[test]
1524    fn response_with_thinking_field_extracts_content() {
1525        let json =
1526            r#"{"message":{"role":"assistant","content":"hello","thinking":"internal reasoning"}}"#;
1527        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1528        assert_eq!(resp.message.content, "hello");
1529    }
1530
1531    #[test]
1532    fn response_with_tool_calls_parses_correctly() {
1533        let json = r#"{"message":{"role":"assistant","content":"","tool_calls":[{"id":"call_123","function":{"name":"shell","arguments":{"command":"date"}}}]}}"#;
1534        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
1535        assert!(resp.message.content.is_empty());
1536        assert_eq!(resp.message.tool_calls.len(), 1);
1537        assert_eq!(resp.message.tool_calls[0].function.name, "shell");
1538    }
1539
1540    #[test]
1541    fn extract_tool_name_handles_nested_tool_call() {
1542        let model_provider = OllamaModelProvider::new("test", None, None);
1543        let tc = OllamaToolCall {
1544            id: Some("call_123".into()),
1545            function: OllamaFunction {
1546                name: "tool_call".into(),
1547                arguments: serde_json::json!({
1548                    "name": "shell",
1549                    "arguments": {"command": "date"}
1550                }),
1551            },
1552        };
1553        let (name, args) = model_provider.extract_tool_name_and_args(&tc);
1554        assert_eq!(name, "shell");
1555        assert_eq!(args.get("command").unwrap(), "date");
1556    }
1557
1558    #[test]
1559    fn extract_tool_name_handles_prefixed_name() {
1560        let model_provider = OllamaModelProvider::new("test", None, None);
1561        let tc = OllamaToolCall {
1562            id: Some("call_123".into()),
1563            function: OllamaFunction {
1564                name: "tool.shell".into(),
1565                arguments: serde_json::json!({"command": "ls"}),
1566            },
1567        };
1568        let (name, args) = model_provider.extract_tool_name_and_args(&tc);
1569        assert_eq!(name, "shell");
1570        assert_eq!(args.get("command").unwrap(), "ls");
1571    }
1572
1573    #[test]
1574    fn extract_tool_name_handles_normal_call() {
1575        let model_provider = OllamaModelProvider::new("test", None, None);
1576        let tc = OllamaToolCall {
1577            id: Some("call_123".into()),
1578            function: OllamaFunction {
1579                name: "file_read".into(),
1580                arguments: serde_json::json!({"path": "/tmp/test"}),
1581            },
1582        };
1583        let (name, args) = model_provider.extract_tool_name_and_args(&tc);
1584        assert_eq!(name, "file_read");
1585        assert_eq!(args.get("path").unwrap(), "/tmp/test");
1586    }
1587
/// `format_tool_calls_for_loop` must produce parseable JSON containing a
/// `tool_calls` array; each entry carries a `function` object with the
/// tool name and a string-valued `arguments` field.
#[test]
fn format_tool_calls_produces_valid_json() {
    let model_provider = OllamaModelProvider::new("test", None, None);
    let tool_calls = vec![OllamaToolCall {
        id: Some("call_abc".into()),
        function: OllamaFunction {
            name: "shell".into(),
            arguments: serde_json::json!({"command": "date"}),
        },
    }];

    let formatted = model_provider.format_tool_calls_for_loop(&tool_calls);
    let parsed: serde_json::Value =
        serde_json::from_str(&formatted).expect("formatted tool calls should be valid JSON");

    // One lookup with a descriptive failure instead of `is_some()` + `unwrap()`.
    let calls = parsed
        .get("tool_calls")
        .expect("`tool_calls` key should be present")
        .as_array()
        .expect("`tool_calls` should be a JSON array");
    assert_eq!(calls.len(), 1);

    let func = calls[0]
        .get("function")
        .expect("tool call should carry a `function` object");
    assert_eq!(
        func.get("name").expect("function should carry a `name`"),
        "shell"
    );
    // arguments should be a string (JSON-encoded)
    assert!(
        func.get("arguments")
            .expect("function should carry `arguments`")
            .is_string(),
        "`arguments` should be a JSON-encoded string"
    );
}
1611
/// An assistant message whose content is a JSON envelope with `tool_calls`
/// is converted into one message with no text content and a single
/// `function` call named `shell` whose arguments are decoded.
#[test]
fn convert_messages_parses_native_assistant_tool_calls() {
    let assistant_turn = ChatMessage {
        role: "assistant".into(),
        content: r#"{"content":null,"tool_calls":[{"id":"call_1","name":"shell","arguments":"{\"command\":\"ls\"}"}]}"#.into(),
    };
    let history = vec![assistant_turn];
    let provider = OllamaModelProvider::new("test", None, None);

    let converted = provider.convert_messages(&history);
    assert_eq!(converted.len(), 1);

    let message = &converted[0];
    assert_eq!(message.role, "assistant");
    assert!(message.content.is_none());

    let calls = message.tool_calls.as_ref().expect("tool calls expected");
    assert_eq!(calls.len(), 1);

    let call = &calls[0];
    assert_eq!(call.kind, "function");
    assert_eq!(call.function.name, "shell");
    assert_eq!(call.function.arguments.get("command").unwrap(), "ls");
}
1634
/// A tool-result message referencing `call_7` is converted into a `tool`
/// message whose `tool_name` is the name from the earlier assistant call
/// with that id (`file_read`).
#[test]
fn convert_messages_maps_tool_result_call_id_to_tool_name() {
    let assistant_call = ChatMessage {
        role: "assistant".into(),
        content: r#"{"content":null,"tool_calls":[{"id":"call_7","name":"file_read","arguments":"{\"path\":\"README.md\"}"}]}"#.into(),
    };
    let tool_reply = ChatMessage {
        role: "tool".into(),
        content: r#"{"tool_call_id":"call_7","content":"ok"}"#.into(),
    };
    let history = vec![assistant_call, tool_reply];
    let provider = OllamaModelProvider::new("test", None, None);

    let converted = provider.convert_messages(&history);
    assert_eq!(converted.len(), 2);

    let reply = &converted[1];
    assert_eq!(reply.role, "tool");
    assert_eq!(reply.tool_name.as_deref(), Some("file_read"));
    assert_eq!(reply.content.as_deref(), Some("ok"));
    assert!(reply.tool_calls.is_none());
}
1657
/// An `[IMAGE:data:...;base64,...]` marker in a user message is removed
/// from the text and its base64 payload is placed in `images`.
#[test]
fn convert_messages_extracts_images_from_user_marker() {
    let user_turn = ChatMessage {
        role: "user".into(),
        content: "Inspect this screenshot [IMAGE:data:image/png;base64,abcd==]".into(),
    };
    let history = vec![user_turn];
    let provider = OllamaModelProvider::new("test", None, None);

    let converted = provider.convert_messages(&history);
    assert_eq!(converted.len(), 1);

    let message = &converted[0];
    assert_eq!(message.role, "user");
    assert_eq!(message.content.as_deref(), Some("Inspect this screenshot"));

    let images = message.images.as_ref().expect("images should be present");
    let expected = vec!["abcd==".to_string()];
    assert_eq!(images, &expected);
}
1679
/// The provider's reported capabilities have native tool calling off and
/// vision on.
#[test]
fn capabilities_disable_native_tools_and_enable_vision() {
    let provider = OllamaModelProvider::new("test", None, None);
    // Fully-qualified call so the `ModelProvider` trait method is the one exercised.
    let capabilities = <OllamaModelProvider as ModelProvider>::capabilities(&provider);

    let native_tools = capabilities.native_tool_calling;
    assert!(
        !native_tools,
        "Ollama should default to prompt-guided tool calling"
    );
    assert!(capabilities.vision);
}
1690
/// `prompt_eval_count` and `eval_count` present in the response JSON are
/// deserialized into `Some(..)` values.
#[test]
fn api_response_parses_eval_counts() {
    let payload = r#"{
            "message": {"content": "Hello", "tool_calls": []},
            "prompt_eval_count": 50,
            "eval_count": 25
        }"#;

    let parsed = serde_json::from_str::<ApiChatResponse>(payload).unwrap();

    assert_eq!(parsed.prompt_eval_count, Some(50));
    assert_eq!(parsed.eval_count, Some(25));
}
1702
/// A response JSON lacking the eval-count fields still deserializes, with
/// both counts left as `None`.
#[test]
fn api_response_parses_without_eval_counts() {
    let payload = r#"{"message": {"content": "Hello", "tool_calls": []}}"#;

    let parsed = serde_json::from_str::<ApiChatResponse>(payload).unwrap();

    assert!(parsed.prompt_eval_count.is_none());
    assert!(parsed.eval_count.is_none());
}
1710
1711    // ═══════════════════════════════════════════════════════════════════════
1712    // <think> tag stripping tests
1713    // ═══════════════════════════════════════════════════════════════════════
1714
/// A leading `<think>...</think>` block is dropped, leaving the trailing text.
#[test]
fn strip_think_tags_removes_single_block() {
    let stripped =
        OllamaModelProvider::strip_think_tags("<think>internal reasoning</think>Hello world");
    assert_eq!(stripped, "Hello world");
}
1720
/// Every `<think>...</think>` block is dropped; the text between them is joined.
#[test]
fn strip_think_tags_removes_multiple_blocks() {
    let stripped =
        OllamaModelProvider::strip_think_tags("<think>first</think>A<think>second</think>B");
    assert_eq!(stripped, "AB");
}
1726
/// An opening `<think>` with no closing tag hides everything after it.
#[test]
fn strip_think_tags_handles_unclosed_block() {
    let stripped = OllamaModelProvider::strip_think_tags("visible<think>hidden tail");
    assert_eq!(stripped, "visible");
}
1732
/// Text containing no `<think>` tags comes back unchanged.
#[test]
fn strip_think_tags_preserves_text_without_tags() {
    let stripped = OllamaModelProvider::strip_think_tags("plain text response");
    assert_eq!(stripped, "plain text response");
}
1741
/// Input consisting solely of a `<think>` block yields an empty string.
#[test]
fn strip_think_tags_returns_empty_for_think_only() {
    let stripped = OllamaModelProvider::strip_think_tags("<think>only thinking</think>");
    assert_eq!(stripped, "");
}
1747
1748    // ═══════════════════════════════════════════════════════════════════════
1749    // effective_content tests
1750    // ═══════════════════════════════════════════════════════════════════════
1751
/// With content holding a `<think>` block followed by a `<tool_call>` and
/// no thinking field, the result keeps the `<tool_call>` and drops the
/// `<think>` tag.
#[test]
fn effective_content_strips_think_and_returns_rest() {
    // `expect` replaces the `is_some()` + `unwrap()` pair so a `None`
    // result fails with a message that says what was expected.
    let text = OllamaModelProvider::effective_content(
        "<think>reasoning</think>\n<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}</tool_call>",
        None,
    )
    .expect("content following the <think> block should be returned");

    assert!(text.contains("<tool_call>"));
    assert!(!text.contains("<think>"));
}
1763
/// With empty content, the text from the thinking field is returned,
/// including its `<tool_call>` markup.
#[test]
fn effective_content_falls_back_to_thinking_field() {
    // `expect` replaces the `is_some()` + `unwrap()` pair so a `None`
    // result fails with a message that says what was expected.
    let text = OllamaModelProvider::effective_content(
        "",
        Some("<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}</tool_call>"),
    )
    .expect("thinking field should be used when content is empty");

    assert!(text.contains("<tool_call>"));
}
1775
/// `None` is returned when neither content nor thinking has usable text:
/// both absent/empty, or both consisting only of `<think>` blocks.
#[test]
fn effective_content_returns_none_when_both_empty() {
    let yields_nothing = |content: &str, thinking: Option<&str>| {
        OllamaModelProvider::effective_content(content, thinking).is_none()
    };

    assert!(yields_nothing("", None));
    assert!(yields_nothing("", Some("")));
    assert!(yields_nothing(
        "<think>only thinking</think>",
        Some("<think>also only thinking</think>")
    ));
}
1788
/// When both content and thinking carry text, the content text is returned.
#[test]
fn effective_content_prefers_content_over_thinking() {
    let chosen = OllamaModelProvider::effective_content("content text", Some("thinking text"));
    assert_eq!(chosen.as_deref(), Some("content text"));
}
1794
/// When content is nothing but a `<think>` block, the thinking field's
/// text is returned instead.
#[test]
fn effective_content_uses_thinking_when_content_is_think_only() {
    let content = "<think>just reasoning</think>";
    let thinking = "actual useful text from thinking field";

    let chosen = OllamaModelProvider::effective_content(content, Some(thinking));

    assert_eq!(
        chosen.as_deref(),
        Some("actual useful text from thinking field")
    );
}
1806
1807    // ═══════════════════════════════════════════════════════════════════════
1808    // Qwen tool-call regression scenario tests
1809    // ═══════════════════════════════════════════════════════════════════════
1810
/// Regression scenario: Qwen emits `<think>` tags followed by a
/// `<tool_call>` block in content, with no structured tool calls.
#[test]
fn qwen_think_with_tool_call_in_content_preserved() {
    // The <tool_call> tags must survive so downstream parse_tool_calls can
    // extract them; only the <think> block is removed.
    let content = "<think>I should list files</think>\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}\n</tool_call>";

    // `expect` replaces the `is_some()` + `unwrap()` pair so a `None`
    // result fails with a message that says what was expected.
    let text = OllamaModelProvider::effective_content(content, None)
        .expect("tool-call markup after the <think> block should be returned");

    assert!(text.contains("<tool_call>"));
    assert!(text.contains("shell"));
    assert!(!text.contains("<think>"));
}
1824
/// Regression scenario: with think=true Ollama separates thinking, but
/// Qwen may put the tool-call XML in the thinking field and leave content
/// empty.
#[test]
fn qwen_thinking_field_with_tool_call_xml_extracted() {
    let content = "";
    let thinking = "I need to check the date\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}\n</tool_call>";

    // `expect` replaces the `is_some()` + `unwrap()` pair so a `None`
    // result fails with a message that says what was expected.
    let text = OllamaModelProvider::effective_content(content, Some(thinking))
        .expect("tool-call markup in the thinking field should be returned");

    assert!(text.contains("<tool_call>"));
    assert!(text.contains("date"));
}
1837}