Skip to main content

zeroclaw_api/
model_provider.rs

1use crate::tool::ToolSpec;
2use async_trait::async_trait;
3use futures_util::{StreamExt, stream};
4use serde::{Deserialize, Serialize};
5use std::fmt::Write;
6use std::sync::Arc;
7
/// Largest `budget_tokens` value defined by this module (presumably the upper
/// clamp that pairs with [`MIN_BUDGET_TOKENS`] — confirm against the resolver).
pub const MAX_BUDGET_TOKENS: u32 = 128_000;
/// Smallest accepted extended-thinking `budget_tokens`, per Anthropic's
/// documented minimum. The provider answers smaller budgets with HTTP 400, so
/// clamping when the budget is resolved points at the mistake more clearly
/// than waiting for the first API call to fail.
pub const MIN_BUDGET_TOKENS: u32 = 1_024;
13
/// Settings for a provider's native extended-thinking mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NativeThinkingParams {
    /// Token budget dedicated to thinking.
    pub budget_tokens: u32,
}
19
20/// A single message in a conversation.
21#[derive(Debug, Clone, Serialize, Deserialize)]
22pub struct ChatMessage {
23    pub role: String,
24    pub content: String,
25}
26
27impl ChatMessage {
28    pub fn system(content: impl Into<String>) -> Self {
29        Self {
30            role: "system".into(),
31            content: content.into(),
32        }
33    }
34
35    pub fn user(content: impl Into<String>) -> Self {
36        Self {
37            role: "user".into(),
38            content: content.into(),
39        }
40    }
41
42    pub fn assistant(content: impl Into<String>) -> Self {
43        Self {
44            role: "assistant".into(),
45            content: content.into(),
46        }
47    }
48
49    pub fn tool(content: impl Into<String>) -> Self {
50        Self {
51            role: "tool".into(),
52            content: content.into(),
53        }
54    }
55}
56
57/// A tool call requested by the LLM.
58#[derive(Debug, Clone, Serialize, Deserialize)]
59pub struct ToolCall {
60    pub id: String,
61    pub name: String,
62    pub arguments: String,
63    /// ModelProvider-specific opaque extension fields that must round-trip
64    /// unchanged on follow-up turns (e.g. Gemini 3 `thoughtSignature`
65    /// carried as `extra_content.google.thought_signature`).
66    #[serde(default, skip_serializing_if = "Option::is_none")]
67    pub extra_content: Option<serde_json::Value>,
68}
69
/// Token counts reported for one LLM API response, normalized across
/// providers.
///
/// Contract:
/// - `input_tokens` is the **total prompt size** — every token the model saw,
///   whether or not it came from cache.
/// - `cached_input_tokens` is the **subset** of `input_tokens` that the prompt
///   cache served, hence `cached_input_tokens <= input_tokens`.
/// - The billable uncached share is `input_tokens - cached_input_tokens`.
///
/// How adapters reach this shape:
/// - OpenAI/Compatible: `prompt_tokens` is already the total and
///   `cached_tokens` is already a subset, so both are used as-is.
/// - Anthropic: per
///   <https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching>
///   the API reports three DISJOINT buckets, with
///   `total_input = cache_read_input_tokens + cache_creation_input_tokens + input_tokens`;
///   Anthropic's own `input_tokens` covers *only* the tokens after the last
///   cache breakpoint. The adapter adds the three buckets to fill
///   `input_tokens` here and stores `cache_read_input_tokens` (the
///   discount-billed subset) in `cached_input_tokens`.
#[derive(Debug, Clone, Default)]
pub struct TokenUsage {
    /// Total prompt size (uncached plus cached input tokens).
    pub input_tokens: Option<u64>,
    /// Output token count, when the provider reported one.
    pub output_tokens: Option<u64>,
    /// Portion of `input_tokens` served from the model_provider's prompt cache
    /// (Anthropic `cache_read_input_tokens`, OpenAI
    /// `prompt_tokens_details.cached_tokens`).
    pub cached_input_tokens: Option<u64>,
}
98
99/// An LLM response that may contain text, tool calls, or both.
100#[derive(Debug, Clone)]
101pub struct ChatResponse {
102    /// Text content of the response (may be empty if only tool calls).
103    pub text: Option<String>,
104    /// Tool calls requested by the LLM.
105    pub tool_calls: Vec<ToolCall>,
106    /// Token usage reported by the model_provider, if available.
107    pub usage: Option<TokenUsage>,
108    /// Raw reasoning/thinking content from thinking models (e.g. DeepSeek-R1,
109    /// Kimi K2.5, GLM-4.7). Preserved as an opaque pass-through so it can be
110    /// sent back in subsequent API requests — some model_providers reject tool-call
111    /// history that omits this field.
112    pub reasoning_content: Option<String>,
113}
114
115impl ChatResponse {
116    /// True when the LLM wants to invoke at least one tool.
117    pub fn has_tool_calls(&self) -> bool {
118        !self.tool_calls.is_empty()
119    }
120
121    /// Convenience: return text content or empty string.
122    pub fn text_or_empty(&self) -> &str {
123        self.text.as_deref().unwrap_or("")
124    }
125}
126
127/// Request payload for model_provider chat calls.
128#[derive(Debug, Clone, Copy)]
129pub struct ChatRequest<'a> {
130    pub messages: &'a [ChatMessage],
131    pub tools: Option<&'a [ToolSpec]>,
132    /// Native extended thinking parameters. When `Some`, providers that
133    /// support extended thinking should send a dedicated thinking budget
134    /// in the API request and force `temperature = 1.0`.
135    pub thinking: Option<NativeThinkingParams>,
136}
137
138/// A tool result to feed back to the LLM.
139#[derive(Debug, Clone, Serialize, Deserialize)]
140pub struct ToolResultMessage {
141    pub tool_call_id: String,
142    pub content: String,
143}
144
145/// A message in a multi-turn conversation, including tool interactions.
146#[derive(Debug, Clone, Serialize, Deserialize)]
147#[serde(tag = "type", content = "data")]
148pub enum ConversationMessage {
149    /// Regular chat message (system, user, assistant).
150    Chat(ChatMessage),
151    /// Tool calls from the assistant (stored for history fidelity).
152    AssistantToolCalls {
153        text: Option<String>,
154        tool_calls: Vec<ToolCall>,
155        /// Raw reasoning content from thinking models, preserved for round-trip
156        /// fidelity with model_provider APIs that require it.
157        reasoning_content: Option<String>,
158    },
159    /// Results of tool executions, fed back to the LLM.
160    ToolResults(Vec<ToolResultMessage>),
161}
162
/// One piece of a streaming response.
#[derive(Debug, Clone)]
pub struct StreamChunk {
    /// Text carried by this chunk.
    pub delta: String,
    /// Reasoning/thinking text (chain-of-thought emitted by thinking models).
    pub reasoning: Option<String>,
    /// `true` on the last chunk of the stream.
    pub is_final: bool,
    /// Estimated (not exact) token count for this chunk.
    pub token_count: usize,
}

impl StreamChunk {
    /// Non-final chunk carrying `text` as its delta.
    pub fn delta(text: impl Into<String>) -> Self {
        let delta = text.into();
        Self {
            delta,
            reasoning: None,
            is_final: false,
            token_count: 0,
        }
    }

    /// Non-final chunk carrying `text` as reasoning, with an empty delta.
    pub fn reasoning(text: impl Into<String>) -> Self {
        let reasoning = Some(text.into());
        Self {
            delta: String::new(),
            reasoning,
            is_final: false,
            token_count: 0,
        }
    }

    /// Final chunk with no content.
    pub fn final_chunk() -> Self {
        Self {
            is_final: true,
            ..Self::delta(String::new())
        }
    }

    /// Error chunk: a final chunk whose delta holds `message`.
    pub fn error(message: impl Into<String>) -> Self {
        Self {
            is_final: true,
            ..Self::delta(message)
        }
    }

    /// Fill `token_count` with a rough estimate of one token per 4 bytes of
    /// `delta`, rounded up.
    pub fn with_token_estimate(self) -> Self {
        let token_count = self.delta.len().div_ceil(4);
        Self {
            token_count,
            ..self
        }
    }
}
223
224/// Structured events emitted by model_provider streaming APIs.
225///
226/// This extends plain text chunk streaming with explicit tool-call signals so
227/// agent loops can preserve native tool semantics without parsing payload text.
228#[derive(Debug, Clone)]
229pub enum StreamEvent {
230    /// Text delta from the assistant.
231    TextDelta(StreamChunk),
232    /// Structured tool call emitted during streaming.
233    ToolCall(ToolCall),
234    /// A tool call that was already executed by the model_provider (e.g. Claude Code proxy).
235    /// Emitted for observability only — not re-executed by the agent's dispatcher.
236    PreExecutedToolCall { name: String, args: String },
237    /// The result of a pre-executed tool call.
238    PreExecutedToolResult { name: String, output: String },
239    /// Token usage reported by the provider, typically just before [`StreamEvent::Final`].
240    /// Providers that do not surface usage in streaming responses simply omit this event.
241    Usage(TokenUsage),
242    /// Stream has completed.
243    Final,
244}
245
246impl StreamEvent {
247    pub fn from_chunk(chunk: StreamChunk) -> Self {
248        if chunk.is_final {
249            Self::Final
250        } else {
251            Self::TextDelta(chunk)
252        }
253    }
254}
255
/// Options for streaming chat requests.
///
/// Note that the derived [`Default`] produces `enabled: false` and
/// `count_tokens: false`; use [`StreamOptions::new`] to pick the `enabled`
/// value explicitly.
#[derive(Debug, Clone, Copy, Default)]
pub struct StreamOptions {
    /// Whether streaming is enabled. `StreamOptions::default()` leaves this
    /// `false`.
    pub enabled: bool,
    /// Whether to include token counts in chunks.
    pub count_tokens: bool,
}

impl StreamOptions {
    /// Create streaming options with the given `enabled` flag and token
    /// counting switched off.
    pub fn new(enabled: bool) -> Self {
        Self {
            enabled,
            count_tokens: false,
        }
    }

    /// Return these options with token counting switched on; `enabled` is
    /// left as it was.
    pub fn with_token_count(mut self) -> Self {
        self.count_tokens = true;
        self
    }
}
280
281/// Result type for streaming operations.
282pub type StreamResult<T> = std::result::Result<T, StreamError>;
283
284/// Errors that can occur during streaming.
285#[derive(Debug, thiserror::Error)]
286pub enum StreamError {
287    #[error("HTTP error: {0}")]
288    Http(String),
289
290    #[error("JSON parse error: {0}")]
291    Json(serde_json::Error),
292
293    #[error("Invalid SSE format: {0}")]
294    InvalidSse(String),
295
296    #[error("ModelProvider error: {0}")]
297    ModelProvider(String),
298
299    #[error("IO error: {0}")]
300    Io(#[from] std::io::Error),
301}
302
303/// Structured error returned when a requested capability is not supported.
304#[derive(Debug, Clone, thiserror::Error)]
305#[error(
306    "provider_capability_error model_provider={model_provider} capability={capability} message={message}"
307)]
308pub struct ProviderCapabilityError {
309    pub model_provider: String,
310    pub capability: String,
311    pub message: String,
312}
313
/// Feature flags a model_provider declares about itself.
///
/// Callers use these to adapt tool-calling modes and request formatting. The
/// derived `Default` leaves every flag `false`.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ProviderCapabilities {
    /// Native tool calling through API primitives is available.
    pub native_tool_calling: bool,
    /// Vision / image inputs are accepted.
    pub vision: bool,
    /// Prompt caching is available.
    pub prompt_caching: bool,
    /// Native extended thinking is available.
    pub extended_thinking: bool,
}
330
331/// ModelProvider-specific tool payload formats.
332#[derive(Debug, Clone)]
333pub enum ToolsPayload {
334    /// Gemini API format (functionDeclarations).
335    Gemini {
336        function_declarations: Vec<serde_json::Value>,
337    },
338    /// Anthropic Messages API format (tools with input_schema).
339    Anthropic { tools: Vec<serde_json::Value> },
340    /// OpenAI Chat Completions API format (tools with function).
341    OpenAI { tools: Vec<serde_json::Value> },
342    /// Prompt-guided fallback (tools injected as text in system prompt).
343    PromptGuided { instructions: String },
344}
345
/// Vendor-neutral sampling temperature. 0.7 is the typical default documented
/// by OpenAI, Gemini, OpenRouter and most OpenAI-compatible endpoints;
/// Anthropic and Ollama override it (1.0 and 0.0 respectively).
pub const BASELINE_TEMPERATURE: f64 = 0.7;

/// Output-token budget with enough room for typical agent turns. Provider
/// families override it where the model's own context window is the binding
/// constraint.
pub const BASELINE_MAX_TOKENS: u32 = 4096;

/// HTTP timeout, in seconds, for cloud inference. Local model_providers
/// (Ollama) raise it, because CPU/GPU-bound inference is slower than a round
/// trip to a hyperscaler.
pub const BASELINE_TIMEOUT_SECS: u64 = 120;

/// Wire protocol assumed when a model_provider does not declare one. The
/// "responses" protocol is used only by OpenAI's Codex stack; everything else
/// speaks the classic chat completions shape.
pub const BASELINE_WIRE_API: &str = "chat_completions";
365
366/// Per-token pricing for a model. All values are per-token rates as strings
367/// expressed in USD per token — e.g. `"0.000005"` = $5.00 per 1M tokens.
368///
369/// Deserialized from the `pricing` object in OpenAI-compatible `/models`
370/// responses (Kilo Gateway, OpenRouter, etc.).
371#[derive(Debug, Clone, Deserialize, Serialize)]
372pub struct ModelPricing {
373    /// Input/prompt tokens per-token rate (USD per token, e.g. `"0.000005"` = $5/1M tokens).
374    #[serde(default, skip_serializing_if = "Option::is_none")]
375    pub prompt: Option<String>,
376    /// Output/completion tokens per-token rate (USD per token, e.g. `"0.000020"` = $20/1M tokens).
377    #[serde(default, skip_serializing_if = "Option::is_none")]
378    pub completion: Option<String>,
379    /// Cached input read rate — per-token charge for reading cached prompt data
380    /// (USD per token, e.g. `"0.000001"` = $1/1M tokens). Kilo Gateway specific.
381    #[serde(default, skip_serializing_if = "Option::is_none")]
382    pub input_cache_read: Option<String>,
383    /// Cached input write rate — per-token charge for writing prompt data to cache
384    /// (USD per token, e.g. `"0.000001"` = $1/1M tokens). Kilo Gateway specific.
385    #[serde(default, skip_serializing_if = "Option::is_none")]
386    pub input_cache_write: Option<String>,
387}
388
389/// Model info with optional pricing — returned by `list_models_with_pricing`.
390#[derive(Debug, Clone, Serialize)]
391pub struct ModelInfo {
392    pub id: String,
393    #[serde(skip_serializing_if = "Option::is_none")]
394    pub pricing: Option<ModelPricing>,
395}
396
397#[async_trait]
398pub trait ModelProvider: Send + Sync + crate::attribution::Attributable {
399    /// Query model_provider capabilities.
400    fn capabilities(&self) -> ProviderCapabilities {
401        ProviderCapabilities::default()
402    }
403
404    // ── ModelProvider-family defaults ────────────────────────────────────────────
405    // `temperature` is `Option<f64>` end-to-end on the wire. `None` from the
406    // caller means "do not send a `temperature` field"; serialization handles
407    // that via `#[serde(skip_serializing_if)]`. The `default_temperature()`
408    // method below documents the family's preferred default for non-wire uses
409    // (introspection, tests). It is NOT consulted to substitute a value for
410    // `None` in chat methods.
411
412    /// Family-preferred temperature default. Override per family. Documented
413    /// for introspection only; never use to convert `None` into a wire value.
414    fn default_temperature(&self) -> f64 {
415        BASELINE_TEMPERATURE
416    }
417
418    /// Max output tokens used when the caller / config doesn't set one.
419    fn default_max_tokens(&self) -> u32 {
420        BASELINE_MAX_TOKENS
421    }
422
423    /// HTTP timeout (seconds) used when the caller / config doesn't set one.
424    fn default_timeout_secs(&self) -> u64 {
425        BASELINE_TIMEOUT_SECS
426    }
427
428    /// Canonical public API endpoint, when there is one. Returned as a
429    /// string slice so model_provider impls can serve from `const &'static str`s
430    /// without allocations. `None` = model_provider has no universal endpoint
431    /// (local model_providers, auth-less CLIs, user-BYO endpoints).
432    fn default_base_url(&self) -> Option<&str> {
433        None
434    }
435
436    /// Wire protocol variant. Either `"responses"` (OpenAI Codex-style) or
437    /// `"chat_completions"` (everything else). Providers override to their
438    /// native format.
439    fn default_wire_api(&self) -> &str {
440        BASELINE_WIRE_API
441    }
442
443    /// Convert tool specifications to provider-native format.
444    fn convert_tools(&self, tools: &[ToolSpec]) -> ToolsPayload {
445        ToolsPayload::PromptGuided {
446            instructions: build_tool_instructions_text(tools),
447        }
448    }
449
450    /// Simple one-shot chat (single user message, no explicit system prompt).
451    ///
452    /// `temperature == None` means the field is omitted on the wire.
453    async fn simple_chat(
454        &self,
455        message: &str,
456        model: &str,
457        temperature: Option<f64>,
458    ) -> anyhow::Result<String> {
459        self.chat_with_system(None, message, model, temperature)
460            .await
461    }
462
463    /// One-shot chat with optional system prompt. See `simple_chat` for
464    /// the `temperature` contract.
465    async fn chat_with_system(
466        &self,
467        system_prompt: Option<&str>,
468        message: &str,
469        model: &str,
470        temperature: Option<f64>,
471    ) -> anyhow::Result<String>;
472
473    /// Fetch the list of available model IDs for this model_provider.
474    ///
475    /// Used by onboard to present a live model picker. Default bails with
476    /// "not supported"; concrete model_providers override to hit their own public
477    /// endpoint (OpenRouter, Ollama) or delegate to the shared models.dev
478    /// catalog (no auth required) in `zeroclaw_providers::models_dev`.
479    async fn list_models(&self) -> anyhow::Result<Vec<String>> {
480        anyhow::bail!("live model listing is not supported for this model_provider")
481    }
482
483    /// Fetch the list of available models with pricing data for this
484    /// model_provider. Default delegates to `list_models` and returns no
485    /// pricing. Concrete providers that receive pricing from their `/models`
486    /// endpoint override this to return enriched data.
487    async fn list_models_with_pricing(&self) -> anyhow::Result<Vec<ModelInfo>> {
488        Ok(self
489            .list_models()
490            .await?
491            .into_iter()
492            .map(|id| ModelInfo { id, pricing: None })
493            .collect())
494    }
495
496    /// Multi-turn conversation. See `simple_chat` for the `temperature`
497    /// contract.
498    async fn chat_with_history(
499        &self,
500        messages: &[ChatMessage],
501        model: &str,
502        temperature: Option<f64>,
503    ) -> anyhow::Result<String> {
504        let system = messages
505            .iter()
506            .find(|m| m.role == "system")
507            .map(|m| m.content.as_str());
508        let last_user = messages
509            .iter()
510            .rfind(|m| m.role == "user")
511            .map(|m| m.content.as_str())
512            .unwrap_or("");
513        self.chat_with_system(system, last_user, model, temperature)
514            .await
515    }
516
517    /// Structured chat API for agent loop callers. See `simple_chat` for
518    /// the `temperature` contract.
519    async fn chat(
520        &self,
521        request: ChatRequest<'_>,
522        model: &str,
523        temperature: Option<f64>,
524    ) -> anyhow::Result<ChatResponse> {
525        if let Some(tools) = request.tools
526            && !tools.is_empty()
527            && !self.supports_native_tools()
528        {
529            let tool_instructions = match self.convert_tools(tools) {
530                ToolsPayload::PromptGuided { instructions } => instructions,
531                payload => {
532                    anyhow::bail!(
533                        "ModelProvider returned non-prompt-guided tools payload ({payload:?}) while supports_native_tools() is false"
534                    )
535                }
536            };
537            let mut modified_messages = request.messages.to_vec();
538
539            if let Some(system_message) = modified_messages.iter_mut().find(|m| m.role == "system")
540            {
541                if !system_message.content.is_empty() {
542                    system_message.content.push_str("\n\n");
543                }
544                system_message.content.push_str(&tool_instructions);
545            } else {
546                modified_messages.insert(0, ChatMessage::system(tool_instructions));
547            }
548
549            let text = self
550                .chat_with_history(&modified_messages, model, temperature)
551                .await?;
552            return Ok(ChatResponse {
553                text: Some(text),
554                tool_calls: Vec::new(),
555                usage: None,
556                reasoning_content: None,
557            });
558        }
559
560        let text = self
561            .chat_with_history(request.messages, model, temperature)
562            .await?;
563        Ok(ChatResponse {
564            text: Some(text),
565            tool_calls: Vec::new(),
566            usage: None,
567            reasoning_content: None,
568        })
569    }
570
571    /// Whether model_provider supports native tool calls over API.
572    fn supports_native_tools(&self) -> bool {
573        self.capabilities().native_tool_calling
574    }
575
576    /// Whether model_provider supports multimodal vision input.
577    fn supports_vision(&self) -> bool {
578        self.capabilities().vision
579    }
580
581    /// Warm up the HTTP connection pool.
582    async fn warmup(&self) -> anyhow::Result<()> {
583        Ok(())
584    }
585
586    /// Chat with tool definitions for native function calling support.
587    /// See `simple_chat` for the `temperature` contract.
588    async fn chat_with_tools(
589        &self,
590        messages: &[ChatMessage],
591        _tools: &[serde_json::Value],
592        model: &str,
593        temperature: Option<f64>,
594    ) -> anyhow::Result<ChatResponse> {
595        let text = self.chat_with_history(messages, model, temperature).await?;
596        Ok(ChatResponse {
597            text: Some(text),
598            tool_calls: Vec::new(),
599            usage: None,
600            reasoning_content: None,
601        })
602    }
603
604    /// Whether model_provider supports streaming responses.
605    fn supports_streaming(&self) -> bool {
606        false
607    }
608
609    /// Whether model_provider can emit structured tool-call stream events.
610    fn supports_streaming_tool_events(&self) -> bool {
611        false
612    }
613
614    /// Streaming chat with optional system prompt. See `simple_chat` for
615    /// the `temperature` contract.
616    fn stream_chat_with_system(
617        &self,
618        _system_prompt: Option<&str>,
619        _message: &str,
620        _model: &str,
621        _temperature: Option<f64>,
622        _options: StreamOptions,
623    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
624        stream::empty().boxed()
625    }
626
627    /// Streaming chat with history. See `simple_chat` for the `temperature`
628    /// contract.
629    fn stream_chat_with_history(
630        &self,
631        messages: &[ChatMessage],
632        model: &str,
633        temperature: Option<f64>,
634        options: StreamOptions,
635    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
636        let system = messages
637            .iter()
638            .find(|m| m.role == "system")
639            .map(|m| m.content.as_str());
640        let last_user = messages
641            .iter()
642            .rfind(|m| m.role == "user")
643            .map(|m| m.content.as_str())
644            .unwrap_or("");
645        self.stream_chat_with_system(system, last_user, model, temperature, options)
646    }
647
648    /// Structured streaming chat interface. See `simple_chat` for the
649    /// `temperature` contract.
650    fn stream_chat(
651        &self,
652        request: ChatRequest<'_>,
653        model: &str,
654        temperature: Option<f64>,
655        options: StreamOptions,
656    ) -> stream::BoxStream<'static, StreamResult<StreamEvent>> {
657        self.stream_chat_with_history(request.messages, model, temperature, options)
658            .map(|chunk_result| chunk_result.map(StreamEvent::from_chunk))
659            .boxed()
660    }
661}
662
663/// Blanket implementation: `Arc<T>` delegates all `ModelProvider` methods to `T`.
664///
665/// This eliminates the need for manual `impl ModelProvider for Arc<MyModelProvider>`
666/// boilerplate in test and production code.
667#[async_trait]
668impl<T: ModelProvider + ?Sized> ModelProvider for Arc<T> {
669    fn capabilities(&self) -> ProviderCapabilities {
670        self.as_ref().capabilities()
671    }
672
673    fn default_max_tokens(&self) -> u32 {
674        self.as_ref().default_max_tokens()
675    }
676
677    fn default_temperature(&self) -> f64 {
678        self.as_ref().default_temperature()
679    }
680
681    fn default_timeout_secs(&self) -> u64 {
682        self.as_ref().default_timeout_secs()
683    }
684
685    fn default_base_url(&self) -> Option<&str> {
686        self.as_ref().default_base_url()
687    }
688
689    fn default_wire_api(&self) -> &str {
690        self.as_ref().default_wire_api()
691    }
692
693    fn convert_tools(&self, tools: &[ToolSpec]) -> ToolsPayload {
694        self.as_ref().convert_tools(tools)
695    }
696
697    fn supports_native_tools(&self) -> bool {
698        self.as_ref().supports_native_tools()
699    }
700
701    fn supports_vision(&self) -> bool {
702        self.as_ref().supports_vision()
703    }
704
705    async fn chat_with_system(
706        &self,
707        system_prompt: Option<&str>,
708        message: &str,
709        model: &str,
710        temperature: Option<f64>,
711    ) -> anyhow::Result<String> {
712        self.as_ref()
713            .chat_with_system(system_prompt, message, model, temperature)
714            .await
715    }
716
717    async fn chat_with_history(
718        &self,
719        messages: &[ChatMessage],
720        model: &str,
721        temperature: Option<f64>,
722    ) -> anyhow::Result<String> {
723        self.as_ref()
724            .chat_with_history(messages, model, temperature)
725            .await
726    }
727
728    async fn chat(
729        &self,
730        request: ChatRequest<'_>,
731        model: &str,
732        temperature: Option<f64>,
733    ) -> anyhow::Result<ChatResponse> {
734        self.as_ref().chat(request, model, temperature).await
735    }
736
737    async fn warmup(&self) -> anyhow::Result<()> {
738        self.as_ref().warmup().await
739    }
740
741    async fn chat_with_tools(
742        &self,
743        messages: &[ChatMessage],
744        tools: &[serde_json::Value],
745        model: &str,
746        temperature: Option<f64>,
747    ) -> anyhow::Result<ChatResponse> {
748        self.as_ref()
749            .chat_with_tools(messages, tools, model, temperature)
750            .await
751    }
752
753    fn supports_streaming(&self) -> bool {
754        self.as_ref().supports_streaming()
755    }
756
757    fn supports_streaming_tool_events(&self) -> bool {
758        self.as_ref().supports_streaming_tool_events()
759    }
760
761    fn stream_chat_with_system(
762        &self,
763        system_prompt: Option<&str>,
764        message: &str,
765        model: &str,
766        temperature: Option<f64>,
767        options: StreamOptions,
768    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
769        self.as_ref()
770            .stream_chat_with_system(system_prompt, message, model, temperature, options)
771    }
772
773    fn stream_chat_with_history(
774        &self,
775        messages: &[ChatMessage],
776        model: &str,
777        temperature: Option<f64>,
778        options: StreamOptions,
779    ) -> stream::BoxStream<'static, StreamResult<StreamChunk>> {
780        self.as_ref()
781            .stream_chat_with_history(messages, model, temperature, options)
782    }
783
784    fn stream_chat(
785        &self,
786        request: ChatRequest<'_>,
787        model: &str,
788        temperature: Option<f64>,
789        options: StreamOptions,
790    ) -> stream::BoxStream<'static, StreamResult<StreamEvent>> {
791        self.as_ref()
792            .stream_chat(request, model, temperature, options)
793    }
794}
795
796/// Build tool instructions text for prompt-guided tool calling.
797pub fn build_tool_instructions_text(tools: &[ToolSpec]) -> String {
798    let mut instructions = String::new();
799
800    instructions.push_str("## Tool Use Protocol\n\n");
801    instructions.push_str("To use a tool, wrap a JSON object in <tool_call></tool_call> tags:\n\n");
802    instructions.push_str("<tool_call>\n");
803    instructions.push_str(r#"{"name": "tool_name", "arguments": {"param": "value"}}"#);
804    instructions.push_str("\n</tool_call>\n\n");
805    instructions.push_str("You may use multiple tool calls in a single response. ");
806    instructions.push_str("After tool execution, results appear in <tool_result> tags. ");
807    instructions
808        .push_str("Continue reasoning with the results until you can give a final answer.\n\n");
809    instructions.push_str("### Available Tools\n\n");
810
811    for tool in tools {
812        writeln!(&mut instructions, "**{}**: {}", tool.name, tool.description)
813            .expect("writing to String cannot fail");
814
815        let parameters =
816            serde_json::to_string(&tool.parameters).unwrap_or_else(|_| "{}".to_string());
817        writeln!(&mut instructions, "Parameters: `{parameters}`")
818            .expect("writing to String cannot fail");
819        instructions.push('\n');
820    }
821
822    instructions
823}