Skip to main content

zeroclaw_providers/
anthropic.rs

1use crate::traits::{
2    ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse,
3    ModelProvider, ProviderCapabilities, StreamChunk, StreamError, StreamEvent, StreamOptions,
4    StreamResult, TokenUsage, ToolCall as ProviderToolCall,
5};
6use async_trait::async_trait;
7use base64::Engine as _;
8use futures_util::stream::{self, StreamExt};
9use reqwest::Client;
10use serde::{Deserialize, Serialize};
11use zeroclaw_api::tool::ToolSpec;
12
/// Anthropic's API documentation lists 1.0 as the default sampling temperature.
const TEMPERATURE_DEFAULT: f64 = 1.0;
/// Anthropic's public API endpoint. Overridable via `model_providers.<name>.base_url`.
///
/// `AnthropicModelProvider::with_base_url` falls back to this value when the
/// caller supplies no override.
pub(crate) const BASE_URL: &str = "https://api.anthropic.com";
17
/// Provider that talks to Anthropic's native API.
///
/// Construct it with `AnthropicModelProvider::new` or
/// `AnthropicModelProvider::with_base_url`.
pub struct AnthropicModelProvider {
    /// `[model_providers.anthropic.<alias>]` config-key alias.
    alias: String,
    /// Credential used for authentication. The constructor trims it and
    /// stores `None` when it is absent or blank.
    credential: Option<String>,
    /// API root; the constructor strips trailing `/` characters and defaults
    /// to `BASE_URL`.
    base_url: String,
    /// Output-token ceiling for requests. Starts at the crate-wide baseline
    /// and can be replaced through `with_max_tokens`.
    max_tokens: u32,
}
25
/// Test-only request body with a plain-string system prompt and text-only
/// messages. Compiled only under `cfg(test)`.
#[cfg(test)]
#[derive(Debug, Serialize)]
struct ChatRequest {
    /// Model identifier.
    model: String,
    /// Output-token ceiling.
    max_tokens: u32,
    /// Optional system prompt; left out of the JSON entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    system: Option<String>,
    /// Conversation turns.
    messages: Vec<Message>,
    /// Sampling temperature; always serialized in this shape.
    temperature: f64,
}
36
/// Test-only chat turn whose content is a single plain string.
#[cfg(test)]
#[derive(Debug, Serialize)]
struct Message {
    /// Speaker role of the turn.
    role: String,
    /// Text of the turn.
    content: String,
}
43
/// Test-only response body: just the list of returned content blocks.
#[cfg(test)]
#[derive(Debug, Deserialize)]
struct ChatResponse {
    /// Content blocks in the order they appear in the JSON.
    content: Vec<ContentBlock>,
}
49
/// Test-only content block of a [`ChatResponse`].
#[cfg(test)]
#[derive(Debug, Deserialize)]
struct ContentBlock {
    /// Block discriminator, read from the JSON `type` field.
    #[serde(rename = "type")]
    kind: String,
    /// Text payload; `None` when the field is absent.
    #[serde(default)]
    text: Option<String>,
}
58
/// Request body in Anthropic's native shape.
///
/// Every `Option` field is left out of the serialized JSON when it is `None`.
/// The `'a` lifetime is the borrow of the tool specs referenced by `tools`.
#[derive(Debug, Serialize)]
struct NativeChatRequest<'a> {
    /// Model identifier.
    model: String,
    /// Output-token ceiling.
    max_tokens: u32,
    /// System prompt, either a bare string or a list of blocks.
    #[serde(skip_serializing_if = "Option::is_none")]
    system: Option<SystemPrompt>,
    /// Conversation turns.
    messages: Vec<NativeMessage>,
    /// Sampling temperature; `None` omits the field so the API default applies.
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    /// Tool definitions offered to the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<NativeToolSpec<'a>>>,
    /// Raw tool-choice directive passed through as JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_choice: Option<serde_json::Value>,
    /// Streaming flag.
    #[serde(skip_serializing_if = "Option::is_none")]
    stream: Option<bool>,
    /// Extended-thinking configuration.
    #[serde(skip_serializing_if = "Option::is_none")]
    thinking: Option<NativeThinkingConfig>,
}
77
/// Fixed-budget thinking configuration, serialized as
/// `{"type": <kind>, "budget_tokens": <n>}`.
#[derive(Debug, Serialize)]
struct NativeThinkingConfig {
    /// Thinking mode, written to the JSON `type` field.
    #[serde(rename = "type")]
    kind: &'static str,
    /// Token budget reserved for thinking.
    budget_tokens: u32,
}
84
/// Reports whether `temperature` must be left out of requests for `model`.
///
/// Claude opus-4-7 rejects `temperature` with a 400 on the native Anthropic
/// API (the same behavior as on Bedrock, see #6144 / #6147), so requests for
/// that family rely on the model default instead. The check is a
/// case-sensitive substring match so inference-profile prefixes and
/// version-suffix variants of the model id are covered as well.
fn anthropic_model_omits_temperature(model: &str) -> bool {
    const OPUS_4_7_FAMILY: &str = "claude-opus-4-7";
    model.contains(OPUS_4_7_FAMILY)
}
93
/// Reports whether `model` accepts the fixed-budget native-thinking request
/// shape (`{"thinking": {"type": "enabled", "budget_tokens": N}}`).
///
/// Opus 4.7 only supports adaptive thinking and answers a fixed budget with a
/// 400. Until adaptive thinking is implemented, that family is kept on
/// prompt-based reasoning and this function returns `false` for it.
/// Anthropic's extended-thinking docs:
/// <https://platform.claude.com/docs/en/build-with-claude/extended-thinking>
fn anthropic_model_supports_native_thinking(model: &str) -> bool {
    let adaptive_only = model.contains("claude-opus-4-7");
    !adaptive_only
}
103
/// One conversation turn in Anthropic's native request format.
#[derive(Debug, Serialize)]
struct NativeMessage {
    /// Speaker role; the converters in this file emit `"user"` and `"assistant"`.
    role: String,
    /// Ordered content blocks that make up the turn.
    content: Vec<NativeContentOut>,
}
109
/// Payload of an image content block.
#[derive(Debug, Serialize)]
struct ImageSource {
    /// Source encoding, written to the JSON `type` field (the message
    /// converter in this file sets it to `"base64"`).
    #[serde(rename = "type")]
    source_type: String,
    /// MIME type of the image, e.g. `image/png`.
    media_type: String,
    /// Image bytes encoded as text.
    data: String,
}
117
/// Outgoing content block. Serialized as an internally tagged object whose
/// `type` field is the variant's renamed tag.
#[derive(Debug, Serialize)]
#[serde(tag = "type")]
enum NativeContentOut {
    /// Plain text block.
    #[serde(rename = "text")]
    Text {
        text: String,
        /// Optional cache marker; omitted from the JSON when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Inline image block.
    #[serde(rename = "image")]
    Image { source: ImageSource },
    /// Tool invocation replayed from an earlier assistant turn.
    #[serde(rename = "tool_use")]
    ToolUse {
        /// Identifier that the matching `tool_result` refers back to.
        id: String,
        /// Name of the invoked tool.
        name: String,
        /// Tool arguments as JSON.
        input: serde_json::Value,
        /// Optional cache marker; omitted from the JSON when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Output of a tool invocation.
    #[serde(rename = "tool_result")]
    ToolResult {
        /// Identifier of the `tool_use` block this result answers.
        tool_use_id: String,
        /// Tool output as text.
        content: String,
        /// Optional cache marker; omitted from the JSON when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Thinking block for round-tripping extended thinking in conversation
    /// history. Required when thinking is enabled and assistant messages
    /// contain tool_use blocks.
    #[serde(rename = "thinking")]
    Thinking {
        /// Thinking text.
        thinking: String,
        /// Signature that accompanied the thinking text; omitted when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        signature: Option<String>,
    },
}
154
/// Tool definition in Anthropic's native format. All string and schema
/// fields are borrowed, so building one does not copy the tool spec.
#[derive(Debug, Serialize)]
struct NativeToolSpec<'a> {
    /// Tool name.
    name: &'a str,
    /// Human-readable description of the tool.
    description: &'a str,
    /// JSON schema describing the tool's input.
    input_schema: &'a serde_json::Value,
    /// Optional cache marker; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    cache_control: Option<CacheControl>,
}
163
/// Prompt-cache marker attached to content blocks, tools and system blocks.
/// Serialized as `{"type": <cache_type>}`.
#[derive(Debug, Clone, Serialize)]
struct CacheControl {
    /// Cache kind, written to the JSON `type` field.
    #[serde(rename = "type")]
    cache_type: String,
}
169
170impl CacheControl {
171    fn ephemeral() -> Self {
172        Self {
173            cache_type: "ephemeral".to_string(),
174        }
175    }
176}
177
/// System prompt in either of its two wire forms. Because the enum is
/// `untagged`, each variant serializes as its inner value with no wrapper.
#[derive(Debug, Serialize)]
#[serde(untagged)]
enum SystemPrompt {
    /// Prompt as a single string.
    String(String),
    /// Prompt as a list of blocks, each of which may carry a cache marker.
    Blocks(Vec<SystemBlock>),
}
184
/// One block of a block-form system prompt.
#[derive(Debug, Serialize)]
struct SystemBlock {
    /// Block kind, written to the JSON `type` field (this file uses `"text"`).
    #[serde(rename = "type")]
    block_type: String,
    /// Prompt text of the block.
    text: String,
    /// Optional cache marker; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    cache_control: Option<CacheControl>,
}
193
/// Non-streaming response body. Both fields tolerate being absent from the
/// JSON and fall back to their defaults.
#[derive(Debug, Deserialize)]
struct NativeChatResponse {
    /// Returned content blocks; empty when the field is missing.
    #[serde(default)]
    content: Vec<NativeContentIn>,
    /// Token accounting; `None` when the field is missing.
    #[serde(default)]
    usage: Option<AnthropicUsage>,
}
201
/// Token counts reported with a response. Each counter is optional and is
/// `None` when the field is missing.
#[derive(Debug, Deserialize)]
struct AnthropicUsage {
    /// Input (prompt) tokens.
    #[serde(default)]
    input_tokens: Option<u64>,
    /// Output (completion) tokens.
    #[serde(default)]
    output_tokens: Option<u64>,
    /// Input tokens reported as read from the prompt cache.
    #[serde(default)]
    cache_read_input_tokens: Option<u64>,
}
211
/// Incoming content block. One flat struct covers every block kind: `kind`
/// says which of the optional fields are meaningful, and any field missing
/// from the JSON deserializes to `None`.
#[derive(Debug, Deserialize)]
struct NativeContentIn {
    /// Block discriminator, read from the JSON `type` field.
    #[serde(rename = "type")]
    kind: String,
    /// Text payload.
    #[serde(default)]
    text: Option<String>,
    /// Thinking payload.
    #[serde(default)]
    thinking: Option<String>,
    /// Signature for integrity verification of thinking blocks.
    #[serde(default)]
    signature: Option<String>,
    /// Tool-call identifier.
    #[serde(default)]
    id: Option<String>,
    /// Tool name.
    #[serde(default)]
    name: Option<String>,
    /// Tool arguments as JSON.
    #[serde(default)]
    input: Option<serde_json::Value>,
}
230
231impl AnthropicModelProvider {
    /// Creates a provider for `alias` with no base-URL override.
    ///
    /// Delegates to `with_base_url`, passing `None` as the base URL.
    pub fn new(alias: &str, credential: Option<&str>) -> Self {
        Self::with_base_url(alias, credential, None)
    }
235
236    pub fn with_base_url(alias: &str, credential: Option<&str>, base_url: Option<&str>) -> Self {
237        let base_url = base_url
238            .map(|u| u.trim_end_matches('/'))
239            .unwrap_or(BASE_URL)
240            .to_string();
241        Self {
242            alias: alias.to_string(),
243            credential: credential
244                .map(str::trim)
245                .filter(|k| !k.is_empty())
246                .map(ToString::to_string),
247            base_url,
248            max_tokens: zeroclaw_api::model_provider::BASELINE_MAX_TOKENS,
249        }
250    }
251
252    /// Override the maximum output tokens for API requests.
253    pub fn with_max_tokens(mut self, max_tokens: u32) -> Self {
254        self.max_tokens = max_tokens;
255        self
256    }
257
258    fn is_setup_token(token: &str) -> bool {
259        token.starts_with("sk-ant-oat01-")
260    }
261
262    fn apply_auth(
263        &self,
264        request: reqwest::RequestBuilder,
265        credential: &str,
266    ) -> reqwest::RequestBuilder {
267        let is_setup = Self::is_setup_token(credential);
268        // Diagnostic for "401 invalid x-api-key" mysteries: when a provider
269        // is sending a credential the upstream rejects, this is the only
270        // line that nails what bytes actually went out. Logs header kind,
271        // length, first 8 chars (enough to identify api03 vs oat01 vs an
272        // accidental enc2: blob) and last 4 (smudge for tail integrity).
273        // No full credential — that stays out of logs.
274        let len = credential.len();
275        let head: String = credential.chars().take(8).collect();
276        let tail: String = credential
277            .chars()
278            .rev()
279            .take(4)
280            .collect::<String>()
281            .chars()
282            .rev()
283            .collect();
284        ::zeroclaw_log::record!(DEBUG, ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note).with_attrs(::serde_json::json!({"header": if is_setup { "Authorization" } else { "x-api-key" }, "credential_len": len, "credential_head": head, "credential_tail": tail})), "Anthropic auth header applied");
285        if is_setup {
286            request
287                .header("Authorization", format!("Bearer {credential}"))
288                .header(
289                    "anthropic-beta",
290                    "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14",
291                )
292                .header("anthropic-dangerous-direct-browser-access", "true")
293        } else {
294            request.header("x-api-key", credential)
295        }
296    }
297
298    /// For OAuth tokens, Anthropic requires the system prompt to start with the
299    /// Claude Code identity prefix. This prepends it to any existing system prompt.
300    fn apply_oauth_system_prompt(system: Option<SystemPrompt>) -> Option<SystemPrompt> {
301        let prefix = SystemBlock {
302            block_type: "text".to_string(),
303            text: "You are Claude Code, Anthropic's official CLI for Claude.".to_string(),
304            cache_control: Some(CacheControl::ephemeral()),
305        };
306        match system {
307            Some(SystemPrompt::Blocks(mut blocks)) => {
308                blocks.insert(0, prefix);
309                Some(SystemPrompt::Blocks(blocks))
310            }
311            Some(SystemPrompt::String(s)) => Some(SystemPrompt::Blocks(vec![
312                prefix,
313                SystemBlock {
314                    block_type: "text".to_string(),
315                    text: s,
316                    cache_control: Some(CacheControl::ephemeral()),
317                },
318            ])),
319            None => Some(SystemPrompt::Blocks(vec![prefix])),
320        }
321    }
322
323    /// Cache conversations with more than 1 non-system message (i.e. after first exchange)
324    fn should_cache_conversation(messages: &[ChatMessage]) -> bool {
325        messages.iter().filter(|m| m.role != "system").count() > 1
326    }
327
328    /// Apply cache control to the last message content block
329    fn apply_cache_to_last_message(messages: &mut [NativeMessage]) {
330        if let Some(last_msg) = messages.last_mut()
331            && let Some(last_content) = last_msg.content.last_mut()
332        {
333            match last_content {
334                NativeContentOut::Text { cache_control, .. }
335                | NativeContentOut::ToolResult { cache_control, .. } => {
336                    *cache_control = Some(CacheControl::ephemeral());
337                }
338                NativeContentOut::ToolUse { .. }
339                | NativeContentOut::Image { .. }
340                | NativeContentOut::Thinking { .. } => {}
341            }
342        }
343    }
344
345    fn convert_tools<'a>(tools: Option<&'a [ToolSpec]>) -> Option<Vec<NativeToolSpec<'a>>> {
346        let items = tools?;
347        if items.is_empty() {
348            return None;
349        }
350        let mut native_tools: Vec<NativeToolSpec<'a>> = items
351            .iter()
352            .map(|tool| NativeToolSpec {
353                name: &tool.name,
354                description: &tool.description,
355                input_schema: &tool.parameters,
356                cache_control: None,
357            })
358            .collect();
359
360        // Cache the last tool definition (caches all tools)
361        if let Some(last_tool) = native_tools.last_mut() {
362            last_tool.cache_control = Some(CacheControl::ephemeral());
363        }
364
365        Some(native_tools)
366    }
367
368    fn parse_assistant_tool_call_message(content: &str) -> Option<Vec<NativeContentOut>> {
369        let value = serde_json::from_str::<serde_json::Value>(content).ok()?;
370        let tool_calls = value
371            .get("tool_calls")
372            .and_then(|v| serde_json::from_value::<Vec<ProviderToolCall>>(v.clone()).ok())?;
373
374        let mut blocks = Vec::new();
375
376        // When extended thinking is enabled, assistant messages must start
377        // with thinking blocks (including signatures) before any tool_use
378        // blocks. The reasoning_content field stores JSON-encoded thinking
379        // blocks from the original response.
380        if let Some(reasoning) = value
381            .get("reasoning_content")
382            .and_then(serde_json::Value::as_str)
383            .filter(|r| !r.is_empty())
384        {
385            for part in reasoning.split('\n') {
386                if let Ok(block) = serde_json::from_str::<serde_json::Value>(part) {
387                    let thinking = block
388                        .get("thinking")
389                        .and_then(|t| t.as_str())
390                        .unwrap_or("")
391                        .to_string();
392                    let signature = block
393                        .get("signature")
394                        .and_then(|s| s.as_str())
395                        .filter(|s| !s.is_empty())
396                        .map(|s| s.to_string());
397                    blocks.push(NativeContentOut::Thinking {
398                        thinking,
399                        signature,
400                    });
401                }
402            }
403        }
404
405        if let Some(text) = value
406            .get("content")
407            .and_then(serde_json::Value::as_str)
408            .map(str::trim)
409            .filter(|t| !t.is_empty())
410        {
411            blocks.push(NativeContentOut::Text {
412                text: text.to_string(),
413                cache_control: None,
414            });
415        }
416        for call in tool_calls {
417            let input = serde_json::from_str::<serde_json::Value>(&call.arguments)
418                .unwrap_or_else(|_| serde_json::Value::Object(serde_json::Map::new()));
419            blocks.push(NativeContentOut::ToolUse {
420                id: call.id,
421                name: call.name,
422                input,
423                cache_control: None,
424            });
425        }
426        Some(blocks)
427    }
428
429    fn parse_tool_result_message(content: &str) -> Option<NativeMessage> {
430        let value = serde_json::from_str::<serde_json::Value>(content).ok()?;
431        let tool_use_id = value
432            .get("tool_call_id")
433            .and_then(serde_json::Value::as_str)?
434            .to_string();
435        let result = value
436            .get("content")
437            .and_then(serde_json::Value::as_str)
438            .unwrap_or("")
439            .to_string();
440        Some(NativeMessage {
441            role: "user".to_string(),
442            content: vec![NativeContentOut::ToolResult {
443                tool_use_id,
444                content: result,
445                cache_control: None,
446            }],
447        })
448    }
449
450    fn convert_messages(messages: &[ChatMessage]) -> (Option<SystemPrompt>, Vec<NativeMessage>) {
451        let mut system_text = None;
452        let mut native_messages = Vec::new();
453
454        for msg in messages {
455            match msg.role.as_str() {
456                "system" => {
457                    if system_text.is_none() {
458                        system_text = Some(msg.content.clone());
459                    }
460                }
461                "assistant" => {
462                    if let Some(blocks) = Self::parse_assistant_tool_call_message(&msg.content) {
463                        native_messages.push(NativeMessage {
464                            role: "assistant".to_string(),
465                            content: blocks,
466                        });
467                    } else if !msg.content.trim().is_empty() {
468                        native_messages.push(NativeMessage {
469                            role: "assistant".to_string(),
470                            content: vec![NativeContentOut::Text {
471                                text: msg.content.clone(),
472                                cache_control: None,
473                            }],
474                        });
475                    }
476                }
477                "tool" => {
478                    let tool_msg = if let Some(tr) = Self::parse_tool_result_message(&msg.content) {
479                        tr
480                    } else if !msg.content.trim().is_empty() {
481                        NativeMessage {
482                            role: "user".to_string(),
483                            content: vec![NativeContentOut::Text {
484                                text: msg.content.clone(),
485                                cache_control: None,
486                            }],
487                        }
488                    } else {
489                        continue;
490                    };
491                    // Tool results map to role "user"; merge consecutive ones
492                    // into a single message so Anthropic doesn't reject the
493                    // request for having adjacent same-role messages.
494                    if native_messages
495                        .last()
496                        .is_some_and(|m| m.role == tool_msg.role)
497                    {
498                        native_messages
499                            .last_mut()
500                            .unwrap()
501                            .content
502                            .extend(tool_msg.content);
503                    } else {
504                        native_messages.push(tool_msg);
505                    }
506                }
507                _ => {
508                    // Parse image markers from user message content
509                    let (text, image_refs) = crate::multimodal::parse_image_markers(&msg.content);
510                    let mut content_blocks: Vec<NativeContentOut> = Vec::new();
511
512                    // Add image content blocks for each image reference
513                    for img_ref in &image_refs {
514                        let (media_type, data) = if img_ref.starts_with("data:") {
515                            // Data URI format: data:image/jpeg;base64,/9j/4AAQ...
516                            if let Some(comma) = img_ref.find(',') {
517                                let header = &img_ref[5..comma];
518                                let mime =
519                                    header.split(';').next().unwrap_or("image/jpeg").to_string();
520                                let b64 = img_ref[comma + 1..].trim().to_string();
521                                (mime, b64)
522                            } else {
523                                continue;
524                            }
525                        } else if std::path::Path::new(img_ref.trim()).exists() {
526                            // Local file path
527                            match std::fs::read(img_ref.trim()) {
528                                Ok(bytes) => {
529                                    let b64 =
530                                        base64::engine::general_purpose::STANDARD.encode(&bytes);
531                                    let ext = std::path::Path::new(img_ref.trim())
532                                        .extension()
533                                        .and_then(|e| e.to_str())
534                                        .unwrap_or("jpg");
535                                    let mime = match ext {
536                                        "png" => "image/png",
537                                        "gif" => "image/gif",
538                                        "webp" => "image/webp",
539                                        _ => "image/jpeg",
540                                    }
541                                    .to_string();
542                                    (mime, b64)
543                                }
544                                Err(_) => continue,
545                            }
546                        } else {
547                            continue;
548                        };
549
550                        content_blocks.push(NativeContentOut::Image {
551                            source: ImageSource {
552                                source_type: "base64".to_string(),
553                                media_type,
554                                data,
555                            },
556                        });
557                    }
558
559                    // Add text content block (skip empty text when images are present)
560                    if text.is_empty() && !image_refs.is_empty() {
561                        content_blocks.push(NativeContentOut::Text {
562                            text: "[image]".to_string(),
563                            cache_control: None,
564                        });
565                    } else if !text.trim().is_empty() {
566                        content_blocks.push(NativeContentOut::Text {
567                            text,
568                            cache_control: None,
569                        });
570                    }
571
572                    // Merge into previous user message if present (e.g.
573                    // when a user message immediately follows tool results
574                    // which are also role "user" in Anthropic's format).
575                    if native_messages.last().is_some_and(|m| m.role == "user") {
576                        native_messages
577                            .last_mut()
578                            .unwrap()
579                            .content
580                            .extend(content_blocks);
581                    } else {
582                        native_messages.push(NativeMessage {
583                            role: "user".to_string(),
584                            content: content_blocks,
585                        });
586                    }
587                }
588            }
589        }
590
591        // Always use Blocks format with cache_control for system prompts
592        let system_prompt = system_text.map(|text| {
593            SystemPrompt::Blocks(vec![SystemBlock {
594                block_type: "text".to_string(),
595                text,
596                cache_control: Some(CacheControl::ephemeral()),
597            }])
598        });
599
600        (system_prompt, native_messages)
601    }
602
603    fn parse_native_response(response: NativeChatResponse) -> ProviderChatResponse {
604        let mut text_parts = Vec::new();
605        let mut thinking_parts = Vec::new();
606        let mut tool_calls = Vec::new();
607
608        let usage = response.usage.map(|u| TokenUsage {
609            input_tokens: u.input_tokens,
610            output_tokens: u.output_tokens,
611            cached_input_tokens: u.cache_read_input_tokens,
612        });
613
614        for block in response.content {
615            match block.kind.as_str() {
616                "text" => {
617                    if let Some(text) = block.text.map(|t| t.trim().to_string())
618                        && !text.is_empty()
619                    {
620                        text_parts.push(text);
621                    }
622                }
623                "thinking" => {
624                    // Store thinking text byte-for-byte: the signature is
625                    // computed over the exact bytes the model returned, so
626                    // any mutation (including trim()) invalidates it on
627                    // replay. Only skip when the provider returns genuinely
628                    // empty content.
629                    if let Some(thinking) = block.thinking.as_deref().or(block.text.as_deref())
630                        && !thinking.is_empty()
631                    {
632                        let json_block = serde_json::json!({
633                            "thinking": thinking,
634                            "signature": block.signature.as_deref().unwrap_or(""),
635                        });
636                        thinking_parts.push(json_block.to_string());
637                    }
638                }
639                "tool_use" => {
640                    let name = block.name.unwrap_or_default();
641                    if name.is_empty() {
642                        continue;
643                    }
644                    let arguments = block
645                        .input
646                        .unwrap_or_else(|| serde_json::Value::Object(serde_json::Map::new()));
647                    tool_calls.push(ProviderToolCall {
648                        id: block.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
649                        name,
650                        arguments: arguments.to_string(),
651                        extra_content: None,
652                    });
653                }
654                _ => {}
655            }
656        }
657
658        let reasoning_content = if thinking_parts.is_empty() {
659            None
660        } else {
661            Some(thinking_parts.join("\n"))
662        };
663
664        ProviderChatResponse {
665            text: if text_parts.is_empty() {
666                None
667            } else {
668                Some(text_parts.join("\n"))
669            },
670            tool_calls,
671            usage,
672            reasoning_content,
673        }
674    }
675
676    /// Resolve thinking parameters for an API request. Returns the effective
677    /// temperature (forced to 1.0 when thinking is active), the thinking
678    /// config for the request body, and the effective max_tokens (raised to
679    /// meet budget_tokens minimum when needed).
680    fn resolve_thinking(
681        &self,
682        thinking: Option<zeroclaw_api::model_provider::NativeThinkingParams>,
683        temperature: Option<f64>,
684        model: &str,
685    ) -> (Option<f64>, Option<NativeThinkingConfig>, u32) {
686        match thinking {
687            Some(params) if anthropic_model_supports_native_thinking(model) => {
688                ::zeroclaw_log::record!(
689                    INFO,
690                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
691                        .with_attrs(::serde_json::json!({"budget_tokens": params.budget_tokens})),
692                    "Native extended thinking enabled; forcing temperature=1.0"
693                );
694                // API requires max_tokens > budget_tokens (strictly greater).
695                let min_required = params.budget_tokens + 1;
696                let max_tokens = self.max_tokens.max(min_required);
697                (
698                    Some(1.0),
699                    Some(NativeThinkingConfig {
700                        kind: "enabled",
701                        budget_tokens: params.budget_tokens,
702                    }),
703                    max_tokens,
704                )
705            }
706            Some(_) => {
707                // Caller asked for native thinking but the model rejects the
708                // fixed-budget request shape. Drop to prompt-based reasoning
709                // (the agent loop's prefix already injected) and keep the
710                // caller-supplied temperature so per-model guards still apply.
711                ::zeroclaw_log::record!(
712                    WARN,
713                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
714                        .with_attrs(::serde_json::json!({"model": model})),
715                    "Native extended thinking requested but model only supports adaptive thinking; falling back to prompt-based reasoning"
716                );
717                (temperature, None, self.max_tokens)
718            }
719            None => (temperature, None, self.max_tokens),
720        }
721    }
722
    /// Builds the HTTP client for this provider through the shared
    /// runtime-proxy client builder, registered under the
    /// `model_provider.anthropic` key.
    fn http_client(&self) -> Client {
        // NOTE(review): 120 and 10 are presumably the request and connect
        // timeouts in seconds; confirm against the builder's signature.
        zeroclaw_config::schema::build_runtime_proxy_client_with_timeouts(
            "model_provider.anthropic",
            120,
            10,
        )
    }
730
731    /// Build a streaming request body from a `NativeChatRequest`.
732    fn build_streaming_request(request: &NativeChatRequest<'_>) -> serde_json::Value {
733        let mut body =
734            serde_json::to_value(request).expect("NativeChatRequest should serialize to JSON");
735        body["stream"] = serde_json::Value::Bool(true);
736        body
737    }
738
739    /// Parse Anthropic SSE lines from `response` and send `StreamEvent`s to `tx`.
740    async fn parse_anthropic_sse(
741        response: reqwest::Response,
742        tx: &tokio::sync::mpsc::Sender<StreamResult<StreamEvent>>,
743    ) {
744        use tokio_util::io::StreamReader;
745
746        let byte_stream = response
747            .bytes_stream()
748            .map(|result| result.map_err(std::io::Error::other));
749        let reader = StreamReader::new(byte_stream);
750        Self::parse_anthropic_sse_from_reader(reader, tx).await;
751    }
752
753    /// Inner loop split out of `parse_anthropic_sse` so unit tests can feed a
754    /// `Cursor<&[u8]>` directly without spinning up a mock HTTP server.
755    async fn parse_anthropic_sse_from_reader<R>(
756        reader: R,
757        tx: &tokio::sync::mpsc::Sender<StreamResult<StreamEvent>>,
758    ) where
759        R: tokio::io::AsyncBufRead + Unpin,
760    {
761        use tokio::io::AsyncBufReadExt;
762
763        let mut lines = reader.lines();
764
765        let mut tool_id: Option<String> = None;
766        let mut tool_name: Option<String> = None;
767        let mut tool_input_json = String::new();
768
769        // Anthropic emits usage in two places: `message_start` carries the
770        // input-token count + prompt-cache reads; `message_delta` carries
771        // running output-token totals (each delta supersedes the prior). We
772        // capture both, then emit one `StreamEvent::Usage` at `message_stop`
773        // so the gateway accumulator and `record_turn_cost()` see the same
774        // signal Anthropic sends — closes the original #6001 live repro,
775        // which was Anthropic-shaped streaming.
776        let mut input_tokens: Option<u64> = None;
777        let mut output_tokens: Option<u64> = None;
778        let mut cached_input_tokens: Option<u64> = None;
779
780        while let Ok(Some(line)) = lines.next_line().await {
781            let line = line.trim().to_string();
782            if !line.starts_with("data: ") {
783                continue;
784            }
785            let json_str = &line["data: ".len()..];
786
787            let event: serde_json::Value = match serde_json::from_str(json_str) {
788                Ok(v) => v,
789                Err(_) => continue,
790            };
791
792            let event_type = event
793                .get("type")
794                .and_then(|t| t.as_str())
795                .unwrap_or_default();
796
797            match event_type {
798                "message_start" => {
799                    let model = event
800                        .get("message")
801                        .and_then(|m| m.get("model"))
802                        .and_then(|m| m.as_str())
803                        .unwrap_or("unknown");
804                    let usage = event.get("message").and_then(|m| m.get("usage"));
805                    let observed_input = usage
806                        .and_then(|u| u.get("input_tokens"))
807                        .and_then(|t| t.as_u64());
808                    let observed_cached = usage
809                        .and_then(|u| u.get("cache_read_input_tokens"))
810                        .and_then(|t| t.as_u64());
811                    if let Some(v) = observed_input {
812                        input_tokens = Some(v);
813                    }
814                    if let Some(v) = observed_cached {
815                        cached_input_tokens = Some(v);
816                    }
817                    ::zeroclaw_log::record!(DEBUG, ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note).with_attrs(::serde_json::json!({"model": model, "input_tokens": observed_input, "cached_input_tokens": observed_cached})), "stream: message_start");
818                }
819                "content_block_start" => {
820                    if let Some(block) = event.get("content_block") {
821                        let block_type = block
822                            .get("type")
823                            .and_then(|t| t.as_str())
824                            .unwrap_or_default();
825                        if block_type == "tool_use" {
826                            if let Some(id) = tool_id.take() {
827                                let name = tool_name.take().unwrap_or_default();
828                                let input = std::mem::take(&mut tool_input_json);
829                                let _ = tx
830                                    .send(Ok(StreamEvent::ToolCall(ProviderToolCall {
831                                        id,
832                                        name,
833                                        arguments: input,
834                                        extra_content: None,
835                                    })))
836                                    .await;
837                            }
838                            tool_id = block
839                                .get("id")
840                                .and_then(|v| v.as_str())
841                                .map(ToString::to_string);
842                            tool_name = block
843                                .get("name")
844                                .and_then(|v| v.as_str())
845                                .map(ToString::to_string);
846                            tool_input_json.clear();
847                        }
848                    }
849                }
850                "content_block_delta" => {
851                    if let Some(delta) = event.get("delta") {
852                        let delta_type = delta
853                            .get("type")
854                            .and_then(|t| t.as_str())
855                            .unwrap_or_default();
856                        match delta_type {
857                            "text_delta" => {
858                                if let Some(text) = delta.get("text").and_then(|t| t.as_str())
859                                    && !text.is_empty()
860                                    && tx
861                                        .send(Ok(StreamEvent::TextDelta(StreamChunk::delta(
862                                            text.to_string(),
863                                        ))))
864                                        .await
865                                        .is_err()
866                                {
867                                    return;
868                                }
869                            }
870                            "input_json_delta" => {
871                                if let Some(json) =
872                                    delta.get("partial_json").and_then(|j| j.as_str())
873                                {
874                                    tool_input_json.push_str(json);
875                                }
876                            }
877                            // TODO: handle "thinking_delta" events for streaming
878                            // extended thinking content. Currently thinking blocks
879                            // are only captured in non-streaming parse_native_response().
880                            _ => {}
881                        }
882                    }
883                }
884                "content_block_stop" => {
885                    if let Some(id) = tool_id.take() {
886                        let name = tool_name.take().unwrap_or_default();
887                        let input = std::mem::take(&mut tool_input_json);
888                        let _ = tx
889                            .send(Ok(StreamEvent::ToolCall(ProviderToolCall {
890                                id,
891                                name,
892                                arguments: input,
893                                extra_content: None,
894                            })))
895                            .await;
896                    }
897                }
898                "message_delta" => {
899                    let stop_reason = event
900                        .get("delta")
901                        .and_then(|d| d.get("stop_reason"))
902                        .and_then(|s| s.as_str())
903                        .unwrap_or("none");
904                    // Anthropic's running-total: each `message_delta`
905                    // supersedes the previous one, so we always overwrite.
906                    let observed_output = event
907                        .get("usage")
908                        .and_then(|u| u.get("output_tokens"))
909                        .and_then(|t| t.as_u64());
910                    if let Some(v) = observed_output {
911                        output_tokens = Some(v);
912                    }
913                    if stop_reason == "max_tokens" {
914                        ::zeroclaw_log::record!(
915                            WARN,
916                            ::zeroclaw_log::Event::new(
917                                module_path!(),
918                                ::zeroclaw_log::Action::Note
919                            )
920                            .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
921                            .with_attrs(::serde_json::json!({"output_tokens": observed_output})),
922                            "response truncated: hit max_tokens limit. Increase provider_max_tokens in config."
923                        );
924                    } else {
925                        ::zeroclaw_log::record!(DEBUG, ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note).with_attrs(::serde_json::json!({"stop_reason": stop_reason, "output_tokens": observed_output})), "stream: message_delta");
926                    }
927                }
928                "message_stop" => {
929                    ::zeroclaw_log::record!(
930                        DEBUG,
931                        ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
932                        "stream: message_stop"
933                    );
934                    if input_tokens.is_some() || output_tokens.is_some() {
935                        let _ = tx
936                            .send(Ok(StreamEvent::Usage(TokenUsage {
937                                input_tokens,
938                                output_tokens,
939                                cached_input_tokens,
940                            })))
941                            .await;
942                    }
943                    let _ = tx.send(Ok(StreamEvent::Final)).await;
944                    return;
945                }
946                "error" => {
947                    let msg = event
948                        .get("error")
949                        .and_then(|e| e.get("message"))
950                        .and_then(|m| m.as_str())
951                        .unwrap_or("unknown streaming error");
952                    let _ = tx
953                        .send(Err(StreamError::ModelProvider(msg.to_string())))
954                        .await;
955                    return;
956                }
957                _ => {}
958            }
959        }
960
961        let _ = tx.send(Ok(StreamEvent::Final)).await;
962    }
963}
964
965#[async_trait]
966impl ModelProvider for AnthropicModelProvider {
967    // ── ModelProvider-family defaults ──
968    fn default_temperature(&self) -> f64 {
969        TEMPERATURE_DEFAULT
970    }
971
972    fn default_base_url(&self) -> Option<&str> {
973        Some(BASE_URL)
974    }
975
976    async fn chat_with_system(
977        &self,
978        system_prompt: Option<&str>,
979        message: &str,
980        model: &str,
981        temperature: Option<f64>,
982    ) -> anyhow::Result<String> {
983        let credential = self.credential.as_ref().ok_or_else(|| {
984            ::zeroclaw_log::record!(
985                ERROR,
986                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
987                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
988                    .with_attrs(::serde_json::json!({"missing": "credentials"})),
989                "anthropic: no credentials configured"
990            );
991            anyhow::Error::msg(
992                "Anthropic credentials not set. Set ANTHROPIC_API_KEY or ANTHROPIC_OAUTH_TOKEN (setup-token).",
993            )
994        })?;
995
996        let system = system_prompt.map(|s| SystemPrompt::String(s.to_string()));
997        let system = if Self::is_setup_token(credential) {
998            Self::apply_oauth_system_prompt(system)
999        } else {
1000            system
1001        };
1002
1003        ::zeroclaw_log::record!(
1004            DEBUG,
1005            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
1006                .with_attrs(::serde_json::json!({"max_tokens": self.max_tokens, "model": model})),
1007            "API request"
1008        );
1009        let request = NativeChatRequest {
1010            model: model.to_string(),
1011            max_tokens: self.max_tokens,
1012            system,
1013            messages: vec![NativeMessage {
1014                role: "user".to_string(),
1015                content: vec![NativeContentOut::Text {
1016                    text: message.to_string(),
1017                    cache_control: None,
1018                }],
1019            }],
1020            temperature: if anthropic_model_omits_temperature(model) {
1021                None
1022            } else {
1023                temperature
1024            },
1025            tools: None,
1026            tool_choice: None,
1027            stream: None,
1028            thinking: None,
1029        };
1030
1031        let mut request = self
1032            .http_client()
1033            .post(format!("{}/v1/messages", self.base_url))
1034            .header("anthropic-version", "2023-06-01")
1035            .header("content-type", "application/json")
1036            .json(&request);
1037
1038        request = self.apply_auth(request, credential);
1039
1040        let response = request.send().await?;
1041
1042        if !response.status().is_success() {
1043            return Err(super::api_error("Anthropic", response).await);
1044        }
1045
1046        let chat_response: NativeChatResponse = response.json().await?;
1047        let parsed = Self::parse_native_response(chat_response);
1048        parsed.text.ok_or_else(|| {
1049            ::zeroclaw_log::record!(
1050                ERROR,
1051                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
1052                    .with_outcome(::zeroclaw_log::EventOutcome::Failure),
1053                "anthropic: empty text in response"
1054            );
1055            anyhow::Error::msg("No response from Anthropic")
1056        })
1057    }
1058
1059    async fn chat(
1060        &self,
1061        request: ProviderChatRequest<'_>,
1062        model: &str,
1063        temperature: Option<f64>,
1064    ) -> anyhow::Result<ProviderChatResponse> {
1065        let credential = self.credential.as_ref().ok_or_else(|| {
1066            ::zeroclaw_log::record!(
1067                ERROR,
1068                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
1069                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
1070                    .with_attrs(::serde_json::json!({"missing": "credentials"})),
1071                "anthropic: no credentials configured"
1072            );
1073            anyhow::Error::msg(
1074                "Anthropic credentials not set. Set ANTHROPIC_API_KEY or ANTHROPIC_OAUTH_TOKEN (setup-token).",
1075            )
1076        })?;
1077
1078        let (system_prompt, mut messages) = Self::convert_messages(request.messages);
1079
1080        // Auto-cache last message if conversation is long
1081        if Self::should_cache_conversation(request.messages) {
1082            Self::apply_cache_to_last_message(&mut messages);
1083        }
1084
1085        // Check for tool_choice override from the agent loop (e.g. "any"
1086        // to force tool use for hardware requests).
1087        let tool_choice_override = zeroclaw_api::TOOL_CHOICE_OVERRIDE
1088            .try_with(Clone::clone)
1089            .ok()
1090            .flatten();
1091        let native_tools = Self::convert_tools(request.tools);
1092        let tool_choice = if native_tools.is_some() {
1093            tool_choice_override.map(|tc| serde_json::json!({ "type": tc }))
1094        } else {
1095            None
1096        };
1097
1098        // For OAuth tokens, prepend Claude Code identity to system prompt
1099        let system_prompt = if Self::is_setup_token(credential) {
1100            Self::apply_oauth_system_prompt(system_prompt)
1101        } else {
1102            system_prompt
1103        };
1104
1105        let (effective_temperature, thinking_config, effective_max_tokens) =
1106            self.resolve_thinking(request.thinking, temperature, model);
1107
1108        ::zeroclaw_log::record!(
1109            DEBUG,
1110            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note).with_attrs(
1111                ::serde_json::json!({"max_tokens": effective_max_tokens, "model": model})
1112            ),
1113            "non-streaming API request"
1114        );
1115        let native_request = NativeChatRequest {
1116            model: model.to_string(),
1117            max_tokens: effective_max_tokens,
1118            system: system_prompt,
1119            messages,
1120            temperature: if anthropic_model_omits_temperature(model) {
1121                None
1122            } else {
1123                effective_temperature
1124            },
1125            tools: native_tools,
1126            tool_choice,
1127            stream: None,
1128            thinking: thinking_config,
1129        };
1130
1131        let req = self
1132            .http_client()
1133            .post(format!("{}/v1/messages", self.base_url))
1134            .header("anthropic-version", "2023-06-01")
1135            .header("content-type", "application/json")
1136            .json(&native_request);
1137
1138        let response = self.apply_auth(req, credential).send().await?;
1139        if !response.status().is_success() {
1140            return Err(super::api_error("Anthropic", response).await);
1141        }
1142
1143        let native_response: NativeChatResponse = response.json().await?;
1144        Ok(Self::parse_native_response(native_response))
1145    }
1146
1147    fn capabilities(&self) -> ProviderCapabilities {
1148        ProviderCapabilities {
1149            native_tool_calling: true,
1150            vision: true,
1151            prompt_caching: true,
1152            extended_thinking: true,
1153        }
1154    }
1155
1156    fn supports_native_tools(&self) -> bool {
1157        true
1158    }
1159
1160    async fn chat_with_tools(
1161        &self,
1162        messages: &[ChatMessage],
1163        tools: &[serde_json::Value],
1164        model: &str,
1165        temperature: Option<f64>,
1166    ) -> anyhow::Result<ProviderChatResponse> {
1167        // Convert OpenAI-format tool JSON to ToolSpec so we can reuse the
1168        // existing `chat()` method which handles full message history,
1169        // system prompt extraction, caching, and Anthropic native formatting.
1170        let tool_specs: Vec<ToolSpec> = tools
1171            .iter()
1172            .filter_map(|t| {
1173                let func = t.get("function").or_else(|| {
1174                    ::zeroclaw_log::record!(
1175                        WARN,
1176                        ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
1177                            .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
1178                        "Skipping malformed tool definition (missing 'function' key)"
1179                    );
1180                    None
1181                })?;
1182                let name = func.get("name").and_then(|n| n.as_str()).or_else(|| {
1183                    ::zeroclaw_log::record!(
1184                        WARN,
1185                        ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
1186                            .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
1187                        "Skipping tool with missing or non-string 'name'"
1188                    );
1189                    None
1190                })?;
1191                Some(ToolSpec {
1192                    name: name.to_string(),
1193                    description: func
1194                        .get("description")
1195                        .and_then(|d| d.as_str())
1196                        .unwrap_or("")
1197                        .to_string(),
1198                    parameters: func
1199                        .get("parameters")
1200                        .cloned()
1201                        .unwrap_or(serde_json::json!({"type": "object"})),
1202                })
1203            })
1204            .collect();
1205
1206        let request = ProviderChatRequest {
1207            messages,
1208            tools: if tool_specs.is_empty() {
1209                None
1210            } else {
1211                Some(&tool_specs)
1212            },
1213            thinking: None,
1214        };
1215        self.chat(request, model, temperature).await
1216    }
1217
1218    async fn warmup(&self) -> anyhow::Result<()> {
1219        if let Some(credential) = self.credential.as_ref() {
1220            let mut request = self
1221                .http_client()
1222                .post(format!("{}/v1/messages", self.base_url))
1223                .header("anthropic-version", "2023-06-01");
1224            request = self.apply_auth(request, credential);
1225            // Send a minimal request; the goal is TLS + HTTP/2 setup, not a valid response.
1226            // Anthropic has no lightweight GET endpoint, so we accept any non-network error.
1227            let _ = request.send().await?;
1228        }
1229        Ok(())
1230    }
1231
1232    async fn list_models(&self) -> anyhow::Result<Vec<String>> {
1233        // Anthropic's /v1/models requires a credential. Onboard pulls the
1234        // catalog from models.dev before the user has entered a key.
1235        crate::models_dev::list_models_for("anthropic").await
1236    }
1237
1238    fn supports_streaming(&self) -> bool {
1239        true
1240    }
1241
1242    fn supports_streaming_tool_events(&self) -> bool {
1243        true
1244    }
1245
1246    fn stream_chat(
1247        &self,
1248        request: ProviderChatRequest<'_>,
1249        model: &str,
1250        temperature: Option<f64>,
1251        options: StreamOptions,
1252    ) -> stream::BoxStream<'static, StreamResult<StreamEvent>> {
1253        if !options.enabled {
1254            return stream::once(async { Ok(StreamEvent::Final) }).boxed();
1255        }
1256
1257        let credential = match self.credential.as_ref() {
1258            Some(c) => c.clone(),
1259            None => {
1260                return stream::once(async {
1261                    Err(StreamError::ModelProvider(
1262                        "Anthropic credentials not set".to_string(),
1263                    ))
1264                })
1265                .boxed();
1266            }
1267        };
1268
1269        let (system_prompt, mut messages) = Self::convert_messages(request.messages);
1270        if Self::should_cache_conversation(request.messages) {
1271            Self::apply_cache_to_last_message(&mut messages);
1272        }
1273
1274        let tool_choice_override = zeroclaw_api::TOOL_CHOICE_OVERRIDE
1275            .try_with(Clone::clone)
1276            .ok()
1277            .flatten();
1278        let native_tools = Self::convert_tools(request.tools);
1279        let tool_choice = if native_tools.is_some() {
1280            tool_choice_override.map(|tc| serde_json::json!({ "type": tc }))
1281        } else {
1282            None
1283        };
1284
1285        let system_prompt = if Self::is_setup_token(&credential) {
1286            Self::apply_oauth_system_prompt(system_prompt)
1287        } else {
1288            system_prompt
1289        };
1290
1291        let (effective_temperature, thinking_config, effective_max_tokens) =
1292            self.resolve_thinking(request.thinking, temperature, model);
1293
1294        // When native thinking is enabled, streamed `thinking_delta` /
1295        // `signature_delta` SSE events are not yet parsed into
1296        // `reasoning_content`, which means a tool-use turn could emit a
1297        // tool call without preserving the signed thinking block that
1298        // justified it — breaking Anthropic's signature round-trip. Fall
1299        // back to a non-streaming request so `parse_native_response` can
1300        // preserve the signed blocks, and synthesize a short stream from
1301        // the completed response. Full streaming thinking_delta
1302        // preservation is tracked as a follow-up.
1303        if thinking_config.is_some() {
1304            ::zeroclaw_log::record!(
1305                INFO,
1306                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
1307                    .with_attrs(::serde_json::json!({"model": model})),
1308                "native thinking enabled; using non-streaming fallback to preserve signed thinking blocks"
1309            );
1310            let native_request = NativeChatRequest {
1311                model: model.to_string(),
1312                max_tokens: effective_max_tokens,
1313                system: system_prompt,
1314                messages,
1315                temperature: if anthropic_model_omits_temperature(model) {
1316                    None
1317                } else {
1318                    effective_temperature
1319                },
1320                tools: native_tools,
1321                tool_choice,
1322                stream: None,
1323                thinking: thinking_config,
1324            };
1325            // Serialize eagerly so the request body is owned and `'static`
1326            // across the async boundary — `NativeToolSpec<'a>` borrows from
1327            // `request.tools`, which prevents moving `native_request` into
1328            // the spawned future otherwise.
1329            let body = serde_json::to_value(&native_request)
1330                .expect("NativeChatRequest should serialize to JSON");
1331            let client = self.http_client();
1332            let url = format!("{}/v1/messages", self.base_url);
1333            let is_oauth = Self::is_setup_token(&credential);
1334
1335            return stream::once(async move {
1336                let mut req = client
1337                    .post(&url)
1338                    .header("anthropic-version", "2023-06-01")
1339                    .header("content-type", "application/json")
1340                    .json(&body);
1341                if is_oauth {
1342                    req = req
1343                        .header("Authorization", format!("Bearer {credential}"))
1344                        .header(
1345                            "anthropic-beta",
1346                            "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14",
1347                        )
1348                        .header("anthropic-dangerous-direct-browser-access", "true");
1349                } else {
1350                    req = req.header("x-api-key", &credential);
1351                }
1352                let response = req
1353                    .send()
1354                    .await
1355                    .map_err(|e| StreamError::Http(e.to_string()))?;
1356                if !response.status().is_success() {
1357                    let status = response.status();
1358                    let body = response
1359                        .text()
1360                        .await
1361                        .unwrap_or_else(|_| format!("HTTP error: {status}"));
1362                    return Err(StreamError::ModelProvider(format!("{status}: {body}")));
1363                }
1364                let parsed: NativeChatResponse = response
1365                    .json()
1366                    .await
1367                    .map_err(|e| StreamError::ModelProvider(format!("response decode: {e}")))?;
1368                Ok(Self::parse_native_response(parsed))
1369            })
1370            .flat_map(|result| match result {
1371                Ok(resp) => {
1372                    let mut events: Vec<StreamResult<StreamEvent>> = Vec::new();
1373                    // Emit signed thinking blocks first via `StreamChunk.reasoning`
1374                    // so the agent loop can accumulate them into
1375                    // `ChatResponse.reasoning_content` for multi-turn replay.
1376                    // Anthropic requires signed thinking blocks to precede
1377                    // tool-use blocks in conversation history.
1378                    if let Some(rc) = resp.reasoning_content {
1379                        events.push(Ok(StreamEvent::TextDelta(StreamChunk {
1380                            delta: String::new(),
1381                            reasoning: Some(rc),
1382                            is_final: false,
1383                            token_count: 0,
1384                        })));
1385                    }
1386                    if let Some(text) = resp.text.filter(|t| !t.is_empty()) {
1387                        events.push(Ok(StreamEvent::TextDelta(StreamChunk::delta(text))));
1388                    }
1389                    for tc in resp.tool_calls {
1390                        events.push(Ok(StreamEvent::ToolCall(tc)));
1391                    }
1392                    if let Some(usage) = resp.usage {
1393                        events.push(Ok(StreamEvent::Usage(usage)));
1394                    }
1395                    events.push(Ok(StreamEvent::Final));
1396                    stream::iter(events)
1397                }
1398                Err(e) => stream::iter(vec![Err(e)]),
1399            })
1400            .boxed();
1401        }
1402
1403        ::zeroclaw_log::record!(
1404            DEBUG,
1405            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note).with_attrs(
1406                ::serde_json::json!({"max_tokens": effective_max_tokens, "model": model})
1407            ),
1408            "stream_chat request"
1409        );
1410        let native_request = NativeChatRequest {
1411            model: model.to_string(),
1412            max_tokens: effective_max_tokens,
1413            system: system_prompt,
1414            messages,
1415            temperature: if anthropic_model_omits_temperature(model) {
1416                None
1417            } else {
1418                effective_temperature
1419            },
1420            tools: native_tools,
1421            tool_choice,
1422            stream: Some(true),
1423            thinking: thinking_config,
1424        };
1425
1426        let body = Self::build_streaming_request(&native_request);
1427        let client = self.http_client();
1428        let url = format!("{}/v1/messages", self.base_url);
1429        let is_oauth = Self::is_setup_token(&credential);
1430
1431        let (tx, rx) = tokio::sync::mpsc::channel::<StreamResult<StreamEvent>>(64);
1432
1433        tokio::spawn(async move {
1434            let mut req = client
1435                .post(&url)
1436                .header("anthropic-version", "2023-06-01")
1437                .header("content-type", "application/json")
1438                .json(&body);
1439
1440            if is_oauth {
1441                req = req
1442                    .header("Authorization", format!("Bearer {credential}"))
1443                    .header(
1444                        "anthropic-beta",
1445                        "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14",
1446                    )
1447                    .header("anthropic-dangerous-direct-browser-access", "true");
1448            } else {
1449                req = req.header("x-api-key", &credential);
1450            }
1451
1452            let response = match req.send().await {
1453                Ok(r) => r,
1454                Err(e) => {
1455                    let _ = tx
1456                        .send(Err(StreamError::Http(super::format_error_chain(&e))))
1457                        .await;
1458                    return;
1459                }
1460            };
1461
1462            if !response.status().is_success() {
1463                let status = response.status();
1464                let error = response
1465                    .text()
1466                    .await
1467                    .unwrap_or_else(|_| format!("HTTP error: {status}"));
1468                let _ = tx
1469                    .send(Err(StreamError::ModelProvider(format!(
1470                        "{status}: {error}"
1471                    ))))
1472                    .await;
1473                return;
1474            }
1475
1476            Self::parse_anthropic_sse(response, &tx).await;
1477        });
1478
1479        stream::unfold(rx, |mut rx| async move {
1480            rx.recv().await.map(|event| (event, rx))
1481        })
1482        .boxed()
1483    }
1484}
1485
1486impl ::zeroclaw_api::attribution::Attributable for AnthropicModelProvider {
1487    fn role(&self) -> ::zeroclaw_api::attribution::Role {
1488        ::zeroclaw_api::attribution::Role::Provider(
1489            ::zeroclaw_api::attribution::ProviderKind::Model(
1490                ::zeroclaw_api::attribution::ModelProviderKind::Anthropic,
1491            ),
1492        )
1493    }
1494    fn alias(&self) -> &str {
1495        &self.alias
1496    }
1497}
1498
1499#[cfg(test)]
1500mod tests {
1501    use super::*;
1502    use crate::auth::anthropic_token::{AnthropicAuthKind, detect_auth_kind};
1503
1504    /// Fake Anthropic SSE stream covering the message_start → content → delta
1505    /// → stop sequence with usage in both the start frame and the stop delta.
1506    /// Each `data:` line is one Anthropic event per the streaming spec.
1507    fn fake_anthropic_sse() -> &'static [u8] {
1508        b"event: message_start\n\
1509data: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_1\",\"type\":\"message\",\"role\":\"assistant\",\"model\":\"claude-sonnet-4-5\",\"usage\":{\"input_tokens\":314,\"cache_read_input_tokens\":42}}}\n\n\
1510event: content_block_start\n\
1511data: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"}}\n\n\
1512event: content_block_delta\n\
1513data: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"hi\"}}\n\n\
1514event: content_block_stop\n\
1515data: {\"type\":\"content_block_stop\",\"index\":0}\n\n\
1516event: message_delta\n\
1517data: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\"},\"usage\":{\"output_tokens\":27}}\n\n\
1518event: message_stop\n\
1519data: {\"type\":\"message_stop\"}\n\n"
1520    }
1521
1522    #[tokio::test]
1523    async fn streaming_usage_emitted_before_final() {
1524        // The original #6001 live repro was Anthropic streaming; before this
1525        // PR the message_start / message_delta usage frames were only logged
1526        // at DEBUG and never surfaced as `StreamEvent::Usage`. Now they are.
1527        use std::io::Cursor;
1528
1529        let bytes = fake_anthropic_sse();
1530        let reader = tokio::io::BufReader::new(Cursor::new(bytes));
1531        let (tx, mut rx) = tokio::sync::mpsc::channel::<StreamResult<StreamEvent>>(64);
1532        AnthropicModelProvider::parse_anthropic_sse_from_reader(reader, &tx).await;
1533
1534        let mut events = Vec::new();
1535        while let Ok(Some(ev)) =
1536            tokio::time::timeout(std::time::Duration::from_millis(50), rx.recv()).await
1537        {
1538            events.push(ev);
1539        }
1540
1541        let states: Vec<&str> = events
1542            .iter()
1543            .map(|e| match e.as_ref() {
1544                Ok(StreamEvent::TextDelta(_)) => "text",
1545                Ok(StreamEvent::ToolCall(_)) => "tool_call",
1546                Ok(StreamEvent::PreExecutedToolCall { .. }) => "pre_tool_call",
1547                Ok(StreamEvent::PreExecutedToolResult { .. }) => "pre_tool_result",
1548                Ok(StreamEvent::Usage(_)) => "usage",
1549                Ok(StreamEvent::Final) => "final",
1550                Err(_) => "err",
1551            })
1552            .collect();
1553
1554        // Required ordering: usage event must appear before Final so the
1555        // gateway accumulator can capture it within the same turn boundary.
1556        let usage_pos = states
1557            .iter()
1558            .position(|s| *s == "usage")
1559            .unwrap_or_else(|| panic!("expected Usage event in stream, got {states:?}"));
1560        let final_pos = states
1561            .iter()
1562            .position(|s| *s == "final")
1563            .unwrap_or_else(|| panic!("expected Final event in stream, got {states:?}"));
1564        assert!(
1565            usage_pos < final_pos,
1566            "Usage must come before Final, got {states:?}"
1567        );
1568
1569        // The Usage payload must carry both input + output token counts plus
1570        // the cached-input prompt-cache reads from message_start.
1571        let usage = events
1572            .iter()
1573            .find_map(|e| match e.as_ref() {
1574                Ok(StreamEvent::Usage(u)) => Some(u.clone()),
1575                _ => None,
1576            })
1577            .unwrap();
1578        assert_eq!(
1579            usage.input_tokens,
1580            Some(314),
1581            "input_tokens from message_start usage frame"
1582        );
1583        assert_eq!(
1584            usage.output_tokens,
1585            Some(27),
1586            "output_tokens from message_delta usage frame"
1587        );
1588        assert_eq!(
1589            usage.cached_input_tokens,
1590            Some(42),
1591            "cache_read_input_tokens from message_start"
1592        );
1593    }
1594
1595    #[tokio::test]
1596    async fn streaming_usage_omitted_when_provider_does_not_send_usage() {
1597        // Backward-compat: a stream that never emits a usage frame must not
1598        // synthesize a zero-valued Usage event. Consumers should treat
1599        // absence as "usage unavailable" rather than "usage was zero."
1600        use std::io::Cursor;
1601
1602        let bytes = b"event: message_start\n\
1603data: {\"type\":\"message_start\",\"message\":{\"id\":\"msg_1\",\"model\":\"claude\"}}\n\n\
1604event: content_block_start\n\
1605data: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"}}\n\n\
1606event: content_block_stop\n\
1607data: {\"type\":\"content_block_stop\",\"index\":0}\n\n\
1608event: message_stop\n\
1609data: {\"type\":\"message_stop\"}\n\n";
1610        let reader = tokio::io::BufReader::new(Cursor::new(bytes.as_slice()));
1611        let (tx, mut rx) = tokio::sync::mpsc::channel::<StreamResult<StreamEvent>>(64);
1612        AnthropicModelProvider::parse_anthropic_sse_from_reader(reader, &tx).await;
1613
1614        let mut saw_usage = false;
1615        while let Ok(Some(ev)) =
1616            tokio::time::timeout(std::time::Duration::from_millis(50), rx.recv()).await
1617        {
1618            if matches!(ev, Ok(StreamEvent::Usage(_))) {
1619                saw_usage = true;
1620            }
1621        }
1622        assert!(
1623            !saw_usage,
1624            "must not emit Usage when provider sent no usage frames"
1625        );
1626    }
1627
1628    #[test]
1629    fn creates_with_key() {
1630        let p = AnthropicModelProvider::new("test", Some("anthropic-test-credential"));
1631        assert!(p.credential.is_some());
1632        assert_eq!(p.credential.as_deref(), Some("anthropic-test-credential"));
1633        assert_eq!(p.base_url, "https://api.anthropic.com");
1634    }
1635
1636    #[test]
1637    fn creates_without_key() {
1638        let p = AnthropicModelProvider::new("test", None);
1639        assert!(p.credential.is_none());
1640        assert_eq!(p.base_url, "https://api.anthropic.com");
1641    }
1642
1643    #[test]
1644    fn creates_with_empty_key() {
1645        let p = AnthropicModelProvider::new("test", Some(""));
1646        assert!(p.credential.is_none());
1647    }
1648
1649    #[test]
1650    fn creates_with_whitespace_key() {
1651        let p = AnthropicModelProvider::new("test", Some("  anthropic-test-credential  "));
1652        assert!(p.credential.is_some());
1653        assert_eq!(p.credential.as_deref(), Some("anthropic-test-credential"));
1654    }
1655
1656    #[test]
1657    fn creates_with_custom_base_url() {
1658        let p = AnthropicModelProvider::with_base_url(
1659            "test",
1660            Some("anthropic-credential"),
1661            Some("https://api.example.com"),
1662        );
1663        assert_eq!(p.base_url, "https://api.example.com");
1664        assert_eq!(p.credential.as_deref(), Some("anthropic-credential"));
1665    }
1666
1667    #[test]
1668    fn custom_base_url_trims_trailing_slash() {
1669        let p =
1670            AnthropicModelProvider::with_base_url("test", None, Some("https://api.example.com/"));
1671        assert_eq!(p.base_url, "https://api.example.com");
1672    }
1673
1674    #[test]
1675    fn no_base_url_uses_published_endpoint() {
1676        let p = AnthropicModelProvider::with_base_url("test", None, None);
1677        assert_eq!(p.base_url, "https://api.anthropic.com");
1678    }
1679
1680    #[tokio::test]
1681    async fn chat_fails_without_key() {
1682        let p = AnthropicModelProvider::new("test", None);
1683        let result = p
1684            .chat_with_system(None, "hello", "claude-3-opus", Some(0.7))
1685            .await;
1686        assert!(result.is_err());
1687        let err = result.unwrap_err().to_string();
1688        assert!(
1689            err.contains("credentials not set"),
1690            "Expected key error, got: {err}"
1691        );
1692    }
1693
1694    #[test]
1695    fn setup_token_detection_works() {
1696        assert!(AnthropicModelProvider::is_setup_token(
1697            "sk-ant-oat01-abcdef"
1698        ));
1699        assert!(!AnthropicModelProvider::is_setup_token("sk-ant-api-key"));
1700    }
1701
1702    #[test]
1703    fn apply_auth_uses_bearer_and_beta_for_setup_tokens() {
1704        let model_provider = AnthropicModelProvider::new("test", None);
1705        let request = model_provider
1706            .apply_auth(
1707                model_provider
1708                    .http_client()
1709                    .get("https://api.anthropic.com/v1/models"),
1710                "sk-ant-oat01-test-token",
1711            )
1712            .build()
1713            .expect("request should build");
1714
1715        assert_eq!(
1716            request
1717                .headers()
1718                .get("authorization")
1719                .and_then(|v| v.to_str().ok()),
1720            Some("Bearer sk-ant-oat01-test-token")
1721        );
1722        assert_eq!(
1723            request
1724                .headers()
1725                .get("anthropic-beta")
1726                .and_then(|v| v.to_str().ok()),
1727            Some("claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14")
1728        );
1729        assert_eq!(
1730            request
1731                .headers()
1732                .get("anthropic-dangerous-direct-browser-access")
1733                .and_then(|v| v.to_str().ok()),
1734            Some("true")
1735        );
1736        assert!(request.headers().get("x-api-key").is_none());
1737    }
1738
1739    #[test]
1740    fn apply_auth_uses_x_api_key_for_regular_tokens() {
1741        let model_provider = AnthropicModelProvider::new("test", None);
1742        let request = model_provider
1743            .apply_auth(
1744                model_provider
1745                    .http_client()
1746                    .get("https://api.anthropic.com/v1/models"),
1747                "sk-ant-api-key",
1748            )
1749            .build()
1750            .expect("request should build");
1751
1752        assert_eq!(
1753            request
1754                .headers()
1755                .get("x-api-key")
1756                .and_then(|v| v.to_str().ok()),
1757            Some("sk-ant-api-key")
1758        );
1759        assert!(request.headers().get("authorization").is_none());
1760        assert!(request.headers().get("anthropic-beta").is_none());
1761    }
1762
1763    #[tokio::test]
1764    async fn chat_with_system_fails_without_key() {
1765        let p = AnthropicModelProvider::new("test", None);
1766        let result = p
1767            .chat_with_system(
1768                Some("You are ZeroClaw"),
1769                "hello",
1770                "claude-3-opus",
1771                Some(0.7),
1772            )
1773            .await;
1774        assert!(result.is_err());
1775    }
1776
1777    #[test]
1778    fn chat_request_serializes_without_system() {
1779        let req = ChatRequest {
1780            model: "claude-3-opus".to_string(),
1781            max_tokens: 4096,
1782            system: None,
1783            messages: vec![Message {
1784                role: "user".to_string(),
1785                content: "hello".to_string(),
1786            }],
1787            temperature: 0.7,
1788        };
1789        let json = serde_json::to_string(&req).unwrap();
1790        assert!(
1791            !json.contains("system"),
1792            "system field should be skipped when None"
1793        );
1794        assert!(json.contains("claude-3-opus"));
1795        assert!(json.contains("hello"));
1796    }
1797
1798    #[test]
1799    fn chat_request_serializes_with_system() {
1800        let req = ChatRequest {
1801            model: "claude-3-opus".to_string(),
1802            max_tokens: 4096,
1803            system: Some("You are ZeroClaw".to_string()),
1804            messages: vec![Message {
1805                role: "user".to_string(),
1806                content: "hello".to_string(),
1807            }],
1808            temperature: 0.7,
1809        };
1810        let json = serde_json::to_string(&req).unwrap();
1811        assert!(json.contains("\"system\":\"You are ZeroClaw\""));
1812    }
1813
1814    #[test]
1815    fn chat_response_deserializes() {
1816        let json = r#"{"content":[{"type":"text","text":"Hello there!"}]}"#;
1817        let resp: ChatResponse = serde_json::from_str(json).unwrap();
1818        assert_eq!(resp.content.len(), 1);
1819        assert_eq!(resp.content[0].kind, "text");
1820        assert_eq!(resp.content[0].text.as_deref(), Some("Hello there!"));
1821    }
1822
1823    #[test]
1824    fn chat_response_empty_content() {
1825        let json = r#"{"content":[]}"#;
1826        let resp: ChatResponse = serde_json::from_str(json).unwrap();
1827        assert!(resp.content.is_empty());
1828    }
1829
1830    #[test]
1831    fn chat_response_multiple_blocks() {
1832        let json =
1833            r#"{"content":[{"type":"text","text":"First"},{"type":"text","text":"Second"}]}"#;
1834        let resp: ChatResponse = serde_json::from_str(json).unwrap();
1835        assert_eq!(resp.content.len(), 2);
1836        assert_eq!(resp.content[0].text.as_deref(), Some("First"));
1837        assert_eq!(resp.content[1].text.as_deref(), Some("Second"));
1838    }
1839
1840    #[test]
1841    fn temperature_range_serializes() {
1842        for temp in [0.0, 0.5, 1.0, 2.0] {
1843            let req = ChatRequest {
1844                model: "claude-3-opus".to_string(),
1845                max_tokens: 4096,
1846                system: None,
1847                messages: vec![],
1848                temperature: temp,
1849            };
1850            let json = serde_json::to_string(&req).unwrap();
1851            assert!(json.contains(&format!("{temp}")));
1852        }
1853    }
1854
1855    // ── Opus 4.7 temperature-omission tests (issue #6147) ────────
1856
1857    #[test]
1858    fn anthropic_model_omits_temperature_matches_opus_4_7() {
1859        assert!(anthropic_model_omits_temperature("claude-opus-4-7"));
1860        assert!(anthropic_model_omits_temperature(
1861            "claude-opus-4-7-20260101"
1862        ));
1863    }
1864
1865    #[test]
1866    fn anthropic_model_omits_temperature_skips_other_models() {
1867        assert!(!anthropic_model_omits_temperature("claude-opus-4-6"));
1868        assert!(!anthropic_model_omits_temperature("claude-sonnet-4-6"));
1869        assert!(!anthropic_model_omits_temperature("claude-haiku-4-5"));
1870        assert!(!anthropic_model_omits_temperature("claude-3-opus"));
1871    }
1872
1873    #[test]
1874    fn anthropic_model_supports_native_thinking_excludes_opus_4_7() {
1875        // Opus 4.7 only supports adaptive thinking; fixed-budget returns 400.
1876        assert!(!anthropic_model_supports_native_thinking("claude-opus-4-7"));
1877        assert!(!anthropic_model_supports_native_thinking(
1878            "claude-opus-4-7-20260101"
1879        ));
1880    }
1881
1882    #[test]
1883    fn anthropic_model_supports_native_thinking_allows_other_models() {
1884        assert!(anthropic_model_supports_native_thinking("claude-opus-4-6"));
1885        assert!(anthropic_model_supports_native_thinking(
1886            "claude-sonnet-4-6"
1887        ));
1888        assert!(anthropic_model_supports_native_thinking("claude-haiku-4-5"));
1889    }
1890
1891    #[test]
1892    fn resolve_thinking_drops_native_for_opus_4_7() {
1893        let provider = AnthropicModelProvider::new("test", Some("test-key"));
1894        let params = zeroclaw_api::model_provider::NativeThinkingParams {
1895            budget_tokens: 10_000,
1896        };
1897        let (temp, config, max_tokens) =
1898            provider.resolve_thinking(Some(params), Some(0.7_f64), "claude-opus-4-7");
1899        assert!(
1900            config.is_none(),
1901            "native thinking should be gated off for opus-4-7"
1902        );
1903        // Caller-supplied temperature is preserved (so per-model omit guard
1904        // can still take effect downstream).
1905        assert!((temp.unwrap() - 0.7_f64).abs() < f64::EPSILON);
1906        assert_eq!(max_tokens, provider.max_tokens);
1907    }
1908
1909    #[test]
1910    fn resolve_thinking_keeps_native_for_supported_models() {
1911        let provider = AnthropicModelProvider::new("test", Some("test-key"));
1912        let params = zeroclaw_api::model_provider::NativeThinkingParams {
1913            budget_tokens: 10_000,
1914        };
1915        let (temp, config, _) =
1916            provider.resolve_thinking(Some(params), Some(0.7_f64), "claude-sonnet-4-6");
1917        assert!(
1918            config.is_some(),
1919            "native thinking should activate on supported models"
1920        );
1921        // Forced to 1.0 per Anthropic native-thinking contract.
1922        assert!((temp.unwrap() - 1.0_f64).abs() < f64::EPSILON);
1923    }
1924
1925    #[test]
1926    fn native_chat_request_serializes_without_temperature_when_none() {
1927        let req = NativeChatRequest {
1928            model: "claude-opus-4-7".to_string(),
1929            max_tokens: 4096,
1930            system: None,
1931            messages: vec![],
1932            temperature: None,
1933            tools: None,
1934            tool_choice: None,
1935            stream: None,
1936            thinking: None,
1937        };
1938        let json = serde_json::to_string(&req).unwrap();
1939        assert!(json.contains("max_tokens"));
1940        assert!(
1941            !json.contains("temperature"),
1942            "expected temperature to be omitted, got: {json}"
1943        );
1944    }
1945
1946    #[test]
1947    fn native_chat_request_serializes_with_temperature_when_some() {
1948        let req = NativeChatRequest {
1949            model: "claude-sonnet-4-6".to_string(),
1950            max_tokens: 4096,
1951            system: None,
1952            messages: vec![],
1953            temperature: Some(0.7),
1954            tools: None,
1955            tool_choice: None,
1956            stream: None,
1957            thinking: None,
1958        };
1959        let json = serde_json::to_string(&req).unwrap();
1960        assert!(
1961            json.contains("\"temperature\":0.7"),
1962            "expected temperature to be present, got: {json}"
1963        );
1964    }
1965
1966    #[test]
1967    fn detects_auth_from_jwt_shape() {
1968        let kind = detect_auth_kind("a.b.c", None);
1969        assert_eq!(kind, AnthropicAuthKind::Authorization);
1970    }
1971
1972    #[test]
1973    fn cache_control_serializes_correctly() {
1974        let cache = CacheControl::ephemeral();
1975        let json = serde_json::to_string(&cache).unwrap();
1976        assert_eq!(json, r#"{"type":"ephemeral"}"#);
1977    }
1978
1979    #[test]
1980    fn system_prompt_string_variant_serializes() {
1981        let prompt = SystemPrompt::String("You are a helpful assistant".to_string());
1982        let json = serde_json::to_string(&prompt).unwrap();
1983        assert_eq!(json, r#""You are a helpful assistant""#);
1984    }
1985
1986    #[test]
1987    fn system_prompt_blocks_variant_serializes() {
1988        let prompt = SystemPrompt::Blocks(vec![SystemBlock {
1989            block_type: "text".to_string(),
1990            text: "You are a helpful assistant".to_string(),
1991            cache_control: Some(CacheControl::ephemeral()),
1992        }]);
1993        let json = serde_json::to_string(&prompt).unwrap();
1994        assert!(json.contains(r#""type":"text""#));
1995        assert!(json.contains("You are a helpful assistant"));
1996        assert!(json.contains(r#""type":"ephemeral""#));
1997    }
1998
1999    #[test]
2000    fn system_prompt_blocks_without_cache_control() {
2001        let prompt = SystemPrompt::Blocks(vec![SystemBlock {
2002            block_type: "text".to_string(),
2003            text: "Short prompt".to_string(),
2004            cache_control: None,
2005        }]);
2006        let json = serde_json::to_string(&prompt).unwrap();
2007        assert!(json.contains("Short prompt"));
2008        assert!(!json.contains("cache_control"));
2009    }
2010
2011    #[test]
2012    fn native_content_text_without_cache_control() {
2013        let content = NativeContentOut::Text {
2014            text: "Hello".to_string(),
2015            cache_control: None,
2016        };
2017        let json = serde_json::to_string(&content).unwrap();
2018        assert!(json.contains(r#""type":"text""#));
2019        assert!(json.contains("Hello"));
2020        assert!(!json.contains("cache_control"));
2021    }
2022
2023    #[test]
2024    fn native_content_text_with_cache_control() {
2025        let content = NativeContentOut::Text {
2026            text: "Hello".to_string(),
2027            cache_control: Some(CacheControl::ephemeral()),
2028        };
2029        let json = serde_json::to_string(&content).unwrap();
2030        assert!(json.contains(r#""type":"text""#));
2031        assert!(json.contains("Hello"));
2032        assert!(json.contains(r#""cache_control":{"type":"ephemeral"}"#));
2033    }
2034
2035    #[test]
2036    fn native_content_tool_use_without_cache_control() {
2037        let content = NativeContentOut::ToolUse {
2038            id: "tool_123".to_string(),
2039            name: "get_weather".to_string(),
2040            input: serde_json::json!({"location": "San Francisco"}),
2041            cache_control: None,
2042        };
2043        let json = serde_json::to_string(&content).unwrap();
2044        assert!(json.contains(r#""type":"tool_use""#));
2045        assert!(json.contains("tool_123"));
2046        assert!(json.contains("get_weather"));
2047        assert!(!json.contains("cache_control"));
2048    }
2049
2050    #[test]
2051    fn native_content_tool_result_with_cache_control() {
2052        let content = NativeContentOut::ToolResult {
2053            tool_use_id: "tool_123".to_string(),
2054            content: "Result data".to_string(),
2055            cache_control: Some(CacheControl::ephemeral()),
2056        };
2057        let json = serde_json::to_string(&content).unwrap();
2058        assert!(json.contains(r#""type":"tool_result""#));
2059        assert!(json.contains("tool_123"));
2060        assert!(json.contains("Result data"));
2061        assert!(json.contains(r#""cache_control":{"type":"ephemeral"}"#));
2062    }
2063
2064    #[test]
2065    fn native_tool_spec_without_cache_control() {
2066        let schema = serde_json::json!({"type": "object"});
2067        let tool = NativeToolSpec {
2068            name: "get_weather",
2069            description: "Get weather info",
2070            input_schema: &schema,
2071            cache_control: None,
2072        };
2073        let json = serde_json::to_string(&tool).unwrap();
2074        assert!(json.contains("get_weather"));
2075        assert!(!json.contains("cache_control"));
2076    }
2077
2078    #[test]
2079    fn native_tool_spec_with_cache_control() {
2080        let schema = serde_json::json!({"type": "object"});
2081        let tool = NativeToolSpec {
2082            name: "get_weather",
2083            description: "Get weather info",
2084            input_schema: &schema,
2085            cache_control: Some(CacheControl::ephemeral()),
2086        };
2087        let json = serde_json::to_string(&tool).unwrap();
2088        assert!(json.contains("get_weather"));
2089        assert!(json.contains(r#""cache_control":{"type":"ephemeral"}"#));
2090    }
2091
2092    #[test]
2093    fn should_cache_conversation_short() {
2094        let messages = vec![
2095            ChatMessage {
2096                role: "system".to_string(),
2097                content: "System prompt".to_string(),
2098            },
2099            ChatMessage {
2100                role: "user".to_string(),
2101                content: "Hello".to_string(),
2102            },
2103        ];
2104        // Only 1 non-system message — should not cache
2105        assert!(!AnthropicModelProvider::should_cache_conversation(
2106            &messages
2107        ));
2108    }
2109
2110    #[test]
2111    fn should_cache_conversation_long() {
2112        let mut messages = vec![ChatMessage {
2113            role: "system".to_string(),
2114            content: "System prompt".to_string(),
2115        }];
2116        // Add 3 non-system messages
2117        for i in 0..3 {
2118            messages.push(ChatMessage {
2119                role: if i % 2 == 0 { "user" } else { "assistant" }.to_string(),
2120                content: format!("Message {i}"),
2121            });
2122        }
2123        assert!(AnthropicModelProvider::should_cache_conversation(&messages));
2124    }
2125
2126    #[test]
2127    fn should_cache_conversation_boundary() {
2128        let messages = vec![ChatMessage {
2129            role: "user".to_string(),
2130            content: "Hello".to_string(),
2131        }];
2132        // Exactly 1 non-system message — should not cache
2133        assert!(!AnthropicModelProvider::should_cache_conversation(
2134            &messages
2135        ));
2136
2137        // Add one more to cross boundary (>1)
2138        let messages = vec![
2139            ChatMessage {
2140                role: "user".to_string(),
2141                content: "Hello".to_string(),
2142            },
2143            ChatMessage {
2144                role: "assistant".to_string(),
2145                content: "Hi".to_string(),
2146            },
2147        ];
2148        assert!(AnthropicModelProvider::should_cache_conversation(&messages));
2149    }
2150
2151    #[test]
2152    fn apply_cache_to_last_message_text() {
2153        let mut messages = vec![NativeMessage {
2154            role: "user".to_string(),
2155            content: vec![NativeContentOut::Text {
2156                text: "Hello".to_string(),
2157                cache_control: None,
2158            }],
2159        }];
2160
2161        AnthropicModelProvider::apply_cache_to_last_message(&mut messages);
2162
2163        match &messages[0].content[0] {
2164            NativeContentOut::Text { cache_control, .. } => {
2165                assert!(cache_control.is_some());
2166            }
2167            _ => panic!("Expected Text variant"),
2168        }
2169    }
2170
2171    #[test]
2172    fn apply_cache_to_last_message_tool_result() {
2173        let mut messages = vec![NativeMessage {
2174            role: "user".to_string(),
2175            content: vec![NativeContentOut::ToolResult {
2176                tool_use_id: "tool_123".to_string(),
2177                content: "Result".to_string(),
2178                cache_control: None,
2179            }],
2180        }];
2181
2182        AnthropicModelProvider::apply_cache_to_last_message(&mut messages);
2183
2184        match &messages[0].content[0] {
2185            NativeContentOut::ToolResult { cache_control, .. } => {
2186                assert!(cache_control.is_some());
2187            }
2188            _ => panic!("Expected ToolResult variant"),
2189        }
2190    }
2191
2192    #[test]
2193    fn apply_cache_to_last_message_does_not_affect_tool_use() {
2194        let mut messages = vec![NativeMessage {
2195            role: "assistant".to_string(),
2196            content: vec![NativeContentOut::ToolUse {
2197                id: "tool_123".to_string(),
2198                name: "get_weather".to_string(),
2199                input: serde_json::json!({}),
2200                cache_control: None,
2201            }],
2202        }];
2203
2204        AnthropicModelProvider::apply_cache_to_last_message(&mut messages);
2205
2206        // ToolUse should not be affected
2207        match &messages[0].content[0] {
2208            NativeContentOut::ToolUse { cache_control, .. } => {
2209                assert!(cache_control.is_none());
2210            }
2211            _ => panic!("Expected ToolUse variant"),
2212        }
2213    }
2214
2215    #[test]
2216    fn apply_cache_empty_messages() {
2217        let mut messages = vec![];
2218        AnthropicModelProvider::apply_cache_to_last_message(&mut messages);
2219        // Should not panic
2220        assert!(messages.is_empty());
2221    }
2222
2223    #[test]
2224    fn convert_tools_adds_cache_to_last_tool() {
2225        let tools = vec![
2226            ToolSpec {
2227                name: "tool1".to_string(),
2228                description: "First tool".to_string(),
2229                parameters: serde_json::json!({"type": "object"}),
2230            },
2231            ToolSpec {
2232                name: "tool2".to_string(),
2233                description: "Second tool".to_string(),
2234                parameters: serde_json::json!({"type": "object"}),
2235            },
2236        ];
2237
2238        let native_tools = AnthropicModelProvider::convert_tools(Some(&tools)).unwrap();
2239
2240        assert_eq!(native_tools.len(), 2);
2241        assert!(native_tools[0].cache_control.is_none());
2242        assert!(native_tools[1].cache_control.is_some());
2243    }
2244
2245    #[test]
2246    fn convert_tools_single_tool_gets_cache() {
2247        let tools = vec![ToolSpec {
2248            name: "tool1".to_string(),
2249            description: "Only tool".to_string(),
2250            parameters: serde_json::json!({"type": "object"}),
2251        }];
2252
2253        let native_tools = AnthropicModelProvider::convert_tools(Some(&tools)).unwrap();
2254
2255        assert_eq!(native_tools.len(), 1);
2256        assert!(native_tools[0].cache_control.is_some());
2257    }
2258
2259    #[test]
2260    fn convert_messages_small_system_prompt_uses_blocks_with_cache() {
2261        let messages = vec![ChatMessage {
2262            role: "system".to_string(),
2263            content: "Short system prompt".to_string(),
2264        }];
2265
2266        let (system_prompt, _) = AnthropicModelProvider::convert_messages(&messages);
2267
2268        match system_prompt.unwrap() {
2269            SystemPrompt::Blocks(blocks) => {
2270                assert_eq!(blocks.len(), 1);
2271                assert_eq!(blocks[0].text, "Short system prompt");
2272                assert!(
2273                    blocks[0].cache_control.is_some(),
2274                    "Small system prompts should have cache_control"
2275                );
2276            }
2277            SystemPrompt::String(_) => {
2278                panic!("Expected Blocks variant with cache_control for small prompt")
2279            }
2280        }
2281    }
2282
2283    #[test]
2284    fn convert_messages_large_system_prompt() {
2285        let large_content = "a".repeat(3073);
2286        let messages = vec![ChatMessage {
2287            role: "system".to_string(),
2288            content: large_content.clone(),
2289        }];
2290
2291        let (system_prompt, _) = AnthropicModelProvider::convert_messages(&messages);
2292
2293        match system_prompt.unwrap() {
2294            SystemPrompt::Blocks(blocks) => {
2295                assert_eq!(blocks.len(), 1);
2296                assert_eq!(blocks[0].text, large_content);
2297                assert!(blocks[0].cache_control.is_some());
2298            }
2299            SystemPrompt::String(_) => panic!("Expected Blocks variant for large prompt"),
2300        }
2301    }
2302
2303    #[test]
2304    fn native_chat_request_with_blocks_system() {
2305        // System prompts now always use Blocks format with cache_control
2306        let req = NativeChatRequest {
2307            model: "claude-3-opus".to_string(),
2308            max_tokens: 4096,
2309            system: Some(SystemPrompt::Blocks(vec![SystemBlock {
2310                block_type: "text".to_string(),
2311                text: "System".to_string(),
2312                cache_control: Some(CacheControl::ephemeral()),
2313            }])),
2314            messages: vec![NativeMessage {
2315                role: "user".to_string(),
2316                content: vec![NativeContentOut::Text {
2317                    text: "Hello".to_string(),
2318                    cache_control: None,
2319                }],
2320            }],
2321            temperature: Some(0.7),
2322            tools: None,
2323            tool_choice: None,
2324            stream: None,
2325            thinking: None,
2326        };
2327
2328        let json = serde_json::to_string(&req).unwrap();
2329        assert!(json.contains("System"));
2330        assert!(
2331            json.contains(r#""cache_control":{"type":"ephemeral"}"#),
2332            "System prompt should include cache_control"
2333        );
2334    }
2335
2336    #[test]
2337    fn native_chat_request_omits_temperature_when_none() {
2338        let req = NativeChatRequest {
2339            model: "claude-opus-4-7".to_string(),
2340            max_tokens: 4096,
2341            system: None,
2342            messages: vec![NativeMessage {
2343                role: "user".to_string(),
2344                content: vec![NativeContentOut::Text {
2345                    text: "hi".to_string(),
2346                    cache_control: None,
2347                }],
2348            }],
2349            temperature: None,
2350            tools: None,
2351            tool_choice: None,
2352            stream: None,
2353            thinking: None,
2354        };
2355
2356        let json = serde_json::to_string(&req).unwrap();
2357        assert!(
2358            !json.contains("temperature"),
2359            "temperature should be omitted when None; got: {json}"
2360        );
2361    }
2362
2363    #[tokio::test]
2364    async fn warmup_without_key_is_noop() {
2365        let model_provider = AnthropicModelProvider::new("test", None);
2366        let result = model_provider.warmup().await;
2367        assert!(result.is_ok());
2368    }
2369
2370    #[test]
2371    fn convert_messages_preserves_multi_turn_history() {
2372        let messages = vec![
2373            ChatMessage {
2374                role: "system".to_string(),
2375                content: "You are helpful.".to_string(),
2376            },
2377            ChatMessage {
2378                role: "user".to_string(),
2379                content: "gen a 2 sum in golang".to_string(),
2380            },
2381            ChatMessage {
2382                role: "assistant".to_string(),
2383                content: "```go\nfunc twoSum(nums []int) {}\n```".to_string(),
2384            },
2385            ChatMessage {
2386                role: "user".to_string(),
2387                content: "what's meaning of make here?".to_string(),
2388            },
2389        ];
2390
2391        let (system, native_msgs) = AnthropicModelProvider::convert_messages(&messages);
2392
2393        // System prompt extracted
2394        assert!(system.is_some());
2395        // All 3 non-system messages preserved in order
2396        assert_eq!(native_msgs.len(), 3);
2397        assert_eq!(native_msgs[0].role, "user");
2398        assert_eq!(native_msgs[1].role, "assistant");
2399        assert_eq!(native_msgs[2].role, "user");
2400    }
2401
2402    /// Integration test: spin up a mock Anthropic API server, call chat_with_tools
2403    /// with a multi-turn conversation + tools, and verify the request body contains
2404    /// ALL conversation turns and native tool definitions.
2405    #[tokio::test]
2406    async fn chat_with_tools_sends_full_history_and_native_tools() {
2407        use axum::{Json, Router, routing::post};
2408        use std::sync::{Arc, Mutex};
2409        use tokio::net::TcpListener;
2410
2411        // Captured request body for assertion
2412        let captured: Arc<Mutex<Option<serde_json::Value>>> = Arc::new(Mutex::new(None));
2413        let captured_clone = captured.clone();
2414
2415        let app = Router::new().route(
2416            "/v1/messages",
2417            post(move |Json(body): Json<serde_json::Value>| {
2418                let cap = captured_clone.clone();
2419                async move {
2420                    *cap.lock().unwrap() = Some(body);
2421                    // Return a minimal valid Anthropic response
2422                    Json(serde_json::json!({
2423                        "id": "msg_test",
2424                        "type": "message",
2425                        "role": "assistant",
2426                        "content": [{"type": "text", "text": "The make function creates a map."}],
2427                        "model": "claude-opus-4-6",
2428                        "stop_reason": "end_turn",
2429                        "usage": {"input_tokens": 100, "output_tokens": 20}
2430                    }))
2431                }
2432            }),
2433        );
2434
2435        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
2436        let addr = listener.local_addr().unwrap();
2437        let server_handle = tokio::spawn(async move {
2438            axum::serve(listener, app).await.unwrap();
2439        });
2440
2441        // Create model_provider pointing at mock server
2442        let model_provider = AnthropicModelProvider {
2443            alias: "test".to_string(),
2444            credential: Some("test-key".to_string()),
2445            base_url: format!("http://{addr}"),
2446            max_tokens: 4096,
2447        };
2448
2449        // Multi-turn conversation: system → user (Go code) → assistant (code response) → user (follow-up)
2450        let messages = vec![
2451            ChatMessage::system("You are a helpful assistant."),
2452            ChatMessage::user("gen a 2 sum in golang"),
2453            ChatMessage::assistant(
2454                "```go\nfunc twoSum(nums []int, target int) []int {\n    m := make(map[int]int)\n    for i, n := range nums {\n        if j, ok := m[target-n]; ok {\n            return []int{j, i}\n        }\n        m[n] = i\n    }\n    return nil\n}\n```",
2455            ),
2456            ChatMessage::user("what's meaning of make here?"),
2457        ];
2458
2459        let tools = vec![serde_json::json!({
2460            "type": "function",
2461            "function": {
2462                "name": "shell",
2463                "description": "Run a shell command",
2464                "parameters": {
2465                    "type": "object",
2466                    "properties": {
2467                        "command": {"type": "string"}
2468                    },
2469                    "required": ["command"]
2470                }
2471            }
2472        })];
2473
2474        let result = model_provider
2475            .chat_with_tools(&messages, &tools, "claude-opus-4-6", Some(0.7))
2476            .await;
2477        assert!(result.is_ok(), "chat_with_tools failed: {:?}", result.err());
2478
2479        let body = captured
2480            .lock()
2481            .unwrap()
2482            .take()
2483            .expect("No request captured");
2484
2485        // Verify system prompt extracted to top-level field
2486        let system = &body["system"];
2487        assert!(
2488            system.to_string().contains("helpful assistant"),
2489            "System prompt missing: {system}"
2490        );
2491
2492        // Verify ALL conversation turns present in messages array
2493        let msgs = body["messages"].as_array().expect("messages not an array");
2494        assert_eq!(
2495            msgs.len(),
2496            3,
2497            "Expected 3 messages (2 user + 1 assistant), got {}",
2498            msgs.len()
2499        );
2500
2501        // Turn 1: user with Go request
2502        assert_eq!(msgs[0]["role"], "user");
2503        let turn1_text = msgs[0]["content"].to_string();
2504        assert!(
2505            turn1_text.contains("2 sum"),
2506            "Turn 1 missing Go request: {turn1_text}"
2507        );
2508
2509        // Turn 2: assistant with Go code
2510        assert_eq!(msgs[1]["role"], "assistant");
2511        let turn2_text = msgs[1]["content"].to_string();
2512        assert!(
2513            turn2_text.contains("make(map[int]int)"),
2514            "Turn 2 missing Go code: {turn2_text}"
2515        );
2516
2517        // Turn 3: user follow-up
2518        assert_eq!(msgs[2]["role"], "user");
2519        let turn3_text = msgs[2]["content"].to_string();
2520        assert!(
2521            turn3_text.contains("meaning of make"),
2522            "Turn 3 missing follow-up: {turn3_text}"
2523        );
2524
2525        // Verify native tools are present
2526        let api_tools = body["tools"].as_array().expect("tools not an array");
2527        assert_eq!(api_tools.len(), 1);
2528        assert_eq!(api_tools[0]["name"], "shell");
2529        assert!(
2530            api_tools[0]["input_schema"].is_object(),
2531            "Missing input_schema"
2532        );
2533
2534        server_handle.abort();
2535    }
2536
2537    #[test]
2538    fn native_response_parses_usage() {
2539        let json = r#"{
2540            "content": [{"type": "text", "text": "Hello"}],
2541            "usage": {"input_tokens": 300, "output_tokens": 75}
2542        }"#;
2543        let resp: NativeChatResponse = serde_json::from_str(json).unwrap();
2544        let result = AnthropicModelProvider::parse_native_response(resp);
2545        let usage = result.usage.unwrap();
2546        assert_eq!(usage.input_tokens, Some(300));
2547        assert_eq!(usage.output_tokens, Some(75));
2548    }
2549
2550    #[test]
2551    fn native_response_parses_without_usage() {
2552        let json = r#"{"content": [{"type": "text", "text": "Hello"}]}"#;
2553        let resp: NativeChatResponse = serde_json::from_str(json).unwrap();
2554        let result = AnthropicModelProvider::parse_native_response(resp);
2555        assert!(result.usage.is_none());
2556    }
2557
2558    #[test]
2559    fn native_response_preserves_thinking_text_byte_for_byte() {
2560        // Signatures on extended-thinking blocks are computed over the exact
2561        // bytes the model returned. Any mutation — including trim() — breaks
2562        // signature validation on replay in a multi-turn tool-use conversation.
2563        let json = r#"{
2564            "content": [
2565                {
2566                    "type": "thinking",
2567                    "thinking": "  \nStep 1: consider the request.\nStep 2: respond.\n  ",
2568                    "signature": "sig_abc123"
2569                },
2570                {"type": "text", "text": "ok"}
2571            ]
2572        }"#;
2573        let resp: NativeChatResponse = serde_json::from_str(json).unwrap();
2574        let result = AnthropicModelProvider::parse_native_response(resp);
2575        let reasoning = result.reasoning_content.expect("thinking preserved");
2576        let parsed: serde_json::Value = serde_json::from_str(&reasoning).unwrap();
2577        assert_eq!(
2578            parsed.get("thinking").and_then(|v| v.as_str()),
2579            Some("  \nStep 1: consider the request.\nStep 2: respond.\n  ")
2580        );
2581        assert_eq!(
2582            parsed.get("signature").and_then(|v| v.as_str()),
2583            Some("sig_abc123")
2584        );
2585    }
2586
2587    #[test]
2588    fn native_response_drops_empty_thinking_blocks() {
2589        let json = r#"{
2590            "content": [
2591                {"type": "thinking", "thinking": "", "signature": "sig_xyz"},
2592                {"type": "text", "text": "hello"}
2593            ]
2594        }"#;
2595        let resp: NativeChatResponse = serde_json::from_str(json).unwrap();
2596        let result = AnthropicModelProvider::parse_native_response(resp);
2597        assert!(result.reasoning_content.is_none());
2598    }
2599
2600    #[test]
2601    fn capabilities_returns_vision_and_native_tools() {
2602        let model_provider = AnthropicModelProvider::new("test", Some("test-key"));
2603        let caps = model_provider.capabilities();
2604        assert!(
2605            caps.native_tool_calling,
2606            "Anthropic should support native tool calling"
2607        );
2608        assert!(caps.vision, "Anthropic should support vision");
2609    }
2610
2611    #[test]
2612    fn convert_messages_with_image_marker_data_uri() {
2613        let messages = vec![ChatMessage {
2614            role: "user".to_string(),
2615            content: "Check this image: [IMAGE:data:image/jpeg;base64,/9j/4AAQ] What do you see?"
2616                .to_string(),
2617        }];
2618
2619        let (_, native_msgs) = AnthropicModelProvider::convert_messages(&messages);
2620
2621        assert_eq!(native_msgs.len(), 1);
2622        assert_eq!(native_msgs[0].role, "user");
2623        // Should have 2 content blocks: image + text
2624        assert_eq!(native_msgs[0].content.len(), 2);
2625
2626        // First block should be image
2627        match &native_msgs[0].content[0] {
2628            NativeContentOut::Image { source } => {
2629                assert_eq!(source.source_type, "base64");
2630                assert_eq!(source.media_type, "image/jpeg");
2631                assert_eq!(source.data, "/9j/4AAQ");
2632            }
2633            _ => panic!("Expected Image content block"),
2634        }
2635
2636        // Second block should be text (parse_image_markers may leave extra spaces)
2637        match &native_msgs[0].content[1] {
2638            NativeContentOut::Text { text, .. } => {
2639                // The text may have extra spaces where the marker was removed
2640                assert!(
2641                    text.contains("Check this image:") && text.contains("What do you see?"),
2642                    "Expected text to contain 'Check this image:' and 'What do you see?', got: {}",
2643                    text
2644                );
2645            }
2646            _ => panic!("Expected Text content block"),
2647        }
2648    }
2649
2650    #[test]
2651    fn convert_messages_with_only_image_marker() {
2652        let messages = vec![ChatMessage {
2653            role: "user".to_string(),
2654            content: "[IMAGE:data:image/png;base64,iVBORw0KGgo]".to_string(),
2655        }];
2656
2657        let (_, native_msgs) = AnthropicModelProvider::convert_messages(&messages);
2658
2659        assert_eq!(native_msgs.len(), 1);
2660        assert_eq!(native_msgs[0].content.len(), 2);
2661
2662        // First block should be image
2663        match &native_msgs[0].content[0] {
2664            NativeContentOut::Image { source } => {
2665                assert_eq!(source.media_type, "image/png");
2666            }
2667            _ => panic!("Expected Image content block"),
2668        }
2669
2670        // Second block should be placeholder text
2671        match &native_msgs[0].content[1] {
2672            NativeContentOut::Text { text, .. } => {
2673                assert_eq!(text, "[image]");
2674            }
2675            _ => panic!("Expected Text content block with [image] placeholder"),
2676        }
2677    }
2678
2679    #[test]
2680    fn convert_messages_without_image_marker() {
2681        let messages = vec![ChatMessage {
2682            role: "user".to_string(),
2683            content: "Hello, how are you?".to_string(),
2684        }];
2685
2686        let (_, native_msgs) = AnthropicModelProvider::convert_messages(&messages);
2687
2688        assert_eq!(native_msgs.len(), 1);
2689        assert_eq!(native_msgs[0].content.len(), 1);
2690
2691        match &native_msgs[0].content[0] {
2692            NativeContentOut::Text { text, .. } => {
2693                assert_eq!(text, "Hello, how are you?");
2694            }
2695            _ => panic!("Expected Text content block"),
2696        }
2697    }
2698
2699    #[test]
2700    fn image_content_serializes_correctly() {
2701        let content = NativeContentOut::Image {
2702            source: ImageSource {
2703                source_type: "base64".to_string(),
2704                media_type: "image/jpeg".to_string(),
2705                data: "testdata".to_string(),
2706            },
2707        };
2708        let json = serde_json::to_string(&content).unwrap();
2709        // The outer "type" is the enum tag, inner "type" (source_type) is renamed
2710        assert!(json.contains(r#""type":"image""#), "JSON: {}", json);
2711        assert!(json.contains(r#""type":"base64""#), "JSON: {}", json); // source_type is serialized as "type"
2712        assert!(
2713            json.contains(r#""media_type":"image/jpeg""#),
2714            "JSON: {}",
2715            json
2716        );
2717        assert!(json.contains(r#""data":"testdata""#), "JSON: {}", json);
2718    }
2719
2720    #[test]
2721    fn convert_messages_merges_consecutive_tool_results() {
2722        // Simulate a multi-tool-call turn: assistant with two tool_use blocks
2723        // followed by two separate tool result messages.
2724        let messages = vec![
2725            ChatMessage {
2726                role: "system".to_string(),
2727                content: "You are helpful.".to_string(),
2728            },
2729            ChatMessage {
2730                role: "user".to_string(),
2731                content: "Do two things.".to_string(),
2732            },
2733            ChatMessage {
2734                role: "assistant".to_string(),
2735                content: serde_json::json!({
2736                    "content": "",
2737                    "tool_calls": [
2738                        {"id": "call_1", "name": "shell", "arguments": "{\"command\":\"ls\"}"},
2739                        {"id": "call_2", "name": "shell", "arguments": "{\"command\":\"pwd\"}"}
2740                    ]
2741                })
2742                .to_string(),
2743            },
2744            ChatMessage {
2745                role: "tool".to_string(),
2746                content: serde_json::json!({
2747                    "tool_call_id": "call_1",
2748                    "content": "file1.txt\nfile2.txt"
2749                })
2750                .to_string(),
2751            },
2752            ChatMessage {
2753                role: "tool".to_string(),
2754                content: serde_json::json!({
2755                    "tool_call_id": "call_2",
2756                    "content": "/home/user"
2757                })
2758                .to_string(),
2759            },
2760        ];
2761
2762        let (system, native_msgs) = AnthropicModelProvider::convert_messages(&messages);
2763
2764        assert!(system.is_some());
2765        // Should be: user, assistant, user (merged tool results)
2766        // NOT: user, assistant, user, user (which Anthropic rejects)
2767        assert_eq!(
2768            native_msgs.len(),
2769            3,
2770            "Expected 3 messages (user, assistant, merged tool results), got {}.\nRoles: {:?}",
2771            native_msgs.len(),
2772            native_msgs.iter().map(|m| &m.role).collect::<Vec<_>>()
2773        );
2774        assert_eq!(native_msgs[0].role, "user");
2775        assert_eq!(native_msgs[1].role, "assistant");
2776        assert_eq!(native_msgs[2].role, "user");
2777        // The merged user message should contain both tool results
2778        assert_eq!(
2779            native_msgs[2].content.len(),
2780            2,
2781            "Expected 2 tool_result blocks in merged message"
2782        );
2783    }
2784
2785    #[test]
2786    fn convert_messages_no_adjacent_same_role() {
2787        // Verify that convert_messages never produces adjacent messages with the
2788        // same role, regardless of input ordering.
2789        let messages = vec![
2790            ChatMessage {
2791                role: "user".to_string(),
2792                content: "Hello".to_string(),
2793            },
2794            ChatMessage {
2795                role: "assistant".to_string(),
2796                content: serde_json::json!({
2797                    "content": "I'll run a command",
2798                    "tool_calls": [
2799                        {"id": "tc1", "name": "shell", "arguments": "{\"command\":\"echo hi\"}"}
2800                    ]
2801                })
2802                .to_string(),
2803            },
2804            ChatMessage {
2805                role: "tool".to_string(),
2806                content: serde_json::json!({
2807                    "tool_call_id": "tc1",
2808                    "content": "hi"
2809                })
2810                .to_string(),
2811            },
2812            ChatMessage {
2813                role: "user".to_string(),
2814                content: "Thanks!".to_string(),
2815            },
2816        ];
2817
2818        let (_system, native_msgs) = AnthropicModelProvider::convert_messages(&messages);
2819
2820        for window in native_msgs.windows(2) {
2821            assert_ne!(
2822                window[0].role, window[1].role,
2823                "Adjacent messages must not share the same role: found two '{}' messages in a row",
2824                window[0].role
2825            );
2826        }
2827    }
2828}