Skip to main content

zeroclaw_runtime/agent/
context_compressor.rs

1use std::fmt::Write;
2use std::time::Duration;
3
4use anyhow::Result;
5use std::sync::Arc;
6
7use zeroclaw_api::model_provider::{ChatMessage, ModelProvider};
8use zeroclaw_memory::traits::Memory;
9use zeroclaw_providers::multimodal;
10
11pub use zeroclaw_config::scattered_types::ContextCompressionConfig;
12
13// ---------------------------------------------------------------------------
14// Result
15// ---------------------------------------------------------------------------
16
/// Outcome reported by [`ContextCompressor::compress_if_needed`].
///
/// Token figures are produced by [`estimate_tokens`], so they are heuristic
/// estimates rather than provider-reported counts.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompressionResult {
    /// Whether the call reports having compressed the history.
    pub compressed: bool,
    /// Estimated token count of the history when the call started.
    pub tokens_before: usize,
    /// Estimated token count of the history when the call returned.
    pub tokens_after: usize,
    /// Number of summarization passes that actually replaced messages.
    pub passes_used: u32,
}
24
25// ---------------------------------------------------------------------------
26// Probe tiers for unknown model context windows
27// ---------------------------------------------------------------------------
28
/// Candidate context-window sizes, ordered from largest to smallest, used
/// when the real window of a model is not known.
const PROBE_TIERS: &[usize] = &[
    2_000_000, 1_000_000, 512_000, 200_000, 128_000, 64_000, 32_000,
];

/// Return the largest probe tier that is strictly smaller than `current`.
///
/// When no tier is smaller than `current`, the result is `32_000`. Note that
/// this means a `current` at or below the smallest tier maps to `32_000`.
fn next_probe_tier(current: usize) -> usize {
    // Tiers are sorted descending, so the first smaller one is the answer.
    for &tier in PROBE_TIERS {
        if tier < current {
            return tier;
        }
    }
    32_000
}
40
41// ---------------------------------------------------------------------------
42// Error message parsing
43// ---------------------------------------------------------------------------
44
45/// Try to extract the actual context window limit from a model_provider error message.
46pub fn parse_context_limit_from_error(msg: &str) -> Option<usize> {
47    // Match patterns like "maximum context length is 128000" or "limit of 200000 tokens"
48    // or "context window of 131072" or "available context size (8448 tokens)"
49    let re_patterns: &[&str] = &[
50        // "maximum context length is 128000"
51        r"(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})",
52        // "context length is 128000" / "context window of 131072"
53        r"context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})",
54        // "128000 token context" / "128000 limit"
55        r"(\d{4,})\s*(?:tokens?\s*)?(?:context|limit)",
56        // "available context size (8448 tokens)"
57        r"available context size\s*\(\s*(\d{4,})",
58        // "> 128000 maximum context length" (Anthropic-style)
59        r">\s*(\d{4,})\s*(?:maximum|max)?\s*(?:context)?\s*(?:length|size|window|tokens?)",
60    ];
61    let lower = msg.to_lowercase();
62    for pattern in re_patterns {
63        if let Ok(re) = regex::Regex::new(pattern)
64            && let Some(caps) = re.captures(&lower)
65            && let Some(m) = caps.get(1)
66            && let Ok(limit) = m.as_str().parse::<usize>()
67            && (1024..=10_000_000).contains(&limit)
68        {
69            return Some(limit);
70        }
71    }
72    None
73}
74
75// ---------------------------------------------------------------------------
76// Token estimation
77// ---------------------------------------------------------------------------
78
79/// Estimate token count for a message history using ~4 chars/token heuristic
80/// with a 1.2x safety margin.
81pub fn estimate_tokens(messages: &[ChatMessage]) -> usize {
82    let raw: usize = messages
83        .iter()
84        .map(|m| m.content.len().div_ceil(4) + 4)
85        .sum();
86    // 1.2x safety margin to account for underestimation
87    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
88    {
89        (raw as f64 * 1.2) as usize
90    }
91}
92
93// ---------------------------------------------------------------------------
94// Summarizer prompt
95// ---------------------------------------------------------------------------
96
/// System prompt passed to the summarizer call in
/// `ContextCompressor::compress_once`. It lists what the summary must keep
/// and what it may drop when condensing a conversation segment.
const SUMMARIZER_SYSTEM: &str = "\
You are a conversation compaction engine. Summarize the conversation segment below into concise context.

PRESERVE exactly:
- All identifiers (UUIDs, hashes, file paths, URLs, tokens, IPs)
- Actions taken (tool calls, file operations, commands run)
- Key information obtained (data, results, error messages)
- Decisions made and user preferences expressed
- Current task status and unresolved items
- Constraints and requirements mentioned

OMIT:
- Verbose tool output (keep only key results)
- Repeated greetings or filler
- Redundant information already stated

Output concise bullet points. Be thorough but brief.";
114
115// ---------------------------------------------------------------------------
116// ContextCompressor
117// ---------------------------------------------------------------------------
118
/// Shrinks a chat history so it fits a model's context window, first by
/// trimming oversized tool results and then by replacing the middle of the
/// conversation with a summary message.
pub struct ContextCompressor {
    /// Compression settings (threshold ratio, protected head/tail sizes,
    /// summary limits, timeout, and so on).
    config: ContextCompressionConfig,
    /// Context window size the compression threshold is derived from.
    /// Updated by `set_context_window` and `compress_on_error`.
    context_window: usize,
    /// Optional memory store that receives each summary before the
    /// summarized messages are dropped from the history.
    memory: Option<Arc<dyn Memory>>,
}
124
125impl ContextCompressor {
    /// Create a compressor from its settings and an initial context window
    /// size. No memory store is attached; use `with_memory` to add one.
    pub fn new(config: ContextCompressionConfig, context_window: usize) -> Self {
        Self {
            config,
            context_window,
            memory: None,
        }
    }
133
134    /// Attach a memory handle so compression summaries are persisted before
135    /// old messages are discarded. Without this, compressed facts are lost.
136    pub fn with_memory(mut self, memory: Arc<dyn Memory>) -> Self {
137        self.memory = Some(memory);
138        self
139    }
140
    /// Update the context window size (e.g. after error-driven probing).
    ///
    /// The new value replaces the stored one unconditionally. It is used the
    /// next time `compress_if_needed` derives its threshold from
    /// `context_window * threshold_ratio`.
    pub fn set_context_window(&mut self, window: usize) {
        self.context_window = window;
    }
145
146    /// Fast-path: trim oversized tool results in non-protected messages.
147    /// Returns total characters saved. No LLM call needed.
148    fn fast_trim_tool_results(&self, history: &mut [ChatMessage]) -> usize {
149        let max = self.config.tool_result_retrim_chars;
150        if max == 0 {
151            return 0;
152        }
153        let mut saved = 0;
154        let protect_start = self.config.protect_first_n.min(history.len());
155        let protect_end = history.len().saturating_sub(self.config.protect_last_n);
156
157        if protect_start >= protect_end {
158            return 0;
159        }
160
161        for msg in &mut history[protect_start..protect_end] {
162            if msg.role != "tool" {
163                continue;
164            }
165            if msg.content.len() <= max {
166                continue;
167            }
168            // Skip exempt tools
169            if self
170                .config
171                .tool_result_trim_exempt
172                .iter()
173                .any(|t| msg.content.contains(t.as_str()))
174            {
175                continue;
176            }
177            // Skip base64 images
178            if msg.content.contains("data:image/") {
179                continue;
180            }
181            let original_len = msg.content.len();
182            msg.content = crate::agent::history::truncate_tool_message(&msg.content, max);
183            saved += original_len - msg.content.len();
184        }
185        saved
186    }
187
188    /// Main entry point. Compresses history in-place if over threshold.
189    ///
190    /// `temperature` is forwarded verbatim to the summarizer LLM call.
191    /// Pass `None` to let the provider decide (required for models that
192    /// reject `temperature`, e.g. claude-opus-4-7).
193    pub async fn compress_if_needed(
194        &self,
195        history: &mut Vec<ChatMessage>,
196        model_provider: &dyn ModelProvider,
197        model: &str,
198        temperature: Option<f64>,
199    ) -> Result<CompressionResult> {
200        if !self.config.enabled {
201            let tokens = estimate_tokens(history);
202            return Ok(CompressionResult {
203                compressed: false,
204                tokens_before: tokens,
205                tokens_after: tokens,
206                passes_used: 0,
207            });
208        }
209
210        let tokens_before = estimate_tokens(history);
211        #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
212        let threshold = (self.context_window as f64 * self.config.threshold_ratio) as usize;
213
214        if tokens_before <= threshold {
215            return Ok(CompressionResult {
216                compressed: false,
217                tokens_before,
218                tokens_after: tokens_before,
219                passes_used: 0,
220            });
221        }
222
223        // Fast-trim pass — may resolve overflow without an LLM call
224        let chars_saved = self.fast_trim_tool_results(history);
225        if chars_saved > 0 {
226            ::zeroclaw_log::record!(
227                INFO,
228                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
229                    .with_attrs(::serde_json::json!({"chars_saved": chars_saved})),
230                "Fast-trim saved chars from old tool results"
231            );
232            let recheck = estimate_tokens(history);
233            if recheck <= threshold {
234                return Ok(CompressionResult {
235                    compressed: true,
236                    tokens_before,
237                    tokens_after: recheck,
238                    passes_used: 0,
239                });
240            }
241        }
242
243        let mut passes_used = 0;
244        for _ in 0..self.config.max_passes {
245            let did_compress = self
246                .compress_once(history, model_provider, model, temperature)
247                .await?;
248            if did_compress {
249                passes_used += 1;
250            }
251            if estimate_tokens(history) <= threshold || !did_compress {
252                break;
253            }
254        }
255
256        let tokens_after = estimate_tokens(history);
257        Ok(CompressionResult {
258            compressed: passes_used > 0,
259            tokens_before,
260            tokens_after,
261            passes_used,
262        })
263    }
264
265    /// Reactive compression triggered by a context_length_exceeded error.
266    /// Parses the actual limit from the error, steps down probe tiers, and re-compresses.
267    pub async fn compress_on_error(
268        &mut self,
269        history: &mut Vec<ChatMessage>,
270        model_provider: &dyn ModelProvider,
271        model: &str,
272        temperature: Option<f64>,
273        error_msg: &str,
274    ) -> Result<bool> {
275        // Try to extract actual limit from error message
276        if let Some(limit) = parse_context_limit_from_error(error_msg) {
277            self.context_window = limit;
278        } else {
279            // Step down to next probe tier
280            self.context_window = next_probe_tier(self.context_window);
281        }
282
283        ::zeroclaw_log::record!(
284            INFO,
285            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
286                .with_attrs(::serde_json::json!({"context_window": self.context_window})),
287            "Context limit adjusted, re-compressing"
288        );
289
290        let result = self
291            .compress_if_needed(history, model_provider, model, temperature)
292            .await?;
293        Ok(result.compressed)
294    }
295
    /// Single compression pass: protect head/tail, summarize middle.
    ///
    /// The first `protect_first_n` and last `protect_last_n` messages are
    /// kept as they are (after boundary alignment). Everything in between is
    /// rendered to a transcript, summarized, and replaced by one synthesized
    /// assistant message.
    ///
    /// Returns `Ok(false)` without touching the history when there is nothing
    /// to summarize: the history is no longer than the protected total, the
    /// aligned middle range is empty, or the transcript is empty. Returns
    /// `Ok(true)` after the middle has been replaced.
    ///
    /// A failed or timed-out summarizer call does not abort the pass; a
    /// truncated transcript is used as the summary instead. A failure to
    /// store the summary in memory is only logged.
    async fn compress_once(
        &self,
        history: &mut Vec<ChatMessage>,
        model_provider: &dyn ModelProvider,
        model: &str,
        temperature: Option<f64>,
    ) -> Result<bool> {
        let n = history.len();
        let protected_total = self.config.protect_first_n + self.config.protect_last_n;
        if n <= protected_total {
            return Ok(false);
        }

        let mut start = self.config.protect_first_n.min(n);
        let mut end = n.saturating_sub(self.config.protect_last_n);

        // Align boundaries to avoid orphaning tool_call/tool_result pairs
        start = align_boundary_forward(history, start);
        end = align_boundary_backward(history, end);

        // Alignment can shrink the middle to nothing.
        if start >= end {
            return Ok(false);
        }

        // A dedicated summary model takes precedence over the main model.
        let summary_model = self.config.summary_model.as_deref().unwrap_or(model);
        // Media markers are kept only when the main model does the
        // summarizing and its provider reports vision support. With a
        // separately configured summary model they are always stripped.
        let preserve_media_markers =
            self.config.summary_model.is_none() && model_provider.supports_vision();

        // Build transcript from the middle section
        let middle = &history[start..end];
        let transcript = build_summarizer_transcript(
            middle,
            self.config.source_max_chars,
            preserve_media_markers,
        );

        if transcript.is_empty() {
            return Ok(false);
        }

        let message_count = end - start;

        let identifier_note = if self.config.identifier_policy == "strict" {
            "\nIMPORTANT: Preserve all identifiers exactly as they appear."
        } else {
            ""
        };

        let user_prompt = format!(
            "Summarize the following conversation history ({message_count} messages) for context preservation. \
             Keep it concise (max 20 bullet points).{identifier_note}\n\n{transcript}"
        );

        // LLM summarization with safety timeout
        let timeout = Duration::from_secs(self.config.timeout_secs);
        // Outer `Result` is the timeout, inner `Result` is the provider call.
        let summary_raw = match tokio::time::timeout(
            timeout,
            model_provider.chat_with_system(
                Some(SUMMARIZER_SYSTEM),
                &user_prompt,
                summary_model,
                temperature,
            ),
        )
        .await
        {
            Ok(Ok(s)) => s,
            Ok(Err(e)) => {
                ::zeroclaw_log::record!(
                    WARN,
                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
                        .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
                    "Summarization LLM call failed, using transcript truncation"
                );
                truncate_chars(&transcript, self.config.summary_max_chars)
            }
            Err(_) => {
                ::zeroclaw_log::record!(
                    WARN,
                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
                    &format!(
                        "Summarization timed out after {}s, using transcript truncation",
                        self.config.timeout_secs
                    )
                );
                truncate_chars(&transcript, self.config.summary_max_chars)
            }
        };

        // Cap the summary length; this also bounds an LLM-produced summary,
        // not just the fallback transcript.
        let summary = truncate_chars(&summary_raw, self.config.summary_max_chars);

        // Persist the compression summary to memory before discarding old messages.
        // This ensures facts from compressed turns remain retrievable via memory recall.
        if let Some(ref memory) = self.memory {
            // A fresh UUID per summary gives each stored entry its own key.
            let facts_key = format!("compressed_context_{}", uuid::Uuid::new_v4());
            if let Err(e) = memory
                .store(
                    &facts_key,
                    &summary,
                    zeroclaw_memory::traits::MemoryCategory::Daily,
                    None,
                )
                .await
            {
                // Best effort: a storage failure must not block compression.
                ::zeroclaw_log::record!(
                    DEBUG,
                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                        .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
                    "Failed to save compression summary to memory"
                );
            } else {
                ::zeroclaw_log::record!(
                    DEBUG,
                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                        .with_attrs(::serde_json::json!({"message_count": message_count})),
                    "Saved compression summary to memory before discarding  messages"
                );
            }
        }

        // Splice: head + [SUMMARY] + tail
        // The summary message is built from the range before it is replaced,
        // because it may carry over `reasoning_content` from that range.
        let summary_msg = build_summary_message(&history[start..end], &summary, message_count);
        history.splice(start..end, std::iter::once(summary_msg));

        // Repair orphaned tool pairs
        let tool_pairs_removed = repair_tool_pairs(history);

        ::zeroclaw_log::record!(
            WARN,
            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
                .with_attrs(::serde_json::json!({
                    "messages_summarized": message_count,
                    "summary_chars": summary.len(),
                    "tool_pairs_removed": tool_pairs_removed,
                    "protect_first_n": self.config.protect_first_n,
                    "protect_last_n": self.config.protect_last_n,
                })),
            "context_compressor: middle of conversation replaced with a \
             text summary. The model loses structural tool_use/tool_result \
             pairs from this range. If this fires mid-turn the model can \
             act like it just woke up. Raise protect_last_n / \
             protect_first_n, or raise max_context_tokens, or lower \
             threshold_ratio carefully."
        );

        Ok(true)
    }
447}
448
449// ---------------------------------------------------------------------------
450// Boundary alignment
451// ---------------------------------------------------------------------------
452
453/// Move boundary forward past any orphaned tool results at the start.
454fn align_boundary_forward(messages: &[ChatMessage], idx: usize) -> usize {
455    let mut i = idx;
456    while i < messages.len() && messages[i].role == "tool" {
457        i += 1;
458    }
459    i
460}
461
462/// Move the tail boundary backward past any orphan-creating split, and
463/// past the assistant + user pair that initiated the current turn.
464///
465/// The goal is "the protected tail starts on a turn boundary, not in
466/// the middle of a turn." Without this the compressor summarises the
467/// user's question while leaving the assistant's response + tool work
468/// in the tail, and the model re-enters the loop seeing dispatched
469/// tools with no prompt — looks exactly like "the model woke up
470/// mid-action."
471///
472/// One-shot, not iterative: this aligns the *tail edge* to the nearest
473/// preceding turn boundary, it does NOT keep eating preceding turns.
474/// The middle still gets compressed; only the boundary moves.
475///
476/// Returns the new `end` index such that `middle = messages[start..end]`
477/// and `tail = messages[end..]`.
478fn align_boundary_backward(messages: &[ChatMessage], idx: usize) -> usize {
479    let mut i = idx;
480
481    // Step past any leading `tool` messages — their owning assistant
482    // is earlier and must travel with them into the protected tail.
483    while i > 0 && messages[i - 1].role == "tool" {
484        i -= 1;
485    }
486
487    // Step past trailing `assistant` messages — both the tool-dispatching
488    // one and any preamble assistant immediately preceding it. The
489    // original code only stepped past assistants carrying `tool_calls`,
490    // which left preamble text in the middle.
491    while i > 0 && messages[i - 1].role == "assistant" {
492        i -= 1;
493    }
494
495    // Step past exactly one user message — the one that initiated this
496    // turn — so the turn is protected atomically. Do NOT loop back
497    // further: that would eat the entire conversation and produce an
498    // empty middle.
499    if i > 0 && messages[i - 1].role == "user" {
500        i -= 1;
501    }
502
503    i
504}
505
506// ---------------------------------------------------------------------------
507// Tool pair repair
508// ---------------------------------------------------------------------------
509
510/// Remove orphaned tool_results and add stubs for orphaned tool_calls.
511///
512/// After compression, some tool results may reference tool_calls that were
513/// summarized away, and vice versa. This function cleans up the history
514/// so every tool_result has a matching assistant message and every
515/// tool_call-bearing assistant message has results.
516fn repair_tool_pairs(messages: &mut Vec<ChatMessage>) -> usize {
517    let mut removed = 0;
518    // Heuristic: tool messages whose content references a call ID that no longer
519    // exists in any assistant message should be removed. Since ChatMessage is a
520    // simple role+content struct (no structured tool_call_id field), we use a
521    // simpler approach: remove any "tool" message that immediately follows the
522    // [CONTEXT SUMMARY] message (it's orphaned by definition).
523    let mut i = 0;
524    while i < messages.len() {
525        if messages[i].content.contains("[CONTEXT SUMMARY") {
526            // Remove any immediately following orphaned tool results
527            while i + 1 < messages.len() && messages[i + 1].role == "tool" {
528                messages.remove(i + 1);
529                removed += 1;
530            }
531        }
532        i += 1;
533    }
534
535    // Also check for tool results at the very start (after system prompt) that
536    // are orphaned because their assistant message was compressed.
537    let start = if messages.first().is_some_and(|m| m.role == "system") {
538        1
539    } else {
540        0
541    };
542    while start < messages.len() && messages[start].role == "tool" {
543        messages.remove(start);
544        removed += 1;
545    }
546    removed
547}
548
549// ---------------------------------------------------------------------------
550// Helpers
551// ---------------------------------------------------------------------------
552
553fn build_full_transcript(messages: &[ChatMessage]) -> String {
554    let mut transcript = String::new();
555    for msg in messages {
556        let role = msg.role.to_uppercase();
557        let _ = writeln!(transcript, "{role}: {}", msg.content.trim());
558    }
559    transcript
560}
561
562fn build_summarizer_transcript(
563    messages: &[ChatMessage],
564    max_chars: usize,
565    preserve_media_markers: bool,
566) -> String {
567    let transcript = build_full_transcript(messages);
568    if preserve_media_markers {
569        // Vision-capable summarizer can read media markers; preserve them so
570        // visual content is reflected in the summary (per #6189 contract).
571        return truncate_owned_if_needed(transcript, max_chars);
572    }
573
574    // Non-vision summarizer cannot consume media markers. Strip ALL inbound
575    // attachment-kind markers (IMAGE, PHOTO, DOCUMENT, FILE, VIDEO, VOICE,
576    // AUDIO — case-insensitive) instead of just `[IMAGE:...]`, otherwise a
577    // local filesystem path can leak into the auxiliary `chat_with_system`
578    // payload and the upstream API rejects it as a malformed `image_url.url`.
579    truncate_owned_if_needed(multimodal::strip_media_markers(&transcript), max_chars)
580}
581
582fn truncate_owned_if_needed(s: String, max: usize) -> String {
583    if s.len() > max {
584        truncate_chars(&s, max)
585    } else {
586        s
587    }
588}
589
/// Cut `s` down to at most `max` bytes of its original text and append
/// `"..."` to mark the cut.
///
/// Strings that already fit within `max` bytes are returned unchanged. The
/// cut never splits a UTF-8 sequence: it moves back to the nearest character
/// boundary at or before `max`. The appended marker is not counted against
/// `max`, so a truncated result can be up to three bytes longer than `max`.
fn truncate_chars(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_owned();
    }
    // Index 0 is always a char boundary, so the search cannot come up empty.
    let cut = (0..=max)
        .rev()
        .find(|&i| s.is_char_boundary(i))
        .unwrap_or(0);
    format!("{}...", &s[..cut])
}
603
604/// Construct the synthesized assistant message that replaces a compressed
605/// range. When the compressed range contains an assistant turn with
606/// `reasoning_content` (a thinking-mode response from providers like
607/// DeepSeek V4), embed the most recent such payload in the summary as a
608/// JSON-encoded `{content, reasoning_content}` body — matching the shape
609/// `build_native_assistant_history` already produces — so the next request
610/// to the provider passes its reasoning round-trip check. See #6269.
611fn build_summary_message(
612    compressed: &[ChatMessage],
613    summary: &str,
614    message_count: usize,
615) -> ChatMessage {
616    let summary_text = format!(
617        "[CONTEXT SUMMARY \u{2014} {message_count} earlier messages compressed]\n\n{summary}"
618    );
619
620    let last_reasoning = compressed
621        .iter()
622        .rev()
623        .filter(|m| m.role == "assistant")
624        .find_map(|m| {
625            serde_json::from_str::<serde_json::Value>(&m.content)
626                .ok()
627                .and_then(|v| {
628                    v.get("reasoning_content")
629                        .and_then(|rc| rc.as_str().map(ToString::to_string))
630                })
631        });
632
633    if let Some(rc) = last_reasoning {
634        let payload = serde_json::json!({
635            "content": summary_text,
636            "reasoning_content": rc,
637        });
638        ChatMessage::assistant(payload.to_string())
639    } else {
640        ChatMessage::assistant(summary_text)
641    }
642}
643
644// ---------------------------------------------------------------------------
645// Tests
646// ---------------------------------------------------------------------------
647
648#[cfg(test)]
649mod tests {
650    use super::*;
651    use async_trait::async_trait;
652    use parking_lot::Mutex;
653
654    fn msg(role: &str, content: &str) -> ChatMessage {
655        ChatMessage {
656            role: role.to_string(),
657            content: content.to_string(),
658        }
659    }
660
661    struct CaptureSummarizerModelProvider {
662        supports_vision: bool,
663        seen_messages: Mutex<Vec<String>>,
664    }
665
666    #[async_trait]
667    impl ModelProvider for CaptureSummarizerModelProvider {
668        async fn chat_with_system(
669            &self,
670            _system_prompt: Option<&str>,
671            message: &str,
672            _model: &str,
673            _temperature: Option<f64>,
674        ) -> Result<String> {
675            self.seen_messages.lock().push(message.to_string());
676            Ok("summary".to_string())
677        }
678
679        async fn chat(
680            &self,
681            _request: zeroclaw_api::model_provider::ChatRequest<'_>,
682            _model: &str,
683            _temperature: Option<f64>,
684        ) -> Result<zeroclaw_api::model_provider::ChatResponse> {
685            unreachable!("context compressor uses chat_with_system")
686        }
687
688        fn supports_vision(&self) -> bool {
689            self.supports_vision
690        }
691    }
692    impl ::zeroclaw_api::attribution::Attributable for CaptureSummarizerModelProvider {
693        fn role(&self) -> ::zeroclaw_api::attribution::Role {
694            ::zeroclaw_api::attribution::Role::Provider(
695                ::zeroclaw_api::attribution::ProviderKind::Model(
696                    ::zeroclaw_api::attribution::ModelProviderKind::Custom,
697                ),
698            )
699        }
700        fn alias(&self) -> &str {
701            "CaptureSummarizerModelProvider"
702        }
703    }
704
705    #[test]
706    fn test_estimate_tokens() {
707        let messages = vec![msg("user", "hello world")]; // 11 chars
708        let tokens = estimate_tokens(&messages);
709        // 11/4 ceil = 3, +4 framing = 7, *1.2 = 8.4 -> 8
710        assert!(tokens > 0);
711    }
712
713    #[test]
714    fn test_estimate_tokens_empty() {
715        assert_eq!(estimate_tokens(&[]), 0);
716    }
717
718    #[test]
719    fn test_parse_context_limit_anthropic() {
720        let msg = "prompt is too long: 150000 tokens > 128000 maximum context length";
721        assert_eq!(parse_context_limit_from_error(msg), Some(128_000));
722    }
723
724    #[test]
725    fn test_parse_context_limit_openai() {
726        let msg = "This model's maximum context length is 128000 tokens. However, your messages resulted in 150000 tokens.";
727        assert_eq!(parse_context_limit_from_error(msg), Some(128_000));
728    }
729
730    #[test]
731    fn test_parse_context_limit_llamacpp() {
732        let msg = "request (8968 tokens) exceeds the available context size (8448 tokens)";
733        assert_eq!(parse_context_limit_from_error(msg), Some(8448));
734    }
735
736    #[test]
737    fn test_parse_context_limit_none() {
738        assert_eq!(parse_context_limit_from_error("some random error"), None);
739    }
740
741    #[test]
742    fn test_parse_context_limit_rejects_small() {
743        let msg = "limit is 100 tokens";
744        assert_eq!(parse_context_limit_from_error(msg), None); // < 1024
745    }
746
747    #[test]
748    fn test_next_probe_tier() {
749        assert_eq!(next_probe_tier(2_000_001), 2_000_000);
750        assert_eq!(next_probe_tier(2_000_000), 1_000_000);
751        assert_eq!(next_probe_tier(200_000), 128_000);
752        assert_eq!(next_probe_tier(64_000), 32_000);
753        assert_eq!(next_probe_tier(32_000), 32_000); // floor
754        assert_eq!(next_probe_tier(10_000), 32_000); // below all tiers
755    }
756
757    #[test]
758    fn test_align_boundary_forward_skips_tool() {
759        let messages = vec![
760            msg("system", "sys"),
761            msg("user", "q"),
762            msg("tool", "result1"),
763            msg("tool", "result2"),
764            msg("user", "next"),
765        ];
766        // Starting at index 2 (tool), should skip to index 4
767        assert_eq!(align_boundary_forward(&messages, 2), 4);
768    }
769
770    #[test]
771    fn test_align_boundary_forward_noop() {
772        let messages = vec![
773            msg("system", "sys"),
774            msg("user", "q"),
775            msg("assistant", "a"),
776        ];
777        assert_eq!(align_boundary_forward(&messages, 1), 1);
778    }
779
780    #[test]
781    fn test_repair_tool_pairs_removes_orphaned() {
782        let mut messages = vec![
783            msg("system", "sys"),
784            msg(
785                "assistant",
786                "[CONTEXT SUMMARY — 5 earlier messages compressed]\nstuff",
787            ),
788            msg("tool", "orphaned result"),
789            msg("user", "next question"),
790        ];
791        repair_tool_pairs(&mut messages);
792        assert_eq!(messages.len(), 3);
793        assert_eq!(messages[2].role, "user");
794    }
795
796    #[test]
797    fn test_repair_tool_pairs_no_false_positives() {
798        let mut messages = vec![
799            msg("system", "sys"),
800            msg("user", "q"),
801            msg("assistant", "calling tool"),
802            msg("tool", "result"),
803            msg("user", "thanks"),
804        ];
805        repair_tool_pairs(&mut messages);
806        assert_eq!(messages.len(), 5); // no change
807    }
808
809    /// Regression test for the root-cause #5813 fix: when the tail
810    /// boundary lands on an assistant with `tool_calls`, the function
811    /// must back up past it so the assistant travels with its
812    /// `tool_result` blocks into the protected tail. Otherwise the
813    /// assistant gets summarized while its results survive, creating an
814    /// orphan and producing the 400 "unexpected tool_use_id" failure.
815    #[test]
816    fn test_align_boundary_backward_backs_up_past_tool_call_assistant() {
817        let messages = vec![
818            msg("system", "sys"),
819            msg("user", "q1"),
820            msg("assistant", "old reply 1"),
821            msg("user", "q2"),
822            msg(
823                "assistant",
824                r#"{"content":null,"tool_calls":[{"id":"toolu_X","name":"shell","arguments":"{}"}]}"#,
825            ),
826            msg("tool", r#"{"tool_call_id":"toolu_X","content":"result"}"#),
827            msg("user", "follow-up"),
828        ];
829        // Initial boundary lands on the assistant(tool_calls) at index 4.
830        // The function must back up past it so the pair stays in the tail.
831        let aligned = align_boundary_backward(&messages, 4);
832        assert!(
833            aligned < 4,
834            "boundary should retreat past assistant(tool_calls) at idx 4, got {aligned}"
835        );
836    }
837
838    #[test]
839    fn test_align_boundary_backward_protects_whole_turn() {
840        // Boundary alignment must back up past the assistant AND the
841        // user that initiated the turn so the protected tail contains
842        // complete user→assistant turns. Previously this returned 2,
843        // splitting `[U:q, A:plain] | [U:next]` and leaving the model
844        // looking at a tail that starts with a user message but missing
845        // the prior assistant's framing.
846        let messages = vec![
847            msg("system", "sys"),
848            msg("user", "q"),
849            msg("assistant", "plain text reply"),
850            msg("user", "next"),
851        ];
852        // Tail starts at `[A: plain]`. Function steps back past the
853        // assistant + its initiating user, landing on 1 (the protected
854        // tail is `[U:q, A:plain, U:next]`).
855        assert_eq!(align_boundary_backward(&messages, 2), 1);
856    }
857
858    #[test]
859    fn test_build_transcript() {
860        let messages = vec![msg("user", "hello"), msg("assistant", "hi there")];
861        let t = build_full_transcript(&messages);
862        assert!(t.contains("USER: hello"));
863        assert!(t.contains("ASSISTANT: hi there"));
864    }
865
866    #[test]
867    fn test_build_summarizer_transcript_strips_all_attachment_kinds_for_non_vision_provider() {
868        // The non-vision summarizer branch must strip every inbound
869        // attachment-kind alias the channel parsers can emit, not just
870        // `[IMAGE:]`. Mirrors `ATTACHMENT_KINDS` in
871        // `crates/zeroclaw-channels/src/util.rs`. Regression: a `[PHOTO:]`
872        // or `[DOCUMENT:]` marker still leaking through would surface a
873        // local filesystem path in the auxiliary `chat_with_system` payload
874        // and the upstream API would reject it.
875        let messages = vec![msg(
876            "user",
877            "Take a look at [IMAGE:/a.jpg] [PHOTO:/b.jpg] [DOCUMENT:/c.pdf] \
878             [FILE:/d.zip] [VIDEO:/e.mp4] [VOICE:/f.ogg] [AUDIO:/g.wav] please",
879        )];
880        let transcript = build_summarizer_transcript(&messages, 10_000, false);
881        for prefix in [
882            "[IMAGE:",
883            "[PHOTO:",
884            "[DOCUMENT:",
885            "[FILE:",
886            "[VIDEO:",
887            "[VOICE:",
888            "[AUDIO:",
889        ] {
890            assert!(
891                !transcript.contains(prefix),
892                "non-vision transcript should not contain raw {prefix} marker: {transcript}"
893            );
894        }
895        assert!(
896            transcript.contains("[media attachment]"),
897            "non-vision transcript should contain placeholder: {transcript}"
898        );
899        assert!(transcript.contains("Take a look at"));
900        assert!(transcript.contains("please"));
901    }
902
903    #[test]
904    fn test_build_summarizer_transcript_strips_media_markers_before_truncation() {
905        let long_path = format!(
906            "/private/tmp/zeroclaw/signal_inbound/{}",
907            "nested-directory/".repeat(12)
908        );
909        let messages = vec![msg(
910            "user",
911            &format!("Please summarize [IMAGE:{long_path}photo.png] after text"),
912        )];
913
914        let transcript = build_summarizer_transcript(&messages, 64, false);
915
916        assert!(
917            !transcript.contains("[IMAGE:"),
918            "non-vision transcript should not retain a split image marker: {transcript}"
919        );
920        assert!(
921            !transcript.contains("/private/tmp"),
922            "non-vision transcript should not leak local path fragments: {transcript}"
923        );
924        assert!(
925            transcript.contains("[media attachment]"),
926            "non-vision transcript should preserve an attachment placeholder: {transcript}"
927        );
928    }
929
930    #[test]
931    fn test_build_transcript_truncates() {
932        let messages = vec![msg("user", &"x".repeat(1000))];
933        let t = truncate_owned_if_needed(build_full_transcript(&messages), 100);
934        assert!(t.len() <= 103); // 100 + "..."
935    }
936
937    #[test]
938    fn test_build_summarizer_transcript_strips_image_markers_for_non_vision_provider() {
939        let messages = vec![msg(
940            "user",
941            "Describe this photo [IMAGE:/tmp/test.png]\nKeep the caption",
942        )];
943        let transcript = build_summarizer_transcript(&messages, 10_000, false);
944        assert!(!transcript.contains("[IMAGE:"));
945        assert!(transcript.contains("Describe this photo"));
946        assert!(transcript.contains("Keep the caption"));
947    }
948
949    #[test]
950    fn test_build_summarizer_transcript_keeps_image_markers_for_vision_provider() {
951        let messages = vec![msg("user", "Describe this photo [IMAGE:/tmp/test.png]")];
952        let transcript = build_summarizer_transcript(&messages, 10_000, true);
953        assert!(transcript.contains("[IMAGE:/tmp/test.png]"));
954    }
955
956    #[test]
957    fn test_truncate_chars() {
958        assert_eq!(truncate_chars("hello world", 5), "hello...");
959        assert_eq!(truncate_chars("hi", 10), "hi");
960    }
961
962    #[test]
963    fn test_config_defaults() {
964        let config = ContextCompressionConfig::default();
965        assert!(config.enabled);
966        assert!((config.threshold_ratio - 0.50).abs() < f64::EPSILON);
967        assert_eq!(config.protect_first_n, 3);
968        assert_eq!(config.protect_last_n, 4);
969        assert_eq!(config.max_passes, 3);
970        assert_eq!(config.summary_max_chars, 4_000);
971        assert_eq!(config.source_max_chars, 50_000);
972        assert_eq!(config.timeout_secs, 60);
973        assert!(config.summary_model.is_none());
974        assert_eq!(config.identifier_policy, "strict");
975    }
976
977    #[test]
978    fn test_config_serde_defaults() {
979        let json = "{}";
980        let config: ContextCompressionConfig = serde_json::from_str(json).unwrap();
981        assert!(config.enabled);
982        assert_eq!(config.protect_first_n, 3);
983        assert_eq!(config.max_passes, 3);
984    }
985
986    #[test]
987    fn test_config_serde_override() {
988        let json = r#"{"enabled": false, "protect_first_n": 5, "max_passes": 1}"#;
989        let config: ContextCompressionConfig = serde_json::from_str(json).unwrap();
990        assert!(!config.enabled);
991        assert_eq!(config.protect_first_n, 5);
992        assert_eq!(config.max_passes, 1);
993    }
994
995    #[tokio::test]
996    async fn compress_if_needed_strips_image_markers_before_non_vision_summarization() {
997        let config = ContextCompressionConfig {
998            protect_first_n: 1,
999            protect_last_n: 1,
1000            threshold_ratio: 0.01,
1001            ..Default::default()
1002        };
1003        let compressor = ContextCompressor::new(config, 64);
1004        let model_provider = CaptureSummarizerModelProvider {
1005            supports_vision: false,
1006            seen_messages: Mutex::new(Vec::new()),
1007        };
1008        let mut history = vec![
1009            msg("system", "sys"),
1010            msg("user", "First question"),
1011            msg("assistant", "First answer"),
1012            msg("user", "Middle question [IMAGE:/tmp/example.png]"),
1013            msg("assistant", "Middle answer about the image"),
1014            msg("user", "Another middle question"),
1015            msg("assistant", "Another middle answer"),
1016            msg("user", "Newest question"),
1017        ];
1018
1019        let result = compressor
1020            .compress_if_needed(&mut history, &model_provider, "model", None)
1021            .await
1022            .expect("compression should succeed");
1023
1024        assert!(result.compressed);
1025        let seen = model_provider.seen_messages.lock();
1026        let prompt = seen.last().expect("summarizer should be invoked");
1027        assert!(!prompt.contains("[IMAGE:"));
1028        assert!(!prompt.contains("/tmp/example.png"));
1029    }
1030
1031    #[tokio::test]
1032    async fn compress_if_needed_strips_image_markers_when_summary_model_overrides() {
1033        let config = ContextCompressionConfig {
1034            protect_first_n: 1,
1035            protect_last_n: 1,
1036            threshold_ratio: 0.01,
1037            summary_model: Some("text-summary-model".to_string()),
1038            ..Default::default()
1039        };
1040        let compressor = ContextCompressor::new(config, 64);
1041        let model_provider = CaptureSummarizerModelProvider {
1042            supports_vision: true,
1043            seen_messages: Mutex::new(Vec::new()),
1044        };
1045        let mut history = vec![
1046            msg("system", "sys"),
1047            msg("user", "First question"),
1048            msg("assistant", "First answer"),
1049            msg("user", "Middle question [IMAGE:/tmp/summary-override.png]"),
1050            msg("assistant", "Middle answer about the image"),
1051            msg("user", "Another middle question"),
1052            msg("assistant", "Another middle answer"),
1053            msg("user", "Newest question"),
1054        ];
1055
1056        let result = compressor
1057            .compress_if_needed(&mut history, &model_provider, "default-vision-model", None)
1058            .await
1059            .expect("compression should succeed");
1060
1061        assert!(result.compressed);
1062        let seen = model_provider.seen_messages.lock();
1063        let prompt = seen.last().expect("summarizer should be invoked");
1064        assert!(!prompt.contains("[IMAGE:"));
1065        assert!(!prompt.contains("/tmp/summary-override.png"));
1066    }
1067
1068    // ── fast_trim_tool_results tests ────────────────────────────────
1069
1070    #[test]
1071    fn test_fast_trim_protects_first_and_last_n() {
1072        let config = ContextCompressionConfig {
1073            protect_first_n: 2,
1074            protect_last_n: 2,
1075            tool_result_retrim_chars: 100,
1076            ..Default::default()
1077        };
1078        let compressor = ContextCompressor::new(config, 128_000);
1079        let big = "x".repeat(5_000);
1080        let mut history = vec![
1081            msg("system", "sys"),
1082            msg("tool", &big), // index 1 — protected (first 2)
1083            msg("user", "q"),
1084            msg("tool", &big),   // index 3 — trimmable
1085            msg("user", "next"), // index 4 — protected (last 2)
1086            msg("tool", &big),   // index 5 — protected (last 2)
1087        ];
1088        let saved = compressor.fast_trim_tool_results(&mut history);
1089        assert!(saved > 0);
1090        // Protected messages unchanged
1091        assert_eq!(history[1].content.len(), 5_000);
1092        assert_eq!(history[5].content.len(), 5_000);
1093        // Trimmable message was trimmed
1094        assert!(history[3].content.len() <= 200); // 100 + marker overhead
1095    }
1096
1097    #[test]
1098    fn test_fast_trim_skips_images() {
1099        let config = ContextCompressionConfig {
1100            protect_first_n: 0,
1101            protect_last_n: 0,
1102            tool_result_retrim_chars: 100,
1103            ..Default::default()
1104        };
1105        let compressor = ContextCompressor::new(config, 128_000);
1106        let img = format!("data:image/{}", "x".repeat(5_000));
1107        let mut history = vec![msg("tool", &img)];
1108        let saved = compressor.fast_trim_tool_results(&mut history);
1109        assert_eq!(saved, 0);
1110        assert!(history[0].content.len() > 5_000);
1111    }
1112
1113    #[test]
1114    fn test_fast_trim_skips_exempt_tools() {
1115        let config = ContextCompressionConfig {
1116            protect_first_n: 0,
1117            protect_last_n: 0,
1118            tool_result_retrim_chars: 100,
1119            tool_result_trim_exempt: vec!["KEEPME".to_string()],
1120            ..Default::default()
1121        };
1122        let compressor = ContextCompressor::new(config, 128_000);
1123        let content = format!("KEEPME {}", "x".repeat(5_000));
1124        let mut history = vec![msg("tool", &content)];
1125        let saved = compressor.fast_trim_tool_results(&mut history);
1126        assert_eq!(saved, 0);
1127    }
1128
1129    #[test]
1130    fn test_fast_trim_skips_small_results() {
1131        let config = ContextCompressionConfig {
1132            protect_first_n: 0,
1133            protect_last_n: 0,
1134            tool_result_retrim_chars: 2_000,
1135            ..Default::default()
1136        };
1137        let compressor = ContextCompressor::new(config, 128_000);
1138        let mut history = vec![msg("tool", "small result")];
1139        let saved = compressor.fast_trim_tool_results(&mut history);
1140        assert_eq!(saved, 0);
1141    }
1142
1143    #[test]
1144    fn test_fast_trim_skips_non_tool_messages() {
1145        let config = ContextCompressionConfig {
1146            protect_first_n: 0,
1147            protect_last_n: 0,
1148            tool_result_retrim_chars: 100,
1149            ..Default::default()
1150        };
1151        let compressor = ContextCompressor::new(config, 128_000);
1152        let big = "x".repeat(5_000);
1153        let mut history = vec![msg("user", &big), msg("assistant", &big)];
1154        let saved = compressor.fast_trim_tool_results(&mut history);
1155        assert_eq!(saved, 0);
1156    }
1157
1158    #[test]
1159    fn test_fast_trim_config_defaults() {
1160        let config = ContextCompressionConfig::default();
1161        assert_eq!(config.tool_result_retrim_chars, 2_000);
1162        assert!(config.tool_result_trim_exempt.is_empty());
1163    }
1164
1165    #[test]
1166    fn test_fast_trim_disabled_when_zero() {
1167        let config = ContextCompressionConfig {
1168            protect_first_n: 0,
1169            protect_last_n: 0,
1170            tool_result_retrim_chars: 0,
1171            ..Default::default()
1172        };
1173        let compressor = ContextCompressor::new(config, 128_000);
1174        let big = "x".repeat(5_000);
1175        let mut history = vec![msg("tool", &big)];
1176        let saved = compressor.fast_trim_tool_results(&mut history);
1177        assert_eq!(saved, 0);
1178    }
1179
1180    /// When the compressed range has no thinking-mode reasoning_content,
1181    /// the synthesized summary is plain text — same as before #6269.
1182    #[test]
1183    fn build_summary_message_uses_plain_text_when_no_reasoning() {
1184        let compressed = vec![
1185            msg("user", "what's the weather"),
1186            msg("assistant", "it's sunny"),
1187        ];
1188        let out = build_summary_message(&compressed, "weather chat", 2);
1189        assert_eq!(out.role, "assistant");
1190        assert!(out.content.starts_with("[CONTEXT SUMMARY"));
1191        assert!(out.content.contains("weather chat"));
1192        assert!(
1193            serde_json::from_str::<serde_json::Value>(&out.content).is_err(),
1194            "plain-text summary must not parse as JSON"
1195        );
1196    }
1197
1198    /// Regression test for #6269 — when an assistant message in the
1199    /// compressed range carries `reasoning_content` (thinking-mode replay
1200    /// payload), the synthesized summary preserves it via JSON-encoded
1201    /// content matching `build_native_assistant_history`'s shape.
1202    /// Without this, providers that require reasoning round-trip
1203    /// (DeepSeek V4 thinking) reject every post-compression request.
1204    #[test]
1205    fn build_summary_message_preserves_reasoning_content_when_present() {
1206        let assistant_with_reasoning = serde_json::json!({
1207            "content": "let me look",
1208            "reasoning_content": "user wants weather; need to check",
1209        })
1210        .to_string();
1211        let compressed = vec![
1212            msg("user", "what's the weather"),
1213            msg("assistant", &assistant_with_reasoning),
1214        ];
1215
1216        let out = build_summary_message(&compressed, "weather chat", 2);
1217        assert_eq!(out.role, "assistant");
1218        let parsed: serde_json::Value = serde_json::from_str(&out.content)
1219            .expect("summary must be JSON when reasoning_content is preserved");
1220        assert!(
1221            parsed["content"]
1222                .as_str()
1223                .is_some_and(|s| s.starts_with("[CONTEXT SUMMARY")),
1224            "summary text belongs in `content`",
1225        );
1226        assert_eq!(
1227            parsed["reasoning_content"].as_str(),
1228            Some("user wants weather; need to check"),
1229            "must carry reasoning_content from the most recent compressed assistant turn",
1230        );
1231    }
1232
1233    /// When multiple compressed assistant turns have reasoning_content,
1234    /// the most recent one survives — this matches DeepSeek's protocol
1235    /// expectation that the *immediately prior* assistant turn's
1236    /// reasoning is what gets replayed.
1237    #[test]
1238    fn build_summary_message_picks_last_reasoning_content() {
1239        let earlier = serde_json::json!({
1240            "content": "first answer",
1241            "reasoning_content": "EARLIER reasoning",
1242        })
1243        .to_string();
1244        let later = serde_json::json!({
1245            "content": "second answer",
1246            "reasoning_content": "LATER reasoning",
1247        })
1248        .to_string();
1249        let compressed = vec![
1250            msg("user", "q1"),
1251            msg("assistant", &earlier),
1252            msg("user", "q2"),
1253            msg("assistant", &later),
1254        ];
1255
1256        let out = build_summary_message(&compressed, "two-turn chat", 4);
1257        let parsed: serde_json::Value = serde_json::from_str(&out.content).unwrap();
1258        assert_eq!(
1259            parsed["reasoning_content"].as_str(),
1260            Some("LATER reasoning"),
1261            "must pick the most recent reasoning_content, not the earliest",
1262        );
1263    }
1264}