1use std::fmt::Write;
2use std::time::Duration;
3
4use anyhow::Result;
5use std::sync::Arc;
6
7use zeroclaw_api::model_provider::{ChatMessage, ModelProvider};
8use zeroclaw_memory::traits::Memory;
9use zeroclaw_providers::multimodal;
10
11pub use zeroclaw_config::scattered_types::ContextCompressionConfig;
12
/// Outcome of one [`ContextCompressor::compress_if_needed`] call.
#[derive(Debug, Clone)]
pub struct CompressionResult {
    /// `true` when the history was shortened: either trimming old tool
    /// results alone brought it under the threshold, or at least one
    /// summarization pass replaced messages.
    pub compressed: bool,
    /// Estimated token count (see [`estimate_tokens`]) before any change.
    pub tokens_before: usize,
    /// Estimated token count after the call finished.
    pub tokens_after: usize,
    /// Number of summarization passes that actually replaced messages.
    pub passes_used: u32,
}
24
/// Candidate context-window sizes, ordered from largest to smallest.
///
/// Used to step the assumed window down when a provider error does not
/// state the real limit.
const PROBE_TIERS: &[usize] = &[
    2_000_000, 1_000_000, 512_000, 200_000, 128_000, 64_000, 32_000,
];

/// Returns the largest entry of [`PROBE_TIERS`] that is strictly smaller
/// than `current`.
///
/// When no tier is smaller (i.e. `current` is at or below the smallest
/// tier), `32_000` is returned, so the result never drops below that floor.
fn next_probe_tier(current: usize) -> usize {
    // The table is sorted descending, so the first hit is the closest tier.
    for &tier in PROBE_TIERS {
        if tier < current {
            return tier;
        }
    }
    32_000
}
40
41pub fn parse_context_limit_from_error(msg: &str) -> Option<usize> {
47 let re_patterns: &[&str] = &[
50 r"(?:max(?:imum)?|limit)\s*(?:context\s*)?(?:length|size|window)?\s*(?:is|of|:)?\s*(\d{4,})",
52 r"context\s*(?:length|size|window)\s*(?:is|of|:)?\s*(\d{4,})",
54 r"(\d{4,})\s*(?:tokens?\s*)?(?:context|limit)",
56 r"available context size\s*\(\s*(\d{4,})",
58 r">\s*(\d{4,})\s*(?:maximum|max)?\s*(?:context)?\s*(?:length|size|window|tokens?)",
60 ];
61 let lower = msg.to_lowercase();
62 for pattern in re_patterns {
63 if let Ok(re) = regex::Regex::new(pattern)
64 && let Some(caps) = re.captures(&lower)
65 && let Some(m) = caps.get(1)
66 && let Ok(limit) = m.as_str().parse::<usize>()
67 && (1024..=10_000_000).contains(&limit)
68 {
69 return Some(limit);
70 }
71 }
72 None
73}
74
75pub fn estimate_tokens(messages: &[ChatMessage]) -> usize {
82 let raw: usize = messages
83 .iter()
84 .map(|m| m.content.len().div_ceil(4) + 4)
85 .sum();
86 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
88 {
89 (raw as f64 * 1.2) as usize
90 }
91}
92
/// System prompt sent with every summarization request issued by
/// `ContextCompressor::compress_once`.
const SUMMARIZER_SYSTEM: &str = "\
You are a conversation compaction engine. Summarize the conversation segment below into concise context.

PRESERVE exactly:
- All identifiers (UUIDs, hashes, file paths, URLs, tokens, IPs)
- Actions taken (tool calls, file operations, commands run)
- Key information obtained (data, results, error messages)
- Decisions made and user preferences expressed
- Current task status and unresolved items
- Constraints and requirements mentioned

OMIT:
- Verbose tool output (keep only key results)
- Repeated greetings or filler
- Redundant information already stated

Output concise bullet points. Be thorough but brief.";
114
/// Shrinks a chat history when its estimated token count exceeds a fraction
/// of the model's context window.
pub struct ContextCompressor {
    /// Tuning knobs (thresholds, protected ranges, summarizer settings).
    config: ContextCompressionConfig,
    /// Context-window size currently assumed; compared against
    /// [`estimate_tokens`] output. Lowered by `compress_on_error`.
    context_window: usize,
    /// Optional store that receives each summary before the summarized
    /// messages are replaced.
    memory: Option<Arc<dyn Memory>>,
}
124
125impl ContextCompressor {
    /// Creates a compressor with the given configuration and assumed context
    /// window. No memory store is attached; see `with_memory`.
    pub fn new(config: ContextCompressionConfig, context_window: usize) -> Self {
        Self {
            config,
            context_window,
            memory: None,
        }
    }
133
134 pub fn with_memory(mut self, memory: Arc<dyn Memory>) -> Self {
137 self.memory = Some(memory);
138 self
139 }
140
    /// Overrides the assumed context-window size used to compute the
    /// compression threshold.
    pub fn set_context_window(&mut self, window: usize) {
        self.context_window = window;
    }
145
146 fn fast_trim_tool_results(&self, history: &mut [ChatMessage]) -> usize {
149 let max = self.config.tool_result_retrim_chars;
150 if max == 0 {
151 return 0;
152 }
153 let mut saved = 0;
154 let protect_start = self.config.protect_first_n.min(history.len());
155 let protect_end = history.len().saturating_sub(self.config.protect_last_n);
156
157 if protect_start >= protect_end {
158 return 0;
159 }
160
161 for msg in &mut history[protect_start..protect_end] {
162 if msg.role != "tool" {
163 continue;
164 }
165 if msg.content.len() <= max {
166 continue;
167 }
168 if self
170 .config
171 .tool_result_trim_exempt
172 .iter()
173 .any(|t| msg.content.contains(t.as_str()))
174 {
175 continue;
176 }
177 if msg.content.contains("data:image/") {
179 continue;
180 }
181 let original_len = msg.content.len();
182 msg.content = crate::agent::history::truncate_tool_message(&msg.content, max);
183 saved += original_len - msg.content.len();
184 }
185 saved
186 }
187
188 pub async fn compress_if_needed(
194 &self,
195 history: &mut Vec<ChatMessage>,
196 model_provider: &dyn ModelProvider,
197 model: &str,
198 temperature: Option<f64>,
199 ) -> Result<CompressionResult> {
200 if !self.config.enabled {
201 let tokens = estimate_tokens(history);
202 return Ok(CompressionResult {
203 compressed: false,
204 tokens_before: tokens,
205 tokens_after: tokens,
206 passes_used: 0,
207 });
208 }
209
210 let tokens_before = estimate_tokens(history);
211 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
212 let threshold = (self.context_window as f64 * self.config.threshold_ratio) as usize;
213
214 if tokens_before <= threshold {
215 return Ok(CompressionResult {
216 compressed: false,
217 tokens_before,
218 tokens_after: tokens_before,
219 passes_used: 0,
220 });
221 }
222
223 let chars_saved = self.fast_trim_tool_results(history);
225 if chars_saved > 0 {
226 ::zeroclaw_log::record!(
227 INFO,
228 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
229 .with_attrs(::serde_json::json!({"chars_saved": chars_saved})),
230 "Fast-trim saved chars from old tool results"
231 );
232 let recheck = estimate_tokens(history);
233 if recheck <= threshold {
234 return Ok(CompressionResult {
235 compressed: true,
236 tokens_before,
237 tokens_after: recheck,
238 passes_used: 0,
239 });
240 }
241 }
242
243 let mut passes_used = 0;
244 for _ in 0..self.config.max_passes {
245 let did_compress = self
246 .compress_once(history, model_provider, model, temperature)
247 .await?;
248 if did_compress {
249 passes_used += 1;
250 }
251 if estimate_tokens(history) <= threshold || !did_compress {
252 break;
253 }
254 }
255
256 let tokens_after = estimate_tokens(history);
257 Ok(CompressionResult {
258 compressed: passes_used > 0,
259 tokens_before,
260 tokens_after,
261 passes_used,
262 })
263 }
264
265 pub async fn compress_on_error(
268 &mut self,
269 history: &mut Vec<ChatMessage>,
270 model_provider: &dyn ModelProvider,
271 model: &str,
272 temperature: Option<f64>,
273 error_msg: &str,
274 ) -> Result<bool> {
275 if let Some(limit) = parse_context_limit_from_error(error_msg) {
277 self.context_window = limit;
278 } else {
279 self.context_window = next_probe_tier(self.context_window);
281 }
282
283 ::zeroclaw_log::record!(
284 INFO,
285 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
286 .with_attrs(::serde_json::json!({"context_window": self.context_window})),
287 "Context limit adjusted, re-compressing"
288 );
289
290 let result = self
291 .compress_if_needed(history, model_provider, model, temperature)
292 .await?;
293 Ok(result.compressed)
294 }
295
    /// Runs one summarization pass: replaces the unprotected middle of
    /// `history` with a single summary message.
    ///
    /// Returns `Ok(false)` without touching `history` when there is nothing
    /// to summarize (history no longer than the protected head + tail, the
    /// aligned range is empty, or the transcript is empty). Returns
    /// `Ok(true)` after the middle range has been replaced.
    ///
    /// A failed or timed-out summarizer call is not an error: the truncated
    /// transcript is used as the summary instead.
    async fn compress_once(
        &self,
        history: &mut Vec<ChatMessage>,
        model_provider: &dyn ModelProvider,
        model: &str,
        temperature: Option<f64>,
    ) -> Result<bool> {
        let n = history.len();
        let protected_total = self.config.protect_first_n + self.config.protect_last_n;
        if n <= protected_total {
            return Ok(false);
        }

        // Candidate range: everything between the protected head and tail.
        let mut start = self.config.protect_first_n.min(n);
        let mut end = n.saturating_sub(self.config.protect_last_n);

        // Move the edges so the range neither starts on a `tool` message nor
        // ends right after a tool/assistant/user run (see the align helpers).
        start = align_boundary_forward(history, start);
        end = align_boundary_backward(history, end);

        if start >= end {
            return Ok(false);
        }

        let summary_model = self.config.summary_model.as_deref().unwrap_or(model);
        // Media markers are kept only when the main model does the
        // summarizing and the provider reports vision support.
        let preserve_media_markers =
            self.config.summary_model.is_none() && model_provider.supports_vision();

        let middle = &history[start..end];
        let transcript = build_summarizer_transcript(
            middle,
            self.config.source_max_chars,
            preserve_media_markers,
        );

        if transcript.is_empty() {
            return Ok(false);
        }

        let message_count = end - start;

        let identifier_note = if self.config.identifier_policy == "strict" {
            "\nIMPORTANT: Preserve all identifiers exactly as they appear."
        } else {
            ""
        };

        let user_prompt = format!(
            "Summarize the following conversation history ({message_count} messages) for context preservation. \
             Keep it concise (max 20 bullet points).{identifier_note}\n\n{transcript}"
        );

        let timeout = Duration::from_secs(self.config.timeout_secs);
        // Outer `Err` = timeout elapsed, inner `Err` = provider failure.
        // Both fall back to a truncated copy of the transcript.
        let summary_raw = match tokio::time::timeout(
            timeout,
            model_provider.chat_with_system(
                Some(SUMMARIZER_SYSTEM),
                &user_prompt,
                summary_model,
                temperature,
            ),
        )
        .await
        {
            Ok(Ok(s)) => s,
            Ok(Err(e)) => {
                ::zeroclaw_log::record!(
                    WARN,
                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
                        .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
                    "Summarization LLM call failed, using transcript truncation"
                );
                truncate_chars(&transcript, self.config.summary_max_chars)
            }
            Err(_) => {
                ::zeroclaw_log::record!(
                    WARN,
                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
                    &format!(
                        "Summarization timed out after {}s, using transcript truncation",
                        self.config.timeout_secs
                    )
                );
                truncate_chars(&transcript, self.config.summary_max_chars)
            }
        };

        // Cap the summary regardless of where it came from.
        let summary = truncate_chars(&summary_raw, self.config.summary_max_chars);

        // Best effort: persist the summary before the originals are dropped.
        // A store failure is only logged and does not abort the pass.
        if let Some(ref memory) = self.memory {
            let facts_key = format!("compressed_context_{}", uuid::Uuid::new_v4());
            if let Err(e) = memory
                .store(
                    &facts_key,
                    &summary,
                    zeroclaw_memory::traits::MemoryCategory::Daily,
                    None,
                )
                .await
            {
                ::zeroclaw_log::record!(
                    DEBUG,
                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                        .with_attrs(::serde_json::json!({"error": format!("{}", e)})),
                    "Failed to save compression summary to memory"
                );
            } else {
                ::zeroclaw_log::record!(
                    DEBUG,
                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                        .with_attrs(::serde_json::json!({"message_count": message_count})),
                    "Saved compression summary to memory before discarding messages"
                );
            }
        }

        // Swap the whole middle range for one summary message.
        let summary_msg = build_summary_message(&history[start..end], &summary, message_count);
        history.splice(start..end, std::iter::once(summary_msg));

        // Drop `tool` messages left directly after a summary or at the head.
        let tool_pairs_removed = repair_tool_pairs(history);

        ::zeroclaw_log::record!(
            WARN,
            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
                .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
                .with_attrs(::serde_json::json!({
                    "messages_summarized": message_count,
                    "summary_chars": summary.len(),
                    "tool_pairs_removed": tool_pairs_removed,
                    "protect_first_n": self.config.protect_first_n,
                    "protect_last_n": self.config.protect_last_n,
                })),
            "context_compressor: middle of conversation replaced with a \
             text summary. The model loses structural tool_use/tool_result \
             pairs from this range. If this fires mid-turn the model can \
             act like it just woke up. Raise protect_last_n / \
             protect_first_n, or raise max_context_tokens, or lower \
             threshold_ratio carefully."
        );

        Ok(true)
    }
447}
448
449fn align_boundary_forward(messages: &[ChatMessage], idx: usize) -> usize {
455 let mut i = idx;
456 while i < messages.len() && messages[i].role == "tool" {
457 i += 1;
458 }
459 i
460}
461
462fn align_boundary_backward(messages: &[ChatMessage], idx: usize) -> usize {
479 let mut i = idx;
480
481 while i > 0 && messages[i - 1].role == "tool" {
484 i -= 1;
485 }
486
487 while i > 0 && messages[i - 1].role == "assistant" {
492 i -= 1;
493 }
494
495 if i > 0 && messages[i - 1].role == "user" {
500 i -= 1;
501 }
502
503 i
504}
505
506fn repair_tool_pairs(messages: &mut Vec<ChatMessage>) -> usize {
517 let mut removed = 0;
518 let mut i = 0;
524 while i < messages.len() {
525 if messages[i].content.contains("[CONTEXT SUMMARY") {
526 while i + 1 < messages.len() && messages[i + 1].role == "tool" {
528 messages.remove(i + 1);
529 removed += 1;
530 }
531 }
532 i += 1;
533 }
534
535 let start = if messages.first().is_some_and(|m| m.role == "system") {
538 1
539 } else {
540 0
541 };
542 while start < messages.len() && messages[start].role == "tool" {
543 messages.remove(start);
544 removed += 1;
545 }
546 removed
547}
548
549fn build_full_transcript(messages: &[ChatMessage]) -> String {
554 let mut transcript = String::new();
555 for msg in messages {
556 let role = msg.role.to_uppercase();
557 let _ = writeln!(transcript, "{role}: {}", msg.content.trim());
558 }
559 transcript
560}
561
562fn build_summarizer_transcript(
563 messages: &[ChatMessage],
564 max_chars: usize,
565 preserve_media_markers: bool,
566) -> String {
567 let transcript = build_full_transcript(messages);
568 if preserve_media_markers {
569 return truncate_owned_if_needed(transcript, max_chars);
572 }
573
574 truncate_owned_if_needed(multimodal::strip_media_markers(&transcript), max_chars)
580}
581
582fn truncate_owned_if_needed(s: String, max: usize) -> String {
583 if s.len() > max {
584 truncate_chars(&s, max)
585 } else {
586 s
587 }
588}
589
/// Cuts `s` to at most `max` bytes and appends `...`.
///
/// Strings that already fit are returned as-is, without the ellipsis. The
/// cut point is moved back to the nearest UTF-8 character boundary, so the
/// result may keep fewer than `max` bytes of the input; with the ellipsis
/// it can be up to `max + 3` bytes long.
fn truncate_chars(s: &str, max: usize) -> String {
    if s.len() <= max {
        return s.to_string();
    }
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=max)
        .rev()
        .find(|&pos| s.is_char_boundary(pos))
        .unwrap_or(0);
    format!("{}...", &s[..cut])
}
603
604fn build_summary_message(
612 compressed: &[ChatMessage],
613 summary: &str,
614 message_count: usize,
615) -> ChatMessage {
616 let summary_text = format!(
617 "[CONTEXT SUMMARY \u{2014} {message_count} earlier messages compressed]\n\n{summary}"
618 );
619
620 let last_reasoning = compressed
621 .iter()
622 .rev()
623 .filter(|m| m.role == "assistant")
624 .find_map(|m| {
625 serde_json::from_str::<serde_json::Value>(&m.content)
626 .ok()
627 .and_then(|v| {
628 v.get("reasoning_content")
629 .and_then(|rc| rc.as_str().map(ToString::to_string))
630 })
631 });
632
633 if let Some(rc) = last_reasoning {
634 let payload = serde_json::json!({
635 "content": summary_text,
636 "reasoning_content": rc,
637 });
638 ChatMessage::assistant(payload.to_string())
639 } else {
640 ChatMessage::assistant(summary_text)
641 }
642}
643
/// Unit tests for the context compressor and its helper functions.
#[cfg(test)]
mod tests {
    use super::*;
    use async_trait::async_trait;
    use parking_lot::Mutex;

    /// Shorthand for building a `ChatMessage` from string slices.
    fn msg(role: &str, content: &str) -> ChatMessage {
        ChatMessage {
            role: role.to_string(),
            content: content.to_string(),
        }
    }

    /// Fake provider that records every summarizer prompt it receives and
    /// always answers `"summary"`.
    struct CaptureSummarizerModelProvider {
        // Value returned from `supports_vision`.
        supports_vision: bool,
        // Prompts passed to `chat_with_system`, in call order.
        seen_messages: Mutex<Vec<String>>,
    }

    #[async_trait]
    impl ModelProvider for CaptureSummarizerModelProvider {
        // Records the user prompt and returns a fixed summary.
        async fn chat_with_system(
            &self,
            _system_prompt: Option<&str>,
            message: &str,
            _model: &str,
            _temperature: Option<f64>,
        ) -> Result<String> {
            self.seen_messages.lock().push(message.to_string());
            Ok("summary".to_string())
        }

        // Not expected to be called by the code under test.
        async fn chat(
            &self,
            _request: zeroclaw_api::model_provider::ChatRequest<'_>,
            _model: &str,
            _temperature: Option<f64>,
        ) -> Result<zeroclaw_api::model_provider::ChatResponse> {
            unreachable!("context compressor uses chat_with_system")
        }

        fn supports_vision(&self) -> bool {
            self.supports_vision
        }
    }
    impl ::zeroclaw_api::attribution::Attributable for CaptureSummarizerModelProvider {
        fn role(&self) -> ::zeroclaw_api::attribution::Role {
            ::zeroclaw_api::attribution::Role::Provider(
                ::zeroclaw_api::attribution::ProviderKind::Model(
                    ::zeroclaw_api::attribution::ModelProviderKind::Custom,
                ),
            )
        }
        fn alias(&self) -> &str {
            "CaptureSummarizerModelProvider"
        }
    }

    /// A non-empty message produces a positive estimate.
    #[test]
    fn test_estimate_tokens() {
        let messages = vec![msg("user", "hello world")];
        let tokens = estimate_tokens(&messages);
        assert!(tokens > 0);
    }

    /// An empty history estimates to zero.
    #[test]
    fn test_estimate_tokens_empty() {
        assert_eq!(estimate_tokens(&[]), 0);
    }

    /// `N tokens > LIMIT maximum context length` yields LIMIT.
    #[test]
    fn test_parse_context_limit_anthropic() {
        let msg = "prompt is too long: 150000 tokens > 128000 maximum context length";
        assert_eq!(parse_context_limit_from_error(msg), Some(128_000));
    }

    /// `maximum context length is LIMIT tokens` yields LIMIT, not the
    /// larger request size mentioned later in the message.
    #[test]
    fn test_parse_context_limit_openai() {
        let msg = "This model's maximum context length is 128000 tokens. However, your messages resulted in 150000 tokens.";
        assert_eq!(parse_context_limit_from_error(msg), Some(128_000));
    }

    /// `available context size (LIMIT tokens)` yields LIMIT.
    #[test]
    fn test_parse_context_limit_llamacpp() {
        let msg = "request (8968 tokens) exceeds the available context size (8448 tokens)";
        assert_eq!(parse_context_limit_from_error(msg), Some(8448));
    }

    /// A message with no recognizable limit yields `None`.
    #[test]
    fn test_parse_context_limit_none() {
        assert_eq!(parse_context_limit_from_error("some random error"), None);
    }

    /// Numbers with fewer than four digits are not treated as limits.
    #[test]
    fn test_parse_context_limit_rejects_small() {
        let msg = "limit is 100 tokens";
        assert_eq!(parse_context_limit_from_error(msg), None);
    }

    /// Probing steps to the next strictly smaller tier and bottoms out at
    /// 32 000, including for inputs already below that.
    #[test]
    fn test_next_probe_tier() {
        assert_eq!(next_probe_tier(2_000_001), 2_000_000);
        assert_eq!(next_probe_tier(2_000_000), 1_000_000);
        assert_eq!(next_probe_tier(200_000), 128_000);
        assert_eq!(next_probe_tier(64_000), 32_000);
        assert_eq!(next_probe_tier(32_000), 32_000);
        assert_eq!(next_probe_tier(10_000), 32_000);
    }

    /// A start boundary on a `tool` message moves past the whole tool run.
    #[test]
    fn test_align_boundary_forward_skips_tool() {
        let messages = vec![
            msg("system", "sys"),
            msg("user", "q"),
            msg("tool", "result1"),
            msg("tool", "result2"),
            msg("user", "next"),
        ];
        assert_eq!(align_boundary_forward(&messages, 2), 4);
    }

    /// A start boundary on a non-tool message is left where it is.
    #[test]
    fn test_align_boundary_forward_noop() {
        let messages = vec![
            msg("system", "sys"),
            msg("user", "q"),
            msg("assistant", "a"),
        ];
        assert_eq!(align_boundary_forward(&messages, 1), 1);
    }

    /// A `tool` message directly after a context summary is removed.
    #[test]
    fn test_repair_tool_pairs_removes_orphaned() {
        let mut messages = vec![
            msg("system", "sys"),
            msg(
                "assistant",
                "[CONTEXT SUMMARY — 5 earlier messages compressed]\nstuff",
            ),
            msg("tool", "orphaned result"),
            msg("user", "next question"),
        ];
        repair_tool_pairs(&mut messages);
        assert_eq!(messages.len(), 3);
        assert_eq!(messages[2].role, "user");
    }

    /// A `tool` message that follows an ordinary assistant message is kept.
    #[test]
    fn test_repair_tool_pairs_no_false_positives() {
        let mut messages = vec![
            msg("system", "sys"),
            msg("user", "q"),
            msg("assistant", "calling tool"),
            msg("tool", "result"),
            msg("user", "thanks"),
        ];
        repair_tool_pairs(&mut messages);
        assert_eq!(messages.len(), 5);
    }

    /// An end boundary placed on an assistant message carrying tool calls
    /// must retreat to before index 4.
    #[test]
    fn test_align_boundary_backward_backs_up_past_tool_call_assistant() {
        let messages = vec![
            msg("system", "sys"),
            msg("user", "q1"),
            msg("assistant", "old reply 1"),
            msg("user", "q2"),
            msg(
                "assistant",
                r#"{"content":null,"tool_calls":[{"id":"toolu_X","name":"shell","arguments":"{}"}]}"#,
            ),
            msg("tool", r#"{"tool_call_id":"toolu_X","content":"result"}"#),
            msg("user", "follow-up"),
        ];
        let aligned = align_boundary_backward(&messages, 4);
        assert!(
            aligned < 4,
            "boundary should retreat past assistant(tool_calls) at idx 4, got {aligned}"
        );
    }

    /// An end boundary right after a user message retreats over that user
    /// message as well.
    #[test]
    fn test_align_boundary_backward_protects_whole_turn() {
        let messages = vec![
            msg("system", "sys"),
            msg("user", "q"),
            msg("assistant", "plain text reply"),
            msg("user", "next"),
        ];
        assert_eq!(align_boundary_backward(&messages, 2), 1);
    }

    /// Transcript lines use upper-cased roles.
    #[test]
    fn test_build_transcript() {
        let messages = vec![msg("user", "hello"), msg("assistant", "hi there")];
        let t = build_full_transcript(&messages);
        assert!(t.contains("USER: hello"));
        assert!(t.contains("ASSISTANT: hi there"));
    }

    /// Every attachment marker kind is replaced by a placeholder when media
    /// markers are not preserved; surrounding text survives.
    #[test]
    fn test_build_summarizer_transcript_strips_all_attachment_kinds_for_non_vision_provider() {
        let messages = vec![msg(
            "user",
            "Take a look at [IMAGE:/a.jpg] [PHOTO:/b.jpg] [DOCUMENT:/c.pdf] \
             [FILE:/d.zip] [VIDEO:/e.mp4] [VOICE:/f.ogg] [AUDIO:/g.wav] please",
        )];
        let transcript = build_summarizer_transcript(&messages, 10_000, false);
        for prefix in [
            "[IMAGE:",
            "[PHOTO:",
            "[DOCUMENT:",
            "[FILE:",
            "[VIDEO:",
            "[VOICE:",
            "[AUDIO:",
        ] {
            assert!(
                !transcript.contains(prefix),
                "non-vision transcript should not contain raw {prefix} marker: {transcript}"
            );
        }
        assert!(
            transcript.contains("[media attachment]"),
            "non-vision transcript should contain placeholder: {transcript}"
        );
        assert!(transcript.contains("Take a look at"));
        assert!(transcript.contains("please"));
    }

    /// Markers are stripped before truncation, so a tight limit cannot leave
    /// half a marker or a path fragment behind.
    #[test]
    fn test_build_summarizer_transcript_strips_media_markers_before_truncation() {
        let long_path = format!(
            "/private/tmp/zeroclaw/signal_inbound/{}",
            "nested-directory/".repeat(12)
        );
        let messages = vec![msg(
            "user",
            &format!("Please summarize [IMAGE:{long_path}photo.png] after text"),
        )];

        let transcript = build_summarizer_transcript(&messages, 64, false);

        assert!(
            !transcript.contains("[IMAGE:"),
            "non-vision transcript should not retain a split image marker: {transcript}"
        );
        assert!(
            !transcript.contains("/private/tmp"),
            "non-vision transcript should not leak local path fragments: {transcript}"
        );
        assert!(
            transcript.contains("[media attachment]"),
            "non-vision transcript should preserve an attachment placeholder: {transcript}"
        );
    }

    /// A truncated transcript is at most the limit plus the 3-byte ellipsis.
    #[test]
    fn test_build_transcript_truncates() {
        let messages = vec![msg("user", &"x".repeat(1000))];
        let t = truncate_owned_if_needed(build_full_transcript(&messages), 100);
        assert!(t.len() <= 103);
    }

    /// Image markers are removed but text on both sides is kept.
    #[test]
    fn test_build_summarizer_transcript_strips_image_markers_for_non_vision_provider() {
        let messages = vec![msg(
            "user",
            "Describe this photo [IMAGE:/tmp/test.png]\nKeep the caption",
        )];
        let transcript = build_summarizer_transcript(&messages, 10_000, false);
        assert!(!transcript.contains("[IMAGE:"));
        assert!(transcript.contains("Describe this photo"));
        assert!(transcript.contains("Keep the caption"));
    }

    /// With marker preservation on, the image marker is passed through.
    #[test]
    fn test_build_summarizer_transcript_keeps_image_markers_for_vision_provider() {
        let messages = vec![msg("user", "Describe this photo [IMAGE:/tmp/test.png]")];
        let transcript = build_summarizer_transcript(&messages, 10_000, true);
        assert!(transcript.contains("[IMAGE:/tmp/test.png]"));
    }

    /// Over-long input gets an ellipsis; short input is returned unchanged.
    #[test]
    fn test_truncate_chars() {
        assert_eq!(truncate_chars("hello world", 5), "hello...");
        assert_eq!(truncate_chars("hi", 10), "hi");
    }

    /// Pins the default configuration values.
    #[test]
    fn test_config_defaults() {
        let config = ContextCompressionConfig::default();
        assert!(config.enabled);
        assert!((config.threshold_ratio - 0.50).abs() < f64::EPSILON);
        assert_eq!(config.protect_first_n, 3);
        assert_eq!(config.protect_last_n, 4);
        assert_eq!(config.max_passes, 3);
        assert_eq!(config.summary_max_chars, 4_000);
        assert_eq!(config.source_max_chars, 50_000);
        assert_eq!(config.timeout_secs, 60);
        assert!(config.summary_model.is_none());
        assert_eq!(config.identifier_policy, "strict");
    }

    /// Deserializing an empty object falls back to the defaults.
    #[test]
    fn test_config_serde_defaults() {
        let json = "{}";
        let config: ContextCompressionConfig = serde_json::from_str(json).unwrap();
        assert!(config.enabled);
        assert_eq!(config.protect_first_n, 3);
        assert_eq!(config.max_passes, 3);
    }

    /// Explicit JSON fields override the defaults.
    #[test]
    fn test_config_serde_override() {
        let json = r#"{"enabled": false, "protect_first_n": 5, "max_passes": 1}"#;
        let config: ContextCompressionConfig = serde_json::from_str(json).unwrap();
        assert!(!config.enabled);
        assert_eq!(config.protect_first_n, 5);
        assert_eq!(config.max_passes, 1);
    }

    /// End to end: a provider without vision support never sees the image
    /// marker or its path in the summarizer prompt.
    #[tokio::test]
    async fn compress_if_needed_strips_image_markers_before_non_vision_summarization() {
        // Tiny window and ratio force compression.
        let config = ContextCompressionConfig {
            protect_first_n: 1,
            protect_last_n: 1,
            threshold_ratio: 0.01,
            ..Default::default()
        };
        let compressor = ContextCompressor::new(config, 64);
        let model_provider = CaptureSummarizerModelProvider {
            supports_vision: false,
            seen_messages: Mutex::new(Vec::new()),
        };
        let mut history = vec![
            msg("system", "sys"),
            msg("user", "First question"),
            msg("assistant", "First answer"),
            msg("user", "Middle question [IMAGE:/tmp/example.png]"),
            msg("assistant", "Middle answer about the image"),
            msg("user", "Another middle question"),
            msg("assistant", "Another middle answer"),
            msg("user", "Newest question"),
        ];

        let result = compressor
            .compress_if_needed(&mut history, &model_provider, "model", None)
            .await
            .expect("compression should succeed");

        assert!(result.compressed);
        let seen = model_provider.seen_messages.lock();
        let prompt = seen.last().expect("summarizer should be invoked");
        assert!(!prompt.contains("[IMAGE:"));
        assert!(!prompt.contains("/tmp/example.png"));
    }

    /// End to end: when a separate `summary_model` is configured, markers are
    /// stripped even though the provider reports vision support.
    #[tokio::test]
    async fn compress_if_needed_strips_image_markers_when_summary_model_overrides() {
        let config = ContextCompressionConfig {
            protect_first_n: 1,
            protect_last_n: 1,
            threshold_ratio: 0.01,
            summary_model: Some("text-summary-model".to_string()),
            ..Default::default()
        };
        let compressor = ContextCompressor::new(config, 64);
        let model_provider = CaptureSummarizerModelProvider {
            supports_vision: true,
            seen_messages: Mutex::new(Vec::new()),
        };
        let mut history = vec![
            msg("system", "sys"),
            msg("user", "First question"),
            msg("assistant", "First answer"),
            msg("user", "Middle question [IMAGE:/tmp/summary-override.png]"),
            msg("assistant", "Middle answer about the image"),
            msg("user", "Another middle question"),
            msg("assistant", "Another middle answer"),
            msg("user", "Newest question"),
        ];

        let result = compressor
            .compress_if_needed(&mut history, &model_provider, "default-vision-model", None)
            .await
            .expect("compression should succeed");

        assert!(result.compressed);
        let seen = model_provider.seen_messages.lock();
        let prompt = seen.last().expect("summarizer should be invoked");
        assert!(!prompt.contains("[IMAGE:"));
        assert!(!prompt.contains("/tmp/summary-override.png"));
    }

    /// Only the tool result at index 3 lies outside the protected first two
    /// and last two positions, so only it is trimmed.
    #[test]
    fn test_fast_trim_protects_first_and_last_n() {
        let config = ContextCompressionConfig {
            protect_first_n: 2,
            protect_last_n: 2,
            tool_result_retrim_chars: 100,
            ..Default::default()
        };
        let compressor = ContextCompressor::new(config, 128_000);
        let big = "x".repeat(5_000);
        let mut history = vec![
            msg("system", "sys"),
            msg("tool", &big),
            msg("user", "q"),
            msg("tool", &big),
            msg("user", "next"),
            msg("tool", &big),
        ];
        let saved = compressor.fast_trim_tool_results(&mut history);
        assert!(saved > 0);
        assert_eq!(history[1].content.len(), 5_000);
        assert_eq!(history[5].content.len(), 5_000);
        assert!(history[3].content.len() <= 200);
    }

    /// Tool results containing `data:image/` are never trimmed.
    #[test]
    fn test_fast_trim_skips_images() {
        let config = ContextCompressionConfig {
            protect_first_n: 0,
            protect_last_n: 0,
            tool_result_retrim_chars: 100,
            ..Default::default()
        };
        let compressor = ContextCompressor::new(config, 128_000);
        let img = format!("data:image/{}", "x".repeat(5_000));
        let mut history = vec![msg("tool", &img)];
        let saved = compressor.fast_trim_tool_results(&mut history);
        assert_eq!(saved, 0);
        assert!(history[0].content.len() > 5_000);
    }

    /// Tool results whose content contains an exempt string are not trimmed.
    #[test]
    fn test_fast_trim_skips_exempt_tools() {
        let config = ContextCompressionConfig {
            protect_first_n: 0,
            protect_last_n: 0,
            tool_result_retrim_chars: 100,
            tool_result_trim_exempt: vec!["KEEPME".to_string()],
            ..Default::default()
        };
        let compressor = ContextCompressor::new(config, 128_000);
        let content = format!("KEEPME {}", "x".repeat(5_000));
        let mut history = vec![msg("tool", &content)];
        let saved = compressor.fast_trim_tool_results(&mut history);
        assert_eq!(saved, 0);
    }

    /// Tool results already within the limit are left alone.
    #[test]
    fn test_fast_trim_skips_small_results() {
        let config = ContextCompressionConfig {
            protect_first_n: 0,
            protect_last_n: 0,
            tool_result_retrim_chars: 2_000,
            ..Default::default()
        };
        let compressor = ContextCompressor::new(config, 128_000);
        let mut history = vec![msg("tool", "small result")];
        let saved = compressor.fast_trim_tool_results(&mut history);
        assert_eq!(saved, 0);
    }

    /// Large user and assistant messages are not subject to fast trimming.
    #[test]
    fn test_fast_trim_skips_non_tool_messages() {
        let config = ContextCompressionConfig {
            protect_first_n: 0,
            protect_last_n: 0,
            tool_result_retrim_chars: 100,
            ..Default::default()
        };
        let compressor = ContextCompressor::new(config, 128_000);
        let big = "x".repeat(5_000);
        let mut history = vec![msg("user", &big), msg("assistant", &big)];
        let saved = compressor.fast_trim_tool_results(&mut history);
        assert_eq!(saved, 0);
    }

    /// Pins the fast-trim related configuration defaults.
    #[test]
    fn test_fast_trim_config_defaults() {
        let config = ContextCompressionConfig::default();
        assert_eq!(config.tool_result_retrim_chars, 2_000);
        assert!(config.tool_result_trim_exempt.is_empty());
    }

    /// A limit of zero disables fast trimming entirely.
    #[test]
    fn test_fast_trim_disabled_when_zero() {
        let config = ContextCompressionConfig {
            protect_first_n: 0,
            protect_last_n: 0,
            tool_result_retrim_chars: 0,
            ..Default::default()
        };
        let compressor = ContextCompressor::new(config, 128_000);
        let big = "x".repeat(5_000);
        let mut history = vec![msg("tool", &big)];
        let saved = compressor.fast_trim_tool_results(&mut history);
        assert_eq!(saved, 0);
    }

    /// Without any `reasoning_content` in the compressed range the summary
    /// message is plain text, not JSON.
    #[test]
    fn build_summary_message_uses_plain_text_when_no_reasoning() {
        let compressed = vec![
            msg("user", "what's the weather"),
            msg("assistant", "it's sunny"),
        ];
        let out = build_summary_message(&compressed, "weather chat", 2);
        assert_eq!(out.role, "assistant");
        assert!(out.content.starts_with("[CONTEXT SUMMARY"));
        assert!(out.content.contains("weather chat"));
        assert!(
            serde_json::from_str::<serde_json::Value>(&out.content).is_err(),
            "plain-text summary must not parse as JSON"
        );
    }

    /// When a compressed assistant message carries `reasoning_content`, the
    /// summary becomes a JSON object that keeps it.
    #[test]
    fn build_summary_message_preserves_reasoning_content_when_present() {
        let assistant_with_reasoning = serde_json::json!({
            "content": "let me look",
            "reasoning_content": "user wants weather; need to check",
        })
        .to_string();
        let compressed = vec![
            msg("user", "what's the weather"),
            msg("assistant", &assistant_with_reasoning),
        ];

        let out = build_summary_message(&compressed, "weather chat", 2);
        assert_eq!(out.role, "assistant");
        let parsed: serde_json::Value = serde_json::from_str(&out.content)
            .expect("summary must be JSON when reasoning_content is preserved");
        assert!(
            parsed["content"]
                .as_str()
                .is_some_and(|s| s.starts_with("[CONTEXT SUMMARY")),
            "summary text belongs in `content`",
        );
        assert_eq!(
            parsed["reasoning_content"].as_str(),
            Some("user wants weather; need to check"),
            "must carry reasoning_content from the most recent compressed assistant turn",
        );
    }

    /// With several candidates, the latest assistant message's
    /// `reasoning_content` is the one carried over.
    #[test]
    fn build_summary_message_picks_last_reasoning_content() {
        let earlier = serde_json::json!({
            "content": "first answer",
            "reasoning_content": "EARLIER reasoning",
        })
        .to_string();
        let later = serde_json::json!({
            "content": "second answer",
            "reasoning_content": "LATER reasoning",
        })
        .to_string();
        let compressed = vec![
            msg("user", "q1"),
            msg("assistant", &earlier),
            msg("user", "q2"),
            msg("assistant", &later),
        ];

        let out = build_summary_message(&compressed, "two-turn chat", 4);
        let parsed: serde_json::Value = serde_json::from_str(&out.content).unwrap();
        assert_eq!(
            parsed["reasoning_content"].as_str(),
            Some("LATER reasoning"),
            "must pick the most recent reasoning_content, not the earliest",
        );
    }
}
1264}