Skip to main content

zeroclaw_runtime/agent/
thinking.rs

1//! Thinking/Reasoning Level Control
2//!
3//! Allows users to control how deeply the model reasons per message,
4//! trading speed for depth. Levels range from `Off` (fastest, most concise)
5//! to `Max` (deepest reasoning, slowest).
6//!
7//! Users can set the level via:
8//! - Inline directive: `/think:high` at the start of a message
9//! - Agent config: `[agent.thinking]` section with `default_level`
10//!
11//! Resolution hierarchy (highest priority first):
12//! 1. Inline directive (`/think:<level>`)
13//! 2. Session override (reserved for future use)
14//! 3. Agent config (`agent.thinking.default_level`)
15//! 4. Global default (`Medium`)
16
17// Re-exported from zeroclaw-config.
18pub use zeroclaw_config::scattered_types::{ThinkingConfig, ThinkingLevel};
19
20/// Parameters derived from a thinking level, applied to the LLM request.
21#[derive(Debug, Clone, PartialEq)]
22pub struct ThinkingParams {
23    /// Temperature adjustment (added to the base temperature, clamped to 0.0..=2.0).
24    pub temperature_adjustment: f64,
25    /// Maximum tokens adjustment (added to any existing max_tokens setting).
26    pub max_tokens_adjustment: i64,
27    /// Optional system prompt prefix injected before the existing system prompt.
28    pub system_prompt_prefix: Option<String>,
29    /// Native extended thinking parameters, populated when the config enables
30    /// native thinking and the level has a `budget_tokens` value.
31    pub native_thinking: Option<zeroclaw_config::scattered_types::NativeThinkingParams>,
32}
33
34/// Parse a `/think:<level>` directive from the start of a message.
35///
36/// Returns `Some((level, remaining_message))` if a directive is found,
37/// or `None` if no directive is present. The remaining message has
38/// leading whitespace after the directive trimmed.
39pub fn parse_thinking_directive(message: &str) -> Option<(ThinkingLevel, String)> {
40    let trimmed = message.trim_start();
41    if !trimmed.starts_with("/think:") {
42        return None;
43    }
44
45    // Extract the level token (everything between `/think:` and the next whitespace or end).
46    let after_prefix = &trimmed["/think:".len()..];
47    let level_end = after_prefix
48        .find(|c: char| c.is_whitespace())
49        .unwrap_or(after_prefix.len());
50    let level_str = &after_prefix[..level_end];
51
52    let level = ThinkingLevel::from_str_insensitive(level_str)?;
53
54    let remaining = after_prefix[level_end..].trim_start().to_string();
55    Some((level, remaining))
56}
57
58/// Convert a `ThinkingLevel` into concrete parameters for the LLM request.
59pub fn apply_thinking_level(level: ThinkingLevel) -> ThinkingParams {
60    match level {
61        ThinkingLevel::Off => ThinkingParams {
62            temperature_adjustment: -0.2,
63            max_tokens_adjustment: -1000,
64            system_prompt_prefix: Some(
65                "Be extremely concise. Give direct answers without explanation \
66                 unless explicitly asked. No preamble."
67                    .into(),
68            ),
69            native_thinking: None,
70        },
71        ThinkingLevel::Minimal => ThinkingParams {
72            temperature_adjustment: -0.1,
73            max_tokens_adjustment: -500,
74            system_prompt_prefix: Some(
75                "Be concise and fast. Keep explanations brief. \
76                 Prioritize speed over thoroughness."
77                    .into(),
78            ),
79            native_thinking: None,
80        },
81        ThinkingLevel::Low => ThinkingParams {
82            temperature_adjustment: -0.05,
83            max_tokens_adjustment: 0,
84            system_prompt_prefix: Some("Keep reasoning light. Explain only when helpful.".into()),
85            native_thinking: None,
86        },
87        ThinkingLevel::Medium => ThinkingParams {
88            temperature_adjustment: 0.0,
89            max_tokens_adjustment: 0,
90            system_prompt_prefix: None,
91            native_thinking: None,
92        },
93        ThinkingLevel::High => ThinkingParams {
94            temperature_adjustment: 0.05,
95            max_tokens_adjustment: 1000,
96            system_prompt_prefix: Some(
97                "Think step by step. Provide thorough analysis and \
98                 consider edge cases before answering."
99                    .into(),
100            ),
101            native_thinking: None,
102        },
103        ThinkingLevel::Max => ThinkingParams {
104            temperature_adjustment: 0.1,
105            max_tokens_adjustment: 2000,
106            system_prompt_prefix: Some(
107                "Think very carefully and exhaustively. Break down the problem \
108                 into sub-problems, consider all angles, verify your reasoning, \
109                 and provide the most thorough analysis possible."
110                    .into(),
111            ),
112            native_thinking: None,
113        },
114    }
115}
116
117/// Convert a `ThinkingLevel` into parameters, resolving native extended
118/// thinking from the provided config.
119pub fn apply_thinking_level_with_config(
120    level: ThinkingLevel,
121    config: &ThinkingConfig,
122) -> ThinkingParams {
123    use zeroclaw_config::scattered_types::{MAX_BUDGET_TOKENS, MIN_BUDGET_TOKENS};
124    let mut params = apply_thinking_level(level);
125    if config.native_thinking
126        && let Some(budget) = config.budget_tokens_for(level)
127    {
128        let clamped = budget.clamp(MIN_BUDGET_TOKENS, MAX_BUDGET_TOKENS);
129        if clamped != budget {
130            ::zeroclaw_log::record!(
131                WARN,
132                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
133                    .with_attrs(::serde_json::json!({
134                        "requested": budget,
135                        "clamped": clamped,
136                        "min": MIN_BUDGET_TOKENS,
137                        "max": MAX_BUDGET_TOKENS
138                    })),
139                "budget_tokens outside accepted range; clamping"
140            );
141        }
142        params.native_thinking = Some(zeroclaw_config::scattered_types::NativeThinkingParams {
143            budget_tokens: clamped,
144        });
145    }
146    params
147}
148
149/// Resolve the effective thinking level using the priority hierarchy:
150/// 1. Inline directive (if present)
151/// 2. Session override (reserved, currently always `None`)
152/// 3. Agent config default
153/// 4. Global default (`Medium`)
154pub fn resolve_thinking_level(
155    inline_directive: Option<ThinkingLevel>,
156    session_override: Option<ThinkingLevel>,
157    config: &ThinkingConfig,
158) -> ThinkingLevel {
159    inline_directive
160        .or(session_override)
161        .unwrap_or(config.default_level)
162}
163
/// Restrict a temperature to the valid range `[0.0, 2.0]`.
///
/// Values below the range become `0.0`, values above it become `2.0`, and
/// everything else is returned as-is. A NaN input is returned unchanged,
/// since neither comparison holds for it.
pub fn clamp_temperature(temp: f64) -> f64 {
    const MIN_TEMPERATURE: f64 = 0.0;
    const MAX_TEMPERATURE: f64 = 2.0;

    if temp < MIN_TEMPERATURE {
        MIN_TEMPERATURE
    } else if temp > MAX_TEMPERATURE {
        MAX_TEMPERATURE
    } else {
        temp
    }
}
168
169pub struct ResolvedThinking {
170    pub effective_message: String,
171    pub params: ThinkingParams,
172    pub effective_temperature: f64,
173}
174
175/// Validate thinking config at startup. Call once during agent
176/// initialization to warn about unrecognized budget_tokens keys.
177pub fn validate_thinking_config(config: &ThinkingConfig) {
178    config.warn_unknown_budget_keys();
179}
180
181pub fn resolve_thinking_from_message(
182    message: &str,
183    config: &ThinkingConfig,
184    base_temperature: f64,
185) -> ResolvedThinking {
186    let (directive, effective_message) = match parse_thinking_directive(message) {
187        Some((level, remaining)) => {
188            ::zeroclaw_log::record!(
189                INFO,
190                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
191                    .with_attrs(::serde_json::json!({"thinking_level": format!("{level:?}")})),
192                "Thinking directive parsed from message"
193            );
194            (Some(level), remaining)
195        }
196        None => (None, message.to_string()),
197    };
198    let level = resolve_thinking_level(directive, None, config);
199    let params = apply_thinking_level_with_config(level, config);
200    let effective_temperature = clamp_temperature(base_temperature + params.temperature_adjustment);
201    ResolvedThinking {
202        effective_message,
203        params,
204        effective_temperature,
205    }
206}
207
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Build a config that differs from the default only in `default_level`.
    fn config_with_default(level: ThinkingLevel) -> ThinkingConfig {
        ThinkingConfig {
            default_level: level,
            ..ThinkingConfig::default()
        }
    }

    // ── ThinkingLevel parsing ────────────────────────────────────

    #[test]
    fn thinking_level_from_str_canonical_names() {
        let cases = [
            ("off", ThinkingLevel::Off),
            ("minimal", ThinkingLevel::Minimal),
            ("low", ThinkingLevel::Low),
            ("medium", ThinkingLevel::Medium),
            ("high", ThinkingLevel::High),
            ("max", ThinkingLevel::Max),
        ];
        for (name, expected) in cases {
            assert_eq!(
                ThinkingLevel::from_str_insensitive(name),
                Some(expected),
                "input: {name:?}"
            );
        }
    }

    #[test]
    fn thinking_level_from_str_aliases() {
        let cases = [
            ("none", ThinkingLevel::Off),
            ("min", ThinkingLevel::Minimal),
            ("med", ThinkingLevel::Medium),
            ("default", ThinkingLevel::Medium),
            ("maximum", ThinkingLevel::Max),
        ];
        for (alias, expected) in cases {
            assert_eq!(
                ThinkingLevel::from_str_insensitive(alias),
                Some(expected),
                "input: {alias:?}"
            );
        }
    }

    #[test]
    fn thinking_level_from_str_case_insensitive() {
        let cases = [
            ("HIGH", ThinkingLevel::High),
            ("Max", ThinkingLevel::Max),
            ("OFF", ThinkingLevel::Off),
        ];
        for (name, expected) in cases {
            assert_eq!(
                ThinkingLevel::from_str_insensitive(name),
                Some(expected),
                "input: {name:?}"
            );
        }
    }

    #[test]
    fn thinking_level_from_str_invalid_returns_none() {
        for bad in ["turbo", "", "super-high"] {
            assert_eq!(
                ThinkingLevel::from_str_insensitive(bad),
                None,
                "input: {bad:?}"
            );
        }
    }

    // ── Directive parsing ────────────────────────────────────────

    #[test]
    fn parse_directive_extracts_level_and_remaining_message() {
        let (level, remaining) = parse_thinking_directive("/think:high What is Rust?")
            .expect("directive should be recognized");
        assert_eq!(level, ThinkingLevel::High);
        assert_eq!(remaining, "What is Rust?");
    }

    #[test]
    fn parse_directive_handles_directive_only() {
        let (level, remaining) =
            parse_thinking_directive("/think:off").expect("directive should be recognized");
        assert_eq!(level, ThinkingLevel::Off);
        assert_eq!(remaining, "");
    }

    #[test]
    fn parse_directive_strips_leading_whitespace() {
        let (level, remaining) = parse_thinking_directive("  /think:low  Tell me about Rust")
            .expect("directive should be recognized");
        assert_eq!(level, ThinkingLevel::Low);
        assert_eq!(remaining, "Tell me about Rust");
    }

    #[test]
    fn parse_directive_returns_none_for_no_directive() {
        for input in ["Hello world", "", "/think"] {
            assert!(
                parse_thinking_directive(input).is_none(),
                "input: {input:?}"
            );
        }
    }

    #[test]
    fn parse_directive_returns_none_for_invalid_level() {
        let parsed = parse_thinking_directive("/think:turbo What?");
        assert!(parsed.is_none());
    }

    #[test]
    fn parse_directive_not_triggered_mid_message() {
        let parsed = parse_thinking_directive("Hello /think:high world");
        assert!(parsed.is_none());
    }

    // ── Level application ────────────────────────────────────────

    #[test]
    fn apply_thinking_level_off_is_concise() {
        let ThinkingParams {
            temperature_adjustment,
            max_tokens_adjustment,
            system_prompt_prefix,
            ..
        } = apply_thinking_level(ThinkingLevel::Off);
        assert!(temperature_adjustment < 0.0);
        assert!(max_tokens_adjustment < 0);
        let prefix = system_prompt_prefix.expect("Off should carry a prompt prefix");
        assert!(prefix.to_lowercase().contains("concise"));
    }

    #[test]
    fn apply_thinking_level_medium_is_neutral() {
        let neutral = apply_thinking_level(ThinkingLevel::Medium);
        assert!(neutral.temperature_adjustment.abs() < f64::EPSILON);
        assert_eq!(neutral.max_tokens_adjustment, 0);
        assert!(neutral.system_prompt_prefix.is_none());
    }

    #[test]
    fn apply_thinking_level_high_adds_step_by_step() {
        let ThinkingParams {
            temperature_adjustment,
            max_tokens_adjustment,
            system_prompt_prefix,
            ..
        } = apply_thinking_level(ThinkingLevel::High);
        assert!(temperature_adjustment > 0.0);
        assert!(max_tokens_adjustment > 0);
        let prefix = system_prompt_prefix.expect("High should carry a prompt prefix");
        assert!(prefix.to_lowercase().contains("step by step"));
    }

    #[test]
    fn apply_thinking_level_max_is_most_thorough() {
        let ThinkingParams {
            temperature_adjustment,
            max_tokens_adjustment,
            system_prompt_prefix,
            ..
        } = apply_thinking_level(ThinkingLevel::Max);
        assert!(temperature_adjustment > 0.0);
        assert!(max_tokens_adjustment > 0);
        let prefix = system_prompt_prefix.expect("Max should carry a prompt prefix");
        assert!(prefix.to_lowercase().contains("exhaustively"));
    }

    // ── Resolution hierarchy ─────────────────────────────────────

    #[test]
    fn resolve_inline_directive_takes_priority() {
        let config = config_with_default(ThinkingLevel::Low);
        let resolved =
            resolve_thinking_level(Some(ThinkingLevel::Max), Some(ThinkingLevel::High), &config);
        assert_eq!(resolved, ThinkingLevel::Max);
    }

    #[test]
    fn resolve_session_override_takes_priority_over_config() {
        let config = config_with_default(ThinkingLevel::Low);
        let resolved = resolve_thinking_level(None, Some(ThinkingLevel::High), &config);
        assert_eq!(resolved, ThinkingLevel::High);
    }

    #[test]
    fn resolve_falls_back_to_config_default() {
        let config = config_with_default(ThinkingLevel::Minimal);
        let resolved = resolve_thinking_level(None, None, &config);
        assert_eq!(resolved, ThinkingLevel::Minimal);
    }

    #[test]
    fn resolve_default_config_uses_medium() {
        let resolved = resolve_thinking_level(None, None, &ThinkingConfig::default());
        assert_eq!(resolved, ThinkingLevel::Medium);
    }

    // ── Temperature clamping ─────────────────────────────────────

    #[test]
    fn clamp_temperature_within_range() {
        for value in [0.7, 0.0, 2.0] {
            assert!(
                (clamp_temperature(value) - value).abs() < f64::EPSILON,
                "input: {value}"
            );
        }
    }

    #[test]
    fn clamp_temperature_below_minimum() {
        let clamped = clamp_temperature(-0.5);
        assert!(clamped.abs() < f64::EPSILON);
    }

    #[test]
    fn clamp_temperature_above_maximum() {
        let clamped = clamp_temperature(3.0);
        assert!((clamped - 2.0).abs() < f64::EPSILON);
    }

    // ── Budget-token clamping ────────────────────────────────────

    #[test]
    fn budget_tokens_clamped_to_min_when_below() {
        use zeroclaw_config::scattered_types::MIN_BUDGET_TOKENS;
        let config = ThinkingConfig {
            default_level: ThinkingLevel::High,
            native_thinking: true,
            budget_tokens: HashMap::from([("high".to_string(), 100)]),
        };
        let native = apply_thinking_level_with_config(ThinkingLevel::High, &config)
            .native_thinking
            .expect("native thinking should be set");
        assert_eq!(native.budget_tokens, MIN_BUDGET_TOKENS);
    }

    #[test]
    fn budget_tokens_preserved_within_range() {
        let config = ThinkingConfig {
            default_level: ThinkingLevel::High,
            native_thinking: true,
            budget_tokens: HashMap::from([("high".to_string(), 8_000)]),
        };
        let native = apply_thinking_level_with_config(ThinkingLevel::High, &config)
            .native_thinking
            .expect("native thinking should be set");
        assert_eq!(native.budget_tokens, 8_000);
    }

    #[test]
    fn budget_tokens_clamped_to_max_when_above() {
        use zeroclaw_config::scattered_types::MAX_BUDGET_TOKENS;
        let config = ThinkingConfig {
            default_level: ThinkingLevel::High,
            native_thinking: true,
            budget_tokens: HashMap::from([("high".to_string(), MAX_BUDGET_TOKENS + 1_000)]),
        };
        let native = apply_thinking_level_with_config(ThinkingLevel::High, &config)
            .native_thinking
            .expect("native thinking should be set");
        assert_eq!(native.budget_tokens, MAX_BUDGET_TOKENS);
    }

    // ── Serde round-trip ─────────────────────────────────────────

    #[test]
    fn thinking_config_deserializes_from_toml() {
        let config: ThinkingConfig = toml::from_str(r#"default_level = "high""#).unwrap();
        assert_eq!(config.default_level, ThinkingLevel::High);
    }

    #[test]
    fn thinking_config_default_level_deserializes() {
        let config: ThinkingConfig = toml::from_str("").unwrap();
        assert_eq!(config.default_level, ThinkingLevel::Medium);
    }

    #[test]
    fn thinking_level_serializes_lowercase() {
        let json = serde_json::to_string(&ThinkingLevel::High).unwrap();
        assert_eq!(json, r#""high""#);
    }

    /// Regression test for the wiring fix in PR #5652. The dispatch sites in
    /// `loop_.rs` install params with `NATIVE_THINKING_OVERRIDE.scope(params, fut)`,
    /// and `consume_provider_streaming_response` reads them back with
    /// `try_with(Clone::clone)`. That read-back must see the installed params;
    /// otherwise `agent.thinking.native_thinking = true` is a no-op even
    /// though `apply_thinking_level_with_config` fills the params correctly.
    #[tokio::test]
    async fn native_thinking_override_round_trips_through_scope() {
        use zeroclaw_api::NATIVE_THINKING_OVERRIDE;
        use zeroclaw_config::scattered_types::NativeThinkingParams;

        let installed = Some(NativeThinkingParams {
            budget_tokens: 32_000,
        });
        let read_back = NATIVE_THINKING_OVERRIDE
            .scope(installed, async {
                NATIVE_THINKING_OVERRIDE
                    .try_with(Clone::clone)
                    .ok()
                    .flatten()
            })
            .await;
        assert_eq!(
            read_back, installed,
            "NATIVE_THINKING_OVERRIDE.scope must round-trip params to the inner read-back"
        );
    }

    /// Regression test: with no `NATIVE_THINKING_OVERRIDE.scope(...)` active,
    /// the read-back must yield `None` rather than panicking or leaking a
    /// stale value from a previous task. This is the original fallback path:
    /// users with `agent.thinking.native_thinking = false` keep prompt-based
    /// reasoning with no provider-side `thinking` block.
    #[tokio::test]
    async fn native_thinking_override_returns_none_outside_scope() {
        use zeroclaw_api::NATIVE_THINKING_OVERRIDE;

        let read_back = async {
            NATIVE_THINKING_OVERRIDE
                .try_with(Clone::clone)
                .ok()
                .flatten()
        }
        .await;
        assert!(
            read_back.is_none(),
            "NATIVE_THINKING_OVERRIDE outside a scope must read None, got: {read_back:?}"
        );
    }

    /// Regression test: `validate_thinking_config` is called once at agent
    /// initialization (from `loop_::run` and `loop_::process_message`), so a
    /// typo such as an unknown `agent.thinking.budget_tokens.foo` key warns
    /// at startup instead of being silently ignored. Because it runs in the
    /// startup section of the request-processing path, it must accept any
    /// config without panicking: unknown keys, an empty config, and a config
    /// in which every key is valid.
    #[test]
    fn validate_thinking_config_accepts_arbitrary_inputs_without_panicking() {
        // "turbo" is not a valid ThinkingLevel name.
        let mut with_unknown_key = ThinkingConfig::default();
        with_unknown_key
            .budget_tokens
            .insert("turbo".to_string(), 5_000);
        validate_thinking_config(&with_unknown_key);

        validate_thinking_config(&ThinkingConfig::default());

        let mut all_valid = ThinkingConfig::default();
        for name in ["off", "minimal", "low", "medium", "high", "max"] {
            all_valid.budget_tokens.insert(name.to_string(), 10_000);
        }
        validate_thinking_config(&all_valid);
    }
}