Skip to main content

zeroclaw_runtime/doctor/
mod.rs

1use anyhow::Result;
2use chrono::{DateTime, Utc};
3use std::io::Write;
4use std::path::Path;
5use zeroclaw_config::schema::Config;
6
/// Maximum age, in seconds, of the daemon heartbeat before it is reported as stale.
const DAEMON_STALE_SECONDS: i64 = 30;
/// Maximum age, in seconds, of the scheduler's last successful run before it is
/// reported as unhealthy.
const SCHEDULER_STALE_SECONDS: i64 = 120;
/// Maximum age, in seconds, of a channel's last successful check before it is
/// reported as stale.
const CHANNEL_STALE_SECONDS: i64 = 300;
/// Character budget for command version previews.
/// NOTE(review): not referenced in the visible part of this file — confirm usage.
const COMMAND_VERSION_PREVIEW_CHARS: usize = 60;
11
12// ── Diagnostic item ──────────────────────────────────────────────
13
14#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)]
15#[serde(rename_all = "lowercase")]
16pub enum Severity {
17    Ok,
18    Warn,
19    Error,
20}
21
22/// Structured diagnostic result for programmatic consumption (web dashboard, API).
23#[derive(Debug, Clone, serde::Serialize)]
24pub struct DiagResult {
25    pub severity: Severity,
26    pub category: String,
27    pub message: String,
28}
29
30struct DiagItem {
31    severity: Severity,
32    category: &'static str,
33    message: String,
34}
35
36impl DiagItem {
37    fn ok(category: &'static str, msg: impl Into<String>) -> Self {
38        Self {
39            severity: Severity::Ok,
40            category,
41            message: msg.into(),
42        }
43    }
44    fn warn(category: &'static str, msg: impl Into<String>) -> Self {
45        Self {
46            severity: Severity::Warn,
47            category,
48            message: msg.into(),
49        }
50    }
51    fn error(category: &'static str, msg: impl Into<String>) -> Self {
52        Self {
53            severity: Severity::Error,
54            category,
55            message: msg.into(),
56        }
57    }
58
59    #[cfg(test)]
60    fn icon(&self) -> &'static str {
61        match self.severity {
62            Severity::Ok => "✅",
63            Severity::Warn => "⚠️ ",
64            Severity::Error => "❌",
65        }
66    }
67
68    fn into_result(self) -> DiagResult {
69        DiagResult {
70            severity: self.severity,
71            category: self.category.to_string(),
72            message: self.message,
73        }
74    }
75}
76
77// ── Public entry points ──────────────────────────────────────────
78
79/// Run diagnostics and return structured results (for API/web dashboard).
80pub fn diagnose(config: &Config) -> Vec<DiagResult> {
81    let mut items: Vec<DiagItem> = Vec::new();
82
83    check_config_semantics(config, &mut items);
84    check_workspace(config, &mut items);
85    check_daemon_state(config, &mut items);
86    check_environment(&mut items);
87    check_cli_tools(&mut items);
88
89    items.into_iter().map(DiagItem::into_result).collect()
90}
91
92/// Run diagnostics and print human-readable report to stdout.
93async fn probe_models(config: &Config) -> Vec<DiagResult> {
94    let targets = doctor_model_targets(config, None);
95    let mut out = Vec::new();
96
97    for provider_name in &targets {
98        let result = match create_doctor_model_provider(config, provider_name) {
99            Ok(handle) => handle.list_models().await,
100            Err(e) => Err(e),
101        };
102        match result {
103            Ok(models) => out.push(DiagResult {
104                severity: Severity::Ok,
105                category: "providers.models".to_string(),
106                message: format!("{}: {} models", provider_name, models.len()),
107            }),
108            Err(e) => {
109                let text = format_error_chain(&e);
110                let severity = match classify_model_probe_error(&text) {
111                    ModelProbeOutcome::Skipped => Severity::Warn,
112                    ModelProbeOutcome::AuthOrAccess => Severity::Warn,
113                    ModelProbeOutcome::Ok | ModelProbeOutcome::Error => Severity::Error,
114                };
115                out.push(DiagResult {
116                    severity,
117                    category: "providers.models".to_string(),
118                    message: format!("{}: {}", provider_name, truncate_for_display(&text, 120)),
119                });
120            }
121        }
122    }
123
124    out
125}
126
127pub async fn run(config: &Config) -> Result<()> {
128    let mut results = diagnose(config);
129    results.extend(probe_models(config).await);
130
131    println!("🩺 ZeroClaw Doctor (enhanced)");
132    println!();
133
134    let mut current_cat = String::new();
135    for item in &results {
136        if item.category != current_cat {
137            current_cat = item.category.clone();
138            println!("  [{current_cat}]");
139        }
140        let icon = match item.severity {
141            Severity::Ok => "✅",
142            Severity::Warn => "⚠️ ",
143            Severity::Error => "❌",
144        };
145        println!("    {} {}", icon, item.message);
146    }
147
148    let errors = results
149        .iter()
150        .filter(|i| i.severity == Severity::Error)
151        .count();
152    let warns = results
153        .iter()
154        .filter(|i| i.severity == Severity::Warn)
155        .count();
156    let oks = results
157        .iter()
158        .filter(|i| i.severity == Severity::Ok)
159        .count();
160
161    println!();
162    println!("  Summary: {oks} ok, {warns} warnings, {errors} errors");
163
164    if errors > 0 {
165        println!("  💡 Fix the errors above, then run `zeroclaw doctor` again.");
166    }
167
168    Ok(())
169}
170
/// Coarse classification of a single model-catalog probe.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ModelProbeOutcome {
    /// The catalog was fetched.
    Ok,
    /// The provider reported that it does not support live model discovery.
    Skipped,
    /// The failure text looks like an authentication, quota, or rate-limit problem.
    AuthOrAccess,
    /// Any other failure.
    Error,
}

/// Short status label for an outcome, as shown in the connectivity matrix.
fn model_probe_status_label(outcome: ModelProbeOutcome) -> &'static str {
    match outcome {
        ModelProbeOutcome::Ok => "ok",
        ModelProbeOutcome::Skipped => "skipped",
        ModelProbeOutcome::AuthOrAccess => "auth/access",
        ModelProbeOutcome::Error => "error",
    }
}

/// Classify a probe failure from its error text using case-insensitive
/// substring matching.
///
/// The "does not support live model discovery" check runs first; after that any
/// of the auth/access hints yields `AuthOrAccess`; everything else is `Error`.
/// This function never returns `ModelProbeOutcome::Ok`.
fn classify_model_probe_error(err_message: &str) -> ModelProbeOutcome {
    /// Lowercase fragments treated as authentication, quota, or rate-limit problems.
    const AUTH_OR_ACCESS_HINTS: [&str; 11] = [
        "401",
        "403",
        "429",
        "unauthorized",
        "forbidden",
        "api key",
        "token",
        "insufficient balance",
        "insufficient quota",
        "plan does not include",
        "rate limit",
    ];

    let haystack = err_message.to_lowercase();

    if haystack.contains("does not support live model discovery") {
        ModelProbeOutcome::Skipped
    } else if AUTH_OR_ACCESS_HINTS
        .iter()
        .any(|needle| haystack.contains(*needle))
    {
        ModelProbeOutcome::AuthOrAccess
    } else {
        ModelProbeOutcome::Error
    }
}
216
217fn doctor_model_targets(config: &Config, provider_override: Option<&str>) -> Vec<String> {
218    if let Some(model_provider) = provider_override.map(str::trim).filter(|p| !p.is_empty()) {
219        return vec![model_provider.to_string()];
220    }
221
222    config
223        .providers
224        .models
225        .iter_entries()
226        .map(|(type_k, alias_k, _)| format!("{type_k}.{alias_k}"))
227        .collect()
228}
229
230fn configured_model_provider_api_key<'a>(
231    config: &'a Config,
232    provider_name: &str,
233) -> Option<&'a str> {
234    let (family, alias) = provider_name
235        .split_once('.')
236        .unwrap_or((provider_name, "default"));
237
238    config
239        .providers
240        .models
241        .find(family, alias)
242        .and_then(|entry| entry.api_key.as_deref())
243}
244
245fn create_doctor_model_provider(
246    config: &Config,
247    provider_name: &str,
248) -> anyhow::Result<Box<dyn zeroclaw_api::model_provider::ModelProvider>> {
249    let api_key = configured_model_provider_api_key(config, provider_name);
250    let options = zeroclaw_providers::options_for_provider_ref(
251        config,
252        provider_name,
253        &zeroclaw_providers::ModelProviderRuntimeOptions::default(),
254    );
255
256    match provider_name.split_once('.') {
257        Some((family, alias)) => zeroclaw_providers::create_model_provider_for_alias(
258            config, family, alias, api_key, &options,
259        ),
260        None => {
261            zeroclaw_providers::create_model_provider_with_options(provider_name, api_key, &options)
262        }
263    }
264}
265
266pub async fn run_models(
267    config: &Config,
268    provider_override: Option<&str>,
269    _use_cache: bool,
270) -> Result<()> {
271    let targets = doctor_model_targets(config, provider_override);
272
273    if targets.is_empty() {
274        anyhow::bail!(
275            "No configured model_providers to probe — run `zeroclaw onboard model_providers` first"
276        );
277    }
278
279    println!("🩺 ZeroClaw Doctor — Model Catalog Probe");
280    println!("  Providers to probe: {}", targets.len());
281    println!();
282
283    let mut ok_count = 0usize;
284    let mut skipped_count = 0usize;
285    let mut auth_count = 0usize;
286    let mut error_count = 0usize;
287    let mut matrix_rows: Vec<(String, ModelProbeOutcome, Option<usize>, String)> = Vec::new();
288
289    for provider_name in &targets {
290        println!("  [{}]", provider_name);
291
292        let outcome = match create_doctor_model_provider(config, provider_name) {
293            Ok(handle) => handle.list_models().await,
294            Err(e) => Err(e),
295        };
296
297        match outcome {
298            Ok(models) => {
299                ok_count += 1;
300                println!("    ✅ {} models", models.len());
301                matrix_rows.push((
302                    provider_name.clone(),
303                    ModelProbeOutcome::Ok,
304                    Some(models.len()),
305                    "catalog fetched".to_string(),
306                ));
307            }
308            Err(error) => {
309                let error_text = format_error_chain(&error);
310                match classify_model_probe_error(&error_text) {
311                    ModelProbeOutcome::Skipped => {
312                        skipped_count += 1;
313                        println!("    ⚪ skipped: {}", truncate_for_display(&error_text, 160));
314                        matrix_rows.push((
315                            provider_name.clone(),
316                            ModelProbeOutcome::Skipped,
317                            None,
318                            truncate_for_display(&error_text, 120),
319                        ));
320                    }
321                    ModelProbeOutcome::AuthOrAccess => {
322                        auth_count += 1;
323                        println!(
324                            "    ⚠️  auth/access: {}",
325                            truncate_for_display(&error_text, 160)
326                        );
327                        matrix_rows.push((
328                            provider_name.clone(),
329                            ModelProbeOutcome::AuthOrAccess,
330                            None,
331                            truncate_for_display(&error_text, 120),
332                        ));
333                    }
334                    ModelProbeOutcome::Error | ModelProbeOutcome::Ok => {
335                        error_count += 1;
336                        println!("    ❌ error: {}", truncate_for_display(&error_text, 160));
337                        matrix_rows.push((
338                            provider_name.clone(),
339                            ModelProbeOutcome::Error,
340                            None,
341                            truncate_for_display(&error_text, 120),
342                        ));
343                    }
344                }
345            }
346        }
347
348        println!();
349    }
350
351    println!(
352        "  Summary: {} ok, {} skipped, {} auth/access, {} errors",
353        ok_count, skipped_count, auth_count, error_count
354    );
355
356    if !matrix_rows.is_empty() {
357        println!();
358        println!("  Connectivity matrix:");
359        println!(
360            "  {:<18} {:<12} {:<8} detail",
361            "model_provider", "status", "models"
362        );
363        println!(
364            "  {:<18} {:<12} {:<8} ------",
365            "------------------", "------------", "--------"
366        );
367        for (model_provider, outcome, models_count, detail) in matrix_rows {
368            let models_text = models_count
369                .map(|count| count.to_string())
370                .unwrap_or_else(|| "-".to_string());
371            println!(
372                "  {:<18} {:<12} {:<8} {}",
373                model_provider,
374                model_probe_status_label(outcome),
375                models_text,
376                detail
377            );
378        }
379    }
380
381    if auth_count > 0 {
382        println!(
383            "  💡 Some model_providers need valid API keys/plan access before `/models` can be fetched."
384        );
385    }
386
387    if provider_override.is_some() && ok_count == 0 {
388        anyhow::bail!("Model probe failed for target model_provider")
389    }
390
391    Ok(())
392}
393
394pub fn run_traces(
395    config: &Config,
396    id: Option<&str>,
397    event_filter: Option<&str>,
398    contains: Option<&str>,
399    limit: usize,
400) -> Result<()> {
401    let path = crate::observability::runtime_trace::resolve_trace_path(
402        &config.observability,
403        &config.data_dir,
404    );
405
406    if let Some(target_id) = id.map(str::trim).filter(|value| !value.is_empty()) {
407        match crate::observability::runtime_trace::find_event_by_id(&path, target_id)? {
408            Some(event) => {
409                println!("{}", serde_json::to_string_pretty(&event)?);
410            }
411            None => {
412                println!(
413                    "No runtime trace event found for id '{}' (path: {}).",
414                    target_id,
415                    path.display()
416                );
417            }
418        }
419        return Ok(());
420    }
421
422    if !path.exists() {
423        println!(
424            "Runtime trace file not found: {}.\n\
425             Enable [observability] log_persistence = \"rolling\" or \"full\", then reproduce the issue.",
426            path.display()
427        );
428        return Ok(());
429    }
430
431    let safe_limit = limit.max(1);
432    let events = crate::observability::runtime_trace::load_events(
433        &path,
434        safe_limit,
435        event_filter,
436        contains,
437    )?;
438
439    if events.is_empty() {
440        println!(
441            "No runtime trace events matched query (path: {}).",
442            path.display()
443        );
444        return Ok(());
445    }
446
447    println!("Runtime traces (newest first)");
448    println!("Path: {}", path.display().to_string());
449    println!(
450        "Filters: event={} contains={} limit={}",
451        event_filter.unwrap_or("*"),
452        contains.unwrap_or("*"),
453        safe_limit
454    );
455    println!();
456
457    for event in events {
458        let outcome = match event.event.outcome.as_str() {
459            "success" => "ok",
460            "failure" => "fail",
461            _ => "-",
462        };
463        let message = event.message.unwrap_or_default();
464        let preview = truncate_for_display(&message, 80);
465        println!(
466            "- {} | {} | {} | {} | {}",
467            event.timestamp, event.id, event.event.action, outcome, preview
468        );
469    }
470
471    println!();
472    println!("Use `zeroclaw doctor traces --id <trace-id>` to inspect a full event payload.");
473    Ok(())
474}
475
476// ── Config semantic validation ───────────────────────────────────
477
478fn check_config_semantics(config: &Config, items: &mut Vec<DiagItem>) {
479    let cat = "config";
480
481    // Config file exists
482    if config.config_path.exists() {
483        items.push(DiagItem::ok(
484            cat,
485            format!("config file: {}", config.config_path.display().to_string()),
486        ));
487    } else {
488        items.push(DiagItem::error(
489            cat,
490            format!(
491                "config file not found: {}",
492                config.config_path.display().to_string()
493            ),
494        ));
495    }
496
497    // ModelProvider validity (first configured model model_provider)
498    let primary_model_provider_doc = config.first_model_provider();
499    let primary_model_provider = config.first_model_provider_type();
500    if let Some(model_provider) = primary_model_provider {
501        if let Some(reason) = provider_validation_error(model_provider) {
502            items.push(DiagItem::error(
503                cat,
504                format!("model_provider \"{model_provider}\" is invalid: {reason}"),
505            ));
506        } else {
507            items.push(DiagItem::ok(
508                cat,
509                format!("model_provider \"{model_provider}\" is valid"),
510            ));
511        }
512    } else {
513        items.push(DiagItem::error(cat, "no model model_provider configured"));
514    }
515
516    // API key presence
517    if primary_model_provider != Some("ollama") {
518        if primary_model_provider_doc
519            .and_then(|e| e.api_key.as_deref())
520            .is_some()
521        {
522            items.push(DiagItem::ok(cat, "API key configured"));
523        } else {
524            items.push(DiagItem::warn(
525                cat,
526                "no api_key set (may rely on env vars or model_provider defaults)",
527            ));
528        }
529    }
530
531    // Model configured
532    let primary_model = primary_model_provider_doc.and_then(|e| e.model.as_deref());
533    if primary_model.is_some() {
534        items.push(DiagItem::ok(
535            cat,
536            format!("model: {}", primary_model.unwrap_or("?")),
537        ));
538    } else {
539        items.push(DiagItem::warn(
540            cat,
541            "no model configured on primary model_provider",
542        ));
543    }
544
545    // Temperature range
546    let primary_temperature = primary_model_provider_doc
547        .and_then(|e| e.temperature)
548        .unwrap_or(0.7);
549    if (0.0..=2.0).contains(&primary_temperature) {
550        items.push(DiagItem::ok(
551            cat,
552            format!(
553                "temperature {:.1} (valid range 0.0–2.0)",
554                primary_temperature
555            ),
556        ));
557    } else {
558        items.push(DiagItem::error(
559            cat,
560            format!(
561                "temperature {:.1} is out of range (expected 0.0–2.0)",
562                primary_temperature
563            ),
564        ));
565    }
566
567    // Gateway port range
568    let port = config.gateway.port;
569    if port > 0 {
570        items.push(DiagItem::ok(cat, format!("gateway port: {port}")));
571    } else {
572        items.push(DiagItem::error(cat, "gateway port is 0 (invalid)"));
573    }
574
575    // Model routes validation
576    for route in &config.model_routes {
577        if route.hint.is_empty() {
578            items.push(DiagItem::warn(cat, "model route with empty hint"));
579        }
580        if let Some(reason) = provider_validation_error(&route.model_provider) {
581            items.push(DiagItem::warn(
582                cat,
583                format!(
584                    "model route \"{}\" uses invalid model_provider \"{}\": {}",
585                    route.hint, route.model_provider, reason
586                ),
587            ));
588        }
589        if route.model.is_empty() {
590            items.push(DiagItem::warn(
591                cat,
592                format!("model route \"{}\" has empty model", route.hint),
593            ));
594        }
595    }
596
597    // Embedding routes validation
598    for route in &config.embedding_routes {
599        if route.hint.trim().is_empty() {
600            items.push(DiagItem::warn(cat, "embedding route with empty hint"));
601        }
602        if let Some(reason) = embedding_provider_validation_error(&route.model_provider) {
603            items.push(DiagItem::warn(
604                cat,
605                format!(
606                    "embedding route \"{}\" uses invalid model_provider \"{}\": {}",
607                    route.hint, route.model_provider, reason
608                ),
609            ));
610        }
611        if route.model.trim().is_empty() {
612            items.push(DiagItem::warn(
613                cat,
614                format!("embedding route \"{}\" has empty model", route.hint),
615            ));
616        }
617        if route.dimensions.is_some_and(|value| value == 0) {
618            items.push(DiagItem::warn(
619                cat,
620                format!(
621                    "embedding route \"{}\" has invalid dimensions=0",
622                    route.hint
623                ),
624            ));
625        }
626    }
627
628    if let Some(hint) = config
629        .memory
630        .embedding_model
631        .strip_prefix("hint:")
632        .map(str::trim)
633        .filter(|value| !value.is_empty())
634        && !config
635            .embedding_routes
636            .iter()
637            .any(|route| route.hint.trim() == hint)
638    {
639        items.push(DiagItem::warn(
640                cat,
641                format!(
642                    "memory.embedding_model uses hint \"{hint}\" but no matching [[embedding_routes]] entry exists"
643                ),
644            ));
645    }
646
647    // Channel: at least one configured
648    let cc = &config.channels;
649    let has_channel = cc.channels().iter().any(|info| info.configured);
650
651    if has_channel {
652        items.push(DiagItem::ok(cat, "at least one channel configured"));
653    } else {
654        items.push(DiagItem::warn(
655            cat,
656            "no channels configured — run `zeroclaw onboard` to set one up",
657        ));
658    }
659
660    // Delegate agents: model_provider validity (resolved from model_provider alias)
661    let mut agent_names: Vec<_> = config.agents.keys().collect();
662    agent_names.sort();
663    for name in agent_names {
664        let agent = config.agents.get(name).unwrap();
665        let provider_type = agent
666            .model_provider
667            .split_once('.')
668            .map_or(agent.model_provider.as_str(), |(t, _)| t);
669        if provider_type.is_empty() {
670            continue;
671        }
672        if let Some(reason) = provider_validation_error(provider_type) {
673            items.push(DiagItem::warn(
674                cat,
675                format!(
676                    "agent \"{name}\" uses invalid model_provider \"{provider_type}\": {reason}",
677                ),
678            ));
679        }
680    }
681}
682
683fn provider_validation_error(name: &str) -> Option<String> {
684    match zeroclaw_providers::create_model_provider(name, None) {
685        Ok(_) => None,
686        Err(err) => Some(
687            err.to_string()
688                .lines()
689                .next()
690                .unwrap_or("invalid model_provider")
691                .into(),
692        ),
693    }
694}
695
696fn embedding_provider_validation_error(name: &str) -> Option<String> {
697    let normalized = name.trim();
698    if normalized.eq_ignore_ascii_case("none") || normalized.eq_ignore_ascii_case("openai") {
699        return None;
700    }
701
702    let Some(url) = normalized.strip_prefix("custom:") else {
703        return Some("supported values: none, openai, custom:<url>".into());
704    };
705
706    let url = url.trim();
707    if url.is_empty() {
708        return Some("custom model_provider requires a non-empty URL after 'custom:'".into());
709    }
710
711    match reqwest::Url::parse(url) {
712        Ok(parsed) if matches!(parsed.scheme(), "http" | "https") => None,
713        Ok(parsed) => Some(format!(
714            "custom model_provider URL must use http/https, got '{}'",
715            parsed.scheme()
716        )),
717        Err(err) => Some(format!("invalid custom model_provider URL: {err}")),
718    }
719}
720
721// ── Workspace integrity ──────────────────────────────────────────
722
723fn check_workspace(config: &Config, items: &mut Vec<DiagItem>) {
724    let cat = "workspace";
725    let ws = &config.data_dir;
726
727    if ws.exists() {
728        items.push(DiagItem::ok(
729            cat,
730            format!("directory exists: {}", ws.display().to_string()),
731        ));
732    } else {
733        items.push(DiagItem::error(
734            cat,
735            format!("directory missing: {}", ws.display().to_string()),
736        ));
737        return;
738    }
739
740    // Writable check
741    let probe = workspace_probe_path(ws);
742    match std::fs::OpenOptions::new()
743        .write(true)
744        .create_new(true)
745        .open(&probe)
746    {
747        Ok(mut probe_file) => {
748            let write_result = probe_file.write_all(b"probe");
749            drop(probe_file);
750            let _ = std::fs::remove_file(&probe);
751            match write_result {
752                Ok(()) => items.push(DiagItem::ok(cat, "directory is writable")),
753                Err(e) => items.push(DiagItem::error(
754                    cat,
755                    format!("directory write probe failed: {e}"),
756                )),
757            }
758        }
759        Err(e) => {
760            items.push(DiagItem::error(
761                cat,
762                format!("directory is not writable: {e}"),
763            ));
764        }
765    }
766
767    // Disk space (best-effort via `df`)
768    if let Some(avail_mb) = disk_available_mb(ws) {
769        if avail_mb >= 100 {
770            items.push(DiagItem::ok(
771                cat,
772                format!("disk space: {avail_mb} MB available"),
773            ));
774        } else {
775            items.push(DiagItem::warn(
776                cat,
777                format!("low disk space: only {avail_mb} MB available"),
778            ));
779        }
780    }
781
782    // Key workspace files
783    check_file_exists(ws, "SOUL.md", false, cat, items);
784    check_file_exists(ws, "AGENTS.md", false, cat, items);
785}
786
787fn check_file_exists(
788    base: &Path,
789    name: &str,
790    required: bool,
791    cat: &'static str,
792    items: &mut Vec<DiagItem>,
793) {
794    let path = base.join(name);
795    if path.is_file() {
796        items.push(DiagItem::ok(cat, format!("{name} present")));
797    } else if required {
798        items.push(DiagItem::error(cat, format!("{name} missing")));
799    } else {
800        items.push(DiagItem::warn(cat, format!("{name} not found (optional)")));
801    }
802}
803
804fn disk_available_mb(path: &Path) -> Option<u64> {
805    let output = std::process::Command::new("df")
806        .arg("-m")
807        .arg(path)
808        .output()
809        .ok()?;
810    if !output.status.success() {
811        return None;
812    }
813    let stdout = String::from_utf8_lossy(&output.stdout);
814    parse_df_available_mb(&stdout)
815}
816
/// Extract the fourth whitespace-separated field from the last non-blank line
/// of `df` output and parse it as an unsigned integer.
///
/// Returns `None` when there is no non-blank line, the line has fewer than four
/// fields, or the field is not a valid `u64`.
fn parse_df_available_mb(stdout: &str) -> Option<u64> {
    stdout
        .lines()
        .rfind(|candidate| !candidate.trim().is_empty())
        .and_then(|row| row.split_whitespace().nth(3))
        .and_then(|field| field.parse().ok())
}
822
/// Build the path of a throwaway probe file inside `workspace_dir`.
///
/// The file name embeds the current process id and the nanoseconds since the
/// Unix epoch (0 if the system clock is before the epoch).
fn workspace_probe_path(workspace_dir: &Path) -> std::path::PathBuf {
    use std::time::{SystemTime, UNIX_EPOCH};

    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    let pid = std::process::id();

    workspace_dir.join(format!(".zeroclaw_doctor_probe_{pid}_{nanos}"))
}
833
834// ── Daemon state (original logic, preserved) ─────────────────────
835
836fn check_daemon_state(config: &Config, items: &mut Vec<DiagItem>) {
837    let cat = "daemon";
838    let state_file = crate::daemon::state_file_path(config);
839
840    if !state_file.exists() {
841        items.push(DiagItem::error(
842            cat,
843            format!(
844                "state file not found: {} — is the daemon running?",
845                state_file.display()
846            ),
847        ));
848        return;
849    }
850
851    let raw = match std::fs::read_to_string(&state_file) {
852        Ok(r) => r,
853        Err(e) => {
854            items.push(DiagItem::error(cat, format!("cannot read state file: {e}")));
855            return;
856        }
857    };
858
859    let snapshot: serde_json::Value = match serde_json::from_str(&raw) {
860        Ok(v) => v,
861        Err(e) => {
862            items.push(DiagItem::error(cat, format!("invalid state JSON: {e}")));
863            return;
864        }
865    };
866
867    // Daemon heartbeat freshness
868    let updated_at = snapshot
869        .get("updated_at")
870        .and_then(serde_json::Value::as_str)
871        .unwrap_or("");
872
873    if let Ok(ts) = DateTime::parse_from_rfc3339(updated_at) {
874        let age = Utc::now()
875            .signed_duration_since(ts.with_timezone(&Utc))
876            .num_seconds();
877        if age <= DAEMON_STALE_SECONDS {
878            items.push(DiagItem::ok(cat, format!("heartbeat fresh ({age}s ago)")));
879        } else {
880            items.push(DiagItem::error(
881                cat,
882                format!("heartbeat stale ({age}s ago)"),
883            ));
884        }
885    } else {
886        items.push(DiagItem::error(
887            cat,
888            format!("invalid daemon timestamp: {updated_at}"),
889        ));
890    }
891
892    // Components
893    if let Some(components) = snapshot
894        .get("components")
895        .and_then(serde_json::Value::as_object)
896    {
897        // Scheduler
898        if let Some(scheduler) = components.get("scheduler") {
899            let scheduler_ok = scheduler
900                .get("status")
901                .and_then(serde_json::Value::as_str)
902                .is_some_and(|s| s == "ok");
903            let scheduler_age = scheduler
904                .get("last_ok")
905                .and_then(serde_json::Value::as_str)
906                .and_then(parse_rfc3339)
907                .map_or(i64::MAX, |dt| {
908                    Utc::now().signed_duration_since(dt).num_seconds()
909                });
910
911            if scheduler_ok && scheduler_age <= SCHEDULER_STALE_SECONDS {
912                items.push(DiagItem::ok(
913                    cat,
914                    format!("scheduler healthy (last ok {scheduler_age}s ago)"),
915                ));
916            } else {
917                items.push(DiagItem::error(
918                    cat,
919                    format!("scheduler unhealthy (ok={scheduler_ok}, age={scheduler_age}s)"),
920                ));
921            }
922        } else {
923            items.push(DiagItem::warn(cat, "scheduler component not tracked yet"));
924        }
925
926        // Channels
927        let mut channel_count = 0u32;
928        let mut stale = 0u32;
929        for (name, component) in components {
930            if !name.starts_with("channel:") {
931                continue;
932            }
933            channel_count += 1;
934            let status_ok = component
935                .get("status")
936                .and_then(serde_json::Value::as_str)
937                .is_some_and(|s| s == "ok");
938            let age = component
939                .get("last_ok")
940                .and_then(serde_json::Value::as_str)
941                .and_then(parse_rfc3339)
942                .map_or(i64::MAX, |dt| {
943                    Utc::now().signed_duration_since(dt).num_seconds()
944                });
945
946            if status_ok && age <= CHANNEL_STALE_SECONDS {
947                items.push(DiagItem::ok(cat, format!("{name} fresh ({age}s ago)")));
948            } else {
949                stale += 1;
950                items.push(DiagItem::error(
951                    cat,
952                    format!("{name} stale (ok={status_ok}, age={age}s)"),
953                ));
954            }
955        }
956
957        if channel_count == 0 {
958            items.push(DiagItem::warn(cat, "no channel components tracked yet"));
959        } else if stale > 0 {
960            items.push(DiagItem::warn(
961                cat,
962                format!("{channel_count} channels, {stale} stale"),
963            ));
964        }
965    }
966}
967
968// ── Environment checks ───────────────────────────────────────────
969
970fn check_environment(items: &mut Vec<DiagItem>) {
971    let cat = "environment";
972
973    // git
974    check_command_available("git", &["--version"], cat, items);
975
976    // Shell — Unix uses $SHELL, Windows uses %ComSpec% (path to cmd.exe).
977    let shell = std::env::var("SHELL")
978        .ok()
979        .filter(|s| !s.is_empty())
980        .or_else(|| std::env::var("ComSpec").ok().filter(|s| !s.is_empty()));
981    match shell {
982        Some(s) => items.push(DiagItem::ok(cat, format!("shell: {s}"))),
983        None => items.push(DiagItem::warn(cat, "neither $SHELL nor %ComSpec% is set")),
984    }
985
986    // HOME
987    if std::env::var("HOME").is_ok() || std::env::var("USERPROFILE").is_ok() {
988        items.push(DiagItem::ok(cat, "home directory env set"));
989    } else {
990        items.push(DiagItem::error(
991            cat,
992            "neither $HOME nor $USERPROFILE is set",
993        ));
994    }
995
996    // Optional tools
997    check_command_available("curl", &["--version"], cat, items);
998}
999
1000fn check_cli_tools(items: &mut Vec<DiagItem>) {
1001    let cat = "cli-tools";
1002
1003    let discovered = crate::tools::discover_cli_tools(&[], &[]);
1004
1005    if discovered.is_empty() {
1006        items.push(DiagItem::warn(cat, "No CLI tools found in PATH"));
1007    } else {
1008        for cli in &discovered {
1009            let version_info = cli
1010                .version
1011                .as_deref()
1012                .map(|v| truncate_for_display(v, COMMAND_VERSION_PREVIEW_CHARS))
1013                .unwrap_or_else(|| "unknown version".to_string());
1014            items.push(DiagItem::ok(
1015                cat,
1016                format!("{} ({}) — {}", cli.name, cli.category, version_info),
1017            ));
1018        }
1019        items.push(DiagItem::ok(
1020            cat,
1021            format!("{} CLI tools discovered", discovered.len()),
1022        ));
1023    }
1024}
1025
1026fn check_command_available(cmd: &str, args: &[&str], cat: &'static str, items: &mut Vec<DiagItem>) {
1027    match std::process::Command::new(cmd)
1028        .args(args)
1029        .stdout(std::process::Stdio::piped())
1030        .stderr(std::process::Stdio::piped())
1031        .output()
1032    {
1033        Ok(output) if output.status.success() => {
1034            let ver = String::from_utf8_lossy(&output.stdout);
1035            let first_line = ver.lines().next().unwrap_or("").trim();
1036            let display = truncate_for_display(first_line, COMMAND_VERSION_PREVIEW_CHARS);
1037            items.push(DiagItem::ok(cat, format!("{cmd}: {display}")));
1038        }
1039        Ok(_) => {
1040            items.push(DiagItem::warn(
1041                cat,
1042                format!("{cmd} found but returned non-zero"),
1043            ));
1044        }
1045        Err(_) => {
1046            items.push(DiagItem::warn(cat, format!("{cmd} not found in PATH")));
1047        }
1048    }
1049}
1050
1051fn format_error_chain(error: &anyhow::Error) -> String {
1052    let mut parts = Vec::new();
1053    for cause in error.chain() {
1054        let message = cause.to_string();
1055        if !message.is_empty() {
1056            parts.push(message);
1057        }
1058    }
1059
1060    if parts.is_empty() {
1061        return String::new();
1062    }
1063
1064    parts.join(": ")
1065}
1066
/// Returns `input` limited to at most `max_chars` characters for display.
///
/// Counting is done in `char`s, so the cut never lands inside a multi-byte
/// UTF-8 sequence. When anything is cut off, a single `…` is appended; input
/// that already fits is returned unchanged.
fn truncate_for_display(input: &str, max_chars: usize) -> String {
    // The byte offset of the character at index `max_chars` (if any) is where
    // the preview must end.
    match input.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}…", &input[..cut]),
        None => input.to_string(),
    }
}
1076
1077// ── Helpers ──────────────────────────────────────────────────────
1078
1079fn parse_rfc3339(raw: &str) -> Option<DateTime<Utc>> {
1080    DateTime::parse_from_rfc3339(raw)
1081        .ok()
1082        .map(|dt| dt.with_timezone(&Utc))
1083}
1084
// Unit tests for the doctor checks: provider validation, config semantics,
// environment probing, and the small formatting/path helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Provider names with a URL after `custom:` / `anthropic-custom:` (and the
    /// plain `openrouter` name) validate cleanly; a bare `custom:` and an
    /// unrecognised name each produce an explanatory error message.
    #[test]
    fn provider_validation_checks_custom_url_shape() {
        assert!(provider_validation_error("openrouter").is_none());
        assert!(provider_validation_error("custom:https://example.com").is_none());
        assert!(provider_validation_error("anthropic-custom:https://example.com").is_none());

        let invalid_custom = provider_validation_error("custom:").unwrap_or_default();
        assert!(invalid_custom.contains("requires a URL"));

        let invalid_unknown = provider_validation_error("totally-fake").unwrap_or_default();
        assert!(invalid_unknown.contains("Unknown model_provider"));
    }

    /// Each severity constructor maps to its expected icon.
    #[test]
    fn diag_item_icons() {
        assert_eq!(DiagItem::ok("t", "m").icon(), "✅");
        assert_eq!(DiagItem::warn("t", "m").icon(), "⚠️ ");
        assert_eq!(DiagItem::error("t", "m").icon(), "❌");
    }

    /// A temperature of 5.0 must surface as an `Error` item mentioning
    /// "temperature".
    #[test]
    fn config_validation_catches_bad_temperature() {
        // Single model_provider entry with an out-of-range temperature so the
        // doctor's `first_model_provider()` lookup deterministically picks it
        // (HashMap iteration order is unspecified — multiple entries
        // produce a coin-flip first pick).
        let mut config = Config::default();
        config
            .providers
            .models
            .ensure("openrouter", "default")
            .expect("known model_provider type")
            .temperature = Some(5.0);
        let mut items = Vec::new();
        check_config_semantics(&config, &mut items);
        let temp_item = items.iter().find(|i| i.message.contains("temperature"));
        assert!(temp_item.is_some());
        assert_eq!(temp_item.unwrap().severity, Severity::Error);
    }

    /// A temperature of 0.7 must surface as an `Ok` item mentioning
    /// "temperature".
    #[test]
    fn config_validation_accepts_valid_temperature() {
        let mut config = Config::default();
        config
            .providers
            .models
            .ensure("openrouter", "default")
            .expect("known model_provider type")
            .temperature = Some(0.7);
        let mut items = Vec::new();
        check_config_semantics(&config, &mut items);
        let temp_item = items.iter().find(|i| i.message.contains("temperature"));
        assert!(temp_item.is_some());
        assert_eq!(temp_item.unwrap().severity, Severity::Ok);
    }

    /// With the default config, the first item mentioning "channel" is a
    /// warning.
    #[test]
    fn config_validation_warns_no_channels() {
        let config = Config::default();
        let mut items = Vec::new();
        check_config_semantics(&config, &mut items);
        let ch_item = items.iter().find(|i| i.message.contains("channel"));
        assert!(ch_item.is_some());
        assert_eq!(ch_item.unwrap().severity, Severity::Warn);
    }

    /// An API key stored on the `custom`/`local` profile is returned for the
    /// dotted ref `custom.local` but not for the bare `custom` name.
    #[test]
    fn configured_model_provider_api_key_uses_alias_profile() {
        let mut config = Config::default();
        config
            .providers
            .models
            .ensure("custom", "local")
            .expect("known model_provider type")
            .api_key = Some("redacted-test-key".to_string());

        assert_eq!(
            configured_model_provider_api_key(&config, "custom.local"),
            Some("redacted-test-key")
        );
        assert_eq!(configured_model_provider_api_key(&config, "custom"), None);
    }

    /// Building the doctor's model provider from the `custom.local` alias
    /// (API key and URI set on the profile) must succeed.
    #[test]
    fn doctor_model_provider_uses_alias_profile() {
        let mut config = Config::default();
        let profile = config
            .providers
            .models
            .ensure("custom", "local")
            .expect("known model_provider type");
        profile.api_key = Some("redacted-test-key".to_string());
        profile.uri = Some("https://models.example.test/v1".to_string());

        if let Err(error) = create_doctor_model_provider(&config, "custom.local") {
            panic!("doctor model probe should build custom providers from alias config: {error}");
        }
    }

    /// An agent whose `model_provider` names an unknown family is flagged with
    /// a warning that quotes the agent and the family name.
    #[test]
    fn config_validation_catches_unknown_provider() {
        // Typed slots can only hold canonical family names, so an unknown
        // family can no longer reach `first_model_provider_type()`. The
        // remaining reachable path is `agent.model_provider`, which is a
        // free-form `String` an operator can set to any dotted ref.
        let mut config = Config::default();
        config.agents.insert(
            "broken".to_string(),
            zeroclaw_config::schema::AliasedAgentConfig {
                model_provider: "totally-fake.default".into(),
                risk_profile: "default".to_string(),
                ..Default::default()
            },
        );
        let mut items = Vec::new();
        check_config_semantics(&config, &mut items);
        let prov_item = items.iter().find(|i| {
            i.message
                .contains("agent \"broken\" uses invalid model_provider \"totally-fake\"")
        });
        assert!(
            prov_item.is_some(),
            "doctor should flag unknown agent model_provider"
        );
        assert_eq!(prov_item.unwrap().severity, Severity::Warn);
    }

    // The pre-Phase-6 tests `config_validation_catches_malformed_custom_provider`
    // and `config_validation_accepts_custom_provider` are obsolete: the typed
    // ModelProviders container can't represent malformed `custom:` outer keys at
    // all. Custom-URL model_providers now live under the `custom` typed slot with the
    // operator-supplied URL in `base.uri`. The malformed-custom-key validator
    // path is unreachable.

    /// A model route with an empty `model` string produces a warning
    /// mentioning "empty model".
    #[test]
    fn config_validation_warns_empty_model_route() {
        let config = Config {
            model_routes: vec![zeroclaw_config::schema::ModelRouteConfig {
                hint: "fast".into(),
                model_provider: "groq".into(),
                model: String::new(),
                api_key: None,
            }],
            ..Config::default()
        };
        let mut items = Vec::new();
        check_config_semantics(&config, &mut items);
        let route_item = items.iter().find(|i| i.message.contains("empty model"));
        assert!(route_item.is_some());
        assert_eq!(route_item.unwrap().severity, Severity::Warn);
    }

    /// An embedding route with an empty `model` string produces a warning that
    /// names the route's hint.
    #[test]
    fn config_validation_warns_empty_embedding_route_model() {
        let config = Config {
            embedding_routes: vec![zeroclaw_config::schema::EmbeddingRouteConfig {
                hint: "semantic".into(),
                model_provider: "openai".into(),
                model: String::new(),
                dimensions: Some(1536),
                api_key: None,
            }],
            ..Config::default()
        };

        let mut items = Vec::new();
        check_config_semantics(&config, &mut items);
        let route_item = items.iter().find(|item| {
            item.message
                .contains("embedding route \"semantic\" has empty model")
        });
        assert!(route_item.is_some());
        assert_eq!(route_item.unwrap().severity, Severity::Warn);
    }

    /// An embedding route pointing at `groq` is expected to be reported as
    /// using an invalid model_provider, at warning severity.
    #[test]
    fn config_validation_warns_invalid_embedding_route_provider() {
        let config = Config {
            embedding_routes: vec![zeroclaw_config::schema::EmbeddingRouteConfig {
                hint: "semantic".into(),
                model_provider: "groq".into(),
                model: "text-embedding-3-small".into(),
                dimensions: None,
                api_key: None,
            }],
            ..Config::default()
        };

        let mut items = Vec::new();
        check_config_semantics(&config, &mut items);
        let route_item = items.iter().find(|item| {
            item.message
                .contains("uses invalid model_provider \"groq\"")
        });
        assert!(route_item.is_some());
        assert_eq!(route_item.unwrap().severity, Severity::Warn);
    }

    /// Setting `memory.embedding_model` to `hint:semantic` without any
    /// embedding routes configured produces a "no matching" warning.
    #[test]
    fn config_validation_warns_missing_embedding_hint_target() {
        let mut config = Config::default();
        config.memory.embedding_model = "hint:semantic".into();

        let mut items = Vec::new();
        check_config_semantics(&config, &mut items);
        let route_item = items.iter().find(|item| {
            item.message
                .contains("no matching [[embedding_routes]] entry exists")
        });
        assert!(route_item.is_some());
        assert_eq!(route_item.unwrap().severity, Severity::Warn);
    }

    /// The environment check reports an `Ok` item whose message starts with
    /// `git:`. NOTE: this depends on `git` being installed on the host running
    /// the tests.
    #[test]
    fn environment_check_finds_git() {
        let mut items = Vec::new();
        check_environment(&mut items);
        let git_item = items.iter().find(|i| i.message.starts_with("git:"));
        // git should be available in any CI/dev environment
        assert!(git_item.is_some());
        assert_eq!(git_item.unwrap().severity, Severity::Ok);
    }

    /// For `df`-style output consisting of a header plus one data line, the
    /// parser is expected to return `Some(500)`.
    #[test]
    fn parse_df_available_mb_uses_last_data_line() {
        let stdout =
            "Filesystem 1M-blocks Used Available Use% Mounted on\n/dev/sda1 1000 500 500 50% /\n";
        assert_eq!(parse_df_available_mb(stdout), Some(500));
    }

    /// Truncation counts characters rather than bytes, so a leading emoji is
    /// kept whole and the ellipsis is appended after three characters.
    #[test]
    fn truncate_for_display_preserves_utf8_boundaries() {
        let preview = truncate_for_display("🙂example-alpha-build", 3);
        assert_eq!(preview, "🙂ex…");
    }

    /// Two probe paths generated for the same directory differ, and the file
    /// name carries the hidden `.zeroclaw_doctor_probe_` prefix.
    #[test]
    fn workspace_probe_path_is_hidden_and_unique() {
        let tmp = TempDir::new().unwrap();
        let first = workspace_probe_path(tmp.path());
        let second = workspace_probe_path(tmp.path());

        assert_ne!(first, second);
        assert!(
            first
                .file_name()
                .and_then(|name| name.to_str())
                .is_some_and(|name| name.starts_with(".zeroclaw_doctor_probe_"))
        );
    }

    /// Agents inserted as `zeta` then `alpha` must be reported with `alpha`
    /// first, i.e. agent diagnostics come out in sorted name order.
    #[test]
    fn config_validation_reports_delegate_agents_in_sorted_order() {
        let mut config = Config::default();
        config.agents.insert(
            "zeta".into(),
            zeroclaw_config::schema::AliasedAgentConfig {
                model_provider: "totally-fake.default".into(),
                ..Default::default()
            },
        );
        config.agents.insert(
            "alpha".into(),
            zeroclaw_config::schema::AliasedAgentConfig {
                model_provider: "totally-fake.default".into(),
                ..Default::default()
            },
        );

        let mut items = Vec::new();
        check_config_semantics(&config, &mut items);

        let agent_messages: Vec<_> = items
            .iter()
            .filter(|item| item.message.starts_with("agent \""))
            .map(|item| item.message.as_str())
            .collect();

        assert_eq!(agent_messages.len(), 2);
        assert!(agent_messages[0].contains("agent \"alpha\""));
        assert!(agent_messages[1].contains("agent \"zeta\""));
    }
}