zeroclaw_runtime/skills/
testing.rs

1use anyhow::{Context, Result};
2use regex::Regex;
3use std::path::{Path, PathBuf};
4use std::process::Command;
5
/// File name of the test script looked up inside each skill directory.
const TEST_FILE_NAME: &str = "TEST.sh";
7
/// Result of running all tests for a single skill.
///
/// As filled in by `test_skill`: `tests_run` counts the parsed test cases,
/// `tests_passed` counts the cases that produced no failure, and `failures`
/// holds one entry per failing case.
#[derive(Debug, Clone)]
pub struct SkillTestResult {
    /// Name of the skill these results belong to.
    pub skill_name: String,
    /// Number of test cases that were executed.
    pub tests_run: usize,
    /// Number of executed test cases that passed.
    pub tests_passed: usize,
    /// Details for every test case that did not pass.
    pub failures: Vec<TestFailure>,
}
16
/// Details about a single failed test case.
#[derive(Debug, Clone)]
pub struct TestFailure {
    /// Shell command that was run.
    pub command: String,
    /// Exit code the test case expected.
    pub expected_exit: i32,
    /// Exit code actually observed; `-1` when the command could not be
    /// executed or the process reported no exit code.
    pub actual_exit: i32,
    /// Output pattern the test case expected.
    pub expected_pattern: String,
    /// Stdout followed by stderr of the command, or the execution error
    /// message when the command could not be run.
    pub actual_output: String,
}
26
/// A parsed test case from a TEST.sh line.
#[derive(Debug, Clone)]
struct TestCase {
    /// Shell command to execute.
    command: String,
    /// Exit code the command is expected to return.
    expected_exit: i32,
    /// Text expected in the command output; may be empty.
    expected_pattern: String,
}

/// Parse a single TEST.sh line into a `TestCase`.
///
/// Expected format: `command | expected_exit_code | expected_output_pattern`
///
/// Returns `None` for blank lines, for comment lines (first non-blank
/// character is `#`), and for lines without a numeric exit-code field.
///
/// Fields are separated by ` | ` (a pipe surrounded by spaces). Both the
/// command and the pattern may themselves contain ` | `, so the exit code is
/// taken to be the first field after the command that parses as an integer:
/// everything before it is the command and everything after it is the
/// pattern. This lets piped commands such as
/// `echo hi | grep h | 0 | h` be parsed. Lines without enough ` | `
/// separators fall back to splitting on bare `|` into exactly three fields.
fn parse_test_line(line: &str) -> Option<TestCase> {
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return None;
    }

    // Trimming strips the space after a trailing `|` (empty pattern). Put one
    // back so the final, empty field is still delimited by ` | `.
    let padded = format!("{trimmed} ");
    let parts: Vec<&str> = padded.split(" | ").collect();

    if parts.len() >= 3 {
        // The exit code can be neither the first field (that is the command)
        // nor the last one (a pattern field must follow it).
        let (exit_idx, expected_exit) = parts[1..parts.len() - 1]
            .iter()
            .enumerate()
            .find_map(|(offset, field)| {
                field
                    .trim()
                    .parse::<i32>()
                    .ok()
                    .map(|code| (offset + 1, code))
            })?;

        return Some(TestCase {
            // Rejoin so shell pipes inside the command survive the split.
            command: parts[..exit_idx].join(" | ").trim().to_string(),
            expected_exit,
            // Rejoin so a pattern containing ` | ` survives the split.
            expected_pattern: parts[exit_idx + 1..].join(" | ").trim().to_string(),
        });
    }

    // Fallback: tolerate bare `|` separators without surrounding spaces.
    let mut fields = trimmed.splitn(3, '|');
    let command = fields.next()?.trim().to_string();
    let expected_exit = fields.next()?.trim().parse::<i32>().ok()?;
    let expected_pattern = fields.next()?.trim().to_string();

    Some(TestCase {
        command,
        expected_exit,
        expected_pattern,
    })
}
75
76/// Check whether `output` matches `pattern`.
77///
78/// If the pattern looks like a regex (contains regex metacharacters beyond a
79/// simple `/` path), we attempt a regex match. Otherwise we fall back to a
80/// simple substring check.
81fn pattern_matches(output: &str, pattern: &str) -> bool {
82    if pattern.is_empty() {
83        return true;
84    }
85    // Try regex first
86    if let Ok(re) = Regex::new(pattern)
87        && re.is_match(output)
88    {
89        return true;
90    }
91    // Fallback: substring match
92    output.contains(pattern)
93}
94
95/// Run a single test case and return a possible failure.
96fn run_test_case(case: &TestCase, skill_dir: &Path, verbose: bool) -> Option<TestFailure> {
97    if verbose {
98        println!("    running: {}", case.command);
99    }
100
101    let result = Command::new("sh")
102        .arg("-c")
103        .arg(&case.command)
104        .current_dir(skill_dir)
105        .output();
106
107    let output = match result {
108        Ok(o) => o,
109        Err(err) => {
110            return Some(TestFailure {
111                command: case.command.clone(),
112                expected_exit: case.expected_exit,
113                actual_exit: -1,
114                expected_pattern: case.expected_pattern.clone(),
115                actual_output: format!("failed to execute command: {err}"),
116            });
117        }
118    };
119
120    let actual_exit = output.status.code().unwrap_or(-1);
121    let stdout = String::from_utf8_lossy(&output.stdout);
122    let stderr = String::from_utf8_lossy(&output.stderr);
123    let combined = format!("{stdout}{stderr}");
124
125    if verbose {
126        if !stdout.is_empty() {
127            println!("    stdout: {}", stdout.trim());
128        }
129        if !stderr.is_empty() {
130            println!("    stderr: {}", stderr.trim());
131        }
132        println!("    exit: {actual_exit}");
133    }
134
135    let exit_ok = actual_exit == case.expected_exit;
136    let pattern_ok = pattern_matches(&combined, &case.expected_pattern);
137
138    if exit_ok && pattern_ok {
139        None
140    } else {
141        Some(TestFailure {
142            command: case.command.clone(),
143            expected_exit: case.expected_exit,
144            actual_exit,
145            expected_pattern: case.expected_pattern.clone(),
146            actual_output: combined.to_string(),
147        })
148    }
149}
150
151/// Test a single skill by parsing and running its TEST.sh.
152pub fn test_skill(skill_dir: &Path, skill_name: &str, verbose: bool) -> Result<SkillTestResult> {
153    let test_file = skill_dir.join(TEST_FILE_NAME);
154    if !test_file.exists() {
155        return Ok(SkillTestResult {
156            skill_name: skill_name.to_string(),
157            tests_run: 0,
158            tests_passed: 0,
159            failures: Vec::new(),
160        });
161    }
162
163    let content = std::fs::read_to_string(&test_file)
164        .with_context(|| format!("failed to read {}", test_file.display().to_string()))?;
165
166    let cases: Vec<TestCase> = content.lines().filter_map(parse_test_line).collect();
167
168    let mut result = SkillTestResult {
169        skill_name: skill_name.to_string(),
170        tests_run: cases.len(),
171        tests_passed: 0,
172        failures: Vec::new(),
173    };
174
175    for case in &cases {
176        match run_test_case(case, skill_dir, verbose) {
177            None => result.tests_passed += 1,
178            Some(failure) => result.failures.push(failure),
179        }
180    }
181
182    Ok(result)
183}
184
185/// Test all skills that have a TEST.sh file within the given skill directories.
186pub fn test_all_skills(skills_dirs: &[PathBuf], verbose: bool) -> Result<Vec<SkillTestResult>> {
187    let mut results = Vec::new();
188
189    for dir in skills_dirs {
190        if !dir.exists() || !dir.is_dir() {
191            continue;
192        }
193
194        let entries = std::fs::read_dir(dir)
195            .with_context(|| format!("failed to read directory {}", dir.display().to_string()))?;
196
197        for entry in entries.flatten() {
198            let path = entry.path();
199            if !path.is_dir() {
200                continue;
201            }
202            let test_file = path.join(TEST_FILE_NAME);
203            if !test_file.exists() {
204                continue;
205            }
206            let skill_name = path
207                .file_name()
208                .map(|n| n.to_string_lossy().to_string())
209                .unwrap_or_default();
210
211            if verbose {
212                println!(
213                    "  Testing skill: {} ({})",
214                    skill_name,
215                    path.display().to_string()
216                );
217            }
218
219            let r = test_skill(&path, &skill_name, verbose)?;
220            results.push(r);
221        }
222    }
223
224    Ok(results)
225}
226
227/// Pretty-print test results using the `console` crate.
228pub fn print_results(results: &[SkillTestResult]) {
229    if results.is_empty() {
230        println!("No skills with {} found.", TEST_FILE_NAME);
231        return;
232    }
233
234    println!();
235    for r in results {
236        if r.tests_run == 0 {
237            println!(
238                "  {} {} — no test cases",
239                console::style("-").dim(),
240                r.skill_name,
241            );
242            continue;
243        }
244
245        if r.failures.is_empty() {
246            println!(
247                "  {} {} — {}/{} passed",
248                console::style("✓").green().bold(),
249                console::style(&r.skill_name).white().bold(),
250                r.tests_passed,
251                r.tests_run,
252            );
253        } else {
254            println!(
255                "  {} {} — {}/{} passed",
256                console::style("✗").red().bold(),
257                console::style(&r.skill_name).white().bold(),
258                r.tests_passed,
259                r.tests_run,
260            );
261            for f in &r.failures {
262                println!("    command:  {}", console::style(&f.command).dim(),);
263                println!(
264                    "    expected: exit={}, pattern={}",
265                    f.expected_exit, f.expected_pattern,
266                );
267                println!(
268                    "    actual:   exit={}, output={}",
269                    f.actual_exit,
270                    truncate_output(&f.actual_output, 200),
271                );
272                println!();
273            }
274        }
275    }
276
277    let total_run: usize = results.iter().map(|r| r.tests_run).sum();
278    let total_passed: usize = results.iter().map(|r| r.tests_passed).sum();
279    let total_failed = total_run - total_passed;
280
281    println!();
282    if total_failed == 0 {
283        println!(
284            "  {} All {total_run} test(s) passed across {} skill(s).",
285            console::style("✓").green().bold(),
286            results.len(),
287        );
288    } else {
289        println!(
290            "  {} {total_failed} of {total_run} test(s) failed across {} skill(s).",
291            console::style("✗").red().bold(),
292            results.len(),
293        );
294    }
295    println!();
296}
297
/// Produce a single-line preview of `s` for display.
///
/// Surrounding whitespace is trimmed and newlines are replaced by spaces.
/// When the trimmed text is longer than `max` bytes it is cut to at most
/// `max` bytes and `...` is appended. The cut backs up to the nearest UTF-8
/// character boundary, so multi-byte characters are never split.
fn truncate_output(s: &str, max: usize) -> String {
    let trimmed = s.trim();
    if trimmed.len() <= max {
        return trimmed.replace('\n', " ");
    }

    // Slicing a `str` at a byte index inside a multi-byte character panics,
    // so step back to a boundary. Index 0 is always a boundary, which
    // guarantees termination.
    let mut end = max;
    while !trimmed.is_char_boundary(end) {
        end -= 1;
    }

    format!("{}...", trimmed[..end].replace('\n', " "))
}
306
// Unit tests for the TEST.sh parser, the pattern matcher and the skill test
// runner. Runner tests create temporary skill directories and execute real
// shell commands through `sh`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // Blank lines and `#` comments (even indented) are not test cases.
    #[test]
    fn parse_comment_and_empty_lines() {
        assert!(parse_test_line("").is_none());
        assert!(parse_test_line("   ").is_none());
        assert!(parse_test_line("# this is a comment").is_none());
        assert!(parse_test_line("  # indented comment").is_none());
    }

    // A well-formed `command | exit | pattern` line yields all three fields.
    #[test]
    fn parse_valid_test_line() {
        let case = parse_test_line("echo hello | 0 | hello").unwrap();
        assert_eq!(case.command, "echo hello");
        assert_eq!(case.expected_exit, 0);
        assert_eq!(case.expected_pattern, "hello");
    }

    // Spaces inside the command and the pattern are preserved.
    #[test]
    fn parse_line_with_spaces_in_pattern() {
        let case = parse_test_line("echo 'hello world' | 0 | hello world").unwrap();
        assert_eq!(case.command, "echo 'hello world'");
        assert_eq!(case.expected_exit, 0);
        assert_eq!(case.expected_pattern, "hello world");
    }

    // Lines without separators or without a numeric exit code are rejected.
    #[test]
    fn parse_invalid_line_missing_parts() {
        assert!(parse_test_line("just a command").is_none());
        assert!(parse_test_line("cmd | notanumber | pattern").is_none());
    }

    // An empty pattern matches any output.
    #[test]
    fn pattern_matches_empty() {
        assert!(pattern_matches("anything", ""));
    }

    // Plain text patterns match when contained in the output.
    #[test]
    fn pattern_matches_substring() {
        assert!(pattern_matches("hello world", "hello"));
        assert!(pattern_matches("hello world", "world"));
        assert!(!pattern_matches("hello world", "missing"));
    }

    // Regex syntax in the pattern is honoured.
    #[test]
    fn pattern_matches_regex() {
        assert!(pattern_matches("hello world 42", r"world \d+"));
        assert!(pattern_matches("/usr/bin/bash", r"/"));
        assert!(!pattern_matches("hello", r"^\d+$"));
    }

    // End-to-end: one passing case; the comment line in TEST.sh is ignored.
    #[test]
    fn test_skill_with_echo() {
        let dir = tempfile::tempdir().unwrap();
        let skill_dir = dir.path().join("echo-skill");
        fs::create_dir_all(&skill_dir).unwrap();
        fs::write(
            skill_dir.join("TEST.sh"),
            "# Echo test\necho hello | 0 | hello\n",
        )
        .unwrap();

        let result = test_skill(&skill_dir, "echo-skill", false).unwrap();
        assert_eq!(result.tests_run, 1);
        assert_eq!(result.tests_passed, 1);
        assert!(result.failures.is_empty());
    }

    // A skill directory without TEST.sh reports zero tests, not an error.
    #[test]
    fn test_skill_without_test_file() {
        let dir = tempfile::tempdir().unwrap();
        let skill_dir = dir.path().join("no-tests");
        fs::create_dir_all(&skill_dir).unwrap();

        let result = test_skill(&skill_dir, "no-tests", false).unwrap();
        assert_eq!(result.tests_run, 0);
        assert_eq!(result.tests_passed, 0);
        assert!(result.failures.is_empty());
    }

    // Wrong exit code and wrong pattern: the failure records both exit codes.
    #[test]
    fn test_skill_with_failing_test() {
        let dir = tempfile::tempdir().unwrap();
        let skill_dir = dir.path().join("fail-skill");
        fs::create_dir_all(&skill_dir).unwrap();
        fs::write(skill_dir.join("TEST.sh"), "echo hello | 1 | goodbye\n").unwrap();

        let result = test_skill(&skill_dir, "fail-skill", false).unwrap();
        assert_eq!(result.tests_run, 1);
        assert_eq!(result.tests_passed, 0);
        assert_eq!(result.failures.len(), 1);
        assert_eq!(result.failures[0].expected_exit, 1);
        assert_eq!(result.failures[0].actual_exit, 0);
    }

    // An empty pattern always matches, so only the exit code can fail here.
    #[test]
    fn test_skill_exit_code_mismatch() {
        let dir = tempfile::tempdir().unwrap();
        let skill_dir = dir.path().join("exit-mismatch");
        fs::create_dir_all(&skill_dir).unwrap();
        fs::write(skill_dir.join("TEST.sh"), "false | 0 | \n").unwrap();

        let result = test_skill(&skill_dir, "exit-mismatch", false).unwrap();
        assert_eq!(result.tests_run, 1);
        assert_eq!(result.tests_passed, 0);
        assert_eq!(result.failures[0].actual_exit, 1);
    }

    // Mirrors the totals computed in `print_results` on hand-built results.
    #[test]
    fn test_result_aggregation() {
        let results = [
            SkillTestResult {
                skill_name: "a".to_string(),
                tests_run: 3,
                tests_passed: 3,
                failures: Vec::new(),
            },
            SkillTestResult {
                skill_name: "b".to_string(),
                tests_run: 2,
                tests_passed: 1,
                failures: vec![TestFailure {
                    command: "false".to_string(),
                    expected_exit: 0,
                    actual_exit: 1,
                    expected_pattern: String::new(),
                    actual_output: String::new(),
                }],
            },
        ];

        let total_run: usize = results.iter().map(|r| r.tests_run).sum();
        let total_passed: usize = results.iter().map(|r| r.tests_passed).sum();
        assert_eq!(total_run, 5);
        assert_eq!(total_passed, 4);
    }

    // Discovery only picks up subdirectories that contain a TEST.sh.
    #[test]
    fn test_all_skills_finds_skills_with_tests() {
        let dir = tempfile::tempdir().unwrap();
        let skills_dir = dir.path().join("skills");

        // Skill with TEST.sh
        let skill_a = skills_dir.join("skill-a");
        fs::create_dir_all(&skill_a).unwrap();
        fs::write(skill_a.join("TEST.sh"), "echo ok | 0 | ok\n").unwrap();

        // Skill without TEST.sh — should be skipped
        let skill_b = skills_dir.join("skill-b");
        fs::create_dir_all(&skill_b).unwrap();

        let results = test_all_skills(std::slice::from_ref(&skills_dir), false).unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].skill_name, "skill-a");
        assert_eq!(results[0].tests_passed, 1);
    }

    // Short input passes through unchanged; long input is cut and suffixed.
    #[test]
    fn test_truncate_output() {
        assert_eq!(truncate_output("short", 100), "short");
        let long = "a".repeat(300);
        let truncated = truncate_output(&long, 200);
        assert!(truncated.ends_with("..."));
        // 200 bytes of content plus the 3-byte "..." suffix; the bound is
        // loose by one.
        assert!(truncated.len() <= 204); // 200 + "..."
    }
}
zeroclaw_runtime/skills/testing.rs

zeroclaw_runtime/skills/
testing.rs