Skip to main content

zeroclaw_runtime/skillforge/
scout.rs

1//! Scout — skill discovery from external sources.
2
3use anyhow::Result;
4use async_trait::async_trait;
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8// ---------------------------------------------------------------------------
9// ScoutSource
10// ---------------------------------------------------------------------------
11
/// Identifies the external source a skill candidate was discovered on.
///
/// Values can be parsed from strings through the `FromStr` implementation,
/// which is case-insensitive and never fails.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ScoutSource {
    /// Parsed from `"github"`; also the fallback for unrecognised names.
    GitHub,
    /// Parsed from `"clawhub"`.
    ClawHub,
    /// Parsed from `"huggingface"` or the short alias `"hf"`.
    HuggingFace,
}
18
19impl std::str::FromStr for ScoutSource {
20    type Err = std::convert::Infallible;
21
22    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
23        Ok(match s.to_lowercase().as_str() {
24            "github" => Self::GitHub,
25            "clawhub" => Self::ClawHub,
26            "huggingface" | "hf" => Self::HuggingFace,
27            _ => {
28                ::zeroclaw_log::record!(
29                    WARN,
30                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
31                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
32                        .with_attrs(::serde_json::json!({"source": s})),
33                    "Unknown scout source, defaulting to GitHub"
34                );
35                Self::GitHub
36            }
37        })
38    }
39}
40
41// ---------------------------------------------------------------------------
42// ScoutResult
43// ---------------------------------------------------------------------------
44
/// One candidate skill repository reported by a [`Scout`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScoutResult {
    /// Repository name (GitHub: the `name` field of the search item).
    pub name: String,
    /// Browser URL of the repository (GitHub: `html_url`); `dedup` keys on it.
    pub url: String,
    /// Free-text description; `GitHubScout` stores an empty string when absent.
    pub description: String,
    /// Star count (GitHub: `stargazers_count`, `0` when missing).
    pub stars: u64,
    /// Primary language reported by the source, if any.
    pub language: Option<String>,
    /// Last-update timestamp, if the source provided one that parsed.
    pub updated_at: Option<DateTime<Utc>>,
    /// Which source produced this result.
    pub source: ScoutSource,
    /// Owner / org extracted from the URL or API response.
    pub owner: String,
    /// Whether the repo has a license file.
    pub has_license: bool,
}
59
60// ---------------------------------------------------------------------------
61// Scout trait
62// ---------------------------------------------------------------------------
63
/// A discovery backend that can list candidate skills from one source.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait Scout: Send + Sync {
    /// Discover candidate skills from the source.
    ///
    /// Returns the candidates found, or an error if discovery failed as a whole.
    async fn discover(&self) -> Result<Vec<ScoutResult>>;
}
69
70// ---------------------------------------------------------------------------
71// GitHubScout
72// ---------------------------------------------------------------------------
73
/// Searches GitHub for repos matching skill-related queries.
pub struct GitHubScout {
    /// HTTP client pre-configured in `new` with default headers and a timeout.
    client: reqwest::Client,
    /// Search phrases; `discover` issues one repository search per entry.
    queries: Vec<String>,
}
79
80impl GitHubScout {
81    pub fn new(token: Option<String>) -> Self {
82        use std::time::Duration;
83
84        let mut headers = reqwest::header::HeaderMap::new();
85        headers.insert(
86            reqwest::header::ACCEPT,
87            "application/vnd.github+json".parse().expect("valid header"),
88        );
89        headers.insert(
90            reqwest::header::USER_AGENT,
91            "ZeroClaw-SkillForge/0.1".parse().expect("valid header"),
92        );
93        if let Some(ref t) = token
94            && let Ok(val) = format!("Bearer {t}").parse()
95        {
96            headers.insert(reqwest::header::AUTHORIZATION, val);
97        }
98
99        let client = reqwest::Client::builder()
100            .default_headers(headers)
101            .timeout(Duration::from_secs(30))
102            .build()
103            .expect("failed to build reqwest client");
104
105        Self {
106            client,
107            queries: vec!["zeroclaw skill".into(), "ai agent skill".into()],
108        }
109    }
110
111    /// Parse the GitHub search/repositories JSON response.
112    fn parse_items(body: &serde_json::Value) -> Vec<ScoutResult> {
113        let items = match body.get("items").and_then(|v| v.as_array()) {
114            Some(arr) => arr,
115            None => return vec![],
116        };
117
118        items
119            .iter()
120            .filter_map(|item| {
121                let name = item.get("name")?.as_str()?.to_string();
122                let url = item.get("html_url")?.as_str()?.to_string();
123                let description = item
124                    .get("description")
125                    .and_then(|v| v.as_str())
126                    .unwrap_or("")
127                    .to_string();
128                let stars = item
129                    .get("stargazers_count")
130                    .and_then(|v| v.as_u64())
131                    .unwrap_or(0);
132                let language = item
133                    .get("language")
134                    .and_then(|v| v.as_str())
135                    .map(String::from);
136                let updated_at = item
137                    .get("updated_at")
138                    .and_then(|v| v.as_str())
139                    .and_then(|s| s.parse::<DateTime<Utc>>().ok());
140                let owner = item
141                    .get("owner")
142                    .and_then(|o| o.get("login"))
143                    .and_then(|v| v.as_str())
144                    .unwrap_or("unknown")
145                    .to_string();
146                let has_license = item.get("license").map(|v| !v.is_null()).unwrap_or(false);
147
148                Some(ScoutResult {
149                    name,
150                    url,
151                    description,
152                    stars,
153                    language,
154                    updated_at,
155                    source: ScoutSource::GitHub,
156                    owner,
157                    has_license,
158                })
159            })
160            .collect()
161    }
162}
163
164#[async_trait]
165impl Scout for GitHubScout {
166    async fn discover(&self) -> Result<Vec<ScoutResult>> {
167        let mut all: Vec<ScoutResult> = Vec::new();
168
169        for query in &self.queries {
170            let url = format!(
171                "https://api.github.com/search/repositories?q={}&sort=stars&order=desc&per_page=30",
172                urlencoding(query)
173            );
174            ::zeroclaw_log::record!(
175                DEBUG,
176                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
177                    .with_attrs(::serde_json::json!({"query": query.as_str()})),
178                "Searching GitHub"
179            );
180
181            let resp = match self.client.get(&url).send().await {
182                Ok(r) => r,
183                Err(e) => {
184                    ::zeroclaw_log::record!(WARN, ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note).with_outcome(::zeroclaw_log::EventOutcome::Unknown).with_attrs(::serde_json::json!({"query": query.as_str(), "error": format!("{}", e)})), "GitHub API request failed, skipping query");
185                    continue;
186                }
187            };
188
189            if !resp.status().is_success() {
190                ::zeroclaw_log::record!(WARN, ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note).with_outcome(::zeroclaw_log::EventOutcome::Unknown).with_attrs(::serde_json::json!({"status": resp.status().to_string(), "query": query.as_str()})), "GitHub search returned non-200");
191                continue;
192            }
193
194            let body: serde_json::Value = match resp.json().await {
195                Ok(v) => v,
196                Err(e) => {
197                    ::zeroclaw_log::record!(WARN, ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note).with_outcome(::zeroclaw_log::EventOutcome::Unknown).with_attrs(::serde_json::json!({"query": query.as_str(), "error": format!("{}", e)})), "Failed to parse GitHub response, skipping query");
198                    continue;
199                }
200            };
201
202            let mut items = Self::parse_items(&body);
203            ::zeroclaw_log::record!(
204                DEBUG,
205                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
206                    .with_attrs(
207                        ::serde_json::json!({"count": items.len(), "query": query.as_str()})
208                    ),
209                "Parsed items"
210            );
211            all.append(&mut items);
212        }
213
214        dedup(&mut all);
215        Ok(all)
216    }
217}
218
219// ---------------------------------------------------------------------------
220// Helpers
221// ---------------------------------------------------------------------------
222
/// Encode `s` for use as a URL query-string value (form-urlencoded style).
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are kept verbatim, a space
/// becomes `+`, and every other byte of the UTF-8 encoding is written as
/// `%XX` with upper-case hex digits. Literal `+`, `%`, `=`, `?` and non-ASCII
/// text therefore survive a round trip instead of corrupting the query.
fn urlencoding(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                encoded.push(char::from(byte));
            }
            b' ' => encoded.push('+'),
            _ => {
                // Each nibble is < 16, so the table lookups cannot go out of bounds.
                encoded.push('%');
                encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    encoded
}
227
228/// Deduplicate scout results by URL (keeps first occurrence).
229pub fn dedup(results: &mut Vec<ScoutResult>) {
230    let mut seen = std::collections::HashSet::new();
231    results.retain(|r| seen.insert(r.url.clone()));
232}
233
234// ---------------------------------------------------------------------------
235// Tests
236// ---------------------------------------------------------------------------
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a GitHub-sourced fixture owned by `x` with empty optional fields.
    fn fixture(name: &str, url: &str, stars: u64, has_license: bool) -> ScoutResult {
        ScoutResult {
            name: name.into(),
            url: url.into(),
            description: String::new(),
            stars,
            language: None,
            updated_at: None,
            source: ScoutSource::GitHub,
            owner: "x".into(),
            has_license,
        }
    }

    #[test]
    fn scout_source_from_str() {
        let cases = [
            ("github", ScoutSource::GitHub),
            ("GitHub", ScoutSource::GitHub),
            ("clawhub", ScoutSource::ClawHub),
            ("huggingface", ScoutSource::HuggingFace),
            ("hf", ScoutSource::HuggingFace),
            // unknown falls back to GitHub
            ("unknown", ScoutSource::GitHub),
        ];

        for (input, expected) in cases {
            assert_eq!(input.parse::<ScoutSource>().unwrap(), expected);
        }
    }

    #[test]
    fn dedup_removes_duplicates() {
        let mut results = vec![
            fixture("a", "https://github.com/x/a", 10, true),
            fixture("a-dup", "https://github.com/x/a", 10, true),
            fixture("b", "https://github.com/x/b", 5, false),
        ];

        dedup(&mut results);

        let names: Vec<&str> = results.iter().map(|r| r.name.as_str()).collect();
        assert_eq!(names.len(), 2);
        assert_eq!(names[0], "a");
        assert_eq!(names[1], "b");
    }

    #[test]
    fn parse_github_items() {
        let json = serde_json::json!({
            "total_count": 1,
            "items": [
                {
                    "name": "cool-skill",
                    "html_url": "https://github.com/user/cool-skill",
                    "description": "A cool skill",
                    "stargazers_count": 42,
                    "language": "Rust",
                    "updated_at": "2026-01-15T10:00:00Z",
                    "owner": { "login": "user" },
                    "license": { "spdx_id": "MIT" }
                }
            ]
        });

        let items = GitHubScout::parse_items(&json);
        assert_eq!(items.len(), 1);

        let first = &items[0];
        assert_eq!(first.name, "cool-skill");
        assert_eq!(first.stars, 42);
        assert!(first.has_license);
        assert_eq!(first.owner, "user");
    }

    #[test]
    fn urlencoding_works() {
        assert_eq!(urlencoding("hello world"), "hello+world");
        assert_eq!(urlencoding("a&b#c"), "a%26b%23c");
    }
}