Skip to main content

zeroclaw_tools/
text_browser.rs

1use async_trait::async_trait;
2use serde_json::json;
3use std::sync::Arc;
4use std::time::Duration;
5use zeroclaw_api::tool::{Tool, ToolResult};
6use zeroclaw_config::policy::SecurityPolicy;
7
/// Text browser tool: renders web pages as plain text using text-based browsers
/// (lynx, links, w3m). Ideal for headless/SSH environments where graphical
/// browsers are unavailable.
pub struct TextBrowserTool {
    /// Shared security policy. `execute` consults it through `can_act()` and
    /// `record_action()` before any browser process is started.
    security: Arc<SecurityPolicy>,
    /// Browser name taken from configuration. It is tried before auto-detection
    /// when the caller does not request a browser explicitly.
    preferred_browser: Option<String>,
    /// Per-request timeout in seconds. `execute` substitutes 30 when this is 0.
    timeout_secs: u64,
    /// Size limit that `truncate_response` applies to the rendered page text.
    max_response_size: usize,
}
17
/// The text browsers we support, in order of auto-detection preference.
///
/// `detect_browser` probes the entries in this order, and `resolve_browser`
/// rejects an explicitly requested browser whose name is not listed here.
const SUPPORTED_BROWSERS: &[&str] = &["lynx", "links", "w3m"];
20
21impl TextBrowserTool {
22    pub fn new(
23        security: Arc<SecurityPolicy>,
24        preferred_browser: Option<String>,
25        timeout_secs: u64,
26    ) -> Self {
27        Self {
28            security,
29            preferred_browser,
30            timeout_secs,
31            max_response_size: 500_000, // 500KB, consistent with web_fetch
32        }
33    }
34
35    fn validate_url(url: &str) -> anyhow::Result<String> {
36        let url = url.trim();
37
38        if url.is_empty() {
39            anyhow::bail!("URL cannot be empty");
40        }
41
42        if url.chars().any(char::is_whitespace) {
43            anyhow::bail!("URL cannot contain whitespace");
44        }
45
46        if !url.starts_with("http://") && !url.starts_with("https://") {
47            anyhow::bail!("Only http:// and https:// URLs are allowed");
48        }
49
50        Ok(url.to_string())
51    }
52
53    fn truncate_response(&self, text: &str) -> String {
54        if text.len() > self.max_response_size {
55            let mut truncated = text
56                .chars()
57                .take(self.max_response_size)
58                .collect::<String>();
59            truncated.push_str("\n\n... [Response truncated due to size limit] ...");
60            truncated
61        } else {
62            text.to_string()
63        }
64    }
65
66    /// Detect which text browser is available on the system.
67    async fn detect_browser() -> Option<String> {
68        for browser in SUPPORTED_BROWSERS {
69            if let Ok(output) = tokio::process::Command::new("which")
70                .arg(browser)
71                .output()
72                .await
73                && output.status.success()
74            {
75                return Some((*browser).to_string());
76            }
77        }
78        None
79    }
80
81    /// Resolve which browser to use: prefer configured, then auto-detect.
82    async fn resolve_browser(&self, requested: Option<&str>) -> anyhow::Result<String> {
83        // If the caller explicitly requested a browser via the tool parameter, use it.
84        if let Some(browser) = requested {
85            let browser = browser.trim().to_lowercase();
86            if !SUPPORTED_BROWSERS.contains(&browser.as_str()) {
87                anyhow::bail!(
88                    "Unsupported text browser '{browser}'. Supported: {}",
89                    SUPPORTED_BROWSERS.join(", ")
90                );
91            }
92            // Verify it's installed
93            let installed = tokio::process::Command::new("which")
94                .arg(&browser)
95                .output()
96                .await
97                .map(|o| o.status.success())
98                .unwrap_or(false);
99            if !installed {
100                anyhow::bail!("Requested text browser '{browser}' is not installed");
101            }
102            return Ok(browser);
103        }
104
105        // If a preferred browser is set in config, try it first.
106        if let Some(ref preferred) = self.preferred_browser {
107            let preferred = preferred.trim().to_lowercase();
108            if SUPPORTED_BROWSERS.contains(&preferred.as_str()) {
109                let installed = tokio::process::Command::new("which")
110                    .arg(&preferred)
111                    .output()
112                    .await
113                    .map(|o| o.status.success())
114                    .unwrap_or(false);
115                if installed {
116                    return Ok(preferred);
117                }
118                ::zeroclaw_log::record!(
119                    WARN,
120                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
121                        .with_outcome(::zeroclaw_log::EventOutcome::Unknown)
122                        .with_attrs(::serde_json::json!({"preferred": preferred})),
123                    "Configured preferred text browser '' is not installed, falling back to auto-detect"
124                );
125            }
126        }
127
128        // Auto-detect
129        Self::detect_browser().await.ok_or_else(|| {
130            let supported = SUPPORTED_BROWSERS.join(", ");
131            ::zeroclaw_log::record!(
132                ERROR,
133                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
134                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
135                    .with_attrs(::serde_json::json!({"supported": &supported})),
136                "text_browser: no text browser installed"
137            );
138            anyhow::Error::msg(format!(
139                "No text browser found. Install one of: {supported}"
140            ))
141        })
142    }
143
144    /// Build the command arguments for the selected browser with `-dump` flag.
145    fn build_dump_args(_browser: &str, url: &str) -> Vec<String> {
146        // All supported browsers (lynx, links, w3m) use the same `-dump` flag
147        vec!["-dump".to_string(), url.to_string()]
148    }
149}
150
151#[async_trait]
152impl Tool for TextBrowserTool {
    /// Returns the tool's identifier, `"text_browser"`.
    fn name(&self) -> &str {
        "text_browser"
    }
156
157    fn description(&self) -> &str {
158        "Render a web page as plain text using a text-based browser (lynx, links, or w3m). \
159         Ideal for headless/SSH environments without a graphical browser. \
160         Auto-detects available browser or uses a configured preference."
161    }
162
163    fn parameters_schema(&self) -> serde_json::Value {
164        json!({
165            "type": "object",
166            "properties": {
167                "url": {
168                    "type": "string",
169                    "description": "The HTTP or HTTPS URL to render as plain text"
170                },
171                "browser": {
172                    "type": "string",
173                    "description": "Text browser to use: \"lynx\", \"links\", or \"w3m\". If omitted, auto-detects an available browser.",
174                    "enum": ["lynx", "links", "w3m"]
175                }
176            },
177            "required": ["url"]
178        })
179    }
180
181    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
182        let url = args.get("url").and_then(|v| v.as_str()).ok_or_else(|| {
183            ::zeroclaw_log::record!(
184                WARN,
185                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
186                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
187                    .with_attrs(::serde_json::json!({"param": "url"})),
188                "text_browser: missing url parameter"
189            );
190            anyhow::Error::msg("Missing 'url' parameter")
191        })?;
192
193        if !self.security.can_act() {
194            return Ok(ToolResult {
195                success: false,
196                output: String::new(),
197                error: Some("Action blocked: autonomy is read-only".into()),
198            });
199        }
200
201        if !self.security.record_action() {
202            return Ok(ToolResult {
203                success: false,
204                output: String::new(),
205                error: Some("Action blocked: rate limit exceeded".into()),
206            });
207        }
208
209        let url = match Self::validate_url(url) {
210            Ok(v) => v,
211            Err(e) => {
212                return Ok(ToolResult {
213                    success: false,
214                    output: String::new(),
215                    error: Some(e.to_string()),
216                });
217            }
218        };
219
220        let requested_browser = args.get("browser").and_then(|v| v.as_str());
221
222        let browser = match self.resolve_browser(requested_browser).await {
223            Ok(b) => b,
224            Err(e) => {
225                return Ok(ToolResult {
226                    success: false,
227                    output: String::new(),
228                    error: Some(e.to_string()),
229                });
230            }
231        };
232
233        let dump_args = Self::build_dump_args(&browser, &url);
234
235        let timeout = Duration::from_secs(if self.timeout_secs == 0 {
236            ::zeroclaw_log::record!(
237                WARN,
238                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
239                    .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
240                "text_browser: timeout_secs is 0, using safe default of 30s"
241            );
242            30
243        } else {
244            self.timeout_secs
245        });
246
247        let result = tokio::time::timeout(
248            timeout,
249            tokio::process::Command::new(&browser)
250                .args(&dump_args)
251                .output(),
252        )
253        .await;
254
255        match result {
256            Ok(Ok(output)) => {
257                if output.status.success() {
258                    let text = String::from_utf8_lossy(&output.stdout).into_owned();
259                    let text = self.truncate_response(&text);
260                    Ok(ToolResult {
261                        success: true,
262                        output: text,
263                        error: None,
264                    })
265                } else {
266                    let stderr = String::from_utf8_lossy(&output.stderr);
267                    Ok(ToolResult {
268                        success: false,
269                        output: String::new(),
270                        error: Some(format!(
271                            "{browser} exited with status {}: {}",
272                            output.status,
273                            stderr.trim()
274                        )),
275                    })
276                }
277            }
278            Ok(Err(e)) => Ok(ToolResult {
279                success: false,
280                output: String::new(),
281                error: Some(format!("Failed to execute {browser}: {e}")),
282            }),
283            Err(_) => Ok(ToolResult {
284                success: false,
285                output: String::new(),
286                error: Some(format!(
287                    "{browser} timed out after {} seconds",
288                    timeout.as_secs()
289                )),
290            }),
291        }
292    }
293}
294
#[cfg(test)]
mod tests {
    use super::*;
    use zeroclaw_config::autonomy::AutonomyLevel;
    use zeroclaw_config::policy::SecurityPolicy;

    /// Builds a tool around `policy` with no preferred browser and a 30s timeout.
    fn tool_with(policy: SecurityPolicy) -> TextBrowserTool {
        TextBrowserTool::new(Arc::new(policy), None, 30)
    }

    /// Builds a tool with supervised autonomy and otherwise default policy.
    fn test_tool() -> TextBrowserTool {
        tool_with(SecurityPolicy {
            autonomy: AutonomyLevel::Supervised,
            ..SecurityPolicy::default()
        })
    }

    #[test]
    fn name_is_text_browser() {
        let tool = test_tool();
        assert_eq!(tool.name(), "text_browser");
    }

    #[test]
    fn parameters_schema_requires_url() {
        let tool = test_tool();
        let schema = tool.parameters_schema();
        assert!(schema["properties"]["url"].is_object());
        let required = schema["required"].as_array().unwrap();
        assert!(required.iter().any(|v| v.as_str() == Some("url")));
    }

    #[test]
    fn parameters_schema_has_optional_browser() {
        let tool = test_tool();
        let schema = tool.parameters_schema();
        assert!(schema["properties"]["browser"].is_object());
        let required = schema["required"].as_array().unwrap();
        assert!(!required.iter().any(|v| v.as_str() == Some("browser")));
    }

    #[test]
    fn validate_url_accepts_http() {
        let got = TextBrowserTool::validate_url("http://example.com/page").unwrap();
        assert_eq!(got, "http://example.com/page");
    }

    #[test]
    fn validate_url_accepts_https() {
        let got = TextBrowserTool::validate_url("https://example.com/page").unwrap();
        assert_eq!(got, "https://example.com/page");
    }

    #[test]
    fn validate_url_trims_surrounding_whitespace() {
        let got = TextBrowserTool::validate_url("  https://example.com/page \n").unwrap();
        assert_eq!(got, "https://example.com/page");
    }

    #[test]
    fn validate_url_rejects_empty() {
        let err = TextBrowserTool::validate_url("").unwrap_err().to_string();
        assert!(err.contains("empty"));
    }

    #[test]
    fn validate_url_rejects_ftp() {
        let err = TextBrowserTool::validate_url("ftp://example.com")
            .unwrap_err()
            .to_string();
        assert!(err.contains("http://") || err.contains("https://"));
    }

    #[test]
    fn validate_url_rejects_missing_scheme() {
        let err = TextBrowserTool::validate_url("example.com/page")
            .unwrap_err()
            .to_string();
        assert!(err.contains("http://") || err.contains("https://"));
    }

    #[test]
    fn validate_url_rejects_whitespace() {
        let err = TextBrowserTool::validate_url("https://example.com/hello world")
            .unwrap_err()
            .to_string();
        assert!(err.contains("whitespace"));
    }

    #[test]
    fn truncate_within_limit() {
        let tool = test_tool();
        let text = "hello world";
        assert_eq!(tool.truncate_response(text), "hello world");
    }

    #[test]
    fn truncate_over_limit() {
        let mut tool = tool_with(SecurityPolicy::default());
        tool.max_response_size = 10;
        let text = "hello world this is long";
        let truncated = tool.truncate_response(text);
        // The input is ASCII, so the first ten characters are the first ten bytes.
        assert!(truncated.starts_with("hello worl\n\n"));
        assert!(!truncated.contains("hello world"));
        assert!(truncated.contains("[Response truncated"));
    }

    #[test]
    fn build_dump_args_lynx() {
        let args = TextBrowserTool::build_dump_args("lynx", "https://example.com");
        assert_eq!(args, vec!["-dump", "https://example.com"]);
    }

    #[test]
    fn build_dump_args_links() {
        let args = TextBrowserTool::build_dump_args("links", "https://example.com");
        assert_eq!(args, vec!["-dump", "https://example.com"]);
    }

    #[test]
    fn build_dump_args_w3m() {
        let args = TextBrowserTool::build_dump_args("w3m", "https://example.com");
        assert_eq!(args, vec!["-dump", "https://example.com"]);
    }

    #[tokio::test]
    async fn blocks_readonly_mode() {
        let tool = tool_with(SecurityPolicy {
            autonomy: AutonomyLevel::ReadOnly,
            ..SecurityPolicy::default()
        });
        let result = tool
            .execute(json!({"url": "https://example.com"}))
            .await
            .unwrap();
        assert!(!result.success);
        assert!(result.error.unwrap().contains("read-only"));
    }

    #[tokio::test]
    async fn blocks_rate_limited() {
        let tool = tool_with(SecurityPolicy {
            max_actions_per_hour: 0,
            ..SecurityPolicy::default()
        });
        let result = tool
            .execute(json!({"url": "https://example.com"}))
            .await
            .unwrap();
        assert!(!result.success);
        assert!(result.error.unwrap().contains("rate limit"));
    }
}