1use anyhow::Context;
9use async_trait::async_trait;
10use serde::{Deserialize, Serialize};
11use serde_json::{Value, json};
12use std::net::ToSocketAddrs;
13use std::process::Stdio;
14use std::sync::Arc;
15use std::time::Duration;
16use tokio::process::Command;
17use zeroclaw_api::tool::{Tool, ToolResult};
18use zeroclaw_config::policy::SecurityPolicy;
19
/// Connection and policy settings for the `computer_use` sidecar backend.
#[derive(Clone)]
pub struct ComputerUseConfig {
    /// URL that sidecar action requests are POSTed to.
    pub endpoint: String,
    /// Optional token sent as a bearer credential with each sidecar request.
    pub api_key: Option<String>,
    /// Per-request timeout in milliseconds; zero is rejected at validation time.
    pub timeout_ms: u64,
    /// Whether an endpoint on a non-private (public) host is permitted.
    pub allow_remote_endpoint: bool,
    /// Window allowlist forwarded to the sidecar inside the request policy.
    pub window_allowlist: Vec<String>,
    /// Optional inclusive upper bound for X coordinates of mouse actions.
    pub max_coordinate_x: Option<i64>,
    /// Optional inclusive upper bound for Y coordinates of mouse actions.
    pub max_coordinate_y: Option<i64>,
}
31
32impl std::fmt::Debug for ComputerUseConfig {
33 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34 f.debug_struct("ComputerUseConfig")
35 .field("endpoint", &self.endpoint)
36 .field("timeout_ms", &self.timeout_ms)
37 .field("allow_remote_endpoint", &self.allow_remote_endpoint)
38 .field("window_allowlist", &self.window_allowlist)
39 .field("max_coordinate_x", &self.max_coordinate_x)
40 .field("max_coordinate_y", &self.max_coordinate_y)
41 .finish_non_exhaustive()
42 }
43}
44
45impl Default for ComputerUseConfig {
46 fn default() -> Self {
47 Self {
48 endpoint: "http://127.0.0.1:8787/v1/actions".into(),
49 api_key: None,
50 timeout_ms: 15_000,
51 allow_remote_endpoint: false,
52 window_allowlist: Vec::new(),
53 max_coordinate_x: None,
54 max_coordinate_y: None,
55 }
56 }
57}
58
59pub struct BrowserTool {
61 security: Arc<SecurityPolicy>,
62 allowed_domains: Vec<String>,
63 session_name: Option<String>,
64 backend: String,
65 headed: Option<bool>,
66 #[allow(dead_code)] native_headless: bool,
68 #[allow(dead_code)]
69 native_webdriver_url: String,
70 #[allow(dead_code)]
71 native_chrome_path: Option<String>,
72 computer_use: ComputerUseConfig,
73 #[cfg(feature = "browser-native")]
74 native_state: tokio::sync::Mutex<native_backend::NativeBrowserState>,
75}
76
/// Backend selection as written in configuration, including the `auto` choice.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BrowserBackendKind {
    /// The external `agent-browser` CLI.
    AgentBrowser,
    /// The in-process WebDriver backend (needs the `browser-native` feature).
    RustNative,
    /// The computer-use sidecar reached over HTTP.
    ComputerUse,
    /// Pick the first usable backend at resolution time.
    Auto,
}
84
/// Concrete backend chosen for a call; unlike the configured kind there is no `auto`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ResolvedBackend {
    /// The external `agent-browser` CLI.
    AgentBrowser,
    /// The in-process WebDriver backend.
    RustNative,
    /// The computer-use sidecar.
    ComputerUse,
}
91
92impl BrowserBackendKind {
93 fn parse(raw: &str) -> anyhow::Result<Self> {
94 let key = raw.trim().to_ascii_lowercase().replace('-', "_");
95 match key.as_str() {
96 "agent_browser" | "agentbrowser" => Ok(Self::AgentBrowser),
97 "rust_native" | "native" => Ok(Self::RustNative),
98 "computer_use" | "computeruse" => Ok(Self::ComputerUse),
99 "auto" => Ok(Self::Auto),
100 _ => anyhow::bail!(
101 "Unsupported browser backend '{raw}'. Use 'agent_browser', 'rust_native', 'computer_use', or 'auto'"
102 ),
103 }
104 }
105
106 fn as_str(self) -> &'static str {
107 match self {
108 Self::AgentBrowser => "agent_browser",
109 Self::RustNative => "rust_native",
110 Self::ComputerUse => "computer_use",
111 Self::Auto => "auto",
112 }
113 }
114}
115
116#[derive(Debug, Deserialize)]
118struct AgentBrowserResponse {
119 success: bool,
120 data: Option<Value>,
121 error: Option<String>,
122}
123
124#[derive(Debug, Deserialize)]
126struct ComputerUseResponse {
127 #[serde(default)]
128 success: Option<bool>,
129 #[serde(default)]
130 data: Option<Value>,
131 #[serde(default)]
132 error: Option<String>,
133}
134
135#[derive(Debug, Clone, Serialize, Deserialize)]
137#[serde(rename_all = "snake_case")]
138pub enum BrowserAction {
139 Open { url: String },
141 Snapshot {
143 #[serde(default)]
144 interactive_only: bool,
145 #[serde(default)]
146 compact: bool,
147 #[serde(default)]
148 depth: Option<u32>,
149 },
150 Click { selector: String },
152 Fill { selector: String, value: String },
154 Type { selector: String, text: String },
156 GetText { selector: String },
158 GetTitle,
160 GetUrl,
162 Screenshot {
164 #[serde(default)]
165 path: Option<String>,
166 #[serde(default)]
167 full_page: bool,
168 },
169 Wait {
171 #[serde(default)]
172 selector: Option<String>,
173 #[serde(default)]
174 ms: Option<u64>,
175 #[serde(default)]
176 text: Option<String>,
177 },
178 Press { key: String },
180 Hover { selector: String },
182 Scroll {
184 direction: String,
185 #[serde(default)]
186 pixels: Option<u32>,
187 },
188 IsVisible { selector: String },
190 Close,
192 Find {
194 by: String, value: String,
196 action: String, #[serde(default)]
198 fill_value: Option<String>,
199 },
200}
201
202impl BrowserTool {
203 pub fn new(
204 security: Arc<SecurityPolicy>,
205 allowed_domains: Vec<String>,
206 session_name: Option<String>,
207 ) -> anyhow::Result<Self> {
208 Self::new_with_backend(
209 security,
210 allowed_domains,
211 session_name,
212 "agent_browser".into(),
213 None,
214 true,
215 "http://127.0.0.1:9515".into(),
216 None,
217 ComputerUseConfig::default(),
218 )
219 }
220
221 #[allow(clippy::too_many_arguments)]
222 pub fn new_with_backend(
223 security: Arc<SecurityPolicy>,
224 allowed_domains: Vec<String>,
225 session_name: Option<String>,
226 backend: String,
227 headed: Option<bool>,
228 native_headless: bool,
229 native_webdriver_url: String,
230 native_chrome_path: Option<String>,
231 computer_use: ComputerUseConfig,
232 ) -> anyhow::Result<Self> {
233 Ok(Self {
234 security,
235 allowed_domains: normalize_allowed_domains(allowed_domains)?,
236 session_name,
237 backend,
238 headed,
239 native_headless,
240 native_webdriver_url,
241 native_chrome_path,
242 computer_use,
243 #[cfg(feature = "browser-native")]
244 native_state: tokio::sync::Mutex::new(native_backend::NativeBrowserState::default()),
245 })
246 }
247
248 pub async fn is_agent_browser_available() -> bool {
250 let cmd = if cfg!(target_os = "windows") {
251 "agent-browser.cmd"
252 } else {
253 "agent-browser"
254 };
255 Command::new(cmd)
256 .arg("--version")
257 .stdout(Stdio::null())
258 .stderr(Stdio::null())
259 .status()
260 .await
261 .map(|s| s.success())
262 .unwrap_or(false)
263 }
264
265 pub async fn is_available() -> bool {
267 Self::is_agent_browser_available().await
268 }
269
270 fn configured_backend(&self) -> anyhow::Result<BrowserBackendKind> {
271 BrowserBackendKind::parse(&self.backend)
272 }
273
/// Reports whether this build was compiled with the `browser-native` feature.
fn rust_native_compiled() -> bool {
    const COMPILED_IN: bool = cfg!(feature = "browser-native");
    COMPILED_IN
}
277
278 fn rust_native_available(&self) -> bool {
279 #[cfg(feature = "browser-native")]
280 {
281 native_backend::NativeBrowserState::is_available(
282 self.native_headless,
283 &self.native_webdriver_url,
284 self.native_chrome_path.as_deref(),
285 )
286 }
287 #[cfg(not(feature = "browser-native"))]
288 {
289 false
290 }
291 }
292
293 fn computer_use_endpoint_url(&self) -> anyhow::Result<reqwest::Url> {
294 if self.computer_use.timeout_ms == 0 {
295 anyhow::bail!("browser.computer_use.timeout_ms must be > 0");
296 }
297
298 let endpoint = self.computer_use.endpoint.trim();
299 if endpoint.is_empty() {
300 anyhow::bail!("browser.computer_use.endpoint cannot be empty");
301 }
302
303 let parsed = reqwest::Url::parse(endpoint).map_err(|_| {
304 ::zeroclaw_log::record!(
305 WARN,
306 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
307 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
308 .with_attrs(::serde_json::json!({"endpoint": endpoint})),
309 "browser: invalid computer_use endpoint URL"
310 );
311 anyhow::Error::msg(format!(
312 "Invalid browser.computer_use.endpoint: '{endpoint}'. Expected http(s) URL"
313 ))
314 })?;
315
316 let scheme = parsed.scheme();
317 if scheme != "http" && scheme != "https" {
318 anyhow::bail!("browser.computer_use.endpoint must use http:// or https://");
319 }
320
321 let host = parsed.host_str().ok_or_else(|| {
322 ::zeroclaw_log::record!(
323 WARN,
324 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
325 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
326 "browser: browser.computer_use.endpoint must include host"
327 );
328 anyhow::Error::msg("browser.computer_use.endpoint must include host")
329 })?;
330
331 let host_is_private = is_private_host(host);
332 if !self.computer_use.allow_remote_endpoint && !host_is_private {
333 anyhow::bail!(
334 "browser.computer_use.endpoint host '{host}' is public. Set browser.computer_use.allow_remote_endpoint=true to allow it"
335 );
336 }
337
338 if self.computer_use.allow_remote_endpoint && !host_is_private && scheme != "https" {
339 anyhow::bail!(
340 "browser.computer_use.endpoint must use https:// when allow_remote_endpoint=true and host is public"
341 );
342 }
343
344 Ok(parsed)
345 }
346
347 fn computer_use_available(&self) -> anyhow::Result<bool> {
348 let endpoint = self.computer_use_endpoint_url()?;
349 Ok(endpoint_reachable(&endpoint, Duration::from_millis(500)))
350 }
351
352 async fn resolve_backend(&self) -> anyhow::Result<ResolvedBackend> {
353 let configured = self.configured_backend()?;
354
355 match configured {
356 BrowserBackendKind::AgentBrowser => {
357 if Self::is_agent_browser_available().await {
358 Ok(ResolvedBackend::AgentBrowser)
359 } else {
360 #[cfg(target_os = "windows")]
361 let install_hint = "Install with: npm install -g agent-browser (ensure npm global bin is in PATH)";
362 #[cfg(not(target_os = "windows"))]
363 let install_hint = "Install with: npm install -g agent-browser";
364 anyhow::bail!(
365 "browser.backend='{}' but agent-browser CLI is unavailable. {}",
366 configured.as_str(),
367 install_hint
368 )
369 }
370 }
371 BrowserBackendKind::RustNative => {
372 if !Self::rust_native_compiled() {
373 anyhow::bail!(
374 "browser.backend='rust_native' requires build feature 'browser-native'"
375 );
376 }
377 if !self.rust_native_available() {
378 anyhow::bail!(
379 "Rust-native browser backend is enabled but WebDriver endpoint is unreachable. Set browser.native_webdriver_url and start a compatible driver"
380 );
381 }
382 Ok(ResolvedBackend::RustNative)
383 }
384 BrowserBackendKind::ComputerUse => {
385 if !self.computer_use_available()? {
386 anyhow::bail!(
387 "browser.backend='computer_use' but sidecar endpoint is unreachable. Check browser.computer_use.endpoint and sidecar status"
388 );
389 }
390 Ok(ResolvedBackend::ComputerUse)
391 }
392 BrowserBackendKind::Auto => {
393 if Self::rust_native_compiled() && self.rust_native_available() {
394 return Ok(ResolvedBackend::RustNative);
395 }
396 if Self::is_agent_browser_available().await {
397 return Ok(ResolvedBackend::AgentBrowser);
398 }
399
400 let computer_use_err = match self.computer_use_available() {
401 Ok(true) => return Ok(ResolvedBackend::ComputerUse),
402 Ok(false) => None,
403 Err(err) => Some(err.to_string()),
404 };
405
406 if Self::rust_native_compiled() {
407 if let Some(err) = computer_use_err {
408 anyhow::bail!(
409 "browser.backend='auto' found no usable backend (agent-browser missing, rust-native unavailable, computer-use invalid: {err})"
410 );
411 }
412 anyhow::bail!(
413 "browser.backend='auto' found no usable backend (agent-browser missing, rust-native unavailable, computer-use sidecar unreachable)"
414 )
415 }
416
417 if let Some(err) = computer_use_err {
418 anyhow::bail!(
419 "browser.backend='auto' needs agent-browser CLI, browser-native, or valid computer-use sidecar (error: {err})"
420 );
421 }
422
423 anyhow::bail!(
424 "browser.backend='auto' needs agent-browser CLI, browser-native, or computer-use sidecar"
425 )
426 }
427 }
428 }
429
430 fn validate_url(&self, url: &str) -> anyhow::Result<()> {
432 let url = url.trim();
433
434 if url.is_empty() {
435 anyhow::bail!("URL cannot be empty");
436 }
437
438 if url.starts_with("file://") {
441 anyhow::bail!("file:// URLs are not allowed in browser automation");
442 }
443
444 if !url.starts_with("https://") && !url.starts_with("http://") {
445 anyhow::bail!("Only http:// and https:// URLs are allowed");
446 }
447
448 if self.allowed_domains.is_empty() {
449 anyhow::bail!(
450 "Browser tool enabled but no allowed_domains configured. \
451 Add [browser].allowed_domains in config.toml"
452 );
453 }
454
455 let host = extract_host(url)?;
456
457 if is_private_host(&host) {
458 anyhow::bail!("Blocked local/private host: {host}");
459 }
460
461 if !host_matches_allowlist(&host, &self.allowed_domains) {
462 anyhow::bail!("Host '{host}' not in browser.allowed_domains");
463 }
464
465 Ok(())
466 }
467
468 async fn run_command(&self, args: &[&str]) -> anyhow::Result<AgentBrowserResponse> {
470 let mut cmd = self.agent_browser_command();
471
472 cmd.args(args).arg("--json");
474
475 ::zeroclaw_log::record!(
476 DEBUG,
477 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
478 &format!("Running: agent-browser {} --json", args.join(" "))
479 );
480
481 let output = cmd
482 .stdout(Stdio::piped())
483 .stderr(Stdio::piped())
484 .output()
485 .await?;
486
487 let stdout = String::from_utf8_lossy(&output.stdout);
488 let stderr = String::from_utf8_lossy(&output.stderr);
489
490 if !stderr.is_empty() {
491 ::zeroclaw_log::record!(
492 DEBUG,
493 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
494 &format!("agent-browser stderr: {}", stderr)
495 );
496 }
497
498 if let Ok(resp) = serde_json::from_str::<AgentBrowserResponse>(&stdout) {
500 return Ok(resp);
501 }
502
503 if output.status.success() {
505 Ok(AgentBrowserResponse {
506 success: true,
507 data: Some(json!({ "output": stdout.trim() })),
508 error: None,
509 })
510 } else {
511 Ok(AgentBrowserResponse {
512 success: false,
513 data: None,
514 error: Some(stderr.trim().to_string()),
515 })
516 }
517 }
518
519 fn agent_browser_command(&self) -> Command {
520 let agent_browser_bin = if cfg!(target_os = "windows") {
521 "agent-browser.cmd"
522 } else {
523 "agent-browser"
524 };
525 let mut cmd = Command::new(agent_browser_bin);
526
527 match self.headed {
528 Some(true) => {
529 cmd.env("AGENT_BROWSER_HEADED", "1");
530 }
531 Some(false) => {
532 cmd.env_remove("AGENT_BROWSER_HEADED");
533 }
534 None => {}
535 }
536
537 if is_service_environment() {
540 ensure_browser_env(&mut cmd);
541 }
542
543 if let Some(ref session) = self.session_name {
545 cmd.arg("--session").arg(session);
546 }
547
548 cmd
549 }
550
551 #[allow(clippy::too_many_lines)]
553 async fn execute_agent_browser_action(
554 &self,
555 action: BrowserAction,
556 ) -> anyhow::Result<ToolResult> {
557 match action {
558 BrowserAction::Open { url } => {
559 self.validate_url(&url)?;
560 let resp = self.run_command(&["open", &url]).await?;
561 self.to_result(resp)
562 }
563
564 BrowserAction::Snapshot {
565 interactive_only,
566 compact,
567 depth,
568 } => {
569 let mut args = vec!["snapshot"];
570 if interactive_only {
571 args.push("-i");
572 }
573 if compact {
574 args.push("-c");
575 }
576 let depth_str;
577 if let Some(d) = depth {
578 args.push("-d");
579 depth_str = d.to_string();
580 args.push(&depth_str);
581 }
582 let resp = self.run_command(&args).await?;
583 self.to_result(resp)
584 }
585
586 BrowserAction::Click { selector } => {
587 let resp = self.run_command(&["click", &selector]).await?;
588 self.to_result(resp)
589 }
590
591 BrowserAction::Fill { selector, value } => {
592 let resp = self.run_command(&["fill", &selector, &value]).await?;
593 self.to_result(resp)
594 }
595
596 BrowserAction::Type { selector, text } => {
597 let resp = self.run_command(&["type", &selector, &text]).await?;
598 self.to_result(resp)
599 }
600
601 BrowserAction::GetText { selector } => {
602 let resp = self.run_command(&["get", "text", &selector]).await?;
603 self.to_result(resp)
604 }
605
606 BrowserAction::GetTitle => {
607 let resp = self.run_command(&["get", "title"]).await?;
608 self.to_result(resp)
609 }
610
611 BrowserAction::GetUrl => {
612 let resp = self.run_command(&["get", "url"]).await?;
613 self.to_result(resp)
614 }
615
616 BrowserAction::Screenshot { path, full_page } => {
617 let mut args = vec!["screenshot"];
618 if let Some(ref p) = path {
619 args.push(p);
620 }
621 if full_page {
622 args.push("--full");
623 }
624 let resp = self.run_command(&args).await?;
625 self.to_result(resp)
626 }
627
628 BrowserAction::Wait { selector, ms, text } => {
629 let mut args = vec!["wait"];
630 let ms_str;
631 if let Some(sel) = selector.as_ref() {
632 args.push(sel);
633 } else if let Some(millis) = ms {
634 ms_str = millis.to_string();
635 args.push(&ms_str);
636 } else if let Some(ref t) = text {
637 args.push("--text");
638 args.push(t);
639 }
640 let resp = self.run_command(&args).await?;
641 self.to_result(resp)
642 }
643
644 BrowserAction::Press { key } => {
645 let resp = self.run_command(&["press", &key]).await?;
646 self.to_result(resp)
647 }
648
649 BrowserAction::Hover { selector } => {
650 let resp = self.run_command(&["hover", &selector]).await?;
651 self.to_result(resp)
652 }
653
654 BrowserAction::Scroll { direction, pixels } => {
655 let mut args = vec!["scroll", &direction];
656 let px_str;
657 if let Some(px) = pixels {
658 px_str = px.to_string();
659 args.push(&px_str);
660 }
661 let resp = self.run_command(&args).await?;
662 self.to_result(resp)
663 }
664
665 BrowserAction::IsVisible { selector } => {
666 let resp = self.run_command(&["is", "visible", &selector]).await?;
667 self.to_result(resp)
668 }
669
670 BrowserAction::Close => {
671 let resp = self.run_command(&["close"]).await?;
672 self.to_result(resp)
673 }
674
675 BrowserAction::Find {
676 by,
677 value,
678 action,
679 fill_value,
680 } => {
681 let mut args = vec!["find", &by, &value, &action];
682 if let Some(ref fv) = fill_value {
683 args.push(fv);
684 }
685 let resp = self.run_command(&args).await?;
686 self.to_result(resp)
687 }
688 }
689 }
690
691 #[allow(clippy::unused_async)]
692 async fn execute_rust_native_action(
693 &self,
694 action: BrowserAction,
695 ) -> anyhow::Result<ToolResult> {
696 #[cfg(feature = "browser-native")]
697 {
698 let mut state = self.native_state.lock().await;
699
700 let first_attempt = state
701 .execute_action(
702 action.clone(),
703 self.native_headless,
704 &self.native_webdriver_url,
705 self.native_chrome_path.as_deref(),
706 )
707 .await;
708
709 let output = match first_attempt {
710 Ok(output) => output,
711 Err(err) => {
712 if !is_recoverable_rust_native_error(&err) {
713 return Err(err);
714 }
715
716 state.reset_session().await;
717 state
718 .execute_action(
719 action,
720 self.native_headless,
721 &self.native_webdriver_url,
722 self.native_chrome_path.as_deref(),
723 )
724 .await
725 .with_context(|| "rust_native backend retry after session reset failed")?
726 }
727 };
728
729 Ok(ToolResult {
730 success: true,
731 output: serde_json::to_string_pretty(&output).unwrap_or_default(),
732 error: None,
733 })
734 }
735
736 #[cfg(not(feature = "browser-native"))]
737 {
738 let _ = action;
739 anyhow::bail!(
740 "Rust-native browser backend is not compiled. Rebuild with --features browser-native"
741 )
742 }
743 }
744
745 fn validate_coordinate(&self, key: &str, value: i64, max: Option<i64>) -> anyhow::Result<()> {
746 if value < 0 {
747 anyhow::bail!("'{key}' must be >= 0")
748 }
749 if let Some(limit) = max {
750 if limit < 0 {
751 anyhow::bail!("Configured coordinate limit for '{key}' must be >= 0")
752 }
753 if value > limit {
754 anyhow::bail!("'{key}'={value} exceeds configured limit {limit}")
755 }
756 }
757 Ok(())
758 }
759
760 fn read_required_i64(
761 &self,
762 params: &serde_json::Map<String, Value>,
763 key: &str,
764 ) -> anyhow::Result<i64> {
765 params.get(key).and_then(Value::as_i64).ok_or_else(|| {
766 ::zeroclaw_log::record!(
767 WARN,
768 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
769 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
770 "browser: Missing or invalid '{key}' parameter"
771 );
772 anyhow::Error::msg("Missing or invalid '{key}' parameter")
773 })
774 }
775
776 fn validate_computer_use_action(
777 &self,
778 action: &str,
779 params: &serde_json::Map<String, Value>,
780 ) -> anyhow::Result<()> {
781 match action {
782 "open" => {
783 let url = params.get("url").and_then(Value::as_str).ok_or_else(|| {
784 ::zeroclaw_log::record!(
785 WARN,
786 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
787 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
788 "browser: Missing 'url' for open action"
789 );
790 anyhow::Error::msg("Missing 'url' for open action")
791 })?;
792 self.validate_url(url)?;
793 }
794 "mouse_move" | "mouse_click" => {
795 let x = self.read_required_i64(params, "x")?;
796 let y = self.read_required_i64(params, "y")?;
797 self.validate_coordinate("x", x, self.computer_use.max_coordinate_x)?;
798 self.validate_coordinate("y", y, self.computer_use.max_coordinate_y)?;
799 }
800 "mouse_drag" => {
801 let from_x = self.read_required_i64(params, "from_x")?;
802 let from_y = self.read_required_i64(params, "from_y")?;
803 let to_x = self.read_required_i64(params, "to_x")?;
804 let to_y = self.read_required_i64(params, "to_y")?;
805 self.validate_coordinate("from_x", from_x, self.computer_use.max_coordinate_x)?;
806 self.validate_coordinate("to_x", to_x, self.computer_use.max_coordinate_x)?;
807 self.validate_coordinate("from_y", from_y, self.computer_use.max_coordinate_y)?;
808 self.validate_coordinate("to_y", to_y, self.computer_use.max_coordinate_y)?;
809 }
810 _ => {}
811 }
812 Ok(())
813 }
814
815 async fn execute_computer_use_action(
816 &self,
817 action: &str,
818 args: &Value,
819 ) -> anyhow::Result<ToolResult> {
820 let endpoint = self.computer_use_endpoint_url()?;
821
822 let mut params = args.as_object().cloned().ok_or_else(|| {
823 ::zeroclaw_log::record!(
824 WARN,
825 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
826 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
827 "browser: browser args must be a JSON object"
828 );
829 anyhow::Error::msg("browser args must be a JSON object")
830 })?;
831 params.remove("action");
832
833 self.validate_computer_use_action(action, ¶ms)?;
834
835 let payload = json!({
836 "action": action,
837 "params": params,
838 "policy": {
839 "allowed_domains": self.allowed_domains,
840 "window_allowlist": self.computer_use.window_allowlist,
841 "max_coordinate_x": self.computer_use.max_coordinate_x,
842 "max_coordinate_y": self.computer_use.max_coordinate_y,
843 },
844 "metadata": {
845 "session_name": self.session_name,
846 "source": "zeroclaw.browser",
847 "version": env!("CARGO_PKG_VERSION"),
848 }
849 });
850
851 let client = zeroclaw_config::schema::build_runtime_proxy_client("tool.browser");
852 let mut request = client
853 .post(endpoint)
854 .timeout(Duration::from_millis(self.computer_use.timeout_ms))
855 .json(&payload);
856
857 if let Some(api_key) = self.computer_use.api_key.as_deref() {
858 let token = api_key.trim();
859 if !token.is_empty() {
860 request = request.bearer_auth(token);
861 }
862 }
863
864 let response = request.send().await.with_context(|| {
865 format!(
866 "Failed to call computer-use sidecar at {}",
867 self.computer_use.endpoint
868 )
869 })?;
870
871 let status = response.status();
872 let body = response
873 .text()
874 .await
875 .context("Failed to read computer-use sidecar response body")?;
876
877 if let Ok(parsed) = serde_json::from_str::<ComputerUseResponse>(&body) {
878 if status.is_success() && parsed.success.unwrap_or(true) {
879 let output = parsed
880 .data
881 .map(|data| serde_json::to_string_pretty(&data).unwrap_or_default())
882 .unwrap_or_else(|| {
883 serde_json::to_string_pretty(&json!({
884 "backend": "computer_use",
885 "action": action,
886 "ok": true,
887 }))
888 .unwrap_or_default()
889 });
890
891 return Ok(ToolResult {
892 success: true,
893 output,
894 error: None,
895 });
896 }
897
898 let error = parsed.error.or_else(|| {
899 if status.is_success() && parsed.success == Some(false) {
900 Some("computer-use sidecar returned success=false".to_string())
901 } else {
902 Some(format!(
903 "computer-use sidecar request failed with status {status}"
904 ))
905 }
906 });
907
908 return Ok(ToolResult {
909 success: false,
910 output: String::new(),
911 error,
912 });
913 }
914
915 if status.is_success() {
916 return Ok(ToolResult {
917 success: true,
918 output: body,
919 error: None,
920 });
921 }
922
923 Ok(ToolResult {
924 success: false,
925 output: String::new(),
926 error: Some(format!(
927 "computer-use sidecar request failed with status {status}: {}",
928 body.trim()
929 )),
930 })
931 }
932
933 async fn execute_action(
934 &self,
935 action: BrowserAction,
936 backend: ResolvedBackend,
937 ) -> anyhow::Result<ToolResult> {
938 match backend {
939 ResolvedBackend::AgentBrowser => self.execute_agent_browser_action(action).await,
940 ResolvedBackend::RustNative => self.execute_rust_native_action(action).await,
941 ResolvedBackend::ComputerUse => anyhow::bail!(
942 "Internal error: computer_use backend must be handled before BrowserAction parsing"
943 ),
944 }
945 }
946
947 #[allow(clippy::unnecessary_wraps, clippy::unused_self)]
948 fn to_result(&self, resp: AgentBrowserResponse) -> anyhow::Result<ToolResult> {
949 if resp.success {
950 let output = resp
951 .data
952 .map(|d| serde_json::to_string_pretty(&d).unwrap_or_default())
953 .unwrap_or_default();
954 Ok(ToolResult {
955 success: true,
956 output,
957 error: None,
958 })
959 } else {
960 Ok(ToolResult {
961 success: false,
962 output: String::new(),
963 error: resp.error,
964 })
965 }
966 }
967}
968
969#[async_trait]
970impl Tool for BrowserTool {
971 fn name(&self) -> &str {
972 "browser"
973 }
974
975 fn description(&self) -> &str {
976 concat!(
977 "Web/browser automation with pluggable backends (agent-browser, rust-native, computer_use). ",
978 "Supports DOM actions plus optional OS-level actions (mouse_move, mouse_click, mouse_drag, ",
979 "key_type, key_press, screen_capture) through a computer-use sidecar. Use 'snapshot' to map ",
980 "interactive elements to refs (@e1, @e2). Enforces browser.allowed_domains for open actions."
981 )
982 }
983
984 fn parameters_schema(&self) -> Value {
985 json!({
986 "type": "object",
987 "properties": {
988 "action": {
989 "type": "string",
990 "enum": ["open", "snapshot", "click", "fill", "type", "get_text",
991 "get_title", "get_url", "screenshot", "wait", "press",
992 "hover", "scroll", "is_visible", "close", "find",
993 "mouse_move", "mouse_click", "mouse_drag", "key_type",
994 "key_press", "screen_capture"],
995 "description": "Browser action to perform (OS-level actions require backend=computer_use)"
996 },
997 "url": {
998 "type": "string",
999 "description": "URL to navigate to (for 'open' action)"
1000 },
1001 "selector": {
1002 "type": "string",
1003 "description": "Element selector: @ref (e.g. @e1), CSS (#id, .class), or text=..."
1004 },
1005 "value": {
1006 "type": "string",
1007 "description": "Value to fill or type"
1008 },
1009 "text": {
1010 "type": "string",
1011 "description": "Text to type or wait for"
1012 },
1013 "key": {
1014 "type": "string",
1015 "description": "Key to press (Enter, Tab, Escape, etc.)"
1016 },
1017 "x": {
1018 "type": "integer",
1019 "description": "Screen X coordinate (computer_use: mouse_move/mouse_click)"
1020 },
1021 "y": {
1022 "type": "integer",
1023 "description": "Screen Y coordinate (computer_use: mouse_move/mouse_click)"
1024 },
1025 "from_x": {
1026 "type": "integer",
1027 "description": "Drag source X coordinate (computer_use: mouse_drag)"
1028 },
1029 "from_y": {
1030 "type": "integer",
1031 "description": "Drag source Y coordinate (computer_use: mouse_drag)"
1032 },
1033 "to_x": {
1034 "type": "integer",
1035 "description": "Drag target X coordinate (computer_use: mouse_drag)"
1036 },
1037 "to_y": {
1038 "type": "integer",
1039 "description": "Drag target Y coordinate (computer_use: mouse_drag)"
1040 },
1041 "button": {
1042 "type": "string",
1043 "enum": ["left", "right", "middle"],
1044 "description": "Mouse button for computer_use mouse_click"
1045 },
1046 "direction": {
1047 "type": "string",
1048 "enum": ["up", "down", "left", "right"],
1049 "description": "Scroll direction"
1050 },
1051 "pixels": {
1052 "type": "integer",
1053 "description": "Pixels to scroll"
1054 },
1055 "interactive_only": {
1056 "type": "boolean",
1057 "description": "For snapshot: only show interactive elements"
1058 },
1059 "compact": {
1060 "type": "boolean",
1061 "description": "For snapshot: remove empty structural elements"
1062 },
1063 "depth": {
1064 "type": "integer",
1065 "description": "For snapshot: limit tree depth"
1066 },
1067 "full_page": {
1068 "type": "boolean",
1069 "description": "For screenshot: capture full page"
1070 },
1071 "path": {
1072 "type": "string",
1073 "description": "File path for screenshot"
1074 },
1075 "ms": {
1076 "type": "integer",
1077 "description": "Milliseconds to wait"
1078 },
1079 "by": {
1080 "type": "string",
1081 "enum": ["role", "text", "label", "placeholder", "testid"],
1082 "description": "For find: semantic locator type"
1083 },
1084 "find_action": {
1085 "type": "string",
1086 "enum": ["click", "fill", "text", "hover", "check"],
1087 "description": "For find: action to perform on found element"
1088 },
1089 "fill_value": {
1090 "type": "string",
1091 "description": "For find with fill action: value to fill"
1092 }
1093 },
1094 "required": ["action"]
1095 })
1096 }
1097
1098 async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
1099 if !self.security.can_act() {
1101 return Ok(ToolResult {
1102 success: false,
1103 output: String::new(),
1104 error: Some("Action blocked: autonomy is read-only".into()),
1105 });
1106 }
1107
1108 let backend = match self.resolve_backend().await {
1112 Ok(selected) => selected,
1113 Err(error) => {
1114 return Ok(ToolResult {
1115 success: false,
1116 output: String::new(),
1117 error: Some(error.to_string()),
1118 });
1119 }
1120 };
1121
1122 let action_str = args.get("action").and_then(|v| v.as_str()).ok_or_else(|| {
1124 ::zeroclaw_log::record!(
1125 WARN,
1126 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
1127 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
1128 "browser: Missing 'action' parameter"
1129 );
1130 anyhow::Error::msg("Missing 'action' parameter")
1131 })?;
1132
1133 if !is_supported_browser_action(action_str) {
1134 return Ok(ToolResult {
1135 success: false,
1136 output: String::new(),
1137 error: Some(format!("Unknown action: {action_str}")),
1138 });
1139 }
1140
1141 if backend == ResolvedBackend::ComputerUse {
1142 return self.execute_computer_use_action(action_str, &args).await;
1143 }
1144
1145 if is_computer_use_only_action(action_str) {
1146 return Ok(ToolResult {
1147 success: false,
1148 output: String::new(),
1149 error: Some(unavailable_action_for_backend_error(action_str, backend)),
1150 });
1151 }
1152
1153 let action = match parse_browser_action(action_str, &args) {
1154 Ok(a) => a,
1155 Err(e) => {
1156 return Ok(ToolResult {
1157 success: false,
1158 output: String::new(),
1159 error: Some(e.to_string()),
1160 });
1161 }
1162 };
1163
1164 self.execute_action(action, backend).await
1165 }
1166}
1167
1168#[cfg(feature = "browser-native")]
1169mod native_backend {
1170 use super::BrowserAction;
1171 use anyhow::{Context, Result};
1172 use base64::Engine;
1173 use fantoccini::actions::{InputSource, MouseActions, PointerAction};
1174 use fantoccini::key::Key;
1175 use fantoccini::{Client, ClientBuilder, Locator};
1176 use serde_json::{Map, Value, json};
1177 use std::net::{TcpStream, ToSocketAddrs};
1178 use std::time::Duration;
1179
1180 #[derive(Default)]
1181 pub struct NativeBrowserState {
1182 client: Option<Client>,
1183 }
1184
1185 impl NativeBrowserState {
1186 pub fn is_available(
1187 _headless: bool,
1188 webdriver_url: &str,
1189 _chrome_path: Option<&str>,
1190 ) -> bool {
1191 webdriver_endpoint_reachable(webdriver_url, Duration::from_millis(500))
1192 }
1193
        /// Runs one parsed [`BrowserAction`] against the WebDriver session and
        /// returns a JSON object describing the outcome.
        ///
        /// Every payload carries `"backend": "rust_native"` and the action name.
        /// `Open` is the only action that creates a session (through
        /// `ensure_session`); `Close` tears it down; every other action calls
        /// `active_client` and therefore fails when nothing has been opened.
        ///
        /// # Errors
        /// Returns an error when there is no active session, when a WebDriver
        /// command fails, when a screenshot cannot be written to disk, or when
        /// `Scroll`/`Find` receive an unsupported direction / `find_action`.
        #[allow(clippy::too_many_lines)]
        pub async fn execute_action(
            &mut self,
            action: BrowserAction,
            headless: bool,
            webdriver_url: &str,
            chrome_path: Option<&str>,
        ) -> Result<Value> {
            match action {
                BrowserAction::Open { url } => {
                    // Connect lazily: the session is created on the first `open`.
                    self.ensure_session(headless, webdriver_url, chrome_path)
                        .await?;
                    let client = self.active_client()?;
                    client
                        .goto(&url)
                        .await
                        .with_context(|| format!("Failed to open URL: {url}"))?;
                    // Report the URL the browser says it is on after navigation,
                    // not the one that was requested.
                    let current_url = client
                        .current_url()
                        .await
                        .context("Failed to read current URL after navigation")?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "open",
                        "url": current_url.as_str(),
                    }))
                }
                BrowserAction::Snapshot {
                    interactive_only,
                    compact,
                    depth,
                } => {
                    let client = self.active_client()?;
                    // The snapshot is produced by JavaScript built in
                    // `snapshot_script` and evaluated in the page.
                    let snapshot = client
                        .execute(
                            &snapshot_script(interactive_only, compact, depth.map(i64::from)),
                            vec![],
                        )
                        .await
                        .context("Failed to evaluate snapshot script")?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "snapshot",
                        "data": snapshot,
                    }))
                }
                BrowserAction::Click { selector } => {
                    let client = self.active_client()?;
                    find_element(client, &selector).await?.click().await?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "click",
                        "selector": selector,
                    }))
                }
                BrowserAction::Fill { selector, value } => {
                    let client = self.active_client()?;
                    let element = find_element(client, &selector).await?;
                    // A failed `clear()` is ignored; the keys are sent regardless.
                    let _ = element.clear().await;
                    element.send_keys(&value).await?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "fill",
                        "selector": selector,
                    }))
                }
                BrowserAction::Type { selector, text } => {
                    let client = self.active_client()?;
                    // Unlike `Fill`, the element is not cleared first.
                    find_element(client, &selector)
                        .await?
                        .send_keys(&text)
                        .await?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "type",
                        "selector": selector,
                        // `str::len`, i.e. the UTF-8 byte length of the text.
                        "typed": text.len(),
                    }))
                }
                BrowserAction::GetText { selector } => {
                    let client = self.active_client()?;
                    let text = find_element(client, &selector).await?.text().await?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "get_text",
                        "selector": selector,
                        "text": text,
                    }))
                }
                BrowserAction::GetTitle => {
                    let client = self.active_client()?;
                    let title = client.title().await.context("Failed to read page title")?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "get_title",
                        "title": title,
                    }))
                }
                BrowserAction::GetUrl => {
                    let client = self.active_client()?;
                    let url = client
                        .current_url()
                        .await
                        .context("Failed to read current URL")?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "get_url",
                        "url": url.as_str(),
                    }))
                }
                BrowserAction::Screenshot { path, full_page } => {
                    let client = self.active_client()?;
                    // NOTE: `full_page` is echoed in the payload below but is not
                    // passed to `client.screenshot()`.
                    let png = client
                        .screenshot()
                        .await
                        .context("Failed to capture screenshot")?;
                    let mut payload = json!({
                        "backend": "rust_native",
                        "action": "screenshot",
                        "full_page": full_page,
                        "bytes": png.len(),
                    });

                    // With a path the PNG goes to disk and the path is reported;
                    // without one the image is returned inline as base64.
                    if let Some(path_str) = path {
                        tokio::fs::write(&path_str, &png)
                            .await
                            .with_context(|| format!("Failed to write screenshot to {path_str}"))?;
                        payload["path"] = Value::String(path_str);
                    } else {
                        payload["png_base64"] =
                            Value::String(base64::engine::general_purpose::STANDARD.encode(&png));
                    }

                    Ok(payload)
                }
                BrowserAction::Wait { selector, ms, text } => {
                    let client = self.active_client()?;
                    // Exactly one condition is honoured, in this priority order:
                    // selector, fixed delay, visible text, then a 250 ms default.
                    if let Some(sel) = selector.as_ref() {
                        wait_for_selector(client, sel).await?;
                        Ok(json!({
                            "backend": "rust_native",
                            "action": "wait",
                            "selector": sel,
                        }))
                    } else if let Some(duration_ms) = ms {
                        tokio::time::sleep(Duration::from_millis(duration_ms)).await;
                        Ok(json!({
                            "backend": "rust_native",
                            "action": "wait",
                            "ms": duration_ms,
                        }))
                    } else if let Some(needle) = text.as_ref() {
                        // Text waits are expressed as an XPath "contains text" query.
                        let xpath = xpath_contains_text(needle);
                        client
                            .wait()
                            .for_element(Locator::XPath(&xpath))
                            .await
                            .with_context(|| {
                                format!("Timed out waiting for text to appear: {needle}")
                            })?;
                        Ok(json!({
                            "backend": "rust_native",
                            "action": "wait",
                            "text": needle,
                        }))
                    } else {
                        tokio::time::sleep(Duration::from_millis(250)).await;
                        Ok(json!({
                            "backend": "rust_native",
                            "action": "wait",
                            "ms": 250,
                        }))
                    }
                }
                BrowserAction::Press { key } => {
                    let client = self.active_client()?;
                    let key_input = webdriver_key(&key);
                    // Send the key to the active element; if that lookup fails,
                    // fall back to the `body` element.
                    match client.active_element().await {
                        Ok(element) => {
                            element.send_keys(&key_input).await?;
                        }
                        Err(_) => {
                            find_element(client, "body")
                                .await?
                                .send_keys(&key_input)
                                .await?;
                        }
                    }

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "press",
                        "key": key,
                    }))
                }
                BrowserAction::Hover { selector } => {
                    let client = self.active_client()?;
                    let element = find_element(client, &selector).await?;
                    hover_element(client, &element).await?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "hover",
                        "selector": selector,
                    }))
                }
                BrowserAction::Scroll { direction, pixels } => {
                    let client = self.active_client()?;
                    // Scroll distance defaults to 600 when `pixels` is absent.
                    let amount = i64::from(pixels.unwrap_or(600));
                    let (dx, dy) = match direction.as_str() {
                        "up" => (0, -amount),
                        "down" => (0, amount),
                        "left" => (-amount, 0),
                        "right" => (amount, 0),
                        _ => anyhow::bail!(
                            "Unsupported scroll direction '{direction}'. Use up/down/left/right"
                        ),
                    };

                    // The script returns the resulting scroll offsets, which are
                    // forwarded as `position`.
                    let position = client
                        .execute(
                            "window.scrollBy(arguments[0], arguments[1]); return { x: window.scrollX, y: window.scrollY };",
                            vec![json!(dx), json!(dy)],
                        )
                        .await
                        .context("Failed to execute scroll script")?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "scroll",
                        "position": position,
                    }))
                }
                BrowserAction::IsVisible { selector } => {
                    let client = self.active_client()?;
                    // A missing element is an error here, not `visible: false`.
                    let visible = find_element(client, &selector)
                        .await?
                        .is_displayed()
                        .await?;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "is_visible",
                        "selector": selector,
                        "visible": visible,
                    }))
                }
                BrowserAction::Close => {
                    // Does not require an active session; `reset_session` is a
                    // no-op when nothing is open.
                    self.reset_session().await;

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "close",
                        "closed": true,
                    }))
                }
                BrowserAction::Find {
                    by,
                    value,
                    action,
                    fill_value,
                } => {
                    let client = self.active_client()?;
                    // Translate the semantic locator into a selector string, then
                    // resolve it before looking at the sub-action.
                    let selector = selector_for_find(&by, &value);
                    let element = find_element(client, &selector).await?;

                    let payload = match action.as_str() {
                        "click" => {
                            element.click().await?;
                            json!({"result": "clicked"})
                        }
                        "fill" => {
                            // `fill` is the only sub-action that needs `fill_value`;
                            // its absence is logged and reported as an error.
                            let fill = fill_value.ok_or_else(|| {
                                ::zeroclaw_log::record!(
                                    WARN,
                                    ::zeroclaw_log::Event::new(
                                        module_path!(),
                                        ::zeroclaw_log::Action::Reject
                                    )
                                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
                                    .with_attrs(
                                        ::serde_json::json!({
                                            "find_action": "fill",
                                            "missing": "fill_value",
                                        })
                                    ),
                                    "browser: fill action requires fill_value"
                                );
                                anyhow::Error::msg("find_action='fill' requires fill_value")
                            })?;
                            // A failed `clear()` is ignored, as in `Fill`.
                            let _ = element.clear().await;
                            element.send_keys(&fill).await?;
                            json!({"result": "filled", "typed": fill.len()})
                        }
                        "text" => {
                            let text = element.text().await?;
                            json!({"result": "text", "text": text})
                        }
                        "hover" => {
                            hover_element(client, &element).await?;
                            json!({"result": "hovered"})
                        }
                        "check" => {
                            // Click only when the element is not already checked,
                            // then re-read the state so the caller sees the result.
                            let checked_before = element_checked(&element).await?;
                            if !checked_before {
                                element.click().await?;
                            }
                            let checked_after = element_checked(&element).await?;
                            json!({
                                "result": "checked",
                                "checked_before": checked_before,
                                "checked_after": checked_after,
                            })
                        }
                        _ => anyhow::bail!(
                            "Unsupported find_action '{action}'. Use click/fill/text/hover/check"
                        ),
                    };

                    Ok(json!({
                        "backend": "rust_native",
                        "action": "find",
                        "by": by,
                        "value": value,
                        "selector": selector,
                        "data": payload,
                    }))
                }
            }
        }
1532
1533 pub async fn reset_session(&mut self) {
1534 if let Some(client) = self.client.take() {
1535 let _ = client.close().await;
1536 }
1537 }
1538
1539 async fn ensure_session(
1540 &mut self,
1541 headless: bool,
1542 webdriver_url: &str,
1543 chrome_path: Option<&str>,
1544 ) -> Result<()> {
1545 if self.client.is_some() {
1546 return Ok(());
1547 }
1548
1549 let mut capabilities: Map<String, Value> = Map::new();
1550 let mut chrome_options: Map<String, Value> = Map::new();
1551 let mut args: Vec<Value> = Vec::new();
1552
1553 if headless {
1554 args.push(Value::String("--headless=new".to_string()));
1555 args.push(Value::String("--disable-gpu".to_string()));
1556 }
1557
1558 if super::is_service_environment() {
1562 args.push(Value::String("--no-sandbox".to_string()));
1563 args.push(Value::String("--disable-dev-shm-usage".to_string()));
1564 }
1565
1566 if !args.is_empty() {
1567 chrome_options.insert("args".to_string(), Value::Array(args));
1568 }
1569
1570 if let Some(path) = chrome_path {
1571 let trimmed = path.trim();
1572 if !trimmed.is_empty() {
1573 chrome_options.insert("binary".to_string(), Value::String(trimmed.to_string()));
1574 }
1575 }
1576
1577 if !chrome_options.is_empty() {
1578 capabilities.insert(
1579 "goog:chromeOptions".to_string(),
1580 Value::Object(chrome_options),
1581 );
1582 }
1583
1584 let mut builder =
1585 ClientBuilder::rustls().context("Failed to initialize rustls connector")?;
1586 if !capabilities.is_empty() {
1587 builder.capabilities(capabilities);
1588 }
1589
1590 let client = builder
1591 .connect(webdriver_url)
1592 .await
1593 .with_context(|| {
1594 format!(
1595 "Failed to connect to WebDriver at {webdriver_url}. Start chromedriver/geckodriver first"
1596 )
1597 })?;
1598
1599 self.client = Some(client);
1600 Ok(())
1601 }
1602
1603 fn active_client(&self) -> Result<&Client> {
1604 self.client.as_ref().ok_or_else(|| {
1605 ::zeroclaw_log::record!(
1606 WARN,
1607 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
1608 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
1609 "browser: no active native browser session"
1610 );
1611 anyhow::Error::msg(
1612 "No active native browser session. Run browser action='open' first",
1613 )
1614 })
1615 }
1616 }
1617
1618 fn webdriver_endpoint_reachable(webdriver_url: &str, timeout: Duration) -> bool {
1619 let parsed = match reqwest::Url::parse(webdriver_url) {
1620 Ok(url) => url,
1621 Err(_) => return false,
1622 };
1623
1624 if parsed.scheme() != "http" && parsed.scheme() != "https" {
1625 return false;
1626 }
1627
1628 let host = match parsed.host_str() {
1629 Some(h) if !h.is_empty() => h,
1630 _ => return false,
1631 };
1632
1633 let port = parsed.port_or_known_default().unwrap_or(4444);
1634 let mut addrs = match (host, port).to_socket_addrs() {
1635 Ok(iter) => iter,
1636 Err(_) => return false,
1637 };
1638
1639 let addr = match addrs.next() {
1640 Some(a) => a,
1641 None => return false,
1642 };
1643
1644 TcpStream::connect_timeout(&addr, timeout).is_ok()
1645 }
1646
1647 fn selector_for_find(by: &str, value: &str) -> String {
1648 let escaped = css_attr_escape(value);
1649 match by {
1650 "role" => format!(r#"[role=\"{escaped}\"]"#),
1651 "label" => format!("label={value}"),
1652 "placeholder" => format!(r#"[placeholder=\"{escaped}\"]"#),
1653 "testid" => format!(r#"[data-testid=\"{escaped}\"]"#),
1654 _ => format!("text={value}"),
1655 }
1656 }
1657
1658 async fn wait_for_selector(client: &Client, selector: &str) -> Result<()> {
1659 match parse_selector(selector) {
1660 SelectorKind::Css(css) => {
1661 client
1662 .wait()
1663 .for_element(Locator::Css(&css))
1664 .await
1665 .with_context(|| format!("Timed out waiting for selector '{selector}'"))?;
1666 }
1667 SelectorKind::XPath(xpath) => {
1668 client
1669 .wait()
1670 .for_element(Locator::XPath(&xpath))
1671 .await
1672 .with_context(|| format!("Timed out waiting for selector '{selector}'"))?;
1673 }
1674 }
1675 Ok(())
1676 }
1677
1678 async fn find_element(
1679 client: &Client,
1680 selector: &str,
1681 ) -> Result<fantoccini::elements::Element> {
1682 let element = match parse_selector(selector) {
1683 SelectorKind::Css(css) => client
1684 .find(Locator::Css(&css))
1685 .await
1686 .with_context(|| format!("Failed to find element by CSS '{css}'"))?,
1687 SelectorKind::XPath(xpath) => client
1688 .find(Locator::XPath(&xpath))
1689 .await
1690 .with_context(|| format!("Failed to find element by XPath '{xpath}'"))?,
1691 };
1692 Ok(element)
1693 }
1694
1695 async fn hover_element(client: &Client, element: &fantoccini::elements::Element) -> Result<()> {
1696 let actions = MouseActions::new("mouse".to_string()).then(PointerAction::MoveToElement {
1697 element: element.clone(),
1698 duration: Some(Duration::from_millis(150)),
1699 x: 0.0,
1700 y: 0.0,
1701 });
1702
1703 client
1704 .perform_actions(actions)
1705 .await
1706 .context("Failed to perform hover action")?;
1707 let _ = client.release_actions().await;
1708 Ok(())
1709 }
1710
1711 async fn element_checked(element: &fantoccini::elements::Element) -> Result<bool> {
1712 let checked = element
1713 .prop("checked")
1714 .await
1715 .context("Failed to read checkbox checked property")?
1716 .unwrap_or_default()
1717 .to_ascii_lowercase();
1718 Ok(matches!(checked.as_str(), "true" | "checked" | "1"))
1719 }
1720
    /// Locator strategy chosen by `parse_selector` for a user-supplied selector.
    enum SelectorKind {
        /// A CSS selector, used with `Locator::Css`.
        Css(String),
        /// An XPath expression, used with `Locator::XPath`.
        XPath(String),
    }
1725
1726 fn parse_selector(selector: &str) -> SelectorKind {
1727 let trimmed = selector.trim();
1728 if let Some(text_query) = trimmed.strip_prefix("text=") {
1729 return SelectorKind::XPath(xpath_contains_text(text_query));
1730 }
1731
1732 if let Some(label_query) = trimmed.strip_prefix("label=") {
1733 let literal = xpath_literal(label_query);
1734 return SelectorKind::XPath(format!(
1735 "(//label[contains(normalize-space(.), {literal})]/following::*[self::input or self::textarea or self::select][1] | //*[@aria-label and contains(normalize-space(@aria-label), {literal})] | //label[contains(normalize-space(.), {literal})])"
1736 ));
1737 }
1738
1739 if trimmed.starts_with('@') {
1740 let escaped = css_attr_escape(trimmed);
1741 return SelectorKind::Css(format!(r#"[data-zc-ref=\"{escaped}\"]"#));
1742 }
1743
1744 SelectorKind::Css(trimmed.to_string())
1745 }
1746
1747 fn css_attr_escape(input: &str) -> String {
1748 input
1749 .replace('\\', "\\\\")
1750 .replace('"', "\\\"")
1751 .replace('\n', " ")
1752 }
1753
1754 fn xpath_contains_text(text: &str) -> String {
1755 format!("//*[contains(normalize-space(.), {})]", xpath_literal(text))
1756 }
1757
1758 fn xpath_literal(input: &str) -> String {
1759 if !input.contains('"') {
1760 return format!("\"{input}\"");
1761 }
1762 if !input.contains('\'') {
1763 return format!("'{input}'");
1764 }
1765
1766 let segments: Vec<&str> = input.split('"').collect();
1767 let mut parts: Vec<String> = Vec::new();
1768 for (index, part) in segments.iter().enumerate() {
1769 if !part.is_empty() {
1770 parts.push(format!("\"{part}\""));
1771 }
1772 if index + 1 < segments.len() {
1773 parts.push("'\"'".to_string());
1774 }
1775 }
1776
1777 if parts.is_empty() {
1778 "\"\"".to_string()
1779 } else {
1780 format!("concat({})", parts.join(","))
1781 }
1782 }
1783
1784 fn webdriver_key(key: &str) -> String {
1785 match key.trim().to_ascii_lowercase().as_str() {
1786 "enter" => Key::Enter.to_string(),
1787 "return" => Key::Return.to_string(),
1788 "tab" => Key::Tab.to_string(),
1789 "escape" | "esc" => Key::Escape.to_string(),
1790 "backspace" => Key::Backspace.to_string(),
1791 "delete" => Key::Delete.to_string(),
1792 "space" => Key::Space.to_string(),
1793 "arrowup" | "up" => Key::Up.to_string(),
1794 "arrowdown" | "down" => Key::Down.to_string(),
1795 "arrowleft" | "left" => Key::Left.to_string(),
1796 "arrowright" | "right" => Key::Right.to_string(),
1797 "home" => Key::Home.to_string(),
1798 "end" => Key::End.to_string(),
1799 "pageup" => Key::PageUp.to_string(),
1800 "pagedown" => Key::PageDown.to_string(),
1801 other => other.to_string(),
1802 }
1803 }
1804
1805 fn snapshot_script(interactive_only: bool, compact: bool, depth: Option<i64>) -> String {
1806 let depth_literal = depth
1807 .map(|level| level.to_string())
1808 .unwrap_or_else(|| "null".to_string());
1809
1810 format!(
1811 r#"(() => {{
1812 const interactiveOnly = {interactive_only};
1813 const compact = {compact};
1814 const maxDepth = {depth_literal};
1815 const nodes = [];
1816 const root = document.body || document.documentElement;
1817 let counter = 0;
1818
1819 const isVisible = (el) => {{
1820 const style = window.getComputedStyle(el);
1821 if (style.display === 'none' || style.visibility === 'hidden' || Number(style.opacity || 1) === 0) {{
1822 return false;
1823 }}
1824 const rect = el.getBoundingClientRect();
1825 return rect.width > 0 && rect.height > 0;
1826 }};
1827
1828 const isInteractive = (el) => {{
1829 if (el.matches('a,button,input,select,textarea,summary,[role],*[tabindex]')) return true;
1830 return typeof el.onclick === 'function';
1831 }};
1832
1833 const describe = (el, depth) => {{
1834 const interactive = isInteractive(el);
1835 const text = (el.innerText || el.textContent || '').trim().replace(/\s+/g, ' ').slice(0, 140);
1836 if (interactiveOnly && !interactive) return;
1837 if (compact && !interactive && !text) return;
1838
1839 const ref = '@e' + (++counter);
1840 el.setAttribute('data-zc-ref', ref);
1841 nodes.push({{
1842 ref,
1843 depth,
1844 tag: el.tagName.toLowerCase(),
1845 id: el.id || null,
1846 role: el.getAttribute('role'),
1847 text,
1848 interactive,
1849 }});
1850 }};
1851
1852 const walk = (el, depth) => {{
1853 if (!(el instanceof Element)) return;
1854 if (maxDepth !== null && depth > maxDepth) return;
1855 if (isVisible(el)) {{
1856 describe(el, depth);
1857 }}
1858 for (const child of el.children) {{
1859 walk(child, depth + 1);
1860 if (nodes.length >= 400) return;
1861 }}
1862 }};
1863
1864 if (root) walk(root, 0);
1865
1866 return {{
1867 title: document.title,
1868 url: window.location.href,
1869 count: nodes.length,
1870 nodes,
1871 }};
1872}})();"#
1873 )
1874 }
1875}
1876
1877fn parse_browser_action(action_str: &str, args: &Value) -> anyhow::Result<BrowserAction> {
1881 match action_str {
1882 "open" => {
1883 let url = args.get("url").and_then(|v| v.as_str()).ok_or_else(|| {
1884 ::zeroclaw_log::record!(
1885 WARN,
1886 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
1887 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
1888 "browser: Missing 'url' for open action"
1889 );
1890 anyhow::Error::msg("Missing 'url' for open action")
1891 })?;
1892 Ok(BrowserAction::Open { url: url.into() })
1893 }
1894 "snapshot" => Ok(BrowserAction::Snapshot {
1895 interactive_only: args
1896 .get("interactive_only")
1897 .and_then(serde_json::Value::as_bool)
1898 .unwrap_or(true),
1899 compact: args
1900 .get("compact")
1901 .and_then(serde_json::Value::as_bool)
1902 .unwrap_or(true),
1903 depth: args
1904 .get("depth")
1905 .and_then(serde_json::Value::as_u64)
1906 .map(|d| u32::try_from(d).unwrap_or(u32::MAX)),
1907 }),
1908 "click" => {
1909 let selector = args
1910 .get("selector")
1911 .and_then(|v| v.as_str())
1912 .ok_or_else(|| {
1913 ::zeroclaw_log::record!(
1914 WARN,
1915 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
1916 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
1917 "browser: Missing 'selector' for click"
1918 );
1919 anyhow::Error::msg("Missing 'selector' for click")
1920 })?;
1921 Ok(BrowserAction::Click {
1922 selector: selector.into(),
1923 })
1924 }
1925 "fill" => {
1926 let selector = args
1927 .get("selector")
1928 .and_then(|v| v.as_str())
1929 .ok_or_else(|| {
1930 ::zeroclaw_log::record!(
1931 WARN,
1932 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
1933 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
1934 "browser: Missing 'selector' for fill"
1935 );
1936 anyhow::Error::msg("Missing 'selector' for fill")
1937 })?;
1938 let value = args.get("value").and_then(|v| v.as_str()).ok_or_else(|| {
1939 ::zeroclaw_log::record!(
1940 WARN,
1941 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
1942 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
1943 "browser: Missing 'value' for fill"
1944 );
1945 anyhow::Error::msg("Missing 'value' for fill")
1946 })?;
1947 Ok(BrowserAction::Fill {
1948 selector: selector.into(),
1949 value: value.into(),
1950 })
1951 }
1952 "type" => {
1953 let selector = args
1954 .get("selector")
1955 .and_then(|v| v.as_str())
1956 .ok_or_else(|| {
1957 ::zeroclaw_log::record!(
1958 WARN,
1959 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
1960 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
1961 "browser: Missing 'selector' for type"
1962 );
1963 anyhow::Error::msg("Missing 'selector' for type")
1964 })?;
1965 let text = args.get("text").and_then(|v| v.as_str()).ok_or_else(|| {
1966 ::zeroclaw_log::record!(
1967 WARN,
1968 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
1969 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
1970 "browser: Missing 'text' for type"
1971 );
1972 anyhow::Error::msg("Missing 'text' for type")
1973 })?;
1974 Ok(BrowserAction::Type {
1975 selector: selector.into(),
1976 text: text.into(),
1977 })
1978 }
1979 "get_text" => {
1980 let selector = args
1981 .get("selector")
1982 .and_then(|v| v.as_str())
1983 .ok_or_else(|| {
1984 ::zeroclaw_log::record!(
1985 WARN,
1986 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
1987 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
1988 "browser: Missing 'selector' for get_text"
1989 );
1990 anyhow::Error::msg("Missing 'selector' for get_text")
1991 })?;
1992 Ok(BrowserAction::GetText {
1993 selector: selector.into(),
1994 })
1995 }
1996 "get_title" => Ok(BrowserAction::GetTitle),
1997 "get_url" => Ok(BrowserAction::GetUrl),
1998 "screenshot" => Ok(BrowserAction::Screenshot {
1999 path: args.get("path").and_then(|v| v.as_str()).map(String::from),
2000 full_page: args
2001 .get("full_page")
2002 .and_then(serde_json::Value::as_bool)
2003 .unwrap_or(false),
2004 }),
2005 "wait" => Ok(BrowserAction::Wait {
2006 selector: args
2007 .get("selector")
2008 .and_then(|v| v.as_str())
2009 .map(String::from),
2010 ms: args.get("ms").and_then(serde_json::Value::as_u64),
2011 text: args.get("text").and_then(|v| v.as_str()).map(String::from),
2012 }),
2013 "press" => {
2014 let key = args.get("key").and_then(|v| v.as_str()).ok_or_else(|| {
2015 ::zeroclaw_log::record!(
2016 WARN,
2017 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
2018 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
2019 "browser: Missing 'key' for press"
2020 );
2021 anyhow::Error::msg("Missing 'key' for press")
2022 })?;
2023 Ok(BrowserAction::Press { key: key.into() })
2024 }
2025 "hover" => {
2026 let selector = args
2027 .get("selector")
2028 .and_then(|v| v.as_str())
2029 .ok_or_else(|| {
2030 ::zeroclaw_log::record!(
2031 WARN,
2032 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
2033 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
2034 "browser: Missing 'selector' for hover"
2035 );
2036 anyhow::Error::msg("Missing 'selector' for hover")
2037 })?;
2038 Ok(BrowserAction::Hover {
2039 selector: selector.into(),
2040 })
2041 }
2042 "scroll" => {
2043 let direction = args
2044 .get("direction")
2045 .and_then(|v| v.as_str())
2046 .ok_or_else(|| {
2047 ::zeroclaw_log::record!(
2048 WARN,
2049 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
2050 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
2051 "browser: Missing 'direction' for scroll"
2052 );
2053 anyhow::Error::msg("Missing 'direction' for scroll")
2054 })?;
2055 Ok(BrowserAction::Scroll {
2056 direction: direction.into(),
2057 pixels: args
2058 .get("pixels")
2059 .and_then(serde_json::Value::as_u64)
2060 .map(|p| u32::try_from(p).unwrap_or(u32::MAX)),
2061 })
2062 }
2063 "is_visible" => {
2064 let selector = args
2065 .get("selector")
2066 .and_then(|v| v.as_str())
2067 .ok_or_else(|| {
2068 ::zeroclaw_log::record!(
2069 WARN,
2070 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
2071 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
2072 "browser: Missing 'selector' for is_visible"
2073 );
2074 anyhow::Error::msg("Missing 'selector' for is_visible")
2075 })?;
2076 Ok(BrowserAction::IsVisible {
2077 selector: selector.into(),
2078 })
2079 }
2080 "close" => Ok(BrowserAction::Close),
2081 "find" => {
2082 let by = args.get("by").and_then(|v| v.as_str()).ok_or_else(|| {
2083 ::zeroclaw_log::record!(
2084 WARN,
2085 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
2086 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
2087 "browser: Missing 'by' for find"
2088 );
2089 anyhow::Error::msg("Missing 'by' for find")
2090 })?;
2091 let value = args.get("value").and_then(|v| v.as_str()).ok_or_else(|| {
2092 ::zeroclaw_log::record!(
2093 WARN,
2094 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
2095 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
2096 "browser: Missing 'value' for find"
2097 );
2098 anyhow::Error::msg("Missing 'value' for find")
2099 })?;
2100 let action = args
2101 .get("find_action")
2102 .and_then(|v| v.as_str())
2103 .ok_or_else(|| {
2104 ::zeroclaw_log::record!(
2105 WARN,
2106 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
2107 .with_outcome(::zeroclaw_log::EventOutcome::Failure),
2108 "browser: Missing 'find_action' for find"
2109 );
2110 anyhow::Error::msg("Missing 'find_action' for find")
2111 })?;
2112 Ok(BrowserAction::Find {
2113 by: by.into(),
2114 value: value.into(),
2115 action: action.into(),
2116 fill_value: args
2117 .get("fill_value")
2118 .and_then(|v| v.as_str())
2119 .map(String::from),
2120 })
2121 }
2122 other => anyhow::bail!("Unsupported browser action: {other}"),
2123 }
2124}
2125
/// Returns `true` when `action` is one of the action names the browser tool
/// recognises on any backend.
fn is_supported_browser_action(action: &str) -> bool {
    const SUPPORTED_ACTIONS: [&str; 22] = [
        "open",
        "snapshot",
        "click",
        "fill",
        "type",
        "get_text",
        "get_title",
        "get_url",
        "screenshot",
        "wait",
        "press",
        "hover",
        "scroll",
        "is_visible",
        "close",
        "find",
        "mouse_move",
        "mouse_click",
        "mouse_drag",
        "key_type",
        "key_press",
        "screen_capture",
    ];

    SUPPORTED_ACTIONS.contains(&action)
}
2155
/// Returns `true` for the mouse/keyboard/screen action names that callers
/// reject on every backend other than computer-use.
fn is_computer_use_only_action(action: &str) -> bool {
    const COMPUTER_USE_ACTIONS: [&str; 6] = [
        "mouse_move",
        "mouse_click",
        "mouse_drag",
        "key_type",
        "key_press",
        "screen_capture",
    ];

    COMPUTER_USE_ACTIONS.iter().any(|name| *name == action)
}
2162
2163fn backend_name(backend: ResolvedBackend) -> &'static str {
2164 match backend {
2165 ResolvedBackend::AgentBrowser => "agent_browser",
2166 ResolvedBackend::RustNative => "rust_native",
2167 ResolvedBackend::ComputerUse => "computer_use",
2168 }
2169}
2170
2171fn unavailable_action_for_backend_error(action: &str, backend: ResolvedBackend) -> String {
2172 format!(
2173 "Action '{action}' is unavailable for backend '{}'",
2174 backend_name(backend)
2175 )
2176}
2177
2178#[allow(dead_code)] fn is_recoverable_rust_native_error(err: &anyhow::Error) -> bool {
2180 let message = format!("{err:#}").to_ascii_lowercase();
2181
2182 if message.contains("invalid session id")
2183 || message.contains("no such window")
2184 || message.contains("session not created")
2185 || message.contains("connection reset")
2186 || message.contains("broken pipe")
2187 {
2188 return true;
2189 }
2190
2191 message.contains("webdriver") && (message.contains("timed out") || message.contains("timeout"))
2192}
2193
2194fn normalize_allowed_domains(domains: Vec<String>) -> anyhow::Result<Vec<String>> {
2195 let mut rejected = Vec::new();
2196 let mut normalized = domains
2197 .into_iter()
2198 .filter_map(|d| {
2199 normalize_domain(&d).or_else(|| {
2200 rejected.push(d.clone());
2201 None
2202 })
2203 })
2204 .collect::<Vec<_>>();
2205 if !rejected.is_empty() {
2206 anyhow::bail!(
2207 "Invalid browser.allowed_domains entry(s): [{}]. Each entry must be a valid domain, hostname, IPv4, or IPv6 address.",
2208 rejected.join(", ")
2209 );
2210 }
2211 normalized.sort_unstable();
2212 normalized.dedup();
2213 Ok(normalized)
2214}
2215
2216fn normalize_domain(raw: &str) -> Option<String> {
2217 let input = raw.trim();
2218 if input.is_empty() || input.chars().any(char::is_whitespace) {
2219 return None;
2220 }
2221
2222 let bare_ip = match (input.starts_with('['), input.ends_with(']')) {
2223 (true, true) => &input[1..input.len() - 1],
2224 (false, false) => input,
2225 _ => return None,
2226 };
2227 if let Ok(ip) = bare_ip.parse::<std::net::IpAddr>() {
2228 return Some(ip.to_string().to_lowercase());
2229 }
2230
2231 let parsed = reqwest::Url::parse(input)
2232 .or_else(|_| reqwest::Url::parse(&format!("https://{input}")))
2233 .ok()?;
2234
2235 if !parsed.username().is_empty() || parsed.password().is_some() {
2236 return None;
2237 }
2238
2239 let host = parsed.host_str()?;
2240 let trimmed = host.trim();
2241 let host_no_brackets = match (trimmed.starts_with('['), trimmed.ends_with(']')) {
2242 (true, true) => &trimmed[1..trimmed.len() - 1],
2243 (false, false) => trimmed,
2244 _ => return None,
2245 };
2246 let normalized = host_no_brackets
2247 .trim_start_matches('.')
2248 .trim_end_matches('.');
2249 if normalized.is_empty() {
2250 return None;
2251 }
2252
2253 Some(normalized.to_lowercase())
2254}
2255
2256fn endpoint_reachable(endpoint: &reqwest::Url, timeout: Duration) -> bool {
2257 let host = match endpoint.host_str() {
2258 Some(host) if !host.is_empty() => host,
2259 _ => return false,
2260 };
2261
2262 let port = match endpoint.port_or_known_default() {
2263 Some(port) => port,
2264 None => return false,
2265 };
2266
2267 let mut addrs = match (host, port).to_socket_addrs() {
2268 Ok(addrs) => addrs,
2269 Err(_) => return false,
2270 };
2271
2272 let addr = match addrs.next() {
2273 Some(addr) => addr,
2274 None => return false,
2275 };
2276
2277 std::net::TcpStream::connect_timeout(&addr, timeout).is_ok()
2278}
2279
2280fn extract_host(url_str: &str) -> anyhow::Result<String> {
2281 let url = url_str.trim();
2283 let without_scheme = url
2284 .strip_prefix("https://")
2285 .or_else(|| url.strip_prefix("http://"))
2286 .or_else(|| url.strip_prefix("file://"))
2287 .unwrap_or(url);
2288
2289 let authority = without_scheme.split('/').next().unwrap_or(without_scheme);
2291
2292 let host = if authority.starts_with('[') {
2293 authority.find(']').map_or(authority, |i| &authority[..=i])
2295 } else {
2296 authority.split(':').next().unwrap_or(authority)
2298 };
2299
2300 if host.is_empty() {
2301 anyhow::bail!("Invalid URL: no host");
2302 }
2303
2304 Ok(host.to_lowercase())
2305}
2306
2307fn is_private_host(host: &str) -> bool {
2308 let bare = host
2310 .strip_prefix('[')
2311 .and_then(|h| h.strip_suffix(']'))
2312 .unwrap_or(host);
2313
2314 if bare == "localhost" || bare.ends_with(".localhost") {
2315 return true;
2316 }
2317
2318 if bare
2320 .rsplit('.')
2321 .next()
2322 .is_some_and(|label| label == "local")
2323 {
2324 return true;
2325 }
2326
2327 if let Ok(ip) = bare.parse::<std::net::IpAddr>() {
2329 return match ip {
2330 std::net::IpAddr::V4(v4) => is_non_global_v4(v4),
2331 std::net::IpAddr::V6(v6) => is_non_global_v6(v6),
2332 };
2333 }
2334
2335 false
2336}
2337
/// Returns `true` when `v4` lies in an IPv4 range that is not globally
/// routable according to the IANA special-purpose registry.
///
/// The documentation and IETF-assignment ranges are matched at their real
/// `/24` size, so ordinary public addresses that merely share the first two
/// octets (e.g. `198.51.1.1`, `203.0.1.1`, `192.0.78.1`) are not
/// misclassified, and the whole `0.0.0.0/8` block is covered rather than only
/// `0.0.0.0`.
fn is_non_global_v4(v4: std::net::Ipv4Addr) -> bool {
    let [a, b, c, _] = v4.octets();

    a == 0 // 0.0.0.0/8 "this network" (includes the unspecified address)
        || v4.is_loopback() // 127.0.0.0/8
        || v4.is_private() // 10/8, 172.16/12, 192.168/16
        || v4.is_link_local() // 169.254.0.0/16
        || v4.is_broadcast() // 255.255.255.255
        || v4.is_multicast() // 224.0.0.0/4
        || v4.is_documentation() // 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24
        || (a == 100 && (64..=127).contains(&b)) // 100.64.0.0/10 shared (CGNAT)
        || a >= 240 // 240.0.0.0/4 reserved
        || (a == 192 && b == 0 && c == 0) // 192.0.0.0/24 IETF protocol assignments
        || (a == 198 && (18..=19).contains(&b)) // 198.18.0.0/15 benchmarking
}
2358
2359fn is_non_global_v6(v6: std::net::Ipv6Addr) -> bool {
2361 let segs = v6.segments();
2362 v6.is_loopback()
2363 || v6.is_unspecified()
2364 || v6.is_multicast()
2365 || (segs[0] & 0xfe00) == 0xfc00
2367 || (segs[0] & 0xffc0) == 0xfe80
2369 || v6.to_ipv4_mapped().is_some_and(is_non_global_v4)
2371}
2372
/// Heuristically detects whether the process runs as a background service.
///
/// Returns `true` when either `INVOCATION_ID` or `JOURNAL_STREAM` is set in
/// the environment, or (Linux only) when `HOME` is unset.
// NOTE(review): INVOCATION_ID / JOURNAL_STREAM look like the variables
// systemd exports to the units it starts — confirm against systemd.exec(5).
fn is_service_environment() -> bool {
    let has_unit_marker = std::env::var_os("INVOCATION_ID").is_some()
        || std::env::var_os("JOURNAL_STREAM").is_some();
    if has_unit_marker {
        return true;
    }

    #[cfg(target_os = "linux")]
    {
        // NOTE(review): this OpenRC probe is subsumed by the plain `HOME`
        // check right after it; it is kept to preserve the original logic.
        let openrc_present = std::path::Path::new("/run/openrc").exists();
        if openrc_present && std::env::var_os("HOME").is_none() {
            return true;
        }
        if std::env::var_os("HOME").is_none() {
            return true;
        }
    }

    false
}
2393
/// Prepares the environment of a browser child process.
///
/// - When the current process has no `HOME`, the child gets `HOME=/tmp`.
/// - `CHROMIUM_FLAGS` is extended so that it contains both `--no-sandbox`
///   and `--disable-dev-shm-usage`. Each flag is checked independently and
///   appended only when it is not already present as a whitespace-separated
///   token; existing flags are preserved verbatim. When both are already
///   present the variable is left untouched on `cmd`.
///
/// Only `cmd` is modified; the environment of the current process is read
/// but never changed. A `CHROMIUM_FLAGS` value that is not valid Unicode is
/// treated as empty.
fn ensure_browser_env(cmd: &mut Command) {
    const REQUIRED_FLAGS: [&str; 2] = ["--no-sandbox", "--disable-dev-shm-usage"];

    if std::env::var_os("HOME").is_none() {
        cmd.env("HOME", "/tmp");
    }

    let existing = std::env::var("CHROMIUM_FLAGS").unwrap_or_default();
    // Compare whole tokens so that a longer flag sharing a prefix with a
    // required one is not mistaken for it.
    let missing: Vec<&str> = REQUIRED_FLAGS
        .iter()
        .copied()
        .filter(|flag| !existing.split_whitespace().any(|present| present == *flag))
        .collect();
    if missing.is_empty() {
        return;
    }

    let additions = missing.join(" ");
    let new_flags = if existing.is_empty() {
        additions
    } else {
        format!("{existing} {additions}")
    };
    cmd.env("CHROMIUM_FLAGS", new_flags);
}
2410
/// Checks `host` against the allowlist patterns in `allowed`.
///
/// Pattern semantics:
/// - `*` matches every host.
/// - `*.example.com` and `example.com` behave the same: they match
///   `example.com` itself and any host ending in `.example.com`.
///
/// Comparison is byte-exact; no case folding happens here. Returns `false`
/// for an empty allowlist.
fn host_matches_allowlist(host: &str, allowed: &[String]) -> bool {
    for pattern in allowed {
        if pattern == "*" {
            return true;
        }

        // Both pattern forms reduce to "equals the base domain, or ends with
        // a dot followed by the base domain".
        let base = pattern.strip_prefix("*.").unwrap_or(pattern.as_str());
        let is_subdomain = host
            .strip_suffix(base)
            .is_some_and(|labels| labels.ends_with('.'));

        if host == base || is_subdomain {
            return true;
        }
    }
    false
}
2426
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes the tests in this module that mutate process-wide
    /// environment variables; the test harness runs tests on parallel
    /// threads by default, so unsynchronized mutation makes them flaky.
    static ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Acquires [`ENV_LOCK`], ignoring poisoning left behind by a test that
    /// panicked while holding it.
    fn lock_env() -> std::sync::MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
    }

    /// Overrides one environment variable and restores its previous state
    /// when dropped, including when the owning test panics.
    struct EnvVarGuard {
        key: &'static str,
        previous: Option<std::ffi::OsString>,
    }

    impl EnvVarGuard {
        /// Sets `key` to `value`, remembering the prior value.
        fn set(key: &'static str, value: &str) -> Self {
            let previous = std::env::var_os(key);
            // SAFETY: environment mutations in this module are serialized by
            // `ENV_LOCK`, which callers hold for the lifetime of the guard.
            // NOTE(review): tests that only read the environment do not take
            // the lock.
            unsafe { std::env::set_var(key, value) };
            Self { key, previous }
        }

        /// Removes `key`, remembering the prior value.
        fn unset(key: &'static str) -> Self {
            let previous = std::env::var_os(key);
            // SAFETY: see `EnvVarGuard::set`.
            unsafe { std::env::remove_var(key) };
            Self { key, previous }
        }
    }

    impl Drop for EnvVarGuard {
        fn drop(&mut self) {
            // SAFETY: see `EnvVarGuard::set`; the guard is dropped before the
            // lock guard that was acquired ahead of it.
            match self.previous.take() {
                Some(value) => unsafe { std::env::set_var(self.key, value) },
                None => unsafe { std::env::remove_var(self.key) },
            }
        }
    }

    /// Returns the value explicitly set for `key` on `cmd`, if any.
    fn explicit_env(cmd: &Command, key: &str) -> Option<String> {
        let key = std::ffi::OsStr::new(key);
        cmd.as_std()
            .get_envs()
            .find(|(name, _)| *name == key)
            .and_then(|(_, value)| value)
            .map(|value| value.to_string_lossy().into_owned())
    }

    // Entries are trimmed, lowercased, de-duplicated and sorted.
    #[test]
    fn normalize_allowed_domains_works() {
        let domains = vec![
            " Example.COM ".into(),
            "docs.example.com".into(),
            "example.com".into(),
        ];
        let normalized = normalize_allowed_domains(domains).unwrap();
        assert_eq!(normalized, vec!["docs.example.com", "example.com"]);
    }

    #[test]
    fn normalize_allowed_domains_rejects_invalid() {
        let err =
            normalize_allowed_domains(vec!["".into(), "  ".into(), "user@example.com".into()])
                .unwrap_err();
        assert!(
            err.to_string()
                .contains("Invalid browser.allowed_domains entry")
        );
    }

    #[test]
    fn normalize_domain_rejects_unmatched_brackets() {
        assert!(normalize_domain("[::1").is_none());
        assert!(normalize_domain("::1]").is_none());
        assert!(normalize_domain("[127.0.0.1").is_none());
        assert!(normalize_domain("127.0.0.1]").is_none());
    }

    #[test]
    fn extract_host_works() {
        assert_eq!(
            extract_host("https://example.com/path").unwrap(),
            "example.com"
        );
        assert_eq!(
            extract_host("https://Sub.Example.COM:8080/").unwrap(),
            "sub.example.com"
        );
    }

    // IPv6 literals keep their brackets and lose any port.
    #[test]
    fn extract_host_handles_ipv6() {
        assert_eq!(extract_host("https://[::1]/path").unwrap(), "[::1]");
        assert_eq!(
            extract_host("https://[2001:db8::1]:8080/path").unwrap(),
            "[2001:db8::1]"
        );
        assert_eq!(extract_host("https://[fe80::1]/").unwrap(), "[fe80::1]");
    }

    #[test]
    fn is_private_host_detects_local() {
        assert!(is_private_host("localhost"));
        assert!(is_private_host("app.localhost"));
        assert!(is_private_host("printer.local"));
        assert!(is_private_host("127.0.0.1"));
        assert!(is_private_host("192.168.1.1"));
        assert!(is_private_host("10.0.0.1"));
        assert!(!is_private_host("example.com"));
        assert!(!is_private_host("google.com"));
    }

    #[test]
    fn is_private_host_blocks_multicast_and_reserved() {
        // Multicast.
        assert!(is_private_host("224.0.0.1"));
        // Limited broadcast.
        assert!(is_private_host("255.255.255.255"));
        // Shared address space (100.64.0.0/10).
        assert!(is_private_host("100.64.0.1"));
        // Reserved (240.0.0.0/4).
        assert!(is_private_host("240.0.0.1"));
        // Documentation ranges TEST-NET-1/2/3.
        assert!(is_private_host("192.0.2.1"));
        assert!(is_private_host("198.51.100.1"));
        assert!(is_private_host("203.0.113.1"));
        // Benchmarking (198.18.0.0/15).
        assert!(is_private_host("198.18.0.1"));
    }

    #[test]
    fn is_private_host_catches_ipv6() {
        assert!(is_private_host("::1"));
        assert!(is_private_host("[::1]"));
        assert!(is_private_host("0.0.0.0"));
    }

    #[test]
    fn is_private_host_catches_mapped_ipv4() {
        assert!(is_private_host("::ffff:127.0.0.1"));
        assert!(is_private_host("::ffff:10.0.0.1"));
        assert!(is_private_host("::ffff:192.168.1.1"));
    }

    #[test]
    fn is_private_host_catches_ipv6_private_ranges() {
        assert!(is_private_host("fd00::1"));
        assert!(is_private_host("fc00::1"));
        assert!(is_private_host("fe80::1"));
        assert!(!is_private_host("2001:db8::1"));
    }

    // Even with a wildcard allowlist, loopback/private IPv6 forms are refused.
    #[test]
    fn validate_url_blocks_ipv6_ssrf() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec!["*".into()], None).unwrap();
        assert!(tool.validate_url("https://[::1]/").is_err());
        assert!(tool.validate_url("https://[::ffff:127.0.0.1]/").is_err());
        assert!(
            tool.validate_url("https://[::ffff:10.0.0.1]:8080/")
                .is_err()
        );
    }

    #[test]
    fn host_matches_allowlist_exact() {
        let allowed = vec!["example.com".into()];
        assert!(host_matches_allowlist("example.com", &allowed));
        assert!(host_matches_allowlist("sub.example.com", &allowed));
        assert!(!host_matches_allowlist("notexample.com", &allowed));
    }

    #[test]
    fn host_matches_allowlist_wildcard() {
        let allowed = vec!["*.example.com".into()];
        assert!(host_matches_allowlist("sub.example.com", &allowed));
        assert!(host_matches_allowlist("example.com", &allowed));
        assert!(!host_matches_allowlist("other.com", &allowed));
    }

    #[test]
    fn host_matches_allowlist_star() {
        let allowed = vec!["*".into()];
        assert!(host_matches_allowlist("anything.com", &allowed));
        assert!(host_matches_allowlist("example.org", &allowed));
    }

    #[test]
    fn browser_backend_parser_accepts_supported_values() {
        assert_eq!(
            BrowserBackendKind::parse("agent_browser").unwrap(),
            BrowserBackendKind::AgentBrowser
        );
        assert_eq!(
            BrowserBackendKind::parse("rust-native").unwrap(),
            BrowserBackendKind::RustNative
        );
        assert_eq!(
            BrowserBackendKind::parse("computer_use").unwrap(),
            BrowserBackendKind::ComputerUse
        );
        assert_eq!(
            BrowserBackendKind::parse("auto").unwrap(),
            BrowserBackendKind::Auto
        );
    }

    #[test]
    fn browser_backend_parser_rejects_unknown_values() {
        assert!(BrowserBackendKind::parse("playwright").is_err());
    }

    #[test]
    fn browser_tool_default_backend_is_agent_browser() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec!["example.com".into()], None).unwrap();
        assert_eq!(
            tool.configured_backend().unwrap(),
            BrowserBackendKind::AgentBrowser
        );
    }

    // With no `headed` setting the command carries no explicit override.
    #[test]
    fn agent_browser_command_inherits_headed_env_by_default() {
        let headed_key = std::ffi::OsStr::new("AGENT_BROWSER_HEADED");
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec!["example.com".into()], None).unwrap();
        let cmd = tool.agent_browser_command();

        assert_eq!(
            cmd.as_std()
                .get_envs()
                .find(|(key, _)| *key == headed_key)
                .map(|(_, value)| value),
            None
        );
    }

    // `Some(None)` means the variable is explicitly removed for the child.
    #[test]
    fn agent_browser_command_clears_headed_env_when_configured_false() {
        let headed_key = std::ffi::OsStr::new("AGENT_BROWSER_HEADED");
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "agent_browser".into(),
            Some(false),
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig::default(),
        )
        .unwrap();
        let cmd = tool.agent_browser_command();

        assert_eq!(
            cmd.as_std()
                .get_envs()
                .find(|(key, _)| *key == headed_key)
                .map(|(_, value)| value),
            Some(None)
        );
    }

    #[test]
    fn agent_browser_command_sets_headed_env_when_configured() {
        let headed_key = std::ffi::OsStr::new("AGENT_BROWSER_HEADED");
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "agent_browser".into(),
            Some(true),
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig::default(),
        )
        .unwrap();
        let cmd = tool.agent_browser_command();

        assert_eq!(
            cmd.as_std()
                .get_envs()
                .find(|(key, _)| *key == headed_key)
                .and_then(|(_, value)| value)
                .and_then(|value| value.to_str()),
            Some("1")
        );
    }

    #[test]
    fn browser_tool_accepts_auto_backend_config() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "auto".into(),
            None,
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig::default(),
        )
        .unwrap();
        assert_eq!(tool.configured_backend().unwrap(), BrowserBackendKind::Auto);
    }

    #[test]
    fn browser_tool_accepts_computer_use_backend_config() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "computer_use".into(),
            None,
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig::default(),
        )
        .unwrap();
        assert_eq!(
            tool.configured_backend().unwrap(),
            BrowserBackendKind::ComputerUse
        );
    }

    #[test]
    fn computer_use_endpoint_rejects_public_http_by_default() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "computer_use".into(),
            None,
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig {
                endpoint: "http://computer-use.example.com/v1/actions".into(),
                ..ComputerUseConfig::default()
            },
        )
        .unwrap();

        assert!(tool.computer_use_endpoint_url().is_err());
    }

    // An https remote endpoint is accepted once remote endpoints are allowed.
    #[test]
    fn computer_use_endpoint_requires_https_for_public_remote() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "computer_use".into(),
            None,
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig {
                endpoint: "https://computer-use.example.com/v1/actions".into(),
                allow_remote_endpoint: true,
                ..ComputerUseConfig::default()
            },
        )
        .unwrap();

        assert!(tool.computer_use_endpoint_url().is_ok());
    }

    #[test]
    fn computer_use_coordinate_validation_applies_limits() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new_with_backend(
            security,
            vec!["example.com".into()],
            None,
            "computer_use".into(),
            None,
            true,
            "http://127.0.0.1:9515".into(),
            None,
            ComputerUseConfig {
                max_coordinate_x: Some(100),
                max_coordinate_y: Some(100),
                ..ComputerUseConfig::default()
            },
        )
        .unwrap();

        assert!(
            tool.validate_coordinate("x", 50, tool.computer_use.max_coordinate_x)
                .is_ok()
        );
        assert!(
            tool.validate_coordinate("x", 101, tool.computer_use.max_coordinate_x)
                .is_err()
        );
        assert!(
            tool.validate_coordinate("y", -1, tool.computer_use.max_coordinate_y)
                .is_err()
        );
    }

    #[test]
    fn browser_tool_name() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec!["example.com".into()], None).unwrap();
        assert_eq!(tool.name(), "browser");
    }

    #[test]
    fn browser_tool_validates_url() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec!["example.com".into()], None).unwrap();

        // Allowed domain and its subdomains.
        assert!(tool.validate_url("https://example.com").is_ok());
        assert!(tool.validate_url("https://sub.example.com/path").is_ok());

        // Host outside the allowlist.
        assert!(tool.validate_url("https://other.com").is_err());

        // Local targets.
        assert!(tool.validate_url("https://localhost").is_err());
        assert!(tool.validate_url("https://127.0.0.1").is_err());

        // Other schemes.
        assert!(tool.validate_url("ftp://example.com").is_err());
        assert!(tool.validate_url("file:///tmp/test.html").is_err());
    }

    #[test]
    fn browser_tool_empty_allowlist_blocks() {
        let security = Arc::new(SecurityPolicy::default());
        let tool = BrowserTool::new(security, vec![], None).unwrap();
        assert!(tool.validate_url("https://example.com").is_err());
    }

    #[test]
    fn computer_use_only_action_detection_is_correct() {
        assert!(is_computer_use_only_action("mouse_move"));
        assert!(is_computer_use_only_action("mouse_click"));
        assert!(is_computer_use_only_action("mouse_drag"));
        assert!(is_computer_use_only_action("key_type"));
        assert!(is_computer_use_only_action("key_press"));
        assert!(is_computer_use_only_action("screen_capture"));
        assert!(!is_computer_use_only_action("open"));
        assert!(!is_computer_use_only_action("snapshot"));
    }

    #[test]
    fn unavailable_action_error_preserves_backend_context() {
        assert_eq!(
            unavailable_action_for_backend_error("mouse_move", ResolvedBackend::AgentBrowser),
            "Action 'mouse_move' is unavailable for backend 'agent_browser'"
        );
        assert_eq!(
            unavailable_action_for_backend_error("mouse_move", ResolvedBackend::RustNative),
            "Action 'mouse_move' is unavailable for backend 'rust_native'"
        );
    }

    #[test]
    fn recoverable_error_detection_matches_session_patterns() {
        for message in [
            "invalid session id",
            "No Such Window",
            "session not created",
            "connection reset by peer",
            "broken pipe while writing webdriver command",
            "WebDriver request timed out",
        ] {
            let err = anyhow::Error::msg(message);
            assert!(is_recoverable_rust_native_error(&err), "{message}");
        }

        let allowlist_error =
            anyhow::Error::msg("URL host 'localhost' is not in browser allowlist [example.com]");
        assert!(!is_recoverable_rust_native_error(&allowlist_error));
    }

    #[test]
    fn non_recoverable_error_detection_rejects_policy_errors() {
        for message in [
            "Blocked by security policy",
            "URL host '127.0.0.1' is private and disallowed",
            "Action 'mouse_move' is unavailable for backend 'rust_native'",
        ] {
            let err = anyhow::Error::msg(message);
            assert!(!is_recoverable_rust_native_error(&err), "{message}");
        }
    }

    #[cfg(feature = "browser-native")]
    #[test]
    fn reset_session_is_idempotent_without_client() {
        tokio_test::block_on(async {
            let mut state = native_backend::NativeBrowserState::default();
            state.reset_session().await;
            state.reset_session().await;
        });
    }

    #[test]
    fn ensure_browser_env_sets_home_when_missing() {
        let _lock = lock_env();
        let _home = EnvVarGuard::unset("HOME");

        let mut cmd = Command::new("true");
        ensure_browser_env(&mut cmd);

        assert_eq!(explicit_env(&cmd, "HOME").as_deref(), Some("/tmp"));
    }

    #[test]
    fn ensure_browser_env_sets_chromium_flags() {
        let _lock = lock_env();
        let _flags = EnvVarGuard::unset("CHROMIUM_FLAGS");

        let mut cmd = Command::new("true");
        ensure_browser_env(&mut cmd);

        let flags = explicit_env(&cmd, "CHROMIUM_FLAGS").unwrap_or_default();
        assert!(flags.contains("--no-sandbox"), "{flags}");
        assert!(flags.contains("--disable-dev-shm-usage"), "{flags}");
    }

    #[test]
    fn is_service_environment_detects_invocation_id() {
        let _lock = lock_env();
        let _invocation = EnvVarGuard::set("INVOCATION_ID", "test-unit-id");

        assert!(is_service_environment());
    }

    #[test]
    fn is_service_environment_detects_journal_stream() {
        let _lock = lock_env();
        let _journal = EnvVarGuard::set("JOURNAL_STREAM", "8:12345");

        assert!(is_service_environment());
    }

    #[test]
    fn is_service_environment_false_in_normal_context() {
        let _lock = lock_env();
        let _invocation = EnvVarGuard::unset("INVOCATION_ID");
        let _journal = EnvVarGuard::unset("JOURNAL_STREAM");

        // Without HOME the Linux fallback reports a service environment, so
        // the negative assertion only holds when HOME is present.
        if std::env::var_os("HOME").is_some() {
            assert!(!is_service_environment());
        }
    }

    // NOTE(review): this test only re-evaluates `cfg!(target_os)` against
    // itself and does not call any production code — consider asserting on
    // the program name of the command built by the tool instead.
    #[test]
    fn windows_command_name_selection() {
        let cmd = if cfg!(target_os = "windows") {
            "agent-browser.cmd"
        } else {
            "agent-browser"
        };

        if cfg!(target_os = "windows") {
            assert_eq!(cmd, "agent-browser.cmd");
        } else {
            assert_eq!(cmd, "agent-browser");
        }
    }
}