1use super::web_search_provider_routing::{WebSearchProviderRoute, resolve_web_search_provider};
2use async_trait::async_trait;
3use regex::Regex;
4use serde_json::json;
5use std::path::{Path, PathBuf};
6use std::time::Duration;
7use zeroclaw_api::tool::{Tool, ToolResult};
8
/// Web search tool that sends a query to one of several search back ends
/// (DuckDuckGo, Brave, Tavily or SearXNG) and renders the hits as plain text.
pub struct WebSearchTool {
    /// Search provider name; the constructors store it trimmed and lowercased.
    model_provider: String,
    /// Brave API key supplied at construction. When it is absent, empty or still
    /// in encrypted form, the key is re-read from `config_path` on use.
    boot_brave_api_key: Option<String>,
    /// Tavily API key supplied at construction, handled the same way as the Brave key.
    boot_tavily_api_key: Option<String>,
    /// SearXNG base URL supplied at construction; when absent or empty it is
    /// looked up in the config file instead.
    searxng_instance_url: Option<String>,
    /// Maximum number of hits rendered per search; the constructors clamp it to `1..=10`.
    max_results: usize,
    /// HTTP request timeout in seconds; the constructors raise it to at least 1.
    timeout_secs: u64,
    /// Path of the TOML config file read when a key or URL has to be reloaded.
    config_path: PathBuf,
    /// Passed through to `SecretStore::new` when an encrypted key must be decrypted.
    secrets_encrypt: bool,
}
33
34impl WebSearchTool {
35 pub fn new(
36 model_provider: String,
37 brave_api_key: Option<String>,
38 max_results: usize,
39 timeout_secs: u64,
40 ) -> Self {
41 Self {
42 model_provider: model_provider.trim().to_lowercase(),
43 boot_brave_api_key: brave_api_key,
44 boot_tavily_api_key: None,
45 searxng_instance_url: None,
46 max_results: max_results.clamp(1, 10),
47 timeout_secs: timeout_secs.max(1),
48 config_path: PathBuf::new(),
49 secrets_encrypt: false,
50 }
51 }
52
53 #[allow(clippy::too_many_arguments)]
59 pub fn new_with_config(
60 model_provider: String,
61 brave_api_key: Option<String>,
62 tavily_api_key: Option<String>,
63 searxng_instance_url: Option<String>,
64 max_results: usize,
65 timeout_secs: u64,
66 config_path: PathBuf,
67 secrets_encrypt: bool,
68 ) -> Self {
69 Self {
70 model_provider: model_provider.trim().to_lowercase(),
71 boot_brave_api_key: brave_api_key,
72 boot_tavily_api_key: tavily_api_key,
73 searxng_instance_url,
74 max_results: max_results.clamp(1, 10),
75 timeout_secs: timeout_secs.max(1),
76 config_path,
77 secrets_encrypt,
78 }
79 }
80
81 fn resolve_brave_api_key(&self) -> anyhow::Result<String> {
85 if let Some(ref key) = self.boot_brave_api_key
87 && !key.is_empty()
88 && !zeroclaw_config::secrets::SecretStore::is_encrypted(key)
89 {
90 return Ok(key.clone());
91 }
92
93 self.reload_brave_api_key()
95 }
96
97 fn reload_brave_api_key(&self) -> anyhow::Result<String> {
99 let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
100 ::zeroclaw_log::record!(
101 ERROR,
102 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
103 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
104 .with_attrs(::serde_json::json!({
105 "path": self.config_path.display().to_string(),
106 "search_provider": "brave",
107 "error": format!("{}", e),
108 })),
109 "web_search: failed to read config for Brave API key"
110 );
111 anyhow::Error::msg(format!(
112 "Failed to read config file {} for Brave API key: {e}",
113 self.config_path.display()
114 ))
115 })?;
116
117 let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
118 ::zeroclaw_log::record!(
119 ERROR,
120 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
121 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
122 .with_attrs(::serde_json::json!({
123 "path": self.config_path.display().to_string(),
124 "search_provider": "brave",
125 "error": format!("{}", e),
126 })),
127 "web_search: failed to parse config for Brave API key"
128 );
129 anyhow::Error::msg(format!(
130 "Failed to parse config file {} for Brave API key: {e}",
131 self.config_path.display()
132 ))
133 })?;
134
135 let raw_key = config
136 .web_search
137 .brave_api_key
138 .filter(|k| !k.is_empty())
139 .ok_or_else(|| {
140 ::zeroclaw_log::record!(
141 ERROR,
142 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
143 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
144 .with_attrs(::serde_json::json!({"search_provider": "brave"})),
145 "web_search: Brave API key not configured"
146 );
147 anyhow::Error::msg("Brave API key not configured")
148 })?;
149
150 if zeroclaw_config::secrets::SecretStore::is_encrypted(&raw_key) {
152 let zeroclaw_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
153 let store =
154 zeroclaw_config::secrets::SecretStore::new(zeroclaw_dir, self.secrets_encrypt);
155 let plaintext = store.decrypt(&raw_key)?;
156 if plaintext.is_empty() {
157 anyhow::bail!("Brave API key not configured (decrypted value is empty)");
158 }
159 Ok(plaintext)
160 } else {
161 Ok(raw_key)
162 }
163 }
164
165 async fn search_duckduckgo(&self, query: &str) -> anyhow::Result<String> {
166 self.search_duckduckgo_at("https://html.duckduckgo.com/html/", query)
167 .await
168 }
169
170 async fn search_duckduckgo_at(
174 &self,
175 endpoint_url: &str,
176 query: &str,
177 ) -> anyhow::Result<String> {
178 let encoded_query = urlencoding::encode(query);
179 let search_url = format!("{}?q={}", endpoint_url, encoded_query);
180
181 let builder = reqwest::Client::builder()
182 .timeout(Duration::from_secs(self.timeout_secs))
183 .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
184 let builder =
185 zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
186 let client = builder.build()?;
187
188 let response = client.get(&search_url).send().await?;
189 let status = response.status();
190 let final_url_is_block =
191 contains_ascii_case_insensitive(response.url().as_str(), "/wr.do?");
192
193 if !status.is_success() {
194 if let Some(message) = duckduckgo_block_message(status, final_url_is_block, false) {
195 anyhow::bail!(message);
196 }
197 anyhow::bail!("DuckDuckGo search failed with status: {}", status);
198 }
199
200 let html = response.text().await?;
201 let html_contains_block = contains_ascii_case_insensitive(&html, "/wr.do?")
202 || contains_ascii_case_insensitive(&html, "anomaly-modal");
203 if let Some(message) =
204 duckduckgo_block_message(status, final_url_is_block, html_contains_block)
205 {
206 anyhow::bail!(message);
207 }
208 self.parse_duckduckgo_results(&html, query)
209 }
210
211 fn parse_duckduckgo_results(&self, html: &str, query: &str) -> anyhow::Result<String> {
212 let link_regex = Regex::new(
214 r#"<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>"#,
215 )?;
216
217 let snippet_regex = Regex::new(r#"<a class="result__snippet[^"]*"[^>]*>([\s\S]*?)</a>"#)?;
219
220 let link_matches: Vec<_> = link_regex
221 .captures_iter(html)
222 .take(self.max_results + 2)
223 .collect();
224
225 let snippet_matches: Vec<_> = snippet_regex
226 .captures_iter(html)
227 .take(self.max_results + 2)
228 .collect();
229
230 if link_matches.is_empty() {
231 return Ok(format!("No results found for: {}", query));
232 }
233
234 let mut lines = vec![format!("Search results for: {} (via DuckDuckGo)", query)];
235
236 let count = link_matches.len().min(self.max_results);
237
238 for i in 0..count {
239 let caps = &link_matches[i];
240 let url_str = decode_ddg_redirect_url(&caps[1]);
241 let title = strip_tags(&caps[2]);
242
243 lines.push(format!("{}. {}", i + 1, title.trim()));
244 lines.push(format!(" {}", url_str.trim()));
245
246 if i < snippet_matches.len() {
248 let snippet = strip_tags(&snippet_matches[i][1]);
249 let snippet = snippet.trim();
250 if !snippet.is_empty() {
251 lines.push(format!(" {}", snippet));
252 }
253 }
254 }
255
256 Ok(lines.join("\n"))
257 }
258
259 async fn search_brave(&self, query: &str) -> anyhow::Result<String> {
260 let api_key = self.resolve_brave_api_key()?;
261
262 let encoded_query = urlencoding::encode(query);
263 let search_url = format!(
264 "https://api.search.brave.com/res/v1/web/search?q={}&count={}",
265 encoded_query, self.max_results
266 );
267
268 let builder = reqwest::Client::builder().timeout(Duration::from_secs(self.timeout_secs));
269 let builder =
270 zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
271 let client = builder.build()?;
272
273 let response = client
274 .get(&search_url)
275 .header("Accept", "application/json")
276 .header("X-Subscription-Token", &api_key)
277 .send()
278 .await?;
279
280 if !response.status().is_success() {
281 anyhow::bail!("Brave search failed with status: {}", response.status());
282 }
283
284 let json: serde_json::Value = response.json().await?;
285 self.parse_brave_results(&json, query)
286 }
287
288 fn resolve_tavily_api_key(&self) -> anyhow::Result<String> {
291 if let Some(ref key) = self.boot_tavily_api_key
292 && !key.is_empty()
293 && !zeroclaw_config::secrets::SecretStore::is_encrypted(key)
294 {
295 return Ok(key.clone());
296 }
297 self.reload_tavily_api_key()
298 }
299
300 fn reload_tavily_api_key(&self) -> anyhow::Result<String> {
302 let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
303 ::zeroclaw_log::record!(
304 ERROR,
305 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
306 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
307 .with_attrs(::serde_json::json!({
308 "path": self.config_path.display().to_string(),
309 "search_provider": "tavily",
310 "error": format!("{}", e),
311 })),
312 "web_search: failed to read config for Tavily API key"
313 );
314 anyhow::Error::msg(format!(
315 "Failed to read config file {} for Tavily API key: {e}",
316 self.config_path.display()
317 ))
318 })?;
319
320 let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
321 ::zeroclaw_log::record!(
322 ERROR,
323 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
324 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
325 .with_attrs(::serde_json::json!({
326 "path": self.config_path.display().to_string(),
327 "search_provider": "tavily",
328 "error": format!("{}", e),
329 })),
330 "web_search: failed to parse config for Tavily API key"
331 );
332 anyhow::Error::msg(format!(
333 "Failed to parse config file {} for Tavily API key: {e}",
334 self.config_path.display()
335 ))
336 })?;
337
338 let raw_key = config
339 .web_search
340 .tavily_api_key
341 .filter(|k| !k.is_empty())
342 .ok_or_else(|| {
343 ::zeroclaw_log::record!(
344 ERROR,
345 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
346 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
347 .with_attrs(::serde_json::json!({"search_provider": "tavily"})),
348 "web_search: Tavily API key not configured"
349 );
350 anyhow::Error::msg("Tavily API key not configured")
351 })?;
352
353 if zeroclaw_config::secrets::SecretStore::is_encrypted(&raw_key) {
354 let zeroclaw_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
355 let store =
356 zeroclaw_config::secrets::SecretStore::new(zeroclaw_dir, self.secrets_encrypt);
357 let plaintext = store.decrypt(&raw_key)?;
358 if plaintext.is_empty() {
359 anyhow::bail!("Tavily API key not configured (decrypted value is empty)");
360 }
361 Ok(plaintext)
362 } else {
363 Ok(raw_key)
364 }
365 }
366
367 async fn search_tavily(&self, query: &str) -> anyhow::Result<String> {
368 let client = self.build_tavily_client()?;
369 self.search_tavily_with_client(&client, "https://api.tavily.com/search", query)
370 .await
371 }
372
373 fn build_tavily_client(&self) -> anyhow::Result<reqwest::Client> {
381 let builder = reqwest::Client::builder().timeout(Duration::from_secs(self.timeout_secs));
382 let builder =
383 zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
384 Ok(builder.build()?)
385 }
386
387 async fn search_tavily_with_client(
392 &self,
393 client: &reqwest::Client,
394 url: &str,
395 query: &str,
396 ) -> anyhow::Result<String> {
397 let api_key = self.resolve_tavily_api_key()?;
398
399 let body = serde_json::json!({
404 "query": query,
405 "max_results": self.max_results,
406 "search_depth": "basic",
407 "include_answer": false,
408 "include_raw_content": false,
409 });
410
411 let response = client
412 .post(url)
413 .bearer_auth(&api_key)
414 .json(&body)
415 .send()
416 .await?;
417
418 if !response.status().is_success() {
419 anyhow::bail!("Tavily search failed with status: {}", response.status());
420 }
421
422 let json: serde_json::Value = response.json().await?;
423 self.parse_tavily_results(&json, query)
424 }
425
426 fn parse_tavily_results(
427 &self,
428 json: &serde_json::Value,
429 query: &str,
430 ) -> anyhow::Result<String> {
431 let results = json
432 .get("results")
433 .and_then(|r| r.as_array())
434 .ok_or_else(|| {
435 ::zeroclaw_log::record!(
436 ERROR,
437 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
438 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
439 .with_attrs(::serde_json::json!({"search_provider": "tavily"})),
440 "web_search: invalid Tavily response"
441 );
442 anyhow::Error::msg("Invalid Tavily API response")
443 })?;
444
445 if results.is_empty() {
446 return Ok(format!("No results found for: {}", query));
447 }
448
449 let mut lines = vec![format!("Search results for: {} (via Tavily)", query)];
450
451 for (i, result) in results.iter().take(self.max_results).enumerate() {
452 let title = result
453 .get("title")
454 .and_then(|t| t.as_str())
455 .unwrap_or("No title");
456 let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
457 let content = result.get("content").and_then(|c| c.as_str()).unwrap_or("");
460
461 lines.push(format!("{}. {}", i + 1, title));
462 lines.push(format!(" {}", url));
463 if !content.is_empty() {
464 lines.push(format!(" {}", content));
465 }
466 }
467
468 Ok(lines.join("\n"))
469 }
470
471 fn parse_brave_results(&self, json: &serde_json::Value, query: &str) -> anyhow::Result<String> {
472 let results = json
473 .get("web")
474 .and_then(|w| w.get("results"))
475 .and_then(|r| r.as_array())
476 .ok_or_else(|| {
477 ::zeroclaw_log::record!(
478 ERROR,
479 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
480 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
481 .with_attrs(::serde_json::json!({"search_provider": "brave"})),
482 "web_search: invalid Brave response"
483 );
484 anyhow::Error::msg("Invalid Brave API response")
485 })?;
486
487 if results.is_empty() {
488 return Ok(format!("No results found for: {}", query));
489 }
490
491 let mut lines = vec![format!("Search results for: {} (via Brave)", query)];
492
493 for (i, result) in results.iter().take(self.max_results).enumerate() {
494 let title = result
495 .get("title")
496 .and_then(|t| t.as_str())
497 .unwrap_or("No title");
498 let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
499 let description = result
500 .get("description")
501 .and_then(|d| d.as_str())
502 .unwrap_or("");
503
504 lines.push(format!("{}. {}", i + 1, title));
505 lines.push(format!(" {}", url));
506 if !description.is_empty() {
507 lines.push(format!(" {}", description));
508 }
509 }
510
511 Ok(lines.join("\n"))
512 }
513
514 fn resolve_searxng_instance_url(&self) -> anyhow::Result<String> {
517 if let Some(ref url) = self.searxng_instance_url
518 && !url.is_empty()
519 {
520 return Ok(url.clone());
521 }
522
523 let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
525 ::zeroclaw_log::record!(
526 ERROR,
527 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
528 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
529 .with_attrs(::serde_json::json!({
530 "path": self.config_path.display().to_string(),
531 "search_provider": "searxng",
532 "error": format!("{}", e),
533 })),
534 "web_search: failed to read config for SearXNG URL"
535 );
536 anyhow::Error::msg(format!(
537 "Failed to read config file {} for SearXNG instance URL: {e}",
538 self.config_path.display()
539 ))
540 })?;
541
542 let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
543 ::zeroclaw_log::record!(
544 ERROR,
545 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
546 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
547 .with_attrs(::serde_json::json!({
548 "path": self.config_path.display().to_string(),
549 "search_provider": "searxng",
550 "error": format!("{}", e),
551 })),
552 "web_search: failed to parse config for SearXNG URL"
553 );
554 anyhow::Error::msg(format!(
555 "Failed to parse config file {} for SearXNG instance URL: {e}",
556 self.config_path.display()
557 ))
558 })?;
559
560 config
561 .web_search
562 .searxng_instance_url
563 .filter(|u| !u.is_empty())
564 .ok_or_else(|| {
565 ::zeroclaw_log::record!(
566 ERROR,
567 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
568 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
569 .with_attrs(::serde_json::json!({"search_provider": "searxng"})),
570 "web_search: SearXNG instance URL not configured"
571 );
572 anyhow::Error::msg(
573 "SearXNG instance URL not configured. Set [web_search] searxng_instance_url \
574 in config.toml or the SEARXNG_INSTANCE_URL environment variable.",
575 )
576 })
577 }
578
579 async fn search_searxng(&self, query: &str) -> anyhow::Result<String> {
580 let instance_url = self.resolve_searxng_instance_url()?;
581 let base_url = instance_url.trim_end_matches('/');
582
583 let encoded_query = urlencoding::encode(query);
584 let search_url = format!(
585 "{}/search?q={}&format=json&pageno=1",
586 base_url, encoded_query
587 );
588
589 let builder = reqwest::Client::builder()
590 .timeout(Duration::from_secs(self.timeout_secs))
591 .user_agent("ZeroClaw/1.0");
592 let builder =
593 zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
594 let client = builder.build()?;
595
596 let response = client
597 .get(&search_url)
598 .header("Accept", "application/json")
599 .send()
600 .await?;
601
602 if !response.status().is_success() {
603 anyhow::bail!("SearXNG search failed with status: {}", response.status());
604 }
605
606 let json: serde_json::Value = response.json().await?;
607 self.parse_searxng_results(&json, query)
608 }
609
610 fn parse_searxng_results(
611 &self,
612 json: &serde_json::Value,
613 query: &str,
614 ) -> anyhow::Result<String> {
615 let results = json
616 .get("results")
617 .and_then(|r| r.as_array())
618 .ok_or_else(|| {
619 ::zeroclaw_log::record!(
620 ERROR,
621 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
622 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
623 .with_attrs(::serde_json::json!({"search_provider": "searxng"})),
624 "web_search: invalid SearXNG response"
625 );
626 anyhow::Error::msg("Invalid SearXNG API response")
627 })?;
628
629 if results.is_empty() {
630 return Ok(format!("No results found for: {}", query));
631 }
632
633 let mut lines = vec![format!("Search results for: {} (via SearXNG)", query)];
634
635 for (i, result) in results.iter().take(self.max_results).enumerate() {
636 let title = result
637 .get("title")
638 .and_then(|t| t.as_str())
639 .unwrap_or("No title");
640 let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
641 let content = result.get("content").and_then(|c| c.as_str()).unwrap_or("");
642
643 lines.push(format!("{}. {}", i + 1, title));
644 lines.push(format!(" {}", url));
645 if !content.is_empty() {
646 lines.push(format!(" {}", content));
647 }
648 }
649
650 Ok(lines.join("\n"))
651 }
652}
653
654fn decode_ddg_redirect_url(raw_url: &str) -> String {
655 if let Some(index) = raw_url.find("uddg=") {
656 let encoded = &raw_url[index + 5..];
657 let encoded = encoded.split('&').next().unwrap_or(encoded);
658 if let Ok(decoded) = urlencoding::decode(encoded) {
659 return decoded.into_owned();
660 }
661 }
662
663 raw_url.to_string()
664}
665
666const DUCKDUCKGO_BLOCK_MESSAGE: &str = "DuckDuckGo blocked the automated search request. Try configuring SearXNG, Brave, or Tavily as the web search provider.";
667
668fn duckduckgo_block_message(
669 status: reqwest::StatusCode,
670 final_url_is_block: bool,
671 html_contains_block: bool,
672) -> Option<&'static str> {
673 if status == reqwest::StatusCode::FORBIDDEN || final_url_is_block || html_contains_block {
674 Some(DUCKDUCKGO_BLOCK_MESSAGE)
675 } else {
676 None
677 }
678}
679
/// Reports whether `needle` occurs in `haystack`, ignoring ASCII case.
///
/// The comparison is done on bytes without building a lowercased copy of either
/// string. An empty `needle` is contained in every haystack, matching
/// `str::contains("")`.
fn contains_ascii_case_insensitive(haystack: &str, needle: &str) -> bool {
    let needle = needle.as_bytes();

    // `slice::windows` panics on a window size of zero, so the empty needle
    // has to be answered before the scan.
    if needle.is_empty() {
        return true;
    }

    haystack
        .as_bytes()
        .windows(needle.len())
        .any(|window| window.eq_ignore_ascii_case(needle))
}
686
687fn strip_tags(content: &str) -> String {
688 let re = Regex::new(r"<[^>]+>").unwrap();
689 re.replace_all(content, "").to_string()
690}
691
692#[async_trait]
693impl Tool for WebSearchTool {
694 fn name(&self) -> &str {
695 "web_search_tool"
696 }
697
698 fn description(&self) -> &str {
699 "Search the web for information. Returns relevant search results with titles, URLs, and descriptions. Use this to find current information, news, or research topics."
700 }
701
702 fn parameters_schema(&self) -> serde_json::Value {
703 json!({
704 "type": "object",
705 "properties": {
706 "query": {
707 "type": "string",
708 "description": "The search query. Be specific for better results."
709 }
710 },
711 "required": ["query"]
712 })
713 }
714
715 async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
716 let query = args.get("query").and_then(|q| q.as_str()).ok_or_else(|| {
717 ::zeroclaw_log::record!(
718 WARN,
719 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
720 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
721 .with_attrs(::serde_json::json!({"param": "query"})),
722 "web_search: missing query parameter"
723 );
724 anyhow::Error::msg("Missing required parameter: query")
725 })?;
726
727 if query.trim().is_empty() {
728 anyhow::bail!("Search query cannot be empty");
729 }
730
731 ::zeroclaw_log::record!(
732 INFO,
733 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
734 &format!("Searching web for: {}", query)
735 );
736
737 let resolution = resolve_web_search_provider(&self.model_provider);
738 if resolution.used_fallback {
739 ::zeroclaw_log::record!(
740 WARN,
741 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
742 .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
743 &format!(
744 "Unknown web search model_provider '{}'; falling back to '{}'",
745 self.model_provider, resolution.canonical_provider
746 )
747 );
748 }
749
750 let result = match resolution.route {
751 WebSearchProviderRoute::DuckDuckGo => self.search_duckduckgo(query).await?,
752 WebSearchProviderRoute::Brave => self.search_brave(query).await?,
753 WebSearchProviderRoute::Tavily => self.search_tavily(query).await?,
754 WebSearchProviderRoute::SearXNG => self.search_searxng(query).await?,
755 };
756
757 Ok(ToolResult {
758 success: true,
759 output: result,
760 error: None,
761 })
762 }
763}
764
765#[cfg(test)]
766mod tests {
767 use super::*;
768
769 #[test]
770 fn test_tool_name() {
771 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
772 assert_eq!(tool.name(), "web_search_tool");
773 }
774
775 #[test]
776 fn test_tool_description() {
777 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
778 assert!(tool.description().contains("Search the web"));
779 }
780
781 #[test]
782 fn test_parameters_schema() {
783 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
784 let schema = tool.parameters_schema();
785 assert_eq!(schema["type"], "object");
786 assert!(schema["properties"]["query"].is_object());
787 }
788
789 #[test]
790 fn test_strip_tags() {
791 let html = "<b>Hello</b> <i>World</i>";
792 assert_eq!(strip_tags(html), "Hello World");
793 }
794
795 #[test]
796 fn test_parse_duckduckgo_results_empty() {
797 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
798 let result = tool
799 .parse_duckduckgo_results("<html>No results here</html>", "test")
800 .unwrap();
801 assert!(result.contains("No results found"));
802 }
803
804 #[test]
805 fn test_parse_duckduckgo_results_with_data() {
806 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
807 let html = r#"
808 <a class="result__a" href="https://example.com">Example Title</a>
809 <a class="result__snippet">This is a description</a>
810 "#;
811 let result = tool.parse_duckduckgo_results(html, "test").unwrap();
812 assert!(result.contains("Example Title"));
813 assert!(result.contains("https://example.com"));
814 }
815
816 #[test]
817 fn test_parse_duckduckgo_results_decodes_redirect_url() {
818 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
819 let html = r#"
820 <a class="result__a" href="https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpath%3Fa%3D1&rut=test">Example Title</a>
821 <a class="result__snippet">This is a description</a>
822 "#;
823 let result = tool.parse_duckduckgo_results(html, "test").unwrap();
824 assert!(result.contains("https://example.com/path?a=1"));
825 assert!(!result.contains("rut=test"));
826 }
827
828 #[test]
829 fn test_duckduckgo_block_detection_reports_forbidden_status() {
830 let message = duckduckgo_block_message(reqwest::StatusCode::FORBIDDEN, false, false)
831 .expect("403 responses should be classified as a DuckDuckGo block");
832
833 assert!(message.contains("DuckDuckGo blocked"));
834 assert!(message.contains("SearXNG"));
835 }
836
837 #[test]
838 fn test_duckduckgo_block_detection_reports_verification_redirect() {
839 let message = duckduckgo_block_message(reqwest::StatusCode::OK, true, false)
840 .expect("verification redirects should be classified as a DuckDuckGo block");
841
842 assert!(message.contains("DuckDuckGo blocked"));
843 assert!(message.contains("SearXNG"));
844 }
845
846 #[test]
847 fn test_duckduckgo_block_detection_reports_verification_form_in_html() {
848 let message = duckduckgo_block_message(reqwest::StatusCode::OK, false, true)
849 .expect("verification form HTML should be classified as a DuckDuckGo block");
850
851 assert!(message.contains("DuckDuckGo blocked"));
852 assert!(message.contains("SearXNG"));
853 }
854
855 #[test]
856 fn test_duckduckgo_block_detection_ignores_normal_empty_results() {
857 let message = duckduckgo_block_message(reqwest::StatusCode::OK, false, false);
858
859 assert!(message.is_none());
860 }
861
862 #[test]
863 fn test_duckduckgo_block_detection_is_case_insensitive_without_allocating_html() {
864 assert!(contains_ascii_case_insensitive(
865 r#"<form action="/WR.DO?u=https%3A%2F%2Fhtml.duckduckgo.com%2Fhtml%2F"></form>"#,
866 "/wr.do?"
867 ));
868 }
869
870 #[tokio::test]
871 async fn test_duckduckgo_request_reports_forbidden_status() {
872 use wiremock::matchers::{method, path, query_param};
873 use wiremock::{Mock, MockServer, ResponseTemplate};
874
875 let server = MockServer::start().await;
876 Mock::given(method("GET"))
877 .and(path("/html/"))
878 .and(query_param("q", "test"))
879 .respond_with(ResponseTemplate::new(403))
880 .mount(&server)
881 .await;
882
883 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
884 let err = tool
885 .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
886 .await
887 .expect_err("403 should be reported as a DuckDuckGo block");
888
889 assert!(err.to_string().contains("DuckDuckGo blocked"));
890 assert!(err.to_string().contains("SearXNG"));
891 }
892
893 #[tokio::test]
894 async fn test_duckduckgo_request_reports_verification_redirect_url() {
895 use wiremock::matchers::{method, path, query_param};
896 use wiremock::{Mock, MockServer, ResponseTemplate};
897
898 let server = MockServer::start().await;
899 Mock::given(method("GET"))
900 .and(path("/html/"))
901 .and(query_param("q", "test"))
902 .respond_with(
903 ResponseTemplate::new(302)
904 .insert_header("location", format!("{}/wr.do?u=blocked", server.uri())),
905 )
906 .mount(&server)
907 .await;
908 Mock::given(method("GET"))
909 .and(path("/wr.do"))
910 .respond_with(ResponseTemplate::new(200).set_body_string("<html></html>"))
911 .mount(&server)
912 .await;
913
914 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
915 let err = tool
916 .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
917 .await
918 .expect_err("verification redirects should be reported as a DuckDuckGo block");
919
920 assert!(err.to_string().contains("DuckDuckGo blocked"));
921 assert!(err.to_string().contains("SearXNG"));
922 }
923
924 #[tokio::test]
925 async fn test_duckduckgo_request_reports_verification_form_html() {
926 use wiremock::matchers::{method, path, query_param};
927 use wiremock::{Mock, MockServer, ResponseTemplate};
928
929 let server = MockServer::start().await;
930 Mock::given(method("GET"))
931 .and(path("/html/"))
932 .and(query_param("q", "test"))
933 .respond_with(ResponseTemplate::new(200).set_body_string(
934 r#"<form action="/wr.do?u=https%3A%2F%2Fhtml.duckduckgo.com%2Fhtml%2F"></form>"#,
935 ))
936 .mount(&server)
937 .await;
938
939 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
940 let err = tool
941 .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
942 .await
943 .expect_err("verification HTML should be reported as a DuckDuckGo block");
944
945 assert!(err.to_string().contains("DuckDuckGo blocked"));
946 assert!(err.to_string().contains("SearXNG"));
947 }
948
949 #[tokio::test]
950 async fn test_duckduckgo_request_reports_anomaly_modal_block() {
951 use wiremock::matchers::{method, path, query_param};
956 use wiremock::{Mock, MockServer, ResponseTemplate};
957
958 let server = MockServer::start().await;
959 Mock::given(method("GET"))
960 .and(path("/html/"))
961 .and(query_param("q", "test"))
962 .respond_with(ResponseTemplate::new(202).set_body_string(
963 r#"<html><body><div class="anomaly-modal__title">Unusual Traffic Detected</div></body></html>"#,
964 ))
965 .mount(&server)
966 .await;
967
968 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
969 let err = tool
970 .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
971 .await
972 .expect_err("anomaly-modal page should be reported as a DuckDuckGo block");
973
974 assert!(err.to_string().contains("DuckDuckGo blocked"));
975 assert!(err.to_string().contains("SearXNG"));
976 }
977
978 #[tokio::test]
979 async fn test_duckduckgo_request_preserves_normal_empty_results() {
980 use wiremock::matchers::{method, path, query_param};
981 use wiremock::{Mock, MockServer, ResponseTemplate};
982
983 let server = MockServer::start().await;
984 Mock::given(method("GET"))
985 .and(path("/html/"))
986 .and(query_param("q", "test"))
987 .respond_with(
988 ResponseTemplate::new(200).set_body_string("<html>No results here</html>"),
989 )
990 .mount(&server)
991 .await;
992
993 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
994 let result = tool
995 .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
996 .await
997 .expect("normal empty result HTML should still parse");
998
999 assert!(result.contains("No results found"));
1000 }
1001
1002 #[test]
1003 fn test_constructor_clamps_web_search_limits() {
1004 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 0, 0);
1005 let html = r#"
1006 <a class="result__a" href="https://example.com">Example Title</a>
1007 <a class="result__snippet">This is a description</a>
1008 "#;
1009 let result = tool.parse_duckduckgo_results(html, "test").unwrap();
1010 assert!(result.contains("Example Title"));
1011 }
1012
1013 #[tokio::test]
1014 async fn test_execute_missing_query() {
1015 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
1016 let result = tool.execute(json!({})).await;
1017 assert!(result.is_err());
1018 }
1019
1020 #[tokio::test]
1021 async fn test_execute_empty_query() {
1022 let tool = WebSearchTool::new("duckduckgo".to_string(), None, 5, 15);
1023 let result = tool.execute(json!({"query": ""})).await;
1024 assert!(result.is_err());
1025 }
1026
1027 #[tokio::test]
1028 async fn test_execute_brave_without_api_key() {
1029 let tool = WebSearchTool::new("brave".to_string(), None, 5, 15);
1030 let result = tool.execute(json!({"query": "test"})).await;
1031 assert!(result.is_err());
1032 assert!(result.unwrap_err().to_string().contains("API key"));
1033 }
1034
/// Checks that a non-empty, plaintext key passed at construction time is returned
/// as-is by `resolve_brave_api_key`.
#[test]
fn test_resolve_brave_api_key_uses_boot_key() {
    let boot_key = "sk-plaintext-key";
    let tool = WebSearchTool::new("brave".to_string(), Some(boot_key.to_string()), 5, 15);

    let resolved = tool.resolve_brave_api_key().unwrap();
    assert_eq!(resolved, boot_key);
}
1046
/// Checks that, with no boot key, `resolve_brave_api_key` falls back to the
/// `brave_api_key` entry of the `[web_search]` table in the config file.
#[test]
fn test_resolve_brave_api_key_reloads_from_config() {
    let dir = tempfile::TempDir::new().unwrap();
    let config_path = dir.path().join("config.toml");
    let config_toml = "[web_search]\nbrave_api_key = \"fresh-key-from-disk\"\n";
    std::fs::write(&config_path, config_toml).unwrap();

    let tool = WebSearchTool::new_with_config(
        "brave".to_string(),
        None,
        None,
        None,
        5,
        15,
        config_path,
        false,
    );

    let resolved = tool.resolve_brave_api_key().unwrap();
    assert_eq!(resolved, "fresh-key-from-disk");
}
1071
/// Checks that an encrypted Brave key is resolved to its plaintext.
///
/// The same ciphertext is written to the config file and passed as the boot key,
/// with `secrets_encrypt` enabled; the resolved key must be the original plaintext.
#[test]
fn test_resolve_brave_api_key_decrypts_encrypted_key() {
    let dir = tempfile::TempDir::new().unwrap();
    let secret_store = zeroclaw_config::secrets::SecretStore::new(dir.path(), true);
    let ciphertext = secret_store.encrypt("brave-secret-key").unwrap();

    // The config file lives in the same directory the secret store was created in.
    let config_path = dir.path().join("config.toml");
    let config_toml = format!("[web_search]\nbrave_api_key = \"{}\"\n", ciphertext);
    std::fs::write(&config_path, config_toml).unwrap();

    let tool = WebSearchTool::new_with_config(
        "brave".to_string(),
        Some(ciphertext),
        None,
        None,
        5,
        15,
        config_path,
        true,
    );

    let resolved = tool.resolve_brave_api_key().unwrap();
    assert_eq!(resolved, "brave-secret-key");
}
1099
1100 #[tokio::test]
1101 async fn test_execute_searxng_without_instance_url() {
1102 let tmp = tempfile::TempDir::new().unwrap();
1103 let config_path = tmp.path().join("config.toml");
1104 std::fs::write(&config_path, "[web_search]\n").unwrap();
1105
1106 let tool = WebSearchTool::new_with_config(
1107 "searxng".to_string(),
1108 None,
1109 None,
1110 None,
1111 5,
1112 15,
1113 config_path,
1114 false,
1115 );
1116 let result = tool.execute(json!({"query": "test"})).await;
1117 assert!(result.is_err());
1118 assert!(
1119 result
1120 .unwrap_err()
1121 .to_string()
1122 .contains("SearXNG instance URL not configured")
1123 );
1124 }
1125
/// Checks that an empty Tavily `results` array is rendered as a "No results found" message.
#[test]
fn test_parse_tavily_results_empty() {
    let tool = WebSearchTool::new("tavily".to_string(), None, 5, 15);
    let payload = serde_json::json!({"results": []});

    let rendered = tool.parse_tavily_results(&payload, "test").unwrap();
    assert!(rendered.contains("No results found"));
}
1133
/// Checks that a populated Tavily response is rendered with the first result's
/// title, URL and content, plus the "via Tavily" attribution.
#[test]
fn test_parse_tavily_results_with_data() {
    let tool = WebSearchTool::new("tavily".to_string(), None, 5, 15);
    let payload = serde_json::json!({
        "query": "test",
        "results": [
            {
                "title": "Tavily Example",
                "url": "https://example.com",
                "content": "Pre-cleaned summary content from Tavily",
                "score": 0.91
            },
            {
                "title": "Another Result",
                "url": "https://example.org",
                "content": "Second result body"
            }
        ]
    });

    let rendered = tool.parse_tavily_results(&payload, "test").unwrap();

    let expected_fragments = [
        "Tavily Example",
        "https://example.com",
        "Pre-cleaned summary content from Tavily",
        "via Tavily",
    ];
    for fragment in expected_fragments {
        assert!(rendered.contains(fragment));
    }
}
1159
/// Checks that a Tavily payload without a `results` field is rejected with an
/// "Invalid Tavily API response" error.
#[test]
fn test_parse_tavily_results_invalid_response() {
    let tool = WebSearchTool::new("tavily".to_string(), None, 5, 15);
    let payload = serde_json::json!({"error": "bad api key"});

    let outcome = tool.parse_tavily_results(&payload, "test");
    // `unwrap_err` panics on `Ok`, so it also covers the "must fail" expectation.
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Invalid Tavily API response"));
}
1173
1174 #[tokio::test]
1175 async fn test_execute_tavily_without_api_key() {
1176 let tmp = tempfile::tempdir().unwrap();
1179 let config_path = tmp.path().join("config.toml");
1180 std::fs::write(&config_path, "[web_search]\n").unwrap();
1181
1182 let tool = WebSearchTool::new_with_config(
1183 "tavily".to_string(),
1184 None,
1185 None,
1186 None,
1187 5,
1188 15,
1189 config_path,
1190 false,
1191 );
1192 let result = tool.execute(json!({"query": "test"})).await;
1193 assert!(result.is_err());
1194 assert!(
1195 result
1196 .unwrap_err()
1197 .to_string()
1198 .contains("Tavily API key not configured")
1199 );
1200 }
1201
/// Checks that a plaintext Tavily key passed at construction time is what
/// `resolve_tavily_api_key` returns; the config path is left empty.
#[test]
fn test_resolve_tavily_api_key_uses_boot_key() {
    let boot_key = "tvly-boot-key";
    let tool = WebSearchTool::new_with_config(
        "tavily".to_string(),
        None,
        Some(boot_key.to_string()),
        None,
        5,
        15,
        PathBuf::new(),
        false,
    );

    let resolved = tool.resolve_tavily_api_key().unwrap();
    assert_eq!(resolved, boot_key);
}
1217
/// Checks that, with no boot key, `resolve_tavily_api_key` returns the
/// `tavily_api_key` value stored in the config file's `[web_search]` table.
#[test]
fn test_resolve_tavily_api_key_reloads_from_config() {
    let dir = tempfile::tempdir().unwrap();
    let config_path = dir.path().join("config.toml");
    let config_toml = "[web_search]\ntavily_api_key = \"tvly-fresh-from-disk\"\n";
    std::fs::write(&config_path, config_toml).unwrap();

    let tool = WebSearchTool::new_with_config(
        "tavily".to_string(),
        None,
        None,
        None,
        5,
        15,
        config_path,
        false,
    );

    let resolved = tool.resolve_tavily_api_key().unwrap();
    assert_eq!(resolved, "tvly-fresh-from-disk");
}
1242
/// Checks that an encrypted Tavily key is resolved to its plaintext.
///
/// The same ciphertext is written to the config file and passed as the boot key,
/// with `secrets_encrypt` enabled; the resolved key must be the original plaintext.
#[test]
fn test_resolve_tavily_api_key_decrypts_encrypted_key() {
    let dir = tempfile::TempDir::new().unwrap();
    let secret_store = zeroclaw_config::secrets::SecretStore::new(dir.path(), true);
    let ciphertext = secret_store.encrypt("tvly-secret-key").unwrap();

    // The config file lives in the same directory the secret store was created in.
    let config_path = dir.path().join("config.toml");
    let config_toml = format!("[web_search]\ntavily_api_key = \"{}\"\n", ciphertext);
    std::fs::write(&config_path, config_toml).unwrap();

    let tool = WebSearchTool::new_with_config(
        "tavily".to_string(),
        None,
        Some(ciphertext),
        None,
        5,
        15,
        config_path,
        true,
    );

    let resolved = tool.resolve_tavily_api_key().unwrap();
    assert_eq!(resolved, "tvly-secret-key");
}
1270
1271 #[tokio::test]
1278 async fn test_tavily_request_uses_bearer_auth_header_not_body_field() {
1279 use wiremock::matchers::{header, method, path};
1280 use wiremock::{Mock, MockServer, ResponseTemplate};
1281
1282 let server = MockServer::start().await;
1283
1284 Mock::given(method("POST"))
1285 .and(path("/search"))
1286 .and(header("authorization", "Bearer tvly-test-key"))
1287 .and(header("content-type", "application/json"))
1288 .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
1289 "query": "what is rust",
1290 "results": []
1291 })))
1292 .mount(&server)
1293 .await;
1294
1295 let tool = WebSearchTool::new_with_config(
1296 "tavily".to_string(),
1297 None,
1298 Some("tvly-test-key".to_string()),
1299 None,
1300 5,
1301 15,
1302 PathBuf::new(),
1303 false,
1304 );
1305
1306 let client = reqwest::Client::builder()
1310 .timeout(Duration::from_secs(15))
1311 .build()
1312 .expect("client builder should succeed without a proxy");
1313 let result = tool
1314 .search_tavily_with_client(&client, &format!("{}/search", server.uri()), "what is rust")
1315 .await
1316 .expect("request should succeed against the mock");
1317 assert!(
1318 result.contains("No results found"),
1319 "parser should report empty results: {result}"
1320 );
1321
1322 let recorded = server
1323 .received_requests()
1324 .await
1325 .expect("wiremock should have captured the request");
1326 assert_eq!(recorded.len(), 1, "expected exactly one POST /search");
1327
1328 let body: serde_json::Value =
1329 serde_json::from_slice(&recorded[0].body).expect("body should be JSON");
1330
1331 assert!(
1333 body.get("api_key").is_none(),
1334 "api_key must not appear in the request body; got: {body}"
1335 );
1336
1337 assert_eq!(body["query"], "what is rust");
1340 assert_eq!(body["search_depth"], "basic");
1341 assert_eq!(body["max_results"], 5);
1342 assert_eq!(body["include_answer"], false);
1343 assert_eq!(body["include_raw_content"], false);
1344 }
1345
/// Checks that an empty SearXNG `results` array is rendered as a "No results found" message.
#[test]
fn test_parse_searxng_results_empty() {
    let tool = WebSearchTool::new("searxng".to_string(), None, 5, 15);
    let payload = serde_json::json!({"results": []});

    let rendered = tool.parse_searxng_results(&payload, "test").unwrap();
    assert!(rendered.contains("No results found"));
}
1353
/// Checks that a populated SearXNG response is rendered with the first result's
/// title, URL and content, plus the "via SearXNG" attribution.
#[test]
fn test_parse_searxng_results_with_data() {
    let tool = WebSearchTool::new("searxng".to_string(), None, 5, 15);
    let payload = serde_json::json!({
        "results": [
            {
                "title": "SearXNG Example",
                "url": "https://example.com",
                "content": "A privacy-respecting metasearch engine"
            },
            {
                "title": "Another Result",
                "url": "https://example.org",
                "content": "More information here"
            }
        ]
    });

    let rendered = tool.parse_searxng_results(&payload, "test").unwrap();

    let expected_fragments = [
        "SearXNG Example",
        "https://example.com",
        "A privacy-respecting metasearch engine",
        "via SearXNG",
    ];
    for fragment in expected_fragments {
        assert!(rendered.contains(fragment));
    }
}
1377
/// Checks that a SearXNG payload without a `results` field is rejected with an
/// "Invalid SearXNG API response" error.
#[test]
fn test_parse_searxng_results_invalid_response() {
    let tool = WebSearchTool::new("searxng".to_string(), None, 5, 15);
    let payload = serde_json::json!({"error": "bad request"});

    let outcome = tool.parse_searxng_results(&payload, "test");
    // `unwrap_err` panics on `Ok`, so it also covers the "must fail" expectation.
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("Invalid SearXNG API response"));
}
1391
/// Checks that an instance URL supplied at construction time is returned by
/// `resolve_searxng_instance_url`.
///
/// The tool is built through the public `new_with_config` constructor, like the
/// other resolver tests, rather than a struct literal. That way the test does not
/// need editing whenever `WebSearchTool` gains a field, and it exercises the same
/// normalisation the constructor applies to every other instance.
#[test]
fn test_resolve_searxng_instance_url_from_boot() {
    let tool = WebSearchTool::new_with_config(
        "searxng".to_string(),
        None,
        None,
        Some("https://searx.example.com".to_string()),
        5,
        15,
        PathBuf::new(),
        false,
    );
    let url = tool.resolve_searxng_instance_url().unwrap();
    assert_eq!(url, "https://searx.example.com");
}
1407
/// Checks that, with no URL passed to the constructor, `resolve_searxng_instance_url`
/// returns the `searxng_instance_url` value from the config file's `[web_search]` table.
#[test]
fn test_resolve_searxng_instance_url_reloads_from_config() {
    let dir = tempfile::TempDir::new().unwrap();
    let config_path = dir.path().join("config.toml");
    let config_toml = "[web_search]\nsearxng_instance_url = \"https://search.local\"\n";
    std::fs::write(&config_path, config_toml).unwrap();

    let tool = WebSearchTool::new_with_config(
        "searxng".to_string(),
        None,
        None,
        None,
        5,
        15,
        config_path,
        false,
    );

    let resolved = tool.resolve_searxng_instance_url().unwrap();
    assert_eq!(resolved, "https://search.local");
}
1431
/// Checks that a Brave key added to the config file after the tool was built is
/// picked up on the next resolution.
///
/// With no boot key, resolution fails while the file has no `brave_api_key`, and
/// succeeds on the same tool instance once the key has been written.
#[test]
fn test_resolve_brave_api_key_picks_up_runtime_update() {
    let dir = tempfile::TempDir::new().unwrap();
    let config_path = dir.path().join("config.toml");

    // Start with a config that has the table but no key.
    let without_key = "[web_search]\n";
    std::fs::write(&config_path, without_key).unwrap();

    let tool = WebSearchTool::new_with_config(
        "brave".to_string(),
        None,
        None,
        None,
        5,
        15,
        config_path.clone(),
        false,
    );

    let first_attempt = tool.resolve_brave_api_key();
    assert!(first_attempt.is_err());

    // Rewrite the file as a runtime config update would.
    let with_key = "[web_search]\nbrave_api_key = \"runtime-updated-key\"\n";
    std::fs::write(&config_path, with_key).unwrap();

    let second_attempt = tool.resolve_brave_api_key().unwrap();
    assert_eq!(second_attempt, "runtime-updated-key");
}
1465}