1use super::web_search_provider_routing::{WebSearchProviderRoute, resolve_web_search_provider};
2use async_trait::async_trait;
3use regex::Regex;
4use serde_json::json;
5use std::path::{Path, PathBuf};
6use std::time::Duration;
7use zeroclaw_api::tool::{Tool, ToolResult};
8
/// Web search tool that sends a query to one of several search backends
/// (DuckDuckGo, Brave, Tavily, SearXNG, or Jina AI) and renders the results
/// as plain text.
pub struct WebSearchTool {
    /// Configured provider name; both constructors store it trimmed and
    /// lowercased. `execute` resolves it to a concrete backend.
    model_provider: String,
    /// Brave API key supplied at construction. When it is absent, empty, or
    /// still encrypted, the key is re-read from `config_path` at search time.
    boot_brave_api_key: Option<String>,
    /// Tavily API key supplied at construction; same fallback as the Brave key.
    boot_tavily_api_key: Option<String>,
    /// Jina AI API key supplied at construction; same fallback as the Brave key.
    boot_jina_api_key: Option<String>,
    /// SearXNG instance base URL; when absent or empty it is read from the
    /// config file instead.
    searxng_instance_url: Option<String>,
    /// Maximum number of results rendered per search (constructors clamp it to 1..=10).
    max_results: usize,
    /// HTTP request timeout in seconds (constructors enforce a minimum of 1).
    timeout_secs: u64,
    /// Path of the config file that is re-read when a key or URL must be reloaded.
    config_path: PathBuf,
    /// Flag forwarded to `SecretStore::new` when an encrypted key is decrypted.
    secrets_encrypt: bool,
}
36
37impl WebSearchTool {
38 pub fn new(
39 model_provider: String,
40 brave_api_key: Option<String>,
41 jina_api_key: Option<String>,
42 max_results: usize,
43 timeout_secs: u64,
44 ) -> Self {
45 Self {
46 model_provider: model_provider.trim().to_lowercase(),
47 boot_brave_api_key: brave_api_key,
48 boot_tavily_api_key: None,
49 boot_jina_api_key: jina_api_key,
50 searxng_instance_url: None,
51 max_results: max_results.clamp(1, 10),
52 timeout_secs: timeout_secs.max(1),
53 config_path: PathBuf::new(),
54 secrets_encrypt: false,
55 }
56 }
57
58 #[allow(clippy::too_many_arguments)]
64 pub fn new_with_config(
65 model_provider: String,
66 brave_api_key: Option<String>,
67 tavily_api_key: Option<String>,
68 jina_api_key: Option<String>,
69 searxng_instance_url: Option<String>,
70 max_results: usize,
71 timeout_secs: u64,
72 config_path: PathBuf,
73 secrets_encrypt: bool,
74 ) -> Self {
75 Self {
76 model_provider: model_provider.trim().to_lowercase(),
77 boot_brave_api_key: brave_api_key,
78 boot_tavily_api_key: tavily_api_key,
79 boot_jina_api_key: jina_api_key,
80 searxng_instance_url,
81 max_results: max_results.clamp(1, 10),
82 timeout_secs: timeout_secs.max(1),
83 config_path,
84 secrets_encrypt,
85 }
86 }
87
88 fn resolve_brave_api_key(&self) -> anyhow::Result<String> {
92 if let Some(ref key) = self.boot_brave_api_key
94 && !key.is_empty()
95 && !zeroclaw_config::secrets::SecretStore::is_encrypted(key)
96 {
97 return Ok(key.clone());
98 }
99
100 self.reload_brave_api_key()
102 }
103
104 fn reload_brave_api_key(&self) -> anyhow::Result<String> {
106 let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
107 ::zeroclaw_log::record!(
108 ERROR,
109 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
110 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
111 .with_attrs(::serde_json::json!({
112 "path": self.config_path.display().to_string(),
113 "search_provider": "brave",
114 "error": format!("{}", e),
115 })),
116 "web_search: failed to read config for Brave API key"
117 );
118 anyhow::Error::msg(format!(
119 "Failed to read config file {} for Brave API key: {e}",
120 self.config_path.display()
121 ))
122 })?;
123
124 let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
125 ::zeroclaw_log::record!(
126 ERROR,
127 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
128 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
129 .with_attrs(::serde_json::json!({
130 "path": self.config_path.display().to_string(),
131 "search_provider": "brave",
132 "error": format!("{}", e),
133 })),
134 "web_search: failed to parse config for Brave API key"
135 );
136 anyhow::Error::msg(format!(
137 "Failed to parse config file {} for Brave API key: {e}",
138 self.config_path.display()
139 ))
140 })?;
141
142 let raw_key = config
143 .web_search
144 .brave_api_key
145 .filter(|k| !k.is_empty())
146 .ok_or_else(|| {
147 ::zeroclaw_log::record!(
148 ERROR,
149 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
150 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
151 .with_attrs(::serde_json::json!({"search_provider": "brave"})),
152 "web_search: Brave API key not configured"
153 );
154 anyhow::Error::msg("Brave API key not configured")
155 })?;
156
157 if zeroclaw_config::secrets::SecretStore::is_encrypted(&raw_key) {
159 let zeroclaw_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
160 let store =
161 zeroclaw_config::secrets::SecretStore::new(zeroclaw_dir, self.secrets_encrypt);
162 let plaintext = store.decrypt(&raw_key)?;
163 if plaintext.is_empty() {
164 anyhow::bail!("Brave API key not configured (decrypted value is empty)");
165 }
166 Ok(plaintext)
167 } else {
168 Ok(raw_key)
169 }
170 }
171
172 async fn search_duckduckgo(&self, query: &str) -> anyhow::Result<String> {
173 self.search_duckduckgo_at("https://html.duckduckgo.com/html/", query)
174 .await
175 }
176
177 async fn search_duckduckgo_at(
181 &self,
182 endpoint_url: &str,
183 query: &str,
184 ) -> anyhow::Result<String> {
185 let encoded_query = urlencoding::encode(query);
186 let search_url = format!("{}?q={}", endpoint_url, encoded_query);
187
188 let builder = reqwest::Client::builder()
189 .timeout(Duration::from_secs(self.timeout_secs))
190 .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
191 let builder =
192 zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
193 let client = builder.build()?;
194
195 let response = client.get(&search_url).send().await?;
196 let status = response.status();
197 let final_url_is_block =
198 contains_ascii_case_insensitive(response.url().as_str(), "/wr.do?");
199
200 if !status.is_success() {
201 if let Some(message) = duckduckgo_block_message(status, final_url_is_block, false) {
202 anyhow::bail!(message);
203 }
204 anyhow::bail!("DuckDuckGo search failed with status: {}", status);
205 }
206
207 let html = response.text().await?;
208 let html_contains_block = contains_ascii_case_insensitive(&html, "/wr.do?")
209 || contains_ascii_case_insensitive(&html, "anomaly-modal");
210 if let Some(message) =
211 duckduckgo_block_message(status, final_url_is_block, html_contains_block)
212 {
213 anyhow::bail!(message);
214 }
215 self.parse_duckduckgo_results(&html, query)
216 }
217
218 fn parse_duckduckgo_results(&self, html: &str, query: &str) -> anyhow::Result<String> {
219 let link_regex = Regex::new(
221 r#"<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>"#,
222 )?;
223
224 let snippet_regex = Regex::new(r#"<a class="result__snippet[^"]*"[^>]*>([\s\S]*?)</a>"#)?;
226
227 let link_matches: Vec<_> = link_regex
228 .captures_iter(html)
229 .take(self.max_results + 2)
230 .collect();
231
232 let snippet_matches: Vec<_> = snippet_regex
233 .captures_iter(html)
234 .take(self.max_results + 2)
235 .collect();
236
237 if link_matches.is_empty() {
238 return Ok(format!("No results found for: {}", query));
239 }
240
241 let mut lines = vec![format!("Search results for: {} (via DuckDuckGo)", query)];
242
243 let count = link_matches.len().min(self.max_results);
244
245 for i in 0..count {
246 let caps = &link_matches[i];
247 let url_str = decode_ddg_redirect_url(&caps[1]);
248 let title = strip_tags(&caps[2]);
249
250 lines.push(format!("{}. {}", i + 1, title.trim()));
251 lines.push(format!(" {}", url_str.trim()));
252
253 if i < snippet_matches.len() {
255 let snippet = strip_tags(&snippet_matches[i][1]);
256 let snippet = snippet.trim();
257 if !snippet.is_empty() {
258 lines.push(format!(" {}", snippet));
259 }
260 }
261 }
262
263 Ok(lines.join("\n"))
264 }
265
266 async fn search_brave(&self, query: &str) -> anyhow::Result<String> {
267 let api_key = self.resolve_brave_api_key()?;
268
269 let encoded_query = urlencoding::encode(query);
270 let search_url = format!(
271 "https://api.search.brave.com/res/v1/web/search?q={}&count={}",
272 encoded_query, self.max_results
273 );
274
275 let builder = reqwest::Client::builder().timeout(Duration::from_secs(self.timeout_secs));
276 let builder =
277 zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
278 let client = builder.build()?;
279
280 let response = client
281 .get(&search_url)
282 .header("Accept", "application/json")
283 .header("X-Subscription-Token", &api_key)
284 .send()
285 .await?;
286
287 if !response.status().is_success() {
288 anyhow::bail!("Brave search failed with status: {}", response.status());
289 }
290
291 let json: serde_json::Value = response.json().await?;
292 self.parse_brave_results(&json, query)
293 }
294
295 fn resolve_tavily_api_key(&self) -> anyhow::Result<String> {
298 if let Some(ref key) = self.boot_tavily_api_key
299 && !key.is_empty()
300 && !zeroclaw_config::secrets::SecretStore::is_encrypted(key)
301 {
302 return Ok(key.clone());
303 }
304 self.reload_tavily_api_key()
305 }
306
307 fn reload_tavily_api_key(&self) -> anyhow::Result<String> {
309 let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
310 ::zeroclaw_log::record!(
311 ERROR,
312 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
313 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
314 .with_attrs(::serde_json::json!({
315 "path": self.config_path.display().to_string(),
316 "search_provider": "tavily",
317 "error": format!("{}", e),
318 })),
319 "web_search: failed to read config for Tavily API key"
320 );
321 anyhow::Error::msg(format!(
322 "Failed to read config file {} for Tavily API key: {e}",
323 self.config_path.display()
324 ))
325 })?;
326
327 let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
328 ::zeroclaw_log::record!(
329 ERROR,
330 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
331 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
332 .with_attrs(::serde_json::json!({
333 "path": self.config_path.display().to_string(),
334 "search_provider": "tavily",
335 "error": format!("{}", e),
336 })),
337 "web_search: failed to parse config for Tavily API key"
338 );
339 anyhow::Error::msg(format!(
340 "Failed to parse config file {} for Tavily API key: {e}",
341 self.config_path.display()
342 ))
343 })?;
344
345 let raw_key = config
346 .web_search
347 .tavily_api_key
348 .filter(|k| !k.is_empty())
349 .ok_or_else(|| {
350 ::zeroclaw_log::record!(
351 ERROR,
352 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
353 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
354 .with_attrs(::serde_json::json!({"search_provider": "tavily"})),
355 "web_search: Tavily API key not configured"
356 );
357 anyhow::Error::msg("Tavily API key not configured")
358 })?;
359
360 if zeroclaw_config::secrets::SecretStore::is_encrypted(&raw_key) {
361 let zeroclaw_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
362 let store =
363 zeroclaw_config::secrets::SecretStore::new(zeroclaw_dir, self.secrets_encrypt);
364 let plaintext = store.decrypt(&raw_key)?;
365 if plaintext.is_empty() {
366 anyhow::bail!("Tavily API key not configured (decrypted value is empty)");
367 }
368 Ok(plaintext)
369 } else {
370 Ok(raw_key)
371 }
372 }
373
374 async fn search_tavily(&self, query: &str) -> anyhow::Result<String> {
375 let client = self.build_tavily_client()?;
376 self.search_tavily_with_client(&client, "https://api.tavily.com/search", query)
377 .await
378 }
379
380 fn build_tavily_client(&self) -> anyhow::Result<reqwest::Client> {
388 let builder = reqwest::Client::builder().timeout(Duration::from_secs(self.timeout_secs));
389 let builder =
390 zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
391 Ok(builder.build()?)
392 }
393
394 async fn search_tavily_with_client(
399 &self,
400 client: &reqwest::Client,
401 url: &str,
402 query: &str,
403 ) -> anyhow::Result<String> {
404 let api_key = self.resolve_tavily_api_key()?;
405
406 let body = serde_json::json!({
411 "query": query,
412 "max_results": self.max_results,
413 "search_depth": "basic",
414 "include_answer": false,
415 "include_raw_content": false,
416 });
417
418 let response = client
419 .post(url)
420 .bearer_auth(&api_key)
421 .json(&body)
422 .send()
423 .await?;
424
425 if !response.status().is_success() {
426 anyhow::bail!("Tavily search failed with status: {}", response.status());
427 }
428
429 let json: serde_json::Value = response.json().await?;
430 self.parse_tavily_results(&json, query)
431 }
432
433 fn parse_tavily_results(
434 &self,
435 json: &serde_json::Value,
436 query: &str,
437 ) -> anyhow::Result<String> {
438 let results = json
439 .get("results")
440 .and_then(|r| r.as_array())
441 .ok_or_else(|| {
442 ::zeroclaw_log::record!(
443 ERROR,
444 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
445 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
446 .with_attrs(::serde_json::json!({"search_provider": "tavily"})),
447 "web_search: invalid Tavily response"
448 );
449 anyhow::Error::msg("Invalid Tavily API response")
450 })?;
451
452 if results.is_empty() {
453 return Ok(format!("No results found for: {}", query));
454 }
455
456 let mut lines = vec![format!("Search results for: {} (via Tavily)", query)];
457
458 for (i, result) in results.iter().take(self.max_results).enumerate() {
459 let title = result
460 .get("title")
461 .and_then(|t| t.as_str())
462 .unwrap_or("No title");
463 let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
464 let content = result.get("content").and_then(|c| c.as_str()).unwrap_or("");
467
468 lines.push(format!("{}. {}", i + 1, title));
469 lines.push(format!(" {}", url));
470 if !content.is_empty() {
471 lines.push(format!(" {}", content));
472 }
473 }
474
475 Ok(lines.join("\n"))
476 }
477
478 fn resolve_jina_api_key(&self) -> anyhow::Result<String> {
481 if let Some(ref key) = self.boot_jina_api_key
482 && !key.is_empty()
483 && !zeroclaw_config::secrets::SecretStore::is_encrypted(key)
484 {
485 return Ok(key.clone());
486 }
487 self.reload_jina_api_key()
488 }
489
490 fn reload_jina_api_key(&self) -> anyhow::Result<String> {
492 let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
493 ::zeroclaw_log::record!(
494 ERROR,
495 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
496 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
497 .with_attrs(::serde_json::json!({
498 "path": self.config_path.display().to_string(),
499 "search_provider": "jina",
500 "error": format!("{}", e),
501 })),
502 "web_search: failed to read config for Jina AI API key"
503 );
504 anyhow::Error::msg(format!(
505 "Failed to read config file {} for Jina AI API key: {e}",
506 self.config_path.display()
507 ))
508 })?;
509
510 let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
511 ::zeroclaw_log::record!(
512 ERROR,
513 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
514 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
515 .with_attrs(::serde_json::json!({
516 "path": self.config_path.display().to_string(),
517 "search_provider": "jina",
518 "error": format!("{}", e),
519 })),
520 "web_search: failed to parse config for Jina AI API key"
521 );
522 anyhow::Error::msg(format!(
523 "Failed to parse config file {} for Jina AI API key: {e}",
524 self.config_path.display()
525 ))
526 })?;
527
528 let raw_key = config
529 .web_search
530 .jina_api_key
531 .filter(|k| !k.is_empty())
532 .ok_or_else(|| {
533 ::zeroclaw_log::record!(
534 ERROR,
535 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
536 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
537 .with_attrs(::serde_json::json!({"search_provider": "jina"})),
538 "web_search: Jina AI API key not configured"
539 );
540 anyhow::Error::msg("Jina AI API key not configured")
541 })?;
542
543 if zeroclaw_config::secrets::SecretStore::is_encrypted(&raw_key) {
544 let zeroclaw_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
545 let store =
546 zeroclaw_config::secrets::SecretStore::new(zeroclaw_dir, self.secrets_encrypt);
547 let plaintext = store.decrypt(&raw_key)?;
548 if plaintext.is_empty() {
549 anyhow::bail!("Jina AI API key not configured (decrypted value is empty)");
550 }
551 Ok(plaintext)
552 } else {
553 Ok(raw_key)
554 }
555 }
556
557 async fn search_jina(&self, query: &str) -> anyhow::Result<String> {
558 let api_key = self.resolve_jina_api_key()?;
559
560 let builder = reqwest::Client::builder()
561 .timeout(Duration::from_secs(self.timeout_secs))
562 .user_agent("ZeroClaw/1.0 (https://zeroclaw.ai)");
563 let builder =
564 zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
565 let client = builder.build()?;
566
567 let body = serde_json::json!({"q": query});
569
570 let response = client
571 .post("https://s.jina.ai/")
572 .header("Authorization", format!("Bearer {}", api_key))
573 .header("Content-Type", "application/json")
574 .header("Accept", "application/json")
575 .json(&body)
576 .send()
577 .await?;
578
579 if !response.status().is_success() {
580 anyhow::bail!("Jina AI search failed with status: {}", response.status());
581 }
582
583 let json: serde_json::Value = response.json().await?;
584 self.parse_jina_results(&json, query)
585 }
586
587 fn parse_jina_results(&self, json: &serde_json::Value, query: &str) -> anyhow::Result<String> {
588 let results = json.get("data").and_then(|r| r.as_array()).ok_or_else(|| {
590 ::zeroclaw_log::record!(
591 ERROR,
592 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
593 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
594 .with_attrs(::serde_json::json!({"search_provider": "jina"})),
595 "web_search: invalid Jina AI response"
596 );
597 anyhow::Error::msg("Invalid Jina AI API response")
598 })?;
599
600 if results.is_empty() {
601 return Ok(format!("No results found for: {}", query));
602 }
603
604 let mut lines = vec![format!("Search results for: {} (via Jina AI)", query)];
605
606 for (i, result) in results.iter().take(self.max_results).enumerate() {
607 let title = result
608 .get("title")
609 .and_then(|t| t.as_str())
610 .unwrap_or("No title");
611 let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
612 let snippet = result
615 .get("content")
616 .and_then(|c| c.as_str())
617 .or_else(|| result.get("description").and_then(|d| d.as_str()))
618 .unwrap_or("");
619
620 lines.push(format!("{}. {}", i + 1, title));
621 lines.push(format!(" {}", url));
622 if !snippet.is_empty() {
623 lines.push(format!(" {}", snippet));
624 }
625 }
626
627 Ok(lines.join("\n"))
628 }
629
630 fn parse_brave_results(&self, json: &serde_json::Value, query: &str) -> anyhow::Result<String> {
631 let results = json
632 .get("web")
633 .and_then(|w| w.get("results"))
634 .and_then(|r| r.as_array())
635 .ok_or_else(|| {
636 ::zeroclaw_log::record!(
637 ERROR,
638 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
639 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
640 .with_attrs(::serde_json::json!({"search_provider": "brave"})),
641 "web_search: invalid Brave response"
642 );
643 anyhow::Error::msg("Invalid Brave API response")
644 })?;
645
646 if results.is_empty() {
647 return Ok(format!("No results found for: {}", query));
648 }
649
650 let mut lines = vec![format!("Search results for: {} (via Brave)", query)];
651
652 for (i, result) in results.iter().take(self.max_results).enumerate() {
653 let title = result
654 .get("title")
655 .and_then(|t| t.as_str())
656 .unwrap_or("No title");
657 let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
658 let description = result
659 .get("description")
660 .and_then(|d| d.as_str())
661 .unwrap_or("");
662
663 lines.push(format!("{}. {}", i + 1, title));
664 lines.push(format!(" {}", url));
665 if !description.is_empty() {
666 lines.push(format!(" {}", description));
667 }
668 }
669
670 Ok(lines.join("\n"))
671 }
672
673 fn resolve_searxng_instance_url(&self) -> anyhow::Result<String> {
676 if let Some(ref url) = self.searxng_instance_url
677 && !url.is_empty()
678 {
679 return Ok(url.clone());
680 }
681
682 let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
684 ::zeroclaw_log::record!(
685 ERROR,
686 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
687 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
688 .with_attrs(::serde_json::json!({
689 "path": self.config_path.display().to_string(),
690 "search_provider": "searxng",
691 "error": format!("{}", e),
692 })),
693 "web_search: failed to read config for SearXNG URL"
694 );
695 anyhow::Error::msg(format!(
696 "Failed to read config file {} for SearXNG instance URL: {e}",
697 self.config_path.display()
698 ))
699 })?;
700
701 let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
702 ::zeroclaw_log::record!(
703 ERROR,
704 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
705 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
706 .with_attrs(::serde_json::json!({
707 "path": self.config_path.display().to_string(),
708 "search_provider": "searxng",
709 "error": format!("{}", e),
710 })),
711 "web_search: failed to parse config for SearXNG URL"
712 );
713 anyhow::Error::msg(format!(
714 "Failed to parse config file {} for SearXNG instance URL: {e}",
715 self.config_path.display()
716 ))
717 })?;
718
719 config
720 .web_search
721 .searxng_instance_url
722 .filter(|u| !u.is_empty())
723 .ok_or_else(|| {
724 ::zeroclaw_log::record!(
725 ERROR,
726 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
727 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
728 .with_attrs(::serde_json::json!({"search_provider": "searxng"})),
729 "web_search: SearXNG instance URL not configured"
730 );
731 anyhow::Error::msg(
732 "SearXNG instance URL not configured. Set [web_search] searxng_instance_url \
733 in config.toml or the SEARXNG_INSTANCE_URL environment variable.",
734 )
735 })
736 }
737
738 async fn search_searxng(&self, query: &str) -> anyhow::Result<String> {
739 let instance_url = self.resolve_searxng_instance_url()?;
740 let base_url = instance_url.trim_end_matches('/');
741
742 let encoded_query = urlencoding::encode(query);
743 let search_url = format!(
744 "{}/search?q={}&format=json&pageno=1",
745 base_url, encoded_query
746 );
747
748 let builder = reqwest::Client::builder()
749 .timeout(Duration::from_secs(self.timeout_secs))
750 .user_agent("ZeroClaw/1.0");
751 let builder =
752 zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
753 let client = builder.build()?;
754
755 let response = client
756 .get(&search_url)
757 .header("Accept", "application/json")
758 .send()
759 .await?;
760
761 if !response.status().is_success() {
762 anyhow::bail!("SearXNG search failed with status: {}", response.status());
763 }
764
765 let json: serde_json::Value = response.json().await?;
766 self.parse_searxng_results(&json, query)
767 }
768
769 fn parse_searxng_results(
770 &self,
771 json: &serde_json::Value,
772 query: &str,
773 ) -> anyhow::Result<String> {
774 let results = json
775 .get("results")
776 .and_then(|r| r.as_array())
777 .ok_or_else(|| {
778 ::zeroclaw_log::record!(
779 ERROR,
780 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
781 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
782 .with_attrs(::serde_json::json!({"search_provider": "searxng"})),
783 "web_search: invalid SearXNG response"
784 );
785 anyhow::Error::msg("Invalid SearXNG API response")
786 })?;
787
788 if results.is_empty() {
789 return Ok(format!("No results found for: {}", query));
790 }
791
792 let mut lines = vec![format!("Search results for: {} (via SearXNG)", query)];
793
794 for (i, result) in results.iter().take(self.max_results).enumerate() {
795 let title = result
796 .get("title")
797 .and_then(|t| t.as_str())
798 .unwrap_or("No title");
799 let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
800 let content = result.get("content").and_then(|c| c.as_str()).unwrap_or("");
801
802 lines.push(format!("{}. {}", i + 1, title));
803 lines.push(format!(" {}", url));
804 if !content.is_empty() {
805 lines.push(format!(" {}", content));
806 }
807 }
808
809 Ok(lines.join("\n"))
810 }
811}
812
813fn decode_ddg_redirect_url(raw_url: &str) -> String {
814 if let Some(index) = raw_url.find("uddg=") {
815 let encoded = &raw_url[index + 5..];
816 let encoded = encoded.split('&').next().unwrap_or(encoded);
817 if let Ok(decoded) = urlencoding::decode(encoded) {
818 return decoded.into_owned();
819 }
820 }
821
822 raw_url.to_string()
823}
824
/// Error text reported when a DuckDuckGo response is classified as a block of
/// the automated request; it points the user at the alternative providers.
const DUCKDUCKGO_BLOCK_MESSAGE: &str = "DuckDuckGo blocked the automated search request. Try configuring SearXNG, Brave, or Tavily as the web search provider.";
826
827fn duckduckgo_block_message(
828 status: reqwest::StatusCode,
829 final_url_is_block: bool,
830 html_contains_block: bool,
831) -> Option<&'static str> {
832 if status == reqwest::StatusCode::FORBIDDEN || final_url_is_block || html_contains_block {
833 Some(DUCKDUCKGO_BLOCK_MESSAGE)
834 } else {
835 None
836 }
837}
838
/// Reports whether `haystack` contains `needle`, ignoring ASCII case.
///
/// The comparison is done on raw bytes, so no lowercased copy of `haystack`
/// is allocated. An empty `needle` matches every haystack, mirroring
/// `str::contains("")`.
fn contains_ascii_case_insensitive(haystack: &str, needle: &str) -> bool {
    let needle = needle.as_bytes();
    // `slice::windows` panics on a window size of zero, so handle it up front.
    if needle.is_empty() {
        return true;
    }
    haystack
        .as_bytes()
        .windows(needle.len())
        .any(|window| window.eq_ignore_ascii_case(needle))
}
845
/// Removes HTML tags from `content`, keeping the text between them.
///
/// A tag is a `<`, at least one character other than `>`, and the next `>`;
/// this is the same set of spans the pattern `<[^>]+>` matches. A bare `<>`
/// and a `<` with no later `>` are left in place.
///
/// The scan uses only string searches, so no regex is compiled per call and
/// there is no panic path.
fn strip_tags(content: &str) -> String {
    let mut out = String::with_capacity(content.len());
    let mut rest = content;

    while let Some(open) = rest.find('<') {
        let after_open = &rest[open + 1..];
        match after_open.find('>') {
            // No closing bracket anywhere ahead: nothing further can be a tag.
            None => break,
            // "<>" has no tag body; keep the '<' and resume scanning at the '>'.
            Some(0) => {
                out.push_str(&rest[..=open]);
                rest = after_open;
            }
            // Drop everything from '<' through the matching '>'.
            Some(close) => {
                out.push_str(&rest[..open]);
                rest = &after_open[close + 1..];
            }
        }
    }

    out.push_str(rest);
    out
}
850
851#[async_trait]
852impl Tool for WebSearchTool {
853 fn name(&self) -> &str {
854 "web_search_tool"
855 }
856
857 fn description(&self) -> &str {
858 "Search the web for information. Returns relevant search results with titles, URLs, and descriptions. Use this to find current information, news, or research topics."
859 }
860
861 fn parameters_schema(&self) -> serde_json::Value {
862 json!({
863 "type": "object",
864 "properties": {
865 "query": {
866 "type": "string",
867 "description": "The search query. Be specific for better results."
868 }
869 },
870 "required": ["query"]
871 })
872 }
873
874 async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
875 let query = args.get("query").and_then(|q| q.as_str()).ok_or_else(|| {
876 ::zeroclaw_log::record!(
877 WARN,
878 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
879 .with_outcome(::zeroclaw_log::EventOutcome::Failure)
880 .with_attrs(::serde_json::json!({"param": "query"})),
881 "web_search: missing query parameter"
882 );
883 anyhow::Error::msg("Missing required parameter: query")
884 })?;
885
886 if query.trim().is_empty() {
887 anyhow::bail!("Search query cannot be empty");
888 }
889
890 ::zeroclaw_log::record!(
891 INFO,
892 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
893 &format!("Searching web for: {}", query)
894 );
895
896 let resolution = resolve_web_search_provider(&self.model_provider);
897 if resolution.used_fallback {
898 ::zeroclaw_log::record!(
899 WARN,
900 ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
901 .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
902 &format!(
903 "Unknown web search model_provider '{}'; falling back to '{}'",
904 self.model_provider, resolution.canonical_provider
905 )
906 );
907 }
908
909 let result = match resolution.route {
910 WebSearchProviderRoute::DuckDuckGo => self.search_duckduckgo(query).await?,
911 WebSearchProviderRoute::Brave => self.search_brave(query).await?,
912 WebSearchProviderRoute::Tavily => self.search_tavily(query).await?,
913 WebSearchProviderRoute::SearXNG => self.search_searxng(query).await?,
914 WebSearchProviderRoute::Jina => self.search_jina(query).await?,
915 };
916
917 Ok(ToolResult {
918 success: true,
919 output: result,
920 error: None,
921 })
922 }
923}
924
925#[cfg(test)]
926mod tests {
927 use super::*;
928
929 #[test]
930 fn test_tool_name() {
931 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
932 assert_eq!(tool.name(), "web_search_tool");
933 }
934
935 #[test]
936 fn test_tool_description() {
937 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
938 assert!(tool.description().contains("Search the web"));
939 }
940
941 #[test]
942 fn test_parameters_schema() {
943 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
944 let schema = tool.parameters_schema();
945 assert_eq!(schema["type"], "object");
946 assert!(schema["properties"]["query"].is_object());
947 }
948
949 #[test]
950 fn test_strip_tags() {
951 let html = "<b>Hello</b> <i>World</i>";
952 assert_eq!(strip_tags(html), "Hello World");
953 }
954
955 #[test]
956 fn test_parse_duckduckgo_results_empty() {
957 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
958 let result = tool
959 .parse_duckduckgo_results("<html>No results here</html>", "test")
960 .unwrap();
961 assert!(result.contains("No results found"));
962 }
963
    /// A page with one result link renders both its title and its URL.
    #[test]
    fn test_parse_duckduckgo_results_with_data() {
        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
        // Minimal fixture: one result anchor followed by one snippet anchor.
        let html = r#"
            <a class="result__a" href="https://example.com">Example Title</a>
            <a class="result__snippet">This is a description</a>
        "#;
        let result = tool.parse_duckduckgo_results(html, "test").unwrap();
        assert!(result.contains("Example Title"));
        assert!(result.contains("https://example.com"));
    }
975
    /// A DuckDuckGo redirect link is rendered as its decoded `uddg` target,
    /// without the trailing redirect parameters.
    #[test]
    fn test_parse_duckduckgo_results_decodes_redirect_url() {
        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
        // The href is a redirect whose `uddg` parameter holds the percent-encoded target.
        let html = r#"
            <a class="result__a" href="https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpath%3Fa%3D1&rut=test">Example Title</a>
            <a class="result__snippet">This is a description</a>
        "#;
        let result = tool.parse_duckduckgo_results(html, "test").unwrap();
        assert!(result.contains("https://example.com/path?a=1"));
        assert!(!result.contains("rut=test"));
    }
987
988 #[test]
989 fn test_duckduckgo_block_detection_reports_forbidden_status() {
990 let message = duckduckgo_block_message(reqwest::StatusCode::FORBIDDEN, false, false)
991 .expect("403 responses should be classified as a DuckDuckGo block");
992
993 assert!(message.contains("DuckDuckGo blocked"));
994 assert!(message.contains("SearXNG"));
995 }
996
997 #[test]
998 fn test_duckduckgo_block_detection_reports_verification_redirect() {
999 let message = duckduckgo_block_message(reqwest::StatusCode::OK, true, false)
1000 .expect("verification redirects should be classified as a DuckDuckGo block");
1001
1002 assert!(message.contains("DuckDuckGo blocked"));
1003 assert!(message.contains("SearXNG"));
1004 }
1005
1006 #[test]
1007 fn test_duckduckgo_block_detection_reports_verification_form_in_html() {
1008 let message = duckduckgo_block_message(reqwest::StatusCode::OK, false, true)
1009 .expect("verification form HTML should be classified as a DuckDuckGo block");
1010
1011 assert!(message.contains("DuckDuckGo blocked"));
1012 assert!(message.contains("SearXNG"));
1013 }
1014
1015 #[test]
1016 fn test_duckduckgo_block_detection_ignores_normal_empty_results() {
1017 let message = duckduckgo_block_message(reqwest::StatusCode::OK, false, false);
1018
1019 assert!(message.is_none());
1020 }
1021
1022 #[test]
1023 fn test_duckduckgo_block_detection_is_case_insensitive_without_allocating_html() {
1024 assert!(contains_ascii_case_insensitive(
1025 r#"<form action="/WR.DO?u=https%3A%2F%2Fhtml.duckduckgo.com%2Fhtml%2F"></form>"#,
1026 "/wr.do?"
1027 ));
1028 }
1029
1030 #[tokio::test]
1031 async fn test_duckduckgo_request_reports_forbidden_status() {
1032 use wiremock::matchers::{method, path, query_param};
1033 use wiremock::{Mock, MockServer, ResponseTemplate};
1034
1035 let server = MockServer::start().await;
1036 Mock::given(method("GET"))
1037 .and(path("/html/"))
1038 .and(query_param("q", "test"))
1039 .respond_with(ResponseTemplate::new(403))
1040 .mount(&server)
1041 .await;
1042
1043 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1044 let err = tool
1045 .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
1046 .await
1047 .expect_err("403 should be reported as a DuckDuckGo block");
1048
1049 assert!(err.to_string().contains("DuckDuckGo blocked"));
1050 assert!(err.to_string().contains("SearXNG"));
1051 }
1052
1053 #[tokio::test]
1054 async fn test_duckduckgo_request_reports_verification_redirect_url() {
1055 use wiremock::matchers::{method, path, query_param};
1056 use wiremock::{Mock, MockServer, ResponseTemplate};
1057
1058 let server = MockServer::start().await;
1059 Mock::given(method("GET"))
1060 .and(path("/html/"))
1061 .and(query_param("q", "test"))
1062 .respond_with(
1063 ResponseTemplate::new(302)
1064 .insert_header("location", format!("{}/wr.do?u=blocked", server.uri())),
1065 )
1066 .mount(&server)
1067 .await;
1068 Mock::given(method("GET"))
1069 .and(path("/wr.do"))
1070 .respond_with(ResponseTemplate::new(200).set_body_string("<html></html>"))
1071 .mount(&server)
1072 .await;
1073
1074 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1075 let err = tool
1076 .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
1077 .await
1078 .expect_err("verification redirects should be reported as a DuckDuckGo block");
1079
1080 assert!(err.to_string().contains("DuckDuckGo blocked"));
1081 assert!(err.to_string().contains("SearXNG"));
1082 }
1083
1084 #[tokio::test]
1085 async fn test_duckduckgo_request_reports_verification_form_html() {
1086 use wiremock::matchers::{method, path, query_param};
1087 use wiremock::{Mock, MockServer, ResponseTemplate};
1088
1089 let server = MockServer::start().await;
1090 Mock::given(method("GET"))
1091 .and(path("/html/"))
1092 .and(query_param("q", "test"))
1093 .respond_with(ResponseTemplate::new(200).set_body_string(
1094 r#"<form action="/wr.do?u=https%3A%2F%2Fhtml.duckduckgo.com%2Fhtml%2F"></form>"#,
1095 ))
1096 .mount(&server)
1097 .await;
1098
1099 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1100 let err = tool
1101 .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
1102 .await
1103 .expect_err("verification HTML should be reported as a DuckDuckGo block");
1104
1105 assert!(err.to_string().contains("DuckDuckGo blocked"));
1106 assert!(err.to_string().contains("SearXNG"));
1107 }
1108
1109 #[tokio::test]
1110 async fn test_duckduckgo_request_reports_anomaly_modal_block() {
1111 use wiremock::matchers::{method, path, query_param};
1116 use wiremock::{Mock, MockServer, ResponseTemplate};
1117
1118 let server = MockServer::start().await;
1119 Mock::given(method("GET"))
1120 .and(path("/html/"))
1121 .and(query_param("q", "test"))
1122 .respond_with(ResponseTemplate::new(202).set_body_string(
1123 r#"<html><body><div class="anomaly-modal__title">Unusual Traffic Detected</div></body></html>"#,
1124 ))
1125 .mount(&server)
1126 .await;
1127
1128 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1129 let err = tool
1130 .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
1131 .await
1132 .expect_err("anomaly-modal page should be reported as a DuckDuckGo block");
1133
1134 assert!(err.to_string().contains("DuckDuckGo blocked"));
1135 assert!(err.to_string().contains("SearXNG"));
1136 }
1137
1138 #[tokio::test]
1139 async fn test_duckduckgo_request_preserves_normal_empty_results() {
1140 use wiremock::matchers::{method, path, query_param};
1141 use wiremock::{Mock, MockServer, ResponseTemplate};
1142
1143 let server = MockServer::start().await;
1144 Mock::given(method("GET"))
1145 .and(path("/html/"))
1146 .and(query_param("q", "test"))
1147 .respond_with(
1148 ResponseTemplate::new(200).set_body_string("<html>No results here</html>"),
1149 )
1150 .mount(&server)
1151 .await;
1152
1153 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1154 let result = tool
1155 .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
1156 .await
1157 .expect("normal empty result HTML should still parse");
1158
1159 assert!(result.contains("No results found"));
1160 }
1161
1162 #[test]
1163 fn test_constructor_clamps_web_search_limits() {
1164 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 0, 0);
1165 let html = r#"
1166 <a class="result__a" href="https://example.com">Example Title</a>
1167 <a class="result__snippet">This is a description</a>
1168 "#;
1169 let result = tool.parse_duckduckgo_results(html, "test").unwrap();
1170 assert!(result.contains("Example Title"));
1171 }
1172
1173 #[tokio::test]
1174 async fn test_execute_missing_query() {
1175 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1176 let result = tool.execute(json!({})).await;
1177 assert!(result.is_err());
1178 }
1179
1180 #[tokio::test]
1181 async fn test_execute_empty_query() {
1182 let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1183 let result = tool.execute(json!({"query": ""})).await;
1184 assert!(result.is_err());
1185 }
1186
1187 #[tokio::test]
1188 async fn test_execute_brave_without_api_key() {
1189 let tool = WebSearchTool::new("brave".to_string(), None, None, 5, 15);
1190 let result = tool.execute(json!({"query": "test"})).await;
1191 assert!(result.is_err());
1192 assert!(result.unwrap_err().to_string().contains("API key"));
1193 }
1194
1195 #[test]
1196 fn test_resolve_brave_api_key_uses_boot_key() {
1197 let tool = WebSearchTool::new(
1198 "brave".to_string(),
1199 Some("sk-plaintext-key".to_string()),
1200 None,
1201 5,
1202 15,
1203 );
1204 let key = tool.resolve_brave_api_key().unwrap();
1205 assert_eq!(key, "sk-plaintext-key");
1206 }
1207
1208 #[test]
1209 fn test_resolve_brave_api_key_reloads_from_config() {
1210 let tmp = tempfile::TempDir::new().unwrap();
1211 let config_path = tmp.path().join("config.toml");
1212 std::fs::write(
1213 &config_path,
1214 "[web_search]\nbrave_api_key = \"fresh-key-from-disk\"\n",
1215 )
1216 .unwrap();
1217
1218 let tool = WebSearchTool::new_with_config(
1220 "brave".to_string(),
1221 None,
1222 None,
1223 None,
1224 None,
1225 5,
1226 15,
1227 config_path,
1228 false,
1229 );
1230 let key = tool.resolve_brave_api_key().unwrap();
1231 assert_eq!(key, "fresh-key-from-disk");
1232 }
1233
1234 #[test]
1235 fn test_resolve_brave_api_key_decrypts_encrypted_key() {
1236 let tmp = tempfile::TempDir::new().unwrap();
1237 let store = zeroclaw_config::secrets::SecretStore::new(tmp.path(), true);
1238 let encrypted = store.encrypt("brave-secret-key").unwrap();
1239
1240 let config_path = tmp.path().join("config.toml");
1241 std::fs::write(
1242 &config_path,
1243 format!("[web_search]\nbrave_api_key = \"{}\"\n", encrypted),
1244 )
1245 .unwrap();
1246
1247 let tool = WebSearchTool::new_with_config(
1249 "brave".to_string(),
1250 Some(encrypted),
1251 None,
1252 None,
1253 None,
1254 5,
1255 15,
1256 config_path,
1257 true,
1258 );
1259 let key = tool.resolve_brave_api_key().unwrap();
1260 assert_eq!(key, "brave-secret-key");
1261 }
1262
1263 #[tokio::test]
1264 async fn test_execute_searxng_without_instance_url() {
1265 let tmp = tempfile::TempDir::new().unwrap();
1266 let config_path = tmp.path().join("config.toml");
1267 std::fs::write(&config_path, "[web_search]\n").unwrap();
1268
1269 let tool = WebSearchTool::new_with_config(
1270 "searxng".to_string(),
1271 None,
1272 None,
1273 None,
1274 None,
1275 5,
1276 15,
1277 config_path,
1278 false,
1279 );
1280 let result = tool.execute(json!({"query": "test"})).await;
1281 assert!(result.is_err());
1282 assert!(
1283 result
1284 .unwrap_err()
1285 .to_string()
1286 .contains("SearXNG instance URL not configured")
1287 );
1288 }
1289
1290 #[test]
1291 fn test_parse_tavily_results_empty() {
1292 let tool = WebSearchTool::new("tavily".to_string(), None, None, 5, 15);
1293 let json = serde_json::json!({"results": []});
1294 let result = tool.parse_tavily_results(&json, "test").unwrap();
1295 assert!(result.contains("No results found"));
1296 }
1297
1298 #[test]
1299 fn test_parse_tavily_results_with_data() {
1300 let tool = WebSearchTool::new("tavily".to_string(), None, None, 5, 15);
1301 let json = serde_json::json!({
1302 "query": "test",
1303 "results": [
1304 {
1305 "title": "Tavily Example",
1306 "url": "https://example.com",
1307 "content": "Pre-cleaned summary content from Tavily",
1308 "score": 0.91
1309 },
1310 {
1311 "title": "Another Result",
1312 "url": "https://example.org",
1313 "content": "Second result body"
1314 }
1315 ]
1316 });
1317 let result = tool.parse_tavily_results(&json, "test").unwrap();
1318 assert!(result.contains("Tavily Example"));
1319 assert!(result.contains("https://example.com"));
1320 assert!(result.contains("Pre-cleaned summary content from Tavily"));
1321 assert!(result.contains("via Tavily"));
1322 }
1323
1324 #[test]
1325 fn test_parse_tavily_results_invalid_response() {
1326 let tool = WebSearchTool::new("tavily".to_string(), None, None, 5, 15);
1327 let json = serde_json::json!({"error": "bad api key"});
1328 let result = tool.parse_tavily_results(&json, "test");
1329 assert!(result.is_err());
1330 assert!(
1331 result
1332 .unwrap_err()
1333 .to_string()
1334 .contains("Invalid Tavily API response")
1335 );
1336 }
1337
1338 #[tokio::test]
1339 async fn test_execute_tavily_without_api_key() {
1340 let tmp = tempfile::tempdir().unwrap();
1343 let config_path = tmp.path().join("config.toml");
1344 std::fs::write(&config_path, "[web_search]\n").unwrap();
1345
1346 let tool = WebSearchTool::new_with_config(
1347 "tavily".to_string(),
1348 None,
1349 None,
1350 None,
1351 None,
1352 5,
1353 15,
1354 config_path,
1355 false,
1356 );
1357 let result = tool.execute(json!({"query": "test"})).await;
1358 assert!(result.is_err());
1359 assert!(
1360 result
1361 .unwrap_err()
1362 .to_string()
1363 .contains("Tavily API key not configured")
1364 );
1365 }
1366
1367 #[test]
1368 fn test_resolve_tavily_api_key_uses_boot_key() {
1369 let tool = WebSearchTool::new_with_config(
1370 "tavily".to_string(),
1371 None,
1372 Some("tvly-boot-key".to_string()),
1373 None,
1374 None,
1375 5,
1376 15,
1377 PathBuf::new(),
1378 false,
1379 );
1380 let key = tool.resolve_tavily_api_key().unwrap();
1381 assert_eq!(key, "tvly-boot-key");
1382 }
1383
1384 #[test]
1385 fn test_resolve_tavily_api_key_reloads_from_config() {
1386 let tmp = tempfile::tempdir().unwrap();
1387 let config_path = tmp.path().join("config.toml");
1388 std::fs::write(
1389 &config_path,
1390 "[web_search]\ntavily_api_key = \"tvly-fresh-from-disk\"\n",
1391 )
1392 .unwrap();
1393
1394 let tool = WebSearchTool::new_with_config(
1396 "tavily".to_string(),
1397 None,
1398 None,
1399 None,
1400 None,
1401 5,
1402 15,
1403 config_path,
1404 false,
1405 );
1406 let key = tool.resolve_tavily_api_key().unwrap();
1407 assert_eq!(key, "tvly-fresh-from-disk");
1408 }
1409
1410 #[test]
1411 fn test_resolve_tavily_api_key_decrypts_encrypted_key() {
1412 let tmp = tempfile::TempDir::new().unwrap();
1413 let store = zeroclaw_config::secrets::SecretStore::new(tmp.path(), true);
1414 let encrypted = store.encrypt("tvly-secret-key").unwrap();
1415
1416 let config_path = tmp.path().join("config.toml");
1417 std::fs::write(
1418 &config_path,
1419 format!("[web_search]\ntavily_api_key = \"{}\"\n", encrypted),
1420 )
1421 .unwrap();
1422
1423 let tool = WebSearchTool::new_with_config(
1425 "tavily".to_string(),
1426 None,
1427 None,
1428 Some(encrypted),
1429 None,
1430 5,
1431 15,
1432 config_path,
1433 true,
1434 );
1435 let key = tool.resolve_tavily_api_key().unwrap();
1436 assert_eq!(key, "tvly-secret-key");
1437 }
1438
1439 #[tokio::test]
1446 async fn test_tavily_request_uses_bearer_auth_header_not_body_field() {
1447 use wiremock::matchers::{header, method, path};
1448 use wiremock::{Mock, MockServer, ResponseTemplate};
1449
1450 let server = MockServer::start().await;
1451
1452 Mock::given(method("POST"))
1453 .and(path("/search"))
1454 .and(header("authorization", "Bearer tvly-test-key"))
1455 .and(header("content-type", "application/json"))
1456 .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
1457 "query": "what is rust",
1458 "results": []
1459 })))
1460 .mount(&server)
1461 .await;
1462
1463 let tool = WebSearchTool::new_with_config(
1464 "tavily".to_string(),
1465 None,
1466 Some("tvly-test-key".to_string()),
1467 None,
1468 None,
1469 5,
1470 15,
1471 PathBuf::new(),
1472 false,
1473 );
1474
1475 let client = reqwest::Client::builder()
1479 .timeout(Duration::from_secs(15))
1480 .build()
1481 .expect("client builder should succeed without a proxy");
1482 let result = tool
1483 .search_tavily_with_client(&client, &format!("{}/search", server.uri()), "what is rust")
1484 .await
1485 .expect("request should succeed against the mock");
1486 assert!(
1487 result.contains("No results found"),
1488 "parser should report empty results: {result}"
1489 );
1490
1491 let recorded = server
1492 .received_requests()
1493 .await
1494 .expect("wiremock should have captured the request");
1495 assert_eq!(recorded.len(), 1, "expected exactly one POST /search");
1496
1497 let body: serde_json::Value =
1498 serde_json::from_slice(&recorded[0].body).expect("body should be JSON");
1499
1500 assert!(
1502 body.get("api_key").is_none(),
1503 "api_key must not appear in the request body; got: {body}"
1504 );
1505
1506 assert_eq!(body["query"], "what is rust");
1509 assert_eq!(body["search_depth"], "basic");
1510 assert_eq!(body["max_results"], 5);
1511 assert_eq!(body["include_answer"], false);
1512 assert_eq!(body["include_raw_content"], false);
1513 }
1514
1515 #[test]
1516 fn test_parse_searxng_results_empty() {
1517 let tool = WebSearchTool::new("searxng".to_string(), None, None, 5, 15);
1518 let json = serde_json::json!({"results": []});
1519 let result = tool.parse_searxng_results(&json, "test").unwrap();
1520 assert!(result.contains("No results found"));
1521 }
1522
1523 #[test]
1524 fn test_parse_searxng_results_with_data() {
1525 let tool = WebSearchTool::new("searxng".to_string(), None, None, 5, 15);
1526 let json = serde_json::json!({
1527 "results": [
1528 {
1529 "title": "SearXNG Example",
1530 "url": "https://example.com",
1531 "content": "A privacy-respecting metasearch engine"
1532 },
1533 {
1534 "title": "Another Result",
1535 "url": "https://example.org",
1536 "content": "More information here"
1537 }
1538 ]
1539 });
1540 let result = tool.parse_searxng_results(&json, "test").unwrap();
1541 assert!(result.contains("SearXNG Example"));
1542 assert!(result.contains("https://example.com"));
1543 assert!(result.contains("A privacy-respecting metasearch engine"));
1544 assert!(result.contains("via SearXNG"));
1545 }
1546
1547 #[test]
1548 fn test_parse_searxng_results_invalid_response() {
1549 let tool = WebSearchTool::new("searxng".to_string(), None, None, 5, 15);
1550 let json = serde_json::json!({"error": "bad request"});
1551 let result = tool.parse_searxng_results(&json, "test");
1552 assert!(result.is_err());
1553 assert!(
1554 result
1555 .unwrap_err()
1556 .to_string()
1557 .contains("Invalid SearXNG API response")
1558 );
1559 }
1560
1561 #[test]
1562 fn test_resolve_searxng_instance_url_from_boot() {
1563 let tool = WebSearchTool {
1564 model_provider: "searxng".into(),
1565 boot_brave_api_key: None,
1566 boot_tavily_api_key: None,
1567 boot_jina_api_key: None,
1568 searxng_instance_url: Some("https://searx.example.com".to_string()),
1569 max_results: 5,
1570 timeout_secs: 15,
1571 config_path: PathBuf::new(),
1572 secrets_encrypt: false,
1573 };
1574 let url = tool.resolve_searxng_instance_url().unwrap();
1575 assert_eq!(url, "https://searx.example.com");
1576 }
1577
1578 #[test]
1579 fn test_resolve_searxng_instance_url_reloads_from_config() {
1580 let tmp = tempfile::TempDir::new().unwrap();
1581 let config_path = tmp.path().join("config.toml");
1582 std::fs::write(
1583 &config_path,
1584 "[web_search]\nsearxng_instance_url = \"https://search.local\"\n",
1585 )
1586 .unwrap();
1587
1588 let tool = WebSearchTool::new_with_config(
1589 "searxng".to_string(),
1590 None,
1591 None,
1592 None,
1593 None,
1594 5,
1595 15,
1596 config_path,
1597 false,
1598 );
1599 let url = tool.resolve_searxng_instance_url().unwrap();
1600 assert_eq!(url, "https://search.local");
1601 }
1602
1603 #[test]
1604 fn test_resolve_brave_api_key_picks_up_runtime_update() {
1605 let tmp = tempfile::TempDir::new().unwrap();
1606 let config_path = tmp.path().join("config.toml");
1607
1608 std::fs::write(&config_path, "[web_search]\n").unwrap();
1610
1611 let tool = WebSearchTool::new_with_config(
1612 "brave".to_string(),
1613 None,
1614 None,
1615 None,
1616 None,
1617 5,
1618 15,
1619 config_path.clone(),
1620 false,
1621 );
1622
1623 assert!(tool.resolve_brave_api_key().is_err());
1625
1626 std::fs::write(
1628 &config_path,
1629 "[web_search]\nbrave_api_key = \"runtime-updated-key\"\n",
1630 )
1631 .unwrap();
1632
1633 let key = tool.resolve_brave_api_key().unwrap();
1635 assert_eq!(key, "runtime-updated-key");
1636 }
1637
1638 #[test]
1639 fn test_resolve_jina_api_key_uses_boot_key() {
1640 let tool = WebSearchTool::new_with_config(
1641 "jina".to_string(),
1642 None,
1643 None,
1644 Some("jina-boot-key".to_string()),
1645 None,
1646 5,
1647 15,
1648 PathBuf::new(),
1649 false,
1650 );
1651 let key = tool.resolve_jina_api_key().unwrap();
1652 assert_eq!(key, "jina-boot-key");
1653 }
1654
1655 #[test]
1656 fn test_resolve_jina_api_key_reloads_from_config() {
1657 let tmp = tempfile::tempdir().unwrap();
1658 let config_path = tmp.path().join("config.toml");
1659 std::fs::write(
1660 &config_path,
1661 "[web_search]\njina_api_key = \"jina-fresh-from-disk\"\n",
1662 )
1663 .unwrap();
1664
1665 let tool = WebSearchTool::new_with_config(
1667 "jina".to_string(),
1668 None,
1669 None,
1670 None,
1671 None,
1672 5,
1673 15,
1674 config_path,
1675 false,
1676 );
1677 let key = tool.resolve_jina_api_key().unwrap();
1678 assert_eq!(key, "jina-fresh-from-disk");
1679 }
1680
1681 #[test]
1682 fn test_parse_jina_results_empty() {
1683 let tool = WebSearchTool::new("jina".to_string(), None, None, 5, 15);
1684 let json = serde_json::json!({"data": []});
1686 let result = tool.parse_jina_results(&json, "test").unwrap();
1687 assert!(result.contains("No results found"));
1688 }
1689
1690 #[test]
1691 fn test_parse_jina_results_with_data() {
1692 let tool = WebSearchTool::new("jina".to_string(), None, None, 5, 15);
1693 let json = serde_json::json!({
1695 "data": [
1696 {
1697 "title": "Jina AI",
1698 "url": "https://jina.ai/",
1699 "content": "Best-in-class embeddings, rerankers, web reader, deepsearch"
1700 },
1701 {
1702 "title": "Jina AI on GitHub",
1703 "url": "https://github.com/jina-ai",
1704 "description": "Open-source AI infrastructure"
1705 }
1706 ]
1707 });
1708 let result = tool.parse_jina_results(&json, "test").unwrap();
1709 assert!(result.contains("Jina AI"));
1710 assert!(result.contains("https://jina.ai/"));
1711 assert!(result.contains("via Jina AI"));
1712 assert!(result.contains("Best-in-class embeddings"));
1714 }
1715
1716 #[test]
1717 fn test_parse_jina_results_falls_back_to_description() {
1718 let tool = WebSearchTool::new("jina".to_string(), None, None, 5, 15);
1719 let json = serde_json::json!({
1721 "data": [
1722 {
1723 "title": "Test",
1724 "url": "https://example.com",
1725 "description": "Fallback description"
1726 }
1727 ]
1728 });
1729 let result = tool.parse_jina_results(&json, "test").unwrap();
1730 assert!(result.contains("Fallback description"));
1731 }
1732
1733 #[test]
1734 fn test_parse_jina_results_invalid_response() {
1735 let tool = WebSearchTool::new("jina".to_string(), None, None, 5, 15);
1736 let json = serde_json::json!({"error": "bad api key"});
1737 let result = tool.parse_jina_results(&json, "test");
1738 assert!(result.is_err());
1739 assert!(
1740 result
1741 .unwrap_err()
1742 .to_string()
1743 .contains("Invalid Jina AI API response")
1744 );
1745 }
1746
1747 #[tokio::test]
1748 async fn test_execute_jina_without_api_key() {
1749 let tmp = tempfile::tempdir().unwrap();
1752 let config_path = tmp.path().join("config.toml");
1753 std::fs::write(&config_path, "[web_search]\n").unwrap();
1754
1755 let tool = WebSearchTool::new_with_config(
1756 "jina".to_string(),
1757 None,
1758 None,
1759 None,
1760 None,
1761 5,
1762 15,
1763 config_path,
1764 false,
1765 );
1766 let result = tool.execute(json!({"query": "test"})).await;
1767 assert!(result.is_err());
1768 assert!(
1769 result
1770 .unwrap_err()
1771 .to_string()
1772 .contains("Jina AI API key not configured")
1773 );
1774 }
1775}