Skip to main content

zeroclaw_tools/
web_search_tool.rs

1use super::web_search_provider_routing::{WebSearchProviderRoute, resolve_web_search_provider};
2use async_trait::async_trait;
3use regex::Regex;
4use serde_json::json;
5use std::path::{Path, PathBuf};
6use std::time::Duration;
7use zeroclaw_api::tool::{Tool, ToolResult};
8
/// Tool that runs internet searches on behalf of the agent.
///
/// Several search backends are supported: DuckDuckGo (free), Brave, Tavily
/// and Jina AI (each requires an API key), and SearXNG (self-hosted, requires
/// an instance URL).
///
/// API keys are resolved lazily, when a search is executed. If the key
/// captured at boot is missing or is still an encrypted blob, the tool
/// re-reads `config.toml`, decrypts the matching `[web_search]` field, and
/// uses that value. Keys that were set or rotated after boot, as well as
/// encrypted keys, are therefore picked up correctly.
pub struct WebSearchTool {
    /// Provider selector exactly as the user configured it (after trimming
    /// and lower-casing in the constructors). Routed via model_provider
    /// aliases at runtime.
    model_provider: String,
    /// Brave key captured at boot; `None` when not configured at startup.
    boot_brave_api_key: Option<String>,
    /// Tavily key captured at boot.
    boot_tavily_api_key: Option<String>,
    /// Jina AI key captured at boot.
    boot_jina_api_key: Option<String>,
    /// Base URL of the SearXNG instance (e.g. `"https://searx.example.com"`).
    searxng_instance_url: Option<String>,
    /// Upper bound on the number of results rendered per search.
    max_results: usize,
    /// Per-request HTTP timeout, in seconds.
    timeout_secs: u64,
    /// Location of `config.toml`, used to lazily re-read keys at execution time.
    config_path: PathBuf,
    /// Whether secret encryption is enabled (needed to build a `SecretStore`).
    secrets_encrypt: bool,
}
36
37impl WebSearchTool {
38    pub fn new(
39        model_provider: String,
40        brave_api_key: Option<String>,
41        jina_api_key: Option<String>,
42        max_results: usize,
43        timeout_secs: u64,
44    ) -> Self {
45        Self {
46            model_provider: model_provider.trim().to_lowercase(),
47            boot_brave_api_key: brave_api_key,
48            boot_tavily_api_key: None,
49            boot_jina_api_key: jina_api_key,
50            searxng_instance_url: None,
51            max_results: max_results.clamp(1, 10),
52            timeout_secs: timeout_secs.max(1),
53            config_path: PathBuf::new(),
54            secrets_encrypt: false,
55        }
56    }
57
58    /// Create a `WebSearchTool` with config-reload and decryption support.
59    ///
60    /// `config_path` is the path to `config.toml` so the tool can re-read API
61    /// keys at execution time. `secrets_encrypt` controls whether the keys are
62    /// decrypted via `SecretStore`.
63    #[allow(clippy::too_many_arguments)]
64    pub fn new_with_config(
65        model_provider: String,
66        brave_api_key: Option<String>,
67        tavily_api_key: Option<String>,
68        jina_api_key: Option<String>,
69        searxng_instance_url: Option<String>,
70        max_results: usize,
71        timeout_secs: u64,
72        config_path: PathBuf,
73        secrets_encrypt: bool,
74    ) -> Self {
75        Self {
76            model_provider: model_provider.trim().to_lowercase(),
77            boot_brave_api_key: brave_api_key,
78            boot_tavily_api_key: tavily_api_key,
79            boot_jina_api_key: jina_api_key,
80            searxng_instance_url,
81            max_results: max_results.clamp(1, 10),
82            timeout_secs: timeout_secs.max(1),
83            config_path,
84            secrets_encrypt,
85        }
86    }
87
88    /// Resolve the Brave API key, preferring the boot-time value but falling
89    /// back to a fresh config read + decryption when the boot-time value is
90    /// absent.
91    fn resolve_brave_api_key(&self) -> anyhow::Result<String> {
92        // Fast path: boot-time key is present and usable (not an encrypted blob).
93        if let Some(ref key) = self.boot_brave_api_key
94            && !key.is_empty()
95            && !zeroclaw_config::secrets::SecretStore::is_encrypted(key)
96        {
97            return Ok(key.clone());
98        }
99
100        // Slow path: re-read config.toml to pick up keys set/rotated after boot.
101        self.reload_brave_api_key()
102    }
103
104    /// Re-read `config.toml` and decrypt `[web_search] brave_api_key`.
105    fn reload_brave_api_key(&self) -> anyhow::Result<String> {
106        let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
107            ::zeroclaw_log::record!(
108                ERROR,
109                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
110                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
111                    .with_attrs(::serde_json::json!({
112                        "path": self.config_path.display().to_string(),
113                        "search_provider": "brave",
114                        "error": format!("{}", e),
115                    })),
116                "web_search: failed to read config for Brave API key"
117            );
118            anyhow::Error::msg(format!(
119                "Failed to read config file {} for Brave API key: {e}",
120                self.config_path.display()
121            ))
122        })?;
123
124        let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
125            ::zeroclaw_log::record!(
126                ERROR,
127                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
128                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
129                    .with_attrs(::serde_json::json!({
130                        "path": self.config_path.display().to_string(),
131                        "search_provider": "brave",
132                        "error": format!("{}", e),
133                    })),
134                "web_search: failed to parse config for Brave API key"
135            );
136            anyhow::Error::msg(format!(
137                "Failed to parse config file {} for Brave API key: {e}",
138                self.config_path.display()
139            ))
140        })?;
141
142        let raw_key = config
143            .web_search
144            .brave_api_key
145            .filter(|k| !k.is_empty())
146            .ok_or_else(|| {
147                ::zeroclaw_log::record!(
148                    ERROR,
149                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
150                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
151                        .with_attrs(::serde_json::json!({"search_provider": "brave"})),
152                    "web_search: Brave API key not configured"
153                );
154                anyhow::Error::msg("Brave API key not configured")
155            })?;
156
157        // Decrypt if necessary.
158        if zeroclaw_config::secrets::SecretStore::is_encrypted(&raw_key) {
159            let zeroclaw_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
160            let store =
161                zeroclaw_config::secrets::SecretStore::new(zeroclaw_dir, self.secrets_encrypt);
162            let plaintext = store.decrypt(&raw_key)?;
163            if plaintext.is_empty() {
164                anyhow::bail!("Brave API key not configured (decrypted value is empty)");
165            }
166            Ok(plaintext)
167        } else {
168            Ok(raw_key)
169        }
170    }
171
172    async fn search_duckduckgo(&self, query: &str) -> anyhow::Result<String> {
173        self.search_duckduckgo_at("https://html.duckduckgo.com/html/", query)
174            .await
175    }
176
177    /// Inner DuckDuckGo request implementation, parameterized on the endpoint URL
178    /// so request-flow tests can target a local mock server. Production calls
179    /// always go through [`Self::search_duckduckgo`].
180    async fn search_duckduckgo_at(
181        &self,
182        endpoint_url: &str,
183        query: &str,
184    ) -> anyhow::Result<String> {
185        let encoded_query = urlencoding::encode(query);
186        let search_url = format!("{}?q={}", endpoint_url, encoded_query);
187
188        let builder = reqwest::Client::builder()
189            .timeout(Duration::from_secs(self.timeout_secs))
190            .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
191        let builder =
192            zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
193        let client = builder.build()?;
194
195        let response = client.get(&search_url).send().await?;
196        let status = response.status();
197        let final_url_is_block =
198            contains_ascii_case_insensitive(response.url().as_str(), "/wr.do?");
199
200        if !status.is_success() {
201            if let Some(message) = duckduckgo_block_message(status, final_url_is_block, false) {
202                anyhow::bail!(message);
203            }
204            anyhow::bail!("DuckDuckGo search failed with status: {}", status);
205        }
206
207        let html = response.text().await?;
208        let html_contains_block = contains_ascii_case_insensitive(&html, "/wr.do?")
209            || contains_ascii_case_insensitive(&html, "anomaly-modal");
210        if let Some(message) =
211            duckduckgo_block_message(status, final_url_is_block, html_contains_block)
212        {
213            anyhow::bail!(message);
214        }
215        self.parse_duckduckgo_results(&html, query)
216    }
217
218    fn parse_duckduckgo_results(&self, html: &str, query: &str) -> anyhow::Result<String> {
219        // Extract result links: <a class="result__a" href="...">Title</a>
220        let link_regex = Regex::new(
221            r#"<a[^>]*class="[^"]*result__a[^"]*"[^>]*href="([^"]+)"[^>]*>([\s\S]*?)</a>"#,
222        )?;
223
224        // Extract snippets: <a class="result__snippet">...</a>
225        let snippet_regex = Regex::new(r#"<a class="result__snippet[^"]*"[^>]*>([\s\S]*?)</a>"#)?;
226
227        let link_matches: Vec<_> = link_regex
228            .captures_iter(html)
229            .take(self.max_results + 2)
230            .collect();
231
232        let snippet_matches: Vec<_> = snippet_regex
233            .captures_iter(html)
234            .take(self.max_results + 2)
235            .collect();
236
237        if link_matches.is_empty() {
238            return Ok(format!("No results found for: {}", query));
239        }
240
241        let mut lines = vec![format!("Search results for: {} (via DuckDuckGo)", query)];
242
243        let count = link_matches.len().min(self.max_results);
244
245        for i in 0..count {
246            let caps = &link_matches[i];
247            let url_str = decode_ddg_redirect_url(&caps[1]);
248            let title = strip_tags(&caps[2]);
249
250            lines.push(format!("{}. {}", i + 1, title.trim()));
251            lines.push(format!("   {}", url_str.trim()));
252
253            // Add snippet if available
254            if i < snippet_matches.len() {
255                let snippet = strip_tags(&snippet_matches[i][1]);
256                let snippet = snippet.trim();
257                if !snippet.is_empty() {
258                    lines.push(format!("   {}", snippet));
259                }
260            }
261        }
262
263        Ok(lines.join("\n"))
264    }
265
266    async fn search_brave(&self, query: &str) -> anyhow::Result<String> {
267        let api_key = self.resolve_brave_api_key()?;
268
269        let encoded_query = urlencoding::encode(query);
270        let search_url = format!(
271            "https://api.search.brave.com/res/v1/web/search?q={}&count={}",
272            encoded_query, self.max_results
273        );
274
275        let builder = reqwest::Client::builder().timeout(Duration::from_secs(self.timeout_secs));
276        let builder =
277            zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
278        let client = builder.build()?;
279
280        let response = client
281            .get(&search_url)
282            .header("Accept", "application/json")
283            .header("X-Subscription-Token", &api_key)
284            .send()
285            .await?;
286
287        if !response.status().is_success() {
288            anyhow::bail!("Brave search failed with status: {}", response.status());
289        }
290
291        let json: serde_json::Value = response.json().await?;
292        self.parse_brave_results(&json, query)
293    }
294
295    /// Resolve the Tavily API key from the boot-time snapshot, falling back
296    /// to a fresh config read + decryption when the boot-time value is absent.
297    fn resolve_tavily_api_key(&self) -> anyhow::Result<String> {
298        if let Some(ref key) = self.boot_tavily_api_key
299            && !key.is_empty()
300            && !zeroclaw_config::secrets::SecretStore::is_encrypted(key)
301        {
302            return Ok(key.clone());
303        }
304        self.reload_tavily_api_key()
305    }
306
307    /// Re-read `config.toml` and decrypt `[web_search] tavily_api_key`.
308    fn reload_tavily_api_key(&self) -> anyhow::Result<String> {
309        let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
310            ::zeroclaw_log::record!(
311                ERROR,
312                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
313                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
314                    .with_attrs(::serde_json::json!({
315                        "path": self.config_path.display().to_string(),
316                        "search_provider": "tavily",
317                        "error": format!("{}", e),
318                    })),
319                "web_search: failed to read config for Tavily API key"
320            );
321            anyhow::Error::msg(format!(
322                "Failed to read config file {} for Tavily API key: {e}",
323                self.config_path.display()
324            ))
325        })?;
326
327        let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
328            ::zeroclaw_log::record!(
329                ERROR,
330                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
331                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
332                    .with_attrs(::serde_json::json!({
333                        "path": self.config_path.display().to_string(),
334                        "search_provider": "tavily",
335                        "error": format!("{}", e),
336                    })),
337                "web_search: failed to parse config for Tavily API key"
338            );
339            anyhow::Error::msg(format!(
340                "Failed to parse config file {} for Tavily API key: {e}",
341                self.config_path.display()
342            ))
343        })?;
344
345        let raw_key = config
346            .web_search
347            .tavily_api_key
348            .filter(|k| !k.is_empty())
349            .ok_or_else(|| {
350                ::zeroclaw_log::record!(
351                    ERROR,
352                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
353                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
354                        .with_attrs(::serde_json::json!({"search_provider": "tavily"})),
355                    "web_search: Tavily API key not configured"
356                );
357                anyhow::Error::msg("Tavily API key not configured")
358            })?;
359
360        if zeroclaw_config::secrets::SecretStore::is_encrypted(&raw_key) {
361            let zeroclaw_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
362            let store =
363                zeroclaw_config::secrets::SecretStore::new(zeroclaw_dir, self.secrets_encrypt);
364            let plaintext = store.decrypt(&raw_key)?;
365            if plaintext.is_empty() {
366                anyhow::bail!("Tavily API key not configured (decrypted value is empty)");
367            }
368            Ok(plaintext)
369        } else {
370            Ok(raw_key)
371        }
372    }
373
374    async fn search_tavily(&self, query: &str) -> anyhow::Result<String> {
375        let client = self.build_tavily_client()?;
376        self.search_tavily_with_client(&client, "https://api.tavily.com/search", query)
377            .await
378    }
379
380    /// Build the production HTTP client for Tavily, wired through the
381    /// process-global runtime proxy state. Extracted so the
382    /// `search_tavily_with_client` test path can substitute a fresh
383    /// client and stay isolated from concurrent tests that mutate
384    /// `RUNTIME_PROXY_CONFIG` (a request built off a stale "enabled"
385    /// proxy snapshot otherwise routes through a non-existent proxy
386    /// and the wiremock connection fails).
387    fn build_tavily_client(&self) -> anyhow::Result<reqwest::Client> {
388        let builder = reqwest::Client::builder().timeout(Duration::from_secs(self.timeout_secs));
389        let builder =
390            zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
391        Ok(builder.build()?)
392    }
393
394    /// Inner Tavily request implementation, parameterized on the HTTP
395    /// client and endpoint URL so request-shape tests can target a local
396    /// mock server with a client that doesn't read process-global proxy
397    /// state. Production calls always go through [`Self::search_tavily`].
398    async fn search_tavily_with_client(
399        &self,
400        client: &reqwest::Client,
401        url: &str,
402        query: &str,
403    ) -> anyhow::Result<String> {
404        let api_key = self.resolve_tavily_api_key()?;
405
406        // Tavily authenticates via `Authorization: Bearer <key>` per
407        // https://docs.tavily.com/documentation/api-reference/endpoint/search
408        // (the API also tolerates `api_key` in the body for legacy clients,
409        // but bearer-header is the documented contract).
410        let body = serde_json::json!({
411            "query": query,
412            "max_results": self.max_results,
413            "search_depth": "basic",
414            "include_answer": false,
415            "include_raw_content": false,
416        });
417
418        let response = client
419            .post(url)
420            .bearer_auth(&api_key)
421            .json(&body)
422            .send()
423            .await?;
424
425        if !response.status().is_success() {
426            anyhow::bail!("Tavily search failed with status: {}", response.status());
427        }
428
429        let json: serde_json::Value = response.json().await?;
430        self.parse_tavily_results(&json, query)
431    }
432
433    fn parse_tavily_results(
434        &self,
435        json: &serde_json::Value,
436        query: &str,
437    ) -> anyhow::Result<String> {
438        let results = json
439            .get("results")
440            .and_then(|r| r.as_array())
441            .ok_or_else(|| {
442                ::zeroclaw_log::record!(
443                    ERROR,
444                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
445                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
446                        .with_attrs(::serde_json::json!({"search_provider": "tavily"})),
447                    "web_search: invalid Tavily response"
448                );
449                anyhow::Error::msg("Invalid Tavily API response")
450            })?;
451
452        if results.is_empty() {
453            return Ok(format!("No results found for: {}", query));
454        }
455
456        let mut lines = vec![format!("Search results for: {} (via Tavily)", query)];
457
458        for (i, result) in results.iter().take(self.max_results).enumerate() {
459            let title = result
460                .get("title")
461                .and_then(|t| t.as_str())
462                .unwrap_or("No title");
463            let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
464            // Tavily returns a pre-cleaned `content` field (not just a snippet),
465            // so it doubles as the description for the LLM caller.
466            let content = result.get("content").and_then(|c| c.as_str()).unwrap_or("");
467
468            lines.push(format!("{}. {}", i + 1, title));
469            lines.push(format!("   {}", url));
470            if !content.is_empty() {
471                lines.push(format!("   {}", content));
472            }
473        }
474
475        Ok(lines.join("\n"))
476    }
477
478    /// Resolve the Jina AI API key from the boot-time snapshot, falling back
479    /// to a fresh config read + decryption when the boot-time value is absent.
480    fn resolve_jina_api_key(&self) -> anyhow::Result<String> {
481        if let Some(ref key) = self.boot_jina_api_key
482            && !key.is_empty()
483            && !zeroclaw_config::secrets::SecretStore::is_encrypted(key)
484        {
485            return Ok(key.clone());
486        }
487        self.reload_jina_api_key()
488    }
489
490    /// Re-read `config.toml` and decrypt `[web_search] jina_api_key`.
491    fn reload_jina_api_key(&self) -> anyhow::Result<String> {
492        let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
493            ::zeroclaw_log::record!(
494                ERROR,
495                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
496                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
497                    .with_attrs(::serde_json::json!({
498                        "path": self.config_path.display().to_string(),
499                        "search_provider": "jina",
500                        "error": format!("{}", e),
501                    })),
502                "web_search: failed to read config for Jina AI API key"
503            );
504            anyhow::Error::msg(format!(
505                "Failed to read config file {} for Jina AI API key: {e}",
506                self.config_path.display()
507            ))
508        })?;
509
510        let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
511            ::zeroclaw_log::record!(
512                ERROR,
513                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
514                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
515                    .with_attrs(::serde_json::json!({
516                        "path": self.config_path.display().to_string(),
517                        "search_provider": "jina",
518                        "error": format!("{}", e),
519                    })),
520                "web_search: failed to parse config for Jina AI API key"
521            );
522            anyhow::Error::msg(format!(
523                "Failed to parse config file {} for Jina AI API key: {e}",
524                self.config_path.display()
525            ))
526        })?;
527
528        let raw_key = config
529            .web_search
530            .jina_api_key
531            .filter(|k| !k.is_empty())
532            .ok_or_else(|| {
533                ::zeroclaw_log::record!(
534                    ERROR,
535                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
536                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
537                        .with_attrs(::serde_json::json!({"search_provider": "jina"})),
538                    "web_search: Jina AI API key not configured"
539                );
540                anyhow::Error::msg("Jina AI API key not configured")
541            })?;
542
543        if zeroclaw_config::secrets::SecretStore::is_encrypted(&raw_key) {
544            let zeroclaw_dir = self.config_path.parent().unwrap_or_else(|| Path::new("."));
545            let store =
546                zeroclaw_config::secrets::SecretStore::new(zeroclaw_dir, self.secrets_encrypt);
547            let plaintext = store.decrypt(&raw_key)?;
548            if plaintext.is_empty() {
549                anyhow::bail!("Jina AI API key not configured (decrypted value is empty)");
550            }
551            Ok(plaintext)
552        } else {
553            Ok(raw_key)
554        }
555    }
556
557    async fn search_jina(&self, query: &str) -> anyhow::Result<String> {
558        let api_key = self.resolve_jina_api_key()?;
559
560        let builder = reqwest::Client::builder()
561            .timeout(Duration::from_secs(self.timeout_secs))
562            .user_agent("ZeroClaw/1.0 (https://zeroclaw.ai)");
563        let builder =
564            zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
565        let client = builder.build()?;
566
567        // Jina Search API requires POST with JSON body
568        let body = serde_json::json!({"q": query});
569
570        let response = client
571            .post("https://s.jina.ai/")
572            .header("Authorization", format!("Bearer {}", api_key))
573            .header("Content-Type", "application/json")
574            .header("Accept", "application/json")
575            .json(&body)
576            .send()
577            .await?;
578
579        if !response.status().is_success() {
580            anyhow::bail!("Jina AI search failed with status: {}", response.status());
581        }
582
583        let json: serde_json::Value = response.json().await?;
584        self.parse_jina_results(&json, query)
585    }
586
587    fn parse_jina_results(&self, json: &serde_json::Value, query: &str) -> anyhow::Result<String> {
588        // Jina API returns {"code": 200, "status": 20000, "data": [...]}
589        let results = json.get("data").and_then(|r| r.as_array()).ok_or_else(|| {
590            ::zeroclaw_log::record!(
591                ERROR,
592                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
593                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
594                    .with_attrs(::serde_json::json!({"search_provider": "jina"})),
595                "web_search: invalid Jina AI response"
596            );
597            anyhow::Error::msg("Invalid Jina AI API response")
598        })?;
599
600        if results.is_empty() {
601            return Ok(format!("No results found for: {}", query));
602        }
603
604        let mut lines = vec![format!("Search results for: {} (via Jina AI)", query)];
605
606        for (i, result) in results.iter().take(self.max_results).enumerate() {
607            let title = result
608                .get("title")
609                .and_then(|t| t.as_str())
610                .unwrap_or("No title");
611            let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
612            // Jina's content field contains richer markdown-formatted page content;
613            // fall back to description if content is absent
614            let snippet = result
615                .get("content")
616                .and_then(|c| c.as_str())
617                .or_else(|| result.get("description").and_then(|d| d.as_str()))
618                .unwrap_or("");
619
620            lines.push(format!("{}. {}", i + 1, title));
621            lines.push(format!("   {}", url));
622            if !snippet.is_empty() {
623                lines.push(format!("   {}", snippet));
624            }
625        }
626
627        Ok(lines.join("\n"))
628    }
629
630    fn parse_brave_results(&self, json: &serde_json::Value, query: &str) -> anyhow::Result<String> {
631        let results = json
632            .get("web")
633            .and_then(|w| w.get("results"))
634            .and_then(|r| r.as_array())
635            .ok_or_else(|| {
636                ::zeroclaw_log::record!(
637                    ERROR,
638                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
639                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
640                        .with_attrs(::serde_json::json!({"search_provider": "brave"})),
641                    "web_search: invalid Brave response"
642                );
643                anyhow::Error::msg("Invalid Brave API response")
644            })?;
645
646        if results.is_empty() {
647            return Ok(format!("No results found for: {}", query));
648        }
649
650        let mut lines = vec![format!("Search results for: {} (via Brave)", query)];
651
652        for (i, result) in results.iter().take(self.max_results).enumerate() {
653            let title = result
654                .get("title")
655                .and_then(|t| t.as_str())
656                .unwrap_or("No title");
657            let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
658            let description = result
659                .get("description")
660                .and_then(|d| d.as_str())
661                .unwrap_or("");
662
663            lines.push(format!("{}. {}", i + 1, title));
664            lines.push(format!("   {}", url));
665            if !description.is_empty() {
666                lines.push(format!("   {}", description));
667            }
668        }
669
670        Ok(lines.join("\n"))
671    }
672
673    /// Resolve the SearXNG instance URL from the boot-time config or by
674    /// re-reading `config.toml` at runtime.
675    fn resolve_searxng_instance_url(&self) -> anyhow::Result<String> {
676        if let Some(ref url) = self.searxng_instance_url
677            && !url.is_empty()
678        {
679            return Ok(url.clone());
680        }
681
682        // Slow path: re-read config.toml to pick up values set after boot.
683        let contents = std::fs::read_to_string(&self.config_path).map_err(|e| {
684            ::zeroclaw_log::record!(
685                ERROR,
686                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
687                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
688                    .with_attrs(::serde_json::json!({
689                        "path": self.config_path.display().to_string(),
690                        "search_provider": "searxng",
691                        "error": format!("{}", e),
692                    })),
693                "web_search: failed to read config for SearXNG URL"
694            );
695            anyhow::Error::msg(format!(
696                "Failed to read config file {} for SearXNG instance URL: {e}",
697                self.config_path.display()
698            ))
699        })?;
700
701        let config: zeroclaw_config::schema::Config = toml::from_str(&contents).map_err(|e| {
702            ::zeroclaw_log::record!(
703                ERROR,
704                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
705                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
706                    .with_attrs(::serde_json::json!({
707                        "path": self.config_path.display().to_string(),
708                        "search_provider": "searxng",
709                        "error": format!("{}", e),
710                    })),
711                "web_search: failed to parse config for SearXNG URL"
712            );
713            anyhow::Error::msg(format!(
714                "Failed to parse config file {} for SearXNG instance URL: {e}",
715                self.config_path.display()
716            ))
717        })?;
718
719        config
720            .web_search
721            .searxng_instance_url
722            .filter(|u| !u.is_empty())
723            .ok_or_else(|| {
724                ::zeroclaw_log::record!(
725                    ERROR,
726                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
727                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
728                        .with_attrs(::serde_json::json!({"search_provider": "searxng"})),
729                    "web_search: SearXNG instance URL not configured"
730                );
731                anyhow::Error::msg(
732                    "SearXNG instance URL not configured. Set [web_search] searxng_instance_url \
733                     in config.toml or the SEARXNG_INSTANCE_URL environment variable.",
734                )
735            })
736    }
737
738    async fn search_searxng(&self, query: &str) -> anyhow::Result<String> {
739        let instance_url = self.resolve_searxng_instance_url()?;
740        let base_url = instance_url.trim_end_matches('/');
741
742        let encoded_query = urlencoding::encode(query);
743        let search_url = format!(
744            "{}/search?q={}&format=json&pageno=1",
745            base_url, encoded_query
746        );
747
748        let builder = reqwest::Client::builder()
749            .timeout(Duration::from_secs(self.timeout_secs))
750            .user_agent("ZeroClaw/1.0");
751        let builder =
752            zeroclaw_config::schema::apply_runtime_proxy_to_builder(builder, "tool.web_search");
753        let client = builder.build()?;
754
755        let response = client
756            .get(&search_url)
757            .header("Accept", "application/json")
758            .send()
759            .await?;
760
761        if !response.status().is_success() {
762            anyhow::bail!("SearXNG search failed with status: {}", response.status());
763        }
764
765        let json: serde_json::Value = response.json().await?;
766        self.parse_searxng_results(&json, query)
767    }
768
769    fn parse_searxng_results(
770        &self,
771        json: &serde_json::Value,
772        query: &str,
773    ) -> anyhow::Result<String> {
774        let results = json
775            .get("results")
776            .and_then(|r| r.as_array())
777            .ok_or_else(|| {
778                ::zeroclaw_log::record!(
779                    ERROR,
780                    ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Fail)
781                        .with_outcome(::zeroclaw_log::EventOutcome::Failure)
782                        .with_attrs(::serde_json::json!({"search_provider": "searxng"})),
783                    "web_search: invalid SearXNG response"
784                );
785                anyhow::Error::msg("Invalid SearXNG API response")
786            })?;
787
788        if results.is_empty() {
789            return Ok(format!("No results found for: {}", query));
790        }
791
792        let mut lines = vec![format!("Search results for: {} (via SearXNG)", query)];
793
794        for (i, result) in results.iter().take(self.max_results).enumerate() {
795            let title = result
796                .get("title")
797                .and_then(|t| t.as_str())
798                .unwrap_or("No title");
799            let url = result.get("url").and_then(|u| u.as_str()).unwrap_or("");
800            let content = result.get("content").and_then(|c| c.as_str()).unwrap_or("");
801
802            lines.push(format!("{}. {}", i + 1, title));
803            lines.push(format!("   {}", url));
804            if !content.is_empty() {
805                lines.push(format!("   {}", content));
806            }
807        }
808
809        Ok(lines.join("\n"))
810    }
811}
812
813fn decode_ddg_redirect_url(raw_url: &str) -> String {
814    if let Some(index) = raw_url.find("uddg=") {
815        let encoded = &raw_url[index + 5..];
816        let encoded = encoded.split('&').next().unwrap_or(encoded);
817        if let Ok(decoded) = urlencoding::decode(encoded) {
818            return decoded.into_owned();
819        }
820    }
821
822    raw_url.to_string()
823}
824
825const DUCKDUCKGO_BLOCK_MESSAGE: &str = "DuckDuckGo blocked the automated search request. Try configuring SearXNG, Brave, or Tavily as the web search provider.";
826
827fn duckduckgo_block_message(
828    status: reqwest::StatusCode,
829    final_url_is_block: bool,
830    html_contains_block: bool,
831) -> Option<&'static str> {
832    if status == reqwest::StatusCode::FORBIDDEN || final_url_is_block || html_contains_block {
833        Some(DUCKDUCKGO_BLOCK_MESSAGE)
834    } else {
835        None
836    }
837}
838
/// Reports whether `haystack` contains `needle`, ignoring ASCII case.
///
/// The comparison works on raw bytes and allocates nothing, so it is cheap to
/// run over large HTML bodies. Only ASCII letters are case-folded; all other
/// bytes must match exactly.
///
/// An empty `needle` is contained in every haystack (including the empty
/// one), matching the behaviour of `str::contains`.
fn contains_ascii_case_insensitive(haystack: &str, needle: &str) -> bool {
    let needle = needle.as_bytes();
    if needle.is_empty() {
        // `slice::windows` panics on a window size of 0, so answer the
        // trivial case before sliding over the haystack.
        return true;
    }

    haystack
        .as_bytes()
        .windows(needle.len())
        .any(|window| window.eq_ignore_ascii_case(needle))
}
845
846fn strip_tags(content: &str) -> String {
847    let re = Regex::new(r"<[^>]+>").unwrap();
848    re.replace_all(content, "").to_string()
849}
850
851#[async_trait]
852impl Tool for WebSearchTool {
853    fn name(&self) -> &str {
854        "web_search_tool"
855    }
856
857    fn description(&self) -> &str {
858        "Search the web for information. Returns relevant search results with titles, URLs, and descriptions. Use this to find current information, news, or research topics."
859    }
860
861    fn parameters_schema(&self) -> serde_json::Value {
862        json!({
863            "type": "object",
864            "properties": {
865                "query": {
866                    "type": "string",
867                    "description": "The search query. Be specific for better results."
868                }
869            },
870            "required": ["query"]
871        })
872    }
873
874    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
875        let query = args.get("query").and_then(|q| q.as_str()).ok_or_else(|| {
876            ::zeroclaw_log::record!(
877                WARN,
878                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
879                    .with_outcome(::zeroclaw_log::EventOutcome::Failure)
880                    .with_attrs(::serde_json::json!({"param": "query"})),
881                "web_search: missing query parameter"
882            );
883            anyhow::Error::msg("Missing required parameter: query")
884        })?;
885
886        if query.trim().is_empty() {
887            anyhow::bail!("Search query cannot be empty");
888        }
889
890        ::zeroclaw_log::record!(
891            INFO,
892            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note),
893            &format!("Searching web for: {}", query)
894        );
895
896        let resolution = resolve_web_search_provider(&self.model_provider);
897        if resolution.used_fallback {
898            ::zeroclaw_log::record!(
899                WARN,
900                ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Note)
901                    .with_outcome(::zeroclaw_log::EventOutcome::Unknown),
902                &format!(
903                    "Unknown web search model_provider '{}'; falling back to '{}'",
904                    self.model_provider, resolution.canonical_provider
905                )
906            );
907        }
908
909        let result = match resolution.route {
910            WebSearchProviderRoute::DuckDuckGo => self.search_duckduckgo(query).await?,
911            WebSearchProviderRoute::Brave => self.search_brave(query).await?,
912            WebSearchProviderRoute::Tavily => self.search_tavily(query).await?,
913            WebSearchProviderRoute::SearXNG => self.search_searxng(query).await?,
914            WebSearchProviderRoute::Jina => self.search_jina(query).await?,
915        };
916
917        Ok(ToolResult {
918            success: true,
919            output: result,
920            error: None,
921        })
922    }
923}
924
925#[cfg(test)]
926mod tests {
927    use super::*;
928
929    #[test]
930    fn test_tool_name() {
931        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
932        assert_eq!(tool.name(), "web_search_tool");
933    }
934
935    #[test]
936    fn test_tool_description() {
937        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
938        assert!(tool.description().contains("Search the web"));
939    }
940
941    #[test]
942    fn test_parameters_schema() {
943        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
944        let schema = tool.parameters_schema();
945        assert_eq!(schema["type"], "object");
946        assert!(schema["properties"]["query"].is_object());
947    }
948
949    #[test]
950    fn test_strip_tags() {
951        let html = "<b>Hello</b> <i>World</i>";
952        assert_eq!(strip_tags(html), "Hello World");
953    }
954
955    #[test]
956    fn test_parse_duckduckgo_results_empty() {
957        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
958        let result = tool
959            .parse_duckduckgo_results("<html>No results here</html>", "test")
960            .unwrap();
961        assert!(result.contains("No results found"));
962    }
963
964    #[test]
965    fn test_parse_duckduckgo_results_with_data() {
966        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
967        let html = r#"
968            <a class="result__a" href="https://example.com">Example Title</a>
969            <a class="result__snippet">This is a description</a>
970        "#;
971        let result = tool.parse_duckduckgo_results(html, "test").unwrap();
972        assert!(result.contains("Example Title"));
973        assert!(result.contains("https://example.com"));
974    }
975
976    #[test]
977    fn test_parse_duckduckgo_results_decodes_redirect_url() {
978        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
979        let html = r#"
980            <a class="result__a" href="https://duckduckgo.com/l/?uddg=https%3A%2F%2Fexample.com%2Fpath%3Fa%3D1&amp;rut=test">Example Title</a>
981            <a class="result__snippet">This is a description</a>
982        "#;
983        let result = tool.parse_duckduckgo_results(html, "test").unwrap();
984        assert!(result.contains("https://example.com/path?a=1"));
985        assert!(!result.contains("rut=test"));
986    }
987
988    #[test]
989    fn test_duckduckgo_block_detection_reports_forbidden_status() {
990        let message = duckduckgo_block_message(reqwest::StatusCode::FORBIDDEN, false, false)
991            .expect("403 responses should be classified as a DuckDuckGo block");
992
993        assert!(message.contains("DuckDuckGo blocked"));
994        assert!(message.contains("SearXNG"));
995    }
996
997    #[test]
998    fn test_duckduckgo_block_detection_reports_verification_redirect() {
999        let message = duckduckgo_block_message(reqwest::StatusCode::OK, true, false)
1000            .expect("verification redirects should be classified as a DuckDuckGo block");
1001
1002        assert!(message.contains("DuckDuckGo blocked"));
1003        assert!(message.contains("SearXNG"));
1004    }
1005
1006    #[test]
1007    fn test_duckduckgo_block_detection_reports_verification_form_in_html() {
1008        let message = duckduckgo_block_message(reqwest::StatusCode::OK, false, true)
1009            .expect("verification form HTML should be classified as a DuckDuckGo block");
1010
1011        assert!(message.contains("DuckDuckGo blocked"));
1012        assert!(message.contains("SearXNG"));
1013    }
1014
1015    #[test]
1016    fn test_duckduckgo_block_detection_ignores_normal_empty_results() {
1017        let message = duckduckgo_block_message(reqwest::StatusCode::OK, false, false);
1018
1019        assert!(message.is_none());
1020    }
1021
1022    #[test]
1023    fn test_duckduckgo_block_detection_is_case_insensitive_without_allocating_html() {
1024        assert!(contains_ascii_case_insensitive(
1025            r#"<form action="/WR.DO?u=https%3A%2F%2Fhtml.duckduckgo.com%2Fhtml%2F"></form>"#,
1026            "/wr.do?"
1027        ));
1028    }
1029
1030    #[tokio::test]
1031    async fn test_duckduckgo_request_reports_forbidden_status() {
1032        use wiremock::matchers::{method, path, query_param};
1033        use wiremock::{Mock, MockServer, ResponseTemplate};
1034
1035        let server = MockServer::start().await;
1036        Mock::given(method("GET"))
1037            .and(path("/html/"))
1038            .and(query_param("q", "test"))
1039            .respond_with(ResponseTemplate::new(403))
1040            .mount(&server)
1041            .await;
1042
1043        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1044        let err = tool
1045            .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
1046            .await
1047            .expect_err("403 should be reported as a DuckDuckGo block");
1048
1049        assert!(err.to_string().contains("DuckDuckGo blocked"));
1050        assert!(err.to_string().contains("SearXNG"));
1051    }
1052
1053    #[tokio::test]
1054    async fn test_duckduckgo_request_reports_verification_redirect_url() {
1055        use wiremock::matchers::{method, path, query_param};
1056        use wiremock::{Mock, MockServer, ResponseTemplate};
1057
1058        let server = MockServer::start().await;
1059        Mock::given(method("GET"))
1060            .and(path("/html/"))
1061            .and(query_param("q", "test"))
1062            .respond_with(
1063                ResponseTemplate::new(302)
1064                    .insert_header("location", format!("{}/wr.do?u=blocked", server.uri())),
1065            )
1066            .mount(&server)
1067            .await;
1068        Mock::given(method("GET"))
1069            .and(path("/wr.do"))
1070            .respond_with(ResponseTemplate::new(200).set_body_string("<html></html>"))
1071            .mount(&server)
1072            .await;
1073
1074        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1075        let err = tool
1076            .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
1077            .await
1078            .expect_err("verification redirects should be reported as a DuckDuckGo block");
1079
1080        assert!(err.to_string().contains("DuckDuckGo blocked"));
1081        assert!(err.to_string().contains("SearXNG"));
1082    }
1083
1084    #[tokio::test]
1085    async fn test_duckduckgo_request_reports_verification_form_html() {
1086        use wiremock::matchers::{method, path, query_param};
1087        use wiremock::{Mock, MockServer, ResponseTemplate};
1088
1089        let server = MockServer::start().await;
1090        Mock::given(method("GET"))
1091            .and(path("/html/"))
1092            .and(query_param("q", "test"))
1093            .respond_with(ResponseTemplate::new(200).set_body_string(
1094                r#"<form action="/wr.do?u=https%3A%2F%2Fhtml.duckduckgo.com%2Fhtml%2F"></form>"#,
1095            ))
1096            .mount(&server)
1097            .await;
1098
1099        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1100        let err = tool
1101            .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
1102            .await
1103            .expect_err("verification HTML should be reported as a DuckDuckGo block");
1104
1105        assert!(err.to_string().contains("DuckDuckGo blocked"));
1106        assert!(err.to_string().contains("SearXNG"));
1107    }
1108
1109    #[tokio::test]
1110    async fn test_duckduckgo_request_reports_anomaly_modal_block() {
1111        // Regression for #6373: DuckDuckGo's anti-bot page now ships an
1112        // `anomaly-modal` interstitial (HTTP 200/202, no `/wr.do?` redirect,
1113        // no verification form), and the old detector slid past it,
1114        // returning a misleading "No results found" message to the agent.
1115        use wiremock::matchers::{method, path, query_param};
1116        use wiremock::{Mock, MockServer, ResponseTemplate};
1117
1118        let server = MockServer::start().await;
1119        Mock::given(method("GET"))
1120            .and(path("/html/"))
1121            .and(query_param("q", "test"))
1122            .respond_with(ResponseTemplate::new(202).set_body_string(
1123                r#"<html><body><div class="anomaly-modal__title">Unusual Traffic Detected</div></body></html>"#,
1124            ))
1125            .mount(&server)
1126            .await;
1127
1128        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1129        let err = tool
1130            .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
1131            .await
1132            .expect_err("anomaly-modal page should be reported as a DuckDuckGo block");
1133
1134        assert!(err.to_string().contains("DuckDuckGo blocked"));
1135        assert!(err.to_string().contains("SearXNG"));
1136    }
1137
1138    #[tokio::test]
1139    async fn test_duckduckgo_request_preserves_normal_empty_results() {
1140        use wiremock::matchers::{method, path, query_param};
1141        use wiremock::{Mock, MockServer, ResponseTemplate};
1142
1143        let server = MockServer::start().await;
1144        Mock::given(method("GET"))
1145            .and(path("/html/"))
1146            .and(query_param("q", "test"))
1147            .respond_with(
1148                ResponseTemplate::new(200).set_body_string("<html>No results here</html>"),
1149            )
1150            .mount(&server)
1151            .await;
1152
1153        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1154        let result = tool
1155            .search_duckduckgo_at(&format!("{}/html/", server.uri()), "test")
1156            .await
1157            .expect("normal empty result HTML should still parse");
1158
1159        assert!(result.contains("No results found"));
1160    }
1161
1162    #[test]
1163    fn test_constructor_clamps_web_search_limits() {
1164        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 0, 0);
1165        let html = r#"
1166            <a class="result__a" href="https://example.com">Example Title</a>
1167            <a class="result__snippet">This is a description</a>
1168        "#;
1169        let result = tool.parse_duckduckgo_results(html, "test").unwrap();
1170        assert!(result.contains("Example Title"));
1171    }
1172
1173    #[tokio::test]
1174    async fn test_execute_missing_query() {
1175        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1176        let result = tool.execute(json!({})).await;
1177        assert!(result.is_err());
1178    }
1179
1180    #[tokio::test]
1181    async fn test_execute_empty_query() {
1182        let tool = WebSearchTool::new("duckduckgo".to_string(), None, None, 5, 15);
1183        let result = tool.execute(json!({"query": ""})).await;
1184        assert!(result.is_err());
1185    }
1186
1187    #[tokio::test]
1188    async fn test_execute_brave_without_api_key() {
1189        let tool = WebSearchTool::new("brave".to_string(), None, None, 5, 15);
1190        let result = tool.execute(json!({"query": "test"})).await;
1191        assert!(result.is_err());
1192        assert!(result.unwrap_err().to_string().contains("API key"));
1193    }
1194
1195    #[test]
1196    fn test_resolve_brave_api_key_uses_boot_key() {
1197        let tool = WebSearchTool::new(
1198            "brave".to_string(),
1199            Some("sk-plaintext-key".to_string()),
1200            None,
1201            5,
1202            15,
1203        );
1204        let key = tool.resolve_brave_api_key().unwrap();
1205        assert_eq!(key, "sk-plaintext-key");
1206    }
1207
1208    #[test]
1209    fn test_resolve_brave_api_key_reloads_from_config() {
1210        let tmp = tempfile::TempDir::new().unwrap();
1211        let config_path = tmp.path().join("config.toml");
1212        std::fs::write(
1213            &config_path,
1214            "[web_search]\nbrave_api_key = \"fresh-key-from-disk\"\n",
1215        )
1216        .unwrap();
1217
1218        // No boot key -- forces reload from config
1219        let tool = WebSearchTool::new_with_config(
1220            "brave".to_string(),
1221            None,
1222            None,
1223            None,
1224            None,
1225            5,
1226            15,
1227            config_path,
1228            false,
1229        );
1230        let key = tool.resolve_brave_api_key().unwrap();
1231        assert_eq!(key, "fresh-key-from-disk");
1232    }
1233
1234    #[test]
1235    fn test_resolve_brave_api_key_decrypts_encrypted_key() {
1236        let tmp = tempfile::TempDir::new().unwrap();
1237        let store = zeroclaw_config::secrets::SecretStore::new(tmp.path(), true);
1238        let encrypted = store.encrypt("brave-secret-key").unwrap();
1239
1240        let config_path = tmp.path().join("config.toml");
1241        std::fs::write(
1242            &config_path,
1243            format!("[web_search]\nbrave_api_key = \"{}\"\n", encrypted),
1244        )
1245        .unwrap();
1246
1247        // Boot key is the encrypted blob -- should trigger reload + decrypt
1248        let tool = WebSearchTool::new_with_config(
1249            "brave".to_string(),
1250            Some(encrypted),
1251            None,
1252            None,
1253            None,
1254            5,
1255            15,
1256            config_path,
1257            true,
1258        );
1259        let key = tool.resolve_brave_api_key().unwrap();
1260        assert_eq!(key, "brave-secret-key");
1261    }
1262
1263    #[tokio::test]
1264    async fn test_execute_searxng_without_instance_url() {
1265        let tmp = tempfile::TempDir::new().unwrap();
1266        let config_path = tmp.path().join("config.toml");
1267        std::fs::write(&config_path, "[web_search]\n").unwrap();
1268
1269        let tool = WebSearchTool::new_with_config(
1270            "searxng".to_string(),
1271            None,
1272            None,
1273            None,
1274            None,
1275            5,
1276            15,
1277            config_path,
1278            false,
1279        );
1280        let result = tool.execute(json!({"query": "test"})).await;
1281        assert!(result.is_err());
1282        assert!(
1283            result
1284                .unwrap_err()
1285                .to_string()
1286                .contains("SearXNG instance URL not configured")
1287        );
1288    }
1289
1290    #[test]
1291    fn test_parse_tavily_results_empty() {
1292        let tool = WebSearchTool::new("tavily".to_string(), None, None, 5, 15);
1293        let json = serde_json::json!({"results": []});
1294        let result = tool.parse_tavily_results(&json, "test").unwrap();
1295        assert!(result.contains("No results found"));
1296    }
1297
1298    #[test]
1299    fn test_parse_tavily_results_with_data() {
1300        let tool = WebSearchTool::new("tavily".to_string(), None, None, 5, 15);
1301        let json = serde_json::json!({
1302            "query": "test",
1303            "results": [
1304                {
1305                    "title": "Tavily Example",
1306                    "url": "https://example.com",
1307                    "content": "Pre-cleaned summary content from Tavily",
1308                    "score": 0.91
1309                },
1310                {
1311                    "title": "Another Result",
1312                    "url": "https://example.org",
1313                    "content": "Second result body"
1314                }
1315            ]
1316        });
1317        let result = tool.parse_tavily_results(&json, "test").unwrap();
1318        assert!(result.contains("Tavily Example"));
1319        assert!(result.contains("https://example.com"));
1320        assert!(result.contains("Pre-cleaned summary content from Tavily"));
1321        assert!(result.contains("via Tavily"));
1322    }
1323
1324    #[test]
1325    fn test_parse_tavily_results_invalid_response() {
1326        let tool = WebSearchTool::new("tavily".to_string(), None, None, 5, 15);
1327        let json = serde_json::json!({"error": "bad api key"});
1328        let result = tool.parse_tavily_results(&json, "test");
1329        assert!(result.is_err());
1330        assert!(
1331            result
1332                .unwrap_err()
1333                .to_string()
1334                .contains("Invalid Tavily API response")
1335        );
1336    }
1337
1338    #[tokio::test]
1339    async fn test_execute_tavily_without_api_key() {
1340        // No boot key + no config field → resolve_tavily_api_key must error
1341        // before any network call is attempted.
1342        let tmp = tempfile::tempdir().unwrap();
1343        let config_path = tmp.path().join("config.toml");
1344        std::fs::write(&config_path, "[web_search]\n").unwrap();
1345
1346        let tool = WebSearchTool::new_with_config(
1347            "tavily".to_string(),
1348            None,
1349            None,
1350            None,
1351            None,
1352            5,
1353            15,
1354            config_path,
1355            false,
1356        );
1357        let result = tool.execute(json!({"query": "test"})).await;
1358        assert!(result.is_err());
1359        assert!(
1360            result
1361                .unwrap_err()
1362                .to_string()
1363                .contains("Tavily API key not configured")
1364        );
1365    }
1366
1367    #[test]
1368    fn test_resolve_tavily_api_key_uses_boot_key() {
1369        let tool = WebSearchTool::new_with_config(
1370            "tavily".to_string(),
1371            None,
1372            Some("tvly-boot-key".to_string()),
1373            None,
1374            None,
1375            5,
1376            15,
1377            PathBuf::new(),
1378            false,
1379        );
1380        let key = tool.resolve_tavily_api_key().unwrap();
1381        assert_eq!(key, "tvly-boot-key");
1382    }
1383
1384    #[test]
1385    fn test_resolve_tavily_api_key_reloads_from_config() {
1386        let tmp = tempfile::tempdir().unwrap();
1387        let config_path = tmp.path().join("config.toml");
1388        std::fs::write(
1389            &config_path,
1390            "[web_search]\ntavily_api_key = \"tvly-fresh-from-disk\"\n",
1391        )
1392        .unwrap();
1393
1394        // No boot key — forces reload from config
1395        let tool = WebSearchTool::new_with_config(
1396            "tavily".to_string(),
1397            None,
1398            None,
1399            None,
1400            None,
1401            5,
1402            15,
1403            config_path,
1404            false,
1405        );
1406        let key = tool.resolve_tavily_api_key().unwrap();
1407        assert_eq!(key, "tvly-fresh-from-disk");
1408    }
1409
1410    #[test]
1411    fn test_resolve_tavily_api_key_decrypts_encrypted_key() {
1412        let tmp = tempfile::TempDir::new().unwrap();
1413        let store = zeroclaw_config::secrets::SecretStore::new(tmp.path(), true);
1414        let encrypted = store.encrypt("tvly-secret-key").unwrap();
1415
1416        let config_path = tmp.path().join("config.toml");
1417        std::fs::write(
1418            &config_path,
1419            format!("[web_search]\ntavily_api_key = \"{}\"\n", encrypted),
1420        )
1421        .unwrap();
1422
1423        // Boot key is the encrypted blob -- should trigger reload + decrypt
1424        let tool = WebSearchTool::new_with_config(
1425            "tavily".to_string(),
1426            None,
1427            None,
1428            Some(encrypted),
1429            None,
1430            5,
1431            15,
1432            config_path,
1433            true,
1434        );
1435        let key = tool.resolve_tavily_api_key().unwrap();
1436        assert_eq!(key, "tvly-secret-key");
1437    }
1438
1439    /// Regression: Tavily auth must travel as `Authorization: Bearer <key>`
1440    /// (the documented contract per
1441    /// https://docs.tavily.com/documentation/api-reference/endpoint/search),
1442    /// NOT as an `api_key` field in the JSON body. The previous shape worked
1443    /// against the live service for legacy reasons, but the docs identify
1444    /// bearer-header as the canonical method.
1445    #[tokio::test]
1446    async fn test_tavily_request_uses_bearer_auth_header_not_body_field() {
1447        use wiremock::matchers::{header, method, path};
1448        use wiremock::{Mock, MockServer, ResponseTemplate};
1449
1450        let server = MockServer::start().await;
1451
1452        Mock::given(method("POST"))
1453            .and(path("/search"))
1454            .and(header("authorization", "Bearer tvly-test-key"))
1455            .and(header("content-type", "application/json"))
1456            .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
1457                "query": "what is rust",
1458                "results": []
1459            })))
1460            .mount(&server)
1461            .await;
1462
1463        let tool = WebSearchTool::new_with_config(
1464            "tavily".to_string(),
1465            None,
1466            Some("tvly-test-key".to_string()),
1467            None,
1468            None,
1469            5,
1470            15,
1471            PathBuf::new(),
1472            false,
1473        );
1474
1475        // Isolated client so the request shape under test isn't affected
1476        // by `RUNTIME_PROXY_CONFIG` mutations from sibling proxy_config
1477        // tests running concurrently in the same process.
1478        let client = reqwest::Client::builder()
1479            .timeout(Duration::from_secs(15))
1480            .build()
1481            .expect("client builder should succeed without a proxy");
1482        let result = tool
1483            .search_tavily_with_client(&client, &format!("{}/search", server.uri()), "what is rust")
1484            .await
1485            .expect("request should succeed against the mock");
1486        assert!(
1487            result.contains("No results found"),
1488            "parser should report empty results: {result}"
1489        );
1490
1491        let recorded = server
1492            .received_requests()
1493            .await
1494            .expect("wiremock should have captured the request");
1495        assert_eq!(recorded.len(), 1, "expected exactly one POST /search");
1496
1497        let body: serde_json::Value =
1498            serde_json::from_slice(&recorded[0].body).expect("body should be JSON");
1499
1500        // Auth must NOT leak into the body — bearer header is the only auth channel.
1501        assert!(
1502            body.get("api_key").is_none(),
1503            "api_key must not appear in the request body; got: {body}"
1504        );
1505
1506        // The documented body fields must still be present so the search
1507        // contract continues to match the upstream API spec.
1508        assert_eq!(body["query"], "what is rust");
1509        assert_eq!(body["search_depth"], "basic");
1510        assert_eq!(body["max_results"], 5);
1511        assert_eq!(body["include_answer"], false);
1512        assert_eq!(body["include_raw_content"], false);
1513    }
1514
1515    #[test]
1516    fn test_parse_searxng_results_empty() {
1517        let tool = WebSearchTool::new("searxng".to_string(), None, None, 5, 15);
1518        let json = serde_json::json!({"results": []});
1519        let result = tool.parse_searxng_results(&json, "test").unwrap();
1520        assert!(result.contains("No results found"));
1521    }
1522
1523    #[test]
1524    fn test_parse_searxng_results_with_data() {
1525        let tool = WebSearchTool::new("searxng".to_string(), None, None, 5, 15);
1526        let json = serde_json::json!({
1527            "results": [
1528                {
1529                    "title": "SearXNG Example",
1530                    "url": "https://example.com",
1531                    "content": "A privacy-respecting metasearch engine"
1532                },
1533                {
1534                    "title": "Another Result",
1535                    "url": "https://example.org",
1536                    "content": "More information here"
1537                }
1538            ]
1539        });
1540        let result = tool.parse_searxng_results(&json, "test").unwrap();
1541        assert!(result.contains("SearXNG Example"));
1542        assert!(result.contains("https://example.com"));
1543        assert!(result.contains("A privacy-respecting metasearch engine"));
1544        assert!(result.contains("via SearXNG"));
1545    }
1546
1547    #[test]
1548    fn test_parse_searxng_results_invalid_response() {
1549        let tool = WebSearchTool::new("searxng".to_string(), None, None, 5, 15);
1550        let json = serde_json::json!({"error": "bad request"});
1551        let result = tool.parse_searxng_results(&json, "test");
1552        assert!(result.is_err());
1553        assert!(
1554            result
1555                .unwrap_err()
1556                .to_string()
1557                .contains("Invalid SearXNG API response")
1558        );
1559    }
1560
1561    #[test]
1562    fn test_resolve_searxng_instance_url_from_boot() {
1563        let tool = WebSearchTool {
1564            model_provider: "searxng".into(),
1565            boot_brave_api_key: None,
1566            boot_tavily_api_key: None,
1567            boot_jina_api_key: None,
1568            searxng_instance_url: Some("https://searx.example.com".to_string()),
1569            max_results: 5,
1570            timeout_secs: 15,
1571            config_path: PathBuf::new(),
1572            secrets_encrypt: false,
1573        };
1574        let url = tool.resolve_searxng_instance_url().unwrap();
1575        assert_eq!(url, "https://searx.example.com");
1576    }
1577
1578    #[test]
1579    fn test_resolve_searxng_instance_url_reloads_from_config() {
1580        let tmp = tempfile::TempDir::new().unwrap();
1581        let config_path = tmp.path().join("config.toml");
1582        std::fs::write(
1583            &config_path,
1584            "[web_search]\nsearxng_instance_url = \"https://search.local\"\n",
1585        )
1586        .unwrap();
1587
1588        let tool = WebSearchTool::new_with_config(
1589            "searxng".to_string(),
1590            None,
1591            None,
1592            None,
1593            None,
1594            5,
1595            15,
1596            config_path,
1597            false,
1598        );
1599        let url = tool.resolve_searxng_instance_url().unwrap();
1600        assert_eq!(url, "https://search.local");
1601    }
1602
1603    #[test]
1604    fn test_resolve_brave_api_key_picks_up_runtime_update() {
1605        let tmp = tempfile::TempDir::new().unwrap();
1606        let config_path = tmp.path().join("config.toml");
1607
1608        // Start with no key in config
1609        std::fs::write(&config_path, "[web_search]\n").unwrap();
1610
1611        let tool = WebSearchTool::new_with_config(
1612            "brave".to_string(),
1613            None,
1614            None,
1615            None,
1616            None,
1617            5,
1618            15,
1619            config_path.clone(),
1620            false,
1621        );
1622
1623        // Key not configured yet -- should fail
1624        assert!(tool.resolve_brave_api_key().is_err());
1625
1626        // Simulate runtime config update (e.g. via web_search_config set)
1627        std::fs::write(
1628            &config_path,
1629            "[web_search]\nbrave_api_key = \"runtime-updated-key\"\n",
1630        )
1631        .unwrap();
1632
1633        // Now should succeed with the updated key
1634        let key = tool.resolve_brave_api_key().unwrap();
1635        assert_eq!(key, "runtime-updated-key");
1636    }
1637
1638    #[test]
1639    fn test_resolve_jina_api_key_uses_boot_key() {
1640        let tool = WebSearchTool::new_with_config(
1641            "jina".to_string(),
1642            None,
1643            None,
1644            Some("jina-boot-key".to_string()),
1645            None,
1646            5,
1647            15,
1648            PathBuf::new(),
1649            false,
1650        );
1651        let key = tool.resolve_jina_api_key().unwrap();
1652        assert_eq!(key, "jina-boot-key");
1653    }
1654
1655    #[test]
1656    fn test_resolve_jina_api_key_reloads_from_config() {
1657        let tmp = tempfile::tempdir().unwrap();
1658        let config_path = tmp.path().join("config.toml");
1659        std::fs::write(
1660            &config_path,
1661            "[web_search]\njina_api_key = \"jina-fresh-from-disk\"\n",
1662        )
1663        .unwrap();
1664
1665        // No boot key — forces reload from config
1666        let tool = WebSearchTool::new_with_config(
1667            "jina".to_string(),
1668            None,
1669            None,
1670            None,
1671            None,
1672            5,
1673            15,
1674            config_path,
1675            false,
1676        );
1677        let key = tool.resolve_jina_api_key().unwrap();
1678        assert_eq!(key, "jina-fresh-from-disk");
1679    }
1680
1681    #[test]
1682    fn test_parse_jina_results_empty() {
1683        let tool = WebSearchTool::new("jina".to_string(), None, None, 5, 15);
1684        // Jina API returns {"code": 200, "status": 20000, "data": [...]}
1685        let json = serde_json::json!({"data": []});
1686        let result = tool.parse_jina_results(&json, "test").unwrap();
1687        assert!(result.contains("No results found"));
1688    }
1689
1690    #[test]
1691    fn test_parse_jina_results_with_data() {
1692        let tool = WebSearchTool::new("jina".to_string(), None, None, 5, 15);
1693        // Jina API returns {"code": 200, "status": 20000, "data": [...]}
1694        let json = serde_json::json!({
1695            "data": [
1696                {
1697                    "title": "Jina AI",
1698                    "url": "https://jina.ai/",
1699                    "content": "Best-in-class embeddings, rerankers, web reader, deepsearch"
1700                },
1701                {
1702                    "title": "Jina AI on GitHub",
1703                    "url": "https://github.com/jina-ai",
1704                    "description": "Open-source AI infrastructure"
1705                }
1706            ]
1707        });
1708        let result = tool.parse_jina_results(&json, "test").unwrap();
1709        assert!(result.contains("Jina AI"));
1710        assert!(result.contains("https://jina.ai/"));
1711        assert!(result.contains("via Jina AI"));
1712        // content field should be read when available
1713        assert!(result.contains("Best-in-class embeddings"));
1714    }
1715
1716    #[test]
1717    fn test_parse_jina_results_falls_back_to_description() {
1718        let tool = WebSearchTool::new("jina".to_string(), None, None, 5, 15);
1719        // When content is absent, fall back to description
1720        let json = serde_json::json!({
1721            "data": [
1722                {
1723                    "title": "Test",
1724                    "url": "https://example.com",
1725                    "description": "Fallback description"
1726                }
1727            ]
1728        });
1729        let result = tool.parse_jina_results(&json, "test").unwrap();
1730        assert!(result.contains("Fallback description"));
1731    }
1732
1733    #[test]
1734    fn test_parse_jina_results_invalid_response() {
1735        let tool = WebSearchTool::new("jina".to_string(), None, None, 5, 15);
1736        let json = serde_json::json!({"error": "bad api key"});
1737        let result = tool.parse_jina_results(&json, "test");
1738        assert!(result.is_err());
1739        assert!(
1740            result
1741                .unwrap_err()
1742                .to_string()
1743                .contains("Invalid Jina AI API response")
1744        );
1745    }
1746
1747    #[tokio::test]
1748    async fn test_execute_jina_without_api_key() {
1749        // No boot key + no config field → resolve_jina_api_key must error
1750        // before any network call is attempted.
1751        let tmp = tempfile::tempdir().unwrap();
1752        let config_path = tmp.path().join("config.toml");
1753        std::fs::write(&config_path, "[web_search]\n").unwrap();
1754
1755        let tool = WebSearchTool::new_with_config(
1756            "jina".to_string(),
1757            None,
1758            None,
1759            None,
1760            None,
1761            5,
1762            15,
1763            config_path,
1764            false,
1765        );
1766        let result = tool.execute(json!({"query": "test"})).await;
1767        assert!(result.is_err());
1768        assert!(
1769            result
1770                .unwrap_err()
1771                .to_string()
1772                .contains("Jina AI API key not configured")
1773        );
1774    }
1775}