Skip to main content

zeroclaw_providers/
models_dev.rs

1//! Unauthenticated cross-provider model catalog via models.dev.
2//!
3//! `https://models.dev/api.json` is a community-maintained public aggregator
4//! that lists model IDs for 100+ model_providers (Anthropic, OpenAI, Google,
5//! Bedrock, Azure, Moonshot, Qwen, …). No API key required, same shape for
6//! every model_provider. We fetch the catalog once per process and cache in
7//! memory.
8//!
9//! Providers that have a native public `/models` endpoint (OpenRouter,
10//! Ollama's `/api/tags`) override `ModelProvider::list_models` directly and
11//! skip this path.
12
13use std::collections::HashMap;
14use std::sync::Arc;
15use std::time::Duration;
16
17use anyhow::Result;
18use serde::Deserialize;
19use tokio::sync::OnceCell;
20
/// Endpoint that serves the whole models.dev catalog as one JSON document.
const CATALOG_URL: &str = "https://models.dev/api.json";
/// Timeout, in seconds, configured on the HTTP client that fetches the catalog.
const FETCH_TIMEOUT_SECS: u64 = 10;
23
/// One provider's record in the models.dev catalog.
///
/// Only the `models` map is declared here, so it is the only part of the
/// provider record this module reads.
#[derive(Debug, Deserialize)]
pub(crate) struct ProviderEntry {
    /// Model records for this provider. Because of `#[serde(default)]`, a
    /// provider record without a `models` field deserializes to an empty map
    /// instead of failing.
    #[serde(default)]
    models: HashMap<String, ModelEntry>,
}
29
/// A single model record inside a provider's `models` map.
#[derive(Debug, Deserialize)]
struct ModelEntry {
    /// Model identifier that `filter_models` returns to callers. The field has
    /// no `#[serde(default)]`, so a record lacking `id` fails deserialization.
    id: String,
}
34
/// Parsed catalog: each top-level JSON key (a provider key such as
/// `"anthropic"`) mapped to that provider's [`ProviderEntry`].
pub(crate) type Catalog = HashMap<String, ProviderEntry>;
36
/// Process-wide cell holding the parsed catalog. `list_models_for` fills it
/// through `get_or_try_init(fetch_catalog)` and reads from it afterwards.
static CACHED_CATALOG: OnceCell<Arc<Catalog>> = OnceCell::const_new();
38
39async fn fetch_catalog() -> Result<Arc<Catalog>> {
40    let client = reqwest::Client::builder()
41        .timeout(Duration::from_secs(FETCH_TIMEOUT_SECS))
42        .build()?;
43    let response = client.get(CATALOG_URL).send().await?.error_for_status()?;
44    let bytes = response.bytes().await?;
45    Ok(Arc::new(parse_catalog(&bytes)?))
46}
47
48/// Parse the models.dev JSON into the in-memory `Catalog` shape. Pure
49/// function — unit tests construct minimal JSON byte slices and assert
50/// the filter logic without any network call.
51pub(crate) fn parse_catalog(bytes: &[u8]) -> Result<Catalog> {
52    Ok(serde_json::from_slice(bytes)?)
53}
54
55/// Filter a parsed catalog for a model_provider key. Sorted, deduped.
56/// Pure — separated from the live fetch so it can be unit-tested.
57pub(crate) fn filter_models(catalog: &Catalog, provider_key: &str) -> Result<Vec<String>> {
58    let entry = catalog.get(provider_key).ok_or_else(|| {
59        ::zeroclaw_log::record!(
60            WARN,
61            ::zeroclaw_log::Event::new(module_path!(), ::zeroclaw_log::Action::Reject)
62                .with_outcome(::zeroclaw_log::EventOutcome::Failure)
63                .with_attrs(::serde_json::json!({"model_provider": provider_key})),
64            "models_dev: provider not in catalog"
65        );
66        anyhow::Error::msg(format!(
67            "model_provider {provider_key:?} is not in the models.dev catalog"
68        ))
69    })?;
70    let mut ids: Vec<String> = entry.models.values().map(|m| m.id.clone()).collect();
71    ids.sort();
72    ids.dedup();
73    Ok(ids)
74}
75
76/// Look up model IDs for a model_provider, keyed by `models.dev`'s model_provider name.
77///
78/// First call fetches the catalog; subsequent calls hit the cache. The
79/// returned list is sorted for stable menu rendering.
80pub async fn list_models_for(provider_key: &str) -> Result<Vec<String>> {
81    let catalog = CACHED_CATALOG.get_or_try_init(fetch_catalog).await?;
82    filter_models(catalog, provider_key)
83}
84
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture with two populated providers and one provider with no models.
    const TINY_CATALOG: &str = r#"{
        "anthropic": {
            "models": {
                "claude-sonnet-4-6": {"id": "claude-sonnet-4-6"},
                "claude-opus-4-7":   {"id": "claude-opus-4-7"}
            }
        },
        "xai": {
            "models": {
                "grok-4.3":     {"id": "grok-4.3"},
                "grok-2-vision":{"id": "grok-2-vision"}
            }
        },
        "empty": { "models": {} }
    }"#;

    /// Parse the shared fixture; panics if the fixture itself is invalid.
    fn tiny_catalog() -> Catalog {
        parse_catalog(TINY_CATALOG.as_bytes()).unwrap()
    }

    #[test]
    fn parses_catalog_with_typical_shape() {
        let parsed = parse_catalog(TINY_CATALOG.as_bytes()).expect("parses");
        assert_eq!(parsed.len(), 3);
        for provider in ["anthropic", "xai"] {
            assert!(parsed.contains_key(provider));
        }
    }

    #[test]
    fn filter_returns_sorted_ids() {
        let model_ids = filter_models(&tiny_catalog(), "xai").unwrap();
        assert_eq!(model_ids, ["grok-2-vision", "grok-4.3"]);
    }

    #[test]
    fn filter_dedups() {
        // Two different map keys carry the same `id`; the output must list
        // that id once so the menu doesn't render duplicates.
        let raw = r#"{"x": {"models": {"a": {"id": "m1"}, "b": {"id": "m1"}}}}"#;
        let parsed = parse_catalog(raw.as_bytes()).unwrap();
        let model_ids = filter_models(&parsed, "x").unwrap();
        assert_eq!(model_ids, ["m1"]);
    }

    #[test]
    fn filter_returns_empty_for_empty_entry() {
        let model_ids = filter_models(&tiny_catalog(), "empty").unwrap();
        assert!(model_ids.is_empty());
    }

    #[test]
    fn filter_errors_on_unknown_key() {
        let failure = filter_models(&tiny_catalog(), "missing").expect_err("must error");
        let message = failure.to_string();
        assert!(message.contains("missing"));
    }

    #[test]
    fn parse_errors_on_malformed_json() {
        let outcome = parse_catalog(b"not json");
        assert!(outcome.is_err());
    }
}