1use super::traits::{Observer, ObserverEvent, ObserverMetric};
2use prometheus::{
3 Encoder, GaugeVec, Histogram, HistogramOpts, HistogramVec, IntCounterVec, Registry, TextEncoder,
4};
5use std::sync::{Arc, OnceLock};
6
7pub struct PrometheusObserver {
9 registry: Registry,
10
11 agent_starts: IntCounterVec,
13 llm_requests: IntCounterVec,
14 tokens_input_total: IntCounterVec,
15 tokens_output_total: IntCounterVec,
16 tool_calls: IntCounterVec,
17 channel_messages: IntCounterVec,
18 heartbeat_ticks: prometheus::IntCounter,
19 errors: IntCounterVec,
20 cache_hits: IntCounterVec,
21 cache_misses: IntCounterVec,
22 cache_tokens_saved: IntCounterVec,
23
24 agent_duration: HistogramVec,
26 tool_duration: HistogramVec,
27 request_latency: Histogram,
28
29 tokens_used: prometheus::IntGauge,
31 active_sessions: GaugeVec,
32 queue_depth: GaugeVec,
33
34 deployments_total: IntCounterVec,
36 deployment_lead_time: Histogram,
37 deployment_failure_rate: prometheus::Gauge,
38 recovery_time: Histogram,
39 mttr: prometheus::Gauge,
40 deploy_success_count: std::sync::atomic::AtomicU64,
41 deploy_failure_count: std::sync::atomic::AtomicU64,
42}
43
44impl Default for PrometheusObserver {
45 fn default() -> Self {
46 Self::new()
47 }
48}
49
50impl PrometheusObserver {
51 pub fn new() -> Self {
52 let registry = Registry::new();
53
54 let agent_starts = IntCounterVec::new(
55 prometheus::Opts::new("zeroclaw_agent_starts_total", "Total agent invocations"),
56 &["model_provider", "model"],
57 )
58 .expect("valid metric");
59
60 let llm_requests = IntCounterVec::new(
61 prometheus::Opts::new(
62 "zeroclaw_llm_requests_total",
63 "Total LLM model_provider requests",
64 ),
65 &["model_provider", "model", "success"],
66 )
67 .expect("valid metric");
68
69 let tokens_input_total = IntCounterVec::new(
70 prometheus::Opts::new("zeroclaw_tokens_input_total", "Total input tokens consumed"),
71 &["model_provider", "model"],
72 )
73 .expect("valid metric");
74
75 let tokens_output_total = IntCounterVec::new(
76 prometheus::Opts::new(
77 "zeroclaw_tokens_output_total",
78 "Total output tokens consumed",
79 ),
80 &["model_provider", "model"],
81 )
82 .expect("valid metric");
83
84 let tool_calls = IntCounterVec::new(
85 prometheus::Opts::new("zeroclaw_tool_calls_total", "Total tool calls"),
86 &["tool", "success"],
87 )
88 .expect("valid metric");
89
90 let channel_messages = IntCounterVec::new(
91 prometheus::Opts::new("zeroclaw_channel_messages_total", "Total channel messages"),
92 &["channel", "direction"],
93 )
94 .expect("valid metric");
95
96 let heartbeat_ticks =
97 prometheus::IntCounter::new("zeroclaw_heartbeat_ticks_total", "Total heartbeat ticks")
98 .expect("valid metric");
99
100 let errors = IntCounterVec::new(
101 prometheus::Opts::new("zeroclaw_errors_total", "Total errors by component"),
102 &["component"],
103 )
104 .expect("valid metric");
105
106 let cache_hits = IntCounterVec::new(
107 prometheus::Opts::new("zeroclaw_cache_hits_total", "Total response cache hits"),
108 &["cache_type"],
109 )
110 .expect("valid metric");
111
112 let cache_misses = IntCounterVec::new(
113 prometheus::Opts::new("zeroclaw_cache_misses_total", "Total response cache misses"),
114 &["cache_type"],
115 )
116 .expect("valid metric");
117
118 let cache_tokens_saved = IntCounterVec::new(
119 prometheus::Opts::new(
120 "zeroclaw_cache_tokens_saved_total",
121 "Total tokens saved by response cache",
122 ),
123 &["cache_type"],
124 )
125 .expect("valid metric");
126
127 let agent_duration = HistogramVec::new(
128 HistogramOpts::new(
129 "zeroclaw_agent_duration_seconds",
130 "Agent invocation duration in seconds",
131 )
132 .buckets(vec![0.1, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0]),
133 &["model_provider", "model"],
134 )
135 .expect("valid metric");
136
137 let tool_duration = HistogramVec::new(
138 HistogramOpts::new(
139 "zeroclaw_tool_duration_seconds",
140 "Tool execution duration in seconds",
141 )
142 .buckets(vec![0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0]),
143 &["tool"],
144 )
145 .expect("valid metric");
146
147 let request_latency = Histogram::with_opts(
148 HistogramOpts::new(
149 "zeroclaw_request_latency_seconds",
150 "Request latency in seconds",
151 )
152 .buckets(vec![0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0]),
153 )
154 .expect("valid metric");
155
156 let tokens_used = prometheus::IntGauge::new(
157 "zeroclaw_tokens_used_last",
158 "Tokens used in the last request",
159 )
160 .expect("valid metric");
161
162 let active_sessions = GaugeVec::new(
163 prometheus::Opts::new("zeroclaw_active_sessions", "Number of active sessions"),
164 &[],
165 )
166 .expect("valid metric");
167
168 let queue_depth = GaugeVec::new(
169 prometheus::Opts::new("zeroclaw_queue_depth", "Message queue depth"),
170 &[],
171 )
172 .expect("valid metric");
173
174 let deployments_total = IntCounterVec::new(
175 prometheus::Opts::new("zeroclaw_deployments_total", "Total deployments by status"),
176 &["status"],
177 )
178 .expect("valid metric");
179
180 let deployment_lead_time = Histogram::with_opts(
181 HistogramOpts::new(
182 "zeroclaw_deployment_lead_time_seconds",
183 "Deployment lead time from commit to deploy in seconds",
184 )
185 .buckets(vec![
186 60.0, 300.0, 600.0, 1800.0, 3600.0, 7200.0, 14400.0, 43200.0, 86400.0,
187 ]),
188 )
189 .expect("valid metric");
190
191 let deployment_failure_rate = prometheus::Gauge::new(
192 "zeroclaw_deployment_failure_rate",
193 "Ratio of failed deployments to total deployments",
194 )
195 .expect("valid metric");
196
197 let recovery_time = Histogram::with_opts(
198 HistogramOpts::new(
199 "zeroclaw_recovery_time_seconds",
200 "Time to recover from a failed deployment in seconds",
201 )
202 .buckets(vec![
203 60.0, 300.0, 600.0, 1800.0, 3600.0, 7200.0, 14400.0, 43200.0, 86400.0,
204 ]),
205 )
206 .expect("valid metric");
207
208 let mttr =
209 prometheus::Gauge::new("zeroclaw_mttr_seconds", "Mean time to recovery in seconds")
210 .expect("valid metric");
211
212 registry.register(Box::new(agent_starts.clone())).ok();
214 registry.register(Box::new(llm_requests.clone())).ok();
215 registry.register(Box::new(tokens_input_total.clone())).ok();
216 registry
217 .register(Box::new(tokens_output_total.clone()))
218 .ok();
219 registry.register(Box::new(tool_calls.clone())).ok();
220 registry.register(Box::new(channel_messages.clone())).ok();
221 registry.register(Box::new(heartbeat_ticks.clone())).ok();
222 registry.register(Box::new(errors.clone())).ok();
223 registry.register(Box::new(cache_hits.clone())).ok();
224 registry.register(Box::new(cache_misses.clone())).ok();
225 registry.register(Box::new(cache_tokens_saved.clone())).ok();
226 registry.register(Box::new(agent_duration.clone())).ok();
227 registry.register(Box::new(tool_duration.clone())).ok();
228 registry.register(Box::new(request_latency.clone())).ok();
229 registry.register(Box::new(tokens_used.clone())).ok();
230 registry.register(Box::new(active_sessions.clone())).ok();
231 registry.register(Box::new(queue_depth.clone())).ok();
232 registry.register(Box::new(deployments_total.clone())).ok();
233 registry
234 .register(Box::new(deployment_lead_time.clone()))
235 .ok();
236 registry
237 .register(Box::new(deployment_failure_rate.clone()))
238 .ok();
239 registry.register(Box::new(recovery_time.clone())).ok();
240 registry.register(Box::new(mttr.clone())).ok();
241
242 Self {
243 registry,
244 agent_starts,
245 llm_requests,
246 tokens_input_total,
247 tokens_output_total,
248 tool_calls,
249 channel_messages,
250 heartbeat_ticks,
251 errors,
252 cache_hits,
253 cache_misses,
254 cache_tokens_saved,
255 agent_duration,
256 tool_duration,
257 request_latency,
258 tokens_used,
259 active_sessions,
260 queue_depth,
261 deployments_total,
262 deployment_lead_time,
263 deployment_failure_rate,
264 recovery_time,
265 mttr,
266 deploy_success_count: std::sync::atomic::AtomicU64::new(0),
267 deploy_failure_count: std::sync::atomic::AtomicU64::new(0),
268 }
269 }
270
271 pub fn encode(&self) -> String {
273 let encoder = TextEncoder::new();
274 let families = self.registry.gather();
275 let mut buf = Vec::new();
276 encoder.encode(&families, &mut buf).unwrap_or_default();
277 String::from_utf8(buf).unwrap_or_default()
278 }
279
280 pub fn shared() -> Arc<Self> {
289 static SINGLETON: OnceLock<Arc<PrometheusObserver>> = OnceLock::new();
290 SINGLETON.get_or_init(|| Arc::new(Self::new())).clone()
291 }
292}
293
294impl Observer for PrometheusObserver {
295 fn record_event(&self, event: &ObserverEvent) {
296 match event {
297 ObserverEvent::AgentStart {
298 model_provider,
299 model,
300 } => {
301 self.agent_starts
302 .with_label_values(&[model_provider, model])
303 .inc();
304 }
305 ObserverEvent::AgentEnd {
306 model_provider,
307 model,
308 duration,
309 tokens_used,
310 cost_usd: _,
311 } => {
312 self.agent_duration
314 .with_label_values(&[model_provider, model])
315 .observe(duration.as_secs_f64());
316 if let Some(t) = tokens_used {
317 self.tokens_used.set(i64::try_from(*t).unwrap_or(i64::MAX));
318 }
319 }
320 ObserverEvent::LlmResponse {
321 model_provider,
322 model,
323 success,
324 input_tokens,
325 output_tokens,
326 ..
327 } => {
328 let success_str = if *success { "true" } else { "false" };
329 self.llm_requests
330 .with_label_values(&[model_provider.as_str(), model.as_str(), success_str])
331 .inc();
332 if let Some(input) = input_tokens {
333 self.tokens_input_total
334 .with_label_values(&[model_provider.as_str(), model.as_str()])
335 .inc_by(*input);
336 }
337 if let Some(output) = output_tokens {
338 self.tokens_output_total
339 .with_label_values(&[model_provider.as_str(), model.as_str()])
340 .inc_by(*output);
341 }
342 }
343 ObserverEvent::ToolCallStart { .. }
344 | ObserverEvent::TurnComplete
345 | ObserverEvent::LlmRequest { .. }
346 | ObserverEvent::DeploymentStarted { .. }
347 | ObserverEvent::RecoveryCompleted { .. } => {}
348 ObserverEvent::ToolCall {
349 tool,
350 duration,
351 success,
352 ..
353 } => {
354 let success_str = if *success { "true" } else { "false" };
355 self.tool_calls
356 .with_label_values(&[tool.as_str(), success_str])
357 .inc();
358 self.tool_duration
359 .with_label_values(&[tool.as_str()])
360 .observe(duration.as_secs_f64());
361 }
362 ObserverEvent::ChannelMessage { channel, direction } => {
363 self.channel_messages
364 .with_label_values(&[channel, direction])
365 .inc();
366 }
367 ObserverEvent::HeartbeatTick => {
368 self.heartbeat_ticks.inc();
369 }
370 ObserverEvent::CacheHit {
371 cache_type,
372 tokens_saved,
373 } => {
374 self.cache_hits.with_label_values(&[cache_type]).inc();
375 self.cache_tokens_saved
376 .with_label_values(&[cache_type])
377 .inc_by(*tokens_saved);
378 }
379 ObserverEvent::CacheMiss { cache_type } => {
380 self.cache_misses.with_label_values(&[cache_type]).inc();
381 }
382 ObserverEvent::Error {
383 component,
384 message: _,
385 } => {
386 self.errors.with_label_values(&[component]).inc();
387 }
388 ObserverEvent::DeploymentCompleted { .. } => {
389 self.deployments_total.with_label_values(&["success"]).inc();
390 let s = self
391 .deploy_success_count
392 .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
393 + 1;
394 let f = self
395 .deploy_failure_count
396 .load(std::sync::atomic::Ordering::Relaxed);
397 let total = s + f;
398 if total > 0 {
399 self.deployment_failure_rate.set(f as f64 / total as f64);
400 }
401 }
402 ObserverEvent::DeploymentFailed { .. } => {
403 self.deployments_total.with_label_values(&["failure"]).inc();
404 let f = self
405 .deploy_failure_count
406 .fetch_add(1, std::sync::atomic::Ordering::Relaxed)
407 + 1;
408 let s = self
409 .deploy_success_count
410 .load(std::sync::atomic::Ordering::Relaxed);
411 let total = s + f;
412 if total > 0 {
413 self.deployment_failure_rate.set(f as f64 / total as f64);
414 }
415 }
416 }
417 }
418
419 fn record_metric(&self, metric: &ObserverMetric) {
420 match metric {
421 ObserverMetric::RequestLatency(d) => {
422 self.request_latency.observe(d.as_secs_f64());
423 }
424 ObserverMetric::TokensUsed(t) => {
425 self.tokens_used.set(i64::try_from(*t).unwrap_or(i64::MAX));
426 }
427 ObserverMetric::ActiveSessions(s) => {
428 self.active_sessions
429 .with_label_values(&[] as &[&str])
430 .set(*s as f64);
431 }
432 ObserverMetric::QueueDepth(d) => {
433 self.queue_depth
434 .with_label_values(&[] as &[&str])
435 .set(*d as f64);
436 }
437 ObserverMetric::DeploymentLeadTime(d) => {
438 self.deployment_lead_time.observe(d.as_secs_f64());
439 }
440 ObserverMetric::RecoveryTime(d) => {
441 self.recovery_time.observe(d.as_secs_f64());
442 self.mttr.set(d.as_secs_f64());
443 }
444 }
445 }
446
447 fn name(&self) -> &str {
448 "prometheus"
449 }
450
451 fn as_any(&self) -> &dyn std::any::Any {
452 self
453 }
454}
455
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Builds a `ToolCall` event without call id, arguments or result.
    fn tool_call(tool: &str, millis: u64, success: bool) -> ObserverEvent {
        ObserverEvent::ToolCall {
            tool: tool.into(),
            tool_call_id: None,
            duration: Duration::from_millis(millis),
            success,
            arguments: None,
            result: None,
        }
    }

    /// Builds an `Error` event for `component` carrying `message`.
    fn error_event(component: &str, message: &str) -> ObserverEvent {
        ObserverEvent::Error {
            component: component.into(),
            message: message.into(),
        }
    }

    /// Builds an `AgentStart` event for the openrouter / claude-sonnet pair.
    fn agent_start() -> ObserverEvent {
        ObserverEvent::AgentStart {
            model_provider: "openrouter".into(),
            model: "claude-sonnet".into(),
        }
    }

    /// Asserts that every expected fragment occurs in the scrape output.
    fn assert_all_present(output: &str, expected: &[&str]) {
        for fragment in expected {
            assert!(output.contains(fragment));
        }
    }

    #[test]
    fn prometheus_observer_name() {
        let obs = PrometheusObserver::new();
        assert_eq!(obs.name(), "prometheus");
    }

    #[test]
    fn records_all_events_without_panic() {
        let obs = PrometheusObserver::new();
        obs.record_event(&agent_start());
        for (duration, tokens_used) in [
            (Duration::from_millis(500), Some(100)),
            (Duration::ZERO, None),
        ] {
            obs.record_event(&ObserverEvent::AgentEnd {
                model_provider: "openrouter".into(),
                model: "claude-sonnet".into(),
                duration,
                tokens_used,
                cost_usd: None,
            });
        }
        obs.record_event(&tool_call("shell", 10, true));
        obs.record_event(&tool_call("file_read", 5, false));
        obs.record_event(&ObserverEvent::ChannelMessage {
            channel: "telegram".into(),
            direction: "inbound".into(),
        });
        obs.record_event(&ObserverEvent::HeartbeatTick);
        obs.record_event(&error_event("model_provider", "timeout"));
    }

    #[test]
    fn records_all_metrics_without_panic() {
        let obs = PrometheusObserver::new();
        let metrics = [
            ObserverMetric::RequestLatency(Duration::from_secs(2)),
            ObserverMetric::TokensUsed(500),
            ObserverMetric::TokensUsed(0),
            ObserverMetric::ActiveSessions(3),
            ObserverMetric::QueueDepth(42),
        ];
        for metric in &metrics {
            obs.record_metric(metric);
        }
    }

    #[test]
    fn encode_produces_prometheus_text_format() {
        let obs = PrometheusObserver::new();
        obs.record_event(&agent_start());
        obs.record_event(&tool_call("shell", 100, true));
        obs.record_event(&ObserverEvent::HeartbeatTick);
        obs.record_metric(&ObserverMetric::RequestLatency(Duration::from_millis(250)));

        assert_all_present(
            &obs.encode(),
            &[
                "zeroclaw_agent_starts_total",
                "zeroclaw_tool_calls_total",
                "zeroclaw_heartbeat_ticks_total",
                "zeroclaw_request_latency_seconds",
            ],
        );
    }

    #[test]
    fn counters_increment_correctly() {
        let obs = PrometheusObserver::new();

        (0..3).for_each(|_| obs.record_event(&ObserverEvent::HeartbeatTick));

        assert_all_present(&obs.encode(), &["zeroclaw_heartbeat_ticks_total 3"]);
    }

    #[test]
    fn tool_calls_track_success_and_failure_separately() {
        let obs = PrometheusObserver::new();

        for success in [true, true, false] {
            obs.record_event(&tool_call("shell", 10, success));
        }

        assert_all_present(
            &obs.encode(),
            &[
                r#"zeroclaw_tool_calls_total{success="true",tool="shell"} 2"#,
                r#"zeroclaw_tool_calls_total{success="false",tool="shell"} 1"#,
            ],
        );
    }

    #[test]
    fn errors_track_by_component() {
        let obs = PrometheusObserver::new();
        for (component, message) in [
            ("model_provider", "timeout"),
            ("model_provider", "rate limit"),
            ("channels", "disconnected"),
        ] {
            obs.record_event(&error_event(component, message));
        }

        assert_all_present(
            &obs.encode(),
            &[
                r#"zeroclaw_errors_total{component="model_provider"} 2"#,
                r#"zeroclaw_errors_total{component="channels"} 1"#,
            ],
        );
    }

    #[test]
    fn gauge_reflects_latest_value() {
        let obs = PrometheusObserver::new();
        obs.record_metric(&ObserverMetric::TokensUsed(100));
        obs.record_metric(&ObserverMetric::TokensUsed(200));

        assert_all_present(&obs.encode(), &["zeroclaw_tokens_used_last 200"]);
    }

    #[test]
    fn llm_response_tracks_request_count_and_tokens() {
        let obs = PrometheusObserver::new();

        let first = ObserverEvent::LlmResponse {
            model_provider: "openrouter".into(),
            model: "claude-sonnet".into(),
            duration: Duration::from_millis(200),
            success: true,
            error_message: None,
            input_tokens: Some(100),
            output_tokens: Some(50),
        };
        let second = ObserverEvent::LlmResponse {
            model_provider: "openrouter".into(),
            model: "claude-sonnet".into(),
            duration: Duration::from_millis(300),
            success: true,
            error_message: None,
            input_tokens: Some(200),
            output_tokens: Some(80),
        };
        obs.record_event(&first);
        obs.record_event(&second);

        assert_all_present(
            &obs.encode(),
            &[
                r#"zeroclaw_llm_requests_total{model="claude-sonnet",model_provider="openrouter",success="true"} 2"#,
                r#"zeroclaw_tokens_input_total{model="claude-sonnet",model_provider="openrouter"} 300"#,
                r#"zeroclaw_tokens_output_total{model="claude-sonnet",model_provider="openrouter"} 130"#,
            ],
        );
    }

    #[test]
    fn llm_response_without_tokens_increments_request_only() {
        let obs = PrometheusObserver::new();

        let failed = ObserverEvent::LlmResponse {
            model_provider: "ollama".into(),
            model: "llama3".into(),
            duration: Duration::from_millis(100),
            success: false,
            error_message: Some("timeout".into()),
            input_tokens: None,
            output_tokens: None,
        };
        obs.record_event(&failed);

        let output = obs.encode();
        assert!(output.contains(
            r#"zeroclaw_llm_requests_total{model="llama3",model_provider="ollama",success="false"} 1"#
        ));
        // No token counts were reported, so no labelled token series may exist.
        for absent in [
            "zeroclaw_tokens_input_total{",
            "zeroclaw_tokens_output_total{",
        ] {
            assert!(!output.contains(absent));
        }
    }

    #[test]
    fn dora_deployment_events_track_counters() {
        let obs = PrometheusObserver::new();

        for (deploy_id, commit_sha) in [("d1", "abc123"), ("d2", "def456")] {
            obs.record_event(&ObserverEvent::DeploymentCompleted {
                deploy_id: deploy_id.into(),
                commit_sha: commit_sha.into(),
            });
        }
        obs.record_event(&ObserverEvent::DeploymentFailed {
            deploy_id: "d3".into(),
            reason: "timeout".into(),
        });

        assert_all_present(
            &obs.encode(),
            &[
                r#"zeroclaw_deployments_total{status="success"} 2"#,
                r#"zeroclaw_deployments_total{status="failure"} 1"#,
            ],
        );
    }

    #[test]
    fn dora_failure_rate_gauge_updates() {
        let obs = PrometheusObserver::new();

        obs.record_event(&ObserverEvent::DeploymentCompleted {
            deploy_id: "d1".into(),
            commit_sha: "abc".into(),
        });
        obs.record_event(&ObserverEvent::DeploymentFailed {
            deploy_id: "d2".into(),
            reason: "error".into(),
        });

        // One success and one failure give a rate of 0.5.
        assert_all_present(&obs.encode(), &["zeroclaw_deployment_failure_rate 0.5"]);
    }

    #[test]
    fn dora_lead_time_and_recovery_metrics() {
        let obs = PrometheusObserver::new();

        let lead_time = Duration::from_secs(3600);
        let recovery = Duration::from_secs(600);
        obs.record_metric(&ObserverMetric::DeploymentLeadTime(lead_time));
        obs.record_metric(&ObserverMetric::RecoveryTime(recovery));

        assert_all_present(
            &obs.encode(),
            &[
                "zeroclaw_deployment_lead_time_seconds",
                "zeroclaw_recovery_time_seconds",
                "zeroclaw_mttr_seconds 600",
            ],
        );
    }

    #[test]
    fn dora_started_and_recovery_events_no_panic() {
        let obs = PrometheusObserver::new();

        let started = ObserverEvent::DeploymentStarted {
            deploy_id: "d1".into(),
        };
        let recovered = ObserverEvent::RecoveryCompleted {
            deploy_id: "d1".into(),
        };
        obs.record_event(&started);
        obs.record_event(&recovered);
    }

    #[test]
    fn shared_returns_the_same_registry_across_calls() {
        let first = PrometheusObserver::shared();
        let second = PrometheusObserver::shared();
        assert!(
            Arc::ptr_eq(&first, &second),
            "PrometheusObserver::shared() must hand out the same underlying \
             instance to every caller, otherwise the gateway's /metrics scrape \
             cannot see counters incremented by the channel orchestrator"
        );
    }

    #[test]
    fn arc_blanket_observer_impl_routes_to_inner() {
        let writer = PrometheusObserver::shared();
        let reader = PrometheusObserver::shared();

        // Record through the `Observer` impl on the Arc handle itself.
        let message = ObserverEvent::ChannelMessage {
            channel: "test-channel".into(),
            direction: "inbound".into(),
        };
        Observer::record_event(&writer, &message);

        let output = reader.encode();
        assert!(
            output.contains(
                r#"zeroclaw_channel_messages_total{channel="test-channel",direction="inbound"} 1"#
            ),
            "an event recorded through one Arc handle must be visible when \
             scraping through any other handle — output was: {output}"
        );
    }

    #[test]
    fn arc_blanket_observer_impl_preserves_downcast() {
        let shared: Arc<PrometheusObserver> = PrometheusObserver::shared();
        let observer: &dyn Observer = &shared;
        let concrete = observer.as_any().downcast_ref::<PrometheusObserver>();
        assert!(
            concrete.is_some(),
            "the /metrics resolver downcasts through `as_any` — Arc<T> must \
             surface the inner T, not the Arc wrapper"
        );
    }
}