openzeppelin_relayer/metrics/
mod.rs

1//! Metrics module for the application.
2//!
3//! - This module contains the global Prometheus registry.
4//! - Defines specific metrics for the application.
5
6pub mod middleware;
7use lazy_static::lazy_static;
8use prometheus::{
9    CounterVec, Encoder, Gauge, GaugeVec, HistogramOpts, HistogramVec, Opts, Registry, TextEncoder,
10};
11
// Stage labels for TRANSACTION_PROCESSING_TIME histogram.
//
// These are the values intended for the `stage` label of that histogram; pass
// one of them as the `stage` argument of `observe_processing_time`.
// NOTE(review): the per-stage meanings below are inferred from the names only —
// confirm against the call sites.

/// Stage label `"request_queue_dwell"` (presumably time spent waiting in the request queue).
pub const STAGE_REQUEST_QUEUE_DWELL: &str = "request_queue_dwell";
/// Stage label `"prepare_duration"` (presumably time spent preparing the transaction).
pub const STAGE_PREPARE_DURATION: &str = "prepare_duration";
/// Stage label `"submission_queue_dwell"` (presumably time spent waiting in the submission queue).
pub const STAGE_SUBMISSION_QUEUE_DWELL: &str = "submission_queue_dwell";
/// Stage label `"submit_duration"` (presumably time spent submitting the transaction).
pub const STAGE_SUBMIT_DURATION: &str = "submit_duration";
17
18/// Observe a duration on the `TRANSACTION_PROCESSING_TIME` histogram.
19pub fn observe_processing_time(relayer_id: &str, network_type: &str, stage: &str, secs: f64) {
20    TRANSACTION_PROCESSING_TIME
21        .with_label_values(&[relayer_id, network_type, stage])
22        .observe(secs);
23}
24use sysinfo::{Disks, System};
25
26lazy_static! {
27    // Global Prometheus registry.
28    pub static ref REGISTRY: Registry = Registry::new();
29
30    // Counter: Total HTTP requests.
31    pub static ref REQUEST_COUNTER: CounterVec = {
32        let opts = Opts::new("requests_total", "Total number of HTTP requests");
33        let counter_vec = CounterVec::new(opts, &["endpoint", "method", "status"]).unwrap();
34        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
35        counter_vec
36    };
37
38    // Counter: Total HTTP requests by raw URI.
39    pub static ref RAW_REQUEST_COUNTER: CounterVec = {
40      let opts = Opts::new("raw_requests_total", "Total number of HTTP requests by raw URI");
41      let counter_vec = CounterVec::new(opts, &["raw_uri", "method", "status"]).unwrap();
42      REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
43      counter_vec
44    };
45
46    // Histogram for request latency in seconds.
47    pub static ref REQUEST_LATENCY: HistogramVec = {
48      let histogram_opts = HistogramOpts::new("request_latency_seconds", "Request latency in seconds")
49          .buckets(vec![0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 25.0, 50.0, 100.0]);
50      let histogram_vec = HistogramVec::new(histogram_opts, &["endpoint", "method", "status"]).unwrap();
51      REGISTRY.register(Box::new(histogram_vec.clone())).unwrap();
52      histogram_vec
53    };
54
55    // Counter for error responses.
56    pub static ref ERROR_COUNTER: CounterVec = {
57        let opts = Opts::new("error_requests_total", "Total number of error responses");
58        // Using "status" to record the HTTP status code (or a special label like "service_error")
59        let counter_vec = CounterVec::new(opts, &["endpoint", "method", "status"]).unwrap();
60        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
61        counter_vec
62    };
63
64    // Gauge for CPU usage percentage.
65    pub static ref CPU_USAGE: Gauge = {
66      let gauge = Gauge::new("cpu_usage_percentage", "Current CPU usage percentage").unwrap();
67      REGISTRY.register(Box::new(gauge.clone())).unwrap();
68      gauge
69    };
70
71    // Gauge for memory usage percentage.
72    pub static ref MEMORY_USAGE_PERCENT: Gauge = {
73      let gauge = Gauge::new("memory_usage_percentage", "Memory usage percentage").unwrap();
74      REGISTRY.register(Box::new(gauge.clone())).unwrap();
75      gauge
76    };
77
78    // Gauge for memory usage in bytes.
79    pub static ref MEMORY_USAGE: Gauge = {
80        let gauge = Gauge::new("memory_usage_bytes", "Memory usage in bytes").unwrap();
81        REGISTRY.register(Box::new(gauge.clone())).unwrap();
82        gauge
83    };
84
85    // Gauge for total memory in bytes.
86    pub static ref TOTAL_MEMORY: Gauge = {
87      let gauge = Gauge::new("total_memory_bytes", "Total memory in bytes").unwrap();
88      REGISTRY.register(Box::new(gauge.clone())).unwrap();
89      gauge
90    };
91
92    // Gauge for available memory in bytes.
93    pub static ref AVAILABLE_MEMORY: Gauge = {
94        let gauge = Gauge::new("available_memory_bytes", "Available memory in bytes").unwrap();
95        REGISTRY.register(Box::new(gauge.clone())).unwrap();
96        gauge
97    };
98
99    // Gauge for used disk space in bytes.
100    pub static ref DISK_USAGE: Gauge = {
101      let gauge = Gauge::new("disk_usage_bytes", "Used disk space in bytes").unwrap();
102      REGISTRY.register(Box::new(gauge.clone())).unwrap();
103      gauge
104    };
105
106    // Gauge for disk usage percentage.
107    pub static ref DISK_USAGE_PERCENT: Gauge = {
108      let gauge = Gauge::new("disk_usage_percentage", "Disk usage percentage").unwrap();
109      REGISTRY.register(Box::new(gauge.clone())).unwrap();
110      gauge
111    };
112
113    // Gauge for in-flight requests.
114    pub static ref IN_FLIGHT_REQUESTS: GaugeVec = {
115        let gauge_vec = GaugeVec::new(
116            Opts::new("in_flight_requests", "Number of in-flight requests"),
117            &["endpoint"]
118        ).unwrap();
119        REGISTRY.register(Box::new(gauge_vec.clone())).unwrap();
120        gauge_vec
121    };
122
123    // Counter for request timeouts.
124    pub static ref TIMEOUT_COUNTER: CounterVec = {
125        let opts = Opts::new("request_timeouts_total", "Total number of request timeouts");
126        let counter_vec = CounterVec::new(opts, &["endpoint", "method", "timeout_type"]).unwrap();
127        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
128        counter_vec
129    };
130
131    // Gauge for file descriptor count.
132    pub static ref FILE_DESCRIPTORS: Gauge = {
133        let gauge = Gauge::new("file_descriptors_count", "Current file descriptor count").unwrap();
134        REGISTRY.register(Box::new(gauge.clone())).unwrap();
135        gauge
136    };
137
138    // Gauge for CLOSE_WAIT socket count.
139    pub static ref CLOSE_WAIT_SOCKETS: Gauge = {
140        let gauge = Gauge::new("close_wait_sockets_count", "Number of CLOSE_WAIT sockets").unwrap();
141        REGISTRY.register(Box::new(gauge.clone())).unwrap();
142        gauge
143    };
144
145    // Counter for successful transactions (Confirmed status).
146    pub static ref TRANSACTIONS_SUCCESS: CounterVec = {
147        let opts = Opts::new("transactions_success_total", "Total number of successful transactions");
148        let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
149        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
150        counter_vec
151    };
152
153    // Counter for failed transactions (Failed, Expired, Canceled statuses).
154    // Labels: relayer_id, network_type, failure_reason, previous_status.
155    // Note: `previous_status` label added to track the pipeline stage before the failure
156    // (e.g. "pending", "sent", "submitted"), enabling pre- vs post-submission attribution.
157    pub static ref TRANSACTIONS_FAILED: CounterVec = {
158        let opts = Opts::new("transactions_failed_total", "Total number of failed transactions");
159        let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type", "failure_reason", "previous_status"]).unwrap();
160        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
161        counter_vec
162    };
163
164    // Counter for RPC failures during API requests (before transaction creation).
165    // This tracks failures that occur during operations like get_status, get_balance, etc.
166    // that happen before a transaction is created.
167    pub static ref API_RPC_FAILURES: CounterVec = {
168        let opts = Opts::new("api_rpc_failures_total", "Total number of RPC failures during API requests (before transaction creation)");
169        let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type", "operation_name", "error_type"]).unwrap();
170        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
171        counter_vec
172    };
173
174    // Counter for transaction creation (when a transaction is successfully created in the repository).
175    pub static ref TRANSACTIONS_CREATED: CounterVec = {
176        let opts = Opts::new("transactions_created_total", "Total number of transactions created");
177        let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
178        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
179        counter_vec
180    };
181
182    // Counter for transaction submissions (when status changes to Submitted).
183    pub static ref TRANSACTIONS_SUBMITTED: CounterVec = {
184        let opts = Opts::new("transactions_submitted_total", "Total number of transactions submitted to the network");
185        let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
186        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
187        counter_vec
188    };
189
190    // Gauge for transaction status distribution (current count of transactions in each status).
191    pub static ref TRANSACTIONS_BY_STATUS: GaugeVec = {
192        let gauge_vec = GaugeVec::new(
193            Opts::new("transactions_by_status", "Current number of transactions by status"),
194            &["relayer_id", "network_type", "status"]
195        ).unwrap();
196        REGISTRY.register(Box::new(gauge_vec.clone())).unwrap();
197        gauge_vec
198    };
199
200    // Histogram for transaction processing times (creation to submission).
201    pub static ref TRANSACTION_PROCESSING_TIME: HistogramVec = {
202        let histogram_opts = HistogramOpts::new("transaction_processing_seconds", "Transaction processing time in seconds")
203            .buckets(vec![0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0, 60.0, 120.0, 300.0]);
204        let histogram_vec = HistogramVec::new(histogram_opts, &["relayer_id", "network_type", "stage"]).unwrap();
205        REGISTRY.register(Box::new(histogram_vec.clone())).unwrap();
206        histogram_vec
207    };
208
209    // Histogram for RPC call latency.
210    pub static ref RPC_CALL_LATENCY: HistogramVec = {
211        let histogram_opts = HistogramOpts::new("rpc_call_latency_seconds", "RPC call latency in seconds")
212            .buckets(vec![0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0]);
213        let histogram_vec = HistogramVec::new(histogram_opts, &["relayer_id", "network_type", "operation_name"]).unwrap();
214        REGISTRY.register(Box::new(histogram_vec.clone())).unwrap();
215        histogram_vec
216    };
217
218    // Counter for Stellar transaction submission failures with decoded result codes.
219    pub static ref STELLAR_SUBMISSION_FAILURES: CounterVec = {
220        let opts = Opts::new("stellar_submission_failures_total",
221            "Stellar transaction submission failures by status and result code");
222        let counter_vec = CounterVec::new(opts, &["submit_status", "result_code"]).unwrap();
223        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
224        counter_vec
225    };
226
227    // Counter for plugin calls (tracks requests to /api/v1/plugins/{plugin_id}/call endpoints).
228    pub static ref PLUGIN_CALLS: CounterVec = {
229        let opts = Opts::new("plugin_calls_total", "Total number of plugin calls");
230        let counter_vec = CounterVec::new(opts, &["plugin_id", "method", "status"]).unwrap();
231        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
232        counter_vec
233    };
234
235    // Counter for Stellar submit responses with TRY_AGAIN_LATER status.
236    pub static ref STELLAR_TRY_AGAIN_LATER: CounterVec = {
237        let opts = Opts::new(
238            "stellar_try_again_later_total",
239            "Total number of Stellar transaction submit responses with TRY_AGAIN_LATER"
240        );
241        let counter_vec = CounterVec::new(opts, &["relayer_id", "tx_status"]).unwrap();
242        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
243        counter_vec
244    };
245
246    // Counter for transactions confirmed after experiencing TRY_AGAIN_LATER.
247    pub static ref TRANSACTIONS_TRY_AGAIN_LATER_SUCCESS: CounterVec = {
248        let opts = Opts::new(
249            "transactions_try_again_later_success_total",
250            "Total number of transactions confirmed after experiencing TRY_AGAIN_LATER"
251        );
252        let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
253        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
254        counter_vec
255    };
256
257    // Counter for transactions that failed after experiencing TRY_AGAIN_LATER.
258    pub static ref TRANSACTIONS_TRY_AGAIN_LATER_FAILED: CounterVec = {
259        let opts = Opts::new(
260            "transactions_try_again_later_failed_total",
261            "Total number of transactions that failed after experiencing TRY_AGAIN_LATER"
262        );
263        let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
264        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
265        counter_vec
266    };
267
268    // Counter for transactions that encountered an insufficient fee error.
269    pub static ref TRANSACTIONS_INSUFFICIENT_FEE: CounterVec = {
270        let opts = Opts::new(
271            "transactions_insufficient_fee_total",
272            "Total number of transactions that encountered an insufficient fee error"
273        );
274        let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
275        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
276        counter_vec
277    };
278
279    // Counter for transactions confirmed after experiencing insufficient fee.
280    pub static ref TRANSACTIONS_INSUFFICIENT_FEE_SUCCESS: CounterVec = {
281        let opts = Opts::new(
282            "transactions_insufficient_fee_success_total",
283            "Total number of transactions confirmed after experiencing insufficient fee"
284        );
285        let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
286        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
287        counter_vec
288    };
289
290    // Counter for transactions that failed after experiencing insufficient fee.
291    pub static ref TRANSACTIONS_INSUFFICIENT_FEE_FAILED: CounterVec = {
292        let opts = Opts::new(
293            "transactions_insufficient_fee_failed_total",
294            "Total number of transactions that failed after experiencing insufficient fee"
295        );
296        let counter_vec = CounterVec::new(opts, &["relayer_id", "network_type"]).unwrap();
297        REGISTRY.register(Box::new(counter_vec.clone())).unwrap();
298        counter_vec
299    };
300}
301
302/// Gather all metrics and encode into the provided format.
303pub fn gather_metrics() -> Result<Vec<u8>, Box<dyn std::error::Error>> {
304    let encoder = TextEncoder::new();
305    let metric_families = REGISTRY.gather();
306    let mut buffer = Vec::new();
307    encoder.encode(&metric_families, &mut buffer)?;
308    Ok(buffer)
309}
310
/// Count the file descriptors currently open in this process.
///
/// - Linux: number of entries under `/proc/<pid>/fd`.
/// - macOS: number of lines printed by `lsof -p <pid>`, minus the header line.
/// - Any other platform: always `Ok(0)`.
///
/// # Errors
///
/// Returns the I/O error from reading the `/proc` directory (Linux) or from
/// spawning `lsof` (macOS).
fn get_fd_count() -> Result<usize, std::io::Error> {
    #[cfg(target_os = "linux")]
    fn count_for(pid: u32) -> Result<usize, std::io::Error> {
        let entries = std::fs::read_dir(format!("/proc/{pid}/fd"))?;
        Ok(entries.count())
    }

    #[cfg(target_os = "macos")]
    fn count_for(pid: u32) -> Result<usize, std::io::Error> {
        use std::process::Command;

        let pid_arg = pid.to_string();
        let output = Command::new("lsof").args(["-p", pid_arg.as_str()]).output()?;
        let listing = String::from_utf8_lossy(&output.stdout);
        // The first line of the listing is the column header, not a descriptor.
        Ok(listing.lines().count().saturating_sub(1))
    }

    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
    fn count_for(_pid: u32) -> Result<usize, std::io::Error> {
        Ok(0) // Unsupported platform
    }

    count_for(std::process::id())
}
339
/// Count the sockets reported in the `CLOSE_WAIT` state.
///
/// On Linux and macOS this runs `netstat -an | grep CLOSE_WAIT | wc -l` through
/// `sh -c` and parses the printed number; output that does not parse is treated
/// as `0`. On any other platform the result is always `Ok(0)`.
///
/// # Errors
///
/// Returns the I/O error from spawning the shell.
fn get_close_wait_count() -> Result<usize, std::io::Error> {
    #[cfg(any(target_os = "linux", target_os = "macos"))]
    fn count_close_wait() -> Result<usize, std::io::Error> {
        use std::process::Command;

        let mut shell = Command::new("sh");
        shell.arg("-c").arg("netstat -an | grep CLOSE_WAIT | wc -l");
        let output = shell.output()?;
        let printed = String::from_utf8_lossy(&output.stdout);
        Ok(printed.trim().parse().unwrap_or(0))
    }

    #[cfg(not(any(target_os = "linux", target_os = "macos")))]
    fn count_close_wait() -> Result<usize, std::io::Error> {
        Ok(0) // Unsupported platform
    }

    count_close_wait()
}
360
361/// Updates the system metrics for CPU and memory usage.
362pub fn update_system_metrics() {
363    let mut sys = System::new_all();
364    sys.refresh_all();
365
366    // Overall CPU usage.
367    let cpu_usage = sys.global_cpu_usage();
368    CPU_USAGE.set(cpu_usage as f64);
369
370    // Total memory (in bytes).
371    let total_memory = sys.total_memory();
372    TOTAL_MEMORY.set(total_memory as f64);
373
374    // Available memory (in bytes).
375    let available_memory = sys.available_memory();
376    AVAILABLE_MEMORY.set(available_memory as f64);
377
378    // Used memory (in bytes).
379    let memory_usage = sys.used_memory();
380    MEMORY_USAGE.set(memory_usage as f64);
381
382    // Calculate memory usage percentage
383    let memory_percentage = if total_memory > 0 {
384        (memory_usage as f64 / total_memory as f64) * 100.0
385    } else {
386        0.0
387    };
388    MEMORY_USAGE_PERCENT.set(memory_percentage);
389
390    // Calculate disk usage:
391    // Sum total space and available space across all disks.
392    let disks = Disks::new_with_refreshed_list();
393    let mut total_disk_space: u64 = 0;
394    let mut total_disk_available: u64 = 0;
395    for disk in disks.list() {
396        total_disk_space += disk.total_space();
397        total_disk_available += disk.available_space();
398    }
399    // Used disk space is total minus available ( in bytes).
400    let used_disk_space = total_disk_space.saturating_sub(total_disk_available);
401    DISK_USAGE.set(used_disk_space as f64);
402
403    // Calculate disk usage percentage.
404    let disk_percentage = if total_disk_space > 0 {
405        (used_disk_space as f64 / total_disk_space as f64) * 100.0
406    } else {
407        0.0
408    };
409    DISK_USAGE_PERCENT.set(disk_percentage);
410
411    // Update file descriptor count.
412    if let Ok(fd_count) = get_fd_count() {
413        FILE_DESCRIPTORS.set(fd_count as f64);
414    }
415
416    // Update CLOSE_WAIT socket count.
417    if let Ok(close_wait) = get_close_wait_count() {
418        CLOSE_WAIT_SOCKETS.set(close_wait as f64);
419    }
420}
421
/// Tests for metric gathering, the system-metrics updater, and the metrics
/// middleware, driven through stub actix services.
///
/// All tests share the process-wide `REGISTRY`, so assertions on counters use
/// `>=` rather than exact values.
#[cfg(test)]
mod actix_tests {
    use super::*;
    use actix_web::{
        dev::{Service, ServiceRequest, ServiceResponse, Transform},
        http, test, Error, HttpResponse,
    };
    use futures::future::{self};
    use middleware::MetricsMiddleware;
    use prometheus::proto::MetricFamily;
    use std::{
        pin::Pin,
        task::{Context, Poll},
    };

    // Dummy service that always returns a successful response (HTTP 200 OK).
    struct DummySuccessService;

    impl Service<ServiceRequest> for DummySuccessService {
        type Response = ServiceResponse;
        type Error = Error;
        type Future = Pin<Box<dyn future::Future<Output = Result<Self::Response, Self::Error>>>>;

        // Always ready: the stub has no capacity limits.
        fn poll_ready(&self, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
            Poll::Ready(Ok(()))
        }

        // Turns the incoming request into an empty 200 OK response.
        fn call(&self, req: ServiceRequest) -> Self::Future {
            let resp = req.into_response(HttpResponse::Ok().finish());
            Box::pin(async move { Ok(resp) })
        }
    }

    // Dummy service that always returns an error.
    struct DummyErrorService;

    impl Service<ServiceRequest> for DummyErrorService {
        type Response = ServiceResponse;
        type Error = Error;
        type Future = Pin<Box<dyn future::Future<Output = Result<Self::Response, Self::Error>>>>;

        // Always ready: the stub has no capacity limits.
        fn poll_ready(&self, _cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
            Poll::Ready(Ok(()))
        }

        // Ignores the request and fails with an internal-server-error.
        fn call(&self, _req: ServiceRequest) -> Self::Future {
            Box::pin(async move { Err(actix_web::error::ErrorInternalServerError("dummy error")) })
        }
    }

    // Helper function to find a metric family by name.
    // Returns `None` when no gathered family carries that name.
    fn find_metric_family<'a>(
        name: &str,
        families: &'a [MetricFamily],
    ) -> Option<&'a MetricFamily> {
        families.iter().find(|mf| mf.name() == name)
    }

    // Checks that the encoded output of `gather_metrics` mentions every metric
    // touched below. Only substring presence is asserted, not values.
    #[actix_rt::test]
    async fn test_gather_metrics_contains_expected_names() {
        // Update system metrics
        update_system_metrics();

        // Increment request counters to ensure they appear in output
        REQUEST_COUNTER
            .with_label_values(&["/test", "GET", "200"])
            .inc();
        RAW_REQUEST_COUNTER
            .with_label_values(&["/test?param=value", "GET", "200"])
            .inc();
        REQUEST_LATENCY
            .with_label_values(&["/test", "GET", "200"])
            .observe(0.1);
        ERROR_COUNTER
            .with_label_values(&["/test", "GET", "500"])
            .inc();

        // Touch insufficient fee metrics to ensure they appear in output
        TRANSACTIONS_INSUFFICIENT_FEE
            .with_label_values(&["test-relayer", "stellar"])
            .inc();
        TRANSACTIONS_INSUFFICIENT_FEE_SUCCESS
            .with_label_values(&["test-relayer", "stellar"])
            .inc();
        TRANSACTIONS_INSUFFICIENT_FEE_FAILED
            .with_label_values(&["test-relayer", "stellar"])
            .inc();

        // Touch TRY_AGAIN_LATER metrics to ensure they appear in output
        TRANSACTIONS_TRY_AGAIN_LATER_SUCCESS
            .with_label_values(&["test-relayer", "stellar"])
            .inc();
        TRANSACTIONS_TRY_AGAIN_LATER_FAILED
            .with_label_values(&["test-relayer", "stellar"])
            .inc();

        let metrics = gather_metrics().expect("failed to gather metrics");
        let output = String::from_utf8(metrics).expect("metrics output is not valid UTF-8");

        // System metrics
        assert!(output.contains("cpu_usage_percentage"));
        assert!(output.contains("memory_usage_percentage"));
        assert!(output.contains("memory_usage_bytes"));
        assert!(output.contains("total_memory_bytes"));
        assert!(output.contains("available_memory_bytes"));
        assert!(output.contains("disk_usage_bytes"));
        assert!(output.contains("disk_usage_percentage"));

        // Request metrics
        assert!(output.contains("requests_total"));
        assert!(output.contains("raw_requests_total"));
        assert!(output.contains("request_latency_seconds"));
        assert!(output.contains("error_requests_total"));

        // Insufficient fee metrics
        assert!(output.contains("transactions_insufficient_fee_total"));
        assert!(output.contains("transactions_insufficient_fee_success_total"));
        assert!(output.contains("transactions_insufficient_fee_failed_total"));

        // TRY_AGAIN_LATER metrics
        assert!(output.contains("transactions_try_again_later_success_total"));
        assert!(output.contains("transactions_try_again_later_failed_total"));
    }

    // Checks that `update_system_metrics` leaves every system gauge within a
    // plausible range. The gauges are global, so other tests calling the updater
    // concurrently may also write to them.
    #[actix_rt::test]
    async fn test_update_system_metrics() {
        // Reset metrics to ensure clean state
        CPU_USAGE.set(0.0);
        TOTAL_MEMORY.set(0.0);
        AVAILABLE_MEMORY.set(0.0);
        MEMORY_USAGE.set(0.0);
        MEMORY_USAGE_PERCENT.set(0.0);
        DISK_USAGE.set(0.0);
        DISK_USAGE_PERCENT.set(0.0);

        // Call the function we're testing
        update_system_metrics();

        // Verify that metrics have been updated with reasonable values
        let cpu_usage = CPU_USAGE.get();
        assert!(
            (0.0..=100.0).contains(&cpu_usage),
            "CPU usage should be between 0-100%, got {cpu_usage}"
        );

        let memory_usage = MEMORY_USAGE.get();
        assert!(
            memory_usage >= 0.0,
            "Memory usage should be >= 0, got {memory_usage}"
        );

        let memory_percent = MEMORY_USAGE_PERCENT.get();
        assert!(
            (0.0..=100.0).contains(&memory_percent),
            "Memory usage percentage should be between 0-100%, got {memory_percent}"
        );

        let total_memory = TOTAL_MEMORY.get();
        assert!(
            total_memory > 0.0,
            "Total memory should be > 0, got {total_memory}"
        );

        let available_memory = AVAILABLE_MEMORY.get();
        assert!(
            available_memory >= 0.0,
            "Available memory should be >= 0, got {available_memory}"
        );

        let disk_usage = DISK_USAGE.get();
        assert!(
            disk_usage >= 0.0,
            "Disk usage should be >= 0, got {disk_usage}"
        );

        let disk_percent = DISK_USAGE_PERCENT.get();
        assert!(
            (0.0..=100.0).contains(&disk_percent),
            "Disk usage percentage should be between 0-100%, got {disk_percent}"
        );

        // Verify that memory usage doesn't exceed total memory
        assert!(
            memory_usage <= total_memory,
            "Memory usage should be <= total memory, got {memory_usage}"
        );

        // Verify that available memory plus used memory doesn't exceed total memory
        // NOTE(review): assumes sysinfo's "used" and "available" figures never
        // overlap on any target platform — confirm, otherwise this can be flaky.
        assert!(
            (available_memory + memory_usage) <= total_memory,
            "Available memory plus used memory should be <= total memory {}, got {}",
            total_memory,
            available_memory + memory_usage
        );
    }

    // Sends one request through `MetricsMiddleware` wrapped around the success
    // stub and expects a `requests_total` sample labelled with that endpoint.
    #[actix_rt::test]
    async fn test_middleware_success() {
        let req = test::TestRequest::with_uri("/test_success").to_srv_request();

        let middleware = MetricsMiddleware;
        let service = middleware.new_transform(DummySuccessService).await.unwrap();

        let resp = service.call(req).await.unwrap();
        assert_eq!(resp.response().status(), http::StatusCode::OK);

        let families = REGISTRY.gather();
        let counter_fam = find_metric_family("requests_total", &families)
            .expect("requests_total metric family not found");

        // Scan every series of the family for one carrying this test's endpoint.
        let mut found = false;
        for m in counter_fam.get_metric() {
            let labels = m.get_label();
            if labels
                .iter()
                .any(|l| l.name() == "endpoint" && l.value() == "/test_success")
            {
                found = true;
                assert!(m.get_counter().value() >= 1.0);
            }
        }
        assert!(
            found,
            "Expected metric with endpoint '/test_success' not found"
        );
    }

    // Sends one request through `MetricsMiddleware` wrapped around the failing
    // stub and expects an `error_requests_total` sample labelled with that endpoint.
    #[actix_rt::test]
    async fn test_middleware_error() {
        let req = test::TestRequest::with_uri("/test_error").to_srv_request();

        let middleware = MetricsMiddleware;
        let service = middleware.new_transform(DummyErrorService).await.unwrap();

        let result = service.call(req).await;
        assert!(result.is_err());

        let families = REGISTRY.gather();
        let error_counter_fam = find_metric_family("error_requests_total", &families)
            .expect("error_requests_total metric family not found");

        // Scan every series of the family for one carrying this test's endpoint.
        let mut found = false;
        for m in error_counter_fam.get_metric() {
            let labels = m.get_label();
            if labels
                .iter()
                .any(|l| l.name() == "endpoint" && l.value() == "/test_error")
            {
                found = true;
                assert!(m.get_counter().value() >= 1.0);
            }
        }
        assert!(
            found,
            "Expected error metric with endpoint '/test_error' not found"
        );
    }
}
680
/// Property-based checks for the percentage arithmetic and for label sanity.
#[cfg(test)]
mod property_tests {
    use proptest::{prelude::*, test_runner::Config};

    /// Share of `total` taken by `used`, as a percentage; `0.0` when `total` is zero.
    fn compute_percentage(used: u64, total: u64) -> f64 {
        if total == 0 {
            return 0.0;
        }
        (used as f64 / total as f64) * 100.0
    }

    proptest! {
        // Run each property against 1000 generated cases.
        #![proptest_config(Config {
            cases: 1000,
            ..Config::default()
        })]

        /// For any `used <= total` with a non-zero total, the percentage stays in 0..=100.
        #[test]
        fn prop_compute_percentage(
            (total, used) in (1u64..1_000_000u64)
                .prop_flat_map(|total| (Just(total), 0u64..=total))
        ) {
            let percentage = compute_percentage(used, total);
            prop_assert!(percentage >= 0.0);
            prop_assert!(percentage <= 100.0);
        }

        /// Generated endpoint/method/status labels are non-empty and of bounded length.
        #[test]
        fn prop_labels_are_reasonable(
            endpoint in ".*",
            method in prop::sample::select(
                vec!["GET", "POST", "PUT", "DELETE"]
                    .into_iter()
                    .map(String::from)
                    .collect::<Vec<String>>()
            )
        ) {
            // An empty endpoint falls back to the root path.
            let endpoint_label = if endpoint.is_empty() {
                String::from("/")
            } else {
                endpoint
            };
            let method_label = method;

            prop_assert!(endpoint_label.chars().count() <= 1024, "Endpoint label too long");
            prop_assert!(method_label.chars().count() <= 16, "Method label too long");

            let status_label = String::from("200");

            for label in [endpoint_label, method_label, status_label] {
                prop_assert!(!label.is_empty());
                prop_assert!(label.len() < 1024);
            }
        }
    }
}
737
/// Tests for `observe_processing_time` and the `STAGE_*` label constants.
///
/// The histogram is process-global, so each test measures a before/after delta
/// on its own label combination instead of asserting absolute values.
#[cfg(test)]
mod processing_time_tests {
    use super::*;

    /// One call adds exactly one sample to the targeted series.
    #[test]
    fn test_observe_processing_time_records_to_histogram() {
        // Use the exported stage constant rather than a copy of its text, so the
        // test follows the constant if its value ever changes.
        let sample_count = || {
            TRANSACTION_PROCESSING_TIME
                .with_label_values(&["test-relayer", "evm", STAGE_REQUEST_QUEUE_DWELL])
                .get_sample_count()
        };

        let before = sample_count();
        observe_processing_time("test-relayer", "evm", STAGE_REQUEST_QUEUE_DWELL, 1.5);
        let after = sample_count();

        assert_eq!(after, before + 1, "sample count should increase by 1");
    }

    /// Observed durations accumulate into the series' sample sum.
    #[test]
    fn test_observe_processing_time_accumulates_sum() {
        // A stage value used by no other test, so the delta is only ours.
        let label = "test_sum_stage";
        let sample_sum = || {
            TRANSACTION_PROCESSING_TIME
                .with_label_values(&["test-relayer-sum", "stellar", label])
                .get_sample_sum()
        };

        let before_sum = sample_sum();
        observe_processing_time("test-relayer-sum", "stellar", label, 2.0);
        observe_processing_time("test-relayer-sum", "stellar", label, 3.0);
        let delta = sample_sum() - before_sum;

        assert!(
            (delta - 5.0).abs() < 0.001,
            "sum should increase by 5.0, got delta {delta}"
        );
    }

    /// No two stage constants share the same label value.
    #[test]
    fn test_stage_constants_are_distinct() {
        let stages = [
            STAGE_REQUEST_QUEUE_DWELL,
            STAGE_PREPARE_DURATION,
            STAGE_SUBMISSION_QUEUE_DWELL,
            STAGE_SUBMIT_DURATION,
        ];
        let unique: std::collections::HashSet<&str> = stages.iter().copied().collect();
        assert_eq!(stages.len(), unique.len(), "stage constants must be unique");
    }
}
789}