risingwave_common_metrics/
error_metrics.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::sync::LazyLock;
16
17use prometheus::{IntCounterVec, Registry, register_int_counter_vec_with_registry};
18
19use crate::monitor::GLOBAL_METRICS_REGISTRY;
20
21#[derive(Clone)]
22pub struct ErrorMetric<const N: usize> {
23    inner: IntCounterVec,
24}
25
26impl<const N: usize> ErrorMetric<N> {
27    pub fn new(name: &str, help: &str, label_names: &[&str; N], registry: &Registry) -> Self {
28        Self {
29            inner: register_int_counter_vec_with_registry!(name, help, label_names, registry)
30                .unwrap(),
31        }
32    }
33
34    pub fn report(&self, labels: [String; N]) {
35        self.inner.with_label_values(&labels).inc();
36    }
37}
38
39/// Metrics for counting errors in the system.
40///
41/// Please avoid adding new error metrics here. Instead, introduce new `error_type` for new errors.
42#[derive(Clone)]
43pub struct ErrorMetrics {
44    pub user_sink_error: ErrorMetric<4>,
45    pub user_compute_error: ErrorMetric<3>,
46    pub user_source_error: ErrorMetric<4>,
47}
48
49impl ErrorMetrics {
50    pub fn new(registry: &Registry) -> Self {
51        Self {
52            user_sink_error: ErrorMetric::new(
53                "user_sink_error_cnt",
54                "Sink errors in the system, queryable by tags",
55                &["error_type", "sink_id", "sink_name", "fragment_id"],
56                registry,
57            ),
58            user_compute_error: ErrorMetric::new(
59                "user_compute_error_cnt",
60                "Compute errors in the system, queryable by tags",
61                &["error_type", "executor_name", "fragment_id"],
62                registry,
63            ),
64            user_source_error: ErrorMetric::new(
65                "user_source_error_cnt",
66                "Source errors in the system, queryable by tags",
67                &["error_type", "source_id", "source_name", "fragment_id"],
68                registry,
69            ),
70        }
71    }
72}
73
74pub static GLOBAL_ERROR_METRICS: LazyLock<ErrorMetrics> =
75    LazyLock::new(|| ErrorMetrics::new(&GLOBAL_METRICS_REGISTRY));