risingwave_common/config/
meta.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use risingwave_common_proc_macro::serde_prefix_all;
16use serde::de::Error as _;
17
18use super::*;
19
// Kind of backend selected for the meta store; this is the type of `MetaConfig::backend`.
// `Mem` is the `Default` value of the enum.
//
// NOTE(review): plain `//` comments are used here on purpose. If the `ValueEnum` derive is
// clap's, `///` comments on variants would presumably surface as CLI help text — confirm
// before converting these notes to doc comments.
#[derive(Copy, Clone, Debug, Default, ValueEnum, Serialize, Deserialize)]
pub enum MetaBackend {
    #[default]
    Mem,
    Sql, // any database url
    Sqlite,
    Postgres,
    Mysql,
}
29
/// Compression algorithm for hummock version checkpoint serialization.
///
/// Because of `#[serde(rename_all = "lowercase")]`, the variants are spelled `none`, `zstd`
/// and `lz4` when (de)serialized.
///
/// NOTE: The numeric values are aligned with protobuf `CheckpointCompressionAlgorithm`; the
/// `tests` module in this file asserts that alignment, so do not renumber the variants.
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[repr(i32)]
pub enum CheckpointCompression {
    /// No compression.
    None = 0,
    /// Zstd compression (default, good balance between ratio and speed).
    #[default]
    Zstd = 1,
    /// Lz4 compression (faster but lower ratio).
    Lz4 = 2,
}
45
#[cfg(test)]
mod tests {
    use risingwave_pb::hummock::CheckpointCompressionAlgorithm as PbAlgorithm;

    use super::CheckpointCompression as Compression;

    /// Guards the `#[repr(i32)]` discriminants of `CheckpointCompression` against drifting
    /// away from the protobuf enum they are documented to mirror.
    #[test]
    fn checkpoint_compression_numeric_values_align_with_pb() {
        // (config discriminant, protobuf discriminant)
        let pairs = [
            (
                Compression::None as i32,
                PbAlgorithm::CheckpointCompressionUnspecified as i32,
            ),
            (
                Compression::Zstd as i32,
                PbAlgorithm::CheckpointCompressionZstd as i32,
            ),
            (
                Compression::Lz4 as i32,
                PbAlgorithm::CheckpointCompressionLz4 as i32,
            ),
        ];
        for (config_value, pb_value) in pairs {
            assert_eq!(config_value, pb_value);
        }
    }
}
68
/// Default parallelism for streaming jobs, as configured by `default_parallelism` in the
/// `[meta]` section.
///
/// The hand-written serde impls in this file represent it either as the string `"Full"`
/// (accepted case-insensitively, ignoring surrounding whitespace) or as a non-zero integer.
#[derive(Copy, Clone, Debug, Default)]
pub enum DefaultParallelism {
    /// Use all available parallelism units. This is the `Default` value.
    #[default]
    Full,
    /// Use the given fixed, non-zero parallelism.
    Default(NonZeroUsize),
}
75
76impl Serialize for DefaultParallelism {
77    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
78    where
79        S: Serializer,
80    {
81        #[derive(Debug, Serialize, Deserialize)]
82        #[serde(untagged)]
83        enum Parallelism {
84            Str(String),
85            Int(usize),
86        }
87        match self {
88            DefaultParallelism::Full => Parallelism::Str("Full".to_owned()).serialize(serializer),
89            DefaultParallelism::Default(val) => {
90                Parallelism::Int(val.get() as _).serialize(serializer)
91            }
92        }
93    }
94}
95
96impl<'de> Deserialize<'de> for DefaultParallelism {
97    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
98    where
99        D: serde::Deserializer<'de>,
100    {
101        #[derive(Debug, Deserialize)]
102        #[serde(untagged)]
103        enum Parallelism {
104            Str(String),
105            Int(usize),
106        }
107        let p = Parallelism::deserialize(deserializer)?;
108        match p {
109            Parallelism::Str(s) => {
110                if s.trim().eq_ignore_ascii_case("full") {
111                    Ok(DefaultParallelism::Full)
112                } else {
113                    Err(serde::de::Error::custom(format!(
114                        "invalid default parallelism: {}",
115                        s
116                    )))
117                }
118            }
119            Parallelism::Int(i) => Ok(DefaultParallelism::Default(
120                // Note: we won't check whether this exceeds the maximum parallelism (i.e., vnode count)
121                // here because it requires extra context. The check will be done when scheduling jobs.
122                NonZeroUsize::new(i).ok_or_else(|| {
123                    serde::de::Error::custom("default parallelism should not be 0")
124                })?,
125            )),
126        }
127    }
128}
129
// Every field below carries a `#[serde(default ...)]` attribute, so a key that is omitted from
// the `[meta]` section falls back either to the named `default::meta::*` function or to the
// field type's `Default`.
//
// NOTE(review): the `///` comments on fields presumably feed the `ConfigDoc` derive (see the
// `#[config_doc(...)]` attributes below), so rewording them may require regenerating derived
// documentation — confirm before editing. Reviewer notes are therefore written as plain `//`
// comments.
/// The section `[meta]` in `risingwave.toml`.
#[serde_with::apply(Option => #[serde(with = "none_as_empty_string")])]
#[derive(Clone, Debug, Serialize, Deserialize, DefaultFromSerde, ConfigDoc)]
pub struct MetaConfig {
    /// Objects within `min_sst_retention_time_sec` won't be deleted by hummock full GC, even they
    /// are dangling.
    #[serde(default = "default::meta::min_sst_retention_time_sec")]
    pub min_sst_retention_time_sec: u64,

    /// Interval of automatic hummock full GC.
    #[serde(default = "default::meta::full_gc_interval_sec")]
    pub full_gc_interval_sec: u64,

    /// Max number of object per full GC job can fetch.
    #[serde(default = "default::meta::full_gc_object_limit")]
    pub full_gc_object_limit: u64,

    /// Duration in seconds to retain garbage collection history data.
    #[serde(default = "default::meta::gc_history_retention_time_sec")]
    pub gc_history_retention_time_sec: u64,

    /// Max number of inflight time travel query.
    #[serde(default = "default::meta::max_inflight_time_travel_query")]
    pub max_inflight_time_travel_query: u64,

    /// Schedule `Dynamic` compaction for all compaction groups with this interval.
    /// Groups in cooldown (recently found to have no compaction work) are skipped.
    #[serde(default = "default::meta::periodic_compaction_interval_sec")]
    pub periodic_compaction_interval_sec: u64,

    /// Interval of invoking a vacuum job, to remove stale metadata from meta store and objects
    /// from object store.
    #[serde(default = "default::meta::vacuum_interval_sec")]
    pub vacuum_interval_sec: u64,

    /// The spin interval inside a vacuum job. It avoids the vacuum job monopolizing resources of
    /// meta node.
    #[serde(default = "default::meta::vacuum_spin_interval_ms")]
    pub vacuum_spin_interval_ms: u64,

    /// Interval of invoking iceberg garbage collection, to expire old snapshots.
    #[serde(default = "default::meta::iceberg_gc_interval_sec")]
    pub iceberg_gc_interval_sec: u64,

    /// Interval of hummock version checkpoint.
    #[serde(default = "default::meta::hummock_version_checkpoint_interval_sec")]
    pub hummock_version_checkpoint_interval_sec: u64,

    /// Compression algorithm for hummock version checkpoint.
    // Bare `default` means `CheckpointCompression::default()`, i.e. the `#[default]` variant.
    #[serde(default)]
    pub checkpoint_compression_algorithm: CheckpointCompression,

    /// Chunk size in bytes for reading large checkpoints.
    /// Large checkpoints are read in parallel chunks to avoid single-request timeout issues.
    /// Default: 128MB
    #[serde(default = "default::meta::checkpoint_read_chunk_size")]
    pub checkpoint_read_chunk_size: usize,

    /// Maximum number of concurrent chunk reads when reading large checkpoints.
    /// Higher values may improve read throughput but increase memory usage.
    /// Memory usage = `checkpoint_read_chunk_size` * `checkpoint_read_max_in_flight_chunks`
    /// Default: 4
    #[serde(default = "default::meta::checkpoint_read_max_in_flight_chunks")]
    pub checkpoint_read_max_in_flight_chunks: usize,

    /// If enabled, `SSTable` object file and version delta will be retained.
    ///
    /// `SSTable` object file need to be deleted via full GC.
    ///
    /// version delta need to be manually deleted.
    #[serde(default = "default::meta::enable_hummock_data_archive")]
    pub enable_hummock_data_archive: bool,

    /// The interval at which a Hummock version snapshot is taken for time travel.
    ///
    /// Larger value indicates less storage overhead but worse query performance.
    #[serde(default = "default::meta::hummock_time_travel_snapshot_interval")]
    pub hummock_time_travel_snapshot_interval: u64,

    /// The minimum delta log number a new checkpoint should compact, otherwise the checkpoint
    /// attempt is rejected.
    #[serde(default = "default::meta::min_delta_log_num_for_hummock_version_checkpoint")]
    pub min_delta_log_num_for_hummock_version_checkpoint: u64,

    /// Maximum allowed heartbeat interval in seconds.
    // Note the naming mismatch: the field ends in `_secs` while its default function is
    // `max_heartbeat_interval_sec`.
    #[serde(default = "default::meta::max_heartbeat_interval_sec")]
    pub max_heartbeat_interval_secs: u32,

    /// Whether to enable fail-on-recovery. Should only be used in e2e tests.
    #[serde(default)]
    pub disable_recovery: bool,

    /// Whether meta should request pausing all data sources on the next bootstrap.
    /// This allows us to pause the cluster on next bootstrap in an offline way.
    /// It's important for standalone or single node deployments.
    /// In those cases, meta node, frontend and compute may all be co-located.
    /// If the compute node enters an inconsistent state, and continuously crashloops,
    /// we may not be able to connect to the cluster to run `alter system set pause_on_next_bootstrap = true;`.
    /// By providing it in the static config, we can have an offline way to trigger the pause on bootstrap.
    #[serde(default = "default::meta::pause_on_next_bootstrap_offline")]
    pub pause_on_next_bootstrap_offline: bool,

    /// Whether to disable adaptive-scaling feature.
    #[serde(default)]
    pub disable_automatic_parallelism_control: bool,

    /// The number of streaming jobs per scaling operation.
    #[serde(default = "default::meta::parallelism_control_batch_size")]
    pub parallelism_control_batch_size: usize,

    /// The period of parallelism control trigger.
    #[serde(default = "default::meta::parallelism_control_trigger_period_sec")]
    pub parallelism_control_trigger_period_sec: u64,

    /// The first delay of parallelism control.
    #[serde(default = "default::meta::parallelism_control_trigger_first_delay_sec")]
    pub parallelism_control_trigger_first_delay_sec: u64,

    // NOTE(review): undocumented; presumably the meta leader lease duration in seconds — confirm.
    #[serde(default = "default::meta::meta_leader_lease_secs")]
    pub meta_leader_lease_secs: u64,

    /// After specified seconds of idle (no mview or flush), the process will be exited.
    /// It is mainly useful for playgrounds.
    // This is the only `Option` field of the struct, i.e. the only one matched by the
    // `serde_with::apply(Option => ...)` attribute above.
    #[serde(default)]
    pub dangerous_max_idle_secs: Option<u64>,

    /// The default global parallelism for all streaming jobs, if user doesn't specify the
    /// parallelism, this value will be used. `FULL` means use all available parallelism units,
    /// otherwise it's a number.
    #[serde(default = "default::meta::default_parallelism")]
    pub default_parallelism: DefaultParallelism,

    /// Whether to enable deterministic compaction scheduling, which
    /// will disable all auto scheduling of compaction tasks.
    /// Should only be used in e2e tests.
    #[serde(default)]
    pub enable_compaction_deterministic: bool,

    /// Enable sanity check when SSTs are committed.
    #[serde(default)]
    pub enable_committed_sst_sanity_check: bool,

    // NOTE(review): undocumented; the unit is seconds judging by the `_sec` suffix — confirm
    // what is being monitored.
    #[serde(default = "default::meta::node_num_monitor_interval_sec")]
    pub node_num_monitor_interval_sec: u64,

    // Which meta store backend to use; see `MetaBackend`.
    #[serde(default = "default::meta::backend")]
    pub backend: MetaBackend,

    /// Schedule `space_reclaim` compaction for all compaction groups with this interval.
    #[serde(default = "default::meta::periodic_space_reclaim_compaction_interval_sec")]
    pub periodic_space_reclaim_compaction_interval_sec: u64,

    /// Schedule `ttl_reclaim` compaction for all compaction groups with this interval.
    #[serde(default = "default::meta::periodic_ttl_reclaim_compaction_interval_sec")]
    pub periodic_ttl_reclaim_compaction_interval_sec: u64,

    // NOTE(review): undocumented; presumably the `tombstone_reclaim` counterpart of the two
    // intervals above — confirm.
    #[serde(default = "default::meta::periodic_tombstone_reclaim_compaction_interval_sec")]
    pub periodic_tombstone_reclaim_compaction_interval_sec: u64,

    // The three `#[deprecated]` size limits below are still accepted in config files.
    #[serde(default = "default::meta::move_table_size_limit")]
    #[deprecated]
    pub move_table_size_limit: u64,

    #[serde(default = "default::meta::split_group_size_limit")]
    #[deprecated]
    pub split_group_size_limit: u64,

    #[serde(default = "default::meta::cut_table_size_limit")]
    #[deprecated]
    pub cut_table_size_limit: u64,

    /// Whether to protect dropping a table with incoming sink.
    #[serde(default = "default::meta::protect_drop_table_with_incoming_sink")]
    pub protect_drop_table_with_incoming_sink: bool,

    // `flatten` inlines this field into the `[meta]` table itself; presumably it collects the
    // keys not matched by any other field — confirm against `Unrecognized`. It is excluded from
    // the generated config docs via `config_doc(omitted)`.
    #[serde(default, flatten)]
    #[config_doc(omitted)]
    pub unrecognized: Unrecognized<Self>,

    /// Whether config object storage bucket lifecycle to purge stale data.
    // NOTE(review): the field name is negated (`do_not_...`) while the doc comment is phrased
    // positively; presumably `true` means the lifecycle is NOT configured — confirm.
    #[serde(default)]
    pub do_not_config_object_storage_lifecycle: bool,

    /// Count of partition in split group. Meta will assign this value to every new group when it splits from default-group by automatically.
    /// Each partition contains aligned data of `vnode_count / partition_vnode_count` consecutive virtual-nodes of one state table.
    #[serde(default = "default::meta::partition_vnode_count")]
    pub partition_vnode_count: u32,

    /// The threshold of write throughput to trigger a group split.
    // The `alias` keeps the previous key name `table_write_throughput_threshold` working.
    #[serde(
        default = "default::meta::table_high_write_throughput_threshold",
        alias = "table_write_throughput_threshold"
    )]
    pub table_high_write_throughput_threshold: u64,

    // The `alias` keeps the previous key name `min_table_split_write_throughput` working.
    #[serde(
        default = "default::meta::table_low_write_throughput_threshold",
        alias = "min_table_split_write_throughput"
    )]
    /// The threshold of write throughput to trigger a group merge.
    pub table_low_write_throughput_threshold: u64,

    // If the compaction task does not report a heartbeat within the
    // `compaction_task_max_heartbeat_interval_secs` interval, we will cancel the task.
    #[serde(default = "default::meta::compaction_task_max_heartbeat_interval_secs")]
    pub compaction_task_max_heartbeat_interval_secs: u64,

    // If the compaction task does not make progress within the
    // `compaction_task_max_progress_interval_secs` interval, we will cancel the task.
    #[serde(default = "default::meta::compaction_task_max_progress_interval_secs")]
    pub compaction_task_max_progress_interval_secs: u64,

    /// The number of compaction task ids to prefetch from the meta store in one batch.
    #[serde(default = "default::meta::compaction_task_id_refill_capacity")]
    pub compaction_task_id_refill_capacity: u32,

    // Nested `CompactionConfig` section; defaults to `CompactionConfig::default()`.
    #[serde(default)]
    #[config_doc(nested)]
    pub compaction_config: CompactionConfig,

    /// Count of partitions of tables in default group and materialized view group.
    /// The meta node will decide according to some strategy whether to cut the boundaries of the file according to the vnode alignment.
    /// Each partition contains aligned data of `vnode_count / hybrid_partition_vnode_count` consecutive virtual-nodes of one state table.
    /// Set it zero to disable this feature.
    #[serde(default = "default::meta::hybrid_partition_vnode_count")]
    pub hybrid_partition_vnode_count: u32,

    // NOTE(review): undocumented; presumably toggles the event log whose per-channel size is
    // bounded by `event_log_channel_max_size` below — confirm.
    #[serde(default = "default::meta::event_log_enabled")]
    pub event_log_enabled: bool,
    /// Keeps the latest N events per channel.
    #[serde(default = "default::meta::event_log_channel_max_size")]
    pub event_log_channel_max_size: u32,

    // Nested `MetaDeveloperConfig` section; defaults to `MetaDeveloperConfig::default()`.
    #[serde(default)]
    #[config_doc(nested)]
    pub developer: MetaDeveloperConfig,

    /// Whether compactor should rewrite row to remove dropped column.
    #[serde(default = "default::meta::enable_dropped_column_reclaim")]
    pub enable_dropped_column_reclaim: bool,

    /// Whether to split the compaction group when the size of the group exceeds the `compaction_group_config.max_estimated_group_size() * split_group_size_ratio`.
    #[serde(default = "default::meta::split_group_size_ratio")]
    pub split_group_size_ratio: f64,

    // During group scheduling, the configured `*_throughput_ratio` is used to determine whether the sample exceeds the threshold.
    // Use `table_stat_throuput_window_seconds_for_*` to check whether the split and merge conditions are met.
    /// To split the compaction group when the high throughput statistics of the group exceeds the threshold.
    #[serde(default = "default::meta::table_stat_high_write_throughput_ratio_for_split")]
    pub table_stat_high_write_throughput_ratio_for_split: f64,

    /// To merge the compaction group when the low throughput statistics of the group exceeds the threshold.
    #[serde(default = "default::meta::table_stat_low_write_throughput_ratio_for_merge")]
    pub table_stat_low_write_throughput_ratio_for_merge: f64,

    // Hummock also controls the size of samples to be judged during group scheduling by `table_stat_sample_size_for_split` and `table_stat_sample_size_for_merge`.
    // It will use max(table_stat_throuput_window_seconds_for_split / ckpt, table_stat_throuput_window_seconds_for_merge / ckpt) as the global sample size.
    // For example, if `table_stat_throuput_window_seconds_for_merge` = 240 and `table_stat_throuput_window_seconds_for_split` = 60, and `ckpt_sec = 1`,
    // the global sample size will be max(240/1, 60/1); then only the last 60 samples will be considered for split, and so on.
    // NOTE(review): no `table_stat_sample_size_for_*` field exists in this struct; the names
    // above may be stale — confirm.
    /// The window seconds of table throughput statistic history for split compaction group.
    #[serde(default = "default::meta::table_stat_throuput_window_seconds_for_split")]
    pub table_stat_throuput_window_seconds_for_split: usize,

    /// The window seconds of table throughput statistic history for merge compaction group.
    #[serde(default = "default::meta::table_stat_throuput_window_seconds_for_merge")]
    pub table_stat_throuput_window_seconds_for_merge: usize,

    /// The threshold of table size in one compact task to decide whether to partition one table into `hybrid_partition_vnode_count` parts, which belongs to default group and materialized view group.
    /// Set it max value of 64-bit number to disable this feature.
    #[serde(default = "default::meta::compact_task_table_size_partition_threshold_low")]
    pub compact_task_table_size_partition_threshold_low: u64,

    /// The threshold of table size in one compact task to decide whether to partition one table into `partition_vnode_count` parts, which belongs to default group and materialized view group.
    /// Set it max value of 64-bit number to disable this feature.
    #[serde(default = "default::meta::compact_task_table_size_partition_threshold_high")]
    pub compact_task_table_size_partition_threshold_high: u64,

    /// The interval of the regular periodic compaction group split job.
    /// This does not disable normalize-triggered splits when
    /// `enable_compaction_group_normalize` is enabled.
    // The `alias` keeps the previous key name `periodic_split_compact_group_interval_sec` working.
    #[serde(
        default = "default::meta::periodic_scheduling_compaction_group_split_interval_sec",
        alias = "periodic_split_compact_group_interval_sec"
    )]
    pub periodic_scheduling_compaction_group_split_interval_sec: u64,

    /// Whether to normalize overlapping compaction groups before the regular split/merge scheduling.
    #[serde(default = "default::meta::enable_compaction_group_normalize")]
    pub enable_compaction_group_normalize: bool,

    /// The maximum number of normalize splits in one scheduler round. Must be greater than 0.
    // The "greater than 0" rule is enforced at parse time by the custom deserializer below.
    #[serde(
        default = "default::meta::max_normalize_splits_per_round",
        deserialize_with = "deserialize_max_normalize_splits_per_round"
    )]
    pub max_normalize_splits_per_round: u64,

    /// The interval of the periodic scheduling compaction group merge job.
    #[serde(default = "default::meta::periodic_scheduling_compaction_group_merge_interval_sec")]
    pub periodic_scheduling_compaction_group_merge_interval_sec: u64,

    /// The threshold of each dimension of the compaction group after merging. When the dimension * `compaction_group_merge_dimension_threshold` >= limit, the merging job will be rejected.
    #[serde(default = "default::meta::compaction_group_merge_dimension_threshold")]
    pub compaction_group_merge_dimension_threshold: f64,

    /// The interval that the CDC table splits initialization should yield to avoid overloading upstream system.
    #[serde(default = "default::meta::cdc_table_split_init_sleep_interval_splits")]
    pub cdc_table_split_init_sleep_interval_splits: u64,

    /// The duration that the CDC table splits initialization should yield to avoid overloading upstream system.
    #[serde(default = "default::meta::cdc_table_split_init_sleep_duration_millis")]
    pub cdc_table_split_init_sleep_duration_millis: u64,

    /// The batch size that the CDC table splits initialization should use when persisting to meta store.
    #[serde(default = "default::meta::cdc_table_split_init_insert_batch_size")]
    pub cdc_table_split_init_insert_batch_size: u64,

    /// Whether to automatically migrate legacy table fragments when meta starts.
    #[serde(default = "default::meta::enable_legacy_table_migration")]
    pub enable_legacy_table_migration: bool,

    // Nested `MetaStoreConfig` section; defaults to `MetaStoreConfig::default()`.
    #[serde(default)]
    #[config_doc(nested)]
    pub meta_store_config: MetaStoreConfig,
}
455
// Connection-pool settings for the meta store, nested in `MetaConfig::meta_store_config`.
// Each key is optional and falls back to the matching `default::meta_store_config::*` function.
// No field here is an `Option`, so the `serde_with::apply(Option => ...)` attribute currently
// matches nothing in this struct.
/// Note: only applies to meta store backends other than `SQLite`.
#[serde_with::apply(Option => #[serde(with = "none_as_empty_string")])]
#[derive(Clone, Debug, Serialize, Deserialize, DefaultFromSerde, ConfigDoc)]
pub struct MetaStoreConfig {
    /// Maximum number of connections for the meta store connection pool.
    #[serde(default = "default::meta_store_config::max_connections")]
    pub max_connections: u32,
    /// Minimum number of connections for the meta store connection pool.
    #[serde(default = "default::meta_store_config::min_connections")]
    pub min_connections: u32,
    /// Connection timeout in seconds for a meta store connection.
    #[serde(default = "default::meta_store_config::connection_timeout_sec")]
    pub connection_timeout_sec: u64,
    /// Idle timeout in seconds for a meta store connection.
    #[serde(default = "default::meta_store_config::idle_timeout_sec")]
    pub idle_timeout_sec: u64,
    /// Acquire timeout in seconds for a meta store connection.
    #[serde(default = "default::meta_store_config::acquire_timeout_sec")]
    pub acquire_timeout_sec: u64,
}
476
477fn deserialize_max_normalize_splits_per_round<'de, D>(deserializer: D) -> Result<u64, D::Error>
478where
479    D: serde::Deserializer<'de>,
480{
481    let value = u64::deserialize(deserializer)?;
482    if value == 0 {
483        return Err(D::Error::custom(
484            "meta.max_normalize_splits_per_round must be greater than 0",
485        ));
486    }
487    Ok(value)
488}
489
// NOTE(review): `serde_prefix_all("meta_", mode = "alias")` is a project macro; presumably it
// lets every key also be written with a `meta_` prefix (as an alias) — confirm. This would
// match the `default::developer::meta_cached_traces_*` function names used for the first two
// fields.
//
// NOTE(review): the `///` comments on fields presumably feed the `ConfigDoc` derive, so
// reviewer notes below are plain `//` comments — confirm before converting them.
/// The subsections `[meta.developer]`.
///
/// It is put at [`MetaConfig::developer`].
#[serde_prefix_all("meta_", mode = "alias")]
#[serde_with::apply(Option => #[serde(with = "none_as_empty_string")])]
#[derive(Clone, Debug, Serialize, Deserialize, DefaultFromSerde, ConfigDoc)]
pub struct MetaDeveloperConfig {
    /// The number of traces to be cached in-memory by the tracing collector
    /// embedded in the meta node.
    #[serde(default = "default::developer::meta_cached_traces_num")]
    pub cached_traces_num: u32,

    /// The maximum memory usage in bytes for the tracing collector embedded
    /// in the meta node.
    #[serde(default = "default::developer::meta_cached_traces_memory_limit_bytes")]
    pub cached_traces_memory_limit_bytes: usize,

    // The doc comment below is syntactically attached to `enable_trivial_move` only, although
    // it reads like a heading for the group of picker-related fields that follow.
    /// Compaction picker config
    #[serde(default = "default::developer::enable_trivial_move")]
    pub enable_trivial_move: bool,
    #[serde(default = "default::developer::enable_check_task_level_overlap")]
    pub enable_check_task_level_overlap: bool,
    #[serde(default = "default::developer::max_trivial_move_task_count_per_loop")]
    pub max_trivial_move_task_count_per_loop: usize,

    #[serde(default = "default::developer::max_get_task_probe_times")]
    pub max_get_task_probe_times: usize,

    /// Max number of actor allowed per parallelism (default = 100).
    /// CREATE MV/Table will be noticed when the number of actors exceeds this limit.
    #[serde(default = "default::developer::actor_cnt_per_worker_parallelism_soft_limit")]
    pub actor_cnt_per_worker_parallelism_soft_limit: usize,

    /// Max number of actor allowed per parallelism (default = 400).
    /// CREATE MV/Table will be rejected when the number of actors exceeds this limit.
    #[serde(default = "default::developer::actor_cnt_per_worker_parallelism_hard_limit")]
    pub actor_cnt_per_worker_parallelism_hard_limit: usize,

    /// Max number of SSTs fetched from meta store per SELECT, during time travel Hummock version replay.
    #[serde(default = "default::developer::hummock_time_travel_sst_info_fetch_batch_size")]
    pub hummock_time_travel_sst_info_fetch_batch_size: usize,

    /// Max number of SSTs inserted into meta store per INSERT, during time travel metadata writing.
    #[serde(default = "default::developer::hummock_time_travel_sst_info_insert_batch_size")]
    pub hummock_time_travel_sst_info_insert_batch_size: usize,

    #[serde(default = "default::developer::time_travel_vacuum_interval_sec")]
    pub time_travel_vacuum_interval_sec: u64,

    // The only `Option` field of this struct, i.e. the only one matched by the
    // `serde_with::apply(Option => ...)` attribute above.
    #[serde(default = "default::developer::time_travel_vacuum_max_version_count")]
    pub time_travel_vacuum_max_version_count: Option<u32>,

    /// Max number of epoch-to-version inserted into meta store per INSERT, during time travel metadata writing.
    #[serde(default = "default::developer::hummock_time_travel_epoch_version_insert_batch_size")]
    pub hummock_time_travel_epoch_version_insert_batch_size: usize,

    #[serde(default = "default::developer::hummock_gc_history_insert_batch_size")]
    pub hummock_gc_history_insert_batch_size: usize,

    #[serde(default = "default::developer::hummock_time_travel_filter_out_objects_batch_size")]
    pub hummock_time_travel_filter_out_objects_batch_size: usize,

    #[serde(default = "default::developer::hummock_time_travel_filter_out_objects_v1")]
    pub hummock_time_travel_filter_out_objects_v1: bool,

    #[serde(
        default = "default::developer::hummock_time_travel_filter_out_objects_list_version_batch_size"
    )]
    pub hummock_time_travel_filter_out_objects_list_version_batch_size: usize,

    #[serde(
        default = "default::developer::hummock_time_travel_filter_out_objects_list_delta_batch_size"
    )]
    pub hummock_time_travel_filter_out_objects_list_delta_batch_size: usize,

    // The three RPC client configs below default to `RpcClientConfig::default()`.
    #[serde(default)]
    pub compute_client_config: RpcClientConfig,

    #[serde(default)]
    pub stream_client_config: RpcClientConfig,

    #[serde(default)]
    pub frontend_client_config: RpcClientConfig,

    #[serde(default = "default::developer::table_change_log_insert_batch_size")]
    pub table_change_log_insert_batch_size: u64,

    #[serde(default = "default::developer::table_change_log_delete_batch_size")]
    pub table_change_log_delete_batch_size: u64,
}
580
// Compaction settings nested in `MetaConfig::compaction_config`.
//
// Every key is optional: each field falls back to the function of the same name in
// `default::compaction_config`. The two `Option` fields at the end are the ones matched by the
// `serde_with::apply(Option => ...)` attribute.
//
// NOTE(review): none of the fields carries a doc comment, and their meaning cannot be derived
// from this file alone. `///` comments here presumably feed the `ConfigDoc` derive, so add
// them together with a regeneration of any derived documentation — confirm.
#[serde_with::apply(Option => #[serde(with = "none_as_empty_string")])]
#[derive(Clone, Debug, Serialize, Deserialize, DefaultFromSerde, ConfigDoc)]
pub struct CompactionConfig {
    #[serde(default = "default::compaction_config::max_bytes_for_level_base")]
    pub max_bytes_for_level_base: u64,
    #[serde(default = "default::compaction_config::max_bytes_for_level_multiplier")]
    pub max_bytes_for_level_multiplier: u64,
    #[serde(default = "default::compaction_config::max_compaction_bytes")]
    pub max_compaction_bytes: u64,
    #[serde(default = "default::compaction_config::sub_level_max_compaction_bytes")]
    pub sub_level_max_compaction_bytes: u64,
    #[serde(default = "default::compaction_config::level0_tier_compact_file_number")]
    pub level0_tier_compact_file_number: u64,
    #[serde(default = "default::compaction_config::target_file_size_base")]
    pub target_file_size_base: u64,
    #[serde(default = "default::compaction_config::compaction_filter_mask")]
    pub compaction_filter_mask: u32,
    #[serde(default = "default::compaction_config::max_sub_compaction")]
    pub max_sub_compaction: u32,
    #[serde(default = "default::compaction_config::level0_stop_write_threshold_sub_level_number")]
    pub level0_stop_write_threshold_sub_level_number: u64,
    #[serde(default = "default::compaction_config::level0_sub_level_compact_level_count")]
    pub level0_sub_level_compact_level_count: u32,
    #[serde(
        default = "default::compaction_config::level0_overlapping_sub_level_compact_level_count"
    )]
    pub level0_overlapping_sub_level_compact_level_count: u32,
    #[serde(default = "default::compaction_config::max_space_reclaim_bytes")]
    pub max_space_reclaim_bytes: u64,
    #[serde(default = "default::compaction_config::level0_max_compact_file_number")]
    pub level0_max_compact_file_number: u64,
    #[serde(default = "default::compaction_config::tombstone_reclaim_ratio")]
    pub tombstone_reclaim_ratio: u32,
    #[serde(default = "default::compaction_config::enable_emergency_picker")]
    pub enable_emergency_picker: bool,
    #[serde(default = "default::compaction_config::max_level")]
    pub max_level: u32,
    #[serde(default = "default::compaction_config::sst_allowed_trivial_move_min_size")]
    pub sst_allowed_trivial_move_min_size: u64,
    #[serde(default = "default::compaction_config::sst_allowed_trivial_move_max_count")]
    pub sst_allowed_trivial_move_max_count: u32,
    #[serde(default = "default::compaction_config::max_l0_compact_level_count")]
    pub max_l0_compact_level_count: u32,
    #[serde(default = "default::compaction_config::disable_auto_group_scheduling")]
    pub disable_auto_group_scheduling: bool,
    #[serde(default = "default::compaction_config::max_overlapping_level_size")]
    pub max_overlapping_level_size: u64,
    #[serde(default = "default::compaction_config::emergency_level0_sst_file_count")]
    pub emergency_level0_sst_file_count: u32,
    #[serde(default = "default::compaction_config::emergency_level0_sub_level_partition")]
    pub emergency_level0_sub_level_partition: u32,
    #[serde(default = "default::compaction_config::level0_stop_write_threshold_max_sst_count")]
    pub level0_stop_write_threshold_max_sst_count: u32,
    #[serde(default = "default::compaction_config::level0_stop_write_threshold_max_size")]
    pub level0_stop_write_threshold_max_size: u64,
    #[serde(default = "default::compaction_config::enable_optimize_l0_interval_selection")]
    pub enable_optimize_l0_interval_selection: bool,
    #[serde(default = "default::compaction_config::max_kv_count_for_xor16")]
    pub max_kv_count_for_xor16: Option<u64>,
    #[serde(default = "default::compaction_config::max_vnode_key_range_bytes")]
    pub max_vnode_key_range_bytes: Option<u64>,
}
643
644pub mod default {
645    pub use crate::config::default::developer;
646
647    pub mod meta {
648        use crate::config::{DefaultParallelism, MetaBackend};
649
650        pub fn min_sst_retention_time_sec() -> u64 {
651            3600 * 6
652        }
653
654        pub fn gc_history_retention_time_sec() -> u64 {
655            3600 * 6
656        }
657
658        pub fn full_gc_interval_sec() -> u64 {
659            3600
660        }
661
662        pub fn full_gc_object_limit() -> u64 {
663            100_000
664        }
665
666        pub fn max_inflight_time_travel_query() -> u64 {
667            1000
668        }
669
670        pub fn periodic_compaction_interval_sec() -> u64 {
671            300
672        }
673
674        pub fn vacuum_interval_sec() -> u64 {
675            30
676        }
677
678        pub fn vacuum_spin_interval_ms() -> u64 {
679            100
680        }
681
682        pub fn iceberg_gc_interval_sec() -> u64 {
683            3600
684        }
685
686        pub fn hummock_version_checkpoint_interval_sec() -> u64 {
687            30
688        }
689
690        pub fn checkpoint_read_chunk_size() -> usize {
691            128 * 1024 * 1024 // 128MB
692        }
693
694        pub fn checkpoint_read_max_in_flight_chunks() -> usize {
695            4
696        }
697
698        pub fn enable_hummock_data_archive() -> bool {
699            false
700        }
701
702        pub fn hummock_time_travel_snapshot_interval() -> u64 {
703            100
704        }
705
706        pub fn min_delta_log_num_for_hummock_version_checkpoint() -> u64 {
707            10
708        }
709
710        pub fn max_heartbeat_interval_sec() -> u32 {
711            60
712        }
713
714        pub fn meta_leader_lease_secs() -> u64 {
715            30
716        }
717
718        pub fn default_parallelism() -> DefaultParallelism {
719            DefaultParallelism::Full
720        }
721
722        pub fn pause_on_next_bootstrap_offline() -> bool {
723            false
724        }
725
726        pub fn node_num_monitor_interval_sec() -> u64 {
727            10
728        }
729
730        pub fn backend() -> MetaBackend {
731            MetaBackend::Mem
732        }
733
734        pub fn periodic_space_reclaim_compaction_interval_sec() -> u64 {
735            3600 // 60min
736        }
737
738        pub fn periodic_ttl_reclaim_compaction_interval_sec() -> u64 {
739            1800 // 30mi
740        }
741
742        pub fn periodic_scheduling_compaction_group_split_interval_sec() -> u64 {
743            10 // 10s
744        }
745
746        pub fn periodic_tombstone_reclaim_compaction_interval_sec() -> u64 {
747            600
748        }
749
750        // limit the size of state table to trigger split by high throughput
751        pub fn move_table_size_limit() -> u64 {
752            10 * 1024 * 1024 * 1024 // 10GB
753        }
754
755        // limit the size of group to trigger split by group_size and avoid too many small groups
756        pub fn split_group_size_limit() -> u64 {
757            64 * 1024 * 1024 * 1024 // 64GB
758        }
759
760        pub fn protect_drop_table_with_incoming_sink() -> bool {
761            false
762        }
763
764        pub fn partition_vnode_count() -> u32 {
765            16
766        }
767
768        pub fn table_high_write_throughput_threshold() -> u64 {
769            16 * 1024 * 1024 // 16MB
770        }
771
772        pub fn table_low_write_throughput_threshold() -> u64 {
773            4 * 1024 * 1024 // 4MB
774        }
775
776        pub fn compaction_task_max_heartbeat_interval_secs() -> u64 {
777            30 // 30s
778        }
779
780        pub fn compaction_task_max_progress_interval_secs() -> u64 {
781            60 * 10 // 10min
782        }
783
784        pub fn compaction_task_id_refill_capacity() -> u32 {
785            64
786        }
787
788        pub fn cut_table_size_limit() -> u64 {
789            1024 * 1024 * 1024 // 1GB
790        }
791
792        pub fn hybrid_partition_vnode_count() -> u32 {
793            4
794        }
795
796        pub fn compact_task_table_size_partition_threshold_low() -> u64 {
797            128 * 1024 * 1024 // 128MB
798        }
799
800        pub fn compact_task_table_size_partition_threshold_high() -> u64 {
801            512 * 1024 * 1024 // 512MB
802        }
803
804        pub fn event_log_enabled() -> bool {
805            true
806        }
807
808        pub fn event_log_channel_max_size() -> u32 {
809            10
810        }
811
812        pub fn parallelism_control_batch_size() -> usize {
813            10
814        }
815
816        pub fn parallelism_control_trigger_period_sec() -> u64 {
817            10
818        }
819
820        pub fn parallelism_control_trigger_first_delay_sec() -> u64 {
821            30
822        }
823
824        pub fn enable_dropped_column_reclaim() -> bool {
825            false
826        }
827
828        pub fn split_group_size_ratio() -> f64 {
829            0.9
830        }
831
832        pub fn table_stat_high_write_throughput_ratio_for_split() -> f64 {
833            0.5
834        }
835
836        pub fn table_stat_low_write_throughput_ratio_for_merge() -> f64 {
837            0.7
838        }
839
840        pub fn table_stat_throuput_window_seconds_for_split() -> usize {
841            60
842        }
843
844        pub fn table_stat_throuput_window_seconds_for_merge() -> usize {
845            240
846        }
847
848        pub fn periodic_scheduling_compaction_group_merge_interval_sec() -> u64 {
849            60 * 10 // 10min
850        }
851
852        pub fn enable_compaction_group_normalize() -> bool {
853            false
854        }
855
856        pub fn max_normalize_splits_per_round() -> u64 {
857            4
858        }
859
860        pub fn compaction_group_merge_dimension_threshold() -> f64 {
861            1.2
862        }
863
864        pub fn cdc_table_split_init_sleep_interval_splits() -> u64 {
865            1000
866        }
867
868        pub fn cdc_table_split_init_sleep_duration_millis() -> u64 {
869            500
870        }
871
872        pub fn cdc_table_split_init_insert_batch_size() -> u64 {
873            100
874        }
875
876        pub fn enable_legacy_table_migration() -> bool {
877            true
878        }
879    }
880
/// Built-in fallback values for the meta store's connection-related settings.
///
/// Each function takes no arguments and returns one fixed number.
pub mod meta_store_config {
    /// Upper bound on connections: 10.
    pub fn max_connections() -> u32 {
        10
    }

    /// Lower bound on connections: 1.
    pub fn min_connections() -> u32 {
        1
    }

    /// Connection timeout: 10 (seconds, going by the name).
    pub fn connection_timeout_sec() -> u64 {
        10
    }

    /// Idle timeout: 30 (seconds, going by the name).
    pub fn idle_timeout_sec() -> u64 {
        30
    }

    /// Acquire timeout: 30 (seconds, going by the name).
    pub fn acquire_timeout_sec() -> u64 {
        30
    }
}
908
909    pub mod compaction_config {
910        const MB: u64 = 1024 * 1024;
911        const GB: u64 = 1024 * 1024 * 1024;
912        const DEFAULT_MAX_COMPACTION_BYTES: u64 = 2 * GB; // 2GB
913        const DEFAULT_MIN_COMPACTION_BYTES: u64 = 128 * MB; // 128MB
914        const DEFAULT_MAX_BYTES_FOR_LEVEL_BASE: u64 = 512 * MB; // 512MB
915
916        // decrease this configure when the generation of checkpoint barrier is not frequent.
917        const DEFAULT_TIER_COMPACT_TRIGGER_NUMBER: u64 = 12;
918        const DEFAULT_TARGET_FILE_SIZE_BASE: u64 = 32 * MB;
919        // 32MB
920        const DEFAULT_MAX_SUB_COMPACTION: u32 = 4;
921        const DEFAULT_LEVEL_MULTIPLIER: u64 = 10;
922        const DEFAULT_MAX_SPACE_RECLAIM_BYTES: u64 = 512 * MB; // 512MB;
923        const DEFAULT_LEVEL0_STOP_WRITE_THRESHOLD_SUB_LEVEL_NUMBER: u64 = 128;
924        const DEFAULT_MAX_COMPACTION_FILE_COUNT: u64 = 100;
925        const DEFAULT_MIN_SUB_LEVEL_COMPACT_LEVEL_COUNT: u32 = 3;
926        const DEFAULT_MIN_OVERLAPPING_SUB_LEVEL_COMPACT_LEVEL_COUNT: u32 = 12;
927        const DEFAULT_TOMBSTONE_RATIO_PERCENT: u32 = 40;
928        const DEFAULT_EMERGENCY_PICKER: bool = true;
929        const DEFAULT_MAX_LEVEL: u32 = 6;
930        const DEFAULT_MAX_L0_COMPACT_LEVEL_COUNT: u32 = 42;
931        const DEFAULT_SST_ALLOWED_TRIVIAL_MOVE_MIN_SIZE: u64 = 4 * MB;
932        const DEFAULT_SST_ALLOWED_TRIVIAL_MOVE_MAX_COUNT: u32 = 256;
933        const DEFAULT_EMERGENCY_LEVEL0_SST_FILE_COUNT: u32 = 2000; // > 50G / 32M = 1600
934        const DEFAULT_EMERGENCY_LEVEL0_SUB_LEVEL_PARTITION: u32 = 256;
935        const DEFAULT_LEVEL0_STOP_WRITE_THRESHOLD_MAX_SST_COUNT: u32 = 5000;
936        const DEFAULT_LEVEL0_STOP_WRITE_THRESHOLD_MAX_SIZE: u64 = 300 * 1024 * MB; // 300GB
937        const DEFAULT_ENABLE_OPTIMIZE_L0_INTERVAL_SELECTION: bool = true;
938        pub const DEFAULT_MAX_KV_COUNT_FOR_XOR16: u64 = 256 * 1024;
939        const DEFAULT_MAX_VNODE_KEY_RANGE_BYTES: Option<u64> = None;
940
941        use crate::catalog::hummock::CompactionFilterFlag;
942
943        pub fn max_bytes_for_level_base() -> u64 {
944            DEFAULT_MAX_BYTES_FOR_LEVEL_BASE
945        }
946
947        pub fn max_bytes_for_level_multiplier() -> u64 {
948            DEFAULT_LEVEL_MULTIPLIER
949        }
950
951        pub fn max_compaction_bytes() -> u64 {
952            DEFAULT_MAX_COMPACTION_BYTES
953        }
954
955        pub fn sub_level_max_compaction_bytes() -> u64 {
956            DEFAULT_MIN_COMPACTION_BYTES
957        }
958
959        pub fn level0_tier_compact_file_number() -> u64 {
960            DEFAULT_TIER_COMPACT_TRIGGER_NUMBER
961        }
962
963        pub fn target_file_size_base() -> u64 {
964            DEFAULT_TARGET_FILE_SIZE_BASE
965        }
966
967        pub fn compaction_filter_mask() -> u32 {
968            (CompactionFilterFlag::STATE_CLEAN | CompactionFilterFlag::TTL).into()
969        }
970
971        pub fn max_sub_compaction() -> u32 {
972            DEFAULT_MAX_SUB_COMPACTION
973        }
974
975        pub fn level0_stop_write_threshold_sub_level_number() -> u64 {
976            DEFAULT_LEVEL0_STOP_WRITE_THRESHOLD_SUB_LEVEL_NUMBER
977        }
978
979        pub fn level0_sub_level_compact_level_count() -> u32 {
980            DEFAULT_MIN_SUB_LEVEL_COMPACT_LEVEL_COUNT
981        }
982
983        pub fn level0_overlapping_sub_level_compact_level_count() -> u32 {
984            DEFAULT_MIN_OVERLAPPING_SUB_LEVEL_COMPACT_LEVEL_COUNT
985        }
986
987        pub fn max_space_reclaim_bytes() -> u64 {
988            DEFAULT_MAX_SPACE_RECLAIM_BYTES
989        }
990
991        pub fn level0_max_compact_file_number() -> u64 {
992            DEFAULT_MAX_COMPACTION_FILE_COUNT
993        }
994
995        pub fn tombstone_reclaim_ratio() -> u32 {
996            DEFAULT_TOMBSTONE_RATIO_PERCENT
997        }
998
999        pub fn enable_emergency_picker() -> bool {
1000            DEFAULT_EMERGENCY_PICKER
1001        }
1002
1003        pub fn max_level() -> u32 {
1004            DEFAULT_MAX_LEVEL
1005        }
1006
1007        pub fn max_l0_compact_level_count() -> u32 {
1008            DEFAULT_MAX_L0_COMPACT_LEVEL_COUNT
1009        }
1010
1011        pub fn sst_allowed_trivial_move_min_size() -> u64 {
1012            DEFAULT_SST_ALLOWED_TRIVIAL_MOVE_MIN_SIZE
1013        }
1014
1015        pub fn disable_auto_group_scheduling() -> bool {
1016            false
1017        }
1018
1019        pub fn max_overlapping_level_size() -> u64 {
1020            256 * MB
1021        }
1022
1023        pub fn sst_allowed_trivial_move_max_count() -> u32 {
1024            DEFAULT_SST_ALLOWED_TRIVIAL_MOVE_MAX_COUNT
1025        }
1026
1027        pub fn emergency_level0_sst_file_count() -> u32 {
1028            DEFAULT_EMERGENCY_LEVEL0_SST_FILE_COUNT
1029        }
1030
1031        pub fn emergency_level0_sub_level_partition() -> u32 {
1032            DEFAULT_EMERGENCY_LEVEL0_SUB_LEVEL_PARTITION
1033        }
1034
1035        pub fn level0_stop_write_threshold_max_sst_count() -> u32 {
1036            DEFAULT_LEVEL0_STOP_WRITE_THRESHOLD_MAX_SST_COUNT
1037        }
1038
1039        pub fn level0_stop_write_threshold_max_size() -> u64 {
1040            DEFAULT_LEVEL0_STOP_WRITE_THRESHOLD_MAX_SIZE
1041        }
1042
1043        pub fn enable_optimize_l0_interval_selection() -> bool {
1044            DEFAULT_ENABLE_OPTIMIZE_L0_INTERVAL_SELECTION
1045        }
1046
1047        pub fn max_kv_count_for_xor16() -> Option<u64> {
1048            Some(DEFAULT_MAX_KV_COUNT_FOR_XOR16)
1049        }
1050
1051        pub fn max_vnode_key_range_bytes() -> Option<u64> {
1052            DEFAULT_MAX_VNODE_KEY_RANGE_BYTES
1053        }
1054    }
1055}