1use std::collections::{BTreeMap, HashSet};
16use std::net::SocketAddr;
17use std::ops::Bound;
18use std::pin::Pin;
19use std::sync::Arc;
20use std::thread::JoinHandle;
21use std::time::Duration;
22
23use anyhow::anyhow;
24use bytes::{BufMut, Bytes, BytesMut};
25use clap::Parser;
26use foyer::CacheHint;
27use risingwave_common::catalog::TableId;
28use risingwave_common::config::{
29 MetaConfig, NoOverride, extract_storage_memory_config, load_config,
30};
31use risingwave_common::util::addr::HostAddr;
32use risingwave_common::util::iter_util::ZipEqFast;
33use risingwave_common::util::tokio_util::sync::CancellationToken;
34use risingwave_hummock_sdk::key::TableKey;
35use risingwave_hummock_sdk::version::{HummockVersion, HummockVersionDelta};
36use risingwave_hummock_sdk::{
37 CompactionGroupId, FIRST_VERSION_ID, HummockEpoch, HummockReadEpoch, HummockVersionId,
38};
39use risingwave_pb::common::WorkerType;
40use risingwave_rpc_client::{HummockMetaClient, MetaClient};
41use risingwave_storage::hummock::hummock_meta_client::MonitoredHummockMetaClient;
42use risingwave_storage::hummock::{CachePolicy, HummockStorage};
43use risingwave_storage::monitor::{
44 CompactorMetrics, HummockMetrics, HummockStateStoreMetrics, MonitoredStateStore,
45 MonitoredStorageMetrics, ObjectStoreMetrics,
46};
47use risingwave_storage::opts::StorageOpts;
48use risingwave_storage::store::{NewReadSnapshotOptions, ReadOptions, StateStoreRead};
49use risingwave_storage::{StateStore, StateStoreImpl, StateStoreIter};
50
/// Number of SST ids requested from the new Meta (via `get_new_sst_ids`) while
/// initializing it for replay.
const SST_ID_SHIFT_COUNT: u32 = 1_000_000;
/// Checkpoint frequency that the embedded Meta's configuration is asserted to carry
/// in `start_meta_node`.
const CHECKPOINT_FREQ_FOR_REPLAY: u64 = 99_999_999;
53
54use crate::CompactionTestOpts;
55
/// Counters collected while replaying version deltas.
struct CompactionTestMetrics {
    /// Number of times a result check was attempted.
    num_expect_check: u64,
    /// Number of attempted checks that were skipped because the version id did not change.
    num_uncheck: u64,
}

impl CompactionTestMetrics {
    /// Creates a metrics record with both counters at zero.
    fn new() -> Self {
        CompactionTestMetrics {
            num_expect_check: 0,
            num_uncheck: 0,
        }
    }
}
69
70pub async fn compaction_test_main(
79 _listen_addr: SocketAddr,
80 advertise_addr: HostAddr,
81 opts: CompactionTestOpts,
82) -> anyhow::Result<()> {
83 let meta_listen_addr = opts
84 .meta_address
85 .strip_prefix("http://")
86 .unwrap()
87 .to_owned();
88
89 let _meta_handle = tokio::spawn(start_meta_node(
90 meta_listen_addr.clone(),
91 opts.state_store.clone(),
92 opts.config_path_for_meta.clone(),
93 ));
94
95 tokio::time::sleep(Duration::from_secs(1)).await;
97 tracing::info!("Started embedded Meta");
98
99 let (compactor_thrd, compactor_shutdown_tx) = start_compactor_thread(
100 opts.meta_address.clone(),
101 advertise_addr.to_string(),
102 opts.config_path.clone(),
103 );
104
105 let original_meta_endpoint = "http://127.0.0.1:5690";
106 let mut table_id: u32 = opts.table_id;
107
108 init_metadata_for_replay(
109 original_meta_endpoint,
110 &opts.meta_address,
111 &advertise_addr,
112 opts.ci_mode,
113 &mut table_id,
114 )
115 .await?;
116
117 assert_ne!(0, table_id, "Invalid table_id for correctness checking");
118
119 let version_deltas = pull_version_deltas(original_meta_endpoint, &advertise_addr).await?;
120
121 tracing::info!(
122 "Pulled delta logs from Meta: len(logs): {}",
123 version_deltas.len()
124 );
125
126 let replay_thrd = start_replay_thread(opts, table_id, version_deltas);
127 replay_thrd.join().unwrap();
128 compactor_shutdown_tx.send(()).unwrap();
129 compactor_thrd.join().unwrap();
130 Ok(())
131}
132
133pub async fn start_meta_node(listen_addr: String, state_store: String, config_path: String) {
134 let meta_opts = risingwave_meta_node::MetaNodeOpts::parse_from([
135 "meta-node",
136 "--listen-addr",
137 &listen_addr,
138 "--advertise-addr",
139 &listen_addr,
140 "--backend",
141 "mem",
142 "--state-store",
143 &state_store,
144 "--config-path",
145 &config_path,
146 ]);
147 let config = load_config(&meta_opts.config_path, &meta_opts);
148 assert_eq!(
151 CHECKPOINT_FREQ_FOR_REPLAY,
152 config.system.checkpoint_frequency.unwrap()
153 );
154 assert!(
155 config.meta.enable_compaction_deterministic,
156 "enable_compaction_deterministic should be set"
157 );
158
159 risingwave_meta_node::start(meta_opts, CancellationToken::new() ).await
160}
161
162async fn start_compactor_node(
163 meta_rpc_endpoint: String,
164 advertise_addr: String,
165 config_path: String,
166) {
167 let opts = risingwave_compactor::CompactorOpts::parse_from([
168 "compactor-node",
169 "--listen-addr",
170 "127.0.0.1:5550",
171 "--advertise-addr",
172 &advertise_addr,
173 "--meta-address",
174 &meta_rpc_endpoint,
175 "--config-path",
176 &config_path,
177 ]);
178 risingwave_compactor::start(opts, CancellationToken::new() ).await
179}
180
181pub fn start_compactor_thread(
182 meta_endpoint: String,
183 advertise_addr: String,
184 config_path: String,
185) -> (JoinHandle<()>, std::sync::mpsc::Sender<()>) {
186 let (tx, rx) = std::sync::mpsc::channel();
187 let compact_func = move || {
188 let runtime = tokio::runtime::Builder::new_multi_thread()
189 .enable_all()
190 .build()
191 .unwrap();
192 runtime.block_on(async {
193 tokio::spawn(async {
194 tracing::info!("Starting compactor node");
195 start_compactor_node(meta_endpoint, advertise_addr, config_path).await
196 });
197 rx.recv().unwrap();
198 });
199 };
200
201 (std::thread::spawn(compact_func), tx)
202}
203
204fn start_replay_thread(
205 opts: CompactionTestOpts,
206 table_id: u32,
207 version_deltas: Vec<HummockVersionDelta>,
208) -> JoinHandle<()> {
209 let replay_func = move || {
210 let runtime = tokio::runtime::Builder::new_current_thread()
211 .enable_all()
212 .build()
213 .unwrap();
214 runtime
215 .block_on(start_replay(opts, table_id, version_deltas))
216 .expect("repaly error occurred");
217 };
218
219 std::thread::spawn(replay_func)
220}
221
222async fn init_metadata_for_replay(
223 cluster_meta_endpoint: &str,
224 new_meta_endpoint: &str,
225 advertise_addr: &HostAddr,
226 ci_mode: bool,
227 table_id: &mut u32,
228) -> anyhow::Result<()> {
229 tokio::time::sleep(Duration::from_secs(2)).await;
234
235 let meta_config = MetaConfig::default();
236 let meta_client: MetaClient;
237 tokio::select! {
238 _ = tokio::signal::ctrl_c() => {
239 tracing::info!("Ctrl+C received, now exiting");
240 std::process::exit(0);
241 },
242 ret = MetaClient::register_new(cluster_meta_endpoint.parse()?, WorkerType::RiseCtl, advertise_addr, Default::default(), &meta_config) => {
243 (meta_client, _) = ret;
244 },
245 }
246 let worker_id = meta_client.worker_id();
247 tracing::info!("Assigned init worker id {}", worker_id);
248 meta_client.activate(advertise_addr).await.unwrap();
249
250 let tables = meta_client.risectl_list_state_tables().await?;
251
252 let (new_meta_client, _) = MetaClient::register_new(
253 new_meta_endpoint.parse()?,
254 WorkerType::RiseCtl,
255 advertise_addr,
256 Default::default(),
257 &meta_config,
258 )
259 .await;
260 new_meta_client.activate(advertise_addr).await.unwrap();
261 if ci_mode {
262 let table_to_check = tables.iter().find(|t| t.name == "nexmark_q7").unwrap();
263 *table_id = table_to_check.id;
264 }
265
266 new_meta_client
268 .init_metadata_for_replay(tables, vec![])
269 .await?;
270
271 let _ = new_meta_client.get_new_sst_ids(SST_ID_SHIFT_COUNT).await?;
273
274 tracing::info!("Finished initializing the new Meta");
275 Ok(())
276}
277
278async fn pull_version_deltas(
279 cluster_meta_endpoint: &str,
280 advertise_addr: &HostAddr,
281) -> anyhow::Result<Vec<HummockVersionDelta>> {
282 let (meta_client, _) = MetaClient::register_new(
285 cluster_meta_endpoint.parse()?,
286 WorkerType::RiseCtl,
287 advertise_addr,
288 Default::default(),
289 &MetaConfig::default(),
290 )
291 .await;
292 let worker_id = meta_client.worker_id();
293 tracing::info!("Assigned pull worker id {}", worker_id);
294 meta_client.activate(advertise_addr).await.unwrap();
295
296 let (handle, shutdown_tx) =
297 MetaClient::start_heartbeat_loop(meta_client.clone(), Duration::from_millis(1000));
298 let res = meta_client
299 .list_version_deltas(HummockVersionId::new(0), u32::MAX, u64::MAX)
300 .await
301 .unwrap();
302
303 if let Err(err) = shutdown_tx.send(()) {
304 tracing::warn!("Failed to send shutdown to heartbeat task: {:?}", err);
305 }
306 handle.await?;
307 tracing::info!("Shutdown the pull worker");
308 Ok(res)
309}
310
311async fn start_replay(
312 opts: CompactionTestOpts,
313 table_to_check: u32,
314 version_delta_logs: Vec<HummockVersionDelta>,
315) -> anyhow::Result<()> {
316 let advertise_addr = "127.0.0.1:7770".parse().unwrap();
317 tracing::info!(
318 "Start to replay. Advertise address is {}, Table id {}",
319 advertise_addr,
320 table_to_check
321 );
322
323 let mut metric = CompactionTestMetrics::new();
324 let config = load_config(&opts.config_path_for_meta, NoOverride);
325 tracing::info!(
326 "Starting replay with config {:?} and opts {:?}",
327 config,
328 opts
329 );
330
331 let (meta_client, system_params) = MetaClient::register_new(
334 opts.meta_address.parse()?,
335 WorkerType::RiseCtl,
336 &advertise_addr,
337 Default::default(),
338 &config.meta,
339 )
340 .await;
341 let worker_id = meta_client.worker_id();
342 tracing::info!("Assigned replay worker id {}", worker_id);
343 meta_client.activate(&advertise_addr).await.unwrap();
344
345 let sub_tasks = vec![MetaClient::start_heartbeat_loop(
346 meta_client.clone(),
347 Duration::from_millis(1000),
348 )];
349
350 let latest_version = meta_client.disable_commit_epoch().await?;
352 assert_eq!(FIRST_VERSION_ID, latest_version.id);
353 for level in latest_version.levels.values() {
355 level.levels.iter().for_each(|lvl| {
356 assert!(lvl.table_infos.is_empty());
357 assert_eq!(0, lvl.total_file_size);
358 });
359 }
360
361 let storage_memory_config = extract_storage_memory_config(&config);
363 let storage_opts = Arc::new(StorageOpts::from((
364 &config,
365 &system_params,
366 &storage_memory_config,
367 )));
368 let hummock = create_hummock_store_with_metrics(&meta_client, storage_opts, &opts).await?;
369
370 let mut modified_compaction_groups = HashSet::<CompactionGroupId>::new();
372 let mut replay_count: u64 = 0;
373 let mut replayed_epochs = vec![];
374 let mut check_result_task: Option<tokio::task::JoinHandle<_>> = None;
375
376 for delta in version_delta_logs {
377 let (current_version, compaction_groups) = meta_client.replay_version_delta(delta).await?;
378 let (version_id, committed_epoch) = (
379 current_version.id,
380 current_version
381 .table_committed_epoch(table_to_check.into())
382 .unwrap_or_default(),
383 );
384 tracing::info!(
385 "Replayed version delta version_id: {}, committed_epoch: {}, compaction_groups: {:?}",
386 version_id,
387 committed_epoch,
388 compaction_groups
389 );
390
391 hummock
392 .inner()
393 .update_version_and_wait(current_version.clone())
394 .await;
395
396 replay_count += 1;
397 replayed_epochs.push(committed_epoch);
398 compaction_groups
399 .into_iter()
400 .map(|c| modified_compaction_groups.insert(c))
401 .count();
402
403 if replay_count % opts.num_trigger_frequency == 0 && !modified_compaction_groups.is_empty()
406 {
407 if let Some(handle) = check_result_task {
409 handle.await??;
410 }
411
412 metric.num_expect_check += 1;
413
414 replayed_epochs.pop();
416 let mut epochs = vec![committed_epoch];
417 epochs.extend(pin_old_snapshots(&meta_client, &replayed_epochs, 1).into_iter());
418 tracing::info!("===== Prepare to check snapshots: {:?}", epochs);
419
420 let old_version_iters = open_hummock_iters(&hummock, &epochs, table_to_check).await?;
421
422 tracing::info!(
423 "Trigger compaction for version {}, epoch {} compaction_groups: {:?}",
424 version_id,
425 committed_epoch,
426 modified_compaction_groups,
427 );
428 let is_multi_round = opts.num_trigger_rounds > 1;
430 for _ in 0..opts.num_trigger_rounds {
431 meta_client
432 .trigger_compaction_deterministic(
433 version_id,
434 Vec::from_iter(modified_compaction_groups.iter().copied()),
435 )
436 .await?;
437 if is_multi_round {
438 tokio::time::sleep(Duration::from_millis(50)).await;
439 }
440 }
441
442 let old_task_num = meta_client.get_assigned_compact_task_num().await?;
443 let (schedule_ok, version_diff) =
445 poll_compaction_schedule_status(&meta_client, old_task_num).await;
446
447 tracing::info!(
448 "Compaction schedule_ok {}, version_diff {}",
449 schedule_ok,
450 version_diff,
451 );
452 let (compaction_ok, new_version) = poll_compaction_tasks_status(
453 &meta_client,
454 schedule_ok,
455 version_diff as u32,
456 ¤t_version,
457 )
458 .await;
459
460 tracing::info!(
461 "Compaction schedule_ok {}, version_diff {} compaction_ok {}",
462 schedule_ok,
463 version_diff,
464 compaction_ok,
465 );
466
467 let new_version_id = new_version.id;
468 assert!(
469 new_version_id >= version_id,
470 "new_version_id: {}",
471 new_version_id,
472 );
473
474 if new_version_id != version_id {
475 hummock.inner().update_version_and_wait(new_version).await;
476
477 let new_version_iters =
478 open_hummock_iters(&hummock, &epochs, table_to_check).await?;
479
480 check_result_task = Some(tokio::spawn(check_compaction_results(
482 new_version_id,
483 old_version_iters,
484 new_version_iters,
485 )));
486 } else {
487 check_result_task = None;
488 metric.num_uncheck += 1;
489 }
490 modified_compaction_groups.clear();
491 replayed_epochs.clear();
492 }
493 }
494
495 if let Some(handle) = check_result_task {
497 handle.await??;
498 }
499 tracing::info!(
500 "Replay finished. Expect check count: {}, actual check count: {}",
501 metric.num_expect_check,
502 metric.num_expect_check - metric.num_uncheck
503 );
504
505 assert_ne!(0, metric.num_expect_check - metric.num_uncheck);
506
507 for (join_handle, shutdown_sender) in sub_tasks {
508 if let Err(err) = shutdown_sender.send(()) {
509 tracing::warn!("Failed to send shutdown: {:?}", err);
510 continue;
511 }
512 if let Err(err) = join_handle.await {
513 tracing::warn!("Failed to join shutdown: {:?}", err);
514 }
515 }
516
517 Ok(())
518}
519
520fn pin_old_snapshots(
521 _meta_client: &MetaClient,
522 replayed_epochs: &[HummockEpoch],
523 num: usize,
524) -> Vec<HummockEpoch> {
525 let mut old_epochs = vec![];
526 for &epoch in replayed_epochs.iter().rev().take(num) {
527 old_epochs.push(epoch);
528 }
529 old_epochs
530}
531
532async fn poll_compaction_schedule_status(
535 meta_client: &MetaClient,
536 old_task_num: usize,
537) -> (bool, i32) {
538 let poll_timeout = Duration::from_secs(2);
539 let poll_interval = Duration::from_millis(20);
540 let mut poll_duration_cnt = Duration::from_millis(0);
541 let mut new_task_num = meta_client.get_assigned_compact_task_num().await.unwrap();
542 let mut schedule_ok = false;
543 loop {
544 if new_task_num > old_task_num {
546 schedule_ok = true;
547 break;
548 }
549
550 if poll_duration_cnt >= poll_timeout {
551 break;
552 }
553 tokio::time::sleep(poll_interval).await;
554 poll_duration_cnt += poll_interval;
555 new_task_num = meta_client.get_assigned_compact_task_num().await.unwrap();
556 }
557 (
558 schedule_ok,
559 (new_task_num as i32 - old_task_num as i32).abs(),
560 )
561}
562
563async fn poll_compaction_tasks_status(
564 meta_client: &MetaClient,
565 schedule_ok: bool,
566 version_diff: u32,
567 base_version: &HummockVersion,
568) -> (bool, HummockVersion) {
569 let poll_timeout = if schedule_ok {
573 Duration::from_secs(120)
574 } else {
575 Duration::from_secs(5)
576 };
577 let poll_interval = Duration::from_millis(50);
578 let mut duration_cnt = Duration::from_millis(0);
579 let mut compaction_ok = false;
580
581 let mut cur_version = meta_client.get_current_version().await.unwrap();
582 loop {
583 if (cur_version.id > base_version.id)
584 && (cur_version.id - base_version.id >= version_diff as u64)
585 {
586 tracing::info!(
587 "Collected all of compact tasks. Actual version diff {}",
588 cur_version.id - base_version.id
589 );
590 compaction_ok = true;
591 break;
592 }
593 if duration_cnt >= poll_timeout {
594 break;
595 }
596 tokio::time::sleep(poll_interval).await;
597 duration_cnt += poll_interval;
598 cur_version = meta_client.get_current_version().await.unwrap();
599 }
600 (compaction_ok, cur_version)
601}
602
603type StateStoreIterType = Pin<
604 Box<
605 <<MonitoredStateStore<HummockStorage> as StateStore>::ReadSnapshot as StateStoreRead>::Iter,
606 >,
607>;
608
609async fn open_hummock_iters(
610 hummock: &MonitoredStateStore<HummockStorage>,
611 snapshots: &[HummockEpoch],
612 table_id: u32,
613) -> anyhow::Result<BTreeMap<HummockEpoch, StateStoreIterType>> {
614 let mut results = BTreeMap::new();
615
616 let mut buf = BytesMut::with_capacity(5);
619 buf.put_u32(table_id);
620 let b = buf.freeze();
621 let range = (
622 Bound::Included(b.clone()).map(TableKey),
623 Bound::Excluded(Bytes::from(risingwave_hummock_sdk::key::next_key(
624 b.as_ref(),
625 )))
626 .map(TableKey),
627 );
628
629 for &epoch in snapshots {
630 let snapshot = hummock
631 .new_read_snapshot(
632 HummockReadEpoch::NoWait(epoch),
633 NewReadSnapshotOptions {
634 table_id: TableId { table_id },
635 },
636 )
637 .await?;
638 let iter = snapshot
639 .iter(
640 range.clone(),
641 ReadOptions {
642 table_id: TableId { table_id },
643 cache_policy: CachePolicy::Fill(CacheHint::Normal),
644 ..Default::default()
645 },
646 )
647 .await?;
648 results.insert(epoch, Box::pin(iter));
649 }
650 Ok(results)
651}
652
653pub async fn check_compaction_results(
654 version_id: HummockVersionId,
655 mut expect_results: BTreeMap<HummockEpoch, StateStoreIterType>,
656 mut actual_results: BTreeMap<HummockEpoch, StateStoreIterType>,
657) -> anyhow::Result<()> {
658 let combined = expect_results
659 .iter_mut()
660 .zip_eq_fast(actual_results.iter_mut());
661 for ((e1, expect_iter), (e2, actual_iter)) in combined {
662 assert_eq!(e1, e2);
663 tracing::info!(
664 "Check results for version: id: {}, epoch: {}",
665 version_id,
666 e1,
667 );
668 let mut expect_cnt = 0;
669 let mut actual_cnt = 0;
670
671 while let Some(kv_expect) = expect_iter.try_next().await? {
672 expect_cnt += 1;
673 let ret = actual_iter.try_next().await?;
674 match ret {
675 None => {
676 break;
677 }
678 Some(kv_actual) => {
679 actual_cnt += 1;
680 assert_eq!(kv_expect.0, kv_actual.0, "Key mismatch");
681 assert_eq!(kv_expect.1, kv_actual.1, "Value mismatch");
682 }
683 }
684 }
685 assert_eq!(expect_cnt, actual_cnt);
686 }
687 Ok(())
688}
689
690struct StorageMetrics {
691 pub hummock_metrics: Arc<HummockMetrics>,
692 pub state_store_metrics: Arc<HummockStateStoreMetrics>,
693 pub object_store_metrics: Arc<ObjectStoreMetrics>,
694 pub storage_metrics: Arc<MonitoredStorageMetrics>,
695 pub compactor_metrics: Arc<CompactorMetrics>,
696}
697
698pub async fn create_hummock_store_with_metrics(
699 meta_client: &MetaClient,
700 storage_opts: Arc<StorageOpts>,
701 opts: &CompactionTestOpts,
702) -> anyhow::Result<MonitoredStateStore<HummockStorage>> {
703 let metrics = StorageMetrics {
704 hummock_metrics: Arc::new(HummockMetrics::unused()),
705 state_store_metrics: Arc::new(HummockStateStoreMetrics::unused()),
706 object_store_metrics: Arc::new(ObjectStoreMetrics::unused()),
707 storage_metrics: Arc::new(MonitoredStorageMetrics::unused()),
708 compactor_metrics: Arc::new(CompactorMetrics::unused()),
709 };
710
711 let state_store_impl = StateStoreImpl::new(
712 &opts.state_store,
713 storage_opts,
714 Arc::new(MonitoredHummockMetaClient::new(
715 meta_client.clone(),
716 metrics.hummock_metrics.clone(),
717 )),
718 metrics.state_store_metrics.clone(),
719 metrics.object_store_metrics.clone(),
720 metrics.storage_metrics.clone(),
721 metrics.compactor_metrics.clone(),
722 None,
723 true,
724 )
725 .await?;
726
727 if let Some(hummock_state_store) = state_store_impl.as_hummock() {
728 Ok(hummock_state_store
729 .clone()
730 .monitored(metrics.storage_metrics))
731 } else {
732 Err(anyhow!("only Hummock state store is supported!"))
733 }
734}