1#![feature(type_alias_impl_trait)]
16#![feature(impl_trait_in_assoc_type)]
17#![feature(btree_cursors)]
18#![feature(map_try_insert)]
19
20mod key_cmp;
21
22use std::borrow::Borrow;
23use std::cmp::Ordering;
24use std::collections::HashMap;
25use std::fmt::{Display, Formatter};
26use std::ops::{Add, AddAssign, Sub};
27use std::str::FromStr;
28
29pub use key_cmp::*;
30use risingwave_common::util::epoch::EPOCH_SPILL_TIME_MASK;
31use risingwave_pb::common::{BatchQueryEpoch, batch_query_epoch};
32use serde::{Deserialize, Deserializer, Serialize, Serializer};
33use sstable_info::SstableInfo;
34use tracing::warn;
35
36use crate::key_range::KeyRangeCommon;
37use crate::table_stats::TableStatsMap;
38
39pub mod change_log;
40pub mod compact;
41pub mod compact_task;
42pub mod compaction_group;
43pub mod filter_utils;
44pub mod key;
45pub mod key_range;
46pub mod level;
47pub mod prost_key_range;
48pub mod sstable_info;
49pub mod state_table_info;
50pub mod table_stats;
51pub mod table_watermark;
52pub mod time_travel;
53pub mod version;
54pub use frontend_version::{FrontendHummockVersion, FrontendHummockVersionDelta};
55mod frontend_version;
56pub mod vector_index;
57
58pub use compact::*;
59use risingwave_common::catalog::TableId;
60use risingwave_pb::hummock::hummock_version_checkpoint::PbStaleObjects;
61use risingwave_pb::hummock::{PbVectorIndexObjectType, VectorIndexObjectType};
62
63use crate::table_watermark::TableWatermarks;
64use crate::vector_index::VectorIndexAdd;
65
/// A primitive value `P` tagged with a const discriminator `C`.
///
/// Instantiations with different `C` are distinct types even when they wrap the same
/// primitive, so the compiler rejects mixing them up.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(any(test, feature = "test"), derive(Default))]
pub struct TypedPrimitive<const C: usize, P>(P);
69
70impl<const C: usize, P: PartialEq> PartialEq<P> for TypedPrimitive<C, P> {
71 fn eq(&self, other: &P) -> bool {
72 self.0 == *other
73 }
74}
75
76macro_rules! impl_primitive {
77 ($($t:ty)*) => {$(
78 impl<const C: usize> PartialEq<TypedPrimitive<C, $t>> for $t {
79 fn eq(&self, other: &TypedPrimitive<C, $t>) -> bool {
80 *self == other.0
81 }
82 }
83 )*}
84}
85
86impl_primitive!(u64);
87
88impl<const C: usize, P: FromStr> FromStr for TypedPrimitive<C, P> {
89 type Err = P::Err;
90
91 fn from_str(s: &str) -> Result<Self, Self::Err> {
92 P::from_str(s).map(TypedPrimitive)
93 }
94}
95
96impl<const C: usize, P> Borrow<P> for TypedPrimitive<C, P> {
97 fn borrow(&self) -> &P {
98 &self.0
99 }
100}
101
102impl<const C: usize, P: Add<Output = P>> Add<P> for TypedPrimitive<C, P> {
103 type Output = Self;
104
105 fn add(self, rhs: P) -> Self::Output {
106 Self(self.0 + rhs)
107 }
108}
109
110impl<const C: usize, P: AddAssign> AddAssign<P> for TypedPrimitive<C, P> {
111 fn add_assign(&mut self, rhs: P) {
112 self.0 += rhs;
113 }
114}
115
116impl<const C: usize, P: Display> Display for TypedPrimitive<C, P> {
117 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
118 write!(f, "{}", self.0)
119 }
120}
121
122impl<const C: usize, P> From<P> for TypedPrimitive<C, P> {
123 fn from(value: P) -> Self {
124 Self(value)
125 }
126}
127
128impl<const C: usize, P: Serialize> Serialize for TypedPrimitive<C, P> {
129 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
130 where
131 S: Serializer,
132 {
133 self.0.serialize(serializer)
134 }
135}
136
137impl<'de, const C: usize, P: Deserialize<'de>> Deserialize<'de> for TypedPrimitive<C, P> {
138 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
139 where
140 D: Deserializer<'de>,
141 {
142 Ok(Self(<P as Deserialize>::deserialize(deserializer)?))
143 }
144}
145
146impl<const C: usize, P> TypedPrimitive<C, P> {
147 pub const fn new(id: P) -> Self {
148 Self(id)
149 }
150
151 pub fn inner(self) -> P {
152 self.0
153 }
154}
155
156pub type HummockRawObjectId = TypedPrimitive<0, u64>;
157pub type HummockSstableObjectId = TypedPrimitive<1, u64>;
158pub type HummockSstableId = TypedPrimitive<2, u64>;
159pub type HummockVectorFileId = TypedPrimitive<3, u64>;
160pub type HummockHnswGraphFileId = TypedPrimitive<4, u64>;
161
162macro_rules! impl_object_id {
163 ($type_name:ty) => {
164 impl $type_name {
165 pub fn as_raw(&self) -> HummockRawObjectId {
166 HummockRawObjectId::new(self.0)
167 }
168 }
169
170 impl From<HummockRawObjectId> for $type_name {
171 fn from(id: HummockRawObjectId) -> Self {
172 Self(id.0)
173 }
174 }
175 };
176}
177
178impl_object_id!(HummockSstableObjectId);
179impl_object_id!(HummockVectorFileId);
180impl_object_id!(HummockHnswGraphFileId);
181
182pub type HummockRefCount = u64;
183pub type HummockContextId = risingwave_common::id::WorkerId;
184pub type HummockEpoch = u64;
185pub type HummockCompactionTaskId = u64;
186pub type CompactionGroupId = u64;
187
/// Newtype around the `u64` id of a Hummock version.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct HummockVersionId(u64);
190
191impl Display for HummockVersionId {
192 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
193 write!(f, "{}", self.0)
194 }
195}
196
197impl Serialize for HummockVersionId {
198 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
199 where
200 S: Serializer,
201 {
202 serializer.serialize_u64(self.0)
203 }
204}
205
206impl<'de> Deserialize<'de> for HummockVersionId {
207 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
208 where
209 D: Deserializer<'de>,
210 {
211 Ok(Self(<u64 as Deserialize>::deserialize(deserializer)?))
212 }
213}
214
215impl HummockVersionId {
216 pub const MAX: Self = Self(i64::MAX as _);
217
218 pub const fn new(id: u64) -> Self {
219 Self(id)
220 }
221
222 pub fn next(&self) -> Self {
223 Self(self.0 + 1)
224 }
225
226 pub fn to_u64(self) -> u64 {
227 self.0
228 }
229}
230
231impl Add<u64> for HummockVersionId {
232 type Output = Self;
233
234 fn add(self, rhs: u64) -> Self::Output {
235 Self(self.0 + rhs)
236 }
237}
238
239impl Sub for HummockVersionId {
240 type Output = u64;
241
242 fn sub(self, rhs: Self) -> Self::Output {
243 self.0 - rhs.0
244 }
245}
246
247pub const INVALID_VERSION_ID: HummockVersionId = HummockVersionId(0);
248pub const FIRST_VERSION_ID: HummockVersionId = HummockVersionId(1);
/// Value `1` placed in the top byte (bits 56 and up) of a `u64`.
pub const SPLIT_TABLE_COMPACTION_GROUP_ID_HEAD: u64 = 1 << 56;
/// Value `2` placed in the top byte (bits 56 and up) of a `u64`.
pub const SINGLE_TABLE_COMPACTION_GROUP_ID_HEAD: u64 = 2 << 56;
/// File extension of SSTable objects.
pub const SST_OBJECT_SUFFIX: &str = "data";
/// File extension of vector file objects.
pub const VECTOR_FILE_OBJECT_SUFFIX: &str = "vector";
/// Number of decimal digits in `u64::MAX`; the longest an object id can be when printed.
pub const HUMMOCK_SSTABLE_OBJECT_ID_MAX_DECIMAL_LENGTH: usize = 20;
254
255macro_rules! for_all_object_suffix {
256 ($({$name:ident, $type_name:ty, $suffix:expr},)+) => {
257 #[derive(Eq, PartialEq, Debug, Hash, Clone, Copy)]
258 pub enum HummockObjectId {
259 $(
260 $name($type_name),
261 )+
262 }
263
264 pub const VALID_OBJECT_ID_SUFFIXES: [&str; 3] = [$(
265 $suffix
266 ),+];
267
268 impl HummockObjectId {
269 fn new(id: u64, suffix: &str) -> Option<Self> {
270 match suffix {
271 $(
272 suffix if suffix == $suffix => Some(HummockObjectId::$name(<$type_name>::new(id))),
273 )+
274 _ => None,
275 }
276 }
277
278 pub fn suffix(&self) -> &str {
279 match self {
280 $(
281 HummockObjectId::$name(_) => $suffix,
282 )+
283 }
284 }
285
286 pub fn as_raw(&self) -> HummockRawObjectId {
287 let raw = match self {
288 $(
289 HummockObjectId::$name(id) => id.0,
290 )+
291 };
292 HummockRawObjectId::new(raw)
293 }
294 }
295
296 pub fn try_get_object_id_from_path(path: &str) -> Option<HummockObjectId> {
297 let split: Vec<_> = path.split(&['/', '.']).collect();
298 if split.len() <= 2 {
299 return None;
300 }
301 let suffix = split[split.len() - 1];
302 let id_str = split[split.len() - 2];
303 match suffix {
304 $(
305 suffix if suffix == $suffix => {
306 let id = id_str
307 .parse::<u64>()
308 .unwrap_or_else(|_| panic!("expect valid object id, got {}", id_str));
309 Some(HummockObjectId::$name(<$type_name>::new(id)))
310 },
311 )+
312 _ => None,
313 }
314 }
315 };
316 () => {
317 for_all_object_suffix! {
318 {Sstable, HummockSstableObjectId, SST_OBJECT_SUFFIX},
319 {VectorFile, HummockVectorFileId, VECTOR_FILE_OBJECT_SUFFIX},
320 {HnswGraphFile, HummockHnswGraphFileId, "hnsw_graph"},
321 }
322 };
323}
324
325for_all_object_suffix!();
326
327pub fn get_stale_object_ids(
328 stale_objects: &PbStaleObjects,
329) -> impl Iterator<Item = HummockObjectId> + '_ {
330 match HummockObjectId::Sstable(0.into()) {
334 HummockObjectId::Sstable(_) => {}
335 HummockObjectId::VectorFile(_) => {}
336 HummockObjectId::HnswGraphFile(_) => {}
337 };
338 stale_objects
339 .id
340 .iter()
341 .map(|sst_id| HummockObjectId::Sstable((*sst_id).into()))
342 .chain(stale_objects.vector_files.iter().map(
343 |file| match file.get_object_type().unwrap() {
344 PbVectorIndexObjectType::VectorIndexObjectUnspecified => {
345 unreachable!()
346 }
347 VectorIndexObjectType::VectorIndexObjectVector => {
348 HummockObjectId::VectorFile(file.id.into())
349 }
350 VectorIndexObjectType::VectorIndexObjectHnswGraph => {
351 HummockObjectId::HnswGraphFile(file.id.into())
352 }
353 },
354 ))
355}
356
/// Logs through `tracing` at `debug` level when built with debug assertions, and at `info`
/// level otherwise. Accepts the same arguments as the `tracing` event macros.
#[macro_export]
macro_rules! info_in_release {
    ($($arg:tt)*) => {
        {
            #[cfg(debug_assertions)]
            tracing::debug!($($arg)*);
            #[cfg(not(debug_assertions))]
            tracing::info!($($arg)*);
        }
    }
}
382
383#[derive(Default, Debug)]
384pub struct SyncResult {
385 pub sync_size: usize,
387 pub uncommitted_ssts: Vec<LocalSstableInfo>,
389 pub table_watermarks: HashMap<TableId, TableWatermarks>,
391 pub old_value_ssts: Vec<LocalSstableInfo>,
393 pub vector_index_adds: HashMap<TableId, Vec<VectorIndexAdd>>,
394}
395
396#[derive(Debug, Clone)]
397pub struct LocalSstableInfo {
398 pub sst_info: SstableInfo,
399 pub table_stats: TableStatsMap,
400 pub created_at: u64,
401}
402
403impl LocalSstableInfo {
404 pub fn new(sst_info: SstableInfo, table_stats: TableStatsMap, created_at: u64) -> Self {
405 Self {
406 sst_info,
407 table_stats,
408 created_at,
409 }
410 }
411
412 pub fn for_test(sst_info: SstableInfo) -> Self {
413 Self {
414 sst_info,
415 table_stats: Default::default(),
416 created_at: u64::MAX,
417 }
418 }
419
420 pub fn file_size(&self) -> u64 {
421 assert_eq!(self.sst_info.file_size, self.sst_info.sst_size);
422 self.sst_info.file_size
423 }
424}
425
426impl PartialEq for LocalSstableInfo {
427 fn eq(&self, other: &Self) -> bool {
428 self.sst_info == other.sst_info
429 }
430}
431
432#[derive(Debug, Clone, Copy)]
434pub enum HummockReadEpoch {
435 Committed(HummockEpoch),
437 BatchQueryCommitted(HummockEpoch, HummockVersionId),
439 NoWait(HummockEpoch),
441 Backup(HummockEpoch),
443 TimeTravel(HummockEpoch),
444}
445
446impl From<BatchQueryEpoch> for HummockReadEpoch {
447 fn from(e: BatchQueryEpoch) -> Self {
448 match e.epoch.unwrap() {
449 batch_query_epoch::Epoch::Committed(epoch) => HummockReadEpoch::BatchQueryCommitted(
450 epoch.epoch,
451 HummockVersionId::new(epoch.hummock_version_id),
452 ),
453 batch_query_epoch::Epoch::Current(epoch) => {
454 if epoch != HummockEpoch::MAX {
455 warn!(
456 epoch,
457 "ignore specified current epoch and set it to u64::MAX"
458 );
459 }
460 HummockReadEpoch::NoWait(HummockEpoch::MAX)
461 }
462 batch_query_epoch::Epoch::Backup(epoch) => HummockReadEpoch::Backup(epoch),
463 batch_query_epoch::Epoch::TimeTravel(epoch) => HummockReadEpoch::TimeTravel(epoch),
464 }
465 }
466}
467
468pub fn test_batch_query_epoch() -> BatchQueryEpoch {
469 BatchQueryEpoch {
470 epoch: Some(batch_query_epoch::Epoch::Current(u64::MAX)),
471 }
472}
473
474impl HummockReadEpoch {
475 pub fn get_epoch(&self) -> HummockEpoch {
476 *match self {
477 HummockReadEpoch::Committed(epoch)
478 | HummockReadEpoch::BatchQueryCommitted(epoch, _)
479 | HummockReadEpoch::NoWait(epoch)
480 | HummockReadEpoch::Backup(epoch)
481 | HummockReadEpoch::TimeTravel(epoch) => epoch,
482 }
483 }
484
485 pub fn is_read_committed(&self) -> bool {
486 match self {
487 HummockReadEpoch::Committed(_)
488 | HummockReadEpoch::TimeTravel(_)
489 | HummockReadEpoch::BatchQueryCommitted(_, _) => true,
490 HummockReadEpoch::NoWait(_) | HummockReadEpoch::Backup(_) => false,
491 }
492 }
493}
494pub struct ObjectIdRange {
495 pub start_id: HummockRawObjectId,
497 pub end_id: HummockRawObjectId,
499}
500
501impl ObjectIdRange {
502 pub fn new(
503 start_id: impl Into<HummockRawObjectId>,
504 end_id: impl Into<HummockRawObjectId>,
505 ) -> Self {
506 Self {
507 start_id: start_id.into(),
508 end_id: end_id.into(),
509 }
510 }
511
512 fn peek_next_object_id(&self) -> Option<HummockRawObjectId> {
513 if self.start_id < self.end_id {
514 return Some(self.start_id);
515 }
516 None
517 }
518
519 pub fn get_next_object_id(&mut self) -> Option<HummockRawObjectId> {
521 let next_id = self.peek_next_object_id();
522 self.start_id += 1;
523 next_id
524 }
525}
526
527pub fn can_concat(ssts: &[impl Borrow<SstableInfo>]) -> bool {
528 let len = ssts.len();
529 for i in 1..len {
530 if ssts[i - 1]
531 .borrow()
532 .key_range
533 .compare_right_with(&ssts[i].borrow().key_range.left)
534 != Ordering::Less
535 {
536 return false;
537 }
538 }
539 true
540}
541
542pub fn full_key_can_concat(ssts: &[SstableInfo]) -> bool {
543 let len = ssts.len();
544 for i in 1..len {
545 let sst_1 = &ssts[i - 1];
546 let sst_2 = &ssts[i];
547
548 if sst_1.key_range.right_exclusive {
549 if KeyComparator::compare_encoded_full_key(
550 &sst_1.key_range.right,
551 &sst_2.key_range.left,
552 )
553 .is_gt()
554 {
555 return false;
556 }
557 } else if KeyComparator::compare_encoded_full_key(
558 &sst_1.key_range.right,
559 &sst_2.key_range.left,
560 )
561 .is_ge()
562 {
563 return false;
564 }
565 }
566 true
567}
568
/// Directory, relative to the root, that holds the version checkpoint.
const CHECKPOINT_DIR: &str = "checkpoint";
/// Object name of the checkpoint inside [`CHECKPOINT_DIR`].
const CHECKPOINT_NAME: &str = "0";
/// Directory, relative to the root, that holds archived versions.
const ARCHIVE_DIR: &str = "archive";
572
573pub fn version_checkpoint_path(root_dir: &str) -> String {
574 format!("{}/{}/{}", root_dir, CHECKPOINT_DIR, CHECKPOINT_NAME)
575}
576
577pub fn version_archive_dir(root_dir: &str) -> String {
578 format!("{}/{}", root_dir, ARCHIVE_DIR)
579}
580
/// Returns the directory part of `checkpoint_path`: everything up to and including the last
/// `/`. A path without any `/` yields an empty string.
pub fn version_checkpoint_dir(checkpoint_path: &str) -> String {
    match checkpoint_path.rfind('/') {
        Some(last_slash) => checkpoint_path[..=last_slash].to_owned(),
        None => String::new(),
    }
}
584
/// An epoch and a spill offset packed into a single `u64`.
///
/// The bits selected by `EPOCH_SPILL_TIME_MASK` hold the offset; the remaining bits hold the
/// epoch itself (see `pure_epoch` and `offset`).
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct EpochWithGap(u64);
592
593impl EpochWithGap {
594 #[allow(unused_variables)]
595 pub fn new(epoch: u64, spill_offset: u16) -> Self {
596 if risingwave_common::util::epoch::is_max_epoch(epoch) {
600 EpochWithGap::new_max_epoch()
601 } else {
602 debug_assert!((epoch & EPOCH_SPILL_TIME_MASK) == 0);
603 EpochWithGap(epoch + spill_offset as u64)
604 }
605 }
606
607 pub fn new_from_epoch(epoch: u64) -> Self {
608 EpochWithGap::new(epoch, 0)
609 }
610
611 pub fn new_min_epoch() -> Self {
612 EpochWithGap(0)
613 }
614
615 pub fn new_max_epoch() -> Self {
616 EpochWithGap(HummockEpoch::MAX)
617 }
618
619 pub(crate) fn as_u64(&self) -> HummockEpoch {
621 self.0
622 }
623
624 pub fn from_u64(epoch_with_gap: u64) -> Self {
626 EpochWithGap(epoch_with_gap)
627 }
628
629 pub fn pure_epoch(&self) -> HummockEpoch {
631 self.0 & !EPOCH_SPILL_TIME_MASK
632 }
633
634 pub fn offset(&self) -> u64 {
635 self.0 & EPOCH_SPILL_TIME_MASK
636 }
637}
638
639pub fn get_object_data_path(
640 obj_prefix: &str,
641 path_prefix: &str,
642 object_id: HummockObjectId,
643) -> String {
644 let suffix = object_id.suffix();
645 let object_id = object_id.as_raw();
646
647 let mut path = String::with_capacity(
648 path_prefix.len()
649 + "/".len()
650 + obj_prefix.len()
651 + HUMMOCK_SSTABLE_OBJECT_ID_MAX_DECIMAL_LENGTH
652 + ".".len()
653 + suffix.len(),
654 );
655 path.push_str(path_prefix);
656 path.push('/');
657 path.push_str(obj_prefix);
658 path.push_str(&object_id.to_string());
659 path.push('.');
660 path.push_str(suffix);
661 path
662}
663
664pub fn get_object_id_from_path(path: &str) -> HummockObjectId {
665 use itertools::Itertools;
666 let split = path.split(&['/', '.']).collect_vec();
667 assert!(split.len() > 2);
668 let suffix = split[split.len() - 1];
669 let id = split[split.len() - 2]
670 .parse::<u64>()
671 .expect("valid object id");
672 HummockObjectId::new(id, suffix)
673 .unwrap_or_else(|| panic!("unknown object id suffix {}", suffix))
674}
675
#[cfg(test)]
mod tests {
    use bytes::Bytes;
    use sstable_info::SstableInfoInner;

    use super::*;

    /// Builds an SST that only has its key range set, with an inclusive right bound.
    fn sst_with_range(left: &[u8], right: &[u8]) -> SstableInfo {
        SstableInfoInner {
            key_range: key_range::KeyRange {
                left: Bytes::from(left.to_vec()),
                right: Bytes::from(right.to_vec()),
                right_exclusive: false,
            },
            ..Default::default()
        }
        .into()
    }

    /// The declared maximum decimal length must match the number of digits in `u64::MAX`.
    #[test]
    fn test_object_id_decimal_max_length() {
        let digits = u64::MAX.to_string().len();
        assert_eq!(digits, HUMMOCK_SSTABLE_OBJECT_ID_MAX_DECIMAL_LENGTH)
    }

    #[test]
    fn test_full_key_concat() {
        let key1 = b"\0\0\0\x08\0\0\0\x0112-3\0\0\0\0\x04\0\x1c\x16l'\xe2\0\0";
        let key2 = b"\0\0\0\x08\0\0\0\x0112-3\0\0\0\0\x04\0\x1c\x16l \x12\0\0";

        let sst_1 = sst_with_range(key1, key1);
        let sst_2 = sst_with_range(key2, key2);
        let sst_3 = sst_with_range(key1, key2);

        // [key1, key1] followed by [key2, key2] can be concatenated.
        assert!(full_key_can_concat(&[sst_1.clone(), sst_2]));

        // [key1, key1] followed by [key1, key2] shares key1 and therefore cannot.
        assert!(!full_key_can_concat(&[sst_1, sst_3]));
    }
}