risingwave_common/
bitmap.rs

1// Copyright 2024 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Licensed to the Apache Software Foundation (ASF) under one
16// or more contributor license agreements.  See the NOTICE file
17// distributed with this work for additional information
18// regarding copyright ownership.  The ASF licenses this file
19// to you under the Apache License, Version 2.0 (the
20// "License"); you may not use this file except in compliance
21// with the License.  You may obtain a copy of the License at
22//
23//   http://www.apache.org/licenses/LICENSE-2.0
24//
25// Unless required by applicable law or agreed to in writing,
26// software distributed under the License is distributed on an
27// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
28// KIND, either express or implied.  See the License for the
29// specific language governing permissions and limitations
30// under the License.
31
32//! Defines a bitmap, which is used to track which values in an Arrow array are null.
33//! This is called a "validity bitmap" in the Arrow documentation.
34//! This file is adapted from [arrow-rs](https://github.com/apache/arrow-rs)
35
36// allow `zip` for performance reasons
37#![allow(clippy::disallowed_methods)]
38
39use std::borrow::Borrow;
40use std::iter::{self, TrustedLen};
41use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, Not, Range, RangeInclusive};
42
43use risingwave_common_estimate_size::EstimateSize;
44use risingwave_pb::common::PbBuffer;
45use risingwave_pb::common::buffer::CompressionType;
46use rw_iter_util::ZipEqFast;
47
48#[derive(Default, Debug, Clone, EstimateSize)]
49pub struct BitmapBuilder {
50    len: usize,
51    data: Vec<usize>,
52}
53
54const BITS: usize = usize::BITS as usize;
55
56impl From<Bitmap> for BitmapBuilder {
57    fn from(bitmap: Bitmap) -> Self {
58        let len = bitmap.len();
59        if let Some(bits) = bitmap.bits {
60            BitmapBuilder {
61                len,
62                data: bits.into_vec(),
63            }
64        } else if bitmap.count_ones == 0 {
65            Self::zeroed(bitmap.len())
66        } else {
67            debug_assert!(bitmap.len() == bitmap.count_ones());
68            Self::filled(bitmap.len())
69        }
70    }
71}
72
73impl BitmapBuilder {
74    /// Creates a new empty bitmap with at least the specified capacity.
75    pub fn with_capacity(capacity: usize) -> BitmapBuilder {
76        BitmapBuilder {
77            len: 0,
78            data: Vec::with_capacity(Bitmap::vec_len(capacity)),
79        }
80    }
81
82    /// Creates a new bitmap with all bits set to 0.
83    pub fn zeroed(len: usize) -> BitmapBuilder {
84        BitmapBuilder {
85            len,
86            data: vec![0; Bitmap::vec_len(len)],
87        }
88    }
89
90    /// Creates a new bitmap with all bits set to 1.
91    pub fn filled(len: usize) -> BitmapBuilder {
92        let vec_len = Bitmap::vec_len(len);
93        let mut data = vec![usize::MAX; vec_len];
94        if vec_len >= 1 && !len.is_multiple_of(BITS) {
95            data[vec_len - 1] = (1 << (len % BITS)) - 1;
96        }
97        BitmapBuilder { len, data }
98    }
99
100    /// Writes a new value into a single bit.
101    pub fn set(&mut self, n: usize, val: bool) {
102        assert!(n < self.len);
103
104        let byte = &mut self.data[n / BITS];
105        let mask = 1 << (n % BITS);
106        if val {
107            *byte |= mask;
108        } else {
109            *byte &= !mask;
110        }
111    }
112
113    /// Tests a single bit.
114    pub fn is_set(&self, n: usize) -> bool {
115        assert!(n < self.len);
116        let byte = &self.data[n / BITS];
117        let mask = 1 << (n % BITS);
118        *byte & mask != 0
119    }
120
121    /// Appends a single bit to the back.
122    pub fn append(&mut self, bit_set: bool) -> &mut Self {
123        if self.len.is_multiple_of(BITS) {
124            self.data.push(0);
125        }
126        self.data[self.len / BITS] |= (bit_set as usize) << (self.len % BITS);
127        self.len += 1;
128        self
129    }
130
131    /// Appends `n` bits to the back.
132    pub fn append_n(&mut self, mut n: usize, bit_set: bool) -> &mut Self {
133        while n != 0 && !self.len.is_multiple_of(BITS) {
134            self.append(bit_set);
135            n -= 1;
136        }
137        self.len += n;
138        self.data.resize(
139            Bitmap::vec_len(self.len),
140            if bit_set { usize::MAX } else { 0 },
141        );
142        if bit_set && !self.len.is_multiple_of(BITS) {
143            // remove tailing 1s
144            *self.data.last_mut().unwrap() &= (1 << (self.len % BITS)) - 1;
145        }
146        self
147    }
148
149    /// Removes the last bit.
150    pub fn pop(&mut self) -> Option<()> {
151        if self.len == 0 {
152            return None;
153        }
154        self.len -= 1;
155        self.data.truncate(Bitmap::vec_len(self.len));
156        if !self.len.is_multiple_of(BITS) {
157            *self.data.last_mut().unwrap() &= (1 << (self.len % BITS)) - 1;
158        }
159        Some(())
160    }
161
162    /// Appends a bitmap to the back.
163    pub fn append_bitmap(&mut self, other: &Bitmap) -> &mut Self {
164        if self.len.is_multiple_of(BITS) {
165            // fast path: self is aligned
166            self.len += other.len();
167            if let Some(bits) = &other.bits {
168                // append the bytes
169                self.data.extend_from_slice(bits);
170            } else if other.count_ones == 0 {
171                // append 0s
172                self.data.resize(Bitmap::vec_len(self.len), 0);
173            } else {
174                // append 1s
175                self.data.resize(Bitmap::vec_len(self.len), usize::MAX);
176                if !self.len.is_multiple_of(BITS) {
177                    // remove tailing 1s
178                    *self.data.last_mut().unwrap() = (1 << (self.len % BITS)) - 1;
179                }
180            }
181        } else {
182            // slow path: append bits one by one
183            for bit in other.iter() {
184                self.append(bit);
185            }
186        }
187        self
188    }
189
190    /// Finishes building and returns the bitmap.
191    pub fn finish(self) -> Bitmap {
192        let count_ones = self.data.iter().map(|&x| x.count_ones()).sum::<u32>() as usize;
193        Bitmap {
194            num_bits: self.len(),
195            count_ones,
196            bits: (count_ones != 0 && count_ones != self.len).then(|| self.data.into()),
197        }
198    }
199
200    /// Returns the number of bits in the bitmap.
201    pub fn len(&self) -> usize {
202        self.len
203    }
204
205    /// Returns `true` if the bitmap has a length of 0.
206    pub fn is_empty(&self) -> bool {
207        self.len == 0
208    }
209}
210
211/// An immutable bitmap. Use [`BitmapBuilder`] to build it.
212#[derive(Clone, PartialEq, Eq)]
213pub struct Bitmap {
214    /// The useful bits in the bitmap. The total number of bits will usually
215    /// be larger than the useful bits due to byte-padding.
216    num_bits: usize,
217
218    /// The number of high bits in the bitmap.
219    count_ones: usize,
220
221    /// Bits are stored in a compact form.
222    /// They are packed into `usize`s.
223    ///
224    /// Optimization: If all bits are set to 0 or 1, this field MUST be `None`.
225    bits: Option<Box<[usize]>>,
226}
227
228impl EstimateSize for Bitmap {
229    fn estimated_heap_size(&self) -> usize {
230        match &self.bits {
231            Some(bits) => std::mem::size_of_val(bits.as_ref()),
232            None => 0,
233        }
234    }
235}
236
237impl std::fmt::Debug for Bitmap {
238    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
239        for data in self.iter() {
240            write!(f, "{}", data as u8)?;
241        }
242        Ok(())
243    }
244}
245
246impl Bitmap {
247    /// Creates a new bitmap with all bits set to 0.
248    pub fn zeros(num_bits: usize) -> Self {
249        Self {
250            bits: None,
251            num_bits,
252            count_ones: 0,
253        }
254    }
255
256    /// Creates a new bitmap with all bits set to 1.
257    pub fn ones(num_bits: usize) -> Self {
258        Self {
259            bits: None,
260            num_bits,
261            count_ones: num_bits,
262        }
263    }
264
265    /// Creates a new bitmap from vector.
266    fn from_vec_with_len(buf: Vec<usize>, num_bits: usize) -> Self {
267        debug_assert_eq!(buf.len(), Self::vec_len(num_bits));
268        let count_ones = buf.iter().map(|&x| x.count_ones()).sum::<u32>() as usize;
269        debug_assert!(count_ones <= num_bits);
270        Self {
271            bits: (count_ones != 0 && count_ones != num_bits).then(|| buf.into()),
272            num_bits,
273            count_ones,
274        }
275    }
276
277    /// Creates a new bitmap from bytes.
278    pub fn from_bytes(buf: &[u8]) -> Self {
279        Self::from_bytes_with_len(buf, buf.len() * 8)
280    }
281
282    /// Creates a new bitmap from the indices of bits set to 1.
283    pub fn from_indices<I, T>(num_bits: usize, ones: I) -> Self
284    where
285        I: IntoIterator<Item = T>,
286        T: Borrow<usize>,
287    {
288        let mut builder = BitmapBuilder::zeroed(num_bits);
289
290        for idx in ones {
291            let idx = *idx.borrow();
292            debug_assert!(idx < num_bits);
293            builder.set(idx, true);
294        }
295
296        builder.finish()
297    }
298
299    /// Creates a new bitmap from bytes and length.
300    fn from_bytes_with_len(buf: &[u8], num_bits: usize) -> Self {
301        let mut bits = Vec::with_capacity(Self::vec_len(num_bits));
302        let slice = unsafe {
303            bits.set_len(bits.capacity());
304            std::slice::from_raw_parts_mut(bits.as_ptr() as *mut u8, bits.len() * (BITS / 8))
305        };
306        slice[..buf.len()].copy_from_slice(buf);
307        slice[buf.len()..].fill(0);
308        Self::from_vec_with_len(bits, num_bits)
309    }
310
311    /// Creates a new bitmap from a slice of `bool`.
312    pub fn from_bool_slice(bools: &[bool]) -> Self {
313        // use SIMD to speed up
314        let mut iter = bools.iter().copied().array_chunks::<BITS>();
315        let mut bits = Vec::with_capacity(Self::vec_len(bools.len()));
316        for chunk in iter.by_ref() {
317            let bitmask = std::simd::Mask::<i8, BITS>::from_array(chunk).to_bitmask() as usize;
318            bits.push(bitmask);
319        }
320        if let Some(remainder_iter) = iter.into_remainder()
321            && !remainder_iter.is_empty()
322        {
323            let mut bitmask = 0;
324            for (i, b) in remainder_iter.enumerate() {
325                bitmask |= (b as usize) << i;
326            }
327            bits.push(bitmask);
328        }
329        Self::from_vec_with_len(bits, bools.len())
330    }
331
332    /// Return the next set bit index on or after `bit_idx`.
333    pub fn next_set_bit(&self, bit_idx: usize) -> Option<usize> {
334        (bit_idx..self.len()).find(|&idx| unsafe { self.is_set_unchecked(idx) })
335    }
336
337    /// Counts the number of bits set to 1.
338    pub fn count_ones(&self) -> usize {
339        self.count_ones
340    }
341
342    /// Returns true if any bit is set to 1.
343    pub fn any(&self) -> bool {
344        self.count_ones != 0
345    }
346
347    /// Returns the length of vector to store `num_bits` bits.
348    fn vec_len(num_bits: usize) -> usize {
349        num_bits.div_ceil(BITS)
350    }
351
352    /// Returns the number of valid bits in the bitmap,
353    /// also referred to its 'length'.
354    #[inline]
355    pub fn len(&self) -> usize {
356        self.num_bits
357    }
358
359    /// Returns true if the `Bitmap` has a length of 0.
360    pub fn is_empty(&self) -> bool {
361        self.num_bits == 0
362    }
363
364    /// Returns true if the bit at `idx` is set, without doing bounds checking.
365    ///
366    /// # Safety
367    ///
368    /// Index must be in range.
369    pub unsafe fn is_set_unchecked(&self, idx: usize) -> bool {
370        unsafe {
371            match &self.bits {
372                None => self.count_ones != 0,
373                Some(bits) => bits.get_unchecked(idx / BITS) & (1 << (idx % BITS)) != 0,
374            }
375        }
376    }
377
378    /// Returns true if the bit at `idx` is set.
379    pub fn is_set(&self, idx: usize) -> bool {
380        assert!(idx < self.len());
381        unsafe { self.is_set_unchecked(idx) }
382    }
383
384    /// Tests if every bit is set to 1.
385    pub fn all(&self) -> bool {
386        self.count_ones == self.len()
387    }
388
389    /// Produces an iterator over each bit.
390    pub fn iter(&self) -> BitmapIter<'_> {
391        BitmapIter {
392            bits: self.bits.as_ref().map(|s| &s[..]),
393            idx: 0,
394            num_bits: self.num_bits,
395            current_usize: 0,
396            all_ones: self.count_ones == self.num_bits,
397        }
398    }
399
400    /// Enumerates the index of each bit set to 1.
401    pub fn iter_ones(&self) -> BitmapOnesIter<'_> {
402        if let Some(bits) = &self.bits {
403            BitmapOnesIter::Buffer {
404                bits: &bits[..],
405                cur_idx: 0,
406                cur_bits: bits.first().cloned(),
407            }
408        } else {
409            // all zeros or all ones
410            BitmapOnesIter::Range {
411                start: 0,
412                end: self.count_ones,
413            }
414        }
415    }
416
417    /// Returns an iterator which yields the position ranges of continuous high bits.
418    pub fn high_ranges(&self) -> impl Iterator<Item = RangeInclusive<usize>> + '_ {
419        let mut start = None;
420
421        self.iter()
422            .chain(iter::once(false))
423            .enumerate()
424            .filter_map(move |(i, bit)| match (bit, start) {
425                // A new high range starts.
426                (true, None) => {
427                    start = Some(i);
428                    None
429                }
430                // The current high range ends.
431                (false, Some(s)) => {
432                    start = None;
433                    Some(s..=(i - 1))
434                }
435                _ => None,
436            })
437    }
438
439    #[cfg(test)]
440    fn assert_valid(&self) {
441        assert_eq!(
442            self.iter().map(|x| x as usize).sum::<usize>(),
443            self.count_ones
444        )
445    }
446
447    /// Creates a new bitmap with all bits in range set to 1.
448    ///
449    /// # Example
450    /// ```
451    /// use risingwave_common::bitmap::Bitmap;
452    /// let bitmap = Bitmap::from_range(200, 100..180);
453    /// assert_eq!(bitmap.count_ones(), 80);
454    /// for i in 0..200 {
455    ///     assert_eq!(bitmap.is_set(i), i >= 100 && i < 180);
456    /// }
457    /// ```
458    pub fn from_range(num_bits: usize, range: Range<usize>) -> Self {
459        assert!(range.start <= range.end);
460        assert!(range.end <= num_bits);
461        if range.start == range.end {
462            return Self::zeros(num_bits);
463        } else if range == (0..num_bits) {
464            return Self::ones(num_bits);
465        }
466        let mut bits = vec![0; Self::vec_len(num_bits)];
467        let start = range.start / BITS;
468        let end = range.end / BITS;
469        let start_offset = range.start % BITS;
470        let end_offset = range.end % BITS;
471        if start == end {
472            bits[start] = ((1 << (end_offset - start_offset)) - 1) << start_offset;
473        } else {
474            bits[start] = !0 << start_offset;
475            bits[start + 1..end].fill(!0);
476            if end_offset != 0 {
477                bits[end] = (1 << end_offset) - 1;
478            }
479        }
480        Self {
481            bits: Some(bits.into()),
482            num_bits,
483            count_ones: range.len(),
484        }
485    }
486}
487
488impl From<usize> for Bitmap {
489    fn from(val: usize) -> Self {
490        Self::ones(val)
491    }
492}
493
494impl<'b> BitAnd<&'b Bitmap> for &Bitmap {
495    type Output = Bitmap;
496
497    fn bitand(self, rhs: &'b Bitmap) -> Bitmap {
498        assert_eq!(self.num_bits, rhs.num_bits);
499        let (lbits, rbits) = match (&self.bits, &rhs.bits) {
500            _ if self.count_ones == 0 || rhs.count_ones == 0 => {
501                return Bitmap::zeros(self.num_bits);
502            }
503            (_, None) => return self.clone(),
504            (None, _) => return rhs.clone(),
505            (Some(lbits), Some(rbits)) => (lbits, rbits),
506        };
507        let bits = (lbits.iter().zip(rbits.iter()))
508            .map(|(&a, &b)| a & b)
509            .collect();
510        Bitmap::from_vec_with_len(bits, self.num_bits)
511    }
512}
513
514impl BitAnd<Bitmap> for &Bitmap {
515    type Output = Bitmap;
516
517    fn bitand(self, rhs: Bitmap) -> Self::Output {
518        self.bitand(&rhs)
519    }
520}
521
522impl<'b> BitAnd<&'b Bitmap> for Bitmap {
523    type Output = Bitmap;
524
525    fn bitand(self, rhs: &'b Bitmap) -> Self::Output {
526        rhs.bitand(self)
527    }
528}
529
530impl BitAnd for Bitmap {
531    type Output = Bitmap;
532
533    fn bitand(self, rhs: Bitmap) -> Self::Output {
534        (&self).bitand(&rhs)
535    }
536}
537
538impl BitAndAssign<&Bitmap> for Bitmap {
539    fn bitand_assign(&mut self, rhs: &Bitmap) {
540        *self = &*self & rhs;
541    }
542}
543
544impl BitAndAssign<Bitmap> for Bitmap {
545    fn bitand_assign(&mut self, rhs: Bitmap) {
546        *self = &*self & rhs;
547    }
548}
549
550impl<'b> BitOr<&'b Bitmap> for &Bitmap {
551    type Output = Bitmap;
552
553    fn bitor(self, rhs: &'b Bitmap) -> Bitmap {
554        assert_eq!(self.num_bits, rhs.num_bits);
555        let (lbits, rbits) = match (&self.bits, &rhs.bits) {
556            _ if self.count_ones == self.num_bits || rhs.count_ones == self.num_bits => {
557                return Bitmap::ones(self.num_bits);
558            }
559            (_, None) => return self.clone(),
560            (None, _) => return rhs.clone(),
561            (Some(lbits), Some(rbits)) => (lbits, rbits),
562        };
563        let bits = (lbits.iter().zip(rbits.iter()))
564            .map(|(&a, &b)| a | b)
565            .collect();
566        Bitmap::from_vec_with_len(bits, self.num_bits)
567    }
568}
569
570impl BitOr<Bitmap> for &Bitmap {
571    type Output = Bitmap;
572
573    fn bitor(self, rhs: Bitmap) -> Self::Output {
574        self.bitor(&rhs)
575    }
576}
577
578impl<'b> BitOr<&'b Bitmap> for Bitmap {
579    type Output = Bitmap;
580
581    fn bitor(self, rhs: &'b Bitmap) -> Self::Output {
582        rhs.bitor(self)
583    }
584}
585
586impl BitOr for Bitmap {
587    type Output = Bitmap;
588
589    fn bitor(self, rhs: Bitmap) -> Self::Output {
590        (&self).bitor(&rhs)
591    }
592}
593
594impl BitOrAssign<&Bitmap> for Bitmap {
595    fn bitor_assign(&mut self, rhs: &Bitmap) {
596        *self = &*self | rhs;
597    }
598}
599
600impl BitOrAssign<Bitmap> for Bitmap {
601    fn bitor_assign(&mut self, rhs: Bitmap) {
602        *self |= &rhs;
603    }
604}
605
606impl BitXor for &Bitmap {
607    type Output = Bitmap;
608
609    fn bitxor(self, rhs: &Bitmap) -> Self::Output {
610        assert_eq!(self.num_bits, rhs.num_bits);
611        let (lbits, rbits) = match (&self.bits, &rhs.bits) {
612            (_, None) if rhs.count_ones == 0 => return self.clone(),
613            (_, None) => return !self,
614            (None, _) if self.count_ones == 0 => return rhs.clone(),
615            (None, _) => return !rhs,
616            (Some(lbits), Some(rbits)) => (lbits, rbits),
617        };
618        let bits = (lbits.iter().zip(rbits.iter()))
619            .map(|(&a, &b)| a ^ b)
620            .collect();
621        Bitmap::from_vec_with_len(bits, self.num_bits)
622    }
623}
624
625impl Not for &Bitmap {
626    type Output = Bitmap;
627
628    fn not(self) -> Self::Output {
629        let bits = match &self.bits {
630            None if self.count_ones == 0 => return Bitmap::ones(self.num_bits),
631            None => return Bitmap::zeros(self.num_bits),
632            Some(bits) => bits,
633        };
634        let mut bits: Box<[usize]> = bits.iter().map(|b| !b).collect();
635        if !self.num_bits.is_multiple_of(BITS) {
636            bits[self.num_bits / BITS] &= (1 << (self.num_bits % BITS)) - 1;
637        }
638        Bitmap {
639            num_bits: self.num_bits,
640            count_ones: self.num_bits - self.count_ones,
641            bits: Some(bits),
642        }
643    }
644}
645
646impl Not for Bitmap {
647    type Output = Bitmap;
648
649    fn not(mut self) -> Self::Output {
650        let bits = match &mut self.bits {
651            None if self.count_ones == 0 => return Bitmap::ones(self.num_bits),
652            None => return Bitmap::zeros(self.num_bits),
653            Some(bits) => bits,
654        };
655        bits.iter_mut().for_each(|x| *x = !*x);
656        if !self.num_bits.is_multiple_of(BITS) {
657            bits[self.num_bits / BITS] &= (1 << (self.num_bits % BITS)) - 1;
658        }
659        self.count_ones = self.num_bits - self.count_ones;
660        self
661    }
662}
663
664impl FromIterator<bool> for Bitmap {
665    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
666        let vec = iter.into_iter().collect::<Vec<_>>();
667        Self::from_bool_slice(&vec)
668    }
669}
670
671impl FromIterator<Option<bool>> for Bitmap {
672    fn from_iter<T: IntoIterator<Item = Option<bool>>>(iter: T) -> Self {
673        iter.into_iter().map(|b| b.unwrap_or(false)).collect()
674    }
675}
676
677impl Bitmap {
678    pub fn to_protobuf(&self) -> PbBuffer {
679        let body_len = self.num_bits.div_ceil(8) + 1;
680        let mut body = Vec::with_capacity(body_len);
681        body.push((self.num_bits % 8) as u8);
682        match &self.bits {
683            None if self.count_ones == 0 => body.resize(body_len, 0),
684            None => {
685                body.resize(body_len, u8::MAX);
686                if !self.num_bits.is_multiple_of(8) {
687                    body[body_len - 1] = (1 << (self.num_bits % 8)) - 1;
688                }
689            }
690            Some(bits) => {
691                body.extend_from_slice(unsafe {
692                    std::slice::from_raw_parts(
693                        bits.as_ptr() as *const u8,
694                        self.num_bits.div_ceil(8),
695                    )
696                });
697            }
698        }
699        PbBuffer {
700            body,
701            compression: CompressionType::None as i32,
702        }
703    }
704}
705
706impl From<&PbBuffer> for Bitmap {
707    fn from(buf: &PbBuffer) -> Self {
708        let last_byte_num_bits = buf.body[0];
709        let num_bits = ((buf.body.len() - 1) * 8) - ((8 - last_byte_num_bits) % 8) as usize;
710        Self::from_bytes_with_len(&buf.body[1..], num_bits)
711    }
712}
713
714impl From<PbBuffer> for Bitmap {
715    fn from(buf: PbBuffer) -> Self {
716        Self::from(&buf)
717    }
718}
719
720/// Bitmap iterator.
721pub struct BitmapIter<'a> {
722    bits: Option<&'a [usize]>,
723    idx: usize,
724    num_bits: usize,
725    current_usize: usize,
726    all_ones: bool,
727}
728
729impl BitmapIter<'_> {
730    fn next_always_load_usize(&mut self) -> Option<bool> {
731        if self.idx >= self.num_bits {
732            return None;
733        }
734        let Some(bits) = &self.bits else {
735            self.idx += 1;
736            return Some(self.all_ones);
737        };
738
739        // Offset of the bit within the usize.
740        let usize_offset = self.idx % BITS;
741
742        // Get the index of usize which the bit is located in
743        let usize_index = self.idx / BITS;
744        self.current_usize = unsafe { *bits.get_unchecked(usize_index) };
745
746        let bit_mask = 1 << usize_offset;
747        let bit_flag = self.current_usize & bit_mask != 0;
748        self.idx += 1;
749        Some(bit_flag)
750    }
751}
752
753impl iter::Iterator for BitmapIter<'_> {
754    type Item = bool;
755
756    fn next(&mut self) -> Option<Self::Item> {
757        if self.idx >= self.num_bits {
758            return None;
759        }
760        let Some(bits) = &self.bits else {
761            self.idx += 1;
762            return Some(self.all_ones);
763        };
764
765        // Offset of the bit within the usize.
766        let usize_offset = self.idx % BITS;
767
768        if usize_offset == 0 {
769            // Get the index of usize which the bit is located in
770            let usize_index = self.idx / BITS;
771            self.current_usize = unsafe { *bits.get_unchecked(usize_index) };
772        }
773
774        let bit_mask = 1 << usize_offset;
775
776        let bit_flag = self.current_usize & bit_mask != 0;
777        self.idx += 1;
778        Some(bit_flag)
779    }
780
781    fn size_hint(&self) -> (usize, Option<usize>) {
782        let remaining = self.num_bits - self.idx;
783        (remaining, Some(remaining))
784    }
785
786    fn nth(&mut self, n: usize) -> Option<Self::Item> {
787        self.idx += n;
788        self.next_always_load_usize()
789    }
790}
791
792impl ExactSizeIterator for BitmapIter<'_> {}
793unsafe impl TrustedLen for BitmapIter<'_> {}
794
795pub enum BitmapOnesIter<'a> {
796    Range {
797        start: usize,
798        end: usize,
799    },
800    Buffer {
801        bits: &'a [usize],
802        cur_idx: usize,
803        cur_bits: Option<usize>,
804    },
805}
806
807impl iter::Iterator for BitmapOnesIter<'_> {
808    type Item = usize;
809
810    fn next(&mut self) -> Option<Self::Item> {
811        match self {
812            BitmapOnesIter::Range { start, end } => {
813                let i = *start;
814                if i >= *end {
815                    None
816                } else {
817                    *start += 1;
818                    Some(i)
819                }
820            }
821            BitmapOnesIter::Buffer {
822                bits,
823                cur_idx,
824                cur_bits,
825            } => {
826                while *cur_bits == Some(0) {
827                    *cur_idx += 1;
828                    *cur_bits = if *cur_idx >= bits.len() {
829                        None
830                    } else {
831                        Some(bits[*cur_idx])
832                    };
833                }
834                cur_bits.map(|bits| {
835                    let low_bit = bits & bits.wrapping_neg();
836                    let low_bit_idx = bits.trailing_zeros();
837                    *cur_bits = Some(bits ^ low_bit);
838                    *cur_idx * BITS + low_bit_idx as usize
839                })
840            }
841        }
842    }
843
844    fn size_hint(&self) -> (usize, Option<usize>) {
845        match self {
846            BitmapOnesIter::Range { start, end } => {
847                let remaining = end - start;
848                (remaining, Some(remaining))
849            }
850            BitmapOnesIter::Buffer { bits, .. } => (0, Some(bits.len() * usize::BITS as usize)),
851        }
852    }
853}
854
855pub trait FilterByBitmap: ExactSizeIterator + Sized {
856    fn filter_by_bitmap(self, bitmap: &Bitmap) -> impl Iterator<Item = Self::Item> {
857        self.zip_eq_fast(bitmap.iter())
858            .filter_map(|(item, bit)| bit.then_some(item))
859    }
860}
861
862impl<T> FilterByBitmap for T where T: ExactSizeIterator {}
863
864#[cfg(test)]
865mod tests {
866    use super::*;
867
868    #[test]
869    fn test_bitmap_builder() {
870        let bitmap1 = {
871            let mut builder = BitmapBuilder::default();
872            let bits = [
873                false, true, true, false, true, false, true, false, true, false, true, true, false,
874                true, false, true,
875            ];
876            for bit in bits {
877                builder.append(bit);
878            }
879            for (idx, bit) in bits.iter().enumerate() {
880                assert_eq!(builder.is_set(idx), *bit);
881            }
882            builder.finish()
883        };
884        let byte1 = 0b0101_0110_u8;
885        let byte2 = 0b1010_1101_u8;
886        let expected = Bitmap::from_bytes(&[byte1, byte2]);
887        assert_eq!(bitmap1, expected);
888        assert_eq!(
889            bitmap1.count_ones(),
890            (byte1.count_ones() + byte2.count_ones()) as usize
891        );
892
893        let bitmap2 = {
894            let mut builder = BitmapBuilder::default();
895            let bits = [false, true, true, false, true, false, true, false];
896            for bit in bits {
897                builder.append(bit);
898            }
899            for (idx, bit) in bits.iter().enumerate() {
900                assert_eq!(builder.is_set(idx), *bit);
901            }
902            builder.finish()
903        };
904        let byte1 = 0b0101_0110_u8;
905        let expected = Bitmap::from_bytes(&[byte1]);
906        assert_eq!(bitmap2, expected);
907    }
908
909    #[test]
910    fn test_builder_append_bitmap() {
911        let mut builder = BitmapBuilder::default();
912        builder.append_bitmap(&Bitmap::zeros(64));
913        builder.append_bitmap(&Bitmap::ones(64));
914        builder.append_bitmap(&Bitmap::from_bytes(&[0b1010_1010]));
915        builder.append_bitmap(&Bitmap::zeros(8));
916        builder.append_bitmap(&Bitmap::ones(8));
917        let bitmap = builder.finish();
918        assert_eq!(
919            bitmap,
920            Bitmap::from_vec_with_len(
921                vec![0, usize::MAX, 0b1111_1111_0000_0000_1010_1010],
922                64 + 64 + 8 + 8 + 8
923            ),
924        );
925    }
926
927    #[test]
928    fn test_bitmap_get_size() {
929        let bitmap1 = {
930            let mut builder = BitmapBuilder::default();
931            let bits = [
932                false, true, true, false, true, false, true, false, true, false, true, true, false,
933                true, false, true,
934            ];
935            for bit in bits {
936                builder.append(bit);
937            }
938            for (idx, bit) in bits.iter().enumerate() {
939                assert_eq!(builder.is_set(idx), *bit);
940            }
941            builder.finish()
942        };
943        assert_eq!(8, bitmap1.estimated_heap_size());
944        assert_eq!(40, bitmap1.estimated_size());
945    }
946
947    #[test]
948    fn test_bitmap_all_high() {
949        let num_bits = 3;
950        let bitmap = Bitmap::ones(num_bits);
951        assert_eq!(bitmap.len(), num_bits);
952        assert!(bitmap.all());
953        for i in 0..num_bits {
954            assert!(bitmap.is_set(i));
955        }
956        // Test to and from protobuf is OK.
957        assert_eq!(bitmap, Bitmap::from(&bitmap.to_protobuf()));
958    }
959
960    #[test]
961    fn test_bitwise_and() {
962        #[rustfmt::skip]
963        let cases = [(
964            vec![],
965            vec![],
966            vec![],
967        ), (
968            vec![0, 1, 1, 0, 1, 0, 1, 0],
969            vec![0, 1, 0, 0, 1, 1, 1, 0],
970            vec![0, 1, 0, 0, 1, 0, 1, 0],
971        ), (
972            vec![0, 1, 0, 1, 0],
973            vec![1, 1, 1, 1, 0],
974            vec![0, 1, 0, 1, 0],
975        )];
976
977        for (input1, input2, expected) in cases {
978            let bitmap1: Bitmap = input1.into_iter().map(|x| x != 0).collect();
979            let bitmap2: Bitmap = input2.into_iter().map(|x| x != 0).collect();
980            let res = bitmap1 & bitmap2;
981            res.assert_valid();
982            assert_eq!(res.iter().map(|x| x as i32).collect::<Vec<_>>(), expected,);
983        }
984    }
985
986    #[test]
987    fn test_bitwise_not() {
988        #[rustfmt::skip]
989        let cases = [(
990            vec![1, 0, 1, 0, 1],
991            vec![0, 1, 0, 1, 0],
992        ), (
993            vec![],
994            vec![],
995        ), (
996            vec![1, 0, 1, 1, 0, 0, 1, 1],
997            vec![0, 1, 0, 0, 1, 1, 0, 0],
998        ), (
999            vec![1, 0, 0, 1, 1, 1, 0, 0, 1, 0],
1000            vec![0, 1, 1, 0, 0, 0, 1, 1, 0, 1],
1001        )];
1002
1003        for (input, expected) in cases {
1004            let bitmap: Bitmap = input.into_iter().map(|x| x != 0).collect();
1005            let res = !bitmap;
1006            res.assert_valid();
1007            assert_eq!(res.iter().map(|x| x as i32).collect::<Vec<_>>(), expected);
1008        }
1009    }
1010
1011    #[test]
1012    fn test_bitwise_or() {
1013        let bitmap1 = Bitmap::from_bytes(&[0b01101010]);
1014        let bitmap2 = Bitmap::from_bytes(&[0b01001110]);
1015        assert_eq!(Bitmap::from_bytes(&[0b01101110]), (bitmap1 | bitmap2));
1016    }
1017
1018    #[test]
1019    fn test_bitmap_is_set() {
1020        let bitmap = Bitmap::from_bytes(&[0b01001010]);
1021        assert!(!bitmap.is_set(0));
1022        assert!(bitmap.is_set(1));
1023        assert!(!bitmap.is_set(2));
1024        assert!(bitmap.is_set(3));
1025        assert!(!bitmap.is_set(4));
1026        assert!(!bitmap.is_set(5));
1027        assert!(bitmap.is_set(6));
1028        assert!(!bitmap.is_set(7));
1029    }
1030
1031    #[test]
1032    fn test_bitmap_iter() {
1033        {
1034            let bitmap = Bitmap::from_bytes(&[0b01001010]);
1035            let mut booleans = vec![];
1036            for b in bitmap.iter() {
1037                booleans.push(b as u8);
1038            }
1039            assert_eq!(booleans, vec![0u8, 1, 0, 1, 0, 0, 1, 0]);
1040        }
1041        {
1042            let bitmap: Bitmap = vec![true; 5].into_iter().collect();
1043            for b in bitmap.iter() {
1044                assert!(b);
1045            }
1046        }
1047    }
1048
1049    #[test]
1050    fn test_bitmap_high_ranges_iter() {
1051        fn test(bits: impl IntoIterator<Item = bool>, expected: Vec<RangeInclusive<usize>>) {
1052            let bitmap = Bitmap::from_iter(bits);
1053            let high_ranges = bitmap.high_ranges().collect::<Vec<_>>();
1054            assert_eq!(high_ranges, expected);
1055        }
1056
1057        test(
1058            vec![
1059                true, true, true, false, false, true, true, true, false, true,
1060            ],
1061            vec![0..=2, 5..=7, 9..=9],
1062        );
1063        test(vec![true, true, true], vec![0..=2]);
1064        test(vec![false, false, false], vec![]);
1065    }
1066
1067    #[test]
1068    fn test_bitmap_from_protobuf() {
1069        let bitmap_bytes = vec![3u8 /* len % BITS */, 0b0101_0010, 0b110];
1070        let buf = PbBuffer {
1071            body: bitmap_bytes,
1072            compression: CompressionType::None as _,
1073        };
1074        let bitmap: Bitmap = (&buf).into();
1075        let actual_bytes: Vec<u8> = bitmap.iter().map(|b| b as u8).collect();
1076
1077        assert_eq!(actual_bytes, vec![0, 1, 0, 0, 1, 0, 1, 0, /*  */ 0, 1, 1]); // in reverse order
1078        assert_eq!(bitmap.count_ones(), 5);
1079    }
1080
1081    #[test]
1082    fn test_bitmap_from_buffer() {
1083        let byte1 = 0b0110_1010_u8;
1084        let byte2 = 0b1011_0101_u8;
1085        let bitmap = Bitmap::from_bytes(&[0b0110_1010, 0b1011_0101]);
1086        let expected = Bitmap::from_iter(vec![
1087            false, true, false, true, false, true, true, false, true, false, true, false, true,
1088            true, false, true,
1089        ]);
1090        let count_ones = (byte1.count_ones() + byte2.count_ones()) as usize;
1091        assert_eq!(expected, bitmap);
1092        assert_eq!(bitmap.count_ones(), count_ones);
1093        assert_eq!(expected.count_ones(), count_ones);
1094    }
1095
1096    #[test]
1097    fn test_bitmap_eq() {
1098        let b1: Bitmap = Bitmap::zeros(3);
1099        let b2: Bitmap = Bitmap::zeros(5);
1100        assert_ne!(b1, b2);
1101
1102        let b1: Bitmap = [true, false].iter().cycle().cloned().take(10000).collect();
1103        let b2: Bitmap = [true, false].iter().cycle().cloned().take(10000).collect();
1104        assert_eq!(b1, b2);
1105    }
1106
1107    #[test]
1108    fn test_bitmap_set() {
1109        let mut b = BitmapBuilder::zeroed(10);
1110
1111        b.set(0, true);
1112        b.set(7, true);
1113        b.set(8, true);
1114        b.set(9, true);
1115
1116        b.set(7, false);
1117        b.set(8, false);
1118
1119        b.append(true);
1120        assert_eq!(b.len(), 11);
1121
1122        let b = b.finish();
1123        assert_eq!(b.count_ones(), 3);
1124        assert_eq!(&b.bits.unwrap()[..], &[0b0110_0000_0001]);
1125    }
1126
1127    #[test]
1128    fn test_bitmap_pop() {
1129        let mut b = BitmapBuilder::zeroed(7);
1130
1131        {
1132            b.append(true);
1133            assert!(b.is_set(b.len() - 1));
1134            b.pop();
1135            assert!(!b.is_set(b.len() - 1));
1136        }
1137
1138        {
1139            b.append(false);
1140            assert!(!b.is_set(b.len() - 1));
1141            b.pop();
1142            assert!(!b.is_set(b.len() - 1));
1143        }
1144
1145        {
1146            b.append(true);
1147            b.append(false);
1148            assert!(!b.is_set(b.len() - 1));
1149            b.pop();
1150            assert!(b.is_set(b.len() - 1));
1151            b.pop();
1152            assert!(!b.is_set(b.len() - 1));
1153        }
1154    }
1155
1156    #[test]
1157    fn test_bitmap_iter_ones() {
1158        let mut builder = BitmapBuilder::zeroed(1000);
1159        builder.append_n(1000, true);
1160        let bitmap = builder.finish();
1161        let mut iter = bitmap.iter_ones();
1162        for i in 0..1000 {
1163            let item = iter.next();
1164            assert!(item == Some(i + 1000));
1165        }
1166        for _ in 0..5 {
1167            let item = iter.next();
1168            assert!(item.is_none());
1169        }
1170    }
1171
1172    #[test]
1173    fn from_indices_creates_correct_bitmap() {
1174        let bitmap = Bitmap::from_indices(10, [1, 3, 5, 7, 9]);
1175        assert_eq!(bitmap.len(), 10);
1176        for i in [1, 3, 5, 7, 9] {
1177            assert!(bitmap.is_set(i));
1178        }
1179
1180        for i in [0, 2, 4, 6, 8] {
1181            assert!(!bitmap.is_set(i));
1182        }
1183    }
1184
1185    #[test]
1186    fn from_indices_empty_indices_creates_zeroed_bitmap() {
1187        let bitmap = Bitmap::from_indices(10, [] as [usize; 0]);
1188        assert_eq!(bitmap.len(), 10);
1189        assert!(!bitmap.any());
1190    }
1191
1192    #[test]
1193    fn from_indices_out_of_bounds_panics() {
1194        let result = std::panic::catch_unwind(|| {
1195            Bitmap::from_indices(5, [0, 6]);
1196        });
1197        assert!(result.is_err());
1198    }
1199
1200    #[test]
1201    fn from_indices_all_indices_set_creates_filled_bitmap() {
1202        let bitmap = Bitmap::from_indices(5, [0, 1, 2, 3, 4]);
1203        assert_eq!(bitmap.len(), 5);
1204        assert!(bitmap.all());
1205    }
1206}