Skip to main content

risingwave_common/
bitmap.rs

1// Copyright 2024 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Licensed to the Apache Software Foundation (ASF) under one
16// or more contributor license agreements.  See the NOTICE file
17// distributed with this work for additional information
18// regarding copyright ownership.  The ASF licenses this file
19// to you under the Apache License, Version 2.0 (the
20// "License"); you may not use this file except in compliance
21// with the License.  You may obtain a copy of the License at
22//
23//   http://www.apache.org/licenses/LICENSE-2.0
24//
25// Unless required by applicable law or agreed to in writing,
26// software distributed under the License is distributed on an
27// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
28// KIND, either express or implied.  See the License for the
29// specific language governing permissions and limitations
30// under the License.
31
32//! Defines a bitmap, which is used to track which values in an Arrow array are null.
33//! This is called a "validity bitmap" in the Arrow documentation.
34//! This file is adapted from [arrow-rs](https://github.com/apache/arrow-rs)
35
36// allow `zip` for performance reasons
37#![allow(clippy::disallowed_methods)]
38
39use std::borrow::Borrow;
40use std::iter::{self, TrustedLen};
41use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, Not, Range, RangeInclusive};
42
43use risingwave_common_estimate_size::EstimateSize;
44use risingwave_pb::common::PbBuffer;
45use risingwave_pb::common::buffer::CompressionType;
46use rw_iter_util::ZipEqFast;
47
48#[derive(Default, Debug, Clone, EstimateSize)]
49pub struct BitmapBuilder {
50    len: usize,
51    data: Vec<usize>,
52}
53
54const BITS: usize = usize::BITS as usize;
55
56impl From<Bitmap> for BitmapBuilder {
57    fn from(bitmap: Bitmap) -> Self {
58        let len = bitmap.len();
59        if let Some(bits) = bitmap.bits {
60            BitmapBuilder {
61                len,
62                data: bits.into_vec(),
63            }
64        } else if bitmap.count_ones == 0 {
65            Self::zeroed(bitmap.len())
66        } else {
67            debug_assert!(bitmap.len() == bitmap.count_ones());
68            Self::filled(bitmap.len())
69        }
70    }
71}
72
73impl BitmapBuilder {
74    /// Creates a new empty bitmap with at least the specified capacity.
75    pub fn with_capacity(capacity: usize) -> BitmapBuilder {
76        BitmapBuilder {
77            len: 0,
78            data: Vec::with_capacity(Bitmap::vec_len(capacity)),
79        }
80    }
81
82    /// Creates a new bitmap with all bits set to 0.
83    pub fn zeroed(len: usize) -> BitmapBuilder {
84        BitmapBuilder {
85            len,
86            data: vec![0; Bitmap::vec_len(len)],
87        }
88    }
89
90    /// Creates a new bitmap with all bits set to 1.
91    pub fn filled(len: usize) -> BitmapBuilder {
92        let vec_len = Bitmap::vec_len(len);
93        let mut data = vec![usize::MAX; vec_len];
94        if vec_len >= 1 && !len.is_multiple_of(BITS) {
95            data[vec_len - 1] = (1 << (len % BITS)) - 1;
96        }
97        BitmapBuilder { len, data }
98    }
99
100    /// Writes a new value into a single bit.
101    pub fn set(&mut self, n: usize, val: bool) {
102        assert!(n < self.len);
103
104        let byte = &mut self.data[n / BITS];
105        let mask = 1 << (n % BITS);
106        if val {
107            *byte |= mask;
108        } else {
109            *byte &= !mask;
110        }
111    }
112
113    /// Tests a single bit.
114    pub fn is_set(&self, n: usize) -> bool {
115        assert!(n < self.len);
116        let byte = &self.data[n / BITS];
117        let mask = 1 << (n % BITS);
118        *byte & mask != 0
119    }
120
121    /// Appends a single bit to the back.
122    pub fn append(&mut self, bit_set: bool) -> &mut Self {
123        if self.len.is_multiple_of(BITS) {
124            self.data.push(0);
125        }
126        self.data[self.len / BITS] |= (bit_set as usize) << (self.len % BITS);
127        self.len += 1;
128        self
129    }
130
131    /// Appends `n` bits to the back.
132    pub fn append_n(&mut self, mut n: usize, bit_set: bool) -> &mut Self {
133        while n != 0 && !self.len.is_multiple_of(BITS) {
134            self.append(bit_set);
135            n -= 1;
136        }
137        self.len += n;
138        self.data.resize(
139            Bitmap::vec_len(self.len),
140            if bit_set { usize::MAX } else { 0 },
141        );
142        if bit_set && !self.len.is_multiple_of(BITS) {
143            // remove tailing 1s
144            *self.data.last_mut().unwrap() &= (1 << (self.len % BITS)) - 1;
145        }
146        self
147    }
148
149    /// Removes the last bit.
150    pub fn pop(&mut self) -> Option<()> {
151        if self.len == 0 {
152            return None;
153        }
154        self.len -= 1;
155        self.data.truncate(Bitmap::vec_len(self.len));
156        if !self.len.is_multiple_of(BITS) {
157            *self.data.last_mut().unwrap() &= (1 << (self.len % BITS)) - 1;
158        }
159        Some(())
160    }
161
162    /// Appends a bitmap to the back.
163    pub fn append_bitmap(&mut self, other: &Bitmap) -> &mut Self {
164        if self.len.is_multiple_of(BITS) {
165            // fast path: self is aligned
166            self.len += other.len();
167            if let Some(bits) = &other.bits {
168                // append the bytes
169                self.data.extend_from_slice(bits);
170            } else if other.count_ones == 0 {
171                // append 0s
172                self.data.resize(Bitmap::vec_len(self.len), 0);
173            } else {
174                // append 1s
175                self.data.resize(Bitmap::vec_len(self.len), usize::MAX);
176                if !self.len.is_multiple_of(BITS) {
177                    // remove tailing 1s
178                    *self.data.last_mut().unwrap() = (1 << (self.len % BITS)) - 1;
179                }
180            }
181        } else {
182            // slow path: append bits one by one
183            for bit in other.iter() {
184                self.append(bit);
185            }
186        }
187        self
188    }
189
190    /// Finishes building and returns the bitmap.
191    pub fn finish(self) -> Bitmap {
192        let count_ones = self.data.iter().map(|&x| x.count_ones()).sum::<u32>() as usize;
193        Bitmap {
194            num_bits: self.len(),
195            count_ones,
196            bits: (count_ones != 0 && count_ones != self.len).then(|| self.data.into()),
197        }
198    }
199
200    /// Returns the number of bits in the bitmap.
201    pub fn len(&self) -> usize {
202        self.len
203    }
204
205    /// Returns `true` if the bitmap has a length of 0.
206    pub fn is_empty(&self) -> bool {
207        self.len == 0
208    }
209}
210
211/// An immutable bitmap. Use [`BitmapBuilder`] to build it.
212#[derive(Clone, PartialEq, Eq)]
213pub struct Bitmap {
214    /// The useful bits in the bitmap. The total number of bits will usually
215    /// be larger than the useful bits due to byte-padding.
216    num_bits: usize,
217
218    /// The number of high bits in the bitmap.
219    count_ones: usize,
220
221    /// Bits are stored in a compact form.
222    /// They are packed into `usize`s.
223    ///
224    /// Optimization: If all bits are set to 0 or 1, this field MUST be `None`.
225    bits: Option<Box<[usize]>>,
226}
227
228impl EstimateSize for Bitmap {
229    fn estimated_heap_size(&self) -> usize {
230        match &self.bits {
231            Some(bits) => std::mem::size_of_val(bits.as_ref()),
232            None => 0,
233        }
234    }
235}
236
237impl std::fmt::Debug for Bitmap {
238    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
239        for data in self.iter() {
240            write!(f, "{}", data as u8)?;
241        }
242        Ok(())
243    }
244}
245
246impl Bitmap {
247    /// Creates a new bitmap with all bits set to 0.
248    pub fn zeros(num_bits: usize) -> Self {
249        Self {
250            bits: None,
251            num_bits,
252            count_ones: 0,
253        }
254    }
255
256    /// Creates a new bitmap with all bits set to 1.
257    pub fn ones(num_bits: usize) -> Self {
258        Self {
259            bits: None,
260            num_bits,
261            count_ones: num_bits,
262        }
263    }
264
265    /// Creates a new bitmap from vector.
266    fn from_vec_with_len(buf: Vec<usize>, num_bits: usize) -> Self {
267        debug_assert_eq!(buf.len(), Self::vec_len(num_bits));
268        let count_ones = buf.iter().map(|&x| x.count_ones()).sum::<u32>() as usize;
269        debug_assert!(count_ones <= num_bits);
270        Self {
271            bits: (count_ones != 0 && count_ones != num_bits).then(|| buf.into()),
272            num_bits,
273            count_ones,
274        }
275    }
276
277    /// Creates a new bitmap from bytes.
278    pub fn from_bytes(buf: &[u8]) -> Self {
279        Self::from_bytes_with_len(buf, buf.len() * 8)
280    }
281
282    /// Creates a new bitmap from the indices of bits set to 1.
283    pub fn from_indices<I, T>(num_bits: usize, ones: I) -> Self
284    where
285        I: IntoIterator<Item = T>,
286        T: Borrow<usize>,
287    {
288        let mut builder = BitmapBuilder::zeroed(num_bits);
289
290        for idx in ones {
291            let idx = *idx.borrow();
292            debug_assert!(idx < num_bits);
293            builder.set(idx, true);
294        }
295
296        builder.finish()
297    }
298
299    /// Creates a new bitmap from bytes and length.
300    fn from_bytes_with_len(buf: &[u8], num_bits: usize) -> Self {
301        let mut bits = Vec::with_capacity(Self::vec_len(num_bits));
302        let slice = unsafe {
303            bits.set_len(bits.capacity());
304            std::slice::from_raw_parts_mut(bits.as_ptr() as *mut u8, bits.len() * (BITS / 8))
305        };
306        slice[..buf.len()].copy_from_slice(buf);
307        slice[buf.len()..].fill(0);
308        Self::from_vec_with_len(bits, num_bits)
309    }
310
311    /// Creates a new bitmap from a slice of `bool`.
312    pub fn from_bool_slice(bools: &[bool]) -> Self {
313        // use SIMD to speed up
314        let mut iter = bools.iter().copied().array_chunks::<BITS>();
315        let mut bits = Vec::with_capacity(Self::vec_len(bools.len()));
316        for chunk in iter.by_ref() {
317            let bitmask = std::simd::Mask::<i8, BITS>::from_array(chunk).to_bitmask() as usize;
318            bits.push(bitmask);
319        }
320        // `ArrayChunks::into_remainder` now returns the remainder iterator directly
321        // (previously `Option<_>`); the full chunks above are already drained via
322        // `by_ref`, so the remainder is simply empty when the length divides evenly.
323        let remainder_iter = iter.into_remainder();
324        if !remainder_iter.is_empty() {
325            let mut bitmask = 0;
326            for (i, b) in remainder_iter.enumerate() {
327                bitmask |= (b as usize) << i;
328            }
329            bits.push(bitmask);
330        }
331        Self::from_vec_with_len(bits, bools.len())
332    }
333
334    /// Return the next set bit index on or after `bit_idx`.
335    pub fn next_set_bit(&self, bit_idx: usize) -> Option<usize> {
336        (bit_idx..self.len()).find(|&idx| unsafe { self.is_set_unchecked(idx) })
337    }
338
339    /// Counts the number of bits set to 1.
340    pub fn count_ones(&self) -> usize {
341        self.count_ones
342    }
343
344    /// Returns true if any bit is set to 1.
345    pub fn any(&self) -> bool {
346        self.count_ones != 0
347    }
348
349    /// Returns the length of vector to store `num_bits` bits.
350    fn vec_len(num_bits: usize) -> usize {
351        num_bits.div_ceil(BITS)
352    }
353
354    /// Returns the number of valid bits in the bitmap,
355    /// also referred to its 'length'.
356    #[inline]
357    pub fn len(&self) -> usize {
358        self.num_bits
359    }
360
361    /// Returns true if the `Bitmap` has a length of 0.
362    pub fn is_empty(&self) -> bool {
363        self.num_bits == 0
364    }
365
366    /// Returns true if the bit at `idx` is set, without doing bounds checking.
367    ///
368    /// # Safety
369    ///
370    /// Index must be in range.
371    pub unsafe fn is_set_unchecked(&self, idx: usize) -> bool {
372        unsafe {
373            match &self.bits {
374                None => self.count_ones != 0,
375                Some(bits) => bits.get_unchecked(idx / BITS) & (1 << (idx % BITS)) != 0,
376            }
377        }
378    }
379
380    /// Returns true if the bit at `idx` is set.
381    pub fn is_set(&self, idx: usize) -> bool {
382        assert!(idx < self.len());
383        unsafe { self.is_set_unchecked(idx) }
384    }
385
386    /// Tests if every bit is set to 1.
387    pub fn all(&self) -> bool {
388        self.count_ones == self.len()
389    }
390
391    /// Produces an iterator over each bit.
392    pub fn iter(&self) -> BitmapIter<'_> {
393        BitmapIter {
394            bits: self.bits.as_ref().map(|s| &s[..]),
395            idx: 0,
396            num_bits: self.num_bits,
397            current_usize: 0,
398            all_ones: self.count_ones == self.num_bits,
399        }
400    }
401
402    /// Enumerates the index of each bit set to 1.
403    pub fn iter_ones(&self) -> BitmapOnesIter<'_> {
404        if let Some(bits) = &self.bits {
405            BitmapOnesIter::Buffer {
406                bits: &bits[..],
407                cur_idx: 0,
408                cur_bits: bits.first().cloned(),
409            }
410        } else {
411            // all zeros or all ones
412            BitmapOnesIter::Range {
413                start: 0,
414                end: self.count_ones,
415            }
416        }
417    }
418
419    /// Returns an iterator which yields the position ranges of continuous high bits.
420    pub fn high_ranges(&self) -> impl Iterator<Item = RangeInclusive<usize>> + '_ {
421        let mut start = None;
422
423        self.iter()
424            .chain(iter::once(false))
425            .enumerate()
426            .filter_map(move |(i, bit)| match (bit, start) {
427                // A new high range starts.
428                (true, None) => {
429                    start = Some(i);
430                    None
431                }
432                // The current high range ends.
433                (false, Some(s)) => {
434                    start = None;
435                    Some(s..=(i - 1))
436                }
437                _ => None,
438            })
439    }
440
441    #[cfg(test)]
442    fn assert_valid(&self) {
443        assert_eq!(
444            self.iter().map(|x| x as usize).sum::<usize>(),
445            self.count_ones
446        )
447    }
448
449    /// Creates a new bitmap with all bits in range set to 1.
450    ///
451    /// # Example
452    /// ```
453    /// use risingwave_common::bitmap::Bitmap;
454    /// let bitmap = Bitmap::from_range(200, 100..180);
455    /// assert_eq!(bitmap.count_ones(), 80);
456    /// for i in 0..200 {
457    ///     assert_eq!(bitmap.is_set(i), i >= 100 && i < 180);
458    /// }
459    /// ```
460    pub fn from_range(num_bits: usize, range: Range<usize>) -> Self {
461        assert!(range.start <= range.end);
462        assert!(range.end <= num_bits);
463        if range.start == range.end {
464            return Self::zeros(num_bits);
465        } else if range == (0..num_bits) {
466            return Self::ones(num_bits);
467        }
468        let mut bits = vec![0; Self::vec_len(num_bits)];
469        let start = range.start / BITS;
470        let end = range.end / BITS;
471        let start_offset = range.start % BITS;
472        let end_offset = range.end % BITS;
473        if start == end {
474            bits[start] = ((1 << (end_offset - start_offset)) - 1) << start_offset;
475        } else {
476            bits[start] = !0 << start_offset;
477            bits[start + 1..end].fill(!0);
478            if end_offset != 0 {
479                bits[end] = (1 << end_offset) - 1;
480            }
481        }
482        Self {
483            bits: Some(bits.into()),
484            num_bits,
485            count_ones: range.len(),
486        }
487    }
488}
489
490impl From<usize> for Bitmap {
491    fn from(val: usize) -> Self {
492        Self::ones(val)
493    }
494}
495
496impl<'b> BitAnd<&'b Bitmap> for &Bitmap {
497    type Output = Bitmap;
498
499    fn bitand(self, rhs: &'b Bitmap) -> Bitmap {
500        assert_eq!(self.num_bits, rhs.num_bits);
501        let (lbits, rbits) = match (&self.bits, &rhs.bits) {
502            _ if self.count_ones == 0 || rhs.count_ones == 0 => {
503                return Bitmap::zeros(self.num_bits);
504            }
505            (_, None) => return self.clone(),
506            (None, _) => return rhs.clone(),
507            (Some(lbits), Some(rbits)) => (lbits, rbits),
508        };
509        let bits = (lbits.iter().zip(rbits.iter()))
510            .map(|(&a, &b)| a & b)
511            .collect();
512        Bitmap::from_vec_with_len(bits, self.num_bits)
513    }
514}
515
516impl BitAnd<Bitmap> for &Bitmap {
517    type Output = Bitmap;
518
519    fn bitand(self, rhs: Bitmap) -> Self::Output {
520        self.bitand(&rhs)
521    }
522}
523
524impl<'b> BitAnd<&'b Bitmap> for Bitmap {
525    type Output = Bitmap;
526
527    fn bitand(self, rhs: &'b Bitmap) -> Self::Output {
528        rhs.bitand(self)
529    }
530}
531
532impl BitAnd for Bitmap {
533    type Output = Bitmap;
534
535    fn bitand(self, rhs: Bitmap) -> Self::Output {
536        (&self).bitand(&rhs)
537    }
538}
539
540impl BitAndAssign<&Bitmap> for Bitmap {
541    fn bitand_assign(&mut self, rhs: &Bitmap) {
542        *self = &*self & rhs;
543    }
544}
545
546impl BitAndAssign<Bitmap> for Bitmap {
547    fn bitand_assign(&mut self, rhs: Bitmap) {
548        *self = &*self & rhs;
549    }
550}
551
552impl<'b> BitOr<&'b Bitmap> for &Bitmap {
553    type Output = Bitmap;
554
555    fn bitor(self, rhs: &'b Bitmap) -> Bitmap {
556        assert_eq!(self.num_bits, rhs.num_bits);
557        let (lbits, rbits) = match (&self.bits, &rhs.bits) {
558            _ if self.count_ones == self.num_bits || rhs.count_ones == self.num_bits => {
559                return Bitmap::ones(self.num_bits);
560            }
561            (_, None) => return self.clone(),
562            (None, _) => return rhs.clone(),
563            (Some(lbits), Some(rbits)) => (lbits, rbits),
564        };
565        let bits = (lbits.iter().zip(rbits.iter()))
566            .map(|(&a, &b)| a | b)
567            .collect();
568        Bitmap::from_vec_with_len(bits, self.num_bits)
569    }
570}
571
572impl BitOr<Bitmap> for &Bitmap {
573    type Output = Bitmap;
574
575    fn bitor(self, rhs: Bitmap) -> Self::Output {
576        self.bitor(&rhs)
577    }
578}
579
580impl<'b> BitOr<&'b Bitmap> for Bitmap {
581    type Output = Bitmap;
582
583    fn bitor(self, rhs: &'b Bitmap) -> Self::Output {
584        rhs.bitor(self)
585    }
586}
587
588impl BitOr for Bitmap {
589    type Output = Bitmap;
590
591    fn bitor(self, rhs: Bitmap) -> Self::Output {
592        (&self).bitor(&rhs)
593    }
594}
595
596impl BitOrAssign<&Bitmap> for Bitmap {
597    fn bitor_assign(&mut self, rhs: &Bitmap) {
598        *self = &*self | rhs;
599    }
600}
601
602impl BitOrAssign<Bitmap> for Bitmap {
603    fn bitor_assign(&mut self, rhs: Bitmap) {
604        *self |= &rhs;
605    }
606}
607
608impl BitXor for &Bitmap {
609    type Output = Bitmap;
610
611    fn bitxor(self, rhs: &Bitmap) -> Self::Output {
612        assert_eq!(self.num_bits, rhs.num_bits);
613        let (lbits, rbits) = match (&self.bits, &rhs.bits) {
614            (_, None) if rhs.count_ones == 0 => return self.clone(),
615            (_, None) => return !self,
616            (None, _) if self.count_ones == 0 => return rhs.clone(),
617            (None, _) => return !rhs,
618            (Some(lbits), Some(rbits)) => (lbits, rbits),
619        };
620        let bits = (lbits.iter().zip(rbits.iter()))
621            .map(|(&a, &b)| a ^ b)
622            .collect();
623        Bitmap::from_vec_with_len(bits, self.num_bits)
624    }
625}
626
627impl Not for &Bitmap {
628    type Output = Bitmap;
629
630    fn not(self) -> Self::Output {
631        let bits = match &self.bits {
632            None if self.count_ones == 0 => return Bitmap::ones(self.num_bits),
633            None => return Bitmap::zeros(self.num_bits),
634            Some(bits) => bits,
635        };
636        let mut bits: Box<[usize]> = bits.iter().map(|b| !b).collect();
637        if !self.num_bits.is_multiple_of(BITS) {
638            bits[self.num_bits / BITS] &= (1 << (self.num_bits % BITS)) - 1;
639        }
640        Bitmap {
641            num_bits: self.num_bits,
642            count_ones: self.num_bits - self.count_ones,
643            bits: Some(bits),
644        }
645    }
646}
647
648impl Not for Bitmap {
649    type Output = Bitmap;
650
651    fn not(mut self) -> Self::Output {
652        let bits = match &mut self.bits {
653            None if self.count_ones == 0 => return Bitmap::ones(self.num_bits),
654            None => return Bitmap::zeros(self.num_bits),
655            Some(bits) => bits,
656        };
657        bits.iter_mut().for_each(|x| *x = !*x);
658        if !self.num_bits.is_multiple_of(BITS) {
659            bits[self.num_bits / BITS] &= (1 << (self.num_bits % BITS)) - 1;
660        }
661        self.count_ones = self.num_bits - self.count_ones;
662        self
663    }
664}
665
666impl FromIterator<bool> for Bitmap {
667    fn from_iter<T: IntoIterator<Item = bool>>(iter: T) -> Self {
668        let vec = iter.into_iter().collect::<Vec<_>>();
669        Self::from_bool_slice(&vec)
670    }
671}
672
673impl FromIterator<Option<bool>> for Bitmap {
674    fn from_iter<T: IntoIterator<Item = Option<bool>>>(iter: T) -> Self {
675        iter.into_iter().map(|b| b.unwrap_or(false)).collect()
676    }
677}
678
679impl Bitmap {
680    pub fn to_protobuf(&self) -> PbBuffer {
681        let body_len = self.num_bits.div_ceil(8) + 1;
682        let mut body = Vec::with_capacity(body_len);
683        body.push((self.num_bits % 8) as u8);
684        match &self.bits {
685            None if self.count_ones == 0 => body.resize(body_len, 0),
686            None => {
687                body.resize(body_len, u8::MAX);
688                if !self.num_bits.is_multiple_of(8) {
689                    body[body_len - 1] = (1 << (self.num_bits % 8)) - 1;
690                }
691            }
692            Some(bits) => {
693                body.extend_from_slice(unsafe {
694                    std::slice::from_raw_parts(
695                        bits.as_ptr() as *const u8,
696                        self.num_bits.div_ceil(8),
697                    )
698                });
699            }
700        }
701        PbBuffer {
702            body,
703            compression: CompressionType::None as i32,
704        }
705    }
706}
707
708impl From<&PbBuffer> for Bitmap {
709    fn from(buf: &PbBuffer) -> Self {
710        let last_byte_num_bits = buf.body[0];
711        let num_bits = ((buf.body.len() - 1) * 8) - ((8 - last_byte_num_bits) % 8) as usize;
712        Self::from_bytes_with_len(&buf.body[1..], num_bits)
713    }
714}
715
716impl From<PbBuffer> for Bitmap {
717    fn from(buf: PbBuffer) -> Self {
718        Self::from(&buf)
719    }
720}
721
722/// Bitmap iterator.
723pub struct BitmapIter<'a> {
724    bits: Option<&'a [usize]>,
725    idx: usize,
726    num_bits: usize,
727    current_usize: usize,
728    all_ones: bool,
729}
730
731impl BitmapIter<'_> {
732    fn next_always_load_usize(&mut self) -> Option<bool> {
733        if self.idx >= self.num_bits {
734            return None;
735        }
736        let Some(bits) = &self.bits else {
737            self.idx += 1;
738            return Some(self.all_ones);
739        };
740
741        // Offset of the bit within the usize.
742        let usize_offset = self.idx % BITS;
743
744        // Get the index of usize which the bit is located in
745        let usize_index = self.idx / BITS;
746        self.current_usize = unsafe { *bits.get_unchecked(usize_index) };
747
748        let bit_mask = 1 << usize_offset;
749        let bit_flag = self.current_usize & bit_mask != 0;
750        self.idx += 1;
751        Some(bit_flag)
752    }
753}
754
755impl iter::Iterator for BitmapIter<'_> {
756    type Item = bool;
757
758    fn next(&mut self) -> Option<Self::Item> {
759        if self.idx >= self.num_bits {
760            return None;
761        }
762        let Some(bits) = &self.bits else {
763            self.idx += 1;
764            return Some(self.all_ones);
765        };
766
767        // Offset of the bit within the usize.
768        let usize_offset = self.idx % BITS;
769
770        if usize_offset == 0 {
771            // Get the index of usize which the bit is located in
772            let usize_index = self.idx / BITS;
773            self.current_usize = unsafe { *bits.get_unchecked(usize_index) };
774        }
775
776        let bit_mask = 1 << usize_offset;
777
778        let bit_flag = self.current_usize & bit_mask != 0;
779        self.idx += 1;
780        Some(bit_flag)
781    }
782
783    fn size_hint(&self) -> (usize, Option<usize>) {
784        let remaining = self.num_bits - self.idx;
785        (remaining, Some(remaining))
786    }
787
788    fn nth(&mut self, n: usize) -> Option<Self::Item> {
789        self.idx += n;
790        self.next_always_load_usize()
791    }
792}
793
794impl ExactSizeIterator for BitmapIter<'_> {}
795unsafe impl TrustedLen for BitmapIter<'_> {}
796
797pub enum BitmapOnesIter<'a> {
798    Range {
799        start: usize,
800        end: usize,
801    },
802    Buffer {
803        bits: &'a [usize],
804        cur_idx: usize,
805        cur_bits: Option<usize>,
806    },
807}
808
809impl iter::Iterator for BitmapOnesIter<'_> {
810    type Item = usize;
811
812    fn next(&mut self) -> Option<Self::Item> {
813        match self {
814            BitmapOnesIter::Range { start, end } => {
815                let i = *start;
816                if i >= *end {
817                    None
818                } else {
819                    *start += 1;
820                    Some(i)
821                }
822            }
823            BitmapOnesIter::Buffer {
824                bits,
825                cur_idx,
826                cur_bits,
827            } => {
828                while *cur_bits == Some(0) {
829                    *cur_idx += 1;
830                    *cur_bits = if *cur_idx >= bits.len() {
831                        None
832                    } else {
833                        Some(bits[*cur_idx])
834                    };
835                }
836                cur_bits.map(|bits| {
837                    let low_bit = bits & bits.wrapping_neg();
838                    let low_bit_idx = bits.trailing_zeros();
839                    *cur_bits = Some(bits ^ low_bit);
840                    *cur_idx * BITS + low_bit_idx as usize
841                })
842            }
843        }
844    }
845
846    fn size_hint(&self) -> (usize, Option<usize>) {
847        match self {
848            BitmapOnesIter::Range { start, end } => {
849                let remaining = end - start;
850                (remaining, Some(remaining))
851            }
852            BitmapOnesIter::Buffer { bits, .. } => (0, Some(bits.len() * usize::BITS as usize)),
853        }
854    }
855}
856
857pub trait FilterByBitmap: ExactSizeIterator + Sized {
858    fn filter_by_bitmap(self, bitmap: &Bitmap) -> impl Iterator<Item = Self::Item> {
859        self.zip_eq_fast(bitmap.iter())
860            .filter_map(|(item, bit)| bit.then_some(item))
861    }
862}
863
864impl<T> FilterByBitmap for T where T: ExactSizeIterator {}
865
866#[cfg(test)]
867mod tests {
868    use super::*;
869
870    #[test]
871    fn test_bitmap_builder() {
872        let bitmap1 = {
873            let mut builder = BitmapBuilder::default();
874            let bits = [
875                false, true, true, false, true, false, true, false, true, false, true, true, false,
876                true, false, true,
877            ];
878            for bit in bits {
879                builder.append(bit);
880            }
881            for (idx, bit) in bits.iter().enumerate() {
882                assert_eq!(builder.is_set(idx), *bit);
883            }
884            builder.finish()
885        };
886        let byte1 = 0b0101_0110_u8;
887        let byte2 = 0b1010_1101_u8;
888        let expected = Bitmap::from_bytes(&[byte1, byte2]);
889        assert_eq!(bitmap1, expected);
890        assert_eq!(
891            bitmap1.count_ones(),
892            (byte1.count_ones() + byte2.count_ones()) as usize
893        );
894
895        let bitmap2 = {
896            let mut builder = BitmapBuilder::default();
897            let bits = [false, true, true, false, true, false, true, false];
898            for bit in bits {
899                builder.append(bit);
900            }
901            for (idx, bit) in bits.iter().enumerate() {
902                assert_eq!(builder.is_set(idx), *bit);
903            }
904            builder.finish()
905        };
906        let byte1 = 0b0101_0110_u8;
907        let expected = Bitmap::from_bytes(&[byte1]);
908        assert_eq!(bitmap2, expected);
909    }
910
911    #[test]
912    fn test_builder_append_bitmap() {
913        let mut builder = BitmapBuilder::default();
914        builder.append_bitmap(&Bitmap::zeros(64));
915        builder.append_bitmap(&Bitmap::ones(64));
916        builder.append_bitmap(&Bitmap::from_bytes(&[0b1010_1010]));
917        builder.append_bitmap(&Bitmap::zeros(8));
918        builder.append_bitmap(&Bitmap::ones(8));
919        let bitmap = builder.finish();
920        assert_eq!(
921            bitmap,
922            Bitmap::from_vec_with_len(
923                vec![0, usize::MAX, 0b1111_1111_0000_0000_1010_1010],
924                64 + 64 + 8 + 8 + 8
925            ),
926        );
927    }
928
929    #[test]
930    fn test_bitmap_get_size() {
931        let bitmap1 = {
932            let mut builder = BitmapBuilder::default();
933            let bits = [
934                false, true, true, false, true, false, true, false, true, false, true, true, false,
935                true, false, true,
936            ];
937            for bit in bits {
938                builder.append(bit);
939            }
940            for (idx, bit) in bits.iter().enumerate() {
941                assert_eq!(builder.is_set(idx), *bit);
942            }
943            builder.finish()
944        };
945        assert_eq!(8, bitmap1.estimated_heap_size());
946        assert_eq!(40, bitmap1.estimated_size());
947    }
948
949    #[test]
950    fn test_bitmap_all_high() {
951        let num_bits = 3;
952        let bitmap = Bitmap::ones(num_bits);
953        assert_eq!(bitmap.len(), num_bits);
954        assert!(bitmap.all());
955        for i in 0..num_bits {
956            assert!(bitmap.is_set(i));
957        }
958        // Test to and from protobuf is OK.
959        assert_eq!(bitmap, Bitmap::from(&bitmap.to_protobuf()));
960    }
961
962    #[test]
963    fn test_bitwise_and() {
964        #[rustfmt::skip]
965        let cases = [(
966            vec![],
967            vec![],
968            vec![],
969        ), (
970            vec![0, 1, 1, 0, 1, 0, 1, 0],
971            vec![0, 1, 0, 0, 1, 1, 1, 0],
972            vec![0, 1, 0, 0, 1, 0, 1, 0],
973        ), (
974            vec![0, 1, 0, 1, 0],
975            vec![1, 1, 1, 1, 0],
976            vec![0, 1, 0, 1, 0],
977        )];
978
979        for (input1, input2, expected) in cases {
980            let bitmap1: Bitmap = input1.into_iter().map(|x| x != 0).collect();
981            let bitmap2: Bitmap = input2.into_iter().map(|x| x != 0).collect();
982            let res = bitmap1 & bitmap2;
983            res.assert_valid();
984            assert_eq!(res.iter().map(|x| x as i32).collect::<Vec<_>>(), expected,);
985        }
986    }
987
988    #[test]
989    fn test_bitwise_not() {
990        #[rustfmt::skip]
991        let cases = [(
992            vec![1, 0, 1, 0, 1],
993            vec![0, 1, 0, 1, 0],
994        ), (
995            vec![],
996            vec![],
997        ), (
998            vec![1, 0, 1, 1, 0, 0, 1, 1],
999            vec![0, 1, 0, 0, 1, 1, 0, 0],
1000        ), (
1001            vec![1, 0, 0, 1, 1, 1, 0, 0, 1, 0],
1002            vec![0, 1, 1, 0, 0, 0, 1, 1, 0, 1],
1003        )];
1004
1005        for (input, expected) in cases {
1006            let bitmap: Bitmap = input.into_iter().map(|x| x != 0).collect();
1007            let res = !bitmap;
1008            res.assert_valid();
1009            assert_eq!(res.iter().map(|x| x as i32).collect::<Vec<_>>(), expected);
1010        }
1011    }
1012
1013    #[test]
1014    fn test_bitwise_or() {
1015        let bitmap1 = Bitmap::from_bytes(&[0b01101010]);
1016        let bitmap2 = Bitmap::from_bytes(&[0b01001110]);
1017        assert_eq!(Bitmap::from_bytes(&[0b01101110]), (bitmap1 | bitmap2));
1018    }
1019
1020    #[test]
1021    fn test_bitmap_is_set() {
1022        let bitmap = Bitmap::from_bytes(&[0b01001010]);
1023        assert!(!bitmap.is_set(0));
1024        assert!(bitmap.is_set(1));
1025        assert!(!bitmap.is_set(2));
1026        assert!(bitmap.is_set(3));
1027        assert!(!bitmap.is_set(4));
1028        assert!(!bitmap.is_set(5));
1029        assert!(bitmap.is_set(6));
1030        assert!(!bitmap.is_set(7));
1031    }
1032
1033    #[test]
1034    fn test_bitmap_iter() {
1035        {
1036            let bitmap = Bitmap::from_bytes(&[0b01001010]);
1037            let mut booleans = vec![];
1038            for b in bitmap.iter() {
1039                booleans.push(b as u8);
1040            }
1041            assert_eq!(booleans, vec![0u8, 1, 0, 1, 0, 0, 1, 0]);
1042        }
1043        {
1044            let bitmap: Bitmap = vec![true; 5].into_iter().collect();
1045            for b in bitmap.iter() {
1046                assert!(b);
1047            }
1048        }
1049    }
1050
1051    #[test]
1052    fn test_bitmap_high_ranges_iter() {
1053        fn test(bits: impl IntoIterator<Item = bool>, expected: Vec<RangeInclusive<usize>>) {
1054            let bitmap = Bitmap::from_iter(bits);
1055            let high_ranges = bitmap.high_ranges().collect::<Vec<_>>();
1056            assert_eq!(high_ranges, expected);
1057        }
1058
1059        test(
1060            vec![
1061                true, true, true, false, false, true, true, true, false, true,
1062            ],
1063            vec![0..=2, 5..=7, 9..=9],
1064        );
1065        test(vec![true, true, true], vec![0..=2]);
1066        test(vec![false, false, false], vec![]);
1067    }
1068
1069    #[test]
1070    fn test_bitmap_from_protobuf() {
1071        let bitmap_bytes = vec![3u8 /* len % BITS */, 0b0101_0010, 0b110];
1072        let buf = PbBuffer {
1073            body: bitmap_bytes,
1074            compression: CompressionType::None as _,
1075        };
1076        let bitmap: Bitmap = (&buf).into();
1077        let actual_bytes: Vec<u8> = bitmap.iter().map(|b| b as u8).collect();
1078
1079        assert_eq!(actual_bytes, vec![0, 1, 0, 0, 1, 0, 1, 0, /*  */ 0, 1, 1]); // in reverse order
1080        assert_eq!(bitmap.count_ones(), 5);
1081    }
1082
1083    #[test]
1084    fn test_bitmap_from_buffer() {
1085        let byte1 = 0b0110_1010_u8;
1086        let byte2 = 0b1011_0101_u8;
1087        let bitmap = Bitmap::from_bytes(&[0b0110_1010, 0b1011_0101]);
1088        let expected = Bitmap::from_iter(vec![
1089            false, true, false, true, false, true, true, false, true, false, true, false, true,
1090            true, false, true,
1091        ]);
1092        let count_ones = (byte1.count_ones() + byte2.count_ones()) as usize;
1093        assert_eq!(expected, bitmap);
1094        assert_eq!(bitmap.count_ones(), count_ones);
1095        assert_eq!(expected.count_ones(), count_ones);
1096    }
1097
1098    #[test]
1099    fn test_bitmap_eq() {
1100        let b1: Bitmap = Bitmap::zeros(3);
1101        let b2: Bitmap = Bitmap::zeros(5);
1102        assert_ne!(b1, b2);
1103
1104        let b1: Bitmap = [true, false].iter().cycle().cloned().take(10000).collect();
1105        let b2: Bitmap = [true, false].iter().cycle().cloned().take(10000).collect();
1106        assert_eq!(b1, b2);
1107    }
1108
1109    #[test]
1110    fn test_bitmap_set() {
1111        let mut b = BitmapBuilder::zeroed(10);
1112
1113        b.set(0, true);
1114        b.set(7, true);
1115        b.set(8, true);
1116        b.set(9, true);
1117
1118        b.set(7, false);
1119        b.set(8, false);
1120
1121        b.append(true);
1122        assert_eq!(b.len(), 11);
1123
1124        let b = b.finish();
1125        assert_eq!(b.count_ones(), 3);
1126        assert_eq!(&b.bits.unwrap()[..], &[0b0110_0000_0001]);
1127    }
1128
1129    #[test]
1130    fn test_bitmap_pop() {
1131        let mut b = BitmapBuilder::zeroed(7);
1132
1133        {
1134            b.append(true);
1135            assert!(b.is_set(b.len() - 1));
1136            b.pop();
1137            assert!(!b.is_set(b.len() - 1));
1138        }
1139
1140        {
1141            b.append(false);
1142            assert!(!b.is_set(b.len() - 1));
1143            b.pop();
1144            assert!(!b.is_set(b.len() - 1));
1145        }
1146
1147        {
1148            b.append(true);
1149            b.append(false);
1150            assert!(!b.is_set(b.len() - 1));
1151            b.pop();
1152            assert!(b.is_set(b.len() - 1));
1153            b.pop();
1154            assert!(!b.is_set(b.len() - 1));
1155        }
1156    }
1157
1158    #[test]
1159    fn test_bitmap_iter_ones() {
1160        let mut builder = BitmapBuilder::zeroed(1000);
1161        builder.append_n(1000, true);
1162        let bitmap = builder.finish();
1163        let mut iter = bitmap.iter_ones();
1164        for i in 0..1000 {
1165            let item = iter.next();
1166            assert!(item == Some(i + 1000));
1167        }
1168        for _ in 0..5 {
1169            let item = iter.next();
1170            assert!(item.is_none());
1171        }
1172    }
1173
1174    #[test]
1175    fn from_indices_creates_correct_bitmap() {
1176        let bitmap = Bitmap::from_indices(10, [1, 3, 5, 7, 9]);
1177        assert_eq!(bitmap.len(), 10);
1178        for i in [1, 3, 5, 7, 9] {
1179            assert!(bitmap.is_set(i));
1180        }
1181
1182        for i in [0, 2, 4, 6, 8] {
1183            assert!(!bitmap.is_set(i));
1184        }
1185    }
1186
1187    #[test]
1188    fn from_indices_empty_indices_creates_zeroed_bitmap() {
1189        let bitmap = Bitmap::from_indices(10, [] as [usize; 0]);
1190        assert_eq!(bitmap.len(), 10);
1191        assert!(!bitmap.any());
1192    }
1193
1194    #[test]
1195    fn from_indices_out_of_bounds_panics() {
1196        let result = std::panic::catch_unwind(|| {
1197            Bitmap::from_indices(5, [0, 6]);
1198        });
1199        assert!(result.is_err());
1200    }
1201
1202    #[test]
1203    fn from_indices_all_indices_set_creates_filled_bitmap() {
1204        let bitmap = Bitmap::from_indices(5, [0, 1, 2, 3, 4]);
1205        assert_eq!(bitmap.len(), 5);
1206        assert!(bitmap.all());
1207    }
1208}