risingwave_common/array/
bool_array.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use risingwave_common_estimate_size::EstimateSize;
16use risingwave_pb::data::{ArrayType, PbArray};
17
18use super::{Array, ArrayBuilder, DataType};
19use crate::bitmap::{Bitmap, BitmapBuilder};
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct BoolArray {
23    bitmap: Bitmap,
24    data: Bitmap,
25}
26
27impl BoolArray {
28    pub fn new(data: Bitmap, bitmap: Bitmap) -> Self {
29        assert_eq!(bitmap.len(), data.len());
30        Self { bitmap, data }
31    }
32
33    /// Build a [`BoolArray`] from iterator and bitmap.
34    ///
35    /// NOTE: The length of `bitmap` must be equal to the length of `iter`.
36    pub fn from_iter_bitmap(iter: impl IntoIterator<Item = bool>, bitmap: Bitmap) -> Self {
37        let data: Bitmap = iter.into_iter().collect();
38        assert_eq!(data.len(), bitmap.len());
39        BoolArray { bitmap, data }
40    }
41
42    pub fn data(&self) -> &Bitmap {
43        &self.data
44    }
45
46    pub fn to_bitmap(&self) -> Bitmap {
47        &self.data & self.null_bitmap()
48    }
49}
50
51impl FromIterator<Option<bool>> for BoolArray {
52    fn from_iter<I: IntoIterator<Item = Option<bool>>>(iter: I) -> Self {
53        let iter = iter.into_iter();
54        let mut builder = <Self as Array>::Builder::new(iter.size_hint().0);
55        for i in iter {
56            builder.append(i);
57        }
58        builder.finish()
59    }
60}
61
62impl<'a> FromIterator<&'a Option<bool>> for BoolArray {
63    fn from_iter<I: IntoIterator<Item = &'a Option<bool>>>(iter: I) -> Self {
64        iter.into_iter().cloned().collect()
65    }
66}
67
68impl FromIterator<bool> for BoolArray {
69    fn from_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
70        let data: Bitmap = iter.into_iter().collect();
71        BoolArray {
72            bitmap: Bitmap::ones(data.len()),
73            data,
74        }
75    }
76}
77
78impl EstimateSize for BoolArray {
79    fn estimated_heap_size(&self) -> usize {
80        self.bitmap.estimated_heap_size() + self.data.estimated_heap_size()
81    }
82}
83
84impl Array for BoolArray {
85    type Builder = BoolArrayBuilder;
86    type OwnedItem = bool;
87    type RefItem<'a> = bool;
88
89    unsafe fn raw_value_at_unchecked(&self, idx: usize) -> bool {
90        unsafe { self.data.is_set_unchecked(idx) }
91    }
92
93    fn len(&self) -> usize {
94        self.data.len()
95    }
96
97    fn to_protobuf(&self) -> PbArray {
98        let value = self.data.to_protobuf();
99        let null_bitmap = self.null_bitmap().to_protobuf();
100
101        PbArray {
102            null_bitmap: Some(null_bitmap),
103            values: vec![value],
104            array_type: ArrayType::Bool as i32,
105            struct_array_data: None,
106            list_array_data: None,
107        }
108    }
109
110    fn null_bitmap(&self) -> &Bitmap {
111        &self.bitmap
112    }
113
114    fn into_null_bitmap(self) -> Bitmap {
115        self.bitmap
116    }
117
118    fn set_bitmap(&mut self, bitmap: Bitmap) {
119        self.bitmap = bitmap;
120    }
121
122    fn data_type(&self) -> DataType {
123        DataType::Boolean
124    }
125}
126
127/// `BoolArrayBuilder` constructs a `BoolArray` from `Option<Bool>`.
128#[derive(Debug, Clone, EstimateSize)]
129pub struct BoolArrayBuilder {
130    bitmap: BitmapBuilder,
131    data: BitmapBuilder,
132}
133
134impl ArrayBuilder for BoolArrayBuilder {
135    type ArrayType = BoolArray;
136
137    fn new(capacity: usize) -> Self {
138        Self {
139            bitmap: BitmapBuilder::with_capacity(capacity),
140            data: BitmapBuilder::with_capacity(capacity),
141        }
142    }
143
144    fn with_type(capacity: usize, ty: DataType) -> Self {
145        assert_eq!(ty, DataType::Boolean);
146        Self::new(capacity)
147    }
148
149    fn append_n(&mut self, n: usize, value: Option<bool>) {
150        match value {
151            Some(x) => {
152                self.bitmap.append_n(n, true);
153                self.data.append_n(n, x);
154            }
155            None => {
156                self.bitmap.append_n(n, false);
157                self.data.append_n(n, false);
158            }
159        }
160    }
161
162    fn append_array(&mut self, other: &BoolArray) {
163        for bit in other.bitmap.iter() {
164            self.bitmap.append(bit);
165        }
166
167        for bit in other.data.iter() {
168            self.data.append(bit);
169        }
170    }
171
172    fn pop(&mut self) -> Option<()> {
173        self.data.pop().map(|_| self.bitmap.pop().unwrap())
174    }
175
176    fn len(&self) -> usize {
177        self.bitmap.len()
178    }
179
180    fn finish(self) -> BoolArray {
181        BoolArray {
182            bitmap: self.bitmap.finish(),
183            data: self.data.finish(),
184        }
185    }
186}
187
188#[cfg(test)]
189mod tests {
190    use std::hash::Hash;
191
192    use itertools::Itertools;
193
194    use super::*;
195    use crate::array::{ArrayImpl, NULL_VAL_FOR_HASH};
196    use crate::util::iter_util::ZipEqFast;
197
198    fn helper_test_builder(data: Vec<Option<bool>>) -> BoolArray {
199        let mut builder = BoolArrayBuilder::new(data.len());
200        for d in data {
201            builder.append(d);
202        }
203        builder.finish()
204    }
205
206    #[test]
207    fn test_bool_builder() {
208        let v = (0..1000)
209            .map(|x| {
210                if x % 2 == 0 {
211                    None
212                } else if x % 3 == 0 {
213                    Some(true)
214                } else {
215                    Some(false)
216                }
217            })
218            .collect_vec();
219        let array = helper_test_builder(v.clone());
220        assert_eq!(256, array.estimated_heap_size());
221        assert_eq!(320, array.estimated_size());
222        let res = v.iter().zip_eq_fast(array.iter()).all(|(a, b)| *a == b);
223        assert!(res);
224    }
225
226    #[test]
227    fn test_bool_array_serde() {
228        for num_bits in [0..8, 128..136].into_iter().flatten() {
229            let v = (0..num_bits)
230                .map(|x| {
231                    if x % 2 == 0 {
232                        None
233                    } else if x % 3 == 0 {
234                        Some(true)
235                    } else {
236                        Some(false)
237                    }
238                })
239                .collect_vec();
240
241            let array = helper_test_builder(v.clone());
242
243            let encoded = array.to_protobuf();
244            let decoded = ArrayImpl::from_protobuf(&encoded, num_bits)
245                .unwrap()
246                .into_bool();
247
248            let equal = array
249                .iter()
250                .zip_eq_fast(decoded.iter())
251                .all(|(a, b)| a == b);
252            assert!(equal);
253        }
254    }
255
256    #[test]
257    fn test_bool_array_hash() {
258        use std::hash::BuildHasher;
259
260        use super::super::test_util::{hash_finish, test_hash};
261
262        const ARR_NUM: usize = 2;
263        const ARR_LEN: usize = 48;
264        let vecs: [Vec<Option<bool>>; ARR_NUM] = [
265            (0..ARR_LEN)
266                .map(|x| match x % 2 {
267                    0 => Some(true),
268                    1 => Some(false),
269                    _ => unreachable!(),
270                })
271                .collect_vec(),
272            (0..ARR_LEN)
273                .map(|x| match x % 3 {
274                    0 => Some(true),
275                    1 => Some(false),
276                    2 => None,
277                    _ => unreachable!(),
278                })
279                .collect_vec(),
280        ];
281
282        let arrs = vecs
283            .iter()
284            .map(|v| helper_test_builder(v.clone()))
285            .collect_vec();
286
287        let hasher_builder = twox_hash::xxhash64::RandomState::default();
288        let mut states = vec![hasher_builder.build_hasher(); ARR_LEN];
289        vecs.iter().for_each(|v| {
290            v.iter()
291                .zip_eq_fast(&mut states)
292                .for_each(|(x, state)| match x {
293                    Some(inner) => inner.hash(state),
294                    None => NULL_VAL_FOR_HASH.hash(state),
295                })
296        });
297        let hashes = hash_finish(&states[..]);
298
299        let count = hashes.iter().counts().len();
300        assert_eq!(count, 6);
301
302        test_hash(arrs, hashes, hasher_builder);
303    }
304}