risingwave_common/array/
list_array.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::borrow::Cow;
16use std::cmp::Ordering;
17use std::fmt::{self, Debug, Display};
18use std::future::Future;
19use std::mem::{ManuallyDrop, size_of};
20
21use bytes::{Buf, BufMut};
22use itertools::Itertools;
23use risingwave_common_estimate_size::EstimateSize;
24use risingwave_pb::data::{ListArrayData, PbArray, PbArrayType};
25use serde::{Deserialize, Serializer};
26use thiserror_ext::AsReport;
27
28use super::{
29    Array, ArrayBuilder, ArrayBuilderImpl, ArrayImpl, ArrayResult, BoolArray, PrimitiveArray,
30    PrimitiveArrayItemType, RowRef, Utf8Array,
31};
32use crate::bitmap::{Bitmap, BitmapBuilder};
33use crate::row::Row;
34use crate::types::{
35    DataType, Datum, DatumRef, DefaultOrd, ListType, Scalar, ScalarImpl, ScalarRef, ScalarRefImpl,
36    ToDatumRef, ToText, hash_datum,
37};
38use crate::util::memcmp_encoding;
39use crate::util::value_encoding::estimate_serialize_datum_size;
40
41#[derive(Debug, Clone, EstimateSize)]
42pub struct ListArrayBuilder {
43    bitmap: BitmapBuilder,
44    offsets: Vec<u32>,
45    value: Box<ArrayBuilderImpl>,
46}
47
48impl ArrayBuilder for ListArrayBuilder {
49    type ArrayType = ListArray;
50
51    #[cfg(not(test))]
52    fn new(_capacity: usize) -> Self {
53        panic!("please use `ListArrayBuilder::with_type` instead");
54    }
55
56    #[cfg(test)]
57    fn new(capacity: usize) -> Self {
58        // TODO: deprecate this
59        Self::with_type(
60            capacity,
61            // Default datatype
62            DataType::Int16.list(),
63        )
64    }
65
66    fn with_type(capacity: usize, ty: DataType) -> Self {
67        let DataType::List(list_ty) = ty else {
68            panic!("data type must be DataType::List");
69        };
70        let mut offsets = Vec::with_capacity(capacity + 1);
71        offsets.push(0);
72        Self {
73            bitmap: BitmapBuilder::with_capacity(capacity),
74            offsets,
75            value: Box::new(list_ty.elem().create_array_builder(capacity)),
76        }
77    }
78
79    fn append_n(&mut self, n: usize, value: Option<ListRef<'_>>) {
80        match value {
81            None => {
82                self.bitmap.append_n(n, false);
83                let last = *self.offsets.last().unwrap();
84                for _ in 0..n {
85                    self.offsets.push(last);
86                }
87            }
88            Some(v) => {
89                self.bitmap.append_n(n, true);
90                for _ in 0..n {
91                    let last = *self.offsets.last().unwrap();
92                    let elems = v.iter();
93                    self.offsets.push(
94                        last.checked_add(elems.len() as u32)
95                            .expect("offset overflow"),
96                    );
97                    for elem in elems {
98                        self.value.append(elem);
99                    }
100                }
101            }
102        }
103    }
104
105    fn append_array(&mut self, other: &ListArray) {
106        self.bitmap.append_bitmap(&other.bitmap);
107        let last = *self.offsets.last().unwrap();
108        self.offsets
109            .append(&mut other.offsets[1..].iter().map(|o| *o + last).collect());
110        self.value.append_array(&other.value);
111    }
112
113    fn pop(&mut self) -> Option<()> {
114        self.bitmap.pop()?;
115        let start = self.offsets.pop().unwrap();
116        let end = *self.offsets.last().unwrap();
117        for _ in end..start {
118            self.value.pop().unwrap();
119        }
120        Some(())
121    }
122
123    fn len(&self) -> usize {
124        self.bitmap.len()
125    }
126
127    fn finish(self) -> ListArray {
128        ListArray {
129            bitmap: self.bitmap.finish(),
130            offsets: self.offsets.into(),
131            value: Box::new(self.value.finish()),
132        }
133    }
134}
135
136impl ListArrayBuilder {
137    pub fn append_row_ref(&mut self, row: RowRef<'_>) {
138        self.bitmap.append(true);
139        let last = *self.offsets.last().unwrap();
140        self.offsets
141            .push(last.checked_add(row.len() as u32).expect("offset overflow"));
142        for v in row.iter() {
143            self.value.append(v);
144        }
145    }
146
147    pub fn writer(&mut self) -> ListWriter<'_> {
148        ListWriter::new(self)
149    }
150}
151
152/// Each item of this `ListArray` is a `List<T>`, or called `T[]` (T array).
153///
154/// * As other arrays, there is a null bitmap, with `1` meaning nonnull and `0` meaning null.
155/// * As [`super::BytesArray`], there is an offsets `Vec` and a value `Array`. The value `Array` has
156///   all items concatenated, and the offsets `Vec` stores start and end indices into it for
157///   slicing. Effectively, the inner array is the flattened form, and `offsets.len() == n + 1`.
158///
159/// For example, `values (array[1]), (array[]::int[]), (null), (array[2, 3]);` stores an inner
160///  `I32Array` with `[1, 2, 3]`, along with offsets `[0, 1, 1, 1, 3]` and null bitmap `TTFT`.
161#[derive(Debug, Clone, PartialEq, Eq)]
162pub struct ListArray {
163    pub(super) bitmap: Bitmap,
164    pub(super) offsets: Box<[u32]>,
165    pub(super) value: Box<ArrayImpl>,
166}
167
168impl EstimateSize for ListArray {
169    fn estimated_heap_size(&self) -> usize {
170        self.bitmap.estimated_heap_size()
171            + self.offsets.len() * size_of::<u32>()
172            + self.value.estimated_size()
173    }
174}
175
176impl Array for ListArray {
177    type Builder = ListArrayBuilder;
178    type OwnedItem = ListValue;
179    type RefItem<'a> = ListRef<'a>;
180
181    unsafe fn raw_value_at_unchecked(&self, idx: usize) -> Self::RefItem<'_> {
182        unsafe {
183            ListRef {
184                array: &self.value,
185                start: *self.offsets.get_unchecked(idx),
186                end: *self.offsets.get_unchecked(idx + 1),
187            }
188        }
189    }
190
191    fn len(&self) -> usize {
192        self.bitmap.len()
193    }
194
195    fn to_protobuf(&self) -> PbArray {
196        let value = self.value.to_protobuf();
197        PbArray {
198            array_type: PbArrayType::List as i32,
199            struct_array_data: None,
200            list_array_data: Some(Box::new(ListArrayData {
201                offsets: self.offsets.to_vec(),
202                value: Some(Box::new(value)),
203                value_type: Some(self.value.data_type().to_protobuf()),
204                elem_size: None,
205            })),
206            null_bitmap: Some(self.bitmap.to_protobuf()),
207            values: vec![],
208        }
209    }
210
211    fn null_bitmap(&self) -> &Bitmap {
212        &self.bitmap
213    }
214
215    fn into_null_bitmap(self) -> Bitmap {
216        self.bitmap
217    }
218
219    fn set_bitmap(&mut self, bitmap: Bitmap) {
220        self.bitmap = bitmap;
221    }
222
223    fn data_type(&self) -> DataType {
224        DataType::list(self.value.data_type())
225    }
226}
227
228impl ListArray {
229    /// Flatten the list array into a single array.
230    ///
231    /// # Example
232    ///
233    /// ```text
234    /// [[1,2,3],NULL,[4,5]] => [1,2,3,4,5]
235    /// [[[1],[2]],[[3],[4]]] => [1,2,3,4]
236    /// ```
237    pub fn flatten(&self) -> ArrayImpl {
238        match &*self.value {
239            ArrayImpl::List(inner) => inner.flatten(),
240            a => a.clone(),
241        }
242    }
243
244    /// Return the inner array of the list array.
245    pub fn values(&self) -> &ArrayImpl {
246        &self.value
247    }
248
249    pub fn from_protobuf(array: &PbArray) -> ArrayResult<ArrayImpl> {
250        ensure!(
251            array.values.is_empty(),
252            "Must have no buffer in a list array"
253        );
254        debug_assert!(
255            (array.array_type == PbArrayType::List as i32)
256                || (array.array_type == PbArrayType::Map as i32),
257            "invalid array type for list: {}",
258            array.array_type
259        );
260        let bitmap: Bitmap = array.get_null_bitmap()?.into();
261        let array_data = array.get_list_array_data()?.to_owned();
262        let flatten_len = match array_data.offsets.last() {
263            Some(&n) => n as usize,
264            None => bail!("Must have at least one element in offsets"),
265        };
266        let value = ArrayImpl::from_protobuf(array_data.value.as_ref().unwrap(), flatten_len)?;
267        let arr = ListArray {
268            bitmap,
269            offsets: array_data.offsets.into(),
270            value: Box::new(value),
271        };
272        Ok(arr.into())
273    }
274
275    /// Apply the function on the underlying elements.
276    /// e.g. `map_inner([[1,2,3],NULL,[4,5]], DOUBLE) = [[2,4,6],NULL,[8,10]]`
277    pub async fn map_inner<E, Fut, F>(self, f: F) -> std::result::Result<ListArray, E>
278    where
279        F: FnOnce(ArrayImpl) -> Fut,
280        Fut: Future<Output = std::result::Result<ArrayImpl, E>>,
281    {
282        let new_value = (f)(*self.value).await?;
283
284        Ok(Self {
285            offsets: self.offsets,
286            bitmap: self.bitmap,
287            value: Box::new(new_value),
288        })
289    }
290
291    /// Returns the offsets of this list.
292    ///
293    /// # Example
294    /// ```text
295    /// list    = [[a, b, c], [], NULL, [d], [NULL, f]]
296    /// offsets = [0, 3, 3, 3, 4, 6]
297    /// ```
298    pub fn offsets(&self) -> &[u32] {
299        &self.offsets
300    }
301}
302
303impl<T, L> FromIterator<Option<L>> for ListArray
304where
305    T: PrimitiveArrayItemType,
306    L: IntoIterator<Item = T>,
307{
308    fn from_iter<I: IntoIterator<Item = Option<L>>>(iter: I) -> Self {
309        let iter = iter.into_iter();
310        let mut builder =
311            ListArrayBuilder::with_type(iter.size_hint().0, DataType::list(T::DATA_TYPE));
312        for v in iter {
313            match v {
314                None => builder.append(None),
315                Some(v) => {
316                    builder.append(Some(v.into_iter().collect::<ListValue>().as_scalar_ref()))
317                }
318            }
319        }
320        builder.finish()
321    }
322}
323
324impl FromIterator<ListValue> for ListArray {
325    fn from_iter<I: IntoIterator<Item = ListValue>>(iter: I) -> Self {
326        let mut iter = iter.into_iter();
327        let first = iter.next().expect("empty iterator");
328        let mut builder =
329            ListArrayBuilder::with_type(iter.size_hint().0, DataType::list(first.elem_type()));
330        builder.append(Some(first.as_scalar_ref()));
331        for v in iter {
332            builder.append(Some(v.as_scalar_ref()));
333        }
334        builder.finish()
335    }
336}
337
338#[derive(Clone, PartialEq, Eq, EstimateSize)]
339pub struct ListValue {
340    values: Box<ArrayImpl>,
341}
342
343impl Debug for ListValue {
344    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
345        self.as_scalar_ref().fmt(f)
346    }
347}
348
349impl Display for ListValue {
350    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
351        self.as_scalar_ref().write(f)
352    }
353}
354
355impl ListValue {
356    pub fn new(values: ArrayImpl) -> Self {
357        Self {
358            values: Box::new(values),
359        }
360    }
361
362    /// Convert this list into an [`Array`] of the element type.
363    pub fn into_array(self) -> ArrayImpl {
364        *self.values
365    }
366
367    /// Creates a new empty `ListValue` with the given element type.
368    pub fn empty(elem_type: &DataType) -> Self {
369        Self::new(elem_type.create_array_builder(0).finish())
370    }
371
372    /// Creates a new `ListValue` from an iterator of elements with the given element type.
373    pub fn from_datum_iter<T: ToDatumRef>(
374        elem_type: &DataType,
375        iter: impl IntoIterator<Item = T>,
376    ) -> Self {
377        let iter = iter.into_iter();
378        let mut builder = elem_type.create_array_builder(iter.size_hint().0);
379        for datum in iter {
380            builder.append(datum);
381        }
382        Self::new(builder.finish())
383    }
384
385    /// Returns the length of the list.
386    pub fn len(&self) -> usize {
387        self.values.len()
388    }
389
390    /// Returns `true` if the list has a length of 0.
391    pub fn is_empty(&self) -> bool {
392        self.values.is_empty()
393    }
394
395    /// Iterates over the elements of the list.
396    pub fn iter(&self) -> impl DoubleEndedIterator + ExactSizeIterator<Item = DatumRef<'_>> {
397        self.values.iter()
398    }
399
400    /// Get the element at the given index. Returns `None` if the index is out of bounds.
401    pub fn get(&self, index: usize) -> Option<DatumRef<'_>> {
402        if index < self.len() {
403            Some(self.values.value_at(index))
404        } else {
405            None
406        }
407    }
408
409    /// Returns the data type of the elements in the list.
410    pub fn elem_type(&self) -> DataType {
411        self.values.data_type()
412    }
413
414    pub fn memcmp_deserialize(
415        list_type: &ListType,
416        deserializer: &mut memcomparable::Deserializer<impl Buf>,
417    ) -> memcomparable::Result<Self> {
418        let elem_type = list_type.elem();
419
420        let bytes = serde_bytes::ByteBuf::deserialize(deserializer)?;
421        let mut inner_deserializer = memcomparable::Deserializer::new(bytes.as_slice());
422        let mut builder = elem_type.create_array_builder(0);
423        while inner_deserializer.has_remaining() {
424            builder.append(memcmp_encoding::deserialize_datum_in_composite(
425                elem_type,
426                &mut inner_deserializer,
427            )?)
428        }
429        Ok(Self::new(builder.finish()))
430    }
431
432    // Used to display ListValue in explain for better readibilty.
433    pub fn display_for_explain(&self) -> String {
434        // Example of ListValue display: ARRAY[1, 2, null]
435        format!(
436            "ARRAY[{}]",
437            self.iter()
438                .map(|v| {
439                    match v.as_ref() {
440                        None => "null".into(),
441                        Some(scalar) => scalar.to_text(),
442                    }
443                })
444                .format(", ")
445        )
446    }
447
448    /// Returns a mutable slice if the list is of type `int64[]`.
449    pub fn as_i64_mut_slice(&mut self) -> Option<&mut [i64]> {
450        match self.values.as_mut() {
451            ArrayImpl::Int64(array) => Some(array.as_mut_slice()),
452            _ => None,
453        }
454    }
455}
456
457impl PartialOrd for ListValue {
458    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
459        Some(self.cmp(other))
460    }
461}
462
463impl Ord for ListValue {
464    fn cmp(&self, other: &Self) -> Ordering {
465        self.as_scalar_ref().cmp(&other.as_scalar_ref())
466    }
467}
468
469// Construction helpers:
470
471// [Some(1), None].collect()
472impl<T: PrimitiveArrayItemType> FromIterator<Option<T>> for ListValue {
473    fn from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
474        Self::new(iter.into_iter().collect::<PrimitiveArray<T>>().into())
475    }
476}
477// [1, 2].collect()
478impl<T: PrimitiveArrayItemType> FromIterator<T> for ListValue {
479    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
480        Self::new(iter.into_iter().collect::<PrimitiveArray<T>>().into())
481    }
482}
483// [true, false].collect()
484impl FromIterator<bool> for ListValue {
485    fn from_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
486        Self::new(iter.into_iter().collect::<BoolArray>().into())
487    }
488}
489// [Some("hello"), None].collect()
490impl<'a> FromIterator<Option<&'a str>> for ListValue {
491    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
492        Self::new(iter.into_iter().collect::<Utf8Array>().into())
493    }
494}
495// ["hello", "world"].collect()
496impl<'a> FromIterator<&'a str> for ListValue {
497    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
498        Self::new(iter.into_iter().collect::<Utf8Array>().into())
499    }
500}
501// nested: [ListValue::from_iter([1,2,3]), ListValue::from_iter([4,5])].collect()
502impl FromIterator<ListValue> for ListValue {
503    fn from_iter<I: IntoIterator<Item = ListValue>>(iter: I) -> Self {
504        Self::new(iter.into_iter().collect::<ListArray>().into())
505    }
506}
507
508/// A slice of an array
509#[derive(Copy, Clone)]
510pub struct ListRef<'a> {
511    array: &'a ArrayImpl,
512    start: u32,
513    end: u32,
514}
515
516impl<'a> ListRef<'a> {
517    /// Returns the length of the list.
518    pub fn len(&self) -> usize {
519        (self.end - self.start) as usize
520    }
521
522    /// Returns `true` if the list has a length of 0.
523    pub fn is_empty(&self) -> bool {
524        self.start == self.end
525    }
526
527    /// Returns the data type of the elements in the list.
528    pub fn elem_type(&self) -> DataType {
529        self.array.data_type()
530    }
531
532    /// Returns the elements in the flattened list.
533    pub fn flatten(self) -> ListRef<'a> {
534        match self.array {
535            ArrayImpl::List(inner) => ListRef {
536                array: &inner.value,
537                start: inner.offsets[self.start as usize],
538                end: inner.offsets[self.end as usize],
539            }
540            .flatten(),
541            _ => self,
542        }
543    }
544
545    /// Iterates over the elements of the list.
546    pub fn iter(self) -> impl DoubleEndedIterator + ExactSizeIterator<Item = DatumRef<'a>> + 'a {
547        (self.start..self.end).map(|i| self.array.value_at(i as usize))
548    }
549
550    /// Get the element at the given index. Returns `None` if the index is out of bounds.
551    pub fn get(self, index: usize) -> Option<DatumRef<'a>> {
552        if index < self.len() {
553            Some(self.array.value_at(self.start as usize + index))
554        } else {
555            None
556        }
557    }
558
559    pub fn memcmp_serialize(
560        self,
561        serializer: &mut memcomparable::Serializer<impl BufMut>,
562    ) -> memcomparable::Result<()> {
563        let mut inner_serializer = memcomparable::Serializer::new(vec![]);
564        for datum_ref in self.iter() {
565            memcmp_encoding::serialize_datum_in_composite(datum_ref, &mut inner_serializer)?
566        }
567        serializer.serialize_bytes(&inner_serializer.into_inner())
568    }
569
570    pub fn hash_scalar_inner<H: std::hash::Hasher>(self, state: &mut H) {
571        for datum_ref in self.iter() {
572            hash_datum(datum_ref, state);
573        }
574    }
575
576    /// estimate the serialized size with value encoding
577    pub fn estimate_serialize_size_inner(self) -> usize {
578        self.iter().map(estimate_serialize_datum_size).sum()
579    }
580
581    pub fn as_primitive_slice<T: PrimitiveArrayItemType>(self) -> Option<&'a [T]> {
582        T::try_into_array_ref(self.array)
583            .map(|prim_arr| &prim_arr.as_slice()[self.start as usize..self.end as usize])
584    }
585
586    /// Returns a slice if the list is of type `int64[]`.
587    pub fn as_i64_slice(&self) -> Option<&[i64]> {
588        self.as_primitive_slice()
589    }
590
591    /// # Panics
592    /// Panics if the list is not a map's internal representation (See [`super::MapArray`]).
593    pub(super) fn as_map_kv(self) -> (ListRef<'a>, ListRef<'a>) {
594        let (k, v) = self.array.as_struct().fields().collect_tuple().unwrap();
595        (
596            ListRef {
597                array: k,
598                start: self.start,
599                end: self.end,
600            },
601            ListRef {
602                array: v,
603                start: self.start,
604                end: self.end,
605            },
606        )
607    }
608}
609
610impl PartialEq for ListRef<'_> {
611    fn eq(&self, other: &Self) -> bool {
612        self.iter().eq(other.iter())
613    }
614}
615
616impl Eq for ListRef<'_> {}
617
618impl PartialOrd for ListRef<'_> {
619    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
620        Some(self.cmp(other))
621    }
622}
623
624impl Ord for ListRef<'_> {
625    fn cmp(&self, other: &Self) -> Ordering {
626        self.iter().cmp_by(other.iter(), |a, b| a.default_cmp(&b))
627    }
628}
629
630impl Debug for ListRef<'_> {
631    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
632        f.debug_list().entries(self.iter()).finish()
633    }
634}
635
636impl Row for ListRef<'_> {
637    fn datum_at(&self, index: usize) -> DatumRef<'_> {
638        self.array.value_at(self.start as usize + index)
639    }
640
641    unsafe fn datum_at_unchecked(&self, index: usize) -> DatumRef<'_> {
642        unsafe { self.array.value_at_unchecked(self.start as usize + index) }
643    }
644
645    fn len(&self) -> usize {
646        self.len()
647    }
648
649    fn iter(&self) -> impl Iterator<Item = DatumRef<'_>> {
650        (*self).iter()
651    }
652}
653
654impl ToText for ListRef<'_> {
655    // This function will be invoked when pgwire prints a list value in string.
656    // Refer to PostgreSQL `array_out` or `appendPGArray`.
657    fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
658        write!(
659            f,
660            "{{{}}}",
661            self.iter().format_with(",", |datum_ref, f| {
662                let s = datum_ref.to_text();
663                // Never quote null or inner list, but quote empty, verbatim 'null', special
664                // chars and whitespaces.
665                let need_quote = !matches!(datum_ref, None | Some(ScalarRefImpl::List(_)))
666                    && (s.is_empty()
667                        || s.eq_ignore_ascii_case("null")
668                        || s.contains([
669                            '"', '\\', ',',
670                            // whilespace:
671                            // PostgreSQL `array_isspace` includes '\x0B' but rust
672                            // [`char::is_ascii_whitespace`] does not.
673                            ' ', '\t', '\n', '\r', '\x0B', '\x0C', // list-specific:
674                            '{', '}',
675                        ]));
676                if need_quote {
677                    f(&"\"")?;
678                    s.chars().try_for_each(|c| {
679                        if c == '"' || c == '\\' {
680                            f(&"\\")?;
681                        }
682                        f(&c)
683                    })?;
684                    f(&"\"")
685                } else {
686                    f(&s)
687                }
688            })
689        )
690    }
691
692    fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
693        match ty {
694            DataType::List { .. } => self.write(f),
695            _ => unreachable!(),
696        }
697    }
698}
699
700/// Implement `Scalar` for `ListValue`.
701impl Scalar for ListValue {
702    type ScalarRefType<'a> = ListRef<'a>;
703
704    fn as_scalar_ref(&self) -> ListRef<'_> {
705        ListRef {
706            array: &self.values,
707            start: 0,
708            end: self.len() as u32,
709        }
710    }
711}
712
713/// Implement `Scalar` for `ListValue`.
714impl<'a> ScalarRef<'a> for ListRef<'a> {
715    type ScalarType = ListValue;
716
717    fn to_owned_scalar(&self) -> ListValue {
718        let mut builder = self.array.create_builder(self.len());
719        for datum_ref in self.iter() {
720            builder.append(datum_ref);
721        }
722        ListValue::new(builder.finish())
723    }
724
725    fn hash_scalar<H: std::hash::Hasher>(&self, state: &mut H) {
726        self.hash_scalar_inner(state)
727    }
728}
729
730impl ListValue {
731    /// Construct an array from literal string and the data type of the list.
732    pub fn from_str(input: &str, data_type: &DataType) -> Result<Self, String> {
733        struct Parser<'a> {
734            input: &'a str,
735            data_type: &'a DataType,
736        }
737
738        impl Parser<'_> {
739            /// Parse a datum.
740            fn parse(&mut self) -> Result<Datum, String> {
741                self.skip_whitespace();
742                if self.data_type.is_array() {
743                    if self.try_parse_null() {
744                        return Ok(None);
745                    }
746                    Ok(Some(self.parse_array()?.into()))
747                } else {
748                    self.parse_value()
749                }
750            }
751
752            /// Parse an array.
753            fn parse_array(&mut self) -> Result<ListValue, String> {
754                self.skip_whitespace();
755                if !self.try_consume('{') {
756                    return Err("Array value must start with \"{\"".to_owned());
757                }
758                self.skip_whitespace();
759                if self.try_consume('}') {
760                    return Ok(ListValue::empty(self.data_type.as_list_elem()));
761                }
762                let mut builder =
763                    ArrayBuilderImpl::with_type(0, self.data_type.as_list_elem().clone());
764                loop {
765                    let mut parser = Self {
766                        input: self.input,
767                        data_type: self.data_type.as_list_elem(),
768                    };
769                    builder.append(parser.parse()?);
770                    self.input = parser.input;
771
772                    // expect ',' or '}'
773                    self.skip_whitespace();
774                    match self.peek() {
775                        Some(',') => {
776                            self.try_consume(',');
777                        }
778                        Some('}') => {
779                            self.try_consume('}');
780                            break;
781                        }
782                        None => return Err(Self::eoi()),
783                        _ => return Err("Unexpected array element.".to_owned()),
784                    }
785                }
786                Ok(ListValue::new(builder.finish()))
787            }
788
789            /// Parse a non-array value.
790            fn parse_value(&mut self) -> Result<Datum, String> {
791                if self.peek() == Some('"') {
792                    return Ok(Some(self.parse_quoted()?));
793                }
794                // peek until the next unescaped ',' or '}'
795                let mut chars = self.input.char_indices();
796                let mut has_escape = false;
797                let s = loop {
798                    match chars.next().ok_or_else(Self::eoi)? {
799                        (_, '\\') => {
800                            has_escape = true;
801                            chars.next().ok_or_else(Self::eoi)?;
802                        }
803                        (i, c @ ',' | c @ '}') => {
804                            let s = &self.input[..i];
805                            // consume the value and leave the ',' or '}' for parent
806                            self.input = &self.input[i..];
807
808                            break if has_escape {
809                                Cow::Owned(Self::unescape_trim_end(s))
810                            } else {
811                                let trimmed = s.trim_end();
812                                if trimmed.is_empty() {
813                                    return Err(format!("Unexpected \"{c}\" character."));
814                                }
815                                if trimmed.eq_ignore_ascii_case("null") {
816                                    return Ok(None);
817                                }
818                                Cow::Borrowed(trimmed)
819                            };
820                        }
821                        (_, '{') => return Err("Unexpected \"{\" character.".to_owned()),
822                        (_, '"') => return Err("Unexpected array element.".to_owned()),
823                        _ => {}
824                    }
825                };
826                Ok(Some(
827                    ScalarImpl::from_text(&s, self.data_type).map_err(|e| e.to_report_string())?,
828                ))
829            }
830
831            /// Parse a double quoted non-array value.
832            fn parse_quoted(&mut self) -> Result<ScalarImpl, String> {
833                assert!(self.try_consume('"'));
834                // peek until the next unescaped '"'
835                let mut chars = self.input.char_indices();
836                let mut has_escape = false;
837                let s = loop {
838                    match chars.next().ok_or_else(Self::eoi)? {
839                        (_, '\\') => {
840                            has_escape = true;
841                            chars.next().ok_or_else(Self::eoi)?;
842                        }
843                        (i, '"') => {
844                            let s = &self.input[..i];
845                            self.input = &self.input[i + 1..];
846                            break if has_escape {
847                                Cow::Owned(Self::unescape(s))
848                            } else {
849                                Cow::Borrowed(s)
850                            };
851                        }
852                        _ => {}
853                    }
854                };
855                ScalarImpl::from_text(&s, self.data_type).map_err(|e| e.to_report_string())
856            }
857
858            /// Unescape a string.
859            fn unescape(s: &str) -> String {
860                let mut unescaped = String::with_capacity(s.len());
861                let mut chars = s.chars();
862                while let Some(mut c) = chars.next() {
863                    if c == '\\' {
864                        c = chars.next().unwrap();
865                    }
866                    unescaped.push(c);
867                }
868                unescaped
869            }
870
871            /// Unescape a string and trim the trailing whitespaces.
872            ///
873            /// Example: `"\  " -> " "`
874            fn unescape_trim_end(s: &str) -> String {
875                let mut unescaped = String::with_capacity(s.len());
876                let mut chars = s.chars();
877                let mut len_after_last_escaped_char = 0;
878                while let Some(mut c) = chars.next() {
879                    if c == '\\' {
880                        c = chars.next().unwrap();
881                        unescaped.push(c);
882                        len_after_last_escaped_char = unescaped.len();
883                    } else {
884                        unescaped.push(c);
885                    }
886                }
887                let l = unescaped[len_after_last_escaped_char..].trim_end().len();
888                unescaped.truncate(len_after_last_escaped_char + l);
889                unescaped
890            }
891
892            /// Consume the next 4 characters if it matches "null".
893            ///
894            /// Note: We don't use this function when parsing non-array values.
895            ///       Because we can't decide whether it is a null value or a string starts with "null".
896            ///       Consider this case: `{null value}` => `["null value"]`
897            fn try_parse_null(&mut self) -> bool {
898                if let Some(s) = self.input.get(..4)
899                    && s.eq_ignore_ascii_case("null")
900                {
901                    let next_char = self.input[4..].chars().next();
902                    match next_char {
903                        None | Some(',' | '}') => {}
904                        Some(c) if c.is_ascii_whitespace() => {}
905                        // following normal characters
906                        _ => return false,
907                    }
908                    self.input = &self.input[4..];
909                    true
910                } else {
911                    false
912                }
913            }
914
915            /// Consume the next character if it matches `c`.
916            fn try_consume(&mut self, c: char) -> bool {
917                if self.peek() == Some(c) {
918                    self.input = &self.input[c.len_utf8()..];
919                    true
920                } else {
921                    false
922                }
923            }
924
925            /// Expect end of input.
926            fn expect_end(&mut self) -> Result<(), String> {
927                self.skip_whitespace();
928                match self.peek() {
929                    Some(_) => Err("Junk after closing right brace.".to_owned()),
930                    None => Ok(()),
931                }
932            }
933
934            /// Skip whitespaces.
935            fn skip_whitespace(&mut self) {
936                self.input = match self
937                    .input
938                    .char_indices()
939                    .find(|(_, c)| !c.is_ascii_whitespace())
940                {
941                    Some((i, _)) => &self.input[i..],
942                    None => "",
943                };
944            }
945
946            /// Peek the next character.
947            fn peek(&self) -> Option<char> {
948                self.input.chars().next()
949            }
950
951            /// Return the error message for unexpected end of input.
952            fn eoi() -> String {
953                "Unexpected end of input.".into()
954            }
955        }
956
957        let mut parser = Parser { input, data_type };
958        let array = parser.parse_array()?;
959        parser.expect_end()?;
960        Ok(array)
961    }
962}
963
964pub struct ListWriter<'a> {
965    builder: &'a mut ListArrayBuilder,
966}
967
968impl<'a> ListWriter<'a> {
969    pub fn new(builder: &'a mut ListArrayBuilder) -> Self {
970        Self { builder }
971    }
972
973    /// `finish` will be called when the entire record is successfully written.
974    /// The partial data was committed and the `builder` can no longer be used.
975    pub fn finish(self) {
976        self.builder.offsets.push(
977            self.builder
978                .value
979                .len()
980                .try_into()
981                .expect("offset overflow"),
982        );
983        self.builder.bitmap.append(true);
984        let _ = ManuallyDrop::new(self); // prevent drop
985    }
986
987    /// `rollback` will be called while the entire record is abandoned.
988    /// The partial data was cleaned and the `builder` can be safely used.
989    pub fn rollback(self) {
990        // just drop self, the drop impl will rollback the partial data
991    }
992}
993
994impl Drop for ListWriter<'_> {
995    /// If the writer is dropped without calling `finish` or `rollback`,
996    /// we rollback the partial data by default.
997    fn drop(&mut self) {
998        let last = *self.builder.offsets.last().unwrap() as usize;
999        let cur = self.builder.value.len();
1000        for _ in last..cur {
1001            self.builder.value.pop().unwrap();
1002        }
1003    }
1004}
1005
1006pub trait ListWrite {
1007    fn write(&mut self, value: impl ToDatumRef);
1008
1009    fn write_iter(&mut self, values: impl IntoIterator<Item = impl ToDatumRef>) {
1010        for v in values {
1011            self.write(v);
1012        }
1013    }
1014}
1015
1016impl<'a> ListWrite for ListWriter<'a> {
1017    fn write(&mut self, value: impl ToDatumRef) {
1018        self.builder.value.append(value);
1019    }
1020}
1021
1022impl ListWrite for ArrayBuilderImpl {
1023    fn write(&mut self, value: impl ToDatumRef) {
1024        self.append(value);
1025    }
1026}
1027
1028#[cfg(test)]
1029mod tests {
1030    use more_asserts::{assert_gt, assert_lt};
1031
1032    use super::*;
1033
1034    #[test]
1035    fn test_protobuf() {
1036        use crate::array::*;
1037        let array = ListArray::from_iter([
1038            Some(vec![12i32, -7, 25]),
1039            None,
1040            Some(vec![0, -127, 127, 50]),
1041            Some(vec![]),
1042        ]);
1043        let actual = ListArray::from_protobuf(&array.to_protobuf()).unwrap();
1044        assert_eq!(actual, ArrayImpl::List(array));
1045    }
1046
1047    #[test]
1048    fn test_append_array() {
1049        let part1 = ListArray::from_iter([Some([12i32, -7, 25]), None]);
1050        let part2 = ListArray::from_iter([Some(vec![0, -127, 127, 50]), Some(vec![])]);
1051
1052        let mut builder = ListArrayBuilder::with_type(4, DataType::Int32.list());
1053        builder.append_array(&part1);
1054        builder.append_array(&part2);
1055
1056        let expected = ListArray::from_iter([
1057            Some(vec![12i32, -7, 25]),
1058            None,
1059            Some(vec![0, -127, 127, 50]),
1060            Some(vec![]),
1061        ]);
1062        assert_eq!(builder.finish(), expected);
1063    }
1064
1065    // Ensure `create_builder` exactly copies the same metadata.
1066    #[test]
1067    fn test_list_create_builder() {
1068        use crate::array::*;
1069        let arr = ListArray::from_iter([Some([F32::from(2.0), F32::from(42.0), F32::from(1.0)])]);
1070        let arr2 = arr.create_builder(0).finish();
1071        assert_eq!(arr.data_type(), arr2.data_type());
1072    }
1073
1074    #[test]
1075    fn test_builder_pop() {
1076        use crate::array::*;
1077
1078        {
1079            let mut builder = ListArrayBuilder::with_type(1, DataType::Int32.list());
1080            let val = ListValue::from_iter([1i32, 2, 3]);
1081            builder.append(Some(val.as_scalar_ref()));
1082            assert!(builder.pop().is_some());
1083            assert!(builder.pop().is_none());
1084            let arr = builder.finish();
1085            assert!(arr.is_empty());
1086        }
1087
1088        {
1089            let data_type = DataType::Int32.list().list();
1090            let mut builder = ListArrayBuilder::with_type(2, data_type);
1091            let val1 = ListValue::from_iter([1, 2, 3]);
1092            let val2 = ListValue::from_iter([1, 2, 3]);
1093            let list1 = ListValue::from_iter([val1, val2]);
1094            builder.append(Some(list1.as_scalar_ref()));
1095
1096            let val3 = ListValue::from_iter([1, 2, 3]);
1097            let val4 = ListValue::from_iter([1, 2, 3]);
1098            let list2 = ListValue::from_iter([val3, val4]);
1099
1100            builder.append(Some(list2.as_scalar_ref()));
1101
1102            assert!(builder.pop().is_some());
1103
1104            let arr = builder.finish();
1105            assert_eq!(arr.len(), 1);
1106            assert_eq!(arr.value_at(0).unwrap(), list1.as_scalar_ref());
1107        }
1108    }
1109
1110    #[test]
1111    fn test_list_nested_layout() {
1112        use crate::array::*;
1113
1114        let listarray1 = ListArray::from_iter([Some([1i32, 2]), Some([3, 4])]);
1115        let listarray2 = ListArray::from_iter([Some(vec![5, 6, 7]), None, Some(vec![8])]);
1116        let listarray3 = ListArray::from_iter([Some([9, 10])]);
1117
1118        let nestarray = ListArray::from_iter(
1119            [listarray1, listarray2, listarray3]
1120                .into_iter()
1121                .map(|l| ListValue::new(l.into())),
1122        );
1123        let actual = ListArray::from_protobuf(&nestarray.to_protobuf()).unwrap();
1124        assert_eq!(ArrayImpl::List(nestarray), actual);
1125    }
1126
1127    #[test]
1128    fn test_list_value_cmp() {
1129        // ARRAY[1, 1] < ARRAY[1, 2, 1]
1130        assert_lt!(
1131            ListValue::from_iter([1, 1]),
1132            ListValue::from_iter([1, 2, 1]),
1133        );
1134        // ARRAY[1, 2] < ARRAY[1, 2, 1]
1135        assert_lt!(
1136            ListValue::from_iter([1, 2]),
1137            ListValue::from_iter([1, 2, 1]),
1138        );
1139        // ARRAY[1, 3] > ARRAY[1, 2, 1]
1140        assert_gt!(
1141            ListValue::from_iter([1, 3]),
1142            ListValue::from_iter([1, 2, 1]),
1143        );
1144        // null > 1
1145        assert_gt!(
1146            ListValue::from_iter([None::<i32>]),
1147            ListValue::from_iter([1]),
1148        );
1149        // ARRAY[1, 2, null] > ARRAY[1, 2, 1]
1150        assert_gt!(
1151            ListValue::from_iter([Some(1), Some(2), None]),
1152            ListValue::from_iter([Some(1), Some(2), Some(1)]),
1153        );
1154        // Null value in first ARRAY results into a Greater ordering regardless of the smaller ARRAY
1155        // length. ARRAY[1, null] > ARRAY[1, 2, 3]
1156        assert_gt!(
1157            ListValue::from_iter([Some(1), None]),
1158            ListValue::from_iter([Some(1), Some(2), Some(3)]),
1159        );
1160        // ARRAY[1, null] == ARRAY[1, null]
1161        assert_eq!(
1162            ListValue::from_iter([Some(1), None]),
1163            ListValue::from_iter([Some(1), None]),
1164        );
1165    }
1166
1167    #[test]
1168    fn test_list_ref_display() {
1169        let v = ListValue::from_iter([Some(1), None]);
1170        assert_eq!(v.to_string(), "{1,NULL}");
1171    }
1172
1173    #[test]
1174    fn test_serialize_deserialize() {
1175        let value = ListValue::from_iter([Some("abcd"), Some(""), None, Some("a")]);
1176        let list_ref = value.as_scalar_ref();
1177        let mut serializer = memcomparable::Serializer::new(vec![]);
1178        serializer.set_reverse(true);
1179        list_ref.memcmp_serialize(&mut serializer).unwrap();
1180        let buf = serializer.into_inner();
1181        let mut deserializer = memcomparable::Deserializer::new(&buf[..]);
1182        deserializer.set_reverse(true);
1183        assert_eq!(
1184            ListValue::memcmp_deserialize(&ListType::new(DataType::Varchar), &mut deserializer)
1185                .unwrap(),
1186            value
1187        );
1188
1189        let mut builder = ListArrayBuilder::with_type(0, DataType::Varchar.list());
1190        builder.append(Some(list_ref));
1191        let array = builder.finish();
1192        let list_ref = array.value_at(0).unwrap();
1193        let mut serializer = memcomparable::Serializer::new(vec![]);
1194        list_ref.memcmp_serialize(&mut serializer).unwrap();
1195        let buf = serializer.into_inner();
1196        let mut deserializer = memcomparable::Deserializer::new(&buf[..]);
1197        assert_eq!(
1198            ListValue::memcmp_deserialize(&ListType::new(DataType::Varchar), &mut deserializer)
1199                .unwrap(),
1200            value
1201        );
1202    }
1203
1204    #[test]
1205    fn test_memcomparable() {
1206        let cases = [
1207            (
1208                ListValue::from_iter([123, 456]),
1209                ListValue::from_iter([123, 789]),
1210            ),
1211            (
1212                ListValue::from_iter([123, 456]),
1213                ListValue::from_iter([123]),
1214            ),
1215            (
1216                ListValue::from_iter([None, Some("")]),
1217                ListValue::from_iter([None, None::<&str>]),
1218            ),
1219            (
1220                ListValue::from_iter([Some(2)]),
1221                ListValue::from_iter([Some(1), None, Some(3)]),
1222            ),
1223        ];
1224
1225        for (lhs, rhs) in cases {
1226            let lhs_serialized = {
1227                let mut serializer = memcomparable::Serializer::new(vec![]);
1228                lhs.as_scalar_ref()
1229                    .memcmp_serialize(&mut serializer)
1230                    .unwrap();
1231                serializer.into_inner()
1232            };
1233            let rhs_serialized = {
1234                let mut serializer = memcomparable::Serializer::new(vec![]);
1235                rhs.as_scalar_ref()
1236                    .memcmp_serialize(&mut serializer)
1237                    .unwrap();
1238                serializer.into_inner()
1239            };
1240            assert_eq!(lhs_serialized.cmp(&rhs_serialized), lhs.cmp(&rhs));
1241        }
1242    }
1243
1244    #[test]
1245    fn test_listref() {
1246        use crate::array::*;
1247        use crate::types;
1248
1249        let arr = ListArray::from_iter([Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6, 7])]);
1250
1251        // get 3rd ListRef from ListArray
1252        let list_ref = arr.value_at(2).unwrap();
1253        assert_eq!(list_ref, ListValue::from_iter([4, 5, 6, 7]).as_scalar_ref());
1254
1255        // Get 2nd value from ListRef
1256        let scalar = list_ref.get(1).unwrap();
1257        assert_eq!(scalar, Some(types::ScalarRefImpl::Int32(5)));
1258    }
1259
1260    #[test]
1261    fn test_from_to_literal() {
1262        #[track_caller]
1263        fn test(typestr: &str, input: &str, output: Option<&str>) {
1264            let datatype: DataType = typestr.parse().unwrap();
1265            let list = ListValue::from_str(input, &datatype).unwrap();
1266            let actual = list.as_scalar_ref().to_text();
1267            let output = output.unwrap_or(input);
1268            assert_eq!(actual, output);
1269        }
1270
1271        #[track_caller]
1272        fn test_err(typestr: &str, input: &str, err: &str) {
1273            let datatype: DataType = typestr.parse().unwrap();
1274            let actual_err = ListValue::from_str(input, &datatype).unwrap_err();
1275            assert_eq!(actual_err, err);
1276        }
1277
1278        test("varchar[]", "{}", None);
1279        test("varchar[]", "{1 2}", Some(r#"{"1 2"}"#));
1280        test("varchar[]", "{🥵,🤡}", None);
1281        test("varchar[]", r#"{aa\\bb}"#, Some(r#"{"aa\\bb"}"#));
1282        test("int[]", "{1,2,3}", None);
1283        test("varchar[]", r#"{"1,2"}"#, None);
1284        test("varchar[]", r#"{1, ""}"#, Some(r#"{1,""}"#));
1285        test("varchar[]", r#"{"\""}"#, None);
1286        test("varchar[]", r#"{\   }"#, Some(r#"{" "}"#));
1287        test("varchar[]", r#"{\\  }"#, Some(r#"{"\\"}"#));
1288        test("varchar[]", "{nulla}", None);
1289        test("varchar[]", "{null a}", Some(r#"{"null a"}"#));
1290        test(
1291            "varchar[]",
1292            r#"{"null", "NULL", null, NuLL}"#,
1293            Some(r#"{"null","NULL",NULL,NULL}"#),
1294        );
1295        test("varchar[][]", "{{1, 2, 3}, null }", Some("{{1,2,3},NULL}"));
1296        test(
1297            "varchar[][][]",
1298            "{{{1, 2, 3}}, {{4, 5, 6}}}",
1299            Some("{{{1,2,3}},{{4,5,6}}}"),
1300        );
1301        test_err("varchar[]", "()", r#"Array value must start with "{""#);
1302        test_err("varchar[]", "{1,", r#"Unexpected end of input."#);
1303        test_err("varchar[]", "{1,}", r#"Unexpected "}" character."#);
1304        test_err("varchar[]", "{1,,3}", r#"Unexpected "," character."#);
1305        test_err("varchar[]", r#"{"a""b"}"#, r#"Unexpected array element."#);
1306        test_err("varchar[]", r#"{}{"#, r#"Junk after closing right brace."#);
1307    }
1308}