risingwave_common/array/
list_array.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::borrow::Cow;
16use std::cmp::Ordering;
17use std::fmt::{self, Debug, Display};
18use std::future::Future;
19use std::mem::size_of;
20
21use bytes::{Buf, BufMut};
22use itertools::Itertools;
23use risingwave_common_estimate_size::EstimateSize;
24use risingwave_pb::data::{ListArrayData, PbArray, PbArrayType};
25use serde::{Deserialize, Serializer};
26use thiserror_ext::AsReport;
27
28use super::{
29    Array, ArrayBuilder, ArrayBuilderImpl, ArrayImpl, ArrayResult, BoolArray, PrimitiveArray,
30    PrimitiveArrayItemType, RowRef, Utf8Array,
31};
32use crate::bitmap::{Bitmap, BitmapBuilder};
33use crate::row::Row;
34use crate::types::{
35    DataType, Datum, DatumRef, DefaultOrd, ListType, Scalar, ScalarImpl, ScalarRef, ScalarRefImpl,
36    ToDatumRef, ToText, hash_datum,
37};
38use crate::util::memcmp_encoding;
39use crate::util::value_encoding::estimate_serialize_datum_size;
40
41#[derive(Debug, Clone, EstimateSize)]
42pub struct ListArrayBuilder {
43    bitmap: BitmapBuilder,
44    offsets: Vec<u32>,
45    value: Box<ArrayBuilderImpl>,
46}
47
48impl ArrayBuilder for ListArrayBuilder {
49    type ArrayType = ListArray;
50
51    #[cfg(not(test))]
52    fn new(_capacity: usize) -> Self {
53        panic!("please use `ListArrayBuilder::with_type` instead");
54    }
55
56    #[cfg(test)]
57    fn new(capacity: usize) -> Self {
58        // TODO: deprecate this
59        Self::with_type(
60            capacity,
61            // Default datatype
62            DataType::Int16.list(),
63        )
64    }
65
66    fn with_type(capacity: usize, ty: DataType) -> Self {
67        let DataType::List(list_ty) = ty else {
68            panic!("data type must be DataType::List");
69        };
70        let mut offsets = Vec::with_capacity(capacity + 1);
71        offsets.push(0);
72        Self {
73            bitmap: BitmapBuilder::with_capacity(capacity),
74            offsets,
75            value: Box::new(list_ty.elem().create_array_builder(capacity)),
76        }
77    }
78
79    fn append_n(&mut self, n: usize, value: Option<ListRef<'_>>) {
80        match value {
81            None => {
82                self.bitmap.append_n(n, false);
83                let last = *self.offsets.last().unwrap();
84                for _ in 0..n {
85                    self.offsets.push(last);
86                }
87            }
88            Some(v) => {
89                self.bitmap.append_n(n, true);
90                for _ in 0..n {
91                    let last = *self.offsets.last().unwrap();
92                    let elems = v.iter();
93                    self.offsets.push(
94                        last.checked_add(elems.len() as u32)
95                            .expect("offset overflow"),
96                    );
97                    for elem in elems {
98                        self.value.append(elem);
99                    }
100                }
101            }
102        }
103    }
104
105    fn append_array(&mut self, other: &ListArray) {
106        self.bitmap.append_bitmap(&other.bitmap);
107        let last = *self.offsets.last().unwrap();
108        self.offsets
109            .append(&mut other.offsets[1..].iter().map(|o| *o + last).collect());
110        self.value.append_array(&other.value);
111    }
112
113    fn pop(&mut self) -> Option<()> {
114        self.bitmap.pop()?;
115        let start = self.offsets.pop().unwrap();
116        let end = *self.offsets.last().unwrap();
117        for _ in end..start {
118            self.value.pop().unwrap();
119        }
120        Some(())
121    }
122
123    fn len(&self) -> usize {
124        self.bitmap.len()
125    }
126
127    fn finish(self) -> ListArray {
128        ListArray {
129            bitmap: self.bitmap.finish(),
130            offsets: self.offsets.into(),
131            value: Box::new(self.value.finish()),
132        }
133    }
134}
135
136impl ListArrayBuilder {
137    pub fn append_row_ref(&mut self, row: RowRef<'_>) {
138        self.bitmap.append(true);
139        let last = *self.offsets.last().unwrap();
140        self.offsets
141            .push(last.checked_add(row.len() as u32).expect("offset overflow"));
142        for v in row.iter() {
143            self.value.append(v);
144        }
145    }
146}
147
148/// Each item of this `ListArray` is a `List<T>`, or called `T[]` (T array).
149///
150/// * As other arrays, there is a null bitmap, with `1` meaning nonnull and `0` meaning null.
151/// * As [`super::BytesArray`], there is an offsets `Vec` and a value `Array`. The value `Array` has
152///   all items concatenated, and the offsets `Vec` stores start and end indices into it for
153///   slicing. Effectively, the inner array is the flattened form, and `offsets.len() == n + 1`.
154///
155/// For example, `values (array[1]), (array[]::int[]), (null), (array[2, 3]);` stores an inner
156///  `I32Array` with `[1, 2, 3]`, along with offsets `[0, 1, 1, 1, 3]` and null bitmap `TTFT`.
157#[derive(Debug, Clone, PartialEq, Eq)]
158pub struct ListArray {
159    pub(super) bitmap: Bitmap,
160    pub(super) offsets: Box<[u32]>,
161    pub(super) value: Box<ArrayImpl>,
162}
163
164impl EstimateSize for ListArray {
165    fn estimated_heap_size(&self) -> usize {
166        self.bitmap.estimated_heap_size()
167            + self.offsets.len() * size_of::<u32>()
168            + self.value.estimated_size()
169    }
170}
171
172impl Array for ListArray {
173    type Builder = ListArrayBuilder;
174    type OwnedItem = ListValue;
175    type RefItem<'a> = ListRef<'a>;
176
177    unsafe fn raw_value_at_unchecked(&self, idx: usize) -> Self::RefItem<'_> {
178        unsafe {
179            ListRef {
180                array: &self.value,
181                start: *self.offsets.get_unchecked(idx),
182                end: *self.offsets.get_unchecked(idx + 1),
183            }
184        }
185    }
186
187    fn len(&self) -> usize {
188        self.bitmap.len()
189    }
190
191    fn to_protobuf(&self) -> PbArray {
192        let value = self.value.to_protobuf();
193        PbArray {
194            array_type: PbArrayType::List as i32,
195            struct_array_data: None,
196            list_array_data: Some(Box::new(ListArrayData {
197                offsets: self.offsets.to_vec(),
198                value: Some(Box::new(value)),
199                value_type: Some(self.value.data_type().to_protobuf()),
200                elem_size: None,
201            })),
202            null_bitmap: Some(self.bitmap.to_protobuf()),
203            values: vec![],
204        }
205    }
206
207    fn null_bitmap(&self) -> &Bitmap {
208        &self.bitmap
209    }
210
211    fn into_null_bitmap(self) -> Bitmap {
212        self.bitmap
213    }
214
215    fn set_bitmap(&mut self, bitmap: Bitmap) {
216        self.bitmap = bitmap;
217    }
218
219    fn data_type(&self) -> DataType {
220        DataType::list(self.value.data_type())
221    }
222}
223
224impl ListArray {
225    /// Flatten the list array into a single array.
226    ///
227    /// # Example
228    ///
229    /// ```text
230    /// [[1,2,3],NULL,[4,5]] => [1,2,3,4,5]
231    /// [[[1],[2]],[[3],[4]]] => [1,2,3,4]
232    /// ```
233    pub fn flatten(&self) -> ArrayImpl {
234        match &*self.value {
235            ArrayImpl::List(inner) => inner.flatten(),
236            a => a.clone(),
237        }
238    }
239
240    /// Return the inner array of the list array.
241    pub fn values(&self) -> &ArrayImpl {
242        &self.value
243    }
244
245    pub fn from_protobuf(array: &PbArray) -> ArrayResult<ArrayImpl> {
246        ensure!(
247            array.values.is_empty(),
248            "Must have no buffer in a list array"
249        );
250        debug_assert!(
251            (array.array_type == PbArrayType::List as i32)
252                || (array.array_type == PbArrayType::Map as i32),
253            "invalid array type for list: {}",
254            array.array_type
255        );
256        let bitmap: Bitmap = array.get_null_bitmap()?.into();
257        let array_data = array.get_list_array_data()?.to_owned();
258        let flatten_len = match array_data.offsets.last() {
259            Some(&n) => n as usize,
260            None => bail!("Must have at least one element in offsets"),
261        };
262        let value = ArrayImpl::from_protobuf(array_data.value.as_ref().unwrap(), flatten_len)?;
263        let arr = ListArray {
264            bitmap,
265            offsets: array_data.offsets.into(),
266            value: Box::new(value),
267        };
268        Ok(arr.into())
269    }
270
271    /// Apply the function on the underlying elements.
272    /// e.g. `map_inner([[1,2,3],NULL,[4,5]], DOUBLE) = [[2,4,6],NULL,[8,10]]`
273    pub async fn map_inner<E, Fut, F>(self, f: F) -> std::result::Result<ListArray, E>
274    where
275        F: FnOnce(ArrayImpl) -> Fut,
276        Fut: Future<Output = std::result::Result<ArrayImpl, E>>,
277    {
278        let new_value = (f)(*self.value).await?;
279
280        Ok(Self {
281            offsets: self.offsets,
282            bitmap: self.bitmap,
283            value: Box::new(new_value),
284        })
285    }
286
287    /// Returns the offsets of this list.
288    ///
289    /// # Example
290    /// ```text
291    /// list    = [[a, b, c], [], NULL, [d], [NULL, f]]
292    /// offsets = [0, 3, 3, 3, 4, 6]
293    /// ```
294    pub fn offsets(&self) -> &[u32] {
295        &self.offsets
296    }
297}
298
299impl<T, L> FromIterator<Option<L>> for ListArray
300where
301    T: PrimitiveArrayItemType,
302    L: IntoIterator<Item = T>,
303{
304    fn from_iter<I: IntoIterator<Item = Option<L>>>(iter: I) -> Self {
305        let iter = iter.into_iter();
306        let mut builder =
307            ListArrayBuilder::with_type(iter.size_hint().0, DataType::list(T::DATA_TYPE.clone()));
308        for v in iter {
309            match v {
310                None => builder.append(None),
311                Some(v) => {
312                    builder.append(Some(v.into_iter().collect::<ListValue>().as_scalar_ref()))
313                }
314            }
315        }
316        builder.finish()
317    }
318}
319
320impl FromIterator<ListValue> for ListArray {
321    fn from_iter<I: IntoIterator<Item = ListValue>>(iter: I) -> Self {
322        let mut iter = iter.into_iter();
323        let first = iter.next().expect("empty iterator");
324        let mut builder =
325            ListArrayBuilder::with_type(iter.size_hint().0, DataType::list(first.elem_type()));
326        builder.append(Some(first.as_scalar_ref()));
327        for v in iter {
328            builder.append(Some(v.as_scalar_ref()));
329        }
330        builder.finish()
331    }
332}
333
334#[derive(Clone, PartialEq, Eq, EstimateSize)]
335pub struct ListValue {
336    values: Box<ArrayImpl>,
337}
338
339impl Debug for ListValue {
340    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
341        self.as_scalar_ref().fmt(f)
342    }
343}
344
345impl Display for ListValue {
346    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
347        self.as_scalar_ref().write(f)
348    }
349}
350
351impl ListValue {
352    pub fn new(values: ArrayImpl) -> Self {
353        Self {
354            values: Box::new(values),
355        }
356    }
357
358    /// Convert this list into an [`Array`] of the element type.
359    pub fn into_array(self) -> ArrayImpl {
360        *self.values
361    }
362
363    /// Creates a new empty `ListValue` with the given element type.
364    pub fn empty(elem_type: &DataType) -> Self {
365        Self::new(elem_type.create_array_builder(0).finish())
366    }
367
368    /// Creates a new `ListValue` from an iterator of elements with the given element type.
369    pub fn from_datum_iter<T: ToDatumRef>(
370        elem_type: &DataType,
371        iter: impl IntoIterator<Item = T>,
372    ) -> Self {
373        let iter = iter.into_iter();
374        let mut builder = elem_type.create_array_builder(iter.size_hint().0);
375        for datum in iter {
376            builder.append(datum);
377        }
378        Self::new(builder.finish())
379    }
380
381    /// Returns the length of the list.
382    pub fn len(&self) -> usize {
383        self.values.len()
384    }
385
386    /// Returns `true` if the list has a length of 0.
387    pub fn is_empty(&self) -> bool {
388        self.values.is_empty()
389    }
390
391    /// Iterates over the elements of the list.
392    pub fn iter(&self) -> impl DoubleEndedIterator + ExactSizeIterator<Item = DatumRef<'_>> {
393        self.values.iter()
394    }
395
396    /// Get the element at the given index. Returns `None` if the index is out of bounds.
397    pub fn get(&self, index: usize) -> Option<DatumRef<'_>> {
398        if index < self.len() {
399            Some(self.values.value_at(index))
400        } else {
401            None
402        }
403    }
404
405    /// Returns the data type of the elements in the list.
406    pub fn elem_type(&self) -> DataType {
407        self.values.data_type()
408    }
409
410    pub fn memcmp_deserialize(
411        list_type: &ListType,
412        deserializer: &mut memcomparable::Deserializer<impl Buf>,
413    ) -> memcomparable::Result<Self> {
414        let elem_type = list_type.elem();
415
416        let bytes = serde_bytes::ByteBuf::deserialize(deserializer)?;
417        let mut inner_deserializer = memcomparable::Deserializer::new(bytes.as_slice());
418        let mut builder = elem_type.create_array_builder(0);
419        while inner_deserializer.has_remaining() {
420            builder.append(memcmp_encoding::deserialize_datum_in_composite(
421                elem_type,
422                &mut inner_deserializer,
423            )?)
424        }
425        Ok(Self::new(builder.finish()))
426    }
427
428    // Used to display ListValue in explain for better readibilty.
429    pub fn display_for_explain(&self) -> String {
430        // Example of ListValue display: ARRAY[1, 2, null]
431        format!(
432            "ARRAY[{}]",
433            self.iter()
434                .map(|v| {
435                    match v.as_ref() {
436                        None => "null".into(),
437                        Some(scalar) => scalar.to_text(),
438                    }
439                })
440                .format(", ")
441        )
442    }
443
444    /// Returns a mutable slice if the list is of type `int64[]`.
445    pub fn as_i64_mut_slice(&mut self) -> Option<&mut [i64]> {
446        match self.values.as_mut() {
447            ArrayImpl::Int64(array) => Some(array.as_mut_slice()),
448            _ => None,
449        }
450    }
451}
452
453impl PartialOrd for ListValue {
454    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
455        Some(self.cmp(other))
456    }
457}
458
459impl Ord for ListValue {
460    fn cmp(&self, other: &Self) -> Ordering {
461        self.as_scalar_ref().cmp(&other.as_scalar_ref())
462    }
463}
464
465// Construction helpers:
466
467// [Some(1), None].collect()
468impl<T: PrimitiveArrayItemType> FromIterator<Option<T>> for ListValue {
469    fn from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
470        Self::new(iter.into_iter().collect::<PrimitiveArray<T>>().into())
471    }
472}
473// [1, 2].collect()
474impl<T: PrimitiveArrayItemType> FromIterator<T> for ListValue {
475    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
476        Self::new(iter.into_iter().collect::<PrimitiveArray<T>>().into())
477    }
478}
479// [true, false].collect()
480impl FromIterator<bool> for ListValue {
481    fn from_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
482        Self::new(iter.into_iter().collect::<BoolArray>().into())
483    }
484}
485// [Some("hello"), None].collect()
486impl<'a> FromIterator<Option<&'a str>> for ListValue {
487    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
488        Self::new(iter.into_iter().collect::<Utf8Array>().into())
489    }
490}
491// ["hello", "world"].collect()
492impl<'a> FromIterator<&'a str> for ListValue {
493    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
494        Self::new(iter.into_iter().collect::<Utf8Array>().into())
495    }
496}
497// nested: [ListValue::from_iter([1,2,3]), ListValue::from_iter([4,5])].collect()
498impl FromIterator<ListValue> for ListValue {
499    fn from_iter<I: IntoIterator<Item = ListValue>>(iter: I) -> Self {
500        Self::new(iter.into_iter().collect::<ListArray>().into())
501    }
502}
503
504/// A slice of an array
505#[derive(Copy, Clone)]
506pub struct ListRef<'a> {
507    array: &'a ArrayImpl,
508    start: u32,
509    end: u32,
510}
511
512impl<'a> ListRef<'a> {
513    /// Returns the length of the list.
514    pub fn len(&self) -> usize {
515        (self.end - self.start) as usize
516    }
517
518    /// Returns `true` if the list has a length of 0.
519    pub fn is_empty(&self) -> bool {
520        self.start == self.end
521    }
522
523    /// Returns the data type of the elements in the list.
524    pub fn elem_type(&self) -> DataType {
525        self.array.data_type()
526    }
527
528    /// Returns the elements in the flattened list.
529    pub fn flatten(self) -> ListRef<'a> {
530        match self.array {
531            ArrayImpl::List(inner) => ListRef {
532                array: &inner.value,
533                start: inner.offsets[self.start as usize],
534                end: inner.offsets[self.end as usize],
535            }
536            .flatten(),
537            _ => self,
538        }
539    }
540
541    /// Iterates over the elements of the list.
542    pub fn iter(self) -> impl DoubleEndedIterator + ExactSizeIterator<Item = DatumRef<'a>> + 'a {
543        (self.start..self.end).map(|i| self.array.value_at(i as usize))
544    }
545
546    /// Get the element at the given index. Returns `None` if the index is out of bounds.
547    pub fn get(self, index: usize) -> Option<DatumRef<'a>> {
548        if index < self.len() {
549            Some(self.array.value_at(self.start as usize + index))
550        } else {
551            None
552        }
553    }
554
555    pub fn memcmp_serialize(
556        self,
557        serializer: &mut memcomparable::Serializer<impl BufMut>,
558    ) -> memcomparable::Result<()> {
559        let mut inner_serializer = memcomparable::Serializer::new(vec![]);
560        for datum_ref in self.iter() {
561            memcmp_encoding::serialize_datum_in_composite(datum_ref, &mut inner_serializer)?
562        }
563        serializer.serialize_bytes(&inner_serializer.into_inner())
564    }
565
566    pub fn hash_scalar_inner<H: std::hash::Hasher>(self, state: &mut H) {
567        for datum_ref in self.iter() {
568            hash_datum(datum_ref, state);
569        }
570    }
571
572    /// estimate the serialized size with value encoding
573    pub fn estimate_serialize_size_inner(self) -> usize {
574        self.iter().map(estimate_serialize_datum_size).sum()
575    }
576
577    pub fn as_primitive_slice<T: PrimitiveArrayItemType>(self) -> Option<&'a [T]> {
578        T::try_into_array_ref(self.array)
579            .map(|prim_arr| &prim_arr.as_slice()[self.start as usize..self.end as usize])
580    }
581
582    /// Returns a slice if the list is of type `int64[]`.
583    pub fn as_i64_slice(&self) -> Option<&[i64]> {
584        self.as_primitive_slice()
585    }
586
587    /// # Panics
588    /// Panics if the list is not a map's internal representation (See [`super::MapArray`]).
589    pub(super) fn as_map_kv(self) -> (ListRef<'a>, ListRef<'a>) {
590        let (k, v) = self.array.as_struct().fields().collect_tuple().unwrap();
591        (
592            ListRef {
593                array: k,
594                start: self.start,
595                end: self.end,
596            },
597            ListRef {
598                array: v,
599                start: self.start,
600                end: self.end,
601            },
602        )
603    }
604}
605
606impl PartialEq for ListRef<'_> {
607    fn eq(&self, other: &Self) -> bool {
608        self.iter().eq(other.iter())
609    }
610}
611
612impl Eq for ListRef<'_> {}
613
614impl PartialOrd for ListRef<'_> {
615    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
616        Some(self.cmp(other))
617    }
618}
619
620impl Ord for ListRef<'_> {
621    fn cmp(&self, other: &Self) -> Ordering {
622        self.iter().cmp_by(other.iter(), |a, b| a.default_cmp(&b))
623    }
624}
625
626impl Debug for ListRef<'_> {
627    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
628        f.debug_list().entries(self.iter()).finish()
629    }
630}
631
632impl Row for ListRef<'_> {
633    fn datum_at(&self, index: usize) -> DatumRef<'_> {
634        self.array.value_at(self.start as usize + index)
635    }
636
637    unsafe fn datum_at_unchecked(&self, index: usize) -> DatumRef<'_> {
638        unsafe { self.array.value_at_unchecked(self.start as usize + index) }
639    }
640
641    fn len(&self) -> usize {
642        self.len()
643    }
644
645    fn iter(&self) -> impl Iterator<Item = DatumRef<'_>> {
646        (*self).iter()
647    }
648}
649
650impl ToText for ListRef<'_> {
651    // This function will be invoked when pgwire prints a list value in string.
652    // Refer to PostgreSQL `array_out` or `appendPGArray`.
653    fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
654        write!(
655            f,
656            "{{{}}}",
657            self.iter().format_with(",", |datum_ref, f| {
658                let s = datum_ref.to_text();
659                // Never quote null or inner list, but quote empty, verbatim 'null', special
660                // chars and whitespaces.
661                let need_quote = !matches!(datum_ref, None | Some(ScalarRefImpl::List(_)))
662                    && (s.is_empty()
663                        || s.eq_ignore_ascii_case("null")
664                        || s.contains([
665                            '"', '\\', ',',
666                            // whilespace:
667                            // PostgreSQL `array_isspace` includes '\x0B' but rust
668                            // [`char::is_ascii_whitespace`] does not.
669                            ' ', '\t', '\n', '\r', '\x0B', '\x0C', // list-specific:
670                            '{', '}',
671                        ]));
672                if need_quote {
673                    f(&"\"")?;
674                    s.chars().try_for_each(|c| {
675                        if c == '"' || c == '\\' {
676                            f(&"\\")?;
677                        }
678                        f(&c)
679                    })?;
680                    f(&"\"")
681                } else {
682                    f(&s)
683                }
684            })
685        )
686    }
687
688    fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
689        match ty {
690            DataType::List { .. } => self.write(f),
691            _ => unreachable!(),
692        }
693    }
694}
695
696/// Implement `Scalar` for `ListValue`.
697impl Scalar for ListValue {
698    type ScalarRefType<'a> = ListRef<'a>;
699
700    fn as_scalar_ref(&self) -> ListRef<'_> {
701        ListRef {
702            array: &self.values,
703            start: 0,
704            end: self.len() as u32,
705        }
706    }
707}
708
709/// Implement `Scalar` for `ListValue`.
710impl<'a> ScalarRef<'a> for ListRef<'a> {
711    type ScalarType = ListValue;
712
713    fn to_owned_scalar(&self) -> ListValue {
714        let mut builder = self.array.create_builder(self.len());
715        for datum_ref in self.iter() {
716            builder.append(datum_ref);
717        }
718        ListValue::new(builder.finish())
719    }
720
721    fn hash_scalar<H: std::hash::Hasher>(&self, state: &mut H) {
722        self.hash_scalar_inner(state)
723    }
724}
725
726impl ListValue {
727    /// Construct an array from literal string and the data type of the list.
728    pub fn from_str(input: &str, data_type: &DataType) -> Result<Self, String> {
729        struct Parser<'a> {
730            input: &'a str,
731            data_type: &'a DataType,
732        }
733
734        impl Parser<'_> {
735            /// Parse a datum.
736            fn parse(&mut self) -> Result<Datum, String> {
737                self.skip_whitespace();
738                if self.data_type.is_array() {
739                    if self.try_parse_null() {
740                        return Ok(None);
741                    }
742                    Ok(Some(self.parse_array()?.into()))
743                } else {
744                    self.parse_value()
745                }
746            }
747
748            /// Parse an array.
749            fn parse_array(&mut self) -> Result<ListValue, String> {
750                self.skip_whitespace();
751                if !self.try_consume('{') {
752                    return Err("Array value must start with \"{\"".to_owned());
753                }
754                self.skip_whitespace();
755                if self.try_consume('}') {
756                    return Ok(ListValue::empty(self.data_type.as_list_elem()));
757                }
758                let mut builder =
759                    ArrayBuilderImpl::with_type(0, self.data_type.as_list_elem().clone());
760                loop {
761                    let mut parser = Self {
762                        input: self.input,
763                        data_type: self.data_type.as_list_elem(),
764                    };
765                    builder.append(parser.parse()?);
766                    self.input = parser.input;
767
768                    // expect ',' or '}'
769                    self.skip_whitespace();
770                    match self.peek() {
771                        Some(',') => {
772                            self.try_consume(',');
773                        }
774                        Some('}') => {
775                            self.try_consume('}');
776                            break;
777                        }
778                        None => return Err(Self::eoi()),
779                        _ => return Err("Unexpected array element.".to_owned()),
780                    }
781                }
782                Ok(ListValue::new(builder.finish()))
783            }
784
785            /// Parse a non-array value.
786            fn parse_value(&mut self) -> Result<Datum, String> {
787                if self.peek() == Some('"') {
788                    return Ok(Some(self.parse_quoted()?));
789                }
790                // peek until the next unescaped ',' or '}'
791                let mut chars = self.input.char_indices();
792                let mut has_escape = false;
793                let s = loop {
794                    match chars.next().ok_or_else(Self::eoi)? {
795                        (_, '\\') => {
796                            has_escape = true;
797                            chars.next().ok_or_else(Self::eoi)?;
798                        }
799                        (i, c @ ',' | c @ '}') => {
800                            let s = &self.input[..i];
801                            // consume the value and leave the ',' or '}' for parent
802                            self.input = &self.input[i..];
803
804                            break if has_escape {
805                                Cow::Owned(Self::unescape_trim_end(s))
806                            } else {
807                                let trimmed = s.trim_end();
808                                if trimmed.is_empty() {
809                                    return Err(format!("Unexpected \"{c}\" character."));
810                                }
811                                if trimmed.eq_ignore_ascii_case("null") {
812                                    return Ok(None);
813                                }
814                                Cow::Borrowed(trimmed)
815                            };
816                        }
817                        (_, '{') => return Err("Unexpected \"{\" character.".to_owned()),
818                        (_, '"') => return Err("Unexpected array element.".to_owned()),
819                        _ => {}
820                    }
821                };
822                Ok(Some(
823                    ScalarImpl::from_text(&s, self.data_type).map_err(|e| e.to_report_string())?,
824                ))
825            }
826
827            /// Parse a double quoted non-array value.
828            fn parse_quoted(&mut self) -> Result<ScalarImpl, String> {
829                assert!(self.try_consume('"'));
830                // peek until the next unescaped '"'
831                let mut chars = self.input.char_indices();
832                let mut has_escape = false;
833                let s = loop {
834                    match chars.next().ok_or_else(Self::eoi)? {
835                        (_, '\\') => {
836                            has_escape = true;
837                            chars.next().ok_or_else(Self::eoi)?;
838                        }
839                        (i, '"') => {
840                            let s = &self.input[..i];
841                            self.input = &self.input[i + 1..];
842                            break if has_escape {
843                                Cow::Owned(Self::unescape(s))
844                            } else {
845                                Cow::Borrowed(s)
846                            };
847                        }
848                        _ => {}
849                    }
850                };
851                ScalarImpl::from_text(&s, self.data_type).map_err(|e| e.to_report_string())
852            }
853
854            /// Unescape a string.
855            fn unescape(s: &str) -> String {
856                let mut unescaped = String::with_capacity(s.len());
857                let mut chars = s.chars();
858                while let Some(mut c) = chars.next() {
859                    if c == '\\' {
860                        c = chars.next().unwrap();
861                    }
862                    unescaped.push(c);
863                }
864                unescaped
865            }
866
867            /// Unescape a string and trim the trailing whitespaces.
868            ///
869            /// Example: `"\  " -> " "`
870            fn unescape_trim_end(s: &str) -> String {
871                let mut unescaped = String::with_capacity(s.len());
872                let mut chars = s.chars();
873                let mut len_after_last_escaped_char = 0;
874                while let Some(mut c) = chars.next() {
875                    if c == '\\' {
876                        c = chars.next().unwrap();
877                        unescaped.push(c);
878                        len_after_last_escaped_char = unescaped.len();
879                    } else {
880                        unescaped.push(c);
881                    }
882                }
883                let l = unescaped[len_after_last_escaped_char..].trim_end().len();
884                unescaped.truncate(len_after_last_escaped_char + l);
885                unescaped
886            }
887
888            /// Consume the next 4 characters if it matches "null".
889            ///
890            /// Note: We don't use this function when parsing non-array values.
891            ///       Because we can't decide whether it is a null value or a string starts with "null".
892            ///       Consider this case: `{null value}` => `["null value"]`
893            fn try_parse_null(&mut self) -> bool {
894                if let Some(s) = self.input.get(..4)
895                    && s.eq_ignore_ascii_case("null")
896                {
897                    let next_char = self.input[4..].chars().next();
898                    match next_char {
899                        None | Some(',' | '}') => {}
900                        Some(c) if c.is_ascii_whitespace() => {}
901                        // following normal characters
902                        _ => return false,
903                    }
904                    self.input = &self.input[4..];
905                    true
906                } else {
907                    false
908                }
909            }
910
911            /// Consume the next character if it matches `c`.
912            fn try_consume(&mut self, c: char) -> bool {
913                if self.peek() == Some(c) {
914                    self.input = &self.input[c.len_utf8()..];
915                    true
916                } else {
917                    false
918                }
919            }
920
921            /// Expect end of input.
922            fn expect_end(&mut self) -> Result<(), String> {
923                self.skip_whitespace();
924                match self.peek() {
925                    Some(_) => Err("Junk after closing right brace.".to_owned()),
926                    None => Ok(()),
927                }
928            }
929
930            /// Skip whitespaces.
931            fn skip_whitespace(&mut self) {
932                self.input = match self
933                    .input
934                    .char_indices()
935                    .find(|(_, c)| !c.is_ascii_whitespace())
936                {
937                    Some((i, _)) => &self.input[i..],
938                    None => "",
939                };
940            }
941
942            /// Peek the next character.
943            fn peek(&self) -> Option<char> {
944                self.input.chars().next()
945            }
946
947            /// Return the error message for unexpected end of input.
948            fn eoi() -> String {
949                "Unexpected end of input.".into()
950            }
951        }
952
953        let mut parser = Parser { input, data_type };
954        let array = parser.parse_array()?;
955        parser.expect_end()?;
956        Ok(array)
957    }
958}
959
960#[cfg(test)]
961mod tests {
962    use more_asserts::{assert_gt, assert_lt};
963
964    use super::*;
965
966    #[test]
967    fn test_protobuf() {
968        use crate::array::*;
969        let array = ListArray::from_iter([
970            Some(vec![12i32, -7, 25]),
971            None,
972            Some(vec![0, -127, 127, 50]),
973            Some(vec![]),
974        ]);
975        let actual = ListArray::from_protobuf(&array.to_protobuf()).unwrap();
976        assert_eq!(actual, ArrayImpl::List(array));
977    }
978
979    #[test]
980    fn test_append_array() {
981        let part1 = ListArray::from_iter([Some([12i32, -7, 25]), None]);
982        let part2 = ListArray::from_iter([Some(vec![0, -127, 127, 50]), Some(vec![])]);
983
984        let mut builder = ListArrayBuilder::with_type(4, DataType::Int32.list());
985        builder.append_array(&part1);
986        builder.append_array(&part2);
987
988        let expected = ListArray::from_iter([
989            Some(vec![12i32, -7, 25]),
990            None,
991            Some(vec![0, -127, 127, 50]),
992            Some(vec![]),
993        ]);
994        assert_eq!(builder.finish(), expected);
995    }
996
997    // Ensure `create_builder` exactly copies the same metadata.
998    #[test]
999    fn test_list_create_builder() {
1000        use crate::array::*;
1001        let arr = ListArray::from_iter([Some([F32::from(2.0), F32::from(42.0), F32::from(1.0)])]);
1002        let arr2 = arr.create_builder(0).finish();
1003        assert_eq!(arr.data_type(), arr2.data_type());
1004    }
1005
1006    #[test]
1007    fn test_builder_pop() {
1008        use crate::array::*;
1009
1010        {
1011            let mut builder = ListArrayBuilder::with_type(1, DataType::Int32.list());
1012            let val = ListValue::from_iter([1i32, 2, 3]);
1013            builder.append(Some(val.as_scalar_ref()));
1014            assert!(builder.pop().is_some());
1015            assert!(builder.pop().is_none());
1016            let arr = builder.finish();
1017            assert!(arr.is_empty());
1018        }
1019
1020        {
1021            let data_type = DataType::Int32.list().list();
1022            let mut builder = ListArrayBuilder::with_type(2, data_type);
1023            let val1 = ListValue::from_iter([1, 2, 3]);
1024            let val2 = ListValue::from_iter([1, 2, 3]);
1025            let list1 = ListValue::from_iter([val1, val2]);
1026            builder.append(Some(list1.as_scalar_ref()));
1027
1028            let val3 = ListValue::from_iter([1, 2, 3]);
1029            let val4 = ListValue::from_iter([1, 2, 3]);
1030            let list2 = ListValue::from_iter([val3, val4]);
1031
1032            builder.append(Some(list2.as_scalar_ref()));
1033
1034            assert!(builder.pop().is_some());
1035
1036            let arr = builder.finish();
1037            assert_eq!(arr.len(), 1);
1038            assert_eq!(arr.value_at(0).unwrap(), list1.as_scalar_ref());
1039        }
1040    }
1041
1042    #[test]
1043    fn test_list_nested_layout() {
1044        use crate::array::*;
1045
1046        let listarray1 = ListArray::from_iter([Some([1i32, 2]), Some([3, 4])]);
1047        let listarray2 = ListArray::from_iter([Some(vec![5, 6, 7]), None, Some(vec![8])]);
1048        let listarray3 = ListArray::from_iter([Some([9, 10])]);
1049
1050        let nestarray = ListArray::from_iter(
1051            [listarray1, listarray2, listarray3]
1052                .into_iter()
1053                .map(|l| ListValue::new(l.into())),
1054        );
1055        let actual = ListArray::from_protobuf(&nestarray.to_protobuf()).unwrap();
1056        assert_eq!(ArrayImpl::List(nestarray), actual);
1057    }
1058
1059    #[test]
1060    fn test_list_value_cmp() {
1061        // ARRAY[1, 1] < ARRAY[1, 2, 1]
1062        assert_lt!(
1063            ListValue::from_iter([1, 1]),
1064            ListValue::from_iter([1, 2, 1]),
1065        );
1066        // ARRAY[1, 2] < ARRAY[1, 2, 1]
1067        assert_lt!(
1068            ListValue::from_iter([1, 2]),
1069            ListValue::from_iter([1, 2, 1]),
1070        );
1071        // ARRAY[1, 3] > ARRAY[1, 2, 1]
1072        assert_gt!(
1073            ListValue::from_iter([1, 3]),
1074            ListValue::from_iter([1, 2, 1]),
1075        );
1076        // null > 1
1077        assert_gt!(
1078            ListValue::from_iter([None::<i32>]),
1079            ListValue::from_iter([1]),
1080        );
1081        // ARRAY[1, 2, null] > ARRAY[1, 2, 1]
1082        assert_gt!(
1083            ListValue::from_iter([Some(1), Some(2), None]),
1084            ListValue::from_iter([Some(1), Some(2), Some(1)]),
1085        );
1086        // Null value in first ARRAY results into a Greater ordering regardless of the smaller ARRAY
1087        // length. ARRAY[1, null] > ARRAY[1, 2, 3]
1088        assert_gt!(
1089            ListValue::from_iter([Some(1), None]),
1090            ListValue::from_iter([Some(1), Some(2), Some(3)]),
1091        );
1092        // ARRAY[1, null] == ARRAY[1, null]
1093        assert_eq!(
1094            ListValue::from_iter([Some(1), None]),
1095            ListValue::from_iter([Some(1), None]),
1096        );
1097    }
1098
1099    #[test]
1100    fn test_list_ref_display() {
1101        let v = ListValue::from_iter([Some(1), None]);
1102        assert_eq!(v.to_string(), "{1,NULL}");
1103    }
1104
1105    #[test]
1106    fn test_serialize_deserialize() {
1107        let value = ListValue::from_iter([Some("abcd"), Some(""), None, Some("a")]);
1108        let list_ref = value.as_scalar_ref();
1109        let mut serializer = memcomparable::Serializer::new(vec![]);
1110        serializer.set_reverse(true);
1111        list_ref.memcmp_serialize(&mut serializer).unwrap();
1112        let buf = serializer.into_inner();
1113        let mut deserializer = memcomparable::Deserializer::new(&buf[..]);
1114        deserializer.set_reverse(true);
1115        assert_eq!(
1116            ListValue::memcmp_deserialize(&ListType::new(DataType::Varchar), &mut deserializer)
1117                .unwrap(),
1118            value
1119        );
1120
1121        let mut builder = ListArrayBuilder::with_type(0, DataType::Varchar.list());
1122        builder.append(Some(list_ref));
1123        let array = builder.finish();
1124        let list_ref = array.value_at(0).unwrap();
1125        let mut serializer = memcomparable::Serializer::new(vec![]);
1126        list_ref.memcmp_serialize(&mut serializer).unwrap();
1127        let buf = serializer.into_inner();
1128        let mut deserializer = memcomparable::Deserializer::new(&buf[..]);
1129        assert_eq!(
1130            ListValue::memcmp_deserialize(&ListType::new(DataType::Varchar), &mut deserializer)
1131                .unwrap(),
1132            value
1133        );
1134    }
1135
1136    #[test]
1137    fn test_memcomparable() {
1138        let cases = [
1139            (
1140                ListValue::from_iter([123, 456]),
1141                ListValue::from_iter([123, 789]),
1142            ),
1143            (
1144                ListValue::from_iter([123, 456]),
1145                ListValue::from_iter([123]),
1146            ),
1147            (
1148                ListValue::from_iter([None, Some("")]),
1149                ListValue::from_iter([None, None::<&str>]),
1150            ),
1151            (
1152                ListValue::from_iter([Some(2)]),
1153                ListValue::from_iter([Some(1), None, Some(3)]),
1154            ),
1155        ];
1156
1157        for (lhs, rhs) in cases {
1158            let lhs_serialized = {
1159                let mut serializer = memcomparable::Serializer::new(vec![]);
1160                lhs.as_scalar_ref()
1161                    .memcmp_serialize(&mut serializer)
1162                    .unwrap();
1163                serializer.into_inner()
1164            };
1165            let rhs_serialized = {
1166                let mut serializer = memcomparable::Serializer::new(vec![]);
1167                rhs.as_scalar_ref()
1168                    .memcmp_serialize(&mut serializer)
1169                    .unwrap();
1170                serializer.into_inner()
1171            };
1172            assert_eq!(lhs_serialized.cmp(&rhs_serialized), lhs.cmp(&rhs));
1173        }
1174    }
1175
1176    #[test]
1177    fn test_listref() {
1178        use crate::array::*;
1179        use crate::types;
1180
1181        let arr = ListArray::from_iter([Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6, 7])]);
1182
1183        // get 3rd ListRef from ListArray
1184        let list_ref = arr.value_at(2).unwrap();
1185        assert_eq!(list_ref, ListValue::from_iter([4, 5, 6, 7]).as_scalar_ref());
1186
1187        // Get 2nd value from ListRef
1188        let scalar = list_ref.get(1).unwrap();
1189        assert_eq!(scalar, Some(types::ScalarRefImpl::Int32(5)));
1190    }
1191
1192    #[test]
1193    fn test_from_to_literal() {
1194        #[track_caller]
1195        fn test(typestr: &str, input: &str, output: Option<&str>) {
1196            let datatype: DataType = typestr.parse().unwrap();
1197            let list = ListValue::from_str(input, &datatype).unwrap();
1198            let actual = list.as_scalar_ref().to_text();
1199            let output = output.unwrap_or(input);
1200            assert_eq!(actual, output);
1201        }
1202
1203        #[track_caller]
1204        fn test_err(typestr: &str, input: &str, err: &str) {
1205            let datatype: DataType = typestr.parse().unwrap();
1206            let actual_err = ListValue::from_str(input, &datatype).unwrap_err();
1207            assert_eq!(actual_err, err);
1208        }
1209
1210        test("varchar[]", "{}", None);
1211        test("varchar[]", "{1 2}", Some(r#"{"1 2"}"#));
1212        test("varchar[]", "{🥵,🤡}", None);
1213        test("varchar[]", r#"{aa\\bb}"#, Some(r#"{"aa\\bb"}"#));
1214        test("int[]", "{1,2,3}", None);
1215        test("varchar[]", r#"{"1,2"}"#, None);
1216        test("varchar[]", r#"{1, ""}"#, Some(r#"{1,""}"#));
1217        test("varchar[]", r#"{"\""}"#, None);
1218        test("varchar[]", r#"{\   }"#, Some(r#"{" "}"#));
1219        test("varchar[]", r#"{\\  }"#, Some(r#"{"\\"}"#));
1220        test("varchar[]", "{nulla}", None);
1221        test("varchar[]", "{null a}", Some(r#"{"null a"}"#));
1222        test(
1223            "varchar[]",
1224            r#"{"null", "NULL", null, NuLL}"#,
1225            Some(r#"{"null","NULL",NULL,NULL}"#),
1226        );
1227        test("varchar[][]", "{{1, 2, 3}, null }", Some("{{1,2,3},NULL}"));
1228        test(
1229            "varchar[][][]",
1230            "{{{1, 2, 3}}, {{4, 5, 6}}}",
1231            Some("{{{1,2,3}},{{4,5,6}}}"),
1232        );
1233        test_err("varchar[]", "()", r#"Array value must start with "{""#);
1234        test_err("varchar[]", "{1,", r#"Unexpected end of input."#);
1235        test_err("varchar[]", "{1,}", r#"Unexpected "}" character."#);
1236        test_err("varchar[]", "{1,,3}", r#"Unexpected "," character."#);
1237        test_err("varchar[]", r#"{"a""b"}"#, r#"Unexpected array element."#);
1238        test_err("varchar[]", r#"{}{"#, r#"Junk after closing right brace."#);
1239    }
1240}