risingwave_common/array/
list_array.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::borrow::Cow;
16use std::cmp::Ordering;
17use std::fmt::{self, Debug, Display};
18use std::future::Future;
19use std::mem::size_of;
20
21use bytes::{Buf, BufMut};
22use itertools::Itertools;
23use risingwave_common_estimate_size::EstimateSize;
24use risingwave_pb::data::{ListArrayData, PbArray, PbArrayType};
25use serde::{Deserialize, Serializer};
26use thiserror_ext::AsReport;
27
28use super::{
29    Array, ArrayBuilder, ArrayBuilderImpl, ArrayImpl, ArrayResult, BoolArray, PrimitiveArray,
30    PrimitiveArrayItemType, RowRef, Utf8Array,
31};
32use crate::bitmap::{Bitmap, BitmapBuilder};
33use crate::row::Row;
34use crate::types::{
35    DataType, Datum, DatumRef, DefaultOrd, Scalar, ScalarImpl, ScalarRefImpl, ToDatumRef, ToText,
36    hash_datum,
37};
38use crate::util::memcmp_encoding;
39use crate::util::value_encoding::estimate_serialize_datum_size;
40
41#[derive(Debug, Clone, EstimateSize)]
42pub struct ListArrayBuilder {
43    bitmap: BitmapBuilder,
44    offsets: Vec<u32>,
45    value: Box<ArrayBuilderImpl>,
46}
47
48impl ArrayBuilder for ListArrayBuilder {
49    type ArrayType = ListArray;
50
51    #[cfg(not(test))]
52    fn new(_capacity: usize) -> Self {
53        panic!("please use `ListArrayBuilder::with_type` instead");
54    }
55
56    #[cfg(test)]
57    fn new(capacity: usize) -> Self {
58        // TODO: deprecate this
59        Self::with_type(
60            capacity,
61            // Default datatype
62            DataType::List(Box::new(DataType::Int16)),
63        )
64    }
65
66    fn with_type(capacity: usize, ty: DataType) -> Self {
67        let DataType::List(value_type) = ty else {
68            panic!("data type must be DataType::List");
69        };
70        let mut offsets = Vec::with_capacity(capacity + 1);
71        offsets.push(0);
72        Self {
73            bitmap: BitmapBuilder::with_capacity(capacity),
74            offsets,
75            value: Box::new(value_type.create_array_builder(capacity)),
76        }
77    }
78
79    fn append_n(&mut self, n: usize, value: Option<ListRef<'_>>) {
80        match value {
81            None => {
82                self.bitmap.append_n(n, false);
83                let last = *self.offsets.last().unwrap();
84                for _ in 0..n {
85                    self.offsets.push(last);
86                }
87            }
88            Some(v) => {
89                self.bitmap.append_n(n, true);
90                for _ in 0..n {
91                    let last = *self.offsets.last().unwrap();
92                    let elems = v.iter();
93                    self.offsets.push(
94                        last.checked_add(elems.len() as u32)
95                            .expect("offset overflow"),
96                    );
97                    for elem in elems {
98                        self.value.append(elem);
99                    }
100                }
101            }
102        }
103    }
104
105    fn append_array(&mut self, other: &ListArray) {
106        self.bitmap.append_bitmap(&other.bitmap);
107        let last = *self.offsets.last().unwrap();
108        self.offsets
109            .append(&mut other.offsets[1..].iter().map(|o| *o + last).collect());
110        self.value.append_array(&other.value);
111    }
112
113    fn pop(&mut self) -> Option<()> {
114        self.bitmap.pop()?;
115        let start = self.offsets.pop().unwrap();
116        let end = *self.offsets.last().unwrap();
117        for _ in end..start {
118            self.value.pop().unwrap();
119        }
120        Some(())
121    }
122
123    fn len(&self) -> usize {
124        self.bitmap.len()
125    }
126
127    fn finish(self) -> ListArray {
128        ListArray {
129            bitmap: self.bitmap.finish(),
130            offsets: self.offsets.into(),
131            value: Box::new(self.value.finish()),
132        }
133    }
134}
135
136impl ListArrayBuilder {
137    pub fn append_row_ref(&mut self, row: RowRef<'_>) {
138        self.bitmap.append(true);
139        let last = *self.offsets.last().unwrap();
140        self.offsets
141            .push(last.checked_add(row.len() as u32).expect("offset overflow"));
142        for v in row.iter() {
143            self.value.append(v);
144        }
145    }
146}
147
148/// Each item of this `ListArray` is a `List<T>`, or called `T[]` (T array).
149///
150/// * As other arrays, there is a null bitmap, with `1` meaning nonnull and `0` meaning null.
151/// * As [`super::BytesArray`], there is an offsets `Vec` and a value `Array`. The value `Array` has
152///   all items concatenated, and the offsets `Vec` stores start and end indices into it for
153///   slicing. Effectively, the inner array is the flattened form, and `offsets.len() == n + 1`.
154///
155/// For example, `values (array[1]), (array[]::int[]), (null), (array[2, 3]);` stores an inner
156///  `I32Array` with `[1, 2, 3]`, along with offsets `[0, 1, 1, 1, 3]` and null bitmap `TTFT`.
157#[derive(Debug, Clone, PartialEq, Eq)]
158pub struct ListArray {
159    pub(super) bitmap: Bitmap,
160    pub(super) offsets: Box<[u32]>,
161    pub(super) value: Box<ArrayImpl>,
162}
163
164impl EstimateSize for ListArray {
165    fn estimated_heap_size(&self) -> usize {
166        self.bitmap.estimated_heap_size()
167            + self.offsets.len() * size_of::<u32>()
168            + self.value.estimated_size()
169    }
170}
171
172impl Array for ListArray {
173    type Builder = ListArrayBuilder;
174    type OwnedItem = ListValue;
175    type RefItem<'a> = ListRef<'a>;
176
177    unsafe fn raw_value_at_unchecked(&self, idx: usize) -> Self::RefItem<'_> {
178        unsafe {
179            ListRef {
180                array: &self.value,
181                start: *self.offsets.get_unchecked(idx),
182                end: *self.offsets.get_unchecked(idx + 1),
183            }
184        }
185    }
186
187    fn len(&self) -> usize {
188        self.bitmap.len()
189    }
190
191    fn to_protobuf(&self) -> PbArray {
192        let value = self.value.to_protobuf();
193        PbArray {
194            array_type: PbArrayType::List as i32,
195            struct_array_data: None,
196            list_array_data: Some(Box::new(ListArrayData {
197                offsets: self.offsets.to_vec(),
198                value: Some(Box::new(value)),
199                value_type: Some(self.value.data_type().to_protobuf()),
200            })),
201            null_bitmap: Some(self.bitmap.to_protobuf()),
202            values: vec![],
203        }
204    }
205
206    fn null_bitmap(&self) -> &Bitmap {
207        &self.bitmap
208    }
209
210    fn into_null_bitmap(self) -> Bitmap {
211        self.bitmap
212    }
213
214    fn set_bitmap(&mut self, bitmap: Bitmap) {
215        self.bitmap = bitmap;
216    }
217
218    fn data_type(&self) -> DataType {
219        DataType::List(Box::new(self.value.data_type()))
220    }
221}
222
223impl ListArray {
224    /// Flatten the list array into a single array.
225    ///
226    /// # Example
227    ///
228    /// ```text
229    /// [[1,2,3],NULL,[4,5]] => [1,2,3,4,5]
230    /// [[[1],[2]],[[3],[4]]] => [1,2,3,4]
231    /// ```
232    pub fn flatten(&self) -> ArrayImpl {
233        match &*self.value {
234            ArrayImpl::List(inner) => inner.flatten(),
235            a => a.clone(),
236        }
237    }
238
239    /// Return the inner array of the list array.
240    pub fn values(&self) -> &ArrayImpl {
241        &self.value
242    }
243
244    pub fn from_protobuf(array: &PbArray) -> ArrayResult<ArrayImpl> {
245        ensure!(
246            array.values.is_empty(),
247            "Must have no buffer in a list array"
248        );
249        debug_assert!(
250            (array.array_type == PbArrayType::List as i32)
251                || (array.array_type == PbArrayType::Map as i32),
252            "invalid array type for list: {}",
253            array.array_type
254        );
255        let bitmap: Bitmap = array.get_null_bitmap()?.into();
256        let array_data = array.get_list_array_data()?.to_owned();
257        let flatten_len = match array_data.offsets.last() {
258            Some(&n) => n as usize,
259            None => bail!("Must have at least one element in offsets"),
260        };
261        let value = ArrayImpl::from_protobuf(array_data.value.as_ref().unwrap(), flatten_len)?;
262        let arr = ListArray {
263            bitmap,
264            offsets: array_data.offsets.into(),
265            value: Box::new(value),
266        };
267        Ok(arr.into())
268    }
269
270    /// Apply the function on the underlying elements.
271    /// e.g. `map_inner([[1,2,3],NULL,[4,5]], DOUBLE) = [[2,4,6],NULL,[8,10]]`
272    pub async fn map_inner<E, Fut, F>(self, f: F) -> std::result::Result<ListArray, E>
273    where
274        F: FnOnce(ArrayImpl) -> Fut,
275        Fut: Future<Output = std::result::Result<ArrayImpl, E>>,
276    {
277        let new_value = (f)(*self.value).await?;
278
279        Ok(Self {
280            offsets: self.offsets,
281            bitmap: self.bitmap,
282            value: Box::new(new_value),
283        })
284    }
285
286    /// Returns the offsets of this list.
287    ///
288    /// # Example
289    /// ```text
290    /// list    = [[a, b, c], [], NULL, [d], [NULL, f]]
291    /// offsets = [0, 3, 3, 3, 4, 6]
292    /// ```
293    pub fn offsets(&self) -> &[u32] {
294        &self.offsets
295    }
296}
297
298impl<T, L> FromIterator<Option<L>> for ListArray
299where
300    T: PrimitiveArrayItemType,
301    L: IntoIterator<Item = T>,
302{
303    fn from_iter<I: IntoIterator<Item = Option<L>>>(iter: I) -> Self {
304        let iter = iter.into_iter();
305        let mut builder = ListArrayBuilder::with_type(
306            iter.size_hint().0,
307            DataType::List(Box::new(T::DATA_TYPE.clone())),
308        );
309        for v in iter {
310            match v {
311                None => builder.append(None),
312                Some(v) => {
313                    builder.append(Some(v.into_iter().collect::<ListValue>().as_scalar_ref()))
314                }
315            }
316        }
317        builder.finish()
318    }
319}
320
321impl FromIterator<ListValue> for ListArray {
322    fn from_iter<I: IntoIterator<Item = ListValue>>(iter: I) -> Self {
323        let mut iter = iter.into_iter();
324        let first = iter.next().expect("empty iterator");
325        let mut builder = ListArrayBuilder::with_type(
326            iter.size_hint().0,
327            DataType::List(Box::new(first.data_type())),
328        );
329        builder.append(Some(first.as_scalar_ref()));
330        for v in iter {
331            builder.append(Some(v.as_scalar_ref()));
332        }
333        builder.finish()
334    }
335}
336
337#[derive(Clone, PartialEq, Eq, EstimateSize)]
338pub struct ListValue {
339    values: Box<ArrayImpl>,
340}
341
342impl Debug for ListValue {
343    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
344        self.as_scalar_ref().fmt(f)
345    }
346}
347
348impl Display for ListValue {
349    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
350        self.as_scalar_ref().write(f)
351    }
352}
353
354impl ListValue {
355    pub fn new(values: ArrayImpl) -> Self {
356        Self {
357            values: Box::new(values),
358        }
359    }
360
361    pub fn into_array(self) -> ArrayImpl {
362        *self.values
363    }
364
365    pub fn empty(datatype: &DataType) -> Self {
366        Self::new(datatype.create_array_builder(0).finish())
367    }
368
369    /// Creates a new `ListValue` from an iterator of `Datum`.
370    pub fn from_datum_iter<T: ToDatumRef>(
371        elem_datatype: &DataType,
372        iter: impl IntoIterator<Item = T>,
373    ) -> Self {
374        let iter = iter.into_iter();
375        let mut builder = elem_datatype.create_array_builder(iter.size_hint().0);
376        for datum in iter {
377            builder.append(datum);
378        }
379        Self::new(builder.finish())
380    }
381
382    /// Returns the length of the list.
383    pub fn len(&self) -> usize {
384        self.values.len()
385    }
386
387    /// Returns `true` if the list has a length of 0.
388    pub fn is_empty(&self) -> bool {
389        self.values.is_empty()
390    }
391
392    /// Iterates over the elements of the list.
393    pub fn iter(&self) -> impl DoubleEndedIterator + ExactSizeIterator<Item = DatumRef<'_>> {
394        self.values.iter()
395    }
396
397    /// Get the element at the given index. Returns `None` if the index is out of bounds.
398    pub fn get(&self, index: usize) -> Option<DatumRef<'_>> {
399        if index < self.len() {
400            Some(self.values.value_at(index))
401        } else {
402            None
403        }
404    }
405
406    /// Returns the data type of the elements in the list.
407    pub fn data_type(&self) -> DataType {
408        self.values.data_type()
409    }
410
411    pub fn memcmp_deserialize(
412        item_datatype: &DataType,
413        deserializer: &mut memcomparable::Deserializer<impl Buf>,
414    ) -> memcomparable::Result<Self> {
415        let bytes = serde_bytes::ByteBuf::deserialize(deserializer)?;
416        let mut inner_deserializer = memcomparable::Deserializer::new(bytes.as_slice());
417        let mut builder = item_datatype.create_array_builder(0);
418        while inner_deserializer.has_remaining() {
419            builder.append(memcmp_encoding::deserialize_datum_in_composite(
420                item_datatype,
421                &mut inner_deserializer,
422            )?)
423        }
424        Ok(Self::new(builder.finish()))
425    }
426
427    // Used to display ListValue in explain for better readibilty.
428    pub fn display_for_explain(&self) -> String {
429        // Example of ListValue display: ARRAY[1, 2, null]
430        format!(
431            "ARRAY[{}]",
432            self.iter()
433                .map(|v| {
434                    match v.as_ref() {
435                        None => "null".into(),
436                        Some(scalar) => scalar.to_text(),
437                    }
438                })
439                .format(", ")
440        )
441    }
442
443    /// Returns a mutable slice if the list is of type `int64[]`.
444    pub fn as_i64_mut_slice(&mut self) -> Option<&mut [i64]> {
445        match self.values.as_mut() {
446            ArrayImpl::Int64(array) => Some(array.as_mut_slice()),
447            _ => None,
448        }
449    }
450}
451
452impl PartialOrd for ListValue {
453    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
454        Some(self.cmp(other))
455    }
456}
457
458impl Ord for ListValue {
459    fn cmp(&self, other: &Self) -> Ordering {
460        self.as_scalar_ref().cmp(&other.as_scalar_ref())
461    }
462}
463
464impl<T: PrimitiveArrayItemType> FromIterator<Option<T>> for ListValue {
465    fn from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
466        Self::new(iter.into_iter().collect::<PrimitiveArray<T>>().into())
467    }
468}
469
470impl<T: PrimitiveArrayItemType> FromIterator<T> for ListValue {
471    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
472        Self::new(iter.into_iter().collect::<PrimitiveArray<T>>().into())
473    }
474}
475
476impl FromIterator<bool> for ListValue {
477    fn from_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
478        Self::new(iter.into_iter().collect::<BoolArray>().into())
479    }
480}
481
482impl<'a> FromIterator<Option<&'a str>> for ListValue {
483    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
484        Self::new(iter.into_iter().collect::<Utf8Array>().into())
485    }
486}
487
488impl<'a> FromIterator<&'a str> for ListValue {
489    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
490        Self::new(iter.into_iter().collect::<Utf8Array>().into())
491    }
492}
493
494impl FromIterator<ListValue> for ListValue {
495    fn from_iter<I: IntoIterator<Item = ListValue>>(iter: I) -> Self {
496        Self::new(iter.into_iter().collect::<ListArray>().into())
497    }
498}
499
500impl From<ListValue> for ArrayImpl {
501    fn from(value: ListValue) -> Self {
502        *value.values
503    }
504}
505
506/// A slice of an array
507#[derive(Copy, Clone)]
508pub struct ListRef<'a> {
509    array: &'a ArrayImpl,
510    start: u32,
511    end: u32,
512}
513
514impl<'a> ListRef<'a> {
515    /// Returns the length of the list.
516    pub fn len(&self) -> usize {
517        (self.end - self.start) as usize
518    }
519
520    /// Returns `true` if the list has a length of 0.
521    pub fn is_empty(&self) -> bool {
522        self.start == self.end
523    }
524
525    /// Returns the data type of the elements in the list.
526    pub fn data_type(&self) -> DataType {
527        self.array.data_type()
528    }
529
530    /// Returns the elements in the flattened list.
531    pub fn flatten(self) -> ListRef<'a> {
532        match self.array {
533            ArrayImpl::List(inner) => ListRef {
534                array: &inner.value,
535                start: inner.offsets[self.start as usize],
536                end: inner.offsets[self.end as usize],
537            }
538            .flatten(),
539            _ => self,
540        }
541    }
542
543    /// Iterates over the elements of the list.
544    pub fn iter(self) -> impl DoubleEndedIterator + ExactSizeIterator<Item = DatumRef<'a>> + 'a {
545        (self.start..self.end).map(|i| self.array.value_at(i as usize))
546    }
547
548    /// Get the element at the given index. Returns `None` if the index is out of bounds.
549    pub fn get(self, index: usize) -> Option<DatumRef<'a>> {
550        if index < self.len() {
551            Some(self.array.value_at(self.start as usize + index))
552        } else {
553            None
554        }
555    }
556
557    pub fn memcmp_serialize(
558        self,
559        serializer: &mut memcomparable::Serializer<impl BufMut>,
560    ) -> memcomparable::Result<()> {
561        let mut inner_serializer = memcomparable::Serializer::new(vec![]);
562        for datum_ref in self.iter() {
563            memcmp_encoding::serialize_datum_in_composite(datum_ref, &mut inner_serializer)?
564        }
565        serializer.serialize_bytes(&inner_serializer.into_inner())
566    }
567
568    pub fn hash_scalar_inner<H: std::hash::Hasher>(self, state: &mut H) {
569        for datum_ref in self.iter() {
570            hash_datum(datum_ref, state);
571        }
572    }
573
574    /// estimate the serialized size with value encoding
575    pub fn estimate_serialize_size_inner(self) -> usize {
576        self.iter().map(estimate_serialize_datum_size).sum()
577    }
578
579    pub fn to_owned(self) -> ListValue {
580        let mut builder = self.array.create_builder(self.len());
581        for datum_ref in self.iter() {
582            builder.append(datum_ref);
583        }
584        ListValue::new(builder.finish())
585    }
586
587    /// Returns a slice if the list is of type `int64[]`.
588    pub fn as_i64_slice(&self) -> Option<&[i64]> {
589        match &self.array {
590            ArrayImpl::Int64(array) => {
591                Some(&array.as_slice()[self.start as usize..self.end as usize])
592            }
593            _ => None,
594        }
595    }
596
597    /// # Panics
598    /// Panics if the list is not a map's internal representation (See [`super::MapArray`]).
599    pub(super) fn as_map_kv(self) -> (ListRef<'a>, ListRef<'a>) {
600        let (k, v) = self.array.as_struct().fields().collect_tuple().unwrap();
601        (
602            ListRef {
603                array: k,
604                start: self.start,
605                end: self.end,
606            },
607            ListRef {
608                array: v,
609                start: self.start,
610                end: self.end,
611            },
612        )
613    }
614}
615
616impl PartialEq for ListRef<'_> {
617    fn eq(&self, other: &Self) -> bool {
618        self.iter().eq(other.iter())
619    }
620}
621
622impl Eq for ListRef<'_> {}
623
624impl PartialOrd for ListRef<'_> {
625    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
626        Some(self.cmp(other))
627    }
628}
629
630impl Ord for ListRef<'_> {
631    fn cmp(&self, other: &Self) -> Ordering {
632        self.iter().cmp_by(other.iter(), |a, b| a.default_cmp(&b))
633    }
634}
635
636impl Debug for ListRef<'_> {
637    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
638        f.debug_list().entries(self.iter()).finish()
639    }
640}
641
642impl Row for ListRef<'_> {
643    fn datum_at(&self, index: usize) -> DatumRef<'_> {
644        self.array.value_at(self.start as usize + index)
645    }
646
647    unsafe fn datum_at_unchecked(&self, index: usize) -> DatumRef<'_> {
648        unsafe { self.array.value_at_unchecked(self.start as usize + index) }
649    }
650
651    fn len(&self) -> usize {
652        self.len()
653    }
654
655    fn iter(&self) -> impl Iterator<Item = DatumRef<'_>> {
656        (*self).iter()
657    }
658}
659
660impl ToText for ListRef<'_> {
661    // This function will be invoked when pgwire prints a list value in string.
662    // Refer to PostgreSQL `array_out` or `appendPGArray`.
663    fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
664        write!(
665            f,
666            "{{{}}}",
667            self.iter().format_with(",", |datum_ref, f| {
668                let s = datum_ref.to_text();
669                // Never quote null or inner list, but quote empty, verbatim 'null', special
670                // chars and whitespaces.
671                let need_quote = !matches!(datum_ref, None | Some(ScalarRefImpl::List(_)))
672                    && (s.is_empty()
673                        || s.eq_ignore_ascii_case("null")
674                        || s.contains([
675                            '"', '\\', ',',
676                            // whilespace:
677                            // PostgreSQL `array_isspace` includes '\x0B' but rust
678                            // [`char::is_ascii_whitespace`] does not.
679                            ' ', '\t', '\n', '\r', '\x0B', '\x0C', // list-specific:
680                            '{', '}',
681                        ]));
682                if need_quote {
683                    f(&"\"")?;
684                    s.chars().try_for_each(|c| {
685                        if c == '"' || c == '\\' {
686                            f(&"\\")?;
687                        }
688                        f(&c)
689                    })?;
690                    f(&"\"")
691                } else {
692                    f(&s)
693                }
694            })
695        )
696    }
697
698    fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
699        match ty {
700            DataType::List { .. } => self.write(f),
701            _ => unreachable!(),
702        }
703    }
704}
705
706impl<'a> From<&'a ListValue> for ListRef<'a> {
707    fn from(value: &'a ListValue) -> Self {
708        ListRef {
709            array: &value.values,
710            start: 0,
711            end: value.len() as u32,
712        }
713    }
714}
715
716impl From<ListRef<'_>> for ListValue {
717    fn from(value: ListRef<'_>) -> Self {
718        value.to_owned()
719    }
720}
721
722impl ListValue {
723    /// Construct an array from literal string.
724    pub fn from_str(input: &str, data_type: &DataType) -> Result<Self, String> {
725        struct Parser<'a> {
726            input: &'a str,
727            data_type: &'a DataType,
728        }
729
730        impl Parser<'_> {
731            /// Parse a datum.
732            fn parse(&mut self) -> Result<Datum, String> {
733                self.skip_whitespace();
734                if self.data_type.is_array() {
735                    if self.try_parse_null() {
736                        return Ok(None);
737                    }
738                    Ok(Some(self.parse_array()?.into()))
739                } else {
740                    self.parse_value()
741                }
742            }
743
744            /// Parse an array.
745            fn parse_array(&mut self) -> Result<ListValue, String> {
746                self.skip_whitespace();
747                if !self.try_consume('{') {
748                    return Err("Array value must start with \"{\"".to_owned());
749                }
750                self.skip_whitespace();
751                if self.try_consume('}') {
752                    return Ok(ListValue::empty(self.data_type.as_list_element_type()));
753                }
754                let mut builder =
755                    ArrayBuilderImpl::with_type(0, self.data_type.as_list_element_type().clone());
756                loop {
757                    let mut parser = Self {
758                        input: self.input,
759                        data_type: self.data_type.as_list_element_type(),
760                    };
761                    builder.append(parser.parse()?);
762                    self.input = parser.input;
763
764                    // expect ',' or '}'
765                    self.skip_whitespace();
766                    match self.peek() {
767                        Some(',') => {
768                            self.try_consume(',');
769                        }
770                        Some('}') => {
771                            self.try_consume('}');
772                            break;
773                        }
774                        None => return Err(Self::eoi()),
775                        _ => return Err("Unexpected array element.".to_owned()),
776                    }
777                }
778                Ok(ListValue::new(builder.finish()))
779            }
780
781            /// Parse a non-array value.
782            fn parse_value(&mut self) -> Result<Datum, String> {
783                if self.peek() == Some('"') {
784                    return Ok(Some(self.parse_quoted()?));
785                }
786                // peek until the next unescaped ',' or '}'
787                let mut chars = self.input.char_indices();
788                let mut has_escape = false;
789                let s = loop {
790                    match chars.next().ok_or_else(Self::eoi)? {
791                        (_, '\\') => {
792                            has_escape = true;
793                            chars.next().ok_or_else(Self::eoi)?;
794                        }
795                        (i, c @ ',' | c @ '}') => {
796                            let s = &self.input[..i];
797                            // consume the value and leave the ',' or '}' for parent
798                            self.input = &self.input[i..];
799
800                            break if has_escape {
801                                Cow::Owned(Self::unescape_trim_end(s))
802                            } else {
803                                let trimmed = s.trim_end();
804                                if trimmed.is_empty() {
805                                    return Err(format!("Unexpected \"{c}\" character."));
806                                }
807                                if trimmed.eq_ignore_ascii_case("null") {
808                                    return Ok(None);
809                                }
810                                Cow::Borrowed(trimmed)
811                            };
812                        }
813                        (_, '{') => return Err("Unexpected \"{\" character.".to_owned()),
814                        (_, '"') => return Err("Unexpected array element.".to_owned()),
815                        _ => {}
816                    }
817                };
818                Ok(Some(
819                    ScalarImpl::from_text(&s, self.data_type).map_err(|e| e.to_report_string())?,
820                ))
821            }
822
823            /// Parse a double quoted non-array value.
824            fn parse_quoted(&mut self) -> Result<ScalarImpl, String> {
825                assert!(self.try_consume('"'));
826                // peek until the next unescaped '"'
827                let mut chars = self.input.char_indices();
828                let mut has_escape = false;
829                let s = loop {
830                    match chars.next().ok_or_else(Self::eoi)? {
831                        (_, '\\') => {
832                            has_escape = true;
833                            chars.next().ok_or_else(Self::eoi)?;
834                        }
835                        (i, '"') => {
836                            let s = &self.input[..i];
837                            self.input = &self.input[i + 1..];
838                            break if has_escape {
839                                Cow::Owned(Self::unescape(s))
840                            } else {
841                                Cow::Borrowed(s)
842                            };
843                        }
844                        _ => {}
845                    }
846                };
847                ScalarImpl::from_text(&s, self.data_type).map_err(|e| e.to_report_string())
848            }
849
850            /// Unescape a string.
851            fn unescape(s: &str) -> String {
852                let mut unescaped = String::with_capacity(s.len());
853                let mut chars = s.chars();
854                while let Some(mut c) = chars.next() {
855                    if c == '\\' {
856                        c = chars.next().unwrap();
857                    }
858                    unescaped.push(c);
859                }
860                unescaped
861            }
862
863            /// Unescape a string and trim the trailing whitespaces.
864            ///
865            /// Example: `"\  " -> " "`
866            fn unescape_trim_end(s: &str) -> String {
867                let mut unescaped = String::with_capacity(s.len());
868                let mut chars = s.chars();
869                let mut len_after_last_escaped_char = 0;
870                while let Some(mut c) = chars.next() {
871                    if c == '\\' {
872                        c = chars.next().unwrap();
873                        unescaped.push(c);
874                        len_after_last_escaped_char = unescaped.len();
875                    } else {
876                        unescaped.push(c);
877                    }
878                }
879                let l = unescaped[len_after_last_escaped_char..].trim_end().len();
880                unescaped.truncate(len_after_last_escaped_char + l);
881                unescaped
882            }
883
884            /// Consume the next 4 characters if it matches "null".
885            ///
886            /// Note: We don't use this function when parsing non-array values.
887            ///       Because we can't decide whether it is a null value or a string starts with "null".
888            ///       Consider this case: `{null value}` => `["null value"]`
889            fn try_parse_null(&mut self) -> bool {
890                if let Some(s) = self.input.get(..4)
891                    && s.eq_ignore_ascii_case("null")
892                {
893                    let next_char = self.input[4..].chars().next();
894                    match next_char {
895                        None | Some(',' | '}') => {}
896                        Some(c) if c.is_ascii_whitespace() => {}
897                        // following normal characters
898                        _ => return false,
899                    }
900                    self.input = &self.input[4..];
901                    true
902                } else {
903                    false
904                }
905            }
906
907            /// Consume the next character if it matches `c`.
908            fn try_consume(&mut self, c: char) -> bool {
909                if self.peek() == Some(c) {
910                    self.input = &self.input[c.len_utf8()..];
911                    true
912                } else {
913                    false
914                }
915            }
916
917            /// Expect end of input.
918            fn expect_end(&mut self) -> Result<(), String> {
919                self.skip_whitespace();
920                match self.peek() {
921                    Some(_) => Err("Junk after closing right brace.".to_owned()),
922                    None => Ok(()),
923                }
924            }
925
926            /// Skip whitespaces.
927            fn skip_whitespace(&mut self) {
928                self.input = match self
929                    .input
930                    .char_indices()
931                    .find(|(_, c)| !c.is_ascii_whitespace())
932                {
933                    Some((i, _)) => &self.input[i..],
934                    None => "",
935                };
936            }
937
938            /// Peek the next character.
939            fn peek(&self) -> Option<char> {
940                self.input.chars().next()
941            }
942
943            /// Return the error message for unexpected end of input.
944            fn eoi() -> String {
945                "Unexpected end of input.".into()
946            }
947        }
948
949        let mut parser = Parser { input, data_type };
950        let array = parser.parse_array()?;
951        parser.expect_end()?;
952        Ok(array)
953    }
954}
955
956#[cfg(test)]
957mod tests {
958    use more_asserts::{assert_gt, assert_lt};
959
960    use super::*;
961
962    #[test]
963    fn test_protobuf() {
964        use crate::array::*;
965        let array = ListArray::from_iter([
966            Some(vec![12i32, -7, 25]),
967            None,
968            Some(vec![0, -127, 127, 50]),
969            Some(vec![]),
970        ]);
971        let actual = ListArray::from_protobuf(&array.to_protobuf()).unwrap();
972        assert_eq!(actual, ArrayImpl::List(array));
973    }
974
975    #[test]
976    fn test_append_array() {
977        let part1 = ListArray::from_iter([Some([12i32, -7, 25]), None]);
978        let part2 = ListArray::from_iter([Some(vec![0, -127, 127, 50]), Some(vec![])]);
979
980        let mut builder = ListArrayBuilder::with_type(4, DataType::List(Box::new(DataType::Int32)));
981        builder.append_array(&part1);
982        builder.append_array(&part2);
983
984        let expected = ListArray::from_iter([
985            Some(vec![12i32, -7, 25]),
986            None,
987            Some(vec![0, -127, 127, 50]),
988            Some(vec![]),
989        ]);
990        assert_eq!(builder.finish(), expected);
991    }
992
993    // Ensure `create_builder` exactly copies the same metadata.
994    #[test]
995    fn test_list_create_builder() {
996        use crate::array::*;
997        let arr = ListArray::from_iter([Some([F32::from(2.0), F32::from(42.0), F32::from(1.0)])]);
998        let arr2 = arr.create_builder(0).finish();
999        assert_eq!(arr.data_type(), arr2.data_type());
1000    }
1001
1002    #[test]
1003    fn test_builder_pop() {
1004        use crate::array::*;
1005
1006        {
1007            let mut builder =
1008                ListArrayBuilder::with_type(1, DataType::List(Box::new(DataType::Int32)));
1009            let val = ListValue::from_iter([1i32, 2, 3]);
1010            builder.append(Some(val.as_scalar_ref()));
1011            assert!(builder.pop().is_some());
1012            assert!(builder.pop().is_none());
1013            let arr = builder.finish();
1014            assert!(arr.is_empty());
1015        }
1016
1017        {
1018            let data_type = DataType::List(Box::new(DataType::List(Box::new(DataType::Int32))));
1019            let mut builder = ListArrayBuilder::with_type(2, data_type);
1020            let val1 = ListValue::from_iter([1, 2, 3]);
1021            let val2 = ListValue::from_iter([1, 2, 3]);
1022            let list1 = ListValue::from_iter([val1, val2]);
1023            builder.append(Some(list1.as_scalar_ref()));
1024
1025            let val3 = ListValue::from_iter([1, 2, 3]);
1026            let val4 = ListValue::from_iter([1, 2, 3]);
1027            let list2 = ListValue::from_iter([val3, val4]);
1028
1029            builder.append(Some(list2.as_scalar_ref()));
1030
1031            assert!(builder.pop().is_some());
1032
1033            let arr = builder.finish();
1034            assert_eq!(arr.len(), 1);
1035            assert_eq!(arr.value_at(0).unwrap(), list1.as_scalar_ref());
1036        }
1037    }
1038
1039    #[test]
1040    fn test_list_nested_layout() {
1041        use crate::array::*;
1042
1043        let listarray1 = ListArray::from_iter([Some([1i32, 2]), Some([3, 4])]);
1044        let listarray2 = ListArray::from_iter([Some(vec![5, 6, 7]), None, Some(vec![8])]);
1045        let listarray3 = ListArray::from_iter([Some([9, 10])]);
1046
1047        let nestarray = ListArray::from_iter(
1048            [listarray1, listarray2, listarray3]
1049                .into_iter()
1050                .map(|l| ListValue::new(l.into())),
1051        );
1052        let actual = ListArray::from_protobuf(&nestarray.to_protobuf()).unwrap();
1053        assert_eq!(ArrayImpl::List(nestarray), actual);
1054    }
1055
1056    #[test]
1057    fn test_list_value_cmp() {
1058        // ARRAY[1, 1] < ARRAY[1, 2, 1]
1059        assert_lt!(
1060            ListValue::from_iter([1, 1]),
1061            ListValue::from_iter([1, 2, 1]),
1062        );
1063        // ARRAY[1, 2] < ARRAY[1, 2, 1]
1064        assert_lt!(
1065            ListValue::from_iter([1, 2]),
1066            ListValue::from_iter([1, 2, 1]),
1067        );
1068        // ARRAY[1, 3] > ARRAY[1, 2, 1]
1069        assert_gt!(
1070            ListValue::from_iter([1, 3]),
1071            ListValue::from_iter([1, 2, 1]),
1072        );
1073        // null > 1
1074        assert_gt!(
1075            ListValue::from_iter([None::<i32>]),
1076            ListValue::from_iter([1]),
1077        );
1078        // ARRAY[1, 2, null] > ARRAY[1, 2, 1]
1079        assert_gt!(
1080            ListValue::from_iter([Some(1), Some(2), None]),
1081            ListValue::from_iter([Some(1), Some(2), Some(1)]),
1082        );
1083        // Null value in first ARRAY results into a Greater ordering regardless of the smaller ARRAY
1084        // length. ARRAY[1, null] > ARRAY[1, 2, 3]
1085        assert_gt!(
1086            ListValue::from_iter([Some(1), None]),
1087            ListValue::from_iter([Some(1), Some(2), Some(3)]),
1088        );
1089        // ARRAY[1, null] == ARRAY[1, null]
1090        assert_eq!(
1091            ListValue::from_iter([Some(1), None]),
1092            ListValue::from_iter([Some(1), None]),
1093        );
1094    }
1095
1096    #[test]
1097    fn test_list_ref_display() {
1098        let v = ListValue::from_iter([Some(1), None]);
1099        assert_eq!(v.to_string(), "{1,NULL}");
1100    }
1101
1102    #[test]
1103    fn test_serialize_deserialize() {
1104        let value = ListValue::from_iter([Some("abcd"), Some(""), None, Some("a")]);
1105        let list_ref = value.as_scalar_ref();
1106        let mut serializer = memcomparable::Serializer::new(vec![]);
1107        serializer.set_reverse(true);
1108        list_ref.memcmp_serialize(&mut serializer).unwrap();
1109        let buf = serializer.into_inner();
1110        let mut deserializer = memcomparable::Deserializer::new(&buf[..]);
1111        deserializer.set_reverse(true);
1112        assert_eq!(
1113            ListValue::memcmp_deserialize(&DataType::Varchar, &mut deserializer).unwrap(),
1114            value
1115        );
1116
1117        let mut builder =
1118            ListArrayBuilder::with_type(0, DataType::List(Box::new(DataType::Varchar)));
1119        builder.append(Some(list_ref));
1120        let array = builder.finish();
1121        let list_ref = array.value_at(0).unwrap();
1122        let mut serializer = memcomparable::Serializer::new(vec![]);
1123        list_ref.memcmp_serialize(&mut serializer).unwrap();
1124        let buf = serializer.into_inner();
1125        let mut deserializer = memcomparable::Deserializer::new(&buf[..]);
1126        assert_eq!(
1127            ListValue::memcmp_deserialize(&DataType::Varchar, &mut deserializer).unwrap(),
1128            value
1129        );
1130    }
1131
1132    #[test]
1133    fn test_memcomparable() {
1134        let cases = [
1135            (
1136                ListValue::from_iter([123, 456]),
1137                ListValue::from_iter([123, 789]),
1138            ),
1139            (
1140                ListValue::from_iter([123, 456]),
1141                ListValue::from_iter([123]),
1142            ),
1143            (
1144                ListValue::from_iter([None, Some("")]),
1145                ListValue::from_iter([None, None::<&str>]),
1146            ),
1147            (
1148                ListValue::from_iter([Some(2)]),
1149                ListValue::from_iter([Some(1), None, Some(3)]),
1150            ),
1151        ];
1152
1153        for (lhs, rhs) in cases {
1154            let lhs_serialized = {
1155                let mut serializer = memcomparable::Serializer::new(vec![]);
1156                lhs.as_scalar_ref()
1157                    .memcmp_serialize(&mut serializer)
1158                    .unwrap();
1159                serializer.into_inner()
1160            };
1161            let rhs_serialized = {
1162                let mut serializer = memcomparable::Serializer::new(vec![]);
1163                rhs.as_scalar_ref()
1164                    .memcmp_serialize(&mut serializer)
1165                    .unwrap();
1166                serializer.into_inner()
1167            };
1168            assert_eq!(lhs_serialized.cmp(&rhs_serialized), lhs.cmp(&rhs));
1169        }
1170    }
1171
1172    #[test]
1173    fn test_listref() {
1174        use crate::array::*;
1175        use crate::types;
1176
1177        let arr = ListArray::from_iter([Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6, 7])]);
1178
1179        // get 3rd ListRef from ListArray
1180        let list_ref = arr.value_at(2).unwrap();
1181        assert_eq!(list_ref, ListValue::from_iter([4, 5, 6, 7]).as_scalar_ref());
1182
1183        // Get 2nd value from ListRef
1184        let scalar = list_ref.get(1).unwrap();
1185        assert_eq!(scalar, Some(types::ScalarRefImpl::Int32(5)));
1186    }
1187
1188    #[test]
1189    fn test_from_to_literal() {
1190        #[track_caller]
1191        fn test(typestr: &str, input: &str, output: Option<&str>) {
1192            let datatype: DataType = typestr.parse().unwrap();
1193            let list = ListValue::from_str(input, &datatype).unwrap();
1194            let actual = list.as_scalar_ref().to_text();
1195            let output = output.unwrap_or(input);
1196            assert_eq!(actual, output);
1197        }
1198
1199        #[track_caller]
1200        fn test_err(typestr: &str, input: &str, err: &str) {
1201            let datatype: DataType = typestr.parse().unwrap();
1202            let actual_err = ListValue::from_str(input, &datatype).unwrap_err();
1203            assert_eq!(actual_err, err);
1204        }
1205
1206        test("varchar[]", "{}", None);
1207        test("varchar[]", "{1 2}", Some(r#"{"1 2"}"#));
1208        test("varchar[]", "{🥵,🤡}", None);
1209        test("varchar[]", r#"{aa\\bb}"#, Some(r#"{"aa\\bb"}"#));
1210        test("int[]", "{1,2,3}", None);
1211        test("varchar[]", r#"{"1,2"}"#, None);
1212        test("varchar[]", r#"{1, ""}"#, Some(r#"{1,""}"#));
1213        test("varchar[]", r#"{"\""}"#, None);
1214        test("varchar[]", r#"{\   }"#, Some(r#"{" "}"#));
1215        test("varchar[]", r#"{\\  }"#, Some(r#"{"\\"}"#));
1216        test("varchar[]", "{nulla}", None);
1217        test("varchar[]", "{null a}", Some(r#"{"null a"}"#));
1218        test(
1219            "varchar[]",
1220            r#"{"null", "NULL", null, NuLL}"#,
1221            Some(r#"{"null","NULL",NULL,NULL}"#),
1222        );
1223        test("varchar[][]", "{{1, 2, 3}, null }", Some("{{1,2,3},NULL}"));
1224        test(
1225            "varchar[][][]",
1226            "{{{1, 2, 3}}, {{4, 5, 6}}}",
1227            Some("{{{1,2,3}},{{4,5,6}}}"),
1228        );
1229        test_err("varchar[]", "()", r#"Array value must start with "{""#);
1230        test_err("varchar[]", "{1,", r#"Unexpected end of input."#);
1231        test_err("varchar[]", "{1,}", r#"Unexpected "}" character."#);
1232        test_err("varchar[]", "{1,,3}", r#"Unexpected "," character."#);
1233        test_err("varchar[]", r#"{"a""b"}"#, r#"Unexpected array element."#);
1234        test_err("varchar[]", r#"{}{"#, r#"Junk after closing right brace."#);
1235    }
1236}