risingwave_common/array/
list_array.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::borrow::Cow;
16use std::cmp::Ordering;
17use std::fmt::{self, Debug, Display};
18use std::future::Future;
19use std::mem::size_of;
20
21use bytes::{Buf, BufMut};
22use itertools::Itertools;
23use risingwave_common_estimate_size::EstimateSize;
24use risingwave_pb::data::{ListArrayData, PbArray, PbArrayType};
25use serde::{Deserialize, Serializer};
26use thiserror_ext::AsReport;
27
28use super::{
29    Array, ArrayBuilder, ArrayBuilderImpl, ArrayImpl, ArrayResult, BoolArray, PrimitiveArray,
30    PrimitiveArrayItemType, RowRef, Utf8Array,
31};
32use crate::bitmap::{Bitmap, BitmapBuilder};
33use crate::row::Row;
34use crate::types::{
35    DataType, Datum, DatumRef, DefaultOrd, Scalar, ScalarImpl, ScalarRefImpl, ToDatumRef, ToText,
36    hash_datum,
37};
38use crate::util::memcmp_encoding;
39use crate::util::value_encoding::estimate_serialize_datum_size;
40
41#[derive(Debug, Clone, EstimateSize)]
42pub struct ListArrayBuilder {
43    bitmap: BitmapBuilder,
44    offsets: Vec<u32>,
45    value: Box<ArrayBuilderImpl>,
46    len: usize,
47}
48
49impl ArrayBuilder for ListArrayBuilder {
50    type ArrayType = ListArray;
51
52    #[cfg(not(test))]
53    fn new(_capacity: usize) -> Self {
54        panic!("please use `ListArrayBuilder::with_type` instead");
55    }
56
57    #[cfg(test)]
58    fn new(capacity: usize) -> Self {
59        // TODO: deprecate this
60        Self::with_type(
61            capacity,
62            // Default datatype
63            DataType::List(Box::new(DataType::Int16)),
64        )
65    }
66
67    fn with_type(capacity: usize, ty: DataType) -> Self {
68        let DataType::List(value_type) = ty else {
69            panic!("data type must be DataType::List");
70        };
71        let mut offsets = Vec::with_capacity(capacity + 1);
72        offsets.push(0);
73        Self {
74            bitmap: BitmapBuilder::with_capacity(capacity),
75            offsets,
76            value: Box::new(value_type.create_array_builder(capacity)),
77            len: 0,
78        }
79    }
80
81    fn append_n(&mut self, n: usize, value: Option<ListRef<'_>>) {
82        match value {
83            None => {
84                self.bitmap.append_n(n, false);
85                let last = *self.offsets.last().unwrap();
86                for _ in 0..n {
87                    self.offsets.push(last);
88                }
89            }
90            Some(v) => {
91                self.bitmap.append_n(n, true);
92                for _ in 0..n {
93                    let last = *self.offsets.last().unwrap();
94                    let elems = v.iter();
95                    self.offsets.push(
96                        last.checked_add(elems.len() as u32)
97                            .expect("offset overflow"),
98                    );
99                    for elem in elems {
100                        self.value.append(elem);
101                    }
102                }
103            }
104        }
105        self.len += n;
106    }
107
108    fn append_array(&mut self, other: &ListArray) {
109        self.bitmap.append_bitmap(&other.bitmap);
110        let last = *self.offsets.last().unwrap();
111        self.offsets
112            .append(&mut other.offsets[1..].iter().map(|o| *o + last).collect());
113        self.value.append_array(&other.value);
114        self.len += other.len();
115    }
116
117    fn pop(&mut self) -> Option<()> {
118        self.bitmap.pop()?;
119        let start = self.offsets.pop().unwrap();
120        let end = *self.offsets.last().unwrap();
121        self.len -= 1;
122        for _ in end..start {
123            self.value.pop().unwrap();
124        }
125        Some(())
126    }
127
128    fn len(&self) -> usize {
129        self.bitmap.len()
130    }
131
132    fn finish(self) -> ListArray {
133        ListArray {
134            bitmap: self.bitmap.finish(),
135            offsets: self.offsets.into(),
136            value: Box::new(self.value.finish()),
137        }
138    }
139}
140
141impl ListArrayBuilder {
142    pub fn append_row_ref(&mut self, row: RowRef<'_>) {
143        self.bitmap.append(true);
144        let last = *self.offsets.last().unwrap();
145        self.offsets
146            .push(last.checked_add(row.len() as u32).expect("offset overflow"));
147        self.len += 1;
148        for v in row.iter() {
149            self.value.append(v);
150        }
151    }
152}
153
154/// Each item of this `ListArray` is a `List<T>`, or called `T[]` (T array).
155///
156/// * As other arrays, there is a null bitmap, with `1` meaning nonnull and `0` meaning null.
157/// * As [`super::BytesArray`], there is an offsets `Vec` and a value `Array`. The value `Array` has
158///   all items concatenated, and the offsets `Vec` stores start and end indices into it for
159///   slicing. Effectively, the inner array is the flattened form, and `offsets.len() == n + 1`.
160///
161/// For example, `values (array[1]), (array[]::int[]), (null), (array[2, 3]);` stores an inner
162///  `I32Array` with `[1, 2, 3]`, along with offsets `[0, 1, 1, 1, 3]` and null bitmap `TTFT`.
163#[derive(Debug, Clone, PartialEq, Eq)]
164pub struct ListArray {
165    pub(super) bitmap: Bitmap,
166    pub(super) offsets: Box<[u32]>,
167    pub(super) value: Box<ArrayImpl>,
168}
169
170impl EstimateSize for ListArray {
171    fn estimated_heap_size(&self) -> usize {
172        self.bitmap.estimated_heap_size()
173            + self.offsets.len() * size_of::<u32>()
174            + self.value.estimated_size()
175    }
176}
177
178impl Array for ListArray {
179    type Builder = ListArrayBuilder;
180    type OwnedItem = ListValue;
181    type RefItem<'a> = ListRef<'a>;
182
183    unsafe fn raw_value_at_unchecked(&self, idx: usize) -> Self::RefItem<'_> {
184        unsafe {
185            ListRef {
186                array: &self.value,
187                start: *self.offsets.get_unchecked(idx),
188                end: *self.offsets.get_unchecked(idx + 1),
189            }
190        }
191    }
192
193    fn len(&self) -> usize {
194        self.bitmap.len()
195    }
196
197    fn to_protobuf(&self) -> PbArray {
198        let value = self.value.to_protobuf();
199        PbArray {
200            array_type: PbArrayType::List as i32,
201            struct_array_data: None,
202            list_array_data: Some(Box::new(ListArrayData {
203                offsets: self.offsets.to_vec(),
204                value: Some(Box::new(value)),
205                value_type: Some(self.value.data_type().to_protobuf()),
206            })),
207            null_bitmap: Some(self.bitmap.to_protobuf()),
208            values: vec![],
209        }
210    }
211
212    fn null_bitmap(&self) -> &Bitmap {
213        &self.bitmap
214    }
215
216    fn into_null_bitmap(self) -> Bitmap {
217        self.bitmap
218    }
219
220    fn set_bitmap(&mut self, bitmap: Bitmap) {
221        self.bitmap = bitmap;
222    }
223
224    fn data_type(&self) -> DataType {
225        DataType::List(Box::new(self.value.data_type()))
226    }
227}
228
229impl ListArray {
230    /// Flatten the list array into a single array.
231    ///
232    /// # Example
233    ///
234    /// ```text
235    /// [[1,2,3],NULL,[4,5]] => [1,2,3,4,5]
236    /// [[[1],[2]],[[3],[4]]] => [1,2,3,4]
237    /// ```
238    pub fn flatten(&self) -> ArrayImpl {
239        match &*self.value {
240            ArrayImpl::List(inner) => inner.flatten(),
241            a => a.clone(),
242        }
243    }
244
245    /// Return the inner array of the list array.
246    pub fn values(&self) -> &ArrayImpl {
247        &self.value
248    }
249
250    pub fn from_protobuf(array: &PbArray) -> ArrayResult<ArrayImpl> {
251        ensure!(
252            array.values.is_empty(),
253            "Must have no buffer in a list array"
254        );
255        debug_assert!(
256            (array.array_type == PbArrayType::List as i32)
257                || (array.array_type == PbArrayType::Map as i32),
258            "invalid array type for list: {}",
259            array.array_type
260        );
261        let bitmap: Bitmap = array.get_null_bitmap()?.into();
262        let array_data = array.get_list_array_data()?.to_owned();
263        let flatten_len = match array_data.offsets.last() {
264            Some(&n) => n as usize,
265            None => bail!("Must have at least one element in offsets"),
266        };
267        let value = ArrayImpl::from_protobuf(array_data.value.as_ref().unwrap(), flatten_len)?;
268        let arr = ListArray {
269            bitmap,
270            offsets: array_data.offsets.into(),
271            value: Box::new(value),
272        };
273        Ok(arr.into())
274    }
275
276    /// Apply the function on the underlying elements.
277    /// e.g. `map_inner([[1,2,3],NULL,[4,5]], DOUBLE) = [[2,4,6],NULL,[8,10]]`
278    pub async fn map_inner<E, Fut, F>(self, f: F) -> std::result::Result<ListArray, E>
279    where
280        F: FnOnce(ArrayImpl) -> Fut,
281        Fut: Future<Output = std::result::Result<ArrayImpl, E>>,
282    {
283        let new_value = (f)(*self.value).await?;
284
285        Ok(Self {
286            offsets: self.offsets,
287            bitmap: self.bitmap,
288            value: Box::new(new_value),
289        })
290    }
291
292    /// Returns the offsets of this list.
293    ///
294    /// # Example
295    /// ```text
296    /// list    = [[a, b, c], [], NULL, [d], [NULL, f]]
297    /// offsets = [0, 3, 3, 3, 4, 6]
298    /// ```
299    pub fn offsets(&self) -> &[u32] {
300        &self.offsets
301    }
302}
303
304impl<T, L> FromIterator<Option<L>> for ListArray
305where
306    T: PrimitiveArrayItemType,
307    L: IntoIterator<Item = T>,
308{
309    fn from_iter<I: IntoIterator<Item = Option<L>>>(iter: I) -> Self {
310        let iter = iter.into_iter();
311        let mut builder = ListArrayBuilder::with_type(
312            iter.size_hint().0,
313            DataType::List(Box::new(T::DATA_TYPE.clone())),
314        );
315        for v in iter {
316            match v {
317                None => builder.append(None),
318                Some(v) => {
319                    builder.append(Some(v.into_iter().collect::<ListValue>().as_scalar_ref()))
320                }
321            }
322        }
323        builder.finish()
324    }
325}
326
327impl FromIterator<ListValue> for ListArray {
328    fn from_iter<I: IntoIterator<Item = ListValue>>(iter: I) -> Self {
329        let mut iter = iter.into_iter();
330        let first = iter.next().expect("empty iterator");
331        let mut builder = ListArrayBuilder::with_type(
332            iter.size_hint().0,
333            DataType::List(Box::new(first.data_type())),
334        );
335        builder.append(Some(first.as_scalar_ref()));
336        for v in iter {
337            builder.append(Some(v.as_scalar_ref()));
338        }
339        builder.finish()
340    }
341}
342
343#[derive(Clone, PartialEq, Eq, EstimateSize)]
344pub struct ListValue {
345    values: Box<ArrayImpl>,
346}
347
348impl Debug for ListValue {
349    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
350        self.as_scalar_ref().fmt(f)
351    }
352}
353
354impl Display for ListValue {
355    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
356        self.as_scalar_ref().write(f)
357    }
358}
359
360impl ListValue {
361    pub fn new(values: ArrayImpl) -> Self {
362        Self {
363            values: Box::new(values),
364        }
365    }
366
367    pub fn into_array(self) -> ArrayImpl {
368        *self.values
369    }
370
371    pub fn empty(datatype: &DataType) -> Self {
372        Self::new(datatype.create_array_builder(0).finish())
373    }
374
375    /// Creates a new `ListValue` from an iterator of `Datum`.
376    pub fn from_datum_iter<T: ToDatumRef>(
377        elem_datatype: &DataType,
378        iter: impl IntoIterator<Item = T>,
379    ) -> Self {
380        let iter = iter.into_iter();
381        let mut builder = elem_datatype.create_array_builder(iter.size_hint().0);
382        for datum in iter {
383            builder.append(datum);
384        }
385        Self::new(builder.finish())
386    }
387
388    /// Returns the length of the list.
389    pub fn len(&self) -> usize {
390        self.values.len()
391    }
392
393    /// Returns `true` if the list has a length of 0.
394    pub fn is_empty(&self) -> bool {
395        self.values.is_empty()
396    }
397
398    /// Iterates over the elements of the list.
399    pub fn iter(&self) -> impl DoubleEndedIterator + ExactSizeIterator<Item = DatumRef<'_>> {
400        self.values.iter()
401    }
402
403    /// Get the element at the given index. Returns `None` if the index is out of bounds.
404    pub fn get(&self, index: usize) -> Option<DatumRef<'_>> {
405        if index < self.len() {
406            Some(self.values.value_at(index))
407        } else {
408            None
409        }
410    }
411
412    /// Returns the data type of the elements in the list.
413    pub fn data_type(&self) -> DataType {
414        self.values.data_type()
415    }
416
417    pub fn memcmp_deserialize(
418        item_datatype: &DataType,
419        deserializer: &mut memcomparable::Deserializer<impl Buf>,
420    ) -> memcomparable::Result<Self> {
421        let bytes = serde_bytes::ByteBuf::deserialize(deserializer)?;
422        let mut inner_deserializer = memcomparable::Deserializer::new(bytes.as_slice());
423        let mut builder = item_datatype.create_array_builder(0);
424        while inner_deserializer.has_remaining() {
425            builder.append(memcmp_encoding::deserialize_datum_in_composite(
426                item_datatype,
427                &mut inner_deserializer,
428            )?)
429        }
430        Ok(Self::new(builder.finish()))
431    }
432
433    // Used to display ListValue in explain for better readibilty.
434    pub fn display_for_explain(&self) -> String {
435        // Example of ListValue display: ARRAY[1, 2, null]
436        format!(
437            "ARRAY[{}]",
438            self.iter()
439                .map(|v| {
440                    match v.as_ref() {
441                        None => "null".into(),
442                        Some(scalar) => scalar.to_text(),
443                    }
444                })
445                .format(", ")
446        )
447    }
448
449    /// Returns a mutable slice if the list is of type `int64[]`.
450    pub fn as_i64_mut_slice(&mut self) -> Option<&mut [i64]> {
451        match self.values.as_mut() {
452            ArrayImpl::Int64(array) => Some(array.as_mut_slice()),
453            _ => None,
454        }
455    }
456}
457
458impl PartialOrd for ListValue {
459    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
460        Some(self.cmp(other))
461    }
462}
463
464impl Ord for ListValue {
465    fn cmp(&self, other: &Self) -> Ordering {
466        self.as_scalar_ref().cmp(&other.as_scalar_ref())
467    }
468}
469
470impl<T: PrimitiveArrayItemType> FromIterator<Option<T>> for ListValue {
471    fn from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
472        Self::new(iter.into_iter().collect::<PrimitiveArray<T>>().into())
473    }
474}
475
476impl<T: PrimitiveArrayItemType> FromIterator<T> for ListValue {
477    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
478        Self::new(iter.into_iter().collect::<PrimitiveArray<T>>().into())
479    }
480}
481
482impl FromIterator<bool> for ListValue {
483    fn from_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
484        Self::new(iter.into_iter().collect::<BoolArray>().into())
485    }
486}
487
488impl<'a> FromIterator<Option<&'a str>> for ListValue {
489    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
490        Self::new(iter.into_iter().collect::<Utf8Array>().into())
491    }
492}
493
494impl<'a> FromIterator<&'a str> for ListValue {
495    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
496        Self::new(iter.into_iter().collect::<Utf8Array>().into())
497    }
498}
499
500impl FromIterator<ListValue> for ListValue {
501    fn from_iter<I: IntoIterator<Item = ListValue>>(iter: I) -> Self {
502        Self::new(iter.into_iter().collect::<ListArray>().into())
503    }
504}
505
506impl From<ListValue> for ArrayImpl {
507    fn from(value: ListValue) -> Self {
508        *value.values
509    }
510}
511
512/// A slice of an array
513#[derive(Copy, Clone)]
514pub struct ListRef<'a> {
515    array: &'a ArrayImpl,
516    start: u32,
517    end: u32,
518}
519
520impl<'a> ListRef<'a> {
521    /// Returns the length of the list.
522    pub fn len(&self) -> usize {
523        (self.end - self.start) as usize
524    }
525
526    /// Returns `true` if the list has a length of 0.
527    pub fn is_empty(&self) -> bool {
528        self.start == self.end
529    }
530
531    /// Returns the data type of the elements in the list.
532    pub fn data_type(&self) -> DataType {
533        self.array.data_type()
534    }
535
536    /// Returns the elements in the flattened list.
537    pub fn flatten(self) -> ListRef<'a> {
538        match self.array {
539            ArrayImpl::List(inner) => ListRef {
540                array: &inner.value,
541                start: inner.offsets[self.start as usize],
542                end: inner.offsets[self.end as usize],
543            }
544            .flatten(),
545            _ => self,
546        }
547    }
548
549    /// Iterates over the elements of the list.
550    pub fn iter(self) -> impl DoubleEndedIterator + ExactSizeIterator<Item = DatumRef<'a>> + 'a {
551        (self.start..self.end).map(|i| self.array.value_at(i as usize))
552    }
553
554    /// Get the element at the given index. Returns `None` if the index is out of bounds.
555    pub fn get(self, index: usize) -> Option<DatumRef<'a>> {
556        if index < self.len() {
557            Some(self.array.value_at(self.start as usize + index))
558        } else {
559            None
560        }
561    }
562
563    pub fn memcmp_serialize(
564        self,
565        serializer: &mut memcomparable::Serializer<impl BufMut>,
566    ) -> memcomparable::Result<()> {
567        let mut inner_serializer = memcomparable::Serializer::new(vec![]);
568        for datum_ref in self.iter() {
569            memcmp_encoding::serialize_datum_in_composite(datum_ref, &mut inner_serializer)?
570        }
571        serializer.serialize_bytes(&inner_serializer.into_inner())
572    }
573
574    pub fn hash_scalar_inner<H: std::hash::Hasher>(self, state: &mut H) {
575        for datum_ref in self.iter() {
576            hash_datum(datum_ref, state);
577        }
578    }
579
580    /// estimate the serialized size with value encoding
581    pub fn estimate_serialize_size_inner(self) -> usize {
582        self.iter().map(estimate_serialize_datum_size).sum()
583    }
584
585    pub fn to_owned(self) -> ListValue {
586        let mut builder = self.array.create_builder(self.len());
587        for datum_ref in self.iter() {
588            builder.append(datum_ref);
589        }
590        ListValue::new(builder.finish())
591    }
592
593    /// Returns a slice if the list is of type `int64[]`.
594    pub fn as_i64_slice(&self) -> Option<&[i64]> {
595        match &self.array {
596            ArrayImpl::Int64(array) => {
597                Some(&array.as_slice()[self.start as usize..self.end as usize])
598            }
599            _ => None,
600        }
601    }
602
603    /// # Panics
604    /// Panics if the list is not a map's internal representation (See [`super::MapArray`]).
605    pub(super) fn as_map_kv(self) -> (ListRef<'a>, ListRef<'a>) {
606        let (k, v) = self.array.as_struct().fields().collect_tuple().unwrap();
607        (
608            ListRef {
609                array: k,
610                start: self.start,
611                end: self.end,
612            },
613            ListRef {
614                array: v,
615                start: self.start,
616                end: self.end,
617            },
618        )
619    }
620}
621
622impl PartialEq for ListRef<'_> {
623    fn eq(&self, other: &Self) -> bool {
624        self.iter().eq(other.iter())
625    }
626}
627
628impl Eq for ListRef<'_> {}
629
630impl PartialOrd for ListRef<'_> {
631    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
632        Some(self.cmp(other))
633    }
634}
635
636impl Ord for ListRef<'_> {
637    fn cmp(&self, other: &Self) -> Ordering {
638        self.iter().cmp_by(other.iter(), |a, b| a.default_cmp(&b))
639    }
640}
641
642impl Debug for ListRef<'_> {
643    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
644        f.debug_list().entries(self.iter()).finish()
645    }
646}
647
648impl Row for ListRef<'_> {
649    fn datum_at(&self, index: usize) -> DatumRef<'_> {
650        self.array.value_at(self.start as usize + index)
651    }
652
653    unsafe fn datum_at_unchecked(&self, index: usize) -> DatumRef<'_> {
654        unsafe { self.array.value_at_unchecked(self.start as usize + index) }
655    }
656
657    fn len(&self) -> usize {
658        self.len()
659    }
660
661    fn iter(&self) -> impl Iterator<Item = DatumRef<'_>> {
662        (*self).iter()
663    }
664}
665
666impl ToText for ListRef<'_> {
667    // This function will be invoked when pgwire prints a list value in string.
668    // Refer to PostgreSQL `array_out` or `appendPGArray`.
669    fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
670        write!(
671            f,
672            "{{{}}}",
673            self.iter().format_with(",", |datum_ref, f| {
674                let s = datum_ref.to_text();
675                // Never quote null or inner list, but quote empty, verbatim 'null', special
676                // chars and whitespaces.
677                let need_quote = !matches!(datum_ref, None | Some(ScalarRefImpl::List(_)))
678                    && (s.is_empty()
679                        || s.eq_ignore_ascii_case("null")
680                        || s.contains([
681                            '"', '\\', ',',
682                            // whilespace:
683                            // PostgreSQL `array_isspace` includes '\x0B' but rust
684                            // [`char::is_ascii_whitespace`] does not.
685                            ' ', '\t', '\n', '\r', '\x0B', '\x0C', // list-specific:
686                            '{', '}',
687                        ]));
688                if need_quote {
689                    f(&"\"")?;
690                    s.chars().try_for_each(|c| {
691                        if c == '"' || c == '\\' {
692                            f(&"\\")?;
693                        }
694                        f(&c)
695                    })?;
696                    f(&"\"")
697                } else {
698                    f(&s)
699                }
700            })
701        )
702    }
703
704    fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
705        match ty {
706            DataType::List { .. } => self.write(f),
707            _ => unreachable!(),
708        }
709    }
710}
711
712impl<'a> From<&'a ListValue> for ListRef<'a> {
713    fn from(value: &'a ListValue) -> Self {
714        ListRef {
715            array: &value.values,
716            start: 0,
717            end: value.len() as u32,
718        }
719    }
720}
721
722impl From<ListRef<'_>> for ListValue {
723    fn from(value: ListRef<'_>) -> Self {
724        value.to_owned()
725    }
726}
727
728impl ListValue {
729    /// Construct an array from literal string.
730    pub fn from_str(input: &str, data_type: &DataType) -> Result<Self, String> {
731        struct Parser<'a> {
732            input: &'a str,
733            data_type: &'a DataType,
734        }
735
736        impl Parser<'_> {
737            /// Parse a datum.
738            fn parse(&mut self) -> Result<Datum, String> {
739                self.skip_whitespace();
740                if self.data_type.is_array() {
741                    if self.try_parse_null() {
742                        return Ok(None);
743                    }
744                    Ok(Some(self.parse_array()?.into()))
745                } else {
746                    self.parse_value()
747                }
748            }
749
750            /// Parse an array.
751            fn parse_array(&mut self) -> Result<ListValue, String> {
752                self.skip_whitespace();
753                if !self.try_consume('{') {
754                    return Err("Array value must start with \"{\"".to_owned());
755                }
756                self.skip_whitespace();
757                if self.try_consume('}') {
758                    return Ok(ListValue::empty(self.data_type.as_list()));
759                }
760                let mut builder = ArrayBuilderImpl::with_type(0, self.data_type.as_list().clone());
761                loop {
762                    let mut parser = Self {
763                        input: self.input,
764                        data_type: self.data_type.as_list(),
765                    };
766                    builder.append(parser.parse()?);
767                    self.input = parser.input;
768
769                    // expect ',' or '}'
770                    self.skip_whitespace();
771                    match self.peek() {
772                        Some(',') => {
773                            self.try_consume(',');
774                        }
775                        Some('}') => {
776                            self.try_consume('}');
777                            break;
778                        }
779                        None => return Err(Self::eoi()),
780                        _ => return Err("Unexpected array element.".to_owned()),
781                    }
782                }
783                Ok(ListValue::new(builder.finish()))
784            }
785
786            /// Parse a non-array value.
787            fn parse_value(&mut self) -> Result<Datum, String> {
788                if self.peek() == Some('"') {
789                    return Ok(Some(self.parse_quoted()?));
790                }
791                // peek until the next unescaped ',' or '}'
792                let mut chars = self.input.char_indices();
793                let mut has_escape = false;
794                let s = loop {
795                    match chars.next().ok_or_else(Self::eoi)? {
796                        (_, '\\') => {
797                            has_escape = true;
798                            chars.next().ok_or_else(Self::eoi)?;
799                        }
800                        (i, c @ ',' | c @ '}') => {
801                            let s = &self.input[..i];
802                            // consume the value and leave the ',' or '}' for parent
803                            self.input = &self.input[i..];
804
805                            break if has_escape {
806                                Cow::Owned(Self::unescape_trim_end(s))
807                            } else {
808                                let trimmed = s.trim_end();
809                                if trimmed.is_empty() {
810                                    return Err(format!("Unexpected \"{c}\" character."));
811                                }
812                                if trimmed.eq_ignore_ascii_case("null") {
813                                    return Ok(None);
814                                }
815                                Cow::Borrowed(trimmed)
816                            };
817                        }
818                        (_, '{') => return Err("Unexpected \"{\" character.".to_owned()),
819                        (_, '"') => return Err("Unexpected array element.".to_owned()),
820                        _ => {}
821                    }
822                };
823                Ok(Some(
824                    ScalarImpl::from_text(&s, self.data_type).map_err(|e| e.to_report_string())?,
825                ))
826            }
827
828            /// Parse a double quoted non-array value.
829            fn parse_quoted(&mut self) -> Result<ScalarImpl, String> {
830                assert!(self.try_consume('"'));
831                // peek until the next unescaped '"'
832                let mut chars = self.input.char_indices();
833                let mut has_escape = false;
834                let s = loop {
835                    match chars.next().ok_or_else(Self::eoi)? {
836                        (_, '\\') => {
837                            has_escape = true;
838                            chars.next().ok_or_else(Self::eoi)?;
839                        }
840                        (i, '"') => {
841                            let s = &self.input[..i];
842                            self.input = &self.input[i + 1..];
843                            break if has_escape {
844                                Cow::Owned(Self::unescape(s))
845                            } else {
846                                Cow::Borrowed(s)
847                            };
848                        }
849                        _ => {}
850                    }
851                };
852                ScalarImpl::from_text(&s, self.data_type).map_err(|e| e.to_report_string())
853            }
854
855            /// Unescape a string.
856            fn unescape(s: &str) -> String {
857                let mut unescaped = String::with_capacity(s.len());
858                let mut chars = s.chars();
859                while let Some(mut c) = chars.next() {
860                    if c == '\\' {
861                        c = chars.next().unwrap();
862                    }
863                    unescaped.push(c);
864                }
865                unescaped
866            }
867
868            /// Unescape a string and trim the trailing whitespaces.
869            ///
870            /// Example: `"\  " -> " "`
871            fn unescape_trim_end(s: &str) -> String {
872                let mut unescaped = String::with_capacity(s.len());
873                let mut chars = s.chars();
874                let mut len_after_last_escaped_char = 0;
875                while let Some(mut c) = chars.next() {
876                    if c == '\\' {
877                        c = chars.next().unwrap();
878                        unescaped.push(c);
879                        len_after_last_escaped_char = unescaped.len();
880                    } else {
881                        unescaped.push(c);
882                    }
883                }
884                let l = unescaped[len_after_last_escaped_char..].trim_end().len();
885                unescaped.truncate(len_after_last_escaped_char + l);
886                unescaped
887            }
888
889            /// Consume the next 4 characters if it matches "null".
890            ///
891            /// Note: We don't use this function when parsing non-array values.
892            ///       Because we can't decide whether it is a null value or a string starts with "null".
893            ///       Consider this case: `{null value}` => `["null value"]`
894            fn try_parse_null(&mut self) -> bool {
895                if let Some(s) = self.input.get(..4)
896                    && s.eq_ignore_ascii_case("null")
897                {
898                    let next_char = self.input[4..].chars().next();
899                    match next_char {
900                        None | Some(',' | '}') => {}
901                        Some(c) if c.is_ascii_whitespace() => {}
902                        // following normal characters
903                        _ => return false,
904                    }
905                    self.input = &self.input[4..];
906                    true
907                } else {
908                    false
909                }
910            }
911
912            /// Consume the next character if it matches `c`.
913            fn try_consume(&mut self, c: char) -> bool {
914                if self.peek() == Some(c) {
915                    self.input = &self.input[c.len_utf8()..];
916                    true
917                } else {
918                    false
919                }
920            }
921
922            /// Expect end of input.
923            fn expect_end(&mut self) -> Result<(), String> {
924                self.skip_whitespace();
925                match self.peek() {
926                    Some(_) => Err("Junk after closing right brace.".to_owned()),
927                    None => Ok(()),
928                }
929            }
930
931            /// Skip whitespaces.
932            fn skip_whitespace(&mut self) {
933                self.input = match self
934                    .input
935                    .char_indices()
936                    .find(|(_, c)| !c.is_ascii_whitespace())
937                {
938                    Some((i, _)) => &self.input[i..],
939                    None => "",
940                };
941            }
942
943            /// Peek the next character.
944            fn peek(&self) -> Option<char> {
945                self.input.chars().next()
946            }
947
948            /// Return the error message for unexpected end of input.
949            fn eoi() -> String {
950                "Unexpected end of input.".into()
951            }
952        }
953
954        let mut parser = Parser { input, data_type };
955        let array = parser.parse_array()?;
956        parser.expect_end()?;
957        Ok(array)
958    }
959}
960
961#[cfg(test)]
962mod tests {
963    use more_asserts::{assert_gt, assert_lt};
964
965    use super::*;
966
967    #[test]
968    fn test_protobuf() {
969        use crate::array::*;
970        let array = ListArray::from_iter([
971            Some(vec![12i32, -7, 25]),
972            None,
973            Some(vec![0, -127, 127, 50]),
974            Some(vec![]),
975        ]);
976        let actual = ListArray::from_protobuf(&array.to_protobuf()).unwrap();
977        assert_eq!(actual, ArrayImpl::List(array));
978    }
979
980    #[test]
981    fn test_append_array() {
982        let part1 = ListArray::from_iter([Some([12i32, -7, 25]), None]);
983        let part2 = ListArray::from_iter([Some(vec![0, -127, 127, 50]), Some(vec![])]);
984
985        let mut builder = ListArrayBuilder::with_type(4, DataType::List(Box::new(DataType::Int32)));
986        builder.append_array(&part1);
987        builder.append_array(&part2);
988
989        let expected = ListArray::from_iter([
990            Some(vec![12i32, -7, 25]),
991            None,
992            Some(vec![0, -127, 127, 50]),
993            Some(vec![]),
994        ]);
995        assert_eq!(builder.finish(), expected);
996    }
997
998    // Ensure `create_builder` exactly copies the same metadata.
999    #[test]
1000    fn test_list_create_builder() {
1001        use crate::array::*;
1002        let arr = ListArray::from_iter([Some([F32::from(2.0), F32::from(42.0), F32::from(1.0)])]);
1003        let arr2 = arr.create_builder(0).finish();
1004        assert_eq!(arr.data_type(), arr2.data_type());
1005    }
1006
1007    #[test]
1008    fn test_builder_pop() {
1009        use crate::array::*;
1010
1011        {
1012            let mut builder =
1013                ListArrayBuilder::with_type(1, DataType::List(Box::new(DataType::Int32)));
1014            let val = ListValue::from_iter([1i32, 2, 3]);
1015            builder.append(Some(val.as_scalar_ref()));
1016            assert!(builder.pop().is_some());
1017            assert!(builder.pop().is_none());
1018            let arr = builder.finish();
1019            assert!(arr.is_empty());
1020        }
1021
1022        {
1023            let data_type = DataType::List(Box::new(DataType::List(Box::new(DataType::Int32))));
1024            let mut builder = ListArrayBuilder::with_type(2, data_type);
1025            let val1 = ListValue::from_iter([1, 2, 3]);
1026            let val2 = ListValue::from_iter([1, 2, 3]);
1027            let list1 = ListValue::from_iter([val1, val2]);
1028            builder.append(Some(list1.as_scalar_ref()));
1029
1030            let val3 = ListValue::from_iter([1, 2, 3]);
1031            let val4 = ListValue::from_iter([1, 2, 3]);
1032            let list2 = ListValue::from_iter([val3, val4]);
1033
1034            builder.append(Some(list2.as_scalar_ref()));
1035
1036            assert!(builder.pop().is_some());
1037
1038            let arr = builder.finish();
1039            assert_eq!(arr.len(), 1);
1040            assert_eq!(arr.value_at(0).unwrap(), list1.as_scalar_ref());
1041        }
1042    }
1043
1044    #[test]
1045    fn test_list_nested_layout() {
1046        use crate::array::*;
1047
1048        let listarray1 = ListArray::from_iter([Some([1i32, 2]), Some([3, 4])]);
1049        let listarray2 = ListArray::from_iter([Some(vec![5, 6, 7]), None, Some(vec![8])]);
1050        let listarray3 = ListArray::from_iter([Some([9, 10])]);
1051
1052        let nestarray = ListArray::from_iter(
1053            [listarray1, listarray2, listarray3]
1054                .into_iter()
1055                .map(|l| ListValue::new(l.into())),
1056        );
1057        let actual = ListArray::from_protobuf(&nestarray.to_protobuf()).unwrap();
1058        assert_eq!(ArrayImpl::List(nestarray), actual);
1059    }
1060
1061    #[test]
1062    fn test_list_value_cmp() {
1063        // ARRAY[1, 1] < ARRAY[1, 2, 1]
1064        assert_lt!(
1065            ListValue::from_iter([1, 1]),
1066            ListValue::from_iter([1, 2, 1]),
1067        );
1068        // ARRAY[1, 2] < ARRAY[1, 2, 1]
1069        assert_lt!(
1070            ListValue::from_iter([1, 2]),
1071            ListValue::from_iter([1, 2, 1]),
1072        );
1073        // ARRAY[1, 3] > ARRAY[1, 2, 1]
1074        assert_gt!(
1075            ListValue::from_iter([1, 3]),
1076            ListValue::from_iter([1, 2, 1]),
1077        );
1078        // null > 1
1079        assert_gt!(
1080            ListValue::from_iter([None::<i32>]),
1081            ListValue::from_iter([1]),
1082        );
1083        // ARRAY[1, 2, null] > ARRAY[1, 2, 1]
1084        assert_gt!(
1085            ListValue::from_iter([Some(1), Some(2), None]),
1086            ListValue::from_iter([Some(1), Some(2), Some(1)]),
1087        );
1088        // Null value in first ARRAY results into a Greater ordering regardless of the smaller ARRAY
1089        // length. ARRAY[1, null] > ARRAY[1, 2, 3]
1090        assert_gt!(
1091            ListValue::from_iter([Some(1), None]),
1092            ListValue::from_iter([Some(1), Some(2), Some(3)]),
1093        );
1094        // ARRAY[1, null] == ARRAY[1, null]
1095        assert_eq!(
1096            ListValue::from_iter([Some(1), None]),
1097            ListValue::from_iter([Some(1), None]),
1098        );
1099    }
1100
1101    #[test]
1102    fn test_list_ref_display() {
1103        let v = ListValue::from_iter([Some(1), None]);
1104        assert_eq!(v.to_string(), "{1,NULL}");
1105    }
1106
1107    #[test]
1108    fn test_serialize_deserialize() {
1109        let value = ListValue::from_iter([Some("abcd"), Some(""), None, Some("a")]);
1110        let list_ref = value.as_scalar_ref();
1111        let mut serializer = memcomparable::Serializer::new(vec![]);
1112        serializer.set_reverse(true);
1113        list_ref.memcmp_serialize(&mut serializer).unwrap();
1114        let buf = serializer.into_inner();
1115        let mut deserializer = memcomparable::Deserializer::new(&buf[..]);
1116        deserializer.set_reverse(true);
1117        assert_eq!(
1118            ListValue::memcmp_deserialize(&DataType::Varchar, &mut deserializer).unwrap(),
1119            value
1120        );
1121
1122        let mut builder =
1123            ListArrayBuilder::with_type(0, DataType::List(Box::new(DataType::Varchar)));
1124        builder.append(Some(list_ref));
1125        let array = builder.finish();
1126        let list_ref = array.value_at(0).unwrap();
1127        let mut serializer = memcomparable::Serializer::new(vec![]);
1128        list_ref.memcmp_serialize(&mut serializer).unwrap();
1129        let buf = serializer.into_inner();
1130        let mut deserializer = memcomparable::Deserializer::new(&buf[..]);
1131        assert_eq!(
1132            ListValue::memcmp_deserialize(&DataType::Varchar, &mut deserializer).unwrap(),
1133            value
1134        );
1135    }
1136
1137    #[test]
1138    fn test_memcomparable() {
1139        let cases = [
1140            (
1141                ListValue::from_iter([123, 456]),
1142                ListValue::from_iter([123, 789]),
1143            ),
1144            (
1145                ListValue::from_iter([123, 456]),
1146                ListValue::from_iter([123]),
1147            ),
1148            (
1149                ListValue::from_iter([None, Some("")]),
1150                ListValue::from_iter([None, None::<&str>]),
1151            ),
1152            (
1153                ListValue::from_iter([Some(2)]),
1154                ListValue::from_iter([Some(1), None, Some(3)]),
1155            ),
1156        ];
1157
1158        for (lhs, rhs) in cases {
1159            let lhs_serialized = {
1160                let mut serializer = memcomparable::Serializer::new(vec![]);
1161                lhs.as_scalar_ref()
1162                    .memcmp_serialize(&mut serializer)
1163                    .unwrap();
1164                serializer.into_inner()
1165            };
1166            let rhs_serialized = {
1167                let mut serializer = memcomparable::Serializer::new(vec![]);
1168                rhs.as_scalar_ref()
1169                    .memcmp_serialize(&mut serializer)
1170                    .unwrap();
1171                serializer.into_inner()
1172            };
1173            assert_eq!(lhs_serialized.cmp(&rhs_serialized), lhs.cmp(&rhs));
1174        }
1175    }
1176
1177    #[test]
1178    fn test_listref() {
1179        use crate::array::*;
1180        use crate::types;
1181
1182        let arr = ListArray::from_iter([Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6, 7])]);
1183
1184        // get 3rd ListRef from ListArray
1185        let list_ref = arr.value_at(2).unwrap();
1186        assert_eq!(list_ref, ListValue::from_iter([4, 5, 6, 7]).as_scalar_ref());
1187
1188        // Get 2nd value from ListRef
1189        let scalar = list_ref.get(1).unwrap();
1190        assert_eq!(scalar, Some(types::ScalarRefImpl::Int32(5)));
1191    }
1192
1193    #[test]
1194    fn test_from_to_literal() {
1195        #[track_caller]
1196        fn test(typestr: &str, input: &str, output: Option<&str>) {
1197            let datatype: DataType = typestr.parse().unwrap();
1198            let list = ListValue::from_str(input, &datatype).unwrap();
1199            let actual = list.as_scalar_ref().to_text();
1200            let output = output.unwrap_or(input);
1201            assert_eq!(actual, output);
1202        }
1203
1204        #[track_caller]
1205        fn test_err(typestr: &str, input: &str, err: &str) {
1206            let datatype: DataType = typestr.parse().unwrap();
1207            let actual_err = ListValue::from_str(input, &datatype).unwrap_err();
1208            assert_eq!(actual_err, err);
1209        }
1210
1211        test("varchar[]", "{}", None);
1212        test("varchar[]", "{1 2}", Some(r#"{"1 2"}"#));
1213        test("varchar[]", "{🥵,🤡}", None);
1214        test("varchar[]", r#"{aa\\bb}"#, Some(r#"{"aa\\bb"}"#));
1215        test("int[]", "{1,2,3}", None);
1216        test("varchar[]", r#"{"1,2"}"#, None);
1217        test("varchar[]", r#"{1, ""}"#, Some(r#"{1,""}"#));
1218        test("varchar[]", r#"{"\""}"#, None);
1219        test("varchar[]", r#"{\   }"#, Some(r#"{" "}"#));
1220        test("varchar[]", r#"{\\  }"#, Some(r#"{"\\"}"#));
1221        test("varchar[]", "{nulla}", None);
1222        test("varchar[]", "{null a}", Some(r#"{"null a"}"#));
1223        test(
1224            "varchar[]",
1225            r#"{"null", "NULL", null, NuLL}"#,
1226            Some(r#"{"null","NULL",NULL,NULL}"#),
1227        );
1228        test("varchar[][]", "{{1, 2, 3}, null }", Some("{{1,2,3},NULL}"));
1229        test(
1230            "varchar[][][]",
1231            "{{{1, 2, 3}}, {{4, 5, 6}}}",
1232            Some("{{{1,2,3}},{{4,5,6}}}"),
1233        );
1234        test_err("varchar[]", "()", r#"Array value must start with "{""#);
1235        test_err("varchar[]", "{1,", r#"Unexpected end of input."#);
1236        test_err("varchar[]", "{1,}", r#"Unexpected "}" character."#);
1237        test_err("varchar[]", "{1,,3}", r#"Unexpected "," character."#);
1238        test_err("varchar[]", r#"{"a""b"}"#, r#"Unexpected array element."#);
1239        test_err("varchar[]", r#"{}{"#, r#"Junk after closing right brace."#);
1240    }
1241}