1use std::borrow::Cow;
16use std::cmp::Ordering;
17use std::fmt::{self, Debug, Display};
18use std::future::Future;
19use std::mem::{ManuallyDrop, size_of};
20
21use bytes::{Buf, BufMut};
22use itertools::Itertools;
23use risingwave_common_estimate_size::EstimateSize;
24use risingwave_pb::data::{ListArrayData, PbArray, PbArrayType};
25use serde::{Deserialize, Serializer};
26use thiserror_ext::AsReport;
27
28use super::{
29 Array, ArrayBuilder, ArrayBuilderImpl, ArrayImpl, ArrayResult, BoolArray, PrimitiveArray,
30 PrimitiveArrayItemType, RowRef, Utf8Array,
31};
32use crate::bitmap::{Bitmap, BitmapBuilder};
33use crate::row::Row;
34use crate::types::{
35 DataType, Datum, DatumRef, DefaultOrd, ListType, Scalar, ScalarImpl, ScalarRef, ScalarRefImpl,
36 ToDatumRef, ToText, hash_datum,
37};
38use crate::util::memcmp_encoding;
39use crate::util::value_encoding::estimate_serialize_datum_size;
40
41#[derive(Debug, Clone, EstimateSize)]
42pub struct ListArrayBuilder {
43 bitmap: BitmapBuilder,
44 offsets: Vec<u32>,
45 value: Box<ArrayBuilderImpl>,
46}
47
48impl ArrayBuilder for ListArrayBuilder {
49 type ArrayType = ListArray;
50
51 #[cfg(not(test))]
52 fn new(_capacity: usize) -> Self {
53 panic!("please use `ListArrayBuilder::with_type` instead");
54 }
55
56 #[cfg(test)]
57 fn new(capacity: usize) -> Self {
58 Self::with_type(
60 capacity,
61 DataType::Int16.list(),
63 )
64 }
65
66 fn with_type(capacity: usize, ty: DataType) -> Self {
67 let DataType::List(list_ty) = ty else {
68 panic!("data type must be DataType::List");
69 };
70 let mut offsets = Vec::with_capacity(capacity + 1);
71 offsets.push(0);
72 Self {
73 bitmap: BitmapBuilder::with_capacity(capacity),
74 offsets,
75 value: Box::new(list_ty.elem().create_array_builder(capacity)),
76 }
77 }
78
79 fn append_n(&mut self, n: usize, value: Option<ListRef<'_>>) {
80 match value {
81 None => {
82 self.bitmap.append_n(n, false);
83 let last = *self.offsets.last().unwrap();
84 for _ in 0..n {
85 self.offsets.push(last);
86 }
87 }
88 Some(v) => {
89 self.bitmap.append_n(n, true);
90 for _ in 0..n {
91 let last = *self.offsets.last().unwrap();
92 let elems = v.iter();
93 self.offsets.push(
94 last.checked_add(elems.len() as u32)
95 .expect("offset overflow"),
96 );
97 for elem in elems {
98 self.value.append(elem);
99 }
100 }
101 }
102 }
103 }
104
105 fn append_array(&mut self, other: &ListArray) {
106 self.bitmap.append_bitmap(&other.bitmap);
107 let last = *self.offsets.last().unwrap();
108 self.offsets
109 .append(&mut other.offsets[1..].iter().map(|o| *o + last).collect());
110 self.value.append_array(&other.value);
111 }
112
113 fn pop(&mut self) -> Option<()> {
114 self.bitmap.pop()?;
115 let start = self.offsets.pop().unwrap();
116 let end = *self.offsets.last().unwrap();
117 for _ in end..start {
118 self.value.pop().unwrap();
119 }
120 Some(())
121 }
122
123 fn len(&self) -> usize {
124 self.bitmap.len()
125 }
126
127 fn finish(self) -> ListArray {
128 ListArray {
129 bitmap: self.bitmap.finish(),
130 offsets: self.offsets.into(),
131 value: Box::new(self.value.finish()),
132 }
133 }
134}
135
136impl ListArrayBuilder {
137 pub fn append_row_ref(&mut self, row: RowRef<'_>) {
138 self.bitmap.append(true);
139 let last = *self.offsets.last().unwrap();
140 self.offsets
141 .push(last.checked_add(row.len() as u32).expect("offset overflow"));
142 for v in row.iter() {
143 self.value.append(v);
144 }
145 }
146
147 pub fn writer(&mut self) -> ListWriter<'_> {
148 ListWriter::new(self)
149 }
150}
151
152#[derive(Debug, Clone, PartialEq, Eq)]
162pub struct ListArray {
163 pub(super) bitmap: Bitmap,
164 pub(super) offsets: Box<[u32]>,
165 pub(super) value: Box<ArrayImpl>,
166}
167
168impl EstimateSize for ListArray {
169 fn estimated_heap_size(&self) -> usize {
170 self.bitmap.estimated_heap_size()
171 + self.offsets.len() * size_of::<u32>()
172 + self.value.estimated_size()
173 }
174}
175
176impl Array for ListArray {
177 type Builder = ListArrayBuilder;
178 type OwnedItem = ListValue;
179 type RefItem<'a> = ListRef<'a>;
180
181 unsafe fn raw_value_at_unchecked(&self, idx: usize) -> Self::RefItem<'_> {
182 unsafe {
183 ListRef {
184 array: &self.value,
185 start: *self.offsets.get_unchecked(idx),
186 end: *self.offsets.get_unchecked(idx + 1),
187 }
188 }
189 }
190
191 fn len(&self) -> usize {
192 self.bitmap.len()
193 }
194
195 fn to_protobuf(&self) -> PbArray {
196 let value = self.value.to_protobuf();
197 PbArray {
198 array_type: PbArrayType::List as i32,
199 struct_array_data: None,
200 list_array_data: Some(Box::new(ListArrayData {
201 offsets: self.offsets.to_vec(),
202 value: Some(Box::new(value)),
203 value_type: Some(self.value.data_type().to_protobuf()),
204 elem_size: None,
205 })),
206 null_bitmap: Some(self.bitmap.to_protobuf()),
207 values: vec![],
208 }
209 }
210
211 fn null_bitmap(&self) -> &Bitmap {
212 &self.bitmap
213 }
214
215 fn into_null_bitmap(self) -> Bitmap {
216 self.bitmap
217 }
218
219 fn set_bitmap(&mut self, bitmap: Bitmap) {
220 self.bitmap = bitmap;
221 }
222
223 fn data_type(&self) -> DataType {
224 DataType::list(self.value.data_type())
225 }
226}
227
228impl ListArray {
229 pub fn flatten(&self) -> ArrayImpl {
238 match &*self.value {
239 ArrayImpl::List(inner) => inner.flatten(),
240 a => a.clone(),
241 }
242 }
243
244 pub fn values(&self) -> &ArrayImpl {
246 &self.value
247 }
248
249 pub fn from_protobuf(array: &PbArray) -> ArrayResult<ArrayImpl> {
250 ensure!(
251 array.values.is_empty(),
252 "Must have no buffer in a list array"
253 );
254 debug_assert!(
255 (array.array_type == PbArrayType::List as i32)
256 || (array.array_type == PbArrayType::Map as i32),
257 "invalid array type for list: {}",
258 array.array_type
259 );
260 let bitmap: Bitmap = array.get_null_bitmap()?.into();
261 let array_data = array.get_list_array_data()?.to_owned();
262 let flatten_len = match array_data.offsets.last() {
263 Some(&n) => n as usize,
264 None => bail!("Must have at least one element in offsets"),
265 };
266 let value = ArrayImpl::from_protobuf(array_data.value.as_ref().unwrap(), flatten_len)?;
267 let arr = ListArray {
268 bitmap,
269 offsets: array_data.offsets.into(),
270 value: Box::new(value),
271 };
272 Ok(arr.into())
273 }
274
275 pub async fn map_inner<E, Fut, F>(self, f: F) -> std::result::Result<ListArray, E>
278 where
279 F: FnOnce(ArrayImpl) -> Fut,
280 Fut: Future<Output = std::result::Result<ArrayImpl, E>>,
281 {
282 let new_value = (f)(*self.value).await?;
283
284 Ok(Self {
285 offsets: self.offsets,
286 bitmap: self.bitmap,
287 value: Box::new(new_value),
288 })
289 }
290
291 pub fn offsets(&self) -> &[u32] {
299 &self.offsets
300 }
301}
302
303impl<T, L> FromIterator<Option<L>> for ListArray
304where
305 T: PrimitiveArrayItemType,
306 L: IntoIterator<Item = T>,
307{
308 fn from_iter<I: IntoIterator<Item = Option<L>>>(iter: I) -> Self {
309 let iter = iter.into_iter();
310 let mut builder =
311 ListArrayBuilder::with_type(iter.size_hint().0, DataType::list(T::DATA_TYPE));
312 for v in iter {
313 match v {
314 None => builder.append(None),
315 Some(v) => {
316 builder.append(Some(v.into_iter().collect::<ListValue>().as_scalar_ref()))
317 }
318 }
319 }
320 builder.finish()
321 }
322}
323
324impl FromIterator<ListValue> for ListArray {
325 fn from_iter<I: IntoIterator<Item = ListValue>>(iter: I) -> Self {
326 let mut iter = iter.into_iter();
327 let first = iter.next().expect("empty iterator");
328 let mut builder =
329 ListArrayBuilder::with_type(iter.size_hint().0, DataType::list(first.elem_type()));
330 builder.append(Some(first.as_scalar_ref()));
331 for v in iter {
332 builder.append(Some(v.as_scalar_ref()));
333 }
334 builder.finish()
335 }
336}
337
338#[derive(Clone, PartialEq, Eq, EstimateSize)]
339pub struct ListValue {
340 values: Box<ArrayImpl>,
341}
342
343impl Debug for ListValue {
344 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
345 self.as_scalar_ref().fmt(f)
346 }
347}
348
349impl Display for ListValue {
350 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
351 self.as_scalar_ref().write(f)
352 }
353}
354
355impl ListValue {
356 pub fn new(values: ArrayImpl) -> Self {
357 Self {
358 values: Box::new(values),
359 }
360 }
361
362 pub fn into_array(self) -> ArrayImpl {
364 *self.values
365 }
366
367 pub fn empty(elem_type: &DataType) -> Self {
369 Self::new(elem_type.create_array_builder(0).finish())
370 }
371
372 pub fn from_datum_iter<T: ToDatumRef>(
374 elem_type: &DataType,
375 iter: impl IntoIterator<Item = T>,
376 ) -> Self {
377 let iter = iter.into_iter();
378 let mut builder = elem_type.create_array_builder(iter.size_hint().0);
379 for datum in iter {
380 builder.append(datum);
381 }
382 Self::new(builder.finish())
383 }
384
385 pub fn len(&self) -> usize {
387 self.values.len()
388 }
389
390 pub fn is_empty(&self) -> bool {
392 self.values.is_empty()
393 }
394
395 pub fn iter(&self) -> impl DoubleEndedIterator + ExactSizeIterator<Item = DatumRef<'_>> {
397 self.values.iter()
398 }
399
400 pub fn get(&self, index: usize) -> Option<DatumRef<'_>> {
402 if index < self.len() {
403 Some(self.values.value_at(index))
404 } else {
405 None
406 }
407 }
408
409 pub fn elem_type(&self) -> DataType {
411 self.values.data_type()
412 }
413
414 pub fn memcmp_deserialize(
415 list_type: &ListType,
416 deserializer: &mut memcomparable::Deserializer<impl Buf>,
417 ) -> memcomparable::Result<Self> {
418 let elem_type = list_type.elem();
419
420 let bytes = serde_bytes::ByteBuf::deserialize(deserializer)?;
421 let mut inner_deserializer = memcomparable::Deserializer::new(bytes.as_slice());
422 let mut builder = elem_type.create_array_builder(0);
423 while inner_deserializer.has_remaining() {
424 builder.append(memcmp_encoding::deserialize_datum_in_composite(
425 elem_type,
426 &mut inner_deserializer,
427 )?)
428 }
429 Ok(Self::new(builder.finish()))
430 }
431
432 pub fn display_for_explain(&self) -> String {
434 format!(
436 "ARRAY[{}]",
437 self.iter()
438 .map(|v| {
439 match v.as_ref() {
440 None => "null".into(),
441 Some(scalar) => scalar.to_text(),
442 }
443 })
444 .format(", ")
445 )
446 }
447
448 pub fn as_i64_mut_slice(&mut self) -> Option<&mut [i64]> {
450 match self.values.as_mut() {
451 ArrayImpl::Int64(array) => Some(array.as_mut_slice()),
452 _ => None,
453 }
454 }
455}
456
457impl PartialOrd for ListValue {
458 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
459 Some(self.cmp(other))
460 }
461}
462
463impl Ord for ListValue {
464 fn cmp(&self, other: &Self) -> Ordering {
465 self.as_scalar_ref().cmp(&other.as_scalar_ref())
466 }
467}
468
469impl<T: PrimitiveArrayItemType> FromIterator<Option<T>> for ListValue {
473 fn from_iter<I: IntoIterator<Item = Option<T>>>(iter: I) -> Self {
474 Self::new(iter.into_iter().collect::<PrimitiveArray<T>>().into())
475 }
476}
477impl<T: PrimitiveArrayItemType> FromIterator<T> for ListValue {
479 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
480 Self::new(iter.into_iter().collect::<PrimitiveArray<T>>().into())
481 }
482}
483impl FromIterator<bool> for ListValue {
485 fn from_iter<I: IntoIterator<Item = bool>>(iter: I) -> Self {
486 Self::new(iter.into_iter().collect::<BoolArray>().into())
487 }
488}
489impl<'a> FromIterator<Option<&'a str>> for ListValue {
491 fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
492 Self::new(iter.into_iter().collect::<Utf8Array>().into())
493 }
494}
495impl<'a> FromIterator<&'a str> for ListValue {
497 fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
498 Self::new(iter.into_iter().collect::<Utf8Array>().into())
499 }
500}
501impl FromIterator<ListValue> for ListValue {
503 fn from_iter<I: IntoIterator<Item = ListValue>>(iter: I) -> Self {
504 Self::new(iter.into_iter().collect::<ListArray>().into())
505 }
506}
507
508#[derive(Copy, Clone)]
510pub struct ListRef<'a> {
511 array: &'a ArrayImpl,
512 start: u32,
513 end: u32,
514}
515
516impl<'a> ListRef<'a> {
517 pub fn len(&self) -> usize {
519 (self.end - self.start) as usize
520 }
521
522 pub fn is_empty(&self) -> bool {
524 self.start == self.end
525 }
526
527 pub fn elem_type(&self) -> DataType {
529 self.array.data_type()
530 }
531
532 pub fn flatten(self) -> ListRef<'a> {
534 match self.array {
535 ArrayImpl::List(inner) => ListRef {
536 array: &inner.value,
537 start: inner.offsets[self.start as usize],
538 end: inner.offsets[self.end as usize],
539 }
540 .flatten(),
541 _ => self,
542 }
543 }
544
545 pub fn iter(self) -> impl DoubleEndedIterator + ExactSizeIterator<Item = DatumRef<'a>> + 'a {
547 (self.start..self.end).map(|i| self.array.value_at(i as usize))
548 }
549
550 pub fn get(self, index: usize) -> Option<DatumRef<'a>> {
552 if index < self.len() {
553 Some(self.array.value_at(self.start as usize + index))
554 } else {
555 None
556 }
557 }
558
559 pub fn memcmp_serialize(
560 self,
561 serializer: &mut memcomparable::Serializer<impl BufMut>,
562 ) -> memcomparable::Result<()> {
563 let mut inner_serializer = memcomparable::Serializer::new(vec![]);
564 for datum_ref in self.iter() {
565 memcmp_encoding::serialize_datum_in_composite(datum_ref, &mut inner_serializer)?
566 }
567 serializer.serialize_bytes(&inner_serializer.into_inner())
568 }
569
570 pub fn hash_scalar_inner<H: std::hash::Hasher>(self, state: &mut H) {
571 for datum_ref in self.iter() {
572 hash_datum(datum_ref, state);
573 }
574 }
575
576 pub fn estimate_serialize_size_inner(self) -> usize {
578 self.iter().map(estimate_serialize_datum_size).sum()
579 }
580
581 pub fn as_primitive_slice<T: PrimitiveArrayItemType>(self) -> Option<&'a [T]> {
582 T::try_into_array_ref(self.array)
583 .map(|prim_arr| &prim_arr.as_slice()[self.start as usize..self.end as usize])
584 }
585
586 pub fn as_i64_slice(&self) -> Option<&[i64]> {
588 self.as_primitive_slice()
589 }
590
591 pub(super) fn as_map_kv(self) -> (ListRef<'a>, ListRef<'a>) {
594 let (k, v) = self.array.as_struct().fields().collect_tuple().unwrap();
595 (
596 ListRef {
597 array: k,
598 start: self.start,
599 end: self.end,
600 },
601 ListRef {
602 array: v,
603 start: self.start,
604 end: self.end,
605 },
606 )
607 }
608}
609
610impl PartialEq for ListRef<'_> {
611 fn eq(&self, other: &Self) -> bool {
612 self.iter().eq(other.iter())
613 }
614}
615
616impl Eq for ListRef<'_> {}
617
618impl PartialOrd for ListRef<'_> {
619 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
620 Some(self.cmp(other))
621 }
622}
623
624impl Ord for ListRef<'_> {
625 fn cmp(&self, other: &Self) -> Ordering {
626 self.iter().cmp_by(other.iter(), |a, b| a.default_cmp(&b))
627 }
628}
629
630impl Debug for ListRef<'_> {
631 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
632 f.debug_list().entries(self.iter()).finish()
633 }
634}
635
636impl Row for ListRef<'_> {
637 fn datum_at(&self, index: usize) -> DatumRef<'_> {
638 self.array.value_at(self.start as usize + index)
639 }
640
641 unsafe fn datum_at_unchecked(&self, index: usize) -> DatumRef<'_> {
642 unsafe { self.array.value_at_unchecked(self.start as usize + index) }
643 }
644
645 fn len(&self) -> usize {
646 self.len()
647 }
648
649 fn iter(&self) -> impl Iterator<Item = DatumRef<'_>> {
650 (*self).iter()
651 }
652}
653
654impl ToText for ListRef<'_> {
655 fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
658 write!(
659 f,
660 "{{{}}}",
661 self.iter().format_with(",", |datum_ref, f| {
662 let s = datum_ref.to_text();
663 let need_quote = !matches!(datum_ref, None | Some(ScalarRefImpl::List(_)))
666 && (s.is_empty()
667 || s.eq_ignore_ascii_case("null")
668 || s.contains([
669 '"', '\\', ',',
670 ' ', '\t', '\n', '\r', '\x0B', '\x0C', '{', '}',
675 ]));
676 if need_quote {
677 f(&"\"")?;
678 s.chars().try_for_each(|c| {
679 if c == '"' || c == '\\' {
680 f(&"\\")?;
681 }
682 f(&c)
683 })?;
684 f(&"\"")
685 } else {
686 f(&s)
687 }
688 })
689 )
690 }
691
692 fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
693 match ty {
694 DataType::List { .. } => self.write(f),
695 _ => unreachable!(),
696 }
697 }
698}
699
700impl Scalar for ListValue {
702 type ScalarRefType<'a> = ListRef<'a>;
703
704 fn as_scalar_ref(&self) -> ListRef<'_> {
705 ListRef {
706 array: &self.values,
707 start: 0,
708 end: self.len() as u32,
709 }
710 }
711}
712
713impl<'a> ScalarRef<'a> for ListRef<'a> {
715 type ScalarType = ListValue;
716
717 fn to_owned_scalar(&self) -> ListValue {
718 let mut builder = self.array.create_builder(self.len());
719 for datum_ref in self.iter() {
720 builder.append(datum_ref);
721 }
722 ListValue::new(builder.finish())
723 }
724
725 fn hash_scalar<H: std::hash::Hasher>(&self, state: &mut H) {
726 self.hash_scalar_inner(state)
727 }
728}
729
730impl ListValue {
731 pub fn from_str(input: &str, data_type: &DataType) -> Result<Self, String> {
733 struct Parser<'a> {
734 input: &'a str,
735 data_type: &'a DataType,
736 }
737
738 impl Parser<'_> {
739 fn parse(&mut self) -> Result<Datum, String> {
741 self.skip_whitespace();
742 if self.data_type.is_array() {
743 if self.try_parse_null() {
744 return Ok(None);
745 }
746 Ok(Some(self.parse_array()?.into()))
747 } else {
748 self.parse_value()
749 }
750 }
751
752 fn parse_array(&mut self) -> Result<ListValue, String> {
754 self.skip_whitespace();
755 if !self.try_consume('{') {
756 return Err("Array value must start with \"{\"".to_owned());
757 }
758 self.skip_whitespace();
759 if self.try_consume('}') {
760 return Ok(ListValue::empty(self.data_type.as_list_elem()));
761 }
762 let mut builder =
763 ArrayBuilderImpl::with_type(0, self.data_type.as_list_elem().clone());
764 loop {
765 let mut parser = Self {
766 input: self.input,
767 data_type: self.data_type.as_list_elem(),
768 };
769 builder.append(parser.parse()?);
770 self.input = parser.input;
771
772 self.skip_whitespace();
774 match self.peek() {
775 Some(',') => {
776 self.try_consume(',');
777 }
778 Some('}') => {
779 self.try_consume('}');
780 break;
781 }
782 None => return Err(Self::eoi()),
783 _ => return Err("Unexpected array element.".to_owned()),
784 }
785 }
786 Ok(ListValue::new(builder.finish()))
787 }
788
789 fn parse_value(&mut self) -> Result<Datum, String> {
791 if self.peek() == Some('"') {
792 return Ok(Some(self.parse_quoted()?));
793 }
794 let mut chars = self.input.char_indices();
796 let mut has_escape = false;
797 let s = loop {
798 match chars.next().ok_or_else(Self::eoi)? {
799 (_, '\\') => {
800 has_escape = true;
801 chars.next().ok_or_else(Self::eoi)?;
802 }
803 (i, c @ ',' | c @ '}') => {
804 let s = &self.input[..i];
805 self.input = &self.input[i..];
807
808 break if has_escape {
809 Cow::Owned(Self::unescape_trim_end(s))
810 } else {
811 let trimmed = s.trim_end();
812 if trimmed.is_empty() {
813 return Err(format!("Unexpected \"{c}\" character."));
814 }
815 if trimmed.eq_ignore_ascii_case("null") {
816 return Ok(None);
817 }
818 Cow::Borrowed(trimmed)
819 };
820 }
821 (_, '{') => return Err("Unexpected \"{\" character.".to_owned()),
822 (_, '"') => return Err("Unexpected array element.".to_owned()),
823 _ => {}
824 }
825 };
826 Ok(Some(
827 ScalarImpl::from_text(&s, self.data_type).map_err(|e| e.to_report_string())?,
828 ))
829 }
830
831 fn parse_quoted(&mut self) -> Result<ScalarImpl, String> {
833 assert!(self.try_consume('"'));
834 let mut chars = self.input.char_indices();
836 let mut has_escape = false;
837 let s = loop {
838 match chars.next().ok_or_else(Self::eoi)? {
839 (_, '\\') => {
840 has_escape = true;
841 chars.next().ok_or_else(Self::eoi)?;
842 }
843 (i, '"') => {
844 let s = &self.input[..i];
845 self.input = &self.input[i + 1..];
846 break if has_escape {
847 Cow::Owned(Self::unescape(s))
848 } else {
849 Cow::Borrowed(s)
850 };
851 }
852 _ => {}
853 }
854 };
855 ScalarImpl::from_text(&s, self.data_type).map_err(|e| e.to_report_string())
856 }
857
858 fn unescape(s: &str) -> String {
860 let mut unescaped = String::with_capacity(s.len());
861 let mut chars = s.chars();
862 while let Some(mut c) = chars.next() {
863 if c == '\\' {
864 c = chars.next().unwrap();
865 }
866 unescaped.push(c);
867 }
868 unescaped
869 }
870
871 fn unescape_trim_end(s: &str) -> String {
875 let mut unescaped = String::with_capacity(s.len());
876 let mut chars = s.chars();
877 let mut len_after_last_escaped_char = 0;
878 while let Some(mut c) = chars.next() {
879 if c == '\\' {
880 c = chars.next().unwrap();
881 unescaped.push(c);
882 len_after_last_escaped_char = unescaped.len();
883 } else {
884 unescaped.push(c);
885 }
886 }
887 let l = unescaped[len_after_last_escaped_char..].trim_end().len();
888 unescaped.truncate(len_after_last_escaped_char + l);
889 unescaped
890 }
891
892 fn try_parse_null(&mut self) -> bool {
898 if let Some(s) = self.input.get(..4)
899 && s.eq_ignore_ascii_case("null")
900 {
901 let next_char = self.input[4..].chars().next();
902 match next_char {
903 None | Some(',' | '}') => {}
904 Some(c) if c.is_ascii_whitespace() => {}
905 _ => return false,
907 }
908 self.input = &self.input[4..];
909 true
910 } else {
911 false
912 }
913 }
914
915 fn try_consume(&mut self, c: char) -> bool {
917 if self.peek() == Some(c) {
918 self.input = &self.input[c.len_utf8()..];
919 true
920 } else {
921 false
922 }
923 }
924
925 fn expect_end(&mut self) -> Result<(), String> {
927 self.skip_whitespace();
928 match self.peek() {
929 Some(_) => Err("Junk after closing right brace.".to_owned()),
930 None => Ok(()),
931 }
932 }
933
934 fn skip_whitespace(&mut self) {
936 self.input = match self
937 .input
938 .char_indices()
939 .find(|(_, c)| !c.is_ascii_whitespace())
940 {
941 Some((i, _)) => &self.input[i..],
942 None => "",
943 };
944 }
945
946 fn peek(&self) -> Option<char> {
948 self.input.chars().next()
949 }
950
951 fn eoi() -> String {
953 "Unexpected end of input.".into()
954 }
955 }
956
957 let mut parser = Parser { input, data_type };
958 let array = parser.parse_array()?;
959 parser.expect_end()?;
960 Ok(array)
961 }
962}
963
964pub struct ListWriter<'a> {
965 builder: &'a mut ListArrayBuilder,
966}
967
968impl<'a> ListWriter<'a> {
969 pub fn new(builder: &'a mut ListArrayBuilder) -> Self {
970 Self { builder }
971 }
972
973 pub fn finish(self) {
976 self.builder.offsets.push(
977 self.builder
978 .value
979 .len()
980 .try_into()
981 .expect("offset overflow"),
982 );
983 self.builder.bitmap.append(true);
984 let _ = ManuallyDrop::new(self); }
986
987 pub fn rollback(self) {
990 }
992}
993
994impl Drop for ListWriter<'_> {
995 fn drop(&mut self) {
998 let last = *self.builder.offsets.last().unwrap() as usize;
999 let cur = self.builder.value.len();
1000 for _ in last..cur {
1001 self.builder.value.pop().unwrap();
1002 }
1003 }
1004}
1005
1006pub trait ListWrite {
1007 fn write(&mut self, value: impl ToDatumRef);
1008
1009 fn write_iter(&mut self, values: impl IntoIterator<Item = impl ToDatumRef>) {
1010 for v in values {
1011 self.write(v);
1012 }
1013 }
1014}
1015
1016impl<'a> ListWrite for ListWriter<'a> {
1017 fn write(&mut self, value: impl ToDatumRef) {
1018 self.builder.value.append(value);
1019 }
1020}
1021
1022impl ListWrite for ArrayBuilderImpl {
1023 fn write(&mut self, value: impl ToDatumRef) {
1024 self.append(value);
1025 }
1026}
1027
1028#[cfg(test)]
1029mod tests {
1030 use more_asserts::{assert_gt, assert_lt};
1031
1032 use super::*;
1033
1034 #[test]
1035 fn test_protobuf() {
1036 use crate::array::*;
1037 let array = ListArray::from_iter([
1038 Some(vec![12i32, -7, 25]),
1039 None,
1040 Some(vec![0, -127, 127, 50]),
1041 Some(vec![]),
1042 ]);
1043 let actual = ListArray::from_protobuf(&array.to_protobuf()).unwrap();
1044 assert_eq!(actual, ArrayImpl::List(array));
1045 }
1046
1047 #[test]
1048 fn test_append_array() {
1049 let part1 = ListArray::from_iter([Some([12i32, -7, 25]), None]);
1050 let part2 = ListArray::from_iter([Some(vec![0, -127, 127, 50]), Some(vec![])]);
1051
1052 let mut builder = ListArrayBuilder::with_type(4, DataType::Int32.list());
1053 builder.append_array(&part1);
1054 builder.append_array(&part2);
1055
1056 let expected = ListArray::from_iter([
1057 Some(vec![12i32, -7, 25]),
1058 None,
1059 Some(vec![0, -127, 127, 50]),
1060 Some(vec![]),
1061 ]);
1062 assert_eq!(builder.finish(), expected);
1063 }
1064
1065 #[test]
1067 fn test_list_create_builder() {
1068 use crate::array::*;
1069 let arr = ListArray::from_iter([Some([F32::from(2.0), F32::from(42.0), F32::from(1.0)])]);
1070 let arr2 = arr.create_builder(0).finish();
1071 assert_eq!(arr.data_type(), arr2.data_type());
1072 }
1073
1074 #[test]
1075 fn test_builder_pop() {
1076 use crate::array::*;
1077
1078 {
1079 let mut builder = ListArrayBuilder::with_type(1, DataType::Int32.list());
1080 let val = ListValue::from_iter([1i32, 2, 3]);
1081 builder.append(Some(val.as_scalar_ref()));
1082 assert!(builder.pop().is_some());
1083 assert!(builder.pop().is_none());
1084 let arr = builder.finish();
1085 assert!(arr.is_empty());
1086 }
1087
1088 {
1089 let data_type = DataType::Int32.list().list();
1090 let mut builder = ListArrayBuilder::with_type(2, data_type);
1091 let val1 = ListValue::from_iter([1, 2, 3]);
1092 let val2 = ListValue::from_iter([1, 2, 3]);
1093 let list1 = ListValue::from_iter([val1, val2]);
1094 builder.append(Some(list1.as_scalar_ref()));
1095
1096 let val3 = ListValue::from_iter([1, 2, 3]);
1097 let val4 = ListValue::from_iter([1, 2, 3]);
1098 let list2 = ListValue::from_iter([val3, val4]);
1099
1100 builder.append(Some(list2.as_scalar_ref()));
1101
1102 assert!(builder.pop().is_some());
1103
1104 let arr = builder.finish();
1105 assert_eq!(arr.len(), 1);
1106 assert_eq!(arr.value_at(0).unwrap(), list1.as_scalar_ref());
1107 }
1108 }
1109
1110 #[test]
1111 fn test_list_nested_layout() {
1112 use crate::array::*;
1113
1114 let listarray1 = ListArray::from_iter([Some([1i32, 2]), Some([3, 4])]);
1115 let listarray2 = ListArray::from_iter([Some(vec![5, 6, 7]), None, Some(vec![8])]);
1116 let listarray3 = ListArray::from_iter([Some([9, 10])]);
1117
1118 let nestarray = ListArray::from_iter(
1119 [listarray1, listarray2, listarray3]
1120 .into_iter()
1121 .map(|l| ListValue::new(l.into())),
1122 );
1123 let actual = ListArray::from_protobuf(&nestarray.to_protobuf()).unwrap();
1124 assert_eq!(ArrayImpl::List(nestarray), actual);
1125 }
1126
1127 #[test]
1128 fn test_list_value_cmp() {
1129 assert_lt!(
1131 ListValue::from_iter([1, 1]),
1132 ListValue::from_iter([1, 2, 1]),
1133 );
1134 assert_lt!(
1136 ListValue::from_iter([1, 2]),
1137 ListValue::from_iter([1, 2, 1]),
1138 );
1139 assert_gt!(
1141 ListValue::from_iter([1, 3]),
1142 ListValue::from_iter([1, 2, 1]),
1143 );
1144 assert_gt!(
1146 ListValue::from_iter([None::<i32>]),
1147 ListValue::from_iter([1]),
1148 );
1149 assert_gt!(
1151 ListValue::from_iter([Some(1), Some(2), None]),
1152 ListValue::from_iter([Some(1), Some(2), Some(1)]),
1153 );
1154 assert_gt!(
1157 ListValue::from_iter([Some(1), None]),
1158 ListValue::from_iter([Some(1), Some(2), Some(3)]),
1159 );
1160 assert_eq!(
1162 ListValue::from_iter([Some(1), None]),
1163 ListValue::from_iter([Some(1), None]),
1164 );
1165 }
1166
1167 #[test]
1168 fn test_list_ref_display() {
1169 let v = ListValue::from_iter([Some(1), None]);
1170 assert_eq!(v.to_string(), "{1,NULL}");
1171 }
1172
1173 #[test]
1174 fn test_serialize_deserialize() {
1175 let value = ListValue::from_iter([Some("abcd"), Some(""), None, Some("a")]);
1176 let list_ref = value.as_scalar_ref();
1177 let mut serializer = memcomparable::Serializer::new(vec![]);
1178 serializer.set_reverse(true);
1179 list_ref.memcmp_serialize(&mut serializer).unwrap();
1180 let buf = serializer.into_inner();
1181 let mut deserializer = memcomparable::Deserializer::new(&buf[..]);
1182 deserializer.set_reverse(true);
1183 assert_eq!(
1184 ListValue::memcmp_deserialize(&ListType::new(DataType::Varchar), &mut deserializer)
1185 .unwrap(),
1186 value
1187 );
1188
1189 let mut builder = ListArrayBuilder::with_type(0, DataType::Varchar.list());
1190 builder.append(Some(list_ref));
1191 let array = builder.finish();
1192 let list_ref = array.value_at(0).unwrap();
1193 let mut serializer = memcomparable::Serializer::new(vec![]);
1194 list_ref.memcmp_serialize(&mut serializer).unwrap();
1195 let buf = serializer.into_inner();
1196 let mut deserializer = memcomparable::Deserializer::new(&buf[..]);
1197 assert_eq!(
1198 ListValue::memcmp_deserialize(&ListType::new(DataType::Varchar), &mut deserializer)
1199 .unwrap(),
1200 value
1201 );
1202 }
1203
1204 #[test]
1205 fn test_memcomparable() {
1206 let cases = [
1207 (
1208 ListValue::from_iter([123, 456]),
1209 ListValue::from_iter([123, 789]),
1210 ),
1211 (
1212 ListValue::from_iter([123, 456]),
1213 ListValue::from_iter([123]),
1214 ),
1215 (
1216 ListValue::from_iter([None, Some("")]),
1217 ListValue::from_iter([None, None::<&str>]),
1218 ),
1219 (
1220 ListValue::from_iter([Some(2)]),
1221 ListValue::from_iter([Some(1), None, Some(3)]),
1222 ),
1223 ];
1224
1225 for (lhs, rhs) in cases {
1226 let lhs_serialized = {
1227 let mut serializer = memcomparable::Serializer::new(vec![]);
1228 lhs.as_scalar_ref()
1229 .memcmp_serialize(&mut serializer)
1230 .unwrap();
1231 serializer.into_inner()
1232 };
1233 let rhs_serialized = {
1234 let mut serializer = memcomparable::Serializer::new(vec![]);
1235 rhs.as_scalar_ref()
1236 .memcmp_serialize(&mut serializer)
1237 .unwrap();
1238 serializer.into_inner()
1239 };
1240 assert_eq!(lhs_serialized.cmp(&rhs_serialized), lhs.cmp(&rhs));
1241 }
1242 }
1243
1244 #[test]
1245 fn test_listref() {
1246 use crate::array::*;
1247 use crate::types;
1248
1249 let arr = ListArray::from_iter([Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6, 7])]);
1250
1251 let list_ref = arr.value_at(2).unwrap();
1253 assert_eq!(list_ref, ListValue::from_iter([4, 5, 6, 7]).as_scalar_ref());
1254
1255 let scalar = list_ref.get(1).unwrap();
1257 assert_eq!(scalar, Some(types::ScalarRefImpl::Int32(5)));
1258 }
1259
1260 #[test]
1261 fn test_from_to_literal() {
1262 #[track_caller]
1263 fn test(typestr: &str, input: &str, output: Option<&str>) {
1264 let datatype: DataType = typestr.parse().unwrap();
1265 let list = ListValue::from_str(input, &datatype).unwrap();
1266 let actual = list.as_scalar_ref().to_text();
1267 let output = output.unwrap_or(input);
1268 assert_eq!(actual, output);
1269 }
1270
1271 #[track_caller]
1272 fn test_err(typestr: &str, input: &str, err: &str) {
1273 let datatype: DataType = typestr.parse().unwrap();
1274 let actual_err = ListValue::from_str(input, &datatype).unwrap_err();
1275 assert_eq!(actual_err, err);
1276 }
1277
1278 test("varchar[]", "{}", None);
1279 test("varchar[]", "{1 2}", Some(r#"{"1 2"}"#));
1280 test("varchar[]", "{🥵,🤡}", None);
1281 test("varchar[]", r#"{aa\\bb}"#, Some(r#"{"aa\\bb"}"#));
1282 test("int[]", "{1,2,3}", None);
1283 test("varchar[]", r#"{"1,2"}"#, None);
1284 test("varchar[]", r#"{1, ""}"#, Some(r#"{1,""}"#));
1285 test("varchar[]", r#"{"\""}"#, None);
1286 test("varchar[]", r#"{\ }"#, Some(r#"{" "}"#));
1287 test("varchar[]", r#"{\\ }"#, Some(r#"{"\\"}"#));
1288 test("varchar[]", "{nulla}", None);
1289 test("varchar[]", "{null a}", Some(r#"{"null a"}"#));
1290 test(
1291 "varchar[]",
1292 r#"{"null", "NULL", null, NuLL}"#,
1293 Some(r#"{"null","NULL",NULL,NULL}"#),
1294 );
1295 test("varchar[][]", "{{1, 2, 3}, null }", Some("{{1,2,3},NULL}"));
1296 test(
1297 "varchar[][][]",
1298 "{{{1, 2, 3}}, {{4, 5, 6}}}",
1299 Some("{{{1,2,3}},{{4,5,6}}}"),
1300 );
1301 test_err("varchar[]", "()", r#"Array value must start with "{""#);
1302 test_err("varchar[]", "{1,", r#"Unexpected end of input."#);
1303 test_err("varchar[]", "{1,}", r#"Unexpected "}" character."#);
1304 test_err("varchar[]", "{1,,3}", r#"Unexpected "," character."#);
1305 test_err("varchar[]", r#"{"a""b"}"#, r#"Unexpected array element."#);
1306 test_err("varchar[]", r#"{}{"#, r#"Junk after closing right brace."#);
1307 }
1308}