risingwave_common/types/
mod.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Data types in RisingWave.
16
17// NOTE: When adding or modifying data types, remember to update the type matrix in
18// src/expr/macro/src/types.rs
19
20use std::fmt::Debug;
21use std::hash::Hash;
22use std::str::FromStr;
23
24use bytes::{Buf, BufMut, Bytes};
25use chrono::{Datelike, Timelike};
26use itertools::Itertools;
27use parse_display::{Display, FromStr};
28use paste::paste;
29use postgres_types::{FromSql, IsNull, ToSql, Type};
30use risingwave_common_estimate_size::{EstimateSize, ZeroHeapSize};
31use risingwave_pb::data::PbDataType;
32use risingwave_pb::data::data_type::PbTypeName;
33use rw_iter_util::ZipEqFast as _;
34use serde::{Deserialize, Serialize, Serializer};
35use strum_macros::EnumDiscriminants;
36use thiserror_ext::AsReport;
37
38use crate::array::{
39    ArrayBuilderImpl, ArrayError, ArrayResult, NULL_VAL_FOR_HASH, PrimitiveArrayItemType,
40};
41// Complex type's value is based on the array
42pub use crate::array::{ListRef, ListValue, MapRef, MapValue, StructRef, StructValue};
43use crate::cast::{str_to_bool, str_to_bytea};
44use crate::catalog::ColumnId;
45use crate::error::BoxedError;
46use crate::{
47    dispatch_data_types, dispatch_scalar_ref_variants, dispatch_scalar_variants, for_all_variants,
48};
49
50mod cow;
51mod datetime;
52mod decimal;
53mod fields;
54mod from_sql;
55mod interval;
56mod jsonb;
57mod macros;
58mod map_type;
59mod native_type;
60mod num256;
61mod ops;
62mod ordered;
63mod ordered_float;
64mod postgres_type;
65mod scalar_impl;
66mod sentinel;
67mod serial;
68mod struct_type;
69mod successor;
70mod timestamptz;
71mod to_binary;
72mod to_sql;
73mod to_text;
74mod with_data_type;
75
76pub use fields::Fields;
77pub use risingwave_fields_derive::Fields;
78
79pub use self::cow::DatumCow;
80pub use self::datetime::{Date, Time, Timestamp};
81pub use self::decimal::{Decimal, PowError as DecimalPowError};
82pub use self::interval::{DateTimeField, Interval, IntervalDisplay, test_utils};
83pub use self::jsonb::{JsonbRef, JsonbVal};
84pub use self::map_type::MapType;
85pub use self::native_type::*;
86pub use self::num256::{Int256, Int256Ref};
87pub use self::ops::{CheckedAdd, IsNegative};
88pub use self::ordered::*;
89pub use self::ordered_float::{FloatExt, IntoOrdered};
90pub use self::scalar_impl::*;
91pub use self::sentinel::Sentinelled;
92pub use self::serial::Serial;
93pub use self::struct_type::StructType;
94pub use self::successor::Successor;
95pub use self::timestamptz::*;
96pub use self::to_text::ToText;
97pub use self::with_data_type::WithDataType;
98
/// A 32-bit floating point type with total order.
///
/// Alias of `OrderedFloat<f32>` from this crate's own `ordered_float` submodule.
pub type F32 = ordered_float::OrderedFloat<f32>;

/// A 64-bit floating point type with total order.
///
/// Alias of `OrderedFloat<f64>` from this crate's own `ordered_float` submodule.
pub type F64 = ordered_float::OrderedFloat<f64>;
104
/// The set of datatypes that are supported in RisingWave.
///
/// # Trait implementations
///
/// - `EnumDiscriminants` generates [`DataTypeName`] enum with the same variants,
///   but without data fields.
/// - `Display` prints the text given by each variant's `#[display(..)]` attribute,
///   e.g. `DataType::Int32` is shown as `integer`.
/// - `PartialOrd`/`Ord` are derived, so variants compare in declaration order first.
/// - `FromStr` is only used internally for tests.
///   The generated implementation isn't efficient, and doesn't handle whitespaces, etc.
#[derive(
    Debug, Display, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, EnumDiscriminants, FromStr,
)]
#[strum_discriminants(derive(Hash, Ord, PartialOrd))]
#[strum_discriminants(name(DataTypeName))]
#[strum_discriminants(vis(pub))]
#[cfg_attr(test, strum_discriminants(derive(strum_macros::EnumIter)))]
pub enum DataType {
    #[display("boolean")]
    #[from_str(regex = "(?i)^bool$|^boolean$")]
    Boolean,
    #[display("smallint")]
    #[from_str(regex = "(?i)^smallint$|^int2$")]
    Int16,
    #[display("integer")]
    #[from_str(regex = "(?i)^integer$|^int$|^int4$")]
    Int32,
    #[display("bigint")]
    #[from_str(regex = "(?i)^bigint$|^int8$")]
    Int64,
    #[display("real")]
    #[from_str(regex = "(?i)^real$|^float4$")]
    Float32,
    #[display("double precision")]
    #[from_str(regex = "(?i)^double precision$|^float8$")]
    Float64,
    #[display("numeric")]
    #[from_str(regex = "(?i)^numeric$|^decimal$")]
    Decimal,
    #[display("date")]
    #[from_str(regex = "(?i)^date$")]
    Date,
    #[display("character varying")]
    #[from_str(regex = "(?i)^character varying$|^varchar$")]
    Varchar,
    #[display("time without time zone")]
    #[from_str(regex = "(?i)^time$|^time without time zone$")]
    Time,
    #[display("timestamp without time zone")]
    #[from_str(regex = "(?i)^timestamp$|^timestamp without time zone$")]
    Timestamp,
    #[display("timestamp with time zone")]
    #[from_str(regex = "(?i)^timestamptz$|^timestamp with time zone$")]
    Timestamptz,
    #[display("interval")]
    #[from_str(regex = "(?i)^interval$")]
    Interval,
    // Displayed and parsed through the inner `StructType`'s own implementations.
    #[display("{0}")]
    #[from_str(regex = "(?i)^(?P<0>.+)$")]
    Struct(StructType),
    // Displayed as the element type followed by `[]`, e.g. `integer[]`.
    // Parsing the element relies on `FromStr` being implemented for `Box<DataType>`.
    #[display("{0}[]")]
    #[from_str(regex = r"(?i)^(?P<0>.+)\[\]$")]
    List(Box<DataType>),
    #[display("bytea")]
    #[from_str(regex = "(?i)^bytea$")]
    Bytea,
    #[display("jsonb")]
    #[from_str(regex = "(?i)^jsonb$")]
    Jsonb,
    #[display("serial")]
    #[from_str(regex = "(?i)^serial$")]
    Serial,
    #[display("rw_int256")]
    #[from_str(regex = "(?i)^rw_int256$")]
    Int256,
    // Displayed and parsed through the inner `MapType`'s own implementations.
    #[display("{0}")]
    #[from_str(regex = "(?i)^(?P<0>.+)$")]
    Map(MapType),
}
182
183// For DataType::List
184impl std::str::FromStr for Box<DataType> {
185    type Err = BoxedError;
186
187    fn from_str(s: &str) -> Result<Self, Self::Err> {
188        Ok(Box::new(DataType::from_str(s)?))
189    }
190}
191
// Opts `DataType` into the `ZeroHeapSize` marker from `risingwave_common_estimate_size`.
// NOTE(review): presumably this makes size estimation ignore heap memory owned by nested
// types (e.g. the `Box` inside `List`) — confirm against the trait's documentation.
impl ZeroHeapSize for DataType {}
193
194impl TryFrom<DataTypeName> for DataType {
195    type Error = &'static str;
196
197    fn try_from(type_name: DataTypeName) -> Result<Self, Self::Error> {
198        match type_name {
199            DataTypeName::Boolean => Ok(DataType::Boolean),
200            DataTypeName::Int16 => Ok(DataType::Int16),
201            DataTypeName::Int32 => Ok(DataType::Int32),
202            DataTypeName::Int64 => Ok(DataType::Int64),
203            DataTypeName::Int256 => Ok(DataType::Int256),
204            DataTypeName::Serial => Ok(DataType::Serial),
205            DataTypeName::Decimal => Ok(DataType::Decimal),
206            DataTypeName::Float32 => Ok(DataType::Float32),
207            DataTypeName::Float64 => Ok(DataType::Float64),
208            DataTypeName::Varchar => Ok(DataType::Varchar),
209            DataTypeName::Bytea => Ok(DataType::Bytea),
210            DataTypeName::Date => Ok(DataType::Date),
211            DataTypeName::Timestamp => Ok(DataType::Timestamp),
212            DataTypeName::Timestamptz => Ok(DataType::Timestamptz),
213            DataTypeName::Time => Ok(DataType::Time),
214            DataTypeName::Interval => Ok(DataType::Interval),
215            DataTypeName::Jsonb => Ok(DataType::Jsonb),
216            DataTypeName::Struct | DataTypeName::List | DataTypeName::Map => Err(
217                "Functions returning composite types can not be inferred. Please use `FunctionCall::new_unchecked`.",
218            ),
219        }
220    }
221}
222
223impl From<&PbDataType> for DataType {
224    fn from(proto: &PbDataType) -> DataType {
225        match proto.get_type_name().expect("missing type field") {
226            PbTypeName::TypeUnspecified => unreachable!(),
227            PbTypeName::Int16 => DataType::Int16,
228            PbTypeName::Int32 => DataType::Int32,
229            PbTypeName::Int64 => DataType::Int64,
230            PbTypeName::Serial => DataType::Serial,
231            PbTypeName::Float => DataType::Float32,
232            PbTypeName::Double => DataType::Float64,
233            PbTypeName::Boolean => DataType::Boolean,
234            PbTypeName::Varchar => DataType::Varchar,
235            PbTypeName::Date => DataType::Date,
236            PbTypeName::Time => DataType::Time,
237            PbTypeName::Timestamp => DataType::Timestamp,
238            PbTypeName::Timestamptz => DataType::Timestamptz,
239            PbTypeName::Decimal => DataType::Decimal,
240            PbTypeName::Interval => DataType::Interval,
241            PbTypeName::Bytea => DataType::Bytea,
242            PbTypeName::Jsonb => DataType::Jsonb,
243            PbTypeName::Struct => {
244                let fields: Vec<DataType> = proto.field_type.iter().map(|f| f.into()).collect_vec();
245                let field_names: Vec<String> = proto.field_names.iter().cloned().collect_vec();
246                let field_ids = (proto.field_ids.iter().copied())
247                    .map(ColumnId::new)
248                    .collect_vec();
249
250                let mut struct_type = if proto.field_names.is_empty() {
251                    StructType::unnamed(fields)
252                } else {
253                    StructType::new(field_names.into_iter().zip_eq_fast(fields))
254                };
255                if !field_ids.is_empty() {
256                    struct_type = struct_type.with_ids(field_ids);
257                }
258                struct_type.into()
259            }
260            PbTypeName::List => DataType::List(
261                // The first (and only) item is the list element type.
262                Box::new((&proto.field_type[0]).into()),
263            ),
264            PbTypeName::Map => {
265                // Map is physically the same as a list.
266                // So the first (and only) item is the list element type.
267                let list_entries_type: DataType = (&proto.field_type[0]).into();
268                DataType::Map(MapType::from_entries(list_entries_type))
269            }
270            PbTypeName::Int256 => DataType::Int256,
271        }
272    }
273}
274
275impl From<DataTypeName> for PbTypeName {
276    fn from(type_name: DataTypeName) -> Self {
277        match type_name {
278            DataTypeName::Boolean => PbTypeName::Boolean,
279            DataTypeName::Int16 => PbTypeName::Int16,
280            DataTypeName::Int32 => PbTypeName::Int32,
281            DataTypeName::Int64 => PbTypeName::Int64,
282            DataTypeName::Serial => PbTypeName::Serial,
283            DataTypeName::Float32 => PbTypeName::Float,
284            DataTypeName::Float64 => PbTypeName::Double,
285            DataTypeName::Varchar => PbTypeName::Varchar,
286            DataTypeName::Date => PbTypeName::Date,
287            DataTypeName::Timestamp => PbTypeName::Timestamp,
288            DataTypeName::Timestamptz => PbTypeName::Timestamptz,
289            DataTypeName::Time => PbTypeName::Time,
290            DataTypeName::Interval => PbTypeName::Interval,
291            DataTypeName::Decimal => PbTypeName::Decimal,
292            DataTypeName::Bytea => PbTypeName::Bytea,
293            DataTypeName::Jsonb => PbTypeName::Jsonb,
294            DataTypeName::Struct => PbTypeName::Struct,
295            DataTypeName::List => PbTypeName::List,
296            DataTypeName::Int256 => PbTypeName::Int256,
297            DataTypeName::Map => PbTypeName::Map,
298        }
299    }
300}
301
302/// Convenient macros to generate match arms for [`DataType`].
303pub mod data_types {
304    use super::DataType;
305
306    /// Numeric [`DataType`]s supported to be `offset` of `RANGE` frame.
307    #[macro_export]
308    macro_rules! _range_frame_numeric_data_types {
309        () => {
310            DataType::Int16
311                | DataType::Int32
312                | DataType::Int64
313                | DataType::Float32
314                | DataType::Float64
315                | DataType::Decimal
316        };
317    }
318    pub use _range_frame_numeric_data_types as range_frame_numeric;
319
320    /// Date/time [`DataType`]s supported to be `offset` of `RANGE` frame.
321    #[macro_export]
322    macro_rules! _range_frame_datetime_data_types {
323        () => {
324            DataType::Date
325                | DataType::Time
326                | DataType::Timestamp
327                | DataType::Timestamptz
328                | DataType::Interval
329        };
330    }
331    pub use _range_frame_datetime_data_types as range_frame_datetime;
332
333    /// Data types that do not have inner fields.
334    #[macro_export]
335    macro_rules! _simple_data_types {
336        () => {
337            DataType::Boolean
338                | DataType::Int16
339                | DataType::Int32
340                | DataType::Int64
341                | DataType::Float32
342                | DataType::Float64
343                | DataType::Decimal
344                | DataType::Date
345                | DataType::Varchar
346                | DataType::Time
347                | DataType::Timestamp
348                | DataType::Timestamptz
349                | DataType::Interval
350                | DataType::Bytea
351                | DataType::Jsonb
352                | DataType::Serial
353                | DataType::Int256
354        };
355    }
356    pub use _simple_data_types as simple;
357
358    /// Data types that have inner fields.
359    #[macro_export]
360    macro_rules! _composite_data_types {
361        () => {
362            DataType::Struct { .. } | DataType::List { .. } | DataType::Map { .. }
363        };
364    }
365    pub use _composite_data_types as composite;
366
367    /// Test that all data types are covered either by `simple!()` or `composite!()`.
368    fn _simple_composite_data_types_exhausted(dt: DataType) {
369        match dt {
370            simple!() => {}
371            composite!() => {}
372        }
373    }
374}
375
376impl DataType {
377    pub fn create_array_builder(&self, capacity: usize) -> ArrayBuilderImpl {
378        use crate::array::*;
379
380        dispatch_data_types!(self, [B = ArrayBuilder], {
381            B::with_type(capacity, self.clone()).into()
382        })
383    }
384
385    pub fn type_name(&self) -> DataTypeName {
386        DataTypeName::from(self)
387    }
388
389    pub fn prost_type_name(&self) -> PbTypeName {
390        self.type_name().into()
391    }
392
393    pub fn to_protobuf(&self) -> PbDataType {
394        let mut pb = PbDataType {
395            type_name: self.prost_type_name() as i32,
396            is_nullable: true,
397            ..Default::default()
398        };
399        match self {
400            DataType::Struct(t) => {
401                if !t.is_unnamed() {
402                    // To be consistent with `From<&PbDataType>`,
403                    // we only set field names when it's a named struct.
404                    pb.field_names = t.names().map(|s| s.into()).collect();
405                }
406                pb.field_type = t.types().map(|f| f.to_protobuf()).collect();
407                if let Some(ids) = t.ids() {
408                    pb.field_ids = ids.map(|id| id.get_id()).collect();
409                }
410            }
411            DataType::List(datatype) => {
412                pb.field_type = vec![datatype.to_protobuf()];
413            }
414            DataType::Map(datatype) => {
415                // Same as List<Struct<K,V>>
416                pb.field_type = vec![datatype.clone().into_struct().to_protobuf()];
417            }
418            DataType::Boolean
419            | DataType::Int16
420            | DataType::Int32
421            | DataType::Int64
422            | DataType::Float32
423            | DataType::Float64
424            | DataType::Decimal
425            | DataType::Date
426            | DataType::Varchar
427            | DataType::Time
428            | DataType::Timestamp
429            | DataType::Timestamptz
430            | DataType::Interval
431            | DataType::Bytea
432            | DataType::Jsonb
433            | DataType::Serial
434            | DataType::Int256 => (),
435        }
436        pb
437    }
438
439    pub fn is_numeric(&self) -> bool {
440        matches!(
441            self,
442            DataType::Int16
443                | DataType::Int32
444                | DataType::Int64
445                | DataType::Serial
446                | DataType::Float32
447                | DataType::Float64
448                | DataType::Decimal
449        )
450    }
451
452    /// Returns whether the data type does not have inner fields.
453    pub fn is_simple(&self) -> bool {
454        matches!(self, data_types::simple!())
455    }
456
457    /// Returns whether the data type has inner fields.
458    pub fn is_composite(&self) -> bool {
459        matches!(self, data_types::composite!())
460    }
461
462    pub fn is_array(&self) -> bool {
463        matches!(self, DataType::List(_))
464    }
465
466    pub fn is_struct(&self) -> bool {
467        matches!(self, DataType::Struct(_))
468    }
469
470    pub fn is_map(&self) -> bool {
471        matches!(self, DataType::Map(_))
472    }
473
474    pub fn is_int(&self) -> bool {
475        matches!(self, DataType::Int16 | DataType::Int32 | DataType::Int64)
476    }
477
478    /// Returns the output type of time window function on a given input type.
479    pub fn window_of(input: &DataType) -> Option<DataType> {
480        match input {
481            DataType::Timestamptz => Some(DataType::Timestamptz),
482            DataType::Timestamp | DataType::Date => Some(DataType::Timestamp),
483            _ => None,
484        }
485    }
486
487    pub fn as_struct(&self) -> &StructType {
488        match self {
489            DataType::Struct(t) => t,
490            t => panic!("expect struct type, got {t}"),
491        }
492    }
493
494    pub fn as_map(&self) -> &MapType {
495        match self {
496            DataType::Map(t) => t,
497            t => panic!("expect map type, got {t}"),
498        }
499    }
500
501    pub fn into_map(self) -> MapType {
502        match self {
503            DataType::Map(t) => t,
504            t => panic!("expect map type, got {t}"),
505        }
506    }
507
508    /// Returns the inner element's type of a list type.
509    ///
510    /// # Panics
511    ///
512    /// Panics if the type is not a list type.
513    pub fn as_list_element_type(&self) -> &DataType {
514        match self {
515            DataType::List(t) => t,
516            t => panic!("expect list type, got {t}"),
517        }
518    }
519
520    pub fn into_list_element_type(self) -> DataType {
521        match self {
522            DataType::List(t) => *t,
523            t => panic!("expect list type, got {t}"),
524        }
525    }
526
527    /// Return a new type that removes the outer list, and get the innermost element type.
528    ///
529    /// Use [`DataType::as_list_element_type`] if you only want the element type of a list.
530    ///
531    /// ```
532    /// use risingwave_common::types::DataType::*;
533    /// assert_eq!(List(Box::new(Int32)).unnest_list(), &Int32);
534    /// assert_eq!(List(Box::new(List(Box::new(Int32)))).unnest_list(), &Int32);
535    /// ```
536    pub fn unnest_list(&self) -> &Self {
537        match self {
538            DataType::List(inner) => inner.unnest_list(),
539            _ => self,
540        }
541    }
542
543    /// Return the number of dimensions of this array/list type. Return `0` when this type is not an
544    /// array/list.
545    pub fn array_ndims(&self) -> usize {
546        let mut d = 0;
547        let mut t = self;
548        while let Self::List(inner) = t {
549            d += 1;
550            t = inner;
551        }
552        d
553    }
554
555    /// Compares the datatype with another, ignoring nested field names and ids.
556    pub fn equals_datatype(&self, other: &DataType) -> bool {
557        match (self, other) {
558            (Self::Struct(s1), Self::Struct(s2)) => s1.equals_datatype(s2),
559            (Self::List(d1), Self::List(d2)) => d1.equals_datatype(d2),
560            (Self::Map(m1), Self::Map(m2)) => {
561                m1.key().equals_datatype(m2.key()) && m1.value().equals_datatype(m2.value())
562            }
563            _ => self == other,
564        }
565    }
566
567    /// Whether a column with this data type can be altered to a new data type. This determines
568    /// the encoding of the column data.
569    ///
570    /// Returns...
571    /// - `None`, if the data type is simple or does not contain a struct type.
572    /// - `Some(true)`, if the data type contains a struct type with field ids ([`StructType::has_ids`]).
573    /// - `Some(false)`, if the data type contains a struct type without field ids.
574    pub fn can_alter(&self) -> Option<bool> {
575        match self {
576            data_types::simple!() => None,
577
578            DataType::Struct(struct_type) => {
579                // As long as we meet a struct type, we can check its `ids` field to determine if
580                // it can be altered.
581                let struct_can_alter = struct_type.has_ids();
582                // In debug build, we assert that once a struct type does (or does not) have ids,
583                // all its composite fields should have the same property.
584                if cfg!(debug_assertions) {
585                    for field in struct_type.types() {
586                        if let Some(field_can_alter) = field.can_alter() {
587                            assert_eq!(struct_can_alter, field_can_alter);
588                        }
589                    }
590                }
591                Some(struct_can_alter)
592            }
593
594            DataType::List(inner_type) => inner_type.can_alter(),
595            DataType::Map(map_type) => {
596                debug_assert!(
597                    map_type.key().is_simple(),
598                    "unexpected key type of map {map_type:?}"
599                );
600                map_type.value().can_alter()
601            }
602        }
603    }
604}
605
606impl From<StructType> for DataType {
607    fn from(value: StructType) -> Self {
608        Self::Struct(value)
609    }
610}
611
612impl From<DataType> for PbDataType {
613    fn from(data_type: DataType) -> Self {
614        data_type.to_protobuf()
615    }
616}
617
// Holds the bound list shared by `Scalar` and `ScalarRef` under a name that cannot be
// spelled from outside this file's module.
mod private {
    use super::*;

    // Note: put pub trait inside a private mod just makes the name private,
    // The trait methods will still be publicly available...
    // a.k.a. ["Voldemort type"](https://rust-lang.github.io/rfcs/2145-type-privacy.html#lint-3-voldemort-types-its-reachable-but-i-cant-name-it)

    /// Common trait bounds of scalar and scalar reference types.
    ///
    /// `Impl` is the enum the type converts to and from: `Scalar` instantiates it with
    /// `ScalarImpl`, `ScalarRef` with `ScalarRefImpl`.
    ///
    /// NOTE(rc): `Hash` is not in the trait bound list, it's implemented as [`ScalarRef::hash_scalar`].
    // This is trait-alias syntax (`trait A = B + C;`), not a regular trait definition.
    pub trait ScalarBounds<Impl> = Debug
        + Send
        + Sync
        + Clone
        + PartialEq
        + Eq
        // in default ascending order
        + PartialOrd
        + Ord
        + TryFrom<Impl, Error = ArrayError>
        // `ScalarImpl`/`ScalarRefImpl`
        + Into<Impl>;
}
641
642/// `Scalar` is a trait over all possible owned types in the evaluation
643/// framework.
644///
645/// `Scalar` is reciprocal to `ScalarRef`. Use `as_scalar_ref` to get a
646/// reference which has the same lifetime as `self`.
647pub trait Scalar: private::ScalarBounds<ScalarImpl> + 'static {
648    /// Type for reference of `Scalar`
649    type ScalarRefType<'a>: ScalarRef<'a, ScalarType = Self> + 'a
650    where
651        Self: 'a;
652
653    /// Get a reference to current scalar.
654    fn as_scalar_ref(&self) -> Self::ScalarRefType<'_>;
655
656    fn to_scalar_value(self) -> ScalarImpl {
657        self.into()
658    }
659}
660
/// `ScalarRef` is a trait over all possible references in the evaluation
/// framework.
///
/// `ScalarRef` is reciprocal to `Scalar`. Use `to_owned_scalar` to get an
/// owned scalar.
///
/// Implementors must be `Copy`, so references can be passed around by value.
pub trait ScalarRef<'a>: private::ScalarBounds<ScalarRefImpl<'a>> + 'a + Copy {
    /// `ScalarType` is the owned type of current `ScalarRef`.
    ///
    /// Its `ScalarRefType<'a>` must be `Self`, so the two traits always pair up.
    type ScalarType: Scalar<ScalarRefType<'a> = Self>;

    /// Convert `ScalarRef` to an owned scalar.
    fn to_owned_scalar(&self) -> Self::ScalarType;

    /// A wrapped hash function to get the hash value for this scalar.
    ///
    /// Feeds the value into `state`; this stands in for a `Hash` bound, which the common
    /// scalar bounds deliberately omit.
    fn hash_scalar<H: std::hash::Hasher>(&self, state: &mut H);
}
676
/// Define `ScalarImpl` and `ScalarRefImpl` with macro.
///
/// The macro is handed to `for_all_variants!`, which calls it with a comma-separated list of
/// `{ data_type, variant_name, suffix_name, scalar, scalar_ref, array, builder }` tuples.
/// Only `variant_name`, `scalar` and `scalar_ref` are used here: each tuple contributes one
/// variant to both enums.
macro_rules! scalar_impl_enum {
    ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty } ),*) => {
        /// `ScalarImpl` embeds all possible scalars in the evaluation framework.
        ///
        /// Note: `ScalarImpl` doesn't contain all information of its `DataType`,
        /// so sometimes they need to be used together.
        /// e.g., for `Struct`, we don't have the field names in the value.
        ///
        /// See `for_all_variants` for the definition.
        #[derive(Debug, Clone, PartialEq, Eq, EstimateSize)]
        pub enum ScalarImpl {
            $( $variant_name($scalar) ),*
        }

        /// `ScalarRefImpl` embeds all possible scalar references in the evaluation
        /// framework.
        ///
        /// Note: `ScalarRefImpl` doesn't contain all information of its `DataType`,
        /// so sometimes they need to be used together.
        /// e.g., for `Struct`, we don't have the field names in the value.
        ///
        /// See `for_all_variants` for the definition.
        #[derive(Debug, Copy, Clone, PartialEq, Eq)]
        pub enum ScalarRefImpl<'scalar> {
            $( $variant_name($scalar_ref) ),*
        }
    };
}

// Instantiates both enums from the variant list owned by `for_all_variants!`.
for_all_variants! { scalar_impl_enum }
708
// We MUST NOT implement `Ord` for `ScalarImpl` because that will make `Datum` derive an incorrect
// default `Ord`. To get a default-ordered `ScalarImpl`/`ScalarRefImpl`/`Datum`/`DatumRef`, you can
// use `DefaultOrdered<T>`. If non-default order is needed, please refer to `sort_util`.
//
// The negative impls below opt out of `PartialOrd` explicitly; since `Ord` has `PartialOrd` as
// a supertrait, this rules out an `Ord` implementation as well.
impl !PartialOrd for ScalarImpl {}
impl !PartialOrd for ScalarRefImpl<'_> {}
714
/// An optional owned scalar value.
// NOTE(review): `None` presumably stands for SQL `NULL` — confirm against callers.
pub type Datum = Option<ScalarImpl>;
/// An optional borrowed scalar value; the reference counterpart of [`Datum`].
pub type DatumRef<'a> = Option<ScalarRefImpl<'a>>;
717
/// Conversion of a value into an owned [`Datum`].
///
/// In this file it is implemented for `&Datum`, for anything that is `Into<ScalarImpl>`,
/// and for an `Option` of such a value.
pub trait ToOwnedDatum {
    /// Convert the datum to an owned [`Datum`].
    fn to_owned_datum(self) -> Datum;
}
723
724impl ToOwnedDatum for &Datum {
725    #[inline(always)]
726    fn to_owned_datum(self) -> Datum {
727        self.clone()
728    }
729}
730
731impl<T: Into<ScalarImpl>> ToOwnedDatum for T {
732    #[inline(always)]
733    fn to_owned_datum(self) -> Datum {
734        Some(self.into())
735    }
736}
737
738impl<T: Into<ScalarImpl>> ToOwnedDatum for Option<T> {
739    #[inline(always)]
740    fn to_owned_datum(self) -> Datum {
741        self.map(Into::into)
742    }
743}
744
/// Borrowing conversion of a datum-like value into a [`DatumRef`].
///
/// In this file it is implemented for [`Datum`], `Option<&ScalarImpl>` and [`DatumRef`].
// NOTE(review): `auto_impl(&)` presumably also generates the impl for `&T` where
// `T: ToDatumRef` — confirm against the `auto_impl` crate documentation.
#[auto_impl::auto_impl(&)]
pub trait ToDatumRef: PartialEq + Eq + Debug {
    /// Convert the datum to [`DatumRef`].
    fn to_datum_ref(&self) -> DatumRef<'_>;
}
750
751impl ToDatumRef for Datum {
752    #[inline(always)]
753    fn to_datum_ref(&self) -> DatumRef<'_> {
754        self.as_ref().map(|d| d.as_scalar_ref_impl())
755    }
756}
757impl ToDatumRef for Option<&ScalarImpl> {
758    #[inline(always)]
759    fn to_datum_ref(&self) -> DatumRef<'_> {
760        self.map(|d| d.as_scalar_ref_impl())
761    }
762}
763impl ToDatumRef for DatumRef<'_> {
764    #[inline(always)]
765    fn to_datum_ref(&self) -> DatumRef<'_> {
766        *self
767    }
768}
769
/// To make sure there is `as_scalar_ref` for all scalar ref types.
/// See <https://github.com/risingwavelabs/risingwave/pull/9977/files#r1208972881>
///
/// This is used by the expr macro.
pub trait SelfAsScalarRef {
    /// Returns the scalar reference itself, by value.
    fn as_scalar_ref(&self) -> Self;
}
// Implements `SelfAsScalarRef` for each listed type by copying `*self`, so every listed
// type has to be `Copy`.
macro_rules! impl_self_as_scalar_ref {
    ($($t:ty),*) => {
        $(
            impl SelfAsScalarRef for $t {
                fn as_scalar_ref(&self) -> Self {
                    *self
                }
            }
        )*
    };
}
// Reference-like types for which `as_scalar_ref` returns the value itself.
impl_self_as_scalar_ref! { &str, &[u8], Int256Ref<'_>, JsonbRef<'_>, ListRef<'_>, StructRef<'_>, ScalarRefImpl<'_>, MapRef<'_> }
789
/// `for_all_native_types` includes all native variants of our scalar types.
///
/// Specifically, it doesn't support u8/u16/u32/u64.
///
/// The given macro is invoked once with a comma-separated list of
/// `{ native type, variant name, read method }` tuples.
// NOTE(review): the third element looks like the name of a byte-reading method for the
// type (both `i64` and `Serial` use `read_i64`) — confirm against the macro's consumers.
#[macro_export]
macro_rules! for_all_native_types {
    ($macro:ident) => {
        $macro! {
            { i16, Int16, read_i16 },
            { i32, Int32, read_i32 },
            { i64, Int64, read_i64 },
            { Serial, Serial, read_i64 },
            { $crate::types::F32, Float32, read_f32 },
            { $crate::types::F64, Float64, read_f64 }
        }
    };
}
806
807/// `impl_convert` implements several conversions for `Scalar`.
808/// * `Scalar <-> ScalarImpl` with `From` and `TryFrom` trait.
809/// * `ScalarRef <-> ScalarRefImpl` with `From` and `TryFrom` trait.
810/// * `&ScalarImpl -> &Scalar` with `impl.as_int16()`.
811/// * `ScalarImpl -> Scalar` with `impl.into_int16()`.
812macro_rules! impl_convert {
813    ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty } ),*) => {
814        $(
815            impl From<$scalar> for ScalarImpl {
816                fn from(val: $scalar) -> Self {
817                    ScalarImpl::$variant_name(val)
818                }
819            }
820
821            impl TryFrom<ScalarImpl> for $scalar {
822                type Error = ArrayError;
823
824                fn try_from(val: ScalarImpl) -> ArrayResult<Self> {
825                    match val {
826                        ScalarImpl::$variant_name(scalar) => Ok(scalar),
827                        other_scalar => bail!("cannot convert ScalarImpl::{} to concrete type", other_scalar.get_ident()),
828                    }
829                }
830            }
831
832            impl <'scalar> From<$scalar_ref> for ScalarRefImpl<'scalar> {
833                fn from(val: $scalar_ref) -> Self {
834                    ScalarRefImpl::$variant_name(val)
835                }
836            }
837
838            impl <'scalar> TryFrom<ScalarRefImpl<'scalar>> for $scalar_ref {
839                type Error = ArrayError;
840
841                fn try_from(val: ScalarRefImpl<'scalar>) -> ArrayResult<Self> {
842                    match val {
843                        ScalarRefImpl::$variant_name(scalar_ref) => Ok(scalar_ref),
844                        other_scalar => bail!("cannot convert ScalarRefImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name)),
845                    }
846                }
847            }
848
849            paste! {
850                impl ScalarImpl {
851                    /// # Panics
852                    /// If the scalar is not of the expected type.
853                    pub fn [<as_ $suffix_name>](&self) -> &$scalar {
854                        match self {
855                            Self::$variant_name(scalar) => scalar,
856                            other_scalar => panic!("cannot convert ScalarImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
857                        }
858                    }
859
860                    /// # Panics
861                    /// If the scalar is not of the expected type.
862                    pub fn [<into_ $suffix_name>](self) -> $scalar {
863                        match self {
864                            Self::$variant_name(scalar) => scalar,
865                            other_scalar =>  panic!("cannot convert ScalarImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
866                        }
867                    }
868                }
869
870                impl <'scalar> ScalarRefImpl<'scalar> {
871                    /// # Panics
872                    /// If the scalar is not of the expected type.
873                    pub fn [<into_ $suffix_name>](self) -> $scalar_ref {
874                        match self {
875                            Self::$variant_name(inner) => inner,
876                            other_scalar => panic!("cannot convert ScalarRefImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
877                        }
878                    }
879                }
880            }
881        )*
882    };
883}
884
885for_all_variants! { impl_convert }
886
887// Implement `From<raw float>` for `ScalarImpl::Float` as a sugar.
888impl From<f32> for ScalarImpl {
889    fn from(f: f32) -> Self {
890        Self::Float32(f.into())
891    }
892}
893impl From<f64> for ScalarImpl {
894    fn from(f: f64) -> Self {
895        Self::Float64(f.into())
896    }
897}
898
899// Implement `From<string like>` for `ScalarImpl::Utf8` as a sugar.
900impl From<String> for ScalarImpl {
901    fn from(s: String) -> Self {
902        Self::Utf8(s.into_boxed_str())
903    }
904}
905impl From<&str> for ScalarImpl {
906    fn from(s: &str) -> Self {
907        Self::Utf8(s.into())
908    }
909}
910impl From<&String> for ScalarImpl {
911    fn from(s: &String) -> Self {
912        Self::Utf8(s.as_str().into())
913    }
914}
915impl TryFrom<ScalarImpl> for String {
916    type Error = ArrayError;
917
918    fn try_from(val: ScalarImpl) -> ArrayResult<Self> {
919        match val {
920            ScalarImpl::Utf8(s) => Ok(s.into()),
921            other_scalar => bail!(
922                "cannot convert ScalarImpl::{} to concrete type",
923                other_scalar.get_ident()
924            ),
925        }
926    }
927}
928
929impl From<char> for ScalarImpl {
930    fn from(c: char) -> Self {
931        Self::Utf8(c.to_string().into())
932    }
933}
934
935impl From<&[u8]> for ScalarImpl {
936    fn from(s: &[u8]) -> Self {
937        Self::Bytea(s.into())
938    }
939}
940
941impl From<JsonbRef<'_>> for ScalarImpl {
942    fn from(jsonb: JsonbRef<'_>) -> Self {
943        Self::Jsonb(jsonb.to_owned_scalar())
944    }
945}
946
947impl<T: PrimitiveArrayItemType> From<Vec<T>> for ScalarImpl {
948    fn from(v: Vec<T>) -> Self {
949        Self::List(v.into_iter().collect())
950    }
951}
952
953impl<T: PrimitiveArrayItemType> From<Vec<Option<T>>> for ScalarImpl {
954    fn from(v: Vec<Option<T>>) -> Self {
955        Self::List(v.into_iter().collect())
956    }
957}
958
959impl From<Vec<String>> for ScalarImpl {
960    fn from(v: Vec<String>) -> Self {
961        Self::List(v.iter().map(|s| s.as_str()).collect())
962    }
963}
964
965impl From<Vec<u8>> for ScalarImpl {
966    fn from(v: Vec<u8>) -> Self {
967        Self::Bytea(v.into())
968    }
969}
970
971impl From<Bytes> for ScalarImpl {
972    fn from(v: Bytes) -> Self {
973        Self::Bytea(v.as_ref().into())
974    }
975}
976
977impl From<ListRef<'_>> for ScalarImpl {
978    fn from(list: ListRef<'_>) -> Self {
979        Self::List(list.to_owned_scalar())
980    }
981}
982
983impl ScalarImpl {
984    /// Creates a scalar from pgwire "BINARY" format.
985    ///
986    /// The counterpart of [`to_binary::ToBinary`].
987    pub fn from_binary(bytes: &Bytes, data_type: &DataType) -> Result<Self, BoxedError> {
988        let res = match data_type {
989            DataType::Varchar => Self::Utf8(String::from_sql(&Type::VARCHAR, bytes)?.into()),
990            DataType::Bytea => Self::Bytea(Vec::<u8>::from_sql(&Type::BYTEA, bytes)?.into()),
991            DataType::Boolean => Self::Bool(bool::from_sql(&Type::BOOL, bytes)?),
992            DataType::Int16 => Self::Int16(i16::from_sql(&Type::INT2, bytes)?),
993            DataType::Int32 => Self::Int32(i32::from_sql(&Type::INT4, bytes)?),
994            DataType::Int64 => Self::Int64(i64::from_sql(&Type::INT8, bytes)?),
995            DataType::Serial => Self::Serial(Serial::from(i64::from_sql(&Type::INT8, bytes)?)),
996            DataType::Float32 => Self::Float32(f32::from_sql(&Type::FLOAT4, bytes)?.into()),
997            DataType::Float64 => Self::Float64(f64::from_sql(&Type::FLOAT8, bytes)?.into()),
998            DataType::Decimal => {
999                Self::Decimal(rust_decimal::Decimal::from_sql(&Type::NUMERIC, bytes)?.into())
1000            }
1001            DataType::Date => Self::Date(chrono::NaiveDate::from_sql(&Type::DATE, bytes)?.into()),
1002            DataType::Time => Self::Time(chrono::NaiveTime::from_sql(&Type::TIME, bytes)?.into()),
1003            DataType::Timestamp => {
1004                Self::Timestamp(chrono::NaiveDateTime::from_sql(&Type::TIMESTAMP, bytes)?.into())
1005            }
1006            DataType::Timestamptz => Self::Timestamptz(
1007                chrono::DateTime::<chrono::Utc>::from_sql(&Type::TIMESTAMPTZ, bytes)?.into(),
1008            ),
1009            DataType::Interval => Self::Interval(Interval::from_sql(&Type::INTERVAL, bytes)?),
1010            DataType::Jsonb => Self::Jsonb(
1011                JsonbVal::value_deserialize(bytes)
1012                    .ok_or_else(|| "invalid value of Jsonb".to_owned())?,
1013            ),
1014            DataType::Int256 => Self::Int256(Int256::from_binary(bytes)?),
1015            DataType::Struct(_) | DataType::List(_) | DataType::Map(_) => {
1016                return Err(format!("unsupported data type: {}", data_type).into());
1017            }
1018        };
1019        Ok(res)
1020    }
1021
1022    /// Creates a scalar from pgwire "TEXT" format.
1023    ///
1024    /// The counterpart of [`ToText`].
1025    pub fn from_text(s: &str, data_type: &DataType) -> Result<Self, BoxedError> {
1026        Ok(match data_type {
1027            DataType::Boolean => str_to_bool(s)?.into(),
1028            DataType::Int16 => i16::from_str(s)?.into(),
1029            DataType::Int32 => i32::from_str(s)?.into(),
1030            DataType::Int64 => i64::from_str(s)?.into(),
1031            DataType::Int256 => Int256::from_str(s)?.into(),
1032            DataType::Serial => Serial::from(i64::from_str(s)?).into(),
1033            DataType::Decimal => Decimal::from_str(s)?.into(),
1034            DataType::Float32 => F32::from_str(s)?.into(),
1035            DataType::Float64 => F64::from_str(s)?.into(),
1036            DataType::Varchar => s.into(),
1037            DataType::Date => Date::from_str(s)?.into(),
1038            DataType::Timestamp => Timestamp::from_str(s)?.into(),
1039            // We only handle the case with timezone here, and leave the implicit session timezone case
1040            // for later phase.
1041            DataType::Timestamptz => Timestamptz::from_str(s)?.into(),
1042            DataType::Time => Time::from_str(s)?.into(),
1043            DataType::Interval => Interval::from_str(s)?.into(),
1044            DataType::List(_) => ListValue::from_str(s, data_type)?.into(),
1045            DataType::Struct(st) => StructValue::from_str(s, st)?.into(),
1046            DataType::Jsonb => JsonbVal::from_str(s)?.into(),
1047            DataType::Bytea => str_to_bytea(s)?.into(),
1048            DataType::Map(_m) => return Err("map from text is not supported".into()),
1049        })
1050    }
1051
1052    pub fn from_text_for_test(s: &str, data_type: &DataType) -> Result<Self, BoxedError> {
1053        Ok(match data_type {
1054            DataType::Map(map_type) => MapValue::from_str_for_test(s, map_type)?.into(),
1055            _ => ScalarImpl::from_text(s, data_type)?,
1056        })
1057    }
1058}
1059
1060impl From<ScalarRefImpl<'_>> for ScalarImpl {
1061    fn from(scalar_ref: ScalarRefImpl<'_>) -> Self {
1062        scalar_ref.into_scalar_impl()
1063    }
1064}
1065
1066impl<'a> From<&'a ScalarImpl> for ScalarRefImpl<'a> {
1067    fn from(scalar: &'a ScalarImpl) -> Self {
1068        scalar.as_scalar_ref_impl()
1069    }
1070}
1071
1072impl ScalarImpl {
1073    /// Converts [`ScalarImpl`] to [`ScalarRefImpl`]
1074    pub fn as_scalar_ref_impl(&self) -> ScalarRefImpl<'_> {
1075        dispatch_scalar_variants!(self, inner, { inner.as_scalar_ref().into() })
1076    }
1077}
1078
1079impl ScalarRefImpl<'_> {
1080    /// Converts [`ScalarRefImpl`] to [`ScalarImpl`]
1081    pub fn into_scalar_impl(self) -> ScalarImpl {
1082        dispatch_scalar_ref_variants!(self, inner, { inner.to_owned_scalar().into() })
1083    }
1084}
1085
1086impl Hash for ScalarImpl {
1087    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1088        dispatch_scalar_variants!(self, inner, { inner.as_scalar_ref().hash_scalar(state) })
1089    }
1090}
1091
1092impl Hash for ScalarRefImpl<'_> {
1093    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1094        dispatch_scalar_ref_variants!(self, inner, { inner.hash_scalar(state) })
1095    }
1096}
1097
1098/// Feeds the raw scalar reference of `datum` to the given `state`, which should behave the same
1099/// as [`crate::array::Array::hash_at`], where NULL value will be carefully handled.
1100///
1101/// **FIXME**: the result of this function might be different from [`std::hash::Hash`] due to the
1102/// type alias of `DatumRef = Option<_>`, we should manually implement [`std::hash::Hash`] for
1103/// [`DatumRef`] in the future when it becomes a newtype. (#477)
1104#[inline(always)]
1105pub fn hash_datum(datum: impl ToDatumRef, state: &mut impl std::hash::Hasher) {
1106    match datum.to_datum_ref() {
1107        Some(scalar_ref) => scalar_ref.hash(state),
1108        None => NULL_VAL_FOR_HASH.hash(state),
1109    }
1110}
1111
1112impl ScalarRefImpl<'_> {
1113    pub fn binary_format(&self, data_type: &DataType) -> to_binary::Result<Bytes> {
1114        use self::to_binary::ToBinary;
1115        self.to_binary_with_type(data_type)
1116    }
1117
1118    pub fn text_format(&self, data_type: &DataType) -> String {
1119        self.to_text_with_type(data_type)
1120    }
1121
1122    /// Serialize the scalar into the `memcomparable` format.
1123    pub fn serialize(
1124        &self,
1125        ser: &mut memcomparable::Serializer<impl BufMut>,
1126    ) -> memcomparable::Result<()> {
1127        match self {
1128            Self::Int16(v) => v.serialize(ser)?,
1129            Self::Int32(v) => v.serialize(ser)?,
1130            Self::Int64(v) => v.serialize(ser)?,
1131            Self::Serial(v) => v.serialize(ser)?,
1132            Self::Float32(v) => v.serialize(ser)?,
1133            Self::Float64(v) => v.serialize(ser)?,
1134            Self::Utf8(v) => v.serialize(ser)?,
1135            Self::Bytea(v) => ser.serialize_bytes(v)?,
1136            Self::Bool(v) => v.serialize(ser)?,
1137            Self::Decimal(v) => ser.serialize_decimal((*v).into())?,
1138            Self::Interval(v) => v.serialize(ser)?,
1139            Self::Date(v) => v.0.num_days_from_ce().serialize(ser)?,
1140            Self::Timestamp(v) => {
1141                v.0.and_utc().timestamp().serialize(&mut *ser)?;
1142                v.0.and_utc().timestamp_subsec_nanos().serialize(ser)?;
1143            }
1144            Self::Timestamptz(v) => v.serialize(ser)?,
1145            Self::Time(v) => {
1146                v.0.num_seconds_from_midnight().serialize(&mut *ser)?;
1147                v.0.nanosecond().serialize(ser)?;
1148            }
1149            Self::Int256(v) => v.memcmp_serialize(ser)?,
1150            Self::Jsonb(v) => v.memcmp_serialize(ser)?,
1151            Self::Struct(v) => v.memcmp_serialize(ser)?,
1152            Self::List(v) => v.memcmp_serialize(ser)?,
1153            Self::Map(v) => v.memcmp_serialize(ser)?,
1154        };
1155        Ok(())
1156    }
1157}
1158
1159impl ScalarImpl {
1160    /// Serialize the scalar into the `memcomparable` format.
1161    pub fn serialize(
1162        &self,
1163        ser: &mut memcomparable::Serializer<impl BufMut>,
1164    ) -> memcomparable::Result<()> {
1165        self.as_scalar_ref_impl().serialize(ser)
1166    }
1167
1168    /// Deserialize the scalar from the `memcomparable` format.
1169    pub fn deserialize(
1170        ty: &DataType,
1171        de: &mut memcomparable::Deserializer<impl Buf>,
1172    ) -> memcomparable::Result<Self> {
1173        use DataType as Ty;
1174        Ok(match ty {
1175            Ty::Int16 => Self::Int16(i16::deserialize(de)?),
1176            Ty::Int32 => Self::Int32(i32::deserialize(de)?),
1177            Ty::Int64 => Self::Int64(i64::deserialize(de)?),
1178            Ty::Int256 => Self::Int256(Int256::memcmp_deserialize(de)?),
1179            Ty::Serial => Self::Serial(Serial::from(i64::deserialize(de)?)),
1180            Ty::Float32 => Self::Float32(f32::deserialize(de)?.into()),
1181            Ty::Float64 => Self::Float64(f64::deserialize(de)?.into()),
1182            Ty::Varchar => Self::Utf8(Box::<str>::deserialize(de)?),
1183            Ty::Bytea => Self::Bytea(serde_bytes::ByteBuf::deserialize(de)?.into_vec().into()),
1184            Ty::Boolean => Self::Bool(bool::deserialize(de)?),
1185            Ty::Decimal => Self::Decimal(de.deserialize_decimal()?.into()),
1186            Ty::Interval => Self::Interval(Interval::deserialize(de)?),
1187            Ty::Time => Self::Time({
1188                let secs = u32::deserialize(&mut *de)?;
1189                let nano = u32::deserialize(de)?;
1190                Time::with_secs_nano(secs, nano)
1191                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1192            }),
1193            Ty::Timestamp => Self::Timestamp({
1194                let secs = i64::deserialize(&mut *de)?;
1195                let nsecs = u32::deserialize(de)?;
1196                Timestamp::with_secs_nsecs(secs, nsecs)
1197                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1198            }),
1199            Ty::Timestamptz => Self::Timestamptz(Timestamptz::deserialize(de)?),
1200            Ty::Date => Self::Date({
1201                let days = i32::deserialize(de)?;
1202                Date::with_days_since_ce(days)
1203                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1204            }),
1205            Ty::Jsonb => Self::Jsonb(JsonbVal::memcmp_deserialize(de)?),
1206            Ty::Struct(t) => StructValue::memcmp_deserialize(t.types(), de)?.to_scalar_value(),
1207            Ty::List(t) => ListValue::memcmp_deserialize(t, de)?.to_scalar_value(),
1208            Ty::Map(t) => MapValue::memcmp_deserialize(t, de)?.to_scalar_value(),
1209        })
1210    }
1211
1212    pub fn as_integral(&self) -> i64 {
1213        match self {
1214            Self::Int16(v) => *v as i64,
1215            Self::Int32(v) => *v as i64,
1216            Self::Int64(v) => *v,
1217            _ => panic!(
1218                "Can't convert ScalarImpl::{} to a integral",
1219                self.get_ident()
1220            ),
1221        }
1222    }
1223}
1224
1225/// Returns whether the `literal` matches the `data_type`.
1226pub fn literal_type_match(data_type: &DataType, literal: Option<&ScalarImpl>) -> bool {
1227    match literal {
1228        Some(scalar) => {
1229            macro_rules! matches {
1230                ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty }),*) => {
1231                    match (data_type, scalar) {
1232                        $(
1233                            (DataType::$data_type { .. }, ScalarImpl::$variant_name(_)) => true,
1234                            (DataType::$data_type { .. }, _) => false, // so that we won't forget to match a new logical type
1235                        )*
1236                    }
1237                }
1238            }
1239            for_all_variants! { matches }
1240        }
1241        None => true,
1242    }
1243}
1244
1245#[cfg(test)]
1246mod tests {
1247    use std::hash::{BuildHasher, Hasher};
1248
1249    use strum::IntoEnumIterator;
1250
1251    use super::*;
1252    use crate::util::hash_util::Crc32FastBuilder;
1253
1254    #[test]
1255    fn test_size() {
1256        use static_assertions::const_assert_eq;
1257
1258        use crate::array::*;
1259
1260        macro_rules! assert_item_size_eq {
1261            ($array:ty, $size:literal) => {
1262                const_assert_eq!(std::mem::size_of::<<$array as Array>::OwnedItem>(), $size);
1263            };
1264        }
1265
1266        assert_item_size_eq!(StructArray, 16); // Box<[Datum]>
1267        assert_item_size_eq!(ListArray, 8); // Box<ArrayImpl>
1268        assert_item_size_eq!(Utf8Array, 16); // Box<str>
1269        assert_item_size_eq!(IntervalArray, 16);
1270        assert_item_size_eq!(TimestampArray, 12);
1271
1272        // TODO: try to reduce the memory usage of `Decimal`, `ScalarImpl` and `Datum`.
1273        assert_item_size_eq!(DecimalArray, 20);
1274
1275        const_assert_eq!(std::mem::size_of::<ScalarImpl>(), 24);
1276        const_assert_eq!(std::mem::size_of::<ScalarRefImpl<'_>>(), 24);
1277        const_assert_eq!(std::mem::size_of::<Datum>(), 24);
1278        const_assert_eq!(std::mem::size_of::<StructType>(), 8);
1279        const_assert_eq!(std::mem::size_of::<DataType>(), 16);
1280    }
1281
1282    #[test]
1283    fn test_data_type_display() {
1284        let d: DataType =
1285            StructType::new(vec![("i", DataType::Int32), ("j", DataType::Varchar)]).into();
1286        assert_eq!(
1287            format!("{}", d),
1288            "struct<i integer, j character varying>".to_owned()
1289        );
1290    }
1291
1292    #[test]
1293    fn test_hash_implementation() {
1294        fn test(datum: Datum, data_type: DataType) {
1295            assert!(literal_type_match(&data_type, datum.as_ref()));
1296
1297            let mut builder = data_type.create_array_builder(6);
1298            for _ in 0..3 {
1299                builder.append_null();
1300                builder.append(&datum);
1301            }
1302            let array = builder.finish();
1303
1304            let hash_from_array = {
1305                let mut state = Crc32FastBuilder.build_hasher();
1306                array.hash_at(3, &mut state);
1307                state.finish()
1308            };
1309
1310            let hash_from_datum = {
1311                let mut state = Crc32FastBuilder.build_hasher();
1312                hash_datum(&datum, &mut state);
1313                state.finish()
1314            };
1315
1316            let hash_from_datum_ref = {
1317                let mut state = Crc32FastBuilder.build_hasher();
1318                hash_datum(datum.to_datum_ref(), &mut state);
1319                state.finish()
1320            };
1321
1322            assert_eq!(hash_from_array, hash_from_datum);
1323            assert_eq!(hash_from_datum, hash_from_datum_ref);
1324        }
1325
1326        for name in DataTypeName::iter() {
1327            let (scalar, data_type) = match name {
1328                DataTypeName::Boolean => (ScalarImpl::Bool(true), DataType::Boolean),
1329                DataTypeName::Int16 => (ScalarImpl::Int16(233), DataType::Int16),
1330                DataTypeName::Int32 => (ScalarImpl::Int32(233333), DataType::Int32),
1331                DataTypeName::Int64 => (ScalarImpl::Int64(233333333333), DataType::Int64),
1332                DataTypeName::Int256 => (
1333                    ScalarImpl::Int256(233333333333_i64.into()),
1334                    DataType::Int256,
1335                ),
1336                DataTypeName::Serial => (ScalarImpl::Serial(233333333333.into()), DataType::Serial),
1337                DataTypeName::Float32 => (ScalarImpl::Float32(23.33.into()), DataType::Float32),
1338                DataTypeName::Float64 => (
1339                    ScalarImpl::Float64(23.333333333333.into()),
1340                    DataType::Float64,
1341                ),
1342                DataTypeName::Decimal => (
1343                    ScalarImpl::Decimal("233.33".parse().unwrap()),
1344                    DataType::Decimal,
1345                ),
1346                DataTypeName::Date => (
1347                    ScalarImpl::Date(Date::from_ymd_uncheck(2333, 3, 3)),
1348                    DataType::Date,
1349                ),
1350                DataTypeName::Varchar => (ScalarImpl::Utf8("233".into()), DataType::Varchar),
1351                DataTypeName::Bytea => (
1352                    ScalarImpl::Bytea("\\x233".as_bytes().into()),
1353                    DataType::Bytea,
1354                ),
1355                DataTypeName::Time => (
1356                    ScalarImpl::Time(Time::from_hms_uncheck(2, 3, 3)),
1357                    DataType::Time,
1358                ),
1359                DataTypeName::Timestamp => (
1360                    ScalarImpl::Timestamp(Timestamp::from_timestamp_uncheck(23333333, 2333)),
1361                    DataType::Timestamp,
1362                ),
1363                DataTypeName::Timestamptz => (
1364                    ScalarImpl::Timestamptz(Timestamptz::from_micros(233333333)),
1365                    DataType::Timestamptz,
1366                ),
1367                DataTypeName::Interval => (
1368                    ScalarImpl::Interval(Interval::from_month_day_usec(2, 3, 3333)),
1369                    DataType::Interval,
1370                ),
1371                DataTypeName::Jsonb => (ScalarImpl::Jsonb(JsonbVal::null()), DataType::Jsonb),
1372                DataTypeName::Struct => (
1373                    ScalarImpl::Struct(StructValue::new(vec![
1374                        ScalarImpl::Int64(233).into(),
1375                        ScalarImpl::Float64(23.33.into()).into(),
1376                    ])),
1377                    DataType::Struct(StructType::new(vec![
1378                        ("a", DataType::Int64),
1379                        ("b", DataType::Float64),
1380                    ])),
1381                ),
1382                DataTypeName::List => (
1383                    ScalarImpl::List(ListValue::from_iter([233i64, 2333])),
1384                    DataType::List(Box::new(DataType::Int64)),
1385                ),
1386                DataTypeName::Map => {
1387                    // map is not hashable
1388                    continue;
1389                }
1390            };
1391
1392            test(Some(scalar), data_type.clone());
1393            test(None, data_type);
1394        }
1395    }
1396
1397    #[test]
1398    fn test_data_type_from_str() {
1399        assert_eq!(DataType::from_str("bool").unwrap(), DataType::Boolean);
1400        assert_eq!(DataType::from_str("boolean").unwrap(), DataType::Boolean);
1401        assert_eq!(DataType::from_str("BOOL").unwrap(), DataType::Boolean);
1402        assert_eq!(DataType::from_str("BOOLEAN").unwrap(), DataType::Boolean);
1403
1404        assert_eq!(DataType::from_str("int2").unwrap(), DataType::Int16);
1405        assert_eq!(DataType::from_str("smallint").unwrap(), DataType::Int16);
1406        assert_eq!(DataType::from_str("INT2").unwrap(), DataType::Int16);
1407        assert_eq!(DataType::from_str("SMALLINT").unwrap(), DataType::Int16);
1408
1409        assert_eq!(DataType::from_str("int4").unwrap(), DataType::Int32);
1410        assert_eq!(DataType::from_str("integer").unwrap(), DataType::Int32);
1411        assert_eq!(DataType::from_str("int4").unwrap(), DataType::Int32);
1412        assert_eq!(DataType::from_str("INT4").unwrap(), DataType::Int32);
1413        assert_eq!(DataType::from_str("INTEGER").unwrap(), DataType::Int32);
1414        assert_eq!(DataType::from_str("INT").unwrap(), DataType::Int32);
1415
1416        assert_eq!(DataType::from_str("int8").unwrap(), DataType::Int64);
1417        assert_eq!(DataType::from_str("bigint").unwrap(), DataType::Int64);
1418        assert_eq!(DataType::from_str("INT8").unwrap(), DataType::Int64);
1419        assert_eq!(DataType::from_str("BIGINT").unwrap(), DataType::Int64);
1420
1421        assert_eq!(DataType::from_str("rw_int256").unwrap(), DataType::Int256);
1422        assert_eq!(DataType::from_str("RW_INT256").unwrap(), DataType::Int256);
1423
1424        assert_eq!(DataType::from_str("float4").unwrap(), DataType::Float32);
1425        assert_eq!(DataType::from_str("real").unwrap(), DataType::Float32);
1426        assert_eq!(DataType::from_str("FLOAT4").unwrap(), DataType::Float32);
1427        assert_eq!(DataType::from_str("REAL").unwrap(), DataType::Float32);
1428
1429        assert_eq!(DataType::from_str("float8").unwrap(), DataType::Float64);
1430        assert_eq!(
1431            DataType::from_str("double precision").unwrap(),
1432            DataType::Float64
1433        );
1434        assert_eq!(DataType::from_str("FLOAT8").unwrap(), DataType::Float64);
1435        assert_eq!(
1436            DataType::from_str("DOUBLE PRECISION").unwrap(),
1437            DataType::Float64
1438        );
1439
1440        assert_eq!(DataType::from_str("decimal").unwrap(), DataType::Decimal);
1441        assert_eq!(DataType::from_str("DECIMAL").unwrap(), DataType::Decimal);
1442        assert_eq!(DataType::from_str("numeric").unwrap(), DataType::Decimal);
1443        assert_eq!(DataType::from_str("NUMERIC").unwrap(), DataType::Decimal);
1444
1445        assert_eq!(DataType::from_str("date").unwrap(), DataType::Date);
1446        assert_eq!(DataType::from_str("DATE").unwrap(), DataType::Date);
1447
1448        assert_eq!(DataType::from_str("varchar").unwrap(), DataType::Varchar);
1449        assert_eq!(DataType::from_str("VARCHAR").unwrap(), DataType::Varchar);
1450
1451        assert_eq!(DataType::from_str("time").unwrap(), DataType::Time);
1452        assert_eq!(
1453            DataType::from_str("time without time zone").unwrap(),
1454            DataType::Time
1455        );
1456        assert_eq!(DataType::from_str("TIME").unwrap(), DataType::Time);
1457        assert_eq!(
1458            DataType::from_str("TIME WITHOUT TIME ZONE").unwrap(),
1459            DataType::Time
1460        );
1461
1462        assert_eq!(
1463            DataType::from_str("timestamp").unwrap(),
1464            DataType::Timestamp
1465        );
1466        assert_eq!(
1467            DataType::from_str("timestamp without time zone").unwrap(),
1468            DataType::Timestamp
1469        );
1470        assert_eq!(
1471            DataType::from_str("TIMESTAMP").unwrap(),
1472            DataType::Timestamp
1473        );
1474        assert_eq!(
1475            DataType::from_str("TIMESTAMP WITHOUT TIME ZONE").unwrap(),
1476            DataType::Timestamp
1477        );
1478
1479        assert_eq!(
1480            DataType::from_str("timestamptz").unwrap(),
1481            DataType::Timestamptz
1482        );
1483        assert_eq!(
1484            DataType::from_str("timestamp with time zone").unwrap(),
1485            DataType::Timestamptz
1486        );
1487        assert_eq!(
1488            DataType::from_str("TIMESTAMPTZ").unwrap(),
1489            DataType::Timestamptz
1490        );
1491        assert_eq!(
1492            DataType::from_str("TIMESTAMP WITH TIME ZONE").unwrap(),
1493            DataType::Timestamptz
1494        );
1495
1496        assert_eq!(DataType::from_str("interval").unwrap(), DataType::Interval);
1497        assert_eq!(DataType::from_str("INTERVAL").unwrap(), DataType::Interval);
1498
1499        assert_eq!(
1500            DataType::from_str("int2[]").unwrap(),
1501            DataType::List(Box::new(DataType::Int16))
1502        );
1503        assert_eq!(
1504            DataType::from_str("int[]").unwrap(),
1505            DataType::List(Box::new(DataType::Int32))
1506        );
1507        assert_eq!(
1508            DataType::from_str("int8[]").unwrap(),
1509            DataType::List(Box::new(DataType::Int64))
1510        );
1511        assert_eq!(
1512            DataType::from_str("float4[]").unwrap(),
1513            DataType::List(Box::new(DataType::Float32))
1514        );
1515        assert_eq!(
1516            DataType::from_str("float8[]").unwrap(),
1517            DataType::List(Box::new(DataType::Float64))
1518        );
1519        assert_eq!(
1520            DataType::from_str("decimal[]").unwrap(),
1521            DataType::List(Box::new(DataType::Decimal))
1522        );
1523        assert_eq!(
1524            DataType::from_str("varchar[]").unwrap(),
1525            DataType::List(Box::new(DataType::Varchar))
1526        );
1527        assert_eq!(
1528            DataType::from_str("date[]").unwrap(),
1529            DataType::List(Box::new(DataType::Date))
1530        );
1531        assert_eq!(
1532            DataType::from_str("time[]").unwrap(),
1533            DataType::List(Box::new(DataType::Time))
1534        );
1535        assert_eq!(
1536            DataType::from_str("timestamp[]").unwrap(),
1537            DataType::List(Box::new(DataType::Timestamp))
1538        );
1539        assert_eq!(
1540            DataType::from_str("timestamptz[]").unwrap(),
1541            DataType::List(Box::new(DataType::Timestamptz))
1542        );
1543        assert_eq!(
1544            DataType::from_str("interval[]").unwrap(),
1545            DataType::List(Box::new(DataType::Interval))
1546        );
1547
1548        assert_eq!(
1549            DataType::from_str("record").unwrap(),
1550            DataType::Struct(StructType::unnamed(vec![]))
1551        );
1552        assert_eq!(
1553            DataType::from_str("struct<a int4, b varchar>").unwrap(),
1554            DataType::Struct(StructType::new(vec![
1555                ("a", DataType::Int32),
1556                ("b", DataType::Varchar)
1557            ]))
1558        );
1559    }
1560
1561    #[test]
1562    fn test_can_alter() {
1563        let cannots = [
1564            (DataType::Int32, None),
1565            (DataType::List(DataType::Int32.into()), None),
1566            (
1567                MapType::from_kv(DataType::Varchar, DataType::List(DataType::Int32.into())).into(),
1568                None,
1569            ),
1570            (
1571                StructType::new([("a", DataType::Int32)]).into(),
1572                Some(false),
1573            ),
1574            (
1575                MapType::from_kv(
1576                    DataType::Varchar,
1577                    StructType::new([("a", DataType::Int32)]).into(),
1578                )
1579                .into(),
1580                Some(false),
1581            ),
1582        ];
1583        for (cannot, why) in cannots {
1584            assert_eq!(cannot.can_alter(), why, "{cannot:?}");
1585        }
1586
1587        let cans = [
1588            StructType::new([
1589                ("a", DataType::Int32),
1590                ("b", DataType::List(DataType::Int32.into())),
1591            ])
1592            .with_ids([ColumnId::new(1), ColumnId::new(2)])
1593            .into(),
1594            DataType::List(Box::new(DataType::Struct(
1595                StructType::new([("a", DataType::Int32)]).with_ids([ColumnId::new(1)]),
1596            ))),
1597            MapType::from_kv(
1598                DataType::Varchar,
1599                StructType::new([("a", DataType::Int32)])
1600                    .with_ids([ColumnId::new(1)])
1601                    .into(),
1602            )
1603            .into(),
1604        ];
1605        for can in cans {
1606            assert_eq!(can.can_alter(), Some(true), "{can:?}");
1607        }
1608    }
1609}