risingwave_common/types/
mod.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Data types in RisingWave.
16
17// NOTE: When adding or modifying data types, remember to update the type matrix in
18// src/expr/macro/src/types.rs
19
20use std::fmt::Debug;
21use std::hash::Hash;
22use std::str::FromStr;
23
24use bytes::{Buf, BufMut, Bytes};
25use chrono::{Datelike, Timelike};
26use itertools::Itertools;
27use parse_display::{Display, FromStr};
28use paste::paste;
29use postgres_types::{FromSql, IsNull, ToSql, Type};
30use risingwave_common_estimate_size::{EstimateSize, ZeroHeapSize};
31use risingwave_pb::data::PbDataType;
32use risingwave_pb::data::data_type::PbTypeName;
33use rw_iter_util::ZipEqFast as _;
34use serde::{Deserialize, Serialize, Serializer};
35use strum_macros::EnumDiscriminants;
36use thiserror_ext::AsReport;
37
38use crate::array::{
39    ArrayBuilderImpl, ArrayError, ArrayResult, NULL_VAL_FOR_HASH, PrimitiveArrayItemType,
40};
41// Complex type's value is based on the array
42pub use crate::array::{ListRef, ListValue, MapRef, MapValue, StructRef, StructValue};
43use crate::cast::{str_to_bool, str_to_bytea};
44use crate::catalog::ColumnId;
45use crate::error::BoxedError;
46use crate::{
47    dispatch_data_types, dispatch_scalar_ref_variants, dispatch_scalar_variants, for_all_variants,
48};
49
50mod cow;
51mod datetime;
52mod decimal;
53mod fields;
54mod from_sql;
55mod interval;
56mod jsonb;
57mod macros;
58mod map_type;
59mod native_type;
60mod num256;
61mod ops;
62mod ordered;
63mod ordered_float;
64mod postgres_type;
65mod scalar_impl;
66mod sentinel;
67mod serial;
68mod struct_type;
69mod successor;
70mod timestamptz;
71mod to_binary;
72mod to_sql;
73mod to_text;
74mod with_data_type;
75
76pub use fields::Fields;
77pub use risingwave_fields_derive::Fields;
78
79pub use self::cow::DatumCow;
80pub use self::datetime::{Date, Time, Timestamp};
81pub use self::decimal::{Decimal, PowError as DecimalPowError};
82pub use self::interval::{DateTimeField, Interval, IntervalDisplay, test_utils};
83pub use self::jsonb::{JsonbRef, JsonbVal};
84pub use self::map_type::MapType;
85pub use self::native_type::*;
86pub use self::num256::{Int256, Int256Ref};
87pub use self::ops::{CheckedAdd, IsNegative};
88pub use self::ordered::*;
89pub use self::ordered_float::{FloatExt, IntoOrdered};
90pub use self::scalar_impl::*;
91pub use self::sentinel::Sentinelled;
92pub use self::serial::Serial;
93pub use self::struct_type::StructType;
94pub use self::successor::Successor;
95pub use self::timestamptz::*;
96pub use self::to_text::ToText;
97pub use self::with_data_type::WithDataType;
98
99/// A 32-bit floating point type with total order.
100pub type F32 = ordered_float::OrderedFloat<f32>;
101
102/// A 64-bit floating point type with total order.
103pub type F64 = ordered_float::OrderedFloat<f64>;
104
/// The set of datatypes that are supported in RisingWave.
///
/// # Trait implementations
///
/// - `EnumDiscriminants` generates [`DataTypeName`] enum with the same variants,
///   but without data fields.
/// - `FromStr` is only used internally for tests.
///   The generated implementation isn't efficient, and doesn't handle whitespaces, etc.
/// - `Display` renders the text given in each variant's `#[display(...)]` attribute.
#[derive(
    Debug, Display, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, EnumDiscriminants, FromStr,
)]
// The generated discriminant enum is public, named `DataTypeName`, and additionally derives
// `Hash`, `Ord` and `PartialOrd` (plus `EnumIter` in test builds).
#[strum_discriminants(derive(Hash, Ord, PartialOrd))]
#[strum_discriminants(name(DataTypeName))]
#[strum_discriminants(vis(pub))]
#[cfg_attr(test, strum_discriminants(derive(strum_macros::EnumIter)))]
pub enum DataType {
    // Each simple variant below pairs a canonical display name with the regex of spellings
    // accepted by the test-only `FromStr`.
    #[display("boolean")]
    #[from_str(regex = "(?i)^bool$|^boolean$")]
    Boolean,
    #[display("smallint")]
    #[from_str(regex = "(?i)^smallint$|^int2$")]
    Int16,
    #[display("integer")]
    #[from_str(regex = "(?i)^integer$|^int$|^int4$")]
    Int32,
    #[display("bigint")]
    #[from_str(regex = "(?i)^bigint$|^int8$")]
    Int64,
    #[display("real")]
    #[from_str(regex = "(?i)^real$|^float4$")]
    Float32,
    #[display("double precision")]
    #[from_str(regex = "(?i)^double precision$|^float8$")]
    Float64,
    #[display("numeric")]
    #[from_str(regex = "(?i)^numeric$|^decimal$")]
    Decimal,
    #[display("date")]
    #[from_str(regex = "(?i)^date$")]
    Date,
    #[display("character varying")]
    #[from_str(regex = "(?i)^character varying$|^varchar$")]
    Varchar,
    #[display("time without time zone")]
    #[from_str(regex = "(?i)^time$|^time without time zone$")]
    Time,
    #[display("timestamp without time zone")]
    #[from_str(regex = "(?i)^timestamp$|^timestamp without time zone$")]
    Timestamp,
    #[display("timestamp with time zone")]
    #[from_str(regex = "(?i)^timestamptz$|^timestamp with time zone$")]
    Timestamptz,
    #[display("interval")]
    #[from_str(regex = "(?i)^interval$")]
    Interval,
    // Displayed by delegating to the inner `StructType`; parsing captures the whole input
    // for the inner type.
    // NOTE(review): `Struct` and `Map` share the same catch-all regex; which variant wins
    // presumably depends on the derived parser's variant order and on the inner type's
    // `FromStr` — confirm.
    #[display("{0}")]
    #[from_str(regex = "(?i)^(?P<0>.+)$")]
    Struct(StructType),
    // Displayed as the element type followed by `[]`; parsing captures everything before a
    // trailing `[]` as the element type.
    #[display("{0}[]")]
    #[from_str(regex = r"(?i)^(?P<0>.+)\[\]$")]
    List(Box<DataType>),
    #[display("bytea")]
    #[from_str(regex = "(?i)^bytea$")]
    Bytea,
    #[display("jsonb")]
    #[from_str(regex = "(?i)^jsonb$")]
    Jsonb,
    #[display("serial")]
    #[from_str(regex = "(?i)^serial$")]
    Serial,
    #[display("rw_int256")]
    #[from_str(regex = "(?i)^rw_int256$")]
    Int256,
    // Displayed by delegating to the inner `MapType`; uses the same catch-all regex as
    // `Struct` (see the note there).
    #[display("{0}")]
    #[from_str(regex = "(?i)^(?P<0>.+)$")]
    Map(MapType),
}
182
183// For DataType::List
184impl std::str::FromStr for Box<DataType> {
185    type Err = BoxedError;
186
187    fn from_str(s: &str) -> Result<Self, Self::Err> {
188        Ok(Box::new(DataType::from_str(s)?))
189    }
190}
191
// NOTE(review): `DataType::List` owns a `Box<DataType>`, so this marker presumably treats the
// heap footprint of a `DataType` as negligible for size estimation — confirm against the
// contract of `ZeroHeapSize`.
impl ZeroHeapSize for DataType {}
193
194impl TryFrom<DataTypeName> for DataType {
195    type Error = &'static str;
196
197    fn try_from(type_name: DataTypeName) -> Result<Self, Self::Error> {
198        match type_name {
199            DataTypeName::Boolean => Ok(DataType::Boolean),
200            DataTypeName::Int16 => Ok(DataType::Int16),
201            DataTypeName::Int32 => Ok(DataType::Int32),
202            DataTypeName::Int64 => Ok(DataType::Int64),
203            DataTypeName::Int256 => Ok(DataType::Int256),
204            DataTypeName::Serial => Ok(DataType::Serial),
205            DataTypeName::Decimal => Ok(DataType::Decimal),
206            DataTypeName::Float32 => Ok(DataType::Float32),
207            DataTypeName::Float64 => Ok(DataType::Float64),
208            DataTypeName::Varchar => Ok(DataType::Varchar),
209            DataTypeName::Bytea => Ok(DataType::Bytea),
210            DataTypeName::Date => Ok(DataType::Date),
211            DataTypeName::Timestamp => Ok(DataType::Timestamp),
212            DataTypeName::Timestamptz => Ok(DataType::Timestamptz),
213            DataTypeName::Time => Ok(DataType::Time),
214            DataTypeName::Interval => Ok(DataType::Interval),
215            DataTypeName::Jsonb => Ok(DataType::Jsonb),
216            DataTypeName::Struct | DataTypeName::List | DataTypeName::Map => Err(
217                "Functions returning composite types can not be inferred. Please use `FunctionCall::new_unchecked`.",
218            ),
219        }
220    }
221}
222
223impl From<&PbDataType> for DataType {
224    fn from(proto: &PbDataType) -> DataType {
225        match proto.get_type_name().expect("missing type field") {
226            PbTypeName::TypeUnspecified => unreachable!(),
227            PbTypeName::Int16 => DataType::Int16,
228            PbTypeName::Int32 => DataType::Int32,
229            PbTypeName::Int64 => DataType::Int64,
230            PbTypeName::Serial => DataType::Serial,
231            PbTypeName::Float => DataType::Float32,
232            PbTypeName::Double => DataType::Float64,
233            PbTypeName::Boolean => DataType::Boolean,
234            PbTypeName::Varchar => DataType::Varchar,
235            PbTypeName::Date => DataType::Date,
236            PbTypeName::Time => DataType::Time,
237            PbTypeName::Timestamp => DataType::Timestamp,
238            PbTypeName::Timestamptz => DataType::Timestamptz,
239            PbTypeName::Decimal => DataType::Decimal,
240            PbTypeName::Interval => DataType::Interval,
241            PbTypeName::Bytea => DataType::Bytea,
242            PbTypeName::Jsonb => DataType::Jsonb,
243            PbTypeName::Struct => {
244                let fields: Vec<DataType> = proto.field_type.iter().map(|f| f.into()).collect_vec();
245                let field_names: Vec<String> = proto.field_names.iter().cloned().collect_vec();
246                let field_ids = (proto.field_ids.iter().copied())
247                    .map(ColumnId::new)
248                    .collect_vec();
249
250                let mut struct_type = if proto.field_names.is_empty() {
251                    StructType::unnamed(fields)
252                } else {
253                    StructType::new(field_names.into_iter().zip_eq_fast(fields))
254                };
255                if !field_ids.is_empty() {
256                    struct_type = struct_type.with_ids(field_ids);
257                }
258                struct_type.into()
259            }
260            PbTypeName::List => DataType::List(
261                // The first (and only) item is the list element type.
262                Box::new((&proto.field_type[0]).into()),
263            ),
264            PbTypeName::Map => {
265                // Map is physically the same as a list.
266                // So the first (and only) item is the list element type.
267                let list_entries_type: DataType = (&proto.field_type[0]).into();
268                DataType::Map(MapType::from_entries(list_entries_type))
269            }
270            PbTypeName::Int256 => DataType::Int256,
271        }
272    }
273}
274
275impl From<DataTypeName> for PbTypeName {
276    fn from(type_name: DataTypeName) -> Self {
277        match type_name {
278            DataTypeName::Boolean => PbTypeName::Boolean,
279            DataTypeName::Int16 => PbTypeName::Int16,
280            DataTypeName::Int32 => PbTypeName::Int32,
281            DataTypeName::Int64 => PbTypeName::Int64,
282            DataTypeName::Serial => PbTypeName::Serial,
283            DataTypeName::Float32 => PbTypeName::Float,
284            DataTypeName::Float64 => PbTypeName::Double,
285            DataTypeName::Varchar => PbTypeName::Varchar,
286            DataTypeName::Date => PbTypeName::Date,
287            DataTypeName::Timestamp => PbTypeName::Timestamp,
288            DataTypeName::Timestamptz => PbTypeName::Timestamptz,
289            DataTypeName::Time => PbTypeName::Time,
290            DataTypeName::Interval => PbTypeName::Interval,
291            DataTypeName::Decimal => PbTypeName::Decimal,
292            DataTypeName::Bytea => PbTypeName::Bytea,
293            DataTypeName::Jsonb => PbTypeName::Jsonb,
294            DataTypeName::Struct => PbTypeName::Struct,
295            DataTypeName::List => PbTypeName::List,
296            DataTypeName::Int256 => PbTypeName::Int256,
297            DataTypeName::Map => PbTypeName::Map,
298        }
299    }
300}
301
302/// Convenient macros to generate match arms for [`DataType`].
303pub mod data_types {
304    use super::DataType;
305
306    /// Numeric [`DataType`]s supported to be `offset` of `RANGE` frame.
307    #[macro_export]
308    macro_rules! _range_frame_numeric_data_types {
309        () => {
310            DataType::Int16
311                | DataType::Int32
312                | DataType::Int64
313                | DataType::Float32
314                | DataType::Float64
315                | DataType::Decimal
316        };
317    }
318    pub use _range_frame_numeric_data_types as range_frame_numeric;
319
320    /// Date/time [`DataType`]s supported to be `offset` of `RANGE` frame.
321    #[macro_export]
322    macro_rules! _range_frame_datetime_data_types {
323        () => {
324            DataType::Date
325                | DataType::Time
326                | DataType::Timestamp
327                | DataType::Timestamptz
328                | DataType::Interval
329        };
330    }
331    pub use _range_frame_datetime_data_types as range_frame_datetime;
332
333    /// Data types that do not have inner fields.
334    #[macro_export]
335    macro_rules! _simple_data_types {
336        () => {
337            DataType::Boolean
338                | DataType::Int16
339                | DataType::Int32
340                | DataType::Int64
341                | DataType::Float32
342                | DataType::Float64
343                | DataType::Decimal
344                | DataType::Date
345                | DataType::Varchar
346                | DataType::Time
347                | DataType::Timestamp
348                | DataType::Timestamptz
349                | DataType::Interval
350                | DataType::Bytea
351                | DataType::Jsonb
352                | DataType::Serial
353                | DataType::Int256
354        };
355    }
356    pub use _simple_data_types as simple;
357
358    /// Data types that have inner fields.
359    #[macro_export]
360    macro_rules! _composite_data_types {
361        () => {
362            DataType::Struct { .. } | DataType::List { .. } | DataType::Map { .. }
363        };
364    }
365    pub use _composite_data_types as composite;
366
367    /// Test that all data types are covered either by `simple!()` or `composite!()`.
368    fn _simple_composite_data_types_exhausted(dt: DataType) {
369        match dt {
370            simple!() => {}
371            composite!() => {}
372        }
373    }
374}
375
376impl DataType {
377    pub fn create_array_builder(&self, capacity: usize) -> ArrayBuilderImpl {
378        use crate::array::*;
379
380        dispatch_data_types!(self, [B = ArrayBuilder], {
381            B::with_type(capacity, self.clone()).into()
382        })
383    }
384
385    pub fn type_name(&self) -> DataTypeName {
386        DataTypeName::from(self)
387    }
388
389    pub fn prost_type_name(&self) -> PbTypeName {
390        self.type_name().into()
391    }
392
393    pub fn to_protobuf(&self) -> PbDataType {
394        let mut pb = PbDataType {
395            type_name: self.prost_type_name() as i32,
396            is_nullable: true,
397            ..Default::default()
398        };
399        match self {
400            DataType::Struct(t) => {
401                if !t.is_unnamed() {
402                    // To be consistent with `From<&PbDataType>`,
403                    // we only set field names when it's a named struct.
404                    pb.field_names = t.names().map(|s| s.into()).collect();
405                }
406                pb.field_type = t.types().map(|f| f.to_protobuf()).collect();
407                if let Some(ids) = t.ids() {
408                    pb.field_ids = ids.map(|id| id.get_id()).collect();
409                }
410            }
411            DataType::List(datatype) => {
412                pb.field_type = vec![datatype.to_protobuf()];
413            }
414            DataType::Map(datatype) => {
415                // Same as List<Struct<K,V>>
416                pb.field_type = vec![datatype.clone().into_struct().to_protobuf()];
417            }
418            DataType::Boolean
419            | DataType::Int16
420            | DataType::Int32
421            | DataType::Int64
422            | DataType::Float32
423            | DataType::Float64
424            | DataType::Decimal
425            | DataType::Date
426            | DataType::Varchar
427            | DataType::Time
428            | DataType::Timestamp
429            | DataType::Timestamptz
430            | DataType::Interval
431            | DataType::Bytea
432            | DataType::Jsonb
433            | DataType::Serial
434            | DataType::Int256 => (),
435        }
436        pb
437    }
438
439    pub fn is_numeric(&self) -> bool {
440        matches!(
441            self,
442            DataType::Int16
443                | DataType::Int32
444                | DataType::Int64
445                | DataType::Serial
446                | DataType::Float32
447                | DataType::Float64
448                | DataType::Decimal
449        )
450    }
451
452    /// Returns whether the data type does not have inner fields.
453    pub fn is_simple(&self) -> bool {
454        matches!(self, data_types::simple!())
455    }
456
457    /// Returns whether the data type has inner fields.
458    pub fn is_composite(&self) -> bool {
459        matches!(self, data_types::composite!())
460    }
461
462    pub fn is_array(&self) -> bool {
463        matches!(self, DataType::List(_))
464    }
465
466    pub fn is_struct(&self) -> bool {
467        matches!(self, DataType::Struct(_))
468    }
469
470    pub fn is_map(&self) -> bool {
471        matches!(self, DataType::Map(_))
472    }
473
474    pub fn is_int(&self) -> bool {
475        matches!(self, DataType::Int16 | DataType::Int32 | DataType::Int64)
476    }
477
478    /// Returns the output type of time window function on a given input type.
479    pub fn window_of(input: &DataType) -> Option<DataType> {
480        match input {
481            DataType::Timestamptz => Some(DataType::Timestamptz),
482            DataType::Timestamp | DataType::Date => Some(DataType::Timestamp),
483            _ => None,
484        }
485    }
486
487    pub fn as_struct(&self) -> &StructType {
488        match self {
489            DataType::Struct(t) => t,
490            t => panic!("expect struct type, got {t}"),
491        }
492    }
493
494    pub fn as_map(&self) -> &MapType {
495        match self {
496            DataType::Map(t) => t,
497            t => panic!("expect map type, got {t}"),
498        }
499    }
500
501    pub fn into_map(self) -> MapType {
502        match self {
503            DataType::Map(t) => t,
504            t => panic!("expect map type, got {t}"),
505        }
506    }
507
508    /// Returns the inner element's type of a list type.
509    ///
510    /// # Panics
511    ///
512    /// Panics if the type is not a list type.
513    pub fn as_list(&self) -> &DataType {
514        match self {
515            DataType::List(t) => t,
516            t => panic!("expect list type, got {t}"),
517        }
518    }
519
520    /// Return a new type that removes the outer list, and get the innermost element type.
521    ///
522    /// Use [`DataType::as_list`] if you only want the element type of a list.
523    ///
524    /// ```
525    /// use risingwave_common::types::DataType::*;
526    /// assert_eq!(List(Box::new(Int32)).unnest_list(), &Int32);
527    /// assert_eq!(List(Box::new(List(Box::new(Int32)))).unnest_list(), &Int32);
528    /// ```
529    pub fn unnest_list(&self) -> &Self {
530        match self {
531            DataType::List(inner) => inner.unnest_list(),
532            _ => self,
533        }
534    }
535
536    /// Return the number of dimensions of this array/list type. Return `0` when this type is not an
537    /// array/list.
538    pub fn array_ndims(&self) -> usize {
539        let mut d = 0;
540        let mut t = self;
541        while let Self::List(inner) = t {
542            d += 1;
543            t = inner;
544        }
545        d
546    }
547
548    /// Compares the datatype with another, ignoring nested field names and ids.
549    pub fn equals_datatype(&self, other: &DataType) -> bool {
550        match (self, other) {
551            (Self::Struct(s1), Self::Struct(s2)) => s1.equals_datatype(s2),
552            (Self::List(d1), Self::List(d2)) => d1.equals_datatype(d2),
553            (Self::Map(m1), Self::Map(m2)) => {
554                m1.key().equals_datatype(m2.key()) && m1.value().equals_datatype(m2.value())
555            }
556            _ => self == other,
557        }
558    }
559
560    /// Whether a column with this data type can be altered to a new data type. This determines
561    /// the encoding of the column data.
562    ///
563    /// Returns...
564    /// - `None`, if the data type is simple or does not contain a struct type.
565    /// - `Some(true)`, if the data type contains a struct type with field ids ([`StructType::has_ids`]).
566    /// - `Some(false)`, if the data type contains a struct type without field ids.
567    pub fn can_alter(&self) -> Option<bool> {
568        match self {
569            data_types::simple!() => None,
570
571            DataType::Struct(struct_type) => {
572                // As long as we meet a struct type, we can check its `ids` field to determine if
573                // it can be altered.
574                let struct_can_alter = struct_type.has_ids();
575                // In debug build, we assert that once a struct type does (or does not) have ids,
576                // all its composite fields should have the same property.
577                if cfg!(debug_assertions) {
578                    for field in struct_type.types() {
579                        if let Some(field_can_alter) = field.can_alter() {
580                            assert_eq!(struct_can_alter, field_can_alter);
581                        }
582                    }
583                }
584                Some(struct_can_alter)
585            }
586
587            DataType::List(inner_type) => inner_type.can_alter(),
588            DataType::Map(map_type) => {
589                debug_assert!(
590                    map_type.key().is_simple(),
591                    "unexpected key type of map {map_type:?}"
592                );
593                map_type.value().can_alter()
594            }
595        }
596    }
597}
598
599impl From<StructType> for DataType {
600    fn from(value: StructType) -> Self {
601        Self::Struct(value)
602    }
603}
604
605impl From<DataType> for PbDataType {
606    fn from(data_type: DataType) -> Self {
607        data_type.to_protobuf()
608    }
609}
610
mod private {
    use super::*;

    // Note: put pub trait inside a private mod just makes the name private,
    // The trait methods will still be publicly available...
    // a.k.a. ["Voldemort type"](https://rust-lang.github.io/rfcs/2145-type-privacy.html#lint-3-voldemort-types-its-reachable-but-i-cant-name-it)

    /// Common trait bounds of scalar and scalar reference types.
    ///
    /// `Impl` is the enum the type converts to and from: `ScalarImpl` for owned scalars and
    /// `ScalarRefImpl` for scalar references.
    ///
    /// NOTE(rc): `Hash` is not in the trait bound list, it's implemented as [`ScalarRef::hash_scalar`].
    pub trait ScalarBounds<Impl> = Debug
        + Send
        + Sync
        + Clone
        + PartialEq
        + Eq
        // in default ascending order
        + PartialOrd
        + Ord
        + TryFrom<Impl, Error = ArrayError>
        // `ScalarImpl`/`ScalarRefImpl`
        + Into<Impl>;
}
634
635/// `Scalar` is a trait over all possible owned types in the evaluation
636/// framework.
637///
638/// `Scalar` is reciprocal to `ScalarRef`. Use `as_scalar_ref` to get a
639/// reference which has the same lifetime as `self`.
640pub trait Scalar: private::ScalarBounds<ScalarImpl> + 'static {
641    /// Type for reference of `Scalar`
642    type ScalarRefType<'a>: ScalarRef<'a, ScalarType = Self> + 'a
643    where
644        Self: 'a;
645
646    /// Get a reference to current scalar.
647    fn as_scalar_ref(&self) -> Self::ScalarRefType<'_>;
648
649    fn to_scalar_value(self) -> ScalarImpl {
650        self.into()
651    }
652}
653
/// `ScalarRef` is a trait over all possible references in the evaluation
/// framework.
///
/// `ScalarRef` is reciprocal to `Scalar`. Use `to_owned_scalar` to get an
/// owned scalar.
///
/// Implementors are required to be `Copy`.
pub trait ScalarRef<'a>: private::ScalarBounds<ScalarRefImpl<'a>> + 'a + Copy {
    /// `ScalarType` is the owned type of current `ScalarRef`.
    type ScalarType: Scalar<ScalarRefType<'a> = Self>;

    /// Convert `ScalarRef` to an owned scalar.
    fn to_owned_scalar(&self) -> Self::ScalarType;

    /// A wrapped hash function to get the hash value for this scalar.
    ///
    /// Feeds the value into `state`; this replaces a `Hash` bound on scalar types.
    fn hash_scalar<H: std::hash::Hasher>(&self, state: &mut H);
}
669
/// Define `ScalarImpl` and `ScalarRefImpl` with macro.
///
/// Takes a comma-separated list of variant descriptors. Only `$variant_name`, `$scalar` and
/// `$scalar_ref` are used here; the remaining fields are accepted so that the macro matches
/// the descriptor shape passed in by `for_all_variants!`.
macro_rules! scalar_impl_enum {
    ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty } ),*) => {
        /// `ScalarImpl` embeds all possible scalars in the evaluation framework.
        ///
        /// Note: `ScalarImpl` doesn't contain all information of its `DataType`,
        /// so sometimes they need to be used together.
        /// e.g., for `Struct`, we don't have the field names in the value.
        ///
        /// See `for_all_variants` for the definition.
        #[derive(Debug, Clone, PartialEq, Eq, EstimateSize)]
        pub enum ScalarImpl {
            $( $variant_name($scalar) ),*
        }

        /// `ScalarRefImpl` embeds all possible scalar references in the evaluation
        /// framework.
        ///
        /// Note: `ScalarRefImpl` doesn't contain all information of its `DataType`,
        /// so sometimes they need to be used together.
        /// e.g., for `Struct`, we don't have the field names in the value.
        ///
        /// See `for_all_variants` for the definition.
        #[derive(Debug, Copy, Clone, PartialEq, Eq)]
        pub enum ScalarRefImpl<'scalar> {
            $( $variant_name($scalar_ref) ),*
        }
    };
}

// Instantiate both enums, with one variant per entry of `for_all_variants!`.
for_all_variants! { scalar_impl_enum }
701
// We MUST NOT implement `Ord` for `ScalarImpl` because that will make `Datum` derive an incorrect
// default `Ord`. To get a default-ordered `ScalarImpl`/`ScalarRefImpl`/`Datum`/`DatumRef`, you can
// use `DefaultOrdered<T>`. If non-default order is needed, please refer to `sort_util`.
//
// The negative impls below opt both enums out of `PartialOrd` explicitly, so an accidental
// implementation is rejected at compile time. `Ord` has `PartialOrd` as a supertrait, so it is
// ruled out as well.
impl !PartialOrd for ScalarImpl {}
impl !PartialOrd for ScalarRefImpl<'_> {}
707
/// An optional owned scalar value.
///
/// NOTE(review): `None` presumably represents SQL `NULL` — confirm.
pub type Datum = Option<ScalarImpl>;
/// An optional scalar reference; the borrowed counterpart of [`Datum`].
pub type DatumRef<'a> = Option<ScalarRefImpl<'a>>;
710
/// This trait is to implement `to_owned_datum` for `Option<ScalarImpl>`
///
/// In this file it is implemented for `&Datum`, for any `T: Into<ScalarImpl>`, and for
/// `Option<T>` of such `T`.
pub trait ToOwnedDatum {
    /// Convert the datum to an owned [`Datum`].
    fn to_owned_datum(self) -> Datum;
}
716
717impl ToOwnedDatum for &Datum {
718    #[inline(always)]
719    fn to_owned_datum(self) -> Datum {
720        self.clone()
721    }
722}
723
724impl<T: Into<ScalarImpl>> ToOwnedDatum for T {
725    #[inline(always)]
726    fn to_owned_datum(self) -> Datum {
727        Some(self.into())
728    }
729}
730
731impl<T: Into<ScalarImpl>> ToOwnedDatum for Option<T> {
732    #[inline(always)]
733    fn to_owned_datum(self) -> Datum {
734        self.map(Into::into)
735    }
736}
737
/// Conversion of datum-like values into a borrowed [`DatumRef`].
///
/// In this file it is implemented for `Datum`, `Option<&ScalarImpl>` and `DatumRef`.
// NOTE(review): `auto_impl(&)` presumably also provides the impl for `&T` where
// `T: ToDatumRef` — confirm against the `auto_impl` documentation.
#[auto_impl::auto_impl(&)]
pub trait ToDatumRef: PartialEq + Eq + Debug {
    /// Convert the datum to [`DatumRef`].
    fn to_datum_ref(&self) -> DatumRef<'_>;
}
743
744impl ToDatumRef for Datum {
745    #[inline(always)]
746    fn to_datum_ref(&self) -> DatumRef<'_> {
747        self.as_ref().map(|d| d.as_scalar_ref_impl())
748    }
749}
750impl ToDatumRef for Option<&ScalarImpl> {
751    #[inline(always)]
752    fn to_datum_ref(&self) -> DatumRef<'_> {
753        self.map(|d| d.as_scalar_ref_impl())
754    }
755}
756impl ToDatumRef for DatumRef<'_> {
757    #[inline(always)]
758    fn to_datum_ref(&self) -> DatumRef<'_> {
759        *self
760    }
761}
762
/// To make sure there is `as_scalar_ref` for all scalar ref types.
/// See <https://github.com/risingwavelabs/risingwave/pull/9977/files#r1208972881>
///
/// This is used by the expr macro.
pub trait SelfAsScalarRef {
    /// Returns the scalar reference itself.
    fn as_scalar_ref(&self) -> Self;
}
// Implements `SelfAsScalarRef` for each listed type by returning `*self`, which requires the
// listed types to be `Copy`.
macro_rules! impl_self_as_scalar_ref {
    ($($t:ty),*) => {
        $(
            impl SelfAsScalarRef for $t {
                fn as_scalar_ref(&self) -> Self {
                    *self
                }
            }
        )*
    };
}
// The reference types covered explicitly here.
impl_self_as_scalar_ref! { &str, &[u8], Int256Ref<'_>, JsonbRef<'_>, ListRef<'_>, StructRef<'_>, ScalarRefImpl<'_>, MapRef<'_> }
782
/// `for_all_native_types` includes all native variants of our scalar types.
///
/// Specifically, it doesn't support u8/u16/u32/u64.
///
/// Invokes `$macro!` once with a comma-separated list of three-field entries.
// NOTE(review): each entry looks like `{ rust type, variant name, reader method }` — confirm
// against the macros that consume it.
#[macro_export]
macro_rules! for_all_native_types {
    ($macro:ident) => {
        $macro! {
            { i16, Int16, read_i16 },
            { i32, Int32, read_i32 },
            { i64, Int64, read_i64 },
            { Serial, Serial, read_i64 },
            { $crate::types::F32, Float32, read_f32 },
            { $crate::types::F64, Float64, read_f64 }
        }
    };
}
799
800/// `impl_convert` implements several conversions for `Scalar`.
801/// * `Scalar <-> ScalarImpl` with `From` and `TryFrom` trait.
802/// * `ScalarRef <-> ScalarRefImpl` with `From` and `TryFrom` trait.
803/// * `&ScalarImpl -> &Scalar` with `impl.as_int16()`.
804/// * `ScalarImpl -> Scalar` with `impl.into_int16()`.
805macro_rules! impl_convert {
806    ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty } ),*) => {
807        $(
808            impl From<$scalar> for ScalarImpl {
809                fn from(val: $scalar) -> Self {
810                    ScalarImpl::$variant_name(val)
811                }
812            }
813
814            impl TryFrom<ScalarImpl> for $scalar {
815                type Error = ArrayError;
816
817                fn try_from(val: ScalarImpl) -> ArrayResult<Self> {
818                    match val {
819                        ScalarImpl::$variant_name(scalar) => Ok(scalar),
820                        other_scalar => bail!("cannot convert ScalarImpl::{} to concrete type", other_scalar.get_ident()),
821                    }
822                }
823            }
824
825            impl <'scalar> From<$scalar_ref> for ScalarRefImpl<'scalar> {
826                fn from(val: $scalar_ref) -> Self {
827                    ScalarRefImpl::$variant_name(val)
828                }
829            }
830
831            impl <'scalar> TryFrom<ScalarRefImpl<'scalar>> for $scalar_ref {
832                type Error = ArrayError;
833
834                fn try_from(val: ScalarRefImpl<'scalar>) -> ArrayResult<Self> {
835                    match val {
836                        ScalarRefImpl::$variant_name(scalar_ref) => Ok(scalar_ref),
837                        other_scalar => bail!("cannot convert ScalarRefImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name)),
838                    }
839                }
840            }
841
842            paste! {
843                impl ScalarImpl {
844                    /// # Panics
845                    /// If the scalar is not of the expected type.
846                    pub fn [<as_ $suffix_name>](&self) -> &$scalar {
847                        match self {
848                            Self::$variant_name(scalar) => scalar,
849                            other_scalar => panic!("cannot convert ScalarImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
850                        }
851                    }
852
853                    /// # Panics
854                    /// If the scalar is not of the expected type.
855                    pub fn [<into_ $suffix_name>](self) -> $scalar {
856                        match self {
857                            Self::$variant_name(scalar) => scalar,
858                            other_scalar =>  panic!("cannot convert ScalarImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
859                        }
860                    }
861                }
862
863                impl <'scalar> ScalarRefImpl<'scalar> {
864                    /// # Panics
865                    /// If the scalar is not of the expected type.
866                    pub fn [<into_ $suffix_name>](self) -> $scalar_ref {
867                        match self {
868                            Self::$variant_name(inner) => inner,
869                            other_scalar => panic!("cannot convert ScalarRefImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
870                        }
871                    }
872                }
873            }
874        )*
875    };
876}
877
// Expand `impl_convert` over the variant list supplied by `for_all_variants!`, producing the
// `From`/`TryFrom` conversions and the `as_*`/`into_*` accessors defined in the macro above.
for_all_variants! { impl_convert }
879
// Implement `From<raw float>` for `ScalarImpl::Float32` / `ScalarImpl::Float64` as syntactic sugar.
881impl From<f32> for ScalarImpl {
882    fn from(f: f32) -> Self {
883        Self::Float32(f.into())
884    }
885}
886impl From<f64> for ScalarImpl {
887    fn from(f: f64) -> Self {
888        Self::Float64(f.into())
889    }
890}
891
// Implement `From<string-like>` for `ScalarImpl::Utf8` as syntactic sugar.
893impl From<String> for ScalarImpl {
894    fn from(s: String) -> Self {
895        Self::Utf8(s.into_boxed_str())
896    }
897}
898impl From<&str> for ScalarImpl {
899    fn from(s: &str) -> Self {
900        Self::Utf8(s.into())
901    }
902}
903impl From<&String> for ScalarImpl {
904    fn from(s: &String) -> Self {
905        Self::Utf8(s.as_str().into())
906    }
907}
908impl TryFrom<ScalarImpl> for String {
909    type Error = ArrayError;
910
911    fn try_from(val: ScalarImpl) -> ArrayResult<Self> {
912        match val {
913            ScalarImpl::Utf8(s) => Ok(s.into()),
914            other_scalar => bail!(
915                "cannot convert ScalarImpl::{} to concrete type",
916                other_scalar.get_ident()
917            ),
918        }
919    }
920}
921
922impl From<char> for ScalarImpl {
923    fn from(c: char) -> Self {
924        Self::Utf8(c.to_string().into())
925    }
926}
927
928impl From<&[u8]> for ScalarImpl {
929    fn from(s: &[u8]) -> Self {
930        Self::Bytea(s.into())
931    }
932}
933
934impl From<JsonbRef<'_>> for ScalarImpl {
935    fn from(jsonb: JsonbRef<'_>) -> Self {
936        Self::Jsonb(jsonb.to_owned_scalar())
937    }
938}
939
940impl<T: PrimitiveArrayItemType> From<Vec<T>> for ScalarImpl {
941    fn from(v: Vec<T>) -> Self {
942        Self::List(v.into_iter().collect())
943    }
944}
945
946impl<T: PrimitiveArrayItemType> From<Vec<Option<T>>> for ScalarImpl {
947    fn from(v: Vec<Option<T>>) -> Self {
948        Self::List(v.into_iter().collect())
949    }
950}
951
952impl From<Vec<String>> for ScalarImpl {
953    fn from(v: Vec<String>) -> Self {
954        Self::List(v.iter().map(|s| s.as_str()).collect())
955    }
956}
957
958impl From<Vec<u8>> for ScalarImpl {
959    fn from(v: Vec<u8>) -> Self {
960        Self::Bytea(v.into())
961    }
962}
963
964impl From<Bytes> for ScalarImpl {
965    fn from(v: Bytes) -> Self {
966        Self::Bytea(v.as_ref().into())
967    }
968}
969
970impl From<ListRef<'_>> for ScalarImpl {
971    fn from(list: ListRef<'_>) -> Self {
972        Self::List(list.to_owned_scalar())
973    }
974}
975
976impl ScalarImpl {
977    /// Creates a scalar from pgwire "BINARY" format.
978    ///
979    /// The counterpart of [`to_binary::ToBinary`].
980    pub fn from_binary(bytes: &Bytes, data_type: &DataType) -> Result<Self, BoxedError> {
981        let res = match data_type {
982            DataType::Varchar => Self::Utf8(String::from_sql(&Type::VARCHAR, bytes)?.into()),
983            DataType::Bytea => Self::Bytea(Vec::<u8>::from_sql(&Type::BYTEA, bytes)?.into()),
984            DataType::Boolean => Self::Bool(bool::from_sql(&Type::BOOL, bytes)?),
985            DataType::Int16 => Self::Int16(i16::from_sql(&Type::INT2, bytes)?),
986            DataType::Int32 => Self::Int32(i32::from_sql(&Type::INT4, bytes)?),
987            DataType::Int64 => Self::Int64(i64::from_sql(&Type::INT8, bytes)?),
988            DataType::Serial => Self::Serial(Serial::from(i64::from_sql(&Type::INT8, bytes)?)),
989            DataType::Float32 => Self::Float32(f32::from_sql(&Type::FLOAT4, bytes)?.into()),
990            DataType::Float64 => Self::Float64(f64::from_sql(&Type::FLOAT8, bytes)?.into()),
991            DataType::Decimal => {
992                Self::Decimal(rust_decimal::Decimal::from_sql(&Type::NUMERIC, bytes)?.into())
993            }
994            DataType::Date => Self::Date(chrono::NaiveDate::from_sql(&Type::DATE, bytes)?.into()),
995            DataType::Time => Self::Time(chrono::NaiveTime::from_sql(&Type::TIME, bytes)?.into()),
996            DataType::Timestamp => {
997                Self::Timestamp(chrono::NaiveDateTime::from_sql(&Type::TIMESTAMP, bytes)?.into())
998            }
999            DataType::Timestamptz => Self::Timestamptz(
1000                chrono::DateTime::<chrono::Utc>::from_sql(&Type::TIMESTAMPTZ, bytes)?.into(),
1001            ),
1002            DataType::Interval => Self::Interval(Interval::from_sql(&Type::INTERVAL, bytes)?),
1003            DataType::Jsonb => Self::Jsonb(
1004                JsonbVal::value_deserialize(bytes)
1005                    .ok_or_else(|| "invalid value of Jsonb".to_owned())?,
1006            ),
1007            DataType::Int256 => Self::Int256(Int256::from_binary(bytes)?),
1008            DataType::Struct(_) | DataType::List(_) | DataType::Map(_) => {
1009                return Err(format!("unsupported data type: {}", data_type).into());
1010            }
1011        };
1012        Ok(res)
1013    }
1014
1015    /// Creates a scalar from pgwire "TEXT" format.
1016    ///
1017    /// The counterpart of [`ToText`].
1018    pub fn from_text(s: &str, data_type: &DataType) -> Result<Self, BoxedError> {
1019        Ok(match data_type {
1020            DataType::Boolean => str_to_bool(s)?.into(),
1021            DataType::Int16 => i16::from_str(s)?.into(),
1022            DataType::Int32 => i32::from_str(s)?.into(),
1023            DataType::Int64 => i64::from_str(s)?.into(),
1024            DataType::Int256 => Int256::from_str(s)?.into(),
1025            DataType::Serial => Serial::from(i64::from_str(s)?).into(),
1026            DataType::Decimal => Decimal::from_str(s)?.into(),
1027            DataType::Float32 => F32::from_str(s)?.into(),
1028            DataType::Float64 => F64::from_str(s)?.into(),
1029            DataType::Varchar => s.into(),
1030            DataType::Date => Date::from_str(s)?.into(),
1031            DataType::Timestamp => Timestamp::from_str(s)?.into(),
1032            // We only handle the case with timezone here, and leave the implicit session timezone case
1033            // for later phase.
1034            DataType::Timestamptz => Timestamptz::from_str(s)?.into(),
1035            DataType::Time => Time::from_str(s)?.into(),
1036            DataType::Interval => Interval::from_str(s)?.into(),
1037            DataType::List(_) => ListValue::from_str(s, data_type)?.into(),
1038            DataType::Struct(st) => StructValue::from_str(s, st)?.into(),
1039            DataType::Jsonb => JsonbVal::from_str(s)?.into(),
1040            DataType::Bytea => str_to_bytea(s)?.into(),
1041            DataType::Map(_m) => return Err("map from text is not supported".into()),
1042        })
1043    }
1044
1045    pub fn from_text_for_test(s: &str, data_type: &DataType) -> Result<Self, BoxedError> {
1046        Ok(match data_type {
1047            DataType::Map(map_type) => MapValue::from_str_for_test(s, map_type)?.into(),
1048            _ => ScalarImpl::from_text(s, data_type)?,
1049        })
1050    }
1051}
1052
1053impl From<ScalarRefImpl<'_>> for ScalarImpl {
1054    fn from(scalar_ref: ScalarRefImpl<'_>) -> Self {
1055        scalar_ref.into_scalar_impl()
1056    }
1057}
1058
1059impl<'a> From<&'a ScalarImpl> for ScalarRefImpl<'a> {
1060    fn from(scalar: &'a ScalarImpl) -> Self {
1061        scalar.as_scalar_ref_impl()
1062    }
1063}
1064
1065impl ScalarImpl {
1066    /// Converts [`ScalarImpl`] to [`ScalarRefImpl`]
1067    pub fn as_scalar_ref_impl(&self) -> ScalarRefImpl<'_> {
1068        dispatch_scalar_variants!(self, inner, { inner.as_scalar_ref().into() })
1069    }
1070}
1071
1072impl ScalarRefImpl<'_> {
1073    /// Converts [`ScalarRefImpl`] to [`ScalarImpl`]
1074    pub fn into_scalar_impl(self) -> ScalarImpl {
1075        dispatch_scalar_ref_variants!(self, inner, { inner.to_owned_scalar().into() })
1076    }
1077}
1078
1079impl Hash for ScalarImpl {
1080    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1081        dispatch_scalar_variants!(self, inner, { inner.as_scalar_ref().hash_scalar(state) })
1082    }
1083}
1084
1085impl Hash for ScalarRefImpl<'_> {
1086    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1087        dispatch_scalar_ref_variants!(self, inner, { inner.hash_scalar(state) })
1088    }
1089}
1090
1091/// Feeds the raw scalar reference of `datum` to the given `state`, which should behave the same
1092/// as [`crate::array::Array::hash_at`], where NULL value will be carefully handled.
1093///
1094/// **FIXME**: the result of this function might be different from [`std::hash::Hash`] due to the
1095/// type alias of `DatumRef = Option<_>`, we should manually implement [`std::hash::Hash`] for
1096/// [`DatumRef`] in the future when it becomes a newtype. (#477)
1097#[inline(always)]
1098pub fn hash_datum(datum: impl ToDatumRef, state: &mut impl std::hash::Hasher) {
1099    match datum.to_datum_ref() {
1100        Some(scalar_ref) => scalar_ref.hash(state),
1101        None => NULL_VAL_FOR_HASH.hash(state),
1102    }
1103}
1104
1105impl ScalarRefImpl<'_> {
1106    pub fn binary_format(&self, data_type: &DataType) -> to_binary::Result<Bytes> {
1107        use self::to_binary::ToBinary;
1108        self.to_binary_with_type(data_type)
1109    }
1110
1111    pub fn text_format(&self, data_type: &DataType) -> String {
1112        self.to_text_with_type(data_type)
1113    }
1114
1115    /// Serialize the scalar into the `memcomparable` format.
1116    pub fn serialize(
1117        &self,
1118        ser: &mut memcomparable::Serializer<impl BufMut>,
1119    ) -> memcomparable::Result<()> {
1120        match self {
1121            Self::Int16(v) => v.serialize(ser)?,
1122            Self::Int32(v) => v.serialize(ser)?,
1123            Self::Int64(v) => v.serialize(ser)?,
1124            Self::Serial(v) => v.serialize(ser)?,
1125            Self::Float32(v) => v.serialize(ser)?,
1126            Self::Float64(v) => v.serialize(ser)?,
1127            Self::Utf8(v) => v.serialize(ser)?,
1128            Self::Bytea(v) => ser.serialize_bytes(v)?,
1129            Self::Bool(v) => v.serialize(ser)?,
1130            Self::Decimal(v) => ser.serialize_decimal((*v).into())?,
1131            Self::Interval(v) => v.serialize(ser)?,
1132            Self::Date(v) => v.0.num_days_from_ce().serialize(ser)?,
1133            Self::Timestamp(v) => {
1134                v.0.and_utc().timestamp().serialize(&mut *ser)?;
1135                v.0.and_utc().timestamp_subsec_nanos().serialize(ser)?;
1136            }
1137            Self::Timestamptz(v) => v.serialize(ser)?,
1138            Self::Time(v) => {
1139                v.0.num_seconds_from_midnight().serialize(&mut *ser)?;
1140                v.0.nanosecond().serialize(ser)?;
1141            }
1142            Self::Int256(v) => v.memcmp_serialize(ser)?,
1143            Self::Jsonb(v) => v.memcmp_serialize(ser)?,
1144            Self::Struct(v) => v.memcmp_serialize(ser)?,
1145            Self::List(v) => v.memcmp_serialize(ser)?,
1146            Self::Map(v) => v.memcmp_serialize(ser)?,
1147        };
1148        Ok(())
1149    }
1150}
1151
1152impl ScalarImpl {
1153    /// Serialize the scalar into the `memcomparable` format.
1154    pub fn serialize(
1155        &self,
1156        ser: &mut memcomparable::Serializer<impl BufMut>,
1157    ) -> memcomparable::Result<()> {
1158        self.as_scalar_ref_impl().serialize(ser)
1159    }
1160
1161    /// Deserialize the scalar from the `memcomparable` format.
1162    pub fn deserialize(
1163        ty: &DataType,
1164        de: &mut memcomparable::Deserializer<impl Buf>,
1165    ) -> memcomparable::Result<Self> {
1166        use DataType as Ty;
1167        Ok(match ty {
1168            Ty::Int16 => Self::Int16(i16::deserialize(de)?),
1169            Ty::Int32 => Self::Int32(i32::deserialize(de)?),
1170            Ty::Int64 => Self::Int64(i64::deserialize(de)?),
1171            Ty::Int256 => Self::Int256(Int256::memcmp_deserialize(de)?),
1172            Ty::Serial => Self::Serial(Serial::from(i64::deserialize(de)?)),
1173            Ty::Float32 => Self::Float32(f32::deserialize(de)?.into()),
1174            Ty::Float64 => Self::Float64(f64::deserialize(de)?.into()),
1175            Ty::Varchar => Self::Utf8(Box::<str>::deserialize(de)?),
1176            Ty::Bytea => Self::Bytea(serde_bytes::ByteBuf::deserialize(de)?.into_vec().into()),
1177            Ty::Boolean => Self::Bool(bool::deserialize(de)?),
1178            Ty::Decimal => Self::Decimal(de.deserialize_decimal()?.into()),
1179            Ty::Interval => Self::Interval(Interval::deserialize(de)?),
1180            Ty::Time => Self::Time({
1181                let secs = u32::deserialize(&mut *de)?;
1182                let nano = u32::deserialize(de)?;
1183                Time::with_secs_nano(secs, nano)
1184                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1185            }),
1186            Ty::Timestamp => Self::Timestamp({
1187                let secs = i64::deserialize(&mut *de)?;
1188                let nsecs = u32::deserialize(de)?;
1189                Timestamp::with_secs_nsecs(secs, nsecs)
1190                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1191            }),
1192            Ty::Timestamptz => Self::Timestamptz(Timestamptz::deserialize(de)?),
1193            Ty::Date => Self::Date({
1194                let days = i32::deserialize(de)?;
1195                Date::with_days_since_ce(days)
1196                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1197            }),
1198            Ty::Jsonb => Self::Jsonb(JsonbVal::memcmp_deserialize(de)?),
1199            Ty::Struct(t) => StructValue::memcmp_deserialize(t.types(), de)?.to_scalar_value(),
1200            Ty::List(t) => ListValue::memcmp_deserialize(t, de)?.to_scalar_value(),
1201            Ty::Map(t) => MapValue::memcmp_deserialize(t, de)?.to_scalar_value(),
1202        })
1203    }
1204
1205    pub fn as_integral(&self) -> i64 {
1206        match self {
1207            Self::Int16(v) => *v as i64,
1208            Self::Int32(v) => *v as i64,
1209            Self::Int64(v) => *v,
1210            _ => panic!(
1211                "Can't convert ScalarImpl::{} to a integral",
1212                self.get_ident()
1213            ),
1214        }
1215    }
1216}
1217
1218/// Returns whether the `literal` matches the `data_type`.
1219pub fn literal_type_match(data_type: &DataType, literal: Option<&ScalarImpl>) -> bool {
1220    match literal {
1221        Some(scalar) => {
1222            macro_rules! matches {
1223                ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty }),*) => {
1224                    match (data_type, scalar) {
1225                        $(
1226                            (DataType::$data_type { .. }, ScalarImpl::$variant_name(_)) => true,
1227                            (DataType::$data_type { .. }, _) => false, // so that we won't forget to match a new logical type
1228                        )*
1229                    }
1230                }
1231            }
1232            for_all_variants! { matches }
1233        }
1234        None => true,
1235    }
1236}
1237
1238#[cfg(test)]
1239mod tests {
1240    use std::hash::{BuildHasher, Hasher};
1241
1242    use strum::IntoEnumIterator;
1243
1244    use super::*;
1245    use crate::util::hash_util::Crc32FastBuilder;
1246
1247    #[test]
1248    fn test_size() {
1249        use static_assertions::const_assert_eq;
1250
1251        use crate::array::*;
1252
1253        macro_rules! assert_item_size_eq {
1254            ($array:ty, $size:literal) => {
1255                const_assert_eq!(std::mem::size_of::<<$array as Array>::OwnedItem>(), $size);
1256            };
1257        }
1258
1259        assert_item_size_eq!(StructArray, 16); // Box<[Datum]>
1260        assert_item_size_eq!(ListArray, 8); // Box<ArrayImpl>
1261        assert_item_size_eq!(Utf8Array, 16); // Box<str>
1262        assert_item_size_eq!(IntervalArray, 16);
1263        assert_item_size_eq!(TimestampArray, 12);
1264
1265        // TODO: try to reduce the memory usage of `Decimal`, `ScalarImpl` and `Datum`.
1266        assert_item_size_eq!(DecimalArray, 20);
1267
1268        const_assert_eq!(std::mem::size_of::<ScalarImpl>(), 24);
1269        const_assert_eq!(std::mem::size_of::<ScalarRefImpl<'_>>(), 24);
1270        const_assert_eq!(std::mem::size_of::<Datum>(), 24);
1271        const_assert_eq!(std::mem::size_of::<StructType>(), 8);
1272        const_assert_eq!(std::mem::size_of::<DataType>(), 16);
1273    }
1274
1275    #[test]
1276    fn test_data_type_display() {
1277        let d: DataType =
1278            StructType::new(vec![("i", DataType::Int32), ("j", DataType::Varchar)]).into();
1279        assert_eq!(
1280            format!("{}", d),
1281            "struct<i integer, j character varying>".to_owned()
1282        );
1283    }
1284
1285    #[test]
1286    fn test_hash_implementation() {
1287        fn test(datum: Datum, data_type: DataType) {
1288            assert!(literal_type_match(&data_type, datum.as_ref()));
1289
1290            let mut builder = data_type.create_array_builder(6);
1291            for _ in 0..3 {
1292                builder.append_null();
1293                builder.append(&datum);
1294            }
1295            let array = builder.finish();
1296
1297            let hash_from_array = {
1298                let mut state = Crc32FastBuilder.build_hasher();
1299                array.hash_at(3, &mut state);
1300                state.finish()
1301            };
1302
1303            let hash_from_datum = {
1304                let mut state = Crc32FastBuilder.build_hasher();
1305                hash_datum(&datum, &mut state);
1306                state.finish()
1307            };
1308
1309            let hash_from_datum_ref = {
1310                let mut state = Crc32FastBuilder.build_hasher();
1311                hash_datum(datum.to_datum_ref(), &mut state);
1312                state.finish()
1313            };
1314
1315            assert_eq!(hash_from_array, hash_from_datum);
1316            assert_eq!(hash_from_datum, hash_from_datum_ref);
1317        }
1318
1319        for name in DataTypeName::iter() {
1320            let (scalar, data_type) = match name {
1321                DataTypeName::Boolean => (ScalarImpl::Bool(true), DataType::Boolean),
1322                DataTypeName::Int16 => (ScalarImpl::Int16(233), DataType::Int16),
1323                DataTypeName::Int32 => (ScalarImpl::Int32(233333), DataType::Int32),
1324                DataTypeName::Int64 => (ScalarImpl::Int64(233333333333), DataType::Int64),
1325                DataTypeName::Int256 => (
1326                    ScalarImpl::Int256(233333333333_i64.into()),
1327                    DataType::Int256,
1328                ),
1329                DataTypeName::Serial => (ScalarImpl::Serial(233333333333.into()), DataType::Serial),
1330                DataTypeName::Float32 => (ScalarImpl::Float32(23.33.into()), DataType::Float32),
1331                DataTypeName::Float64 => (
1332                    ScalarImpl::Float64(23.333333333333.into()),
1333                    DataType::Float64,
1334                ),
1335                DataTypeName::Decimal => (
1336                    ScalarImpl::Decimal("233.33".parse().unwrap()),
1337                    DataType::Decimal,
1338                ),
1339                DataTypeName::Date => (
1340                    ScalarImpl::Date(Date::from_ymd_uncheck(2333, 3, 3)),
1341                    DataType::Date,
1342                ),
1343                DataTypeName::Varchar => (ScalarImpl::Utf8("233".into()), DataType::Varchar),
1344                DataTypeName::Bytea => (
1345                    ScalarImpl::Bytea("\\x233".as_bytes().into()),
1346                    DataType::Bytea,
1347                ),
1348                DataTypeName::Time => (
1349                    ScalarImpl::Time(Time::from_hms_uncheck(2, 3, 3)),
1350                    DataType::Time,
1351                ),
1352                DataTypeName::Timestamp => (
1353                    ScalarImpl::Timestamp(Timestamp::from_timestamp_uncheck(23333333, 2333)),
1354                    DataType::Timestamp,
1355                ),
1356                DataTypeName::Timestamptz => (
1357                    ScalarImpl::Timestamptz(Timestamptz::from_micros(233333333)),
1358                    DataType::Timestamptz,
1359                ),
1360                DataTypeName::Interval => (
1361                    ScalarImpl::Interval(Interval::from_month_day_usec(2, 3, 3333)),
1362                    DataType::Interval,
1363                ),
1364                DataTypeName::Jsonb => (ScalarImpl::Jsonb(JsonbVal::null()), DataType::Jsonb),
1365                DataTypeName::Struct => (
1366                    ScalarImpl::Struct(StructValue::new(vec![
1367                        ScalarImpl::Int64(233).into(),
1368                        ScalarImpl::Float64(23.33.into()).into(),
1369                    ])),
1370                    DataType::Struct(StructType::new(vec![
1371                        ("a", DataType::Int64),
1372                        ("b", DataType::Float64),
1373                    ])),
1374                ),
1375                DataTypeName::List => (
1376                    ScalarImpl::List(ListValue::from_iter([233i64, 2333])),
1377                    DataType::List(Box::new(DataType::Int64)),
1378                ),
1379                DataTypeName::Map => {
1380                    // map is not hashable
1381                    continue;
1382                }
1383            };
1384
1385            test(Some(scalar), data_type.clone());
1386            test(None, data_type);
1387        }
1388    }
1389
1390    #[test]
1391    fn test_data_type_from_str() {
1392        assert_eq!(DataType::from_str("bool").unwrap(), DataType::Boolean);
1393        assert_eq!(DataType::from_str("boolean").unwrap(), DataType::Boolean);
1394        assert_eq!(DataType::from_str("BOOL").unwrap(), DataType::Boolean);
1395        assert_eq!(DataType::from_str("BOOLEAN").unwrap(), DataType::Boolean);
1396
1397        assert_eq!(DataType::from_str("int2").unwrap(), DataType::Int16);
1398        assert_eq!(DataType::from_str("smallint").unwrap(), DataType::Int16);
1399        assert_eq!(DataType::from_str("INT2").unwrap(), DataType::Int16);
1400        assert_eq!(DataType::from_str("SMALLINT").unwrap(), DataType::Int16);
1401
1402        assert_eq!(DataType::from_str("int4").unwrap(), DataType::Int32);
1403        assert_eq!(DataType::from_str("integer").unwrap(), DataType::Int32);
1404        assert_eq!(DataType::from_str("int4").unwrap(), DataType::Int32);
1405        assert_eq!(DataType::from_str("INT4").unwrap(), DataType::Int32);
1406        assert_eq!(DataType::from_str("INTEGER").unwrap(), DataType::Int32);
1407        assert_eq!(DataType::from_str("INT").unwrap(), DataType::Int32);
1408
1409        assert_eq!(DataType::from_str("int8").unwrap(), DataType::Int64);
1410        assert_eq!(DataType::from_str("bigint").unwrap(), DataType::Int64);
1411        assert_eq!(DataType::from_str("INT8").unwrap(), DataType::Int64);
1412        assert_eq!(DataType::from_str("BIGINT").unwrap(), DataType::Int64);
1413
1414        assert_eq!(DataType::from_str("rw_int256").unwrap(), DataType::Int256);
1415        assert_eq!(DataType::from_str("RW_INT256").unwrap(), DataType::Int256);
1416
1417        assert_eq!(DataType::from_str("float4").unwrap(), DataType::Float32);
1418        assert_eq!(DataType::from_str("real").unwrap(), DataType::Float32);
1419        assert_eq!(DataType::from_str("FLOAT4").unwrap(), DataType::Float32);
1420        assert_eq!(DataType::from_str("REAL").unwrap(), DataType::Float32);
1421
1422        assert_eq!(DataType::from_str("float8").unwrap(), DataType::Float64);
1423        assert_eq!(
1424            DataType::from_str("double precision").unwrap(),
1425            DataType::Float64
1426        );
1427        assert_eq!(DataType::from_str("FLOAT8").unwrap(), DataType::Float64);
1428        assert_eq!(
1429            DataType::from_str("DOUBLE PRECISION").unwrap(),
1430            DataType::Float64
1431        );
1432
1433        assert_eq!(DataType::from_str("decimal").unwrap(), DataType::Decimal);
1434        assert_eq!(DataType::from_str("DECIMAL").unwrap(), DataType::Decimal);
1435        assert_eq!(DataType::from_str("numeric").unwrap(), DataType::Decimal);
1436        assert_eq!(DataType::from_str("NUMERIC").unwrap(), DataType::Decimal);
1437
1438        assert_eq!(DataType::from_str("date").unwrap(), DataType::Date);
1439        assert_eq!(DataType::from_str("DATE").unwrap(), DataType::Date);
1440
1441        assert_eq!(DataType::from_str("varchar").unwrap(), DataType::Varchar);
1442        assert_eq!(DataType::from_str("VARCHAR").unwrap(), DataType::Varchar);
1443
1444        assert_eq!(DataType::from_str("time").unwrap(), DataType::Time);
1445        assert_eq!(
1446            DataType::from_str("time without time zone").unwrap(),
1447            DataType::Time
1448        );
1449        assert_eq!(DataType::from_str("TIME").unwrap(), DataType::Time);
1450        assert_eq!(
1451            DataType::from_str("TIME WITHOUT TIME ZONE").unwrap(),
1452            DataType::Time
1453        );
1454
1455        assert_eq!(
1456            DataType::from_str("timestamp").unwrap(),
1457            DataType::Timestamp
1458        );
1459        assert_eq!(
1460            DataType::from_str("timestamp without time zone").unwrap(),
1461            DataType::Timestamp
1462        );
1463        assert_eq!(
1464            DataType::from_str("TIMESTAMP").unwrap(),
1465            DataType::Timestamp
1466        );
1467        assert_eq!(
1468            DataType::from_str("TIMESTAMP WITHOUT TIME ZONE").unwrap(),
1469            DataType::Timestamp
1470        );
1471
1472        assert_eq!(
1473            DataType::from_str("timestamptz").unwrap(),
1474            DataType::Timestamptz
1475        );
1476        assert_eq!(
1477            DataType::from_str("timestamp with time zone").unwrap(),
1478            DataType::Timestamptz
1479        );
1480        assert_eq!(
1481            DataType::from_str("TIMESTAMPTZ").unwrap(),
1482            DataType::Timestamptz
1483        );
1484        assert_eq!(
1485            DataType::from_str("TIMESTAMP WITH TIME ZONE").unwrap(),
1486            DataType::Timestamptz
1487        );
1488
1489        assert_eq!(DataType::from_str("interval").unwrap(), DataType::Interval);
1490        assert_eq!(DataType::from_str("INTERVAL").unwrap(), DataType::Interval);
1491
1492        assert_eq!(
1493            DataType::from_str("int2[]").unwrap(),
1494            DataType::List(Box::new(DataType::Int16))
1495        );
1496        assert_eq!(
1497            DataType::from_str("int[]").unwrap(),
1498            DataType::List(Box::new(DataType::Int32))
1499        );
1500        assert_eq!(
1501            DataType::from_str("int8[]").unwrap(),
1502            DataType::List(Box::new(DataType::Int64))
1503        );
1504        assert_eq!(
1505            DataType::from_str("float4[]").unwrap(),
1506            DataType::List(Box::new(DataType::Float32))
1507        );
1508        assert_eq!(
1509            DataType::from_str("float8[]").unwrap(),
1510            DataType::List(Box::new(DataType::Float64))
1511        );
1512        assert_eq!(
1513            DataType::from_str("decimal[]").unwrap(),
1514            DataType::List(Box::new(DataType::Decimal))
1515        );
1516        assert_eq!(
1517            DataType::from_str("varchar[]").unwrap(),
1518            DataType::List(Box::new(DataType::Varchar))
1519        );
1520        assert_eq!(
1521            DataType::from_str("date[]").unwrap(),
1522            DataType::List(Box::new(DataType::Date))
1523        );
1524        assert_eq!(
1525            DataType::from_str("time[]").unwrap(),
1526            DataType::List(Box::new(DataType::Time))
1527        );
1528        assert_eq!(
1529            DataType::from_str("timestamp[]").unwrap(),
1530            DataType::List(Box::new(DataType::Timestamp))
1531        );
1532        assert_eq!(
1533            DataType::from_str("timestamptz[]").unwrap(),
1534            DataType::List(Box::new(DataType::Timestamptz))
1535        );
1536        assert_eq!(
1537            DataType::from_str("interval[]").unwrap(),
1538            DataType::List(Box::new(DataType::Interval))
1539        );
1540
1541        assert_eq!(
1542            DataType::from_str("record").unwrap(),
1543            DataType::Struct(StructType::unnamed(vec![]))
1544        );
1545        assert_eq!(
1546            DataType::from_str("struct<a int4, b varchar>").unwrap(),
1547            DataType::Struct(StructType::new(vec![
1548                ("a", DataType::Int32),
1549                ("b", DataType::Varchar)
1550            ]))
1551        );
1552    }
1553
1554    #[test]
1555    fn test_can_alter() {
1556        let cannots = [
1557            (DataType::Int32, None),
1558            (DataType::List(DataType::Int32.into()), None),
1559            (
1560                MapType::from_kv(DataType::Varchar, DataType::List(DataType::Int32.into())).into(),
1561                None,
1562            ),
1563            (
1564                StructType::new([("a", DataType::Int32)]).into(),
1565                Some(false),
1566            ),
1567            (
1568                MapType::from_kv(
1569                    DataType::Varchar,
1570                    StructType::new([("a", DataType::Int32)]).into(),
1571                )
1572                .into(),
1573                Some(false),
1574            ),
1575        ];
1576        for (cannot, why) in cannots {
1577            assert_eq!(cannot.can_alter(), why, "{cannot:?}");
1578        }
1579
1580        let cans = [
1581            StructType::new([
1582                ("a", DataType::Int32),
1583                ("b", DataType::List(DataType::Int32.into())),
1584            ])
1585            .with_ids([ColumnId::new(1), ColumnId::new(2)])
1586            .into(),
1587            DataType::List(Box::new(DataType::Struct(
1588                StructType::new([("a", DataType::Int32)]).with_ids([ColumnId::new(1)]),
1589            ))),
1590            MapType::from_kv(
1591                DataType::Varchar,
1592                StructType::new([("a", DataType::Int32)])
1593                    .with_ids([ColumnId::new(1)])
1594                    .into(),
1595            )
1596            .into(),
1597        ];
1598        for can in cans {
1599            assert_eq!(can.can_alter(), Some(true), "{can:?}");
1600        }
1601    }
1602}