risingwave_common/types/
mod.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Data types in RisingWave.
16
17// NOTE: When adding or modifying data types, remember to update the type matrix in
18// src/expr/macro/src/types.rs
19
20use std::fmt::Debug;
21use std::hash::Hash;
22use std::str::FromStr;
23
24use bytes::{Buf, BufMut, Bytes};
25use chrono::{Datelike, Timelike};
26use itertools::Itertools;
27use parse_display::{Display, FromStr};
28use paste::paste;
29use postgres_types::{FromSql, IsNull, ToSql, Type};
30use risingwave_common_estimate_size::{EstimateSize, ZeroHeapSize};
31use risingwave_pb::data::PbDataType;
32use risingwave_pb::data::data_type::PbTypeName;
33use rw_iter_util::ZipEqFast as _;
34use serde::{Deserialize, Serialize, Serializer};
35use strum_macros::EnumDiscriminants;
36use thiserror_ext::AsReport;
37
38use crate::array::{
39    ArrayBuilderImpl, ArrayError, ArrayResult, NULL_VAL_FOR_HASH, PrimitiveArrayItemType,
40};
41// Complex type's value is based on the array
42pub use crate::array::{
43    ListRef, ListValue, MapRef, MapValue, StructRef, StructValue, VectorRef, VectorVal,
44};
45use crate::cast::{str_to_bool, str_to_bytea};
46use crate::catalog::ColumnId;
47use crate::error::BoxedError;
48use crate::{
49    dispatch_data_types, dispatch_scalar_ref_variants, dispatch_scalar_variants, for_all_variants,
50};
51
52mod cow;
53mod datetime;
54mod decimal;
55mod fields;
56mod from_sql;
57mod interval;
58mod jsonb;
59mod macros;
60mod map_type;
61mod native_type;
62mod num256;
63mod ops;
64mod ordered;
65mod ordered_float;
66pub mod postgres_type;
67mod scalar_impl;
68mod sentinel;
69mod serial;
70mod struct_type;
71mod successor;
72mod timestamptz;
73mod to_binary;
74mod to_sql;
75mod to_text;
76mod with_data_type;
77
78pub use fields::Fields;
79pub use risingwave_fields_derive::Fields;
80
81pub use self::cow::DatumCow;
82pub use self::datetime::{Date, Time, Timestamp};
83pub use self::decimal::{Decimal, PowError as DecimalPowError};
84pub use self::interval::{DateTimeField, Interval, IntervalDisplay, test_utils};
85pub use self::jsonb::{JsonbRef, JsonbVal};
86pub use self::map_type::MapType;
87pub use self::native_type::*;
88pub use self::num256::{Int256, Int256Ref};
89pub use self::ops::{CheckedAdd, IsNegative};
90pub use self::ordered::*;
91pub use self::ordered_float::{FloatExt, IntoOrdered};
92pub use self::scalar_impl::*;
93pub use self::sentinel::Sentinelled;
94pub use self::serial::Serial;
95pub use self::struct_type::StructType;
96pub use self::successor::Successor;
97pub use self::timestamptz::*;
98pub use self::to_text::ToText;
99pub use self::with_data_type::WithDataType;
100
101/// A 32-bit floating point type with total order.
102pub type F32 = ordered_float::OrderedFloat<f32>;
103
104/// A 64-bit floating point type with total order.
105pub type F64 = ordered_float::OrderedFloat<f64>;
106
107pub const DEBEZIUM_UNAVAILABLE_VALUE: &str = "__debezium_unavailable_value";
108
109// Pre-built JSON value for Debezium unavailable value to avoid rebuilding it every time
110pub static DEBEZIUM_UNAVAILABLE_JSON: std::sync::LazyLock<JsonbVal> =
111    std::sync::LazyLock::new(|| {
112        let mut builder = jsonbb::Builder::default();
113        builder.add_string(DEBEZIUM_UNAVAILABLE_VALUE);
114        JsonbVal(builder.finish())
115    });
116
117/// The set of datatypes that are supported in RisingWave.
118///
119/// # Trait implementations
120///
121/// - `EnumDiscriminants` generates [`DataTypeName`] enum with the same variants,
122///   but without data fields.
123/// - `FromStr` is only used internally for tests.
124///   The generated implementation isn't efficient, and doesn't handle whitespaces, etc.
125#[derive(Debug, Display, Clone, PartialEq, Eq, Hash, EnumDiscriminants, FromStr)]
// The attributes below configure the generated `DataTypeName` enum: extra derives, its name and
// its visibility. `EnumIter` is additionally derived for it in test builds only.
126#[strum_discriminants(derive(Hash, Ord, PartialOrd))]
127#[strum_discriminants(name(DataTypeName))]
128#[strum_discriminants(vis(pub))]
129#[cfg_attr(test, strum_discriminants(derive(strum_macros::EnumIter)))]
130pub enum DataType {
    // For every variant, `display` is the text produced by `Display` and `from_str` is the
    // regex of spellings accepted by the derived `FromStr`.
131    #[display("boolean")]
132    #[from_str(regex = "(?i)^bool$|^boolean$")]
133    Boolean,
134    #[display("smallint")]
135    #[from_str(regex = "(?i)^smallint$|^int2$")]
136    Int16,
137    #[display("integer")]
138    #[from_str(regex = "(?i)^integer$|^int$|^int4$")]
139    Int32,
140    #[display("bigint")]
141    #[from_str(regex = "(?i)^bigint$|^int8$")]
142    Int64,
143    #[display("real")]
144    #[from_str(regex = "(?i)^real$|^float4$")]
145    Float32,
146    #[display("double precision")]
147    #[from_str(regex = "(?i)^double precision$|^float8$")]
148    Float64,
149    #[display("numeric")]
150    #[from_str(regex = "(?i)^numeric$|^decimal$")]
151    Decimal,
152    #[display("date")]
153    #[from_str(regex = "(?i)^date$")]
154    Date,
155    #[display("character varying")]
156    #[from_str(regex = "(?i)^character varying$|^varchar$")]
157    Varchar,
158    #[display("time without time zone")]
159    #[from_str(regex = "(?i)^time$|^time without time zone$")]
160    Time,
161    #[display("timestamp without time zone")]
162    #[from_str(regex = "(?i)^timestamp$|^timestamp without time zone$")]
163    Timestamp,
164    #[display("timestamp with time zone")]
165    #[from_str(regex = "(?i)^timestamptz$|^timestamp with time zone$")]
166    Timestamptz,
167    #[display("interval")]
168    #[from_str(regex = "(?i)^interval$")]
169    Interval,
    // Display and parsing are both delegated to the inner `StructType`.
    // NOTE(review): this regex accepts any non-empty text and is identical to the one on `Map`
    // below; which variant wins presumably depends on the order in which the derived `FromStr`
    // tries the variants — confirm.
170    #[display("{0}")]
171    #[from_str(regex = "(?i)^(?P<0>.+)$")]
172    Struct(StructType),
    // The element type is boxed because the enum is recursive. Parsing the captured element
    // text goes through the `FromStr for Box<DataType>` impl in this file.
173    #[display("{0}[]")]
174    #[from_str(regex = r"(?i)^(?P<0>.+)\[\]$")]
175    List(Box<DataType>),
176    #[display("bytea")]
177    #[from_str(regex = "(?i)^bytea$")]
178    Bytea,
179    #[display("jsonb")]
180    #[from_str(regex = "(?i)^jsonb$")]
181    Jsonb,
182    #[display("serial")]
183    #[from_str(regex = "(?i)^serial$")]
184    Serial,
185    #[display("rw_int256")]
186    #[from_str(regex = "(?i)^rw_int256$")]
187    Int256,
    // Display and parsing are both delegated to the inner `MapType` (see the note on `Struct`).
188    #[display("{0}")]
189    #[from_str(regex = "(?i)^(?P<0>.+)$")]
190    Map(MapType),
    // The payload is the vector size; `to_protobuf` stores it in the `precision` field.
191    #[display("vector({0})")]
192    #[from_str(regex = "(?i)^vector\\((?P<0>.+)\\)$")]
193    Vector(usize),
194}
195
// Negative impl: explicitly declares that `DataType` does not implement `PartialOrd`.
196impl !PartialOrd for DataType {}
197
198// For DataType::List
199impl std::str::FromStr for Box<DataType> {
200    type Err = BoxedError;
201
202    fn from_str(s: &str) -> Result<Self, Self::Err> {
203        Ok(Box::new(DataType::from_str(s)?))
204    }
205}
206
// NOTE(review): `DataType` can own heap data (e.g. the `Box` inside `List`); presumably that is
// deliberately ignored for size estimation — confirm.
207impl ZeroHeapSize for DataType {}
208
209impl TryFrom<DataTypeName> for DataType {
210    type Error = &'static str;
211
212    fn try_from(type_name: DataTypeName) -> Result<Self, Self::Error> {
213        match type_name {
214            DataTypeName::Boolean => Ok(DataType::Boolean),
215            DataTypeName::Int16 => Ok(DataType::Int16),
216            DataTypeName::Int32 => Ok(DataType::Int32),
217            DataTypeName::Int64 => Ok(DataType::Int64),
218            DataTypeName::Int256 => Ok(DataType::Int256),
219            DataTypeName::Serial => Ok(DataType::Serial),
220            DataTypeName::Decimal => Ok(DataType::Decimal),
221            DataTypeName::Float32 => Ok(DataType::Float32),
222            DataTypeName::Float64 => Ok(DataType::Float64),
223            DataTypeName::Varchar => Ok(DataType::Varchar),
224            DataTypeName::Bytea => Ok(DataType::Bytea),
225            DataTypeName::Date => Ok(DataType::Date),
226            DataTypeName::Timestamp => Ok(DataType::Timestamp),
227            DataTypeName::Timestamptz => Ok(DataType::Timestamptz),
228            DataTypeName::Time => Ok(DataType::Time),
229            DataTypeName::Interval => Ok(DataType::Interval),
230            DataTypeName::Jsonb => Ok(DataType::Jsonb),
231            DataTypeName::Struct
232            | DataTypeName::List
233            | DataTypeName::Map
234            | DataTypeName::Vector => Err(
235                "Functions returning parameterized types can not be inferred. Please use `FunctionCall::new_unchecked`.",
236            ),
237        }
238    }
239}
240
241impl From<&PbDataType> for DataType {
242    fn from(proto: &PbDataType) -> DataType {
243        match proto.get_type_name().expect("missing type field") {
244            PbTypeName::TypeUnspecified => unreachable!(),
245            PbTypeName::Int16 => DataType::Int16,
246            PbTypeName::Int32 => DataType::Int32,
247            PbTypeName::Int64 => DataType::Int64,
248            PbTypeName::Serial => DataType::Serial,
249            PbTypeName::Float => DataType::Float32,
250            PbTypeName::Double => DataType::Float64,
251            PbTypeName::Boolean => DataType::Boolean,
252            PbTypeName::Varchar => DataType::Varchar,
253            PbTypeName::Date => DataType::Date,
254            PbTypeName::Time => DataType::Time,
255            PbTypeName::Timestamp => DataType::Timestamp,
256            PbTypeName::Timestamptz => DataType::Timestamptz,
257            PbTypeName::Decimal => DataType::Decimal,
258            PbTypeName::Interval => DataType::Interval,
259            PbTypeName::Bytea => DataType::Bytea,
260            PbTypeName::Jsonb => DataType::Jsonb,
261            PbTypeName::Struct => {
262                let fields: Vec<DataType> = proto.field_type.iter().map(|f| f.into()).collect_vec();
263                let field_names: Vec<String> = proto.field_names.iter().cloned().collect_vec();
264                let field_ids = (proto.field_ids.iter().copied())
265                    .map(ColumnId::new)
266                    .collect_vec();
267
268                let mut struct_type = if proto.field_names.is_empty() {
269                    StructType::unnamed(fields)
270                } else {
271                    StructType::new(field_names.into_iter().zip_eq_fast(fields))
272                };
273                // `field_ids` is used for nested-schema evolution. Cases when `field_ids` is empty:
274                //
275                // 1. The data type is not associated with a table column, so we don't need to set it.
276                // 2. The column is created before nested-schema evolution is supported, thus is using
277                //    the old serialization format and does not have field ids.
278                // 3. This is an empty struct, which is always considered alterable, and setting ids
279                //    is a no-op.
280                if !field_ids.is_empty() {
281                    struct_type = struct_type.with_ids(field_ids);
282                }
283                struct_type.into()
284            }
285            PbTypeName::List => DataType::List(
286                // The first (and only) item is the list element type.
287                Box::new((&proto.field_type[0]).into()),
288            ),
289            PbTypeName::Map => {
290                // Map is physically the same as a list.
291                // So the first (and only) item is the list element type.
292                let list_entries_type: DataType = (&proto.field_type[0]).into();
293                DataType::Map(MapType::from_entries(list_entries_type))
294            }
295            PbTypeName::Vector => DataType::Vector(proto.precision as _),
296            PbTypeName::Int256 => DataType::Int256,
297        }
298    }
299}
300
301impl From<PbDataType> for DataType {
302    fn from(proto: PbDataType) -> DataType {
303        DataType::from(&proto)
304    }
305}
306
307impl From<DataTypeName> for PbTypeName {
308    fn from(type_name: DataTypeName) -> Self {
309        match type_name {
310            DataTypeName::Boolean => PbTypeName::Boolean,
311            DataTypeName::Int16 => PbTypeName::Int16,
312            DataTypeName::Int32 => PbTypeName::Int32,
313            DataTypeName::Int64 => PbTypeName::Int64,
314            DataTypeName::Serial => PbTypeName::Serial,
315            DataTypeName::Float32 => PbTypeName::Float,
316            DataTypeName::Float64 => PbTypeName::Double,
317            DataTypeName::Varchar => PbTypeName::Varchar,
318            DataTypeName::Date => PbTypeName::Date,
319            DataTypeName::Timestamp => PbTypeName::Timestamp,
320            DataTypeName::Timestamptz => PbTypeName::Timestamptz,
321            DataTypeName::Time => PbTypeName::Time,
322            DataTypeName::Interval => PbTypeName::Interval,
323            DataTypeName::Decimal => PbTypeName::Decimal,
324            DataTypeName::Bytea => PbTypeName::Bytea,
325            DataTypeName::Jsonb => PbTypeName::Jsonb,
326            DataTypeName::Struct => PbTypeName::Struct,
327            DataTypeName::List => PbTypeName::List,
328            DataTypeName::Int256 => PbTypeName::Int256,
329            DataTypeName::Map => PbTypeName::Map,
330            DataTypeName::Vector => PbTypeName::Vector,
331        }
332    }
333}
334
335/// Convenient macros to generate match arms for [`DataType`].
///
/// Each macro expands to an or-pattern, so it can be used as a `match` arm or inside `matches!`.
/// The patterns name `DataType` unqualified, hence `DataType` must be in scope at the call site.
336pub mod data_types {
    // Brings `DataType` into scope for the exhaustiveness check at the bottom of this module.
337    use super::DataType;
338
339    /// Numeric [`DataType`]s supported to be `offset` of `RANGE` frame.
340    #[macro_export]
341    macro_rules! _range_frame_numeric_data_types {
342        () => {
343            DataType::Int16
344                | DataType::Int32
345                | DataType::Int64
346                | DataType::Float32
347                | DataType::Float64
348                | DataType::Decimal
349        };
350    }
    // Re-exported under a short name so that it can be invoked through this module.
351    pub use _range_frame_numeric_data_types as range_frame_numeric;
352
353    /// Date/time [`DataType`]s supported to be `offset` of `RANGE` frame.
354    #[macro_export]
355    macro_rules! _range_frame_datetime_data_types {
356        () => {
357            DataType::Date
358                | DataType::Time
359                | DataType::Timestamp
360                | DataType::Timestamptz
361                | DataType::Interval
362        };
363    }
    // Re-exported under a short name so that it can be invoked through this module.
364    pub use _range_frame_datetime_data_types as range_frame_datetime;
365
366    /// Data types that do not have inner fields.
367    #[macro_export]
368    macro_rules! _simple_data_types {
369        () => {
370            DataType::Boolean
371                | DataType::Int16
372                | DataType::Int32
373                | DataType::Int64
374                | DataType::Float32
375                | DataType::Float64
376                | DataType::Decimal
377                | DataType::Date
378                | DataType::Varchar
379                | DataType::Time
380                | DataType::Timestamp
381                | DataType::Timestamptz
382                | DataType::Interval
383                | DataType::Bytea
384                | DataType::Jsonb
385                | DataType::Serial
386                | DataType::Int256
387                | DataType::Vector(_)
388        };
389    }
    // Re-exported under a short name; invoked as `data_types::simple!()` elsewhere in this file.
390    pub use _simple_data_types as simple;
391
392    /// Data types that have inner fields.
393    #[macro_export]
394    macro_rules! _composite_data_types {
395        () => {
396            DataType::Struct { .. } | DataType::List { .. } | DataType::Map { .. }
397        };
398    }
    // Re-exported under a short name; invoked as `data_types::composite!()` elsewhere in this file.
399    pub use _composite_data_types as composite;
400
401    /// Test that all data types are covered either by `simple!()` or `composite!()`.
    ///
    /// This is a compile-time check: the `match` has no wildcard arm, so a variant missing from
    /// both macros makes the match non-exhaustive.
402    fn _simple_composite_data_types_exhausted(dt: DataType) {
403        match dt {
404            simple!() => {}
405            composite!() => {}
406        }
407    }
408}
409
410impl DataType {
411    /// Same as pgvector; unsure how it was chosen there
412    /// <https://github.com/pgvector/pgvector/blob/v0.8.0/README.md#vector-type>
413    pub const VEC_MAX_SIZE: usize = 16000;
414
415    pub fn create_array_builder(&self, capacity: usize) -> ArrayBuilderImpl {
416        use crate::array::*;
417
418        dispatch_data_types!(self, [B = ArrayBuilder], {
419            B::with_type(capacity, self.clone()).into()
420        })
421    }
422
423    pub fn type_name(&self) -> DataTypeName {
424        DataTypeName::from(self)
425    }
426
427    pub fn prost_type_name(&self) -> PbTypeName {
428        self.type_name().into()
429    }
430
431    pub fn to_protobuf(&self) -> PbDataType {
432        let mut pb = PbDataType {
433            type_name: self.prost_type_name() as i32,
434            is_nullable: true,
435            ..Default::default()
436        };
437        match self {
438            DataType::Struct(t) => {
439                if !t.is_unnamed() {
440                    // To be consistent with `From<&PbDataType>`,
441                    // we only set field names when it's a named struct.
442                    pb.field_names = t.names().map(|s| s.into()).collect();
443                }
444                pb.field_type = t.types().map(|f| f.to_protobuf()).collect();
445                if let Some(ids) = t.ids() {
446                    pb.field_ids = ids.map(|id| id.get_id()).collect();
447                }
448            }
449            DataType::List(datatype) => {
450                pb.field_type = vec![datatype.to_protobuf()];
451            }
452            DataType::Map(datatype) => {
453                // Same as List<Struct<K,V>>
454                pb.field_type = vec![datatype.clone().into_struct().to_protobuf()];
455            }
456            DataType::Vector(size) => {
457                pb.precision = *size as _;
458            }
459            DataType::Boolean
460            | DataType::Int16
461            | DataType::Int32
462            | DataType::Int64
463            | DataType::Float32
464            | DataType::Float64
465            | DataType::Decimal
466            | DataType::Date
467            | DataType::Varchar
468            | DataType::Time
469            | DataType::Timestamp
470            | DataType::Timestamptz
471            | DataType::Interval
472            | DataType::Bytea
473            | DataType::Jsonb
474            | DataType::Serial
475            | DataType::Int256 => (),
476        }
477        pb
478    }
479
480    pub fn is_numeric(&self) -> bool {
481        matches!(
482            self,
483            DataType::Int16
484                | DataType::Int32
485                | DataType::Int64
486                | DataType::Serial
487                | DataType::Float32
488                | DataType::Float64
489                | DataType::Decimal
490        )
491    }
492
493    /// Returns whether the data type does not have inner fields.
494    pub fn is_simple(&self) -> bool {
495        matches!(self, data_types::simple!())
496    }
497
498    /// Returns whether the data type has inner fields.
499    pub fn is_composite(&self) -> bool {
500        matches!(self, data_types::composite!())
501    }
502
503    pub fn is_array(&self) -> bool {
504        matches!(self, DataType::List(_))
505    }
506
507    pub fn is_struct(&self) -> bool {
508        matches!(self, DataType::Struct(_))
509    }
510
511    pub fn is_map(&self) -> bool {
512        matches!(self, DataType::Map(_))
513    }
514
515    pub fn is_int(&self) -> bool {
516        matches!(self, DataType::Int16 | DataType::Int32 | DataType::Int64)
517    }
518
519    /// Returns the output type of time window function on a given input type.
520    pub fn window_of(input: &DataType) -> Option<DataType> {
521        match input {
522            DataType::Timestamptz => Some(DataType::Timestamptz),
523            DataType::Timestamp | DataType::Date => Some(DataType::Timestamp),
524            _ => None,
525        }
526    }
527
528    pub fn as_struct(&self) -> &StructType {
529        match self {
530            DataType::Struct(t) => t,
531            t => panic!("expect struct type, got {t}"),
532        }
533    }
534
535    pub fn into_struct(self) -> StructType {
536        match self {
537            DataType::Struct(t) => t,
538            t => panic!("expect struct type, got {t}"),
539        }
540    }
541
542    pub fn as_map(&self) -> &MapType {
543        match self {
544            DataType::Map(t) => t,
545            t => panic!("expect map type, got {t}"),
546        }
547    }
548
549    pub fn into_map(self) -> MapType {
550        match self {
551            DataType::Map(t) => t,
552            t => panic!("expect map type, got {t}"),
553        }
554    }
555
556    /// Returns the inner element's type of a list type.
557    ///
558    /// # Panics
559    ///
560    /// Panics if the type is not a list type.
561    pub fn as_list_element_type(&self) -> &DataType {
562        match self {
563            DataType::List(t) => t,
564            t => panic!("expect list type, got {t}"),
565        }
566    }
567
568    pub fn into_list_element_type(self) -> DataType {
569        match self {
570            DataType::List(t) => *t,
571            t => panic!("expect list type, got {t}"),
572        }
573    }
574
575    /// Return a new type that removes the outer list, and get the innermost element type.
576    ///
577    /// Use [`DataType::as_list_element_type`] if you only want the element type of a list.
578    ///
579    /// ```
580    /// use risingwave_common::types::DataType::*;
581    /// assert_eq!(List(Box::new(Int32)).unnest_list(), &Int32);
582    /// assert_eq!(List(Box::new(List(Box::new(Int32)))).unnest_list(), &Int32);
583    /// ```
584    pub fn unnest_list(&self) -> &Self {
585        match self {
586            DataType::List(inner) => inner.unnest_list(),
587            _ => self,
588        }
589    }
590
591    /// Return the number of dimensions of this array/list type. Return `0` when this type is not an
592    /// array/list.
593    pub fn array_ndims(&self) -> usize {
594        let mut d = 0;
595        let mut t = self;
596        while let Self::List(inner) = t {
597            d += 1;
598            t = inner;
599        }
600        d
601    }
602
603    /// Compares the datatype with another, ignoring nested field names and ids.
604    pub fn equals_datatype(&self, other: &DataType) -> bool {
605        match (self, other) {
606            (Self::Struct(s1), Self::Struct(s2)) => s1.equals_datatype(s2),
607            (Self::List(d1), Self::List(d2)) => d1.equals_datatype(d2),
608            (Self::Map(m1), Self::Map(m2)) => {
609                m1.key().equals_datatype(m2.key()) && m1.value().equals_datatype(m2.value())
610            }
611            _ => self == other,
612        }
613    }
614
615    /// Whether a column with this data type can be altered to a new data type. This determines
616    /// the encoding of the column data.
617    ///
618    /// Returns...
619    /// - `None`, if the data type is simple or does not contain a struct type.
620    /// - `Some(true)`, if the data type contains a struct type with field ids ([`StructType::has_ids`]).
621    /// - `Some(false)`, if the data type contains a struct type without field ids.
622    pub fn can_alter(&self) -> Option<bool> {
623        match self {
624            data_types::simple!() => None,
625            DataType::Struct(struct_type) => {
626                // As long as we meet a struct type, we can check its `ids` field to determine if
627                // it can be altered.
628                let struct_can_alter = struct_type.has_ids();
629                // In debug build, we assert that once a struct type does (or does not) have ids,
630                // all its composite fields should have the same property.
631                if cfg!(debug_assertions) {
632                    for field in struct_type.types() {
633                        if let Some(field_can_alter) = field.can_alter() {
634                            assert_eq!(struct_can_alter, field_can_alter);
635                        }
636                    }
637                }
638                Some(struct_can_alter)
639            }
640
641            DataType::List(inner_type) => inner_type.can_alter(),
642            DataType::Map(map_type) => {
643                debug_assert!(
644                    map_type.key().is_simple(),
645                    "unexpected key type of map {map_type:?}"
646                );
647                map_type.value().can_alter()
648            }
649        }
650    }
651}
652
653impl From<StructType> for DataType {
654    fn from(value: StructType) -> Self {
655        Self::Struct(value)
656    }
657}
658
659impl From<DataType> for PbDataType {
660    fn from(data_type: DataType) -> Self {
661        data_type.to_protobuf()
662    }
663}
664
665mod private {
666    use super::*;
667
668    // Note: put pub trait inside a private mod just makes the name private,
669    // The trait methods will still be publicly available...
670    // a.k.a. ["Voldemort type"](https://rust-lang.github.io/rfcs/2145-type-privacy.html#lint-3-voldemort-types-its-reachable-but-i-cant-name-it)
671
672    /// Common trait bounds of scalar and scalar reference types.
673    ///
    /// This is a trait alias. `Impl` is the enum the type converts to and from: `Scalar` uses it
    /// with `ScalarImpl`, and `ScalarRef` with `ScalarRefImpl`.
    ///
674    /// NOTE(rc): `Hash` is not in the trait bound list, it's implemented as [`ScalarRef::hash_scalar`].
675    pub trait ScalarBounds<Impl> = Debug
676        + Send
677        + Sync
678        + Clone
679        + PartialEq
680        + Eq
681        // in default ascending order
682        + PartialOrd
683        + Ord
684        + TryFrom<Impl, Error = ArrayError>
685        // `ScalarImpl`/`ScalarRefImpl`
686        + Into<Impl>;
687}
688
689/// `Scalar` is a trait over all possible owned types in the evaluation
690/// framework.
691///
692/// `Scalar` is reciprocal to `ScalarRef`. Use `as_scalar_ref` to get a
693/// reference which has the same lifetime as `self`.
694pub trait Scalar: private::ScalarBounds<ScalarImpl> + 'static {
695    /// Type for reference of `Scalar`
696    type ScalarRefType<'a>: ScalarRef<'a, ScalarType = Self> + 'a
697    where
698        Self: 'a;
699
700    /// Get a reference to current scalar.
701    fn as_scalar_ref(&self) -> Self::ScalarRefType<'_>;
702
703    fn to_scalar_value(self) -> ScalarImpl {
704        self.into()
705    }
706}
707
708/// `ScalarRef` is a trait over all possible references in the evaluation
709/// framework.
710///
711/// `ScalarRef` is reciprocal to `Scalar`. Use `to_owned_scalar` to get an
712/// owned scalar.
713pub trait ScalarRef<'a>: private::ScalarBounds<ScalarRefImpl<'a>> + 'a + Copy {
714    /// `ScalarType` is the owned type of current `ScalarRef`.
715    type ScalarType: Scalar<ScalarRefType<'a> = Self>;
716
717    /// Convert `ScalarRef` to an owned scalar.
718    fn to_owned_scalar(&self) -> Self::ScalarType;
719
720    /// A wrapped hash function to get the hash value for this scalar.
    ///
    /// The value is fed into the caller-supplied hasher `state`.
721    fn hash_scalar<H: std::hash::Hasher>(&self, state: &mut H);
722}
723
724/// Define `ScalarImpl` and `ScalarRefImpl` with macro.
///
/// Each input entry has the shape
/// `{ data_type, variant_name, suffix_name, scalar, scalar_ref, array, builder }`; only
/// `variant_name`, `scalar` and `scalar_ref` are used by this macro.
725macro_rules! scalar_impl_enum {
726    ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty } ),*) => {
727        /// `ScalarImpl` embeds all possible scalars in the evaluation framework.
728        ///
729        /// Note: `ScalarImpl` doesn't contain all information of its `DataType`,
730        /// so sometimes they need to be used together.
731        /// e.g., for `Struct`, we don't have the field names in the value.
732        ///
733        /// See `for_all_variants` for the definition.
734        #[derive(Debug, Clone, PartialEq, Eq, EstimateSize)]
735        pub enum ScalarImpl {
            // One variant per entry, wrapping the owned scalar type.
736            $( $variant_name($scalar) ),*
737        }
738
739        /// `ScalarRefImpl` embeds all possible scalar references in the evaluation
740        /// framework.
741        ///
742        /// Note: `ScalarRefImpl` doesn't contain all information of its `DataType`,
743        /// so sometimes they need to be used together.
744        /// e.g., for `Struct`, we don't have the field names in the value.
745        ///
746        /// See `for_all_variants` for the definition.
747        #[derive(Debug, Copy, Clone, PartialEq, Eq)]
748        pub enum ScalarRefImpl<'scalar> {
            // One variant per entry, wrapping the scalar reference type.
749            $( $variant_name($scalar_ref) ),*
750        }
751    };
752}
753
// Instantiate both enums with the variant list supplied by `for_all_variants!`.
754for_all_variants! { scalar_impl_enum }
755
756// We MUST NOT implement `Ord` for `ScalarImpl` because that will make `Datum` derive an incorrect
757// default `Ord`. To get a default-ordered `ScalarImpl`/`ScalarRefImpl`/`Datum`/`DatumRef`, you can
758// use `DefaultOrdered<T>`. If non-default order is needed, please refer to `sort_util`.
759impl !PartialOrd for ScalarImpl {}
760impl !PartialOrd for ScalarRefImpl<'_> {}
761
/// An optional owned scalar.
// NOTE(review): `None` presumably stands for SQL NULL — confirm.
762pub type Datum = Option<ScalarImpl>;
/// An optional scalar reference, the borrowed counterpart of [`Datum`].
763pub type DatumRef<'a> = Option<ScalarRefImpl<'a>>;
764
765/// This trait is to implement `to_owned_datum` for `Option<ScalarImpl>`
///
/// It is implemented in this file for `&Datum`, for any `T: Into<ScalarImpl>`, and for
/// `Option<T>` of such a `T`.
766pub trait ToOwnedDatum {
767    /// Convert the datum to an owned [`Datum`].
768    fn to_owned_datum(self) -> Datum;
769}
770
771impl ToOwnedDatum for &Datum {
772    #[inline(always)]
773    fn to_owned_datum(self) -> Datum {
774        self.clone()
775    }
776}
777
778impl<T: Into<ScalarImpl>> ToOwnedDatum for T {
779    #[inline(always)]
780    fn to_owned_datum(self) -> Datum {
781        Some(self.into())
782    }
783}
784
785impl<T: Into<ScalarImpl>> ToOwnedDatum for Option<T> {
786    #[inline(always)]
787    fn to_owned_datum(self) -> Datum {
788        self.map(Into::into)
789    }
790}
791
/// Types that can be viewed as a [`DatumRef`].
///
/// It is implemented in this file for `Datum`, `Option<&ScalarImpl>` and `DatumRef` itself.
792#[auto_impl::auto_impl(&)]
793pub trait ToDatumRef: PartialEq + Eq + Debug {
794    /// Convert the datum to [`DatumRef`].
795    fn to_datum_ref(&self) -> DatumRef<'_>;
796}
797
798impl ToDatumRef for Datum {
799    #[inline(always)]
800    fn to_datum_ref(&self) -> DatumRef<'_> {
801        self.as_ref().map(|d| d.as_scalar_ref_impl())
802    }
803}
804impl ToDatumRef for Option<&ScalarImpl> {
805    #[inline(always)]
806    fn to_datum_ref(&self) -> DatumRef<'_> {
807        self.map(|d| d.as_scalar_ref_impl())
808    }
809}
// A `DatumRef` is already in the target form, so the conversion is just a copy.
810impl ToDatumRef for DatumRef<'_> {
811    #[inline(always)]
812    fn to_datum_ref(&self) -> DatumRef<'_> {
813        *self
814    }
815}
816
817/// To make sure there is `as_scalar_ref` for all scalar ref types.
818/// See <https://github.com/risingwavelabs/risingwave/pull/9977/files#r1208972881>
819///
820/// This is used by the expr macro.
821pub trait SelfAsScalarRef {
    /// Returns the scalar reference itself.
822    fn as_scalar_ref(&self) -> Self;
823}
// Implements `SelfAsScalarRef` for each listed type by returning a copy of `self`.
824macro_rules! impl_self_as_scalar_ref {
825    ($($t:ty),*) => {
826        $(
827            impl SelfAsScalarRef for $t {
828                fn as_scalar_ref(&self) -> Self {
829                    *self
830                }
831            }
832        )*
833    };
834}
// NOTE(review): `VectorRef` is not in this list — confirm whether that is intentional.
835impl_self_as_scalar_ref! { &str, &[u8], Int256Ref<'_>, JsonbRef<'_>, ListRef<'_>, StructRef<'_>, ScalarRefImpl<'_>, MapRef<'_> }
836
837/// `for_all_native_types` includes all native variants of our scalar types.
838///
839/// Specifically, it doesn't support u8/u16/u32/u64.
///
/// The given `$macro` is invoked once with a comma-separated list of three-item entries: a
/// Rust type, the matching variant name, and a `read_*` identifier.
// NOTE(review): the third item is presumably the name of a buffer read method — confirm.
840#[macro_export]
841macro_rules! for_all_native_types {
842    ($macro:ident) => {
843        $macro! {
844            { i16, Int16, read_i16 },
845            { i32, Int32, read_i32 },
846            { i64, Int64, read_i64 },
847            { Serial, Serial, read_i64 },
848            { $crate::types::F32, Float32, read_f32 },
849            { $crate::types::F64, Float64, read_f64 }
850        }
851    };
852}
853
854/// `impl_convert` implements several conversions for `Scalar`.
855/// * `Scalar <-> ScalarImpl` with `From` and `TryFrom` trait.
856/// * `ScalarRef <-> ScalarRefImpl` with `From` and `TryFrom` trait.
857/// * `&ScalarImpl -> &Scalar` with `impl.as_int16()`.
858/// * `ScalarImpl -> Scalar` with `impl.into_int16()`.
859macro_rules! impl_convert {
860    ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty } ),*) => {
861        $(
862            impl From<$scalar> for ScalarImpl {
863                fn from(val: $scalar) -> Self {
864                    ScalarImpl::$variant_name(val)
865                }
866            }
867
868            impl TryFrom<ScalarImpl> for $scalar {
869                type Error = ArrayError;
870
871                fn try_from(val: ScalarImpl) -> ArrayResult<Self> {
872                    match val {
873                        ScalarImpl::$variant_name(scalar) => Ok(scalar),
874                        other_scalar => bail!("cannot convert ScalarImpl::{} to concrete type", other_scalar.get_ident()),
875                    }
876                }
877            }
878
879            impl <'scalar> From<$scalar_ref> for ScalarRefImpl<'scalar> {
880                fn from(val: $scalar_ref) -> Self {
881                    ScalarRefImpl::$variant_name(val)
882                }
883            }
884
885            impl <'scalar> TryFrom<ScalarRefImpl<'scalar>> for $scalar_ref {
886                type Error = ArrayError;
887
888                fn try_from(val: ScalarRefImpl<'scalar>) -> ArrayResult<Self> {
889                    match val {
890                        ScalarRefImpl::$variant_name(scalar_ref) => Ok(scalar_ref),
891                        other_scalar => bail!("cannot convert ScalarRefImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name)),
892                    }
893                }
894            }
895
896            paste! {
897                impl ScalarImpl {
898                    /// # Panics
899                    /// If the scalar is not of the expected type.
900                    pub fn [<as_ $suffix_name>](&self) -> &$scalar {
901                        match self {
902                            Self::$variant_name(scalar) => scalar,
903                            other_scalar => panic!("cannot convert ScalarImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
904                        }
905                    }
906
907                    /// # Panics
908                    /// If the scalar is not of the expected type.
909                    pub fn [<into_ $suffix_name>](self) -> $scalar {
910                        match self {
911                            Self::$variant_name(scalar) => scalar,
912                            other_scalar =>  panic!("cannot convert ScalarImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
913                        }
914                    }
915                }
916
917                impl <'scalar> ScalarRefImpl<'scalar> {
918                    /// # Panics
919                    /// If the scalar is not of the expected type.
920                    pub fn [<into_ $suffix_name>](self) -> $scalar_ref {
921                        match self {
922                            Self::$variant_name(inner) => inner,
923                            other_scalar => panic!("cannot convert ScalarRefImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
924                        }
925                    }
926                }
927            }
928        )*
929    };
930}
931
932for_all_variants! { impl_convert }
933
934// Implement `From<raw float>` for `ScalarImpl::Float` as a sugar.
935impl From<f32> for ScalarImpl {
936    fn from(f: f32) -> Self {
937        Self::Float32(f.into())
938    }
939}
940impl From<f64> for ScalarImpl {
941    fn from(f: f64) -> Self {
942        Self::Float64(f.into())
943    }
944}
945
946// Implement `From<string like>` for `ScalarImpl::Utf8` as a sugar.
947impl From<String> for ScalarImpl {
948    fn from(s: String) -> Self {
949        Self::Utf8(s.into_boxed_str())
950    }
951}
952impl From<&str> for ScalarImpl {
953    fn from(s: &str) -> Self {
954        Self::Utf8(s.into())
955    }
956}
957impl From<&String> for ScalarImpl {
958    fn from(s: &String) -> Self {
959        Self::Utf8(s.as_str().into())
960    }
961}
962impl TryFrom<ScalarImpl> for String {
963    type Error = ArrayError;
964
965    fn try_from(val: ScalarImpl) -> ArrayResult<Self> {
966        match val {
967            ScalarImpl::Utf8(s) => Ok(s.into()),
968            other_scalar => bail!(
969                "cannot convert ScalarImpl::{} to concrete type",
970                other_scalar.get_ident()
971            ),
972        }
973    }
974}
975
976impl From<char> for ScalarImpl {
977    fn from(c: char) -> Self {
978        Self::Utf8(c.to_string().into())
979    }
980}
981
982impl From<&[u8]> for ScalarImpl {
983    fn from(s: &[u8]) -> Self {
984        Self::Bytea(s.into())
985    }
986}
987
988impl From<JsonbRef<'_>> for ScalarImpl {
989    fn from(jsonb: JsonbRef<'_>) -> Self {
990        Self::Jsonb(jsonb.to_owned_scalar())
991    }
992}
993
994impl<T: PrimitiveArrayItemType> From<Vec<T>> for ScalarImpl {
995    fn from(v: Vec<T>) -> Self {
996        Self::List(v.into_iter().collect())
997    }
998}
999
1000impl<T: PrimitiveArrayItemType> From<Vec<Option<T>>> for ScalarImpl {
1001    fn from(v: Vec<Option<T>>) -> Self {
1002        Self::List(v.into_iter().collect())
1003    }
1004}
1005
1006impl From<Vec<String>> for ScalarImpl {
1007    fn from(v: Vec<String>) -> Self {
1008        Self::List(v.iter().map(|s| s.as_str()).collect())
1009    }
1010}
1011
1012impl From<Vec<u8>> for ScalarImpl {
1013    fn from(v: Vec<u8>) -> Self {
1014        Self::Bytea(v.into())
1015    }
1016}
1017
1018impl From<Bytes> for ScalarImpl {
1019    fn from(v: Bytes) -> Self {
1020        Self::Bytea(v.as_ref().into())
1021    }
1022}
1023
1024impl From<ListRef<'_>> for ScalarImpl {
1025    fn from(list: ListRef<'_>) -> Self {
1026        Self::List(list.to_owned_scalar())
1027    }
1028}
1029
1030impl ScalarImpl {
1031    /// Creates a scalar from pgwire "BINARY" format.
1032    ///
1033    /// The counterpart of [`to_binary::ToBinary`].
1034    pub fn from_binary(bytes: &Bytes, data_type: &DataType) -> Result<Self, BoxedError> {
1035        let res = match data_type {
1036            DataType::Varchar => Self::Utf8(String::from_sql(&Type::VARCHAR, bytes)?.into()),
1037            DataType::Bytea => Self::Bytea(Vec::<u8>::from_sql(&Type::BYTEA, bytes)?.into()),
1038            DataType::Boolean => Self::Bool(bool::from_sql(&Type::BOOL, bytes)?),
1039            DataType::Int16 => Self::Int16(i16::from_sql(&Type::INT2, bytes)?),
1040            DataType::Int32 => Self::Int32(i32::from_sql(&Type::INT4, bytes)?),
1041            DataType::Int64 => Self::Int64(i64::from_sql(&Type::INT8, bytes)?),
1042            DataType::Serial => Self::Serial(Serial::from(i64::from_sql(&Type::INT8, bytes)?)),
1043            DataType::Float32 => Self::Float32(f32::from_sql(&Type::FLOAT4, bytes)?.into()),
1044            DataType::Float64 => Self::Float64(f64::from_sql(&Type::FLOAT8, bytes)?.into()),
1045            DataType::Decimal => {
1046                Self::Decimal(rust_decimal::Decimal::from_sql(&Type::NUMERIC, bytes)?.into())
1047            }
1048            DataType::Date => Self::Date(chrono::NaiveDate::from_sql(&Type::DATE, bytes)?.into()),
1049            DataType::Time => Self::Time(chrono::NaiveTime::from_sql(&Type::TIME, bytes)?.into()),
1050            DataType::Timestamp => {
1051                Self::Timestamp(chrono::NaiveDateTime::from_sql(&Type::TIMESTAMP, bytes)?.into())
1052            }
1053            DataType::Timestamptz => Self::Timestamptz(
1054                chrono::DateTime::<chrono::Utc>::from_sql(&Type::TIMESTAMPTZ, bytes)?.into(),
1055            ),
1056            DataType::Interval => Self::Interval(Interval::from_sql(&Type::INTERVAL, bytes)?),
1057            DataType::Jsonb => Self::Jsonb(
1058                JsonbVal::value_deserialize(bytes)
1059                    .ok_or_else(|| "invalid value of Jsonb".to_owned())?,
1060            ),
1061            DataType::Int256 => Self::Int256(Int256::from_binary(bytes)?),
1062            DataType::Vector(_) | DataType::Struct(_) | DataType::List(_) | DataType::Map(_) => {
1063                return Err(format!("unsupported data type: {}", data_type).into());
1064            }
1065        };
1066        Ok(res)
1067    }
1068
1069    /// Creates a scalar from pgwire "TEXT" format.
1070    ///
1071    /// The counterpart of [`ToText`].
1072    pub fn from_text(s: &str, data_type: &DataType) -> Result<Self, BoxedError> {
1073        Ok(match data_type {
1074            DataType::Boolean => str_to_bool(s)?.into(),
1075            DataType::Int16 => i16::from_str(s)?.into(),
1076            DataType::Int32 => i32::from_str(s)?.into(),
1077            DataType::Int64 => i64::from_str(s)?.into(),
1078            DataType::Int256 => Int256::from_str(s)?.into(),
1079            DataType::Serial => Serial::from(i64::from_str(s)?).into(),
1080            DataType::Decimal => Decimal::from_str(s)?.into(),
1081            DataType::Float32 => F32::from_str(s)?.into(),
1082            DataType::Float64 => F64::from_str(s)?.into(),
1083            DataType::Varchar => s.into(),
1084            DataType::Date => Date::from_str(s)?.into(),
1085            DataType::Timestamp => Timestamp::from_str(s)?.into(),
1086            // We only handle the case with timezone here, and leave the implicit session timezone case
1087            // for later phase.
1088            DataType::Timestamptz => Timestamptz::from_str(s)?.into(),
1089            DataType::Time => Time::from_str(s)?.into(),
1090            DataType::Interval => Interval::from_str(s)?.into(),
1091            DataType::List(_) => ListValue::from_str(s, data_type)?.into(),
1092            DataType::Struct(st) => StructValue::from_str(s, st)?.into(),
1093            DataType::Jsonb => JsonbVal::from_str(s)?.into(),
1094            DataType::Bytea => str_to_bytea(s)?.into(),
1095            DataType::Vector(size) => VectorVal::from_text(s, *size)?.into(),
1096            DataType::Map(_m) => return Err("map from text is not supported".into()),
1097        })
1098    }
1099
1100    pub fn from_text_for_test(s: &str, data_type: &DataType) -> Result<Self, BoxedError> {
1101        Ok(match data_type {
1102            DataType::Map(map_type) => MapValue::from_str_for_test(s, map_type)?.into(),
1103            _ => ScalarImpl::from_text(s, data_type)?,
1104        })
1105    }
1106}
1107
1108impl From<ScalarRefImpl<'_>> for ScalarImpl {
1109    fn from(scalar_ref: ScalarRefImpl<'_>) -> Self {
1110        scalar_ref.into_scalar_impl()
1111    }
1112}
1113
1114impl<'a> From<&'a ScalarImpl> for ScalarRefImpl<'a> {
1115    fn from(scalar: &'a ScalarImpl) -> Self {
1116        scalar.as_scalar_ref_impl()
1117    }
1118}
1119
1120impl ScalarImpl {
1121    /// Converts [`ScalarImpl`] to [`ScalarRefImpl`]
1122    pub fn as_scalar_ref_impl(&self) -> ScalarRefImpl<'_> {
1123        dispatch_scalar_variants!(self, inner, { inner.as_scalar_ref().into() })
1124    }
1125}
1126
1127impl ScalarRefImpl<'_> {
1128    /// Converts [`ScalarRefImpl`] to [`ScalarImpl`]
1129    pub fn into_scalar_impl(self) -> ScalarImpl {
1130        dispatch_scalar_ref_variants!(self, inner, { inner.to_owned_scalar().into() })
1131    }
1132}
1133
1134impl Hash for ScalarImpl {
1135    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1136        dispatch_scalar_variants!(self, inner, { inner.as_scalar_ref().hash_scalar(state) })
1137    }
1138}
1139
1140impl Hash for ScalarRefImpl<'_> {
1141    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1142        dispatch_scalar_ref_variants!(self, inner, { inner.hash_scalar(state) })
1143    }
1144}
1145
1146/// Feeds the raw scalar reference of `datum` to the given `state`, which should behave the same
1147/// as [`crate::array::Array::hash_at`], where NULL value will be carefully handled.
1148///
1149/// **FIXME**: the result of this function might be different from [`std::hash::Hash`] due to the
1150/// type alias of `DatumRef = Option<_>`, we should manually implement [`std::hash::Hash`] for
1151/// [`DatumRef`] in the future when it becomes a newtype. (#477)
1152#[inline(always)]
1153pub fn hash_datum(datum: impl ToDatumRef, state: &mut impl std::hash::Hasher) {
1154    match datum.to_datum_ref() {
1155        Some(scalar_ref) => scalar_ref.hash(state),
1156        None => NULL_VAL_FOR_HASH.hash(state),
1157    }
1158}
1159
1160impl ScalarRefImpl<'_> {
1161    pub fn binary_format(&self, data_type: &DataType) -> to_binary::Result<Bytes> {
1162        use self::to_binary::ToBinary;
1163        self.to_binary_with_type(data_type)
1164    }
1165
1166    pub fn text_format(&self, data_type: &DataType) -> String {
1167        self.to_text_with_type(data_type)
1168    }
1169
1170    /// Serialize the scalar into the `memcomparable` format.
1171    pub fn serialize(
1172        &self,
1173        ser: &mut memcomparable::Serializer<impl BufMut>,
1174    ) -> memcomparable::Result<()> {
1175        match self {
1176            Self::Int16(v) => v.serialize(ser)?,
1177            Self::Int32(v) => v.serialize(ser)?,
1178            Self::Int64(v) => v.serialize(ser)?,
1179            Self::Serial(v) => v.serialize(ser)?,
1180            Self::Float32(v) => v.serialize(ser)?,
1181            Self::Float64(v) => v.serialize(ser)?,
1182            Self::Utf8(v) => v.serialize(ser)?,
1183            Self::Bytea(v) => ser.serialize_bytes(v)?,
1184            Self::Bool(v) => v.serialize(ser)?,
1185            Self::Decimal(v) => ser.serialize_decimal((*v).into())?,
1186            Self::Interval(v) => v.serialize(ser)?,
1187            Self::Date(v) => v.0.num_days_from_ce().serialize(ser)?,
1188            Self::Timestamp(v) => {
1189                v.0.and_utc().timestamp().serialize(&mut *ser)?;
1190                v.0.and_utc().timestamp_subsec_nanos().serialize(ser)?;
1191            }
1192            Self::Timestamptz(v) => v.serialize(ser)?,
1193            Self::Time(v) => {
1194                v.0.num_seconds_from_midnight().serialize(&mut *ser)?;
1195                v.0.nanosecond().serialize(ser)?;
1196            }
1197            Self::Int256(v) => v.memcmp_serialize(ser)?,
1198            Self::Jsonb(v) => v.memcmp_serialize(ser)?,
1199            Self::Struct(v) => v.memcmp_serialize(ser)?,
1200            Self::List(v) => v.memcmp_serialize(ser)?,
1201            Self::Map(v) => v.memcmp_serialize(ser)?,
1202            Self::Vector(v) => v.memcmp_serialize(ser)?,
1203        };
1204        Ok(())
1205    }
1206}
1207
1208impl ScalarImpl {
1209    /// Serialize the scalar into the `memcomparable` format.
1210    pub fn serialize(
1211        &self,
1212        ser: &mut memcomparable::Serializer<impl BufMut>,
1213    ) -> memcomparable::Result<()> {
1214        self.as_scalar_ref_impl().serialize(ser)
1215    }
1216
1217    /// Deserialize the scalar from the `memcomparable` format.
1218    pub fn deserialize(
1219        ty: &DataType,
1220        de: &mut memcomparable::Deserializer<impl Buf>,
1221    ) -> memcomparable::Result<Self> {
1222        use DataType as Ty;
1223        Ok(match ty {
1224            Ty::Int16 => Self::Int16(i16::deserialize(de)?),
1225            Ty::Int32 => Self::Int32(i32::deserialize(de)?),
1226            Ty::Int64 => Self::Int64(i64::deserialize(de)?),
1227            Ty::Int256 => Self::Int256(Int256::memcmp_deserialize(de)?),
1228            Ty::Serial => Self::Serial(Serial::from(i64::deserialize(de)?)),
1229            Ty::Float32 => Self::Float32(f32::deserialize(de)?.into()),
1230            Ty::Float64 => Self::Float64(f64::deserialize(de)?.into()),
1231            Ty::Varchar => Self::Utf8(Box::<str>::deserialize(de)?),
1232            Ty::Bytea => Self::Bytea(serde_bytes::ByteBuf::deserialize(de)?.into_vec().into()),
1233            Ty::Boolean => Self::Bool(bool::deserialize(de)?),
1234            Ty::Decimal => Self::Decimal(de.deserialize_decimal()?.into()),
1235            Ty::Interval => Self::Interval(Interval::deserialize(de)?),
1236            Ty::Time => Self::Time({
1237                let secs = u32::deserialize(&mut *de)?;
1238                let nano = u32::deserialize(de)?;
1239                Time::with_secs_nano(secs, nano)
1240                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1241            }),
1242            Ty::Timestamp => Self::Timestamp({
1243                let secs = i64::deserialize(&mut *de)?;
1244                let nsecs = u32::deserialize(de)?;
1245                Timestamp::with_secs_nsecs(secs, nsecs)
1246                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1247            }),
1248            Ty::Timestamptz => Self::Timestamptz(Timestamptz::deserialize(de)?),
1249            Ty::Date => Self::Date({
1250                let days = i32::deserialize(de)?;
1251                Date::with_days_since_ce(days)
1252                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1253            }),
1254            Ty::Jsonb => Self::Jsonb(JsonbVal::memcmp_deserialize(de)?),
1255            Ty::Struct(t) => StructValue::memcmp_deserialize(t.types(), de)?.to_scalar_value(),
1256            Ty::List(t) => ListValue::memcmp_deserialize(t, de)?.to_scalar_value(),
1257            Ty::Map(t) => MapValue::memcmp_deserialize(t, de)?.to_scalar_value(),
1258            Ty::Vector(dimension) => {
1259                VectorVal::memcmp_deserialize(*dimension, de)?.to_scalar_value()
1260            }
1261        })
1262    }
1263
1264    pub fn as_integral(&self) -> i64 {
1265        match self {
1266            Self::Int16(v) => *v as i64,
1267            Self::Int32(v) => *v as i64,
1268            Self::Int64(v) => *v,
1269            _ => panic!(
1270                "Can't convert ScalarImpl::{} to a integral",
1271                self.get_ident()
1272            ),
1273        }
1274    }
1275}
1276
1277/// Returns whether the `literal` matches the `data_type`.
1278pub fn literal_type_match(data_type: &DataType, literal: Option<&ScalarImpl>) -> bool {
1279    match literal {
1280        Some(scalar) => {
1281            macro_rules! matches {
1282                ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty }),*) => {
1283                    match (data_type, scalar) {
1284                        $(
1285                            (DataType::$data_type { .. }, ScalarImpl::$variant_name(_)) => true,
1286                            (DataType::$data_type { .. }, _) => false, // so that we won't forget to match a new logical type
1287                        )*
1288                    }
1289                }
1290            }
1291            for_all_variants! { matches }
1292        }
1293        None => true,
1294    }
1295}
1296
1297#[cfg(test)]
1298mod tests {
1299    use std::hash::{BuildHasher, Hasher};
1300
1301    use strum::IntoEnumIterator;
1302
1303    use super::*;
1304    use crate::util::hash_util::Crc32FastBuilder;
1305
1306    #[test]
1307    fn test_size() {
1308        use static_assertions::const_assert_eq;
1309
1310        use crate::array::*;
1311
1312        macro_rules! assert_item_size_eq {
1313            ($array:ty, $size:literal) => {
1314                const_assert_eq!(std::mem::size_of::<<$array as Array>::OwnedItem>(), $size);
1315            };
1316        }
1317
1318        assert_item_size_eq!(StructArray, 16); // Box<[Datum]>
1319        assert_item_size_eq!(ListArray, 8); // Box<ArrayImpl>
1320        assert_item_size_eq!(Utf8Array, 16); // Box<str>
1321        assert_item_size_eq!(IntervalArray, 16);
1322        assert_item_size_eq!(TimestampArray, 12);
1323
1324        // TODO: try to reduce the memory usage of `Decimal`, `ScalarImpl` and `Datum`.
1325        assert_item_size_eq!(DecimalArray, 20);
1326
1327        const_assert_eq!(std::mem::size_of::<ScalarImpl>(), 24);
1328        const_assert_eq!(std::mem::size_of::<ScalarRefImpl<'_>>(), 24);
1329        const_assert_eq!(std::mem::size_of::<Datum>(), 24);
1330        const_assert_eq!(std::mem::size_of::<StructType>(), 8);
1331        const_assert_eq!(std::mem::size_of::<DataType>(), 16);
1332    }
1333
1334    #[test]
1335    fn test_data_type_display() {
1336        let d: DataType =
1337            StructType::new(vec![("i", DataType::Int32), ("j", DataType::Varchar)]).into();
1338        assert_eq!(
1339            format!("{}", d),
1340            "struct<i integer, j character varying>".to_owned()
1341        );
1342    }
1343
1344    #[test]
1345    fn test_hash_implementation() {
1346        fn test(datum: Datum, data_type: DataType) {
1347            assert!(literal_type_match(&data_type, datum.as_ref()));
1348
1349            let mut builder = data_type.create_array_builder(6);
1350            for _ in 0..3 {
1351                builder.append_null();
1352                builder.append(&datum);
1353            }
1354            let array = builder.finish();
1355
1356            let hash_from_array = {
1357                let mut state = Crc32FastBuilder.build_hasher();
1358                array.hash_at(3, &mut state);
1359                state.finish()
1360            };
1361
1362            let hash_from_datum = {
1363                let mut state = Crc32FastBuilder.build_hasher();
1364                hash_datum(&datum, &mut state);
1365                state.finish()
1366            };
1367
1368            let hash_from_datum_ref = {
1369                let mut state = Crc32FastBuilder.build_hasher();
1370                hash_datum(datum.to_datum_ref(), &mut state);
1371                state.finish()
1372            };
1373
1374            assert_eq!(hash_from_array, hash_from_datum);
1375            assert_eq!(hash_from_datum, hash_from_datum_ref);
1376        }
1377
1378        for name in DataTypeName::iter() {
1379            let (scalar, data_type) = match name {
1380                DataTypeName::Boolean => (ScalarImpl::Bool(true), DataType::Boolean),
1381                DataTypeName::Int16 => (ScalarImpl::Int16(233), DataType::Int16),
1382                DataTypeName::Int32 => (ScalarImpl::Int32(233333), DataType::Int32),
1383                DataTypeName::Int64 => (ScalarImpl::Int64(233333333333), DataType::Int64),
1384                DataTypeName::Int256 => (
1385                    ScalarImpl::Int256(233333333333_i64.into()),
1386                    DataType::Int256,
1387                ),
1388                DataTypeName::Serial => (ScalarImpl::Serial(233333333333.into()), DataType::Serial),
1389                DataTypeName::Float32 => (ScalarImpl::Float32(23.33.into()), DataType::Float32),
1390                DataTypeName::Float64 => (
1391                    ScalarImpl::Float64(23.333333333333.into()),
1392                    DataType::Float64,
1393                ),
1394                DataTypeName::Decimal => (
1395                    ScalarImpl::Decimal("233.33".parse().unwrap()),
1396                    DataType::Decimal,
1397                ),
1398                DataTypeName::Date => (
1399                    ScalarImpl::Date(Date::from_ymd_uncheck(2333, 3, 3)),
1400                    DataType::Date,
1401                ),
1402                DataTypeName::Varchar => (ScalarImpl::Utf8("233".into()), DataType::Varchar),
1403                DataTypeName::Bytea => (
1404                    ScalarImpl::Bytea("\\x233".as_bytes().into()),
1405                    DataType::Bytea,
1406                ),
1407                DataTypeName::Time => (
1408                    ScalarImpl::Time(Time::from_hms_uncheck(2, 3, 3)),
1409                    DataType::Time,
1410                ),
1411                DataTypeName::Timestamp => (
1412                    ScalarImpl::Timestamp(Timestamp::from_timestamp_uncheck(23333333, 2333)),
1413                    DataType::Timestamp,
1414                ),
1415                DataTypeName::Timestamptz => (
1416                    ScalarImpl::Timestamptz(Timestamptz::from_micros(233333333)),
1417                    DataType::Timestamptz,
1418                ),
1419                DataTypeName::Interval => (
1420                    ScalarImpl::Interval(Interval::from_month_day_usec(2, 3, 3333)),
1421                    DataType::Interval,
1422                ),
1423                DataTypeName::Jsonb => (ScalarImpl::Jsonb(JsonbVal::null()), DataType::Jsonb),
1424                DataTypeName::Struct => (
1425                    ScalarImpl::Struct(StructValue::new(vec![
1426                        ScalarImpl::Int64(233).into(),
1427                        ScalarImpl::Float64(23.33.into()).into(),
1428                    ])),
1429                    DataType::Struct(StructType::new(vec![
1430                        ("a", DataType::Int64),
1431                        ("b", DataType::Float64),
1432                    ])),
1433                ),
1434                DataTypeName::List => (
1435                    ScalarImpl::List(ListValue::from_iter([233i64, 2333])),
1436                    DataType::List(Box::new(DataType::Int64)),
1437                ),
1438                DataTypeName::Vector => (
1439                    ScalarImpl::Vector(VectorVal::from_iter(
1440                        (0..VectorVal::TEST_VECTOR_DIMENSION)
1441                            .map(|i| ((i + 1) as f32).try_into().unwrap()),
1442                    )),
1443                    DataType::Vector(VectorVal::TEST_VECTOR_DIMENSION),
1444                ),
1445                DataTypeName::Map => {
1446                    // map is not hashable
1447                    continue;
1448                }
1449            };
1450
1451            test(Some(scalar), data_type.clone());
1452            test(None, data_type);
1453        }
1454    }
1455
1456    #[test]
1457    fn test_data_type_from_str() {
1458        assert_eq!(DataType::from_str("bool").unwrap(), DataType::Boolean);
1459        assert_eq!(DataType::from_str("boolean").unwrap(), DataType::Boolean);
1460        assert_eq!(DataType::from_str("BOOL").unwrap(), DataType::Boolean);
1461        assert_eq!(DataType::from_str("BOOLEAN").unwrap(), DataType::Boolean);
1462
1463        assert_eq!(DataType::from_str("int2").unwrap(), DataType::Int16);
1464        assert_eq!(DataType::from_str("smallint").unwrap(), DataType::Int16);
1465        assert_eq!(DataType::from_str("INT2").unwrap(), DataType::Int16);
1466        assert_eq!(DataType::from_str("SMALLINT").unwrap(), DataType::Int16);
1467
1468        assert_eq!(DataType::from_str("int4").unwrap(), DataType::Int32);
1469        assert_eq!(DataType::from_str("integer").unwrap(), DataType::Int32);
1470        assert_eq!(DataType::from_str("int4").unwrap(), DataType::Int32);
1471        assert_eq!(DataType::from_str("INT4").unwrap(), DataType::Int32);
1472        assert_eq!(DataType::from_str("INTEGER").unwrap(), DataType::Int32);
1473        assert_eq!(DataType::from_str("INT").unwrap(), DataType::Int32);
1474
1475        assert_eq!(DataType::from_str("int8").unwrap(), DataType::Int64);
1476        assert_eq!(DataType::from_str("bigint").unwrap(), DataType::Int64);
1477        assert_eq!(DataType::from_str("INT8").unwrap(), DataType::Int64);
1478        assert_eq!(DataType::from_str("BIGINT").unwrap(), DataType::Int64);
1479
1480        assert_eq!(DataType::from_str("rw_int256").unwrap(), DataType::Int256);
1481        assert_eq!(DataType::from_str("RW_INT256").unwrap(), DataType::Int256);
1482
1483        assert_eq!(DataType::from_str("float4").unwrap(), DataType::Float32);
1484        assert_eq!(DataType::from_str("real").unwrap(), DataType::Float32);
1485        assert_eq!(DataType::from_str("FLOAT4").unwrap(), DataType::Float32);
1486        assert_eq!(DataType::from_str("REAL").unwrap(), DataType::Float32);
1487
1488        assert_eq!(DataType::from_str("float8").unwrap(), DataType::Float64);
1489        assert_eq!(
1490            DataType::from_str("double precision").unwrap(),
1491            DataType::Float64
1492        );
1493        assert_eq!(DataType::from_str("FLOAT8").unwrap(), DataType::Float64);
1494        assert_eq!(
1495            DataType::from_str("DOUBLE PRECISION").unwrap(),
1496            DataType::Float64
1497        );
1498
1499        assert_eq!(DataType::from_str("decimal").unwrap(), DataType::Decimal);
1500        assert_eq!(DataType::from_str("DECIMAL").unwrap(), DataType::Decimal);
1501        assert_eq!(DataType::from_str("numeric").unwrap(), DataType::Decimal);
1502        assert_eq!(DataType::from_str("NUMERIC").unwrap(), DataType::Decimal);
1503
1504        assert_eq!(DataType::from_str("date").unwrap(), DataType::Date);
1505        assert_eq!(DataType::from_str("DATE").unwrap(), DataType::Date);
1506
1507        assert_eq!(DataType::from_str("varchar").unwrap(), DataType::Varchar);
1508        assert_eq!(DataType::from_str("VARCHAR").unwrap(), DataType::Varchar);
1509
1510        assert_eq!(DataType::from_str("time").unwrap(), DataType::Time);
1511        assert_eq!(
1512            DataType::from_str("time without time zone").unwrap(),
1513            DataType::Time
1514        );
1515        assert_eq!(DataType::from_str("TIME").unwrap(), DataType::Time);
1516        assert_eq!(
1517            DataType::from_str("TIME WITHOUT TIME ZONE").unwrap(),
1518            DataType::Time
1519        );
1520
1521        assert_eq!(
1522            DataType::from_str("timestamp").unwrap(),
1523            DataType::Timestamp
1524        );
1525        assert_eq!(
1526            DataType::from_str("timestamp without time zone").unwrap(),
1527            DataType::Timestamp
1528        );
1529        assert_eq!(
1530            DataType::from_str("TIMESTAMP").unwrap(),
1531            DataType::Timestamp
1532        );
1533        assert_eq!(
1534            DataType::from_str("TIMESTAMP WITHOUT TIME ZONE").unwrap(),
1535            DataType::Timestamp
1536        );
1537
1538        assert_eq!(
1539            DataType::from_str("timestamptz").unwrap(),
1540            DataType::Timestamptz
1541        );
1542        assert_eq!(
1543            DataType::from_str("timestamp with time zone").unwrap(),
1544            DataType::Timestamptz
1545        );
1546        assert_eq!(
1547            DataType::from_str("TIMESTAMPTZ").unwrap(),
1548            DataType::Timestamptz
1549        );
1550        assert_eq!(
1551            DataType::from_str("TIMESTAMP WITH TIME ZONE").unwrap(),
1552            DataType::Timestamptz
1553        );
1554
1555        assert_eq!(DataType::from_str("interval").unwrap(), DataType::Interval);
1556        assert_eq!(DataType::from_str("INTERVAL").unwrap(), DataType::Interval);
1557
1558        assert_eq!(
1559            DataType::from_str("int2[]").unwrap(),
1560            DataType::List(Box::new(DataType::Int16))
1561        );
1562        assert_eq!(
1563            DataType::from_str("int[]").unwrap(),
1564            DataType::List(Box::new(DataType::Int32))
1565        );
1566        assert_eq!(
1567            DataType::from_str("int8[]").unwrap(),
1568            DataType::List(Box::new(DataType::Int64))
1569        );
1570        assert_eq!(
1571            DataType::from_str("float4[]").unwrap(),
1572            DataType::List(Box::new(DataType::Float32))
1573        );
1574        assert_eq!(
1575            DataType::from_str("float8[]").unwrap(),
1576            DataType::List(Box::new(DataType::Float64))
1577        );
1578        assert_eq!(
1579            DataType::from_str("decimal[]").unwrap(),
1580            DataType::List(Box::new(DataType::Decimal))
1581        );
1582        assert_eq!(
1583            DataType::from_str("varchar[]").unwrap(),
1584            DataType::List(Box::new(DataType::Varchar))
1585        );
1586        assert_eq!(
1587            DataType::from_str("date[]").unwrap(),
1588            DataType::List(Box::new(DataType::Date))
1589        );
1590        assert_eq!(
1591            DataType::from_str("time[]").unwrap(),
1592            DataType::List(Box::new(DataType::Time))
1593        );
1594        assert_eq!(
1595            DataType::from_str("timestamp[]").unwrap(),
1596            DataType::List(Box::new(DataType::Timestamp))
1597        );
1598        assert_eq!(
1599            DataType::from_str("timestamptz[]").unwrap(),
1600            DataType::List(Box::new(DataType::Timestamptz))
1601        );
1602        assert_eq!(
1603            DataType::from_str("interval[]").unwrap(),
1604            DataType::List(Box::new(DataType::Interval))
1605        );
1606
1607        assert_eq!(
1608            DataType::from_str("record").unwrap(),
1609            DataType::Struct(StructType::unnamed(vec![]))
1610        );
1611        assert_eq!(
1612            DataType::from_str("struct<a int4, b varchar>").unwrap(),
1613            DataType::Struct(StructType::new(vec![
1614                ("a", DataType::Int32),
1615                ("b", DataType::Varchar)
1616            ]))
1617        );
1618    }
1619
1620    #[test]
1621    fn test_can_alter() {
1622        let cannots = [
1623            (DataType::Int32, None),
1624            (DataType::List(DataType::Int32.into()), None),
1625            (
1626                MapType::from_kv(DataType::Varchar, DataType::List(DataType::Int32.into())).into(),
1627                None,
1628            ),
1629            (
1630                StructType::new([("a", DataType::Int32)]).into(),
1631                Some(false),
1632            ),
1633            (
1634                MapType::from_kv(
1635                    DataType::Varchar,
1636                    StructType::new([("a", DataType::Int32)]).into(),
1637                )
1638                .into(),
1639                Some(false),
1640            ),
1641        ];
1642        for (cannot, why) in cannots {
1643            assert_eq!(cannot.can_alter(), why, "{cannot:?}");
1644        }
1645
1646        let cans = [
1647            StructType::new([
1648                ("a", DataType::Int32),
1649                ("b", DataType::List(DataType::Int32.into())),
1650            ])
1651            .with_ids([ColumnId::new(1), ColumnId::new(2)])
1652            .into(),
1653            DataType::List(Box::new(DataType::Struct(
1654                StructType::new([("a", DataType::Int32)]).with_ids([ColumnId::new(1)]),
1655            ))),
1656            MapType::from_kv(
1657                DataType::Varchar,
1658                StructType::new([("a", DataType::Int32)])
1659                    .with_ids([ColumnId::new(1)])
1660                    .into(),
1661            )
1662            .into(),
1663        ];
1664        for can in cans {
1665            assert_eq!(can.can_alter(), Some(true), "{can:?}");
1666        }
1667    }
1668}