risingwave_common/types/
mod.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Data types in RisingWave.
16
17// NOTE: When adding or modifying data types, remember to update the type matrix in
18// src/expr/macro/src/types.rs
19
20use std::fmt::Debug;
21use std::hash::Hash;
22use std::str::FromStr;
23
24use bytes::{Buf, BufMut, Bytes};
25use chrono::{Datelike, Timelike};
26use itertools::Itertools;
27use parse_display::{Display, FromStr};
28use paste::paste;
29use postgres_types::{FromSql, IsNull, ToSql, Type};
30use risingwave_common_estimate_size::{EstimateSize, ZeroHeapSize};
31use risingwave_pb::data::PbDataType;
32use risingwave_pb::data::data_type::PbTypeName;
33use rw_iter_util::ZipEqFast as _;
34use serde::{Deserialize, Serialize, Serializer};
35use strum_macros::EnumDiscriminants;
36use thiserror_ext::AsReport;
37
38use crate::array::{
39    ArrayBuilderImpl, ArrayError, ArrayResult, NULL_VAL_FOR_HASH, PrimitiveArrayItemType,
40};
41// Complex type's value is based on the array
42pub use crate::array::{
43    ListRef, ListValue, MapRef, MapValue, StructRef, StructValue, VectorRef, VectorVal,
44};
45use crate::cast::{str_to_bool, str_to_bytea};
46use crate::catalog::ColumnId;
47use crate::error::BoxedError;
48use crate::{
49    dispatch_data_types, dispatch_scalar_ref_variants, dispatch_scalar_variants, for_all_variants,
50};
51
52mod cow;
53mod datetime;
54mod decimal;
55mod fields;
56mod from_sql;
57mod interval;
58mod jsonb;
59mod macros;
60mod map_type;
61mod native_type;
62mod num256;
63mod ops;
64mod ordered;
65mod ordered_float;
66mod postgres_type;
67mod scalar_impl;
68mod sentinel;
69mod serial;
70mod struct_type;
71mod successor;
72mod timestamptz;
73mod to_binary;
74mod to_sql;
75mod to_text;
76mod with_data_type;
77
78pub use fields::Fields;
79pub use risingwave_fields_derive::Fields;
80
81pub use self::cow::DatumCow;
82pub use self::datetime::{Date, Time, Timestamp};
83pub use self::decimal::{Decimal, PowError as DecimalPowError};
84pub use self::interval::{DateTimeField, Interval, IntervalDisplay, test_utils};
85pub use self::jsonb::{JsonbRef, JsonbVal};
86pub use self::map_type::MapType;
87pub use self::native_type::*;
88pub use self::num256::{Int256, Int256Ref};
89pub use self::ops::{CheckedAdd, IsNegative};
90pub use self::ordered::*;
91pub use self::ordered_float::{FloatExt, IntoOrdered};
92pub use self::scalar_impl::*;
93pub use self::sentinel::Sentinelled;
94pub use self::serial::Serial;
95pub use self::struct_type::StructType;
96pub use self::successor::Successor;
97pub use self::timestamptz::*;
98pub use self::to_text::ToText;
99pub use self::with_data_type::WithDataType;
100
101/// A 32-bit floating point type with total order.
///
/// Alias of `OrderedFloat<f32>` from the `ordered_float` submodule declared in this module.
102pub type F32 = ordered_float::OrderedFloat<f32>;
103
104/// A 64-bit floating point type with total order.
///
/// Alias of `OrderedFloat<f64>` from the `ordered_float` submodule declared in this module.
105pub type F64 = ordered_float::OrderedFloat<f64>;
106
107/// The set of datatypes that are supported in RisingWave.
108///
109/// # Trait implementations
110///
111/// - `EnumDiscriminants` generates [`DataTypeName`] enum with the same variants,
112///   but without data fields.
113/// - `FromStr` is only used internally for tests.
114///   The generated implementation isn't efficient, and doesn't handle whitespaces, etc.
/// - `Display` renders each variant with the text given in its `#[display(...)]` attribute;
///   variants carrying a payload interpolate it through `{0}`.
///
/// # Variant order
///
/// [`DataTypeName`] derives `Ord`/`PartialOrd`, and derived comparisons follow declaration
/// order, so reordering the variants below changes how `DataTypeName` values compare.
115#[derive(Debug, Display, Clone, PartialEq, Eq, Hash, EnumDiscriminants, FromStr)]
116#[strum_discriminants(derive(Hash, Ord, PartialOrd))]
117#[strum_discriminants(name(DataTypeName))]
118#[strum_discriminants(vis(pub))]
119#[cfg_attr(test, strum_discriminants(derive(strum_macros::EnumIter)))]
120pub enum DataType {
121    #[display("boolean")]
122    #[from_str(regex = "(?i)^bool$|^boolean$")]
123    Boolean,
124    #[display("smallint")]
125    #[from_str(regex = "(?i)^smallint$|^int2$")]
126    Int16,
127    #[display("integer")]
128    #[from_str(regex = "(?i)^integer$|^int$|^int4$")]
129    Int32,
130    #[display("bigint")]
131    #[from_str(regex = "(?i)^bigint$|^int8$")]
132    Int64,
133    #[display("real")]
134    #[from_str(regex = "(?i)^real$|^float4$")]
135    Float32,
136    #[display("double precision")]
137    #[from_str(regex = "(?i)^double precision$|^float8$")]
138    Float64,
139    #[display("numeric")]
140    #[from_str(regex = "(?i)^numeric$|^decimal$")]
141    Decimal,
142    #[display("date")]
143    #[from_str(regex = "(?i)^date$")]
144    Date,
145    #[display("character varying")]
146    #[from_str(regex = "(?i)^character varying$|^varchar$")]
147    Varchar,
148    #[display("time without time zone")]
149    #[from_str(regex = "(?i)^time$|^time without time zone$")]
150    Time,
151    #[display("timestamp without time zone")]
152    #[from_str(regex = "(?i)^timestamp$|^timestamp without time zone$")]
153    Timestamp,
154    #[display("timestamp with time zone")]
155    #[from_str(regex = "(?i)^timestamptz$|^timestamp with time zone$")]
156    Timestamptz,
157    #[display("interval")]
158    #[from_str(regex = "(?i)^interval$")]
159    Interval,
    // Displayed exactly as the inner `StructType` displays itself. The catch-all pattern
    // captures the whole input for field 0, so parsing is presumably delegated to
    // `StructType`'s own `FromStr` — TODO confirm.
160    #[display("{0}")]
161    #[from_str(regex = "(?i)^(?P<0>.+)$")]
162    Struct(StructType),
    // Displayed as the element type followed by `[]`; the text before the trailing `[]` is
    // captured for field 0 (a `Box<DataType>`).
163    #[display("{0}[]")]
164    #[from_str(regex = r"(?i)^(?P<0>.+)\[\]$")]
165    List(Box<DataType>),
166    #[display("bytea")]
167    #[from_str(regex = "(?i)^bytea$")]
168    Bytea,
169    #[display("jsonb")]
170    #[from_str(regex = "(?i)^jsonb$")]
171    Jsonb,
172    #[display("serial")]
173    #[from_str(regex = "(?i)^serial$")]
174    Serial,
175    #[display("rw_int256")]
176    #[from_str(regex = "(?i)^rw_int256$")]
177    Int256,
    // NOTE(review): same catch-all pattern as `Struct` above; which variant a given input
    // ends up as presumably depends on which inner parser accepts it — confirm.
178    #[display("{0}")]
179    #[from_str(regex = "(?i)^(?P<0>.+)$")]
180    Map(MapType),
    // Displayed as `vector(N)` where `N` is the `usize` payload.
181    #[display("vector({0})")]
182    #[from_str(regex = "(?i)^vector\\((?P<0>.+)\\)$")]
183    Vector(usize),
184}
185
// Explicit negative impl: `DataType` does not, and cannot, implement `PartialOrd`.
// NOTE(review): presumably this keeps callers from relying on an arbitrary ordering of
// data types — confirm the intent.
186impl !PartialOrd for DataType {}
187
188// For DataType::List
189impl std::str::FromStr for Box<DataType> {
190    type Err = BoxedError;
191
192    fn from_str(s: &str) -> Result<Self, Self::Err> {
193        Ok(Box::new(DataType::from_str(s)?))
194    }
195}
196
// Marks `DataType` with the `ZeroHeapSize` marker from `risingwave_common_estimate_size`.
// NOTE(review): `List` boxes its element type, so this presumably means heap usage of nested
// types is intentionally left out of size estimates — confirm.
197impl ZeroHeapSize for DataType {}
198
199impl TryFrom<DataTypeName> for DataType {
200    type Error = &'static str;
201
202    fn try_from(type_name: DataTypeName) -> Result<Self, Self::Error> {
203        match type_name {
204            DataTypeName::Boolean => Ok(DataType::Boolean),
205            DataTypeName::Int16 => Ok(DataType::Int16),
206            DataTypeName::Int32 => Ok(DataType::Int32),
207            DataTypeName::Int64 => Ok(DataType::Int64),
208            DataTypeName::Int256 => Ok(DataType::Int256),
209            DataTypeName::Serial => Ok(DataType::Serial),
210            DataTypeName::Decimal => Ok(DataType::Decimal),
211            DataTypeName::Float32 => Ok(DataType::Float32),
212            DataTypeName::Float64 => Ok(DataType::Float64),
213            DataTypeName::Varchar => Ok(DataType::Varchar),
214            DataTypeName::Bytea => Ok(DataType::Bytea),
215            DataTypeName::Date => Ok(DataType::Date),
216            DataTypeName::Timestamp => Ok(DataType::Timestamp),
217            DataTypeName::Timestamptz => Ok(DataType::Timestamptz),
218            DataTypeName::Time => Ok(DataType::Time),
219            DataTypeName::Interval => Ok(DataType::Interval),
220            DataTypeName::Jsonb => Ok(DataType::Jsonb),
221            DataTypeName::Struct
222            | DataTypeName::List
223            | DataTypeName::Map
224            | DataTypeName::Vector => Err(
225                "Functions returning parameterized types can not be inferred. Please use `FunctionCall::new_unchecked`.",
226            ),
227        }
228    }
229}
230
231impl From<&PbDataType> for DataType {
232    fn from(proto: &PbDataType) -> DataType {
233        match proto.get_type_name().expect("missing type field") {
234            PbTypeName::TypeUnspecified => unreachable!(),
235            PbTypeName::Int16 => DataType::Int16,
236            PbTypeName::Int32 => DataType::Int32,
237            PbTypeName::Int64 => DataType::Int64,
238            PbTypeName::Serial => DataType::Serial,
239            PbTypeName::Float => DataType::Float32,
240            PbTypeName::Double => DataType::Float64,
241            PbTypeName::Boolean => DataType::Boolean,
242            PbTypeName::Varchar => DataType::Varchar,
243            PbTypeName::Date => DataType::Date,
244            PbTypeName::Time => DataType::Time,
245            PbTypeName::Timestamp => DataType::Timestamp,
246            PbTypeName::Timestamptz => DataType::Timestamptz,
247            PbTypeName::Decimal => DataType::Decimal,
248            PbTypeName::Interval => DataType::Interval,
249            PbTypeName::Bytea => DataType::Bytea,
250            PbTypeName::Jsonb => DataType::Jsonb,
251            PbTypeName::Struct => {
252                let fields: Vec<DataType> = proto.field_type.iter().map(|f| f.into()).collect_vec();
253                let field_names: Vec<String> = proto.field_names.iter().cloned().collect_vec();
254                let field_ids = (proto.field_ids.iter().copied())
255                    .map(ColumnId::new)
256                    .collect_vec();
257
258                let mut struct_type = if proto.field_names.is_empty() {
259                    StructType::unnamed(fields)
260                } else {
261                    StructType::new(field_names.into_iter().zip_eq_fast(fields))
262                };
263                if !field_ids.is_empty() {
264                    struct_type = struct_type.with_ids(field_ids);
265                }
266                struct_type.into()
267            }
268            PbTypeName::List => DataType::List(
269                // The first (and only) item is the list element type.
270                Box::new((&proto.field_type[0]).into()),
271            ),
272            PbTypeName::Map => {
273                // Map is physically the same as a list.
274                // So the first (and only) item is the list element type.
275                let list_entries_type: DataType = (&proto.field_type[0]).into();
276                DataType::Map(MapType::from_entries(list_entries_type))
277            }
278            PbTypeName::Vector => DataType::Vector(proto.precision as _),
279            PbTypeName::Int256 => DataType::Int256,
280        }
281    }
282}
283
284impl From<PbDataType> for DataType {
285    fn from(proto: PbDataType) -> DataType {
286        DataType::from(&proto)
287    }
288}
289
290impl From<DataTypeName> for PbTypeName {
291    fn from(type_name: DataTypeName) -> Self {
292        match type_name {
293            DataTypeName::Boolean => PbTypeName::Boolean,
294            DataTypeName::Int16 => PbTypeName::Int16,
295            DataTypeName::Int32 => PbTypeName::Int32,
296            DataTypeName::Int64 => PbTypeName::Int64,
297            DataTypeName::Serial => PbTypeName::Serial,
298            DataTypeName::Float32 => PbTypeName::Float,
299            DataTypeName::Float64 => PbTypeName::Double,
300            DataTypeName::Varchar => PbTypeName::Varchar,
301            DataTypeName::Date => PbTypeName::Date,
302            DataTypeName::Timestamp => PbTypeName::Timestamp,
303            DataTypeName::Timestamptz => PbTypeName::Timestamptz,
304            DataTypeName::Time => PbTypeName::Time,
305            DataTypeName::Interval => PbTypeName::Interval,
306            DataTypeName::Decimal => PbTypeName::Decimal,
307            DataTypeName::Bytea => PbTypeName::Bytea,
308            DataTypeName::Jsonb => PbTypeName::Jsonb,
309            DataTypeName::Struct => PbTypeName::Struct,
310            DataTypeName::List => PbTypeName::List,
311            DataTypeName::Int256 => PbTypeName::Int256,
312            DataTypeName::Map => PbTypeName::Map,
313            DataTypeName::Vector => PbTypeName::Vector,
314        }
315    }
316}
317
318/// Convenient macros to generate match arms for [`DataType`].
///
/// Each macro expands to an or-pattern of `DataType` variants. The macros are exported at the
/// crate root under their underscore-prefixed names (`#[macro_export]`) and re-exported from
/// this module under short aliases, e.g. `data_types::simple!()`.
///
/// The expansions name `DataType` unqualified, so `DataType` must be in scope wherever a
/// macro is invoked.
319pub mod data_types {
    // Needed by the macro expansions used inside this module (see the function at the end).
320    use super::DataType;
321
322    /// Numeric [`DataType`]s supported to be `offset` of `RANGE` frame.
323    #[macro_export]
324    macro_rules! _range_frame_numeric_data_types {
325        () => {
326            DataType::Int16
327                | DataType::Int32
328                | DataType::Int64
329                | DataType::Float32
330                | DataType::Float64
331                | DataType::Decimal
332        };
333    }
334    pub use _range_frame_numeric_data_types as range_frame_numeric;
335
336    /// Date/time [`DataType`]s supported to be `offset` of `RANGE` frame.
337    #[macro_export]
338    macro_rules! _range_frame_datetime_data_types {
339        () => {
340            DataType::Date
341                | DataType::Time
342                | DataType::Timestamp
343                | DataType::Timestamptz
344                | DataType::Interval
345        };
346    }
347    pub use _range_frame_datetime_data_types as range_frame_datetime;
348
349    /// Data types that do not have inner fields.
    ///
    /// `DataType::Vector` is not part of this set.
350    #[macro_export]
351    macro_rules! _simple_data_types {
352        () => {
353            DataType::Boolean
354                | DataType::Int16
355                | DataType::Int32
356                | DataType::Int64
357                | DataType::Float32
358                | DataType::Float64
359                | DataType::Decimal
360                | DataType::Date
361                | DataType::Varchar
362                | DataType::Time
363                | DataType::Timestamp
364                | DataType::Timestamptz
365                | DataType::Interval
366                | DataType::Bytea
367                | DataType::Jsonb
368                | DataType::Serial
369                | DataType::Int256
370        };
371    }
372    pub use _simple_data_types as simple;
373
374    /// Data types that have inner fields.
    ///
    /// Covers `Struct`, `List` and `Map`; `DataType::Vector` is not part of this set either.
    /// The `{ .. }` patterns match the variants regardless of their payload.
375    #[macro_export]
376    macro_rules! _composite_data_types {
377        () => {
378            DataType::Struct { .. } | DataType::List { .. } | DataType::Map { .. }
379        };
380    }
381    pub use _composite_data_types as composite;
382
383    /// Test that all data types are covered either by `simple!()` or `composite!()`.
    ///
    /// This is a compile-time check only: the `match` has no wildcard arm, so adding a
    /// `DataType` variant without classifying it fails to compile. `Vector` is listed
    /// explicitly because it currently belongs to neither macro.
384    fn _simple_composite_data_types_exhausted(dt: DataType) {
385        match dt {
386            simple!() => {}
387            DataType::Vector(_) => todo!("VECTOR_PLACEHOLDER"),
388            composite!() => {}
389        }
390    }
391}
392
393impl DataType {
394    pub fn create_array_builder(&self, capacity: usize) -> ArrayBuilderImpl {
395        use crate::array::*;
396
397        dispatch_data_types!(self, [B = ArrayBuilder], {
398            B::with_type(capacity, self.clone()).into()
399        })
400    }
401
402    pub fn type_name(&self) -> DataTypeName {
403        DataTypeName::from(self)
404    }
405
406    pub fn prost_type_name(&self) -> PbTypeName {
407        self.type_name().into()
408    }
409
410    pub fn to_protobuf(&self) -> PbDataType {
411        let mut pb = PbDataType {
412            type_name: self.prost_type_name() as i32,
413            is_nullable: true,
414            ..Default::default()
415        };
416        match self {
417            DataType::Struct(t) => {
418                if !t.is_unnamed() {
419                    // To be consistent with `From<&PbDataType>`,
420                    // we only set field names when it's a named struct.
421                    pb.field_names = t.names().map(|s| s.into()).collect();
422                }
423                pb.field_type = t.types().map(|f| f.to_protobuf()).collect();
424                if let Some(ids) = t.ids() {
425                    pb.field_ids = ids.map(|id| id.get_id()).collect();
426                }
427            }
428            DataType::List(datatype) => {
429                pb.field_type = vec![datatype.to_protobuf()];
430            }
431            DataType::Map(datatype) => {
432                // Same as List<Struct<K,V>>
433                pb.field_type = vec![datatype.clone().into_struct().to_protobuf()];
434            }
435            DataType::Vector(size) => {
436                pb.precision = *size as _;
437            }
438            DataType::Boolean
439            | DataType::Int16
440            | DataType::Int32
441            | DataType::Int64
442            | DataType::Float32
443            | DataType::Float64
444            | DataType::Decimal
445            | DataType::Date
446            | DataType::Varchar
447            | DataType::Time
448            | DataType::Timestamp
449            | DataType::Timestamptz
450            | DataType::Interval
451            | DataType::Bytea
452            | DataType::Jsonb
453            | DataType::Serial
454            | DataType::Int256 => (),
455        }
456        pb
457    }
458
459    pub fn is_numeric(&self) -> bool {
460        matches!(
461            self,
462            DataType::Int16
463                | DataType::Int32
464                | DataType::Int64
465                | DataType::Serial
466                | DataType::Float32
467                | DataType::Float64
468                | DataType::Decimal
469        )
470    }
471
472    /// Returns whether the data type does not have inner fields.
473    pub fn is_simple(&self) -> bool {
474        matches!(self, data_types::simple!())
475    }
476
477    /// Returns whether the data type has inner fields.
478    pub fn is_composite(&self) -> bool {
479        matches!(self, data_types::composite!())
480    }
481
482    pub fn is_array(&self) -> bool {
483        matches!(self, DataType::List(_))
484    }
485
486    pub fn is_struct(&self) -> bool {
487        matches!(self, DataType::Struct(_))
488    }
489
490    pub fn is_map(&self) -> bool {
491        matches!(self, DataType::Map(_))
492    }
493
494    pub fn is_int(&self) -> bool {
495        matches!(self, DataType::Int16 | DataType::Int32 | DataType::Int64)
496    }
497
498    /// Returns the output type of time window function on a given input type.
499    pub fn window_of(input: &DataType) -> Option<DataType> {
500        match input {
501            DataType::Timestamptz => Some(DataType::Timestamptz),
502            DataType::Timestamp | DataType::Date => Some(DataType::Timestamp),
503            _ => None,
504        }
505    }
506
507    pub fn as_struct(&self) -> &StructType {
508        match self {
509            DataType::Struct(t) => t,
510            t => panic!("expect struct type, got {t}"),
511        }
512    }
513
514    pub fn into_struct(self) -> StructType {
515        match self {
516            DataType::Struct(t) => t,
517            t => panic!("expect struct type, got {t}"),
518        }
519    }
520
521    pub fn as_map(&self) -> &MapType {
522        match self {
523            DataType::Map(t) => t,
524            t => panic!("expect map type, got {t}"),
525        }
526    }
527
528    pub fn into_map(self) -> MapType {
529        match self {
530            DataType::Map(t) => t,
531            t => panic!("expect map type, got {t}"),
532        }
533    }
534
535    /// Returns the inner element's type of a list type.
536    ///
537    /// # Panics
538    ///
539    /// Panics if the type is not a list type.
540    pub fn as_list_element_type(&self) -> &DataType {
541        match self {
542            DataType::List(t) => t,
543            t => panic!("expect list type, got {t}"),
544        }
545    }
546
547    pub fn into_list_element_type(self) -> DataType {
548        match self {
549            DataType::List(t) => *t,
550            t => panic!("expect list type, got {t}"),
551        }
552    }
553
554    /// Return a new type that removes the outer list, and get the innermost element type.
555    ///
556    /// Use [`DataType::as_list_element_type`] if you only want the element type of a list.
557    ///
558    /// ```
559    /// use risingwave_common::types::DataType::*;
560    /// assert_eq!(List(Box::new(Int32)).unnest_list(), &Int32);
561    /// assert_eq!(List(Box::new(List(Box::new(Int32)))).unnest_list(), &Int32);
562    /// ```
563    pub fn unnest_list(&self) -> &Self {
564        match self {
565            DataType::List(inner) => inner.unnest_list(),
566            _ => self,
567        }
568    }
569
570    /// Return the number of dimensions of this array/list type. Return `0` when this type is not an
571    /// array/list.
572    pub fn array_ndims(&self) -> usize {
573        let mut d = 0;
574        let mut t = self;
575        while let Self::List(inner) = t {
576            d += 1;
577            t = inner;
578        }
579        d
580    }
581
582    /// Compares the datatype with another, ignoring nested field names and ids.
583    pub fn equals_datatype(&self, other: &DataType) -> bool {
584        match (self, other) {
585            (Self::Struct(s1), Self::Struct(s2)) => s1.equals_datatype(s2),
586            (Self::List(d1), Self::List(d2)) => d1.equals_datatype(d2),
587            (Self::Map(m1), Self::Map(m2)) => {
588                m1.key().equals_datatype(m2.key()) && m1.value().equals_datatype(m2.value())
589            }
590            _ => self == other,
591        }
592    }
593
594    /// Whether a column with this data type can be altered to a new data type. This determines
595    /// the encoding of the column data.
596    ///
597    /// Returns...
598    /// - `None`, if the data type is simple or does not contain a struct type.
599    /// - `Some(true)`, if the data type contains a struct type with field ids ([`StructType::has_ids`]).
600    /// - `Some(false)`, if the data type contains a struct type without field ids.
601    pub fn can_alter(&self) -> Option<bool> {
602        match self {
603            data_types::simple!() => None,
604            DataType::Vector(_) => None,
605
606            DataType::Struct(struct_type) => {
607                // As long as we meet a struct type, we can check its `ids` field to determine if
608                // it can be altered.
609                let struct_can_alter = struct_type.has_ids();
610                // In debug build, we assert that once a struct type does (or does not) have ids,
611                // all its composite fields should have the same property.
612                if cfg!(debug_assertions) {
613                    for field in struct_type.types() {
614                        if let Some(field_can_alter) = field.can_alter() {
615                            assert_eq!(struct_can_alter, field_can_alter);
616                        }
617                    }
618                }
619                Some(struct_can_alter)
620            }
621
622            DataType::List(inner_type) => inner_type.can_alter(),
623            DataType::Map(map_type) => {
624                debug_assert!(
625                    map_type.key().is_simple(),
626                    "unexpected key type of map {map_type:?}"
627                );
628                map_type.value().can_alter()
629            }
630        }
631    }
632}
633
634impl From<StructType> for DataType {
635    fn from(value: StructType) -> Self {
636        Self::Struct(value)
637    }
638}
639
640impl From<DataType> for PbDataType {
641    fn from(data_type: DataType) -> Self {
642        data_type.to_protobuf()
643    }
644}
645
// Holds the shared bound list for `Scalar` and `ScalarRef` under a name that cannot be
// spelled outside this module.
646mod private {
647    use super::*;
648
649    // Note: put pub trait inside a private mod just makes the name private,
650    // The trait methods will still be publicly available...
651    // a.k.a. ["Voldemort type"](https://rust-lang.github.io/rfcs/2145-type-privacy.html#lint-3-voldemort-types-its-reachable-but-i-cant-name-it)
652
653    /// Common trait bounds of scalar and scalar reference types.
654    ///
    /// This is a trait alias. `Impl` is the enum the type converts to and from: in this file
    /// it is instantiated with `ScalarImpl` (by `Scalar`) and `ScalarRefImpl<'a>` (by
    /// `ScalarRef`).
    ///
655    /// NOTE(rc): `Hash` is not in the trait bound list, it's implemented as [`ScalarRef::hash_scalar`].
656    pub trait ScalarBounds<Impl> = Debug
657        + Send
658        + Sync
659        + Clone
660        + PartialEq
661        + Eq
662        // in default ascending order
663        + PartialOrd
664        + Ord
        // fallible down-conversion from the enum; fails with `ArrayError`
665        + TryFrom<Impl, Error = ArrayError>
666        // `ScalarImpl`/`ScalarRefImpl`
667        + Into<Impl>;
668}
669
670/// `Scalar` is a trait over all possible owned types in the evaluation
671/// framework.
672///
673/// `Scalar` is reciprocal to `ScalarRef`. Use `as_scalar_ref` to get a
674/// reference which has the same lifetime as `self`.
675pub trait Scalar: private::ScalarBounds<ScalarImpl> + 'static {
676    /// Type for reference of `Scalar`
677    type ScalarRefType<'a>: ScalarRef<'a, ScalarType = Self> + 'a
678    where
679        Self: 'a;
680
681    /// Get a reference to current scalar.
682    fn as_scalar_ref(&self) -> Self::ScalarRefType<'_>;
683
684    fn to_scalar_value(self) -> ScalarImpl {
685        self.into()
686    }
687}
688
689/// `ScalarRef` is a trait over all possible references in the evaluation
690/// framework.
691///
692/// `ScalarRef` is reciprocal to `Scalar`. Use `to_owned_scalar` to get an
693/// owned scalar.
///
/// Implementors must be `Copy`, so a scalar reference can be passed around by value.
694pub trait ScalarRef<'a>: private::ScalarBounds<ScalarRefImpl<'a>> + 'a + Copy {
695    /// `ScalarType` is the owned type of current `ScalarRef`.
    ///
    /// The bound ties the pair together: the owned type's `ScalarRefType<'a>` must be `Self`.
696    type ScalarType: Scalar<ScalarRefType<'a> = Self>;
697
698    /// Convert `ScalarRef` to an owned scalar.
699    fn to_owned_scalar(&self) -> Self::ScalarType;
700
701    /// A wrapped hash function to get the hash value for this scalar.
    ///
    /// Feeds this value into `state`; this method takes the place of a `Hash` bound on the
    /// trait.
702    fn hash_scalar<H: std::hash::Hasher>(&self, state: &mut H);
703}
704
705/// Define `ScalarImpl` and `ScalarRefImpl` with macro.
///
/// The macro takes a comma-separated list of
/// `{ data_type, variant_name, suffix_name, scalar, scalar_ref, array, builder }` entries and
/// emits one enum variant per entry. Only `variant_name`, `scalar` and `scalar_ref` are used
/// here; the remaining fields are accepted so the entry shape matches what the caller passes.
706macro_rules! scalar_impl_enum {
707    ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty } ),*) => {
708        /// `ScalarImpl` embeds all possible scalars in the evaluation framework.
709        ///
710        /// Note: `ScalarImpl` doesn't contain all information of its `DataType`,
711        /// so sometimes they need to be used together.
712        /// e.g., for `Struct`, we don't have the field names in the value.
713        ///
714        /// See `for_all_variants` for the definition.
715        #[derive(Debug, Clone, PartialEq, Eq, EstimateSize)]
716        pub enum ScalarImpl {
717            $( $variant_name($scalar) ),*
718        }
719
720        /// `ScalarRefImpl` embeds all possible scalar references in the evaluation
721        /// framework.
722        ///
723        /// Note: `ScalarRefImpl` doesn't contain all information of its `DataType`,
724        /// so sometimes they need to be used together.
725        /// e.g., for `Struct`, we don't have the field names in the value.
726        ///
727        /// See `for_all_variants` for the definition.
728        #[derive(Debug, Copy, Clone, PartialEq, Eq)]
729        pub enum ScalarRefImpl<'scalar> {
730            $( $variant_name($scalar_ref) ),*
731        }
732    };
733}
734
// Instantiate both enums; `for_all_variants!` supplies the entry list to the macro above.
735for_all_variants! { scalar_impl_enum }
736
737// We MUST NOT implement `Ord` for `ScalarImpl` because that will make `Datum` derive an incorrect
738// default `Ord`. To get a default-ordered `ScalarImpl`/`ScalarRefImpl`/`Datum`/`DatumRef`, you can
739// use `DefaultOrdered<T>`. If non-default order is needed, please refer to `sort_util`.
//
// The negative impls below turn that rule into a compile-time guarantee: neither enum
// implements `PartialOrd`.
740impl !PartialOrd for ScalarImpl {}
741impl !PartialOrd for ScalarRefImpl<'_> {}
742
/// An owned, optional scalar value.
///
/// NOTE(review): `None` presumably stands for SQL `NULL` — confirm.
743pub type Datum = Option<ScalarImpl>;
/// The borrowed counterpart of [`Datum`]: an optional scalar reference.
744pub type DatumRef<'a> = Option<ScalarRefImpl<'a>>;
745
746/// This trait is to implement `to_owned_datum` for `Option<ScalarImpl>`
///
/// In this file it is implemented for `&Datum`, for any `T: Into<ScalarImpl>`, and for
/// `Option<T>` with `T: Into<ScalarImpl>`.
747pub trait ToOwnedDatum {
748    /// Convert the datum to an owned [`Datum`].
749    fn to_owned_datum(self) -> Datum;
750}
751
752impl ToOwnedDatum for &Datum {
753    #[inline(always)]
754    fn to_owned_datum(self) -> Datum {
755        self.clone()
756    }
757}
758
759impl<T: Into<ScalarImpl>> ToOwnedDatum for T {
760    #[inline(always)]
761    fn to_owned_datum(self) -> Datum {
762        Some(self.into())
763    }
764}
765
766impl<T: Into<ScalarImpl>> ToOwnedDatum for Option<T> {
767    #[inline(always)]
768    fn to_owned_datum(self) -> Datum {
769        self.map(Into::into)
770    }
771}
772
/// Types that can be viewed as a [`DatumRef`].
///
/// The `auto_impl(&)` attribute additionally generates an implementation for references to
/// implementors.
773#[auto_impl::auto_impl(&)]
774pub trait ToDatumRef: PartialEq + Eq + Debug {
775    /// Convert the datum to [`DatumRef`].
776    fn to_datum_ref(&self) -> DatumRef<'_>;
777}
778
779impl ToDatumRef for Datum {
780    #[inline(always)]
781    fn to_datum_ref(&self) -> DatumRef<'_> {
782        self.as_ref().map(|d| d.as_scalar_ref_impl())
783    }
784}
785impl ToDatumRef for Option<&ScalarImpl> {
786    #[inline(always)]
787    fn to_datum_ref(&self) -> DatumRef<'_> {
788        self.map(|d| d.as_scalar_ref_impl())
789    }
790}
// A `DatumRef` is already in the target form, so the conversion is a plain copy.
791impl ToDatumRef for DatumRef<'_> {
792    #[inline(always)]
793    fn to_datum_ref(&self) -> DatumRef<'_> {
        // `ScalarRefImpl` derives `Copy`, so dereferencing copies the option out.
794        *self
795    }
796}
797
798/// To make sure there is `as_scalar_ref` for all scalar ref types.
799/// See <https://github.com/risingwavelabs/risingwave/pull/9977/files#r1208972881>
800///
801/// This is used by the expr macro.
802pub trait SelfAsScalarRef {
    /// Returns a copy of `self`; the implementations in this file are generated by
    /// `impl_self_as_scalar_ref!`.
803    fn as_scalar_ref(&self) -> Self;
804}
// Implements `SelfAsScalarRef` for each listed type by returning `*self`, so every listed
// type must be `Copy`.
805macro_rules! impl_self_as_scalar_ref {
806    ($($t:ty),*) => {
807        $(
808            impl SelfAsScalarRef for $t {
809                fn as_scalar_ref(&self) -> Self {
810                    *self
811                }
812            }
813        )*
814    };
815}
// The reference types that get the identity `as_scalar_ref`.
816impl_self_as_scalar_ref! { &str, &[u8], Int256Ref<'_>, JsonbRef<'_>, ListRef<'_>, StructRef<'_>, ScalarRefImpl<'_>, MapRef<'_> }
817
818/// `for_all_native_types` includes all native variants of our scalar types.
819///
820/// Specifically, it doesn't support u8/u16/u32/u64.
///
/// The macro named by `$macro` is invoked once with a comma-separated list of
/// `{ type, variant, read_fn }` entries. `Serial` shares `read_i64` with `i64`.
/// NOTE(review): the third element is presumably the name of the reader method used to
/// decode the value — confirm against the consuming macros.
821#[macro_export]
822macro_rules! for_all_native_types {
823    ($macro:ident) => {
824        $macro! {
825            { i16, Int16, read_i16 },
826            { i32, Int32, read_i32 },
827            { i64, Int64, read_i64 },
828            { Serial, Serial, read_i64 },
829            { $crate::types::F32, Float32, read_f32 },
830            { $crate::types::F64, Float64, read_f64 }
831        }
832    };
833}
834
835/// `impl_convert` implements several conversions for `Scalar`.
836/// * `Scalar <-> ScalarImpl` with `From` and `TryFrom` trait.
837/// * `ScalarRef <-> ScalarRefImpl` with `From` and `TryFrom` trait.
838/// * `&ScalarImpl -> &Scalar` with `impl.as_int16()`.
839/// * `ScalarImpl -> Scalar` with `impl.into_int16()`.
840macro_rules! impl_convert {
841    ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty } ),*) => {
842        $(
843            impl From<$scalar> for ScalarImpl {
844                fn from(val: $scalar) -> Self {
845                    ScalarImpl::$variant_name(val)
846                }
847            }
848
849            impl TryFrom<ScalarImpl> for $scalar {
850                type Error = ArrayError;
851
852                fn try_from(val: ScalarImpl) -> ArrayResult<Self> {
853                    match val {
854                        ScalarImpl::$variant_name(scalar) => Ok(scalar),
855                        other_scalar => bail!("cannot convert ScalarImpl::{} to concrete type", other_scalar.get_ident()),
856                    }
857                }
858            }
859
860            impl <'scalar> From<$scalar_ref> for ScalarRefImpl<'scalar> {
861                fn from(val: $scalar_ref) -> Self {
862                    ScalarRefImpl::$variant_name(val)
863                }
864            }
865
866            impl <'scalar> TryFrom<ScalarRefImpl<'scalar>> for $scalar_ref {
867                type Error = ArrayError;
868
869                fn try_from(val: ScalarRefImpl<'scalar>) -> ArrayResult<Self> {
870                    match val {
871                        ScalarRefImpl::$variant_name(scalar_ref) => Ok(scalar_ref),
872                        other_scalar => bail!("cannot convert ScalarRefImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name)),
873                    }
874                }
875            }
876
877            paste! {
878                impl ScalarImpl {
879                    /// # Panics
880                    /// If the scalar is not of the expected type.
881                    pub fn [<as_ $suffix_name>](&self) -> &$scalar {
882                        match self {
883                            Self::$variant_name(scalar) => scalar,
884                            other_scalar => panic!("cannot convert ScalarImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
885                        }
886                    }
887
888                    /// # Panics
889                    /// If the scalar is not of the expected type.
890                    pub fn [<into_ $suffix_name>](self) -> $scalar {
891                        match self {
892                            Self::$variant_name(scalar) => scalar,
893                            other_scalar =>  panic!("cannot convert ScalarImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
894                        }
895                    }
896                }
897
898                impl <'scalar> ScalarRefImpl<'scalar> {
899                    /// # Panics
900                    /// If the scalar is not of the expected type.
901                    pub fn [<into_ $suffix_name>](self) -> $scalar_ref {
902                        match self {
903                            Self::$variant_name(inner) => inner,
904                            other_scalar => panic!("cannot convert ScalarRefImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
905                        }
906                    }
907                }
908            }
909        )*
910    };
911}
912
913for_all_variants! { impl_convert }
914
915// Implement `From<raw float>` for `ScalarImpl::Float` as a sugar.
916impl From<f32> for ScalarImpl {
917    fn from(f: f32) -> Self {
918        Self::Float32(f.into())
919    }
920}
921impl From<f64> for ScalarImpl {
922    fn from(f: f64) -> Self {
923        Self::Float64(f.into())
924    }
925}
926
927// Implement `From<string like>` for `ScalarImpl::Utf8` as a sugar.
928impl From<String> for ScalarImpl {
929    fn from(s: String) -> Self {
930        Self::Utf8(s.into_boxed_str())
931    }
932}
933impl From<&str> for ScalarImpl {
934    fn from(s: &str) -> Self {
935        Self::Utf8(s.into())
936    }
937}
938impl From<&String> for ScalarImpl {
939    fn from(s: &String) -> Self {
940        Self::Utf8(s.as_str().into())
941    }
942}
943impl TryFrom<ScalarImpl> for String {
944    type Error = ArrayError;
945
946    fn try_from(val: ScalarImpl) -> ArrayResult<Self> {
947        match val {
948            ScalarImpl::Utf8(s) => Ok(s.into()),
949            other_scalar => bail!(
950                "cannot convert ScalarImpl::{} to concrete type",
951                other_scalar.get_ident()
952            ),
953        }
954    }
955}
956
957impl From<char> for ScalarImpl {
958    fn from(c: char) -> Self {
959        Self::Utf8(c.to_string().into())
960    }
961}
962
963impl From<&[u8]> for ScalarImpl {
964    fn from(s: &[u8]) -> Self {
965        Self::Bytea(s.into())
966    }
967}
968
969impl From<JsonbRef<'_>> for ScalarImpl {
970    fn from(jsonb: JsonbRef<'_>) -> Self {
971        Self::Jsonb(jsonb.to_owned_scalar())
972    }
973}
974
975impl<T: PrimitiveArrayItemType> From<Vec<T>> for ScalarImpl {
976    fn from(v: Vec<T>) -> Self {
977        Self::List(v.into_iter().collect())
978    }
979}
980
981impl<T: PrimitiveArrayItemType> From<Vec<Option<T>>> for ScalarImpl {
982    fn from(v: Vec<Option<T>>) -> Self {
983        Self::List(v.into_iter().collect())
984    }
985}
986
987impl From<Vec<String>> for ScalarImpl {
988    fn from(v: Vec<String>) -> Self {
989        Self::List(v.iter().map(|s| s.as_str()).collect())
990    }
991}
992
993impl From<Vec<u8>> for ScalarImpl {
994    fn from(v: Vec<u8>) -> Self {
995        Self::Bytea(v.into())
996    }
997}
998
999impl From<Bytes> for ScalarImpl {
1000    fn from(v: Bytes) -> Self {
1001        Self::Bytea(v.as_ref().into())
1002    }
1003}
1004
1005impl From<ListRef<'_>> for ScalarImpl {
1006    fn from(list: ListRef<'_>) -> Self {
1007        Self::List(list.to_owned_scalar())
1008    }
1009}
1010
1011impl ScalarImpl {
1012    /// Creates a scalar from pgwire "BINARY" format.
1013    ///
1014    /// The counterpart of [`to_binary::ToBinary`].
1015    pub fn from_binary(bytes: &Bytes, data_type: &DataType) -> Result<Self, BoxedError> {
1016        let res = match data_type {
1017            DataType::Varchar => Self::Utf8(String::from_sql(&Type::VARCHAR, bytes)?.into()),
1018            DataType::Bytea => Self::Bytea(Vec::<u8>::from_sql(&Type::BYTEA, bytes)?.into()),
1019            DataType::Boolean => Self::Bool(bool::from_sql(&Type::BOOL, bytes)?),
1020            DataType::Int16 => Self::Int16(i16::from_sql(&Type::INT2, bytes)?),
1021            DataType::Int32 => Self::Int32(i32::from_sql(&Type::INT4, bytes)?),
1022            DataType::Int64 => Self::Int64(i64::from_sql(&Type::INT8, bytes)?),
1023            DataType::Serial => Self::Serial(Serial::from(i64::from_sql(&Type::INT8, bytes)?)),
1024            DataType::Float32 => Self::Float32(f32::from_sql(&Type::FLOAT4, bytes)?.into()),
1025            DataType::Float64 => Self::Float64(f64::from_sql(&Type::FLOAT8, bytes)?.into()),
1026            DataType::Decimal => {
1027                Self::Decimal(rust_decimal::Decimal::from_sql(&Type::NUMERIC, bytes)?.into())
1028            }
1029            DataType::Date => Self::Date(chrono::NaiveDate::from_sql(&Type::DATE, bytes)?.into()),
1030            DataType::Time => Self::Time(chrono::NaiveTime::from_sql(&Type::TIME, bytes)?.into()),
1031            DataType::Timestamp => {
1032                Self::Timestamp(chrono::NaiveDateTime::from_sql(&Type::TIMESTAMP, bytes)?.into())
1033            }
1034            DataType::Timestamptz => Self::Timestamptz(
1035                chrono::DateTime::<chrono::Utc>::from_sql(&Type::TIMESTAMPTZ, bytes)?.into(),
1036            ),
1037            DataType::Interval => Self::Interval(Interval::from_sql(&Type::INTERVAL, bytes)?),
1038            DataType::Jsonb => Self::Jsonb(
1039                JsonbVal::value_deserialize(bytes)
1040                    .ok_or_else(|| "invalid value of Jsonb".to_owned())?,
1041            ),
1042            DataType::Int256 => Self::Int256(Int256::from_binary(bytes)?),
1043            DataType::Vector(_) => todo!("VECTOR_PLACEHOLDER"),
1044            DataType::Struct(_) | DataType::List(_) | DataType::Map(_) => {
1045                return Err(format!("unsupported data type: {}", data_type).into());
1046            }
1047        };
1048        Ok(res)
1049    }
1050
1051    /// Creates a scalar from pgwire "TEXT" format.
1052    ///
1053    /// The counterpart of [`ToText`].
1054    pub fn from_text(s: &str, data_type: &DataType) -> Result<Self, BoxedError> {
1055        Ok(match data_type {
1056            DataType::Boolean => str_to_bool(s)?.into(),
1057            DataType::Int16 => i16::from_str(s)?.into(),
1058            DataType::Int32 => i32::from_str(s)?.into(),
1059            DataType::Int64 => i64::from_str(s)?.into(),
1060            DataType::Int256 => Int256::from_str(s)?.into(),
1061            DataType::Serial => Serial::from(i64::from_str(s)?).into(),
1062            DataType::Decimal => Decimal::from_str(s)?.into(),
1063            DataType::Float32 => F32::from_str(s)?.into(),
1064            DataType::Float64 => F64::from_str(s)?.into(),
1065            DataType::Varchar => s.into(),
1066            DataType::Date => Date::from_str(s)?.into(),
1067            DataType::Timestamp => Timestamp::from_str(s)?.into(),
1068            // We only handle the case with timezone here, and leave the implicit session timezone case
1069            // for later phase.
1070            DataType::Timestamptz => Timestamptz::from_str(s)?.into(),
1071            DataType::Time => Time::from_str(s)?.into(),
1072            DataType::Interval => Interval::from_str(s)?.into(),
1073            DataType::List(_) => ListValue::from_str(s, data_type)?.into(),
1074            DataType::Struct(st) => StructValue::from_str(s, st)?.into(),
1075            DataType::Jsonb => JsonbVal::from_str(s)?.into(),
1076            DataType::Bytea => str_to_bytea(s)?.into(),
1077            DataType::Vector(size) => VectorVal::from_text(s, *size)?.into(),
1078            DataType::Map(_m) => return Err("map from text is not supported".into()),
1079        })
1080    }
1081
1082    pub fn from_text_for_test(s: &str, data_type: &DataType) -> Result<Self, BoxedError> {
1083        Ok(match data_type {
1084            DataType::Map(map_type) => MapValue::from_str_for_test(s, map_type)?.into(),
1085            _ => ScalarImpl::from_text(s, data_type)?,
1086        })
1087    }
1088}
1089
1090impl From<ScalarRefImpl<'_>> for ScalarImpl {
1091    fn from(scalar_ref: ScalarRefImpl<'_>) -> Self {
1092        scalar_ref.into_scalar_impl()
1093    }
1094}
1095
1096impl<'a> From<&'a ScalarImpl> for ScalarRefImpl<'a> {
1097    fn from(scalar: &'a ScalarImpl) -> Self {
1098        scalar.as_scalar_ref_impl()
1099    }
1100}
1101
1102impl ScalarImpl {
1103    /// Converts [`ScalarImpl`] to [`ScalarRefImpl`]
1104    pub fn as_scalar_ref_impl(&self) -> ScalarRefImpl<'_> {
1105        dispatch_scalar_variants!(self, inner, { inner.as_scalar_ref().into() })
1106    }
1107}
1108
1109impl ScalarRefImpl<'_> {
1110    /// Converts [`ScalarRefImpl`] to [`ScalarImpl`]
1111    pub fn into_scalar_impl(self) -> ScalarImpl {
1112        dispatch_scalar_ref_variants!(self, inner, { inner.to_owned_scalar().into() })
1113    }
1114}
1115
1116impl Hash for ScalarImpl {
1117    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1118        dispatch_scalar_variants!(self, inner, { inner.as_scalar_ref().hash_scalar(state) })
1119    }
1120}
1121
1122impl Hash for ScalarRefImpl<'_> {
1123    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1124        dispatch_scalar_ref_variants!(self, inner, { inner.hash_scalar(state) })
1125    }
1126}
1127
1128/// Feeds the raw scalar reference of `datum` to the given `state`, which should behave the same
1129/// as [`crate::array::Array::hash_at`], where NULL value will be carefully handled.
1130///
1131/// **FIXME**: the result of this function might be different from [`std::hash::Hash`] due to the
1132/// type alias of `DatumRef = Option<_>`, we should manually implement [`std::hash::Hash`] for
1133/// [`DatumRef`] in the future when it becomes a newtype. (#477)
1134#[inline(always)]
1135pub fn hash_datum(datum: impl ToDatumRef, state: &mut impl std::hash::Hasher) {
1136    match datum.to_datum_ref() {
1137        Some(scalar_ref) => scalar_ref.hash(state),
1138        None => NULL_VAL_FOR_HASH.hash(state),
1139    }
1140}
1141
1142impl ScalarRefImpl<'_> {
1143    pub fn binary_format(&self, data_type: &DataType) -> to_binary::Result<Bytes> {
1144        use self::to_binary::ToBinary;
1145        self.to_binary_with_type(data_type)
1146    }
1147
1148    pub fn text_format(&self, data_type: &DataType) -> String {
1149        self.to_text_with_type(data_type)
1150    }
1151
1152    /// Serialize the scalar into the `memcomparable` format.
1153    pub fn serialize(
1154        &self,
1155        ser: &mut memcomparable::Serializer<impl BufMut>,
1156    ) -> memcomparable::Result<()> {
1157        match self {
1158            Self::Int16(v) => v.serialize(ser)?,
1159            Self::Int32(v) => v.serialize(ser)?,
1160            Self::Int64(v) => v.serialize(ser)?,
1161            Self::Serial(v) => v.serialize(ser)?,
1162            Self::Float32(v) => v.serialize(ser)?,
1163            Self::Float64(v) => v.serialize(ser)?,
1164            Self::Utf8(v) => v.serialize(ser)?,
1165            Self::Bytea(v) => ser.serialize_bytes(v)?,
1166            Self::Bool(v) => v.serialize(ser)?,
1167            Self::Decimal(v) => ser.serialize_decimal((*v).into())?,
1168            Self::Interval(v) => v.serialize(ser)?,
1169            Self::Date(v) => v.0.num_days_from_ce().serialize(ser)?,
1170            Self::Timestamp(v) => {
1171                v.0.and_utc().timestamp().serialize(&mut *ser)?;
1172                v.0.and_utc().timestamp_subsec_nanos().serialize(ser)?;
1173            }
1174            Self::Timestamptz(v) => v.serialize(ser)?,
1175            Self::Time(v) => {
1176                v.0.num_seconds_from_midnight().serialize(&mut *ser)?;
1177                v.0.nanosecond().serialize(ser)?;
1178            }
1179            Self::Int256(v) => v.memcmp_serialize(ser)?,
1180            Self::Jsonb(v) => v.memcmp_serialize(ser)?,
1181            Self::Struct(v) => v.memcmp_serialize(ser)?,
1182            Self::List(v) => v.memcmp_serialize(ser)?,
1183            Self::Map(v) => v.memcmp_serialize(ser)?,
1184            Self::Vector(_) => todo!("VECTOR_PLACEHOLDER"),
1185        };
1186        Ok(())
1187    }
1188}
1189
1190impl ScalarImpl {
1191    /// Serialize the scalar into the `memcomparable` format.
1192    pub fn serialize(
1193        &self,
1194        ser: &mut memcomparable::Serializer<impl BufMut>,
1195    ) -> memcomparable::Result<()> {
1196        self.as_scalar_ref_impl().serialize(ser)
1197    }
1198
1199    /// Deserialize the scalar from the `memcomparable` format.
1200    pub fn deserialize(
1201        ty: &DataType,
1202        de: &mut memcomparable::Deserializer<impl Buf>,
1203    ) -> memcomparable::Result<Self> {
1204        use DataType as Ty;
1205        Ok(match ty {
1206            Ty::Int16 => Self::Int16(i16::deserialize(de)?),
1207            Ty::Int32 => Self::Int32(i32::deserialize(de)?),
1208            Ty::Int64 => Self::Int64(i64::deserialize(de)?),
1209            Ty::Int256 => Self::Int256(Int256::memcmp_deserialize(de)?),
1210            Ty::Serial => Self::Serial(Serial::from(i64::deserialize(de)?)),
1211            Ty::Float32 => Self::Float32(f32::deserialize(de)?.into()),
1212            Ty::Float64 => Self::Float64(f64::deserialize(de)?.into()),
1213            Ty::Varchar => Self::Utf8(Box::<str>::deserialize(de)?),
1214            Ty::Bytea => Self::Bytea(serde_bytes::ByteBuf::deserialize(de)?.into_vec().into()),
1215            Ty::Boolean => Self::Bool(bool::deserialize(de)?),
1216            Ty::Decimal => Self::Decimal(de.deserialize_decimal()?.into()),
1217            Ty::Interval => Self::Interval(Interval::deserialize(de)?),
1218            Ty::Time => Self::Time({
1219                let secs = u32::deserialize(&mut *de)?;
1220                let nano = u32::deserialize(de)?;
1221                Time::with_secs_nano(secs, nano)
1222                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1223            }),
1224            Ty::Timestamp => Self::Timestamp({
1225                let secs = i64::deserialize(&mut *de)?;
1226                let nsecs = u32::deserialize(de)?;
1227                Timestamp::with_secs_nsecs(secs, nsecs)
1228                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1229            }),
1230            Ty::Timestamptz => Self::Timestamptz(Timestamptz::deserialize(de)?),
1231            Ty::Date => Self::Date({
1232                let days = i32::deserialize(de)?;
1233                Date::with_days_since_ce(days)
1234                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1235            }),
1236            Ty::Jsonb => Self::Jsonb(JsonbVal::memcmp_deserialize(de)?),
1237            Ty::Struct(t) => StructValue::memcmp_deserialize(t.types(), de)?.to_scalar_value(),
1238            Ty::List(t) => ListValue::memcmp_deserialize(t, de)?.to_scalar_value(),
1239            Ty::Map(t) => MapValue::memcmp_deserialize(t, de)?.to_scalar_value(),
1240            Ty::Vector(_) => todo!("VECTOR_PLACEHOLDER"),
1241        })
1242    }
1243
1244    pub fn as_integral(&self) -> i64 {
1245        match self {
1246            Self::Int16(v) => *v as i64,
1247            Self::Int32(v) => *v as i64,
1248            Self::Int64(v) => *v,
1249            _ => panic!(
1250                "Can't convert ScalarImpl::{} to a integral",
1251                self.get_ident()
1252            ),
1253        }
1254    }
1255}
1256
1257/// Returns whether the `literal` matches the `data_type`.
1258pub fn literal_type_match(data_type: &DataType, literal: Option<&ScalarImpl>) -> bool {
1259    match literal {
1260        Some(scalar) => {
1261            macro_rules! matches {
1262                ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty }),*) => {
1263                    match (data_type, scalar) {
1264                        $(
1265                            (DataType::$data_type { .. }, ScalarImpl::$variant_name(_)) => true,
1266                            (DataType::$data_type { .. }, _) => false, // so that we won't forget to match a new logical type
1267                        )*
1268                    }
1269                }
1270            }
1271            for_all_variants! { matches }
1272        }
1273        None => true,
1274    }
1275}
1276
#[cfg(test)]
mod tests {
    use std::hash::{BuildHasher, Hasher};

    use strum::IntoEnumIterator;

    use super::*;
    use crate::util::hash_util::Crc32FastBuilder;

    /// Pins the in-memory size of the most frequently used scalar and type representations.
    #[test]
    fn test_size() {
        use static_assertions::const_assert_eq;

        use crate::array::*;

        macro_rules! assert_item_size_eq {
            ($array:ty, $size:literal) => {
                const_assert_eq!(std::mem::size_of::<<$array as Array>::OwnedItem>(), $size);
            };
        }

        assert_item_size_eq!(StructArray, 16); // Box<[Datum]>
        assert_item_size_eq!(ListArray, 8); // Box<ArrayImpl>
        assert_item_size_eq!(Utf8Array, 16); // Box<str>
        assert_item_size_eq!(IntervalArray, 16);
        assert_item_size_eq!(TimestampArray, 12);

        // TODO: try to reduce the memory usage of `Decimal`, `ScalarImpl` and `Datum`.
        assert_item_size_eq!(DecimalArray, 20);

        const_assert_eq!(std::mem::size_of::<ScalarImpl>(), 24);
        const_assert_eq!(std::mem::size_of::<ScalarRefImpl<'_>>(), 24);
        const_assert_eq!(std::mem::size_of::<Datum>(), 24);
        const_assert_eq!(std::mem::size_of::<StructType>(), 8);
        const_assert_eq!(std::mem::size_of::<DataType>(), 16);
    }

    /// Checks the `Display` output of a struct type.
    #[test]
    fn test_data_type_display() {
        let d: DataType =
            StructType::new(vec![("i", DataType::Int32), ("j", DataType::Varchar)]).into();
        assert_eq!(
            format!("{}", d),
            "struct<i integer, j character varying>".to_owned()
        );
    }

    /// Checks that hashing a datum via `hash_datum` agrees with hashing the same datum stored
    /// in an array via `hash_at`.
    #[test]
    fn test_hash_implementation() {
        fn test(datum: Datum, data_type: DataType) {
            assert!(literal_type_match(&data_type, datum.as_ref()));

            // Layout: [NULL, datum, NULL, datum, NULL, datum], so index 3 holds `datum`.
            let mut builder = data_type.create_array_builder(6);
            for _ in 0..3 {
                builder.append_null();
                builder.append(&datum);
            }
            let array = builder.finish();

            let hash_from_array = {
                let mut state = Crc32FastBuilder.build_hasher();
                array.hash_at(3, &mut state);
                state.finish()
            };

            let hash_from_datum = {
                let mut state = Crc32FastBuilder.build_hasher();
                hash_datum(&datum, &mut state);
                state.finish()
            };

            let hash_from_datum_ref = {
                let mut state = Crc32FastBuilder.build_hasher();
                hash_datum(datum.to_datum_ref(), &mut state);
                state.finish()
            };

            assert_eq!(hash_from_array, hash_from_datum);
            assert_eq!(hash_from_datum, hash_from_datum_ref);
        }

        for name in DataTypeName::iter() {
            let (scalar, data_type) = match name {
                DataTypeName::Boolean => (ScalarImpl::Bool(true), DataType::Boolean),
                DataTypeName::Int16 => (ScalarImpl::Int16(233), DataType::Int16),
                DataTypeName::Int32 => (ScalarImpl::Int32(233333), DataType::Int32),
                DataTypeName::Int64 => (ScalarImpl::Int64(233333333333), DataType::Int64),
                DataTypeName::Int256 => (
                    ScalarImpl::Int256(233333333333_i64.into()),
                    DataType::Int256,
                ),
                DataTypeName::Serial => (ScalarImpl::Serial(233333333333.into()), DataType::Serial),
                DataTypeName::Float32 => (ScalarImpl::Float32(23.33.into()), DataType::Float32),
                DataTypeName::Float64 => (
                    ScalarImpl::Float64(23.333333333333.into()),
                    DataType::Float64,
                ),
                DataTypeName::Decimal => (
                    ScalarImpl::Decimal("233.33".parse().unwrap()),
                    DataType::Decimal,
                ),
                DataTypeName::Date => (
                    ScalarImpl::Date(Date::from_ymd_uncheck(2333, 3, 3)),
                    DataType::Date,
                ),
                DataTypeName::Varchar => (ScalarImpl::Utf8("233".into()), DataType::Varchar),
                DataTypeName::Bytea => (
                    ScalarImpl::Bytea("\\x233".as_bytes().into()),
                    DataType::Bytea,
                ),
                DataTypeName::Time => (
                    ScalarImpl::Time(Time::from_hms_uncheck(2, 3, 3)),
                    DataType::Time,
                ),
                DataTypeName::Timestamp => (
                    ScalarImpl::Timestamp(Timestamp::from_timestamp_uncheck(23333333, 2333)),
                    DataType::Timestamp,
                ),
                DataTypeName::Timestamptz => (
                    ScalarImpl::Timestamptz(Timestamptz::from_micros(233333333)),
                    DataType::Timestamptz,
                ),
                DataTypeName::Interval => (
                    ScalarImpl::Interval(Interval::from_month_day_usec(2, 3, 3333)),
                    DataType::Interval,
                ),
                DataTypeName::Jsonb => (ScalarImpl::Jsonb(JsonbVal::null()), DataType::Jsonb),
                DataTypeName::Struct => (
                    ScalarImpl::Struct(StructValue::new(vec![
                        ScalarImpl::Int64(233).into(),
                        ScalarImpl::Float64(23.33.into()).into(),
                    ])),
                    DataType::Struct(StructType::new(vec![
                        ("a", DataType::Int64),
                        ("b", DataType::Float64),
                    ])),
                ),
                DataTypeName::List => (
                    ScalarImpl::List(ListValue::from_iter([233i64, 2333])),
                    DataType::List(Box::new(DataType::Int64)),
                ),
                DataTypeName::Map => {
                    // map is not hashable
                    continue;
                }
                DataTypeName::Vector => continue, // todo!("VECTOR_PLACEHOLDER"),
            };

            test(Some(scalar), data_type.clone());
            test(None, data_type);
        }
    }

    /// Checks that every supported spelling of a type name parses to the expected `DataType`.
    #[test]
    fn test_data_type_from_str() {
        fn list_of(elem: DataType) -> DataType {
            DataType::List(Box::new(elem))
        }

        let cases = [
            ("bool", DataType::Boolean),
            ("boolean", DataType::Boolean),
            ("BOOL", DataType::Boolean),
            ("BOOLEAN", DataType::Boolean),
            ("int2", DataType::Int16),
            ("smallint", DataType::Int16),
            ("INT2", DataType::Int16),
            ("SMALLINT", DataType::Int16),
            ("int4", DataType::Int32),
            ("integer", DataType::Int32),
            // This entry used to repeat "int4", leaving the lowercase "int" alias uncovered.
            ("int", DataType::Int32),
            ("INT4", DataType::Int32),
            ("INTEGER", DataType::Int32),
            ("INT", DataType::Int32),
            ("int8", DataType::Int64),
            ("bigint", DataType::Int64),
            ("INT8", DataType::Int64),
            ("BIGINT", DataType::Int64),
            ("rw_int256", DataType::Int256),
            ("RW_INT256", DataType::Int256),
            ("float4", DataType::Float32),
            ("real", DataType::Float32),
            ("FLOAT4", DataType::Float32),
            ("REAL", DataType::Float32),
            ("float8", DataType::Float64),
            ("double precision", DataType::Float64),
            ("FLOAT8", DataType::Float64),
            ("DOUBLE PRECISION", DataType::Float64),
            ("decimal", DataType::Decimal),
            ("DECIMAL", DataType::Decimal),
            ("numeric", DataType::Decimal),
            ("NUMERIC", DataType::Decimal),
            ("date", DataType::Date),
            ("DATE", DataType::Date),
            ("varchar", DataType::Varchar),
            ("VARCHAR", DataType::Varchar),
            ("time", DataType::Time),
            ("time without time zone", DataType::Time),
            ("TIME", DataType::Time),
            ("TIME WITHOUT TIME ZONE", DataType::Time),
            ("timestamp", DataType::Timestamp),
            ("timestamp without time zone", DataType::Timestamp),
            ("TIMESTAMP", DataType::Timestamp),
            ("TIMESTAMP WITHOUT TIME ZONE", DataType::Timestamp),
            ("timestamptz", DataType::Timestamptz),
            ("timestamp with time zone", DataType::Timestamptz),
            ("TIMESTAMPTZ", DataType::Timestamptz),
            ("TIMESTAMP WITH TIME ZONE", DataType::Timestamptz),
            ("interval", DataType::Interval),
            ("INTERVAL", DataType::Interval),
            ("int2[]", list_of(DataType::Int16)),
            ("int[]", list_of(DataType::Int32)),
            ("int8[]", list_of(DataType::Int64)),
            ("float4[]", list_of(DataType::Float32)),
            ("float8[]", list_of(DataType::Float64)),
            ("decimal[]", list_of(DataType::Decimal)),
            ("varchar[]", list_of(DataType::Varchar)),
            ("date[]", list_of(DataType::Date)),
            ("time[]", list_of(DataType::Time)),
            ("timestamp[]", list_of(DataType::Timestamp)),
            ("timestamptz[]", list_of(DataType::Timestamptz)),
            ("interval[]", list_of(DataType::Interval)),
            ("record", DataType::Struct(StructType::unnamed(vec![]))),
            (
                "struct<a int4, b varchar>",
                DataType::Struct(StructType::new(vec![
                    ("a", DataType::Int32),
                    ("b", DataType::Varchar),
                ])),
            ),
        ];

        for (input, expected) in cases {
            let parsed = DataType::from_str(input)
                .unwrap_or_else(|e| panic!("failed to parse {input:?}: {e:?}"));
            assert_eq!(parsed, expected, "input: {input:?}");
        }
    }

    /// Checks `can_alter` both for types that cannot be altered and for types that can.
    #[test]
    fn test_can_alter() {
        let cannots = [
            (DataType::Int32, None),
            (DataType::List(DataType::Int32.into()), None),
            (
                MapType::from_kv(DataType::Varchar, DataType::List(DataType::Int32.into())).into(),
                None,
            ),
            (
                StructType::new([("a", DataType::Int32)]).into(),
                Some(false),
            ),
            (
                MapType::from_kv(
                    DataType::Varchar,
                    StructType::new([("a", DataType::Int32)]).into(),
                )
                .into(),
                Some(false),
            ),
        ];
        for (cannot, why) in cannots {
            assert_eq!(cannot.can_alter(), why, "{cannot:?}");
        }

        let cans = [
            StructType::new([
                ("a", DataType::Int32),
                ("b", DataType::List(DataType::Int32.into())),
            ])
            .with_ids([ColumnId::new(1), ColumnId::new(2)])
            .into(),
            DataType::List(Box::new(DataType::Struct(
                StructType::new([("a", DataType::Int32)]).with_ids([ColumnId::new(1)]),
            ))),
            MapType::from_kv(
                DataType::Varchar,
                StructType::new([("a", DataType::Int32)])
                    .with_ids([ColumnId::new(1)])
                    .into(),
            )
            .into(),
        ];
        for can in cans {
            assert_eq!(can.can_alter(), Some(true), "{can:?}");
        }
    }
}