risingwave_common/types/
mod.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Data types in RisingWave.
16
17// NOTE: When adding or modifying data types, remember to update the type matrix in
18// src/expr/macro/src/types.rs
19
20use std::fmt::Debug;
21use std::hash::Hash;
22use std::str::FromStr;
23
24use bytes::{Buf, BufMut, Bytes};
25use chrono::{Datelike, Timelike};
26use itertools::Itertools;
27use parse_display::{Display, FromStr};
28use paste::paste;
29use postgres_types::{FromSql, IsNull, ToSql, Type};
30use risingwave_common_estimate_size::{EstimateSize, ZeroHeapSize};
31use risingwave_pb::data::PbDataType;
32use risingwave_pb::data::data_type::PbTypeName;
33use rw_iter_util::ZipEqFast as _;
34use serde::{Deserialize, Serialize, Serializer};
35use strum_macros::EnumDiscriminants;
36use thiserror_ext::AsReport;
37
38use crate::array::{
39    ArrayBuilderImpl, ArrayError, ArrayResult, NULL_VAL_FOR_HASH, PrimitiveArrayItemType,
40};
41// Complex type's value is based on the array
42pub use crate::array::{
43    ListRef, ListValue, MapRef, MapValue, StructRef, StructValue, VectorRef, VectorVal,
44};
45use crate::cast::{str_to_bool, str_to_bytea};
46use crate::catalog::ColumnId;
47use crate::error::BoxedError;
48use crate::{
49    dispatch_data_types, dispatch_scalar_ref_variants, dispatch_scalar_variants, for_all_variants,
50};
51
52mod cow;
53mod datetime;
54mod decimal;
55mod fields;
56mod from_sql;
57mod interval;
58mod jsonb;
59mod list_type;
60mod macros;
61mod map_type;
62mod native_type;
63mod num256;
64mod ops;
65mod ordered;
66mod ordered_float;
67pub mod postgres_type;
68mod scalar_impl;
69mod sentinel;
70mod serial;
71mod struct_type;
72mod successor;
73mod timestamptz;
74mod to_binary;
75mod to_sql;
76mod to_text;
77mod with_data_type;
78
79pub use fields::Fields;
80pub use risingwave_fields_derive::Fields;
81
82pub use self::cow::DatumCow;
83pub use self::datetime::{Date, Time, Timestamp};
84pub use self::decimal::{Decimal, PowError as DecimalPowError};
85pub use self::interval::{DateTimeField, Interval, IntervalDisplay, test_utils};
86pub use self::jsonb::{JsonbRef, JsonbVal};
87pub use self::list_type::ListType;
88pub use self::map_type::MapType;
89pub use self::native_type::*;
90pub use self::num256::{Int256, Int256Ref};
91pub use self::ops::{CheckedAdd, IsNegative};
92pub use self::ordered::*;
93pub use self::ordered_float::{FloatExt, IntoOrdered};
94pub use self::scalar_impl::*;
95pub use self::sentinel::Sentinelled;
96pub use self::serial::Serial;
97pub use self::struct_type::StructType;
98pub use self::successor::Successor;
99pub use self::timestamptz::*;
100pub use self::to_text::ToText;
101pub use self::with_data_type::WithDataType;
102
103/// A 32-bit floating point type with total order.
104pub type F32 = ordered_float::OrderedFloat<f32>;
105
106/// A 64-bit floating point type with total order.
107pub type F64 = ordered_float::OrderedFloat<f64>;
108
109pub const DEBEZIUM_UNAVAILABLE_VALUE: &str = "__debezium_unavailable_value";
110
111// Pre-built JSON value for Debezium unavailable value to avoid rebuilding it every time
112pub static DEBEZIUM_UNAVAILABLE_JSON: std::sync::LazyLock<JsonbVal> =
113    std::sync::LazyLock::new(|| {
114        let mut builder = jsonbb::Builder::default();
115        builder.add_string(DEBEZIUM_UNAVAILABLE_VALUE);
116        JsonbVal(builder.finish())
117    });
118
/// The set of datatypes that are supported in RisingWave.
///
/// # Trait implementations
///
/// - `EnumDiscriminants` generates [`DataTypeName`] enum with the same variants,
///   but without data fields.
/// - `FromStr` is only used internally for tests.
///   The generated implementation isn't efficient, and doesn't handle whitespaces, etc.
/// - `Display` prints the text given in each variant's `#[display(...)]` attribute.
///   `Struct`, `List` and `Map` use `{0}`, i.e. they print their inner type.
#[derive(Debug, Display, Clone, PartialEq, Eq, Hash, EnumDiscriminants, FromStr)]
#[strum_discriminants(derive(Hash, Ord, PartialOrd))]
#[strum_discriminants(name(DataTypeName))]
#[strum_discriminants(vis(pub))]
#[cfg_attr(test, strum_discriminants(derive(strum_macros::EnumIter)))]
pub enum DataType {
    #[display("boolean")]
    #[from_str(regex = "(?i)^bool$|^boolean$")]
    Boolean,
    #[display("smallint")]
    #[from_str(regex = "(?i)^smallint$|^int2$")]
    Int16,
    #[display("integer")]
    #[from_str(regex = "(?i)^integer$|^int$|^int4$")]
    Int32,
    #[display("bigint")]
    #[from_str(regex = "(?i)^bigint$|^int8$")]
    Int64,
    #[display("real")]
    #[from_str(regex = "(?i)^real$|^float4$")]
    Float32,
    #[display("double precision")]
    #[from_str(regex = "(?i)^double precision$|^float8$")]
    Float64,
    #[display("numeric")]
    #[from_str(regex = "(?i)^numeric$|^decimal$")]
    Decimal,
    #[display("date")]
    #[from_str(regex = "(?i)^date$")]
    Date,
    #[display("character varying")]
    #[from_str(regex = "(?i)^character varying$|^varchar$")]
    Varchar,
    #[display("time without time zone")]
    #[from_str(regex = "(?i)^time$|^time without time zone$")]
    Time,
    #[display("timestamp without time zone")]
    #[from_str(regex = "(?i)^timestamp$|^timestamp without time zone$")]
    Timestamp,
    #[display("timestamp with time zone")]
    #[from_str(regex = "(?i)^timestamptz$|^timestamp with time zone$")]
    Timestamptz,
    #[display("interval")]
    #[from_str(regex = "(?i)^interval$")]
    Interval,
    // The regex captures the whole input as field `0`; presumably it is then parsed by the
    // `FromStr` implementation of `StructType` (TODO confirm against `parse_display`).
    #[display("{0}")]
    #[from_str(regex = "(?i)^(?P<0>.+)$")]
    Struct(StructType),
    // Same catch-all pattern as `Struct`, with the capture handed to `ListType`.
    #[display("{0}")]
    #[from_str(regex = "(?i)^(?P<0>.+)$")]
    List(ListType),
    #[display("bytea")]
    #[from_str(regex = "(?i)^bytea$")]
    Bytea,
    #[display("jsonb")]
    #[from_str(regex = "(?i)^jsonb$")]
    Jsonb,
    #[display("serial")]
    #[from_str(regex = "(?i)^serial$")]
    Serial,
    #[display("rw_int256")]
    #[from_str(regex = "(?i)^rw_int256$")]
    Int256,
    // Same catch-all pattern as `Struct`, with the capture handed to `MapType`.
    #[display("{0}")]
    #[from_str(regex = "(?i)^(?P<0>.+)$")]
    Map(MapType),
    // Displayed and parsed as `vector(N)`, where `N` is the `usize` payload.
    #[display("vector({0})")]
    #[from_str(regex = "(?i)^vector\\((?P<0>.+)\\)$")]
    Vector(usize),
}
197
// Negative impl: explicitly declares that `DataType` does not implement `PartialOrd`.
impl !PartialOrd for DataType {}

// Marker impl of `ZeroHeapSize` (imported from `risingwave_common_estimate_size`).
impl ZeroHeapSize for DataType {}
201
202impl TryFrom<DataTypeName> for DataType {
203    type Error = &'static str;
204
205    fn try_from(type_name: DataTypeName) -> Result<Self, Self::Error> {
206        match type_name {
207            DataTypeName::Boolean => Ok(DataType::Boolean),
208            DataTypeName::Int16 => Ok(DataType::Int16),
209            DataTypeName::Int32 => Ok(DataType::Int32),
210            DataTypeName::Int64 => Ok(DataType::Int64),
211            DataTypeName::Int256 => Ok(DataType::Int256),
212            DataTypeName::Serial => Ok(DataType::Serial),
213            DataTypeName::Decimal => Ok(DataType::Decimal),
214            DataTypeName::Float32 => Ok(DataType::Float32),
215            DataTypeName::Float64 => Ok(DataType::Float64),
216            DataTypeName::Varchar => Ok(DataType::Varchar),
217            DataTypeName::Bytea => Ok(DataType::Bytea),
218            DataTypeName::Date => Ok(DataType::Date),
219            DataTypeName::Timestamp => Ok(DataType::Timestamp),
220            DataTypeName::Timestamptz => Ok(DataType::Timestamptz),
221            DataTypeName::Time => Ok(DataType::Time),
222            DataTypeName::Interval => Ok(DataType::Interval),
223            DataTypeName::Jsonb => Ok(DataType::Jsonb),
224            DataTypeName::Struct
225            | DataTypeName::List
226            | DataTypeName::Map
227            | DataTypeName::Vector => Err(
228                "Functions returning parameterized types can not be inferred. Please use `FunctionCall::new_unchecked`.",
229            ),
230        }
231    }
232}
233
234impl From<&PbDataType> for DataType {
235    fn from(proto: &PbDataType) -> DataType {
236        match proto.get_type_name().expect("missing type field") {
237            PbTypeName::TypeUnspecified => unreachable!(),
238            PbTypeName::Int16 => DataType::Int16,
239            PbTypeName::Int32 => DataType::Int32,
240            PbTypeName::Int64 => DataType::Int64,
241            PbTypeName::Serial => DataType::Serial,
242            PbTypeName::Float => DataType::Float32,
243            PbTypeName::Double => DataType::Float64,
244            PbTypeName::Boolean => DataType::Boolean,
245            PbTypeName::Varchar => DataType::Varchar,
246            PbTypeName::Date => DataType::Date,
247            PbTypeName::Time => DataType::Time,
248            PbTypeName::Timestamp => DataType::Timestamp,
249            PbTypeName::Timestamptz => DataType::Timestamptz,
250            PbTypeName::Decimal => DataType::Decimal,
251            PbTypeName::Interval => DataType::Interval,
252            PbTypeName::Bytea => DataType::Bytea,
253            PbTypeName::Jsonb => DataType::Jsonb,
254            PbTypeName::Struct => {
255                let fields: Vec<DataType> = proto.field_type.iter().map(|f| f.into()).collect_vec();
256                let field_names: Vec<String> = proto.field_names.iter().cloned().collect_vec();
257                let field_ids = (proto.field_ids.iter().copied())
258                    .map(ColumnId::new)
259                    .collect_vec();
260
261                let mut struct_type = if proto.field_names.is_empty() {
262                    StructType::unnamed(fields)
263                } else {
264                    StructType::new(field_names.into_iter().zip_eq_fast(fields))
265                };
266                // `field_ids` is used for nested-schema evolution. Cases when `field_ids` is empty:
267                //
268                // 1. The data type is not associated with a table column, so we don't need to set it.
269                // 2. The column is created before nested-schema evolution is supported, thus is using
270                //    the old serialization format and does not have field ids.
271                // 3. This is an empty struct, which is always considered alterable, and setting ids
272                //    is a no-op.
273                if !field_ids.is_empty() {
274                    struct_type = struct_type.with_ids(field_ids);
275                }
276                struct_type.into()
277            }
278            PbTypeName::List => DataType::list(
279                // The first (and only) item is the list element type.
280                proto.field_type[0].clone().into(),
281            ),
282            PbTypeName::Map => {
283                // Map is physically the same as a list.
284                // So the first (and only) item is the list element type.
285                let list_entries_type: DataType = (&proto.field_type[0]).into();
286                DataType::Map(MapType::from_entries(list_entries_type))
287            }
288            PbTypeName::Vector => DataType::Vector(proto.precision as _),
289            PbTypeName::Int256 => DataType::Int256,
290        }
291    }
292}
293
294impl From<PbDataType> for DataType {
295    fn from(proto: PbDataType) -> DataType {
296        DataType::from(&proto)
297    }
298}
299
300impl From<DataTypeName> for PbTypeName {
301    fn from(type_name: DataTypeName) -> Self {
302        match type_name {
303            DataTypeName::Boolean => PbTypeName::Boolean,
304            DataTypeName::Int16 => PbTypeName::Int16,
305            DataTypeName::Int32 => PbTypeName::Int32,
306            DataTypeName::Int64 => PbTypeName::Int64,
307            DataTypeName::Serial => PbTypeName::Serial,
308            DataTypeName::Float32 => PbTypeName::Float,
309            DataTypeName::Float64 => PbTypeName::Double,
310            DataTypeName::Varchar => PbTypeName::Varchar,
311            DataTypeName::Date => PbTypeName::Date,
312            DataTypeName::Timestamp => PbTypeName::Timestamp,
313            DataTypeName::Timestamptz => PbTypeName::Timestamptz,
314            DataTypeName::Time => PbTypeName::Time,
315            DataTypeName::Interval => PbTypeName::Interval,
316            DataTypeName::Decimal => PbTypeName::Decimal,
317            DataTypeName::Bytea => PbTypeName::Bytea,
318            DataTypeName::Jsonb => PbTypeName::Jsonb,
319            DataTypeName::Struct => PbTypeName::Struct,
320            DataTypeName::List => PbTypeName::List,
321            DataTypeName::Int256 => PbTypeName::Int256,
322            DataTypeName::Map => PbTypeName::Map,
323            DataTypeName::Vector => PbTypeName::Vector,
324        }
325    }
326}
327
/// Convenient macros to generate match arms for [`DataType`].
///
/// Each macro expands to an or-pattern of `DataType::...` variants, so it can be used both in
/// `match` arms and inside `matches!`. The macros are declared with `#[macro_export]` under an
/// underscore-prefixed name and re-exported from this module under a shorter alias.
///
/// The expansions refer to the bare name `DataType`, so it must be in scope at the use site.
pub mod data_types {
    use super::DataType;

    /// Numeric [`DataType`]s supported to be `offset` of `RANGE` frame.
    #[macro_export]
    macro_rules! _range_frame_numeric_data_types {
        () => {
            DataType::Int16
                | DataType::Int32
                | DataType::Int64
                | DataType::Float32
                | DataType::Float64
                | DataType::Decimal
        };
    }
    pub use _range_frame_numeric_data_types as range_frame_numeric;

    /// Date/time [`DataType`]s supported to be `offset` of `RANGE` frame.
    #[macro_export]
    macro_rules! _range_frame_datetime_data_types {
        () => {
            DataType::Date
                | DataType::Time
                | DataType::Timestamp
                | DataType::Timestamptz
                | DataType::Interval
        };
    }
    pub use _range_frame_datetime_data_types as range_frame_datetime;

    /// Data types that do not have inner fields.
    ///
    /// Note that `Vector(_)` is listed here: its payload is a size, not a nested data type.
    #[macro_export]
    macro_rules! _simple_data_types {
        () => {
            DataType::Boolean
                | DataType::Int16
                | DataType::Int32
                | DataType::Int64
                | DataType::Float32
                | DataType::Float64
                | DataType::Decimal
                | DataType::Date
                | DataType::Varchar
                | DataType::Time
                | DataType::Timestamp
                | DataType::Timestamptz
                | DataType::Interval
                | DataType::Bytea
                | DataType::Jsonb
                | DataType::Serial
                | DataType::Int256
                | DataType::Vector(_)
        };
    }
    pub use _simple_data_types as simple;

    /// Data types that have inner fields.
    #[macro_export]
    macro_rules! _composite_data_types {
        () => {
            DataType::Struct { .. } | DataType::List { .. } | DataType::Map { .. }
        };
    }
    pub use _composite_data_types as composite;

    /// Test that all data types are covered either by `simple!()` or `composite!()`.
    ///
    /// This function is never meant to be called. The `match` has no wildcard arm, so it only
    /// compiles while every `DataType` variant appears in one of the two macros.
    fn _simple_composite_data_types_exhausted(dt: DataType) {
        match dt {
            simple!() => {}
            composite!() => {}
        }
    }
}
402
403impl DataType {
404    /// Same as pgvector; unsure how it was chosen there
405    /// <https://github.com/pgvector/pgvector/blob/v0.8.0/README.md#vector-type>
406    pub const VEC_MAX_SIZE: usize = 16000;
407
408    pub fn create_array_builder(&self, capacity: usize) -> ArrayBuilderImpl {
409        use crate::array::*;
410
411        dispatch_data_types!(self, [B = ArrayBuilder], {
412            B::with_type(capacity, self.clone()).into()
413        })
414    }
415
416    pub fn type_name(&self) -> DataTypeName {
417        DataTypeName::from(self)
418    }
419
420    pub fn prost_type_name(&self) -> PbTypeName {
421        self.type_name().into()
422    }
423
424    pub fn to_protobuf(&self) -> PbDataType {
425        let mut pb = PbDataType {
426            type_name: self.prost_type_name() as i32,
427            is_nullable: true,
428            ..Default::default()
429        };
430        match self {
431            DataType::Struct(t) => {
432                if !t.is_unnamed() {
433                    // To be consistent with `From<&PbDataType>`,
434                    // we only set field names when it's a named struct.
435                    pb.field_names = t.names().map(|s| s.into()).collect();
436                }
437                pb.field_type = t.types().map(|f| f.to_protobuf()).collect();
438                if let Some(ids) = t.ids() {
439                    pb.field_ids = ids.map(|id| id.get_id()).collect();
440                }
441            }
442            DataType::List(list) => {
443                pb.field_type = vec![list.elem().to_protobuf()];
444            }
445            DataType::Map(map) => {
446                // Same as List<Struct<K,V>>
447                pb.field_type = vec![map.clone().into_struct().to_protobuf()];
448            }
449            DataType::Vector(size) => {
450                pb.precision = *size as _;
451            }
452            DataType::Boolean
453            | DataType::Int16
454            | DataType::Int32
455            | DataType::Int64
456            | DataType::Float32
457            | DataType::Float64
458            | DataType::Decimal
459            | DataType::Date
460            | DataType::Varchar
461            | DataType::Time
462            | DataType::Timestamp
463            | DataType::Timestamptz
464            | DataType::Interval
465            | DataType::Bytea
466            | DataType::Jsonb
467            | DataType::Serial
468            | DataType::Int256 => (),
469        }
470        pb
471    }
472
473    pub fn is_numeric(&self) -> bool {
474        matches!(
475            self,
476            DataType::Int16
477                | DataType::Int32
478                | DataType::Int64
479                | DataType::Serial
480                | DataType::Float32
481                | DataType::Float64
482                | DataType::Decimal
483        )
484    }
485
486    /// Returns whether the data type does not have inner fields.
487    pub fn is_simple(&self) -> bool {
488        matches!(self, data_types::simple!())
489    }
490
491    /// Returns whether the data type has inner fields.
492    pub fn is_composite(&self) -> bool {
493        matches!(self, data_types::composite!())
494    }
495
496    pub fn is_array(&self) -> bool {
497        matches!(self, DataType::List(_))
498    }
499
500    pub fn is_struct(&self) -> bool {
501        matches!(self, DataType::Struct(_))
502    }
503
504    pub fn is_map(&self) -> bool {
505        matches!(self, DataType::Map(_))
506    }
507
508    pub fn is_int(&self) -> bool {
509        matches!(self, DataType::Int16 | DataType::Int32 | DataType::Int64)
510    }
511
512    /// Returns the output type of time window function on a given input type.
513    pub fn window_of(input: &DataType) -> Option<DataType> {
514        match input {
515            DataType::Timestamptz => Some(DataType::Timestamptz),
516            DataType::Timestamp | DataType::Date => Some(DataType::Timestamp),
517            _ => None,
518        }
519    }
520
521    pub fn as_struct(&self) -> &StructType {
522        match self {
523            DataType::Struct(t) => t,
524            t => panic!("expect struct type, got {t}"),
525        }
526    }
527
528    pub fn into_struct(self) -> StructType {
529        match self {
530            DataType::Struct(t) => t,
531            t => panic!("expect struct type, got {t}"),
532        }
533    }
534
535    pub fn as_map(&self) -> &MapType {
536        match self {
537            DataType::Map(t) => t,
538            t => panic!("expect map type, got {t}"),
539        }
540    }
541
542    pub fn into_map(self) -> MapType {
543        match self {
544            DataType::Map(t) => t,
545            t => panic!("expect map type, got {t}"),
546        }
547    }
548
549    pub fn as_list(&self) -> &ListType {
550        match self {
551            DataType::List(t) => t,
552            t => panic!("expect list type, got {t}"),
553        }
554    }
555
556    pub fn into_list(self) -> ListType {
557        match self {
558            DataType::List(t) => t,
559            t => panic!("expect list type, got {t}"),
560        }
561    }
562
563    /// Returns the inner element's type if `self` is a list type.
564    /// Equivalent to `self.as_list().elem()`.
565    pub fn as_list_elem(&self) -> &DataType {
566        self.as_list().elem()
567    }
568
569    /// Returns the inner element's type if `self` is a list type.
570    /// Equivalent to `self.into_list().into_elem()`.
571    pub fn into_list_elem(self) -> DataType {
572        self.into_list().into_elem()
573    }
574
575    /// Return a new type that removes the outer list, and get the innermost element type.
576    ///
577    /// Use [`DataType::as_list_elem`] if you only want the element type of a list.
578    ///
579    /// ```
580    /// use risingwave_common::types::DataType::*;
581    /// assert_eq!(Int32.list().unnest_list(), &Int32);
582    /// assert_eq!(Int32.list().list().unnest_list(), &Int32);
583    /// ```
584    pub fn unnest_list(&self) -> &Self {
585        match self {
586            DataType::List(list) => list.elem().unnest_list(),
587            _ => self,
588        }
589    }
590
591    /// Return the number of dimensions of this array/list type. Return `0` when this type is not an
592    /// array/list.
593    pub fn array_ndims(&self) -> usize {
594        let mut d = 0;
595        let mut t = self;
596        while let Self::List(list) = t {
597            d += 1;
598            t = list.elem();
599        }
600        d
601    }
602
603    /// Compares the datatype with another, ignoring nested field names and ids.
604    pub fn equals_datatype(&self, other: &DataType) -> bool {
605        match (self, other) {
606            (Self::Struct(s1), Self::Struct(s2)) => s1.equals_datatype(s2),
607            (Self::List(d1), Self::List(d2)) => d1.elem().equals_datatype(d2.elem()),
608            (Self::Map(m1), Self::Map(m2)) => {
609                m1.key().equals_datatype(m2.key()) && m1.value().equals_datatype(m2.value())
610            }
611            _ => self == other,
612        }
613    }
614
615    /// Whether a column with this data type can be altered to a new data type. This determines
616    /// the encoding of the column data.
617    ///
618    /// Returns...
619    /// - `None`, if the data type is simple or does not contain a struct type.
620    /// - `Some(true)`, if the data type contains a struct type with field ids ([`StructType::has_ids`]).
621    /// - `Some(false)`, if the data type contains a struct type without field ids.
622    pub fn can_alter(&self) -> Option<bool> {
623        match self {
624            data_types::simple!() => None,
625            DataType::Struct(struct_type) => {
626                // As long as we meet a struct type, we can check its `ids` field to determine if
627                // it can be altered.
628                let struct_can_alter = struct_type.has_ids();
629                // In debug build, we assert that once a struct type does (or does not) have ids,
630                // all its composite fields should have the same property.
631                if cfg!(debug_assertions) {
632                    for field in struct_type.types() {
633                        if let Some(field_can_alter) = field.can_alter() {
634                            assert_eq!(struct_can_alter, field_can_alter);
635                        }
636                    }
637                }
638                Some(struct_can_alter)
639            }
640
641            DataType::List(list_type) => list_type.elem().can_alter(),
642            DataType::Map(map_type) => {
643                debug_assert!(
644                    map_type.key().is_simple(),
645                    "unexpected key type of map {map_type:?}"
646                );
647                map_type.value().can_alter()
648            }
649        }
650    }
651}
652
653impl From<StructType> for DataType {
654    fn from(value: StructType) -> Self {
655        Self::Struct(value)
656    }
657}
658
659impl From<DataType> for PbDataType {
660    fn from(data_type: DataType) -> Self {
661        data_type.to_protobuf()
662    }
663}
664
mod private {
    use super::*;

    // Note: putting a pub trait inside a private mod only makes the name private.
    // The trait methods will still be publicly available...
    // a.k.a. ["Voldemort type"](https://rust-lang.github.io/rfcs/2145-type-privacy.html#lint-3-voldemort-types-its-reachable-but-i-cant-name-it)

    /// Common trait bounds of scalar and scalar reference types.
    ///
    /// `Impl` is the enum the type converts to and from: `ScalarImpl` for owned scalars,
    /// `ScalarRefImpl` for scalar references.
    ///
    /// NOTE(rc): `Hash` is not in the trait bound list, it's implemented as [`ScalarRef::hash_scalar`].
    pub trait ScalarBounds<Impl> = Debug
        + Send
        + Sync
        + Clone
        + PartialEq
        + Eq
        // in default ascending order
        + PartialOrd
        + Ord
        + TryFrom<Impl, Error = ArrayError>
        // `ScalarImpl`/`ScalarRefImpl`
        + Into<Impl>;
}
688
689/// `Scalar` is a trait over all possible owned types in the evaluation
690/// framework.
691///
692/// `Scalar` is reciprocal to `ScalarRef`. Use `as_scalar_ref` to get a
693/// reference which has the same lifetime as `self`.
694pub trait Scalar: private::ScalarBounds<ScalarImpl> + 'static {
695    /// Type for reference of `Scalar`
696    type ScalarRefType<'a>: ScalarRef<'a, ScalarType = Self> + 'a
697    where
698        Self: 'a;
699
700    /// Get a reference to current scalar.
701    fn as_scalar_ref(&self) -> Self::ScalarRefType<'_>;
702
703    fn to_scalar_value(self) -> ScalarImpl {
704        self.into()
705    }
706}
707
/// `ScalarRef` is a trait over all possible references in the evaluation
/// framework.
///
/// `ScalarRef` is reciprocal to `Scalar`. Use `to_owned_scalar` to get an
/// owned scalar.
///
/// Implementors must be `Copy`.
pub trait ScalarRef<'a>: private::ScalarBounds<ScalarRefImpl<'a>> + 'a + Copy {
    /// `ScalarType` is the owned type of current `ScalarRef`.
    type ScalarType: Scalar<ScalarRefType<'a> = Self>;

    /// Convert `ScalarRef` to an owned scalar.
    fn to_owned_scalar(&self) -> Self::ScalarType;

    /// A wrapped hash function to get the hash value for this scalar.
    ///
    /// This takes the place of a `Hash` bound, which these types deliberately do not have.
    fn hash_scalar<H: std::hash::Hasher>(&self, state: &mut H);
}
723
/// Define `ScalarImpl` and `ScalarRefImpl` with macro.
///
/// The macro takes a comma-separated list of
/// `{ data_type, variant_name, suffix_name, scalar, scalar_ref, array, builder }` entries and
/// emits one enum variant per entry: `variant_name(scalar)` in `ScalarImpl` and
/// `variant_name(scalar_ref)` in `ScalarRefImpl`. The remaining entry fields are not used here.
macro_rules! scalar_impl_enum {
    ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty } ),*) => {
        /// `ScalarImpl` embeds all possible scalars in the evaluation framework.
        ///
        /// Note: `ScalarImpl` doesn't contain all information of its `DataType`,
        /// so sometimes they need to be used together.
        /// e.g., for `Struct`, we don't have the field names in the value.
        ///
        /// See `for_all_variants` for the definition.
        #[derive(Debug, Clone, PartialEq, Eq, EstimateSize)]
        pub enum ScalarImpl {
            $( $variant_name($scalar) ),*
        }

        /// `ScalarRefImpl` embeds all possible scalar references in the evaluation
        /// framework.
        ///
        /// Note: `ScalarRefImpl` doesn't contain all information of its `DataType`,
        /// so sometimes they need to be used together.
        /// e.g., for `Struct`, we don't have the field names in the value.
        ///
        /// See `for_all_variants` for the definition.
        #[derive(Debug, Copy, Clone, PartialEq, Eq)]
        pub enum ScalarRefImpl<'scalar> {
            $( $variant_name($scalar_ref) ),*
        }
    };
}

// Instantiate both enums from the variant list supplied by `for_all_variants!`.
for_all_variants! { scalar_impl_enum }
755
// We MUST NOT implement `Ord` for `ScalarImpl` because that will make `Datum` derive an incorrect
// default `Ord`. To get a default-ordered `ScalarImpl`/`ScalarRefImpl`/`Datum`/`DatumRef`, you can
// use `DefaultOrdered<T>`. If non-default order is needed, please refer to `sort_util`.
impl !PartialOrd for ScalarImpl {}
impl !PartialOrd for ScalarRefImpl<'_> {}

/// An optional owned scalar value.
pub type Datum = Option<ScalarImpl>;
/// An optional scalar reference; the borrowed counterpart of [`Datum`].
pub type DatumRef<'a> = Option<ScalarRefImpl<'a>>;
764
/// This trait is to implement `to_owned_datum` for `Option<ScalarImpl>`
///
/// In this file it is implemented for `&Datum`, for any `T: Into<ScalarImpl>`, and for
/// `Option<T>` of such a `T`.
pub trait ToOwnedDatum {
    /// Convert the datum to an owned [`Datum`].
    fn to_owned_datum(self) -> Datum;
}
770
771impl ToOwnedDatum for &Datum {
772    #[inline(always)]
773    fn to_owned_datum(self) -> Datum {
774        self.clone()
775    }
776}
777
778impl<T: Into<ScalarImpl>> ToOwnedDatum for T {
779    #[inline(always)]
780    fn to_owned_datum(self) -> Datum {
781        Some(self.into())
782    }
783}
784
785impl<T: Into<ScalarImpl>> ToOwnedDatum for Option<T> {
786    #[inline(always)]
787    fn to_owned_datum(self) -> Datum {
788        self.map(Into::into)
789    }
790}
791
/// Types that can be viewed as a [`DatumRef`].
///
/// `#[auto_impl(&)]` additionally generates an implementation for references to implementors.
#[auto_impl::auto_impl(&)]
pub trait ToDatumRef: PartialEq + Eq + Debug {
    /// Convert the datum to [`DatumRef`].
    fn to_datum_ref(&self) -> DatumRef<'_>;
}
797
798impl ToDatumRef for Datum {
799    #[inline(always)]
800    fn to_datum_ref(&self) -> DatumRef<'_> {
801        self.as_ref().map(|d| d.as_scalar_ref_impl())
802    }
803}
804impl ToDatumRef for Option<&ScalarImpl> {
805    #[inline(always)]
806    fn to_datum_ref(&self) -> DatumRef<'_> {
807        self.map(|d| d.as_scalar_ref_impl())
808    }
809}
impl ToDatumRef for DatumRef<'_> {
    /// A `DatumRef` already is the target type, so this returns a copy of `self`.
    #[inline(always)]
    fn to_datum_ref(&self) -> DatumRef<'_> {
        *self
    }
}
816
/// To make sure there is `as_scalar_ref` for all scalar ref types.
/// See <https://github.com/risingwavelabs/risingwave/pull/9977/files#r1208972881>
///
/// This is used by the expr macro.
pub trait SelfAsScalarRef {
    /// Returns a copy of `self`.
    fn as_scalar_ref(&self) -> Self;
}
// Implements `SelfAsScalarRef` for each listed type by returning `*self`, which requires the
// type to be `Copy`.
macro_rules! impl_self_as_scalar_ref {
    ($($t:ty),*) => {
        $(
            impl SelfAsScalarRef for $t {
                fn as_scalar_ref(&self) -> Self {
                    *self
                }
            }
        )*
    };
}
// NOTE(review): `VectorRef<'_>` is re-exported by this module but is not in this list — confirm
// whether it needs an impl here or gets `as_scalar_ref` from elsewhere.
impl_self_as_scalar_ref! { &str, &[u8], Int256Ref<'_>, JsonbRef<'_>, ListRef<'_>, StructRef<'_>, ScalarRefImpl<'_>, MapRef<'_> }
836
/// `for_all_native_types` includes all native variants of our scalar types.
///
/// Specifically, it doesn't support u8/u16/u32/u64.
///
/// The macro invokes the given `$macro` with a list of `{ type, variant, read_fn }` entries.
/// `Serial` is paired with `read_i64`, the same reader name as `i64`.
/// NOTE(review): the third item is presumably the name of a reader method for the type's
/// encoded form — confirm at the use sites.
#[macro_export]
macro_rules! for_all_native_types {
    ($macro:ident) => {
        $macro! {
            { i16, Int16, read_i16 },
            { i32, Int32, read_i32 },
            { i64, Int64, read_i64 },
            { Serial, Serial, read_i64 },
            { $crate::types::F32, Float32, read_f32 },
            { $crate::types::F64, Float64, read_f64 }
        }
    };
}
853
854/// `impl_convert` implements several conversions for `Scalar`.
855/// * `Scalar <-> ScalarImpl` with `From` and `TryFrom` trait.
856/// * `ScalarRef <-> ScalarRefImpl` with `From` and `TryFrom` trait.
857/// * `&ScalarImpl -> &Scalar` with `impl.as_int16()`.
858/// * `ScalarImpl -> Scalar` with `impl.into_int16()`.
859macro_rules! impl_convert {
860    ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty } ),*) => {
861        $(
862            impl From<$scalar> for ScalarImpl {
863                fn from(val: $scalar) -> Self {
864                    ScalarImpl::$variant_name(val)
865                }
866            }
867
868            impl TryFrom<ScalarImpl> for $scalar {
869                type Error = ArrayError;
870
871                fn try_from(val: ScalarImpl) -> ArrayResult<Self> {
872                    match val {
873                        ScalarImpl::$variant_name(scalar) => Ok(scalar),
874                        other_scalar => bail!("cannot convert ScalarImpl::{} to concrete type", other_scalar.get_ident()),
875                    }
876                }
877            }
878
879            impl <'scalar> From<$scalar_ref> for ScalarRefImpl<'scalar> {
880                fn from(val: $scalar_ref) -> Self {
881                    ScalarRefImpl::$variant_name(val)
882                }
883            }
884
885            impl <'scalar> TryFrom<ScalarRefImpl<'scalar>> for $scalar_ref {
886                type Error = ArrayError;
887
888                fn try_from(val: ScalarRefImpl<'scalar>) -> ArrayResult<Self> {
889                    match val {
890                        ScalarRefImpl::$variant_name(scalar_ref) => Ok(scalar_ref),
891                        other_scalar => bail!("cannot convert ScalarRefImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name)),
892                    }
893                }
894            }
895
896            paste! {
897                impl ScalarImpl {
898                    /// # Panics
899                    /// If the scalar is not of the expected type.
900                    pub fn [<as_ $suffix_name>](&self) -> &$scalar {
901                        match self {
902                            Self::$variant_name(scalar) => scalar,
903                            other_scalar => panic!("cannot convert ScalarImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
904                        }
905                    }
906
907                    /// # Panics
908                    /// If the scalar is not of the expected type.
909                    pub fn [<into_ $suffix_name>](self) -> $scalar {
910                        match self {
911                            Self::$variant_name(scalar) => scalar,
912                            other_scalar =>  panic!("cannot convert ScalarImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
913                        }
914                    }
915                }
916
917                impl <'scalar> ScalarRefImpl<'scalar> {
918                    /// # Panics
919                    /// If the scalar is not of the expected type.
920                    pub fn [<into_ $suffix_name>](self) -> $scalar_ref {
921                        match self {
922                            Self::$variant_name(inner) => inner,
923                            other_scalar => panic!("cannot convert ScalarRefImpl::{} to concrete type {}", other_scalar.get_ident(), stringify!($variant_name))
924                        }
925                    }
926                }
927            }
928        )*
929    };
930}
931
932for_all_variants! { impl_convert }
933
934// Implement `From<raw float>` for `ScalarImpl::Float` as a sugar.
935impl From<f32> for ScalarImpl {
936    fn from(f: f32) -> Self {
937        Self::Float32(f.into())
938    }
939}
940impl From<f64> for ScalarImpl {
941    fn from(f: f64) -> Self {
942        Self::Float64(f.into())
943    }
944}
945
946// Implement `From<string like>` for `ScalarImpl::Utf8` as a sugar.
947impl From<String> for ScalarImpl {
948    fn from(s: String) -> Self {
949        Self::Utf8(s.into_boxed_str())
950    }
951}
952impl From<&str> for ScalarImpl {
953    fn from(s: &str) -> Self {
954        Self::Utf8(s.into())
955    }
956}
957impl From<&String> for ScalarImpl {
958    fn from(s: &String) -> Self {
959        Self::Utf8(s.as_str().into())
960    }
961}
962impl TryFrom<ScalarImpl> for String {
963    type Error = ArrayError;
964
965    fn try_from(val: ScalarImpl) -> ArrayResult<Self> {
966        match val {
967            ScalarImpl::Utf8(s) => Ok(s.into()),
968            other_scalar => bail!(
969                "cannot convert ScalarImpl::{} to concrete type",
970                other_scalar.get_ident()
971            ),
972        }
973    }
974}
975
976impl From<char> for ScalarImpl {
977    fn from(c: char) -> Self {
978        Self::Utf8(c.to_string().into())
979    }
980}
981
982impl From<&[u8]> for ScalarImpl {
983    fn from(s: &[u8]) -> Self {
984        Self::Bytea(s.into())
985    }
986}
987
988impl From<JsonbRef<'_>> for ScalarImpl {
989    fn from(jsonb: JsonbRef<'_>) -> Self {
990        Self::Jsonb(jsonb.to_owned_scalar())
991    }
992}
993
994impl<T: PrimitiveArrayItemType> From<Vec<T>> for ScalarImpl {
995    fn from(v: Vec<T>) -> Self {
996        Self::List(v.into_iter().collect())
997    }
998}
999
1000impl<T: PrimitiveArrayItemType> From<Vec<Option<T>>> for ScalarImpl {
1001    fn from(v: Vec<Option<T>>) -> Self {
1002        Self::List(v.into_iter().collect())
1003    }
1004}
1005
1006impl From<Vec<String>> for ScalarImpl {
1007    fn from(v: Vec<String>) -> Self {
1008        Self::List(v.iter().map(|s| s.as_str()).collect())
1009    }
1010}
1011
1012impl From<Vec<u8>> for ScalarImpl {
1013    fn from(v: Vec<u8>) -> Self {
1014        Self::Bytea(v.into())
1015    }
1016}
1017
1018impl From<Bytes> for ScalarImpl {
1019    fn from(v: Bytes) -> Self {
1020        Self::Bytea(v.as_ref().into())
1021    }
1022}
1023
1024impl From<ListRef<'_>> for ScalarImpl {
1025    fn from(list: ListRef<'_>) -> Self {
1026        Self::List(list.to_owned_scalar())
1027    }
1028}
1029
1030impl ScalarImpl {
1031    /// Creates a scalar from pgwire "BINARY" format.
1032    ///
1033    /// The counterpart of [`to_binary::ToBinary`].
1034    pub fn from_binary(bytes: &Bytes, data_type: &DataType) -> Result<Self, BoxedError> {
1035        let res = match data_type {
1036            DataType::Varchar => Self::Utf8(String::from_sql(&Type::VARCHAR, bytes)?.into()),
1037            DataType::Bytea => Self::Bytea(Vec::<u8>::from_sql(&Type::BYTEA, bytes)?.into()),
1038            DataType::Boolean => Self::Bool(bool::from_sql(&Type::BOOL, bytes)?),
1039            DataType::Int16 => Self::Int16(i16::from_sql(&Type::INT2, bytes)?),
1040            DataType::Int32 => Self::Int32(i32::from_sql(&Type::INT4, bytes)?),
1041            DataType::Int64 => Self::Int64(i64::from_sql(&Type::INT8, bytes)?),
1042            DataType::Serial => Self::Serial(Serial::from(i64::from_sql(&Type::INT8, bytes)?)),
1043            DataType::Float32 => Self::Float32(f32::from_sql(&Type::FLOAT4, bytes)?.into()),
1044            DataType::Float64 => Self::Float64(f64::from_sql(&Type::FLOAT8, bytes)?.into()),
1045            DataType::Decimal => {
1046                Self::Decimal(rust_decimal::Decimal::from_sql(&Type::NUMERIC, bytes)?.into())
1047            }
1048            DataType::Date => Self::Date(chrono::NaiveDate::from_sql(&Type::DATE, bytes)?.into()),
1049            DataType::Time => Self::Time(chrono::NaiveTime::from_sql(&Type::TIME, bytes)?.into()),
1050            DataType::Timestamp => {
1051                Self::Timestamp(chrono::NaiveDateTime::from_sql(&Type::TIMESTAMP, bytes)?.into())
1052            }
1053            DataType::Timestamptz => Self::Timestamptz(
1054                chrono::DateTime::<chrono::Utc>::from_sql(&Type::TIMESTAMPTZ, bytes)?.into(),
1055            ),
1056            DataType::Interval => Self::Interval(Interval::from_sql(&Type::INTERVAL, bytes)?),
1057            DataType::Jsonb => Self::Jsonb(
1058                JsonbVal::value_deserialize(bytes)
1059                    .ok_or_else(|| "invalid value of Jsonb".to_owned())?,
1060            ),
1061            DataType::Int256 => Self::Int256(Int256::from_binary(bytes)?),
1062            DataType::Vector(_) | DataType::Struct(_) | DataType::List(_) | DataType::Map(_) => {
1063                return Err(format!("unsupported data type: {}", data_type).into());
1064            }
1065        };
1066        Ok(res)
1067    }
1068
1069    /// Creates a scalar from pgwire "TEXT" format.
1070    ///
1071    /// The counterpart of [`ToText`].
1072    pub fn from_text(s: &str, data_type: &DataType) -> Result<Self, BoxedError> {
1073        Ok(match data_type {
1074            DataType::Boolean => str_to_bool(s)?.into(),
1075            DataType::Int16 => i16::from_str(s)?.into(),
1076            DataType::Int32 => i32::from_str(s)?.into(),
1077            DataType::Int64 => i64::from_str(s)?.into(),
1078            DataType::Int256 => Int256::from_str(s)?.into(),
1079            DataType::Serial => Serial::from(i64::from_str(s)?).into(),
1080            DataType::Decimal => Decimal::from_str(s)?.into(),
1081            DataType::Float32 => F32::from_str(s)?.into(),
1082            DataType::Float64 => F64::from_str(s)?.into(),
1083            DataType::Varchar => s.into(),
1084            DataType::Date => Date::from_str(s)?.into(),
1085            DataType::Timestamp => Timestamp::from_str(s)?.into(),
1086            // We only handle the case with timezone here, and leave the implicit session timezone case
1087            // for later phase.
1088            DataType::Timestamptz => Timestamptz::from_str(s)?.into(),
1089            DataType::Time => Time::from_str(s)?.into(),
1090            DataType::Interval => Interval::from_str(s)?.into(),
1091            DataType::List(_) => ListValue::from_str(s, data_type)?.into(),
1092            DataType::Struct(st) => StructValue::from_str(s, st)?.into(),
1093            DataType::Jsonb => JsonbVal::from_str(s)?.into(),
1094            DataType::Bytea => str_to_bytea(s)?.into(),
1095            DataType::Vector(size) => VectorVal::from_text(s, *size)?.into(),
1096            DataType::Map(_m) => return Err("map from text is not supported".into()),
1097        })
1098    }
1099
1100    pub fn from_text_for_test(s: &str, data_type: &DataType) -> Result<Self, BoxedError> {
1101        Ok(match data_type {
1102            DataType::Map(map_type) => MapValue::from_str_for_test(s, map_type)?.into(),
1103            _ => ScalarImpl::from_text(s, data_type)?,
1104        })
1105    }
1106}
1107
1108impl From<ScalarRefImpl<'_>> for ScalarImpl {
1109    fn from(scalar_ref: ScalarRefImpl<'_>) -> Self {
1110        scalar_ref.into_scalar_impl()
1111    }
1112}
1113
1114impl<'a> From<&'a ScalarImpl> for ScalarRefImpl<'a> {
1115    fn from(scalar: &'a ScalarImpl) -> Self {
1116        scalar.as_scalar_ref_impl()
1117    }
1118}
1119
1120impl ScalarImpl {
1121    /// Converts [`ScalarImpl`] to [`ScalarRefImpl`]
1122    pub fn as_scalar_ref_impl(&self) -> ScalarRefImpl<'_> {
1123        dispatch_scalar_variants!(self, inner, { inner.as_scalar_ref().into() })
1124    }
1125}
1126
1127impl ScalarRefImpl<'_> {
1128    /// Converts [`ScalarRefImpl`] to [`ScalarImpl`]
1129    pub fn into_scalar_impl(self) -> ScalarImpl {
1130        dispatch_scalar_ref_variants!(self, inner, { inner.to_owned_scalar().into() })
1131    }
1132}
1133
1134impl Hash for ScalarImpl {
1135    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1136        dispatch_scalar_variants!(self, inner, { inner.as_scalar_ref().hash_scalar(state) })
1137    }
1138}
1139
1140impl Hash for ScalarRefImpl<'_> {
1141    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1142        dispatch_scalar_ref_variants!(self, inner, { inner.hash_scalar(state) })
1143    }
1144}
1145
1146/// Feeds the raw scalar reference of `datum` to the given `state`, which should behave the same
1147/// as [`crate::array::Array::hash_at`], where NULL value will be carefully handled.
1148///
1149/// **FIXME**: the result of this function might be different from [`std::hash::Hash`] due to the
1150/// type alias of `DatumRef = Option<_>`, we should manually implement [`std::hash::Hash`] for
1151/// [`DatumRef`] in the future when it becomes a newtype. (#477)
1152#[inline(always)]
1153pub fn hash_datum(datum: impl ToDatumRef, state: &mut impl std::hash::Hasher) {
1154    match datum.to_datum_ref() {
1155        Some(scalar_ref) => scalar_ref.hash(state),
1156        None => NULL_VAL_FOR_HASH.hash(state),
1157    }
1158}
1159
1160impl ScalarRefImpl<'_> {
1161    pub fn binary_format(&self, data_type: &DataType) -> to_binary::Result<Bytes> {
1162        use self::to_binary::ToBinary;
1163        self.to_binary_with_type(data_type)
1164    }
1165
1166    pub fn text_format(&self, data_type: &DataType) -> String {
1167        self.to_text_with_type(data_type)
1168    }
1169
1170    /// Serialize the scalar into the `memcomparable` format.
1171    pub fn serialize(
1172        &self,
1173        ser: &mut memcomparable::Serializer<impl BufMut>,
1174    ) -> memcomparable::Result<()> {
1175        match self {
1176            Self::Int16(v) => v.serialize(ser)?,
1177            Self::Int32(v) => v.serialize(ser)?,
1178            Self::Int64(v) => v.serialize(ser)?,
1179            Self::Serial(v) => v.serialize(ser)?,
1180            Self::Float32(v) => v.serialize(ser)?,
1181            Self::Float64(v) => v.serialize(ser)?,
1182            Self::Utf8(v) => v.serialize(ser)?,
1183            Self::Bytea(v) => ser.serialize_bytes(v)?,
1184            Self::Bool(v) => v.serialize(ser)?,
1185            Self::Decimal(v) => ser.serialize_decimal((*v).into())?,
1186            Self::Interval(v) => v.serialize(ser)?,
1187            Self::Date(v) => v.0.num_days_from_ce().serialize(ser)?,
1188            Self::Timestamp(v) => {
1189                v.0.and_utc().timestamp().serialize(&mut *ser)?;
1190                v.0.and_utc().timestamp_subsec_nanos().serialize(ser)?;
1191            }
1192            Self::Timestamptz(v) => v.serialize(ser)?,
1193            Self::Time(v) => {
1194                v.0.num_seconds_from_midnight().serialize(&mut *ser)?;
1195                v.0.nanosecond().serialize(ser)?;
1196            }
1197            Self::Int256(v) => v.memcmp_serialize(ser)?,
1198            Self::Jsonb(v) => v.memcmp_serialize(ser)?,
1199            Self::Struct(v) => v.memcmp_serialize(ser)?,
1200            Self::List(v) => v.memcmp_serialize(ser)?,
1201            Self::Map(v) => v.memcmp_serialize(ser)?,
1202            Self::Vector(v) => v.memcmp_serialize(ser)?,
1203        };
1204        Ok(())
1205    }
1206}
1207
1208impl ScalarImpl {
1209    /// Serialize the scalar into the `memcomparable` format.
1210    pub fn serialize(
1211        &self,
1212        ser: &mut memcomparable::Serializer<impl BufMut>,
1213    ) -> memcomparable::Result<()> {
1214        self.as_scalar_ref_impl().serialize(ser)
1215    }
1216
1217    /// Deserialize the scalar from the `memcomparable` format.
1218    pub fn deserialize(
1219        ty: &DataType,
1220        de: &mut memcomparable::Deserializer<impl Buf>,
1221    ) -> memcomparable::Result<Self> {
1222        use DataType as Ty;
1223        Ok(match ty {
1224            Ty::Int16 => Self::Int16(i16::deserialize(de)?),
1225            Ty::Int32 => Self::Int32(i32::deserialize(de)?),
1226            Ty::Int64 => Self::Int64(i64::deserialize(de)?),
1227            Ty::Int256 => Self::Int256(Int256::memcmp_deserialize(de)?),
1228            Ty::Serial => Self::Serial(Serial::from(i64::deserialize(de)?)),
1229            Ty::Float32 => Self::Float32(f32::deserialize(de)?.into()),
1230            Ty::Float64 => Self::Float64(f64::deserialize(de)?.into()),
1231            Ty::Varchar => Self::Utf8(Box::<str>::deserialize(de)?),
1232            Ty::Bytea => Self::Bytea(serde_bytes::ByteBuf::deserialize(de)?.into_vec().into()),
1233            Ty::Boolean => Self::Bool(bool::deserialize(de)?),
1234            Ty::Decimal => Self::Decimal(de.deserialize_decimal()?.into()),
1235            Ty::Interval => Self::Interval(Interval::deserialize(de)?),
1236            Ty::Time => Self::Time({
1237                let secs = u32::deserialize(&mut *de)?;
1238                let nano = u32::deserialize(de)?;
1239                Time::with_secs_nano(secs, nano)
1240                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1241            }),
1242            Ty::Timestamp => Self::Timestamp({
1243                let secs = i64::deserialize(&mut *de)?;
1244                let nsecs = u32::deserialize(de)?;
1245                Timestamp::with_secs_nsecs(secs, nsecs)
1246                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1247            }),
1248            Ty::Timestamptz => Self::Timestamptz(Timestamptz::deserialize(de)?),
1249            Ty::Date => Self::Date({
1250                let days = i32::deserialize(de)?;
1251                Date::with_days_since_ce(days)
1252                    .map_err(|e| memcomparable::Error::Message(e.to_report_string()))?
1253            }),
1254            Ty::Jsonb => Self::Jsonb(JsonbVal::memcmp_deserialize(de)?),
1255            Ty::Struct(t) => StructValue::memcmp_deserialize(t.types(), de)?.to_scalar_value(),
1256            Ty::List(t) => ListValue::memcmp_deserialize(t, de)?.to_scalar_value(),
1257            Ty::Map(t) => MapValue::memcmp_deserialize(t, de)?.to_scalar_value(),
1258            Ty::Vector(dimension) => {
1259                VectorVal::memcmp_deserialize(*dimension, de)?.to_scalar_value()
1260            }
1261        })
1262    }
1263
1264    pub fn as_integral(&self) -> i64 {
1265        match self {
1266            Self::Int16(v) => *v as i64,
1267            Self::Int32(v) => *v as i64,
1268            Self::Int64(v) => *v,
1269            _ => panic!(
1270                "Can't convert ScalarImpl::{} to a integral",
1271                self.get_ident()
1272            ),
1273        }
1274    }
1275}
1276
1277/// Returns whether the `literal` matches the `data_type`.
1278pub fn literal_type_match(data_type: &DataType, literal: Option<&ScalarImpl>) -> bool {
1279    match literal {
1280        Some(scalar) => {
1281            macro_rules! matches {
1282                ($( { $data_type:ident, $variant_name:ident, $suffix_name:ident, $scalar:ty, $scalar_ref:ty, $array:ty, $builder:ty }),*) => {
1283                    match (data_type, scalar) {
1284                        $(
1285                            (DataType::$data_type { .. }, ScalarImpl::$variant_name(_)) => true,
1286                            (DataType::$data_type { .. }, _) => false, // so that we won't forget to match a new logical type
1287                        )*
1288                    }
1289                }
1290            }
1291            for_all_variants! { matches }
1292        }
1293        None => true,
1294    }
1295}
1296
1297#[cfg(test)]
1298mod tests {
1299    use std::hash::{BuildHasher, Hasher};
1300
1301    use strum::IntoEnumIterator;
1302
1303    use super::*;
1304    use crate::util::hash_util::Crc32FastBuilder;
1305
1306    #[test]
1307    fn test_size() {
1308        use static_assertions::const_assert_eq;
1309
1310        use crate::array::*;
1311
1312        macro_rules! assert_item_size_eq {
1313            ($array:ty, $size:literal) => {
1314                const_assert_eq!(std::mem::size_of::<<$array as Array>::OwnedItem>(), $size);
1315            };
1316        }
1317
1318        assert_item_size_eq!(StructArray, 16); // Box<[Datum]>
1319        assert_item_size_eq!(ListArray, 8); // Box<ArrayImpl>
1320        assert_item_size_eq!(Utf8Array, 16); // Box<str>
1321        assert_item_size_eq!(IntervalArray, 16);
1322        assert_item_size_eq!(TimestampArray, 12);
1323
1324        // TODO: try to reduce the memory usage of `Decimal`, `ScalarImpl` and `Datum`.
1325        assert_item_size_eq!(DecimalArray, 20);
1326
1327        const_assert_eq!(std::mem::size_of::<ScalarImpl>(), 24);
1328        const_assert_eq!(std::mem::size_of::<ScalarRefImpl<'_>>(), 24);
1329        const_assert_eq!(std::mem::size_of::<Datum>(), 24);
1330        const_assert_eq!(std::mem::size_of::<StructType>(), 8);
1331        const_assert_eq!(std::mem::size_of::<DataType>(), 16);
1332    }
1333
1334    #[test]
1335    fn test_data_type_display() {
1336        let d: DataType =
1337            StructType::new(vec![("i", DataType::Int32), ("j", DataType::Varchar)]).into();
1338        assert_eq!(
1339            format!("{}", d),
1340            "struct<i integer, j character varying>".to_owned()
1341        );
1342    }
1343
1344    #[test]
1345    fn test_hash_implementation() {
1346        fn test(datum: Datum, data_type: DataType) {
1347            assert!(literal_type_match(&data_type, datum.as_ref()));
1348
1349            let mut builder = data_type.create_array_builder(6);
1350            for _ in 0..3 {
1351                builder.append_null();
1352                builder.append(&datum);
1353            }
1354            let array = builder.finish();
1355
1356            let hash_from_array = {
1357                let mut state = Crc32FastBuilder.build_hasher();
1358                array.hash_at(3, &mut state);
1359                state.finish()
1360            };
1361
1362            let hash_from_datum = {
1363                let mut state = Crc32FastBuilder.build_hasher();
1364                hash_datum(&datum, &mut state);
1365                state.finish()
1366            };
1367
1368            let hash_from_datum_ref = {
1369                let mut state = Crc32FastBuilder.build_hasher();
1370                hash_datum(datum.to_datum_ref(), &mut state);
1371                state.finish()
1372            };
1373
1374            assert_eq!(hash_from_array, hash_from_datum);
1375            assert_eq!(hash_from_datum, hash_from_datum_ref);
1376        }
1377
1378        for name in DataTypeName::iter() {
1379            let (scalar, data_type) = match name {
1380                DataTypeName::Boolean => (ScalarImpl::Bool(true), DataType::Boolean),
1381                DataTypeName::Int16 => (ScalarImpl::Int16(233), DataType::Int16),
1382                DataTypeName::Int32 => (ScalarImpl::Int32(233333), DataType::Int32),
1383                DataTypeName::Int64 => (ScalarImpl::Int64(233333333333), DataType::Int64),
1384                DataTypeName::Int256 => (
1385                    ScalarImpl::Int256(233333333333_i64.into()),
1386                    DataType::Int256,
1387                ),
1388                DataTypeName::Serial => (ScalarImpl::Serial(233333333333.into()), DataType::Serial),
1389                DataTypeName::Float32 => (ScalarImpl::Float32(23.33.into()), DataType::Float32),
1390                DataTypeName::Float64 => (
1391                    ScalarImpl::Float64(23.333333333333.into()),
1392                    DataType::Float64,
1393                ),
1394                DataTypeName::Decimal => (
1395                    ScalarImpl::Decimal("233.33".parse().unwrap()),
1396                    DataType::Decimal,
1397                ),
1398                DataTypeName::Date => (
1399                    ScalarImpl::Date(Date::from_ymd_uncheck(2333, 3, 3)),
1400                    DataType::Date,
1401                ),
1402                DataTypeName::Varchar => (ScalarImpl::Utf8("233".into()), DataType::Varchar),
1403                DataTypeName::Bytea => (
1404                    ScalarImpl::Bytea("\\x233".as_bytes().into()),
1405                    DataType::Bytea,
1406                ),
1407                DataTypeName::Time => (
1408                    ScalarImpl::Time(Time::from_hms_uncheck(2, 3, 3)),
1409                    DataType::Time,
1410                ),
1411                DataTypeName::Timestamp => (
1412                    ScalarImpl::Timestamp(Timestamp::from_timestamp_uncheck(23333333, 2333)),
1413                    DataType::Timestamp,
1414                ),
1415                DataTypeName::Timestamptz => (
1416                    ScalarImpl::Timestamptz(Timestamptz::from_micros(233333333)),
1417                    DataType::Timestamptz,
1418                ),
1419                DataTypeName::Interval => (
1420                    ScalarImpl::Interval(Interval::from_month_day_usec(2, 3, 3333)),
1421                    DataType::Interval,
1422                ),
1423                DataTypeName::Jsonb => (ScalarImpl::Jsonb(JsonbVal::null()), DataType::Jsonb),
1424                DataTypeName::Struct => (
1425                    ScalarImpl::Struct(StructValue::new(vec![
1426                        ScalarImpl::Int64(233).into(),
1427                        ScalarImpl::Float64(23.33.into()).into(),
1428                    ])),
1429                    DataType::Struct(StructType::new(vec![
1430                        ("a", DataType::Int64),
1431                        ("b", DataType::Float64),
1432                    ])),
1433                ),
1434                DataTypeName::List => (
1435                    ScalarImpl::List(ListValue::from_iter([233i64, 2333])),
1436                    DataType::Int64.list(),
1437                ),
1438                DataTypeName::Vector => (
1439                    ScalarImpl::Vector(VectorVal::from_iter(
1440                        (0..VectorVal::TEST_VECTOR_DIMENSION)
1441                            .map(|i| ((i + 1) as f32).try_into().unwrap()),
1442                    )),
1443                    DataType::Vector(VectorVal::TEST_VECTOR_DIMENSION),
1444                ),
1445                DataTypeName::Map => {
1446                    // map is not hashable
1447                    continue;
1448                }
1449            };
1450
1451            test(Some(scalar), data_type.clone());
1452            test(None, data_type);
1453        }
1454    }
1455
1456    #[test]
1457    fn test_data_type_from_str() {
1458        assert_eq!(DataType::from_str("bool").unwrap(), DataType::Boolean);
1459        assert_eq!(DataType::from_str("boolean").unwrap(), DataType::Boolean);
1460        assert_eq!(DataType::from_str("BOOL").unwrap(), DataType::Boolean);
1461        assert_eq!(DataType::from_str("BOOLEAN").unwrap(), DataType::Boolean);
1462
1463        assert_eq!(DataType::from_str("int2").unwrap(), DataType::Int16);
1464        assert_eq!(DataType::from_str("smallint").unwrap(), DataType::Int16);
1465        assert_eq!(DataType::from_str("INT2").unwrap(), DataType::Int16);
1466        assert_eq!(DataType::from_str("SMALLINT").unwrap(), DataType::Int16);
1467
1468        assert_eq!(DataType::from_str("int4").unwrap(), DataType::Int32);
1469        assert_eq!(DataType::from_str("integer").unwrap(), DataType::Int32);
1470        assert_eq!(DataType::from_str("int4").unwrap(), DataType::Int32);
1471        assert_eq!(DataType::from_str("INT4").unwrap(), DataType::Int32);
1472        assert_eq!(DataType::from_str("INTEGER").unwrap(), DataType::Int32);
1473        assert_eq!(DataType::from_str("INT").unwrap(), DataType::Int32);
1474
1475        assert_eq!(DataType::from_str("int8").unwrap(), DataType::Int64);
1476        assert_eq!(DataType::from_str("bigint").unwrap(), DataType::Int64);
1477        assert_eq!(DataType::from_str("INT8").unwrap(), DataType::Int64);
1478        assert_eq!(DataType::from_str("BIGINT").unwrap(), DataType::Int64);
1479
1480        assert_eq!(DataType::from_str("rw_int256").unwrap(), DataType::Int256);
1481        assert_eq!(DataType::from_str("RW_INT256").unwrap(), DataType::Int256);
1482
1483        assert_eq!(DataType::from_str("float4").unwrap(), DataType::Float32);
1484        assert_eq!(DataType::from_str("real").unwrap(), DataType::Float32);
1485        assert_eq!(DataType::from_str("FLOAT4").unwrap(), DataType::Float32);
1486        assert_eq!(DataType::from_str("REAL").unwrap(), DataType::Float32);
1487
1488        assert_eq!(DataType::from_str("float8").unwrap(), DataType::Float64);
1489        assert_eq!(
1490            DataType::from_str("double precision").unwrap(),
1491            DataType::Float64
1492        );
1493        assert_eq!(DataType::from_str("FLOAT8").unwrap(), DataType::Float64);
1494        assert_eq!(
1495            DataType::from_str("DOUBLE PRECISION").unwrap(),
1496            DataType::Float64
1497        );
1498
1499        assert_eq!(DataType::from_str("decimal").unwrap(), DataType::Decimal);
1500        assert_eq!(DataType::from_str("DECIMAL").unwrap(), DataType::Decimal);
1501        assert_eq!(DataType::from_str("numeric").unwrap(), DataType::Decimal);
1502        assert_eq!(DataType::from_str("NUMERIC").unwrap(), DataType::Decimal);
1503
1504        assert_eq!(DataType::from_str("date").unwrap(), DataType::Date);
1505        assert_eq!(DataType::from_str("DATE").unwrap(), DataType::Date);
1506
1507        assert_eq!(DataType::from_str("varchar").unwrap(), DataType::Varchar);
1508        assert_eq!(DataType::from_str("VARCHAR").unwrap(), DataType::Varchar);
1509
1510        assert_eq!(DataType::from_str("time").unwrap(), DataType::Time);
1511        assert_eq!(
1512            DataType::from_str("time without time zone").unwrap(),
1513            DataType::Time
1514        );
1515        assert_eq!(DataType::from_str("TIME").unwrap(), DataType::Time);
1516        assert_eq!(
1517            DataType::from_str("TIME WITHOUT TIME ZONE").unwrap(),
1518            DataType::Time
1519        );
1520
1521        assert_eq!(
1522            DataType::from_str("timestamp").unwrap(),
1523            DataType::Timestamp
1524        );
1525        assert_eq!(
1526            DataType::from_str("timestamp without time zone").unwrap(),
1527            DataType::Timestamp
1528        );
1529        assert_eq!(
1530            DataType::from_str("TIMESTAMP").unwrap(),
1531            DataType::Timestamp
1532        );
1533        assert_eq!(
1534            DataType::from_str("TIMESTAMP WITHOUT TIME ZONE").unwrap(),
1535            DataType::Timestamp
1536        );
1537
1538        assert_eq!(
1539            DataType::from_str("timestamptz").unwrap(),
1540            DataType::Timestamptz
1541        );
1542        assert_eq!(
1543            DataType::from_str("timestamp with time zone").unwrap(),
1544            DataType::Timestamptz
1545        );
1546        assert_eq!(
1547            DataType::from_str("TIMESTAMPTZ").unwrap(),
1548            DataType::Timestamptz
1549        );
1550        assert_eq!(
1551            DataType::from_str("TIMESTAMP WITH TIME ZONE").unwrap(),
1552            DataType::Timestamptz
1553        );
1554
1555        assert_eq!(DataType::from_str("interval").unwrap(), DataType::Interval);
1556        assert_eq!(DataType::from_str("INTERVAL").unwrap(), DataType::Interval);
1557
1558        assert_eq!(
1559            DataType::from_str("int2[]").unwrap(),
1560            DataType::Int16.list()
1561        );
1562        assert_eq!(DataType::from_str("int[]").unwrap(), DataType::Int32.list());
1563        assert_eq!(
1564            DataType::from_str("int8[]").unwrap(),
1565            DataType::Int64.list()
1566        );
1567        assert_eq!(
1568            DataType::from_str("float4[]").unwrap(),
1569            DataType::Float32.list()
1570        );
1571        assert_eq!(
1572            DataType::from_str("float8[]").unwrap(),
1573            DataType::Float64.list()
1574        );
1575        assert_eq!(
1576            DataType::from_str("decimal[]").unwrap(),
1577            DataType::Decimal.list()
1578        );
1579        assert_eq!(
1580            DataType::from_str("varchar[]").unwrap(),
1581            DataType::Varchar.list()
1582        );
1583        assert_eq!(DataType::from_str("date[]").unwrap(), DataType::Date.list());
1584        assert_eq!(DataType::from_str("time[]").unwrap(), DataType::Time.list());
1585        assert_eq!(
1586            DataType::from_str("timestamp[]").unwrap(),
1587            DataType::Timestamp.list()
1588        );
1589        assert_eq!(
1590            DataType::from_str("timestamptz[]").unwrap(),
1591            DataType::Timestamptz.list()
1592        );
1593        assert_eq!(
1594            DataType::from_str("interval[]").unwrap(),
1595            DataType::Interval.list()
1596        );
1597
1598        assert_eq!(
1599            DataType::from_str("record").unwrap(),
1600            DataType::Struct(StructType::unnamed(vec![]))
1601        );
1602        assert_eq!(
1603            DataType::from_str("struct<a int4, b varchar>").unwrap(),
1604            DataType::Struct(StructType::new(vec![
1605                ("a", DataType::Int32),
1606                ("b", DataType::Varchar)
1607            ]))
1608        );
1609    }
1610
1611    #[test]
1612    fn test_can_alter() {
1613        let cannots = [
1614            (DataType::Int32, None),
1615            (DataType::Int32.list(), None),
1616            (
1617                MapType::from_kv(DataType::Varchar, DataType::Int32.list()).into(),
1618                None,
1619            ),
1620            (
1621                StructType::new([("a", DataType::Int32)]).into(),
1622                Some(false),
1623            ),
1624            (
1625                MapType::from_kv(
1626                    DataType::Varchar,
1627                    StructType::new([("a", DataType::Int32)]).into(),
1628                )
1629                .into(),
1630                Some(false),
1631            ),
1632        ];
1633        for (cannot, why) in cannots {
1634            assert_eq!(cannot.can_alter(), why, "{cannot:?}");
1635        }
1636
1637        let cans = [
1638            StructType::new([("a", DataType::Int32), ("b", DataType::Int32.list())])
1639                .with_ids([ColumnId::new(1), ColumnId::new(2)])
1640                .into(),
1641            DataType::list(DataType::Struct(
1642                StructType::new([("a", DataType::Int32)]).with_ids([ColumnId::new(1)]),
1643            )),
1644            MapType::from_kv(
1645                DataType::Varchar,
1646                StructType::new([("a", DataType::Int32)])
1647                    .with_ids([ColumnId::new(1)])
1648                    .into(),
1649            )
1650            .into(),
1651        ];
1652        for can in cans {
1653            assert_eq!(can.can_alter(), Some(true), "{can:?}");
1654        }
1655    }
1656}