risingwave_common/types/
struct_type.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::{Debug, Display, Formatter};
16use std::str::FromStr;
17use std::sync::Arc;
18
19use anyhow::anyhow;
20use either::Either;
21use itertools::{Itertools, repeat_n};
22
23use super::DataType;
24use crate::catalog::ColumnId;
25use crate::util::iter_util::ZipEqFast;
26
27/// A cheaply cloneable struct type.
28#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
29pub struct StructType(Arc<StructTypeInner>);
30
31impl Debug for StructType {
32    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
33        let alternate = f.alternate();
34
35        let mut d = f.debug_struct("StructType");
36        d.field("fields", &self.0.fields);
37        if let Some(ids) = &self.0.field_ids
38        // TODO: This is for making `EXPLAIN` output more concise, but it hurts the readability
39        // for testing and debugging. Avoid using `Debug` repr in `EXPLAIN` output instead.
40            && alternate
41        {
42            d.field("field_ids", ids);
43        }
44        d.finish()
45    }
46}
47
48#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
49struct StructTypeInner {
50    /// The name and data type of each field.
51    ///
52    /// If fields are unnamed, the names will be `f1`, `f2`, etc.
53    fields: Box<[(String, DataType)]>,
54
55    /// The ids of the fields. Used in serialization for nested-schema evolution purposes.
56    ///
57    /// Only present if this data type is persisted within a table schema (`ColumnDesc`)
58    /// in a new version of the catalog that supports nested-schema evolution.
59    field_ids: Option<Box<[ColumnId]>>,
60
61    /// Whether the fields are unnamed.
62    is_unnamed: bool,
63}
64
65impl StructType {
66    /// Creates a struct type with named fields.
67    pub fn new(named_fields: impl IntoIterator<Item = (impl Into<String>, DataType)>) -> Self {
68        let fields = named_fields
69            .into_iter()
70            .map(|(name, ty)| (name.into(), ty))
71            .collect();
72
73        Self(Arc::new(StructTypeInner {
74            fields,
75            field_ids: None,
76            is_unnamed: false,
77        }))
78    }
79
80    /// Creates a struct type with no fields. This makes no sense in practice.
81    #[cfg(test)]
82    pub fn empty() -> Self {
83        Self::unnamed(Vec::new())
84    }
85
86    /// Creates a struct type with unnamed fields. The names will be assigned `f1`, `f2`, etc.
87    pub fn unnamed(fields: Vec<DataType>) -> Self {
88        let fields = fields
89            .into_iter()
90            .enumerate()
91            .map(|(i, ty)| (format!("f{}", i + 1), ty))
92            .collect();
93
94        Self(Arc::new(StructTypeInner {
95            fields,
96            field_ids: None,
97            is_unnamed: true,
98        }))
99    }
100
101    /// Attaches given field ids to the struct type.
102    pub fn with_ids(self, ids: impl IntoIterator<Item = ColumnId>) -> Self {
103        let ids: Box<[ColumnId]> = ids.into_iter().collect();
104
105        assert_eq!(ids.len(), self.len(), "ids length mismatches");
106        assert!(
107            ids.iter().all(|id| *id != ColumnId::placeholder()),
108            "ids should not contain placeholder value"
109        );
110
111        let mut inner = Arc::unwrap_or_clone(self.0);
112        inner.field_ids = Some(ids);
113        Self(Arc::new(inner))
114    }
115
116    /// Whether the struct type has field ids.
117    ///
118    /// Note that this does not recursively check whether composite fields have ids.
119    pub fn has_ids(&self) -> bool {
120        self.0.field_ids.is_some()
121    }
122
123    /// Whether the fields are unnamed.
124    pub fn is_unnamed(&self) -> bool {
125        self.0.is_unnamed
126    }
127
128    /// Returns the number of fields.
129    pub fn len(&self) -> usize {
130        self.0.fields.len()
131    }
132
133    /// Returns `true` if there are no fields.
134    pub fn is_empty(&self) -> bool {
135        self.0.fields.is_empty()
136    }
137
138    /// Gets an iterator over the names of the fields.
139    ///
140    /// If fields are unnamed, the field names will be `f1`, `f2`, etc.
141    pub fn names(&self) -> impl ExactSizeIterator<Item = &str> {
142        self.0.fields.iter().map(|(name, _)| name.as_str())
143    }
144
145    /// Gets an iterator over the types of the fields.
146    pub fn types(&self) -> impl ExactSizeIterator<Item = &DataType> {
147        self.0.fields.iter().map(|(_, ty)| ty)
148    }
149
150    /// Gets the type of a field by index.
151    pub fn type_at(&self, index: usize) -> &DataType {
152        &self.0.fields[index].1
153    }
154
155    /// Gets an iterator over the fields.
156    ///
157    /// If fields are unnamed, the field names will be `f1`, `f2`, etc.
158    pub fn iter(&self) -> impl ExactSizeIterator<Item = (&str, &DataType)> {
159        self.0.fields.iter().map(|(name, ty)| (name.as_str(), ty))
160    }
161
162    /// Gets an iterator over the field ids.
163    ///
164    /// Returns `None` if they are not present. See documentation on the field `field_ids`
165    /// for the cases.
166    pub fn ids(&self) -> Option<impl ExactSizeIterator<Item = ColumnId> + '_> {
167        self.0.field_ids.as_ref().map(|ids| ids.iter().copied())
168    }
169
170    /// Get an iterator over the field ids, or a sequence of placeholder ids if they are not present.
171    pub fn ids_or_placeholder(&self) -> impl ExactSizeIterator<Item = ColumnId> + '_ {
172        match self.ids() {
173            Some(ids) => Either::Left(ids),
174            None => Either::Right(repeat_n(ColumnId::placeholder(), self.len())),
175        }
176    }
177
178    /// Compares the datatype with another, ignoring nested field names and ids.
179    pub fn equals_datatype(&self, other: &StructType) -> bool {
180        if self.len() != other.len() {
181            return false;
182        }
183
184        (self.types())
185            .zip_eq_fast(other.types())
186            .all(|(a, b)| a.equals_datatype(b))
187    }
188}
189
190impl Display for StructType {
191    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
192        if self.is_unnamed() {
193            // To be consistent with the return type of `ROW` in Postgres.
194            write!(f, "record")
195        } else {
196            write!(
197                f,
198                "struct<{}>",
199                self.iter()
200                    .map(|(name, ty)| format!("{} {}", name, ty))
201                    .join(", ")
202            )
203        }
204    }
205}
206
207impl FromStr for StructType {
208    type Err = anyhow::Error;
209
210    fn from_str(s: &str) -> Result<Self, Self::Err> {
211        if s == "record" {
212            return Ok(StructType::unnamed(Vec::new()));
213        }
214        if !(s.starts_with("struct<") && s.ends_with('>')) {
215            return Err(anyhow!("expect struct<...>"));
216        };
217        let mut fields = Vec::new();
218        for field in s[7..s.len() - 1].split(',') {
219            let field = field.trim();
220            let mut iter = field.split_whitespace();
221            let field_name = iter.next().unwrap().to_owned();
222            let field_type = DataType::from_str(iter.next().unwrap())?;
223            fields.push((field_name, field_type));
224        }
225        Ok(Self::new(fields))
226    }
227}