risingwave_common/types/
struct_type.rs

1// Copyright 2022 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::{Debug, Display, Formatter};
16use std::iter::empty;
17use std::str::FromStr;
18use std::sync::Arc;
19
20use anyhow::anyhow;
21use either::Either;
22use itertools::{Itertools, repeat_n};
23use risingwave_sqlparser::ast::QuoteIdent;
24
25use super::DataType;
26use crate::catalog::ColumnId;
27use crate::util::iter_util::ZipEqFast;
28
/// A cheaply cloneable struct type.
///
/// This is a thin wrapper around an [`Arc`] of `StructTypeInner`, so cloning only bumps
/// the reference count instead of copying the field list.
///
/// Equality and hashing are derived through the inner value, whose field ids and
/// "unnamed" flag are marked as ignored; only field names and field types take part.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct StructType(Arc<StructTypeInner>);
32
33impl Debug for StructType {
34    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
35        // `DataType` derives `Debug`, so `StructType`'s `Debug` representation frequently appears
36        // in `EXPLAIN` and planner test outputs. Prefer a compact single-line format for `{:?}`,
37        // while keeping the detailed `StructType { fields: ..., field_ids: ... }` for `{:#?}`.
38        if f.alternate() {
39            let mut d = f.debug_struct("StructType");
40            d.field("fields", &self.0.fields);
41            if let Some(ids) = &self.0.field_ids {
42                d.field("field_ids", ids);
43            }
44            d.finish()
45        } else {
46            // Many internal composite types use synthetic field names like `f1`, `f2`, ...
47            // Treat them as unnamed to keep `EXPLAIN` output concise.
48            let omit_names = self.is_unnamed()
49                || self
50                    .0
51                    .fields
52                    .iter()
53                    .enumerate()
54                    .all(|(i, (name, _))| name == &format!("f{}", i + 1));
55            let mut first = true;
56            for (name, ty) in self.iter() {
57                if !first {
58                    write!(f, ", ")?;
59                }
60                first = false;
61                if omit_names {
62                    // Unnamed struct comes from `ROW(...)` expressions and prints as `Struct(t1, t2, ...)`.
63                    write!(f, "{:?}", ty)?;
64                } else {
65                    // Quote the identifier when needed (e.g. reserved keywords or special chars).
66                    write!(f, "{}:{:?}", QuoteIdent(name), ty)?;
67                }
68            }
69            Ok(())
70        }
71    }
72}
73
/// The shared payload behind a [`StructType`].
///
/// `PartialEq`, `Eq` and `Hash` are generated by `educe`. The `field_ids` and `is_unnamed`
/// fields are explicitly excluded from comparison and hashing, so two values are equal
/// whenever their `fields` (names and types) are equal.
#[derive(Clone, Debug, educe::Educe)]
#[educe(PartialEq, Eq, Hash)] // ignore ids for backward compatibility
struct StructTypeInner {
    /// The name and data type of each field.
    ///
    /// If fields are unnamed, the names will be `f1`, `f2`, etc.
    fields: Box<[(String, DataType)]>,

    /// The ids of the fields. Used in serialization for nested-schema evolution purposes.
    ///
    /// Only present if this data type is persisted within a table schema (`ColumnDesc`)
    /// in a new version of the catalog that supports nested-schema evolution.
    ///
    /// Excluded from equality and hashing.
    #[educe(PartialEq(ignore), Hash(ignore))]
    field_ids: Option<Box<[ColumnId]>>,

    /// Whether the fields are unnamed.
    ///
    /// Set to `true` by `StructType::unnamed` and to `false` by `StructType::new`.
    /// Excluded from equality and hashing.
    #[educe(PartialEq(ignore), Hash(ignore))]
    is_unnamed: bool,
}
93
94impl StructType {
95    /// Creates a struct type with named fields.
96    pub fn new(named_fields: impl IntoIterator<Item = (impl Into<String>, DataType)>) -> Self {
97        let fields = named_fields
98            .into_iter()
99            .map(|(name, ty)| (name.into(), ty))
100            .collect();
101
102        Self(Arc::new(StructTypeInner {
103            fields,
104            field_ids: None,
105            is_unnamed: false,
106        }))
107    }
108
109    /// Creates a struct type with no fields. This makes no sense in practice.
110    #[cfg(test)]
111    pub fn empty() -> Self {
112        Self::unnamed(Vec::new())
113    }
114
115    /// Creates a struct type with unnamed fields. The names will be assigned `f1`, `f2`, etc.
116    pub fn unnamed(fields: impl IntoIterator<Item = DataType>) -> Self {
117        let fields = fields
118            .into_iter()
119            .enumerate()
120            .map(|(i, ty)| (format!("f{}", i + 1), ty))
121            .collect();
122
123        Self(Arc::new(StructTypeInner {
124            fields,
125            field_ids: None,
126            is_unnamed: true,
127        }))
128    }
129
130    /// Creates a struct type for `ROW` expression with unnamed fields.
131    pub fn row_expr_type(fields: impl IntoIterator<Item = DataType>) -> Self {
132        Self::unnamed(fields)
133    }
134
135    /// Attaches given field ids to the struct type.
136    ///
137    /// Note that for empty struct, this method is a no-op, as [`StructType::ids`] will always
138    /// return `Some(<empty>)` for empty struct.
139    pub fn with_ids(self, ids: impl IntoIterator<Item = ColumnId>) -> Self {
140        let ids: Box<[ColumnId]> = ids.into_iter().collect();
141
142        assert_eq!(ids.len(), self.len(), "ids length mismatches");
143        assert!(
144            ids.iter().all(|id| *id != ColumnId::placeholder()),
145            "ids should not contain placeholder value"
146        );
147
148        // No-op for empty struct.
149        if self.is_empty() {
150            return self;
151        }
152
153        let mut inner = Arc::unwrap_or_clone(self.0);
154        inner.field_ids = Some(ids);
155        Self(Arc::new(inner))
156    }
157
158    /// Whether the struct type has field ids. An empty struct is considered to have ids.
159    ///
160    /// Note that this does not recursively check whether composite fields have ids.
161    pub fn has_ids(&self) -> bool {
162        self.is_empty() || self.0.field_ids.is_some()
163    }
164
165    /// Whether the fields are unnamed.
166    pub fn is_unnamed(&self) -> bool {
167        self.0.is_unnamed
168    }
169
170    /// Returns the number of fields.
171    pub fn len(&self) -> usize {
172        self.0.fields.len()
173    }
174
175    /// Returns `true` if there are no fields.
176    pub fn is_empty(&self) -> bool {
177        self.0.fields.is_empty()
178    }
179
180    /// Gets an iterator over the names of the fields.
181    ///
182    /// If fields are unnamed, the field names will be `f1`, `f2`, etc.
183    pub fn names(&self) -> impl ExactSizeIterator<Item = &str> {
184        self.0.fields.iter().map(|(name, _)| name.as_str())
185    }
186
187    /// Gets an iterator over the types of the fields.
188    pub fn types(&self) -> impl ExactSizeIterator<Item = &DataType> {
189        self.0.fields.iter().map(|(_, ty)| ty)
190    }
191
192    /// Gets the type of a field by index.
193    pub fn type_at(&self, index: usize) -> &DataType {
194        &self.0.fields[index].1
195    }
196
197    /// Gets an iterator over the fields.
198    ///
199    /// If fields are unnamed, the field names will be `f1`, `f2`, etc.
200    pub fn iter(&self) -> impl ExactSizeIterator<Item = (&str, &DataType)> {
201        self.0.fields.iter().map(|(name, ty)| (name.as_str(), ty))
202    }
203
204    /// Gets an iterator over the field ids.
205    ///
206    /// Returns `None` if they are not present. See documentation on the field `field_ids`
207    /// for the cases. For empty struct, this returns `Some(<empty>)`.
208    pub fn ids(&self) -> Option<impl ExactSizeIterator<Item = ColumnId> + '_> {
209        if self.is_empty() {
210            Some(Either::Left(empty()))
211        } else {
212            (self.0.field_ids.as_ref())
213                .map(|field_ids| field_ids.iter().copied())
214                .map(Either::Right)
215        }
216    }
217
218    /// Gets the field id at the given index.
219    ///
220    /// Returns `None` if they are not present. See documentation on the field `field_ids`
221    /// for the cases.
222    pub fn id_at(&self, index: usize) -> Option<ColumnId> {
223        self.ids().map(|mut ids| ids.nth(index).unwrap())
224    }
225
226    /// Get an iterator over the field ids, or a sequence of placeholder ids if they are not present.
227    pub fn ids_or_placeholder(&self) -> impl ExactSizeIterator<Item = ColumnId> + '_ {
228        match self.ids() {
229            Some(ids) => Either::Left(ids),
230            None => Either::Right(repeat_n(ColumnId::placeholder(), self.len())),
231        }
232    }
233
234    /// Compares the datatype with another, ignoring nested field names and ids.
235    pub fn equals_datatype(&self, other: &StructType) -> bool {
236        if self.len() != other.len() {
237            return false;
238        }
239
240        (self.types())
241            .zip_eq_fast(other.types())
242            .all(|(a, b)| a.equals_datatype(b))
243    }
244}
245
246impl Display for StructType {
247    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
248        if self.is_unnamed() {
249            // To be consistent with the return type of `ROW` in Postgres.
250            write!(f, "record")
251        } else {
252            write!(
253                f,
254                "struct<{}>",
255                self.iter()
256                    .map(|(name, ty)| format!("{} {}", QuoteIdent(name), ty))
257                    .join(", ")
258            )
259        }
260    }
261}
262
263impl FromStr for StructType {
264    type Err = anyhow::Error;
265
266    fn from_str(s: &str) -> Result<Self, Self::Err> {
267        if s == "record" {
268            return Ok(StructType::unnamed(Vec::new()));
269        }
270        if !(s.starts_with("struct<") && s.ends_with('>')) {
271            return Err(anyhow!("expect struct<...>"));
272        };
273        let mut fields = Vec::new();
274        for field in s[7..s.len() - 1].split(',') {
275            let field = field.trim();
276            let mut iter = field.split_whitespace();
277            let field_name = iter.next().unwrap().to_owned();
278            let field_type = DataType::from_str(iter.next().unwrap())?;
279            fields.push((field_name, field_type));
280        }
281        Ok(Self::new(fields))
282    }
283}