risingwave_common/types/
struct_type.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::{Debug, Display, Formatter};
16use std::iter::empty;
17use std::str::FromStr;
18use std::sync::Arc;
19
20use anyhow::anyhow;
21use either::Either;
22use itertools::{Itertools, repeat_n};
23
24use super::DataType;
25use crate::catalog::ColumnId;
26use crate::util::iter_util::ZipEqFast;
27use crate::util::quote_ident::QuoteIdent;
28
29/// A cheaply cloneable struct type.
30#[derive(Clone, PartialEq, Eq, Hash)]
31pub struct StructType(Arc<StructTypeInner>);
32
33impl Debug for StructType {
34    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
35        let alternate = f.alternate();
36
37        let mut d = f.debug_struct("StructType");
38        d.field("fields", &self.0.fields);
39        if let Some(ids) = &self.0.field_ids
40        // TODO: This is for making `EXPLAIN` output more concise, but it hurts the readability
41        // for testing and debugging. Avoid using `Debug` repr in `EXPLAIN` output instead.
42            && alternate
43        {
44            d.field("field_ids", ids);
45        }
46        d.finish()
47    }
48}
49
50#[derive(Clone, Debug, educe::Educe)]
51#[educe(PartialEq, Eq, Hash)] // ignore ids for backward compatibility
52struct StructTypeInner {
53    /// The name and data type of each field.
54    ///
55    /// If fields are unnamed, the names will be `f1`, `f2`, etc.
56    fields: Box<[(String, DataType)]>,
57
58    /// The ids of the fields. Used in serialization for nested-schema evolution purposes.
59    ///
60    /// Only present if this data type is persisted within a table schema (`ColumnDesc`)
61    /// in a new version of the catalog that supports nested-schema evolution.
62    #[educe(PartialEq(ignore), Hash(ignore))]
63    field_ids: Option<Box<[ColumnId]>>,
64
65    /// Whether the fields are unnamed.
66    #[educe(PartialEq(ignore), Hash(ignore))]
67    is_unnamed: bool,
68}
69
70impl StructType {
71    /// Creates a struct type with named fields.
72    pub fn new(named_fields: impl IntoIterator<Item = (impl Into<String>, DataType)>) -> Self {
73        let fields = named_fields
74            .into_iter()
75            .map(|(name, ty)| (name.into(), ty))
76            .collect();
77
78        Self(Arc::new(StructTypeInner {
79            fields,
80            field_ids: None,
81            is_unnamed: false,
82        }))
83    }
84
85    /// Creates a struct type with no fields. This makes no sense in practice.
86    #[cfg(test)]
87    pub fn empty() -> Self {
88        Self::unnamed(Vec::new())
89    }
90
91    /// Creates a struct type with unnamed fields. The names will be assigned `f1`, `f2`, etc.
92    pub fn unnamed(fields: Vec<DataType>) -> Self {
93        let fields = fields
94            .into_iter()
95            .enumerate()
96            .map(|(i, ty)| (format!("f{}", i + 1), ty))
97            .collect();
98
99        Self(Arc::new(StructTypeInner {
100            fields,
101            field_ids: None,
102            is_unnamed: true,
103        }))
104    }
105
106    /// Attaches given field ids to the struct type.
107    ///
108    /// Note that for empty struct, this method is a no-op, as [`StructType::ids`] will always
109    /// return `Some(<empty>)` for empty struct.
110    pub fn with_ids(self, ids: impl IntoIterator<Item = ColumnId>) -> Self {
111        let ids: Box<[ColumnId]> = ids.into_iter().collect();
112
113        assert_eq!(ids.len(), self.len(), "ids length mismatches");
114        assert!(
115            ids.iter().all(|id| *id != ColumnId::placeholder()),
116            "ids should not contain placeholder value"
117        );
118
119        // No-op for empty struct.
120        if self.is_empty() {
121            return self;
122        }
123
124        let mut inner = Arc::unwrap_or_clone(self.0);
125        inner.field_ids = Some(ids);
126        Self(Arc::new(inner))
127    }
128
129    /// Whether the struct type has field ids. An empty struct is considered to have ids.
130    ///
131    /// Note that this does not recursively check whether composite fields have ids.
132    pub fn has_ids(&self) -> bool {
133        self.is_empty() || self.0.field_ids.is_some()
134    }
135
136    /// Whether the fields are unnamed.
137    pub fn is_unnamed(&self) -> bool {
138        self.0.is_unnamed
139    }
140
141    /// Returns the number of fields.
142    pub fn len(&self) -> usize {
143        self.0.fields.len()
144    }
145
146    /// Returns `true` if there are no fields.
147    pub fn is_empty(&self) -> bool {
148        self.0.fields.is_empty()
149    }
150
151    /// Gets an iterator over the names of the fields.
152    ///
153    /// If fields are unnamed, the field names will be `f1`, `f2`, etc.
154    pub fn names(&self) -> impl ExactSizeIterator<Item = &str> {
155        self.0.fields.iter().map(|(name, _)| name.as_str())
156    }
157
158    /// Gets an iterator over the types of the fields.
159    pub fn types(&self) -> impl ExactSizeIterator<Item = &DataType> {
160        self.0.fields.iter().map(|(_, ty)| ty)
161    }
162
163    /// Gets the type of a field by index.
164    pub fn type_at(&self, index: usize) -> &DataType {
165        &self.0.fields[index].1
166    }
167
168    /// Gets an iterator over the fields.
169    ///
170    /// If fields are unnamed, the field names will be `f1`, `f2`, etc.
171    pub fn iter(&self) -> impl ExactSizeIterator<Item = (&str, &DataType)> {
172        self.0.fields.iter().map(|(name, ty)| (name.as_str(), ty))
173    }
174
175    /// Gets an iterator over the field ids.
176    ///
177    /// Returns `None` if they are not present. See documentation on the field `field_ids`
178    /// for the cases. For empty struct, this returns `Some(<empty>)`.
179    pub fn ids(&self) -> Option<impl ExactSizeIterator<Item = ColumnId> + '_> {
180        if self.is_empty() {
181            Some(Either::Left(empty()))
182        } else {
183            (self.0.field_ids.as_ref())
184                .map(|field_ids| field_ids.iter().copied())
185                .map(Either::Right)
186        }
187    }
188
189    /// Gets the field id at the given index.
190    ///
191    /// Returns `None` if they are not present. See documentation on the field `field_ids`
192    /// for the cases.
193    pub fn id_at(&self, index: usize) -> Option<ColumnId> {
194        self.ids().map(|mut ids| ids.nth(index).unwrap())
195    }
196
197    /// Get an iterator over the field ids, or a sequence of placeholder ids if they are not present.
198    pub fn ids_or_placeholder(&self) -> impl ExactSizeIterator<Item = ColumnId> + '_ {
199        match self.ids() {
200            Some(ids) => Either::Left(ids),
201            None => Either::Right(repeat_n(ColumnId::placeholder(), self.len())),
202        }
203    }
204
205    /// Compares the datatype with another, ignoring nested field names and ids.
206    pub fn equals_datatype(&self, other: &StructType) -> bool {
207        if self.len() != other.len() {
208            return false;
209        }
210
211        (self.types())
212            .zip_eq_fast(other.types())
213            .all(|(a, b)| a.equals_datatype(b))
214    }
215}
216
217impl Display for StructType {
218    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
219        if self.is_unnamed() {
220            // To be consistent with the return type of `ROW` in Postgres.
221            write!(f, "record")
222        } else {
223            write!(
224                f,
225                "struct<{}>",
226                self.iter()
227                    .map(|(name, ty)| format!("{} {}", QuoteIdent(name), ty))
228                    .join(", ")
229            )
230        }
231    }
232}
233
234impl FromStr for StructType {
235    type Err = anyhow::Error;
236
237    fn from_str(s: &str) -> Result<Self, Self::Err> {
238        if s == "record" {
239            return Ok(StructType::unnamed(Vec::new()));
240        }
241        if !(s.starts_with("struct<") && s.ends_with('>')) {
242            return Err(anyhow!("expect struct<...>"));
243        };
244        let mut fields = Vec::new();
245        for field in s[7..s.len() - 1].split(',') {
246            let field = field.trim();
247            let mut iter = field.split_whitespace();
248            let field_name = iter.next().unwrap().to_owned();
249            let field_type = DataType::from_str(iter.next().unwrap())?;
250            fields.push((field_name, field_type));
251        }
252        Ok(Self::new(fields))
253    }
254}