risingwave_common/types/
struct_type.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::{Debug, Display, Formatter};
16use std::str::FromStr;
17use std::sync::Arc;
18
19use anyhow::anyhow;
20use either::Either;
21use itertools::{Itertools, repeat_n};
22
23use super::DataType;
24use crate::catalog::ColumnId;
25use crate::util::iter_util::ZipEqFast;
26use crate::util::quote_ident::QuoteIdent;
27
/// A cheaply cloneable struct type.
///
/// The field data is stored behind an [`Arc`], so cloning a `StructType` does not copy
/// the field list.
///
/// Equality and hashing are derived and therefore delegate to `StructTypeInner`, which
/// compares the field names and types only (field ids and the "unnamed" flag are ignored).
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct StructType(Arc<StructTypeInner>);
31
32impl Debug for StructType {
33    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
34        let alternate = f.alternate();
35
36        let mut d = f.debug_struct("StructType");
37        d.field("fields", &self.0.fields);
38        if let Some(ids) = &self.0.field_ids
39        // TODO: This is for making `EXPLAIN` output more concise, but it hurts the readability
40        // for testing and debugging. Avoid using `Debug` repr in `EXPLAIN` output instead.
41            && alternate
42        {
43            d.field("field_ids", ids);
44        }
45        d.finish()
46    }
47}
48
/// The shared payload of a [`StructType`].
///
/// Equality and hashing take only `fields` into account: `field_ids` and `is_unnamed`
/// are both marked as ignored through the `educe` attributes below.
#[derive(Clone, Debug, educe::Educe)]
#[educe(PartialEq, Eq, Hash)] // ignore ids for backward compatibility
struct StructTypeInner {
    /// The name and data type of each field.
    ///
    /// If fields are unnamed, the names will be `f1`, `f2`, etc.
    fields: Box<[(String, DataType)]>,

    /// The ids of the fields. Used in serialization for nested-schema evolution purposes.
    ///
    /// Only present if this data type is persisted within a table schema (`ColumnDesc`)
    /// in a new version of the catalog that supports nested-schema evolution.
    ///
    /// Excluded from equality and hashing.
    #[educe(PartialEq(ignore), Hash(ignore))]
    field_ids: Option<Box<[ColumnId]>>,

    /// Whether the fields are unnamed.
    ///
    /// Set to `true` by `StructType::unnamed` and to `false` by `StructType::new`.
    /// Excluded from equality and hashing.
    #[educe(PartialEq(ignore), Hash(ignore))]
    is_unnamed: bool,
}
68
69impl StructType {
70    /// Creates a struct type with named fields.
71    pub fn new(named_fields: impl IntoIterator<Item = (impl Into<String>, DataType)>) -> Self {
72        let fields = named_fields
73            .into_iter()
74            .map(|(name, ty)| (name.into(), ty))
75            .collect();
76
77        Self(Arc::new(StructTypeInner {
78            fields,
79            field_ids: None,
80            is_unnamed: false,
81        }))
82    }
83
84    /// Creates a struct type with no fields. This makes no sense in practice.
85    #[cfg(test)]
86    pub fn empty() -> Self {
87        Self::unnamed(Vec::new())
88    }
89
90    /// Creates a struct type with unnamed fields. The names will be assigned `f1`, `f2`, etc.
91    pub fn unnamed(fields: Vec<DataType>) -> Self {
92        let fields = fields
93            .into_iter()
94            .enumerate()
95            .map(|(i, ty)| (format!("f{}", i + 1), ty))
96            .collect();
97
98        Self(Arc::new(StructTypeInner {
99            fields,
100            field_ids: None,
101            is_unnamed: true,
102        }))
103    }
104
105    /// Attaches given field ids to the struct type.
106    pub fn with_ids(self, ids: impl IntoIterator<Item = ColumnId>) -> Self {
107        let ids: Box<[ColumnId]> = ids.into_iter().collect();
108
109        assert_eq!(ids.len(), self.len(), "ids length mismatches");
110        assert!(
111            ids.iter().all(|id| *id != ColumnId::placeholder()),
112            "ids should not contain placeholder value"
113        );
114
115        let mut inner = Arc::unwrap_or_clone(self.0);
116        inner.field_ids = Some(ids);
117        Self(Arc::new(inner))
118    }
119
120    /// Whether the struct type has field ids.
121    ///
122    /// Note that this does not recursively check whether composite fields have ids.
123    pub fn has_ids(&self) -> bool {
124        self.0.field_ids.is_some()
125    }
126
127    /// Whether the fields are unnamed.
128    pub fn is_unnamed(&self) -> bool {
129        self.0.is_unnamed
130    }
131
132    /// Returns the number of fields.
133    pub fn len(&self) -> usize {
134        self.0.fields.len()
135    }
136
137    /// Returns `true` if there are no fields.
138    pub fn is_empty(&self) -> bool {
139        self.0.fields.is_empty()
140    }
141
142    /// Gets an iterator over the names of the fields.
143    ///
144    /// If fields are unnamed, the field names will be `f1`, `f2`, etc.
145    pub fn names(&self) -> impl ExactSizeIterator<Item = &str> {
146        self.0.fields.iter().map(|(name, _)| name.as_str())
147    }
148
149    /// Gets an iterator over the types of the fields.
150    pub fn types(&self) -> impl ExactSizeIterator<Item = &DataType> {
151        self.0.fields.iter().map(|(_, ty)| ty)
152    }
153
154    /// Gets the type of a field by index.
155    pub fn type_at(&self, index: usize) -> &DataType {
156        &self.0.fields[index].1
157    }
158
159    /// Gets an iterator over the fields.
160    ///
161    /// If fields are unnamed, the field names will be `f1`, `f2`, etc.
162    pub fn iter(&self) -> impl ExactSizeIterator<Item = (&str, &DataType)> {
163        self.0.fields.iter().map(|(name, ty)| (name.as_str(), ty))
164    }
165
166    /// Gets an iterator over the field ids.
167    ///
168    /// Returns `None` if they are not present. See documentation on the field `field_ids`
169    /// for the cases.
170    pub fn ids(&self) -> Option<impl ExactSizeIterator<Item = ColumnId> + '_> {
171        self.0.field_ids.as_ref().map(|ids| ids.iter().copied())
172    }
173
174    /// Gets the field id at the given index.
175    ///
176    /// Returns `None` if they are not present. See documentation on the field `field_ids`
177    /// for the cases.
178    pub fn id_at(&self, index: usize) -> Option<ColumnId> {
179        self.0.field_ids.as_ref().map(|ids| ids[index])
180    }
181
182    /// Get an iterator over the field ids, or a sequence of placeholder ids if they are not present.
183    pub fn ids_or_placeholder(&self) -> impl ExactSizeIterator<Item = ColumnId> + '_ {
184        match self.ids() {
185            Some(ids) => Either::Left(ids),
186            None => Either::Right(repeat_n(ColumnId::placeholder(), self.len())),
187        }
188    }
189
190    /// Compares the datatype with another, ignoring nested field names and ids.
191    pub fn equals_datatype(&self, other: &StructType) -> bool {
192        if self.len() != other.len() {
193            return false;
194        }
195
196        (self.types())
197            .zip_eq_fast(other.types())
198            .all(|(a, b)| a.equals_datatype(b))
199    }
200}
201
202impl Display for StructType {
203    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
204        if self.is_unnamed() {
205            // To be consistent with the return type of `ROW` in Postgres.
206            write!(f, "record")
207        } else {
208            write!(
209                f,
210                "struct<{}>",
211                self.iter()
212                    .map(|(name, ty)| format!("{} {}", QuoteIdent(name), ty))
213                    .join(", ")
214            )
215        }
216    }
217}
218
219impl FromStr for StructType {
220    type Err = anyhow::Error;
221
222    fn from_str(s: &str) -> Result<Self, Self::Err> {
223        if s == "record" {
224            return Ok(StructType::unnamed(Vec::new()));
225        }
226        if !(s.starts_with("struct<") && s.ends_with('>')) {
227            return Err(anyhow!("expect struct<...>"));
228        };
229        let mut fields = Vec::new();
230        for field in s[7..s.len() - 1].split(',') {
231            let field = field.trim();
232            let mut iter = field.split_whitespace();
233            let field_name = iter.next().unwrap().to_owned();
234            let field_type = DataType::from_str(iter.next().unwrap())?;
235            fields.push((field_name, field_type));
236        }
237        Ok(Self::new(fields))
238    }
239}