risingwave_sqlsmith/sql_gen/
query.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Interface for generating a query
16//! We construct Query based on the AST representation,
17//! as defined in the [`risingwave_sqlparser`] module.
18
19use std::vec;
20
21use itertools::Itertools;
22use rand::Rng;
23use rand::prelude::{IndexedRandom, SliceRandom};
24use risingwave_common::types::DataType;
25use risingwave_sqlparser::ast::{
26    Corresponding, Cte, Distinct, Expr, Ident, ObjectName, Query, Select, SelectItem, SetExpr,
27    SetOperator, TableWithJoins, Value, With,
28};
29
30use crate::config::{Feature, Syntax};
31use crate::sql_gen::utils::create_table_with_joins_from_table;
32use crate::sql_gen::{Column, SqlGenerator, SqlGeneratorContext, Table};
33
34/// Generators
35impl<R: Rng> SqlGenerator<'_, R> {
36    /// Generates query expression and returns its
37    /// query schema as well.
38    pub(crate) fn gen_query(&mut self) -> (Query, Vec<Column>) {
39        if self.rng.random_bool(0.3) {
40            self.gen_complex_query()
41        } else {
42            self.gen_simple_query()
43        }
44    }
45
46    /// Generates a complex query which may recurse.
47    /// e.g. through `gen_with` or other generated parts of the query.
48    fn gen_complex_query(&mut self) -> (Query, Vec<Column>) {
49        let num_select_items = self.rng.random_range(1..=4);
50        let (with, with_tables) = self.gen_with();
51        let (query, schema) = self.gen_set_expr(with_tables, num_select_items);
52        let order_by = self.gen_order_by();
53        let has_order_by = !order_by.is_empty();
54        (
55            Query {
56                with,
57                body: query,
58                order_by,
59                limit: self.gen_limit(has_order_by),
60                offset: None,
61                fetch: None,
62            },
63            schema,
64        )
65    }
66
67    /// This query can still recurse, but it is "simpler"
68    /// does not have "with" clause, "order by".
69    /// Which makes it more unlikely to recurse.
70    fn gen_simple_query(&mut self) -> (Query, Vec<Column>) {
71        let num_select_items = self.rng.random_range(1..=4);
72        let with_tables = vec![];
73        let (query, schema) = self.gen_set_expr(with_tables, num_select_items);
74        (
75            Query {
76                with: None,
77                body: query,
78                order_by: vec![],
79                limit: self.gen_limit(false),
80                offset: None,
81                fetch: None,
82            },
83            schema,
84        )
85    }
86
87    /// Generates a query with a single SELECT item. e.g. SELECT v from t;
88    /// Returns the query and the SELECT column alias.
89    pub(crate) fn gen_single_item_query(&mut self) -> (Query, Column) {
90        let with_tables = vec![];
91        let (query, schema) = self.gen_set_expr(with_tables, 1);
92        (
93            Query {
94                with: None,
95                body: query,
96                order_by: vec![],
97                limit: None,
98                offset: None,
99                fetch: None,
100            },
101            schema[0].clone(),
102        )
103    }
104
105    /// Generates a query with local context.
106    /// Used by `WITH`, `Table Subquery` in Relation
107    pub(crate) fn gen_local_query(&mut self) -> (Query, Vec<Column>) {
108        let old_ctxt = self.new_local_context();
109        let t = self.gen_query();
110        self.restore_context(old_ctxt);
111        t
112    }
113
114    /// Generates a query with correlated context to ensure proper recursion.
115    pub(crate) fn gen_correlated_query(&mut self) -> (Query, Vec<Column>) {
116        let old_ctxt = self.clone_local_context();
117        let t = self.gen_query();
118        self.restore_context(old_ctxt);
119        t
120    }
121
122    fn gen_with(&mut self) -> (Option<With>, Vec<Table>) {
123        match self.can_recurse() {
124            true => (None, vec![]),
125            false => {
126                let (with, tables) = self.gen_with_inner();
127                (Some(with), tables)
128            }
129        }
130    }
131
132    fn gen_with_inner(&mut self) -> (With, Vec<Table>) {
133        let alias = self.gen_table_alias_with_prefix("with");
134        let (query, query_schema) = self.gen_local_query();
135        let cte = Cte {
136            alias: alias.clone(),
137            cte_inner: risingwave_sqlparser::ast::CteInner::Query(Box::new(query)),
138        };
139
140        let with_tables = vec![Table::new(alias.name.real_value(), query_schema)];
141        (
142            With {
143                recursive: false,
144                cte_tables: vec![cte],
145            },
146            with_tables,
147        )
148    }
149
150    fn gen_set_expr(
151        &mut self,
152        with_tables: Vec<Table>,
153        num_select_items: usize,
154    ) -> (SetExpr, Vec<Column>) {
155        if self.should_generate(Feature::Except) {
156            let left_ctxt = self.new_local_context();
157            let (left_expr, left_schema) = self.gen_set_expr(with_tables.clone(), num_select_items);
158            self.restore_context(left_ctxt);
159
160            let right_ctxt = self.new_local_context();
161            let (right_expr, right_schema) =
162                self.gen_set_expr(with_tables.clone(), num_select_items);
163            self.restore_context(right_ctxt);
164
165            if !self.schemas_compatible(&left_schema, &right_schema) {
166                return self.gen_set_expr(with_tables, num_select_items);
167            }
168
169            let all = matches!(self.rng.random_range(0..=1), 1);
170            (
171                SetExpr::SetOperation {
172                    op: SetOperator::Except,
173                    all,
174                    corresponding: Corresponding {
175                        corresponding: false,
176                        column_list: None,
177                    },
178                    left: Box::new(left_expr),
179                    right: Box::new(right_expr),
180                },
181                left_schema,
182            )
183        } else {
184            let (select, schema) = self.gen_select_stmt(with_tables, num_select_items);
185            (SetExpr::Select(Box::new(select)), schema)
186        }
187    }
188
189    fn schemas_compatible(&self, left: &[Column], right: &[Column]) -> bool {
190        if left.len() != right.len() {
191            return false;
192        }
193        left.iter()
194            .zip(right.iter())
195            .all(|(l, r)| l.data_type == r.data_type)
196    }
197
198    fn gen_limit(&mut self, has_order_by: bool) -> Option<Expr> {
199        if (!self.is_mview || has_order_by) && self.flip_coin() {
200            let start = if self.is_mview { 1 } else { 0 };
201            Some(Expr::Value(Value::Number(
202                self.rng.random_range(start..=100).to_string(),
203            )))
204        } else {
205            None
206        }
207    }
208
209    fn gen_select_stmt(
210        &mut self,
211        with_tables: Vec<Table>,
212        num_select_items: usize,
213    ) -> (Select, Vec<Column>) {
214        // Generate random tables/relations first so that select items can refer to them.
215        let from = self.gen_from(with_tables);
216        let selection = self.gen_where();
217        let group_by = self.gen_group_by();
218        let having = self.gen_having(!group_by.is_empty());
219        let (select_list, schema) = self.gen_select_list(num_select_items);
220        let select = Select {
221            distinct: Distinct::All,
222            projection: select_list,
223            from,
224            lateral_views: vec![],
225            selection,
226            group_by,
227            having,
228            window: vec![], // TODO: generate named window definition
229        };
230        (select, schema)
231    }
232
233    fn gen_select_list(&mut self, num_select_items: usize) -> (Vec<SelectItem>, Vec<Column>) {
234        let context = SqlGeneratorContext::new(self.should_generate(Syntax::Agg), false);
235        (0..num_select_items)
236            .map(|i| self.gen_select_item(i, context))
237            .unzip()
238    }
239
240    fn gen_select_item(&mut self, i: usize, context: SqlGeneratorContext) -> (SelectItem, Column) {
241        let (ret_type, expr) = self.gen_arbitrary_expr(context);
242
243        let alias = format!("col_{}", i);
244        (
245            SelectItem::ExprWithAlias {
246                expr,
247                alias: Ident::new_unchecked(alias.clone()),
248            },
249            Column {
250                name: ObjectName::from_test_str(&alias),
251                data_type: ret_type,
252            },
253        )
254    }
255
256    fn gen_from(&mut self, with_tables: Vec<Table>) -> Vec<TableWithJoins> {
257        let mut from = if !with_tables.is_empty() {
258            let with_table = with_tables
259                .choose(&mut self.rng)
260                .expect("with tables should not be empty");
261            vec![create_table_with_joins_from_table(with_table)]
262        } else {
263            let (rel, tables) = self.gen_from_relation();
264            self.add_relations_to_context(tables);
265            vec![rel]
266        };
267
268        // We short-circuit here for mview to avoid streaming nested loop join,
269        // since CROSS JOIN below could be correlated.
270        if self.is_mview {
271            assert!(!self.tables.is_empty());
272            return from;
273        }
274
275        // Generate one cross join at most.
276        let mut lateral_contexts = vec![];
277        if self.rng.random_bool(0.1) {
278            let (table_with_join, mut table) = self.gen_from_relation();
279            from.push(table_with_join);
280            lateral_contexts.append(&mut table);
281        }
282        self.add_relations_to_context(lateral_contexts);
283        from
284    }
285
286    fn gen_where(&mut self) -> Option<Expr> {
287        if self.should_generate(Syntax::Where) {
288            let context = SqlGeneratorContext::new(false, false);
289            Some(self.gen_expr(&DataType::Boolean, context))
290        } else {
291            None
292        }
293    }
294
295    /// GROUP BY will constrain the generated columns.
296    fn gen_group_by(&mut self) -> Vec<Expr> {
297        // 90% generate simple group by.
298        // 10% generate grouping sets.
299        match self.rng.random_range(0..=9) {
300            0..=8 => {
301                let group_by_cols = self.gen_random_bound_columns();
302                self.bound_columns.clone_from(&group_by_cols);
303                group_by_cols
304                    .into_iter()
305                    .map(|c| c.name_expr())
306                    .collect_vec()
307            }
308            9 => self.gen_grouping_sets(),
309            _ => unreachable!(),
310        }
311    }
312
313    #[allow(dead_code)]
314    /// GROUPING SETS will constrain the generated columns.
315    fn gen_grouping_sets(&mut self) -> Vec<Expr> {
316        let grouping_num = self.rng.random_range(0..=5);
317        let mut grouping_sets = vec![];
318        let mut new_bound_columns = vec![];
319        for _i in 0..grouping_num {
320            let group_by_cols = self.gen_random_bound_columns();
321            grouping_sets.push(group_by_cols.iter().map(|c| c.name_expr()).collect_vec());
322            new_bound_columns.extend(group_by_cols);
323        }
324        if grouping_sets.is_empty() {
325            self.bound_columns = vec![];
326            vec![]
327        } else {
328            let grouping_sets = Expr::GroupingSets(grouping_sets);
329            self.bound_columns = new_bound_columns
330                .into_iter()
331                .unique_by(|c| c.name.real_value())
332                .sorted_by_key(|c| c.name.real_value())
333                .collect();
334
335            // Currently, grouping sets only support one set.
336            vec![grouping_sets]
337        }
338    }
339
340    fn gen_random_bound_columns(&mut self) -> Vec<Column> {
341        let mut available = if self.should_generate(Feature::Eowc) {
342            self.get_columns_with_watermark(&self.bound_columns.clone())
343        } else {
344            self.bound_columns.clone()
345        };
346        if !available.is_empty() {
347            available.shuffle(self.rng);
348            let upper_bound = available.len().div_ceil(2);
349            let n = self.rng.random_range(1..=upper_bound);
350            available.drain(..n).collect_vec()
351        } else {
352            vec![]
353        }
354    }
355
356    fn gen_having(&mut self, have_group_by: bool) -> Option<Expr> {
357        if have_group_by & self.flip_coin() {
358            let context = SqlGeneratorContext::new(self.should_generate(Syntax::Agg), false);
359            Some(self.gen_expr(&DataType::Boolean, context))
360        } else {
361            None
362        }
363    }
364}