risingwave_sqlsmith/sql_gen/
query.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Interface for generating a query
16//! We construct Query based on the AST representation,
17//! as defined in the [`risingwave_sqlparser`] module.
18
19use std::vec;
20
21use itertools::Itertools;
22use rand::Rng;
23use rand::prelude::{IndexedRandom, SliceRandom};
24use risingwave_common::types::DataType;
25use risingwave_sqlparser::ast::{
26    Cte, Distinct, Expr, Ident, Query, Select, SelectItem, SetExpr, TableWithJoins, Value, With,
27};
28
29use crate::sql_gen::utils::create_table_with_joins_from_table;
30use crate::sql_gen::{Column, SqlGenerator, SqlGeneratorContext, Table};
31
32/// Generators
33impl<R: Rng> SqlGenerator<'_, R> {
34    /// Generates query expression and returns its
35    /// query schema as well.
36    pub(crate) fn gen_query(&mut self) -> (Query, Vec<Column>) {
37        if self.rng.random_bool(0.3) {
38            self.gen_complex_query()
39        } else {
40            self.gen_simple_query()
41        }
42    }
43
44    /// Generates a complex query which may recurse.
45    /// e.g. through `gen_with` or other generated parts of the query.
46    fn gen_complex_query(&mut self) -> (Query, Vec<Column>) {
47        let num_select_items = self.rng.random_range(1..=4);
48        let (with, with_tables) = self.gen_with();
49        let (query, schema) = self.gen_set_expr(with_tables, num_select_items);
50        let order_by = self.gen_order_by();
51        let has_order_by = !order_by.is_empty();
52        (
53            Query {
54                with,
55                body: query,
56                order_by,
57                limit: self.gen_limit(has_order_by),
58                offset: None,
59                fetch: None,
60            },
61            schema,
62        )
63    }
64
65    /// This query can still recurse, but it is "simpler"
66    /// does not have "with" clause, "order by".
67    /// Which makes it more unlikely to recurse.
68    fn gen_simple_query(&mut self) -> (Query, Vec<Column>) {
69        let num_select_items = self.rng.random_range(1..=4);
70        let with_tables = vec![];
71        let (query, schema) = self.gen_set_expr(with_tables, num_select_items);
72        (
73            Query {
74                with: None,
75                body: query,
76                order_by: vec![],
77                limit: self.gen_limit(false),
78                offset: None,
79                fetch: None,
80            },
81            schema,
82        )
83    }
84
85    /// Generates a query with a single SELECT item. e.g. SELECT v from t;
86    /// Returns the query and the SELECT column alias.
87    pub(crate) fn gen_single_item_query(&mut self) -> (Query, Column) {
88        let with_tables = vec![];
89        let (query, schema) = self.gen_set_expr(with_tables, 1);
90        (
91            Query {
92                with: None,
93                body: query,
94                order_by: vec![],
95                limit: None,
96                offset: None,
97                fetch: None,
98            },
99            schema[0].clone(),
100        )
101    }
102
103    /// Generates a query with local context.
104    /// Used by `WITH`, `Table Subquery` in Relation
105    pub(crate) fn gen_local_query(&mut self) -> (Query, Vec<Column>) {
106        let old_ctxt = self.new_local_context();
107        let t = self.gen_query();
108        self.restore_context(old_ctxt);
109        t
110    }
111
112    /// Generates a query with correlated context to ensure proper recursion.
113    pub(crate) fn gen_correlated_query(&mut self) -> (Query, Vec<Column>) {
114        let old_ctxt = self.clone_local_context();
115        let t = self.gen_query();
116        self.restore_context(old_ctxt);
117        t
118    }
119
120    fn gen_with(&mut self) -> (Option<With>, Vec<Table>) {
121        match self.can_recurse() {
122            true => (None, vec![]),
123            false => {
124                let (with, tables) = self.gen_with_inner();
125                (Some(with), tables)
126            }
127        }
128    }
129
130    fn gen_with_inner(&mut self) -> (With, Vec<Table>) {
131        let alias = self.gen_table_alias_with_prefix("with");
132        let (query, query_schema) = self.gen_local_query();
133        let cte = Cte {
134            alias: alias.clone(),
135            cte_inner: risingwave_sqlparser::ast::CteInner::Query(Box::new(query)),
136        };
137
138        let with_tables = vec![Table::new(alias.name.real_value(), query_schema)];
139        (
140            With {
141                recursive: false,
142                cte_tables: vec![cte],
143            },
144            with_tables,
145        )
146    }
147
148    fn gen_set_expr(
149        &mut self,
150        with_tables: Vec<Table>,
151        num_select_items: usize,
152    ) -> (SetExpr, Vec<Column>) {
153        match self.rng.random_range(0..=9) {
154            // TODO: Generate other `SetExpr`
155            0..=9 => {
156                let (select, schema) = self.gen_select_stmt(with_tables, num_select_items);
157                (SetExpr::Select(Box::new(select)), schema)
158            }
159            _ => unreachable!(),
160        }
161    }
162
163    fn gen_limit(&mut self, has_order_by: bool) -> Option<Expr> {
164        if (!self.is_mview || has_order_by) && self.flip_coin() {
165            let start = if self.is_mview { 1 } else { 0 };
166            Some(Expr::Value(Value::Number(
167                self.rng.random_range(start..=100).to_string(),
168            )))
169        } else {
170            None
171        }
172    }
173
174    fn gen_select_stmt(
175        &mut self,
176        with_tables: Vec<Table>,
177        num_select_items: usize,
178    ) -> (Select, Vec<Column>) {
179        // Generate random tables/relations first so that select items can refer to them.
180        let from = self.gen_from(with_tables);
181        let selection = self.gen_where();
182        let group_by = self.gen_group_by();
183        let having = self.gen_having(!group_by.is_empty());
184        let (select_list, schema) = self.gen_select_list(num_select_items);
185        let select = Select {
186            distinct: Distinct::All,
187            projection: select_list,
188            from,
189            lateral_views: vec![],
190            selection,
191            group_by,
192            having,
193        };
194        (select, schema)
195    }
196
197    fn gen_select_list(&mut self, num_select_items: usize) -> (Vec<SelectItem>, Vec<Column>) {
198        let can_agg = self.flip_coin();
199        let context = SqlGeneratorContext::new_with_can_agg(can_agg);
200        (0..num_select_items)
201            .map(|i| self.gen_select_item(i, context))
202            .unzip()
203    }
204
205    fn gen_select_item(&mut self, i: usize, context: SqlGeneratorContext) -> (SelectItem, Column) {
206        let (ret_type, expr) = self.gen_arbitrary_expr(context);
207
208        let alias = format!("col_{}", i);
209        (
210            SelectItem::ExprWithAlias {
211                expr,
212                alias: Ident::new_unchecked(alias.clone()),
213            },
214            Column {
215                name: alias,
216                data_type: ret_type,
217            },
218        )
219    }
220
221    fn gen_from(&mut self, with_tables: Vec<Table>) -> Vec<TableWithJoins> {
222        let mut from = if !with_tables.is_empty() {
223            let with_table = with_tables
224                .choose(&mut self.rng)
225                .expect("with tables should not be empty");
226            vec![create_table_with_joins_from_table(with_table)]
227        } else {
228            let (rel, tables) = self.gen_from_relation();
229            self.add_relations_to_context(tables);
230            vec![rel]
231        };
232
233        // We short-circuit here for mview to avoid streaming nested loop join,
234        // since CROSS JOIN below could be correlated.
235        if self.is_mview {
236            assert!(!self.tables.is_empty());
237            return from;
238        }
239
240        // Generate one cross join at most.
241        let mut lateral_contexts = vec![];
242        if self.rng.random_bool(0.1) {
243            let (table_with_join, mut table) = self.gen_from_relation();
244            from.push(table_with_join);
245            lateral_contexts.append(&mut table);
246        }
247        self.add_relations_to_context(lateral_contexts);
248        from
249    }
250
251    fn gen_where(&mut self) -> Option<Expr> {
252        if self.flip_coin() {
253            let context = SqlGeneratorContext::new_with_can_agg(false);
254            Some(self.gen_expr(&DataType::Boolean, context))
255        } else {
256            None
257        }
258    }
259
260    /// GROUP BY will constrain the generated columns.
261    fn gen_group_by(&mut self) -> Vec<Expr> {
262        // 90% generate simple group by.
263        // 10% generate grouping sets.
264        match self.rng.random_range(0..=9) {
265            0..=8 => {
266                let group_by_cols = self.gen_random_bound_columns();
267                self.bound_columns.clone_from(&group_by_cols);
268                group_by_cols
269                    .into_iter()
270                    .map(|c| Expr::Identifier(Ident::new_unchecked(c.name)))
271                    .collect_vec()
272            }
273            9 => self.gen_grouping_sets(),
274            _ => unreachable!(),
275        }
276    }
277
278    #[allow(dead_code)]
279    /// GROUPING SETS will constrain the generated columns.
280    fn gen_grouping_sets(&mut self) -> Vec<Expr> {
281        let grouping_num = self.rng.random_range(0..=5);
282        let mut grouping_sets = vec![];
283        let mut new_bound_columns = vec![];
284        for _i in 0..grouping_num {
285            let group_by_cols = self.gen_random_bound_columns();
286            grouping_sets.push(
287                group_by_cols
288                    .iter()
289                    .map(|c| Expr::Identifier(Ident::new_unchecked(c.name.clone())))
290                    .collect_vec(),
291            );
292            new_bound_columns.extend(group_by_cols);
293        }
294        if grouping_sets.is_empty() {
295            self.bound_columns = vec![];
296            vec![]
297        } else {
298            let grouping_sets = Expr::GroupingSets(grouping_sets);
299            self.bound_columns = new_bound_columns
300                .into_iter()
301                .sorted_by(|a, b| Ord::cmp(&a.name, &b.name))
302                .dedup_by(|a, b| a.name == b.name)
303                .collect();
304
305            // Currently, grouping sets only support one set.
306            vec![grouping_sets]
307        }
308    }
309
310    fn gen_random_bound_columns(&mut self) -> Vec<Column> {
311        let mut available = self.bound_columns.clone();
312        if !available.is_empty() {
313            available.shuffle(self.rng);
314            let upper_bound = available.len().div_ceil(2);
315            let n = self.rng.random_range(1..=upper_bound);
316            available.drain(..n).collect_vec()
317        } else {
318            vec![]
319        }
320    }
321
322    fn gen_having(&mut self, have_group_by: bool) -> Option<Expr> {
323        if have_group_by & self.flip_coin() {
324            let context = SqlGeneratorContext::new();
325            Some(self.gen_expr(&DataType::Boolean, context))
326        } else {
327            None
328        }
329    }
330}