risingwave_sqlsmith/sql_gen/
relation.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use rand::Rng;
16use rand::prelude::{IndexedRandom, SliceRandom};
17use risingwave_common::types::DataType::Boolean;
18use risingwave_sqlparser::ast::{
19    Ident, ObjectName, TableAlias, TableFactor, TableWithJoins, Value,
20};
21
22use crate::config::{Feature, Syntax};
23use crate::sql_gen::types::BINARY_INEQUALITY_OP_TABLE;
24use crate::sql_gen::{Column, SqlGenerator, SqlGeneratorContext};
25use crate::{BinaryOperator, Expr, Join, JoinConstraint, JoinOperator, Table};
26
27fn create_binary_expr(op: BinaryOperator, left: &Column, right: &Column) -> Expr {
28    let left = Box::new(left.name_expr());
29    let right = Box::new(right.name_expr());
30    Expr::BinaryOp { left, op, right }
31}
32
33fn create_equi_expr(left: &Column, right: &Column) -> Expr {
34    create_binary_expr(BinaryOperator::Eq, left, right)
35}
36
37impl<R: Rng> SqlGenerator<'_, R> {
38    /// A relation specified in the FROM clause.
39    pub(crate) fn gen_from_relation(&mut self) -> (TableWithJoins, Vec<Table>) {
40        if !self.should_generate(Syntax::Join) {
41            return self.gen_no_join();
42        }
43        match self.rng.random_range(1..=3) {
44            1..=2 => self
45                .gen_simple_join_clause()
46                .unwrap_or_else(|| self.gen_no_join()),
47            3..=3 => self.gen_more_joins(),
48            // TODO(kwannoel): cycles, bushy joins.
49            _ => unreachable!(),
50        }
51    }
52
53    fn gen_no_join(&mut self) -> (TableWithJoins, Vec<Table>) {
54        let (relation, table) = self.gen_table_factor();
55        (
56            TableWithJoins {
57                relation,
58                joins: vec![],
59            },
60            vec![table],
61        )
62    }
63
64    fn gen_simple_table_factor(&mut self) -> (TableFactor, Table) {
65        let alias = self.gen_table_name_with_prefix("t");
66        let mut table = if self.should_generate(Feature::Eowc) {
67            self.get_append_only_tables()
68                .choose(&mut self.rng)
69                .unwrap()
70                .clone()
71        } else {
72            self.tables.choose(&mut self.rng).unwrap().clone()
73        };
74        let table_factor = TableFactor::Table {
75            name: ObjectName(vec![Ident::new_unchecked(&table.name)]),
76            alias: Some(TableAlias {
77                name: alias.as_str().into(),
78                columns: vec![],
79            }),
80            as_of: None,
81        };
82        table.name = alias; // Rename the table.
83        (table_factor, table)
84    }
85
86    fn gen_table_factor(&mut self) -> (TableFactor, Table) {
87        let current_context = self.new_local_context();
88        let factor = self.gen_table_factor_inner();
89        self.restore_context(current_context);
90        factor
91    }
92
93    /// Generates a table factor, and provides bound columns.
94    /// Generated column names should be qualified by table name.
95    fn gen_table_factor_inner(&mut self) -> (TableFactor, Table) {
96        let mut choices = vec![0, 3]; // time_window, simple_table
97        if !self.should_generate(Feature::Eowc) {
98            choices.push(1); // table_func
99        }
100        if self.can_recurse() {
101            choices.push(2); // subquery
102        }
103
104        match *choices.choose(&mut self.rng).unwrap() {
105            0 => self.gen_time_window_func(),
106            1 => self.gen_table_func(),
107            2 => self.gen_table_subquery(),
108            3 => self.gen_simple_table_factor(),
109            _ => unreachable!(),
110        }
111    }
112
113    fn gen_equi_join_columns(
114        &mut self,
115        left_columns: Vec<Column>,
116        right_columns: Vec<Column>,
117    ) -> Vec<(Column, Column)> {
118        let mut available_join_on_columns = vec![];
119        for left_column in &left_columns {
120            for right_column in &right_columns {
121                if left_column.data_type == right_column.data_type {
122                    available_join_on_columns.push((left_column.clone(), right_column.clone()))
123                }
124            }
125        }
126        available_join_on_columns
127    }
128
129    fn gen_bool_with_tables(&mut self, tables: Vec<Table>) -> Expr {
130        let old_context = self.new_local_context();
131        self.add_relations_to_context(tables);
132        let expr = self.gen_expr(&Boolean, SqlGeneratorContext::new(false, false));
133        self.restore_context(old_context);
134        expr
135    }
136
137    fn gen_single_equi_join_expr(
138        &mut self,
139        left_columns: Vec<Column>,
140        right_columns: Vec<Column>,
141    ) -> Option<(Expr, Vec<(Column, Column)>)> {
142        let mut available_join_on_columns = self.gen_equi_join_columns(left_columns, right_columns);
143        if available_join_on_columns.is_empty() {
144            return None;
145        }
146        available_join_on_columns.shuffle(&mut self.rng);
147        let remaining_columns = available_join_on_columns.split_off(1);
148        let (left_column, right_column) = available_join_on_columns.drain(..).next().unwrap();
149        let join_on_expr = create_equi_expr(&left_column, &right_column);
150        Some((join_on_expr, remaining_columns))
151    }
152
153    fn gen_non_equi_expr(&mut self, available_join_on_columns: Vec<(Column, Column)>) -> Expr {
154        let expr = Expr::Value(Value::Boolean(true));
155        if available_join_on_columns.is_empty() {
156            return expr;
157        }
158        let n = self.rng.random_range(0..available_join_on_columns.len());
159        let mut count = 0;
160        for (l_col, r_col) in available_join_on_columns {
161            if count >= n {
162                break;
163            }
164            let Some(inequality_ops) =
165                BINARY_INEQUALITY_OP_TABLE.get(&(l_col.data_type.clone(), r_col.data_type.clone()))
166            else {
167                continue;
168            };
169            let inequality_op = inequality_ops.choose(&mut self.rng).unwrap();
170            let _non_equi_expr = create_binary_expr(inequality_op.clone(), &l_col, &r_col);
171            count += 1;
172        }
173        expr
174    }
175
176    fn gen_more_equi_join_exprs(
177        &mut self,
178        mut available_join_on_columns: Vec<(Column, Column)>,
179    ) -> Expr {
180        let mut expr = Expr::Value(Value::Boolean(true));
181        if available_join_on_columns.is_empty() {
182            return expr;
183        }
184        let n_join_cols = available_join_on_columns.len();
185        let n = if n_join_cols < 2 {
186            n_join_cols
187        } else {
188            match self.rng.random_range(0..100) {
189                0..=10 => self.rng.random_range(n_join_cols / 2..n_join_cols),
190                11..=100 => self.rng.random_range(0..n_join_cols / 2),
191                _ => unreachable!(),
192            }
193        };
194
195        for (l_col, r_col) in available_join_on_columns.drain(0..n) {
196            let equi_expr = create_equi_expr(&l_col, &r_col);
197            expr = Expr::BinaryOp {
198                left: Box::new(expr),
199                op: BinaryOperator::And,
200                right: Box::new(equi_expr),
201            }
202        }
203        expr
204    }
205
206    fn gen_arbitrary_bool(&mut self, left_table: Table, right_table: Table) -> Option<Expr> {
207        let expr = self.gen_bool_with_tables(vec![left_table, right_table]);
208
209        // FIXME(noel): This is a hack to reduce streaming nested loop join occurrences.
210        // ... JOIN ON x=y AND false => ... JOIN ON x=y
211        // We can use const folding, then remove the right expression,
212        // if it evaluates to `false` after const folding.
213        // Have to first bind `Expr`, since it is AST form.
214        // Then if successfully bound, use `eval_row_const` to constant fold it.
215        // Take a look at <https://github.com/risingwavelabs/risingwave/pull/7541/files#diff-08400d774a613753da25dcb45e905e8fe3d20acaccca846f39a86834f4c01656>.
216        if expr != Expr::Value(Value::Boolean(false)) {
217            Some(expr)
218        } else {
219            None
220        }
221    }
222
223    /// Generates the `ON` clause in `t JOIN t2 ON ...`
224    /// It will generate at least one equi join condition
225    /// This will reduce chance of nested loop join from being generated.
226    fn gen_join_on_expr(
227        &mut self,
228        left_columns: Vec<Column>,
229        left_table: Table,
230        right_columns: Vec<Column>,
231        right_table: Table,
232    ) -> Option<Expr> {
233        // We always generate an equi join, to avoid stream nested loop join.
234        let (base_join_on_expr, remaining_equi_columns) =
235            self.gen_single_equi_join_expr(left_columns, right_columns)?;
236
237        // Add more expressions
238        let extra_expr = match self.rng.random_range(1..=100) {
239            1..=25 => None,
240            26..=50 => Some(self.gen_non_equi_expr(remaining_equi_columns)),
241            51..=75 => Some(self.gen_more_equi_join_exprs(remaining_equi_columns)),
242            76..=100 => self.gen_arbitrary_bool(left_table, right_table),
243            _ => unreachable!(),
244        };
245        if let Some(extra_expr) = extra_expr {
246            Some(Expr::BinaryOp {
247                left: Box::new(base_join_on_expr),
248                op: BinaryOperator::And,
249                right: Box::new(extra_expr),
250            })
251        } else {
252            Some(base_join_on_expr)
253        }
254    }
255
256    fn gen_join_constraint(
257        &mut self,
258        left_columns: Vec<Column>,
259        left_table: Table,
260        right_columns: Vec<Column>,
261        right_table: Table,
262    ) -> Option<JoinConstraint> {
263        let common_columns: Vec<_> = left_columns
264            .iter()
265            .filter_map(|l_col| {
266                right_columns
267                    .iter()
268                    .find(|r_col| r_col.name == l_col.name)?;
269                Some(l_col.base_name())
270            })
271            .collect();
272
273        if !common_columns.is_empty() && self.should_generate(Feature::NaturalJoin) {
274            return Some(JoinConstraint::Natural);
275        }
276
277        if !common_columns.is_empty() && self.should_generate(Feature::UsingJoin) {
278            return Some(JoinConstraint::Using(common_columns));
279        }
280
281        let expr = self.gen_join_on_expr(left_columns, left_table, right_columns, right_table)?;
282        Some(JoinConstraint::On(expr))
283    }
284
285    /// Generates t1 JOIN t2 ON ...
286    fn gen_join_operator(
287        &mut self,
288        left_columns: Vec<Column>,
289        left_table: Table,
290        right_columns: Vec<Column>,
291        right_table: Table,
292    ) -> Option<JoinOperator> {
293        let join_constraint =
294            self.gen_join_constraint(left_columns, left_table, right_columns, right_table)?;
295
296        // NOTE: INNER JOIN works fine, usually does not encounter `StreamNestedLoopJoin` much.
297        // If many failures due to `StreamNestedLoopJoin`, try disable the others.
298        let join_operator = match self.rng.random_range(0..=3) {
299            0 => JoinOperator::Inner(join_constraint),
300            1 => JoinOperator::LeftOuter(join_constraint),
301            2 => JoinOperator::RightOuter(join_constraint),
302            _ => JoinOperator::FullOuter(join_constraint),
303            // NOTE: Do not generate CrossJoin,
304            // it has been already generated in query.
305            // _ => JoinOperator::CrossJoin,
306        };
307
308        Some(join_operator)
309    }
310
311    /// Generates t1 JOIN t2 ON ...
312    fn gen_simple_join_clause(&mut self) -> Option<(TableWithJoins, Vec<Table>)> {
313        let (left_factor, left_table) = self.gen_table_factor();
314        let left_columns = left_table.get_qualified_columns();
315        let (right_factor, right_table) = self.gen_table_factor();
316        let right_columns = right_table.get_qualified_columns();
317        let join_operator = self.gen_join_operator(
318            left_columns,
319            left_table.clone(),
320            right_columns,
321            right_table.clone(),
322        )?;
323
324        let right_factor_with_join = Join {
325            relation: right_factor,
326            join_operator,
327        };
328        Some((
329            TableWithJoins {
330                relation: left_factor,
331                joins: vec![right_factor_with_join],
332            },
333            vec![left_table, right_table],
334        ))
335    }
336
337    /// Generates three-way join.
338    fn gen_more_joins(&mut self) -> (TableWithJoins, Vec<Table>) {
339        // gen left
340        let Some((left_table_with_join, mut left_tables)) = self.gen_simple_join_clause() else {
341            return self.gen_no_join();
342        };
343        let left_columns = left_tables
344            .iter()
345            .flat_map(|t| t.get_qualified_columns())
346            .collect();
347
348        // gen right
349        let (right_factor, right_table) = self.gen_table_factor();
350        let right_columns = right_table.get_qualified_columns();
351
352        // gen join
353        let left_table = left_tables.choose(&mut self.rng).unwrap();
354        let Some(join_operator) = self.gen_join_operator(
355            left_columns,
356            left_table.clone(),
357            right_columns,
358            right_table.clone(),
359        ) else {
360            return (left_table_with_join, left_tables);
361        };
362
363        // build result
364        let mut tables = vec![];
365        tables.append(&mut left_tables);
366        tables.push(right_table);
367
368        let right_join = Join {
369            relation: right_factor,
370            join_operator,
371        };
372
373        (
374            TableWithJoins {
375                relation: TableFactor::NestedJoin(Box::new(left_table_with_join)),
376                joins: vec![right_join],
377            },
378            tables,
379        )
380    }
381
382    fn gen_table_subquery(&mut self) -> (TableFactor, Table) {
383        let (subquery, columns) = self.gen_local_query();
384        let alias = self.gen_table_name_with_prefix("sq");
385        let table = Table::new(alias.clone(), columns);
386        let factor = TableFactor::Derived {
387            lateral: false,
388            subquery: Box::new(subquery),
389            alias: Some(TableAlias {
390                name: Ident::new_unchecked(alias),
391                columns: vec![],
392            }),
393        };
394
395        (factor, table)
396    }
397}