risingwave_sqlsmith/sql_gen/
relation.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use rand::Rng;
16use rand::prelude::{IndexedRandom, SliceRandom};
17use risingwave_common::types::DataType::Boolean;
18use risingwave_sqlparser::ast::{
19    Ident, ObjectName, TableAlias, TableFactor, TableWithJoins, Value,
20};
21
22use crate::sql_gen::types::BINARY_INEQUALITY_OP_TABLE;
23use crate::sql_gen::{Column, SqlGenerator, SqlGeneratorContext};
24use crate::{BinaryOperator, Expr, Join, JoinConstraint, JoinOperator, Table};
25
26fn create_binary_expr(op: BinaryOperator, left: String, right: String) -> Expr {
27    let left = Box::new(Expr::Identifier(Ident::new_unchecked(left)));
28    let right = Box::new(Expr::Identifier(Ident::new_unchecked(right)));
29    Expr::BinaryOp { left, op, right }
30}
31
32fn create_equi_expr(left: String, right: String) -> Expr {
33    create_binary_expr(BinaryOperator::Eq, left, right)
34}
35
36impl<R: Rng> SqlGenerator<'_, R> {
37    /// A relation specified in the FROM clause.
38    pub(crate) fn gen_from_relation(&mut self) -> (TableWithJoins, Vec<Table>) {
39        match self.rng.random_range(1..=4) {
40            1..=1 => self.gen_no_join(),
41            2..=3 => self
42                .gen_simple_join_clause()
43                .unwrap_or_else(|| self.gen_no_join()),
44            4..=4 => self.gen_more_joins(),
45            // TODO(kwannoel): cycles, bushy joins.
46            _ => unreachable!(),
47        }
48    }
49
50    fn gen_no_join(&mut self) -> (TableWithJoins, Vec<Table>) {
51        let (relation, table) = self.gen_table_factor();
52        (
53            TableWithJoins {
54                relation,
55                joins: vec![],
56            },
57            vec![table],
58        )
59    }
60
61    fn gen_simple_table_factor(&mut self) -> (TableFactor, Table) {
62        let alias = self.gen_table_name_with_prefix("t");
63        let mut table = self.tables.choose(&mut self.rng).unwrap().clone();
64        let table_factor = TableFactor::Table {
65            name: ObjectName(vec![Ident::new_unchecked(&table.name)]),
66            alias: Some(TableAlias {
67                name: alias.as_str().into(),
68                columns: vec![],
69            }),
70            as_of: None,
71        };
72        table.name = alias; // Rename the table.
73        (table_factor, table)
74    }
75
76    fn gen_table_factor(&mut self) -> (TableFactor, Table) {
77        let current_context = self.new_local_context();
78        let factor = self.gen_table_factor_inner();
79        self.restore_context(current_context);
80        factor
81    }
82
83    /// Generates a table factor, and provides bound columns.
84    /// Generated column names should be qualified by table name.
85    fn gen_table_factor_inner(&mut self) -> (TableFactor, Table) {
86        // TODO: TableFactor::Derived, TableFactor::TableFunction, TableFactor::NestedJoin
87        match self.rng.random_range(0..=2) {
88            0 => self.gen_time_window_func(),
89            1 => {
90                if self.can_recurse() {
91                    self.gen_table_subquery()
92                } else {
93                    self.gen_simple_table_factor()
94                }
95            }
96            2 => self.gen_simple_table_factor(),
97            _ => unreachable!(),
98        }
99    }
100
101    fn gen_equi_join_columns(
102        &mut self,
103        left_columns: Vec<Column>,
104        right_columns: Vec<Column>,
105    ) -> Vec<(Column, Column)> {
106        let mut available_join_on_columns = vec![];
107        for left_column in &left_columns {
108            for right_column in &right_columns {
109                if left_column.data_type == right_column.data_type {
110                    available_join_on_columns.push((left_column.clone(), right_column.clone()))
111                }
112            }
113        }
114        available_join_on_columns
115    }
116
117    fn gen_bool_with_tables(&mut self, tables: Vec<Table>) -> Expr {
118        let old_context = self.new_local_context();
119        self.add_relations_to_context(tables);
120        let expr = self.gen_expr(&Boolean, SqlGeneratorContext::new_with_can_agg(false));
121        self.restore_context(old_context);
122        expr
123    }
124
125    fn gen_single_equi_join_expr(
126        &mut self,
127        left_columns: Vec<Column>,
128        right_columns: Vec<Column>,
129    ) -> Option<(Expr, Vec<(Column, Column)>)> {
130        let mut available_join_on_columns = self.gen_equi_join_columns(left_columns, right_columns);
131        if available_join_on_columns.is_empty() {
132            return None;
133        }
134        available_join_on_columns.shuffle(&mut self.rng);
135        let remaining_columns = available_join_on_columns.split_off(1);
136        let (left_column, right_column) = available_join_on_columns.drain(..).next().unwrap();
137        let join_on_expr = create_equi_expr(left_column.name, right_column.name);
138        Some((join_on_expr, remaining_columns))
139    }
140
141    fn gen_non_equi_expr(&mut self, available_join_on_columns: Vec<(Column, Column)>) -> Expr {
142        let expr = Expr::Value(Value::Boolean(true));
143        if available_join_on_columns.is_empty() {
144            return expr;
145        }
146        let n = self.rng.random_range(0..available_join_on_columns.len());
147        let mut count = 0;
148        for (l_col, r_col) in available_join_on_columns {
149            if count >= n {
150                break;
151            }
152            let Some(inequality_ops) =
153                BINARY_INEQUALITY_OP_TABLE.get(&(l_col.data_type, r_col.data_type))
154            else {
155                continue;
156            };
157            let inequality_op = inequality_ops.choose(&mut self.rng).unwrap();
158            let _non_equi_expr = create_binary_expr(inequality_op.clone(), l_col.name, r_col.name);
159            count += 1;
160        }
161        expr
162    }
163
164    fn gen_more_equi_join_exprs(
165        &mut self,
166        mut available_join_on_columns: Vec<(Column, Column)>,
167    ) -> Expr {
168        let mut expr = Expr::Value(Value::Boolean(true));
169        if available_join_on_columns.is_empty() {
170            return expr;
171        }
172        let n_join_cols = available_join_on_columns.len();
173        let n = if n_join_cols < 2 {
174            n_join_cols
175        } else {
176            match self.rng.random_range(0..100) {
177                0..=10 => self.rng.random_range(n_join_cols / 2..n_join_cols),
178                11..=100 => self.rng.random_range(0..n_join_cols / 2),
179                _ => unreachable!(),
180            }
181        };
182
183        for (l_col, r_col) in available_join_on_columns.drain(0..n) {
184            let equi_expr = create_equi_expr(l_col.name, r_col.name);
185            expr = Expr::BinaryOp {
186                left: Box::new(expr),
187                op: BinaryOperator::And,
188                right: Box::new(equi_expr),
189            }
190        }
191        expr
192    }
193
194    fn gen_arbitrary_bool(&mut self, left_table: Table, right_table: Table) -> Option<Expr> {
195        let expr = self.gen_bool_with_tables(vec![left_table, right_table]);
196
197        // FIXME(noel): This is a hack to reduce streaming nested loop join occurrences.
198        // ... JOIN ON x=y AND false => ... JOIN ON x=y
199        // We can use const folding, then remove the right expression,
200        // if it evaluates to `false` after const folding.
201        // Have to first bind `Expr`, since it is AST form.
202        // Then if successfully bound, use `eval_row_const` to constant fold it.
203        // Take a look at <https://github.com/risingwavelabs/risingwave/pull/7541/files#diff-08400d774a613753da25dcb45e905e8fe3d20acaccca846f39a86834f4c01656>.
204        if expr != Expr::Value(Value::Boolean(false)) {
205            Some(expr)
206        } else {
207            None
208        }
209    }
210
211    /// Generates the `ON` clause in `t JOIN t2 ON ...`
212    /// It will generate at least one equi join condition
213    /// This will reduce chance of nested loop join from being generated.
214    fn gen_join_on_expr(
215        &mut self,
216        left_columns: Vec<Column>,
217        left_table: Table,
218        right_columns: Vec<Column>,
219        right_table: Table,
220    ) -> Option<Expr> {
221        // We always generate an equi join, to avoid stream nested loop join.
222        let (base_join_on_expr, remaining_equi_columns) =
223            self.gen_single_equi_join_expr(left_columns, right_columns)?;
224
225        // Add more expressions
226        let extra_expr = match self.rng.random_range(1..=100) {
227            1..=25 => None,
228            26..=50 => Some(self.gen_non_equi_expr(remaining_equi_columns)),
229            51..=75 => Some(self.gen_more_equi_join_exprs(remaining_equi_columns)),
230            76..=100 => self.gen_arbitrary_bool(left_table, right_table),
231            _ => unreachable!(),
232        };
233        if let Some(extra_expr) = extra_expr {
234            Some(Expr::BinaryOp {
235                left: Box::new(base_join_on_expr),
236                op: BinaryOperator::And,
237                right: Box::new(extra_expr),
238            })
239        } else {
240            Some(base_join_on_expr)
241        }
242    }
243
244    fn gen_join_constraint(
245        &mut self,
246        left_columns: Vec<Column>,
247        left_table: Table,
248        right_columns: Vec<Column>,
249        right_table: Table,
250    ) -> Option<JoinConstraint> {
251        let expr = self.gen_join_on_expr(left_columns, left_table, right_columns, right_table)?;
252        Some(JoinConstraint::On(expr))
253    }
254
255    /// Generates t1 JOIN t2 ON ...
256    fn gen_join_operator(
257        &mut self,
258        left_columns: Vec<Column>,
259        left_table: Table,
260        right_columns: Vec<Column>,
261        right_table: Table,
262    ) -> Option<JoinOperator> {
263        let join_constraint =
264            self.gen_join_constraint(left_columns, left_table, right_columns, right_table)?;
265
266        // NOTE: INNER JOIN works fine, usually does not encounter `StreamNestedLoopJoin` much.
267        // If many failures due to `StreamNestedLoopJoin`, try disable the others.
268        let join_operator = match self.rng.random_range(0..=3) {
269            0 => JoinOperator::Inner(join_constraint),
270            1 => JoinOperator::LeftOuter(join_constraint),
271            2 => JoinOperator::RightOuter(join_constraint),
272            _ => JoinOperator::FullOuter(join_constraint),
273            // NOTE: Do not generate CrossJoin,
274            // it has been already generated in query.
275            // _ => JoinOperator::CrossJoin,
276        };
277
278        Some(join_operator)
279    }
280
281    /// Generates t1 JOIN t2 ON ...
282    fn gen_simple_join_clause(&mut self) -> Option<(TableWithJoins, Vec<Table>)> {
283        let (left_factor, left_table) = self.gen_table_factor();
284        let left_columns = left_table.get_qualified_columns();
285        let (right_factor, right_table) = self.gen_table_factor();
286        let right_columns = right_table.get_qualified_columns();
287        let join_operator = self.gen_join_operator(
288            left_columns,
289            left_table.clone(),
290            right_columns,
291            right_table.clone(),
292        )?;
293
294        let right_factor_with_join = Join {
295            relation: right_factor,
296            join_operator,
297        };
298        Some((
299            TableWithJoins {
300                relation: left_factor,
301                joins: vec![right_factor_with_join],
302            },
303            vec![left_table, right_table],
304        ))
305    }
306
307    /// Generates three-way join.
308    fn gen_more_joins(&mut self) -> (TableWithJoins, Vec<Table>) {
309        // gen left
310        let Some((left_table_with_join, mut left_tables)) = self.gen_simple_join_clause() else {
311            return self.gen_no_join();
312        };
313        let left_columns = left_tables
314            .iter()
315            .flat_map(|t| t.get_qualified_columns())
316            .collect();
317
318        // gen right
319        let (right_factor, right_table) = self.gen_table_factor();
320        let right_columns = right_table.get_qualified_columns();
321
322        // gen join
323        let left_table = left_tables.choose(&mut self.rng).unwrap();
324        let Some(join_operator) = self.gen_join_operator(
325            left_columns,
326            left_table.clone(),
327            right_columns,
328            right_table.clone(),
329        ) else {
330            return (left_table_with_join, left_tables);
331        };
332
333        // build result
334        let mut tables = vec![];
335        tables.append(&mut left_tables);
336        tables.push(right_table);
337
338        let right_join = Join {
339            relation: right_factor,
340            join_operator,
341        };
342
343        (
344            TableWithJoins {
345                relation: TableFactor::NestedJoin(Box::new(left_table_with_join)),
346                joins: vec![right_join],
347            },
348            tables,
349        )
350    }
351
352    fn gen_table_subquery(&mut self) -> (TableFactor, Table) {
353        let (subquery, columns) = self.gen_local_query();
354        let alias = self.gen_table_name_with_prefix("sq");
355        let table = Table::new(alias.clone(), columns);
356        let factor = TableFactor::Derived {
357            lateral: false,
358            subquery: Box::new(subquery),
359            alias: Some(TableAlias {
360                name: Ident::new_unchecked(alias),
361                columns: vec![],
362            }),
363        };
364
365        (factor, table)
366    }
367}