risingwave_sqlsmith/
lib.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![feature(let_chains)]
16#![feature(if_let_guard)]
17#![feature(box_patterns)]
18#![feature(register_tool)]
19#![register_tool(rw)]
20#![allow(rw::format_error)] // test code
21
22risingwave_expr_impl::enable!();
23
24use std::collections::{HashMap, HashSet};
25
26use anyhow::{Result, bail};
27use itertools::Itertools;
28use rand::Rng;
29use rand::prelude::IndexedRandom;
30use risingwave_sqlparser::ast::{
31    BinaryOperator, ColumnOption, Expr, Join, JoinConstraint, JoinOperator, Statement,
32    TableConstraint,
33};
34use risingwave_sqlparser::parser::Parser;
35
36use crate::sql_gen::SqlGenerator;
37
38pub mod reducer;
39mod sql_gen;
40pub mod test_runners;
41mod utils;
42pub mod validation;
43pub use validation::is_permissible_error;
44
45pub use crate::sql_gen::{Table, print_function_table};
46
47/// Generate a random SQL string.
48pub fn sql_gen(rng: &mut impl Rng, tables: Vec<Table>) -> String {
49    let mut r#gen = SqlGenerator::new(rng, tables);
50    format!("{}", r#gen.gen_batch_query_stmt())
51}
52
53/// Generate `INSERT`
54pub fn insert_sql_gen(rng: &mut impl Rng, tables: Vec<Table>, count: usize) -> Vec<String> {
55    let mut r#gen = SqlGenerator::new(rng, vec![]);
56    tables
57        .into_iter()
58        .map(|table| format!("{}", r#gen.generate_insert_statement(&table, count)))
59        .collect()
60}
61
62/// Generate a random CREATE MATERIALIZED VIEW sql string.
63/// These are derived from `tables`.
64pub fn mview_sql_gen<R: Rng>(rng: &mut R, tables: Vec<Table>, name: &str) -> (String, Table) {
65    let mut r#gen = SqlGenerator::new_for_mview(rng, tables);
66    let (mview, table) = r#gen.gen_mview_stmt(name);
67    (mview.to_string(), table)
68}
69
70pub fn differential_sql_gen<R: Rng>(
71    rng: &mut R,
72    tables: Vec<Table>,
73    name: &str,
74) -> Result<(String, String, Table)> {
75    let mut r#gen = SqlGenerator::new_for_mview(rng, tables);
76    let (stream, table) = r#gen.gen_mview_stmt(name);
77    let batch = match stream {
78        Statement::CreateView { ref query, .. } => query.to_string(),
79        _ => bail!("Differential pair should be mview statement!"),
80    };
81    Ok((batch, stream.to_string(), table))
82}
83
84/// TODO(noel): Eventually all session variables should be fuzzed.
85/// For now we start of with a few hardcoded configs.
86/// Some config need workarounds, for instance `QUERY_MODE`,
87/// which can lead to stack overflow
88/// (a simple workaround is limit length of
89/// generated query when `QUERY_MODE=local`.
90pub fn session_sql_gen<R: Rng>(rng: &mut R) -> String {
91    [
92        "SET ENABLE_TWO_PHASE_AGG TO TRUE",
93        "SET ENABLE_TWO_PHASE_AGG TO FALSE",
94        "SET RW_FORCE_TWO_PHASE_AGG TO TRUE",
95        "SET RW_FORCE_TWO_PHASE_AGG TO FALSE",
96    ]
97    .choose(rng)
98    .unwrap()
99    .to_string()
100}
101
102pub fn generate_update_statements<R: Rng>(
103    rng: &mut R,
104    tables: &[Table],
105    inserts: &[Statement],
106) -> Result<Vec<Statement>> {
107    let mut r#gen = SqlGenerator::new(rng, vec![]);
108    r#gen.generate_update_statements(tables, inserts)
109}
110
111/// Parse SQL
112/// FIXME(Noel): Introduce error type for sqlsmith for this.
113pub fn parse_sql<S: AsRef<str>>(sql: S) -> Vec<Statement> {
114    let sql = sql.as_ref();
115    Parser::parse_sql(sql).unwrap_or_else(|_| panic!("Failed to parse SQL: {}", sql))
116}
117
118/// Extract relevant info from CREATE TABLE statement, to construct a Table
119pub fn create_table_statement_to_table(statement: &Statement) -> Table {
120    match statement {
121        Statement::CreateTable {
122            name,
123            columns,
124            constraints,
125            ..
126        } => {
127            let column_name_to_index_mapping: HashMap<_, _> = columns
128                .iter()
129                .enumerate()
130                .map(|(i, c)| (&c.name, i))
131                .collect();
132            let mut pk_indices = HashSet::new();
133            for (i, column) in columns.iter().enumerate() {
134                let is_primary_key = column
135                    .options
136                    .iter()
137                    .any(|option| option.option == ColumnOption::Unique { is_primary: true });
138                if is_primary_key {
139                    pk_indices.insert(i);
140                }
141            }
142            for constraint in constraints {
143                if let TableConstraint::Unique {
144                    columns,
145                    is_primary: true,
146                    ..
147                } = constraint
148                {
149                    for column in columns {
150                        let pk_index = column_name_to_index_mapping.get(column).unwrap();
151                        pk_indices.insert(*pk_index);
152                    }
153                }
154            }
155            let mut pk_indices = pk_indices.into_iter().collect_vec();
156            pk_indices.sort_unstable();
157            Table::new_for_base_table(
158                name.0[0].real_value(),
159                columns.iter().map(|c| c.clone().into()).collect(),
160                pk_indices,
161            )
162        }
163        _ => panic!(
164            "Only CREATE TABLE statements permitted, received: {}",
165            statement
166        ),
167    }
168}
169
170pub fn parse_create_table_statements(sql: impl AsRef<str>) -> (Vec<Table>, Vec<Statement>) {
171    let statements = parse_sql(&sql);
172    let tables = statements
173        .iter()
174        .map(create_table_statement_to_table)
175        .collect();
176    (tables, statements)
177}
178
179#[cfg(test)]
180mod tests {
181    use std::fmt::Debug;
182
183    use expect_test::{Expect, expect};
184
185    use super::*;
186
187    fn check(actual: impl Debug, expect: Expect) {
188        let actual = format!("{:#?}", actual);
189        expect.assert_eq(&actual);
190    }
191
192    #[test]
193    fn test_parse_create_table_statements_no_pk() {
194        let test_string = "
195CREATE TABLE t(v1 int);
196CREATE TABLE t2(v1 int, v2 bool);
197CREATE TABLE t3(v1 int, v2 bool, v3 smallint);
198        ";
199        check(
200            parse_create_table_statements(test_string),
201            expect![[r#"
202                (
203                    [
204                        Table {
205                            name: "t",
206                            columns: [
207                                Column {
208                                    name: "v1",
209                                    data_type: Int32,
210                                },
211                            ],
212                            pk_indices: [],
213                            is_base_table: true,
214                        },
215                        Table {
216                            name: "t2",
217                            columns: [
218                                Column {
219                                    name: "v1",
220                                    data_type: Int32,
221                                },
222                                Column {
223                                    name: "v2",
224                                    data_type: Boolean,
225                                },
226                            ],
227                            pk_indices: [],
228                            is_base_table: true,
229                        },
230                        Table {
231                            name: "t3",
232                            columns: [
233                                Column {
234                                    name: "v1",
235                                    data_type: Int32,
236                                },
237                                Column {
238                                    name: "v2",
239                                    data_type: Boolean,
240                                },
241                                Column {
242                                    name: "v3",
243                                    data_type: Int16,
244                                },
245                            ],
246                            pk_indices: [],
247                            is_base_table: true,
248                        },
249                    ],
250                    [
251                        CreateTable {
252                            or_replace: false,
253                            temporary: false,
254                            if_not_exists: false,
255                            name: ObjectName(
256                                [
257                                    Ident {
258                                        value: "t",
259                                        quote_style: None,
260                                    },
261                                ],
262                            ),
263                            columns: [
264                                ColumnDef {
265                                    name: Ident {
266                                        value: "v1",
267                                        quote_style: None,
268                                    },
269                                    data_type: Some(
270                                        Int,
271                                    ),
272                                    collation: None,
273                                    options: [],
274                                },
275                            ],
276                            wildcard_idx: None,
277                            constraints: [],
278                            with_options: [],
279                            format_encode: None,
280                            source_watermarks: [],
281                            append_only: false,
282                            on_conflict: None,
283                            with_version_column: None,
284                            query: None,
285                            cdc_table_info: None,
286                            include_column_options: [],
287                            webhook_info: None,
288                            engine: Hummock,
289                        },
290                        CreateTable {
291                            or_replace: false,
292                            temporary: false,
293                            if_not_exists: false,
294                            name: ObjectName(
295                                [
296                                    Ident {
297                                        value: "t2",
298                                        quote_style: None,
299                                    },
300                                ],
301                            ),
302                            columns: [
303                                ColumnDef {
304                                    name: Ident {
305                                        value: "v1",
306                                        quote_style: None,
307                                    },
308                                    data_type: Some(
309                                        Int,
310                                    ),
311                                    collation: None,
312                                    options: [],
313                                },
314                                ColumnDef {
315                                    name: Ident {
316                                        value: "v2",
317                                        quote_style: None,
318                                    },
319                                    data_type: Some(
320                                        Boolean,
321                                    ),
322                                    collation: None,
323                                    options: [],
324                                },
325                            ],
326                            wildcard_idx: None,
327                            constraints: [],
328                            with_options: [],
329                            format_encode: None,
330                            source_watermarks: [],
331                            append_only: false,
332                            on_conflict: None,
333                            with_version_column: None,
334                            query: None,
335                            cdc_table_info: None,
336                            include_column_options: [],
337                            webhook_info: None,
338                            engine: Hummock,
339                        },
340                        CreateTable {
341                            or_replace: false,
342                            temporary: false,
343                            if_not_exists: false,
344                            name: ObjectName(
345                                [
346                                    Ident {
347                                        value: "t3",
348                                        quote_style: None,
349                                    },
350                                ],
351                            ),
352                            columns: [
353                                ColumnDef {
354                                    name: Ident {
355                                        value: "v1",
356                                        quote_style: None,
357                                    },
358                                    data_type: Some(
359                                        Int,
360                                    ),
361                                    collation: None,
362                                    options: [],
363                                },
364                                ColumnDef {
365                                    name: Ident {
366                                        value: "v2",
367                                        quote_style: None,
368                                    },
369                                    data_type: Some(
370                                        Boolean,
371                                    ),
372                                    collation: None,
373                                    options: [],
374                                },
375                                ColumnDef {
376                                    name: Ident {
377                                        value: "v3",
378                                        quote_style: None,
379                                    },
380                                    data_type: Some(
381                                        SmallInt,
382                                    ),
383                                    collation: None,
384                                    options: [],
385                                },
386                            ],
387                            wildcard_idx: None,
388                            constraints: [],
389                            with_options: [],
390                            format_encode: None,
391                            source_watermarks: [],
392                            append_only: false,
393                            on_conflict: None,
394                            with_version_column: None,
395                            query: None,
396                            cdc_table_info: None,
397                            include_column_options: [],
398                            webhook_info: None,
399                            engine: Hummock,
400                        },
401                    ],
402                )"#]],
403        );
404    }
405
406    #[test]
407    fn test_parse_create_table_statements_with_pk() {
408        let test_string = "
409CREATE TABLE t(v1 int PRIMARY KEY);
410CREATE TABLE t2(v1 int, v2 smallint PRIMARY KEY);
411CREATE TABLE t3(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY);
412CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY);
413";
414        check(
415            parse_create_table_statements(test_string),
416            expect![[r#"
417                (
418                    [
419                        Table {
420                            name: "t",
421                            columns: [
422                                Column {
423                                    name: "v1",
424                                    data_type: Int32,
425                                },
426                            ],
427                            pk_indices: [
428                                0,
429                            ],
430                            is_base_table: true,
431                        },
432                        Table {
433                            name: "t2",
434                            columns: [
435                                Column {
436                                    name: "v1",
437                                    data_type: Int32,
438                                },
439                                Column {
440                                    name: "v2",
441                                    data_type: Int16,
442                                },
443                            ],
444                            pk_indices: [
445                                1,
446                            ],
447                            is_base_table: true,
448                        },
449                        Table {
450                            name: "t3",
451                            columns: [
452                                Column {
453                                    name: "v1",
454                                    data_type: Int32,
455                                },
456                                Column {
457                                    name: "v2",
458                                    data_type: Int16,
459                                },
460                            ],
461                            pk_indices: [
462                                0,
463                                1,
464                            ],
465                            is_base_table: true,
466                        },
467                        Table {
468                            name: "t4",
469                            columns: [
470                                Column {
471                                    name: "v1",
472                                    data_type: Int32,
473                                },
474                                Column {
475                                    name: "v2",
476                                    data_type: Int16,
477                                },
478                                Column {
479                                    name: "v3",
480                                    data_type: Boolean,
481                                },
482                            ],
483                            pk_indices: [
484                                0,
485                                1,
486                                2,
487                            ],
488                            is_base_table: true,
489                        },
490                    ],
491                    [
492                        CreateTable {
493                            or_replace: false,
494                            temporary: false,
495                            if_not_exists: false,
496                            name: ObjectName(
497                                [
498                                    Ident {
499                                        value: "t",
500                                        quote_style: None,
501                                    },
502                                ],
503                            ),
504                            columns: [
505                                ColumnDef {
506                                    name: Ident {
507                                        value: "v1",
508                                        quote_style: None,
509                                    },
510                                    data_type: Some(
511                                        Int,
512                                    ),
513                                    collation: None,
514                                    options: [
515                                        ColumnOptionDef {
516                                            name: None,
517                                            option: Unique {
518                                                is_primary: true,
519                                            },
520                                        },
521                                    ],
522                                },
523                            ],
524                            wildcard_idx: None,
525                            constraints: [],
526                            with_options: [],
527                            format_encode: None,
528                            source_watermarks: [],
529                            append_only: false,
530                            on_conflict: None,
531                            with_version_column: None,
532                            query: None,
533                            cdc_table_info: None,
534                            include_column_options: [],
535                            webhook_info: None,
536                            engine: Hummock,
537                        },
538                        CreateTable {
539                            or_replace: false,
540                            temporary: false,
541                            if_not_exists: false,
542                            name: ObjectName(
543                                [
544                                    Ident {
545                                        value: "t2",
546                                        quote_style: None,
547                                    },
548                                ],
549                            ),
550                            columns: [
551                                ColumnDef {
552                                    name: Ident {
553                                        value: "v1",
554                                        quote_style: None,
555                                    },
556                                    data_type: Some(
557                                        Int,
558                                    ),
559                                    collation: None,
560                                    options: [],
561                                },
562                                ColumnDef {
563                                    name: Ident {
564                                        value: "v2",
565                                        quote_style: None,
566                                    },
567                                    data_type: Some(
568                                        SmallInt,
569                                    ),
570                                    collation: None,
571                                    options: [
572                                        ColumnOptionDef {
573                                            name: None,
574                                            option: Unique {
575                                                is_primary: true,
576                                            },
577                                        },
578                                    ],
579                                },
580                            ],
581                            wildcard_idx: None,
582                            constraints: [],
583                            with_options: [],
584                            format_encode: None,
585                            source_watermarks: [],
586                            append_only: false,
587                            on_conflict: None,
588                            with_version_column: None,
589                            query: None,
590                            cdc_table_info: None,
591                            include_column_options: [],
592                            webhook_info: None,
593                            engine: Hummock,
594                        },
595                        CreateTable {
596                            or_replace: false,
597                            temporary: false,
598                            if_not_exists: false,
599                            name: ObjectName(
600                                [
601                                    Ident {
602                                        value: "t3",
603                                        quote_style: None,
604                                    },
605                                ],
606                            ),
607                            columns: [
608                                ColumnDef {
609                                    name: Ident {
610                                        value: "v1",
611                                        quote_style: None,
612                                    },
613                                    data_type: Some(
614                                        Int,
615                                    ),
616                                    collation: None,
617                                    options: [
618                                        ColumnOptionDef {
619                                            name: None,
620                                            option: Unique {
621                                                is_primary: true,
622                                            },
623                                        },
624                                    ],
625                                },
626                                ColumnDef {
627                                    name: Ident {
628                                        value: "v2",
629                                        quote_style: None,
630                                    },
631                                    data_type: Some(
632                                        SmallInt,
633                                    ),
634                                    collation: None,
635                                    options: [
636                                        ColumnOptionDef {
637                                            name: None,
638                                            option: Unique {
639                                                is_primary: true,
640                                            },
641                                        },
642                                    ],
643                                },
644                            ],
645                            wildcard_idx: None,
646                            constraints: [],
647                            with_options: [],
648                            format_encode: None,
649                            source_watermarks: [],
650                            append_only: false,
651                            on_conflict: None,
652                            with_version_column: None,
653                            query: None,
654                            cdc_table_info: None,
655                            include_column_options: [],
656                            webhook_info: None,
657                            engine: Hummock,
658                        },
659                        CreateTable {
660                            or_replace: false,
661                            temporary: false,
662                            if_not_exists: false,
663                            name: ObjectName(
664                                [
665                                    Ident {
666                                        value: "t4",
667                                        quote_style: None,
668                                    },
669                                ],
670                            ),
671                            columns: [
672                                ColumnDef {
673                                    name: Ident {
674                                        value: "v1",
675                                        quote_style: None,
676                                    },
677                                    data_type: Some(
678                                        Int,
679                                    ),
680                                    collation: None,
681                                    options: [
682                                        ColumnOptionDef {
683                                            name: None,
684                                            option: Unique {
685                                                is_primary: true,
686                                            },
687                                        },
688                                    ],
689                                },
690                                ColumnDef {
691                                    name: Ident {
692                                        value: "v2",
693                                        quote_style: None,
694                                    },
695                                    data_type: Some(
696                                        SmallInt,
697                                    ),
698                                    collation: None,
699                                    options: [
700                                        ColumnOptionDef {
701                                            name: None,
702                                            option: Unique {
703                                                is_primary: true,
704                                            },
705                                        },
706                                    ],
707                                },
708                                ColumnDef {
709                                    name: Ident {
710                                        value: "v3",
711                                        quote_style: None,
712                                    },
713                                    data_type: Some(
714                                        Boolean,
715                                    ),
716                                    collation: None,
717                                    options: [
718                                        ColumnOptionDef {
719                                            name: None,
720                                            option: Unique {
721                                                is_primary: true,
722                                            },
723                                        },
724                                    ],
725                                },
726                            ],
727                            wildcard_idx: None,
728                            constraints: [],
729                            with_options: [],
730                            format_encode: None,
731                            source_watermarks: [],
732                            append_only: false,
733                            on_conflict: None,
734                            with_version_column: None,
735                            query: None,
736                            cdc_table_info: None,
737                            include_column_options: [],
738                            webhook_info: None,
739                            engine: Hummock,
740                        },
741                    ],
742                )"#]],
743        );
744    }
745}