risingwave_sqlsmith/
lib.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![feature(let_chains)]
16#![feature(if_let_guard)]
17#![feature(box_patterns)]
18#![feature(register_tool)]
19#![register_tool(rw)]
20#![allow(rw::format_error)] // test code
21
22risingwave_expr_impl::enable!();
23
24use std::collections::{HashMap, HashSet};
25
26use anyhow::{Result, bail};
27use config::Configuration;
28use itertools::Itertools;
29use rand::Rng;
30use rand::prelude::IndexedRandom;
31use risingwave_sqlparser::ast::{
32    BinaryOperator, ColumnOption, Expr, Join, JoinConstraint, JoinOperator, Statement,
33    TableConstraint,
34};
35use risingwave_sqlparser::parser::Parser;
36
37use crate::sql_gen::SqlGenerator;
38
39pub mod config;
40pub mod reducer;
41mod sql_gen;
42pub mod sqlreduce;
43pub mod test_runners;
44mod utils;
45pub mod validation;
46pub use validation::is_permissible_error;
47
48pub use crate::sql_gen::{Table, print_function_table};
49
50/// Generate a random SQL string.
51pub fn sql_gen(rng: &mut impl Rng, tables: Vec<Table>, config: &Configuration) -> String {
52    let mut r#gen = SqlGenerator::new(rng, tables, config.clone());
53    format!("{}", r#gen.gen_batch_query_stmt())
54}
55
56/// Generate `INSERT`
57pub fn insert_sql_gen(
58    rng: &mut impl Rng,
59    tables: Vec<Table>,
60    count: usize,
61    config: &Configuration,
62) -> Vec<String> {
63    let mut r#gen = SqlGenerator::new(rng, vec![], config.clone());
64    tables
65        .into_iter()
66        .map(|table| format!("{}", r#gen.generate_insert_statement(&table, count)))
67        .collect()
68}
69
70/// Generate a random CREATE MATERIALIZED VIEW sql string.
71/// These are derived from `tables`.
72pub fn mview_sql_gen<R: Rng>(
73    rng: &mut R,
74    tables: Vec<Table>,
75    name: &str,
76    config: &Configuration,
77) -> (String, Table) {
78    let mut r#gen = SqlGenerator::new_for_mview(rng, tables.clone(), config.clone());
79    let (mview, table) = r#gen.gen_mview_stmt(name);
80    (mview.to_string(), table)
81}
82
83pub fn differential_sql_gen<R: Rng>(
84    rng: &mut R,
85    tables: Vec<Table>,
86    name: &str,
87    config: &Configuration,
88) -> Result<(String, String, Table)> {
89    let mut r#gen = SqlGenerator::new_for_mview(rng, tables.clone(), config.clone());
90    let (stream, table) = r#gen.gen_mview_stmt(name);
91    let batch = match stream {
92        Statement::CreateView { ref query, .. } => query.to_string(),
93        _ => bail!("Differential pair should be mview statement!"),
94    };
95    Ok((batch, stream.to_string(), table))
96}
97
98/// TODO(noel): Eventually all session variables should be fuzzed.
99/// For now we start of with a few hardcoded configs.
100/// Some config need workarounds, for instance `QUERY_MODE`,
101/// which can lead to stack overflow
102/// (a simple workaround is limit length of
103/// generated query when `QUERY_MODE=local`.
104pub fn session_sql_gen<R: Rng>(rng: &mut R) -> String {
105    [
106        "SET ENABLE_TWO_PHASE_AGG TO TRUE",
107        "SET ENABLE_TWO_PHASE_AGG TO FALSE",
108        "SET RW_FORCE_TWO_PHASE_AGG TO TRUE",
109        "SET RW_FORCE_TWO_PHASE_AGG TO FALSE",
110    ]
111    .choose(rng)
112    .unwrap()
113    .to_string()
114}
115
116pub fn generate_update_statements<R: Rng>(
117    rng: &mut R,
118    tables: &[Table],
119    inserts: &[Statement],
120    config: &Configuration,
121) -> Result<Vec<Statement>> {
122    let mut r#gen = SqlGenerator::new(rng, vec![], config.clone());
123    r#gen.generate_update_statements(tables, inserts)
124}
125
126/// Parse SQL
127/// FIXME(Noel): Introduce error type for sqlsmith for this.
128pub fn parse_sql<S: AsRef<str>>(sql: S) -> Vec<Statement> {
129    let sql = sql.as_ref();
130    Parser::parse_sql(sql).unwrap_or_else(|_| panic!("Failed to parse SQL: {}", sql))
131}
132
133/// Extract relevant info from CREATE TABLE statement, to construct a Table
134pub fn create_table_statement_to_table(statement: &Statement) -> Table {
135    match statement {
136        Statement::CreateTable {
137            name,
138            columns,
139            constraints,
140            append_only,
141            source_watermarks,
142            ..
143        } => {
144            let column_name_to_index_mapping: HashMap<_, _> = columns
145                .iter()
146                .enumerate()
147                .map(|(i, c)| (&c.name, i))
148                .collect();
149            let mut pk_indices = HashSet::new();
150            for (i, column) in columns.iter().enumerate() {
151                let is_primary_key = column
152                    .options
153                    .iter()
154                    .any(|option| option.option == ColumnOption::Unique { is_primary: true });
155                if is_primary_key {
156                    pk_indices.insert(i);
157                }
158            }
159            for constraint in constraints {
160                if let TableConstraint::Unique {
161                    columns,
162                    is_primary: true,
163                    ..
164                } = constraint
165                {
166                    for column in columns {
167                        let pk_index = column_name_to_index_mapping.get(column).unwrap();
168                        pk_indices.insert(*pk_index);
169                    }
170                }
171            }
172            let mut pk_indices = pk_indices.into_iter().collect_vec();
173            pk_indices.sort_unstable();
174            Table::new_for_base_table(
175                name.0[0].real_value(),
176                columns.iter().map(|c| c.clone().into()).collect(),
177                pk_indices,
178                *append_only,
179                source_watermarks.to_vec(),
180            )
181        }
182        _ => panic!(
183            "Only CREATE TABLE statements permitted, received: {}",
184            statement
185        ),
186    }
187}
188
189pub fn parse_create_table_statements(sql: impl AsRef<str>) -> (Vec<Table>, Vec<Statement>) {
190    let statements = parse_sql(&sql);
191    let tables = statements
192        .iter()
193        .map(create_table_statement_to_table)
194        .collect();
195    (tables, statements)
196}
197
198#[cfg(test)]
199mod tests {
200    use std::fmt::Debug;
201
202    use expect_test::{Expect, expect};
203
204    use super::*;
205
206    fn check(actual: impl Debug, expect: Expect) {
207        let actual = format!("{:#?}", actual);
208        expect.assert_eq(&actual);
209    }
210
211    #[test]
212    fn test_parse_create_table_statements_no_pk() {
213        let test_string = "
214CREATE TABLE t(v1 int);
215CREATE TABLE t2(v1 int, v2 bool);
216CREATE TABLE t3(v1 int, v2 bool, v3 smallint);
217        ";
218        check(
219            parse_create_table_statements(test_string),
220            expect![[r#"
221                (
222                    [
223                        Table {
224                            name: "t",
225                            columns: [
226                                Column {
227                                    name: ObjectName(
228                                        [
229                                            Ident {
230                                                value: "v1",
231                                                quote_style: None,
232                                            },
233                                        ],
234                                    ),
235                                    data_type: Int32,
236                                },
237                            ],
238                            pk_indices: [],
239                            is_base_table: true,
240                            is_append_only: false,
241                            source_watermarks: [],
242                        },
243                        Table {
244                            name: "t2",
245                            columns: [
246                                Column {
247                                    name: ObjectName(
248                                        [
249                                            Ident {
250                                                value: "v1",
251                                                quote_style: None,
252                                            },
253                                        ],
254                                    ),
255                                    data_type: Int32,
256                                },
257                                Column {
258                                    name: ObjectName(
259                                        [
260                                            Ident {
261                                                value: "v2",
262                                                quote_style: None,
263                                            },
264                                        ],
265                                    ),
266                                    data_type: Boolean,
267                                },
268                            ],
269                            pk_indices: [],
270                            is_base_table: true,
271                            is_append_only: false,
272                            source_watermarks: [],
273                        },
274                        Table {
275                            name: "t3",
276                            columns: [
277                                Column {
278                                    name: ObjectName(
279                                        [
280                                            Ident {
281                                                value: "v1",
282                                                quote_style: None,
283                                            },
284                                        ],
285                                    ),
286                                    data_type: Int32,
287                                },
288                                Column {
289                                    name: ObjectName(
290                                        [
291                                            Ident {
292                                                value: "v2",
293                                                quote_style: None,
294                                            },
295                                        ],
296                                    ),
297                                    data_type: Boolean,
298                                },
299                                Column {
300                                    name: ObjectName(
301                                        [
302                                            Ident {
303                                                value: "v3",
304                                                quote_style: None,
305                                            },
306                                        ],
307                                    ),
308                                    data_type: Int16,
309                                },
310                            ],
311                            pk_indices: [],
312                            is_base_table: true,
313                            is_append_only: false,
314                            source_watermarks: [],
315                        },
316                    ],
317                    [
318                        CreateTable {
319                            or_replace: false,
320                            temporary: false,
321                            if_not_exists: false,
322                            name: ObjectName(
323                                [
324                                    Ident {
325                                        value: "t",
326                                        quote_style: None,
327                                    },
328                                ],
329                            ),
330                            columns: [
331                                ColumnDef {
332                                    name: Ident {
333                                        value: "v1",
334                                        quote_style: None,
335                                    },
336                                    data_type: Some(
337                                        Int,
338                                    ),
339                                    collation: None,
340                                    options: [],
341                                },
342                            ],
343                            wildcard_idx: None,
344                            constraints: [],
345                            with_options: [],
346                            format_encode: None,
347                            source_watermarks: [],
348                            append_only: false,
349                            on_conflict: None,
350                            with_version_columns: [],
351                            query: None,
352                            cdc_table_info: None,
353                            include_column_options: [],
354                            webhook_info: None,
355                            engine: Hummock,
356                        },
357                        CreateTable {
358                            or_replace: false,
359                            temporary: false,
360                            if_not_exists: false,
361                            name: ObjectName(
362                                [
363                                    Ident {
364                                        value: "t2",
365                                        quote_style: None,
366                                    },
367                                ],
368                            ),
369                            columns: [
370                                ColumnDef {
371                                    name: Ident {
372                                        value: "v1",
373                                        quote_style: None,
374                                    },
375                                    data_type: Some(
376                                        Int,
377                                    ),
378                                    collation: None,
379                                    options: [],
380                                },
381                                ColumnDef {
382                                    name: Ident {
383                                        value: "v2",
384                                        quote_style: None,
385                                    },
386                                    data_type: Some(
387                                        Boolean,
388                                    ),
389                                    collation: None,
390                                    options: [],
391                                },
392                            ],
393                            wildcard_idx: None,
394                            constraints: [],
395                            with_options: [],
396                            format_encode: None,
397                            source_watermarks: [],
398                            append_only: false,
399                            on_conflict: None,
400                            with_version_columns: [],
401                            query: None,
402                            cdc_table_info: None,
403                            include_column_options: [],
404                            webhook_info: None,
405                            engine: Hummock,
406                        },
407                        CreateTable {
408                            or_replace: false,
409                            temporary: false,
410                            if_not_exists: false,
411                            name: ObjectName(
412                                [
413                                    Ident {
414                                        value: "t3",
415                                        quote_style: None,
416                                    },
417                                ],
418                            ),
419                            columns: [
420                                ColumnDef {
421                                    name: Ident {
422                                        value: "v1",
423                                        quote_style: None,
424                                    },
425                                    data_type: Some(
426                                        Int,
427                                    ),
428                                    collation: None,
429                                    options: [],
430                                },
431                                ColumnDef {
432                                    name: Ident {
433                                        value: "v2",
434                                        quote_style: None,
435                                    },
436                                    data_type: Some(
437                                        Boolean,
438                                    ),
439                                    collation: None,
440                                    options: [],
441                                },
442                                ColumnDef {
443                                    name: Ident {
444                                        value: "v3",
445                                        quote_style: None,
446                                    },
447                                    data_type: Some(
448                                        SmallInt,
449                                    ),
450                                    collation: None,
451                                    options: [],
452                                },
453                            ],
454                            wildcard_idx: None,
455                            constraints: [],
456                            with_options: [],
457                            format_encode: None,
458                            source_watermarks: [],
459                            append_only: false,
460                            on_conflict: None,
461                            with_version_columns: [],
462                            query: None,
463                            cdc_table_info: None,
464                            include_column_options: [],
465                            webhook_info: None,
466                            engine: Hummock,
467                        },
468                    ],
469                )"#]],
470        );
471    }
472
473    #[test]
474    fn test_parse_create_table_statements_with_pk() {
475        let test_string = "
476CREATE TABLE t(v1 int PRIMARY KEY);
477CREATE TABLE t2(v1 int, v2 smallint PRIMARY KEY);
478CREATE TABLE t3(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY);
479CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY);
480";
481        check(
482            parse_create_table_statements(test_string),
483            expect![[r#"
484                (
485                    [
486                        Table {
487                            name: "t",
488                            columns: [
489                                Column {
490                                    name: ObjectName(
491                                        [
492                                            Ident {
493                                                value: "v1",
494                                                quote_style: None,
495                                            },
496                                        ],
497                                    ),
498                                    data_type: Int32,
499                                },
500                            ],
501                            pk_indices: [
502                                0,
503                            ],
504                            is_base_table: true,
505                            is_append_only: false,
506                            source_watermarks: [],
507                        },
508                        Table {
509                            name: "t2",
510                            columns: [
511                                Column {
512                                    name: ObjectName(
513                                        [
514                                            Ident {
515                                                value: "v1",
516                                                quote_style: None,
517                                            },
518                                        ],
519                                    ),
520                                    data_type: Int32,
521                                },
522                                Column {
523                                    name: ObjectName(
524                                        [
525                                            Ident {
526                                                value: "v2",
527                                                quote_style: None,
528                                            },
529                                        ],
530                                    ),
531                                    data_type: Int16,
532                                },
533                            ],
534                            pk_indices: [
535                                1,
536                            ],
537                            is_base_table: true,
538                            is_append_only: false,
539                            source_watermarks: [],
540                        },
541                        Table {
542                            name: "t3",
543                            columns: [
544                                Column {
545                                    name: ObjectName(
546                                        [
547                                            Ident {
548                                                value: "v1",
549                                                quote_style: None,
550                                            },
551                                        ],
552                                    ),
553                                    data_type: Int32,
554                                },
555                                Column {
556                                    name: ObjectName(
557                                        [
558                                            Ident {
559                                                value: "v2",
560                                                quote_style: None,
561                                            },
562                                        ],
563                                    ),
564                                    data_type: Int16,
565                                },
566                            ],
567                            pk_indices: [
568                                0,
569                                1,
570                            ],
571                            is_base_table: true,
572                            is_append_only: false,
573                            source_watermarks: [],
574                        },
575                        Table {
576                            name: "t4",
577                            columns: [
578                                Column {
579                                    name: ObjectName(
580                                        [
581                                            Ident {
582                                                value: "v1",
583                                                quote_style: None,
584                                            },
585                                        ],
586                                    ),
587                                    data_type: Int32,
588                                },
589                                Column {
590                                    name: ObjectName(
591                                        [
592                                            Ident {
593                                                value: "v2",
594                                                quote_style: None,
595                                            },
596                                        ],
597                                    ),
598                                    data_type: Int16,
599                                },
600                                Column {
601                                    name: ObjectName(
602                                        [
603                                            Ident {
604                                                value: "v3",
605                                                quote_style: None,
606                                            },
607                                        ],
608                                    ),
609                                    data_type: Boolean,
610                                },
611                            ],
612                            pk_indices: [
613                                0,
614                                1,
615                                2,
616                            ],
617                            is_base_table: true,
618                            is_append_only: false,
619                            source_watermarks: [],
620                        },
621                    ],
622                    [
623                        CreateTable {
624                            or_replace: false,
625                            temporary: false,
626                            if_not_exists: false,
627                            name: ObjectName(
628                                [
629                                    Ident {
630                                        value: "t",
631                                        quote_style: None,
632                                    },
633                                ],
634                            ),
635                            columns: [
636                                ColumnDef {
637                                    name: Ident {
638                                        value: "v1",
639                                        quote_style: None,
640                                    },
641                                    data_type: Some(
642                                        Int,
643                                    ),
644                                    collation: None,
645                                    options: [
646                                        ColumnOptionDef {
647                                            name: None,
648                                            option: Unique {
649                                                is_primary: true,
650                                            },
651                                        },
652                                    ],
653                                },
654                            ],
655                            wildcard_idx: None,
656                            constraints: [],
657                            with_options: [],
658                            format_encode: None,
659                            source_watermarks: [],
660                            append_only: false,
661                            on_conflict: None,
662                            with_version_columns: [],
663                            query: None,
664                            cdc_table_info: None,
665                            include_column_options: [],
666                            webhook_info: None,
667                            engine: Hummock,
668                        },
669                        CreateTable {
670                            or_replace: false,
671                            temporary: false,
672                            if_not_exists: false,
673                            name: ObjectName(
674                                [
675                                    Ident {
676                                        value: "t2",
677                                        quote_style: None,
678                                    },
679                                ],
680                            ),
681                            columns: [
682                                ColumnDef {
683                                    name: Ident {
684                                        value: "v1",
685                                        quote_style: None,
686                                    },
687                                    data_type: Some(
688                                        Int,
689                                    ),
690                                    collation: None,
691                                    options: [],
692                                },
693                                ColumnDef {
694                                    name: Ident {
695                                        value: "v2",
696                                        quote_style: None,
697                                    },
698                                    data_type: Some(
699                                        SmallInt,
700                                    ),
701                                    collation: None,
702                                    options: [
703                                        ColumnOptionDef {
704                                            name: None,
705                                            option: Unique {
706                                                is_primary: true,
707                                            },
708                                        },
709                                    ],
710                                },
711                            ],
712                            wildcard_idx: None,
713                            constraints: [],
714                            with_options: [],
715                            format_encode: None,
716                            source_watermarks: [],
717                            append_only: false,
718                            on_conflict: None,
719                            with_version_columns: [],
720                            query: None,
721                            cdc_table_info: None,
722                            include_column_options: [],
723                            webhook_info: None,
724                            engine: Hummock,
725                        },
726                        CreateTable {
727                            or_replace: false,
728                            temporary: false,
729                            if_not_exists: false,
730                            name: ObjectName(
731                                [
732                                    Ident {
733                                        value: "t3",
734                                        quote_style: None,
735                                    },
736                                ],
737                            ),
738                            columns: [
739                                ColumnDef {
740                                    name: Ident {
741                                        value: "v1",
742                                        quote_style: None,
743                                    },
744                                    data_type: Some(
745                                        Int,
746                                    ),
747                                    collation: None,
748                                    options: [
749                                        ColumnOptionDef {
750                                            name: None,
751                                            option: Unique {
752                                                is_primary: true,
753                                            },
754                                        },
755                                    ],
756                                },
757                                ColumnDef {
758                                    name: Ident {
759                                        value: "v2",
760                                        quote_style: None,
761                                    },
762                                    data_type: Some(
763                                        SmallInt,
764                                    ),
765                                    collation: None,
766                                    options: [
767                                        ColumnOptionDef {
768                                            name: None,
769                                            option: Unique {
770                                                is_primary: true,
771                                            },
772                                        },
773                                    ],
774                                },
775                            ],
776                            wildcard_idx: None,
777                            constraints: [],
778                            with_options: [],
779                            format_encode: None,
780                            source_watermarks: [],
781                            append_only: false,
782                            on_conflict: None,
783                            with_version_columns: [],
784                            query: None,
785                            cdc_table_info: None,
786                            include_column_options: [],
787                            webhook_info: None,
788                            engine: Hummock,
789                        },
790                        CreateTable {
791                            or_replace: false,
792                            temporary: false,
793                            if_not_exists: false,
794                            name: ObjectName(
795                                [
796                                    Ident {
797                                        value: "t4",
798                                        quote_style: None,
799                                    },
800                                ],
801                            ),
802                            columns: [
803                                ColumnDef {
804                                    name: Ident {
805                                        value: "v1",
806                                        quote_style: None,
807                                    },
808                                    data_type: Some(
809                                        Int,
810                                    ),
811                                    collation: None,
812                                    options: [
813                                        ColumnOptionDef {
814                                            name: None,
815                                            option: Unique {
816                                                is_primary: true,
817                                            },
818                                        },
819                                    ],
820                                },
821                                ColumnDef {
822                                    name: Ident {
823                                        value: "v2",
824                                        quote_style: None,
825                                    },
826                                    data_type: Some(
827                                        SmallInt,
828                                    ),
829                                    collation: None,
830                                    options: [
831                                        ColumnOptionDef {
832                                            name: None,
833                                            option: Unique {
834                                                is_primary: true,
835                                            },
836                                        },
837                                    ],
838                                },
839                                ColumnDef {
840                                    name: Ident {
841                                        value: "v3",
842                                        quote_style: None,
843                                    },
844                                    data_type: Some(
845                                        Boolean,
846                                    ),
847                                    collation: None,
848                                    options: [
849                                        ColumnOptionDef {
850                                            name: None,
851                                            option: Unique {
852                                                is_primary: true,
853                                            },
854                                        },
855                                    ],
856                                },
857                            ],
858                            wildcard_idx: None,
859                            constraints: [],
860                            with_options: [],
861                            format_encode: None,
862                            source_watermarks: [],
863                            append_only: false,
864                            on_conflict: None,
865                            with_version_columns: [],
866                            query: None,
867                            cdc_table_info: None,
868                            include_column_options: [],
869                            webhook_info: None,
870                            engine: Hummock,
871                        },
872                    ],
873                )"#]],
874        );
875    }
876
877    #[test]
878    fn test_parse_create_table_statements_with_append_only_and_watermark() {
879        let test_string = r#"
880    CREATE TABLE t1 (
881        v1 INT,
882        ts TIMESTAMP,
883        WATERMARK FOR ts AS ts - INTERVAL '5' SECOND
884    ) APPEND ONLY;
885
886    CREATE TABLE t2 (
887        v1 INT,
888        ts TIMESTAMP
889    ) APPEND ONLY;
890
891    CREATE TABLE t3 (
892        v1 INT,
893        ts TIMESTAMP
894    );
895    "#;
896
897        let (tables, _) = parse_create_table_statements(test_string);
898
899        // Check t1
900        let t1 = &tables[0];
901        assert!(t1.is_append_only);
902        assert_eq!(t1.source_watermarks.len(), 1);
903        assert_eq!(t1.source_watermarks[0].column.real_value(), "ts");
904
905        // Check t2
906        let t2 = &tables[1];
907        assert!(t2.is_append_only);
908        assert_eq!(t2.source_watermarks.len(), 0);
909
910        // Check t3
911        let t3 = &tables[2];
912        assert!(!t3.is_append_only);
913        assert_eq!(t3.source_watermarks.len(), 0);
914    }
915}