risingwave_sqlsmith/
lib.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![feature(let_chains)]
16#![feature(if_let_guard)]
17#![feature(box_patterns)]
18#![feature(register_tool)]
19#![register_tool(rw)]
20#![allow(rw::format_error)] // test code
21
22risingwave_expr_impl::enable!();
23
24use std::collections::{HashMap, HashSet};
25
26use anyhow::{Result, bail};
27use config::Configuration;
28use itertools::Itertools;
29use rand::Rng;
30use rand::prelude::IndexedRandom;
31use risingwave_sqlparser::ast::{
32    BinaryOperator, ColumnOption, Expr, Join, JoinConstraint, JoinOperator, Statement,
33    TableConstraint,
34};
35use risingwave_sqlparser::parser::Parser;
36
37use crate::sql_gen::SqlGenerator;
38
39pub mod config;
40pub mod reducer;
41mod sql_gen;
42pub mod test_runners;
43mod utils;
44pub mod validation;
45pub use validation::is_permissible_error;
46
47pub use crate::sql_gen::{Table, print_function_table};
48
49/// Generate a random SQL string.
50pub fn sql_gen(rng: &mut impl Rng, tables: Vec<Table>, config: &Configuration) -> String {
51    let mut r#gen = SqlGenerator::new(rng, tables, config.clone());
52    format!("{}", r#gen.gen_batch_query_stmt())
53}
54
55/// Generate `INSERT`
56pub fn insert_sql_gen(
57    rng: &mut impl Rng,
58    tables: Vec<Table>,
59    count: usize,
60    config: &Configuration,
61) -> Vec<String> {
62    let mut r#gen = SqlGenerator::new(rng, vec![], config.clone());
63    tables
64        .into_iter()
65        .map(|table| format!("{}", r#gen.generate_insert_statement(&table, count)))
66        .collect()
67}
68
69/// Generate a random CREATE MATERIALIZED VIEW sql string.
70/// These are derived from `tables`.
71pub fn mview_sql_gen<R: Rng>(
72    rng: &mut R,
73    tables: Vec<Table>,
74    name: &str,
75    config: &Configuration,
76) -> (String, Table) {
77    let mut r#gen = SqlGenerator::new_for_mview(rng, tables.clone(), config.clone());
78    let (mview, table) = r#gen.gen_mview_stmt(name);
79    (mview.to_string(), table)
80}
81
82pub fn differential_sql_gen<R: Rng>(
83    rng: &mut R,
84    tables: Vec<Table>,
85    name: &str,
86    config: &Configuration,
87) -> Result<(String, String, Table)> {
88    let mut r#gen = SqlGenerator::new_for_mview(rng, tables.clone(), config.clone());
89    let (stream, table) = r#gen.gen_mview_stmt(name);
90    let batch = match stream {
91        Statement::CreateView { ref query, .. } => query.to_string(),
92        _ => bail!("Differential pair should be mview statement!"),
93    };
94    Ok((batch, stream.to_string(), table))
95}
96
97/// TODO(noel): Eventually all session variables should be fuzzed.
98/// For now we start of with a few hardcoded configs.
99/// Some config need workarounds, for instance `QUERY_MODE`,
100/// which can lead to stack overflow
101/// (a simple workaround is limit length of
102/// generated query when `QUERY_MODE=local`.
103pub fn session_sql_gen<R: Rng>(rng: &mut R) -> String {
104    [
105        "SET ENABLE_TWO_PHASE_AGG TO TRUE",
106        "SET ENABLE_TWO_PHASE_AGG TO FALSE",
107        "SET RW_FORCE_TWO_PHASE_AGG TO TRUE",
108        "SET RW_FORCE_TWO_PHASE_AGG TO FALSE",
109    ]
110    .choose(rng)
111    .unwrap()
112    .to_string()
113}
114
115pub fn generate_update_statements<R: Rng>(
116    rng: &mut R,
117    tables: &[Table],
118    inserts: &[Statement],
119    config: &Configuration,
120) -> Result<Vec<Statement>> {
121    let mut r#gen = SqlGenerator::new(rng, vec![], config.clone());
122    r#gen.generate_update_statements(tables, inserts)
123}
124
125/// Parse SQL
126/// FIXME(Noel): Introduce error type for sqlsmith for this.
127pub fn parse_sql<S: AsRef<str>>(sql: S) -> Vec<Statement> {
128    let sql = sql.as_ref();
129    Parser::parse_sql(sql).unwrap_or_else(|_| panic!("Failed to parse SQL: {}", sql))
130}
131
132/// Extract relevant info from CREATE TABLE statement, to construct a Table
133pub fn create_table_statement_to_table(statement: &Statement) -> Table {
134    match statement {
135        Statement::CreateTable {
136            name,
137            columns,
138            constraints,
139            append_only,
140            source_watermarks,
141            ..
142        } => {
143            let column_name_to_index_mapping: HashMap<_, _> = columns
144                .iter()
145                .enumerate()
146                .map(|(i, c)| (&c.name, i))
147                .collect();
148            let mut pk_indices = HashSet::new();
149            for (i, column) in columns.iter().enumerate() {
150                let is_primary_key = column
151                    .options
152                    .iter()
153                    .any(|option| option.option == ColumnOption::Unique { is_primary: true });
154                if is_primary_key {
155                    pk_indices.insert(i);
156                }
157            }
158            for constraint in constraints {
159                if let TableConstraint::Unique {
160                    columns,
161                    is_primary: true,
162                    ..
163                } = constraint
164                {
165                    for column in columns {
166                        let pk_index = column_name_to_index_mapping.get(column).unwrap();
167                        pk_indices.insert(*pk_index);
168                    }
169                }
170            }
171            let mut pk_indices = pk_indices.into_iter().collect_vec();
172            pk_indices.sort_unstable();
173            Table::new_for_base_table(
174                name.0[0].real_value(),
175                columns.iter().map(|c| c.clone().into()).collect(),
176                pk_indices,
177                *append_only,
178                source_watermarks.to_vec(),
179            )
180        }
181        _ => panic!(
182            "Only CREATE TABLE statements permitted, received: {}",
183            statement
184        ),
185    }
186}
187
188pub fn parse_create_table_statements(sql: impl AsRef<str>) -> (Vec<Table>, Vec<Statement>) {
189    let statements = parse_sql(&sql);
190    let tables = statements
191        .iter()
192        .map(create_table_statement_to_table)
193        .collect();
194    (tables, statements)
195}
196
197#[cfg(test)]
198mod tests {
199    use std::fmt::Debug;
200
201    use expect_test::{Expect, expect};
202
203    use super::*;
204
205    fn check(actual: impl Debug, expect: Expect) {
206        let actual = format!("{:#?}", actual);
207        expect.assert_eq(&actual);
208    }
209
210    #[test]
211    fn test_parse_create_table_statements_no_pk() {
212        let test_string = "
213CREATE TABLE t(v1 int);
214CREATE TABLE t2(v1 int, v2 bool);
215CREATE TABLE t3(v1 int, v2 bool, v3 smallint);
216        ";
217        check(
218            parse_create_table_statements(test_string),
219            expect![[r#"
220                (
221                    [
222                        Table {
223                            name: "t",
224                            columns: [
225                                Column {
226                                    name: ObjectName(
227                                        [
228                                            Ident {
229                                                value: "v1",
230                                                quote_style: None,
231                                            },
232                                        ],
233                                    ),
234                                    data_type: Int32,
235                                },
236                            ],
237                            pk_indices: [],
238                            is_base_table: true,
239                            is_append_only: false,
240                            source_watermarks: [],
241                        },
242                        Table {
243                            name: "t2",
244                            columns: [
245                                Column {
246                                    name: ObjectName(
247                                        [
248                                            Ident {
249                                                value: "v1",
250                                                quote_style: None,
251                                            },
252                                        ],
253                                    ),
254                                    data_type: Int32,
255                                },
256                                Column {
257                                    name: ObjectName(
258                                        [
259                                            Ident {
260                                                value: "v2",
261                                                quote_style: None,
262                                            },
263                                        ],
264                                    ),
265                                    data_type: Boolean,
266                                },
267                            ],
268                            pk_indices: [],
269                            is_base_table: true,
270                            is_append_only: false,
271                            source_watermarks: [],
272                        },
273                        Table {
274                            name: "t3",
275                            columns: [
276                                Column {
277                                    name: ObjectName(
278                                        [
279                                            Ident {
280                                                value: "v1",
281                                                quote_style: None,
282                                            },
283                                        ],
284                                    ),
285                                    data_type: Int32,
286                                },
287                                Column {
288                                    name: ObjectName(
289                                        [
290                                            Ident {
291                                                value: "v2",
292                                                quote_style: None,
293                                            },
294                                        ],
295                                    ),
296                                    data_type: Boolean,
297                                },
298                                Column {
299                                    name: ObjectName(
300                                        [
301                                            Ident {
302                                                value: "v3",
303                                                quote_style: None,
304                                            },
305                                        ],
306                                    ),
307                                    data_type: Int16,
308                                },
309                            ],
310                            pk_indices: [],
311                            is_base_table: true,
312                            is_append_only: false,
313                            source_watermarks: [],
314                        },
315                    ],
316                    [
317                        CreateTable {
318                            or_replace: false,
319                            temporary: false,
320                            if_not_exists: false,
321                            name: ObjectName(
322                                [
323                                    Ident {
324                                        value: "t",
325                                        quote_style: None,
326                                    },
327                                ],
328                            ),
329                            columns: [
330                                ColumnDef {
331                                    name: Ident {
332                                        value: "v1",
333                                        quote_style: None,
334                                    },
335                                    data_type: Some(
336                                        Int,
337                                    ),
338                                    collation: None,
339                                    options: [],
340                                },
341                            ],
342                            wildcard_idx: None,
343                            constraints: [],
344                            with_options: [],
345                            format_encode: None,
346                            source_watermarks: [],
347                            append_only: false,
348                            on_conflict: None,
349                            with_version_column: None,
350                            query: None,
351                            cdc_table_info: None,
352                            include_column_options: [],
353                            webhook_info: None,
354                            engine: Hummock,
355                        },
356                        CreateTable {
357                            or_replace: false,
358                            temporary: false,
359                            if_not_exists: false,
360                            name: ObjectName(
361                                [
362                                    Ident {
363                                        value: "t2",
364                                        quote_style: None,
365                                    },
366                                ],
367                            ),
368                            columns: [
369                                ColumnDef {
370                                    name: Ident {
371                                        value: "v1",
372                                        quote_style: None,
373                                    },
374                                    data_type: Some(
375                                        Int,
376                                    ),
377                                    collation: None,
378                                    options: [],
379                                },
380                                ColumnDef {
381                                    name: Ident {
382                                        value: "v2",
383                                        quote_style: None,
384                                    },
385                                    data_type: Some(
386                                        Boolean,
387                                    ),
388                                    collation: None,
389                                    options: [],
390                                },
391                            ],
392                            wildcard_idx: None,
393                            constraints: [],
394                            with_options: [],
395                            format_encode: None,
396                            source_watermarks: [],
397                            append_only: false,
398                            on_conflict: None,
399                            with_version_column: None,
400                            query: None,
401                            cdc_table_info: None,
402                            include_column_options: [],
403                            webhook_info: None,
404                            engine: Hummock,
405                        },
406                        CreateTable {
407                            or_replace: false,
408                            temporary: false,
409                            if_not_exists: false,
410                            name: ObjectName(
411                                [
412                                    Ident {
413                                        value: "t3",
414                                        quote_style: None,
415                                    },
416                                ],
417                            ),
418                            columns: [
419                                ColumnDef {
420                                    name: Ident {
421                                        value: "v1",
422                                        quote_style: None,
423                                    },
424                                    data_type: Some(
425                                        Int,
426                                    ),
427                                    collation: None,
428                                    options: [],
429                                },
430                                ColumnDef {
431                                    name: Ident {
432                                        value: "v2",
433                                        quote_style: None,
434                                    },
435                                    data_type: Some(
436                                        Boolean,
437                                    ),
438                                    collation: None,
439                                    options: [],
440                                },
441                                ColumnDef {
442                                    name: Ident {
443                                        value: "v3",
444                                        quote_style: None,
445                                    },
446                                    data_type: Some(
447                                        SmallInt,
448                                    ),
449                                    collation: None,
450                                    options: [],
451                                },
452                            ],
453                            wildcard_idx: None,
454                            constraints: [],
455                            with_options: [],
456                            format_encode: None,
457                            source_watermarks: [],
458                            append_only: false,
459                            on_conflict: None,
460                            with_version_column: None,
461                            query: None,
462                            cdc_table_info: None,
463                            include_column_options: [],
464                            webhook_info: None,
465                            engine: Hummock,
466                        },
467                    ],
468                )"#]],
469        );
470    }
471
472    #[test]
473    fn test_parse_create_table_statements_with_pk() {
474        let test_string = "
475CREATE TABLE t(v1 int PRIMARY KEY);
476CREATE TABLE t2(v1 int, v2 smallint PRIMARY KEY);
477CREATE TABLE t3(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY);
478CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY);
479";
480        check(
481            parse_create_table_statements(test_string),
482            expect![[r#"
483                (
484                    [
485                        Table {
486                            name: "t",
487                            columns: [
488                                Column {
489                                    name: ObjectName(
490                                        [
491                                            Ident {
492                                                value: "v1",
493                                                quote_style: None,
494                                            },
495                                        ],
496                                    ),
497                                    data_type: Int32,
498                                },
499                            ],
500                            pk_indices: [
501                                0,
502                            ],
503                            is_base_table: true,
504                            is_append_only: false,
505                            source_watermarks: [],
506                        },
507                        Table {
508                            name: "t2",
509                            columns: [
510                                Column {
511                                    name: ObjectName(
512                                        [
513                                            Ident {
514                                                value: "v1",
515                                                quote_style: None,
516                                            },
517                                        ],
518                                    ),
519                                    data_type: Int32,
520                                },
521                                Column {
522                                    name: ObjectName(
523                                        [
524                                            Ident {
525                                                value: "v2",
526                                                quote_style: None,
527                                            },
528                                        ],
529                                    ),
530                                    data_type: Int16,
531                                },
532                            ],
533                            pk_indices: [
534                                1,
535                            ],
536                            is_base_table: true,
537                            is_append_only: false,
538                            source_watermarks: [],
539                        },
540                        Table {
541                            name: "t3",
542                            columns: [
543                                Column {
544                                    name: ObjectName(
545                                        [
546                                            Ident {
547                                                value: "v1",
548                                                quote_style: None,
549                                            },
550                                        ],
551                                    ),
552                                    data_type: Int32,
553                                },
554                                Column {
555                                    name: ObjectName(
556                                        [
557                                            Ident {
558                                                value: "v2",
559                                                quote_style: None,
560                                            },
561                                        ],
562                                    ),
563                                    data_type: Int16,
564                                },
565                            ],
566                            pk_indices: [
567                                0,
568                                1,
569                            ],
570                            is_base_table: true,
571                            is_append_only: false,
572                            source_watermarks: [],
573                        },
574                        Table {
575                            name: "t4",
576                            columns: [
577                                Column {
578                                    name: ObjectName(
579                                        [
580                                            Ident {
581                                                value: "v1",
582                                                quote_style: None,
583                                            },
584                                        ],
585                                    ),
586                                    data_type: Int32,
587                                },
588                                Column {
589                                    name: ObjectName(
590                                        [
591                                            Ident {
592                                                value: "v2",
593                                                quote_style: None,
594                                            },
595                                        ],
596                                    ),
597                                    data_type: Int16,
598                                },
599                                Column {
600                                    name: ObjectName(
601                                        [
602                                            Ident {
603                                                value: "v3",
604                                                quote_style: None,
605                                            },
606                                        ],
607                                    ),
608                                    data_type: Boolean,
609                                },
610                            ],
611                            pk_indices: [
612                                0,
613                                1,
614                                2,
615                            ],
616                            is_base_table: true,
617                            is_append_only: false,
618                            source_watermarks: [],
619                        },
620                    ],
621                    [
622                        CreateTable {
623                            or_replace: false,
624                            temporary: false,
625                            if_not_exists: false,
626                            name: ObjectName(
627                                [
628                                    Ident {
629                                        value: "t",
630                                        quote_style: None,
631                                    },
632                                ],
633                            ),
634                            columns: [
635                                ColumnDef {
636                                    name: Ident {
637                                        value: "v1",
638                                        quote_style: None,
639                                    },
640                                    data_type: Some(
641                                        Int,
642                                    ),
643                                    collation: None,
644                                    options: [
645                                        ColumnOptionDef {
646                                            name: None,
647                                            option: Unique {
648                                                is_primary: true,
649                                            },
650                                        },
651                                    ],
652                                },
653                            ],
654                            wildcard_idx: None,
655                            constraints: [],
656                            with_options: [],
657                            format_encode: None,
658                            source_watermarks: [],
659                            append_only: false,
660                            on_conflict: None,
661                            with_version_column: None,
662                            query: None,
663                            cdc_table_info: None,
664                            include_column_options: [],
665                            webhook_info: None,
666                            engine: Hummock,
667                        },
668                        CreateTable {
669                            or_replace: false,
670                            temporary: false,
671                            if_not_exists: false,
672                            name: ObjectName(
673                                [
674                                    Ident {
675                                        value: "t2",
676                                        quote_style: None,
677                                    },
678                                ],
679                            ),
680                            columns: [
681                                ColumnDef {
682                                    name: Ident {
683                                        value: "v1",
684                                        quote_style: None,
685                                    },
686                                    data_type: Some(
687                                        Int,
688                                    ),
689                                    collation: None,
690                                    options: [],
691                                },
692                                ColumnDef {
693                                    name: Ident {
694                                        value: "v2",
695                                        quote_style: None,
696                                    },
697                                    data_type: Some(
698                                        SmallInt,
699                                    ),
700                                    collation: None,
701                                    options: [
702                                        ColumnOptionDef {
703                                            name: None,
704                                            option: Unique {
705                                                is_primary: true,
706                                            },
707                                        },
708                                    ],
709                                },
710                            ],
711                            wildcard_idx: None,
712                            constraints: [],
713                            with_options: [],
714                            format_encode: None,
715                            source_watermarks: [],
716                            append_only: false,
717                            on_conflict: None,
718                            with_version_column: None,
719                            query: None,
720                            cdc_table_info: None,
721                            include_column_options: [],
722                            webhook_info: None,
723                            engine: Hummock,
724                        },
725                        CreateTable {
726                            or_replace: false,
727                            temporary: false,
728                            if_not_exists: false,
729                            name: ObjectName(
730                                [
731                                    Ident {
732                                        value: "t3",
733                                        quote_style: None,
734                                    },
735                                ],
736                            ),
737                            columns: [
738                                ColumnDef {
739                                    name: Ident {
740                                        value: "v1",
741                                        quote_style: None,
742                                    },
743                                    data_type: Some(
744                                        Int,
745                                    ),
746                                    collation: None,
747                                    options: [
748                                        ColumnOptionDef {
749                                            name: None,
750                                            option: Unique {
751                                                is_primary: true,
752                                            },
753                                        },
754                                    ],
755                                },
756                                ColumnDef {
757                                    name: Ident {
758                                        value: "v2",
759                                        quote_style: None,
760                                    },
761                                    data_type: Some(
762                                        SmallInt,
763                                    ),
764                                    collation: None,
765                                    options: [
766                                        ColumnOptionDef {
767                                            name: None,
768                                            option: Unique {
769                                                is_primary: true,
770                                            },
771                                        },
772                                    ],
773                                },
774                            ],
775                            wildcard_idx: None,
776                            constraints: [],
777                            with_options: [],
778                            format_encode: None,
779                            source_watermarks: [],
780                            append_only: false,
781                            on_conflict: None,
782                            with_version_column: None,
783                            query: None,
784                            cdc_table_info: None,
785                            include_column_options: [],
786                            webhook_info: None,
787                            engine: Hummock,
788                        },
789                        CreateTable {
790                            or_replace: false,
791                            temporary: false,
792                            if_not_exists: false,
793                            name: ObjectName(
794                                [
795                                    Ident {
796                                        value: "t4",
797                                        quote_style: None,
798                                    },
799                                ],
800                            ),
801                            columns: [
802                                ColumnDef {
803                                    name: Ident {
804                                        value: "v1",
805                                        quote_style: None,
806                                    },
807                                    data_type: Some(
808                                        Int,
809                                    ),
810                                    collation: None,
811                                    options: [
812                                        ColumnOptionDef {
813                                            name: None,
814                                            option: Unique {
815                                                is_primary: true,
816                                            },
817                                        },
818                                    ],
819                                },
820                                ColumnDef {
821                                    name: Ident {
822                                        value: "v2",
823                                        quote_style: None,
824                                    },
825                                    data_type: Some(
826                                        SmallInt,
827                                    ),
828                                    collation: None,
829                                    options: [
830                                        ColumnOptionDef {
831                                            name: None,
832                                            option: Unique {
833                                                is_primary: true,
834                                            },
835                                        },
836                                    ],
837                                },
838                                ColumnDef {
839                                    name: Ident {
840                                        value: "v3",
841                                        quote_style: None,
842                                    },
843                                    data_type: Some(
844                                        Boolean,
845                                    ),
846                                    collation: None,
847                                    options: [
848                                        ColumnOptionDef {
849                                            name: None,
850                                            option: Unique {
851                                                is_primary: true,
852                                            },
853                                        },
854                                    ],
855                                },
856                            ],
857                            wildcard_idx: None,
858                            constraints: [],
859                            with_options: [],
860                            format_encode: None,
861                            source_watermarks: [],
862                            append_only: false,
863                            on_conflict: None,
864                            with_version_column: None,
865                            query: None,
866                            cdc_table_info: None,
867                            include_column_options: [],
868                            webhook_info: None,
869                            engine: Hummock,
870                        },
871                    ],
872                )"#]],
873        );
874    }
875
876    #[test]
877    fn test_parse_create_table_statements_with_append_only_and_watermark() {
878        let test_string = r#"
879    CREATE TABLE t1 (
880        v1 INT,
881        ts TIMESTAMP,
882        WATERMARK FOR ts AS ts - INTERVAL '5' SECOND
883    ) APPEND ONLY;
884
885    CREATE TABLE t2 (
886        v1 INT,
887        ts TIMESTAMP
888    ) APPEND ONLY;
889
890    CREATE TABLE t3 (
891        v1 INT,
892        ts TIMESTAMP
893    );
894    "#;
895
896        let (tables, _) = parse_create_table_statements(test_string);
897
898        // Check t1
899        let t1 = &tables[0];
900        assert!(t1.is_append_only);
901        assert_eq!(t1.source_watermarks.len(), 1);
902        assert_eq!(t1.source_watermarks[0].column.real_value(), "ts");
903
904        // Check t2
905        let t2 = &tables[1];
906        assert!(t2.is_append_only);
907        assert_eq!(t2.source_watermarks.len(), 0);
908
909        // Check t3
910        let t3 = &tables[2];
911        assert!(!t3.is_append_only);
912        assert_eq!(t3.source_watermarks.len(), 0);
913    }
914}