risingwave_sqlsmith/lib.rs
1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![feature(let_chains)]
16#![feature(if_let_guard)]
17#![feature(box_patterns)]
18#![feature(register_tool)]
19#![register_tool(rw)]
20#![allow(rw::format_error)] // test code
21
22risingwave_expr_impl::enable!();
23
24use std::collections::{HashMap, HashSet};
25
26use anyhow::{Result, bail};
27use config::Configuration;
28use itertools::Itertools;
29use rand::Rng;
30use rand::prelude::IndexedRandom;
31use risingwave_sqlparser::ast::{
32 BinaryOperator, ColumnOption, Expr, Join, JoinConstraint, JoinOperator, Statement,
33 TableConstraint,
34};
35use risingwave_sqlparser::parser::Parser;
36
37use crate::sql_gen::SqlGenerator;
38
39pub mod config;
40pub mod reducer;
41mod sql_gen;
42pub mod sqlreduce;
43pub mod test_runners;
44mod utils;
45pub mod validation;
46pub use validation::is_permissible_error;
47
48pub use crate::sql_gen::{Table, print_function_table};
49
50/// Generate a random SQL string.
51pub fn sql_gen(rng: &mut impl Rng, tables: Vec<Table>, config: &Configuration) -> String {
52 let mut r#gen = SqlGenerator::new(rng, tables, config.clone());
53 format!("{}", r#gen.gen_batch_query_stmt())
54}
55
56/// Generate `INSERT`
57pub fn insert_sql_gen(
58 rng: &mut impl Rng,
59 tables: Vec<Table>,
60 count: usize,
61 config: &Configuration,
62) -> Vec<String> {
63 let mut r#gen = SqlGenerator::new(rng, vec![], config.clone());
64 tables
65 .into_iter()
66 .map(|table| format!("{}", r#gen.generate_insert_statement(&table, count)))
67 .collect()
68}
69
70/// Generate a random CREATE MATERIALIZED VIEW sql string.
71/// These are derived from `tables`.
72pub fn mview_sql_gen<R: Rng>(
73 rng: &mut R,
74 tables: Vec<Table>,
75 name: &str,
76 config: &Configuration,
77) -> (String, Table) {
78 let mut r#gen = SqlGenerator::new_for_mview(rng, tables.clone(), config.clone());
79 let (mview, table) = r#gen.gen_mview_stmt(name);
80 (mview.to_string(), table)
81}
82
83pub fn differential_sql_gen<R: Rng>(
84 rng: &mut R,
85 tables: Vec<Table>,
86 name: &str,
87 config: &Configuration,
88) -> Result<(String, String, Table)> {
89 let mut r#gen = SqlGenerator::new_for_mview(rng, tables.clone(), config.clone());
90 let (stream, table) = r#gen.gen_mview_stmt(name);
91 let batch = match stream {
92 Statement::CreateView { ref query, .. } => query.to_string(),
93 _ => bail!("Differential pair should be mview statement!"),
94 };
95 Ok((batch, stream.to_string(), table))
96}
97
98/// TODO(noel): Eventually all session variables should be fuzzed.
99/// For now we start of with a few hardcoded configs.
100/// Some config need workarounds, for instance `QUERY_MODE`,
101/// which can lead to stack overflow
102/// (a simple workaround is limit length of
103/// generated query when `QUERY_MODE=local`.
104pub fn session_sql_gen<R: Rng>(rng: &mut R) -> String {
105 [
106 "SET ENABLE_TWO_PHASE_AGG TO TRUE",
107 "SET ENABLE_TWO_PHASE_AGG TO FALSE",
108 "SET RW_FORCE_TWO_PHASE_AGG TO TRUE",
109 "SET RW_FORCE_TWO_PHASE_AGG TO FALSE",
110 ]
111 .choose(rng)
112 .unwrap()
113 .to_string()
114}
115
116pub fn generate_update_statements<R: Rng>(
117 rng: &mut R,
118 tables: &[Table],
119 inserts: &[Statement],
120 config: &Configuration,
121) -> Result<Vec<Statement>> {
122 let mut r#gen = SqlGenerator::new(rng, vec![], config.clone());
123 r#gen.generate_update_statements(tables, inserts)
124}
125
126/// Parse SQL
127/// FIXME(Noel): Introduce error type for sqlsmith for this.
128pub fn parse_sql<S: AsRef<str>>(sql: S) -> Vec<Statement> {
129 let sql = sql.as_ref();
130 Parser::parse_sql(sql).unwrap_or_else(|_| panic!("Failed to parse SQL: {}", sql))
131}
132
133/// Extract relevant info from CREATE TABLE statement, to construct a Table
134pub fn create_table_statement_to_table(statement: &Statement) -> Table {
135 match statement {
136 Statement::CreateTable {
137 name,
138 columns,
139 constraints,
140 append_only,
141 source_watermarks,
142 ..
143 } => {
144 let column_name_to_index_mapping: HashMap<_, _> = columns
145 .iter()
146 .enumerate()
147 .map(|(i, c)| (&c.name, i))
148 .collect();
149 let mut pk_indices = HashSet::new();
150 for (i, column) in columns.iter().enumerate() {
151 let is_primary_key = column
152 .options
153 .iter()
154 .any(|option| option.option == ColumnOption::Unique { is_primary: true });
155 if is_primary_key {
156 pk_indices.insert(i);
157 }
158 }
159 for constraint in constraints {
160 if let TableConstraint::Unique {
161 columns,
162 is_primary: true,
163 ..
164 } = constraint
165 {
166 for column in columns {
167 let pk_index = column_name_to_index_mapping.get(column).unwrap();
168 pk_indices.insert(*pk_index);
169 }
170 }
171 }
172 let mut pk_indices = pk_indices.into_iter().collect_vec();
173 pk_indices.sort_unstable();
174 Table::new_for_base_table(
175 name.0[0].real_value(),
176 columns.iter().map(|c| c.clone().into()).collect(),
177 pk_indices,
178 *append_only,
179 source_watermarks.to_vec(),
180 )
181 }
182 _ => panic!(
183 "Only CREATE TABLE statements permitted, received: {}",
184 statement
185 ),
186 }
187}
188
189pub fn parse_create_table_statements(sql: impl AsRef<str>) -> (Vec<Table>, Vec<Statement>) {
190 let statements = parse_sql(&sql);
191 let tables = statements
192 .iter()
193 .map(create_table_statement_to_table)
194 .collect();
195 (tables, statements)
196}
197
198#[cfg(test)]
199mod tests {
200 use std::fmt::Debug;
201
202 use expect_test::{Expect, expect};
203
204 use super::*;
205
206 fn check(actual: impl Debug, expect: Expect) {
207 let actual = format!("{:#?}", actual);
208 expect.assert_eq(&actual);
209 }
210
211 #[test]
212 fn test_parse_create_table_statements_no_pk() {
213 let test_string = "
214CREATE TABLE t(v1 int);
215CREATE TABLE t2(v1 int, v2 bool);
216CREATE TABLE t3(v1 int, v2 bool, v3 smallint);
217 ";
218 check(
219 parse_create_table_statements(test_string),
220 expect![[r#"
221 (
222 [
223 Table {
224 name: "t",
225 columns: [
226 Column {
227 name: ObjectName(
228 [
229 Ident {
230 value: "v1",
231 quote_style: None,
232 },
233 ],
234 ),
235 data_type: Int32,
236 },
237 ],
238 pk_indices: [],
239 is_base_table: true,
240 is_append_only: false,
241 source_watermarks: [],
242 },
243 Table {
244 name: "t2",
245 columns: [
246 Column {
247 name: ObjectName(
248 [
249 Ident {
250 value: "v1",
251 quote_style: None,
252 },
253 ],
254 ),
255 data_type: Int32,
256 },
257 Column {
258 name: ObjectName(
259 [
260 Ident {
261 value: "v2",
262 quote_style: None,
263 },
264 ],
265 ),
266 data_type: Boolean,
267 },
268 ],
269 pk_indices: [],
270 is_base_table: true,
271 is_append_only: false,
272 source_watermarks: [],
273 },
274 Table {
275 name: "t3",
276 columns: [
277 Column {
278 name: ObjectName(
279 [
280 Ident {
281 value: "v1",
282 quote_style: None,
283 },
284 ],
285 ),
286 data_type: Int32,
287 },
288 Column {
289 name: ObjectName(
290 [
291 Ident {
292 value: "v2",
293 quote_style: None,
294 },
295 ],
296 ),
297 data_type: Boolean,
298 },
299 Column {
300 name: ObjectName(
301 [
302 Ident {
303 value: "v3",
304 quote_style: None,
305 },
306 ],
307 ),
308 data_type: Int16,
309 },
310 ],
311 pk_indices: [],
312 is_base_table: true,
313 is_append_only: false,
314 source_watermarks: [],
315 },
316 ],
317 [
318 CreateTable {
319 or_replace: false,
320 temporary: false,
321 if_not_exists: false,
322 name: ObjectName(
323 [
324 Ident {
325 value: "t",
326 quote_style: None,
327 },
328 ],
329 ),
330 columns: [
331 ColumnDef {
332 name: Ident {
333 value: "v1",
334 quote_style: None,
335 },
336 data_type: Some(
337 Int,
338 ),
339 collation: None,
340 options: [],
341 },
342 ],
343 wildcard_idx: None,
344 constraints: [],
345 with_options: [],
346 format_encode: None,
347 source_watermarks: [],
348 append_only: false,
349 on_conflict: None,
350 with_version_columns: [],
351 query: None,
352 cdc_table_info: None,
353 include_column_options: [],
354 webhook_info: None,
355 engine: Hummock,
356 },
357 CreateTable {
358 or_replace: false,
359 temporary: false,
360 if_not_exists: false,
361 name: ObjectName(
362 [
363 Ident {
364 value: "t2",
365 quote_style: None,
366 },
367 ],
368 ),
369 columns: [
370 ColumnDef {
371 name: Ident {
372 value: "v1",
373 quote_style: None,
374 },
375 data_type: Some(
376 Int,
377 ),
378 collation: None,
379 options: [],
380 },
381 ColumnDef {
382 name: Ident {
383 value: "v2",
384 quote_style: None,
385 },
386 data_type: Some(
387 Boolean,
388 ),
389 collation: None,
390 options: [],
391 },
392 ],
393 wildcard_idx: None,
394 constraints: [],
395 with_options: [],
396 format_encode: None,
397 source_watermarks: [],
398 append_only: false,
399 on_conflict: None,
400 with_version_columns: [],
401 query: None,
402 cdc_table_info: None,
403 include_column_options: [],
404 webhook_info: None,
405 engine: Hummock,
406 },
407 CreateTable {
408 or_replace: false,
409 temporary: false,
410 if_not_exists: false,
411 name: ObjectName(
412 [
413 Ident {
414 value: "t3",
415 quote_style: None,
416 },
417 ],
418 ),
419 columns: [
420 ColumnDef {
421 name: Ident {
422 value: "v1",
423 quote_style: None,
424 },
425 data_type: Some(
426 Int,
427 ),
428 collation: None,
429 options: [],
430 },
431 ColumnDef {
432 name: Ident {
433 value: "v2",
434 quote_style: None,
435 },
436 data_type: Some(
437 Boolean,
438 ),
439 collation: None,
440 options: [],
441 },
442 ColumnDef {
443 name: Ident {
444 value: "v3",
445 quote_style: None,
446 },
447 data_type: Some(
448 SmallInt,
449 ),
450 collation: None,
451 options: [],
452 },
453 ],
454 wildcard_idx: None,
455 constraints: [],
456 with_options: [],
457 format_encode: None,
458 source_watermarks: [],
459 append_only: false,
460 on_conflict: None,
461 with_version_columns: [],
462 query: None,
463 cdc_table_info: None,
464 include_column_options: [],
465 webhook_info: None,
466 engine: Hummock,
467 },
468 ],
469 )"#]],
470 );
471 }
472
473 #[test]
474 fn test_parse_create_table_statements_with_pk() {
475 let test_string = "
476CREATE TABLE t(v1 int PRIMARY KEY);
477CREATE TABLE t2(v1 int, v2 smallint PRIMARY KEY);
478CREATE TABLE t3(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY);
479CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY);
480";
481 check(
482 parse_create_table_statements(test_string),
483 expect![[r#"
484 (
485 [
486 Table {
487 name: "t",
488 columns: [
489 Column {
490 name: ObjectName(
491 [
492 Ident {
493 value: "v1",
494 quote_style: None,
495 },
496 ],
497 ),
498 data_type: Int32,
499 },
500 ],
501 pk_indices: [
502 0,
503 ],
504 is_base_table: true,
505 is_append_only: false,
506 source_watermarks: [],
507 },
508 Table {
509 name: "t2",
510 columns: [
511 Column {
512 name: ObjectName(
513 [
514 Ident {
515 value: "v1",
516 quote_style: None,
517 },
518 ],
519 ),
520 data_type: Int32,
521 },
522 Column {
523 name: ObjectName(
524 [
525 Ident {
526 value: "v2",
527 quote_style: None,
528 },
529 ],
530 ),
531 data_type: Int16,
532 },
533 ],
534 pk_indices: [
535 1,
536 ],
537 is_base_table: true,
538 is_append_only: false,
539 source_watermarks: [],
540 },
541 Table {
542 name: "t3",
543 columns: [
544 Column {
545 name: ObjectName(
546 [
547 Ident {
548 value: "v1",
549 quote_style: None,
550 },
551 ],
552 ),
553 data_type: Int32,
554 },
555 Column {
556 name: ObjectName(
557 [
558 Ident {
559 value: "v2",
560 quote_style: None,
561 },
562 ],
563 ),
564 data_type: Int16,
565 },
566 ],
567 pk_indices: [
568 0,
569 1,
570 ],
571 is_base_table: true,
572 is_append_only: false,
573 source_watermarks: [],
574 },
575 Table {
576 name: "t4",
577 columns: [
578 Column {
579 name: ObjectName(
580 [
581 Ident {
582 value: "v1",
583 quote_style: None,
584 },
585 ],
586 ),
587 data_type: Int32,
588 },
589 Column {
590 name: ObjectName(
591 [
592 Ident {
593 value: "v2",
594 quote_style: None,
595 },
596 ],
597 ),
598 data_type: Int16,
599 },
600 Column {
601 name: ObjectName(
602 [
603 Ident {
604 value: "v3",
605 quote_style: None,
606 },
607 ],
608 ),
609 data_type: Boolean,
610 },
611 ],
612 pk_indices: [
613 0,
614 1,
615 2,
616 ],
617 is_base_table: true,
618 is_append_only: false,
619 source_watermarks: [],
620 },
621 ],
622 [
623 CreateTable {
624 or_replace: false,
625 temporary: false,
626 if_not_exists: false,
627 name: ObjectName(
628 [
629 Ident {
630 value: "t",
631 quote_style: None,
632 },
633 ],
634 ),
635 columns: [
636 ColumnDef {
637 name: Ident {
638 value: "v1",
639 quote_style: None,
640 },
641 data_type: Some(
642 Int,
643 ),
644 collation: None,
645 options: [
646 ColumnOptionDef {
647 name: None,
648 option: Unique {
649 is_primary: true,
650 },
651 },
652 ],
653 },
654 ],
655 wildcard_idx: None,
656 constraints: [],
657 with_options: [],
658 format_encode: None,
659 source_watermarks: [],
660 append_only: false,
661 on_conflict: None,
662 with_version_columns: [],
663 query: None,
664 cdc_table_info: None,
665 include_column_options: [],
666 webhook_info: None,
667 engine: Hummock,
668 },
669 CreateTable {
670 or_replace: false,
671 temporary: false,
672 if_not_exists: false,
673 name: ObjectName(
674 [
675 Ident {
676 value: "t2",
677 quote_style: None,
678 },
679 ],
680 ),
681 columns: [
682 ColumnDef {
683 name: Ident {
684 value: "v1",
685 quote_style: None,
686 },
687 data_type: Some(
688 Int,
689 ),
690 collation: None,
691 options: [],
692 },
693 ColumnDef {
694 name: Ident {
695 value: "v2",
696 quote_style: None,
697 },
698 data_type: Some(
699 SmallInt,
700 ),
701 collation: None,
702 options: [
703 ColumnOptionDef {
704 name: None,
705 option: Unique {
706 is_primary: true,
707 },
708 },
709 ],
710 },
711 ],
712 wildcard_idx: None,
713 constraints: [],
714 with_options: [],
715 format_encode: None,
716 source_watermarks: [],
717 append_only: false,
718 on_conflict: None,
719 with_version_columns: [],
720 query: None,
721 cdc_table_info: None,
722 include_column_options: [],
723 webhook_info: None,
724 engine: Hummock,
725 },
726 CreateTable {
727 or_replace: false,
728 temporary: false,
729 if_not_exists: false,
730 name: ObjectName(
731 [
732 Ident {
733 value: "t3",
734 quote_style: None,
735 },
736 ],
737 ),
738 columns: [
739 ColumnDef {
740 name: Ident {
741 value: "v1",
742 quote_style: None,
743 },
744 data_type: Some(
745 Int,
746 ),
747 collation: None,
748 options: [
749 ColumnOptionDef {
750 name: None,
751 option: Unique {
752 is_primary: true,
753 },
754 },
755 ],
756 },
757 ColumnDef {
758 name: Ident {
759 value: "v2",
760 quote_style: None,
761 },
762 data_type: Some(
763 SmallInt,
764 ),
765 collation: None,
766 options: [
767 ColumnOptionDef {
768 name: None,
769 option: Unique {
770 is_primary: true,
771 },
772 },
773 ],
774 },
775 ],
776 wildcard_idx: None,
777 constraints: [],
778 with_options: [],
779 format_encode: None,
780 source_watermarks: [],
781 append_only: false,
782 on_conflict: None,
783 with_version_columns: [],
784 query: None,
785 cdc_table_info: None,
786 include_column_options: [],
787 webhook_info: None,
788 engine: Hummock,
789 },
790 CreateTable {
791 or_replace: false,
792 temporary: false,
793 if_not_exists: false,
794 name: ObjectName(
795 [
796 Ident {
797 value: "t4",
798 quote_style: None,
799 },
800 ],
801 ),
802 columns: [
803 ColumnDef {
804 name: Ident {
805 value: "v1",
806 quote_style: None,
807 },
808 data_type: Some(
809 Int,
810 ),
811 collation: None,
812 options: [
813 ColumnOptionDef {
814 name: None,
815 option: Unique {
816 is_primary: true,
817 },
818 },
819 ],
820 },
821 ColumnDef {
822 name: Ident {
823 value: "v2",
824 quote_style: None,
825 },
826 data_type: Some(
827 SmallInt,
828 ),
829 collation: None,
830 options: [
831 ColumnOptionDef {
832 name: None,
833 option: Unique {
834 is_primary: true,
835 },
836 },
837 ],
838 },
839 ColumnDef {
840 name: Ident {
841 value: "v3",
842 quote_style: None,
843 },
844 data_type: Some(
845 Boolean,
846 ),
847 collation: None,
848 options: [
849 ColumnOptionDef {
850 name: None,
851 option: Unique {
852 is_primary: true,
853 },
854 },
855 ],
856 },
857 ],
858 wildcard_idx: None,
859 constraints: [],
860 with_options: [],
861 format_encode: None,
862 source_watermarks: [],
863 append_only: false,
864 on_conflict: None,
865 with_version_columns: [],
866 query: None,
867 cdc_table_info: None,
868 include_column_options: [],
869 webhook_info: None,
870 engine: Hummock,
871 },
872 ],
873 )"#]],
874 );
875 }
876
877 #[test]
878 fn test_parse_create_table_statements_with_append_only_and_watermark() {
879 let test_string = r#"
880 CREATE TABLE t1 (
881 v1 INT,
882 ts TIMESTAMP,
883 WATERMARK FOR ts AS ts - INTERVAL '5' SECOND
884 ) APPEND ONLY;
885
886 CREATE TABLE t2 (
887 v1 INT,
888 ts TIMESTAMP
889 ) APPEND ONLY;
890
891 CREATE TABLE t3 (
892 v1 INT,
893 ts TIMESTAMP
894 );
895 "#;
896
897 let (tables, _) = parse_create_table_statements(test_string);
898
899 // Check t1
900 let t1 = &tables[0];
901 assert!(t1.is_append_only);
902 assert_eq!(t1.source_watermarks.len(), 1);
903 assert_eq!(t1.source_watermarks[0].column.real_value(), "ts");
904
905 // Check t2
906 let t2 = &tables[1];
907 assert!(t2.is_append_only);
908 assert_eq!(t2.source_watermarks.len(), 0);
909
910 // Check t3
911 let t3 = &tables[2];
912 assert!(!t3.is_append_only);
913 assert_eq!(t3.source_watermarks.len(), 0);
914 }
915}