risingwave_sqlsmith/lib.rs
1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![feature(let_chains)]
16#![feature(if_let_guard)]
17#![feature(box_patterns)]
18#![feature(register_tool)]
19#![register_tool(rw)]
20#![allow(rw::format_error)] // test code
21
22risingwave_expr_impl::enable!();
23
24use std::collections::{HashMap, HashSet};
25
26use anyhow::{Result, bail};
27use config::Configuration;
28use itertools::Itertools;
29use rand::Rng;
30use rand::prelude::IndexedRandom;
31use risingwave_sqlparser::ast::{
32 BinaryOperator, ColumnOption, Expr, Join, JoinConstraint, JoinOperator, Statement,
33 TableConstraint,
34};
35use risingwave_sqlparser::parser::Parser;
36
37use crate::sql_gen::SqlGenerator;
38
39pub mod config;
40pub mod reducer;
41mod sql_gen;
42pub mod test_runners;
43mod utils;
44pub mod validation;
45pub use validation::is_permissible_error;
46
47pub use crate::sql_gen::{Table, print_function_table};
48
49/// Generate a random SQL string.
50pub fn sql_gen(rng: &mut impl Rng, tables: Vec<Table>, config: &Configuration) -> String {
51 let mut r#gen = SqlGenerator::new(rng, tables, config.clone());
52 format!("{}", r#gen.gen_batch_query_stmt())
53}
54
55/// Generate `INSERT`
56pub fn insert_sql_gen(
57 rng: &mut impl Rng,
58 tables: Vec<Table>,
59 count: usize,
60 config: &Configuration,
61) -> Vec<String> {
62 let mut r#gen = SqlGenerator::new(rng, vec![], config.clone());
63 tables
64 .into_iter()
65 .map(|table| format!("{}", r#gen.generate_insert_statement(&table, count)))
66 .collect()
67}
68
69/// Generate a random CREATE MATERIALIZED VIEW sql string.
70/// These are derived from `tables`.
71pub fn mview_sql_gen<R: Rng>(
72 rng: &mut R,
73 tables: Vec<Table>,
74 name: &str,
75 config: &Configuration,
76) -> (String, Table) {
77 let mut r#gen = SqlGenerator::new_for_mview(rng, tables.clone(), config.clone());
78 let (mview, table) = r#gen.gen_mview_stmt(name);
79 (mview.to_string(), table)
80}
81
82pub fn differential_sql_gen<R: Rng>(
83 rng: &mut R,
84 tables: Vec<Table>,
85 name: &str,
86 config: &Configuration,
87) -> Result<(String, String, Table)> {
88 let mut r#gen = SqlGenerator::new_for_mview(rng, tables.clone(), config.clone());
89 let (stream, table) = r#gen.gen_mview_stmt(name);
90 let batch = match stream {
91 Statement::CreateView { ref query, .. } => query.to_string(),
92 _ => bail!("Differential pair should be mview statement!"),
93 };
94 Ok((batch, stream.to_string(), table))
95}
96
97/// TODO(noel): Eventually all session variables should be fuzzed.
98/// For now we start of with a few hardcoded configs.
99/// Some config need workarounds, for instance `QUERY_MODE`,
100/// which can lead to stack overflow
101/// (a simple workaround is limit length of
102/// generated query when `QUERY_MODE=local`.
103pub fn session_sql_gen<R: Rng>(rng: &mut R) -> String {
104 [
105 "SET ENABLE_TWO_PHASE_AGG TO TRUE",
106 "SET ENABLE_TWO_PHASE_AGG TO FALSE",
107 "SET RW_FORCE_TWO_PHASE_AGG TO TRUE",
108 "SET RW_FORCE_TWO_PHASE_AGG TO FALSE",
109 ]
110 .choose(rng)
111 .unwrap()
112 .to_string()
113}
114
115pub fn generate_update_statements<R: Rng>(
116 rng: &mut R,
117 tables: &[Table],
118 inserts: &[Statement],
119 config: &Configuration,
120) -> Result<Vec<Statement>> {
121 let mut r#gen = SqlGenerator::new(rng, vec![], config.clone());
122 r#gen.generate_update_statements(tables, inserts)
123}
124
125/// Parse SQL
126/// FIXME(Noel): Introduce error type for sqlsmith for this.
127pub fn parse_sql<S: AsRef<str>>(sql: S) -> Vec<Statement> {
128 let sql = sql.as_ref();
129 Parser::parse_sql(sql).unwrap_or_else(|_| panic!("Failed to parse SQL: {}", sql))
130}
131
132/// Extract relevant info from CREATE TABLE statement, to construct a Table
133pub fn create_table_statement_to_table(statement: &Statement) -> Table {
134 match statement {
135 Statement::CreateTable {
136 name,
137 columns,
138 constraints,
139 append_only,
140 source_watermarks,
141 ..
142 } => {
143 let column_name_to_index_mapping: HashMap<_, _> = columns
144 .iter()
145 .enumerate()
146 .map(|(i, c)| (&c.name, i))
147 .collect();
148 let mut pk_indices = HashSet::new();
149 for (i, column) in columns.iter().enumerate() {
150 let is_primary_key = column
151 .options
152 .iter()
153 .any(|option| option.option == ColumnOption::Unique { is_primary: true });
154 if is_primary_key {
155 pk_indices.insert(i);
156 }
157 }
158 for constraint in constraints {
159 if let TableConstraint::Unique {
160 columns,
161 is_primary: true,
162 ..
163 } = constraint
164 {
165 for column in columns {
166 let pk_index = column_name_to_index_mapping.get(column).unwrap();
167 pk_indices.insert(*pk_index);
168 }
169 }
170 }
171 let mut pk_indices = pk_indices.into_iter().collect_vec();
172 pk_indices.sort_unstable();
173 Table::new_for_base_table(
174 name.0[0].real_value(),
175 columns.iter().map(|c| c.clone().into()).collect(),
176 pk_indices,
177 *append_only,
178 source_watermarks.to_vec(),
179 )
180 }
181 _ => panic!(
182 "Only CREATE TABLE statements permitted, received: {}",
183 statement
184 ),
185 }
186}
187
188pub fn parse_create_table_statements(sql: impl AsRef<str>) -> (Vec<Table>, Vec<Statement>) {
189 let statements = parse_sql(&sql);
190 let tables = statements
191 .iter()
192 .map(create_table_statement_to_table)
193 .collect();
194 (tables, statements)
195}
196
197#[cfg(test)]
198mod tests {
199 use std::fmt::Debug;
200
201 use expect_test::{Expect, expect};
202
203 use super::*;
204
205 fn check(actual: impl Debug, expect: Expect) {
206 let actual = format!("{:#?}", actual);
207 expect.assert_eq(&actual);
208 }
209
210 #[test]
211 fn test_parse_create_table_statements_no_pk() {
212 let test_string = "
213CREATE TABLE t(v1 int);
214CREATE TABLE t2(v1 int, v2 bool);
215CREATE TABLE t3(v1 int, v2 bool, v3 smallint);
216 ";
217 check(
218 parse_create_table_statements(test_string),
219 expect![[r#"
220 (
221 [
222 Table {
223 name: "t",
224 columns: [
225 Column {
226 name: ObjectName(
227 [
228 Ident {
229 value: "v1",
230 quote_style: None,
231 },
232 ],
233 ),
234 data_type: Int32,
235 },
236 ],
237 pk_indices: [],
238 is_base_table: true,
239 is_append_only: false,
240 source_watermarks: [],
241 },
242 Table {
243 name: "t2",
244 columns: [
245 Column {
246 name: ObjectName(
247 [
248 Ident {
249 value: "v1",
250 quote_style: None,
251 },
252 ],
253 ),
254 data_type: Int32,
255 },
256 Column {
257 name: ObjectName(
258 [
259 Ident {
260 value: "v2",
261 quote_style: None,
262 },
263 ],
264 ),
265 data_type: Boolean,
266 },
267 ],
268 pk_indices: [],
269 is_base_table: true,
270 is_append_only: false,
271 source_watermarks: [],
272 },
273 Table {
274 name: "t3",
275 columns: [
276 Column {
277 name: ObjectName(
278 [
279 Ident {
280 value: "v1",
281 quote_style: None,
282 },
283 ],
284 ),
285 data_type: Int32,
286 },
287 Column {
288 name: ObjectName(
289 [
290 Ident {
291 value: "v2",
292 quote_style: None,
293 },
294 ],
295 ),
296 data_type: Boolean,
297 },
298 Column {
299 name: ObjectName(
300 [
301 Ident {
302 value: "v3",
303 quote_style: None,
304 },
305 ],
306 ),
307 data_type: Int16,
308 },
309 ],
310 pk_indices: [],
311 is_base_table: true,
312 is_append_only: false,
313 source_watermarks: [],
314 },
315 ],
316 [
317 CreateTable {
318 or_replace: false,
319 temporary: false,
320 if_not_exists: false,
321 name: ObjectName(
322 [
323 Ident {
324 value: "t",
325 quote_style: None,
326 },
327 ],
328 ),
329 columns: [
330 ColumnDef {
331 name: Ident {
332 value: "v1",
333 quote_style: None,
334 },
335 data_type: Some(
336 Int,
337 ),
338 collation: None,
339 options: [],
340 },
341 ],
342 wildcard_idx: None,
343 constraints: [],
344 with_options: [],
345 format_encode: None,
346 source_watermarks: [],
347 append_only: false,
348 on_conflict: None,
349 with_version_column: None,
350 query: None,
351 cdc_table_info: None,
352 include_column_options: [],
353 webhook_info: None,
354 engine: Hummock,
355 },
356 CreateTable {
357 or_replace: false,
358 temporary: false,
359 if_not_exists: false,
360 name: ObjectName(
361 [
362 Ident {
363 value: "t2",
364 quote_style: None,
365 },
366 ],
367 ),
368 columns: [
369 ColumnDef {
370 name: Ident {
371 value: "v1",
372 quote_style: None,
373 },
374 data_type: Some(
375 Int,
376 ),
377 collation: None,
378 options: [],
379 },
380 ColumnDef {
381 name: Ident {
382 value: "v2",
383 quote_style: None,
384 },
385 data_type: Some(
386 Boolean,
387 ),
388 collation: None,
389 options: [],
390 },
391 ],
392 wildcard_idx: None,
393 constraints: [],
394 with_options: [],
395 format_encode: None,
396 source_watermarks: [],
397 append_only: false,
398 on_conflict: None,
399 with_version_column: None,
400 query: None,
401 cdc_table_info: None,
402 include_column_options: [],
403 webhook_info: None,
404 engine: Hummock,
405 },
406 CreateTable {
407 or_replace: false,
408 temporary: false,
409 if_not_exists: false,
410 name: ObjectName(
411 [
412 Ident {
413 value: "t3",
414 quote_style: None,
415 },
416 ],
417 ),
418 columns: [
419 ColumnDef {
420 name: Ident {
421 value: "v1",
422 quote_style: None,
423 },
424 data_type: Some(
425 Int,
426 ),
427 collation: None,
428 options: [],
429 },
430 ColumnDef {
431 name: Ident {
432 value: "v2",
433 quote_style: None,
434 },
435 data_type: Some(
436 Boolean,
437 ),
438 collation: None,
439 options: [],
440 },
441 ColumnDef {
442 name: Ident {
443 value: "v3",
444 quote_style: None,
445 },
446 data_type: Some(
447 SmallInt,
448 ),
449 collation: None,
450 options: [],
451 },
452 ],
453 wildcard_idx: None,
454 constraints: [],
455 with_options: [],
456 format_encode: None,
457 source_watermarks: [],
458 append_only: false,
459 on_conflict: None,
460 with_version_column: None,
461 query: None,
462 cdc_table_info: None,
463 include_column_options: [],
464 webhook_info: None,
465 engine: Hummock,
466 },
467 ],
468 )"#]],
469 );
470 }
471
472 #[test]
473 fn test_parse_create_table_statements_with_pk() {
474 let test_string = "
475CREATE TABLE t(v1 int PRIMARY KEY);
476CREATE TABLE t2(v1 int, v2 smallint PRIMARY KEY);
477CREATE TABLE t3(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY);
478CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY);
479";
480 check(
481 parse_create_table_statements(test_string),
482 expect![[r#"
483 (
484 [
485 Table {
486 name: "t",
487 columns: [
488 Column {
489 name: ObjectName(
490 [
491 Ident {
492 value: "v1",
493 quote_style: None,
494 },
495 ],
496 ),
497 data_type: Int32,
498 },
499 ],
500 pk_indices: [
501 0,
502 ],
503 is_base_table: true,
504 is_append_only: false,
505 source_watermarks: [],
506 },
507 Table {
508 name: "t2",
509 columns: [
510 Column {
511 name: ObjectName(
512 [
513 Ident {
514 value: "v1",
515 quote_style: None,
516 },
517 ],
518 ),
519 data_type: Int32,
520 },
521 Column {
522 name: ObjectName(
523 [
524 Ident {
525 value: "v2",
526 quote_style: None,
527 },
528 ],
529 ),
530 data_type: Int16,
531 },
532 ],
533 pk_indices: [
534 1,
535 ],
536 is_base_table: true,
537 is_append_only: false,
538 source_watermarks: [],
539 },
540 Table {
541 name: "t3",
542 columns: [
543 Column {
544 name: ObjectName(
545 [
546 Ident {
547 value: "v1",
548 quote_style: None,
549 },
550 ],
551 ),
552 data_type: Int32,
553 },
554 Column {
555 name: ObjectName(
556 [
557 Ident {
558 value: "v2",
559 quote_style: None,
560 },
561 ],
562 ),
563 data_type: Int16,
564 },
565 ],
566 pk_indices: [
567 0,
568 1,
569 ],
570 is_base_table: true,
571 is_append_only: false,
572 source_watermarks: [],
573 },
574 Table {
575 name: "t4",
576 columns: [
577 Column {
578 name: ObjectName(
579 [
580 Ident {
581 value: "v1",
582 quote_style: None,
583 },
584 ],
585 ),
586 data_type: Int32,
587 },
588 Column {
589 name: ObjectName(
590 [
591 Ident {
592 value: "v2",
593 quote_style: None,
594 },
595 ],
596 ),
597 data_type: Int16,
598 },
599 Column {
600 name: ObjectName(
601 [
602 Ident {
603 value: "v3",
604 quote_style: None,
605 },
606 ],
607 ),
608 data_type: Boolean,
609 },
610 ],
611 pk_indices: [
612 0,
613 1,
614 2,
615 ],
616 is_base_table: true,
617 is_append_only: false,
618 source_watermarks: [],
619 },
620 ],
621 [
622 CreateTable {
623 or_replace: false,
624 temporary: false,
625 if_not_exists: false,
626 name: ObjectName(
627 [
628 Ident {
629 value: "t",
630 quote_style: None,
631 },
632 ],
633 ),
634 columns: [
635 ColumnDef {
636 name: Ident {
637 value: "v1",
638 quote_style: None,
639 },
640 data_type: Some(
641 Int,
642 ),
643 collation: None,
644 options: [
645 ColumnOptionDef {
646 name: None,
647 option: Unique {
648 is_primary: true,
649 },
650 },
651 ],
652 },
653 ],
654 wildcard_idx: None,
655 constraints: [],
656 with_options: [],
657 format_encode: None,
658 source_watermarks: [],
659 append_only: false,
660 on_conflict: None,
661 with_version_column: None,
662 query: None,
663 cdc_table_info: None,
664 include_column_options: [],
665 webhook_info: None,
666 engine: Hummock,
667 },
668 CreateTable {
669 or_replace: false,
670 temporary: false,
671 if_not_exists: false,
672 name: ObjectName(
673 [
674 Ident {
675 value: "t2",
676 quote_style: None,
677 },
678 ],
679 ),
680 columns: [
681 ColumnDef {
682 name: Ident {
683 value: "v1",
684 quote_style: None,
685 },
686 data_type: Some(
687 Int,
688 ),
689 collation: None,
690 options: [],
691 },
692 ColumnDef {
693 name: Ident {
694 value: "v2",
695 quote_style: None,
696 },
697 data_type: Some(
698 SmallInt,
699 ),
700 collation: None,
701 options: [
702 ColumnOptionDef {
703 name: None,
704 option: Unique {
705 is_primary: true,
706 },
707 },
708 ],
709 },
710 ],
711 wildcard_idx: None,
712 constraints: [],
713 with_options: [],
714 format_encode: None,
715 source_watermarks: [],
716 append_only: false,
717 on_conflict: None,
718 with_version_column: None,
719 query: None,
720 cdc_table_info: None,
721 include_column_options: [],
722 webhook_info: None,
723 engine: Hummock,
724 },
725 CreateTable {
726 or_replace: false,
727 temporary: false,
728 if_not_exists: false,
729 name: ObjectName(
730 [
731 Ident {
732 value: "t3",
733 quote_style: None,
734 },
735 ],
736 ),
737 columns: [
738 ColumnDef {
739 name: Ident {
740 value: "v1",
741 quote_style: None,
742 },
743 data_type: Some(
744 Int,
745 ),
746 collation: None,
747 options: [
748 ColumnOptionDef {
749 name: None,
750 option: Unique {
751 is_primary: true,
752 },
753 },
754 ],
755 },
756 ColumnDef {
757 name: Ident {
758 value: "v2",
759 quote_style: None,
760 },
761 data_type: Some(
762 SmallInt,
763 ),
764 collation: None,
765 options: [
766 ColumnOptionDef {
767 name: None,
768 option: Unique {
769 is_primary: true,
770 },
771 },
772 ],
773 },
774 ],
775 wildcard_idx: None,
776 constraints: [],
777 with_options: [],
778 format_encode: None,
779 source_watermarks: [],
780 append_only: false,
781 on_conflict: None,
782 with_version_column: None,
783 query: None,
784 cdc_table_info: None,
785 include_column_options: [],
786 webhook_info: None,
787 engine: Hummock,
788 },
789 CreateTable {
790 or_replace: false,
791 temporary: false,
792 if_not_exists: false,
793 name: ObjectName(
794 [
795 Ident {
796 value: "t4",
797 quote_style: None,
798 },
799 ],
800 ),
801 columns: [
802 ColumnDef {
803 name: Ident {
804 value: "v1",
805 quote_style: None,
806 },
807 data_type: Some(
808 Int,
809 ),
810 collation: None,
811 options: [
812 ColumnOptionDef {
813 name: None,
814 option: Unique {
815 is_primary: true,
816 },
817 },
818 ],
819 },
820 ColumnDef {
821 name: Ident {
822 value: "v2",
823 quote_style: None,
824 },
825 data_type: Some(
826 SmallInt,
827 ),
828 collation: None,
829 options: [
830 ColumnOptionDef {
831 name: None,
832 option: Unique {
833 is_primary: true,
834 },
835 },
836 ],
837 },
838 ColumnDef {
839 name: Ident {
840 value: "v3",
841 quote_style: None,
842 },
843 data_type: Some(
844 Boolean,
845 ),
846 collation: None,
847 options: [
848 ColumnOptionDef {
849 name: None,
850 option: Unique {
851 is_primary: true,
852 },
853 },
854 ],
855 },
856 ],
857 wildcard_idx: None,
858 constraints: [],
859 with_options: [],
860 format_encode: None,
861 source_watermarks: [],
862 append_only: false,
863 on_conflict: None,
864 with_version_column: None,
865 query: None,
866 cdc_table_info: None,
867 include_column_options: [],
868 webhook_info: None,
869 engine: Hummock,
870 },
871 ],
872 )"#]],
873 );
874 }
875
876 #[test]
877 fn test_parse_create_table_statements_with_append_only_and_watermark() {
878 let test_string = r#"
879 CREATE TABLE t1 (
880 v1 INT,
881 ts TIMESTAMP,
882 WATERMARK FOR ts AS ts - INTERVAL '5' SECOND
883 ) APPEND ONLY;
884
885 CREATE TABLE t2 (
886 v1 INT,
887 ts TIMESTAMP
888 ) APPEND ONLY;
889
890 CREATE TABLE t3 (
891 v1 INT,
892 ts TIMESTAMP
893 );
894 "#;
895
896 let (tables, _) = parse_create_table_statements(test_string);
897
898 // Check t1
899 let t1 = &tables[0];
900 assert!(t1.is_append_only);
901 assert_eq!(t1.source_watermarks.len(), 1);
902 assert_eq!(t1.source_watermarks[0].column.real_value(), "ts");
903
904 // Check t2
905 let t2 = &tables[1];
906 assert!(t2.is_append_only);
907 assert_eq!(t2.source_watermarks.len(), 0);
908
909 // Check t3
910 let t3 = &tables[2];
911 assert!(!t3.is_append_only);
912 assert_eq!(t3.source_watermarks.len(), 0);
913 }
914}