risingwave_sqlsmith/lib.rs
1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#![feature(let_chains)]
16#![feature(if_let_guard)]
17#![feature(box_patterns)]
18#![feature(register_tool)]
19#![register_tool(rw)]
20#![allow(rw::format_error)] // test code
21
22risingwave_expr_impl::enable!();
23
24use std::collections::{HashMap, HashSet};
25
26use anyhow::{Result, bail};
27use itertools::Itertools;
28use rand::Rng;
29use rand::prelude::IndexedRandom;
30use risingwave_sqlparser::ast::{
31 BinaryOperator, ColumnOption, Expr, Join, JoinConstraint, JoinOperator, Statement,
32 TableConstraint,
33};
34use risingwave_sqlparser::parser::Parser;
35
36use crate::sql_gen::SqlGenerator;
37
38pub mod reducer;
39mod sql_gen;
40pub mod test_runners;
41mod utils;
42pub mod validation;
43pub use validation::is_permissible_error;
44
45pub use crate::sql_gen::{Table, print_function_table};
46
47/// Generate a random SQL string.
48pub fn sql_gen(rng: &mut impl Rng, tables: Vec<Table>) -> String {
49 let mut r#gen = SqlGenerator::new(rng, tables);
50 format!("{}", r#gen.gen_batch_query_stmt())
51}
52
53/// Generate `INSERT`
54pub fn insert_sql_gen(rng: &mut impl Rng, tables: Vec<Table>, count: usize) -> Vec<String> {
55 let mut r#gen = SqlGenerator::new(rng, vec![]);
56 tables
57 .into_iter()
58 .map(|table| format!("{}", r#gen.generate_insert_statement(&table, count)))
59 .collect()
60}
61
62/// Generate a random CREATE MATERIALIZED VIEW sql string.
63/// These are derived from `tables`.
64pub fn mview_sql_gen<R: Rng>(rng: &mut R, tables: Vec<Table>, name: &str) -> (String, Table) {
65 let mut r#gen = SqlGenerator::new_for_mview(rng, tables);
66 let (mview, table) = r#gen.gen_mview_stmt(name);
67 (mview.to_string(), table)
68}
69
70pub fn differential_sql_gen<R: Rng>(
71 rng: &mut R,
72 tables: Vec<Table>,
73 name: &str,
74) -> Result<(String, String, Table)> {
75 let mut r#gen = SqlGenerator::new_for_mview(rng, tables);
76 let (stream, table) = r#gen.gen_mview_stmt(name);
77 let batch = match stream {
78 Statement::CreateView { ref query, .. } => query.to_string(),
79 _ => bail!("Differential pair should be mview statement!"),
80 };
81 Ok((batch, stream.to_string(), table))
82}
83
84/// TODO(noel): Eventually all session variables should be fuzzed.
85/// For now we start of with a few hardcoded configs.
86/// Some config need workarounds, for instance `QUERY_MODE`,
87/// which can lead to stack overflow
88/// (a simple workaround is limit length of
89/// generated query when `QUERY_MODE=local`.
90pub fn session_sql_gen<R: Rng>(rng: &mut R) -> String {
91 [
92 "SET ENABLE_TWO_PHASE_AGG TO TRUE",
93 "SET ENABLE_TWO_PHASE_AGG TO FALSE",
94 "SET RW_FORCE_TWO_PHASE_AGG TO TRUE",
95 "SET RW_FORCE_TWO_PHASE_AGG TO FALSE",
96 ]
97 .choose(rng)
98 .unwrap()
99 .to_string()
100}
101
102pub fn generate_update_statements<R: Rng>(
103 rng: &mut R,
104 tables: &[Table],
105 inserts: &[Statement],
106) -> Result<Vec<Statement>> {
107 let mut r#gen = SqlGenerator::new(rng, vec![]);
108 r#gen.generate_update_statements(tables, inserts)
109}
110
111/// Parse SQL
112/// FIXME(Noel): Introduce error type for sqlsmith for this.
113pub fn parse_sql<S: AsRef<str>>(sql: S) -> Vec<Statement> {
114 let sql = sql.as_ref();
115 Parser::parse_sql(sql).unwrap_or_else(|_| panic!("Failed to parse SQL: {}", sql))
116}
117
118/// Extract relevant info from CREATE TABLE statement, to construct a Table
119pub fn create_table_statement_to_table(statement: &Statement) -> Table {
120 match statement {
121 Statement::CreateTable {
122 name,
123 columns,
124 constraints,
125 ..
126 } => {
127 let column_name_to_index_mapping: HashMap<_, _> = columns
128 .iter()
129 .enumerate()
130 .map(|(i, c)| (&c.name, i))
131 .collect();
132 let mut pk_indices = HashSet::new();
133 for (i, column) in columns.iter().enumerate() {
134 let is_primary_key = column
135 .options
136 .iter()
137 .any(|option| option.option == ColumnOption::Unique { is_primary: true });
138 if is_primary_key {
139 pk_indices.insert(i);
140 }
141 }
142 for constraint in constraints {
143 if let TableConstraint::Unique {
144 columns,
145 is_primary: true,
146 ..
147 } = constraint
148 {
149 for column in columns {
150 let pk_index = column_name_to_index_mapping.get(column).unwrap();
151 pk_indices.insert(*pk_index);
152 }
153 }
154 }
155 let mut pk_indices = pk_indices.into_iter().collect_vec();
156 pk_indices.sort_unstable();
157 Table::new_for_base_table(
158 name.0[0].real_value(),
159 columns.iter().map(|c| c.clone().into()).collect(),
160 pk_indices,
161 )
162 }
163 _ => panic!(
164 "Only CREATE TABLE statements permitted, received: {}",
165 statement
166 ),
167 }
168}
169
170pub fn parse_create_table_statements(sql: impl AsRef<str>) -> (Vec<Table>, Vec<Statement>) {
171 let statements = parse_sql(&sql);
172 let tables = statements
173 .iter()
174 .map(create_table_statement_to_table)
175 .collect();
176 (tables, statements)
177}
178
179#[cfg(test)]
180mod tests {
181 use std::fmt::Debug;
182
183 use expect_test::{Expect, expect};
184
185 use super::*;
186
187 fn check(actual: impl Debug, expect: Expect) {
188 let actual = format!("{:#?}", actual);
189 expect.assert_eq(&actual);
190 }
191
192 #[test]
193 fn test_parse_create_table_statements_no_pk() {
194 let test_string = "
195CREATE TABLE t(v1 int);
196CREATE TABLE t2(v1 int, v2 bool);
197CREATE TABLE t3(v1 int, v2 bool, v3 smallint);
198 ";
199 check(
200 parse_create_table_statements(test_string),
201 expect![[r#"
202 (
203 [
204 Table {
205 name: "t",
206 columns: [
207 Column {
208 name: "v1",
209 data_type: Int32,
210 },
211 ],
212 pk_indices: [],
213 is_base_table: true,
214 },
215 Table {
216 name: "t2",
217 columns: [
218 Column {
219 name: "v1",
220 data_type: Int32,
221 },
222 Column {
223 name: "v2",
224 data_type: Boolean,
225 },
226 ],
227 pk_indices: [],
228 is_base_table: true,
229 },
230 Table {
231 name: "t3",
232 columns: [
233 Column {
234 name: "v1",
235 data_type: Int32,
236 },
237 Column {
238 name: "v2",
239 data_type: Boolean,
240 },
241 Column {
242 name: "v3",
243 data_type: Int16,
244 },
245 ],
246 pk_indices: [],
247 is_base_table: true,
248 },
249 ],
250 [
251 CreateTable {
252 or_replace: false,
253 temporary: false,
254 if_not_exists: false,
255 name: ObjectName(
256 [
257 Ident {
258 value: "t",
259 quote_style: None,
260 },
261 ],
262 ),
263 columns: [
264 ColumnDef {
265 name: Ident {
266 value: "v1",
267 quote_style: None,
268 },
269 data_type: Some(
270 Int,
271 ),
272 collation: None,
273 options: [],
274 },
275 ],
276 wildcard_idx: None,
277 constraints: [],
278 with_options: [],
279 format_encode: None,
280 source_watermarks: [],
281 append_only: false,
282 on_conflict: None,
283 with_version_column: None,
284 query: None,
285 cdc_table_info: None,
286 include_column_options: [],
287 webhook_info: None,
288 engine: Hummock,
289 },
290 CreateTable {
291 or_replace: false,
292 temporary: false,
293 if_not_exists: false,
294 name: ObjectName(
295 [
296 Ident {
297 value: "t2",
298 quote_style: None,
299 },
300 ],
301 ),
302 columns: [
303 ColumnDef {
304 name: Ident {
305 value: "v1",
306 quote_style: None,
307 },
308 data_type: Some(
309 Int,
310 ),
311 collation: None,
312 options: [],
313 },
314 ColumnDef {
315 name: Ident {
316 value: "v2",
317 quote_style: None,
318 },
319 data_type: Some(
320 Boolean,
321 ),
322 collation: None,
323 options: [],
324 },
325 ],
326 wildcard_idx: None,
327 constraints: [],
328 with_options: [],
329 format_encode: None,
330 source_watermarks: [],
331 append_only: false,
332 on_conflict: None,
333 with_version_column: None,
334 query: None,
335 cdc_table_info: None,
336 include_column_options: [],
337 webhook_info: None,
338 engine: Hummock,
339 },
340 CreateTable {
341 or_replace: false,
342 temporary: false,
343 if_not_exists: false,
344 name: ObjectName(
345 [
346 Ident {
347 value: "t3",
348 quote_style: None,
349 },
350 ],
351 ),
352 columns: [
353 ColumnDef {
354 name: Ident {
355 value: "v1",
356 quote_style: None,
357 },
358 data_type: Some(
359 Int,
360 ),
361 collation: None,
362 options: [],
363 },
364 ColumnDef {
365 name: Ident {
366 value: "v2",
367 quote_style: None,
368 },
369 data_type: Some(
370 Boolean,
371 ),
372 collation: None,
373 options: [],
374 },
375 ColumnDef {
376 name: Ident {
377 value: "v3",
378 quote_style: None,
379 },
380 data_type: Some(
381 SmallInt,
382 ),
383 collation: None,
384 options: [],
385 },
386 ],
387 wildcard_idx: None,
388 constraints: [],
389 with_options: [],
390 format_encode: None,
391 source_watermarks: [],
392 append_only: false,
393 on_conflict: None,
394 with_version_column: None,
395 query: None,
396 cdc_table_info: None,
397 include_column_options: [],
398 webhook_info: None,
399 engine: Hummock,
400 },
401 ],
402 )"#]],
403 );
404 }
405
406 #[test]
407 fn test_parse_create_table_statements_with_pk() {
408 let test_string = "
409CREATE TABLE t(v1 int PRIMARY KEY);
410CREATE TABLE t2(v1 int, v2 smallint PRIMARY KEY);
411CREATE TABLE t3(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY);
412CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY);
413";
414 check(
415 parse_create_table_statements(test_string),
416 expect![[r#"
417 (
418 [
419 Table {
420 name: "t",
421 columns: [
422 Column {
423 name: "v1",
424 data_type: Int32,
425 },
426 ],
427 pk_indices: [
428 0,
429 ],
430 is_base_table: true,
431 },
432 Table {
433 name: "t2",
434 columns: [
435 Column {
436 name: "v1",
437 data_type: Int32,
438 },
439 Column {
440 name: "v2",
441 data_type: Int16,
442 },
443 ],
444 pk_indices: [
445 1,
446 ],
447 is_base_table: true,
448 },
449 Table {
450 name: "t3",
451 columns: [
452 Column {
453 name: "v1",
454 data_type: Int32,
455 },
456 Column {
457 name: "v2",
458 data_type: Int16,
459 },
460 ],
461 pk_indices: [
462 0,
463 1,
464 ],
465 is_base_table: true,
466 },
467 Table {
468 name: "t4",
469 columns: [
470 Column {
471 name: "v1",
472 data_type: Int32,
473 },
474 Column {
475 name: "v2",
476 data_type: Int16,
477 },
478 Column {
479 name: "v3",
480 data_type: Boolean,
481 },
482 ],
483 pk_indices: [
484 0,
485 1,
486 2,
487 ],
488 is_base_table: true,
489 },
490 ],
491 [
492 CreateTable {
493 or_replace: false,
494 temporary: false,
495 if_not_exists: false,
496 name: ObjectName(
497 [
498 Ident {
499 value: "t",
500 quote_style: None,
501 },
502 ],
503 ),
504 columns: [
505 ColumnDef {
506 name: Ident {
507 value: "v1",
508 quote_style: None,
509 },
510 data_type: Some(
511 Int,
512 ),
513 collation: None,
514 options: [
515 ColumnOptionDef {
516 name: None,
517 option: Unique {
518 is_primary: true,
519 },
520 },
521 ],
522 },
523 ],
524 wildcard_idx: None,
525 constraints: [],
526 with_options: [],
527 format_encode: None,
528 source_watermarks: [],
529 append_only: false,
530 on_conflict: None,
531 with_version_column: None,
532 query: None,
533 cdc_table_info: None,
534 include_column_options: [],
535 webhook_info: None,
536 engine: Hummock,
537 },
538 CreateTable {
539 or_replace: false,
540 temporary: false,
541 if_not_exists: false,
542 name: ObjectName(
543 [
544 Ident {
545 value: "t2",
546 quote_style: None,
547 },
548 ],
549 ),
550 columns: [
551 ColumnDef {
552 name: Ident {
553 value: "v1",
554 quote_style: None,
555 },
556 data_type: Some(
557 Int,
558 ),
559 collation: None,
560 options: [],
561 },
562 ColumnDef {
563 name: Ident {
564 value: "v2",
565 quote_style: None,
566 },
567 data_type: Some(
568 SmallInt,
569 ),
570 collation: None,
571 options: [
572 ColumnOptionDef {
573 name: None,
574 option: Unique {
575 is_primary: true,
576 },
577 },
578 ],
579 },
580 ],
581 wildcard_idx: None,
582 constraints: [],
583 with_options: [],
584 format_encode: None,
585 source_watermarks: [],
586 append_only: false,
587 on_conflict: None,
588 with_version_column: None,
589 query: None,
590 cdc_table_info: None,
591 include_column_options: [],
592 webhook_info: None,
593 engine: Hummock,
594 },
595 CreateTable {
596 or_replace: false,
597 temporary: false,
598 if_not_exists: false,
599 name: ObjectName(
600 [
601 Ident {
602 value: "t3",
603 quote_style: None,
604 },
605 ],
606 ),
607 columns: [
608 ColumnDef {
609 name: Ident {
610 value: "v1",
611 quote_style: None,
612 },
613 data_type: Some(
614 Int,
615 ),
616 collation: None,
617 options: [
618 ColumnOptionDef {
619 name: None,
620 option: Unique {
621 is_primary: true,
622 },
623 },
624 ],
625 },
626 ColumnDef {
627 name: Ident {
628 value: "v2",
629 quote_style: None,
630 },
631 data_type: Some(
632 SmallInt,
633 ),
634 collation: None,
635 options: [
636 ColumnOptionDef {
637 name: None,
638 option: Unique {
639 is_primary: true,
640 },
641 },
642 ],
643 },
644 ],
645 wildcard_idx: None,
646 constraints: [],
647 with_options: [],
648 format_encode: None,
649 source_watermarks: [],
650 append_only: false,
651 on_conflict: None,
652 with_version_column: None,
653 query: None,
654 cdc_table_info: None,
655 include_column_options: [],
656 webhook_info: None,
657 engine: Hummock,
658 },
659 CreateTable {
660 or_replace: false,
661 temporary: false,
662 if_not_exists: false,
663 name: ObjectName(
664 [
665 Ident {
666 value: "t4",
667 quote_style: None,
668 },
669 ],
670 ),
671 columns: [
672 ColumnDef {
673 name: Ident {
674 value: "v1",
675 quote_style: None,
676 },
677 data_type: Some(
678 Int,
679 ),
680 collation: None,
681 options: [
682 ColumnOptionDef {
683 name: None,
684 option: Unique {
685 is_primary: true,
686 },
687 },
688 ],
689 },
690 ColumnDef {
691 name: Ident {
692 value: "v2",
693 quote_style: None,
694 },
695 data_type: Some(
696 SmallInt,
697 ),
698 collation: None,
699 options: [
700 ColumnOptionDef {
701 name: None,
702 option: Unique {
703 is_primary: true,
704 },
705 },
706 ],
707 },
708 ColumnDef {
709 name: Ident {
710 value: "v3",
711 quote_style: None,
712 },
713 data_type: Some(
714 Boolean,
715 ),
716 collation: None,
717 options: [
718 ColumnOptionDef {
719 name: None,
720 option: Unique {
721 is_primary: true,
722 },
723 },
724 ],
725 },
726 ],
727 wildcard_idx: None,
728 constraints: [],
729 with_options: [],
730 format_encode: None,
731 source_watermarks: [],
732 append_only: false,
733 on_conflict: None,
734 with_version_column: None,
735 query: None,
736 cdc_table_info: None,
737 include_column_options: [],
738 webhook_info: None,
739 engine: Hummock,
740 },
741 ],
742 )"#]],
743 );
744 }
745}