risingwave_sqlsmith/sql_gen/
table_functions.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use chrono::{Duration, NaiveDateTime};
16use rand::Rng;
17use rand::distr::Alphanumeric;
18use risingwave_sqlparser::ast::{
19    DataType as AstDataType, Expr, FunctionArg, ObjectName, TableAlias, TableFactor, Value,
20};
21
22use crate::sql_gen::utils::{create_args, create_table_alias};
23use crate::sql_gen::{SqlGenerator, Table};
24
25#[derive(Clone, Copy)]
26enum JsonTopLevelKind {
27    Any,
28    Array,
29    Object,
30}
31
32impl<R: Rng> SqlGenerator<'_, R> {
33    /// Generates table functions.
34    pub(crate) fn gen_table_func(&mut self) -> (TableFactor, Table) {
35        match self.rng.random_range(0..=8) {
36            0 => self.gen_generate_series(),
37            1 => self.gen_range(),
38            2 => self.gen_unnest(),
39            3..=7 => self.gen_jsonb_func(),
40            8 => self.gen_regexp_match(),
41            _ => unreachable!(),
42        }
43    }
44
45    /// Generates `GENERATE_SERIES`.
46    /// `GENERATE_SERIES(start: INT | TIMESTAMP, end: INT | TIMESTAMP, step?: INT | INTERVAL)`
47    /// - When type is INT: step is optional.
48    /// - When type is TIMESTAMP: step (INTERVAL) is required.
49    fn gen_generate_series(&mut self) -> (TableFactor, Table) {
50        let table_name = self.gen_table_name_with_prefix("generate_series");
51        let alias = create_table_alias(&table_name);
52
53        let (start, end, step) = self.gen_start_end_step();
54        let args = vec![Some(start), Some(end), step]
55            .into_iter()
56            .flatten()
57            .collect();
58
59        let table = Table::new(table_name, vec![]);
60
61        let relation = create_tvf("generate_series", alias, create_args(args), false);
62
63        (relation, table)
64    }
65
66    /// Generates `RANGE`.
67    /// `RANGE(start: INT | TIMESTAMP, end: INT | TIMESTAMP, step?: INT | INTERVAL)`
68    /// - When type is INT: step is optional.
69    /// - When type is TIMESTAMP: step (INTERVAL) is required.
70    fn gen_range(&mut self) -> (TableFactor, Table) {
71        let table_name = self.gen_table_name_with_prefix("range");
72        let alias = create_table_alias(&table_name);
73
74        let (start, end, step) = self.gen_start_end_step();
75        let args = vec![Some(start), Some(end), step]
76            .into_iter()
77            .flatten()
78            .collect();
79
80        let table = Table::new(table_name, vec![]);
81
82        let relation = create_tvf("range", alias, create_args(args), false);
83
84        (relation, table)
85    }
86
87    /// Generates `UNNEST`.
88    /// `UNNEST(arr1 [, arr2, ...])`
89    fn gen_unnest(&mut self) -> (TableFactor, Table) {
90        let table_name = self.gen_table_name_with_prefix("unnest");
91        let alias = create_table_alias(&table_name);
92
93        let depth = self.rng.random_range(0..=5);
94        let list_type = self.gen_list_data_type(depth);
95
96        let array_expr = self.gen_simple_scalar(&list_type);
97
98        let table = Table::new(table_name, vec![]);
99        let relation = create_tvf("unnest", alias, create_args(vec![array_expr]), false);
100
101        (relation, table)
102    }
103
104    /// Generates one of the JSONB-related table functions,
105    /// including:
106    /// - `JSON_ARRAY_ELEMENTS(JSONB)`
107    /// - `JSON_ARRAY_ELEMENTS_TEXT(JSONB)`
108    /// - `JSON_EACH(JSONB)`
109    /// - `JSON_EACH_TEXT(JSONB)`
110    /// - `JSON_OBJECT_KEYS(JSONB)`
111    ///
112    /// These functions require specific top-level JSONB types:
113    /// - `JSON_ARRAY_ELEMENTS[_TEXT]` expects a JSON array
114    /// - `JSON_EACH[_TEXT]` and `JSON_OBJECT_KEYS` expect a JSON object
115    fn gen_jsonb_func(&mut self) -> (TableFactor, Table) {
116        match self.rng.random_range(0..=4) {
117            0 => self.gen_jsonb_tvf("jsonb_array_elements", JsonTopLevelKind::Array),
118            1 => self.gen_jsonb_tvf("jsonb_array_elements_text", JsonTopLevelKind::Array),
119            2 => self.gen_jsonb_tvf("jsonb_each", JsonTopLevelKind::Object),
120            3 => self.gen_jsonb_tvf("jsonb_each_text", JsonTopLevelKind::Object),
121            4 => self.gen_jsonb_tvf("jsonb_object_keys", JsonTopLevelKind::Object),
122            _ => unreachable!(),
123        }
124    }
125
126    /// Generates `REGEXP_MATCH`.
127    /// `REGEXP_MATCH(input_string, pattern [, optional_flag ])`
128    fn gen_regexp_match(&mut self) -> (TableFactor, Table) {
129        let input_string = self.gen_random_string();
130        let is_match = self.flip_coin();
131        let pattern = if is_match {
132            let depth = self.rng.random_range(1..=3);
133            self.gen_matched_regexp_pattern(&input_string, depth)
134        } else {
135            self.gen_noise_regexp_pattern(&input_string)
136        };
137        let flags = self.gen_regex_flags();
138
139        let input_expr = Expr::Value(Value::SingleQuotedString(input_string));
140        let pattern_expr = Expr::Value(Value::SingleQuotedString(pattern));
141        let flags_expr = Expr::Value(Value::SingleQuotedString(flags));
142
143        let table_name = self.gen_table_name_with_prefix("regexp_match");
144        let alias = create_table_alias(&table_name);
145        let relation = create_tvf(
146            "regexp_match",
147            alias,
148            create_args(vec![input_expr, pattern_expr, flags_expr]),
149            false,
150        );
151
152        let table = Table::new(table_name, vec![]);
153        (relation, table)
154    }
155
156    /// Recursively generates a regular expression pattern that matches the given input string.
157    /// The strategy includes:
158    /// - Escaping literals
159    /// - Replacing parts with character classes, alternations, or grouped sub-patterns
160    /// - Applying random quantifiers
161    fn gen_matched_regexp_pattern(&mut self, input: &str, depth: usize) -> String {
162        if depth == 0 || input.len() <= 1 {
163            return self.add_quantifier(regex::escape(input));
164        }
165
166        let chars: Vec<char> = input.chars().collect();
167        let len = chars.len();
168        let (start, end) = self.rand_segment(len);
169        assert!(start < end);
170        let (prefix, mid, suffix) = (&input[..start], &input[start..end], &input[end..]);
171
172        let mid_transformed = match self.rng.random_range(0..=4) {
173            0 => {
174                if let Some(c) = mid.chars().next() {
175                    self.gen_char_class(c)
176                } else {
177                    ".".to_owned()
178                }
179            }
180            1 => self.wrap_group(regex::escape(mid)),
181            2 => self.gen_alternation(mid),
182            3 => self.add_quantifier(regex::escape(mid)),
183            4 => self.gen_matched_regexp_pattern(mid, depth - 1),
184            _ => unreachable!(),
185        };
186
187        let result = format!(
188            "{}{}{}",
189            regex::escape(prefix),
190            mid_transformed,
191            regex::escape(suffix)
192        );
193
194        if self.flip_coin() {
195            self.wrap_group(result)
196        } else {
197            result
198        }
199    }
200
201    /// Generates a regular expression pattern that is very unlikely to match the input string.
202    /// Strategies include:
203    /// - Inserting noise characters
204    /// - Shuffling characters
205    /// - Using strict anchors with unrelated content
206    /// - Over-constraining with numeric quantifiers
207    fn gen_noise_regexp_pattern(&mut self, input: &str) -> String {
208        if input.is_empty() {
209            return "[A-Z]{3,}".to_owned();
210        }
211
212        let chars: Vec<char> = input.chars().collect();
213        let len = chars.len();
214
215        match self.rng.random_range(0..=3) {
216            0 => {
217                let mut s = input.to_owned();
218                let pos = self.rng.random_range(0..=len);
219                s.insert(pos, 'Z');
220                regex::escape(&s)
221            }
222            1 => {
223                let mut shuffled = chars.clone();
224                shuffled.reverse();
225                regex::escape(&shuffled.into_iter().collect::<String>())
226            }
227            2 => {
228                format!("^{}$", self.gen_noise_string(input))
229            }
230            3 => {
231                format!("[0-9]{{{}}}", len + 3)
232            }
233            _ => unreachable!(),
234        }
235    }
236
237    fn gen_regex_flags(&mut self) -> String {
238        let flag_set = ['i', 'c', 'n', 's', 'x'];
239        let len = self.rng.random_range(0..=3);
240        (0..len)
241            .map(|_| {
242                let idx = self.rng.random_range(0..flag_set.len());
243                flag_set[idx]
244            })
245            .collect()
246    }
247
248    fn gen_char_class(&mut self, c: char) -> String {
249        if c.is_ascii_digit() {
250            match self.rng.random_range(0..=2) {
251                0 => "[0-9]".to_owned(),
252                1 => format!("[{}]", c),
253                2 => ".".to_owned(),
254                _ => unreachable!(),
255            }
256        } else if c.is_ascii_alphabetic() {
257            match self.rng.random_range(0..=2) {
258                0 => format!("[{}{}]", c.to_ascii_lowercase(), c.to_ascii_uppercase()),
259                1 => "[a-zA-Z]".to_owned(),
260                2 => ".".to_owned(),
261                _ => unreachable!(),
262            }
263        } else {
264            ".".to_owned()
265        }
266    }
267
268    fn wrap_group(&mut self, inner: String) -> String {
269        match self.rng.random_range(0..=2) {
270            0 => format!("({})", inner),
271            1 => format!("(?:{})", inner),
272            2 => format!("({})?", inner),
273            _ => unreachable!(),
274        }
275    }
276
277    fn gen_alternation(&mut self, base: &str) -> String {
278        let alt = regex::escape(&self.gen_random_string());
279        let base = regex::escape(base);
280
281        if self.flip_coin() {
282            format!("({}|{})", base, alt)
283        } else {
284            format!("({}|{})", alt, base)
285        }
286    }
287
288    fn rand_segment(&mut self, len: usize) -> (usize, usize) {
289        if len <= 1 {
290            return (0, len);
291        }
292        let start = self.rng.random_range(0..len);
293        let end = self.rng.random_range(start + 1..=len);
294        (start, end)
295    }
296
297    fn add_quantifier(&mut self, pattern: String) -> String {
298        match self.rng.random_range(0..=13) {
299            0 => pattern,
300            1 => format!("{}*", pattern),
301            2 => format!("{}+", pattern),
302            3 => format!("{}?", pattern),
303            4 => format!("{}*?", pattern),
304            5 => format!("{}+?", pattern),
305            6 => format!("{}??", pattern),
306            7 => format!("{}{{{}}}", pattern, self.rng.random_range(1..=3)),
307            8 => format!("{}{{{}}}?", pattern, self.rng.random_range(1..=3)),
308            9 => format!("{}{{{},}}", pattern, self.rng.random_range(1..=3)),
309            10 => format!("{}{{{},}}?", pattern, self.rng.random_range(1..=3)),
310            11 => {
311                let m = self.rng.random_range(1..=3);
312                let n = self.rng.random_range(m..=m + 2);
313                format!("{}{{{},{}}}", pattern, m, n)
314            }
315            12 => {
316                let m = self.rng.random_range(1..=3);
317                let n = self.rng.random_range(m..=m + 2);
318                format!("{}{{{},{}}}?", pattern, m, n)
319            }
320            13 => format!("({}){{1,{}}}", pattern, self.rng.random_range(2..=4)),
321            _ => unreachable!(),
322        }
323    }
324
325    fn gen_noise_string(&mut self, input: &str) -> String {
326        loop {
327            let s: String = (0..self.rng.random_range(4..8))
328                .map(|_| match self.rng.random_range(0..=2) {
329                    0 => 'Z',
330                    1 => 'Q',
331                    2 => 'X',
332                    _ => unreachable!(),
333                })
334                .collect();
335            if s != input {
336                return s;
337            }
338        }
339    }
340
341    fn gen_jsonb_tvf(&mut self, name: &str, kind: JsonTopLevelKind) -> (TableFactor, Table) {
342        let table_name = self.gen_table_name_with_prefix(name);
343        let alias = create_table_alias(&table_name);
344
345        let depth = self.rng.random_range(1..=5);
346        let jsonb_expr = self.gen_jsonb(depth, kind);
347
348        let table = Table::new(table_name, vec![]);
349        let relation = create_tvf(name, alias, create_args(vec![jsonb_expr]), false);
350
351        (relation, table)
352    }
353
354    fn integer_to_value_expr(num: i32) -> Expr {
355        Expr::Value(Value::Number(num.to_string()))
356    }
357
358    fn gen_simple_integer_range(&mut self) -> (Expr, Expr, Option<Expr>) {
359        let mut values: Vec<i32> = (0..3).map(|_| self.rng.random_range(0..100)).collect();
360        values.sort_unstable();
361
362        let start = Self::integer_to_value_expr(values[0]);
363        let end = Self::integer_to_value_expr(values[2]);
364        let step = Some(Self::integer_to_value_expr(values[1]));
365
366        if self.flip_coin() {
367            (start, end, step)
368        } else {
369            (start, end, None)
370        }
371    }
372
373    fn integer_to_timestamp_expr(offset_secs: i32) -> Expr {
374        let base =
375            NaiveDateTime::parse_from_str("2020-01-01 00:00:00", "%Y-%m-%d %H:%M:%S").unwrap();
376        let ts = base + Duration::seconds(offset_secs as i64);
377        Expr::TypedString {
378            data_type: AstDataType::Timestamp(false),
379            value: ts.format("%Y-%m-%d %H:%M:%S").to_string(),
380        }
381    }
382
383    fn integer_to_interval_expr(num: i32) -> Expr {
384        Expr::TypedString {
385            data_type: AstDataType::Interval,
386            value: format!("{} seconds", num),
387        }
388    }
389
390    fn gen_simple_timestamp_range(&mut self) -> (Expr, Expr, Option<Expr>) {
391        let mut secs: Vec<i32> = (0..3).map(|_| self.rng.random_range(0..3600)).collect();
392        secs.sort_unstable();
393
394        let start = Self::integer_to_timestamp_expr(secs[0]);
395        let end = Self::integer_to_timestamp_expr(secs[2]);
396        let step = Some(Self::integer_to_interval_expr(secs[1]));
397
398        (start, end, step)
399    }
400
401    fn gen_start_end_step(&mut self) -> (Expr, Expr, Option<Expr>) {
402        match self.rng.random_range(0..=1) {
403            0..=0 => self.gen_simple_integer_range(),
404            1..=1 => self.gen_simple_timestamp_range(),
405            _ => unreachable!(),
406        }
407    }
408
409    fn gen_jsonb(&mut self, n: usize, kind: JsonTopLevelKind) -> Expr {
410        Expr::Cast {
411            expr: Box::new(Expr::Value(Value::SingleQuotedString(
412                self.gen_json_value(0, n, kind),
413            ))),
414            data_type: AstDataType::Jsonb,
415        }
416    }
417
418    fn gen_json_value(&mut self, depth: usize, max_depth: usize, kind: JsonTopLevelKind) -> String {
419        if depth >= max_depth {
420            return match self.rng.random_range(0..=3) {
421                0 => format!("\"{}\"", self.gen_random_string()),
422                1 => self.rng.random_range(-1000..1000).to_string(),
423                2 => self.flip_coin().to_string(),
424                3 => "null".into(),
425                _ => unreachable!(),
426            };
427        }
428
429        match if depth == 0 {
430            match kind {
431                JsonTopLevelKind::Array => 4,
432                JsonTopLevelKind::Object => 5,
433                JsonTopLevelKind::Any => self.rng.random_range(0..=5),
434            }
435        } else {
436            self.rng.random_range(0..=5)
437        } {
438            0 => "null".into(),
439            1 => self.flip_coin().to_string(),
440            2 => self.rng.random_range(-1000..1000).to_string(),
441            3 => format!("\"{}\"", self.gen_random_string()),
442            4 => {
443                let len = self.rng.random_range(1..=3);
444                let elems: Vec<String> = (0..len)
445                    .map(|_| self.gen_json_value(depth + 1, max_depth, JsonTopLevelKind::Any))
446                    .collect();
447                format!("[{}]", elems.join(","))
448            }
449            5 => {
450                let len = self.rng.random_range(1..=3);
451                let fields: Vec<String> = (0..len)
452                    .map(|_| {
453                        let key = self.gen_random_string();
454                        let val = self.gen_json_value(depth + 1, max_depth, JsonTopLevelKind::Any);
455                        format!("\"{}\":{}", key, val)
456                    })
457                    .collect();
458                format!("{{{}}}", fields.join(","))
459            }
460            _ => unreachable!(),
461        }
462    }
463
464    fn gen_random_string(&mut self) -> String {
465        let len = self.rng.random_range(3..8);
466        (0..len)
467            .map(|_| self.rng.sample(Alphanumeric) as char)
468            .collect()
469    }
470}
471
472/// Create a table view function.
473fn create_tvf(
474    name: &str,
475    alias: TableAlias,
476    args: Vec<FunctionArg>,
477    with_ordinality: bool,
478) -> TableFactor {
479    TableFactor::TableFunction {
480        name: ObjectName(vec![name.into()]),
481        alias: Some(alias),
482        args,
483        with_ordinality,
484    }
485}