1use chrono::{Duration, NaiveDateTime};
16use rand::Rng;
17use rand::distr::Alphanumeric;
18use risingwave_sqlparser::ast::{
19 DataType as AstDataType, Expr, FunctionArg, ObjectName, TableAlias, TableFactor, Value,
20};
21
22use crate::sql_gen::utils::{create_args, create_table_alias};
23use crate::sql_gen::{SqlGenerator, Table};
24
25#[derive(Clone, Copy)]
26enum JsonTopLevelKind {
27 Any,
28 Array,
29 Object,
30}
31
32impl<R: Rng> SqlGenerator<'_, R> {
33 pub(crate) fn gen_table_func(&mut self) -> (TableFactor, Table) {
35 match self.rng.random_range(0..=8) {
36 0 => self.gen_generate_series(),
37 1 => self.gen_range(),
38 2 => self.gen_unnest(),
39 3..=7 => self.gen_jsonb_func(),
40 8 => self.gen_regexp_match(),
41 _ => unreachable!(),
42 }
43 }
44
45 fn gen_generate_series(&mut self) -> (TableFactor, Table) {
50 let table_name = self.gen_table_name_with_prefix("generate_series");
51 let alias = create_table_alias(&table_name);
52
53 let (start, end, step) = self.gen_start_end_step();
54 let args = vec![Some(start), Some(end), step]
55 .into_iter()
56 .flatten()
57 .collect();
58
59 let table = Table::new(table_name, vec![]);
60
61 let relation = create_tvf("generate_series", alias, create_args(args), false);
62
63 (relation, table)
64 }
65
66 fn gen_range(&mut self) -> (TableFactor, Table) {
71 let table_name = self.gen_table_name_with_prefix("range");
72 let alias = create_table_alias(&table_name);
73
74 let (start, end, step) = self.gen_start_end_step();
75 let args = vec![Some(start), Some(end), step]
76 .into_iter()
77 .flatten()
78 .collect();
79
80 let table = Table::new(table_name, vec![]);
81
82 let relation = create_tvf("range", alias, create_args(args), false);
83
84 (relation, table)
85 }
86
87 fn gen_unnest(&mut self) -> (TableFactor, Table) {
90 let table_name = self.gen_table_name_with_prefix("unnest");
91 let alias = create_table_alias(&table_name);
92
93 let depth = self.rng.random_range(0..=5);
94 let list_type = self.gen_list_data_type(depth);
95
96 let array_expr = self.gen_simple_scalar(&list_type);
97
98 let table = Table::new(table_name, vec![]);
99 let relation = create_tvf("unnest", alias, create_args(vec![array_expr]), false);
100
101 (relation, table)
102 }
103
104 fn gen_jsonb_func(&mut self) -> (TableFactor, Table) {
116 match self.rng.random_range(0..=4) {
117 0 => self.gen_jsonb_tvf("jsonb_array_elements", JsonTopLevelKind::Array),
118 1 => self.gen_jsonb_tvf("jsonb_array_elements_text", JsonTopLevelKind::Array),
119 2 => self.gen_jsonb_tvf("jsonb_each", JsonTopLevelKind::Object),
120 3 => self.gen_jsonb_tvf("jsonb_each_text", JsonTopLevelKind::Object),
121 4 => self.gen_jsonb_tvf("jsonb_object_keys", JsonTopLevelKind::Object),
122 _ => unreachable!(),
123 }
124 }
125
126 fn gen_regexp_match(&mut self) -> (TableFactor, Table) {
129 let input_string = self.gen_random_string();
130 let is_match = self.flip_coin();
131 let pattern = if is_match {
132 let depth = self.rng.random_range(1..=3);
133 self.gen_matched_regexp_pattern(&input_string, depth)
134 } else {
135 self.gen_noise_regexp_pattern(&input_string)
136 };
137 let flags = self.gen_regex_flags();
138
139 let input_expr = Expr::Value(Value::SingleQuotedString(input_string));
140 let pattern_expr = Expr::Value(Value::SingleQuotedString(pattern));
141 let flags_expr = Expr::Value(Value::SingleQuotedString(flags));
142
143 let table_name = self.gen_table_name_with_prefix("regexp_match");
144 let alias = create_table_alias(&table_name);
145 let relation = create_tvf(
146 "regexp_match",
147 alias,
148 create_args(vec![input_expr, pattern_expr, flags_expr]),
149 false,
150 );
151
152 let table = Table::new(table_name, vec![]);
153 (relation, table)
154 }
155
156 fn gen_matched_regexp_pattern(&mut self, input: &str, depth: usize) -> String {
162 if depth == 0 || input.len() <= 1 {
163 return self.add_quantifier(regex::escape(input));
164 }
165
166 let chars: Vec<char> = input.chars().collect();
167 let len = chars.len();
168 let (start, end) = self.rand_segment(len);
169 assert!(start < end);
170 let (prefix, mid, suffix) = (&input[..start], &input[start..end], &input[end..]);
171
172 let mid_transformed = match self.rng.random_range(0..=4) {
173 0 => {
174 if let Some(c) = mid.chars().next() {
175 self.gen_char_class(c)
176 } else {
177 ".".to_owned()
178 }
179 }
180 1 => self.wrap_group(regex::escape(mid)),
181 2 => self.gen_alternation(mid),
182 3 => self.add_quantifier(regex::escape(mid)),
183 4 => self.gen_matched_regexp_pattern(mid, depth - 1),
184 _ => unreachable!(),
185 };
186
187 let result = format!(
188 "{}{}{}",
189 regex::escape(prefix),
190 mid_transformed,
191 regex::escape(suffix)
192 );
193
194 if self.flip_coin() {
195 self.wrap_group(result)
196 } else {
197 result
198 }
199 }
200
201 fn gen_noise_regexp_pattern(&mut self, input: &str) -> String {
208 if input.is_empty() {
209 return "[A-Z]{3,}".to_owned();
210 }
211
212 let chars: Vec<char> = input.chars().collect();
213 let len = chars.len();
214
215 match self.rng.random_range(0..=3) {
216 0 => {
217 let mut s = input.to_owned();
218 let pos = self.rng.random_range(0..=len);
219 s.insert(pos, 'Z');
220 regex::escape(&s)
221 }
222 1 => {
223 let mut shuffled = chars.clone();
224 shuffled.reverse();
225 regex::escape(&shuffled.into_iter().collect::<String>())
226 }
227 2 => {
228 format!("^{}$", self.gen_noise_string(input))
229 }
230 3 => {
231 format!("[0-9]{{{}}}", len + 3)
232 }
233 _ => unreachable!(),
234 }
235 }
236
237 fn gen_regex_flags(&mut self) -> String {
238 let flag_set = ['i', 'c', 'n', 's', 'x'];
239 let len = self.rng.random_range(0..=3);
240 (0..len)
241 .map(|_| {
242 let idx = self.rng.random_range(0..flag_set.len());
243 flag_set[idx]
244 })
245 .collect()
246 }
247
248 fn gen_char_class(&mut self, c: char) -> String {
249 if c.is_ascii_digit() {
250 match self.rng.random_range(0..=2) {
251 0 => "[0-9]".to_owned(),
252 1 => format!("[{}]", c),
253 2 => ".".to_owned(),
254 _ => unreachable!(),
255 }
256 } else if c.is_ascii_alphabetic() {
257 match self.rng.random_range(0..=2) {
258 0 => format!("[{}{}]", c.to_ascii_lowercase(), c.to_ascii_uppercase()),
259 1 => "[a-zA-Z]".to_owned(),
260 2 => ".".to_owned(),
261 _ => unreachable!(),
262 }
263 } else {
264 ".".to_owned()
265 }
266 }
267
268 fn wrap_group(&mut self, inner: String) -> String {
269 match self.rng.random_range(0..=2) {
270 0 => format!("({})", inner),
271 1 => format!("(?:{})", inner),
272 2 => format!("({})?", inner),
273 _ => unreachable!(),
274 }
275 }
276
277 fn gen_alternation(&mut self, base: &str) -> String {
278 let alt = regex::escape(&self.gen_random_string());
279 let base = regex::escape(base);
280
281 if self.flip_coin() {
282 format!("({}|{})", base, alt)
283 } else {
284 format!("({}|{})", alt, base)
285 }
286 }
287
288 fn rand_segment(&mut self, len: usize) -> (usize, usize) {
289 if len <= 1 {
290 return (0, len);
291 }
292 let start = self.rng.random_range(0..len);
293 let end = self.rng.random_range(start + 1..=len);
294 (start, end)
295 }
296
297 fn add_quantifier(&mut self, pattern: String) -> String {
298 match self.rng.random_range(0..=13) {
299 0 => pattern,
300 1 => format!("{}*", pattern),
301 2 => format!("{}+", pattern),
302 3 => format!("{}?", pattern),
303 4 => format!("{}*?", pattern),
304 5 => format!("{}+?", pattern),
305 6 => format!("{}??", pattern),
306 7 => format!("{}{{{}}}", pattern, self.rng.random_range(1..=3)),
307 8 => format!("{}{{{}}}?", pattern, self.rng.random_range(1..=3)),
308 9 => format!("{}{{{},}}", pattern, self.rng.random_range(1..=3)),
309 10 => format!("{}{{{},}}?", pattern, self.rng.random_range(1..=3)),
310 11 => {
311 let m = self.rng.random_range(1..=3);
312 let n = self.rng.random_range(m..=m + 2);
313 format!("{}{{{},{}}}", pattern, m, n)
314 }
315 12 => {
316 let m = self.rng.random_range(1..=3);
317 let n = self.rng.random_range(m..=m + 2);
318 format!("{}{{{},{}}}?", pattern, m, n)
319 }
320 13 => format!("({}){{1,{}}}", pattern, self.rng.random_range(2..=4)),
321 _ => unreachable!(),
322 }
323 }
324
325 fn gen_noise_string(&mut self, input: &str) -> String {
326 loop {
327 let s: String = (0..self.rng.random_range(4..8))
328 .map(|_| match self.rng.random_range(0..=2) {
329 0 => 'Z',
330 1 => 'Q',
331 2 => 'X',
332 _ => unreachable!(),
333 })
334 .collect();
335 if s != input {
336 return s;
337 }
338 }
339 }
340
341 fn gen_jsonb_tvf(&mut self, name: &str, kind: JsonTopLevelKind) -> (TableFactor, Table) {
342 let table_name = self.gen_table_name_with_prefix(name);
343 let alias = create_table_alias(&table_name);
344
345 let depth = self.rng.random_range(1..=5);
346 let jsonb_expr = self.gen_jsonb(depth, kind);
347
348 let table = Table::new(table_name, vec![]);
349 let relation = create_tvf(name, alias, create_args(vec![jsonb_expr]), false);
350
351 (relation, table)
352 }
353
354 fn integer_to_value_expr(num: i32) -> Expr {
355 Expr::Value(Value::Number(num.to_string()))
356 }
357
358 fn gen_simple_integer_range(&mut self) -> (Expr, Expr, Option<Expr>) {
359 let mut values: Vec<i32> = (0..3).map(|_| self.rng.random_range(0..100)).collect();
360 values.sort_unstable();
361
362 let start = Self::integer_to_value_expr(values[0]);
363 let end = Self::integer_to_value_expr(values[2]);
364 let step = Some(Self::integer_to_value_expr(values[1]));
365
366 if self.flip_coin() {
367 (start, end, step)
368 } else {
369 (start, end, None)
370 }
371 }
372
373 fn integer_to_timestamp_expr(offset_secs: i32) -> Expr {
374 let base =
375 NaiveDateTime::parse_from_str("2020-01-01 00:00:00", "%Y-%m-%d %H:%M:%S").unwrap();
376 let ts = base + Duration::seconds(offset_secs as i64);
377 Expr::TypedString {
378 data_type: AstDataType::Timestamp(false),
379 value: ts.format("%Y-%m-%d %H:%M:%S").to_string(),
380 }
381 }
382
383 fn integer_to_interval_expr(num: i32) -> Expr {
384 Expr::TypedString {
385 data_type: AstDataType::Interval,
386 value: format!("{} seconds", num),
387 }
388 }
389
390 fn gen_simple_timestamp_range(&mut self) -> (Expr, Expr, Option<Expr>) {
391 let mut secs: Vec<i32> = (0..3).map(|_| self.rng.random_range(0..3600)).collect();
392 secs.sort_unstable();
393
394 let start = Self::integer_to_timestamp_expr(secs[0]);
395 let end = Self::integer_to_timestamp_expr(secs[2]);
396 let step = Some(Self::integer_to_interval_expr(secs[1]));
397
398 (start, end, step)
399 }
400
401 fn gen_start_end_step(&mut self) -> (Expr, Expr, Option<Expr>) {
402 match self.rng.random_range(0..=1) {
403 0..=0 => self.gen_simple_integer_range(),
404 1..=1 => self.gen_simple_timestamp_range(),
405 _ => unreachable!(),
406 }
407 }
408
409 fn gen_jsonb(&mut self, n: usize, kind: JsonTopLevelKind) -> Expr {
410 Expr::Cast {
411 expr: Box::new(Expr::Value(Value::SingleQuotedString(
412 self.gen_json_value(0, n, kind),
413 ))),
414 data_type: AstDataType::Jsonb,
415 }
416 }
417
418 fn gen_json_value(&mut self, depth: usize, max_depth: usize, kind: JsonTopLevelKind) -> String {
419 if depth >= max_depth {
420 return match self.rng.random_range(0..=3) {
421 0 => format!("\"{}\"", self.gen_random_string()),
422 1 => self.rng.random_range(-1000..1000).to_string(),
423 2 => self.flip_coin().to_string(),
424 3 => "null".into(),
425 _ => unreachable!(),
426 };
427 }
428
429 match if depth == 0 {
430 match kind {
431 JsonTopLevelKind::Array => 4,
432 JsonTopLevelKind::Object => 5,
433 JsonTopLevelKind::Any => self.rng.random_range(0..=5),
434 }
435 } else {
436 self.rng.random_range(0..=5)
437 } {
438 0 => "null".into(),
439 1 => self.flip_coin().to_string(),
440 2 => self.rng.random_range(-1000..1000).to_string(),
441 3 => format!("\"{}\"", self.gen_random_string()),
442 4 => {
443 let len = self.rng.random_range(1..=3);
444 let elems: Vec<String> = (0..len)
445 .map(|_| self.gen_json_value(depth + 1, max_depth, JsonTopLevelKind::Any))
446 .collect();
447 format!("[{}]", elems.join(","))
448 }
449 5 => {
450 let len = self.rng.random_range(1..=3);
451 let fields: Vec<String> = (0..len)
452 .map(|_| {
453 let key = self.gen_random_string();
454 let val = self.gen_json_value(depth + 1, max_depth, JsonTopLevelKind::Any);
455 format!("\"{}\":{}", key, val)
456 })
457 .collect();
458 format!("{{{}}}", fields.join(","))
459 }
460 _ => unreachable!(),
461 }
462 }
463
464 fn gen_random_string(&mut self) -> String {
465 let len = self.rng.random_range(3..8);
466 (0..len)
467 .map(|_| self.rng.sample(Alphanumeric) as char)
468 .collect()
469 }
470}
471
472fn create_tvf(
474 name: &str,
475 alias: TableAlias,
476 args: Vec<FunctionArg>,
477 with_ordinality: bool,
478) -> TableFactor {
479 TableFactor::TableFunction {
480 name: ObjectName(vec![name.into()]),
481 alias: Some(alias),
482 args,
483 with_ordinality,
484 }
485}