risingwave_sqlsmith/sql_gen/
scalar.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::time::{Duration, SystemTime};
16
17use chrono::{DateTime, Utc};
18use rand::Rng;
19use rand::distr::Alphanumeric;
20use rand::prelude::IndexedRandom;
21use risingwave_common::types::DataType;
22use risingwave_sqlparser::ast::{Array, DataType as AstDataType, Expr, Value};
23
24use crate::sql_gen::SqlGenerator;
25use crate::sql_gen::expr::typed_null;
26
27impl<R: Rng> SqlGenerator<'_, R> {
28    /// Generates integer scalar expression.
29    /// Bound: [start, end).
30    /// Type: `DataType`.
31    pub(super) fn gen_range_scalar(
32        &mut self,
33        typ: &DataType,
34        start: i64,
35        end: i64,
36    ) -> Option<Expr> {
37        use DataType as T;
38        let value = self.rng.random_range(start..end).to_string();
39        match *typ {
40            T::Int64 => Some(Expr::TypedString {
41                data_type: AstDataType::BigInt,
42                value,
43            }),
44            T::Int32 => Some(Expr::TypedString {
45                data_type: AstDataType::Int,
46                value,
47            }),
48            T::Int16 => Some(Expr::TypedString {
49                data_type: AstDataType::SmallInt,
50                value,
51            }),
52            _ => None,
53        }
54    }
55
56    pub(super) fn gen_simple_scalar(&mut self, typ: &DataType) -> Expr {
57        use DataType as T;
58        // NOTE(kwannoel): Since this generates many invalid queries,
59        // its probability should be set to low, e.g. 0.02.
60        // ENABLE: https://github.com/risingwavelabs/risingwave/issues/7327
61        if self.rng.random_bool(0.0) {
62            // NOTE(kwannoel): We generate Cast with NULL to avoid generating lots of ambiguous
63            // expressions. For instance agg calls such as `max(NULL)` may be generated,
64            // and coerced to VARCHAR, where we require a `NULL::int` instead.
65            return typed_null(typ);
66        }
67        // Scalars which may generate negative numbers are wrapped in
68        // `Nested` to ambiguity while parsing.
69        // e.g. -1 becomes -(1).
70        // See: https://github.com/risingwavelabs/risingwave/issues/4344
71        match *typ {
72            T::Int64 => Expr::Nested(Box::new(Expr::TypedString {
73                data_type: AstDataType::BigInt,
74                value: self.gen_int(i64::MIN as _, i64::MAX as _),
75            })),
76            T::Int32 => Expr::Nested(Box::new(Expr::TypedString {
77                data_type: AstDataType::Int,
78                value: self.gen_int(i32::MIN as _, i32::MAX as _),
79            })),
80            T::Int16 => Expr::Nested(Box::new(Expr::TypedString {
81                data_type: AstDataType::SmallInt,
82                value: self.gen_int(i16::MIN as _, i16::MAX as _),
83            })),
84            T::Varchar => Expr::Cast {
85                // since we are generating random scalar literal, we should cast it to avoid unknown type
86                expr: Box::new(Expr::Value(Value::SingleQuotedString(
87                    (0..10)
88                        .map(|_| self.rng.sample(Alphanumeric) as char)
89                        .collect(),
90                ))),
91                data_type: AstDataType::Varchar,
92            },
93            T::Decimal => Expr::Nested(Box::new(Expr::Value(Value::Number(self.gen_float())))),
94            T::Float64 => Expr::Nested(Box::new(Expr::TypedString {
95                data_type: AstDataType::Float(None),
96                value: self.gen_float(),
97            })),
98            T::Float32 => Expr::Nested(Box::new(Expr::TypedString {
99                data_type: AstDataType::Real,
100                value: self.gen_float(),
101            })),
102            T::Boolean => Expr::Value(Value::Boolean(self.rng.random_bool(0.5))),
103            T::Date => Expr::TypedString {
104                data_type: AstDataType::Date,
105                value: self.gen_temporal_scalar(typ),
106            },
107            T::Time => Expr::TypedString {
108                data_type: AstDataType::Time(false),
109                value: self.gen_temporal_scalar(typ),
110            },
111            T::Timestamp => Expr::TypedString {
112                data_type: AstDataType::Timestamp(false),
113                value: self.gen_temporal_scalar(typ),
114            },
115            T::Timestamptz => Expr::TypedString {
116                data_type: AstDataType::Timestamp(true),
117                value: self.gen_temporal_scalar(typ),
118            },
119            T::Interval => Expr::Nested(Box::new(Expr::TypedString {
120                data_type: AstDataType::Interval,
121                value: self.gen_temporal_scalar(typ),
122            })),
123            T::List(ref ty) => {
124                let n = self.rng.random_range(1..=4);
125                Expr::Array(Array {
126                    elem: self.gen_simple_scalar_list(ty, n),
127                    named: true,
128                })
129            }
130            // ENABLE: https://github.com/risingwavelabs/risingwave/issues/6934
131            // T::Struct(ref inner) => Expr::Row(
132            //     inner
133            //         .fields
134            //         .iter()
135            //         .map(|typ| self.gen_simple_scalar(typ))
136            //         .collect(),
137            // ),
138            _ => typed_null(typ),
139        }
140    }
141
142    /// Generates a list of `n` simple scalar values of a specific `type`.
143    fn gen_simple_scalar_list(&mut self, ty: &DataType, n: usize) -> Vec<Expr> {
144        (0..n).map(|_| self.gen_simple_scalar(ty)).collect()
145    }
146
147    fn gen_int(&mut self, min: i64, max: i64) -> String {
148        // NOTE: Reduced chance for extreme values,
149        // since these tend to generate invalid expressions.
150        let n = match self.rng.random_range(1..=100) {
151            1..=5 => 0,
152            6..=10 => 1,
153            11..=15 => max,
154            16..=20 => min,
155            21..=25 => self.rng.random_range(min + 1..0),
156            26..=30 => self.rng.random_range(1000..max),
157            31..=100 => self.rng.random_range(2..1000),
158            _ => unreachable!(),
159        };
160        n.to_string()
161    }
162
163    fn gen_float(&mut self) -> String {
164        // NOTE: Reduced chance for extreme values,
165        // since these tend to generate invalid expressions.
166        let n = match self.rng.random_range(1..=100) {
167            1..=5 => 0.0,
168            6..=10 => 1.0,
169            11..=15 => i32::MAX as f64,
170            16..=20 => i32::MIN as f64,
171            21..=25 => self.rng.random_range(i32::MIN + 1..0) as f64,
172            26..=30 => self.rng.random_range(1000..i32::MAX) as f64,
173            31..=100 => self.rng.random_range(2..1000) as f64,
174            _ => unreachable!(),
175        };
176        n.to_string()
177    }
178
179    fn gen_temporal_scalar(&mut self, typ: &DataType) -> String {
180        use DataType as T;
181
182        let minute = 60;
183        let hour = 60 * minute;
184        let day = 24 * hour;
185        let week = 7 * day;
186        let choices = [0, 1, minute, hour, day, week];
187
188        let secs = match self.rng.random_range(1..=100) {
189            1..=30 => *choices.choose(&mut self.rng).unwrap(),
190            31..=100 => self.rng.random_range(2..100) as u64,
191            _ => unreachable!(),
192        };
193
194        let tm = DateTime::<Utc>::from(SystemTime::now() - Duration::from_secs(secs));
195        match typ {
196            T::Date => tm.format("%F").to_string(),
197            T::Timestamp | T::Timestamptz => tm.format("%Y-%m-%d %H:%M:%S").to_string(),
198            // ENABLE: https://github.com/risingwavelabs/risingwave/issues/5826
199            // T::Timestamptz => {
200            //     let timestamp = tm.format("%Y-%m-%d %H:%M:%S");
201            //     let timezone = self.rng.random_range(0..=15);
202            //     format!("{}+{}", timestamp, timezone)
203            // }
204            T::Time => tm.format("%T").to_string(),
205            T::Interval => {
206                if self.rng.random_bool(0.5) {
207                    (-(secs as i64)).to_string()
208                } else {
209                    secs.to_string()
210                }
211            }
212            _ => unreachable!(),
213        }
214    }
215}