risingwave_common/cast/
mod.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use itertools::Itertools;
16
17use crate::types::{Timestamp, Timestamptz};
18
/// Result type used by the helpers in this module: failures are reported as a
/// plain `String` message.
type Result<T> = std::result::Result<T, String>;

/// Message text for an invalid `bytea` literal.
///
/// NOTE(review): nothing in the code visible here references this constant;
/// presumably it is consumed by callers in other modules — confirm before
/// changing the text.
pub const PARSE_ERROR_STR_TO_BYTEA: &str = "Invalid Bytea syntax";
22
/// Parses the textual form of a boolean.
///
/// Surrounding whitespace is trimmed and the comparison is ASCII
/// case-insensitive. The accepted spellings are the keywords `true`/`yes`/`on`/`1`
/// and `false`/`no`/`off`/`0`, together with the abbreviations listed in the
/// tables below (a lone `o` is not accepted).
///
/// # Errors
///
/// Returns `"Invalid bool"` when the trimmed input matches none of the
/// accepted spellings.
///
/// See <https://www.postgresql.org/docs/9.5/datatype-boolean.html>
pub fn str_to_bool(input: &str) -> Result<bool> {
    /// Spellings that parse as `true`.
    const TRUTHY: [&str; 9] = ["true", "tru", "tr", "t", "on", "1", "yes", "ye", "y"];
    /// Spellings that parse as `false`.
    const FALSY: [&str; 10] = [
        "false", "fals", "fal", "fa", "f", "off", "of", "0", "no", "n",
    ];

    let candidate = input.trim();
    let is_one_of = |spellings: &[&str]| {
        spellings
            .iter()
            .any(|spelling| spelling.eq_ignore_ascii_case(candidate))
    };

    // The truthy table is consulted first; the two tables are disjoint.
    if is_one_of(&TRUTHY) {
        return Ok(true);
    }
    if is_one_of(&FALSY) {
        return Ok(false);
    }
    Err("Invalid bool".into())
}
48
49/// Converts UNIX epoch time to timestamptz.
50///
51/// The input UNIX epoch time is interpreted as follows:
52///
53/// - [0, 1e11) are assumed to be in seconds.
54/// - [1e11, 1e14) are assumed to be in milliseconds.
55/// - [1e14, 1e17) are assumed to be in microseconds.
56/// - [1e17, upper) are assumed to be in nanoseconds.
57///
58/// This would cause no problem for timestamp in [1973-03-03 09:46:40, 5138-11-16 09:46:40).
59#[inline]
60pub fn i64_to_timestamptz(t: i64) -> Result<Timestamptz> {
61    const E11: u64 = 100_000_000_000;
62    const E14: u64 = 100_000_000_000_000;
63    const E17: u64 = 100_000_000_000_000_000;
64    match t.abs_diff(0) {
65        0..E11 => Ok(Timestamptz::from_secs(t).unwrap()), // s
66        E11..E14 => Ok(Timestamptz::from_millis(t).unwrap()), // ms
67        E14..E17 => Ok(Timestamptz::from_micros(t)),      // us
68        E17.. => Ok(Timestamptz::from_micros(t / 1000)),  // ns
69    }
70}
71
72/// Converts UNIX epoch time to timestamp.
73///
74/// The input UNIX epoch time is interpreted as follows:
75///
76/// - [0, 1e11) are assumed to be in seconds.
77/// - [1e11, 1e14) are assumed to be in milliseconds.
78/// - [1e14, 1e17) are assumed to be in microseconds.
79/// - [1e17, upper) are assumed to be in nanoseconds.
80///
81/// This would cause no problem for timestamp in [1973-03-03 09:46:40, 5138-11-16 09:46:40).
82///
83/// # Example
84/// ```
85/// # use risingwave_common::cast::i64_to_timestamp;
86/// assert_eq!(
87///     i64_to_timestamp(1_666_666_666).unwrap().to_string(),
88///     "2022-10-25 02:57:46"
89/// );
90/// assert_eq!(
91///     i64_to_timestamp(1_666_666_666_666).unwrap().to_string(),
92///     "2022-10-25 02:57:46.666"
93/// );
94/// assert_eq!(
95///     i64_to_timestamp(1_666_666_666_666_666).unwrap().to_string(),
96///     "2022-10-25 02:57:46.666666"
97/// );
98/// assert_eq!(
99///     i64_to_timestamp(1_666_666_666_666_666_666)
100///         .unwrap()
101///         .to_string(),
102///     // note that we only support microseconds precision
103///     "2022-10-25 02:57:46.666666"
104/// );
105/// ```
106#[inline]
107pub fn i64_to_timestamp(t: i64) -> Result<Timestamp> {
108    let tz = i64_to_timestamptz(t)?;
109    Ok(Timestamp::from_timestamp_uncheck(
110        tz.timestamp(),
111        tz.timestamp_subsec_nanos(),
112    ))
113}
114
115/// Refer to PostgreSQL's implementation <https://github.com/postgres/postgres/blob/5cb54fc310fb84287cbdc74533f3420490a2f63a/src/backend/utils/adt/varlena.c#L276-L288>
116pub fn str_to_bytea(elem: &str) -> Result<Box<[u8]>> {
117    if let Some(remainder) = elem.strip_prefix(r"\x") {
118        Ok(parse_bytes_hex(remainder)?.into())
119    } else {
120        Ok(parse_bytes_traditional(elem)?.into())
121    }
122}
123
/// Decodes one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) into its value
/// in `0..=15`.
///
/// # Errors
///
/// Returns `invalid hexadecimal digit: "<c>"` for any other byte, where the
/// byte is rendered with `as char`.
///
/// Ref: <https://docs.rs/hex/0.4.3/src/hex/lib.rs.html#175-185>
fn get_hex(c: u8) -> Result<u8> {
    match c {
        b'A'..=b'F' => Ok(c - b'A' + 10),
        b'a'..=b'f' => Ok(c - b'a' + 10),
        b'0'..=b'9' => Ok(c - b'0'),
        _ => Err(format!("invalid hexadecimal digit: \"{}\"", c as char)),
    }
}

/// Decodes the hex form of a `bytea` value (the part after the `\x` prefix).
///
/// Digits are consumed in pairs, high nibble first. A space, `\n`, `\t` or
/// `\r` is skipped when it appears where a pair would start; inside a pair it
/// is rejected like any other non-digit.
///
/// # Errors
///
/// - `invalid hexadecimal digit: "<c>"` when a character is not a hex digit.
///   The offending character is reported intact, including non-ASCII
///   characters.
/// - `invalid hexadecimal data: odd number of digits` when the input ends
///   after the first digit of a pair.
///
/// Refer to <https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.10> for specification.
pub fn parse_bytes_hex(s: &str) -> Result<Vec<u8>> {
    /// Decodes one input character, keeping non-ASCII characters whole in the
    /// error message instead of rendering their first UTF-8 byte.
    fn hex_value(c: char) -> Result<u8> {
        if c.is_ascii() {
            // Lossless: an ASCII character always fits in one byte.
            get_hex(c as u8)
        } else {
            Err(format!("invalid hexadecimal digit: \"{}\"", c))
        }
    }

    let mut res = Vec::with_capacity(s.len() / 2);

    // Walk characters rather than raw bytes so that a multi-byte character is
    // never split apart when it has to be reported in an error.
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        // white spaces are tolerated
        if matches!(c, ' ' | '\n' | '\t' | '\r') {
            continue;
        }
        let high = hex_value(c)?;

        let Some(c) = chars.next() else {
            return Err("invalid hexadecimal data: odd number of digits".to_owned());
        };
        let low = hex_value(c)?;

        res.push((high << 4) | low);
    }

    Ok(res)
}
157
158/// Refer to <https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.10> for specification.
159pub fn parse_bytes_traditional(s: &str) -> Result<Vec<u8>> {
160    let mut bytes = s.bytes();
161
162    let mut res = Vec::new();
163    while let Some(b) = bytes.next() {
164        if b != b'\\' {
165            res.push(b);
166        } else {
167            match bytes.next() {
168                Some(b'\\') => {
169                    res.push(b'\\');
170                }
171                Some(b1 @ b'0'..=b'3') => match bytes.next_tuple() {
172                    Some((b2 @ b'0'..=b'7', b3 @ b'0'..=b'7')) => {
173                        res.push(((b1 - b'0') << 6) + ((b2 - b'0') << 3) + (b3 - b'0'));
174                    }
175                    _ => {
176                        // one backslash, not followed by another or ### valid octal
177                        return Err("invalid input syntax for type bytea".to_owned());
178                    }
179                },
180                _ => {
181                    // one backslash, not followed by another or ### valid octal
182                    return Err("invalid input syntax for type bytea".to_owned());
183                }
184            }
185        }
186    }
187
188    Ok(res)
189}
190
#[cfg(test)]
mod tests {
    use chrono::{DateTime, Utc};

    use super::*;

    /// A negative input whose magnitude falls in the nanosecond range keeps
    /// its sign and resolves to a date before the epoch.
    #[test]
    fn test_negative_int_to_timestamptz() {
        let expected: DateTime<Utc> = "1900-01-01T00:00:00Z".parse().unwrap();
        let actual = i64_to_timestamptz(-2_208_988_800_000_000_000)
            .unwrap()
            .to_datetime_utc();
        assert_eq!(actual, expected);
    }

    /// Exercises both `bytea` input formats through `str_to_bytea`.
    #[test]
    fn test_bytea() {
        use crate::types::ToText;

        // Parses `input` and renders the resulting bytes back to text.
        let rendered = |input: &str| str_to_bytea(input).unwrap().as_ref().to_text();

        // (input, expected text rendering)
        let accepted = [
            ("fgo", r"\x66676f"),
            (r"\xDeadBeef", r"\xdeadbeef"),
            ("12CD", r"\x31324344"),
            ("1234", r"\x31323334"),
            (r"\x12CD", r"\x12cd"),
            (r"\x De Ad Be Ef ", r"\xdeadbeef"),
            ("x De Ad Be Ef ", r"\x7820446520416420426520456620"),
            (r"De\\123dBeEf", r"\x44655c3132336442654566"),
            (r"De\123dBeEf", r"\x4465536442654566"),
            (r"De\\000dBeEf", r"\x44655c3030306442654566"),
            (r"\123", r"\x53"),
            (r"\\", r"\x5c"),
            (r"123", r"\x313233"),
            (r"\\123", r"\x5c313233"),
        ];
        for (input, expected) in accepted {
            assert_eq!(rendered(input), expected, "input: {input:?}");
        }

        // Truncated or out-of-range octal escapes are rejected.
        for input in [r"\1", r"\12", r"\400", r"\378", r"\387"] {
            assert!(str_to_bytea(input).is_err(), "input: {input:?}");
        }
        // The largest valid octal escape is accepted.
        assert!(str_to_bytea(r"\377").is_ok());
    }
}