risingwave_common/cast/
mod.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use itertools::Itertools;
16use thiserror_ext::AsReport;
17
18use crate::types::{Timestamp, Timestamptz};
19
/// Result alias used by the helpers in this module: the error side is a plain
/// `String` message.
type Result<T> = std::result::Result<T, String>;

/// Message text for bytea parse failures (judging by its name).
///
/// NOTE(review): none of the functions visible in this file reference this
/// constant; the parsers below build their own messages. Presumably it is
/// used by callers elsewhere — confirm before changing it.
pub const PARSE_ERROR_STR_TO_BYTEA: &str = "Invalid Bytea syntax";
23
/// Parses the textual form of a boolean.
///
/// Surrounding whitespace is trimmed and the comparison is ASCII
/// case-insensitive. Accepted spellings:
///
/// - `true`: `true`, `tru`, `tr`, `t`, `on`, `1`, `yes`, `ye`, `y`
/// - `false`: `false`, `fals`, `fal`, `fa`, `f`, `off`, `of`, `0`, `no`, `n`
///
/// # Errors
///
/// Any other input yields `Err("Invalid bool")`.
///
/// See `https://www.postgresql.org/docs/9.5/datatype-boolean.html`
pub fn str_to_bool(input: &str) -> Result<bool> {
    /// Spellings that parse as `true`.
    const TRUTHY: &[&str] = &["true", "tru", "tr", "t", "on", "1", "yes", "ye", "y"];
    /// Spellings that parse as `false`.
    const FALSY: &[&str] = &[
        "false", "fals", "fal", "fa", "f", "off", "of", "0", "no", "n",
    ];

    let candidate = input.trim();
    // Shared membership test so both tables are matched the same way.
    let is_one_of = |literals: &[&str]| {
        literals
            .iter()
            .any(|literal| literal.eq_ignore_ascii_case(candidate))
    };

    if is_one_of(TRUTHY) {
        return Ok(true);
    }
    if is_one_of(FALSY) {
        return Ok(false);
    }
    Err("Invalid bool".into())
}
49
50/// Converts UNIX epoch time to timestamptz.
51///
52/// The input UNIX epoch time is interpreted as follows:
53///
54/// - [0, 1e11) are assumed to be in seconds.
55/// - [1e11, 1e14) are assumed to be in milliseconds.
56/// - [1e14, 1e17) are assumed to be in microseconds.
57/// - [1e17, upper) are assumed to be in nanoseconds.
58///
59/// This would cause no problem for timestamp in [1973-03-03 09:46:40, 5138-11-16 09:46:40).
60#[inline]
61pub fn i64_to_timestamptz(t: i64) -> Result<Timestamptz> {
62    const E11: u64 = 100_000_000_000;
63    const E14: u64 = 100_000_000_000_000;
64    const E17: u64 = 100_000_000_000_000_000;
65    match t.abs_diff(0) {
66        0..E11 => Ok(Timestamptz::from_secs(t).unwrap()), // s
67        E11..E14 => Ok(Timestamptz::from_millis(t).unwrap()), // ms
68        E14..E17 => Ok(Timestamptz::from_micros(t)),      // us
69        E17.. => Ok(Timestamptz::from_micros(t / 1000)),  // ns
70    }
71}
72
73/// Converts UNIX epoch time to timestamp.
74///
75/// The input UNIX epoch time is interpreted as follows:
76///
77/// - [0, 1e11) are assumed to be in seconds.
78/// - [1e11, 1e14) are assumed to be in milliseconds.
79/// - [1e14, 1e17) are assumed to be in microseconds.
80/// - [1e17, upper) are assumed to be in nanoseconds.
81///
82/// This would cause no problem for timestamp in [1973-03-03 09:46:40, 5138-11-16 09:46:40).
83///
84/// # Example
85/// ```
86/// # use risingwave_common::cast::i64_to_timestamp;
87/// assert_eq!(
88///     i64_to_timestamp(1_666_666_666).unwrap().to_string(),
89///     "2022-10-25 02:57:46"
90/// );
91/// assert_eq!(
92///     i64_to_timestamp(1_666_666_666_666).unwrap().to_string(),
93///     "2022-10-25 02:57:46.666"
94/// );
95/// assert_eq!(
96///     i64_to_timestamp(1_666_666_666_666_666).unwrap().to_string(),
97///     "2022-10-25 02:57:46.666666"
98/// );
99/// assert_eq!(
100///     i64_to_timestamp(1_666_666_666_666_666_666)
101///         .unwrap()
102///         .to_string(),
103///     // note that we only support microseconds precision
104///     "2022-10-25 02:57:46.666666"
105/// );
106/// ```
107#[inline]
108pub fn i64_to_timestamp(t: i64) -> Result<Timestamp> {
109    let tz = i64_to_timestamptz(t)?;
110    Ok(Timestamp::from_timestamp_uncheck(
111        tz.timestamp(),
112        tz.timestamp_subsec_nanos(),
113    ))
114}
115
116/// Refer to PostgreSQL's implementation <https://github.com/postgres/postgres/blob/5cb54fc310fb84287cbdc74533f3420490a2f63a/src/backend/utils/adt/varlena.c#L276-L288>
117pub fn str_to_bytea(elem: &str, writer: &mut impl std::io::Write) -> Result<()> {
118    if let Some(remainder) = elem.strip_prefix(r"\x") {
119        Ok(parse_bytes_hex(remainder, writer)?)
120    } else {
121        Ok(parse_bytes_traditional(elem, writer)?)
122    }
123}
124
/// Returns the numeric value (0–15) of a single ASCII hexadecimal digit.
///
/// Upper- and lower-case letters are both accepted.
///
/// # Errors
///
/// Any other byte yields `invalid hexadecimal digit: "<c>"`, where `<c>` is the
/// byte converted to a `char`. A non-ASCII byte therefore shows up as the
/// Unicode scalar with the same numeric value, not as the character it may
/// have been part of in the original string.
///
/// Ref: <https://docs.rs/hex/0.4.3/src/hex/lib.rs.html#175-185>
fn get_hex(c: u8) -> Result<u8> {
    let symbol = char::from(c);
    match symbol.to_digit(16) {
        // `to_digit(16)` only yields 0..=15, so narrowing to `u8` is lossless.
        Some(value) => Ok(value as u8),
        None => Err(format!("invalid hexadecimal digit: \"{}\"", symbol)),
    }
}
134
135/// Refer to <https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.10> for specification.
136pub fn parse_bytes_hex(s: &str, writer: &mut impl std::io::Write) -> Result<()> {
137    let mut bytes = s.bytes();
138    while let Some(c) = bytes.next() {
139        // white spaces are tolerated
140        if c == b' ' || c == b'\n' || c == b'\t' || c == b'\r' {
141            continue;
142        }
143        let v1 = get_hex(c)?;
144
145        match bytes.next() {
146            Some(c) => {
147                let v2 = get_hex(c)?;
148                writer
149                    .write_all(&[(v1 << 4) | v2])
150                    .map_err(|e| e.to_report_string())?;
151            }
152            None => return Err("invalid hexadecimal data: odd number of digits".to_owned()),
153        }
154    }
155
156    Ok(())
157}
158
159/// Refer to <https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.10> for specification.
160pub fn parse_bytes_traditional(s: &str, writer: &mut impl std::io::Write) -> Result<()> {
161    let mut bytes = s.bytes();
162
163    while let Some(b) = bytes.next() {
164        if b != b'\\' {
165            writer.write_all(&[b]).map_err(|e| e.to_report_string())?;
166        } else {
167            match bytes.next() {
168                Some(b'\\') => {
169                    writer.write_all(b"\\").map_err(|e| e.to_report_string())?;
170                }
171                Some(b1 @ b'0'..=b'3') => match bytes.next_tuple() {
172                    Some((b2 @ b'0'..=b'7', b3 @ b'0'..=b'7')) => {
173                        let byte = (b1 - b'0') << 6 | (b2 - b'0') << 3 | (b3 - b'0');
174                        writer
175                            .write_all(&[byte])
176                            .map_err(|e| e.to_report_string())?;
177                    }
178                    _ => {
179                        // one backslash, not followed by another or ### valid octal
180                        return Err("invalid input syntax for type bytea".to_owned());
181                    }
182                },
183                _ => {
184                    // one backslash, not followed by another or ### valid octal
185                    return Err("invalid input syntax for type bytea".to_owned());
186                }
187            }
188        }
189    }
190
191    Ok(())
192}
193
#[cfg(test)]
mod tests {
    use chrono::{DateTime, Utc};

    use super::*;

    /// A large negative input falls in the nanosecond bucket and keeps its sign.
    #[test]
    fn test_negative_int_to_timestamptz() {
        let expected: DateTime<Utc> = "1900-01-01T00:00:00Z".parse().unwrap();
        let actual = i64_to_timestamptz(-2_208_988_800_000_000_000)
            .unwrap()
            .to_datetime_utc();
        assert_eq!(actual, expected);
    }

    /// Decodes bytea text in both formats and checks the result via `to_text`.
    #[test]
    fn test_bytea() {
        use crate::types::ToText;

        // Local wrapper that collects the decoded bytes into an owned buffer.
        fn str_to_bytea(s: &str) -> Result<Box<[u8]>> {
            let mut decoded = Vec::new();
            super::str_to_bytea(s, &mut decoded)?;
            Ok(decoded.into())
        }

        // (input text, expected `to_text` rendering of the decoded bytes)
        let accepted = [
            ("fgo", r"\x66676f"),
            (r"\xDeadBeef", r"\xdeadbeef"),
            ("12CD", r"\x31324344"),
            ("1234", r"\x31323334"),
            (r"\x12CD", r"\x12cd"),
            (r"\x De Ad Be Ef ", r"\xdeadbeef"),
            ("x De Ad Be Ef ", r"\x7820446520416420426520456620"),
            (r"De\\123dBeEf", r"\x44655c3132336442654566"),
            (r"De\123dBeEf", r"\x4465536442654566"),
            (r"De\\000dBeEf", r"\x44655c3030306442654566"),
            (r"\123", r"\x53"),
            (r"\\", r"\x5c"),
            (r"123", r"\x313233"),
            (r"\\123", r"\x5c313233"),
        ];
        for (input, expected) in accepted {
            assert_eq!(
                str_to_bytea(input).unwrap().as_ref().to_text(),
                expected,
                "input: {input:?}"
            );
        }

        // Truncated or out-of-range octal escapes must be rejected.
        for input in [r"\1", r"\12", r"\400", r"\378", r"\387"] {
            assert!(str_to_bytea(input).is_err(), "input: {input:?}");
        }
        // The largest valid octal escape is accepted.
        assert!(str_to_bytea(r"\377").is_ok());
    }
}
275}