risingwave_common/cast/
mod.rs

1// Copyright 2023 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use chrono::NaiveDateTime;
16use itertools::Itertools;
17use thiserror_ext::AsReport;
18
19use crate::types::{Timestamp, Timestamptz};
20
/// Result type used by the functions in this file: failures are reported as
/// plain `String` messages.
type Result<T> = std::result::Result<T, String>;

/// Error message text for a failed string-to-bytea parse.
///
/// NOTE(review): not referenced by the functions visible in this file —
/// presumably consumed by callers elsewhere; confirm before changing it.
pub const PARSE_ERROR_STR_TO_BYTEA: &str = "Invalid Bytea syntax";
24
25/// Parse a string into a bool.
26///
27/// See `https://www.postgresql.org/docs/9.5/datatype-boolean.html`
28pub fn str_to_bool(input: &str) -> Result<bool> {
29    /// String literals for bool type.
30    const TRUE_BOOL_LITERALS: [&str; 9] = ["true", "tru", "tr", "t", "on", "1", "yes", "ye", "y"];
31    const FALSE_BOOL_LITERALS: [&str; 10] = [
32        "false", "fals", "fal", "fa", "f", "off", "of", "0", "no", "n",
33    ];
34
35    let trimmed_input = input.trim();
36    if TRUE_BOOL_LITERALS
37        .iter()
38        .any(|s| s.eq_ignore_ascii_case(trimmed_input))
39    {
40        Ok(true)
41    } else if FALSE_BOOL_LITERALS
42        .iter()
43        .any(|s| trimmed_input.eq_ignore_ascii_case(s))
44    {
45        Ok(false)
46    } else {
47        Err("Invalid bool".into())
48    }
49}
50
51/// Converts UNIX epoch time to timestamptz.
52///
53/// The input UNIX epoch time is interpreted as follows:
54///
55/// - [0, 1e11) are assumed to be in seconds.
56/// - [1e11, 1e14) are assumed to be in milliseconds.
57/// - [1e14, 1e17) are assumed to be in microseconds.
58/// - [1e17, upper) are assumed to be in nanoseconds.
59///
60/// This would cause no problem for timestamp in [1973-03-03 09:46:40, 5138-11-16 09:46:40).
61#[inline]
62pub fn i64_to_timestamptz(t: i64) -> Result<Timestamptz> {
63    const E11: u64 = 100_000_000_000;
64    const E14: u64 = 100_000_000_000_000;
65    const E17: u64 = 100_000_000_000_000_000;
66    match t.abs_diff(0) {
67        0..E11 => Ok(Timestamptz::from_secs(t).unwrap()), // s
68        E11..E14 => Ok(Timestamptz::from_millis(t).unwrap()), // ms
69        E14..E17 => Ok(Timestamptz::from_micros(t)),      // us
70        E17.. => Ok(Timestamptz::from_micros(t / 1000)),  // ns
71    }
72}
73
74/// Converts UNIX epoch time to timestamp.
75///
76/// The input UNIX epoch time is interpreted as follows:
77///
78/// - [0, 1e11) are assumed to be in seconds.
79/// - [1e11, 1e14) are assumed to be in milliseconds.
80/// - [1e14, 1e17) are assumed to be in microseconds.
81/// - [1e17, upper) are assumed to be in nanoseconds.
82///
83/// This would cause no problem for timestamp in [1973-03-03 09:46:40, 5138-11-16 09:46:40).
84///
85/// # Example
86/// ```
87/// # use risingwave_common::cast::i64_to_timestamp;
88/// assert_eq!(
89///     i64_to_timestamp(1_666_666_666).unwrap().to_string(),
90///     "2022-10-25 02:57:46"
91/// );
92/// assert_eq!(
93///     i64_to_timestamp(1_666_666_666_666).unwrap().to_string(),
94///     "2022-10-25 02:57:46.666"
95/// );
96/// assert_eq!(
97///     i64_to_timestamp(1_666_666_666_666_666).unwrap().to_string(),
98///     "2022-10-25 02:57:46.666666"
99/// );
100/// assert_eq!(
101///     i64_to_timestamp(1_666_666_666_666_666_666)
102///         .unwrap()
103///         .to_string(),
104///     // note that we only support microseconds precision
105///     "2022-10-25 02:57:46.666666"
106/// );
107/// ```
108#[inline]
109pub fn i64_to_timestamp(t: i64) -> Result<Timestamp> {
110    let tz = i64_to_timestamptz(t)?;
111    Ok(Timestamp::from_timestamp_uncheck(
112        tz.timestamp(),
113        tz.timestamp_subsec_nanos(),
114    ))
115}
116
117/// Refer to PostgreSQL's implementation <https://github.com/postgres/postgres/blob/5cb54fc310fb84287cbdc74533f3420490a2f63a/src/backend/utils/adt/varlena.c#L276-L288>
118pub fn str_to_bytea(elem: &str, writer: &mut impl std::io::Write) -> Result<()> {
119    if let Some(remainder) = elem.strip_prefix(r"\x") {
120        Ok(parse_bytes_hex(remainder, writer)?)
121    } else {
122        Ok(parse_bytes_traditional(elem, writer)?)
123    }
124}
125
126/// Ref: <https://docs.rs/hex/0.4.3/src/hex/lib.rs.html#175-185>
127fn get_hex(c: u8) -> Result<u8> {
128    match c {
129        b'A'..=b'F' => Ok(c - b'A' + 10),
130        b'a'..=b'f' => Ok(c - b'a' + 10),
131        b'0'..=b'9' => Ok(c - b'0'),
132        _ => Err(format!("invalid hexadecimal digit: \"{}\"", c as char)),
133    }
134}
135
136/// Refer to <https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.10> for specification.
137pub fn parse_bytes_hex(s: &str, writer: &mut impl std::io::Write) -> Result<()> {
138    let mut bytes = s.bytes();
139    while let Some(c) = bytes.next() {
140        // white spaces are tolerated
141        if c == b' ' || c == b'\n' || c == b'\t' || c == b'\r' {
142            continue;
143        }
144        let v1 = get_hex(c)?;
145
146        match bytes.next() {
147            Some(c) => {
148                let v2 = get_hex(c)?;
149                writer
150                    .write_all(&[(v1 << 4) | v2])
151                    .map_err(|e| e.to_report_string())?;
152            }
153            None => return Err("invalid hexadecimal data: odd number of digits".to_owned()),
154        }
155    }
156
157    Ok(())
158}
159
160/// Refer to <https://www.postgresql.org/docs/current/datatype-binary.html#id-1.5.7.12.10> for specification.
161pub fn parse_bytes_traditional(s: &str, writer: &mut impl std::io::Write) -> Result<()> {
162    let mut bytes = s.bytes();
163
164    while let Some(b) = bytes.next() {
165        if b != b'\\' {
166            writer.write_all(&[b]).map_err(|e| e.to_report_string())?;
167        } else {
168            match bytes.next() {
169                Some(b'\\') => {
170                    writer.write_all(b"\\").map_err(|e| e.to_report_string())?;
171                }
172                Some(b1 @ b'0'..=b'3') => match bytes.next_tuple() {
173                    Some((b2 @ b'0'..=b'7', b3 @ b'0'..=b'7')) => {
174                        let byte = (b1 - b'0') << 6 | (b2 - b'0') << 3 | (b3 - b'0');
175                        writer
176                            .write_all(&[byte])
177                            .map_err(|e| e.to_report_string())?;
178                    }
179                    _ => {
180                        // one backslash, not followed by another or ### valid octal
181                        return Err("invalid input syntax for type bytea".to_owned());
182                    }
183                },
184                _ => {
185                    // one backslash, not followed by another or ### valid octal
186                    return Err("invalid input syntax for type bytea".to_owned());
187                }
188            }
189        }
190    }
191
192    Ok(())
193}
194
195pub fn datetime_to_timestamp_millis(datetime: NaiveDateTime) -> i64 {
196    datetime.and_utc().timestamp_millis()
197}
198
#[cfg(test)]
mod tests {
    use chrono::{DateTime, Utc};

    use super::*;

    /// A large negative input is classified as nanoseconds and maps to a
    /// pre-epoch instant.
    #[test]
    fn test_negative_int_to_timestamptz() {
        let expected: DateTime<Utc> = "1900-01-01T00:00:00Z".parse().unwrap();
        let actual = i64_to_timestamptz(-2208988800000000000)
            .unwrap()
            .to_datetime_utc();
        assert_eq!(actual, expected);
    }

    /// Table-driven checks of bytea parsing for both the hex and the
    /// traditional escape formats.
    #[test]
    fn test_bytea() {
        use crate::types::ToText;

        /// Runs the parser under test and collects its output into a buffer.
        fn decode(s: &str) -> Result<Box<[u8]>> {
            let mut buf = Vec::new();
            str_to_bytea(s, &mut buf)?;
            Ok(buf.into())
        }

        // (input, expected hex rendering of the decoded bytes)
        let accepted = [
            ("fgo", r"\x66676f"),
            (r"\xDeadBeef", r"\xdeadbeef"),
            ("12CD", r"\x31324344"),
            ("1234", r"\x31323334"),
            (r"\x12CD", r"\x12cd"),
            (r"\x De Ad Be Ef ", r"\xdeadbeef"),
            ("x De Ad Be Ef ", r"\x7820446520416420426520456620"),
            (r"De\\123dBeEf", r"\x44655c3132336442654566"),
            (r"De\123dBeEf", r"\x4465536442654566"),
            (r"De\\000dBeEf", r"\x44655c3030306442654566"),
            (r"\123", r"\x53"),
            (r"\\", r"\x5c"),
            (r"123", r"\x313233"),
            (r"\\123", r"\x5c313233"),
        ];
        for &(input, expected) in &accepted {
            assert_eq!(
                decode(input).unwrap().as_ref().to_text(),
                expected,
                "input: {:?}",
                input
            );
        }

        // Incomplete or out-of-range octal escapes must be rejected.
        let rejected = [r"\1", r"\12", r"\400", r"\378", r"\387"];
        for &input in &rejected {
            assert!(decode(input).is_err(), "input: {:?}", input);
        }

        // The largest valid octal escape is still accepted.
        assert!(decode(r"\377").is_ok());
    }
}