risingwave_expr_impl/scalar/
split_part.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Write;
16
17use risingwave_expr::{ExprError, Result, function};
18
19#[function("split_part(varchar, varchar, int4) -> varchar")]
20pub fn split_part(
21    string_expr: &str,
22    delimiter_expr: &str,
23    nth_expr: i32,
24    writer: &mut impl Write,
25) -> Result<()> {
26    if nth_expr == 0 {
27        return Err(ExprError::InvalidParam {
28            name: "data",
29            reason: "can't be zero".into(),
30        });
31    };
32
33    let mut split = string_expr.split(delimiter_expr);
34    let nth_val = if string_expr.is_empty() {
35        // postgres: return empty string for empty input string
36        Default::default()
37    } else if delimiter_expr.is_empty() {
38        // postgres: handle empty field separator
39        //           if first or last field, return input string, else empty string
40        if nth_expr == 1 || nth_expr == -1 {
41            string_expr
42        } else {
43            Default::default()
44        }
45    } else {
46        match nth_expr.cmp(&0) {
47            std::cmp::Ordering::Equal => unreachable!(),
48
49            // Since `nth_expr` can not be 0, so the `abs()` of it can not be smaller than 1
50            // (that's `abs(1)` or `abs(-1)`).  Hence the result of sub 1 can not be less than 0.
51            // postgres: if nonexistent field, return empty string
52            std::cmp::Ordering::Greater => split.nth(nth_expr as usize - 1).unwrap_or_default(),
53            std::cmp::Ordering::Less => {
54                let split = split.collect::<Vec<_>>();
55                split
56                    .iter()
57                    .rev()
58                    .nth(nth_expr.unsigned_abs() as usize - 1)
59                    .cloned()
60                    .unwrap_or_default()
61            }
62        }
63    };
64    writer.write_str(nth_val).unwrap();
65    Ok(())
66}
67
#[cfg(test)]
mod tests {
    use super::split_part;

    /// Table-driven check of `split_part` against the expected output for each input.
    #[test]
    fn test_split_part() {
        // Each row is `(string, delimiter, nth, expected)`; `None` means the call must fail.
        let cases: &[(&str, &str, i32, Option<&str>)] = &[
            // postgres cases
            ("", "@", 1, Some("")),
            ("", "@", -1, Some("")),
            ("joeuser@mydatabase", "", 1, Some("joeuser@mydatabase")),
            ("joeuser@mydatabase", "", 2, Some("")),
            ("joeuser@mydatabase", "", -1, Some("joeuser@mydatabase")),
            ("joeuser@mydatabase", "", -2, Some("")),
            ("joeuser@mydatabase", "@", 0, None),
            ("joeuser@mydatabase", "@@", 1, Some("joeuser@mydatabase")),
            ("joeuser@mydatabase", "@@", 2, Some("")),
            ("joeuser@mydatabase", "@", 1, Some("joeuser")),
            ("joeuser@mydatabase", "@", 2, Some("mydatabase")),
            ("joeuser@mydatabase", "@", 3, Some("")),
            ("@joeuser@mydatabase@", "@", 2, Some("joeuser")),
            ("joeuser@mydatabase", "@", -1, Some("mydatabase")),
            ("joeuser@mydatabase", "@", -2, Some("joeuser")),
            ("joeuser@mydatabase", "@", -3, Some("")),
            ("@joeuser@mydatabase@", "@", -2, Some("mydatabase")),
            // other cases

            // guards against an `rsplit`-based implementation for negative `nth`
            ("@@@", "@@", -1, Some("@")),
        ];

        for (idx, case) in cases.iter().enumerate() {
            let (input, delimiter, nth, expected) = *case;

            let mut written = String::new();
            let outcome = split_part(input, delimiter, nth, &mut written);
            // Collapse the result to `Some(output)` on success and `None` on any error.
            let actual = outcome.ok().map(|_| written.as_str());

            assert_eq!(actual, expected, "\nat case {idx}: {:?}\n", case);
        }
    }
}