risingwave_expr/scalar/
like.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use const_currying::const_currying;
16use itertools::Itertools;
17use risingwave_expr::function;
18
19use crate::{ExprError, Result};
20
21#[const_currying]
22fn like_impl<const CASE_INSENSITIVE: bool>(
23    s: &str,
24    p: &str,
25    #[maybe_const(consts = [b'\\'])] escape: u8,
26) -> bool {
27    let (mut px, mut sx) = (0, 0);
28    let (mut next_px, mut next_sx) = (0, 0);
29    let (pbytes, sbytes) = (p.as_bytes(), s.as_bytes());
30    while px < pbytes.len() || sx < sbytes.len() {
31        if px < pbytes.len() {
32            let c = pbytes[px];
33            match c {
34                b'_' => {
35                    if escape == b'_' {
36                        if px > 0 && pbytes[px - 1] != escape {
37                            px += 1;
38                            continue;
39                        }
40                    }
41                    if sx < sbytes.len() {
42                        px += 1;
43                        sx += 1;
44                        continue;
45                    }
46                }
47                b'%' => {
48                    next_px = px;
49                    next_sx = sx + 1;
50                    px += 1;
51                    continue;
52                }
53                mut pc => {
54                    if ((!CASE_INSENSITIVE && pc == escape)
55                        || (CASE_INSENSITIVE && pc.eq_ignore_ascii_case(&escape)))
56                        && px + 1 < pbytes.len()
57                    {
58                        px += 1;
59                        pc = pbytes[px];
60                    }
61                    if sx < sbytes.len()
62                        && ((!CASE_INSENSITIVE && sbytes[sx] == pc)
63                            || (CASE_INSENSITIVE && sbytes[sx].eq_ignore_ascii_case(&pc)))
64                    {
65                        px += 1;
66                        sx += 1;
67                        continue;
68                    }
69                }
70            }
71        }
72        if 0 < next_sx && next_sx <= sbytes.len() {
73            px = next_px;
74            sx = next_sx;
75            continue;
76        }
77        return false;
78    }
79    true
80}
81
82#[function("like(varchar, varchar) -> boolean")]
83pub fn like_default(s: &str, p: &str) -> bool {
84    like_impl_escape::<false, b'\\'>(s, p)
85}
86
87#[function("i_like(varchar, varchar) -> boolean")]
88pub fn i_like_default(s: &str, p: &str) -> bool {
89    like_impl_escape::<true, b'\\'>(s, p)
90}
91
92#[function(
93    "like(varchar, varchar, varchar) -> boolean",
94    prebuild = "EscapeChar::from_str($2)?"
95)]
96fn like(s: &str, p: &str, escape: &EscapeChar) -> bool {
97    like_impl::<false>(s, p, escape.0)
98}
99
// TODO: We should support '' as escape character.
// TODO: We should support any UTF-8 character as escape character.
/// The escape character of a `LIKE` pattern, stored as a single byte.
#[derive(Clone, Copy, Debug)]
struct EscapeChar(u8);
104
105impl EscapeChar {
106    fn from_str(escape: &str) -> Result<Self> {
107        escape
108            .chars()
109            .exactly_one()
110            .ok()
111            .and_then(|c| c.as_ascii().map(|c| c.to_u8()))
112            .ok_or_else(|| ExprError::InvalidParam {
113                name: "escape",
114                reason: "only single ascii character is supported now".into(),
115            })
116            .map(Self)
117    }
118}
119
#[cfg(test)]
mod tests {
    use risingwave_expr::scalar::like::EscapeChar;

    use super::{i_like_default, like, like_default};

    // Each entry is `(target, pattern, case_insensitive, expected)`; the default backslash
    // escape is used for all of them.
    static CASES: &[(&str, &str, bool, bool)] = &[
        (r#"ABCDE"#, r#"%abcde%"#, false, false),
        (r#"Like, expression"#, r#"Like, expression"#, false, true),
        (r#"Like, expression"#, r#"Like, %"#, false, true),
        (r#"Like, expression"#, r#"%, expression"#, false, true),
        (r#"like"#, r#"li%ke"#, false, true),
        (r#"like"#, r#"l%ik%e"#, false, true),
        (r#"like"#, r#"%like%"#, false, true),
        (r#"like"#, r#"l%i%k%e%"#, false, true),
        (r#"like"#, r#"_%_e"#, false, true),
        (r#"like"#, r#"l%__"#, false, true),
        (r#"like"#, r#"_%_%_%_"#, false, true),
        (r#"abctest"#, r#"__test"#, false, false),
        (r#"abctest"#, r#"%_test"#, false, true),
        (r#"aaaaabbb"#, r#"a%a%a%a%a%a%b"#, false, false),
        (
            r#"blush thistle blue yellow saddle"#,
            r#"%yellow%"#,
            false,
            true,
        ),
        (r#"ABC_123"#, r#"ABC_123"#, false, true),
        (r#"ABCD123"#, r#"ABC_123"#, false, true),
        (r#"ABC_123"#, r"ABC\_123", false, true),
        (r#"ABCD123"#, r"ABC\_123", false, false),
        (r"ABC\123", r#"ABC_123"#, false, true),
        (r"ABC\123", r"ABC\\123", false, true),
        (r"ABC\123", r"ABC\123", false, false),
        ("apple", r#"App%"#, true, true),
        ("banana", r#"B%nana"#, true, true),
        ("apple", r#"B%nana"#, true, false),
        ("grape", "Gr_P_", true, true),
    ];

    // Checks every `CASES` entry against the matching default-escape entry point.
    #[test]
    fn test_like() {
        for &(target, pattern, case_insensitive, expected) in CASES {
            let output = if case_insensitive {
                i_like_default(target, pattern)
            } else {
                like_default(target, pattern)
            };
            assert_eq!(
                output, expected,
                "target={}, pattern={}, case_insensitive={}",
                target, pattern, case_insensitive
            );
        }
    }

    // Each entry is `(target, pattern, escape, expected)`.
    static ESCAPE_CASES: &[(&str, &str, &str, bool)] = &[
        (r"bear", r"b_ear", r"_", true),
        (r"be_r", r"b_e__r", r"_", true),
        (r"be__r", r"b_e___r", r"_", true),
        (r"be___r", r"b_e____r", r"_", true),
        (r"be_r", r"__e__r", r"_", false),
        // TODO: Wrong behavior
        (r"___r", r"____r", r"_", false),
    ];

    // Checks every `ESCAPE_CASES` entry through the explicit-escape `like`.
    #[test]
    fn test_escape_like() {
        for &(target, pattern, escape, expected) in ESCAPE_CASES {
            let escape_char = EscapeChar::from_str(escape).unwrap();
            let output = like(target, pattern, &escape_char);
            assert_eq!(
                output, expected,
                "target={}, pattern={}, escape={}",
                target, pattern, escape
            );
        }
    }
}