risingwave_expr_impl/scalar/
similar_to_escape.rs1use std::fmt::Write;
16
17use risingwave_expr::{ExprError, Result, function};
18
19fn similar_escape_internal(
23 pat: &str,
24 esc_text: Option<char>,
25 writer: &mut impl Write,
26) -> std::result::Result<(), ExprError> {
27 macro_rules! write_ {
28 ($s:expr) => {
29 write!(writer, "{}", $s).unwrap()
30 };
31 }
32
33 write_!("^(?:");
34
35 let mut nquotes = 0;
36 let mut afterescape = false;
37 let mut incharclass = false;
38
39 for chr in pat.chars() {
40 match chr {
41 c if afterescape => {
42 if c == '"' && !incharclass {
43 match nquotes {
44 0 => write_!("){1,1}?("),
45 1 => write_!("){1,1}(?:"),
46 _ => {
47 return Err(ExprError::InvalidParam {
48 name: "pat",
49 reason: "SQL regular expression may not contain more than two escape-double-quote separators".into()
50 });
51 }
52 }
53 nquotes += 1;
54 } else {
55 write_!('\\');
56 write_!(c);
57 }
58
59 afterescape = false;
60 }
61 c if esc_text.is_some_and(|t| t == c) => {
62 afterescape = true;
63 }
64 c if incharclass => {
65 if c == '\\' {
66 write_!('\\');
67 }
68 write_!(c);
69
70 if c == ']' {
71 incharclass = false;
72 }
73 }
74 c @ '[' => {
75 write_!(c);
76 incharclass = true;
77 }
78 '%' => {
79 write_!(".*");
80 }
81 '_' => {
82 write_!('.');
83 }
84 '(' => {
85 write_!("(?:");
87 }
88 c @ ('\\' | '.' | '^' | '$') => {
89 write_!('\\');
90 write_!(c);
91 }
92 c => {
93 write_!(c);
94 }
95 }
96 }
97
98 write_!(")$");
99
100 Ok(())
101}
102
103#[function(
104 "similar_to_escape(varchar) -> varchar",
106)]
107fn similar_to_escape_default(pat: &str, writer: &mut impl Write) -> Result<()> {
108 similar_escape_internal(pat, Some('\\'), writer)
109}
110
111#[function(
112 "similar_to_escape(varchar, varchar) -> varchar"
114)]
115fn similar_to_escape_with_escape_text(
116 pat: &str,
117 esc_text: &str,
118 writer: &mut impl Write,
119) -> Result<()> {
120 if esc_text.chars().nth(1).is_some() {
121 return Err(ExprError::InvalidParam {
122 name: "escape string",
123 reason: format!(
124 "Invalid escape string: `{}`, must be empty or one character",
125 esc_text
126 )
127 .into(),
128 });
129 }
130
131 similar_escape_internal(pat, esc_text.chars().next(), writer)
132}
133
#[cfg(test)]
mod tests {
    use super::{similar_to_escape_default, similar_to_escape_with_escape_text};

    /// Runs every `(pattern, expected)` pair through the two-argument entry point with
    /// `esc_text` as the escape string.
    ///
    /// The conversion result is asserted explicitly instead of being discarded, so an
    /// unexpected error is reported against the offending pattern rather than surfacing
    /// as a confusing mismatch on partially written output.
    fn assert_escapes_with(esc_text: &str, cases: &[(&str, &str)]) {
        for &(pat, expected) in cases {
            let mut writer = String::new();
            let res = similar_to_escape_with_escape_text(pat, esc_text, &mut writer);
            assert!(
                res.is_ok(),
                "pattern {:?} with escape {:?} should convert",
                pat,
                esc_text
            );
            assert_eq!(
                writer, expected,
                "pattern {:?} with escape {:?}",
                pat, esc_text
            );
        }
    }

    #[test]
    fn test_default_escape() {
        let cases = [
            ("", "^(?:)$"),
            ("_bcd%", r#"^(?:.bcd.*)$"#),
            ("bcd%", r#"^(?:bcd.*)$"#),
            (r#"_bcd\%"#, r#"^(?:.bcd\%)$"#),
            ("bcd[]ee", "^(?:bcd[]ee)$"),
            (r#"bcd[]"ee""#, r#"^(?:bcd[]"ee")$"#),
            ("bcd[pp_%.]ee", "^(?:bcd[pp_%.]ee)$"),
            ("bcd[pp_%.]ee_%.", r#"^(?:bcd[pp_%.]ee..*\.)$"#),
            ("bcd[pp_%.](ee_%.)", r#"^(?:bcd[pp_%.](?:ee..*\.))$"#),
            (r#"%\"o_b\"%"#, "^(?:.*){1,1}?(o.b){1,1}(?:.*)$"),
        ];

        for &(pat, escaped) in &cases {
            let mut writer = String::new();
            let res = similar_to_escape_default(pat, &mut writer);
            assert!(res.is_ok(), "pattern {:?} should convert", pat);
            assert_eq!(writer, escaped, "pattern {:?}", pat);
        }

        // A third escape-double-quote separator must be rejected.
        let pat = r#"one\"two\"three\"four"#;
        let mut writer = String::new();
        let res = similar_to_escape_default(pat, &mut writer);
        assert!(res.is_err());
    }

    #[test]
    fn test_escape_with_escape_text() {
        let cases = [
            ("", "^(?:)$"),
            ("_bcd%", "^(?:.bcd.*)$"),
            ("bcd%", "^(?:bcd.*)$"),
            (r#"_bcd\%"#, r#"^(?:.bcd\\.*)$"#),
            ("bcd[]ee", "^(?:bcd[]ee)$"),
            (r#"bcd[]ee"""#, r#"^(?:bcd[]ee"")$"#),
            (r#"bcd[]"ee""#, r#"^(?:bcd[]"ee")$"#),
            ("bcd[pp]ee", "^(?:bcd[pp]ee)$"),
            ("bcd[pp_%.]ee", "^(?:bcd[pp_%.]ee)$"),
            ("bcd[pp_%.]ee_%.", r#"^(?:bcd[pp_%.]ee..*\.)$"#),
            ("bcd[pp_%.](ee_%.)", r#"^(?:bcd[pp_%.](?:ee..*\.))$"#),
            (r#"%#"o_b#"%"#, "^(?:.*){1,1}?(o.b){1,1}(?:.*)$"),
        ];

        assert_escapes_with("#", &cases);

        // An escape string longer than one character is rejected.
        let pat = "xxx";
        let mut writer = String::new();
        let res = similar_to_escape_with_escape_text(pat, "##", &mut writer);
        assert!(res.is_err())
    }

    #[test]
    fn test_escape_with_escape_unicode() {
        let cases = [
            ("", "^(?:)$"),
            ("_bcd%", "^(?:.bcd.*)$"),
            ("bcd%", "^(?:bcd.*)$"),
            (r#"_bcd\%"#, r#"^(?:.bcd\\.*)$"#),
            ("bcd[]ee", "^(?:bcd[]ee)$"),
            (r#"bcd[]ee"""#, r#"^(?:bcd[]ee"")$"#),
            (r#"bcd[]"ee""#, r#"^(?:bcd[]"ee")$"#),
            ("bcd[pp]ee", "^(?:bcd[pp]ee)$"),
            ("bcd[pp_%.]ee", "^(?:bcd[pp_%.]ee)$"),
            ("bcd[pp_%.]ee_%.", r#"^(?:bcd[pp_%.]ee..*\.)$"#),
            ("bcd[pp_%.](ee_%.)", r#"^(?:bcd[pp_%.](?:ee..*\.))$"#),
            (r#"%💅"o_b💅"%"#, "^(?:.*){1,1}?(o.b){1,1}(?:.*)$"),
        ];

        assert_escapes_with("💅", &cases);

        // Two multi-byte characters still count as more than one character.
        let pat = "xxx";
        let mut writer = String::new();
        let res = similar_to_escape_with_escape_text(pat, "💅💅", &mut writer);
        assert!(res.is_err())
    }

    #[test]
    fn test_escape_with_escape_disabled() {
        let cases = [
            ("", "^(?:)$"),
            ("_bcd%", "^(?:.bcd.*)$"),
            ("bcd%", "^(?:bcd.*)$"),
            (r#"_bcd\%"#, r#"^(?:.bcd\\.*)$"#),
            ("bcd[]ee", "^(?:bcd[]ee)$"),
            (r#"bcd[]ee"""#, r#"^(?:bcd[]ee"")$"#),
            (r#"bcd[]"ee""#, r#"^(?:bcd[]"ee")$"#),
            ("bcd[pp]ee", "^(?:bcd[pp]ee)$"),
            ("bcd[pp_%.]ee", "^(?:bcd[pp_%.]ee)$"),
            ("bcd[pp_%.]ee_%.", r#"^(?:bcd[pp_%.]ee..*\.)$"#),
            ("bcd[pp_%.](ee_%.)", r#"^(?:bcd[pp_%.](?:ee..*\.))$"#),
            (r#"%\"o_b\"%"#, r#"^(?:.*\\"o.b\\".*)$"#),
        ];

        // An empty escape string turns escape processing off entirely.
        assert_escapes_with("", &cases);
    }
}