// risingwave_expr_impl/scalar/similar_to_escape.rs
use risingwave_expr::{ExprError, Result, function};

17fn similar_escape_internal(
21 pat: &str,
22 esc_text: Option<char>,
23 writer: &mut impl std::fmt::Write,
24) -> std::result::Result<(), ExprError> {
25 macro_rules! write_ {
26 ($s:expr) => {
27 write!(writer, "{}", $s).unwrap()
28 };
29 }
30
31 write_!("^(?:");
32
33 let mut nquotes = 0;
34 let mut afterescape = false;
35 let mut incharclass = false;
36
37 for chr in pat.chars() {
38 match chr {
39 c if afterescape => {
40 if c == '"' && !incharclass {
41 match nquotes {
42 0 => write_!("){1,1}?("),
43 1 => write_!("){1,1}(?:"),
44 _ => {
45 return Err(ExprError::InvalidParam {
46 name: "pat",
47 reason: "SQL regular expression may not contain more than two escape-double-quote separators".into()
48 });
49 }
50 }
51 nquotes += 1;
52 } else {
53 write_!('\\');
54 write_!(c);
55 }
56
57 afterescape = false;
58 }
59 c if esc_text.is_some_and(|t| t == c) => {
60 afterescape = true;
61 }
62 c if incharclass => {
63 if c == '\\' {
64 write_!('\\');
65 }
66 write_!(c);
67
68 if c == ']' {
69 incharclass = false;
70 }
71 }
72 c @ '[' => {
73 write_!(c);
74 incharclass = true;
75 }
76 '%' => {
77 write_!(".*");
78 }
79 '_' => {
80 write_!('.');
81 }
82 '(' => {
83 write_!("(?:");
85 }
86 c @ ('\\' | '.' | '^' | '$') => {
87 write_!('\\');
88 write_!(c);
89 }
90 c => {
91 write_!(c);
92 }
93 }
94 }
95
96 write_!(")$");
97
98 Ok(())
99}
100
101#[function(
102 "similar_to_escape(varchar) -> varchar",
104)]
105fn similar_to_escape_default(pat: &str, writer: &mut impl std::fmt::Write) -> Result<()> {
106 similar_escape_internal(pat, Some('\\'), writer)
107}
108
109#[function(
110 "similar_to_escape(varchar, varchar) -> varchar"
112)]
113fn similar_to_escape_with_escape_text(
114 pat: &str,
115 esc_text: &str,
116 writer: &mut impl std::fmt::Write,
117) -> Result<()> {
118 if esc_text.chars().nth(1).is_some() {
119 return Err(ExprError::InvalidParam {
120 name: "escape string",
121 reason: format!(
122 "Invalid escape string: `{}`, must be empty or one character",
123 esc_text
124 )
125 .into(),
126 });
127 }
128
129 similar_escape_internal(pat, esc_text.chars().next(), writer)
130}
131
#[cfg(test)]
mod tests {
    use super::{similar_to_escape_default, similar_to_escape_with_escape_text};

    /// Converts every `(pattern, expected)` pair with the given escape text and
    /// checks both that the conversion succeeds and that the output matches.
    ///
    /// The result is asserted rather than discarded so an unexpected error is
    /// reported directly, together with the offending pattern.
    fn check_with_escape(esc_text: &str, cases: &[(&str, &str)]) {
        for &(pat, escaped) in cases {
            let mut writer = String::new();
            let res = similar_to_escape_with_escape_text(pat, esc_text, &mut writer);
            assert!(
                res.is_ok(),
                "pattern `{}` with escape `{}` should convert",
                pat,
                esc_text
            );
            assert_eq!(
                writer, escaped,
                "pattern `{}` with escape `{}`",
                pat, esc_text
            );
        }
    }

    #[test]
    fn test_default_escape() {
        let cases = [
            ("", "^(?:)$"),
            ("_bcd%", r#"^(?:.bcd.*)$"#),
            ("bcd%", r#"^(?:bcd.*)$"#),
            (r#"_bcd\%"#, r#"^(?:.bcd\%)$"#),
            ("bcd[]ee", "^(?:bcd[]ee)$"),
            (r#"bcd[]"ee""#, r#"^(?:bcd[]"ee")$"#),
            ("bcd[pp_%.]ee", "^(?:bcd[pp_%.]ee)$"),
            ("bcd[pp_%.]ee_%.", r#"^(?:bcd[pp_%.]ee..*\.)$"#),
            ("bcd[pp_%.](ee_%.)", r#"^(?:bcd[pp_%.](?:ee..*\.))$"#),
            (r#"%\"o_b\"%"#, "^(?:.*){1,1}?(o.b){1,1}(?:.*)$"),
        ];

        for (pat, escaped) in cases {
            let mut writer = String::new();
            let res = similar_to_escape_default(pat, &mut writer);
            assert!(res.is_ok(), "pattern `{}` should convert", pat);
            assert_eq!(writer, escaped, "pattern `{}`", pat);
        }

        // A third escaped double quote is one separator too many.
        let pat = r#"one\"two\"three\"four"#;
        let mut writer = String::new();
        let res = similar_to_escape_default(pat, &mut writer);
        assert!(res.is_err());
    }

    #[test]
    fn test_escape_with_escape_text() {
        check_with_escape(
            "#",
            &[
                ("", "^(?:)$"),
                ("_bcd%", "^(?:.bcd.*)$"),
                ("bcd%", "^(?:bcd.*)$"),
                (r#"_bcd\%"#, r#"^(?:.bcd\\.*)$"#),
                ("bcd[]ee", "^(?:bcd[]ee)$"),
                (r#"bcd[]ee"""#, r#"^(?:bcd[]ee"")$"#),
                (r#"bcd[]"ee""#, r#"^(?:bcd[]"ee")$"#),
                ("bcd[pp]ee", "^(?:bcd[pp]ee)$"),
                ("bcd[pp_%.]ee", "^(?:bcd[pp_%.]ee)$"),
                ("bcd[pp_%.]ee_%.", r#"^(?:bcd[pp_%.]ee..*\.)$"#),
                ("bcd[pp_%.](ee_%.)", r#"^(?:bcd[pp_%.](?:ee..*\.))$"#),
                (r#"%#"o_b#"%"#, "^(?:.*){1,1}?(o.b){1,1}(?:.*)$"),
            ],
        );

        // More than one escape character must be rejected.
        let pat = "xxx";
        let mut writer = String::new();
        let res = similar_to_escape_with_escape_text(pat, "##", &mut writer);
        assert!(res.is_err())
    }

    #[test]
    fn test_escape_with_escape_unicode() {
        check_with_escape(
            "💅",
            &[
                ("", "^(?:)$"),
                ("_bcd%", "^(?:.bcd.*)$"),
                ("bcd%", "^(?:bcd.*)$"),
                (r#"_bcd\%"#, r#"^(?:.bcd\\.*)$"#),
                ("bcd[]ee", "^(?:bcd[]ee)$"),
                (r#"bcd[]ee"""#, r#"^(?:bcd[]ee"")$"#),
                (r#"bcd[]"ee""#, r#"^(?:bcd[]"ee")$"#),
                ("bcd[pp]ee", "^(?:bcd[pp]ee)$"),
                ("bcd[pp_%.]ee", "^(?:bcd[pp_%.]ee)$"),
                ("bcd[pp_%.]ee_%.", r#"^(?:bcd[pp_%.]ee..*\.)$"#),
                ("bcd[pp_%.](ee_%.)", r#"^(?:bcd[pp_%.](?:ee..*\.))$"#),
                (r#"%💅"o_b💅"%"#, "^(?:.*){1,1}?(o.b){1,1}(?:.*)$"),
            ],
        );

        // Two multi-byte characters are still two characters.
        let pat = "xxx";
        let mut writer = String::new();
        let res = similar_to_escape_with_escape_text(pat, "💅💅", &mut writer);
        assert!(res.is_err())
    }

    #[test]
    fn test_escape_with_escape_disabled() {
        // An empty escape text turns escaping off, so `\` is an ordinary character.
        check_with_escape(
            "",
            &[
                ("", "^(?:)$"),
                ("_bcd%", "^(?:.bcd.*)$"),
                ("bcd%", "^(?:bcd.*)$"),
                (r#"_bcd\%"#, r#"^(?:.bcd\\.*)$"#),
                ("bcd[]ee", "^(?:bcd[]ee)$"),
                (r#"bcd[]ee"""#, r#"^(?:bcd[]ee"")$"#),
                (r#"bcd[]"ee""#, r#"^(?:bcd[]"ee")$"#),
                ("bcd[pp]ee", "^(?:bcd[pp]ee)$"),
                ("bcd[pp_%.]ee", "^(?:bcd[pp_%.]ee)$"),
                ("bcd[pp_%.]ee_%.", r#"^(?:bcd[pp_%.]ee..*\.)$"#),
                ("bcd[pp_%.](ee_%.)", r#"^(?:bcd[pp_%.](?:ee..*\.))$"#),
                (r#"%\"o_b\"%"#, r#"^(?:.*\\"o.b\\".*)$"#),
            ],
        );
    }
}