risingwave_expr_impl/scalar/overlay.rs
1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Write;
16
17use risingwave_expr::{ExprError, Result, function};
18
19/// Replaces a substring of the given string with a new substring.
20///
21/// ```slt
22/// query T
23/// select overlay('αβγδεζ' placing '💯' from 3);
24/// ----
25/// αβ💯δεζ
26/// ```
27#[function("overlay(varchar, varchar, int4) -> varchar")]
28pub fn overlay(s: &str, new_sub_str: &str, start: i32, writer: &mut impl Write) -> Result<()> {
29 let sub_len = new_sub_str
30 .chars()
31 .count()
32 .try_into()
33 .map_err(|_| ExprError::NumericOutOfRange)?;
34 overlay_for(s, new_sub_str, start, sub_len, writer)
35}
36
37/// Replaces a substring of the given string with a new substring.
38///
39/// ```slt
40/// statement error not positive
41/// select overlay('αβγδεζ' placing '①②③' from 0);
42///
43/// query T
44/// select overlay('αβγδεζ' placing '①②③' from 10);
45/// ----
46/// αβγδεζ①②③
47///
48/// query T
49/// select overlay('αβγδεζ' placing '①②③' from 4 for 2);
50/// ----
51/// αβγ①②③ζ
52///
53/// query T
54/// select overlay('αβγδεζ' placing '①②③' from 4);
55/// ----
56/// αβγ①②③
57///
58/// query T
59/// select overlay('αβγδεζ' placing '①②③' from 2 for 4);
60/// ----
61/// α①②③ζ
62///
63/// query T
64/// select overlay('αβγδεζ' placing '①②③' from 2 for 7);
65/// ----
66/// α①②③
67///
68/// query T
69/// select overlay('αβγδεζ' placing '①②③' from 4 for 0);
70/// ----
71/// αβγ①②③δεζ
72///
73/// query T
74/// select overlay('αβγδεζ' placing '①②③' from 4 for -2);
75/// ----
76/// αβγ①②③βγδεζ
77///
78/// query T
79/// select overlay('αβγδεζ' placing '①②③' from 4 for -1000);
80/// ----
81/// αβγ①②③αβγδεζ
82/// ```
83#[function("overlay(varchar, varchar, int4, int4) -> varchar")]
84pub fn overlay_for(
85 s: &str,
86 new_sub_str: &str,
87 start: i32,
88 count: i32,
89 writer: &mut impl Write,
90) -> Result<()> {
91 if start <= 0 {
92 return Err(ExprError::InvalidParam {
93 name: "start",
94 reason: format!("{start} is not positive").into(),
95 });
96 }
97
98 let mut chars = s.char_indices().skip(start as usize - 1).peekable();
99
100 // write the substring before the overlay.
101 let leading = match chars.peek() {
102 Some((i, _)) => &s[..*i],
103 None => s,
104 };
105 writer.write_str(leading).unwrap();
106
107 // write the new substring.
108 writer.write_str(new_sub_str).unwrap();
109
110 let Ok(count) = count.try_into() else {
111 // For negative `count`, which is rare in practice, we hand over to `substr`
112 let start_right = start
113 .checked_add(count)
114 .ok_or(ExprError::NumericOutOfRange)?;
115 return super::substr::substr_start(s, start_right, writer);
116 };
117
118 // write the substring after the overlay.
119 if let Some((i, _)) = chars.nth(count) {
120 writer.write_str(&s[i..]).unwrap();
121 }
122
123 Ok(())
124}
125
126/// Replaces a subsequence of the given bytea with a new bytea value.
127///
128/// # Example
129///
130/// ```slt
131/// query T
132/// select overlay('\x616263646566'::bytea placing '\x9999'::bytea from 3);
133/// ----
134/// \x616299996566
135/// ```
136#[function("overlay(bytea, bytea, int4) -> bytea")]
137pub fn overlay_bytea(s: &[u8], new_sub_str: &[u8], start: i32) -> Result<Box<[u8]>> {
138 let count = new_sub_str
139 .len()
140 .try_into()
141 .map_err(|_| ExprError::NumericOutOfRange)?;
142 overlay_for_bytea(s, new_sub_str, start, count)
143}
144
145/// Replaces a range of bytes in a bytea value with another bytea.
146///
147/// ```slt
148/// statement error not positive
149/// select overlay('\x616263'::bytea placing '\x313233'::bytea from 0);
150///
151/// query T
152/// select overlay('\x616263'::bytea placing '\x313233'::bytea from 10);
153/// ----
154/// \x616263313233
155///
156/// query T
157/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for 2);
158/// ----
159/// \x61626331323366
160///
161/// query T
162/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4);
163/// ----
164/// \x616263313233
165///
166/// query T
167/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 2 for 4);
168/// ----
169/// \x6131323366
170///
171/// query T
172/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 2 for 7);
173/// ----
174/// \x61313233
175///
176/// query T
177/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for 0);
178/// ----
179/// \x616263313233646566
180///
181/// query T
182/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for -2);
183/// ----
184/// \x6162633132336263646566
185///
186/// query T
187/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for -1000);
188/// ----
189/// \x616263313233616263646566
190/// ```
191#[function("overlay(bytea, bytea, int4, int4) -> bytea")]
192pub fn overlay_for_bytea(
193 s: &[u8],
194 new_sub_str: &[u8],
195 start: i32,
196 count: i32,
197) -> Result<Box<[u8]>> {
198 if start <= 0 {
199 return Err(ExprError::InvalidParam {
200 name: "start",
201 reason: format!("{start} is not positive").into(),
202 });
203 }
204
205 // write the substring_bytea before the overlay.
206 let start_idx = (start - 1) as usize;
207 let mut result = Vec::with_capacity(s.len() + new_sub_str.len());
208 if start_idx >= s.len() {
209 result.extend_from_slice(s);
210 } else {
211 result.extend_from_slice(&s[..start_idx]);
212 }
213
214 // write the new substring_bytea.
215 result.extend_from_slice(new_sub_str);
216
217 if count < 0 {
218 // For negative `count`, which is rare in practice, we hand over to `substr_bytea`
219 let start_right = start
220 .checked_add(count)
221 .ok_or(ExprError::NumericOutOfRange)?;
222 result.extend_from_slice(&super::substr::substr_start_bytea(s, start_right));
223 return Ok(result.into_boxed_slice());
224 };
225
226 // write the substring_bytea after the overlay.
227 let count = count as usize;
228 let skip_end = start_idx.saturating_add(count);
229 if skip_end <= s.len() {
230 result.extend_from_slice(&s[skip_end..]);
231 }
232 Ok(result.into_boxed_slice())
233}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the varchar overloads: `count == None` goes through `overlay`,
    /// `Some(count)` goes through `overlay_for`.
    #[test]
    fn test_overlay() {
        /// Runs one overlay call and compares the written output with `expected`.
        #[track_caller]
        fn check(s: &str, new_sub_str: &str, start: i32, count: Option<i32>, expected: &str) {
            let mut buf = String::new();
            let outcome = if let Some(count) = count {
                overlay_for(s, new_sub_str, start, count, &mut buf)
            } else {
                overlay(s, new_sub_str, start, &mut buf)
            };
            outcome.unwrap();
            assert_eq!(buf, expected);
        }

        check("aaa__aaa", "XY", 4, None, "aaaXYaaa");
        // Place at end.
        check("aaa", "XY", 4, None, "aaaXY");
        // Place at start.
        check("aaa", "XY", 1, Some(0), "XYaaa");
        // Replace shorter string.
        check("aaa_aaa", "XYZ", 4, Some(1), "aaaXYZaaa");
        check("aaaaaa", "XYZ", 4, Some(0), "aaaXYZaaa");
        // Replace longer string.
        check("aaa___aaa", "X", 4, Some(3), "aaaXaaa");
        // start too large.
        check("aaa", "XY", 123, None, "aaaXY");
        // count too small or large.
        check("aaa", "X", 4, Some(-123), "aaaXaaa");
        check("aaa_", "X", 4, Some(123), "aaaX");
        // very large start and count
        check("aaa", "X", i32::MAX, Some(i32::MAX), "aaaX");
    }

    /// Exercises the bytea overloads with the same scenarios as `test_overlay`:
    /// `count == None` goes through `overlay_bytea`, `Some(count)` through
    /// `overlay_for_bytea`.
    #[test]
    fn test_overlay_bytea() {
        /// Runs one overlay call and compares the returned bytes with `expected`.
        #[track_caller]
        fn check(s: &[u8], new_sub_str: &[u8], start: i32, count: Option<i32>, expected: &[u8]) {
            let outcome = if let Some(count) = count {
                overlay_for_bytea(s, new_sub_str, start, count)
            } else {
                overlay_bytea(s, new_sub_str, start)
            };
            let got: Box<[u8]> = outcome.unwrap();
            assert_eq!(&*got, expected);
        }

        // (input, replace, start, count, expected)
        check(b"aaa__aaa", b"XY", 4, None, b"aaaXYaaa");
        // Place at end
        check(b"aaa", b"XY", 4, None, b"aaaXY");
        // Place at start
        check(b"aaa", b"XY", 1, Some(0), b"XYaaa");
        // Replace shorter string
        check(b"aaa_aaa", b"XYZ", 4, Some(1), b"aaaXYZaaa");
        check(b"aaaaaa", b"XYZ", 4, Some(0), b"aaaXYZaaa");
        // Replace longer string
        check(b"aaa___aaa", b"X", 4, Some(3), b"aaaXaaa");
        // start too large
        check(b"aaa", b"XY", 123, None, b"aaaXY");
        // count too small or large
        check(b"aaa", b"X", 4, Some(-123), b"aaaXaaa");
        check(b"aaa_", b"X", 4, Some(123), b"aaaX");
        // very large start and count
        check(b"aaa", b"X", i32::MAX, Some(i32::MAX), b"aaaX");
    }
}