risingwave_expr_impl/scalar/overlay.rs
1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use risingwave_expr::{ExprError, Result, function};
16
17/// Replaces a substring of the given string with a new substring.
18///
19/// ```slt
20/// query T
21/// select overlay('αβγδεζ' placing '💯' from 3);
22/// ----
23/// αβ💯δεζ
24/// ```
25#[function("overlay(varchar, varchar, int4) -> varchar")]
26pub fn overlay(
27 s: &str,
28 new_sub_str: &str,
29 start: i32,
30 writer: &mut impl std::fmt::Write,
31) -> Result<()> {
32 let sub_len = new_sub_str
33 .chars()
34 .count()
35 .try_into()
36 .map_err(|_| ExprError::NumericOutOfRange)?;
37 overlay_for(s, new_sub_str, start, sub_len, writer)
38}
39
40/// Replaces a substring of the given string with a new substring.
41///
42/// ```slt
43/// statement error not positive
44/// select overlay('αβγδεζ' placing '①②③' from 0);
45///
46/// query T
47/// select overlay('αβγδεζ' placing '①②③' from 10);
48/// ----
49/// αβγδεζ①②③
50///
51/// query T
52/// select overlay('αβγδεζ' placing '①②③' from 4 for 2);
53/// ----
54/// αβγ①②③ζ
55///
56/// query T
57/// select overlay('αβγδεζ' placing '①②③' from 4);
58/// ----
59/// αβγ①②③
60///
61/// query T
62/// select overlay('αβγδεζ' placing '①②③' from 2 for 4);
63/// ----
64/// α①②③ζ
65///
66/// query T
67/// select overlay('αβγδεζ' placing '①②③' from 2 for 7);
68/// ----
69/// α①②③
70///
71/// query T
72/// select overlay('αβγδεζ' placing '①②③' from 4 for 0);
73/// ----
74/// αβγ①②③δεζ
75///
76/// query T
77/// select overlay('αβγδεζ' placing '①②③' from 4 for -2);
78/// ----
79/// αβγ①②③βγδεζ
80///
81/// query T
82/// select overlay('αβγδεζ' placing '①②③' from 4 for -1000);
83/// ----
84/// αβγ①②③αβγδεζ
85/// ```
86#[function("overlay(varchar, varchar, int4, int4) -> varchar")]
87pub fn overlay_for(
88 s: &str,
89 new_sub_str: &str,
90 start: i32,
91 count: i32,
92 writer: &mut impl std::fmt::Write,
93) -> Result<()> {
94 if start <= 0 {
95 return Err(ExprError::InvalidParam {
96 name: "start",
97 reason: format!("{start} is not positive").into(),
98 });
99 }
100
101 let mut chars = s.char_indices().skip(start as usize - 1).peekable();
102
103 // write the substring before the overlay.
104 let leading = match chars.peek() {
105 Some((i, _)) => &s[..*i],
106 None => s,
107 };
108 writer.write_str(leading).unwrap();
109
110 // write the new substring.
111 writer.write_str(new_sub_str).unwrap();
112
113 let Ok(count) = count.try_into() else {
114 // For negative `count`, which is rare in practice, we hand over to `substr`
115 let start_right = start
116 .checked_add(count)
117 .ok_or(ExprError::NumericOutOfRange)?;
118 return super::substr::substr_start(s, start_right, writer);
119 };
120
121 // write the substring after the overlay.
122 if let Some((i, _)) = chars.nth(count) {
123 writer.write_str(&s[i..]).unwrap();
124 }
125
126 Ok(())
127}
128
129/// Replaces a subsequence of the given bytea with a new bytea value.
130///
131/// # Example
132///
133/// ```slt
134/// query T
135/// select overlay('\x616263646566'::bytea placing '\x9999'::bytea from 3);
136/// ----
137/// \x616299996566
138/// ```
139#[function("overlay(bytea, bytea, int4) -> bytea")]
140pub fn overlay_bytea(
141 s: &[u8],
142 new_sub_str: &[u8],
143 start: i32,
144 writer: &mut impl std::io::Write,
145) -> Result<()> {
146 let count = new_sub_str
147 .len()
148 .try_into()
149 .map_err(|_| ExprError::NumericOutOfRange)?;
150 overlay_for_bytea(s, new_sub_str, start, count, writer)
151}
152
153/// Replaces a range of bytes in a bytea value with another bytea.
154///
155/// ```slt
156/// statement error not positive
157/// select overlay('\x616263'::bytea placing '\x313233'::bytea from 0);
158///
159/// query T
160/// select overlay('\x616263'::bytea placing '\x313233'::bytea from 10);
161/// ----
162/// \x616263313233
163///
164/// query T
165/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for 2);
166/// ----
167/// \x61626331323366
168///
169/// query T
170/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4);
171/// ----
172/// \x616263313233
173///
174/// query T
175/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 2 for 4);
176/// ----
177/// \x6131323366
178///
179/// query T
180/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 2 for 7);
181/// ----
182/// \x61313233
183///
184/// query T
185/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for 0);
186/// ----
187/// \x616263313233646566
188///
189/// query T
190/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for -2);
191/// ----
192/// \x6162633132336263646566
193///
194/// query T
195/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for -1000);
196/// ----
197/// \x616263313233616263646566
198/// ```
199#[function("overlay(bytea, bytea, int4, int4) -> bytea")]
200pub fn overlay_for_bytea(
201 s: &[u8],
202 new_sub_str: &[u8],
203 start: i32,
204 count: i32,
205 writer: &mut impl std::io::Write,
206) -> Result<()> {
207 if start <= 0 {
208 return Err(ExprError::InvalidParam {
209 name: "start",
210 reason: format!("{start} is not positive").into(),
211 });
212 }
213
214 // write the substring_bytea before the overlay.
215 let start_idx = (start - 1) as usize;
216 if start_idx >= s.len() {
217 writer.write_all(s).unwrap();
218 } else {
219 writer.write_all(&s[..start_idx]).unwrap();
220 }
221
222 // write the new substring_bytea.
223 writer.write_all(new_sub_str).unwrap();
224
225 if count < 0 {
226 // For negative `count`, which is rare in practice, we hand over to `substr_bytea`
227 let start_right = start
228 .checked_add(count)
229 .ok_or(ExprError::NumericOutOfRange)?;
230 super::substr::substr_start_bytea(s, start_right, writer);
231 return Ok(());
232 };
233
234 // write the substring_bytea after the overlay.
235 let count = count as usize;
236 let skip_end = start_idx.saturating_add(count);
237 if skip_end <= s.len() {
238 writer.write_all(&s[skip_end..]).unwrap();
239 }
240 Ok(())
241}
242
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the varchar overlay functions: `overlay` when no count is given,
    /// `overlay_for` otherwise.
    #[test]
    fn test_overlay() {
        // Runs one scenario and compares the produced string with `expected`.
        // `#[track_caller]` makes a failure point at the calling line below.
        #[track_caller]
        fn check(s: &str, new_sub_str: &str, start: i32, count: Option<i32>, expected: &str) {
            let mut actual = String::new();
            let outcome = if let Some(count) = count {
                overlay_for(s, new_sub_str, start, count, &mut actual)
            } else {
                overlay(s, new_sub_str, start, &mut actual)
            };
            outcome.unwrap();
            assert_eq!(actual, expected);
        }

        // Replacement of the same length as the replaced range.
        check("aaa__aaa", "XY", 4, None, "aaaXYaaa");

        // Place at end.
        check("aaa", "XY", 4, None, "aaaXY");

        // Place at start.
        check("aaa", "XY", 1, Some(0), "XYaaa");

        // Replace shorter string.
        check("aaa_aaa", "XYZ", 4, Some(1), "aaaXYZaaa");
        check("aaaaaa", "XYZ", 4, Some(0), "aaaXYZaaa");

        // Replace longer string.
        check("aaa___aaa", "X", 4, Some(3), "aaaXaaa");

        // start too large.
        check("aaa", "XY", 123, None, "aaaXY");

        // count too small or large.
        check("aaa", "X", 4, Some(-123), "aaaXaaa");
        check("aaa_", "X", 4, Some(123), "aaaX");

        // very large start and count
        check("aaa", "X", i32::MAX, Some(i32::MAX), "aaaX");
    }

    /// Exercises the bytea overlay functions with the same scenarios as `test_overlay`,
    /// spelled as escaped bytes (`\x61` = `a`, `\x5f` = `_`, `\x58`..`\x5A` = `X`..`Z`).
    #[test]
    fn test_overlay_bytea() {
        // Runs one scenario and compares the produced bytes with `expected`.
        // `#[track_caller]` makes a failure point at the calling line below.
        #[track_caller]
        fn check(s: &[u8], new_sub_str: &[u8], start: i32, count: Option<i32>, expected: &[u8]) {
            let mut actual = Vec::new();
            if let Some(count) = count {
                overlay_for_bytea(s, new_sub_str, start, count, &mut actual).unwrap();
            } else {
                overlay_bytea(s, new_sub_str, start, &mut actual).unwrap();
            }
            assert_eq!(&actual, expected);
        }

        // Arguments: (input, replace, start, count, expected)

        // Replacement of the same length as the replaced range.
        check(
            b"\x61\x61\x61\x5f\x5f\x61\x61\x61",
            b"\x58\x59",
            4,
            None,
            b"\x61\x61\x61\x58\x59\x61\x61\x61",
        );

        // Place at end
        check(
            b"\x61\x61\x61",
            b"\x58\x59",
            4,
            None,
            b"\x61\x61\x61\x58\x59",
        );

        // Place at start
        check(
            b"\x61\x61\x61",
            b"\x58\x59",
            1,
            Some(0),
            b"\x58\x59\x61\x61\x61",
        );

        // Replace shorter string
        check(
            b"\x61\x61\x61\x5f\x61\x61\x61",
            b"\x58\x59\x5A",
            4,
            Some(1),
            b"\x61\x61\x61\x58\x59\x5A\x61\x61\x61",
        );
        check(
            b"\x61\x61\x61\x61\x61\x61",
            b"\x58\x59\x5A",
            4,
            Some(0),
            b"\x61\x61\x61\x58\x59\x5A\x61\x61\x61",
        );

        // Replace longer string
        check(
            b"\x61\x61\x61\x5f\x5f\x5f\x61\x61\x61",
            b"\x58",
            4,
            Some(3),
            b"\x61\x61\x61\x58\x61\x61\x61",
        );

        // start too large
        check(
            b"\x61\x61\x61",
            b"\x58\x59",
            123,
            None,
            b"\x61\x61\x61\x58\x59",
        );

        // count too small or large
        check(
            b"\x61\x61\x61",
            b"\x58",
            4,
            Some(-123),
            b"\x61\x61\x61\x58\x61\x61\x61",
        );
        check(
            b"\x61\x61\x61\x5f",
            b"\x58",
            4,
            Some(123),
            b"\x61\x61\x61\x58",
        );

        // very large start and count
        check(
            b"\x61\x61\x61",
            b"\x58",
            i32::MAX,
            Some(i32::MAX),
            b"\x61\x61\x61\x58",
        );
    }
}