risingwave_expr_impl/scalar/
overlay.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use risingwave_expr::{ExprError, Result, function};
16
17/// Replaces a substring of the given string with a new substring.
18///
19/// ```slt
20/// query T
21/// select overlay('αβγδεζ' placing '💯' from 3);
22/// ----
23/// αβ💯δεζ
24/// ```
25#[function("overlay(varchar, varchar, int4) -> varchar")]
26pub fn overlay(
27    s: &str,
28    new_sub_str: &str,
29    start: i32,
30    writer: &mut impl std::fmt::Write,
31) -> Result<()> {
32    let sub_len = new_sub_str
33        .chars()
34        .count()
35        .try_into()
36        .map_err(|_| ExprError::NumericOutOfRange)?;
37    overlay_for(s, new_sub_str, start, sub_len, writer)
38}
39
40/// Replaces a substring of the given string with a new substring.
41///
42/// ```slt
43/// statement error not positive
44/// select overlay('αβγδεζ' placing '①②③' from 0);
45///
46/// query T
47/// select overlay('αβγδεζ' placing '①②③' from 10);
48/// ----
49/// αβγδεζ①②③
50///
51/// query T
52/// select overlay('αβγδεζ' placing '①②③' from 4 for 2);
53/// ----
54/// αβγ①②③ζ
55///
56/// query T
57/// select overlay('αβγδεζ' placing '①②③' from 4);
58/// ----
59/// αβγ①②③
60///
61/// query T
62/// select overlay('αβγδεζ' placing '①②③' from 2 for 4);
63/// ----
64/// α①②③ζ
65///
66/// query T
67/// select overlay('αβγδεζ' placing '①②③' from 2 for 7);
68/// ----
69/// α①②③
70///
71/// query T
72/// select overlay('αβγδεζ' placing '①②③' from 4 for 0);
73/// ----
74/// αβγ①②③δεζ
75///
76/// query T
77/// select overlay('αβγδεζ' placing '①②③' from 4 for -2);
78/// ----
79/// αβγ①②③βγδεζ
80///
81/// query T
82/// select overlay('αβγδεζ' placing '①②③' from 4 for -1000);
83/// ----
84/// αβγ①②③αβγδεζ
85/// ```
86#[function("overlay(varchar, varchar, int4, int4) -> varchar")]
87pub fn overlay_for(
88    s: &str,
89    new_sub_str: &str,
90    start: i32,
91    count: i32,
92    writer: &mut impl std::fmt::Write,
93) -> Result<()> {
94    if start <= 0 {
95        return Err(ExprError::InvalidParam {
96            name: "start",
97            reason: format!("{start} is not positive").into(),
98        });
99    }
100
101    let mut chars = s.char_indices().skip(start as usize - 1).peekable();
102
103    // write the substring before the overlay.
104    let leading = match chars.peek() {
105        Some((i, _)) => &s[..*i],
106        None => s,
107    };
108    writer.write_str(leading).unwrap();
109
110    // write the new substring.
111    writer.write_str(new_sub_str).unwrap();
112
113    let Ok(count) = count.try_into() else {
114        // For negative `count`, which is rare in practice, we hand over to `substr`
115        let start_right = start
116            .checked_add(count)
117            .ok_or(ExprError::NumericOutOfRange)?;
118        return super::substr::substr_start(s, start_right, writer);
119    };
120
121    // write the substring after the overlay.
122    if let Some((i, _)) = chars.nth(count) {
123        writer.write_str(&s[i..]).unwrap();
124    }
125
126    Ok(())
127}
128
129/// Replaces a subsequence of the given bytea with a new bytea value.
130///
131/// # Example
132///
133/// ```slt
134/// query T
135/// select overlay('\x616263646566'::bytea placing '\x9999'::bytea from 3);
136/// ----
137/// \x616299996566
138/// ```
139#[function("overlay(bytea, bytea, int4) -> bytea")]
140pub fn overlay_bytea(
141    s: &[u8],
142    new_sub_str: &[u8],
143    start: i32,
144    writer: &mut impl std::io::Write,
145) -> Result<()> {
146    let count = new_sub_str
147        .len()
148        .try_into()
149        .map_err(|_| ExprError::NumericOutOfRange)?;
150    overlay_for_bytea(s, new_sub_str, start, count, writer)
151}
152
153/// Replaces a range of bytes in a bytea value with another bytea.
154///
155/// ```slt
156/// statement error not positive
157/// select overlay('\x616263'::bytea placing '\x313233'::bytea from 0);
158///
159/// query T
160/// select overlay('\x616263'::bytea placing '\x313233'::bytea from 10);
161/// ----
162/// \x616263313233
163///
164/// query T
165/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for 2);
166/// ----
167/// \x61626331323366
168///
169/// query T
170/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4);
171/// ----
172/// \x616263313233
173///
174/// query T
175/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 2 for 4);
176/// ----
177/// \x6131323366
178///
179/// query T
180/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 2 for 7);
181/// ----
182/// \x61313233
183///
184/// query T
185/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for 0);
186/// ----
187/// \x616263313233646566
188///
189/// query T
190/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for -2);
191/// ----
192/// \x6162633132336263646566
193///
194/// query T
195/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for -1000);
196/// ----
197/// \x616263313233616263646566
198/// ```
199#[function("overlay(bytea, bytea, int4, int4) -> bytea")]
200pub fn overlay_for_bytea(
201    s: &[u8],
202    new_sub_str: &[u8],
203    start: i32,
204    count: i32,
205    writer: &mut impl std::io::Write,
206) -> Result<()> {
207    if start <= 0 {
208        return Err(ExprError::InvalidParam {
209            name: "start",
210            reason: format!("{start} is not positive").into(),
211        });
212    }
213
214    // write the substring_bytea before the overlay.
215    let start_idx = (start - 1) as usize;
216    if start_idx >= s.len() {
217        writer.write_all(s).unwrap();
218    } else {
219        writer.write_all(&s[..start_idx]).unwrap();
220    }
221
222    // write the new substring_bytea.
223    writer.write_all(new_sub_str).unwrap();
224
225    if count < 0 {
226        // For negative `count`, which is rare in practice, we hand over to `substr_bytea`
227        let start_right = start
228            .checked_add(count)
229            .ok_or(ExprError::NumericOutOfRange)?;
230        super::substr::substr_start_bytea(s, start_right, writer);
231        return Ok(());
232    };
233
234    // write the substring_bytea after the overlay.
235    let count = count as usize;
236    let skip_end = start_idx.saturating_add(count);
237    if skip_end <= s.len() {
238        writer.write_all(&s[skip_end..]).unwrap();
239    }
240    Ok(())
241}
242
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the varchar overlay, with and without an explicit `count`.
    #[test]
    fn test_overlay() {
        /// Runs a single overlay and compares the written text with `expected`.
        ///
        /// `None` for `count` goes through `overlay`; `Some(n)` goes through
        /// `overlay_for`.
        #[track_caller]
        fn case(s: &str, new_sub_str: &str, start: i32, count: Option<i32>, expected: &str) {
            let mut output = String::new();
            let outcome = if let Some(count) = count {
                overlay_for(s, new_sub_str, start, count, &mut output)
            } else {
                overlay(s, new_sub_str, start, &mut output)
            };
            outcome.unwrap();
            assert_eq!(output, expected);
        }

        case("aaa__aaa", "XY", 4, None, "aaaXYaaa");
        // Place at end.
        case("aaa", "XY", 4, None, "aaaXY");
        // Place at start.
        case("aaa", "XY", 1, Some(0), "XYaaa");
        // Replace shorter string.
        case("aaa_aaa", "XYZ", 4, Some(1), "aaaXYZaaa");
        case("aaaaaa", "XYZ", 4, Some(0), "aaaXYZaaa");
        // Replace longer string.
        case("aaa___aaa", "X", 4, Some(3), "aaaXaaa");
        // start too large.
        case("aaa", "XY", 123, None, "aaaXY");
        // count too small or large.
        case("aaa", "X", 4, Some(-123), "aaaXaaa");
        case("aaa_", "X", 4, Some(123), "aaaX");
        // very large start and count
        case("aaa", "X", i32::MAX, Some(i32::MAX), "aaaX");
    }

    /// Checks the bytea overlay with the same scenarios as the varchar test.
    #[test]
    fn test_overlay_bytea() {
        /// Runs a single bytea overlay and compares the written bytes with `expected`.
        ///
        /// `None` for `count` goes through `overlay_bytea`; `Some(n)` goes through
        /// `overlay_for_bytea`.
        #[track_caller]
        fn case(s: &[u8], new_sub_str: &[u8], start: i32, count: Option<i32>, expected: &[u8]) {
            let mut output = Vec::new();
            let outcome = if let Some(count) = count {
                overlay_for_bytea(s, new_sub_str, start, count, &mut output)
            } else {
                overlay_bytea(s, new_sub_str, start, &mut output)
            };
            outcome.unwrap();
            assert_eq!(output.as_slice(), expected);
        }

        // Arguments are (input, replace, start, count, expected).
        case(
            b"\x61\x61\x61\x5f\x5f\x61\x61\x61",
            b"\x58\x59",
            4,
            None,
            b"\x61\x61\x61\x58\x59\x61\x61\x61",
        );
        // Place at end
        case(
            b"\x61\x61\x61",
            b"\x58\x59",
            4,
            None,
            b"\x61\x61\x61\x58\x59",
        );
        // Place at start
        case(
            b"\x61\x61\x61",
            b"\x58\x59",
            1,
            Some(0),
            b"\x58\x59\x61\x61\x61",
        );
        // Replace shorter string
        case(
            b"\x61\x61\x61\x5f\x61\x61\x61",
            b"\x58\x59\x5A",
            4,
            Some(1),
            b"\x61\x61\x61\x58\x59\x5A\x61\x61\x61",
        );
        case(
            b"\x61\x61\x61\x61\x61\x61",
            b"\x58\x59\x5A",
            4,
            Some(0),
            b"\x61\x61\x61\x58\x59\x5A\x61\x61\x61",
        );
        // Replace longer string
        case(
            b"\x61\x61\x61\x5f\x5f\x5f\x61\x61\x61",
            b"\x58",
            4,
            Some(3),
            b"\x61\x61\x61\x58\x61\x61\x61",
        );
        // start too large
        case(
            b"\x61\x61\x61",
            b"\x58\x59",
            123,
            None,
            b"\x61\x61\x61\x58\x59",
        );
        // count too small or large
        case(
            b"\x61\x61\x61",
            b"\x58",
            4,
            Some(-123),
            b"\x61\x61\x61\x58\x61\x61\x61",
        );
        case(
            b"\x61\x61\x61\x5f",
            b"\x58",
            4,
            Some(123),
            b"\x61\x61\x61\x58",
        );
        // very large start and count
        case(
            b"\x61\x61\x61",
            b"\x58",
            i32::MAX,
            Some(i32::MAX),
            b"\x61\x61\x61\x58",
        );
    }
}