risingwave_expr_impl/scalar/
overlay.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Write;
16
17use risingwave_expr::{ExprError, Result, function};
18
19/// Replaces a substring of the given string with a new substring.
20///
21/// ```slt
22/// query T
23/// select overlay('αβγδεζ' placing '💯' from 3);
24/// ----
25/// αβ💯δεζ
26/// ```
27#[function("overlay(varchar, varchar, int4) -> varchar")]
28pub fn overlay(s: &str, new_sub_str: &str, start: i32, writer: &mut impl Write) -> Result<()> {
29    let sub_len = new_sub_str
30        .chars()
31        .count()
32        .try_into()
33        .map_err(|_| ExprError::NumericOutOfRange)?;
34    overlay_for(s, new_sub_str, start, sub_len, writer)
35}
36
37/// Replaces a substring of the given string with a new substring.
38///
39/// ```slt
40/// statement error not positive
41/// select overlay('αβγδεζ' placing '①②③' from 0);
42///
43/// query T
44/// select overlay('αβγδεζ' placing '①②③' from 10);
45/// ----
46/// αβγδεζ①②③
47///
48/// query T
49/// select overlay('αβγδεζ' placing '①②③' from 4 for 2);
50/// ----
51/// αβγ①②③ζ
52///
53/// query T
54/// select overlay('αβγδεζ' placing '①②③' from 4);
55/// ----
56/// αβγ①②③
57///
58/// query T
59/// select overlay('αβγδεζ' placing '①②③' from 2 for 4);
60/// ----
61/// α①②③ζ
62///
63/// query T
64/// select overlay('αβγδεζ' placing '①②③' from 2 for 7);
65/// ----
66/// α①②③
67///
68/// query T
69/// select overlay('αβγδεζ' placing '①②③' from 4 for 0);
70/// ----
71/// αβγ①②③δεζ
72///
73/// query T
74/// select overlay('αβγδεζ' placing '①②③' from 4 for -2);
75/// ----
76/// αβγ①②③βγδεζ
77///
78/// query T
79/// select overlay('αβγδεζ' placing '①②③' from 4 for -1000);
80/// ----
81/// αβγ①②③αβγδεζ
82/// ```
83#[function("overlay(varchar, varchar, int4, int4) -> varchar")]
84pub fn overlay_for(
85    s: &str,
86    new_sub_str: &str,
87    start: i32,
88    count: i32,
89    writer: &mut impl Write,
90) -> Result<()> {
91    if start <= 0 {
92        return Err(ExprError::InvalidParam {
93            name: "start",
94            reason: format!("{start} is not positive").into(),
95        });
96    }
97
98    let mut chars = s.char_indices().skip(start as usize - 1).peekable();
99
100    // write the substring before the overlay.
101    let leading = match chars.peek() {
102        Some((i, _)) => &s[..*i],
103        None => s,
104    };
105    writer.write_str(leading).unwrap();
106
107    // write the new substring.
108    writer.write_str(new_sub_str).unwrap();
109
110    let Ok(count) = count.try_into() else {
111        // For negative `count`, which is rare in practice, we hand over to `substr`
112        let start_right = start
113            .checked_add(count)
114            .ok_or(ExprError::NumericOutOfRange)?;
115        return super::substr::substr_start(s, start_right, writer);
116    };
117
118    // write the substring after the overlay.
119    if let Some((i, _)) = chars.nth(count) {
120        writer.write_str(&s[i..]).unwrap();
121    }
122
123    Ok(())
124}
125
126/// Replaces a subsequence of the given bytea with a new bytea value.
127///
128/// # Example
129///
130/// ```slt
131/// query T
132/// select overlay('\x616263646566'::bytea placing '\x9999'::bytea from 3);
133/// ----
134/// \x616299996566
135/// ```
136#[function("overlay(bytea, bytea, int4) -> bytea")]
137pub fn overlay_bytea(s: &[u8], new_sub_str: &[u8], start: i32) -> Result<Box<[u8]>> {
138    let count = new_sub_str
139        .len()
140        .try_into()
141        .map_err(|_| ExprError::NumericOutOfRange)?;
142    overlay_for_bytea(s, new_sub_str, start, count)
143}
144
145/// Replaces a range of bytes in a bytea value with another bytea.
146///
147/// ```slt
148/// statement error not positive
149/// select overlay('\x616263'::bytea placing '\x313233'::bytea from 0);
150///
151/// query T
152/// select overlay('\x616263'::bytea placing '\x313233'::bytea from 10);
153/// ----
154/// \x616263313233
155///
156/// query T
157/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for 2);
158/// ----
159/// \x61626331323366
160///
161/// query T
162/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4);
163/// ----
164/// \x616263313233
165///
166/// query T
167/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 2 for 4);
168/// ----
169/// \x6131323366
170///
171/// query T
172/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 2 for 7);
173/// ----
174/// \x61313233
175///
176/// query T
177/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for 0);
178/// ----
179/// \x616263313233646566
180///
181/// query T
182/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for -2);
183/// ----
184/// \x6162633132336263646566
185///
186/// query T
187/// select overlay('\x616263646566'::bytea placing '\x313233'::bytea from 4 for -1000);
188/// ----
189/// \x616263313233616263646566
190/// ```
191#[function("overlay(bytea, bytea, int4, int4) -> bytea")]
192pub fn overlay_for_bytea(
193    s: &[u8],
194    new_sub_str: &[u8],
195    start: i32,
196    count: i32,
197) -> Result<Box<[u8]>> {
198    if start <= 0 {
199        return Err(ExprError::InvalidParam {
200            name: "start",
201            reason: format!("{start} is not positive").into(),
202        });
203    }
204
205    // write the substring_bytea before the overlay.
206    let start_idx = (start - 1) as usize;
207    let mut result = Vec::with_capacity(s.len() + new_sub_str.len());
208    if start_idx >= s.len() {
209        result.extend_from_slice(s);
210    } else {
211        result.extend_from_slice(&s[..start_idx]);
212    }
213
214    // write the new substring_bytea.
215    result.extend_from_slice(new_sub_str);
216
217    if count < 0 {
218        // For negative `count`, which is rare in practice, we hand over to `substr_bytea`
219        let start_right = start
220            .checked_add(count)
221            .ok_or(ExprError::NumericOutOfRange)?;
222        result.extend_from_slice(&super::substr::substr_start_bytea(s, start_right));
223        return Ok(result.into_boxed_slice());
224    };
225
226    // write the substring_bytea after the overlay.
227    let count = count as usize;
228    let skip_end = start_idx.saturating_add(count);
229    if skip_end <= s.len() {
230        result.extend_from_slice(&s[skip_end..]);
231    }
232    Ok(result.into_boxed_slice())
233}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the `varchar` variants. A `count` of `None` selects the three-argument
    /// `overlay`, `Some(count)` selects `overlay_for`.
    #[test]
    fn test_overlay() {
        // (input, replacement, start, count, expected)
        let cases: &[(&str, &str, i32, Option<i32>, &str)] = &[
            ("aaa__aaa", "XY", 4, None, "aaaXYaaa"),
            // Place at end.
            ("aaa", "XY", 4, None, "aaaXY"),
            // Place at start.
            ("aaa", "XY", 1, Some(0), "XYaaa"),
            // Replace shorter string.
            ("aaa_aaa", "XYZ", 4, Some(1), "aaaXYZaaa"),
            ("aaaaaa", "XYZ", 4, Some(0), "aaaXYZaaa"),
            // Replace longer string.
            ("aaa___aaa", "X", 4, Some(3), "aaaXaaa"),
            // start too large.
            ("aaa", "XY", 123, None, "aaaXY"),
            // count too small or large.
            ("aaa", "X", 4, Some(-123), "aaaXaaa"),
            ("aaa_", "X", 4, Some(123), "aaaX"),
            // very large start and count
            ("aaa", "X", i32::MAX, Some(i32::MAX), "aaaX"),
        ];

        for &(s, new_sub_str, start, count, expected) in cases {
            let mut writer = String::new();
            let outcome = if let Some(count) = count {
                overlay_for(s, new_sub_str, start, count, &mut writer)
            } else {
                overlay(s, new_sub_str, start, &mut writer)
            };
            outcome.unwrap();
            assert_eq!(
                writer, expected,
                "overlay({s:?} placing {new_sub_str:?} from {start} for {count:?})"
            );
        }
    }

    /// Exercises the `bytea` variants. A `count` of `None` selects the three-argument
    /// `overlay_bytea`, `Some(count)` selects `overlay_for_bytea`.
    #[test]
    fn test_overlay_bytea() {
        // (input, replacement, start, count, expected)
        let cases: &[(&[u8], &[u8], i32, Option<i32>, &[u8])] = &[
            (
                b"\x61\x61\x61\x5f\x5f\x61\x61\x61",
                b"\x58\x59",
                4,
                None,
                b"\x61\x61\x61\x58\x59\x61\x61\x61",
            ),
            // Place at end
            (
                b"\x61\x61\x61",
                b"\x58\x59",
                4,
                None,
                b"\x61\x61\x61\x58\x59",
            ),
            // Place at start
            (
                b"\x61\x61\x61",
                b"\x58\x59",
                1,
                Some(0),
                b"\x58\x59\x61\x61\x61",
            ),
            // Replace shorter string
            (
                b"\x61\x61\x61\x5f\x61\x61\x61",
                b"\x58\x59\x5A",
                4,
                Some(1),
                b"\x61\x61\x61\x58\x59\x5A\x61\x61\x61",
            ),
            (
                b"\x61\x61\x61\x61\x61\x61",
                b"\x58\x59\x5A",
                4,
                Some(0),
                b"\x61\x61\x61\x58\x59\x5A\x61\x61\x61",
            ),
            // Replace longer string
            (
                b"\x61\x61\x61\x5f\x5f\x5f\x61\x61\x61",
                b"\x58",
                4,
                Some(3),
                b"\x61\x61\x61\x58\x61\x61\x61",
            ),
            // start too large
            (
                b"\x61\x61\x61",
                b"\x58\x59",
                123,
                None,
                b"\x61\x61\x61\x58\x59",
            ),
            // count too small or large
            (
                b"\x61\x61\x61",
                b"\x58",
                4,
                Some(-123),
                b"\x61\x61\x61\x58\x61\x61\x61",
            ),
            (
                b"\x61\x61\x61\x5f",
                b"\x58",
                4,
                Some(123),
                b"\x61\x61\x61\x58",
            ),
            // very large start and count
            (
                b"\x61\x61\x61",
                b"\x58",
                i32::MAX,
                Some(i32::MAX),
                b"\x61\x61\x61\x58",
            ),
        ];

        for &(s, new_sub_str, start, count, expected) in cases {
            let outcome = if let Some(count) = count {
                overlay_for_bytea(s, new_sub_str, start, count)
            } else {
                overlay_bytea(s, new_sub_str, start)
            };
            let actual: Box<[u8]> = outcome.unwrap();
            assert_eq!(
                &*actual, expected,
                "overlay({s:?} placing {new_sub_str:?} from {start} for {count:?})"
            );
        }
    }
}