risingwave_expr_impl/scalar/
string.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! String functions
16//!
17//! <https://www.postgresql.org/docs/current/functions-string.html>
18
19use std::fmt::Write;
20
21use risingwave_common::util::quote_ident::QuoteIdent;
22use risingwave_expr::function;
23
24/// Returns the character with the specified Unicode code point.
25///
26/// # Example
27///
28/// ```slt
29/// query T
30/// select chr(65);
31/// ----
32/// A
33/// ```
34#[function("chr(int4) -> varchar")]
35pub fn chr(code: i32, writer: &mut impl Write) {
36    if let Some(c) = std::char::from_u32(code as u32) {
37        write!(writer, "{}", c).unwrap();
38    }
39}
40
41/// Returns true if the given string starts with the specified prefix.
42///
43/// # Example
44///
45/// ```slt
46/// query T
47/// select starts_with('abcdef', 'abc');
48/// ----
49/// t
50///
51/// query T
52/// select 'abcdef' ^@ 'abc';
53/// ----
54/// t
55///
56/// query T
57/// select 'abcdef' ^@ some(array['x', 'a', 't']);
58/// ----
59/// t
60/// ```
61#[function("starts_with(varchar, varchar) -> boolean")]
62pub fn starts_with(s: &str, prefix: &str) -> bool {
63    s.starts_with(prefix)
64}
65
66/// Capitalizes the first letter of each word in the given string.
67///
68/// # Example
69///
70/// ```slt
71/// query T
72/// select initcap('the quick brown fox');
73/// ----
74/// The Quick Brown Fox
75/// ```
76#[function("initcap(varchar) -> varchar")]
77pub fn initcap(s: &str, writer: &mut impl Write) {
78    let mut capitalize_next = true;
79    for c in s.chars() {
80        if capitalize_next {
81            write!(writer, "{}", c.to_uppercase()).unwrap();
82            capitalize_next = false;
83        } else {
84            write!(writer, "{}", c.to_lowercase()).unwrap();
85        }
86        if c.is_whitespace() {
87            capitalize_next = true;
88        }
89    }
90}
91
92/// Extends the given string on the left until it is at least the specified length,
93/// using the specified fill character (or a space by default).
94///
95/// # Example
96///
97/// ```slt
98/// query T
99/// select lpad('abc', 5);
100/// ----
101///   abc
102///
103/// query T
104/// select lpad('abcdef', 3);
105/// ----
106/// abc
107/// ```
108#[function("lpad(varchar, int4) -> varchar")]
109pub fn lpad(s: &str, length: i32, writer: &mut impl Write) {
110    lpad_fill(s, length, " ", writer);
111}
112
113/// Extends the string to the specified length by prepending the characters fill.
114/// If the string is already longer than the specified length, it is truncated on the right.
115///
116/// # Example
117///
118/// ```slt
119/// query T
120/// select lpad('hi', 5, 'xy');
121/// ----
122/// xyxhi
123///
124/// query T
125/// select lpad('hi', 5, '');
126/// ----
127/// hi
128/// ```
129#[function("lpad(varchar, int4, varchar) -> varchar")]
130pub fn lpad_fill(s: &str, length: i32, fill: &str, writer: &mut impl Write) {
131    let s_len = s.chars().count();
132    let fill_len = fill.chars().count();
133
134    if length <= 0 {
135        return;
136    }
137    if s_len >= length as usize {
138        for c in s.chars().take(length as usize) {
139            write!(writer, "{c}").unwrap();
140        }
141    } else if fill_len == 0 {
142        write!(writer, "{s}").unwrap();
143    } else {
144        let mut remaining_length = length as usize - s_len;
145        while remaining_length >= fill_len {
146            write!(writer, "{fill}").unwrap();
147            remaining_length -= fill_len;
148        }
149        for c in fill.chars().take(remaining_length) {
150            write!(writer, "{c}").unwrap();
151        }
152        write!(writer, "{s}").unwrap();
153    }
154}
155
156/// Extends the given string on the right until it is at least the specified length,
157/// using the specified fill character (or a space by default).
158///
159/// # Example
160///
161/// ```slt
162/// query T
163/// select rpad('abc', 5);
164/// ----
165/// abc
166///
167/// query T
168/// select rpad('abcdef', 3);
169/// ----
170/// abc
171/// ```
172#[function("rpad(varchar, int4) -> varchar")]
173pub fn rpad(s: &str, length: i32, writer: &mut impl Write) {
174    rpad_fill(s, length, " ", writer);
175}
176
177/// Extends the given string on the right until it is at least the specified length,
178/// using the specified fill string, truncating the string if it is already longer
179/// than the specified length.
180///
181/// # Example
182///
183/// ```slt
184/// query T
185/// select rpad('hi', 5, 'xy');
186/// ----
187/// hixyx
188///
189/// query T
190/// select rpad('abc', 5, '😀');
191/// ----
192/// abc😀😀
193///
194/// query T
195/// select rpad('abcdef', 3, '0');
196/// ----
197/// abc
198///
199/// query T
200/// select rpad('hi', 5, '');
201/// ----
202/// hi
203/// ```
204#[function("rpad(varchar, int4, varchar) -> varchar")]
205pub fn rpad_fill(s: &str, length: i32, fill: &str, writer: &mut impl Write) {
206    let s_len = s.chars().count();
207    let fill_len = fill.chars().count();
208
209    if length <= 0 {
210        return;
211    }
212
213    if s_len >= length as usize {
214        for c in s.chars().take(length as usize) {
215            write!(writer, "{c}").unwrap();
216        }
217    } else if fill_len == 0 {
218        write!(writer, "{s}").unwrap();
219    } else {
220        write!(writer, "{s}").unwrap();
221        let mut remaining_length = length as usize - s_len;
222        while remaining_length >= fill_len {
223            write!(writer, "{fill}").unwrap();
224            remaining_length -= fill_len;
225        }
226        for c in fill.chars().take(remaining_length) {
227            write!(writer, "{c}").unwrap();
228        }
229    }
230}
231
232/// Reverses the characters in the given string.
233///
234/// # Example
235///
236/// ```slt
237/// query T
238/// select reverse('abcdef');
239/// ----
240/// fedcba
241/// ```
242#[function("reverse(varchar) -> varchar")]
243pub fn reverse(s: &str, writer: &mut impl Write) {
244    for c in s.chars().rev() {
245        write!(writer, "{}", c).unwrap();
246    }
247}
248
249/// Converts the input string to ASCII by dropping accents, assuming that the input string
250/// is encoded in one of the supported encodings (Latin1, Latin2, Latin9, or WIN1250).
251///
252/// # Example
253///
254/// ```slt
255/// query T
256/// select to_ascii('Karél');
257/// ----
258/// Karel
259/// ```
260#[function("to_ascii(varchar) -> varchar")]
261pub fn to_ascii(s: &str, writer: &mut impl Write) {
262    for c in s.chars() {
263        let ascii = match c {
264            'Á' | 'À' | 'Â' | 'Ã' => 'A',
265            'á' | 'à' | 'â' | 'ã' => 'a',
266            'Č' | 'Ć' | 'Ç' => 'C',
267            'č' | 'ć' | 'ç' => 'c',
268            'Ď' => 'D',
269            'ď' => 'd',
270            'É' | 'È' | 'Ê' | 'Ẽ' => 'E',
271            'é' | 'è' | 'ê' | 'ẽ' => 'e',
272            'Í' | 'Ì' | 'Î' | 'Ĩ' => 'I',
273            'í' | 'ì' | 'î' | 'ĩ' => 'i',
274            'Ľ' => 'L',
275            'ľ' => 'l',
276            'Ň' => 'N',
277            'ň' => 'n',
278            'Ó' | 'Ò' | 'Ô' | 'Õ' => 'O',
279            'ó' | 'ò' | 'ô' | 'õ' => 'o',
280            'Ŕ' => 'R',
281            'ŕ' => 'r',
282            'Š' | 'Ś' => 'S',
283            'š' | 'ś' => 's',
284            'Ť' => 'T',
285            'ť' => 't',
286            'Ú' | 'Ù' | 'Û' | 'Ũ' => 'U',
287            'ú' | 'ù' | 'û' | 'ũ' => 'u',
288            'Ý' | 'Ỳ' => 'Y',
289            'ý' | 'ỳ' => 'y',
290            'Ž' | 'Ź' | 'Ż' => 'Z',
291            'ž' | 'ź' | 'ż' => 'z',
292            _ => c,
293        };
294        write!(writer, "{}", ascii).unwrap();
295    }
296}
297
298/// Converts the given integer to its equivalent hexadecimal representation.
299///
300/// # Example
301///
302/// ```slt
303/// query T
304/// select to_hex(2147483647);
305/// ----
306/// 7fffffff
307///
308/// query T
309/// select to_hex(-2147483648);
310/// ----
311/// 80000000
312///
313/// query T
314/// select to_hex(9223372036854775807);
315/// ----
316/// 7fffffffffffffff
317///
318/// query T
319/// select to_hex(-9223372036854775808);
320/// ----
321/// 8000000000000000
322/// ```
323#[function("to_hex(int4) -> varchar")]
324pub fn to_hex_i32(n: i32, writer: &mut impl Write) {
325    write!(writer, "{:x}", n).unwrap();
326}
327
328#[function("to_hex(int8) -> varchar")]
329pub fn to_hex_i64(n: i64, writer: &mut impl Write) {
330    write!(writer, "{:x}", n).unwrap();
331}
332
333/// Returns the given string suitably quoted to be used as an identifier in an SQL statement string.
334/// Quotes are added only if necessary (i.e., if the string contains non-identifier characters or
335/// would be case-folded). Embedded quotes are properly doubled.
336///
337/// Refer to <https://github.com/postgres/postgres/blob/90189eefc1e11822794e3386d9bafafd3ba3a6e8/src/backend/utils/adt/ruleutils.c#L11506>
338///
339/// # Example
340///
341/// ```slt
342/// query T
343/// select quote_ident('foo bar')
344/// ----
345/// "foo bar"
346///
347/// query T
348/// select quote_ident('FooBar')
349/// ----
350/// "FooBar"
351///
352/// query T
353/// select quote_ident('foo_bar')
354/// ----
355/// foo_bar
356///
357/// query T
358/// select quote_ident('foo"bar')
359/// ----
360/// "foo""bar"
361///
362/// # FIXME: quote SQL keywords is not supported yet
363/// query T
364/// select quote_ident('select')
365/// ----
366/// select
367/// ```
368#[function("quote_ident(varchar) -> varchar")]
369pub fn quote_ident(s: &str, writer: &mut impl Write) {
370    write!(writer, "{}", QuoteIdent(s)).unwrap();
371}
372
373/// Returns the first n characters in the string.
374/// If n is a negative value, the function will return all but last |n| characters.
375///
376/// # Example
377///
378/// ```slt
379/// query T
380/// select left('RisingWave', 6)
381/// ----
382/// Rising
383///
384/// query T
385/// select left('RisingWave', 42)
386/// ----
387/// RisingWave
388///
389/// query T
390/// select left('RisingWave', 0)
391/// ----
392/// (empty)
393///
394/// query T
395/// select left('RisingWave', -4)
396/// ----
397/// Rising
398///
399/// query T
400/// select left('RisingWave', -2147483648);
401/// ----
402/// (empty)
403/// ```
404#[function("left(varchar, int4) -> varchar")]
405pub fn left(s: &str, n: i32, writer: &mut impl Write) {
406    let n = if n >= 0 {
407        n as usize
408    } else {
409        s.chars().count().saturating_add_signed(n as isize)
410    };
411
412    s.chars()
413        .take(n)
414        .for_each(|c| writer.write_char(c).unwrap());
415}
416
417/// Returns the last n characters in the string.
418/// If n is a negative value, the function will return all but first |n| characters.
419///
420/// # Example
421///
422/// ```slt
423/// query T
424/// select right('RisingWave', 4)
425/// ----
426/// Wave
427///
428/// query T
429/// select left('RisingWave', 42)
430/// ----
431/// RisingWave
432///
433/// query T
434/// select right('RisingWave', 0)
435/// ----
436/// (empty)
437///
438/// query T
439/// select right('RisingWave', -6)
440/// ----
441/// Wave
442///
443/// # PostgreSQL returns the whole string due to an overflow bug, which we do not follow.
444/// query T
445/// select right('RisingWave', -2147483648);
446/// ----
447/// (empty)
448/// ```
449#[function("right(varchar, int4) -> varchar")]
450pub fn right(s: &str, n: i32, writer: &mut impl Write) {
451    let skip = if n >= 0 {
452        s.chars().count().saturating_sub(n as usize)
453    } else {
454        // `n as usize` is signed extended. This is `-n` without overflow.
455        usize::MAX - (n as usize) + 1
456    };
457
458    s.chars()
459        .skip(skip)
460        .for_each(|c| writer.write_char(c).unwrap());
461}
462
463/// `quote_literal(string text)`
464/// `quote_literal(value anyelement)`
465///
466/// Return the given string suitably quoted to be used as a string literal in an SQL statement
467/// string. Embedded single-quotes and backslashes are properly doubled.
468/// Note that `quote_literal` returns null on null input; if the argument might be null,
469/// `quote_nullable` is often more suitable.
470///
471/// # Example
472///
473/// Note that the quotes are part of the output string.
474///
475/// ```slt
476/// query T
477/// select quote_literal(E'O\'Reilly')
478/// ----
479/// 'O''Reilly'
480///
481/// query T
482/// select quote_literal(E'C:\\Windows\\')
483/// ----
484/// E'C:\\Windows\\'
485///
486/// query T
487/// select quote_literal(42.5)
488/// ----
489/// '42.5'
490///
491/// query T
492/// select quote_literal('hello'::bytea);
493/// ----
494/// E'\\x68656c6c6f'
495///
496/// query T
497/// select quote_literal('{"hello":"world","foo":233}'::jsonb);
498/// ----
499/// '{"foo": 233, "hello": "world"}'
500/// ```
501#[function("quote_literal(varchar) -> varchar")]
502pub fn quote_literal(s: &str, writer: &mut impl Write) {
503    if s.contains('\\') {
504        // use escape format: E'...'
505        write!(writer, "E").unwrap();
506    }
507    write!(writer, "'").unwrap();
508    for c in s.chars() {
509        match c {
510            '\'' => write!(writer, "''").unwrap(),
511            '\\' => write!(writer, "\\\\").unwrap(),
512            _ => write!(writer, "{}", c).unwrap(),
513        }
514    }
515    write!(writer, "'").unwrap();
516}
517
518/// `quote_nullable(string text)`
519///
520/// Return the given string suitably quoted to be used as a string literal in an SQL statement
521/// string; or, if the argument is null, return NULL.
522/// Embedded single-quotes and backslashes are properly doubled.
523#[function("quote_nullable(varchar) -> varchar")]
524pub fn quote_nullable(s: Option<&str>, writer: &mut impl Write) {
525    match s {
526        Some(s) => quote_literal(s, writer),
527        None => write!(writer, "NULL").unwrap(),
528    }
529}
530
531#[cfg(test)]
532mod tests {
533    use super::*;
534
535    #[test]
536    fn test_left_and_right() {
537        let s = "cxscgccdd";
538        let us = "上海自来水来自海上";
539
540        let cases = [
541            (s, 3, "cxs", "cdd"),
542            (s, -3, "cxscgc", "cgccdd"),
543            (s, 0, "", ""),
544            (s, 15, "cxscgccdd", "cxscgccdd"),
545            // Unicode test
546            (us, 5, "上海自来水", "水来自海上"),
547            (us, -6, "上海自", "自海上"),
548        ];
549
550        for (s, n, left_expected, right_expected) in cases {
551            let mut left_writer = String::new();
552            let mut right_writer = String::new();
553            left(s, n, &mut left_writer);
554            right(s, n, &mut right_writer);
555            assert_eq!(left_writer, left_expected);
556            assert_eq!(right_writer, right_expected);
557        }
558    }
559}