risingwave_expr_impl/scalar/string.rs
1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! String functions
16//!
17//! <https://www.postgresql.org/docs/current/functions-string.html>
18
19use std::fmt::Write;
20
21use risingwave_common::util::quote_ident::QuoteIdent;
22use risingwave_expr::function;
23
24/// Returns the character with the specified Unicode code point.
25///
26/// # Example
27///
28/// ```slt
29/// query T
30/// select chr(65);
31/// ----
32/// A
33/// ```
34#[function("chr(int4) -> varchar")]
35pub fn chr(code: i32, writer: &mut impl Write) {
36 if let Some(c) = std::char::from_u32(code as u32) {
37 write!(writer, "{}", c).unwrap();
38 }
39}
40
41/// Returns true if the given string starts with the specified prefix.
42///
43/// # Example
44///
45/// ```slt
46/// query T
47/// select starts_with('abcdef', 'abc');
48/// ----
49/// t
50///
51/// query T
52/// select 'abcdef' ^@ 'abc';
53/// ----
54/// t
55///
56/// query T
57/// select 'abcdef' ^@ some(array['x', 'a', 't']);
58/// ----
59/// t
60/// ```
61#[function("starts_with(varchar, varchar) -> boolean")]
62pub fn starts_with(s: &str, prefix: &str) -> bool {
63 s.starts_with(prefix)
64}
65
66/// Capitalizes the first letter of each word in the given string.
67///
68/// # Example
69///
70/// ```slt
71/// query T
72/// select initcap('the quick brown fox');
73/// ----
74/// The Quick Brown Fox
75/// ```
76#[function("initcap(varchar) -> varchar")]
77pub fn initcap(s: &str, writer: &mut impl Write) {
78 let mut capitalize_next = true;
79 for c in s.chars() {
80 if capitalize_next {
81 write!(writer, "{}", c.to_uppercase()).unwrap();
82 capitalize_next = false;
83 } else {
84 write!(writer, "{}", c.to_lowercase()).unwrap();
85 }
86 if c.is_whitespace() {
87 capitalize_next = true;
88 }
89 }
90}
91
92/// Extends the given string on the left until it is at least the specified length,
93/// using the specified fill character (or a space by default).
94///
95/// # Example
96///
97/// ```slt
98/// query T
99/// select lpad('abc', 5);
100/// ----
101/// abc
102///
103/// query T
104/// select lpad('abcdef', 3);
105/// ----
106/// abc
107/// ```
108#[function("lpad(varchar, int4) -> varchar")]
109pub fn lpad(s: &str, length: i32, writer: &mut impl Write) {
110 lpad_fill(s, length, " ", writer);
111}
112
113/// Extends the string to the specified length by prepending the characters fill.
114/// If the string is already longer than the specified length, it is truncated on the right.
115///
116/// # Example
117///
118/// ```slt
119/// query T
120/// select lpad('hi', 5, 'xy');
121/// ----
122/// xyxhi
123///
124/// query T
125/// select lpad('hi', 5, '');
126/// ----
127/// hi
128/// ```
129#[function("lpad(varchar, int4, varchar) -> varchar")]
130pub fn lpad_fill(s: &str, length: i32, fill: &str, writer: &mut impl Write) {
131 let s_len = s.chars().count();
132 let fill_len = fill.chars().count();
133
134 if length <= 0 {
135 return;
136 }
137 if s_len >= length as usize {
138 for c in s.chars().take(length as usize) {
139 write!(writer, "{c}").unwrap();
140 }
141 } else if fill_len == 0 {
142 write!(writer, "{s}").unwrap();
143 } else {
144 let mut remaining_length = length as usize - s_len;
145 while remaining_length >= fill_len {
146 write!(writer, "{fill}").unwrap();
147 remaining_length -= fill_len;
148 }
149 for c in fill.chars().take(remaining_length) {
150 write!(writer, "{c}").unwrap();
151 }
152 write!(writer, "{s}").unwrap();
153 }
154}
155
156/// Extends the given string on the right until it is at least the specified length,
157/// using the specified fill character (or a space by default).
158///
159/// # Example
160///
161/// ```slt
162/// query T
163/// select rpad('abc', 5);
164/// ----
165/// abc
166///
167/// query T
168/// select rpad('abcdef', 3);
169/// ----
170/// abc
171/// ```
172#[function("rpad(varchar, int4) -> varchar")]
173pub fn rpad(s: &str, length: i32, writer: &mut impl Write) {
174 rpad_fill(s, length, " ", writer);
175}
176
177/// Extends the given string on the right until it is at least the specified length,
178/// using the specified fill string, truncating the string if it is already longer
179/// than the specified length.
180///
181/// # Example
182///
183/// ```slt
184/// query T
185/// select rpad('hi', 5, 'xy');
186/// ----
187/// hixyx
188///
189/// query T
190/// select rpad('abc', 5, '😀');
191/// ----
192/// abc😀😀
193///
194/// query T
195/// select rpad('abcdef', 3, '0');
196/// ----
197/// abc
198///
199/// query T
200/// select rpad('hi', 5, '');
201/// ----
202/// hi
203/// ```
204#[function("rpad(varchar, int4, varchar) -> varchar")]
205pub fn rpad_fill(s: &str, length: i32, fill: &str, writer: &mut impl Write) {
206 let s_len = s.chars().count();
207 let fill_len = fill.chars().count();
208
209 if length <= 0 {
210 return;
211 }
212
213 if s_len >= length as usize {
214 for c in s.chars().take(length as usize) {
215 write!(writer, "{c}").unwrap();
216 }
217 } else if fill_len == 0 {
218 write!(writer, "{s}").unwrap();
219 } else {
220 write!(writer, "{s}").unwrap();
221 let mut remaining_length = length as usize - s_len;
222 while remaining_length >= fill_len {
223 write!(writer, "{fill}").unwrap();
224 remaining_length -= fill_len;
225 }
226 for c in fill.chars().take(remaining_length) {
227 write!(writer, "{c}").unwrap();
228 }
229 }
230}
231
232/// Reverses the characters in the given string.
233///
234/// # Example
235///
236/// ```slt
237/// query T
238/// select reverse('abcdef');
239/// ----
240/// fedcba
241/// ```
242#[function("reverse(varchar) -> varchar")]
243pub fn reverse(s: &str, writer: &mut impl Write) {
244 for c in s.chars().rev() {
245 write!(writer, "{}", c).unwrap();
246 }
247}
248
249/// Converts the input string to ASCII by dropping accents, assuming that the input string
250/// is encoded in one of the supported encodings (Latin1, Latin2, Latin9, or WIN1250).
251///
252/// # Example
253///
254/// ```slt
255/// query T
256/// select to_ascii('Karél');
257/// ----
258/// Karel
259/// ```
260#[function("to_ascii(varchar) -> varchar")]
261pub fn to_ascii(s: &str, writer: &mut impl Write) {
262 for c in s.chars() {
263 let ascii = match c {
264 'Á' | 'À' | 'Â' | 'Ã' => 'A',
265 'á' | 'à' | 'â' | 'ã' => 'a',
266 'Č' | 'Ć' | 'Ç' => 'C',
267 'č' | 'ć' | 'ç' => 'c',
268 'Ď' => 'D',
269 'ď' => 'd',
270 'É' | 'È' | 'Ê' | 'Ẽ' => 'E',
271 'é' | 'è' | 'ê' | 'ẽ' => 'e',
272 'Í' | 'Ì' | 'Î' | 'Ĩ' => 'I',
273 'í' | 'ì' | 'î' | 'ĩ' => 'i',
274 'Ľ' => 'L',
275 'ľ' => 'l',
276 'Ň' => 'N',
277 'ň' => 'n',
278 'Ó' | 'Ò' | 'Ô' | 'Õ' => 'O',
279 'ó' | 'ò' | 'ô' | 'õ' => 'o',
280 'Ŕ' => 'R',
281 'ŕ' => 'r',
282 'Š' | 'Ś' => 'S',
283 'š' | 'ś' => 's',
284 'Ť' => 'T',
285 'ť' => 't',
286 'Ú' | 'Ù' | 'Û' | 'Ũ' => 'U',
287 'ú' | 'ù' | 'û' | 'ũ' => 'u',
288 'Ý' | 'Ỳ' => 'Y',
289 'ý' | 'ỳ' => 'y',
290 'Ž' | 'Ź' | 'Ż' => 'Z',
291 'ž' | 'ź' | 'ż' => 'z',
292 _ => c,
293 };
294 write!(writer, "{}", ascii).unwrap();
295 }
296}
297
298/// Converts the given integer to its equivalent hexadecimal representation.
299///
300/// # Example
301///
302/// ```slt
303/// query T
304/// select to_hex(2147483647);
305/// ----
306/// 7fffffff
307///
308/// query T
309/// select to_hex(-2147483648);
310/// ----
311/// 80000000
312///
313/// query T
314/// select to_hex(9223372036854775807);
315/// ----
316/// 7fffffffffffffff
317///
318/// query T
319/// select to_hex(-9223372036854775808);
320/// ----
321/// 8000000000000000
322/// ```
323#[function("to_hex(int4) -> varchar")]
324pub fn to_hex_i32(n: i32, writer: &mut impl Write) {
325 write!(writer, "{:x}", n).unwrap();
326}
327
328#[function("to_hex(int8) -> varchar")]
329pub fn to_hex_i64(n: i64, writer: &mut impl Write) {
330 write!(writer, "{:x}", n).unwrap();
331}
332
333/// Returns the given string suitably quoted to be used as an identifier in an SQL statement string.
334/// Quotes are added only if necessary (i.e., if the string contains non-identifier characters or
335/// would be case-folded). Embedded quotes are properly doubled.
336///
337/// Refer to <https://github.com/postgres/postgres/blob/90189eefc1e11822794e3386d9bafafd3ba3a6e8/src/backend/utils/adt/ruleutils.c#L11506>
338///
339/// # Example
340///
341/// ```slt
342/// query T
343/// select quote_ident('foo bar')
344/// ----
345/// "foo bar"
346///
347/// query T
348/// select quote_ident('FooBar')
349/// ----
350/// "FooBar"
351///
352/// query T
353/// select quote_ident('foo_bar')
354/// ----
355/// foo_bar
356///
357/// query T
358/// select quote_ident('foo"bar')
359/// ----
360/// "foo""bar"
361///
362/// # FIXME: quote SQL keywords is not supported yet
363/// query T
364/// select quote_ident('select')
365/// ----
366/// select
367/// ```
368#[function("quote_ident(varchar) -> varchar")]
369pub fn quote_ident(s: &str, writer: &mut impl Write) {
370 write!(writer, "{}", QuoteIdent(s)).unwrap();
371}
372
373/// Returns the first n characters in the string.
374/// If n is a negative value, the function will return all but last |n| characters.
375///
376/// # Example
377///
378/// ```slt
379/// query T
380/// select left('RisingWave', 6)
381/// ----
382/// Rising
383///
384/// query T
385/// select left('RisingWave', 42)
386/// ----
387/// RisingWave
388///
389/// query T
390/// select left('RisingWave', 0)
391/// ----
392/// (empty)
393///
394/// query T
395/// select left('RisingWave', -4)
396/// ----
397/// Rising
398///
399/// query T
400/// select left('RisingWave', -2147483648);
401/// ----
402/// (empty)
403/// ```
404#[function("left(varchar, int4) -> varchar")]
405pub fn left(s: &str, n: i32, writer: &mut impl Write) {
406 let n = if n >= 0 {
407 n as usize
408 } else {
409 s.chars().count().saturating_add_signed(n as isize)
410 };
411
412 s.chars()
413 .take(n)
414 .for_each(|c| writer.write_char(c).unwrap());
415}
416
417/// Returns the last n characters in the string.
418/// If n is a negative value, the function will return all but first |n| characters.
419///
420/// # Example
421///
422/// ```slt
423/// query T
424/// select right('RisingWave', 4)
425/// ----
426/// Wave
427///
428/// query T
429/// select left('RisingWave', 42)
430/// ----
431/// RisingWave
432///
433/// query T
434/// select right('RisingWave', 0)
435/// ----
436/// (empty)
437///
438/// query T
439/// select right('RisingWave', -6)
440/// ----
441/// Wave
442///
443/// # PostgreSQL returns the whole string due to an overflow bug, which we do not follow.
444/// query T
445/// select right('RisingWave', -2147483648);
446/// ----
447/// (empty)
448/// ```
449#[function("right(varchar, int4) -> varchar")]
450pub fn right(s: &str, n: i32, writer: &mut impl Write) {
451 let skip = if n >= 0 {
452 s.chars().count().saturating_sub(n as usize)
453 } else {
454 // `n as usize` is signed extended. This is `-n` without overflow.
455 usize::MAX - (n as usize) + 1
456 };
457
458 s.chars()
459 .skip(skip)
460 .for_each(|c| writer.write_char(c).unwrap());
461}
462
463/// `quote_literal(string text)`
464/// `quote_literal(value anyelement)`
465///
466/// Return the given string suitably quoted to be used as a string literal in an SQL statement
467/// string. Embedded single-quotes and backslashes are properly doubled.
468/// Note that `quote_literal` returns null on null input; if the argument might be null,
469/// `quote_nullable` is often more suitable.
470///
471/// # Example
472///
473/// Note that the quotes are part of the output string.
474///
475/// ```slt
476/// query T
477/// select quote_literal(E'O\'Reilly')
478/// ----
479/// 'O''Reilly'
480///
481/// query T
482/// select quote_literal(E'C:\\Windows\\')
483/// ----
484/// E'C:\\Windows\\'
485///
486/// query T
487/// select quote_literal(42.5)
488/// ----
489/// '42.5'
490///
491/// query T
492/// select quote_literal('hello'::bytea);
493/// ----
494/// E'\\x68656c6c6f'
495///
496/// query T
497/// select quote_literal('{"hello":"world","foo":233}'::jsonb);
498/// ----
499/// '{"foo": 233, "hello": "world"}'
500/// ```
501#[function("quote_literal(varchar) -> varchar")]
502pub fn quote_literal(s: &str, writer: &mut impl Write) {
503 if s.contains('\\') {
504 // use escape format: E'...'
505 write!(writer, "E").unwrap();
506 }
507 write!(writer, "'").unwrap();
508 for c in s.chars() {
509 match c {
510 '\'' => write!(writer, "''").unwrap(),
511 '\\' => write!(writer, "\\\\").unwrap(),
512 _ => write!(writer, "{}", c).unwrap(),
513 }
514 }
515 write!(writer, "'").unwrap();
516}
517
518/// `quote_nullable(string text)`
519///
520/// Return the given string suitably quoted to be used as a string literal in an SQL statement
521/// string; or, if the argument is null, return NULL.
522/// Embedded single-quotes and backslashes are properly doubled.
523#[function("quote_nullable(varchar) -> varchar")]
524pub fn quote_nullable(s: Option<&str>, writer: &mut impl Write) {
525 match s {
526 Some(s) => quote_literal(s, writer),
527 None => write!(writer, "NULL").unwrap(),
528 }
529}
530
531#[cfg(test)]
532mod tests {
533 use super::*;
534
535 #[test]
536 fn test_left_and_right() {
537 let s = "cxscgccdd";
538 let us = "上海自来水来自海上";
539
540 let cases = [
541 (s, 3, "cxs", "cdd"),
542 (s, -3, "cxscgc", "cgccdd"),
543 (s, 0, "", ""),
544 (s, 15, "cxscgccdd", "cxscgccdd"),
545 // Unicode test
546 (us, 5, "上海自来水", "水来自海上"),
547 (us, -6, "上海自", "自海上"),
548 ];
549
550 for (s, n, left_expected, right_expected) in cases {
551 let mut left_writer = String::new();
552 let mut right_writer = String::new();
553 left(s, n, &mut left_writer);
554 right(s, n, &mut right_writer);
555 assert_eq!(left_writer, left_expected);
556 assert_eq!(right_writer, right_expected);
557 }
558 }
559}