risingwave_expr_impl/table_function/regexp_matches.rs
1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use risingwave_common::array::{ListValue, Utf8Array};
16use risingwave_expr::function;
17
18use crate::scalar::regexp::RegexpContext;
19
20#[function(
21 "regexp_matches(varchar, varchar) -> setof varchar[]",
22 prebuild = "RegexpContext::from_pattern($1)?"
23)]
24#[function(
25 "regexp_matches(varchar, varchar, varchar) -> setof varchar[]",
26 prebuild = "RegexpContext::from_pattern_flags($1, $2)?"
27)]
28fn regexp_matches<'a>(
29 text: &'a str,
30 regex: &'a RegexpContext,
31) -> impl Iterator<Item = ListValue> + 'a {
32 regex.regex.captures_iter(text).map(|capture| {
33 // If there are multiple captures, then the first one is the whole match, and should be
34 // ignored in PostgreSQL's behavior.
35 let skip_flag = regex.regex.captures_len() > 1;
36 let list = capture
37 .unwrap()
38 .iter()
39 .skip(if skip_flag { 1 } else { 0 })
40 .map(|mat| mat.map(|m| m.as_str()))
41 .collect::<Utf8Array>();
42 ListValue::new(list.into())
43 })
44}