risingwave_expr/expr/mod.rs
1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Expressions in RisingWave.
16//!
17//! All expressions are implemented under the [`Expression`] trait.
18//!
19//! ## Construction
20//!
21//! Expressions can be constructed by [`build_func()`] function, which returns a
22//! [`BoxedExpression`].
23//!
24//! They can also be transformed from the prost [`ExprNode`] using the [`build_from_prost()`]
25//! function.
26//!
27//! ## Evaluation
28//!
29//! Expressions can be evaluated using the [`eval`] function.
30//!
31//! [`ExprNode`]: risingwave_pb::expr::ExprNode
32//! [`eval`]: Expression::eval
33
34// These modules define concrete expression structures.
35mod and_or;
36mod expr_input_ref;
37mod expr_literal;
38mod expr_some_all;
39pub(crate) mod expr_udf;
40pub(crate) mod wrapper;
41
42mod build;
43pub mod test_utils;
44mod value;
45
46use futures_util::TryFutureExt;
47use risingwave_common::array::{ArrayRef, DataChunk};
48use risingwave_common::row::OwnedRow;
49use risingwave_common::types::{DataType, Datum};
50
51pub use self::build::*;
52pub use self::expr_input_ref::InputRefExpression;
53pub use self::expr_literal::LiteralExpression;
54pub use self::value::{ValueImpl, ValueRef};
55pub use self::wrapper::*;
56pub use super::{ExprError, Result};
57
58/// Interface of an expression.
59///
60/// There're two functions to evaluate an expression: `eval` and `eval_v2`, exactly one of them
61/// should be implemented. Prefer calling and implementing `eval_v2` instead of `eval` if possible,
62/// to gain the performance benefit of scalar expression.
63#[async_trait::async_trait]
64#[auto_impl::auto_impl(&, Box)]
65pub trait Expression: std::fmt::Debug + Sync + Send {
66 /// Get the return data type.
67 fn return_type(&self) -> DataType;
68
69 /// Evaluate the expression in vectorized execution. Returns an array.
70 ///
71 /// The default implementation calls `eval_v2` and always converts the result to an array.
72 async fn eval(&self, input: &DataChunk) -> Result<ArrayRef> {
73 let value = self.eval_v2(input).await?;
74 Ok(match value {
75 ValueImpl::Array(array) => array,
76 ValueImpl::Scalar { value, capacity } => {
77 let mut builder = self.return_type().create_array_builder(capacity);
78 builder.append_n(capacity, value);
79 builder.finish().into()
80 }
81 })
82 }
83
84 /// Evaluate the expression in vectorized execution. Returns a value that can be either an
85 /// array, or a scalar if all values in the array are the same.
86 ///
87 /// The default implementation calls `eval` and puts the result into the `Array` variant.
88 async fn eval_v2(&self, input: &DataChunk) -> Result<ValueImpl> {
89 self.eval(input).map_ok(ValueImpl::Array).await
90 }
91
92 /// Evaluate the expression in row-based execution. Returns a nullable scalar.
93 async fn eval_row(&self, input: &OwnedRow) -> Result<Datum>;
94
95 /// Evaluate if the expression is constant.
96 fn eval_const(&self) -> Result<Datum> {
97 Err(ExprError::NotConstant)
98 }
99
100 /// Get the index if the expression is an `InputRef`.
101 fn input_ref_index(&self) -> Option<usize> {
102 None
103 }
104}
105
106/// An owned dynamically typed [`Expression`].
107pub type BoxedExpression = Box<dyn Expression>;
108
109/// Extension trait for boxing expressions.
110///
111/// This is not directly made into [`Expression`] trait because...
112/// - an expression does not have to be `'static`,
113/// - and for the ease of `auto_impl`.
114#[easy_ext::ext(ExpressionBoxExt)]
115impl<E: Expression + 'static> E {
116 /// Wrap the expression in a Box.
117 pub fn boxed(self) -> BoxedExpression {
118 Box::new(self)
119 }
120}
121
122/// An type-safe wrapper that indicates the inner expression can be evaluated in a non-strict
123/// manner, i.e., developers can directly call `eval_infallible` and `eval_row_infallible` without
124/// checking the result.
125///
126/// This is usually created by non-strict build functions like [`crate::expr::build_non_strict_from_prost`]
127/// and [`crate::expr::build_func_non_strict`]. It can also be created directly by
128/// [`NonStrictExpression::new_topmost`], where only the evaluation of the topmost level expression
129/// node is non-strict and should be treated as a TODO.
130///
131/// Compared to [`crate::expr::wrapper::non_strict::NonStrict`], this is more like an indicator
132/// applied on the root of an expression tree, while the latter is a wrapper that can be applied on
133/// each node of the tree and actually changes the behavior. As a result, [`NonStrictExpression`]
134/// does not implement [`Expression`] trait and instead deals directly with developers.
135#[derive(Debug)]
136pub struct NonStrictExpression<E = BoxedExpression>(E);
137
138impl<E> NonStrictExpression<E>
139where
140 E: Expression,
141{
142 /// Create a non-strict expression directly wrapping the given expression.
143 ///
144 /// Should only be used in tests as evaluation may panic.
145 pub fn for_test(inner: E) -> NonStrictExpression
146 where
147 E: 'static,
148 {
149 NonStrictExpression(inner.boxed())
150 }
151
152 /// Create a non-strict expression from the given expression, where only the evaluation of the
153 /// topmost level expression node is non-strict (which is subtly different from
154 /// [`crate::expr::build_non_strict_from_prost`] where every node is non-strict).
155 ///
156 /// This should be used as a TODO.
157 pub fn new_topmost(
158 inner: E,
159 error_report: impl EvalErrorReport,
160 ) -> NonStrictExpression<impl Expression> {
161 let inner = wrapper::non_strict::NonStrict::new(inner, error_report);
162 NonStrictExpression(inner)
163 }
164
165 /// Get the return data type.
166 pub fn return_type(&self) -> DataType {
167 self.0.return_type()
168 }
169
170 /// Evaluate the expression in vectorized execution and assert it succeeds. Returns an array.
171 ///
172 /// Use with expressions built in non-strict mode.
173 pub async fn eval_infallible(&self, input: &DataChunk) -> ArrayRef {
174 self.0.eval(input).await.expect("evaluation failed")
175 }
176
177 /// Evaluate the expression in row-based execution and assert it succeeds. Returns a nullable
178 /// scalar.
179 ///
180 /// Use with expressions built in non-strict mode.
181 pub async fn eval_row_infallible(&self, input: &OwnedRow) -> Datum {
182 self.0.eval_row(input).await.expect("evaluation failed")
183 }
184
185 /// Unwrap the inner expression.
186 pub fn into_inner(self) -> E {
187 self.0
188 }
189
190 /// Get a reference to the inner expression.
191 pub fn inner(&self) -> &E {
192 &self.0
193 }
194}
195
196/// An optional context that can be used in a function.
197///
198/// # Example
199/// ```ignore
200/// #[function("foo(int4) -> int8")]
201/// fn foo(a: i32, ctx: &Context) -> i64 {
202/// assert_eq!(ctx.arg_types[0], DataType::Int32);
203/// assert_eq!(ctx.return_type, DataType::Int64);
204/// // ...
205/// }
206/// ```
207#[derive(Debug)]
208pub struct Context {
209 pub arg_types: Vec<DataType>,
210 pub return_type: DataType,
211 /// Whether the function is variadic.
212 pub variadic: bool,
213}