Skip to main content

risingwave_stream/from_proto/
gap_fill.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::HashMap;
16
17use itertools::Itertools;
18use risingwave_common::gap_fill::FillStrategy;
19use risingwave_expr::expr::build_non_strict_from_prost;
20use risingwave_pb::stream_plan::GapFillNode;
21use risingwave_storage::StateStore;
22
23use super::ExecutorBuilder;
24use crate::common::table::state_table::StateTableBuilder;
25use crate::error::StreamResult;
26use crate::executor::{Executor, GapFillExecutor, GapFillExecutorArgs};
27use crate::task::ExecutorParams;
28
29pub struct GapFillExecutorBuilder;
30
31impl_stream_node_body!(GapFill(GapFillNode) => GapFillExecutorBuilder);
32
33impl ExecutorBuilder for GapFillExecutorBuilder {
34    type Node = GapFillNode;
35
36    async fn new_boxed_executor(
37        params: ExecutorParams,
38        node: &GapFillNode,
39        store: impl StateStore,
40    ) -> StreamResult<Executor> {
41        let [input]: [_; 1] = params.input.try_into().unwrap();
42
43        let time_column_index = node.get_time_column_index() as usize;
44
45        // Parse interval from ExprNode
46        let interval_expr_node = node.get_interval()?;
47        let interval_expr =
48            build_non_strict_from_prost(interval_expr_node, params.eval_error_report)?;
49
50        let fill_columns: Vec<usize> = node
51            .get_fill_columns()
52            .iter()
53            .map(|&x| x as usize)
54            .collect();
55
56        let fill_strategies: Vec<FillStrategy> = node
57            .get_fill_strategies()
58            .iter()
59            .map(|s| match s.as_str() {
60                "locf" => Ok(FillStrategy::Locf),
61                "interpolate" => Ok(FillStrategy::Interpolate),
62                "null" => Ok(FillStrategy::Null),
63                _ => anyhow::bail!("unknown fill strategy: {}", s),
64            })
65            .collect::<anyhow::Result<_>>()?;
66
67        let fill_columns_with_strategies: HashMap<usize, FillStrategy> =
68            fill_columns.into_iter().zip_eq(fill_strategies).collect();
69
70        let partition_by_indices: Vec<usize> = node
71            .get_partition_by_indices()
72            .iter()
73            .map(|&x| x as usize)
74            .collect();
75
76        // Hash-distributed state table needs vnodes; singleton (no partition) does not.
77        let vnodes = if partition_by_indices.is_empty() {
78            None
79        } else {
80            Some(std::sync::Arc::new(
81                params
82                    .vnode_bitmap
83                    .expect("vnodes not set for hash-distributed GapFill"),
84            ))
85        };
86
87        let state_table =
88            StateTableBuilder::new(node.get_state_table().as_ref().unwrap(), store, vnodes)
89                .forbid_preload_all_rows()
90                .build()
91                .await;
92
93        let pointer_key_indices: Vec<usize> = (!node.get_pointer_key_indices().is_empty())
94            .then(|| {
95                node.get_pointer_key_indices()
96                    .iter()
97                    .map(|&x| x as usize)
98                    .collect()
99            })
100            .expect("GapFillNode should always carry pointer_key_indices");
101
102        let exec = GapFillExecutor::new(GapFillExecutorArgs {
103            ctx: params.actor_context,
104            input,
105            schema: params.info.schema.clone(),
106            chunk_size: params.config.developer.chunk_size,
107            time_column_index,
108            fill_columns: fill_columns_with_strategies,
109            gap_interval: interval_expr,
110            state_table,
111            partition_by_indices,
112            pointer_key_indices,
113            high_gap_fill_amplification_threshold: params
114                .config
115                .developer
116                .high_gap_fill_amplification_threshold,
117        });
118
119        Ok((params.info, exec).into())
120    }
121}