risingwave_connector/sink/file_sink/
fs.rs

1// Copyright 2024 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::collections::{BTreeMap, HashMap};
16
17use anyhow::anyhow;
18use opendal::Operator;
19use opendal::layers::{LoggingLayer, RetryLayer};
20use opendal::services::Fs;
21use serde::Deserialize;
22use serde_with::serde_as;
23use with_options::WithOptions;
24
25use super::opendal_sink::BatchingStrategy;
26use crate::sink::file_sink::opendal_sink::{FileSink, OpendalSinkBackend};
27use crate::sink::{Result, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT, SinkError};
28use crate::source::UnknownFields;
29
30#[derive(Deserialize, Debug, Clone, WithOptions)]
31pub struct FsCommon {
32    /// The directory where the sink file is located.
33    #[serde(rename = "fs.path")]
34    pub path: String,
35}
36
37#[serde_as]
38#[derive(Clone, Debug, Deserialize, WithOptions)]
39pub struct FsConfig {
40    #[serde(flatten)]
41    pub common: FsCommon,
42    #[serde(flatten)]
43    pub batching_strategy: BatchingStrategy,
44
45    pub r#type: String, // accept "append-only"
46
47    #[serde(flatten)]
48    pub unknown_fields: HashMap<String, String>,
49}
50
51impl UnknownFields for FsConfig {
52    fn unknown_fields(&self) -> HashMap<String, String> {
53        self.unknown_fields.clone()
54    }
55}
56
/// Name under which the local-filesystem sink is registered (used as `FsSink::SINK_NAME`).
pub const FS_SINK: &str = "fs";
58
59impl<S: OpendalSinkBackend> FileSink<S> {
60    pub fn new_fs_sink(config: FsConfig) -> Result<Operator> {
61        // Create fs builder.
62        let builder = Fs::default().root(&config.common.path);
63        let operator: Operator = Operator::new(builder)?
64            .layer(LoggingLayer::default())
65            .layer(RetryLayer::default())
66            .finish();
67        Ok(operator)
68    }
69}
70
/// Zero-sized marker type selecting the local-filesystem backend; it carries no state.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct FsSink;
73
74impl OpendalSinkBackend for FsSink {
75    type Properties = FsConfig;
76
77    const SINK_NAME: &'static str = FS_SINK;
78
79    fn from_btreemap(btree_map: BTreeMap<String, String>) -> Result<Self::Properties> {
80        let config = serde_json::from_value::<FsConfig>(serde_json::to_value(btree_map).unwrap())
81            .map_err(|e| SinkError::Config(anyhow!(e)))?;
82        if config.r#type != SINK_TYPE_APPEND_ONLY && config.r#type != SINK_TYPE_UPSERT {
83            return Err(SinkError::Config(anyhow!(
84                "`{}` must be {}, or {}",
85                SINK_TYPE_OPTION,
86                SINK_TYPE_APPEND_ONLY,
87                SINK_TYPE_UPSERT
88            )));
89        }
90        Ok(config)
91    }
92
93    fn new_operator(properties: FsConfig) -> Result<Operator> {
94        FileSink::<FsSink>::new_fs_sink(properties)
95    }
96
97    fn get_path(properties: Self::Properties) -> String {
98        properties.common.path
99    }
100
101    fn get_engine_type() -> super::opendal_sink::EngineType {
102        super::opendal_sink::EngineType::Fs
103    }
104
105    fn get_batching_strategy(properties: Self::Properties) -> BatchingStrategy {
106        BatchingStrategy {
107            max_row_count: properties.batching_strategy.max_row_count,
108            rollover_seconds: properties.batching_strategy.rollover_seconds,
109            path_partition_prefix: properties.batching_strategy.path_partition_prefix,
110        }
111    }
112}