// risingwave_connector/sink/file_sink/gcs.rs

// Copyright 2024 RisingWave Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15use std::collections::{BTreeMap, HashMap};
16
17use anyhow::anyhow;
18use opendal::Operator;
19use opendal::layers::{LoggingLayer, RetryLayer};
20use opendal::services::Gcs;
21use serde::Deserialize;
22use serde_with::serde_as;
23use with_options::WithOptions;
24
25use super::opendal_sink::{BatchingStrategy, FileSink};
26use crate::sink::file_sink::opendal_sink::OpendalSinkBackend;
27use crate::sink::{Result, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT, SinkError};
28use crate::source::UnknownFields;
29
/// Connection and authentication settings shared by the GCS file sink.
///
/// Each field is populated from the sink's `WITH (...)` options via the
/// `gcs.*` serde renames below.
#[derive(Deserialize, Debug, Clone, WithOptions)]
pub struct GcsCommon {
    /// Name of the destination GCS bucket.
    #[serde(rename = "gcs.bucket_name")]
    pub bucket_name: String,

    /// The base64 encoded credential key. If not set, ADC will be used.
    // NOTE(review): this field is a required `String` (no `default` attribute),
    // so a completely absent `gcs.credential` key fails deserialization —
    // confirm whether the ADC fallback actually applies to an empty string.
    #[serde(rename = "gcs.credential")]
    pub credential: String,

    /// If credential/ADC is not set. The service account can be used to provide the credential info.
    #[serde(rename = "gcs.service_account", default)]
    pub service_account: String,

    /// The directory where the sink file is located
    #[serde(rename = "gcs.path")]
    pub path: String,
}
47
/// Complete configuration for a GCS file sink: connection settings,
/// file-batching behavior, sink type, and any unrecognized options.
#[serde_as]
#[derive(Clone, Debug, Deserialize, WithOptions)]
pub struct GcsConfig {
    /// Connection/auth options (`gcs.*` keys), flattened into the option map.
    #[serde(flatten)]
    pub common: GcsCommon,

    /// Controls when the current sink file is rolled over; flattened from the
    /// same option map as `common`.
    #[serde(flatten)]
    pub batching_strategy: BatchingStrategy,

    // NOTE(review): the trailing comment says only "append-only", but
    // `GcsSink::from_btreemap` below also accepts "upsert" — confirm which is
    // intended and align the two.
    pub r#type: String, // accept "append-only"

    /// Options not matched by any field above; surfaced via `UnknownFields`.
    #[serde(flatten)]
    pub unknown_fields: HashMap<String, String>,
}
62
63impl UnknownFields for GcsConfig {
64    fn unknown_fields(&self) -> HashMap<String, String> {
65        self.unknown_fields.clone()
66    }
67}
68
/// Connector name under which this sink backend is registered.
pub const GCS_SINK: &str = "gcs";
70
71impl<S: OpendalSinkBackend> FileSink<S> {
72    pub fn new_gcs_sink(config: GcsConfig) -> Result<Operator> {
73        // Create gcs builder.
74        let builder = Gcs::default()
75            .bucket(&config.common.bucket_name)
76            .credential(&config.common.credential)
77            .service_account(&config.common.service_account);
78
79        let operator: Operator = Operator::new(builder)?
80            .layer(LoggingLayer::default())
81            .layer(RetryLayer::default())
82            .finish();
83        Ok(operator)
84    }
85}
86
/// Marker type selecting GCS as the [`OpendalSinkBackend`] implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GcsSink;
89
90impl OpendalSinkBackend for GcsSink {
91    type Properties = GcsConfig;
92
93    const SINK_NAME: &'static str = GCS_SINK;
94
95    fn from_btreemap(btree_map: BTreeMap<String, String>) -> Result<Self::Properties> {
96        let config = serde_json::from_value::<GcsConfig>(serde_json::to_value(btree_map).unwrap())
97            .map_err(|e| SinkError::Config(anyhow!(e)))?;
98        if config.r#type != SINK_TYPE_APPEND_ONLY && config.r#type != SINK_TYPE_UPSERT {
99            return Err(SinkError::Config(anyhow!(
100                "`{}` must be {}, or {}",
101                SINK_TYPE_OPTION,
102                SINK_TYPE_APPEND_ONLY,
103                SINK_TYPE_UPSERT
104            )));
105        }
106        Ok(config)
107    }
108
109    fn new_operator(properties: GcsConfig) -> Result<Operator> {
110        FileSink::<GcsSink>::new_gcs_sink(properties)
111    }
112
113    fn get_path(properties: Self::Properties) -> String {
114        properties.common.path
115    }
116
117    fn get_engine_type() -> super::opendal_sink::EngineType {
118        super::opendal_sink::EngineType::Gcs
119    }
120
121    fn get_batching_strategy(properties: Self::Properties) -> BatchingStrategy {
122        BatchingStrategy {
123            max_row_count: properties.batching_strategy.max_row_count,
124            rollover_seconds: properties.batching_strategy.rollover_seconds,
125            path_partition_prefix: properties.batching_strategy.path_partition_prefix,
126        }
127    }
128}