risingwave_connector/parser/avro/confluent_resolver.rs

// Copyright 2025 RisingWave Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use anyhow::Context;
use apache_avro::Schema;
use moka::future::Cache;

use crate::error::ConnectorResult;
use crate::schema::schema_registry::{Client, ConfluentSchema};

/// Fetches schemas from the Confluent schema registry and caches them.
///
/// Background: this is mainly used to obtain the Avro **writer schema** (during schema
/// evolution). When decoding an Avro message, we must read the message's schema id, decode
/// the message with *exactly the same schema* it was written with, and then convert the
/// result with the reader schema. (This is also why Avro has to be used with a schema
/// registry instead of a static schema file.)
///
/// TODO: support protobuf (not sure if it's needed)
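///
/// # Example (sketch)
///
/// A hedged usage sketch, assuming an already-constructed registry [`Client`] and a
/// `schema_id` parsed from the Confluent wire format (magic byte `0x00` followed by a
/// big-endian 4-byte schema id):
///
/// ```ignore
/// let cache = ConfluentSchemaCache::new(client);
/// // Reader schema: the latest schema registered under the subject.
/// let reader = cache.get_by_subject("my-topic-value").await?;
/// // Writer schema: the exact schema the message was encoded with.
/// let writer = cache.get_by_id(schema_id).await?;
/// ```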
#[derive(Debug)]
pub struct ConfluentSchemaCache {
    writer_schemas: Cache<i32, Arc<Schema>>,
    confluent_client: Client,
}

impl ConfluentSchemaCache {
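    /// Parses a raw schema fetched from the registry and caches it under its schema id.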
    async fn parse_and_cache_schema(
        &self,
        raw_schema: ConfluentSchema,
    ) -> ConnectorResult<Arc<Schema>> {
        let schema =
            Schema::parse_str(&raw_schema.content).context("failed to parse avro schema")?;
        let schema = Arc::new(schema);
        self.writer_schemas
            .insert(raw_schema.id, Arc::clone(&schema))
            .await;
        Ok(schema)
    }

    /// Creates a new `ConfluentSchemaCache`.
    pub fn new(client: Client) -> Self {
        ConfluentSchemaCache {
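            // Registered schemas are immutable per id, so cached entries never need
            // invalidation; `u64::MAX` makes the cache effectively unbounded.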
            writer_schemas: Cache::new(u64::MAX),
            confluent_client: client,
        }
    }

    /// Gets the latest schema by subject name, which is used as the *reader schema*.
    pub async fn get_by_subject(&self, subject_name: &str) -> ConnectorResult<Arc<Schema>> {
        let raw_schema = self
            .confluent_client
            .get_schema_by_subject(subject_name)
            .await?;
        self.parse_and_cache_schema(raw_schema).await
    }

    /// Gets a specific schema by id, which is used as the *writer schema*.
    pub async fn get_by_id(&self, schema_id: i32) -> ConnectorResult<Arc<Schema>> {
        // TODO: use `get_with` to coalesce concurrent fetches of the same id
        // (see the sketch after this impl block).
        if let Some(schema) = self.writer_schemas.get(&schema_id).await {
            Ok(schema)
        } else {
            let raw_schema = self.confluent_client.get_schema_by_id(schema_id).await?;
            self.parse_and_cache_schema(raw_schema).await
        }
    }
}
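
// A sketch (not current behavior) of how the `get_with` TODO in `get_by_id` might look:
// `moka`'s `try_get_with` coalesces concurrent lookups of the same key, so a cold schema
// id triggers at most one registry request even when many messages are decoded at once.
// The method name and the `anyhow!`-based error flattening are illustrative assumptions,
// not existing API.
impl ConfluentSchemaCache {
    #[allow(dead_code)]
    async fn get_by_id_coalesced(&self, schema_id: i32) -> ConnectorResult<Arc<Schema>> {
        // Runs only for the first caller of a given id; concurrent callers wait
        // for the in-flight fetch instead of issuing their own.
        let fetch_and_parse = async {
            let raw_schema = self.confluent_client.get_schema_by_id(schema_id).await?;
            let schema =
                Schema::parse_str(&raw_schema.content).context("failed to parse avro schema")?;
            Ok::<_, crate::error::ConnectorError>(Arc::new(schema))
        };
        self.writer_schemas
            .try_get_with(schema_id, fetch_and_parse)
            .await
            // All waiters on the same in-flight fetch share one error as `Arc<E>`;
            // flatten it into a fresh error for this caller.
            .map_err(|e| anyhow::anyhow!(e).into())
    }
}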