// risingwave_connector/parser/avro/confluent_resolver.rs
// Copyright 2025 RisingWave Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use anyhow::Context;
use apache_avro::Schema;
use moka::future::Cache;

use crate::error::ConnectorResult;
use crate::schema::schema_registry::{Client, ConfluentSchema};

24/// Fetch schemas from confluent schema registry and cache them.
25///
26/// Background: This is mainly used for Avro **writer schema** (during schema evolution): When decoding an Avro message,
27/// we must get the message's schema id, and use the *exactly same schema* to decode the message, and then
28/// convert it with the reader schema. (This is also why Avro has to be used with a schema registry instead of a static schema file.)
29///
30/// TODO: support protobuf (not sure if it's needed)
31#[derive(Debug)]
32pub struct ConfluentSchemaCache {
33 writer_schemas: Cache<i32, Arc<Schema>>,
34 confluent_client: Client,
35}
36
37impl ConfluentSchemaCache {
38 async fn parse_and_cache_schema(
39 &self,
40 raw_schema: ConfluentSchema,
41 ) -> ConnectorResult<Arc<Schema>> {
42 let schema =
43 Schema::parse_str(&raw_schema.content).context("failed to parse avro schema")?;
44 let schema = Arc::new(schema);
45 self.writer_schemas
46 .insert(raw_schema.id, Arc::clone(&schema))
47 .await;
48 Ok(schema)
49 }
50
51 /// Create a new `ConfluentSchemaResolver`
52 pub fn new(client: Client) -> Self {
53 ConfluentSchemaCache {
54 writer_schemas: Cache::new(u64::MAX),
55 confluent_client: client,
56 }
57 }
58
59 /// Gets the latest schema by subject name, which is used as *reader schema*.
60 pub async fn get_by_subject(&self, subject_name: &str) -> ConnectorResult<Arc<Schema>> {
61 let raw_schema = self
62 .confluent_client
63 .get_schema_by_subject(subject_name)
64 .await?;
65 self.parse_and_cache_schema(raw_schema).await
66 }
67
68 /// Gets the a specific schema by id, which is used as *writer schema*.
69 pub async fn get_by_id(&self, schema_id: i32) -> ConnectorResult<Arc<Schema>> {
70 // TODO: use `get_with`
71 if let Some(schema) = self.writer_schemas.get(&schema_id).await {
72 Ok(schema)
73 } else {
74 let raw_schema = self.confluent_client.get_schema_by_id(schema_id).await?;
75 self.parse_and_cache_schema(raw_schema).await
76 }
77 }
78}