risingwave_connector/parser/avro/confluent_resolver.rs
// Copyright 2024 RisingWave Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use anyhow::Context;
use apache_avro::Schema;
use moka::future::Cache;

use crate::error::ConnectorResult;
use crate::schema::schema_registry::{Client, ConfluentSchema};

/// Fetches schemas from the Confluent Schema Registry and caches them.
///
/// Background: This is mainly used to fetch the Avro **writer schema** during schema evolution: when decoding an
/// Avro message, we must read the message's schema id, decode the message with the *exact same schema*, and then
/// resolve the result against the reader schema. (This is also why Avro has to be used with a schema registry
/// instead of a static schema file.)
///
/// TODO: support protobuf (not sure if it's needed)
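///
/// A rough usage sketch (illustrative only; constructing the registry [`Client`] and extracting the
/// schema id from the Confluent wire format are elided here):
///
/// ```ignore
/// let cache = ConfluentSchemaCache::new(client);
/// // Reader schema: the latest schema registered under the subject.
/// let reader_schema = cache.get_by_subject("my-topic-value").await?;
/// // Writer schema: the exact schema the producer used, looked up by the id
/// // embedded in each message's header.
/// let writer_schema = cache.get_by_id(schema_id).await?;
/// ```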
#[derive(Debug)]
pub struct ConfluentSchemaCache {
    writer_schemas: Cache<i32, Arc<Schema>>,
    confluent_client: Client,
}

impl ConfluentSchemaCache {
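    /// Parses a raw schema fetched from the registry and caches the parsed schema under its schema id.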
    async fn parse_and_cache_schema(
        &self,
        raw_schema: ConfluentSchema,
    ) -> ConnectorResult<Arc<Schema>> {
        let schema =
            Schema::parse_str(&raw_schema.content).context("failed to parse avro schema")?;
        let schema = Arc::new(schema);
        self.writer_schemas
            .insert(raw_schema.id, Arc::clone(&schema))
            .await;
        Ok(schema)
    }

    /// Creates a new `ConfluentSchemaCache`.
    pub fn new(client: Client) -> Self {
        ConfluentSchemaCache {
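            // A max capacity of `u64::MAX` effectively makes the cache unbounded.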
            writer_schemas: Cache::new(u64::MAX),
            confluent_client: client,
        }
    }

    /// Gets the latest schema by subject name, which is used as the *reader schema*.
    pub async fn get_by_subject(&self, subject_name: &str) -> ConnectorResult<Arc<Schema>> {
        let raw_schema = self
            .confluent_client
            .get_schema_by_subject(subject_name)
            .await?;
        self.parse_and_cache_schema(raw_schema).await
    }

    /// Gets a specific schema by id, which is used as the *writer schema*.
    pub async fn get_by_id(&self, schema_id: i32) -> ConnectorResult<Arc<Schema>> {
        // TODO: use `get_with`
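        // (moka's `get_with` would coalesce concurrent misses for the same id, so only one
        // registry request is issued even if many messages hit an uncached schema at once.)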
        if let Some(schema) = self.writer_schemas.get(&schema_id).await {
            Ok(schema)
        } else {
            let raw_schema = self.confluent_client.get_schema_by_id(schema_id).await?;
            self.parse_and_cache_schema(raw_schema).await
        }
    }
}