risingwave_common/array/
vector_array.rs

1// Copyright 2025 RisingWave Labs
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15use std::fmt::Debug;
16
17use risingwave_common_estimate_size::EstimateSize;
18use risingwave_pb::data::PbArray;
19
20use super::{Array, ArrayBuilder, ListArray, ListArrayBuilder, ListRef, ListValue};
21use crate::bitmap::Bitmap;
22use crate::types::{DataType, Scalar, ScalarRef, ToText};
23
24#[derive(Debug, Clone, EstimateSize)]
25pub struct VectorArrayBuilder {
26    inner: ListArrayBuilder,
27    elem_size: usize,
28}
29
30impl ArrayBuilder for VectorArrayBuilder {
31    type ArrayType = VectorArray;
32
33    #[cfg(not(test))]
34    fn new(_capacity: usize) -> Self {
35        panic!("please use `VectorArrayBuilder::with_type` instead");
36    }
37
38    #[cfg(test)]
39    fn new(capacity: usize) -> Self {
40        Self::with_type(capacity, DataType::Vector(3))
41    }
42
43    fn with_type(capacity: usize, ty: DataType) -> Self {
44        let DataType::Vector(elem_size) = ty else {
45            panic!("VectorArrayBuilder only supports Vector type");
46        };
47        Self {
48            inner: ListArrayBuilder::with_type(capacity, DataType::List(DataType::Float32.into())),
49            elem_size,
50        }
51    }
52
53    fn append_n(&mut self, n: usize, value: Option<VectorRef<'_>>) {
54        if let Some(value) = value {
55            assert_eq!(self.elem_size, value.inner.len());
56        }
57        self.inner.append_n(n, value.map(|v| v.inner))
58    }
59
60    fn append_array(&mut self, other: &VectorArray) {
61        assert_eq!(self.elem_size, other.elem_size);
62        self.inner.append_array(&other.inner)
63    }
64
65    fn pop(&mut self) -> Option<()> {
66        self.inner.pop()
67    }
68
69    fn len(&self) -> usize {
70        self.inner.len()
71    }
72
73    fn finish(self) -> VectorArray {
74        VectorArray {
75            inner: self.inner.finish(),
76            elem_size: self.elem_size,
77        }
78    }
79}
80
81#[derive(Debug, Clone)]
82pub struct VectorArray {
83    inner: ListArray,
84    elem_size: usize,
85}
86
87impl EstimateSize for VectorArray {
88    fn estimated_heap_size(&self) -> usize {
89        self.inner.estimated_heap_size()
90    }
91}
92
93impl Array for VectorArray {
94    type Builder = VectorArrayBuilder;
95    type OwnedItem = VectorVal;
96    type RefItem<'a> = VectorRef<'a>;
97
98    unsafe fn raw_value_at_unchecked(&self, idx: usize) -> Self::RefItem<'_> {
99        VectorRef {
100            inner: unsafe { self.inner.raw_value_at_unchecked(idx) },
101        }
102    }
103
104    fn len(&self) -> usize {
105        self.inner.len()
106    }
107
108    fn to_protobuf(&self) -> PbArray {
109        let mut pb_array = self.inner.to_protobuf();
110        pb_array.set_array_type(risingwave_pb::data::PbArrayType::Vector);
111        pb_array.list_array_data.as_mut().unwrap().elem_size = Some(self.elem_size as _);
112        pb_array
113    }
114
115    fn null_bitmap(&self) -> &Bitmap {
116        self.inner.null_bitmap()
117    }
118
119    fn into_null_bitmap(self) -> Bitmap {
120        self.inner.into_null_bitmap()
121    }
122
123    fn set_bitmap(&mut self, bitmap: Bitmap) {
124        self.inner.set_bitmap(bitmap)
125    }
126
127    fn data_type(&self) -> DataType {
128        DataType::Vector(self.elem_size)
129    }
130}
131
132impl VectorArray {
133    pub fn from_protobuf(
134        pb_array: &risingwave_pb::data::PbArray,
135    ) -> super::ArrayResult<super::ArrayImpl> {
136        let inner = ListArray::from_protobuf(pb_array)?.into_list();
137        let elem_size = pb_array
138            .list_array_data
139            .as_ref()
140            .unwrap()
141            .elem_size
142            .unwrap() as _;
143        Ok(Self { inner, elem_size }.into())
144    }
145}
146
147#[derive(Clone, EstimateSize)]
148pub struct VectorVal {
149    inner: ListValue,
150}
151
152impl Debug for VectorVal {
153    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
154        self.as_scalar_ref().fmt(f)
155    }
156}
157
158impl PartialEq for VectorVal {
159    fn eq(&self, _other: &Self) -> bool {
160        todo!("VECTOR_PLACEHOLDER")
161    }
162}
163impl Eq for VectorVal {}
164impl PartialOrd for VectorVal {
165    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
166        Some(self.cmp(other))
167    }
168}
169impl Ord for VectorVal {
170    fn cmp(&self, _other: &Self) -> std::cmp::Ordering {
171        todo!("VECTOR_PLACEHOLDER")
172    }
173}
174
175impl Scalar for VectorVal {
176    type ScalarRefType<'a> = VectorRef<'a>;
177
178    fn as_scalar_ref(&self) -> VectorRef<'_> {
179        VectorRef {
180            inner: self.inner.as_scalar_ref(),
181        }
182    }
183}
184
185impl VectorVal {
186    pub fn from_text(text: &str, size: usize) -> Result<Self, String> {
187        let text = text.trim();
188        let text = text
189            .strip_prefix('[')
190            .ok_or_else(|| "vector must start with [".to_owned())?
191            .strip_suffix(']')
192            .ok_or_else(|| "vector must end with ]".to_owned())?;
193        let inner = text
194            .split(',')
195            .map(|s| {
196                s.trim()
197                    .parse::<f32>()
198                    .map_err(|_| format!("invalid f32: {s}"))
199                    .and_then(|f| {
200                        if f.is_finite() {
201                            Ok(crate::types::F32::from(f))
202                        } else {
203                            Err(format!("{f} not allowed in vector"))
204                        }
205                    })
206            })
207            .collect::<Result<ListValue, _>>()?;
208        if inner.len() != size {
209            return Err(format!("expected {} dimensions, not {}", size, inner.len()));
210        }
211        Ok(Self { inner })
212    }
213
214    /// Create a new vector from inner [`ListValue`].
215    ///
216    /// This is leak of implementation. Prefer [`VectorVal::from_iter`] below.
217    pub fn from_inner(inner: ListValue) -> Self {
218        Self { inner }
219    }
220}
221
222// The `F32` wrapping is unnecessary given nan/inf/-inf are not allowed in vector.
223// There is not going to be `F16` for `halfvec` later; just `f16`.
224// We keep it for now because the inner `List` type contains `PrimitiveArray<F32>`.
225impl FromIterator<crate::types::F32> for VectorVal {
226    fn from_iter<I: IntoIterator<Item = crate::types::F32>>(iter: I) -> Self {
227        let inner = ListValue::from_iter(iter);
228        Self { inner }
229    }
230}
231
232impl FromIterator<f32> for VectorVal {
233    fn from_iter<I: IntoIterator<Item = f32>>(iter: I) -> Self {
234        let inner = ListValue::from_iter(iter.into_iter().map(crate::types::F32::from));
235        Self { inner }
236    }
237}
238
239#[derive(Clone, Copy)]
240pub struct VectorRef<'a> {
241    inner: ListRef<'a>,
242}
243
244impl Debug for VectorRef<'_> {
245    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
246        self.write_with_type(&DataType::Vector(self.into_slice().len()), f)
247    }
248}
249
250impl PartialEq for VectorRef<'_> {
251    fn eq(&self, _other: &Self) -> bool {
252        todo!("VECTOR_PLACEHOLDER")
253    }
254}
255impl Eq for VectorRef<'_> {}
256impl PartialOrd for VectorRef<'_> {
257    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
258        Some(self.cmp(other))
259    }
260}
261impl Ord for VectorRef<'_> {
262    fn cmp(&self, _other: &Self) -> std::cmp::Ordering {
263        todo!("VECTOR_PLACEHOLDER")
264    }
265}
266
267impl ToText for VectorRef<'_> {
268    fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
269        self.write_with_type(&DataType::Vector(self.into_slice().len()), f)
270    }
271
272    fn write_with_type<W: std::fmt::Write>(&self, _ty: &DataType, f: &mut W) -> std::fmt::Result {
273        write!(f, "[")?;
274        for (i, item) in self.inner.iter().enumerate() {
275            if i > 0 {
276                write!(f, ",")?;
277            }
278            item.write_with_type(&DataType::Float32, f)?;
279        }
280        write!(f, "]")
281    }
282}
283
284impl<'a> ScalarRef<'a> for VectorRef<'a> {
285    type ScalarType = VectorVal;
286
287    fn to_owned_scalar(&self) -> VectorVal {
288        VectorVal {
289            inner: self.inner.to_owned_scalar(),
290        }
291    }
292
293    fn hash_scalar<H: std::hash::Hasher>(&self, state: &mut H) {
294        self.inner.hash_scalar(state)
295    }
296}
297
298impl<'a> VectorRef<'a> {
299    /// Get the inner [`ListRef`].
300    ///
301    /// This is leak of implementation. Prefer [`Self::into_slice`] below.
302    pub fn into_inner(self) -> ListRef<'a> {
303        self.inner
304    }
305
306    /// Get the slice of floats in this vector.
307    pub fn into_slice(self) -> &'a [f32] {
308        crate::types::F32::inner_slice(self.inner.as_primitive_slice().unwrap())
309    }
310}