risingwave_common::array::data_chunk

Struct DataChunk

source
pub struct DataChunk {
    columns: Arc<[ArrayRef]>,
    visibility: Bitmap,
}
Expand description

DataChunk is a collection of Columns, with a visibility mask for each row. For instance, we could have a DataChunk of this format.

| v1 | v2 | v3 |
| 1  | a  | t  |
| 2  | b  | f  |
| 3  | c  | t  |
| 4  | d  | f  |

Our columns are v1, v2, v3. Then, if the Visibility Mask hides rows 2 and 4, we will only have these rows visible:

| v1 | v2 | v3 |
| 1  | a  | t  |
| 3  | c  | t  |

Fields§

§columns: Arc<[ArrayRef]>§visibility: Bitmap

Implementations§

source§

impl DataChunk

source

pub(crate) const PRETTY_TABLE_PRESET: &'static str = "||--+-++| ++++++"

source

pub fn new(columns: Vec<ArrayRef>, visibility: impl Into<Bitmap>) -> Self

Create a DataChunk with columns and visibility.

The visibility can either be a Bitmap or a simple cardinality number.

source

pub fn new_dummy(cardinality: usize) -> Self

new_dummy creates a data chunk without columns but only a cardinality.

source

pub fn from_rows(rows: &[impl Row], data_types: &[DataType]) -> Self

Build a DataChunk with rows.

Panics if rows is empty.

Should prefer using DataChunkBuilder instead to avoid unnecessary allocation of rows.

source

pub fn next_visible_row_idx(&self, row_idx: usize) -> Option<usize>

Return the next visible row index on or after row_idx.

source

pub fn into_parts(self) -> (Vec<ArrayRef>, Bitmap)

source

pub fn into_parts_v2(self) -> (Arc<[ArrayRef]>, Bitmap)

source

pub fn from_parts(columns: Arc<[ArrayRef]>, visibilities: Bitmap) -> Self

source

pub fn dimension(&self) -> usize

source

pub fn cardinality(&self) -> usize

cardinality returns the number of visible tuples

source

pub fn capacity(&self) -> usize

capacity returns physical length of any chunk column

source

pub fn selectivity(&self) -> f64

source

pub fn with_visibility(&self, visibility: impl Into<Bitmap>) -> Self

source

pub fn visibility(&self) -> &Bitmap

source

pub fn set_visibility(&mut self, visibility: Bitmap)

source

pub fn is_compacted(&self) -> bool

source

pub fn column_at(&self, idx: usize) -> &ArrayRef

source

pub fn columns(&self) -> &[ArrayRef]

source

pub fn data_types(&self) -> Vec<DataType>

Returns the data types of all columns.

source

pub fn split_column_at(&self, idx: usize) -> (Self, Self)

Divides one chunk into two at a column index.

§Panics

Panics if idx > columns.len().

source

pub fn to_protobuf(&self) -> PbDataChunk

source

pub fn compact(self) -> Self

compact will convert the chunk to compact format. Compacting removes the hidden rows, and returns a new visibility mask which indicates this.

compact has trade-offs:

Cost: It has to rebuild each column, meaning it will incur the cost of copying bytes from the original column array to the new one.

Benefit: The main benefit is that the data chunk is smaller, taking up less memory. We can also save the cost of iterating over many hidden rows.

source

pub fn uncompact(self, vis: Bitmap) -> Self

Scatter a compacted chunk to a new chunk with the given visibility.

source

pub fn compact_cow(&self) -> Cow<'_, Self>

Convert the chunk to compact format.

If the chunk is not compacted, return a new compacted chunk, otherwise return a reference to self.

source

pub fn from_protobuf(proto: &PbDataChunk) -> ArrayResult<Self>

source

pub fn rechunk( chunks: &[DataChunk], each_size_limit: usize, ) -> ArrayResult<Vec<DataChunk>>

rechunk creates a new vector of data chunks, each of size each_size_limit. When the total cardinality of all the chunks is not evenly divisible by each_size_limit, the last new chunk will hold the remainder.

source

pub fn get_hash_values<H: BuildHasher>( &self, column_idxes: &[usize], hasher_builder: H, ) -> Vec<HashCode<H>>

Compute hash values for each row. The number of returned HashCodes is self.capacity(). When skip_invisible_row is true, the HashCode for the invisible rows is arbitrary.

source

pub fn row_at(&self, pos: usize) -> (RowRef<'_>, bool)

Random access a tuple in a data chunk. Return in a row format.

§Arguments
  • pos - Index of look up tuple
  • RowRef - Reference of data tuple
  • bool - whether this tuple is visible
source

pub fn row_at_unchecked_vis(&self, pos: usize) -> RowRef<'_>

Random access a tuple in a data chunk. Return in a row format. Note that this function does not return whether the row is visible.

§Arguments
  • pos - Index of look up tuple
source

pub fn to_pretty(&self) -> impl Display

Returns a table-like text representation of the DataChunk.

source

pub fn keep_columns(&self, column_indices: &[usize]) -> Self

Keep the specified columns and set the remaining elements to null.

§Example
i i i                            i i i
1 2 3  --> keep_columns([1]) --> . 2 .
4 5 6                            . 5 .
source

pub fn project(&self, indices: &[usize]) -> Self

Reorder (and possibly remove) columns.

e.g. if indices is [2, 1, 0], and the chunk contains column [a, b, c], then the output will be [c, b, a]. If indices is [2, 0], then the output will be [c, a]. If the input mapping is identity mapping, no reorder will be performed.

source

pub fn project_with_vis(&self, indices: &[usize], visibility: Bitmap) -> Self

Reorder columns and set visibility.

source

pub fn reorder_rows(&self, indexes: &[usize]) -> Self

Reorder rows by indexes.

source

fn partition_sizes(&self) -> (usize, Vec<&ArrayRef>)

§Partition fixed size datums and variable length ones.

In some cases, we have fixed size for the entire column, when the datatypes are fixed size or the datums are constants. As such we can compute the size for it just once for the column.

Otherwise, for variable sized datatypes, such as varchar, we have to individually compute their sizes per row.

source

unsafe fn compute_size_of_variable_cols_in_row( variable_cols: &[&ArrayRef], row_idx: usize, ) -> usize

source

unsafe fn init_buffer( row_len_fixed: usize, variable_cols: &[&ArrayRef], row_idx: usize, ) -> Vec<u8>

source

pub fn serialize(&self) -> Vec<Bytes>

Serialize each row into value encoding bytes.

The returned vector’s size is self.capacity(), and each invisible row yields empty bytes.

source

pub fn serialize_with(&self, serializer: &impl ValueRowSerializer) -> Vec<Bytes>

Serialize each row into bytes with given serializer.

This is similar to serialize but it uses a custom serializer. Prefer serialize if possible since it might be more efficient due to columnar operations.

source

pub fn estimate_value_encoding_size(&self, column_indices: &[usize]) -> usize

Estimate size of hash keys. Their indices in a row are indicated by column_indices. Size here refers to the number of u8s required to store the serialized datum.

source§

impl DataChunk

source

pub fn rows(&self) -> DataChunkRefIter<'_>

Get an iterator for visible rows.

source

pub fn rows_in(&self, range: Range<usize>) -> DataChunkRefIter<'_>

Get an iterator for visible rows in range.

source

pub fn rows_with_holes(&self) -> DataChunkRefIterWithHoles<'_>

Get an iterator for all rows in the chunk, and a None represents an invisible row.

Trait Implementations§

source§

impl Clone for DataChunk

source§

fn clone(&self) -> DataChunk

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl DataChunkTestExt for DataChunk

source§

fn from_pretty(s: &str) -> Self

Parse a chunk from string. Read more
source§

fn with_invisible_holes(self) -> Self
where Self: Sized,

Insert one invisible hole after every record.
source§

fn assert_valid(&self)

Panic if the chunk is invalid.
source§

fn gen_data_chunk( chunk_offset: usize, chunk_size: usize, data_types: &[DataType], varchar_properties: &VarcharProperty, visibility_percent: f64, ) -> Self

Generate data chunk when supplied with chunk_size and column data types.
source§

fn gen_data_chunks( num_of_chunks: usize, chunk_size: usize, data_types: &[DataType], varchar_properties: &VarcharProperty, visibility_percent: f64, ) -> Vec<Self>

Generate data chunks when supplied with chunk_size and column data types.
source§

const SEED: u64 = 18_403_958_415_801_939_711u64

SEED for generating data chunk.
source§

impl Debug for DataChunk

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl EstimateSize for DataChunk

source§

fn estimated_heap_size(&self) -> usize

The estimated heap size of the current struct in bytes.
source§

fn estimated_size(&self) -> usize
where Self: Sized,

The estimated total size of the current struct in bytes, including the estimated_heap_size and the size of Self.
source§

impl<'a> From<&'a StructArray> for DataChunk

source§

fn from(array: &'a StructArray) -> Self

Converts to this type from the input type.
source§

impl From<DataChunk> for StreamChunk

StreamChunk can be created from DataChunk with all operations set to Insert.

source§

fn from(data: DataChunk) -> Self

Converts to this type from the input type.
source§

impl From<DataChunk> for StructArray

source§

fn from(chunk: DataChunk) -> Self

Converts to this type from the input type.
source§

impl PartialEq for DataChunk

source§

fn eq(&self, other: &DataChunk) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
source§

impl __ExtTrait9496273680842896069 for DataChunk

source§

fn estimate_hash_key_sizes(&self, column_indices: &[usize]) -> Vec<usize>

source§

impl StructuralPartialEq for DataChunk

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> CloneToUninit for T
where T: Clone,

source§

unsafe fn clone_to_uninit(&self, dst: *mut T)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dst. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

§

impl<T> FromRef<T> for T
where T: Clone,

§

fn from_ref(input: &T) -> T

Converts to this type from a reference to the input type.
§

impl<T> FutureExt for T

§

fn with_context(self, otel_cx: Context) -> WithContext<Self>

Attaches the provided Context to this type, returning a WithContext wrapper. Read more
§

fn with_current_context(self) -> WithContext<Self>

Attaches the current Context to this type, returning a WithContext wrapper. Read more
§

impl<T> Instrument for T

§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided [Span], returning an Instrumented wrapper. Read more
§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> IntoEither for T

source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

impl<T> IntoRequest<T> for T

source§

fn into_request(self) -> Request<T>

Wrap the input message T in a tonic::Request
§

impl<T> IntoResult<T> for T

§

type Err = Infallible

§

fn into_result(self) -> Result<T, <T as IntoResult<T>>::Err>

source§

impl<M> MetricVecRelabelExt for M

source§

fn relabel( self, metric_level: MetricLevel, relabel_threshold: MetricLevel, ) -> RelabeledMetricVec<M>

source§

fn relabel_n( self, metric_level: MetricLevel, relabel_threshold: MetricLevel, relabel_num: usize, ) -> RelabeledMetricVec<M>

source§

fn relabel_debug_1( self, relabel_threshold: MetricLevel, ) -> RelabeledMetricVec<M>

Equivalent to RelabeledMetricVec::with_metric_level_relabel_n with metric_level set to MetricLevel::Debug and relabel_num set to 1.
§

impl<T> Pointable for T

§

const ALIGN: usize = _

The alignment of pointer.
§

type Init = T

The type for initializers.
§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a with the given initializer. Read more
§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
source§

impl<T> Same for T

source§

type Output = T

Should always be Self
source§

impl<T> ToOwned for T
where T: Clone,

source§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

source§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

§

fn vzip(self) -> V

§

impl<T> WithSubscriber for T

§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a [WithDispatch] wrapper. Read more
§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a [WithDispatch] wrapper. Read more
source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
§

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,

§

impl<T> Allocation for T
where T: RefUnwindSafe + Send + Sync,

§

impl<T> DevConfig for T
where T: Send + Sync + 'static + Debug,

source§

impl<T> LruValue for T
where T: Send + Sync,

§

impl<T> Value for T
where T: Send + Sync + 'static,