Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion vortex-buffer/src/bit/buf_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

use std::ops::Not;

use arrow_buffer::bit_chunk_iterator::BitChunks;
use arrow_buffer::bit_chunk_iterator::{BitChunks, UnalignedBitChunk};
use bitvec::view::BitView;

use crate::bit::{get_bit_unchecked, ops, set_bit_unchecked, unset_bit_unchecked};
Expand Down Expand Up @@ -495,6 +495,21 @@ impl BitBufferMut {
pub fn as_mut_ptr(&mut self) -> *mut u8 {
self.buffer.as_mut_ptr()
}

/// Views the buffer through an [`UnalignedBitChunk`], which splits it on 8 byte boundaries
/// into [prefix, \<full chunks\>, suffix].
///
/// The view is built from the backing byte slice together with this buffer's `offset` and
/// `len`.
pub fn unaligned_chunks(&self) -> UnalignedBitChunk<'_> {
    let bytes = self.buffer.as_slice();
    UnalignedBitChunk::new(bytes, self.offset, self.len)
}

/// Counts the set bits in the buffer.
///
/// The count is computed on every call by summing the ones reported by
/// [`Self::unaligned_chunks`]; nothing is cached.
pub fn true_count(&self) -> usize {
    let chunks = self.unaligned_chunks();
    chunks.count_ones()
}

/// Counts the unset bits in the buffer.
///
/// Derived as the buffer length minus [`Self::true_count`].
pub fn false_count(&self) -> usize {
    let set_bits = self.true_count();
    self.len - set_bits
}
}

impl Default for BitBufferMut {
Expand Down
14 changes: 14 additions & 0 deletions vortex-compute/src/arrow/list.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use arrow_array::ArrayRef;
use vortex_error::VortexResult;
use vortex_vector::listview::ListViewVector;

use crate::arrow::IntoArrow;

// Arrow conversion for `ListViewVector`. The conversion itself is not implemented yet; this
// impl only reserves the trait slot.
impl IntoArrow<ArrayRef> for ListViewVector {
/// Placeholder implementation.
///
/// # Panics
///
/// Always panics via `todo!`; no `Ok` or `Err` value is ever returned.
fn into_arrow(self) -> VortexResult<ArrayRef> {
todo!("Figure out how to do this")
}
}
1 change: 1 addition & 0 deletions vortex-compute/src/arrow/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ mod binaryview;
mod bool;
mod decimal;
mod fixed_size_list;
mod list;
mod mask;
mod null;
mod primitive;
Expand Down
34 changes: 22 additions & 12 deletions vortex-compute/src/mask/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use vortex_vector::binaryview::{BinaryViewType, BinaryViewVector};
use vortex_vector::bool::BoolVector;
use vortex_vector::decimal::{DVector, DecimalVector};
use vortex_vector::fixed_size_list::FixedSizeListVector;
use vortex_vector::listview::ListViewVector;
use vortex_vector::null::NullVector;
use vortex_vector::primitive::{PVector, PrimitiveVector};
use vortex_vector::struct_::StructVector;
Expand Down Expand Up @@ -46,31 +47,31 @@ impl MaskValidity for BoolVector {
}
}

impl MaskValidity for DecimalVector {
impl MaskValidity for PrimitiveVector {
fn mask_validity(self, mask: &Mask) -> Self {
match_each_dvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() })
match_each_pvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() })
}
}

impl<D: NativeDecimalType> MaskValidity for DVector<D> {
impl<T: NativePType> MaskValidity for PVector<T> {
fn mask_validity(self, mask: &Mask) -> Self {
let (ps, elements, validity) = self.into_parts();
// SAFETY: we are preserving the original elements buffer and only modifying the validity.
unsafe { Self::new_unchecked(ps, elements, validity.bitand(mask)) }
let (data, validity) = self.into_parts();
// SAFETY: we are preserving the original data buffer and only modifying the validity.
unsafe { Self::new_unchecked(data, validity.bitand(mask)) }
}
}

impl MaskValidity for PrimitiveVector {
impl MaskValidity for DecimalVector {
fn mask_validity(self, mask: &Mask) -> Self {
match_each_pvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() })
match_each_dvector!(self, |v| { MaskValidity::mask_validity(v, mask).into() })
}
}

impl<T: NativePType> MaskValidity for PVector<T> {
impl<D: NativeDecimalType> MaskValidity for DVector<D> {
fn mask_validity(self, mask: &Mask) -> Self {
let (data, validity) = self.into_parts();
// SAFETY: we are preserving the original data buffer and only modifying the validity.
unsafe { Self::new_unchecked(data, validity.bitand(mask)) }
let (ps, elements, validity) = self.into_parts();
// SAFETY: we are preserving the original elements buffer and only modifying the validity.
unsafe { Self::new_unchecked(ps, elements, validity.bitand(mask)) }
}
}

Expand All @@ -82,6 +83,15 @@ impl<T: BinaryViewType> MaskValidity for BinaryViewVector<T> {
}
}

impl MaskValidity for ListViewVector {
    /// Intersects the vector's validity with `mask`, leaving the list data untouched.
    fn mask_validity(self, mask: &Mask) -> Self {
        let (elems, list_offsets, list_sizes, current_validity) = self.into_parts();
        let masked_validity = current_validity.bitand(mask);
        // SAFETY: `elements`, `offsets` and `sizes` are taken unchanged from an existing
        // vector; only the validity is replaced by its intersection with `mask`.
        unsafe { Self::new_unchecked(elems, list_offsets, list_sizes, masked_validity) }
    }
}

impl MaskValidity for FixedSizeListVector {
fn mask_validity(self, mask: &Mask) -> Self {
let (elements, list_size, validity) = self.into_parts();
Expand Down
174 changes: 87 additions & 87 deletions vortex-mask/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -123,93 +123,6 @@ pub struct MaskValues {
density: f64,
}

// Accessors and lazily computed views (indices / slices) over the mask's bit buffer.
impl MaskValues {
/// Returns the length of the mask.
#[inline]
pub fn len(&self) -> usize {
self.buffer.len()
}

/// Returns true if the mask is empty, i.e., its length is 0.
#[inline]
pub fn is_empty(&self) -> bool {
self.buffer.is_empty()
}

/// Returns the true count of the mask.
///
/// This reads the stored `true_count` field; the buffer is not rescanned.
#[inline]
pub fn true_count(&self) -> usize {
self.true_count
}

/// Returns the boolean buffer representation of the mask.
#[inline]
pub fn bit_buffer(&self) -> &BitBuffer {
&self.buffer
}

/// Returns the boolean value at a given index.
#[inline]
pub fn value(&self, index: usize) -> bool {
self.buffer.value(index)
}

/// Constructs an indices vector from one of the other representations.
///
/// The vector is built on first use inside `get_or_init` and the stored value is returned
/// on every call.
///
/// # Panics
///
/// Panics if the number of produced indices differs from the stored `true_count`.
pub fn indices(&self) -> &[usize] {
self.indices.get_or_init(|| {
// No set bits: nothing to collect.
if self.true_count == 0 {
return vec![];
}

// Every bit is set: the indices are simply `0..len`.
if self.true_count == self.len() {
return (0..self.len()).collect();
}

// Slices were already computed: expand each `(start, end)` pair as the range `start..end`.
if let Some(slices) = self.slices.get() {
let mut indices = Vec::with_capacity(self.true_count);
indices.extend(slices.iter().flat_map(|(start, end)| *start..*end));
debug_assert!(indices.is_sorted());
assert_eq!(indices.len(), self.true_count);
return indices;
}

// Otherwise fall back to reading the set positions from the bit buffer.
let mut indices = Vec::with_capacity(self.true_count);
indices.extend(self.buffer.set_indices());
debug_assert!(indices.is_sorted());
assert_eq!(indices.len(), self.true_count);
indices
})
}

/// Constructs a slices vector from one of the other representations.
///
/// The vector is built on first use inside `get_or_init` and the stored value is returned
/// on every call. Each entry is a `(start, end)` pair; `indices` expands such a pair as the
/// range `start..end`.
#[allow(clippy::cast_possible_truncation)]
#[inline]
pub fn slices(&self) -> &[(usize, usize)] {
self.slices.get_or_init(|| {
// Every bit is set: a single slice spans the whole mask.
if self.true_count == self.len() {
return vec![(0, self.len())];
}

self.buffer.set_slices().collect()
})
}

/// Return an iterator over either indices or slices of the mask based on a density threshold.
///
/// Slices are used when the stored `density` is greater than or equal to `threshold`;
/// indices are used otherwise.
#[inline]
pub fn threshold_iter(&self, threshold: f64) -> MaskIter<'_> {
if self.density >= threshold {
MaskIter::Slices(self.slices())
} else {
MaskIter::Indices(self.indices())
}
}

/// Extracts the internal [`BitBuffer`].
pub(crate) fn into_buffer(self) -> BitBuffer {
self.buffer
}
}

impl Mask {
/// Create a new Mask where all values are set.
#[inline]
Expand Down Expand Up @@ -642,6 +555,93 @@ impl Mask {
}
}

impl MaskValues {
    /// Number of positions covered by the mask.
    #[inline]
    pub fn len(&self) -> usize {
        self.buffer.len()
    }

    /// Whether the mask has a length of zero.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    /// Number of set positions, read from the stored `true_count` field.
    #[inline]
    pub fn true_count(&self) -> usize {
        self.true_count
    }

    /// Borrows the underlying [`BitBuffer`].
    #[inline]
    pub fn bit_buffer(&self) -> &BitBuffer {
        &self.buffer
    }

    /// Reads the boolean stored at `index`.
    #[inline]
    pub fn value(&self, index: usize) -> bool {
        self.buffer.value(index)
    }

    /// Returns the positions of the set bits, deriving them from another representation.
    ///
    /// The vector is built on first use inside `get_or_init`; later calls return the stored
    /// value.
    ///
    /// # Panics
    ///
    /// Panics if the number of produced indices differs from the stored `true_count`.
    pub fn indices(&self) -> &[usize] {
        self.indices.get_or_init(|| {
            let expected = self.true_count;

            // Nothing is set.
            if expected == 0 {
                return Vec::new();
            }

            // Everything is set, so the indices are exactly `0..len`.
            let total = self.len();
            if expected == total {
                return (0..total).collect();
            }

            let mut indices = Vec::with_capacity(expected);
            match self.slices.get() {
                // Slices already exist: expand each `(start, end)` pair as `start..end`.
                Some(ranges) => {
                    for (start, end) in ranges {
                        indices.extend(*start..*end);
                    }
                }
                // Otherwise read the set positions from the bit buffer.
                None => indices.extend(self.buffer.set_indices()),
            }

            debug_assert!(indices.is_sorted());
            assert_eq!(indices.len(), expected);
            indices
        })
    }

    /// Returns the set regions of the mask as `(start, end)` pairs.
    ///
    /// The vector is built on first use inside `get_or_init`; later calls return the stored
    /// value.
    #[allow(clippy::cast_possible_truncation)]
    #[inline]
    pub fn slices(&self) -> &[(usize, usize)] {
        self.slices.get_or_init(|| {
            let total = self.len();
            if self.true_count != total {
                self.buffer.set_slices().collect()
            } else {
                // Every bit is set: one slice spans the whole mask.
                vec![(0, total)]
            }
        })
    }

    /// Picks between slice and index iteration using a density threshold.
    ///
    /// Yields [`MaskIter::Slices`] when the stored `density` is greater than or equal to
    /// `threshold`, and [`MaskIter::Indices`] otherwise.
    #[inline]
    pub fn threshold_iter(&self, threshold: f64) -> MaskIter<'_> {
        let use_slices = self.density >= threshold;
        if use_slices {
            MaskIter::Slices(self.slices())
        } else {
            MaskIter::Indices(self.indices())
        }
    }

    /// Consumes the values and hands back the internal [`BitBuffer`].
    pub(crate) fn into_buffer(self) -> BitBuffer {
        let Self { buffer, .. } = self;
        buffer
    }
}

/// Iterator over the indices or slices of a mask.
pub enum MaskIter<'a> {
/// Slice of pre-cached indices of a mask.
Expand Down
18 changes: 18 additions & 0 deletions vortex-mask/src/mask_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,24 @@ impl MaskMut {
pub fn is_empty(&self) -> bool {
self.len() == 0
}

/// Reports whether every value in the mask is true.
///
/// An `Empty` mask counts as all-true, a `Constant` mask answers with its stored value, and
/// a `Builder` mask is all-true when its true count equals its length.
pub fn all_true(&self) -> bool {
    let inner = &self.0;
    match inner {
        Inner::Builder(bits) => bits.len() == bits.true_count(),
        Inner::Constant { value, .. } => *value,
        Inner::Empty { .. } => true,
    }
}

/// Returns true if all values in the mask are false.
///
/// An `Empty` mask counts as all-false, a `Constant` mask answers with the negation of its
/// stored value, and a `Builder` mask is all-false when it contains no set bits.
///
/// A zero-length `Builder` is vacuously all-false. This matches the `Empty` arm here and
/// the behaviour of `all_true` for the same buffer, so the answer for an empty mask does
/// not depend on which internal representation happens to back it.
pub fn all_false(&self) -> bool {
    match &self.0 {
        Inner::Empty { .. } => true,
        Inner::Constant { value, .. } => !*value,
        // No `is_empty` guard: an empty builder has a true count of 0 and is all-false.
        Inner::Builder(bits) => bits.true_count() == 0,
    }
}
}

impl Mask {
Expand Down
11 changes: 7 additions & 4 deletions vortex-vector/src/fixed_size_list/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,22 +149,24 @@ impl VectorOps for FixedSizeListVector {
&self.validity
}

fn try_into_mut(self) -> Result<Self::Mutable, Self>
fn try_into_mut(self) -> Result<FixedSizeListVectorMut, Self>
where
Self: Sized,
{
let len = self.len;
let list_size = self.list_size;

// Try to unwrap the `Arc`.
let elements = match Arc::try_unwrap(self.elements) {
Ok(elements) => elements,
Err(elements) => return Err(FixedSizeListVector { elements, ..self }),
Err(elements) => return Err(Self { elements, ..self }),
};

// Try to make validity mutable.
let validity = match self.validity.try_into_mut() {
Ok(validity) => validity,
Err(validity) => {
return Err(FixedSizeListVector {
return Err(Self {
elements: Arc::new(elements),
list_size,
validity,
Expand All @@ -173,14 +175,15 @@ impl VectorOps for FixedSizeListVector {
}
};

// Try to make the elements mutable.
match elements.try_into_mut() {
Ok(mutable_elements) => Ok(FixedSizeListVectorMut {
elements: Box::new(mutable_elements),
list_size,
validity,
len,
}),
Err(elements) => Err(FixedSizeListVector {
Err(elements) => Err(Self {
elements: Arc::new(elements),
list_size,
validity: validity.freeze(),
Expand Down
1 change: 1 addition & 0 deletions vortex-vector/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub mod binaryview;
pub mod bool;
pub mod decimal;
pub mod fixed_size_list;
pub mod listview;
pub mod null;
pub mod primitive;
pub mod struct_;
Expand Down
Loading
Loading