Rust Quickstart

Install

Install vortex and all the first-party array encodings:

cargo add vortex

Convert

You can either use your own Parquet file or download the example used here.

Use Arrow to read a Parquet file and then construct an uncompressed Vortex array:

use std::fs::File;

use arrow_array::RecordBatchReader;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use vortex::Array;
use vortex::arrays::ChunkedArray;
use vortex::dtype::DType;
use vortex::dtype::arrow::FromArrowType;
use vortex_array::arrow::FromArrowArray;

// Open the example Parquet file and build an Arrow record-batch reader over it.
let parquet_file = File::open("../docs/_static/example.parquet")?;
let reader = ParquetRecordBatchReaderBuilder::try_new(parquet_file)?.build()?;

// Derive the Vortex dtype from the Arrow schema before the reader is consumed below.
let dtype = DType::from_arrow(reader.schema());

// Convert every record batch into a Vortex array, stopping at the first read error.
// Plain `map` + `collect` into a `Result` needs no iterator extension trait in scope.
let chunks = reader
    .map(|batch| batch.map(|record_batch| ArrayRef::from_arrow(record_batch, false)))
    .collect::<Result<_, _>>()?;

// Wrap the per-batch arrays in a single chunked array.
let vortex_array = ChunkedArray::try_new(chunks, dtype)?.into_array();

Compress

Use the BtrBlocks compressor or the compact compressor to compress a Vortex array in memory and compare the compressed size with the original:

use vortex::compressor::{BtrBlocksCompressor, CompactCompressor};

// A non-nullable u64 array holding the value 42 repeated 100,000 times.
let array = PrimitiveArray::new(buffer![42u64; 100_000], Validity::NonNullable);

// In-memory compression with the BtrBlocks compressor; print compressed / original bytes.
let btrblocks_compressed = BtrBlocksCompressor.compress(array.as_ref())?;
println!("BtrBlocks size: {} / {}", btrblocks_compressed.nbytes(), array.nbytes());

// The compact compressor generally compresses more strongly; here it is
// configured with 8192 values per page before being applied to the same array.
let compact_compressor = CompactCompressor::default().with_values_per_page(8192);
let compact_compressed = compact_compressor.compress(array.as_ref())?;
println!(
    "Compact size: {} / {}",
    compact_compressed.nbytes(),
    array.nbytes()
);

Write

Reading and writing both require an async runtime; in this example we use Tokio. VortexWriteOptions configures how a Vortex array is written to disk:

// Five non-nullable u64 values to persist.
let array = PrimitiveArray::new(buffer![0u64, 1, 2, 3, 4], Validity::NonNullable);

// Create the destination file, then write the array to it as a stream using
// the default compression and layout strategy.
let destination = tokio::fs::File::create("example.vortex").await?;
let array_stream = array.to_array_stream();
VortexWriteOptions::default()
    .write(destination, array_stream)
    .await?;

Read

// Open the Vortex file written in the previous step.
let vortex_file = VortexOpenOptions::file().open("example.vortex").await?;

// Scan it, keeping only the rows whose value is greater than 2.
let filtered_scan = vortex_file.scan()?.with_filter(gt(root(), lit(2u64)));

// Collect the scan's output into a single array.
let array = filtered_scan.into_array_iter()?.read_all()?;

// Of the values 0..=4 written earlier, only 3 and 4 pass the filter.
assert_eq!(array.len(), 2);