Rust Quickstart

Install

Install vortex and all the first-party array encodings:

cargo add vortex

Convert

You can either use your own Parquet file or download the example used here.

Use Arrow to read a Parquet file and then construct an uncompressed Vortex array:

use std::fs::File;

use arrow::array::RecordBatchReader;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use vortex::Array;
use vortex::arrays::ChunkedArray;
use vortex::dtype::DType;
use vortex::dtype::arrow::FromArrowType;

// Open the example Parquet file and build an Arrow record-batch reader over it.
let reader = ParquetRecordBatchReaderBuilder::try_new(File::open(
    "../docs/_static/example.parquet",
)?)?
.build()?;

// Derive the Vortex dtype from the Arrow schema reported by the reader.
let dtype = DType::from_arrow(reader.schema());
// Convert every record batch into an array; a failed batch read or a failed
// conversion is propagated to the caller by the `?` operators.
let chunks = reader
    .map(|record_batch| record_batch?.try_into_array())
    .try_collect()?;
// Assemble the per-batch arrays into a single chunked array with that dtype.
let vortex_array = ChunkedArray::try_new(chunks, dtype)?.into_array();

Compress

Use the BtrBlocks compressor to compress a Vortex array and check the relative size:

use vortex::compressor::BtrBlocksCompressor;
use vortex::nbytes::NBytes;

// Build a non-nullable primitive array of 100,000 copies of the value 42u64.
let array = PrimitiveArray::new(buffer![42u64; 100_000], Validity::NonNullable);

// Compress the array, then print "<compressed nbytes> / <original nbytes>"
// so the two sizes can be compared.
let compressed = BtrBlocksCompressor.compress(&array)?;
println!("{} / {}", compressed.nbytes(), array.nbytes());

Write

Reading and writing both require an async runtime; in this example we use Tokio. VortexWriteOptions knows how to write Vortex arrays to disk:

// A small non-nullable primitive array holding the five values 0 through 4.
let array = PrimitiveArray::new(buffer![0u64, 1, 2, 3, 4], Validity::NonNullable);

// Write a Vortex file with the default compression and layout strategy.
VortexWriteOptions::default()
    .write(
        // Destination file, created through Tokio's async file API.
        tokio::fs::File::create("example.vortex").await?,
        // The array is handed to the writer as an array stream.
        array.to_array_stream(),
    )
    .await?;

Read

// Open the file written in the previous step, scan it with a filter, and
// read the resulting stream into a single array.
let array = VortexOpenOptions::file()
    .open("example.vortex")
    .await?
    .scan()?
    // Filter expression: `ident()` compared greater-than against the literal 2u64
    // (presumably `ident()` refers to the array's own values — confirm against the expr docs).
    .with_filter(gt(ident(), lit(2u64)))
    .into_array_stream()?
    .read_all()
    .await?;

// The file holds 0 through 4, so only 3 and 4 are expected to pass the filter.
assert_eq!(array.len(), 2);