Rust Quickstart¶
Install¶
Install vortex and all the first-party array encodings:
cargo add vortex
Convert¶
You can either use your own Parquet file or download the example used here.
Use Arrow to read a Parquet file and then construct an uncompressed Vortex array:
use std::fs::File;
use arrow::array::RecordBatchReader;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use vortex::Array;
use vortex::arrays::ChunkedArray;
use vortex::dtype::DType;
use vortex::dtype::arrow::FromArrowType;
// Open the example Parquet file and build an Arrow record-batch reader over it.
// Each `?` propagates a failure from opening the file, constructing the
// builder, or building the reader.
let reader = ParquetRecordBatchReaderBuilder::try_new(File::open(
"../docs/_static/example.parquet",
)?)?
.build()?;
// Derive the Vortex `DType` from the reader's Arrow schema. This is done before
// the reader is consumed by the iterator chain below.
let dtype = DType::from_arrow(reader.schema());
// Convert every record batch yielded by the reader into a Vortex array.
// `record_batch?` surfaces read errors; `try_into_array()` surfaces conversion errors.
// NOTE(review): `try_collect` presumably comes from an extension trait (e.g. itertools)
// that is not among the imports shown in this snippet; confirm.
let chunks = reader
.map(|record_batch| record_batch?.try_into_array())
.try_collect()?;
// Combine the per-batch arrays into a single chunked array with the schema-derived dtype.
let vortex_array = ChunkedArray::try_new(chunks, dtype)?.into_array();
Compress¶
Use the BtrBlocks compressor to compress a Vortex array and check the relative size:
use vortex::compressor::BtrBlocksCompressor;
use vortex::nbytes::NBytes;
// Build a non-nullable primitive array of u64 values to compress.
// Presumably `buffer![42u64; 100_000]` repeats 42 one hundred thousand times,
// mirroring `vec!` syntax; confirm.
// NOTE(review): `PrimitiveArray`, `buffer!` and `Validity` are not imported in the
// lines shown for this snippet; confirm the required `use` items.
let array = PrimitiveArray::new(buffer![42u64; 100_000], Validity::NonNullable);
// Compress the array; `?` propagates a compression failure.
let compressed = BtrBlocksCompressor.compress(&array)?;
// Print "<compressed size> / <original size>" using each array's `nbytes()`.
println!("{} / {}", compressed.nbytes(), array.nbytes());
Write¶
Reading and writing both require an async runtime; in this example we use Tokio. `VortexWriteOptions` knows how to write Vortex arrays to disk:
// Build a small non-nullable u64 array holding the values 0 through 4.
// NOTE(review): `PrimitiveArray`, `buffer!`, `Validity` and `VortexWriteOptions` are not
// imported in the lines shown for this snippet; confirm the required `use` items.
let array = PrimitiveArray::new(buffer![0u64, 1, 2, 3, 4], Validity::NonNullable);
// Write a Vortex file with the default compression and layout strategy.
// The destination file is created asynchronously with Tokio, the array is handed to
// the writer as an array stream, and both the create and the write are awaited.
VortexWriteOptions::default()
.write(
tokio::fs::File::create("example.vortex").await?,
array.to_array_stream(),
)
.await?;
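Read¶
Open the file, scan it with a filter that keeps only values greater than 2, and read the result into a single array: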
// Open "example.vortex" and scan it, keeping only rows that pass the filter
// `gt(ident(), lit(2u64))`, i.e. values greater than 2.
// The scan is turned into an array stream and fully read into one array.
// NOTE(review): `VortexOpenOptions`, `gt`, `ident` and `lit` are not imported in the
// lines shown for this snippet; confirm the required `use` items.
let array = VortexOpenOptions::file()
.open("example.vortex")
.await?
.scan()?
.with_filter(gt(ident(), lit(2u64)))
.into_array_stream()?
.read_all()
.await?;
// The file written in the Write section holds 0, 1, 2, 3, 4, so only 3 and 4 remain.
assert_eq!(array.len(), 2);