Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ members = [
"vortex-python",
"vortex-scan",
"vortex-scalar",
"vortex-session",
"vortex-tui",
"vortex-utils",
"vortex-vector",
Expand Down Expand Up @@ -242,6 +243,7 @@ vortex-runend = { version = "0.1.0", path = "./encodings/runend", default-featur
vortex-scalar = { version = "0.1.0", path = "./vortex-scalar", default-features = false }
vortex-scan = { version = "0.1.0", path = "./vortex-scan", default-features = false }
vortex-sequence = { version = "0.1.0", path = "encodings/sequence", default-features = false }
vortex-session = { version = "0.1.0", path = "./vortex-session", default-features = false }
vortex-sparse = { version = "0.1.0", path = "./encodings/sparse", default-features = false }
vortex-tui = { version = "0.1.0", path = "./vortex-tui", default-features = false }
vortex-utils = { version = "0.1.0", path = "./vortex-utils", default-features = false }
Expand Down
1 change: 1 addition & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ vortex-io = { workspace = true }
vortex-mask = { workspace = true }
vortex-metrics = { workspace = true }
vortex-scalar = { workspace = true }
vortex-session = { workspace = true }
vortex-utils = { workspace = true }
vortex-vector = { workspace = true }

Expand Down
36 changes: 36 additions & 0 deletions vortex-array/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,16 @@
//! Every data type recognized by Vortex also has a canonical physical encoding format, which
//! arrays can be [canonicalized](Canonical) into for ease of access in compute functions.

use crate::vtable::VTable;
pub use array::*;
pub use canonical::*;
pub use context::*;
pub use encoding::*;
pub use hash::*;
pub use mask_future::*;
pub use metadata::*;
use std::sync::Arc;
use vortex_session::VortexSession;

pub mod accessor;
#[doc(hidden)]
Expand Down Expand Up @@ -54,3 +57,36 @@ pub mod flatbuffers {
//! Re-exported autogenerated code from the core Vortex flatbuffer definitions.
pub use vortex_flatbuffers::array::*;
}

/// Session-scoped state for the array crate.
///
/// Holds the [`ArrayRegistry`] that is read and updated through [`ArraySessionExt`].
#[derive(Debug)]
pub struct ArraySession {
// Registry of array encodings registered with this session.
registry: ArrayRegistry,
}

impl Default for ArraySession {
    /// Builds the session state around `ArrayRegistry::canonical_only()`.
    fn default() -> Self {
        let registry = ArrayRegistry::canonical_only();
        Self { registry }
    }
}

pub trait ArraySessionExt {
/// Register an array encoding with the session.
fn register_encoding(&self, encoding: EncodingRef);

/// Returns the array registry.
fn array_registry(&self) -> ArrayRegistry;
}

impl ArraySessionExt for VortexSession {
    /// Adds `encoding` to the registry held in this session's [`ArraySession`].
    fn register_encoding(&self, encoding: EncodingRef) {
        let mut array_session = self.get_mut::<ArraySession>();
        array_session.registry.register(encoding)
    }

    /// Returns a clone of the session's registry wrapped in a fresh [`Arc`].
    fn array_registry(&self) -> Arc<ArrayRegistry> {
        // TODO(ngates): the registry type is weird... we shouldn't arc it here, but it's weirdly
        //  mutable.
        let registry = self.get::<ArraySession>().registry.clone();
        Arc::new(registry)
    }
}
7 changes: 5 additions & 2 deletions vortex-array/src/vtable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ mod visitor;
use std::fmt::Debug;
use std::ops::Deref;

use crate::{Array, Encoding, EncodingId, EncodingRef, IntoArray};
pub use array::*;
pub use canonical::*;
pub use compute::*;
Expand All @@ -25,8 +26,7 @@ pub use operator::*;
pub use serde::*;
pub use validity::*;
pub use visitor::*;

use crate::{Array, Encoding, EncodingId, EncodingRef, IntoArray};
use vortex_session::VortexSession;

/// The encoding [`VTable`] encapsulates logic for an Encoding type and associated Array type.
/// The logic is split across several "VTable" traits to enable easier code organization than
Expand Down Expand Up @@ -72,6 +72,9 @@ pub trait VTable: 'static + Sized + Send + Sync + Debug {

/// Returns the encoding for the array.
fn encoding(array: &Self::Array) -> EncodingRef;

/// Initialize the encoding.
fn init(session: &VortexSession);
}

/// Placeholder type used to indicate when a particular vtable is not supported by the encoding.
Expand Down
1 change: 1 addition & 0 deletions vortex-file/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ vortex-runend = { workspace = true }
vortex-scalar = { workspace = true }
vortex-scan = { workspace = true }
vortex-sequence = { workspace = true }
vortex-session = { workspace = true }
vortex-sparse = { workspace = true }
vortex-utils = { workspace = true, features = ["dashmap"] }
vortex-zigzag = { workspace = true }
Expand Down
21 changes: 18 additions & 3 deletions vortex-file/src/open.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ use std::sync::Arc;

use futures::executor::block_on;
use parking_lot::RwLock;
use vortex_array::ArrayRegistry;
use vortex_array::{ArrayRegistry, ArraySessionExt};
use vortex_buffer::{Alignment, ByteBuffer};
use vortex_dtype::DType;
use vortex_error::{VortexError, VortexExpect, VortexResult, vortex_bail};
use vortex_error::{vortex_bail, VortexError, VortexExpect, VortexResult};
use vortex_io::file::IntoReadSource;
use vortex_io::runtime::Handle;
use vortex_io::{InstrumentedReadAt, VortexReadAt};
Expand All @@ -18,11 +18,12 @@ use vortex_layout::segments::{
};
use vortex_layout::{LayoutRegistry, LayoutRegistryExt};
use vortex_metrics::VortexMetrics;
use vortex_session::VortexSession;
use vortex_utils::aliases::hash_map::HashMap;

use crate::footer::Footer;
use crate::segments::{FileSegmentSource, InitialReadSegmentCache};
use crate::{DEFAULT_REGISTRY, DeserializeStep, EOF_SIZE, MAX_POSTSCRIPT_SIZE, VortexFile};
use crate::{DeserializeStep, VortexFile, DEFAULT_REGISTRY, EOF_SIZE, MAX_POSTSCRIPT_SIZE};

const INITIAL_READ_SIZE: usize = 1 << 20; // 1 MB

Expand Down Expand Up @@ -56,11 +57,25 @@ impl Default for VortexOpenOptions {
}
}

/// Extension trait for building [`VortexOpenOptions`] from a session.
pub trait OpenOptionsSessionExt {
/// Returns a [`VortexOpenOptions`] derived from this session.
fn open(&self) -> VortexOpenOptions;
}

impl OpenOptionsSessionExt for VortexSession {
    /// Creates open options that use the array registry held by this session.
    fn open(&self) -> VortexOpenOptions {
        // Start from the stock options, then swap in the session's array registry.
        let options = VortexOpenOptions::new();
        options.with_array_registry(self.array_registry())
    }
}

impl VortexOpenOptions {
/// Create a new [`VortexOpenOptions`] with the expected options for the file source.
///
/// This should not be used directly, instead public API clients are expected to
/// access either `VortexOpenOptions::new()` or `VortexOpenOptions::memory()`
///
// FIXME(ngates): so instead of allowing users to construct these things incorrectly, we force
// them to go via the session.
pub fn new() -> Self {
Self {
handle: Handle::find(),
Expand Down
24 changes: 24 additions & 0 deletions vortex-session/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[package]
name = "vortex-session"
authors.workspace = true
description = "Session object for Vortex"
edition = { workspace = true }
homepage = { workspace = true }
categories = { workspace = true }
include = { workspace = true }
keywords = { workspace = true }
license = { workspace = true }
readme = { workspace = true }
repository = { workspace = true }
rust-version = { workspace = true }
version = { workspace = true }

[package.metadata.docs.rs]
all-features = true

[lints]
workspace = true

[dependencies]
dashmap = { workspace = true }
vortex-error = { workspace = true }
96 changes: 96 additions & 0 deletions vortex-session/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use dashmap::DashMap;
use std::any::{Any, TypeId};
use std::fmt::Debug;
use std::hash::{BuildHasherDefault, Hasher};
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use vortex_error::VortexExpect;

/// A Vortex session encapsulates the set of extensible arrays, layouts, compute functions, dtypes,
/// etc. that are available for use in a given context.
///
/// It is also the entry-point passed to dynamic libraries to initialize Vortex plugins.
///
/// The session is a thin wrapper around a reference-counted map of session variables, so cloning
/// it yields another handle to the same underlying state rather than an independent copy.
#[derive(Clone, Debug)]
pub struct VortexSession(Arc<SessionVars>);

impl VortexSession {
/// Creates an empty Vortex session.
///
/// Do not call this function otherwise you will end up with an empty session!
pub fn _empty() -> Self {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any reason to make pub?

if we really don't want people calling it we could slap a #[doc(hidden)] on it

Copy link
Contributor Author

@gatesn gatesn Oct 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't yet decided between:

  • User creates an empty session, in which case we can just impl Default and the functions on the session are get_or_create.
  • Downstream crates must explicitly initialize their state into the session. Possibly using inventory?? This seems weirder to me though. But it does allow for non-default state on the session, for example I don't think the async runtime should be defaulted on first access?

Self(Arc::new(
DashMap::with_hasher(BuildHasherDefault::default()),
))
}

/// Returns the session variable of type `V`, inserting `V::default()` first if none exists.
///
/// The returned guard dereferences to `&V` and borrows from the session's internal map.
/// NOTE(review): the guard presumably keeps a map shard locked while alive; avoid holding it
/// across further calls into the same session until that is confirmed.
///
/// # Panics
///
/// Panics if the value stored under `TypeId::of::<V>()` is not a `V`, which indicates a bug.
pub fn get<V: SessionVar + Default>(&self) -> impl Deref<Target = V> {
    self.0
        .entry(TypeId::of::<V>())
        .or_insert_with(|| Box::new(V::default()))
        .downgrade()
        .map(|v| {
            // `v` is a `&Box<dyn SessionVar>`, and the blanket `SessionVar` impl also covers
            // `Box<dyn SessionVar>` itself. Calling `v.as_any()` would therefore resolve to the
            // box's impl and hand back the *box* as `dyn Any`, so the downcast to `V` could
            // never succeed. Deref down to the trait object to dispatch to the stored value.
            (**v)
                .as_any()
                .downcast_ref::<V>()
                .vortex_expect("Type mismatch - this is a bug")
        })
}

/// Returns mutable access to the session variable of type `V`, inserting `V::default()` first
/// if none exists.
///
/// Note that the returned value internally holds a lock on the variable.
///
/// # Panics
///
/// Panics if the value stored under `TypeId::of::<V>()` is not a `V`, which indicates a bug.
pub fn get_mut<V: SessionVar + Default>(&self) -> impl DerefMut<Target = V> {
    self.0
        .entry(TypeId::of::<V>())
        .or_insert_with(|| Box::new(V::default()))
        .map(|v| {
            // `v` is a `&mut Box<dyn SessionVar>`, and the blanket `SessionVar` impl also covers
            // `Box<dyn SessionVar>` itself. Calling `v.as_any_mut()` would resolve to the box's
            // impl and expose the *box* as `dyn Any`, so the downcast to `V` could never
            // succeed. Deref down to the trait object to dispatch to the stored value.
            (**v)
                .as_any_mut()
                .downcast_mut::<V>()
                .vortex_expect("Type mismatch - this is a bug")
        })
}
}

/// A TypeMap based on `https://docs.rs/http/1.2.0/src/http/extensions.rs.html#41-266`.
///
/// Values are keyed by the [`TypeId`] of the variable type, so the map holds at most one entry
/// per type. Keys are hashed with [`IdHasher`].
type SessionVars = DashMap<TypeId, Box<dyn SessionVar>, BuildHasherDefault<IdHasher>>;

/// Pass-through hasher for `TypeId` keys.
///
/// A `TypeId` is already a compiler-generated hash, so re-hashing it is wasted work. This hasher
/// simply remembers the `u64` it is fed and reports that value back unchanged.
#[derive(Default)]
struct IdHasher(u64);

impl Hasher for IdHasher {
    /// Reports the most recently written `u64` (zero if nothing was written).
    #[inline]
    fn finish(&self) -> u64 {
        let Self(stored) = self;
        *stored
    }

    /// Byte-wise hashing is not supported; only `write_u64` is expected to be called.
    fn write(&mut self, _: &[u8]) {
        unreachable!("TypeId calls write_u64");
    }

    /// Stores the incoming value verbatim as the hash.
    #[inline]
    fn write_u64(&mut self, type_id_bits: u64) {
        *self = Self(type_id_bits);
    }
}

/// This trait defines variables that can be stored against a Vortex session.
///
/// The two methods expose the value as [`Any`] so that it can be downcast back to its concrete
/// type after being stored as a `Box<dyn SessionVar>`.
///
/// NOTE(review): only `Send` is required here, not `Sync`. If sessions are meant to be shared
/// across threads, a `Sync` bound may also be needed — confirm against intended usage.
pub trait SessionVar: Any + Send + Debug {
/// Returns `self` as a shared [`Any`] reference for downcasting.
fn as_any(&self) -> &dyn Any;
/// Returns `self` as a mutable [`Any`] reference for downcasting.
fn as_any_mut(&mut self) -> &mut dyn Any;
}

// Blanket implementation: every sized `Send + Debug + 'static` type is a `SessionVar`.
//
// Note that this also covers `Box<dyn SessionVar>` itself, so calling `as_any` directly on a
// boxed variable yields the box rather than the value inside it.
impl<T: Send + Debug + 'static> SessionVar for T {
fn as_any(&self) -> &dyn Any {
self
}

fn as_any_mut(&mut self) -> &mut dyn Any {
self
}
}
Loading