Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
## 2026-01-29 - Single Pass Variance Calculation in Manifold Heap
**Learning:** The `ChebyshevGuard::calculate` function in `ManifoldHeap` was performing two passes over the memory blocks to calculate mean and variance separately. This is a common pattern when following the mathematical definition directly. However, in a performance-critical "metabolism" loop (GC), this doubles the memory access overhead.
**Action:** Always check for opportunities to compute statistics (mean, variance) in a single pass using Welford's algorithm or accumulated sums, especially when iterating over large data structures.

## 2026-05-01 - Avoid High-Level Tensor Ops in Scalar Reductions
**Learning:** High-level `Tensor` operations like `sub()` and `mul()` trigger intermediate heap allocations for shape and stride metadata. When computing scalar reductions (like MSE, distances, or loss functions), using these operations introduces severe memory overhead inside hot loops. Attempting to use `.min()` length truncation as a safeguard is an anti-pattern as it masks shape mismatch errors.
**Action:** For scalar reductions, assert shape equality (`assert_eq!(a.shape, b.shape)`) and perform a single-pass iteration directly over the underlying borrowed data arrays (`a.data.borrow()`) to eliminate intermediate allocations and safely compute the result.

## 2026-05-18 - Single-Pass Scalar Reductions in Linear Algebra
**Learning:** High-level tensor operations like `a.sub(b)` and `.mul()` generate costly intermediate `Tensor` heap allocations containing shape and strides metadata. When computing scalar reductions (like loss functions or distance metrics) over tensors, this overhead is unnecessary and significantly impacts performance in hot paths.
**Action:** Always avoid intermediate `Tensor` allocations for scalar reductions (e.g., `mse`, `mae`, `binary_cross_entropy`, `hinge_loss`, `euclidean_distance`, `manhattan_distance`, `chebyshev_distance`, `rbf_kernel`). Instead, assert shape equality (`assert_eq!(a.shape, b.shape)`) and perform a single-pass iteration directly over the underlying borrowed data arrays (`a.data.borrow()`).
31 changes: 18 additions & 13 deletions crates/aegis-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,17 @@ fn main() {
// Spawn a thread with 8MB stack to prevent overflow.
let builder = std::thread::Builder::new().stack_size(8 * 1024 * 1024);

let handler = builder.spawn(|| {
let cli = Cli::parse();

match cli.command {
Some(Commands::Repl) | None => run_repl(),
Some(Commands::Run { file, mode }) => run_file(&file, &mode),
Some(Commands::Check { file }) => check_file(&file),
}
}).unwrap();
let handler = builder
.spawn(|| {
let cli = Cli::parse();

match cli.command {
Some(Commands::Repl) | None => run_repl(),
Some(Commands::Run { file, mode }) => run_file(&file, &mode),
Some(Commands::Check { file }) => check_file(&file),
}
})
.unwrap();

handler.join().unwrap();
}
Expand Down Expand Up @@ -166,7 +168,10 @@ fn run_file(path: &PathBuf, mode: &str) {
if let Some(ext) = path.extension() {
let s = ext.to_string_lossy();
if s != "aegis" && s != "ag" {
println!("Warning: File extension '.{}' is not standard (.aegis or .ag)", s);
println!(
"Warning: File extension '.{}' is not standard (.aegis or .ag)",
s
);
}
}

Expand All @@ -181,14 +186,14 @@ fn run_file(path: &PathBuf, mode: &str) {
};

if mode == "titan" {
use aegis_lang::vm::{TitanVM, Compiler};
use aegis_lang::vm::{Compiler, TitanVM};
// Compile to Bytecode
let compiler = Compiler::new();
let code = compiler.compile(&ast);

let mut vm = TitanVM::new();
vm.load_code(code);

match vm.run() {
Ok(result) => {
println!("{:?}", result);
Expand Down
95 changes: 51 additions & 44 deletions crates/aegis-core/src/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
#[cfg(feature = "std")]
use std::thread;

use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use core::cell::UnsafeCell;
use core::marker::PhantomData;
use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};

/// The number of concurrent "Time Dimensions" (Shards) in the clock.
/// 32 Shards ensures minimal contention even on high-core-count Titan machines.
Expand Down Expand Up @@ -65,31 +65,36 @@ impl<T, const SIZE: usize> TitanClock<T, SIZE> {

pub fn new() -> Self {
// Assert SIZE is divisible by SHARDS for simplicity
assert!(SIZE % SHARDS == 0, "Manifold SIZE must be divisible by 32 (SHARDS)");
assert!(
SIZE % SHARDS == 0,
"Manifold SIZE must be divisible by 32 (SHARDS)"
);

Self {
manifold: core::array::from_fn(|_| Slot::new()),
hands: core::array::from_fn(|_| TimeHand { index: AtomicUsize::new(0) }),
hands: core::array::from_fn(|_| TimeHand {
index: AtomicUsize::new(0),
}),
_marker: PhantomData,
}
}

/// Reserve a slot using the clock algorithm.
/// Returns the index of a metabolically available slot.
fn reserve_slot(&self) -> usize {
// In a real implementation, we would hash the Thread ID to pick a starting shard.
// For portable no_std, we can use a relaxed global counter or just start at 0.
// To reduce contention, let's just create a pseudo-random start based on the stack pointer or similar?
// Or just iterate efficiently.
let start_shard = 0;
let start_shard = 0;

for i in 0..SHARDS {
let shard_id = (start_shard + i) % SHARDS;
if let Some(idx) = self.try_reserve_in_shard(shard_id) {
return idx;
}
let shard_id = (start_shard + i) % SHARDS;
if let Some(idx) = self.try_reserve_in_shard(shard_id) {
return idx;
}
}

// If all shards are saturated (entropy storm), we force the "Big Bang" (overwrite) in shard 0.
self.force_reserve_in_shard(0)
}
Expand All @@ -102,7 +107,7 @@ impl<T, const SIZE: usize> TitanClock<T, SIZE> {

// Limit search to 2 revolutions (Second Chance Algorithm requirement)
let limit = Self::STACK_SIZE * 2;

for _ in 0..limit {
// Atomic increment of the hand
let local_idx = hand.index.fetch_add(1, Ordering::Relaxed) % Self::STACK_SIZE;
Expand All @@ -112,19 +117,19 @@ impl<T, const SIZE: usize> TitanClock<T, SIZE> {
// Bio-Clock Logic:
// If Energy=1 (Hot) -> Set Energy=0 (Cold) and Continue.
// If Energy=0 (Cold) -> Claim it.

// We use compare_exchange to be pedantic, but specialized Load/Store is fine for heuristic.
// If we see Hot, make it Cold.
if slot.energy.load(Ordering::Acquire) {
slot.energy.store(false, Ordering::Release);
// We don't take it. We give it a second chance.
slot.energy.store(false, Ordering::Release);
// We don't take it. We give it a second chance.
} else {
// It's cold. We take it.
// Ideally we should CAS a "claiming" bit to ensure unique ownership in race.
// But for this "Bio" memory, Last-Writer-Wins on the same slot is acceptable noise
// provided we don't drop live data.
// Since it was cold, it deemed dead.
return Some(global_idx);
// It's cold. We take it.
// Ideally we should CAS a "claiming" bit to ensure unique ownership in race.
// But for this "Bio" memory, Last-Writer-Wins on the same slot is acceptable noise
// provided we don't drop live data.
// Since it was cold, it deemed dead.
return Some(global_idx);
}
}
None
Expand All @@ -136,33 +141,35 @@ impl<T, const SIZE: usize> TitanClock<T, SIZE> {
let local_idx = hand.index.fetch_add(1, Ordering::Relaxed) % Self::STACK_SIZE;
shard_id * Self::STACK_SIZE + local_idx
}

/// Public Allocator API
/// O(1) amortized. Lock-Free.
pub fn alloc(&self, item: T) -> usize {
let idx = self.reserve_slot();
let slot = &self.manifold[idx];

unsafe {
// Drop old data if present (metabolism)
// *slot.data.get() = None; // redundant if we overwrite immediately
*slot.data.get() = Some(item);
// Drop old data if present (metabolism)
// *slot.data.get() = None; // redundant if we overwrite immediately
*slot.data.get() = Some(item);
}

// Spark of Life
slot.energy.store(true, Ordering::Release);

idx
}

/// Access data. Energizes the slot (refreshes the bit).
pub fn access(&self, index: usize) -> Option<&T> {
if index >= SIZE { return None; }

if index >= SIZE {
return None;
}

let slot = &self.manifold[index];
// Bio-Feedback: Reading the memory strengthens its synapse
slot.energy.store(true, Ordering::Relaxed);

unsafe { (*slot.data.get()).as_ref() }
}
}
Expand All @@ -176,7 +183,7 @@ mod tests {
use super::*;
use std::sync::Arc;
use std::thread;

#[test]
fn test_titan_genesis() {
// 32 Shards * 2 = 64 slots
Expand All @@ -185,36 +192,36 @@ mod tests {
assert!(idx < 64);
assert_eq!(*clock.access(idx).unwrap(), 42);
}

#[test]
fn test_shard_saturation() {
// 32 shards * 1 slot each = 32 slots total.
let clock: TitanClock<i32, 32> = TitanClock::new();
let clock: TitanClock<i32, 32> = TitanClock::new();

// Fill everything
for i in 0..32 {
clock.alloc(i);
}

// Access everything to make it HOT
for i in 0..32 {
clock.access(i);
}

// Now Alloc 33.
// It must scan, turn something cold, and eventually overwrite.
let idx_new = clock.alloc(100);

assert_eq!(*clock.access(idx_new).unwrap(), 100);
}

/*
#[test]
fn test_multithreaded_stress() {
let clock = Arc::new(TitanClock::<usize, 1024>::new()); // 32 slots per shard

let mut handles: Vec<std::thread::JoinHandle<()>> = Vec::new(); // Use Vec::new()

// Spawn 10 Titan Threads
for t in 0..10 {
let c = clock.clone();
Expand All @@ -226,11 +233,11 @@ mod tests {
}
}));
}

for h in handles {
h.join().unwrap();
}

// Verify manifold integrity
// Just checking we can read index 0 without panic
assert!(clock.access(0).is_some() || clock.access(0).is_none());
Expand Down
31 changes: 18 additions & 13 deletions crates/aether-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,17 @@ fn main() {
// Spawn a thread with 8MB stack to prevent overflow.
let builder = std::thread::Builder::new().stack_size(8 * 1024 * 1024);

let handler = builder.spawn(|| {
let cli = Cli::parse();

match cli.command {
Some(Commands::Repl) | None => run_repl(),
Some(Commands::Run { file, mode }) => run_file(&file, &mode),
Some(Commands::Check { file }) => check_file(&file),
}
}).unwrap();
let handler = builder
.spawn(|| {
let cli = Cli::parse();

match cli.command {
Some(Commands::Repl) | None => run_repl(),
Some(Commands::Run { file, mode }) => run_file(&file, &mode),
Some(Commands::Check { file }) => check_file(&file),
}
})
.unwrap();

handler.join().unwrap();
}
Expand Down Expand Up @@ -166,7 +168,10 @@ fn run_file(path: &PathBuf, mode: &str) {
if let Some(ext) = path.extension() {
let s = ext.to_string_lossy();
if s != "aether" && s != "ae" {
println!("Warning: File extension '.{}' is not standard (.aether or .ae)", s);
println!(
"Warning: File extension '.{}' is not standard (.aether or .ae)",
s
);
}
}

Expand All @@ -181,14 +186,14 @@ fn run_file(path: &PathBuf, mode: &str) {
};

if mode == "titan" {
use aether_lang::vm::{TitanVM, Compiler};
use aether_lang::vm::{Compiler, TitanVM};
// Compile to Bytecode
let compiler = Compiler::new();
let code = compiler.compile(&ast);

let mut vm = TitanVM::new();
vm.load_code(code);

match vm.run() {
Ok(result) => {
println!("{:?}", result);
Expand Down
12 changes: 8 additions & 4 deletions crates/aether-core/src/aether.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,17 +304,21 @@ impl<const D: usize> HierarchicalBlockTree<D> {
let mut active_l1 = [false; MAX_BLOCKS];
for (i, active) in active_l1.iter_mut().enumerate().take(self.counts[1]) {
let parent = i / 4;
if parent < self.counts[2] && active_l2[parent]
&& !self.levels[1][i].can_prune(query, threshold) {
if parent < self.counts[2]
&& active_l2[parent]
&& !self.levels[1][i].can_prune(query, threshold)
{
*active = true;
}
}

// Level 0 (finest) - final result
for (i, res) in result.iter_mut().enumerate().take(self.counts[0]) {
let parent = i / 4;
if parent < self.counts[1] && active_l1[parent]
&& !self.levels[0][i].can_prune(query, threshold) {
if parent < self.counts[1]
&& active_l1[parent]
&& !self.levels[0][i].can_prune(query, threshold)
{
*res = true;
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/aether-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ extern crate alloc;
pub mod aether;
pub mod governor;
pub mod manifold;
pub mod memory;
pub mod ml;
pub mod state;
pub mod os;
pub mod state;
pub mod topology;
pub mod memory;

// Re-export key types for convenience
pub use aether::{BlockMetadata, DriftDetector, HierarchicalBlockTree};
Expand Down
Loading