From 354c2d7d6d2724a481b8af9a23343b5cce105ad1 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Tue, 14 May 2024 18:39:49 +0100 Subject: [PATCH 01/19] change Vec to PRegSet --- src/ion/liveranges.rs | 5 +++-- src/ion/process.rs | 6 ++++-- src/ion/reg_traversal.rs | 10 +++++----- src/lib.rs | 17 +++++++++-------- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index b3ee32cb..b71f72bb 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -114,9 +114,10 @@ impl<'a, F: Function> Env<'a, F> { } for class in 0..self.preferred_victim_by_class.len() { self.preferred_victim_by_class[class] = self.env.non_preferred_regs_by_class[class] + .into_iter() .last() - .or(self.env.preferred_regs_by_class[class].last()) - .cloned() + .or(self.env.preferred_regs_by_class[class].into_iter().last()) + // .cloned() .unwrap_or(PReg::invalid()); } // Create VRegs from the vreg count. diff --git a/src/ion/process.rs b/src/ion/process.rs index 371ab84b..5d45af14 100644 --- a/src/ion/process.rs +++ b/src/ion/process.rs @@ -1222,8 +1222,10 @@ impl<'a, F: Function> Env<'a, F> { let mut fixed_assigned = 0; let mut total_regs = 0; for preg in self.env.preferred_regs_by_class[class as u8 as usize] - .iter() - .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) + .into_iter() + .chain( + self.env.non_preferred_regs_by_class[class as u8 as usize].into_iter(), + ) { trace!(" -> PR {:?}", preg); let start = LiveRangeKey::from_range(&CodeRange { diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index 729fd33e..3ede5074 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -1,5 +1,5 @@ use crate::{MachineEnv, PReg, RegClass}; - +use alloc::vec::Vec; /// This iterator represents a traversal through all allocatable /// registers of a given class, in a certain order designed to /// minimize allocation contention. @@ -53,13 +53,13 @@ impl<'a> RegTraversalIter<'a> { } let hints = [hint_reg, hint2_reg]; let class = class as u8 as usize; - let offset_pref = if env.preferred_regs_by_class[class].len() > 0 { - offset % env.preferred_regs_by_class[class].len() + let offset_pref = if env.preferred_regs_by_class[class].n_regs() > 0 { + offset % env.preferred_regs_by_class[class].n_regs() } else { 0 }; - let offset_non_pref = if env.non_preferred_regs_by_class[class].len() > 0 { - offset % env.non_preferred_regs_by_class[class].len() + let offset_non_pref = if env.non_preferred_regs_by_class[class].n_regs() > 0 { + offset % env.non_preferred_regs_by_class[class].n_regs() } else { 0 }; diff --git a/src/lib.rs b/src/lib.rs index 3f73719e..18c9d9c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -240,6 +240,11 @@ impl PRegSet { self.bits[0] |= other.bits[0]; self.bits[1] |= other.bits[1]; } + + /// Get the number of registers in the set + pub fn n_regs(&self) -> usize { + self.bits[0].count_ones() as usize + self.bits[1].count_ones() as usize + } } impl IntoIterator for PRegSet { @@ -276,15 +281,11 @@ impl From<&MachineEnv> for PRegSet { let mut res = Self::default(); for class in env.preferred_regs_by_class.iter() { - for preg in class { - res.add(*preg) - } + res.union_from(*class); } for class in env.non_preferred_regs_by_class.iter() { - for preg in class { - res.add(*preg) - } + res.union_from(*class); } res @@ -1380,7 +1381,7 @@ pub struct MachineEnv { /// /// If an explicit scratch register is provided in `scratch_by_class` then /// it must not appear in this list. - pub preferred_regs_by_class: [Vec; 3], + pub preferred_regs_by_class: [PRegSet; 3], /// Non-preferred physical registers for each class. These are the /// registers that will be allocated if a preferred register is @@ -1389,7 +1390,7 @@ pub struct MachineEnv { /// /// If an explicit scratch register is provided in `scratch_by_class` then /// it must not appear in this list. - pub non_preferred_regs_by_class: [Vec; 3], + pub non_preferred_regs_by_class: [PRegSet; 3], /// Optional dedicated scratch register per class. This is needed to perform /// moves between registers when cyclic move patterns occur. The From 2585169753fede27e609706b0db8cdbfd1137e55 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Tue, 14 May 2024 18:50:52 +0100 Subject: [PATCH 02/19] fix reg_traversal --- src/ion/reg_traversal.rs | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index 3ede5074..4092dfcf 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -1,5 +1,4 @@ use crate::{MachineEnv, PReg, RegClass}; -use alloc::vec::Vec; /// This iterator represents a traversal through all allocatable /// registers of a given class, in a certain order designed to /// minimize allocation contention. @@ -100,23 +99,30 @@ impl<'a> core::iter::Iterator for RegTraversalIter<'a> { self.hint_idx += 1; return h; } - while self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { - let arr = &self.env.preferred_regs_by_class[self.class][..]; - let r = arr[wrap(self.pref_idx + self.offset_pref, arr.len())]; + + let n_pref_regs = self.env.preferred_regs_by_class[self.class].n_regs(); + while self.pref_idx < n_pref_regs { + let mut arr = self.env.preferred_regs_by_class[self.class].into_iter(); + let r = arr.nth(wrap(self.pref_idx + self.offset_pref, n_pref_regs)); self.pref_idx += 1; - if Some(r) == self.hints[0] || Some(r) == self.hints[1] { + if r == self.hints[0] || r == self.hints[1] { continue; } - return Some(r); + return r; } - while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { - let arr = &self.env.non_preferred_regs_by_class[self.class][..]; - let r = arr[wrap(self.non_pref_idx + self.offset_non_pref, arr.len())]; + + let n_non_pref_regs = self.env.non_preferred_regs_by_class[self.class].n_regs(); + while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].n_regs() { + let mut arr = self.env.non_preferred_regs_by_class[self.class].into_iter(); + let r = arr.nth(wrap( + self.non_pref_idx + self.offset_non_pref, + n_non_pref_regs, + )); self.non_pref_idx += 1; - if Some(r) == self.hints[0] || Some(r) == self.hints[1] { + if r == self.hints[0] || r == self.hints[1] { continue; } - return Some(r); + return r; } None } From fc37a21894ae84918899f333724975986a87113b Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Tue, 14 May 2024 19:17:09 +0100 Subject: [PATCH 03/19] fix fuzz --- src/fuzzing/func.rs | 16 ++++++++-------- src/lib.rs | 12 ++++++++++++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 8bdb326e..3d74c42c 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -645,15 +645,15 @@ pub fn machine_env() -> MachineEnv { fn regs(r: core::ops::Range, c: RegClass) -> Vec { r.map(|i| PReg::new(i, c)).collect() } - let preferred_regs_by_class: [Vec; 3] = [ - regs(0..24, RegClass::Int), - regs(0..24, RegClass::Float), - regs(0..24, RegClass::Vector), + let preferred_regs_by_class: [PRegSet; 3] = [ + regs(0..24, RegClass::Int).into(), + regs(0..24, RegClass::Float).into(), + regs(0..24, RegClass::Vector).into(), ]; - let non_preferred_regs_by_class: [Vec; 3] = [ - regs(24..32, RegClass::Int), - regs(24..32, RegClass::Float), - regs(24..32, RegClass::Vector), + let non_preferred_regs_by_class: [PRegSet; 3] = [ + regs(24..32, RegClass::Int).into(), + regs(24..32, RegClass::Float).into(), + regs(24..32, RegClass::Vector).into(), ]; let scratch_by_class: [Option; 3] = [None, None, None]; let fixed_stack_slots = (32..63) diff --git a/src/lib.rs b/src/lib.rs index 18c9d9c4..e1a3179a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -292,6 +292,18 @@ impl From<&MachineEnv> for PRegSet { } } +impl From> for PRegSet { + fn from(regs: Vec) -> Self { + let mut res = Self::default(); + + for preg in regs { + res.add(preg); + } + + res + } +} + /// A virtual register. Contains a virtual register number and a /// class. /// From d71c2bf5c7f9fac82993828295834e8645eeb27f Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Tue, 14 May 2024 21:04:56 +0100 Subject: [PATCH 04/19] impl iterator directly on PRegSet --- src/ion/reg_traversal.rs | 14 +++++++------- src/lib.rs | 28 +++++++++++----------------- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index 4092dfcf..075ff912 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -52,13 +52,13 @@ impl<'a> RegTraversalIter<'a> { } let hints = [hint_reg, hint2_reg]; let class = class as u8 as usize; - let offset_pref = if env.preferred_regs_by_class[class].n_regs() > 0 { - offset % env.preferred_regs_by_class[class].n_regs() + let offset_pref = if env.preferred_regs_by_class[class].len() > 0 { + offset % env.preferred_regs_by_class[class].len() } else { 0 }; - let offset_non_pref = if env.non_preferred_regs_by_class[class].n_regs() > 0 { - offset % env.non_preferred_regs_by_class[class].n_regs() + let offset_non_pref = if env.non_preferred_regs_by_class[class].len() > 0 { + offset % env.non_preferred_regs_by_class[class].len() } else { 0 }; @@ -100,7 +100,7 @@ impl<'a> core::iter::Iterator for RegTraversalIter<'a> { return h; } - let n_pref_regs = self.env.preferred_regs_by_class[self.class].n_regs(); + let n_pref_regs = self.env.preferred_regs_by_class[self.class].len(); while self.pref_idx < n_pref_regs { let mut arr = self.env.preferred_regs_by_class[self.class].into_iter(); let r = arr.nth(wrap(self.pref_idx + self.offset_pref, n_pref_regs)); @@ -111,8 +111,8 @@ impl<'a> core::iter::Iterator for RegTraversalIter<'a> { return r; } - let n_non_pref_regs = self.env.non_preferred_regs_by_class[self.class].n_regs(); - while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].n_regs() { + let n_non_pref_regs = self.env.non_preferred_regs_by_class[self.class].len(); + while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { let mut arr = self.env.non_preferred_regs_by_class[self.class].into_iter(); let r = arr.nth(wrap( self.non_pref_idx + self.offset_non_pref, diff --git a/src/lib.rs b/src/lib.rs index e1a3179a..57386c98 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -241,25 +241,13 @@ impl PRegSet { self.bits[1] |= other.bits[1]; } - /// Get the number of registers in the set - pub fn n_regs(&self) -> usize { - self.bits[0].count_ones() as usize + self.bits[1].count_ones() as usize - } + // Get the number of registers in the set + // pub fn n_regs(&self) -> usize { + // (self.bits[0].count_ones() + self.bits[1].count_ones()) as usize + // } } -impl IntoIterator for PRegSet { - type Item = PReg; - type IntoIter = PRegSetIter; - fn into_iter(self) -> PRegSetIter { - PRegSetIter { bits: self.bits } - } -} - -pub struct PRegSetIter { - bits: [u128; 2], -} - -impl Iterator for PRegSetIter { +impl Iterator for PRegSet { type Item = PReg; fn next(&mut self) -> Option { if self.bits[0] != 0 { @@ -276,6 +264,12 @@ impl Iterator for PRegSetIter { } } +impl ExactSizeIterator for PRegSet { + fn len(&self) -> usize { + (self.bits[0].count_ones() + self.bits[1].count_ones()) as usize + } +} + impl From<&MachineEnv> for PRegSet { fn from(env: &MachineEnv) -> Self { let mut res = Self::default(); From a131511746cc71fb3a57a0ed79e45c1784e3b7be Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Tue, 14 May 2024 21:28:10 +0100 Subject: [PATCH 05/19] Use size_hint, method for last and call last directly on iterator without into_iter --- src/ion/liveranges.rs | 3 +-- src/lib.rs | 33 +++++++++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index b71f72bb..8275a996 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -114,9 +114,8 @@ impl<'a, F: Function> Env<'a, F> { } for class in 0..self.preferred_victim_by_class.len() { self.preferred_victim_by_class[class] = self.env.non_preferred_regs_by_class[class] - .into_iter() .last() - .or(self.env.preferred_regs_by_class[class].into_iter().last()) + .or(self.env.preferred_regs_by_class[class].last()) // .cloned() .unwrap_or(PReg::invalid()); } diff --git a/src/lib.rs b/src/lib.rs index 57386c98..1250714a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -262,14 +262,27 @@ impl Iterator for PRegSet { None } } -} -impl ExactSizeIterator for PRegSet { - fn len(&self) -> usize { - (self.bits[0].count_ones() + self.bits[1].count_ones()) as usize + fn size_hint(&self) -> (usize, Option) { + let len = (self.bits[0].count_ones() + self.bits[1].count_ones()) as usize; + (len, Some(len)) + } + + fn last(self) -> Option { + if self.bits[1] != 0 { + let index = 127 - self.bits[1].leading_zeros(); + Some(PReg::from_index(index as usize + 128)) + } else if self.bits[0] != 0 { + let index = self.bits[0].leading_zeros(); + Some(PReg::from_index(index as usize)) + } else { + None + } } } +impl ExactSizeIterator for PRegSet {} + impl From<&MachineEnv> for PRegSet { fn from(env: &MachineEnv) -> Self { let mut res = Self::default(); @@ -1553,3 +1566,15 @@ pub struct RegallocOptions { /// Run the SSA validator before allocating registers. pub validate_ssa: bool, } + +#[cfg(test)] +mod test { + use super::PRegSet; + + #[test] + fn test_set_bits_iter() { + let registers = PRegSet { bits: [112, 131] }.into_iter(); + let last = registers.last().unwrap().bits; + assert_eq!(last, 135); + } +} From 98588af9e6eae862179e9df8e48df88604bdd738 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Fri, 17 May 2024 16:01:21 +0100 Subject: [PATCH 06/19] start redev for PRegSet for full use --- src/lib.rs | 68 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1250714a..2813f5f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -160,6 +160,12 @@ impl PReg { } } +impl From for PReg { + fn from(raw_index: u8) -> Self { + PReg { bits: raw_index } + } +} + impl core::fmt::Debug for PReg { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!( @@ -191,47 +197,47 @@ impl core::fmt::Display for PReg { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct PRegSet { - bits: [u128; 2], + bits: [u64; 3], } impl PRegSet { /// Create an empty set. pub const fn empty() -> Self { - Self { bits: [0; 2] } + Self { bits: [0; 3] } } /// Returns whether the given register is part of the set. pub fn contains(&self, reg: PReg) -> bool { debug_assert!(reg.index() < 256); - let bit = reg.index() & 127; - let index = reg.index() >> 7; - self.bits[index] & (1u128 << bit) != 0 + let bit = reg.index() & 63; + let index = reg.index() >> 6; + self.bits[index] & (1u64 << bit) != 0 } /// Add a physical register (PReg) to the set, returning the new value. pub const fn with(self, reg: PReg) -> Self { debug_assert!(reg.index() < 256); - let bit = reg.index() & 127; - let index = reg.index() >> 7; + let bit = reg.index() & 63; + let index = reg.index() >> 6; let mut out = self; - out.bits[index] |= 1u128 << bit; + out.bits[index] |= 1u64 << bit; out } /// Add a physical register (PReg) to the set. pub fn add(&mut self, reg: PReg) { debug_assert!(reg.index() < 256); - let bit = reg.index() & 127; - let index = reg.index() >> 7; - self.bits[index] |= 1u128 << bit; + let bit = reg.index() & 63; + let index = reg.index() >> 3; + self.bits[index] |= 1u64 << bit; } /// Remove a physical register (PReg) from the set. pub fn remove(&mut self, reg: PReg) { debug_assert!(reg.index() < 256); - let bit = reg.index() & 127; - let index = reg.index() >> 7; - self.bits[index] &= !(1u128 << bit); + let bit = reg.index() & 63; + let index = reg.index() >> 3; + self.bits[index] &= !(1u64 << bit); } /// Add all of the registers in one set to this one, mutating in @@ -239,12 +245,14 @@ impl PRegSet { pub fn union_from(&mut self, other: PRegSet) { self.bits[0] |= other.bits[0]; self.bits[1] |= other.bits[1]; + self.bits[2] |= other.bits[2]; } - // Get the number of registers in the set - // pub fn n_regs(&self) -> usize { - // (self.bits[0].count_ones() + self.bits[1].count_ones()) as usize - // } + pub fn last_in_class(&self, class: usize) -> PReg { + let i_in_class = 63 - self.bits[class].leading_zeros(); + let i = i_in_class as usize | (class << 6); + PReg::from_index(i) + } } impl Iterator for PRegSet { @@ -252,11 +260,15 @@ impl Iterator for PRegSet { fn next(&mut self) -> Option { if self.bits[0] != 0 { let index = self.bits[0].trailing_zeros(); - self.bits[0] &= !(1u128 << index); + self.bits[0] &= !(1u64 << index); Some(PReg::from_index(index as usize)) } else if self.bits[1] != 0 { let index = self.bits[1].trailing_zeros(); - self.bits[1] &= !(1u128 << index); + self.bits[1] &= !(1u64 << index); + Some(PReg::from_index(index as usize + 64)) + } else if self.bits[2] != 0 { + let index = self.bits[2].trailing_zeros(); + self.bits[2] &= !(1u64 << index); Some(PReg::from_index(index as usize + 128)) } else { None @@ -264,14 +276,19 @@ impl Iterator for PRegSet { } fn size_hint(&self) -> (usize, Option) { - let len = (self.bits[0].count_ones() + self.bits[1].count_ones()) as usize; + let len = (self.bits[0].count_ones() + + self.bits[1].count_ones() + + self.bits[2].count_ones()) as usize; (len, Some(len)) } fn last(self) -> Option { - if self.bits[1] != 0 { - let index = 127 - self.bits[1].leading_zeros(); + if self.bits[2] != 0 { + let index = 63 - self.bits[2].leading_zeros(); Some(PReg::from_index(index as usize + 128)) + } else if self.bits[1] != 0 { + let index = self.bits[1].leading_zeros(); + Some(PReg::from_index(index as usize + 64)) } else if self.bits[0] != 0 { let index = self.bits[0].leading_zeros(); Some(PReg::from_index(index as usize)) @@ -1573,7 +1590,10 @@ mod test { #[test] fn test_set_bits_iter() { - let registers = PRegSet { bits: [112, 131] }.into_iter(); + let registers = PRegSet { + bits: [112, 0, 131], + } + .into_iter(); let last = registers.last().unwrap().bits; assert_eq!(last, 135); } From de88b98abb33dab697ccf197ef37d2c8db5cb83e Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Fri, 17 May 2024 16:35:45 +0100 Subject: [PATCH 07/19] only use one PRegSet --- src/fuzzing/func.rs | 28 +++++++++----- src/ion/liveranges.rs | 8 ++-- src/ion/process.rs | 10 ++++- src/ion/reg_traversal.rs | 26 ++++++++----- src/lib.rs | 82 +++++++++++++++++++++++++++++++++------- 5 files changed, 116 insertions(+), 38 deletions(-) diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 3d74c42c..6ea94538 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -645,16 +645,24 @@ pub fn machine_env() -> MachineEnv { fn regs(r: core::ops::Range, c: RegClass) -> Vec { r.map(|i| PReg::new(i, c)).collect() } - let preferred_regs_by_class: [PRegSet; 3] = [ - regs(0..24, RegClass::Int).into(), - regs(0..24, RegClass::Float).into(), - regs(0..24, RegClass::Vector).into(), - ]; - let non_preferred_regs_by_class: [PRegSet; 3] = [ - regs(24..32, RegClass::Int).into(), - regs(24..32, RegClass::Float).into(), - regs(24..32, RegClass::Vector).into(), - ]; + let int_regs: PRegSet = regs(0..24, RegClass::Int).into(); + let float_regs: PRegSet = regs(0..24, RegClass::Float).into(); + let vector_regs: PRegSet = regs(0..24, RegClass::Vector).into(); + + let mut preferred_regs_by_class = PRegSet::default(); + preferred_regs_by_class.union_from(int_regs); + preferred_regs_by_class.union_from(float_regs); + preferred_regs_by_class.union_from(vector_regs); + + let int_regs: PRegSet = regs(24..32, RegClass::Int).into(); + let float_regs: PRegSet = regs(24..32, RegClass::Float).into(); + let vector_regs: PRegSet = regs(24..32, RegClass::Vector).into(); + + let mut non_preferred_regs_by_class = PRegSet::default(); + non_preferred_regs_by_class.union_from(int_regs); + non_preferred_regs_by_class.union_from(float_regs); + non_preferred_regs_by_class.union_from(vector_regs); + let scratch_by_class: [Option; 3] = [None, None, None]; let fixed_stack_slots = (32..63) .flat_map(|i| { diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index 8275a996..f585b527 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -113,9 +113,11 @@ impl<'a, F: Function> Env<'a, F> { self.pregs[preg.index()].is_stack = true; } for class in 0..self.preferred_victim_by_class.len() { - self.preferred_victim_by_class[class] = self.env.non_preferred_regs_by_class[class] - .last() - .or(self.env.preferred_regs_by_class[class].last()) + self.preferred_victim_by_class[class] = self + .env + .non_preferred_regs_by_class + .last_in_class(class) + .or(self.env.preferred_regs_by_class.last_in_class(class)) // .cloned() .unwrap_or(PReg::invalid()); } diff --git a/src/ion/process.rs b/src/ion/process.rs index 5d45af14..8c6e9e80 100644 --- a/src/ion/process.rs +++ b/src/ion/process.rs @@ -1221,10 +1221,16 @@ impl<'a, F: Function> Env<'a, F> { let mut min_bundles_assigned = 0; let mut fixed_assigned = 0; let mut total_regs = 0; - for preg in self.env.preferred_regs_by_class[class as u8 as usize] + for preg in self + .env + .preferred_regs_by_class + .to_preg_class(class as u8 as usize) .into_iter() .chain( - self.env.non_preferred_regs_by_class[class as u8 as usize].into_iter(), + self.env + .non_preferred_regs_by_class + .to_preg_class(class as u8 as usize) + .into_iter(), ) { trace!(" -> PR {:?}", preg); diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index 075ff912..f6f913b0 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -52,13 +52,13 @@ impl<'a> RegTraversalIter<'a> { } let hints = [hint_reg, hint2_reg]; let class = class as u8 as usize; - let offset_pref = if env.preferred_regs_by_class[class].len() > 0 { - offset % env.preferred_regs_by_class[class].len() + let offset_pref = if env.preferred_regs_by_class.len_class(class) > 0 { + offset % env.preferred_regs_by_class.len_class(class) } else { 0 }; - let offset_non_pref = if env.non_preferred_regs_by_class[class].len() > 0 { - offset % env.non_preferred_regs_by_class[class].len() + let offset_non_pref = if env.non_preferred_regs_by_class.len_class(class) > 0 { + offset % env.non_preferred_regs_by_class.len_class(class) } else { 0 }; @@ -100,9 +100,13 @@ impl<'a> core::iter::Iterator for RegTraversalIter<'a> { return h; } - let n_pref_regs = self.env.preferred_regs_by_class[self.class].len(); + let n_pref_regs = self.env.preferred_regs_by_class.len_class(self.class); while self.pref_idx < n_pref_regs { - let mut arr = self.env.preferred_regs_by_class[self.class].into_iter(); + let mut arr = self + .env + .preferred_regs_by_class + .to_preg_class(self.class) + .into_iter(); let r = arr.nth(wrap(self.pref_idx + self.offset_pref, n_pref_regs)); self.pref_idx += 1; if r == self.hints[0] || r == self.hints[1] { @@ -111,9 +115,13 @@ impl<'a> core::iter::Iterator for RegTraversalIter<'a> { return r; } - let n_non_pref_regs = self.env.non_preferred_regs_by_class[self.class].len(); - while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { - let mut arr = self.env.non_preferred_regs_by_class[self.class].into_iter(); + let n_non_pref_regs = self.env.non_preferred_regs_by_class.len_class(self.class); + while self.non_pref_idx < n_non_pref_regs { + let mut arr = self + .env + .non_preferred_regs_by_class + .to_preg_class(self.class) + .into_iter(); let r = arr.nth(wrap( self.non_pref_idx + self.offset_non_pref, n_non_pref_regs, diff --git a/src/lib.rs b/src/lib.rs index 2813f5f5..82ab5dae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -248,10 +248,29 @@ impl PRegSet { self.bits[2] |= other.bits[2]; } - pub fn last_in_class(&self, class: usize) -> PReg { - let i_in_class = 63 - self.bits[class].leading_zeros(); - let i = i_in_class as usize | (class << 6); - PReg::from_index(i) + pub fn last_in_class(&self, class: usize) -> Option { + if self.bits[class] == 0 { + None + } else { + // get the index in the class vec + let i_in_class = 63 - self.bits[class].leading_zeros(); + // apply the class mask + let i = i_in_class as u8 | ((class as u8) << 6); + // return the PReg + Some(PReg::from(i)) + } + } + + pub fn len_class(&self, class: usize) -> usize { + self.bits[class].count_ones() as usize + } + + /// Get a iterator over only one class in the set + fn to_preg_class(&self, class: usize) -> PRegClass { + PRegClass { + class_mask: (class as u8) << 6, + regs: self.bits[class], + } } } @@ -287,10 +306,10 @@ impl Iterator for PRegSet { let index = 63 - self.bits[2].leading_zeros(); Some(PReg::from_index(index as usize + 128)) } else if self.bits[1] != 0 { - let index = self.bits[1].leading_zeros(); + let index = 63 - self.bits[1].leading_zeros(); Some(PReg::from_index(index as usize + 64)) } else if self.bits[0] != 0 { - let index = self.bits[0].leading_zeros(); + let index = 63 - self.bits[0].leading_zeros(); Some(PReg::from_index(index as usize)) } else { None @@ -304,13 +323,9 @@ impl From<&MachineEnv> for PRegSet { fn from(env: &MachineEnv) -> Self { let mut res = Self::default(); - for class in env.preferred_regs_by_class.iter() { - res.union_from(*class); - } + res.union_from(env.preferred_regs_by_class); - for class in env.non_preferred_regs_by_class.iter() { - res.union_from(*class); - } + res.union_from(env.non_preferred_regs_by_class); res } @@ -328,6 +343,45 @@ impl From> for PRegSet { } } +/// A compact iterator over a single register class +struct PRegClass { + // bit mask containing class data in UPPER two bits + class_mask: u8, + // bit packed vec of registers + regs: u64, +} + +impl Iterator for PRegClass { + type Item = PReg; + fn next(&mut self) -> Option { + if self.regs == 0 { + None + } else { + let index = self.regs.trailing_zeros() as u8; + self.regs &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + Some(PReg::from_index(reg_index as usize)) + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = (self.regs.count_ones()) as usize; + (len, Some(len)) + } + + fn last(self) -> Option { + if self.regs == 0 { + None + } else { + let index = 63 - self.regs.leading_zeros(); + let reg_index = index as u8 | self.class_mask; + Some(PReg::from_index(reg_index as usize)) + } + } +} + +impl ExactSizeIterator for PRegClass {} + /// A virtual register. Contains a virtual register number and a /// class. /// @@ -1417,7 +1471,7 @@ pub struct MachineEnv { /// /// If an explicit scratch register is provided in `scratch_by_class` then /// it must not appear in this list. - pub preferred_regs_by_class: [PRegSet; 3], + pub preferred_regs_by_class: PRegSet, /// Non-preferred physical registers for each class. These are the /// registers that will be allocated if a preferred register is @@ -1426,7 +1480,7 @@ pub struct MachineEnv { /// /// If an explicit scratch register is provided in `scratch_by_class` then /// it must not appear in this list. - pub non_preferred_regs_by_class: [PRegSet; 3], + pub non_preferred_regs_by_class: PRegSet, /// Optional dedicated scratch register per class. This is needed to perform /// moves between registers when cyclic move patterns occur. The From 14fb9bc6a63ee4552f11404d007272502f14057b Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Fri, 17 May 2024 16:50:34 +0100 Subject: [PATCH 08/19] correect bitshifts --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 82ab5dae..8178364e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -228,7 +228,7 @@ impl PRegSet { pub fn add(&mut self, reg: PReg) { debug_assert!(reg.index() < 256); let bit = reg.index() & 63; - let index = reg.index() >> 3; + let index = reg.index() >> 6; self.bits[index] |= 1u64 << bit; } @@ -236,7 +236,7 @@ impl PRegSet { pub fn remove(&mut self, reg: PReg) { debug_assert!(reg.index() < 256); let bit = reg.index() & 63; - let index = reg.index() >> 3; + let index = reg.index() >> 6; self.bits[index] &= !(1u64 << bit); } From 179a7b6d92a937628ddedcfee5f14ef742bab220 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 18 May 2024 15:37:14 +0100 Subject: [PATCH 09/19] remove MachinEnv from RegTraversalIter --- src/ion/reg_traversal.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index f6f913b0..de987b61 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -1,4 +1,4 @@ -use crate::{MachineEnv, PReg, RegClass}; +use crate::{MachineEnv, PReg, PRegSet, RegClass}; /// This iterator represents a traversal through all allocatable /// registers of a given class, in a certain order designed to /// minimize allocation contention. @@ -13,8 +13,9 @@ use crate::{MachineEnv, PReg, RegClass}; /// usage, these consist of caller-save and callee-save registers /// respectively, to minimize clobber-saves; but they need not.) -pub struct RegTraversalIter<'a> { - env: &'a MachineEnv, +pub struct RegTraversalIter { + pref_regs_by_class: PRegSet, + non_pref_regs_by_class: PRegSet, class: usize, hints: [Option; 2], hint_idx: usize, @@ -26,9 +27,9 @@ pub struct RegTraversalIter<'a> { fixed: Option, } -impl<'a> RegTraversalIter<'a> { +impl RegTraversalIter { pub fn new( - env: &'a MachineEnv, + env: &MachineEnv, class: RegClass, hint_reg: PReg, hint2_reg: PReg, @@ -63,7 +64,8 @@ impl<'a> RegTraversalIter<'a> { 0 }; Self { - env, + pref_regs_by_class: env.preferred_regs_by_class, + non_pref_regs_by_class: env.non_preferred_regs_by_class, class, hints, hint_idx: 0, @@ -77,7 +79,7 @@ impl<'a> RegTraversalIter<'a> { } } -impl<'a> core::iter::Iterator for RegTraversalIter<'a> { +impl core::iter::Iterator for RegTraversalIter { type Item = PReg; fn next(&mut self) -> Option { @@ -100,11 +102,10 @@ impl<'a> core::iter::Iterator for RegTraversalIter<'a> { return h; } - let n_pref_regs = self.env.preferred_regs_by_class.len_class(self.class); + let n_pref_regs = self.pref_regs_by_class.len_class(self.class); while self.pref_idx < n_pref_regs { let mut arr = self - .env - .preferred_regs_by_class + .pref_regs_by_class .to_preg_class(self.class) .into_iter(); let r = arr.nth(wrap(self.pref_idx + self.offset_pref, n_pref_regs)); @@ -115,11 +116,10 @@ impl<'a> core::iter::Iterator for RegTraversalIter<'a> { return r; } - let n_non_pref_regs = self.env.non_preferred_regs_by_class.len_class(self.class); + let n_non_pref_regs = self.non_pref_regs_by_class.len_class(self.class); while self.non_pref_idx < n_non_pref_regs { let mut arr = self - .env - .non_preferred_regs_by_class + .non_pref_regs_by_class .to_preg_class(self.class) .into_iter(); let r = arr.nth(wrap( From 2d875a9baa4065ec8e9921b1a5688e11e0c1d360 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 18 May 2024 15:42:49 +0100 Subject: [PATCH 10/19] clean up docs and iterator code --- src/lib.rs | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8178364e..61b9ab10 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -248,6 +248,7 @@ impl PRegSet { self.bits[2] |= other.bits[2]; } + /// Get the last register in a specific register class pub fn last_in_class(&self, class: usize) -> Option { if self.bits[class] == 0 { None @@ -261,6 +262,7 @@ impl PRegSet { } } + /// Get the number of registers in a specific register class pub fn len_class(&self, class: usize) -> usize { self.bits[class].count_ones() as usize } @@ -278,17 +280,17 @@ impl Iterator for PRegSet { type Item = PReg; fn next(&mut self) -> Option { if self.bits[0] != 0 { - let index = self.bits[0].trailing_zeros(); + let index = self.bits[0].trailing_zeros() as u8; self.bits[0] &= !(1u64 << index); - Some(PReg::from_index(index as usize)) + Some(PReg::from(index)) } else if self.bits[1] != 0 { - let index = self.bits[1].trailing_zeros(); + let index = self.bits[1].trailing_zeros() as u8; self.bits[1] &= !(1u64 << index); - Some(PReg::from_index(index as usize + 64)) + Some(PReg::from(index + 64)) } else if self.bits[2] != 0 { - let index = self.bits[2].trailing_zeros(); + let index = self.bits[2].trailing_zeros() as u8; self.bits[2] &= !(1u64 << index); - Some(PReg::from_index(index as usize + 128)) + Some(PReg::from(index + 128)) } else { None } @@ -303,14 +305,14 @@ impl Iterator for PRegSet { fn last(self) -> Option { if self.bits[2] != 0 { - let index = 63 - self.bits[2].leading_zeros(); - Some(PReg::from_index(index as usize + 128)) + let index = 63 - self.bits[2].leading_zeros() as u8; + Some(PReg::from(index + 128)) } else if self.bits[1] != 0 { - let index = 63 - self.bits[1].leading_zeros(); - Some(PReg::from_index(index as usize + 64)) + let index = 63 - self.bits[1].leading_zeros() as u8; + Some(PReg::from(index + 64)) } else if self.bits[0] != 0 { - let index = 63 - self.bits[0].leading_zeros(); - Some(PReg::from_index(index as usize)) + let index = 63 - self.bits[0].leading_zeros() as u8; + Some(PReg::from(index)) } else { None } @@ -360,7 +362,7 @@ impl Iterator for PRegClass { let index = self.regs.trailing_zeros() as u8; self.regs &= !(1u64 << index); let reg_index = index as u8 | self.class_mask; - Some(PReg::from_index(reg_index as usize)) + Some(PReg::from(reg_index)) } } @@ -375,7 +377,7 @@ impl Iterator for PRegClass { } else { let index = 63 - self.regs.leading_zeros(); let reg_index = index as u8 | self.class_mask; - Some(PReg::from_index(reg_index as usize)) + Some(PReg::from(reg_index)) } } } From 45c2104b808d6f015d85b9c3ff54df68e4f590b0 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 18 May 2024 15:45:15 +0100 Subject: [PATCH 11/19] clear up docs and iter methods --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 61b9ab10..b0beecc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -346,6 +346,7 @@ impl From> for PRegSet { } /// A compact iterator over a single register class +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] struct PRegClass { // bit mask containing class data in UPPER two bits class_mask: u8, From a71dd12654cdf9585559f8c349d95f19b239f532 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 18 May 2024 15:45:25 +0100 Subject: [PATCH 12/19] use PRegClass iterator directly --- src/ion/reg_traversal.rs | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index de987b61..80ed7363 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -1,4 +1,4 @@ -use crate::{MachineEnv, PReg, PRegSet, RegClass}; +use crate::{MachineEnv, PReg, PRegClass, RegClass}; /// This iterator represents a traversal through all allocatable /// registers of a given class, in a certain order designed to /// minimize allocation contention. @@ -14,8 +14,8 @@ use crate::{MachineEnv, PReg, PRegSet, RegClass}; /// respectively, to minimize clobber-saves; but they need not.) pub struct RegTraversalIter { - pref_regs_by_class: PRegSet, - non_pref_regs_by_class: PRegSet, + pref_regs_by_class: PRegClass, + non_pref_regs_by_class: PRegClass, class: usize, hints: [Option; 2], hint_idx: usize, @@ -64,8 +64,8 @@ impl RegTraversalIter { 0 }; Self { - pref_regs_by_class: env.preferred_regs_by_class, - non_pref_regs_by_class: env.non_preferred_regs_by_class, + pref_regs_by_class: env.preferred_regs_by_class.to_preg_class(class), + non_pref_regs_by_class: env.non_preferred_regs_by_class.to_preg_class(class), class, hints, hint_idx: 0, @@ -102,12 +102,9 @@ impl core::iter::Iterator for RegTraversalIter { return h; } - let n_pref_regs = self.pref_regs_by_class.len_class(self.class); + let n_pref_regs = self.pref_regs_by_class.len(); while self.pref_idx < n_pref_regs { - let mut arr = self - .pref_regs_by_class - .to_preg_class(self.class) - .into_iter(); + let mut arr = self.pref_regs_by_class.into_iter(); let r = arr.nth(wrap(self.pref_idx + self.offset_pref, n_pref_regs)); self.pref_idx += 1; if r == self.hints[0] || r == self.hints[1] { @@ -116,12 +113,9 @@ impl core::iter::Iterator for RegTraversalIter { return r; } - let n_non_pref_regs = self.non_pref_regs_by_class.len_class(self.class); + let n_non_pref_regs = self.non_pref_regs_by_class.len(); while self.non_pref_idx < n_non_pref_regs { - let mut arr = self - .non_pref_regs_by_class - .to_preg_class(self.class) - .into_iter(); + let mut arr = self.non_pref_regs_by_class.into_iter(); let r = arr.nth(wrap( self.non_pref_idx + self.offset_non_pref, n_non_pref_regs, From 4ae3d1e8ac664f870fe213518baf85d62c41eec1 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 18 May 2024 16:41:11 +0100 Subject: [PATCH 13/19] simplify reg traversal further --- src/ion/reg_traversal.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index 80ed7363..0a8ee6f1 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -16,7 +16,6 @@ use crate::{MachineEnv, PReg, PRegClass, RegClass}; pub struct RegTraversalIter { pref_regs_by_class: PRegClass, non_pref_regs_by_class: PRegClass, - class: usize, hints: [Option; 2], hint_idx: usize, pref_idx: usize, @@ -53,20 +52,23 @@ impl RegTraversalIter { } let hints = [hint_reg, hint2_reg]; let class = class as u8 as usize; - let offset_pref = if env.preferred_regs_by_class.len_class(class) > 0 { - offset % env.preferred_regs_by_class.len_class(class) + + let pref_regs_by_class = env.preferred_regs_by_class.to_preg_class(class); + let non_pref_regs_by_class = env.non_preferred_regs_by_class.to_preg_class(class); + + let offset_pref = if pref_regs_by_class.len() > 0 { + offset % pref_regs_by_class.len() } else { 0 }; - let offset_non_pref = if env.non_preferred_regs_by_class.len_class(class) > 0 { - offset % env.non_preferred_regs_by_class.len_class(class) + let offset_non_pref = if non_pref_regs_by_class.len() > 0 { + offset % non_pref_regs_by_class.len() } else { 0 }; Self { - pref_regs_by_class: env.preferred_regs_by_class.to_preg_class(class), - non_pref_regs_by_class: env.non_preferred_regs_by_class.to_preg_class(class), - class, + pref_regs_by_class, + non_pref_regs_by_class, hints, hint_idx: 0, pref_idx: 0, From 9aa721641bb9261e9dcd83c12c25a4ae1708679b Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 18 May 2024 16:45:39 +0100 Subject: [PATCH 14/19] document reg traversal iter --- src/ion/reg_traversal.rs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index 0a8ee6f1..ca38e29a 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -85,25 +85,22 @@ impl core::iter::Iterator for RegTraversalIter { type Item = PReg; fn next(&mut self) -> Option { + // only take the fixed register if it exists if self.is_fixed { let ret = self.fixed; self.fixed = None; return ret; } - fn wrap(idx: usize, limit: usize) -> usize { - if idx >= limit { - idx - limit - } else { - idx - } - } + // if there are hints, return them first if self.hint_idx < 2 && self.hints[self.hint_idx].is_some() { let h = self.hints[self.hint_idx]; self.hint_idx += 1; return h; } + // iterate over the preferred register rotated by offset + // ignoring hint register let n_pref_regs = self.pref_regs_by_class.len(); while self.pref_idx < n_pref_regs { let mut arr = self.pref_regs_by_class.into_iter(); @@ -115,6 +112,8 @@ impl core::iter::Iterator for RegTraversalIter { return r; } + // iterate over the nonpreferred register rotated by offset + // ignoring hint register let n_non_pref_regs = self.non_pref_regs_by_class.len(); while self.non_pref_idx < n_non_pref_regs { let mut arr = self.non_pref_regs_by_class.into_iter(); @@ -131,3 +130,12 @@ impl core::iter::Iterator for RegTraversalIter { None } } + +/// Wrapping function to wrap around the index for an iterator +fn wrap(idx: usize, limit: usize) -> usize { + if idx >= limit { + idx - limit + } else { + idx + } +} From 08f0b2b63315e01e05e978a29fe7d4ea8e1a35a4 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 18 May 2024 22:09:38 +0100 Subject: [PATCH 15/19] const time n-th for PRegClass --- src/lib.rs | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index b0beecc8..97fcf0da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -381,6 +381,64 @@ impl Iterator for PRegClass { Some(PReg::from(reg_index)) } } + + fn nth(&mut self, n: usize) -> Option { + if n >= self.len() { + self.regs = 0; + None + } else { + let n_from_right = self.len() - n; + // this is the number of trailing zeros for the n-th set bit from the right + let index = find_nth(self.regs, n_from_right as u64); + // clear those bits + self.regs &= u64::MAX << index; + self.regs &= !(1u64 << index); + // calculate the PReg + let reg_index = index as u8 | self.class_mask; + Some(PReg::from(reg_index)) + } + } +} + +// find the n-th set bit from the RIGHT, +// using 1 based indexing +// returns distance from the LEFT +fn find_nth(v: u64, mut r: u64) -> u64 { + const C: u64 = 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111; // 0x00FF00FF + const D: u64 = 0b00001111_00001111_00001111_00001111_00001111_00001111_00001111_00001111; // 0xF0F0F0F0 + const E: u64 = 0b00110011_00110011_00110011_00110011_00110011_00110011_00110011_00110011; // 0x33333333 + const F: u64 = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; // 0x55555555 + + // from https://graphics.stanford.edu/~seander/bithacks.html: uses 1 based indexing + let a = (v & F) + ((v >> 1) & F); + let b = (a & E) + ((a >> 2) & E); + let c = (b & D) + ((b >> 4) & D); + let d = (c & C) + ((c >> 8) & C); + let mut t = (d >> 32) + (d >> 48); + let mut s = 64; + // if (r > t) {s -= 32; r -= t;} + s -= ((t - r) & 256) >> 3; + r -= t & ((t - r) >> 8); + t = (d >> (s - 16)) & 0xff; + // if (r > t) {s -= 16; r -= t;} + s -= ((t - r) & 256) >> 4; + r -= t & ((t - r) >> 8); + t = (c >> (s - 8)) & 0xf; + // if (r > t) {s -= 8; r -= t;} + s -= ((t - r) & 256) >> 5; + r -= t & ((t - r) >> 8); + t = (b >> (s - 4)) & 0x7; + // if (r > t) {s -= 4; r -= t;} + s -= ((t - r) & 256) >> 6; + r -= t & ((t - r) >> 8); + t = (a >> (s - 2)) & 0x3; + // if (r > t) {s -= 2; r -= t;} + s -= ((t - r) & 256) >> 7; + r -= t & ((t - r) >> 8); + t = (v >> (s - 1)) & 0x1; + // if (r > t) s--; + s -= ((t - r) & 256) >> 8; + s - 1 } impl ExactSizeIterator for PRegClass {} From edb4a924013877ae4686690513d132e8d40ecac4 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 18 May 2024 22:22:29 +0100 Subject: [PATCH 16/19] explicitly use wrapping sub so behaviour doesnt change based on opt mode --- src/lib.rs | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 97fcf0da..6997827a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -389,7 +389,7 @@ impl Iterator for PRegClass { } else { let n_from_right = self.len() - n; // this is the number of trailing zeros for the n-th set bit from the right - let index = find_nth(self.regs, n_from_right as u64); + let index = find_nth(self.regs, n_from_right as u64) as u8; // clear those bits self.regs &= u64::MAX << index; self.regs &= !(1u64 << index); @@ -403,7 +403,8 @@ impl Iterator for PRegClass { // find the n-th set bit from the RIGHT, // using 1 based indexing // returns distance from the LEFT -fn find_nth(v: u64, mut r: u64) -> u64 { +// saturates on 0 if the bit requested exceeds the set amount +fn find_nth(v: u64, mut r: u64) -> u8 { const C: u64 = 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111; // 0x00FF00FF const D: u64 = 0b00001111_00001111_00001111_00001111_00001111_00001111_00001111_00001111; // 0xF0F0F0F0 const E: u64 = 0b00110011_00110011_00110011_00110011_00110011_00110011_00110011_00110011; // 0x33333333 @@ -417,28 +418,28 @@ fn find_nth(v: u64, mut r: u64) -> u64 { let mut t = (d >> 32) + (d >> 48); let mut s = 64; // if (r > t) {s -= 32; r -= t;} - s -= ((t - r) & 256) >> 3; - r -= t & ((t - r) >> 8); + s -= ((t.wrapping_sub(r)) & 256) >> 3; + r -= t & ((t.wrapping_sub(r)) >> 8); t = (d >> (s - 16)) & 0xff; // if (r > t) {s -= 16; r -= t;} - s -= ((t - r) & 256) >> 4; - r -= t & ((t - r) >> 8); - t = (c >> (s - 8)) & 0xf; + s -= ((t.wrapping_sub(r)) & 256) >> 4; + r -= t & ((t.wrapping_sub(r)) >> 8); + t = (c >> (s.wrapping_sub(8))) & 0xf; // if (r > t) {s -= 8; r -= t;} - s -= ((t - r) & 256) >> 5; - r -= t & ((t - r) >> 8); - t = (b >> (s - 4)) & 0x7; + s -= ((t.wrapping_sub(r)) & 256) >> 5; + r -= t & ((t.wrapping_sub(r)) >> 8); + t = (b >> (s.wrapping_sub(4))) & 0x7; // if (r > t) {s -= 4; r -= t;} - s -= ((t - r) & 256) >> 6; - r -= t & ((t - r) >> 8); - t = (a >> (s - 2)) & 0x3; + s -= ((t.wrapping_sub(r)) & 256) >> 6; + r -= t & ((t.wrapping_sub(r)) >> 8); + t = (a >> (s.wrapping_sub(2))) & 0x3; // if (r > t) {s -= 2; r -= t;} - s -= ((t - r) & 256) >> 7; - r -= t & ((t - r) >> 8); - t = (v >> (s - 1)) & 0x1; + s -= ((t.wrapping_sub(r)) & 256) >> 7; + r -= t & ((t.wrapping_sub(r)) >> 8); + t = (v >> (s.wrapping_sub(1))) & 0x1; // if (r > t) s--; - s -= ((t - r) & 256) >> 8; - s - 1 + s -= ((t.wrapping_sub(r)) & 256) >> 8; + (s as u8).wrapping_sub(1) } impl ExactSizeIterator for PRegClass {} From db3f165fd6b237cf36238c48904af453c018e8d8 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sat, 18 May 2024 23:47:15 +0100 Subject: [PATCH 17/19] use bitpacked register info for reg_traversal --- src/ion/reg_traversal.rs | 151 ++++++++++++++++++++++----------------- src/lib.rs | 4 +- 2 files changed, 89 insertions(+), 66 deletions(-) diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index ca38e29a..f35678ba 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -1,4 +1,4 @@ -use crate::{MachineEnv, PReg, PRegClass, RegClass}; +use crate::{find_nth, MachineEnv, PReg, RegClass}; /// This iterator represents a traversal through all allocatable /// registers of a given class, in a certain order designed to /// minimize allocation contention. @@ -14,16 +14,14 @@ use crate::{MachineEnv, PReg, PRegClass, RegClass}; /// respectively, to minimize clobber-saves; but they need not.) pub struct RegTraversalIter { - pref_regs_by_class: PRegClass, - non_pref_regs_by_class: PRegClass, - hints: [Option; 2], - hint_idx: usize, - pref_idx: usize, - non_pref_idx: usize, - offset_pref: usize, - offset_non_pref: usize, + pref_regs_first: u64, + pref_regs_second: u64, + non_pref_regs_first: u64, + non_pref_regs_second: u64, + hint_regs: u64, is_fixed: bool, fixed: Option, + class_mask: u8, } impl RegTraversalIter { @@ -35,48 +33,69 @@ impl RegTraversalIter { offset: usize, fixed: Option, ) -> Self { - let mut hint_reg = if hint_reg != PReg::invalid() { - Some(hint_reg) - } else { - None - }; - let mut hint2_reg = if hint2_reg != PReg::invalid() { - Some(hint2_reg) - } else { - None - }; + // get a mask for the hint registers + let mut hint_mask = 0u64; - if hint_reg.is_none() { - hint_reg = hint2_reg; - hint2_reg = None; + if hint_reg != PReg::invalid() { + let mask = 1u64 << (hint_reg.bits & 0b0011_1111); + hint_mask |= mask; } - let hints = [hint_reg, hint2_reg]; + + if hint2_reg != PReg::invalid() { + let mask = 1u64 << (hint2_reg.bits & 0b0011_1111); + hint_mask |= mask; + } + let class = class as u8 as usize; - let pref_regs_by_class = env.preferred_regs_by_class.to_preg_class(class); - let non_pref_regs_by_class = env.non_preferred_regs_by_class.to_preg_class(class); + let pref_regs_by_class = env.preferred_regs_by_class.bits[class]; + let non_pref_regs_by_class = env.non_preferred_regs_by_class.bits[class]; + + let n_pref_regs = pref_regs_by_class.count_ones() as usize; + let n_non_pref_regs = non_pref_regs_by_class.count_ones() as usize; - let offset_pref = if pref_regs_by_class.len() > 0 { - offset % pref_regs_by_class.len() + let offset_pref = if n_pref_regs > 0 { + offset % n_pref_regs } else { 0 }; - let offset_non_pref = if non_pref_regs_by_class.len() > 0 { - offset % non_pref_regs_by_class.len() + let offset_non_pref = if n_non_pref_regs > 0 { + offset % n_non_pref_regs } else { 0 }; + + // we want to split the pref registers bit vectors into two sets + // with the offset lowest bits in one and the rest in the other + let split_num = (n_pref_regs - offset_pref) as u64; + let split_pos = find_nth(pref_regs_by_class, split_num); + let mask = (1 << split_pos) - 1; + let pref_regs_first = pref_regs_by_class & !mask; + let pref_regs_second = pref_regs_by_class & mask; + + let split_num = (n_non_pref_regs - offset_non_pref) as u64; + let split_pos = find_nth(non_pref_regs_by_class, split_num); + let mask = (1 << split_pos) - 1; + let non_pref_regs_first = non_pref_regs_by_class & !mask; + let non_pref_regs_second = non_pref_regs_by_class & mask; + + // remove the hint registers from the bit vectors + let pref_regs_first = pref_regs_first & !hint_mask; + let pref_regs_second = pref_regs_second & !hint_mask; + let non_pref_regs_first = non_pref_regs_first & !hint_mask; + let non_pref_regs_second = non_pref_regs_second & !hint_mask; + + let class_mask = (class as u8) << 6; + Self { - pref_regs_by_class, - non_pref_regs_by_class, - hints, - hint_idx: 0, - pref_idx: 0, - non_pref_idx: 0, - offset_pref, - offset_non_pref, + pref_regs_first, + pref_regs_second, + non_pref_regs_first, + non_pref_regs_second, + hint_regs: hint_mask, is_fixed: fixed.is_some(), fixed, + class_mask, } } } @@ -93,39 +112,43 @@ impl core::iter::Iterator for RegTraversalIter { } // if there are hints, return them first - if self.hint_idx < 2 && self.hints[self.hint_idx].is_some() { - let h = self.hints[self.hint_idx]; - self.hint_idx += 1; - return h; + if self.hint_regs != 0 { + let index = self.hint_regs.trailing_zeros() as u8; + self.hint_regs &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + return Some(PReg::from(reg_index)); } // iterate over the preferred register rotated by offset - // ignoring hint register - let n_pref_regs = self.pref_regs_by_class.len(); - while self.pref_idx < n_pref_regs { - let mut arr = self.pref_regs_by_class.into_iter(); - let r = arr.nth(wrap(self.pref_idx + self.offset_pref, n_pref_regs)); - self.pref_idx += 1; - if r == self.hints[0] || r == self.hints[1] { - continue; - } - return r; + // iterate over first half + if self.pref_regs_first != 0 { + let index = self.pref_regs_first.trailing_zeros() as u8; + self.pref_regs_first &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + return Some(PReg::from(reg_index)); + } + // iterate over second half + if self.pref_regs_second != 0 { + let index = self.pref_regs_second.trailing_zeros() as u8; + self.pref_regs_second &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + return Some(PReg::from(reg_index)); } // iterate over the nonpreferred register rotated by offset - // ignoring hint register - let n_non_pref_regs = self.non_pref_regs_by_class.len(); - while self.non_pref_idx < n_non_pref_regs { - let mut arr = self.non_pref_regs_by_class.into_iter(); - let r = arr.nth(wrap( - self.non_pref_idx + self.offset_non_pref, - n_non_pref_regs, - )); - self.non_pref_idx += 1; - if r == self.hints[0] || r == self.hints[1] { - continue; - } - return r; + // iterate over first half + if self.non_pref_regs_first != 0 { + let index = self.non_pref_regs_first.trailing_zeros() as u8; + self.non_pref_regs_first &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + return Some(PReg::from(reg_index)); + } + // iterate over second half + if self.non_pref_regs_second != 0 { + let index = self.non_pref_regs_second.trailing_zeros() as u8; + self.non_pref_regs_second &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + return Some(PReg::from(reg_index)); } None } diff --git a/src/lib.rs b/src/lib.rs index 6997827a..4941850b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -400,11 +400,11 @@ impl Iterator for PRegClass { } } -// find the n-th set bit from the RIGHT, +// find the r-th set bit in v from the RIGHT, // using 1 based indexing // returns distance from the LEFT // saturates on 0 if the bit requested exceeds the set amount -fn find_nth(v: u64, mut r: u64) -> u8 { +pub(crate) fn find_nth(v: u64, mut r: u64) -> u8 { const C: u64 = 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111; // 0x00FF00FF const D: u64 = 0b00001111_00001111_00001111_00001111_00001111_00001111_00001111_00001111; // 0xF0F0F0F0 const E: u64 = 0b00110011_00110011_00110011_00110011_00110011_00110011_00110011_00110011; // 0x33333333 From afd9b8952cac1f72f88af4c36e754876bbb49d23 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sun, 19 May 2024 13:54:20 +0100 Subject: [PATCH 18/19] remove unused wrap function --- src/ion/reg_traversal.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index f35678ba..461dcfe4 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -153,12 +153,3 @@ impl core::iter::Iterator for RegTraversalIter { None } } - -/// Wrapping function to wrap around the index for an iterator -fn wrap(idx: usize, limit: usize) -> usize { - if idx >= limit { - idx - limit - } else { - idx - } -} From 5e902b1d73c70c1c7e98a2b0cc16946a36e3fda9 Mon Sep 17 00:00:00 2001 From: Kirpal Grewal Date: Sun, 19 May 2024 14:43:48 +0100 Subject: [PATCH 19/19] correct link to stanford code --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 4941850b..da5638de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -410,7 +410,7 @@ pub(crate) fn find_nth(v: u64, mut r: u64) -> u8 { const E: u64 = 0b00110011_00110011_00110011_00110011_00110011_00110011_00110011_00110011; // 0x33333333 const F: u64 = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; // 0x55555555 - // from https://graphics.stanford.edu/~seander/bithacks.html: uses 1 based indexing + // from https://graphics.stanford.edu/~seander/bithacks.html##SelectPosFromMSBRank: uses 1 based indexing let a = (v & F) + ((v >> 1) & F); let b = (a & E) + ((a >> 2) & E); let c = (b & D) + ((b >> 4) & D);