diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 8bdb326e..6ea94538 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -645,16 +645,24 @@ pub fn machine_env() -> MachineEnv { fn regs(r: core::ops::Range, c: RegClass) -> Vec { r.map(|i| PReg::new(i, c)).collect() } - let preferred_regs_by_class: [Vec; 3] = [ - regs(0..24, RegClass::Int), - regs(0..24, RegClass::Float), - regs(0..24, RegClass::Vector), - ]; - let non_preferred_regs_by_class: [Vec; 3] = [ - regs(24..32, RegClass::Int), - regs(24..32, RegClass::Float), - regs(24..32, RegClass::Vector), - ]; + let int_regs: PRegSet = regs(0..24, RegClass::Int).into(); + let float_regs: PRegSet = regs(0..24, RegClass::Float).into(); + let vector_regs: PRegSet = regs(0..24, RegClass::Vector).into(); + + let mut preferred_regs_by_class = PRegSet::default(); + preferred_regs_by_class.union_from(int_regs); + preferred_regs_by_class.union_from(float_regs); + preferred_regs_by_class.union_from(vector_regs); + + let int_regs: PRegSet = regs(24..32, RegClass::Int).into(); + let float_regs: PRegSet = regs(24..32, RegClass::Float).into(); + let vector_regs: PRegSet = regs(24..32, RegClass::Vector).into(); + + let mut non_preferred_regs_by_class = PRegSet::default(); + non_preferred_regs_by_class.union_from(int_regs); + non_preferred_regs_by_class.union_from(float_regs); + non_preferred_regs_by_class.union_from(vector_regs); + let scratch_by_class: [Option; 3] = [None, None, None]; let fixed_stack_slots = (32..63) .flat_map(|i| { diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index b3ee32cb..f585b527 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -113,10 +113,12 @@ impl<'a, F: Function> Env<'a, F> { self.pregs[preg.index()].is_stack = true; } for class in 0..self.preferred_victim_by_class.len() { - self.preferred_victim_by_class[class] = self.env.non_preferred_regs_by_class[class] - .last() - .or(self.env.preferred_regs_by_class[class].last()) - .cloned() + self.preferred_victim_by_class[class] = self + .env + .non_preferred_regs_by_class + .last_in_class(class) + .or(self.env.preferred_regs_by_class.last_in_class(class)) + // .cloned() .unwrap_or(PReg::invalid()); } // Create VRegs from the vreg count. diff --git a/src/ion/process.rs b/src/ion/process.rs index 371ab84b..8c6e9e80 100644 --- a/src/ion/process.rs +++ b/src/ion/process.rs @@ -1221,9 +1221,17 @@ impl<'a, F: Function> Env<'a, F> { let mut min_bundles_assigned = 0; let mut fixed_assigned = 0; let mut total_regs = 0; - for preg in self.env.preferred_regs_by_class[class as u8 as usize] - .iter() - .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) + for preg in self + .env + .preferred_regs_by_class + .to_preg_class(class as u8 as usize) + .into_iter() + .chain( + self.env + .non_preferred_regs_by_class + .to_preg_class(class as u8 as usize) + .into_iter(), + ) { trace!(" -> PR {:?}", preg); let start = LiveRangeKey::from_range(&CodeRange { diff --git a/src/ion/reg_traversal.rs b/src/ion/reg_traversal.rs index 729fd33e..461dcfe4 100644 --- a/src/ion/reg_traversal.rs +++ b/src/ion/reg_traversal.rs @@ -1,5 +1,4 @@ -use crate::{MachineEnv, PReg, RegClass}; - +use crate::{find_nth, MachineEnv, PReg, RegClass}; /// This iterator represents a traversal through all allocatable /// registers of a given class, in a certain order designed to /// minimize allocation contention. @@ -14,109 +13,142 @@ use crate::{MachineEnv, PReg, RegClass}; /// usage, these consist of caller-save and callee-save registers /// respectively, to minimize clobber-saves; but they need not.) -pub struct RegTraversalIter<'a> { - env: &'a MachineEnv, - class: usize, - hints: [Option; 2], - hint_idx: usize, - pref_idx: usize, - non_pref_idx: usize, - offset_pref: usize, - offset_non_pref: usize, +pub struct RegTraversalIter { + pref_regs_first: u64, + pref_regs_second: u64, + non_pref_regs_first: u64, + non_pref_regs_second: u64, + hint_regs: u64, is_fixed: bool, fixed: Option, + class_mask: u8, } -impl<'a> RegTraversalIter<'a> { +impl RegTraversalIter { pub fn new( - env: &'a MachineEnv, + env: &MachineEnv, class: RegClass, hint_reg: PReg, hint2_reg: PReg, offset: usize, fixed: Option, ) -> Self { - let mut hint_reg = if hint_reg != PReg::invalid() { - Some(hint_reg) - } else { - None - }; - let mut hint2_reg = if hint2_reg != PReg::invalid() { - Some(hint2_reg) - } else { - None - }; + // get a mask for the hint registers + let mut hint_mask = 0u64; - if hint_reg.is_none() { - hint_reg = hint2_reg; - hint2_reg = None; + if hint_reg != PReg::invalid() { + let mask = 1u64 << (hint_reg.bits & 0b0011_1111); + hint_mask |= mask; } - let hints = [hint_reg, hint2_reg]; + + if hint2_reg != PReg::invalid() { + let mask = 1u64 << (hint2_reg.bits & 0b0011_1111); + hint_mask |= mask; + } + let class = class as u8 as usize; - let offset_pref = if env.preferred_regs_by_class[class].len() > 0 { - offset % env.preferred_regs_by_class[class].len() + + let pref_regs_by_class = env.preferred_regs_by_class.bits[class]; + let non_pref_regs_by_class = env.non_preferred_regs_by_class.bits[class]; + + let n_pref_regs = pref_regs_by_class.count_ones() as usize; + let n_non_pref_regs = non_pref_regs_by_class.count_ones() as usize; + + let offset_pref = if n_pref_regs > 0 { + offset % n_pref_regs } else { 0 }; - let offset_non_pref = if env.non_preferred_regs_by_class[class].len() > 0 { - offset % env.non_preferred_regs_by_class[class].len() + let offset_non_pref = if n_non_pref_regs > 0 { + offset % n_non_pref_regs } else { 0 }; + + // we want to split the pref registers bit vectors into two sets + // with the offset lowest bits in one and the rest in the other + let split_num = (n_pref_regs - offset_pref) as u64; + let split_pos = find_nth(pref_regs_by_class, split_num); + let mask = (1 << split_pos) - 1; + let pref_regs_first = pref_regs_by_class & !mask; + let pref_regs_second = pref_regs_by_class & mask; + + let split_num = (n_non_pref_regs - offset_non_pref) as u64; + let split_pos = find_nth(non_pref_regs_by_class, split_num); + let mask = (1 << split_pos) - 1; + let non_pref_regs_first = non_pref_regs_by_class & !mask; + let non_pref_regs_second = non_pref_regs_by_class & mask; + + // remove the hint registers from the bit vectors + let pref_regs_first = pref_regs_first & !hint_mask; + let pref_regs_second = pref_regs_second & !hint_mask; + let non_pref_regs_first = non_pref_regs_first & !hint_mask; + let non_pref_regs_second = non_pref_regs_second & !hint_mask; + + let class_mask = (class as u8) << 6; + Self { - env, - class, - hints, - hint_idx: 0, - pref_idx: 0, - non_pref_idx: 0, - offset_pref, - offset_non_pref, + pref_regs_first, + pref_regs_second, + non_pref_regs_first, + non_pref_regs_second, + hint_regs: hint_mask, is_fixed: fixed.is_some(), fixed, + class_mask, } } } -impl<'a> core::iter::Iterator for RegTraversalIter<'a> { +impl core::iter::Iterator for RegTraversalIter { type Item = PReg; fn next(&mut self) -> Option { + // only take the fixed register if it exists if self.is_fixed { let ret = self.fixed; self.fixed = None; return ret; } - fn wrap(idx: usize, limit: usize) -> usize { - if idx >= limit { - idx - limit - } else { - idx - } + // if there are hints, return them first + if self.hint_regs != 0 { + let index = self.hint_regs.trailing_zeros() as u8; + self.hint_regs &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + return Some(PReg::from(reg_index)); + } + + // iterate over the preferred register rotated by offset + // iterate over first half + if self.pref_regs_first != 0 { + let index = self.pref_regs_first.trailing_zeros() as u8; + self.pref_regs_first &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + return Some(PReg::from(reg_index)); } - if self.hint_idx < 2 && self.hints[self.hint_idx].is_some() { - let h = self.hints[self.hint_idx]; - self.hint_idx += 1; - return h; + // iterate over second half + if self.pref_regs_second != 0 { + let index = self.pref_regs_second.trailing_zeros() as u8; + self.pref_regs_second &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + return Some(PReg::from(reg_index)); } - while self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { - let arr = &self.env.preferred_regs_by_class[self.class][..]; - let r = arr[wrap(self.pref_idx + self.offset_pref, arr.len())]; - self.pref_idx += 1; - if Some(r) == self.hints[0] || Some(r) == self.hints[1] { - continue; - } - return Some(r); + + // iterate over the nonpreferred register rotated by offset + // iterate over first half + if self.non_pref_regs_first != 0 { + let index = self.non_pref_regs_first.trailing_zeros() as u8; + self.non_pref_regs_first &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + return Some(PReg::from(reg_index)); } - while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { - let arr = &self.env.non_preferred_regs_by_class[self.class][..]; - let r = arr[wrap(self.non_pref_idx + self.offset_non_pref, arr.len())]; - self.non_pref_idx += 1; - if Some(r) == self.hints[0] || Some(r) == self.hints[1] { - continue; - } - return Some(r); + // iterate over second half + if self.non_pref_regs_second != 0 { + let index = self.non_pref_regs_second.trailing_zeros() as u8; + self.non_pref_regs_second &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + return Some(PReg::from(reg_index)); } None } diff --git a/src/lib.rs b/src/lib.rs index 3f73719e..da5638de 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -160,6 +160,12 @@ impl PReg { } } +impl From for PReg { + fn from(raw_index: u8) -> Self { + PReg { bits: raw_index } + } +} + impl core::fmt::Debug for PReg { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!( @@ -191,47 +197,47 @@ impl core::fmt::Display for PReg { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct PRegSet { - bits: [u128; 2], + bits: [u64; 3], } impl PRegSet { /// Create an empty set. pub const fn empty() -> Self { - Self { bits: [0; 2] } + Self { bits: [0; 3] } } /// Returns whether the given register is part of the set. pub fn contains(&self, reg: PReg) -> bool { debug_assert!(reg.index() < 256); - let bit = reg.index() & 127; - let index = reg.index() >> 7; - self.bits[index] & (1u128 << bit) != 0 + let bit = reg.index() & 63; + let index = reg.index() >> 6; + self.bits[index] & (1u64 << bit) != 0 } /// Add a physical register (PReg) to the set, returning the new value. pub const fn with(self, reg: PReg) -> Self { debug_assert!(reg.index() < 256); - let bit = reg.index() & 127; - let index = reg.index() >> 7; + let bit = reg.index() & 63; + let index = reg.index() >> 6; let mut out = self; - out.bits[index] |= 1u128 << bit; + out.bits[index] |= 1u64 << bit; out } /// Add a physical register (PReg) to the set. pub fn add(&mut self, reg: PReg) { debug_assert!(reg.index() < 256); - let bit = reg.index() & 127; - let index = reg.index() >> 7; - self.bits[index] |= 1u128 << bit; + let bit = reg.index() & 63; + let index = reg.index() >> 6; + self.bits[index] |= 1u64 << bit; } /// Remove a physical register (PReg) from the set. pub fn remove(&mut self, reg: PReg) { debug_assert!(reg.index() < 256); - let bit = reg.index() & 127; - let index = reg.index() >> 7; - self.bits[index] &= !(1u128 << bit); + let bit = reg.index() & 63; + let index = reg.index() >> 6; + self.bits[index] &= !(1u64 << bit); } /// Add all of the registers in one set to this one, mutating in @@ -239,58 +245,205 @@ impl PRegSet { pub fn union_from(&mut self, other: PRegSet) { self.bits[0] |= other.bits[0]; self.bits[1] |= other.bits[1]; + self.bits[2] |= other.bits[2]; } -} -impl IntoIterator for PRegSet { - type Item = PReg; - type IntoIter = PRegSetIter; - fn into_iter(self) -> PRegSetIter { - PRegSetIter { bits: self.bits } + /// Get the last register in a specific register class + pub fn last_in_class(&self, class: usize) -> Option { + if self.bits[class] == 0 { + None + } else { + // get the index in the class vec + let i_in_class = 63 - self.bits[class].leading_zeros(); + // apply the class mask + let i = i_in_class as u8 | ((class as u8) << 6); + // return the PReg + Some(PReg::from(i)) + } + } + + /// Get the number of registers in a specific register class + pub fn len_class(&self, class: usize) -> usize { + self.bits[class].count_ones() as usize } -} -pub struct PRegSetIter { - bits: [u128; 2], + /// Get a iterator over only one class in the set + fn to_preg_class(&self, class: usize) -> PRegClass { + PRegClass { + class_mask: (class as u8) << 6, + regs: self.bits[class], + } + } } -impl Iterator for PRegSetIter { +impl Iterator for PRegSet { type Item = PReg; fn next(&mut self) -> Option { if self.bits[0] != 0 { - let index = self.bits[0].trailing_zeros(); - self.bits[0] &= !(1u128 << index); - Some(PReg::from_index(index as usize)) + let index = self.bits[0].trailing_zeros() as u8; + self.bits[0] &= !(1u64 << index); + Some(PReg::from(index)) + } else if self.bits[1] != 0 { + let index = self.bits[1].trailing_zeros() as u8; + self.bits[1] &= !(1u64 << index); + Some(PReg::from(index + 64)) + } else if self.bits[2] != 0 { + let index = self.bits[2].trailing_zeros() as u8; + self.bits[2] &= !(1u64 << index); + Some(PReg::from(index + 128)) + } else { + None + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = (self.bits[0].count_ones() + + self.bits[1].count_ones() + + self.bits[2].count_ones()) as usize; + (len, Some(len)) + } + + fn last(self) -> Option { + if self.bits[2] != 0 { + let index = 63 - self.bits[2].leading_zeros() as u8; + Some(PReg::from(index + 128)) } else if self.bits[1] != 0 { - let index = self.bits[1].trailing_zeros(); - self.bits[1] &= !(1u128 << index); - Some(PReg::from_index(index as usize + 128)) + let index = 63 - self.bits[1].leading_zeros() as u8; + Some(PReg::from(index + 64)) + } else if self.bits[0] != 0 { + let index = 63 - self.bits[0].leading_zeros() as u8; + Some(PReg::from(index)) } else { None } } } +impl ExactSizeIterator for PRegSet {} + impl From<&MachineEnv> for PRegSet { fn from(env: &MachineEnv) -> Self { let mut res = Self::default(); - for class in env.preferred_regs_by_class.iter() { - for preg in class { - res.add(*preg) - } - } + res.union_from(env.preferred_regs_by_class); - for class in env.non_preferred_regs_by_class.iter() { - for preg in class { - res.add(*preg) - } + res.union_from(env.non_preferred_regs_by_class); + + res + } +} + +impl From> for PRegSet { + fn from(regs: Vec) -> Self { + let mut res = Self::default(); + + for preg in regs { + res.add(preg); } res } } +/// A compact iterator over a single register class +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct PRegClass { + // bit mask containing class data in UPPER two bits + class_mask: u8, + // bit packed vec of registers + regs: u64, +} + +impl Iterator for PRegClass { + type Item = PReg; + fn next(&mut self) -> Option { + if self.regs == 0 { + None + } else { + let index = self.regs.trailing_zeros() as u8; + self.regs &= !(1u64 << index); + let reg_index = index as u8 | self.class_mask; + Some(PReg::from(reg_index)) + } + } + + fn size_hint(&self) -> (usize, Option) { + let len = (self.regs.count_ones()) as usize; + (len, Some(len)) + } + + fn last(self) -> Option { + if self.regs == 0 { + None + } else { + let index = 63 - self.regs.leading_zeros(); + let reg_index = index as u8 | self.class_mask; + Some(PReg::from(reg_index)) + } + } + + fn nth(&mut self, n: usize) -> Option { + if n >= self.len() { + self.regs = 0; + None + } else { + let n_from_right = self.len() - n; + // this is the number of trailing zeros for the n-th set bit from the right + let index = find_nth(self.regs, n_from_right as u64) as u8; + // clear those bits + self.regs &= u64::MAX << index; + self.regs &= !(1u64 << index); + // calculate the PReg + let reg_index = index as u8 | self.class_mask; + Some(PReg::from(reg_index)) + } + } +} + +// find the r-th set bit in v from the RIGHT, +// using 1 based indexing +// returns distance from the LEFT +// saturates on 0 if the bit requested exceeds the set amount +pub(crate) fn find_nth(v: u64, mut r: u64) -> u8 { + const C: u64 = 0b00000000_11111111_00000000_11111111_00000000_11111111_00000000_11111111; // 0x00FF00FF + const D: u64 = 0b00001111_00001111_00001111_00001111_00001111_00001111_00001111_00001111; // 0xF0F0F0F0 + const E: u64 = 0b00110011_00110011_00110011_00110011_00110011_00110011_00110011_00110011; // 0x33333333 + const F: u64 = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101; // 0x55555555 + + // from https://graphics.stanford.edu/~seander/bithacks.html##SelectPosFromMSBRank: uses 1 based indexing + let a = (v & F) + ((v >> 1) & F); + let b = (a & E) + ((a >> 2) & E); + let c = (b & D) + ((b >> 4) & D); + let d = (c & C) + ((c >> 8) & C); + let mut t = (d >> 32) + (d >> 48); + let mut s = 64; + // if (r > t) {s -= 32; r -= t;} + s -= ((t.wrapping_sub(r)) & 256) >> 3; + r -= t & ((t.wrapping_sub(r)) >> 8); + t = (d >> (s - 16)) & 0xff; + // if (r > t) {s -= 16; r -= t;} + s -= ((t.wrapping_sub(r)) & 256) >> 4; + r -= t & ((t.wrapping_sub(r)) >> 8); + t = (c >> (s.wrapping_sub(8))) & 0xf; + // if (r > t) {s -= 8; r -= t;} + s -= ((t.wrapping_sub(r)) & 256) >> 5; + r -= t & ((t.wrapping_sub(r)) >> 8); + t = (b >> (s.wrapping_sub(4))) & 0x7; + // if (r > t) {s -= 4; r -= t;} + s -= ((t.wrapping_sub(r)) & 256) >> 6; + r -= t & ((t.wrapping_sub(r)) >> 8); + t = (a >> (s.wrapping_sub(2))) & 0x3; + // if (r > t) {s -= 2; r -= t;} + s -= ((t.wrapping_sub(r)) & 256) >> 7; + r -= t & ((t.wrapping_sub(r)) >> 8); + t = (v >> (s.wrapping_sub(1))) & 0x1; + // if (r > t) s--; + s -= ((t.wrapping_sub(r)) & 256) >> 8; + (s as u8).wrapping_sub(1) +} + +impl ExactSizeIterator for PRegClass {} + /// A virtual register. Contains a virtual register number and a /// class. /// @@ -1380,7 +1533,7 @@ pub struct MachineEnv { /// /// If an explicit scratch register is provided in `scratch_by_class` then /// it must not appear in this list. - pub preferred_regs_by_class: [Vec; 3], + pub preferred_regs_by_class: PRegSet, /// Non-preferred physical registers for each class. These are the /// registers that will be allocated if a preferred register is @@ -1389,7 +1542,7 @@ pub struct MachineEnv { /// /// If an explicit scratch register is provided in `scratch_by_class` then /// it must not appear in this list. - pub non_preferred_regs_by_class: [Vec; 3], + pub non_preferred_regs_by_class: PRegSet, /// Optional dedicated scratch register per class. This is needed to perform /// moves between registers when cyclic move patterns occur. The @@ -1546,3 +1699,18 @@ pub struct RegallocOptions { /// Run the SSA validator before allocating registers. pub validate_ssa: bool, } + +#[cfg(test)] +mod test { + use super::PRegSet; + + #[test] + fn test_set_bits_iter() { + let registers = PRegSet { + bits: [112, 0, 131], + } + .into_iter(); + let last = registers.last().unwrap().bits; + assert_eq!(last, 135); + } +}