Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions applications/tests/test_dvfs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ edition = "2024"

[dependencies]
log = "0.4"
array-macro = "2.1"
serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
num-traits = { version = "0.2", default-features = false }

[dependencies.awkernel_async_lib]
path = "../../../awkernel_async_lib"
Expand Down
249 changes: 214 additions & 35 deletions applications/tests/test_dvfs/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,62 +1,241 @@
#![no_std]

use core::time::Duration;
use core::{
sync::atomic::{AtomicU64, AtomicUsize, Ordering, fence},
time::Duration,
};

use alloc::{format, vec::Vec};
use array_macro::array;
use awkernel_lib::{
dvfs::DesiredPerformance,
sync::{mcs::MCSNode, mutex::Mutex},
};

extern crate alloc;

const APP_NAME: &str = "test DVFS";
mod nbody;

// NOTE(review): `NUM_LOOP` is only referenced by busy loops that look like removed
// lines of this diff — confirm whether it is still needed.
const NUM_LOOP: usize = 1000000;
/// Number of per-CPU rows in the result tables (outer dimension of `LATENCY` and `COUNT`).
const NUM_CPU: usize = 14;
/// Number of latency samples stored per (CPU, performance level) slot.
const NUM_TRIALS_LATENCY: usize = 100;
/// Iteration count of the `warm_up()` busy loop.
const NUM_BUSY_LOOP: usize = 1000000000;

/// Recorded workload latencies in microseconds, indexed as `[cpu_id][level][trial]`.
/// The middle dimension of 11 matches the `0..=10` performance-level loop in `test_latency`.
static LATENCY: [[[AtomicU64; NUM_TRIALS_LATENCY]; 11]; NUM_CPU] =
array![_ => array![_ => array![_ => AtomicU64::new(0); NUM_TRIALS_LATENCY]; 11]; NUM_CPU];

/// Number of samples attempted so far for each `[cpu_id][level]` slot.
static COUNT: [[AtomicUsize; 11]; NUM_CPU] =
array![_ => array![_ => AtomicUsize::new(0); 11]; NUM_CPU];
/// Number of samples actually stored in `LATENCY`, summed over all CPUs and levels.
static TOTAL_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Entry point of the DVFS test application.
///
/// Spawns `num_cpu() - 2` `test_latency_diff` tasks under the FIFO scheduler and joins
/// them all, then does the same with `test_latency` tasks.
///
/// NOTE(review): `num_cpu() - 2` is computed without a guard — presumably the target
/// always has more than two CPUs; confirm.
pub async fn run() {
// NOTE(review): this first `spawn` of `test_dvfs()` with `APP_NAME` looks like a removed
// line of the diff that was interleaved with the new body — confirm before relying on it.
awkernel_async_lib::spawn(
APP_NAME.into(),
test_dvfs(),
awkernel_async_lib::scheduler::SchedulerType::FIFO,
)
.await;
// Phase 1: frequency-transition measurement tasks.
let mut waiter = Vec::with_capacity(awkernel_lib::cpu::num_cpu() - 2);

for _ in 0..(awkernel_lib::cpu::num_cpu() - 2) {
let w = awkernel_async_lib::spawn(
"test_latency_diff".into(),
test_latency_diff(),
awkernel_async_lib::scheduler::SchedulerType::FIFO,
)
.await;

waiter.push(w);
}

// Wait for every phase-1 task; the join result is deliberately ignored.
for w in waiter {
let _ = w.join().await;
}

// Phase 2: per-performance-level workload latency tasks.
let mut waiter = Vec::with_capacity(awkernel_lib::cpu::num_cpu() - 2);

for _ in 0..(awkernel_lib::cpu::num_cpu() - 2) {
let w = awkernel_async_lib::spawn(
"test_latency".into(),
test_latency(),
awkernel_async_lib::scheduler::SchedulerType::FIFO,
)
.await;

waiter.push(w);
}

// Wait for every phase-2 task; the join result is deliberately ignored.
for w in waiter {
let _ = w.join().await;
}
}

async fn test_dvfs() {
loop {
let max = awkernel_lib::dvfs::get_max_freq();
let cpuid = awkernel_lib::cpu::cpu_id();
/// Measures `workload()` latency at each performance level and records the samples.
///
/// For every level `i` in `0..=10` the task sets min/max performance to `10 * i`,
/// runs `warm_up()`, times one `workload()` and stores the elapsed microseconds in
/// `LATENCY[cpu_id][i][n]`, where `n` is the slot's previous `COUNT` value. Samples
/// beyond `NUM_TRIALS_LATENCY` for a slot are discarded. The task that stores the
/// final sample calls `print_latency()`.
///
/// The task yields after each pass over the levels and returns once `TOTAL_COUNT`
/// has reached the expected number of samples.
async fn test_latency() {
    // Expected number of stored samples: one full slot table for `num_cpu() - 1` CPUs.
    // NOTE(review): presumably one CPU never runs these tasks — confirm.
    let end_count = (awkernel_lib::cpu::num_cpu() - 1) * NUM_TRIALS_LATENCY * 11;

    // Run until *every* sample has been stored. The previous guard
    // (`load + 1 < end_count`) let all tasks exit with one sample outstanding,
    // in which case `print_latency()` was never reached. This now matches the
    // completion check used by `test_latency_diff`.
    while TOTAL_COUNT.load(Ordering::Relaxed) < end_count {
        let cpu_id = awkernel_lib::cpu::cpu_id();

        for i in 0..=10 {
            awkernel_lib::dvfs::set_min_max_performance(10 * i);
            awkernel_lib::dvfs::set_energy_efficiency(0);
            awkernel_lib::dvfs::set_desired_performance(DesiredPerformance::Auto);

            warm_up();

            let elapsed = workload();

            log::debug!(
                "CPU {cpu_id}: Performance {}: Elapsed: {} [us]",
                i * 10,
                elapsed.as_micros()
            );

            // Reserve the next trial index of this (CPU, level) slot.
            let count = COUNT[cpu_id][i as usize].fetch_add(1, Ordering::Relaxed);
            if count < NUM_TRIALS_LATENCY {
                LATENCY[cpu_id][i as usize][count]
                    .store(elapsed.as_micros() as u64, Ordering::Relaxed);

                // AcqRel: the increment publishes the sample stored above, and the task
                // that observes the final count acquires every earlier sample before
                // it prints them.
                let total_count = TOTAL_COUNT.fetch_add(1, Ordering::AcqRel);

                log::debug!("progress: {total_count} / {end_count}");

                if total_count + 1 == end_count {
                    print_latency();
                }
            }
        }

        awkernel_async_lib::r#yield().await;
    }
}

// Maximum frequency.
awkernel_lib::dvfs::fix_freq(max);
/// Busy-spins for `NUM_BUSY_LOOP` iterations.
///
/// `black_box` keeps the otherwise empty loop from being optimized away.
fn warm_up() {
    (0..NUM_BUSY_LOOP).for_each(|_| core::hint::black_box(()));
}

let start = awkernel_async_lib::time::Time::now();
/// Runs one `nbody::simulate()` pass and returns the time it took.
fn workload() -> Duration {
    let started_at = awkernel_async_lib::time::Time::now();
    nbody::simulate();
    started_at.elapsed()
}

for _ in 0..NUM_LOOP {
core::hint::black_box(());
/// Prints the collected `LATENCY` samples to the console.
///
/// For every (CPU, performance level) slot it prints one line with the average,
/// minimum and maximum in microseconds, then prints all raw samples as one JSON
/// document (`[cpu][level][trial]`) followed by `\r\n`.
///
/// Every slot of all `NUM_CPU` rows is read, including slots that were never
/// written (those still hold the initial 0).
fn print_latency() {
// Copy of the samples in plain `Vec`s, which is what gets serialized below.
let mut result: [[Vec<u64>; 11]; NUM_CPU] =
array![_ => array![_ => Vec::with_capacity(NUM_TRIALS_LATENCY); 11]; NUM_CPU];

for (j, latency_cpu) in LATENCY.iter().enumerate() {
for (k, latency) in latency_cpu.iter().enumerate() {
let mut sum = 0;
let mut min = u64::MAX;
let mut max = 0;
for usec in latency.iter() {
let val = usec.load(core::sync::atomic::Ordering::Relaxed);
if min > val {
min = val;
}
if max < val {
max = val;
}
sum += val;

result[j][k].push(val);
}
// The average always divides by the full trial count, not by the number of written samples.
let avg = sum / NUM_TRIALS_LATENCY as u64;

// `k * 10` is the performance value that was set for level `k`.
let msg = format!(
"CPU {j}: Performance {}: Average: {avg} us, Min: {min} us, Max: {max} us\r\n",
k * 10
);
awkernel_lib::console::print(&msg);
}
}

// NOTE(review): `start` is not defined in this function; the next line looks like a
// removed line of the diff that was interleaved here — confirm and drop.
let t = start.elapsed();
// Serialization failure panics via `unwrap()`.
let result_json = serde_json::to_string(&result).unwrap();
let result_str = format!("{result_json}\r\n");
awkernel_lib::console::print(&result_str);
}

let current = awkernel_lib::dvfs::get_curr_freq();
/// Number of result slots per CPU for the `test_latency_diff` measurement.
const NUM_TRIALS_LATENCY_DIFF: usize = 20;
/// Per-CPU result slots filled by `test_latency_diff`; each slot holds the
/// `(elapsed nanoseconds, TSC delta)` pairs of one trial. An empty `Vec` marks a free slot.
static FREQ_LATENCY: [[Mutex<Vec<(u64, i64)>>; NUM_TRIALS_LATENCY_DIFF]; NUM_CPU] =
array![_ => array![_ => Mutex::new(Vec::new()); NUM_TRIALS_LATENCY_DIFF]; NUM_CPU];
/// Number of `FREQ_LATENCY` slots filled so far, summed over all CPUs.
static TOTAL_COUNT_LATENCY_DIFF: AtomicUsize = AtomicUsize::new(0);
/// Number of timed inner-loop samples taken per trial in `test_latency_diff`.
static N: usize = 500;

log::debug!(
"cpuid = {cpuid}, max = {max}, current = {current}, expected = {max}, time = {t:?}"
);
/// Samples how the cost of a fixed busy loop changes right after the performance
/// setting is switched from 10 to 100.
///
/// Each trial sets performance to 10 and runs `workload()`, then sets performance to
/// 100 and takes `N` samples of `(time since the switch, TSC delta of a 1000-iteration
/// busy loop)`. The samples are stored in the first empty `FREQ_LATENCY` slot of the
/// current CPU. The task that fills the last expected slot calls `print_latency_diff()`;
/// every task leaves the loop once all expected slots are filled, otherwise it yields
/// and starts another trial.
///
/// NOTE(review): several statements below reference `max`, `cpuid`, `NUM_LOOP` or
/// re-bind `start`/`t`/`current` without matching definitions or closing braces. They
/// look like removed lines of the diff interleaved with the new body — confirm and drop.
async fn test_latency_diff() {
loop {
// Begin from the low setting so the switch below is an upward transition.
awkernel_lib::dvfs::set_min_max_performance(10);
awkernel_lib::dvfs::set_energy_efficiency(0);
awkernel_lib::dvfs::set_desired_performance(DesiredPerformance::Auto);

workload();

let mut diff = Vec::with_capacity(N);

// Switch to the high setting and start sampling immediately.
awkernel_lib::dvfs::set_min_max_performance(100);
awkernel_lib::dvfs::set_energy_efficiency(0);
awkernel_lib::dvfs::set_desired_performance(DesiredPerformance::Auto);

let t = awkernel_async_lib::time::Time::now();
for _ in 0..N {
// TSC reads bracket the busy loop, with a fence between each read and the loop.
let start = unsafe { core::arch::x86_64::_rdtsc() };
fence(Ordering::AcqRel);
for _ in 0..1000 {
core::hint::black_box(());
}
fence(Ordering::AcqRel);
let end = unsafe { core::arch::x86_64::_rdtsc() };
diff.push((t.elapsed(), (end - start) as i64));
}

// Maximum / 2 frequency.
awkernel_lib::dvfs::fix_freq(max / 2);
let mut result = Vec::with_capacity(diff.len());

let start = awkernel_async_lib::time::Time::now();
// Convert each elapsed `Duration` to nanoseconds so the result is a plain number pair.
for (t, d) in diff.iter() {
result.push((t.as_nanos() as u64, *d));
}

for _ in 0..NUM_LOOP {
core::hint::black_box(());
let cpu_id = awkernel_lib::cpu::cpu_id();
// Store the trial in the first empty slot of this CPU; if none is empty it is discarded.
for (i, r) in FREQ_LATENCY[cpu_id].iter().enumerate() {
let mut node = MCSNode::new();
let mut guard = r.lock(&mut node);
if guard.is_empty() {
*guard = result;
// Release the slot lock before counting and possibly printing.
drop(guard);

let old_total = TOTAL_COUNT_LATENCY_DIFF.fetch_add(1, Ordering::Relaxed);

log::debug!("{cpu_id}: {i}, {old_total}");

// This task filled the last expected slot, so it prints all results.
if old_total == (NUM_CPU - 1) * NUM_TRIALS_LATENCY_DIFF - 1 {
print_latency_diff();
}

break;
}
}

let t = start.elapsed();
let total = TOTAL_COUNT_LATENCY_DIFF.load(Ordering::Relaxed);

let current = awkernel_lib::dvfs::get_curr_freq();
// All expected slots are filled: stop this task.
if total == (NUM_CPU - 1) * NUM_TRIALS_LATENCY_DIFF {
break;
}

log::debug!(
"cpuid = {cpuid}, max = {max}, current = {current}, expected = {}, time = {t:?}",
max / 2
);
awkernel_async_lib::r#yield().await;
}
}

/// Prints every `FREQ_LATENCY` slot to the console as one JSON document
/// (`[cpu][trial]` → list of `(nanoseconds, TSC delta)` pairs) followed by `\r\n`.
fn print_latency_diff() {
// Unlocked copy of all slots, which is what gets serialized below.
let mut result: [[Vec<(u64, i64)>; NUM_TRIALS_LATENCY_DIFF]; NUM_CPU] =
array![_ => array![_ => Vec::new(); NUM_TRIALS_LATENCY_DIFF]; NUM_CPU];

for (dst, src) in result.iter_mut().zip(FREQ_LATENCY.iter()) {
for (dst, src) in dst.iter_mut().zip(src.iter()) {
let mut node = MCSNode::new();
let guard = src.lock(&mut node);

// NOTE(review): this function is not `async`, so the `.await` on the next line cannot
// belong here; it looks like a removed line of the diff — confirm and drop.
awkernel_async_lib::sleep(Duration::from_secs(1)).await;
*dst = guard.clone();
}
}

// Serialization failure panics via `unwrap()`.
let result_json = serde_json::to_string(&result).unwrap();
let result_str = format!("{result_json}\r\n");
awkernel_lib::console::print(&result_str);
}
102 changes: 102 additions & 0 deletions applications/tests/test_dvfs/src/nbody.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
use alloc::{vec, vec::Vec};
use num_traits::float::Float;

/// A point mass in the 2-D simulation: position, velocity and mass.
#[derive(Clone, Copy, Debug)]
struct Body {
    x: f64,
    y: f64,
    vx: f64,
    vy: f64,
    mass: f64,
}

impl Body {
    /// Applies the force `(fx, fy)` for a time step `dt`, i.e. `v += (f / mass) * dt`.
    fn update_velocity(&mut self, fx: f64, fy: f64, dt: f64) {
        let (accel_x, accel_y) = (fx / self.mass, fy / self.mass);
        self.vx += accel_x * dt;
        self.vy += accel_y * dt;
    }

    /// Advances the position by the current velocity over a time step `dt`.
    fn update_position(&mut self, dt: f64) {
        let (step_x, step_y) = (self.vx * dt, self.vy * dt);
        self.x += step_x;
        self.y += step_y;
    }
}

/// Returns the force `(fx, fy)` that body `b` exerts on body `a`.
///
/// The magnitude is `g * a.mass * b.mass / r²`, where `r²` is the squared distance
/// plus the softening term `eps²`; the direction points from `a` towards `b`.
fn compute_force(a: &Body, b: &Body, g: f64, eps: f64) -> (f64, f64) {
    let (dx, dy) = (b.x - a.x, b.y - a.y);
    // Softened squared distance.
    let r_sq = dx * dx + dy * dy + eps * eps;
    let r = r_sq.sqrt();
    let magnitude = g * a.mass * b.mass / r_sq;
    (magnitude * dx / r, magnitude * dy / r)
}

/// Advances all `bodies` by one time step `dt`.
///
/// First accumulates, for every body, the sum of the forces from all other bodies
/// (computed from the positions at the start of the step), then updates each body's
/// velocity and position from its accumulated force.
fn nbody_step(bodies: &mut [Body], g: f64, dt: f64, eps: f64) {
    let mut forces = vec![(0.0_f64, 0.0_f64); bodies.len()];

    // Pairwise force accumulation; a body exerts no force on itself.
    for (i, (target, acc)) in bodies.iter().zip(forces.iter_mut()).enumerate() {
        for (j, other) in bodies.iter().enumerate() {
            if j == i {
                continue;
            }
            let (fx, fy) = compute_force(target, other, g, eps);
            acc.0 += fx;
            acc.1 += fy;
        }
    }

    // Integrate only after all forces are known, so every force uses the old positions.
    for (body, &(fx, fy)) in bodies.iter_mut().zip(forces.iter()) {
        body.update_velocity(fx, fy, dt);
        body.update_position(dt);
    }
}

/// Runs a fixed, deterministic 2-D N-body simulation used as a CPU-bound workload.
///
/// `N` bodies get pseudo-random positions and masses from an `XorShift64` seeded with
/// a constant, and are advanced for `STEPS` steps. Nothing is returned; the final
/// state is passed through `black_box` so the computation cannot be optimized away.
pub fn simulate() {
    const N: usize = 5000;
    const STEPS: usize = 2;
    const G: f64 = 6.67430e-11;
    const DT: f64 = 0.1;
    const EPS: f64 = 1e-3;

    let mut rnd = XorShift64::new(0x12345678); // Initialize the random number generator.

    // Initialization: scatter the bodies randomly (in practice, random numbers may be used).
    let mut bodies = (0..N)
        .map(|_| Body {
            x: rnd.next_f64(),
            y: rnd.next_f64(),
            vx: 0.0,
            vy: 0.0,
            mass: rnd.next_f64(),
        })
        .collect::<Vec<_>>();

    for _ in 0..STEPS {
        nbody_step(&mut bodies, G, DT, EPS);
    }

    // The result is otherwise unused; without this the optimizer is free to drop the
    // whole simulation, which would make the timing in the caller meaningless.
    core::hint::black_box(&bodies);
}

/// Minimal 64-bit xorshift pseudo-random number generator (shifts 13, 7, 17).
///
/// Deterministic for a given seed; not suitable for cryptographic use.
#[derive(Clone, Debug)]
pub struct XorShift64 {
    state: u64,
}

impl XorShift64 {
    /// State used in place of a zero seed.
    const ZERO_SEED_REPLACEMENT: u64 = 0x9E37_79B9_7F4A_7C15;

    /// Creates a generator from `seed`.
    ///
    /// A state of 0 is a fixed point of the xorshift update (the generator would
    /// return 0 forever), so a zero seed is replaced by a fixed non-zero constant.
    /// Every non-zero seed is used unchanged.
    pub fn new(seed: u64) -> Self {
        let state = if seed == 0 {
            Self::ZERO_SEED_REPLACEMENT
        } else {
            seed
        };
        Self { state }
    }

    /// Advances the generator and returns the next 64-bit value (never 0).
    pub fn next(&mut self) -> u64 {
        let mut x = self.state;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.state = x;
        x
    }

    /// Returns the next value scaled by `u64::MAX`, i.e. a float in `(0.0, 1.0]`.
    pub fn next_f64(&mut self) -> f64 {
        (self.next() as f64) / (u64::MAX as f64)
    }
}
Loading
Loading