tier4 · ytakano · Apr 3, 2025 · Apr 4, 2025 · Apr 4, 2025 · Apr 4, 2025
diff --git a/applications/tests/test_dvfs/Cargo.toml b/applications/tests/test_dvfs/Cargo.toml
@@ -5,6 +5,9 @@ edition = "2024"
 
 [dependencies]
 log = "0.4"
+array-macro = "2.1"
+serde_json = { version = "1.0", default-features = false, features = ["alloc"] }
+num-traits = { version = "0.2", default-features = false }
 
 [dependencies.awkernel_async_lib]
 path = "../../../awkernel_async_lib"

diff --git a/applications/tests/test_dvfs/src/lib.rs b/applications/tests/test_dvfs/src/lib.rs
@@ -1,62 +1,241 @@
 #![no_std]
 
-use core::time::Duration;
+use core::{
+    sync::atomic::{AtomicU64, AtomicUsize, Ordering, fence},
+    time::Duration,
+};
+
+use alloc::{format, vec::Vec};
+use array_macro::array;
+use awkernel_lib::{
+    dvfs::DesiredPerformance,
+    sync::{mcs::MCSNode, mutex::Mutex},
+};
 
 extern crate alloc;
 
-const APP_NAME: &str = "test DVFS";
+mod nbody;
 
-const NUM_LOOP: usize = 1000000;
+/// First dimension of the static result tables below (they are indexed by CPU id).
+/// NOTE(review): hard-coded; presumably matches the target machine — confirm against
+/// `awkernel_lib::cpu::num_cpu()`.
+const NUM_CPU: usize = 14;
+/// Number of latency samples stored per (CPU, performance level) slot.
+const NUM_TRIALS_LATENCY: usize = 100;
+/// Iteration count of the busy loop in `warm_up`.
+const NUM_BUSY_LOOP: usize = 1000000000;
+
+/// `LATENCY[cpu][level][trial]`: elapsed workload time in microseconds.
+/// The 11 levels correspond to the loop indices `0..=10` used by `test_latency`.
+static LATENCY: [[[AtomicU64; NUM_TRIALS_LATENCY]; 11]; NUM_CPU] =
+    array![_ => array![_ => array![_ => AtomicU64::new(0); NUM_TRIALS_LATENCY]; 11]; NUM_CPU];
+
+/// `COUNT[cpu][level]`: number of trials attempted so far for that slot
+/// (keeps growing after the slot is full).
+static COUNT: [[AtomicUsize; 11]; NUM_CPU] =
+    array![_ => array![_ => AtomicUsize::new(0); 11]; NUM_CPU];
+/// Number of samples actually stored into `LATENCY`, summed over all slots.
+static TOTAL_COUNT: AtomicUsize = AtomicUsize::new(0);
 
+/// Runs the DVFS measurements in two phases: first `test_latency_diff`, then `test_latency`.
+///
+/// Each phase spawns `num_cpu() - 2` FIFO-scheduled tasks and joins all of them before
+/// the next step.
+/// NOTE(review): `num_cpu() - 2` is unsigned arithmetic and underflows when fewer than
+/// two CPUs are reported — confirm that cannot happen on the target.
 pub async fn run() {
-    awkernel_async_lib::spawn(
-        APP_NAME.into(),
-        test_dvfs(),
-        awkernel_async_lib::scheduler::SchedulerType::FIFO,
-    )
-    .await;
+    // Phase 1: latency-difference measurement.
+    let mut waiter = Vec::with_capacity(awkernel_lib::cpu::num_cpu() - 2);
+
+    for _ in 0..(awkernel_lib::cpu::num_cpu() - 2) {
+        let w = awkernel_async_lib::spawn(
+            "test_latency_diff".into(),
+            test_latency_diff(),
+            awkernel_async_lib::scheduler::SchedulerType::FIFO,
+        )
+        .await;
+
+        waiter.push(w);
+    }
+
+    // Wait for every phase-1 task; the join result is deliberately ignored.
+    for w in waiter {
+        let _ = w.join().await;
+    }
+
+    // Phase 2: per-performance-level latency measurement.
+    let mut waiter = Vec::with_capacity(awkernel_lib::cpu::num_cpu() - 2);
+
+    for _ in 0..(awkernel_lib::cpu::num_cpu() - 2) {
+        let w = awkernel_async_lib::spawn(
+            "test_latency".into(),
+            test_latency(),
+            awkernel_async_lib::scheduler::SchedulerType::FIFO,
+        )
+        .await;
+
+        waiter.push(w);
+    }
+
+    // Wait for every phase-2 task; the join result is deliberately ignored.
+    for w in waiter {
+        let _ = w.join().await;
+    }
 }
 
-async fn test_dvfs() {
-    loop {
-        let max = awkernel_lib::dvfs::get_max_freq();
-        let cpuid = awkernel_lib::cpu::cpu_id();
+/// Measures `workload` latency on the current CPU for each performance level.
+///
+/// For every level in `0..=10` the task sets min/max performance to `10 * level`,
+/// runs `warm_up`, times one `workload`, and stores the elapsed microseconds into
+/// `LATENCY` while the slot still has room. The task that stores the final sample
+/// calls `print_latency`. The task yields after each sweep over the levels.
+async fn test_latency() {
+    let end_count = (awkernel_lib::cpu::num_cpu() - 1) * NUM_TRIALS_LATENCY * 11;
+
+    while TOTAL_COUNT.load(Ordering::Relaxed) + 1 < end_count {
+        // Sampled once per sweep; there is no await point inside the sweep.
+        let cpu_id = awkernel_lib::cpu::cpu_id();
+
+        for level in 0..=10 {
+            awkernel_lib::dvfs::set_min_max_performance(10 * level);
+            awkernel_lib::dvfs::set_energy_efficiency(0);
+            awkernel_lib::dvfs::set_desired_performance(DesiredPerformance::Auto);
+
+            warm_up();
+
+            let elapsed_us = workload().as_micros();
+
+            log::debug!(
+                "CPU {cpu_id}: Performance {}: Elapsed: {} [us]",
+                level * 10,
+                elapsed_us
+            );
+
+            let slot = level as usize;
+            let trial = COUNT[cpu_id][slot].fetch_add(1, Ordering::Relaxed);
+            if trial >= NUM_TRIALS_LATENCY {
+                // This (CPU, level) slot is already full; discard the sample.
+                continue;
+            }
+
+            LATENCY[cpu_id][slot][trial].store(elapsed_us as u64, Ordering::Relaxed);
+
+            let total_count = TOTAL_COUNT.fetch_add(1, Ordering::Relaxed);
+
+            log::debug!("progress: {total_count} / {end_count}");
+
+            // `fetch_add` returns the previous value, so this is the last sample.
+            if total_count + 1 == end_count {
+                print_latency();
+            }
+        }
+
+        awkernel_async_lib::r#yield().await;
+    }
+}
 
-        // Maximum frequency.
-        awkernel_lib::dvfs::fix_freq(max);
+/// Spins for `NUM_BUSY_LOOP` iterations before a measurement.
+///
+/// Each iteration passes `()` through `core::hint::black_box`.
+fn warm_up() {
+    let mut remaining = NUM_BUSY_LOOP;
+    while remaining != 0 {
+        core::hint::black_box(());
+        remaining -= 1;
+    }
+}
 
-        let start = awkernel_async_lib::time::Time::now();
+/// Runs one `nbody::simulate` pass and returns how long it took.
+fn workload() -> Duration {
+    let started = awkernel_async_lib::time::Time::now();
+    nbody::simulate();
+    let elapsed = started.elapsed();
+    elapsed
+}
 
-        for _ in 0..NUM_LOOP {
-            core::hint::black_box(());
+/// Prints average/min/max latency for every (CPU, level) slot of `LATENCY`,
+/// followed by all raw samples serialized as one JSON line, to the console.
+fn print_latency() {
+    // result[cpu][level] collects the raw samples for the JSON dump.
+    let mut result: [[Vec<u64>; 11]; NUM_CPU] =
+        array![_ => array![_ => Vec::with_capacity(NUM_TRIALS_LATENCY); 11]; NUM_CPU];
+
+    for (j, latency_cpu) in LATENCY.iter().enumerate() {
+        for (k, latency) in latency_cpu.iter().enumerate() {
+            let mut sum = 0;
+            let mut min = u64::MAX;
+            let mut max = 0;
+            for usec in latency.iter() {
+                let val = usec.load(core::sync::atomic::Ordering::Relaxed);
+                if min > val {
+                    min = val;
+                }
+                if max < val {
+                    max = val;
+                }
+                sum += val;
+
+                result[j][k].push(val);
+            }
+            // The divisor is the slot capacity, so entries that were never written
+            // (still 0) are included in the average, min and max.
+            let avg = sum / NUM_TRIALS_LATENCY as u64;
+
+            // `k * 10` is the performance value that was passed to
+            // `set_min_max_performance` for this level.
+            let msg = format!(
+                "CPU {j}: Performance {}: Average: {avg} us, Min: {min} us, Max: {max} us\r\n",
+                k * 10
+            );
+            awkernel_lib::console::print(&msg);
         }
+    }
 
-        let t = start.elapsed();
+    // NOTE(review): a serialization error would panic here via `unwrap`.
+    let result_json = serde_json::to_string(&result).unwrap();
+    let result_str = format!("{result_json}\r\n");
+    awkernel_lib::console::print(&result_str);
+}
 
-        let current = awkernel_lib::dvfs::get_curr_freq();
+/// Number of result slots per CPU filled by `test_latency_diff`.
+const NUM_TRIALS_LATENCY_DIFF: usize = 20;
+/// `FREQ_LATENCY[cpu][trial]`: one series of
+/// (nanoseconds since the series started, TSC delta of the busy loop) pairs.
+/// An empty `Vec` marks a slot that has not been filled yet.
+static FREQ_LATENCY: [[Mutex<Vec<(u64, i64)>>; NUM_TRIALS_LATENCY_DIFF]; NUM_CPU] =
+    array![_ => array![_ => Mutex::new(Vec::new()); NUM_TRIALS_LATENCY_DIFF]; NUM_CPU];
+/// Number of `FREQ_LATENCY` slots filled so far, over all CPUs.
+static TOTAL_COUNT_LATENCY_DIFF: AtomicUsize = AtomicUsize::new(0);
+/// Number of samples taken per series in `test_latency_diff`.
+/// NOTE(review): never mutated; a `const` would express that more directly.
+static N: usize = 500;
 
-        log::debug!(
-            "cpuid = {cpuid}, max = {max}, current = {current}, expected = {max}, time = {t:?}"
-        );
+/// Records how the TSC cost of a fixed busy loop changes right after the
+/// performance setting is raised from 10 to 100.
+///
+/// Each loop iteration produces one series of `N` samples and stores it in the first
+/// empty `FREQ_LATENCY` slot of the current CPU. The task that fills the last slot
+/// calls `print_latency_diff`; every task exits once all slots are counted as filled.
+async fn test_latency_diff() {
+    loop {
+        // Run the workload once with min/max performance set to 10.
+        awkernel_lib::dvfs::set_min_max_performance(10);
+        awkernel_lib::dvfs::set_energy_efficiency(0);
+        awkernel_lib::dvfs::set_desired_performance(DesiredPerformance::Auto);
+
+        workload();
+
+        // Allocated before the switch so the allocation is not part of the series.
+        let mut diff = Vec::with_capacity(N);
+
+        // Switch to performance 100 and start sampling immediately.
+        awkernel_lib::dvfs::set_min_max_performance(100);
+        awkernel_lib::dvfs::set_energy_efficiency(0);
+        awkernel_lib::dvfs::set_desired_performance(DesiredPerformance::Auto);
+
+        let t = awkernel_async_lib::time::Time::now();
+        for _ in 0..N {
+            // NOTE(review): `_rdtsc` exists only on x86_64, so this function does not
+            // build for other targets — confirm that is intended.
+            // SAFETY: NOTE(review): `_rdtsc` takes no arguments and only reads the
+            // time-stamp counter; presumably sound on every supported x86_64 CPU — confirm.
+            let start = unsafe { core::arch::x86_64::_rdtsc() };
+            // NOTE(review): these are memory fences; presumably meant to keep the busy
+            // loop between the two TSC reads — confirm they order `rdtsc` as intended.
+            fence(Ordering::AcqRel);
+            for _ in 0..1000 {
+                core::hint::black_box(());
+            }
+            fence(Ordering::AcqRel);
+            // SAFETY: same as the first `_rdtsc` read above.
+            let end = unsafe { core::arch::x86_64::_rdtsc() };
+            // NOTE(review): `end - start` is unsigned; it would underflow if the second
+            // reading were smaller than the first.
+            diff.push((t.elapsed(), (end - start) as i64));
+        }
 
-        // Maximum / 2 frequency.
-        awkernel_lib::dvfs::fix_freq(max / 2);
+        // Convert the timestamps to nanoseconds after sampling has finished.
+        let mut result = Vec::with_capacity(diff.len());
 
-        let start = awkernel_async_lib::time::Time::now();
+        for (t, d) in diff.iter() {
+            result.push((t.as_nanos() as u64, *d));
+        }
 
-        for _ in 0..NUM_LOOP {
-            core::hint::black_box(());
+        // Store the series in the first empty slot of this CPU; if every slot is
+        // already filled the series is dropped.
+        let cpu_id = awkernel_lib::cpu::cpu_id();
+        for (i, r) in FREQ_LATENCY[cpu_id].iter().enumerate() {
+            let mut node = MCSNode::new();
+            let mut guard = r.lock(&mut node);
+            if guard.is_empty() {
+                *guard = result;
+                // Release the lock before logging and printing.
+                drop(guard);
+
+                let old_total = TOTAL_COUNT_LATENCY_DIFF.fetch_add(1, Ordering::Relaxed);
+
+                log::debug!("{cpu_id}: {i}, {old_total}");
+
+                // `fetch_add` returns the previous value, so this task filled the last slot.
+                if old_total == (NUM_CPU - 1) * NUM_TRIALS_LATENCY_DIFF - 1 {
+                    print_latency_diff();
+                }
+
+                break;
+            }
         }
 
-        let t = start.elapsed();
+        let total = TOTAL_COUNT_LATENCY_DIFF.load(Ordering::Relaxed);
 
-        let current = awkernel_lib::dvfs::get_curr_freq();
+        // Stop once `(NUM_CPU - 1) * NUM_TRIALS_LATENCY_DIFF` series have been stored.
+        if total == (NUM_CPU - 1) * NUM_TRIALS_LATENCY_DIFF {
+            break;
+        }
 
-        log::debug!(
-            "cpuid = {cpuid}, max = {max}, current = {current}, expected = {}, time = {t:?}",
-            max / 2
-        );
+        awkernel_async_lib::r#yield().await;
+    }
+}
+
+/// Copies every series out of `FREQ_LATENCY` and prints them as one JSON line
+/// to the console.
+fn print_latency_diff() {
+    // Plain (lock-free) mirror of `FREQ_LATENCY` used for serialization.
+    let mut result: [[Vec<(u64, i64)>; NUM_TRIALS_LATENCY_DIFF]; NUM_CPU] =
+        array![_ => array![_ => Vec::new(); NUM_TRIALS_LATENCY_DIFF]; NUM_CPU];
+
+    for (dst, src) in result.iter_mut().zip(FREQ_LATENCY.iter()) {
+        for (dst, src) in dst.iter_mut().zip(src.iter()) {
+            // Each slot is locked only while it is being cloned.
+            let mut node = MCSNode::new();
+            let guard = src.lock(&mut node);
 
-        awkernel_async_lib::sleep(Duration::from_secs(1)).await;
+            *dst = guard.clone();
+        }
     }
+
+    // NOTE(review): a serialization error would panic here via `unwrap`.
+    let result_json = serde_json::to_string(&result).unwrap();
+    let result_str = format!("{result_json}\r\n");
+    awkernel_lib::console::print(&result_str);
 }
diff --git a/applications/tests/test_dvfs/src/nbody.rs b/applications/tests/test_dvfs/src/nbody.rs
@@ -0,0 +1,102 @@
+use alloc::{vec, vec::Vec};
+use num_traits::float::Float;
+
+/// Point mass in the 2-D simulation plane.
+#[derive(Clone, Copy, Debug)]
+struct Body {
+    /// Position, x component.
+    x: f64,
+    /// Position, y component.
+    y: f64,
+    /// Velocity, x component.
+    vx: f64,
+    /// Velocity, y component.
+    vy: f64,
+    /// Mass; used as a divisor in `update_velocity`.
+    mass: f64,
+}
+
+impl Body {
+    /// Applies the force `(fx, fy)` for a time step `dt`: each velocity component
+    /// grows by `force / mass * dt`.
+    fn update_velocity(&mut self, fx: f64, fy: f64, dt: f64) {
+        let (accel_x, accel_y) = (fx / self.mass, fy / self.mass);
+        self.vx += accel_x * dt;
+        self.vy += accel_y * dt;
+    }
+
+    /// Moves the body by `velocity * dt`, using the current velocity.
+    fn update_position(&mut self, dt: f64) {
+        let (step_x, step_y) = (self.vx * dt, self.vy * dt);
+        self.x += step_x;
+        self.y += step_y;
+    }
+}
+
+/// Returns the force `(fx, fy)` acting on `a` in the direction of `b`.
+///
+/// The magnitude is `g * a.mass * b.mass / dist_sq`, where `dist_sq` is the squared
+/// distance between the bodies plus `eps * eps`.
+fn compute_force(a: &Body, b: &Body, g: f64, eps: f64) -> (f64, f64) {
+    let (dx, dy) = (b.x - a.x, b.y - a.y);
+    // Softening: for non-zero `eps` the squared distance can never reach zero.
+    let dist_sq = dx * dx + dy * dy + eps * eps;
+    let dist = dist_sq.sqrt();
+    let magnitude = g * a.mass * b.mass / dist_sq;
+    (magnitude * dx / dist, magnitude * dy / dist)
+}
+
+/// Advances all bodies by one time step `dt`.
+///
+/// First accumulates, for every body, the sum of the pairwise forces from all other
+/// bodies (all pairs are visited), then updates each body's velocity and afterwards
+/// its position with the new velocity.
+fn nbody_step(bodies: &mut [Body], g: f64, dt: f64, eps: f64) {
+    let n = bodies.len();
+    let mut forces = vec![(0.0, 0.0); n];
+
+    // Forces are computed from the positions at the start of the step.
+    for (i, (total, target)) in forces.iter_mut().zip(bodies.iter()).enumerate() {
+        for (j, other) in bodies.iter().enumerate() {
+            if j == i {
+                // A body exerts no force on itself.
+                continue;
+            }
+            let (fx, fy) = compute_force(target, other, g, eps);
+            total.0 += fx;
+            total.1 += fy;
+        }
+    }
+
+    for (body, &(fx, fy)) in bodies.iter_mut().zip(forces.iter()) {
+        body.update_velocity(fx, fy, dt);
+        body.update_position(dt);
+    }
+}
+
+/// Runs a small, deterministic 2-D N-body simulation used as a benchmark workload.
+///
+/// `N` bodies are placed pseudo-randomly from a fixed seed and advanced for `STEPS`
+/// time steps. The final state is passed through `core::hint::black_box` so the
+/// optimizer cannot treat the whole computation as dead code and remove it.
+pub fn simulate() {
+    const N: usize = 5000;
+    const STEPS: usize = 2;
+    const G: f64 = 6.67430e-11;
+    const DT: f64 = 0.1;
+    const EPS: f64 = 1e-3;
+
+    // Initialize the random number generator; the fixed seed makes every call do
+    // identical work.
+    let mut rnd = XorShift64::new(0x12345678);
+
+    // Initialization: scatter the bodies pseudo-randomly, all starting at rest.
+    let mut bodies = (0..N)
+        .map(|_| Body {
+            x: rnd.next_f64(),
+            y: rnd.next_f64(),
+            vx: 0.0,
+            vy: 0.0,
+            mass: rnd.next_f64(),
+        })
+        .collect::<Vec<_>>();
+
+    for _ in 0..STEPS {
+        nbody_step(&mut bodies, G, DT, EPS);
+    }
+
+    // Nothing else reads `bodies`; mark the result as observed so the timed work
+    // is actually performed.
+    core::hint::black_box(&bodies);
+}
+
+/// Minimal xorshift pseudo-random number generator with 64 bits of state
+/// (shift amounts 13, 7, 17). Not suitable for cryptographic use.
+pub struct XorShift64 {
+    state: u64,
+}
+
+impl XorShift64 {
+    /// State substituted when the caller passes a zero seed.
+    const ZERO_SEED_FALLBACK: u64 = 0x9E37_79B9_7F4A_7C15;
+
+    /// Creates a generator from `seed`.
+    ///
+    /// Zero is a fixed point of the xorshift update (every output would be 0),
+    /// so a zero seed is replaced by a fixed non-zero constant. Non-zero seeds
+    /// are used unchanged.
+    pub fn new(seed: u64) -> Self {
+        let state = if seed == 0 {
+            Self::ZERO_SEED_FALLBACK
+        } else {
+            seed
+        };
+        Self { state }
+    }
+
+    /// Advances the generator and returns the new state as the next value.
+    pub fn next(&mut self) -> u64 {
+        let mut x = self.state;
+        x ^= x << 13;
+        x ^= x >> 7;
+        x ^= x << 17;
+        self.state = x;
+        x
+    }
+
+    /// Returns the next value divided by `u64::MAX`, i.e. scaled into `[0.0, 1.0]`.
+    pub fn next_f64(&mut self) -> f64 {
+        (self.next() as f64) / (u64::MAX as f64)
+    }
+}