diff --git a/CMakeLists.txt b/CMakeLists.txt index cc2af97..44c1f58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,18 @@ if (ALLSCALE_WITH_PAPI) add_definitions(-DHAVE_PAPI) endif() +if (ALLSCALE_WITH_CRAY_PM) +add_definitions(-DCRAY_COUNTERS) +endif() + +if (ALLSCALE_WITH_POWER_ESTIMATE) +add_definitions(-DPOWER_ESTIMATE) +endif() + +if (ALLSCALE_READ_VOLTAGE) +add_definitions(-DREAD_VOLTAGE_FILE) +endif() + if (ALLSCALE_WITH_EXTRAE) if(NOT EXTRAE_DIR) message(FATAL_ERROR "EXTRAE_DIR Cmake variable not set. Please set it to point to the directory where EXTRAE is installed") diff --git a/allscale/components/monitor.hpp b/allscale/components/monitor.hpp index 813c4e9..2643679 100644 --- a/allscale/components/monitor.hpp +++ b/allscale/components/monitor.hpp @@ -22,8 +22,6 @@ #include #include #include -#include - #include @@ -46,7 +44,6 @@ namespace allscale { namespace components { struct HPX_COMPONENT_EXPORT monitor : hpx::components::component_base { - typedef hpx::lcos::local::spinlock mutex_type; monitor() { @@ -65,14 +62,6 @@ namespace allscale { namespace components { // hpx::id_type get_left_neighbour() { return left_; } // hpx::id_type get_right_neighbour() { return right_; } - mutex_type task_times_mtx_; - task_times task_times_; - task_times last_task_times_; - std::chrono::high_resolution_clock::time_point last_task_times_sample_; - - void add_task_time(task_id::task_path const& path, task_times::time_t const& time); - - task_times get_task_times(); ///////////////////////////////////////////////////////////////////////////////////// /// Performance Data Introspection @@ -346,9 +335,19 @@ namespace allscale { namespace components { // // /// \returns Cpu load float get_cpu_load(); - double get_avg_task_duration(); + + /// \brief This function returns the current power + // // /// \returns Power + float get_current_power(); + + + /// \brief This function returns the max power that can be consumed + // // /// \returns Max power + float 
get_max_power(); + private: + typedef hpx::lcos::local::spinlock mutex_type; // MONITOR MANAGEMENT // Measuring total execution time @@ -360,9 +359,7 @@ namespace allscale { namespace components { std::uint64_t num_localities_; mutex_type init_mutex; bool initialized = false; - public: bool enable_monitor; - private: // System parameters unsigned long long total_memory_; @@ -465,12 +462,12 @@ namespace allscale { namespace components { // hpx::id_type idle_rate_avg_counter_; // double idle_rate_avg_; +#ifdef REALTIME_VIZ // REALTIME VIZ std::mutex counter_mutex_; std::uint64_t num_active_tasks_; std::uint64_t total_tasks_; double total_task_duration_; -#ifdef REALTIME_VIZ hpx::id_type idle_rate_counter_; double idle_rate_; @@ -482,6 +479,7 @@ namespace allscale { namespace components { unsigned long long int sample_id_; bool sample_task_stats(); + double get_avg_task_duration(); #endif // HISTORICAL DATA diff --git a/allscale/power.hpp b/allscale/power.hpp new file mode 100644 index 0000000..851562c --- /dev/null +++ b/allscale/power.hpp @@ -0,0 +1,180 @@ +#ifndef ALLSCALE_POWER_HPP +#define ALLSCALE_POWER_HPP + +#include +#include +#include +#include +#include + + +#ifdef CRAY_COUNTERS +#define NUM_PM_COUNTERS 3 +#define FRESHNESS_COUNTER 0 +#define ENERGY_COUNTER 1 +#define POWER_COUNTER 2 + +#define PM_MAX_ATTEMPTS 10 +#endif + +namespace allscale { namespace power +{ + +#ifdef CRAY_COUNTERS + std::ifstream pm_files[NUM_PM_COUNTERS]; +#endif + +// std::vector power_history; +// std::vector energy_history; + double last_instant_power; + double last_instant_energy; + double instant_power; + double instant_energy; + + + void init_power_measurements() + { + +#ifdef CRAY_COUNTERS + pm_files[FRESHNESS_COUNTER].open("/sys/cray/pm_counters/freshness"); + if(!pm_files[FRESHNESS_COUNTER]) { + std::cerr << "ERROR: Cannot open /sys/cray/pm_counters/freshness\n"; + exit(1); + } + + pm_files[ENERGY_COUNTER].open("/sys/cray/pm_counters/energy"); + if(!pm_files[ENERGY_COUNTER]) 
{ + std::cerr << "ERROR: Cannot open /sys/cray/pm_counters/energy\n"; + exit(1); + } + + pm_files[POWER_COUNTER].open("/sys/cray/pm_counters/power"); + if(!pm_files[POWER_COUNTER]) { + std::cerr << "ERROR: Cannot open /sys/cray/pm_counters/power\n"; + exit(1); + } +#endif + last_instant_power = instant_power = last_instant_energy = instant_energy = 0.0; + } + + + + void finish_power_measurements() + { +#ifdef CRAY_COUNTERS + if(pm_files[FRESHNESS_COUNTER].is_open()) pm_files[FRESHNESS_COUNTER].close(); + + if(pm_files[ENERGY_COUNTER].is_open()) pm_files[ENERGY_COUNTER].close(); + + if(pm_files[POWER_COUNTER].is_open()) pm_files[POWER_COUNTER].close(); +#endif + } + + + // Returns last instant power in Watts + double get_instant_power() { return instant_power; } + + // Returns last instant energy in J + double get_instant_energy() { return instant_energy; } + + // Returns all power samples +// std::vector get_power_history() { return power_history; } + + // Returns all energy samples +// std::vector get_energy_history() { return energy_history; } + + +#ifdef CRAY_COUNTERS + // Returns 0 if the results are valid + int read_pm_counters() { + + int freshness1, freshness2, n_attempts = 0; + std::string line; + unsigned long long tmp_energy, tmp_power; + + // We need to check that the counters have not been updated while we were accessing them + do { + + n_attempts++; + pm_files[FRESHNESS_COUNTER].seekg(0); + std::getline(pm_files[FRESHNESS_COUNTER], line); + + freshness1 = std::atoi(line.c_str()); + + // Read energy + pm_files[ENERGY_COUNTER].seekg(0); + std::getline(pm_files[ENERGY_COUNTER], line); + + tmp_energy = std::strtoull(line.c_str(), NULL, 10); + + // Read power + pm_files[POWER_COUNTER].seekg(0); + std::getline(pm_files[POWER_COUNTER], line); + + tmp_power = std::strtoull(line.c_str(), NULL, 10); + + pm_files[FRESHNESS_COUNTER].seekg(0); + std::getline(pm_files[FRESHNESS_COUNTER], line); + + freshness2 = std::atoi(line.c_str()); + + } while(n_attempts < 
PM_MAX_ATTEMPTS && freshness1 != freshness2); + + if(freshness1 != freshness2) return 1; + else { +// power_history.push_back(tmp_power); +// energy_history.push_back(tmp_energy); + + instant_power = (double)tmp_power; // the power counter is a point-in-time reading in Watts; only energy is cumulative + last_instant_power = (double)tmp_power; + + instant_energy = (double)tmp_energy - last_instant_energy; + last_instant_energy = (double)tmp_energy; + + return 0; + } + } + + +#endif + +#ifdef POWER_ESTIMATE + + // Estimate basic power + double estimate_power(std::uint64_t frequency) + { + static const char * file_name = "/sys/class/i2c-dev/i2c-3/device/3-002d/regulator/regulator.1/microvolts"; + std::ifstream file; + std::uint64_t microvolts = 0; + float U = 0.9; + + // Estimate using C * U^2 * f + + // for now C is 30pF, not able to find it for Cortex A53 + auto C = 30 * 1.0e-12; // + + + // voltage U +#ifdef READ_VOLTAGE_FILE + file.open(file_name); + if(!file) + std::cerr << "Warning: Cannot read voltage from /sys/class, using 0.9 instead" << std::endl; + else { + file >> microvolts; + if(file) U = (double)microvolts * 1.0e-6; // keep the 0.9 V default when the value cannot be parsed + + file.close(); + } +#endif + instant_power = (double)C * (U * U) * (double)(frequency * 1000); // freq is in kHz + + return instant_power; + } +#endif + + + +}} + +#endif + diff --git a/src/components/monitor_component.cpp b/src/components/monitor_component.cpp index ecc6b8d..80bad11 100644 --- a/src/components/monitor_component.cpp +++ b/src/components/monitor_component.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -34,6 +35,8 @@ #include "extrae.h" #endif +#define SAMPLE_INTERVAL 1000 + char const* gather_basename1 = "allscale/monitor/gather1"; HPX_REGISTER_GATHER(profile_map, profile_gatherer); @@ -51,16 +54,25 @@ namespace allscale { namespace components { monitor::monitor(std::uint64_t rank) - : 
last_task_times_sample_(std::chrono::high_resolution_clock::now()) - , rank_(rank) + : rank_(rank) , num_localities_(0) , enable_monitor(true) - , total_memory_(0) - , num_cpus_(0) 
+ , output_profile_table_(0) + , output_treeture_(0) + , output_iteration_trees_(0) + , collect_papi_(0) + , cutoff_level_(0) + , print_throughput_hm_(0) + , print_idle_hm_(0) + , done(false) , current_read_queue(0) , current_write_queue(0) - , done(false) -//#ifdef WI_STATS + , sampling_interval_ms(SAMPLE_INTERVAL) + , finished_tasks(0) + , task_throughput(0) + , cpu_load_(0.0) + , total_memory_(0) + , num_cpus_(0) , total_split_time(0) , total_process_time(0) , num_split_tasks(0) @@ -69,28 +81,14 @@ namespace allscale { namespace components { , max_split_task(0) , min_process_task(0) , max_process_task(0) - , finished_tasks(0) - , sampling_interval_ms(1000) -// , metric_sampler_( -// hpx::util::bind( -// &monitor::sample_node, -// this -// ), -// 2000000, -// "monitor::sample_node", -// false -// ) - , task_throughput(0) , weighted_sum(0.0) , weighted_throughput(0.0) , bytes_sent_(0) , bytes_recv_(0) - , cpu_load_(0.0) -//#endif +#ifdef REALTIME_VIZ , num_active_tasks_(0) , total_tasks_(0) , total_task_duration_(0.0) -#ifdef REALTIME_VIZ , realtime_viz(0) , sample_id_(0) , timer_( @@ -103,45 +101,10 @@ namespace allscale { namespace components { true ) #endif - , output_profile_table_(0) - , output_treeture_(0) - , output_iteration_trees_(0) - , collect_papi_(0) - , cutoff_level_(0) - , print_throughput_hm_(0) - , print_idle_hm_(0) { } - void monitor::add_task_time(task_id::task_path const& path, task_times::time_t const& time) - { - if (!enable_monitor) return; - - std::lock_guard l(task_times_mtx_); -// if (hpx::get_locality_id() == 0) -// { -// std::cout << path.getPath() << " " << time.count() << "\n"; -// } - task_times_.add(path, time); - } - - task_times monitor::get_task_times() - { - if (!enable_monitor) return task_times{}; - - std::lock_guard l(task_times_mtx_); - auto now = std::chrono::high_resolution_clock::now(); - - // normalize to one second - auto interval = std::chrono::duration_cast(now - last_task_times_sample_); - auto res = (task_times_ - 
last_task_times_) / (interval.count() * 1e-9f); - - last_task_times_sample_ = now; - last_task_times_ = task_times_; - return res; - } - #ifdef REALTIME_VIZ bool monitor::sample_task_stats() { @@ -166,7 +129,7 @@ namespace allscale { namespace components { // << "Average time per task: " << get_avg_task_duration() << "IDLE RATE: " << idle_rate_ << std::endl; return true; } -#endif + double monitor::get_avg_task_duration() { @@ -174,6 +137,7 @@ namespace allscale { namespace components { else return total_task_duration_/(double)total_tasks_; } +#endif std::uint64_t monitor::get_timestamp( void ) { @@ -311,6 +275,21 @@ namespace allscale { namespace components { } + float monitor::get_current_power() + { + return allscale::power::get_instant_power() * num_cpus_; + } + + + float monitor::get_max_power() + { +#ifdef POWER_ESTIMATE + return allscale::power::estimate_power(get_max_freq(0)) * num_cpus_; +#else + return 0.0; +#endif + } + std::uint64_t monitor::get_network_out() { @@ -451,6 +430,12 @@ namespace allscale { namespace components { pstat >> foo_word >> user_time >> nice_time >> system_time >> idle_time; } + // Power +#ifdef CRAY_COUNTERS + allscale::power::read_pm_counters(); +#elif defined(POWER_ESTIMATE) + allscale::power::estimate_power(get_current_freq(0)); +#endif // Compute statistics std::unique_lock lock(sampling_mutex); @@ -583,11 +568,15 @@ namespace allscale { namespace components { std::shared_ptr stats; auto my_wid = w.id(); - - { +#ifdef REALTIME_VIZ + if(realtime_viz) { + // Global task stats std::unique_lock lock2(counter_mutex_); + total_tasks_++; num_active_tasks_--; total_task_duration_ += p->get_exclusive_time(); + lock2.unlock(); } +#endif #ifdef HAVE_PAPI @@ -1284,13 +1273,6 @@ namespace allscale { namespace components { double monitor::get_avg_idle_rate() { - auto now = std::chrono::steady_clock::now(); - std::chrono::duration time_elapsed = - std::chrono::duration_cast>(now - execution_start); - - return get_avg_task_duration() / 
time_elapsed.count(); - - /* hpx::performance_counters::counter_value idle_avg_value; idle_avg_value = hpx::performance_counters::stubs::performance_counter::get_value( @@ -1298,8 +1280,8 @@ namespace allscale { namespace components { return idle_avg_value.get_value() * 0.01; - return 0.0; */ + return 0.0; } double monitor::get_avg_idle_rate_remote(hpx::id_type locality) @@ -2196,17 +2178,20 @@ namespace allscale { namespace components { initialized = true; - std::cerr + std::cerr << "Monitor component with rank " << rank_ << " created!\n"; if (rank_ == 0) { dashboard::update(); - dashboard::get_commands(); } } + + + + }} //HPX_REGISTER_ACTION(allscale::components::monitor::get_my_rank_action, get_my_rank_action); diff --git a/src/dashboard.cpp b/src/dashboard.cpp index 4219734..1dbb630 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -67,6 +67,11 @@ namespace allscale { namespace dashboard state.efficiency = used_cycles / float(max_cycles); state.speed = used_cycles / float(avail_cycles); +#ifdef POWER_ESTIMATE + state.cur_power = monitor_c->get_current_power(); + state.max_power = monitor_c->get_max_power(); +#endif + return state; } }}