Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 78 additions & 70 deletions classes/cron_processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,18 @@
* @copyright 2022, Catalyst IT
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class cron_processor implements processor {
class cron_processor extends processor {
/** @var float Timestamp updated after processing each sample */
public $sampletime;

/** @var sample_set A sample set recorded while processing a task */
public $tasksampleset = null;

/** @var sample_set A sample set for memory usage recorded while processing a task */
protected $memoryusagesampleset;

/** @var int */
protected $samplems;

/** @var bool */
protected static $alreadyprofiling = false;
/**
* Construct the web processor.
*/
public function __construct() {
// Preload config values to avoid DB access during processing. See manager::get_altconnection() for more information.
parent::__construct();
$this->minduration = (float) get_config('tool_excimer', 'task_min_duration');
}

/**
* Initialises the processor
Expand All @@ -52,18 +49,14 @@ class cron_processor implements processor {
public function init(manager $manager) {
$this->sampletime = $manager->get_starttime();

$manager->get_timer()->setCallback(function () use ($manager) {
$this->on_interval($manager);
});

// Preload config values to avoid DB access during processing. See manager::get_altconnection() for more information.
$this->samplems = (int) get_config('tool_excimer', 'sample_ms');
// The callback is triggered when the number of samples reach the maxsamples and when profiler is destroyed.
$manager->get_profiler()->setFlushCallback(function ($log) use ($manager) {
$this->on_interval($log, $manager);
}, $this->maxsamples);

\core_shutdown_manager::register_function(
function () use ($manager) {
$manager->get_timer()->stop();
$manager->get_profiler()->stop();
$this->on_interval($manager, true);
if ($this->tasksampleset) {
$this->process($manager, microtime(true));
}
Expand All @@ -72,13 +65,21 @@ function () use ($manager) {
}

/**
* Gets the minimum duration required for a profile to be saved, as seconds.
*
* @return float
* @throws \dml_exception
* Get sampling period of processor
* (used for testing)
* @return int sampling period
*/
public function get_min_duration(): float {
return (float) get_config('tool_excimer', 'task_min_duration');
public function get_sampling_period() {
return $this->samplingperiod;
}

/**
* Get sample limit of processor
* (used for testing)
* @return int sample_limit
*/
public function get_sample_limit() {
return $this->samplelimit;
}

/**
Expand All @@ -92,65 +93,71 @@ public function get_min_duration(): float {
*
* We then check for a new task with the current sample.
*
* @param \ExcimerLog $log
* @param manager $manager
* @param bool $isfinal Set to true when this is called during shutdown.
*/
public function on_interval(manager $manager, bool $isfinal = false) {
public function on_interval($log, manager $manager) {
// We want to prevent doubling up of processing, so skip if an existing process is still executing.
// The profile logs will be kept and processed the next time.
self::$logs[] = $log;
$this->logcount += $log->count();
// Doubling sampling period if it reaches the limit.
if ($this->logcount >= $this->samplelimit) {
$this->on_reach_limit($manager);
$this->logcount = $this->logcount - $this->samplelimit;
}
if (self::$alreadyprofiling) {
debugging('tool_excimer: starting cron_processor::on_interval when previous call has not yet finished');
if ($isfinal) {
// The final flush call when profiler is destroyed.
if ($log->count() < $this->samplelimit) {
// This should never happen.
debugging('tool_excimer: alreadyprofiling is true during final on_interval.');
}
return;
}
self::$alreadyprofiling = true;
Comment on lines 100 to 118
Copy link
Contributor

@bwalkerl bwalkerl Dec 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The logic to prevent double up of processing may not be needed anymore now that we've switched from timers to flush callbacks (I'm not sure if this is paused during the callbacks). I think it's worth looking into further because it would be great if we could remove $self::logs entirely.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At this stage, the exact underlying scenario isn’t clear because the documentation doesn’t cover this case. However, this likely happens when maxSamples is set too low, causing the process to continue running while callbacks are triggered repeatedly. In this scenario, self::logs can be used for stored new coming samples.

Copy link
Contributor

@bwalkerl bwalkerl Dec 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously there were edge cases of the 10s interval overlapping, see #377

With your changes these should be much rarer as in the worst case the number of possible samples in on_interval should be very low. I'm not even sure if the profiler would be taking samples here.

I'm interested in what happens when max samples is set too low. The previous issue this fixed shouldn't be a concern if it only happens with bad config, but is there a chance of endless loops if this is set to something ridiculous like 1?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it should be possible, because we don’t have any constraint on the maxSamples value. That’s why I introduced the $logs array as a waiting list. New logs from the callback are collected in this array until the current process completes. I’m not sure how often overlaps will occur with the new logic, but $logs won’t impact the existing flow. On the contrary, it helps mitigate overlapping if it does occur.

foreach (self::$logs as $log) {
$memoryusage = memory_get_usage();
foreach ($log as $sample) {
$taskname = $this->findtaskname($sample);
$sampletime = $manager->get_starttime() + $sample->getTimestamp();

// If there is a task and the current task name is different from the previous, then store the profile.
if ($this->tasksampleset && ($this->tasksampleset->name != $taskname)) {
$profile = $this->process($manager, $this->sampletime);
// Keep an approximate count of each profile.
page_group::record_fuzzy_counts($profile);
$this->tasksampleset = null;
}
Comment on lines +122 to +131
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think detecting new tasks like this is incompatible with the increased sample periods. Because of this, new tasks will start off with the increased sample periods from the previous tasks and likely be missing the most important parts of the profile.

This is likely the biggest issues here and I'm not sure the best way of handling this.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my view, the sampling period isn’t tied to a specific task. It’s increased to reduce impact on site performance, as stated. So even if there’s a new task, if the site is very busy, a higher sampling period is still reasonable.

Regarding missing information: if a task is lightweight and completes before the next sampling interval, I think that’s acceptable. Our focus is on bottlenecks (heavy tasks). Lightweight tasks are also filtered out, at least at the stack trace level, by the merge logic.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a serious regression, and is much more broken than you think.

Multiple cron tasks are processed in a single call. The sampling period is never reset. A task that starts several hours in would have a ridiculously high sampling period and not contain the samples required to debug it properly.

Even if you reset that when a new task is detected, the gap of processing at higher intervals is likely to cause issues following long tasks. On top of this going from a higher sampling period to a lower sampling period in the middle of the task would significantly distort the display and misrepresent parts of the task.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need specific cases to evaluate this. It’s too abstract for me to visualize what’s happening behind the scenes. Based on what you’re saying, it sounds like we should not be updating the sampling period.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I feel like this is the main reason cron was processed on interval before. This did come with the drawback of taking extra samples and then discarding them based on the filter it, so there's still value in swapping if we can handle this somehow.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I’m happy to remove the sampling-period doubling and keep the flush callback, but I can’t justify the mismatch between what we are trying to solve and what we’re doing. At this point, the sample limit is primarily to prevent excessive log storage, rather than to address sampling spam during long-running tasks.


$profiler = $manager->get_profiler();
$log = $profiler->flush();
$memoryusage = memory_get_usage(); // Record and set initial memory usage at this point.
foreach ($log as $sample) {
$taskname = $this->findtaskname($sample);
$sampletime = $manager->get_starttime() + $sample->getTimestamp();

// If there is a task and the current task name is different from the previous, then store the profile.
if ($this->tasksampleset && ($this->tasksampleset->name != $taskname)) {
$profile = $this->process($manager, $this->sampletime);

// Keep an approximate count of each profile.
page_group::record_fuzzy_counts($profile);
$this->tasksampleset = null;
}

// If there exists a current task, and the sampleset for it is not created yet, create it.
if ($taskname && ($this->tasksampleset === null)) {
$this->tasksampleset = new sample_set($taskname, $this->sampletime);
$this->memoryusagesampleset = new sample_set($taskname, $this->sampletime);
if ($memoryusage) { // Ensure this only adds the mem usage for the initial base sample due to accuracy.
$this->memoryusagesampleset->add_sample(['sampleindex' => 0, 'value' => $memoryusage]);
$memoryusage = 0;
// If there exists a current task, and the sampleset for it is not created yet, create it.
if ($taskname && ($this->tasksampleset === null)) {
$this->tasksampleset = new sample_set($taskname, $this->sampletime);
$this->memoryusagesampleset = new sample_set($taskname, $this->sampletime);
if ($memoryusage) { // Ensure this only adds the mem usage for the initial base sample due to accuracy.
$this->memoryusagesampleset->add_sample(['sampleindex' => 0, 'value' => $memoryusage]);
$memoryusage = 0;
}
}
}

// If the sampleset exists, add the current sample to it.
if ($this->tasksampleset) {
$this->tasksampleset->add_sample($sample);
// If the sampleset exists, add the current sample to it.
if ($this->tasksampleset) {
$this->tasksampleset->add_sample($sample);

// Add memory usage:
// Note that due to the looping this is probably inaccurate.
$this->memoryusagesampleset->add_sample([
'sampleindex' => $this->tasksampleset->total_added() - 1,
'value' => memory_get_usage(),
]);
}

// Add memory usage:
// Note that due to the looping this is probably inaccurate.
$this->memoryusagesampleset->add_sample([
'sampleindex' => $this->tasksampleset->total_added() + $this->memoryusagesampleset->count() - 1,
'value' => memory_get_usage(),
]);
// Instances of task_sample are always created with the previous sample's timestamp.
// So it needs to be saved each loop.
$this->sampletime = $sampletime;
}

// Instances of task_sample are always created with the previous sample's timestamp.
// So it needs to be saved each loop.
$this->sampletime = $sampletime;
}

self::$logs = [];
self::$alreadyprofiling = false;
}

Expand Down Expand Up @@ -196,9 +203,10 @@ public function process(manager $manager, float $finishtime): profile {
$profile->set('reason', $reasons);
$profile->set('finished', (int) $finishtime);
$profile->set('memoryusagedatad3', $this->memoryusagesampleset->samples);
$profile->set('flamedatad3', flamed3_node::from_excimer_log_entries($this->tasksampleset->samples));
$profile->set('numsamples', $this->tasksampleset->count());
$profile->set('samplerate', $this->tasksampleset->filter_rate() * $this->samplems);
$profile->set('flamedatad3', flamed3_node::from_sample_set_samples($this->tasksampleset->samples));
$profile->set('numsamples', count($this->tasksampleset->samples));
$profile->set('numevents', $this->tasksampleset->count());
$profile->set('samplerate', (int) (($duration * 1000) / $this->tasksampleset->count()));
$profile->save_record();
}
return $profile;
Expand Down
12 changes: 5 additions & 7 deletions classes/flamed3_node.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,16 @@ public function add_excimer_trace_tail(array $tail, int $eventcount): void {
}

/**
* Extracts data from Excimer log entries and converts to a flame node tree, compatible with
* Extracts data from sample set samples and converts to a flame node tree, compatible with
* d3-flame-graph.
*
* @param iterable $entries
* @param array $samples samples
* @return flamed3_node
*/
public static function from_excimer_log_entries(iterable $entries): flamed3_node {
public static function from_sample_set_samples(array $samples): flamed3_node {
$root = new flamed3_node('root');
foreach ($entries as $entry) {
$eventcount = $entry->getEventCount();
$trace = array_reverse($entry->getTrace());
$root->add_excimer_trace_tail($trace, $eventcount);
foreach ($samples as $sample) {
$root->add_excimer_trace_tail(array_reverse($sample['trace']), $sample['eventcount']);
}
return $root;
}
Expand Down
26 changes: 4 additions & 22 deletions classes/manager.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ class manager {
private $processor;
/** @var \ExcimerProfiler */
private $profiler;
/** @var \ExcimerTimer */
private $timer;

/** @var float */
private $starttime;
/** @var array */
Expand Down Expand Up @@ -113,15 +112,6 @@ public function get_profiler(): \ExcimerProfiler {
return $this->profiler;
}

/**
* Timer to create events to process samples generated so far.
*
* @return \ExcimerTimer
*/
public function get_timer(): \ExcimerTimer {
return $this->timer;
}

/**
* Start time for the script.
*
Expand Down Expand Up @@ -169,29 +159,19 @@ public function __construct(processor $processor) {
}

/**
* Initialises the manager. Creates and starts the profiler and timer.
* Initialises the manager. Creates and starts the profiler.
*
* @throws \dml_exception
*/
public function init() {
if (!self::is_testing()) {
script_metadata::init();
profile_helper::init();

$sampleperiod = script_metadata::get_sampling_period();
$timerinterval = script_metadata::get_timer_interval();

$this->profiler = new \ExcimerProfiler();
$this->profiler->setPeriod($sampleperiod);

$this->timer = new \ExcimerTimer();
$this->timer->setPeriod($timerinterval);

$this->starttime = microtime(true);
$this->startrusage = getrusage();

$this->profiler->start();
$this->timer->start();
}
}

Expand All @@ -210,6 +190,8 @@ public function start_processor() {
*
*/
public static function create() {
script_metadata::init();
profile_helper::init();
if (self::is_cron()) {
self::$instance = new manager(new cron_processor());
} else {
Expand Down
54 changes: 49 additions & 5 deletions classes/processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,62 @@
*
* @package tool_excimer
*/
interface processor {
class processor {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd recommend making this an abstract class and keeping init as an abstract method

/** @var int */
protected $minduration;

/** @var sample_set */
public $tasksampleset;

/** @var sample_set */
public $memoryusagesampleset;

/** @var int */
protected $samplingperiod;

/** @var int */
protected $samplelimit;

/** @var int */
protected $maxsamples;

/** @var int */
protected $logcount = 0;

/** @var bool */
protected static $alreadyprofiling = false;

/** @var array */
protected static $logs = [];

/**
* Construct the processor.
*/
public function __construct() {
// Preload config values to avoid DB access during processing. See manager::get_altconnection() for more information.
$this->samplingperiod = script_metadata::get_sampling_period();
$this->samplelimit = script_metadata::get_sample_limit();
$this->maxsamples = script_metadata::get_max_samples();
}

/**
* Initialises the processor
* Doubling the sampling period when we reach the samples limit
*
* @param manager $manager The profiler manager object
* @param manager $manager
*/
public function init(manager $manager);
public function on_reach_limit(manager $manager) {
$this->samplingperiod *= 2;
// This will take effect the next time start() is called.
$manager->get_profiler()->setPeriod($this->samplingperiod);
$manager->get_profiler()->start();
}

/**
* Gets the minimum duration required for a profile to be saved, as seconds.
*
* @return float
*/
public function get_min_duration(): float;
public function get_min_duration(): float {
return $this->minduration;
}
}
6 changes: 5 additions & 1 deletion classes/profile.php
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ public function download(): void {
*
* @param string $json
* @return bool|int the id of the imported profile, or false if unsuccessful
* @throws \coding_exception
*/
public static function import(string $json) {
global $DB;
Expand Down Expand Up @@ -225,7 +226,9 @@ public static function import(string $json) {

foreach ($data as $property => $value) {
if (isset($value) && !in_array($property, $removeproperties)) {
$profile->set($property, $value);
if (property_exists($profile, $property)) {
$profile->set($property, $value);
}
}
}

Expand Down Expand Up @@ -475,6 +478,7 @@ protected static function define_properties(): array {
'versionhash' => ['type' => PARAM_TEXT, 'default' => ''],
'datasize' => ['type' => PARAM_INT, 'default' => 0],
'numsamples' => ['type' => PARAM_INT, 'default' => 0],
'numevents' => ['type' => PARAM_INT, 'default' => 0],
'samplerate' => ['type' => PARAM_INT, 'default' => 0],
'memoryusagedatad3' => ['type' => PARAM_RAW],
'memoryusagemax' => ['type' => PARAM_INT],
Expand Down
Loading
Loading