Skip to content

Commit 2c135d2

Browse files
committed
Inline engine stages.
Apply AggressiveOptimization to engine methods.
1 parent 804482d commit 2c135d2

File tree

3 files changed

+203
-27
lines changed

3 files changed

+203
-27
lines changed

src/BenchmarkDotNet/Engines/Engine.cs

+186-10
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
using System;
22
using System.Collections.Generic;
3+
using System.Diagnostics;
34
using System.Globalization;
45
using System.Linq;
56
using System.Runtime.CompilerServices;
67
using BenchmarkDotNet.Characteristics;
8+
using BenchmarkDotNet.Environments;
79
using BenchmarkDotNet.Jobs;
10+
using BenchmarkDotNet.Mathematics;
811
using BenchmarkDotNet.Portability;
912
using BenchmarkDotNet.Reports;
1013
using JetBrains.Annotations;
@@ -102,8 +105,14 @@ public void Dispose()
102105
}
103106
}
104107

108+
// AggressiveOptimization makes the method go straight to tier-1 JIT, so it is never re-jitted,
109+
// eliminating tiered JIT as a potential variable in measurements.
110+
[MethodImpl(CodeGenHelper.AggressiveOptimizationOption)]
105111
public RunResults Run()
106112
{
113+
// This method is huge, because all stages are inlined. This ensures the stack size
114+
// remains constant for each benchmark invocation, eliminating stack sizes as a potential variable in measurements.
115+
// #1120
107116
var measurements = new List<Measurement>();
108117
measurements.AddRange(jittingMeasurements);
109118

@@ -116,23 +125,185 @@ public RunResults Run()
116125
{
117126
if (Strategy != RunStrategy.Monitoring)
118127
{
119-
var pilotStageResult = pilotStage.Run();
120-
invokeCount = pilotStageResult.PerfectInvocationCount;
121-
measurements.AddRange(pilotStageResult.Measurements);
128+
// Pilot Stage
129+
{
130+
// If InvocationCount is specified, pilot stage should be skipped
131+
if (TargetJob.HasValue(RunMode.InvocationCountCharacteristic))
132+
{
133+
}
134+
// Here we want to guess the "perfect" number of invocations
135+
else if (TargetJob.HasValue(RunMode.IterationTimeCharacteristic))
136+
{
137+
// Perfect invocation count
138+
invokeCount = pilotStage.Autocorrect(MinInvokeCount);
139+
140+
int iterationCounter = 0;
141+
142+
int downCount = 0; // Amount of iterations where newInvokeCount < invokeCount
143+
while (true)
144+
{
145+
iterationCounter++;
146+
var measurement = RunIteration(new IterationData(IterationMode.Workload, IterationStage.Pilot, iterationCounter, invokeCount, UnrollFactor));
147+
measurements.Add(measurement);
148+
double actualIterationTime = measurement.Nanoseconds;
149+
long newInvokeCount = pilotStage.Autocorrect(Math.Max(pilotStage.minInvokeCount, (long) Math.Round(invokeCount * pilotStage.targetIterationTime / actualIterationTime)));
150+
151+
if (newInvokeCount < invokeCount)
152+
downCount++;
153+
154+
if (Math.Abs(newInvokeCount - invokeCount) <= 1 || downCount >= 3)
155+
break;
156+
157+
invokeCount = newInvokeCount;
158+
}
159+
WriteLine();
160+
}
161+
else
162+
{
163+
// A case where we don't have specific iteration time.
164+
invokeCount = pilotStage.Autocorrect(pilotStage.minInvokeCount);
165+
166+
int iterationCounter = 0;
167+
while (true)
168+
{
169+
iterationCounter++;
170+
var measurement = RunIteration(new IterationData(IterationMode.Workload, IterationStage.Pilot, iterationCounter, invokeCount, UnrollFactor));
171+
measurements.Add(measurement);
172+
double iterationTime = measurement.Nanoseconds;
173+
double operationError = 2.0 * pilotStage.resolution / invokeCount; // An operation error which has arisen due to the Chronometer precision
174+
175+
// Max acceptable operation error
176+
double operationMaxError1 = iterationTime / invokeCount * pilotStage.maxRelativeError;
177+
double operationMaxError2 = pilotStage.maxAbsoluteError?.Nanoseconds ?? double.MaxValue;
178+
double operationMaxError = Math.Min(operationMaxError1, operationMaxError2);
179+
180+
bool isFinished = operationError < operationMaxError && iterationTime >= pilotStage.minIterationTime.Nanoseconds;
181+
if (isFinished)
182+
break;
183+
if (invokeCount >= EnginePilotStage.MaxInvokeCount)
184+
break;
185+
186+
if (UnrollFactor == 1 && invokeCount < EnvironmentResolver.DefaultUnrollFactorForThroughput)
187+
invokeCount += 1;
188+
else
189+
invokeCount *= 2;
190+
}
191+
WriteLine();
192+
}
193+
}
194+
// End Pilot Stage
122195

123196
if (EvaluateOverhead)
124197
{
125-
measurements.AddRange(warmupStage.RunOverhead(invokeCount, UnrollFactor));
126-
measurements.AddRange(actualStage.RunOverhead(invokeCount, UnrollFactor));
198+
// Warmup Overhead
199+
{
200+
var warmupMeasurements = new List<Measurement>();
201+
202+
var criteria = DefaultStoppingCriteriaFactory.Instance.CreateWarmup(TargetJob, Resolver, IterationMode.Overhead, RunStrategy.Throughput);
203+
int iterationCounter = 0;
204+
while (!criteria.Evaluate(warmupMeasurements).IsFinished)
205+
{
206+
iterationCounter++;
207+
warmupMeasurements.Add(RunIteration(new IterationData(IterationMode.Overhead, IterationStage.Warmup, iterationCounter, invokeCount, UnrollFactor)));
208+
}
209+
WriteLine();
210+
211+
measurements.AddRange(warmupMeasurements);
212+
}
213+
// End Warmup Overhead
214+
215+
// Actual Overhead
216+
{
217+
var measurementsForStatistics = new List<Measurement>(actualStage.maxIterationCount);
218+
219+
int iterationCounter = 0;
220+
double effectiveMaxRelativeError = EngineActualStage.MaxOverheadRelativeError;
221+
while (true)
222+
{
223+
iterationCounter++;
224+
var measurement = RunIteration(new IterationData(IterationMode.Overhead, IterationStage.Actual, iterationCounter, invokeCount, UnrollFactor));
225+
measurements.Add(measurement);
226+
measurementsForStatistics.Add(measurement);
227+
228+
var statistics = MeasurementsStatistics.Calculate(measurementsForStatistics, actualStage.outlierMode);
229+
double actualError = statistics.LegacyConfidenceInterval.Margin;
230+
231+
double maxError1 = effectiveMaxRelativeError * statistics.Mean;
232+
double maxError2 = actualStage.maxAbsoluteError?.Nanoseconds ?? double.MaxValue;
233+
double maxError = Math.Min(maxError1, maxError2);
234+
235+
if (iterationCounter >= actualStage.minIterationCount && actualError < maxError)
236+
break;
237+
238+
if (iterationCounter >= actualStage.maxIterationCount || iterationCounter >= EngineActualStage.MaxOverheadIterationCount)
239+
break;
240+
}
241+
WriteLine();
242+
}
243+
// End Actual Overhead
127244
}
128245
}
129246

130-
measurements.AddRange(warmupStage.RunWorkload(invokeCount, UnrollFactor, Strategy));
247+
// Warmup Workload
248+
{
249+
var workloadMeasurements = new List<Measurement>();
250+
251+
var criteria = DefaultStoppingCriteriaFactory.Instance.CreateWarmup(TargetJob, Resolver, IterationMode.Workload, Strategy);
252+
int iterationCounter = 0;
253+
while (!criteria.Evaluate(workloadMeasurements).IsFinished)
254+
{
255+
iterationCounter++;
256+
workloadMeasurements.Add(RunIteration(new IterationData(IterationMode.Workload, IterationStage.Warmup, iterationCounter, invokeCount, UnrollFactor)));
257+
}
258+
WriteLine();
259+
260+
measurements.AddRange(workloadMeasurements);
261+
}
262+
// End Warmup Workload
131263
}
132264

133265
Host.BeforeMainRun();
134266

135-
measurements.AddRange(actualStage.RunWorkload(invokeCount, UnrollFactor, forceSpecific: Strategy == RunStrategy.Monitoring));
267+
// Actual Workload
268+
{
269+
if (actualStage.iterationCount == null && Strategy != RunStrategy.Monitoring)
270+
{
271+
// RunAuto
272+
var measurementsForStatistics = new List<Measurement>(actualStage.maxIterationCount);
273+
274+
int iterationCounter = 0;
275+
double effectiveMaxRelativeError = actualStage.maxRelativeError;
276+
while (true)
277+
{
278+
iterationCounter++;
279+
var measurement = RunIteration(new IterationData(IterationMode.Workload, IterationStage.Actual, iterationCounter, invokeCount, UnrollFactor));
280+
measurements.Add(measurement);
281+
measurementsForStatistics.Add(measurement);
282+
283+
var statistics = MeasurementsStatistics.Calculate(measurementsForStatistics, actualStage.outlierMode);
284+
double actualError = statistics.LegacyConfidenceInterval.Margin;
285+
286+
double maxError1 = effectiveMaxRelativeError * statistics.Mean;
287+
double maxError2 = actualStage.maxAbsoluteError?.Nanoseconds ?? double.MaxValue;
288+
double maxError = Math.Min(maxError1, maxError2);
289+
290+
if (iterationCounter >= actualStage.minIterationCount && actualError < maxError)
291+
break;
292+
293+
if (iterationCounter >= actualStage.maxIterationCount)
294+
break;
295+
}
296+
}
297+
else
298+
{
299+
// RunSpecific
300+
var iterationCount = actualStage.iterationCount ?? EngineActualStage.DefaultWorkloadCount;
301+
for (int i = 0; i < iterationCount; i++)
302+
measurements.Add(RunIteration(new IterationData(IterationMode.Workload, IterationStage.Actual, i + 1, invokeCount, UnrollFactor)));
303+
}
304+
WriteLine();
305+
}
306+
// End Actual Workload
136307

137308
Host.AfterMainRun();
138309

@@ -148,11 +319,15 @@ public RunResults Run()
148319
return new RunResults(measurements, outlierMode, workGcHasDone, threadingStats, exceptionFrequency);
149320
}
150321

322+
[MethodImpl(CodeGenHelper.AggressiveOptimizationOption)]
151323
public Measurement RunIteration(IterationData data)
152324
{
153325
// Initialization
154326
long invokeCount = data.InvokeCount;
155327
int unrollFactor = data.UnrollFactor;
328+
if (invokeCount % unrollFactor != 0)
329+
throw new ArgumentOutOfRangeException($"InvokeCount({invokeCount}) should be a multiple of UnrollFactor({unrollFactor}).");
330+
156331
long totalOperations = invokeCount * OperationsPerInvoke;
157332
bool isOverhead = data.IterationMode == IterationMode.Overhead;
158333
bool randomizeMemory = !isOverhead && MemoryRandomization;
@@ -167,7 +342,7 @@ public Measurement RunIteration(IterationData data)
167342
EngineEventSource.Log.IterationStart(data.IterationMode, data.IterationStage, totalOperations);
168343

169344
var clockSpan = randomizeMemory
170-
? MeasureWithRandomMemory(action, invokeCount / unrollFactor)
345+
? MeasureWithRandomStack(action, invokeCount / unrollFactor)
171346
: Measure(action, invokeCount / unrollFactor);
172347

173348
if (EngineEventSource.Log.IsEnabled())
@@ -193,8 +368,8 @@ public Measurement RunIteration(IterationData data)
193368
// This is in a separate method, because stackalloc can affect code alignment,
194369
// resulting in unexpected measurements on some AMD cpus,
195370
// even if the stackalloc branch isn't executed. (#2366)
196-
[MethodImpl(MethodImplOptions.NoInlining)]
197-
private unsafe ClockSpan MeasureWithRandomMemory(Action<long> action, long invokeCount)
371+
[MethodImpl(MethodImplOptions.NoInlining | CodeGenHelper.AggressiveOptimizationOption)]
372+
private unsafe ClockSpan MeasureWithRandomStack(Action<long> action, long invokeCount)
198373
{
199374
byte* stackMemory = stackalloc byte[random.Next(32)];
200375
var clockSpan = Measure(action, invokeCount);
@@ -205,6 +380,7 @@ private unsafe ClockSpan MeasureWithRandomMemory(Action<long> action, long invok
205380
[MethodImpl(MethodImplOptions.NoInlining)]
206381
// Deliberately empty sink: accepts a raw byte pointer and does nothing with it.
// NOTE(review): presumably exists so the caller's stackalloc'd buffer counts as "used" — confirm against the call site.
private unsafe void Consume(byte* _)
{
}
207382

383+
[MethodImpl(MethodImplOptions.NoInlining | CodeGenHelper.AggressiveOptimizationOption)]
208384
private ClockSpan Measure(Action<long> action, long invokeCount)
209385
{
210386
var clock = Clock.Start();

src/BenchmarkDotNet/Engines/EngineGeneralStage.cs

+9-9
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@ namespace BenchmarkDotNet.Engines
1111
public class EngineActualStage : EngineStage
1212
{
1313
internal const int MaxOverheadIterationCount = 20;
14-
private const double MaxOverheadRelativeError = 0.05;
15-
private const int DefaultWorkloadCount = 10;
16-
17-
private readonly int? iterationCount;
18-
private readonly double maxRelativeError;
19-
private readonly TimeInterval? maxAbsoluteError;
20-
private readonly OutlierMode outlierMode;
21-
private readonly int minIterationCount;
22-
private readonly int maxIterationCount;
14+
internal const double MaxOverheadRelativeError = 0.05;
15+
internal const int DefaultWorkloadCount = 10;
16+
17+
internal readonly int? iterationCount;
18+
internal readonly double maxRelativeError;
19+
internal readonly TimeInterval? maxAbsoluteError;
20+
internal readonly OutlierMode outlierMode;
21+
internal readonly int minIterationCount;
22+
internal readonly int maxIterationCount;
2323

2424
public EngineActualStage(IEngine engine) : base(engine)
2525
{

src/BenchmarkDotNet/Engines/EnginePilotStage.cs

+8-8
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ public PilotStageResult(long perfectInvocationCount)
3030

3131
internal const long MaxInvokeCount = (long.MaxValue / 2 + 1) / 2;
3232

33-
private readonly int unrollFactor;
34-
private readonly TimeInterval minIterationTime;
35-
private readonly int minInvokeCount;
36-
private readonly double maxRelativeError;
37-
private readonly TimeInterval? maxAbsoluteError;
38-
private readonly double targetIterationTime;
39-
private readonly double resolution;
33+
internal readonly int unrollFactor;
34+
internal readonly TimeInterval minIterationTime;
35+
internal readonly int minInvokeCount;
36+
internal readonly double maxRelativeError;
37+
internal readonly TimeInterval? maxAbsoluteError;
38+
internal readonly double targetIterationTime;
39+
internal readonly double resolution;
4040

4141
public EnginePilotStage(IEngine engine) : base(engine)
4242
{
@@ -132,6 +132,6 @@ private PilotStageResult RunSpecific()
132132
return new PilotStageResult(invokeCount, measurements);
133133
}
134134

135-
private long Autocorrect(long count) => (count + unrollFactor - 1) / unrollFactor * unrollFactor;
135+
// Rounds count up to the nearest multiple of unrollFactor
// (holds for non-negative count and positive unrollFactor, since the division truncates).
internal long Autocorrect(long count)
{
    // Number of whole unroll-sized groups needed to cover count (ceiling division).
    long groups = (count + unrollFactor - 1) / unrollFactor;
    return groups * unrollFactor;
}
136136
}
137137
}

0 commit comments

Comments
 (0)