Skip to content

Commit f160e0d

Browse files
authored
moved average benchmark results to p90 results (#2152)
* moved average benchmark results to p90 results * increased the measured iterations from 15 to 20 and reduced warmups from 3 to 2 by default * removed testOrchestrator option from saucelabs config and gradle * profiling benchmark now prints raw values to the console, to later read them from the log file, but we just assert on the cpu overhead * Benchmarks in SauceLabs will now run on 2 devices with Andorid 12, 3 with Android 11, 2 with Android 10 * added collecting the refresh rate of the device * sdk init duration increase threshold increased to 250 milliseconds * cpu overhead range for the same operation increased to -2%..2% * added a test to send profiles to a Sentry project (dogfooding test)
1 parent 3ad96de commit f160e0d

File tree

18 files changed

+525
-194
lines changed

18 files changed

+525
-194
lines changed

.sauce/sentry-uitest-android-benchmark.yml

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,27 +21,25 @@ suites:
2121
# Devices are chosen so that there is a high-end and a low-end device for each api level
2222
- name: "Android 12 (api 31)"
2323
devices:
24-
- id: Google_Pixel_6_Pro_real_us # Google Pixel 6 Pro - api 31 (12)
25-
- id: Google_Pixel_3_12_real_us # Google Pixel 3 - api 31 (12)
26-
testOptions:
27-
useTestOrchestrator: true
24+
- id: Google_Pixel_6_Pro_real_us # Google Pixel 6 Pro - api 31 (12) - high end
25+
- id: Google_Pixel_3_12_real_us # Google Pixel 3 - api 31 (12) - low end
2826

2927
- name: "Android 11 (api 30)"
3028
devices:
31-
- id: OnePlus_9_Pro_real_us # OnePlus 9 Pro - api 30 (11)
32-
- id: Google_Pixel_2_real_us # Google Pixel 2 - api 30 (11)
33-
testOptions:
34-
useTestOrchestrator: true
29+
- id: OnePlus_9_Pro_real_us # OnePlus 9 Pro - api 30 (11) - high end
30+
- id: Google_Pixel_4_real_us # Google Pixel 4 - api 30 (11) - mid end
31+
- id: Google_Pixel_2_real_us # Google Pixel 2 - api 30 (11) - low end
3532

36-
# Commenting for the moment, due to the error "Cannot install test-services-1.4.1.apk on device" on low Android versions
37-
# - name: "Android 5 (api 22)"
38-
# devices:
39-
# - id: Amazon_Kindle_Fire_HD_8_real_us # Amazon Kindle Fire HD 8 - api 22 (5.1.1)
40-
# testOptions:
41-
# useTestOrchestrator: true
33+
- name: "Android 10 (api 29)"
34+
devices:
35+
- id: Google_Pixel_4_XL_real_us1 # Google Pixel 4 XL - api 29 (10)
36+
- id: Nokia_7_1_real_us # Nokia 7.1 - api 29 (10)
4237

43-
# - id: Google_Pixel_4_XL_real_us1 # Google Pixel 4 XL - api 29 (10)
44-
# - id: Motorola_Moto_G_Power_real_us # Motorola Moto G Power (2021) - api 29 (10)
38+
# At the time of writing (July, 4, 2022), the market share per android version is:
39+
# 12.0 = 17.54%, 11.0 = 31.65%, 10.0 = 21.92%
40+
# Using these 3 versions we cover 71,11% of all devices out there. Currently, this is enough for benchmarking scope
41+
# Leaving these devices here in case we change mind on them
42+
# devices:
4543
# - id: Samsung_Galaxy_S8_plus_real_us # Samsung Galaxy S8+ - api 28 (9)
4644
# - id: LG_G8_ThinQ_real_us # LG G8 ThinQ - api 28 (9)
4745
# - id: OnePlus_5_real_us # OnePlus 5 - api 27 (8.1.0)
@@ -54,6 +52,7 @@ suites:
5452
# - id: LG_K10_real # LG K10 - api 24 (7.0)
5553
# - id: Samsung_Galaxy_S6_Edge_Plus_real # Samsung Galaxy S6 Edge+ - api 23 (6.0.1)
5654
# - id: Samsung_Tab_E_real_us # Samsung Tab E - api 23 (6.0.1)
55+
# - id: Amazon_Kindle_Fire_HD_8_real_us # Amazon Kindle Fire HD 8 - api 22 (5.1.1)
5756

5857
artifacts:
5958
download:

.sauce/sentry-uitest-android-ui.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@ suites:
1717
- name: "Android 12 (api 31)"
1818
devices:
1919
- id: Samsung_Galaxy_S22_Ultra_5G_real_us # Samsung Galaxy S22 Ultra 5G - api 31 (12)
20-
testOptions:
21-
useTestOrchestrator: true
2220

2321
# Controls what artifacts to fetch when the suite on Sauce Cloud has finished.
2422
artifacts:

sentry-android-integration-tests/sentry-uitest-android-benchmark/build.gradle.kts

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ android {
2525
// https://developer.android.com/training/testing/instrumented-tests/androidx-test-libraries/runner#enable-gradle
2626
// This doesn't work on some devices with Android 11+. Clearing package data resets permissions.
2727
// Check the readme for more info.
28+
// Test orchestrator was removed due to issues with SauceLabs
2829
// testInstrumentationRunnerArguments["clearPackageData"] = "true"
2930
}
3031

@@ -34,10 +35,6 @@ android {
3435
viewBinding = true
3536
}
3637

37-
testOptions {
38-
execution = "ANDROIDX_TEST_ORCHESTRATOR"
39-
}
40-
4138
signingConfigs {
4239
getByName("debug") {
4340
storeFile = rootProject.file("debug.keystore")
@@ -51,7 +48,6 @@ android {
5148

5249
buildTypes {
5350
getByName("debug") {
54-
isDebuggable = false
5551
isMinifyEnabled = true
5652
signingConfig = signingConfigs.getByName("debug")
5753
proguardFiles(getDefaultProguardFile("proguard-android-optimize.txt"), "benchmark-proguard-rules.pro")

sentry-android-integration-tests/sentry-uitest-android-benchmark/src/androidTest/java/io/sentry/uitest/android/benchmark/BaseBenchmarkTest.kt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ package io.sentry.uitest.android.benchmark
22

33
import android.content.Context
44
import android.view.Choreographer
5+
import androidx.lifecycle.Lifecycle
56
import androidx.test.core.app.ApplicationProvider
7+
import androidx.test.core.app.launchActivity
68
import androidx.test.platform.app.InstrumentationRegistry
79
import androidx.test.runner.AndroidJUnitRunner
810
import kotlin.test.BeforeTest
@@ -22,5 +24,8 @@ abstract class BaseBenchmarkTest {
2224
runner.runOnMainSync {
2325
choreographer = Choreographer.getInstance()
2426
}
27+
// We need the refresh rate, but we can get it only from the activity, so we start and destroy one
28+
val benchmarkScenario = launchActivity<BenchmarkActivity>()
29+
benchmarkScenario.moveToState(Lifecycle.State.DESTROYED)
2530
}
2631
}

sentry-android-integration-tests/sentry-uitest-android-benchmark/src/androidTest/java/io/sentry/uitest/android/benchmark/SdkBenchmarkTest.kt

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,19 @@ class SdkBenchmarkTest : BaseBenchmarkTest() {
3232
it.tracesSampleRate = 1.0
3333
}
3434
}
35-
val simpleSdkResult = BenchmarkOperation.compare(opNoSdk, "No Sdk", opSimpleSdk, "Simple Sdk")
36-
val perfProfilingSdkResult = BenchmarkOperation.compare(opNoSdk2, "No Sdk", opPerfProfilingSdk, "Sdk with perf and profiling")
35+
val refreshRate = BenchmarkActivity.refreshRate ?: 60F
36+
val simpleSdkResults = BenchmarkOperation.compare(opNoSdk, "No Sdk", opSimpleSdk, "Simple Sdk", refreshRate)
37+
val simpleSdkResult = simpleSdkResults.getSummaryResult()
38+
simpleSdkResult.printResults()
39+
val perfProfilingSdkResults = BenchmarkOperation.compare(opNoSdk2, "No Sdk", opPerfProfilingSdk, "Sdk with perf and profiling", refreshRate)
40+
val perfProfilingSdkResult = perfProfilingSdkResults.getSummaryResult()
41+
perfProfilingSdkResult.printResults()
3742

38-
val maxDurationThreshold = TimeUnit.MILLISECONDS.toNanos(100)
43+
val maxDurationThreshold = TimeUnit.MILLISECONDS.toNanos(250)
3944
assertTrue(simpleSdkResult.durationIncreaseNanos in 0..maxDurationThreshold)
4045
assertTrue(simpleSdkResult.cpuTimeIncreaseMillis in 0..100)
41-
assertTrue(perfProfilingSdkResult.durationIncreaseNanos in simpleSdkResult.durationIncreaseNanos..maxDurationThreshold)
42-
assertTrue(perfProfilingSdkResult.cpuTimeIncreaseMillis in simpleSdkResult.cpuTimeIncreaseMillis..100)
46+
assertTrue(perfProfilingSdkResult.durationIncreaseNanos in 0..maxDurationThreshold)
47+
assertTrue(perfProfilingSdkResult.cpuTimeIncreaseMillis in 0..100)
4348
}
4449

4550
private fun getOperation(init: (() -> Unit)? = null) = BenchmarkOperation(

sentry-android-integration-tests/sentry-uitest-android-benchmark/src/androidTest/java/io/sentry/uitest/android/benchmark/SentryBenchmarkTest.kt

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,15 @@ class SentryBenchmarkTest : BaseBenchmarkTest() {
4040
// should be very similar.
4141
val op1 = BenchmarkOperation(choreographer, op = getOperation(runner))
4242
val op2 = BenchmarkOperation(choreographer, op = getOperation(runner))
43-
val comparisonResult = BenchmarkOperation.compare(op1, "Op1", op2, "Op2")
43+
val refreshRate = BenchmarkActivity.refreshRate ?: 60F
44+
val comparisonResults = BenchmarkOperation.compare(op1, "Op1", op2, "Op2", refreshRate)
45+
val comparisonResult = comparisonResults.getSummaryResult()
46+
comparisonResult.printResults()
4447

45-
assertTrue(comparisonResult.durationIncreasePercentage in -1F..1F)
46-
assertTrue(comparisonResult.cpuTimeIncreasePercentage in -1F..1F)
48+
// Currently we just want to assert the cpu overhead
49+
assertTrue(comparisonResult.cpuTimeIncreasePercentage in -2F..2F)
4750
// The fps decrease comparison is skipped, due to approximation: 59.51 and 59.49 fps are considered 60 and 59,
4851
// respectively. Also, if the average fps is 20 or 60, a difference of 1 fps becomes 5% or 1.66% respectively.
49-
assertTrue(comparisonResult.droppedFramesIncreasePercentage in -1F..1F)
5052
}
5153

5254
@Test
@@ -76,17 +78,20 @@ class SentryBenchmarkTest : BaseBenchmarkTest() {
7678
}
7779
}
7880
)
79-
val comparisonResult = BenchmarkOperation.compare(
81+
val refreshRate = BenchmarkActivity.refreshRate ?: 60F
82+
val comparisonResults = BenchmarkOperation.compare(
8083
benchmarkOperationNoTransaction,
8184
"NoTransaction",
8285
benchmarkOperationProfiled,
83-
"ProfiledTransaction"
86+
"ProfiledTransaction",
87+
refreshRate
8488
)
89+
comparisonResults.printAllRuns("Profiling Benchmark")
90+
val comparisonResult = comparisonResults.getSummaryResult()
91+
comparisonResult.printResults()
8592

86-
assertTrue(comparisonResult.durationIncreasePercentage in 0F..5F)
93+
// Currently we just want to assert the cpu overhead
8794
assertTrue(comparisonResult.cpuTimeIncreasePercentage in 0F..5F)
88-
assertTrue(comparisonResult.fpsDecreasePercentage in 0F..5F)
89-
assertTrue(comparisonResult.droppedFramesIncreasePercentage in 0F..5F)
9095
}
9196

9297
/**
@@ -103,12 +108,10 @@ class SentryBenchmarkTest : BaseBenchmarkTest() {
103108
}
104109
// Just swipe the list some times: this is the benchmarked operation
105110
swipeList(2)
106-
// We finish the transaction
111+
// We finish the transaction. We do it on main thread, so there's no need to perform other operations after it
107112
runner.runOnMainSync {
108113
transaction?.finish()
109114
}
110-
// We swipe a last time to measure how finishing the transaction may affect other operations
111-
swipeList(1)
112115

113116
benchmarkScenario.moveToState(Lifecycle.State.DESTROYED)
114117
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
package io.sentry.uitest.android.benchmark.util
2+
3+
import java.util.concurrent.TimeUnit
4+
5+
/** Result of the [BenchmarkOperation] comparison. */
6+
internal data class BenchmarkComparisonResult(
7+
/** Number of measured iterations. */
8+
val iterations: Int,
9+
/** Screen refresh rate. */
10+
val refreshRate: Float,
11+
/** Screen refresh rate. */
12+
val cores: Int,
13+
/** Name of the first compared operation. */
14+
val op1Name: String,
15+
/** Name of the second compared operation. */
16+
val op2Name: String,
17+
/** Raw cpu time in milliseconds of op1. */
18+
val op1CpuTime: List<Long>,
19+
/** Raw cpu time in milliseconds of op2. */
20+
val op2CpuTime: List<Long>,
21+
/** Increase of cpu time in milliseconds. */
22+
val cpuTimeIncreases: List<Long>,
23+
/** Increase of cpu time in percentage. */
24+
val cpuTimeIncreasePercentages: List<Double>,
25+
/** Raw dropped frames of op1. */
26+
val op1DroppedFrames: List<Double>,
27+
/** Raw dropped frames of op2. */
28+
val op2DroppedFrames: List<Double>,
29+
/** Increase of dropped frames. */
30+
val droppedFramesIncreases: List<Double>,
31+
/** Increase of dropped frames in percentage. */
32+
val droppedFramesIncreasePercentages: List<Double>,
33+
/** Raw duration in nanoseconds of op1. */
34+
val op1Duration: List<Long>,
35+
/** Raw duration in nanoseconds of op2. */
36+
val op2Duration: List<Long>,
37+
/** Increase of duration in nanoseconds. If it's low enough, no end user will ever realize it. */
38+
val durationIncreaseNanos: List<Long>,
39+
/** Increase of duration in percentage. */
40+
val durationIncreasePercentage: List<Double>,
41+
/** Raw fps of op1. */
42+
val op1Fps: List<Int>,
43+
/** Raw fps of op2. */
44+
val op2Fps: List<Int>,
45+
/** Decrease of fps. */
46+
val fpsDecreases: List<Int>,
47+
/** Decrease of fps in percentage. */
48+
val fpsDecreasePercentages: List<Double>
49+
) {
50+
51+
/**
52+
* Prints the raw results of all runs of the comparison.
53+
* Each printed line is prefixed by [prefix], to allow parsers to easily parse log files to read raw values.
54+
*/
55+
fun printAllRuns(prefix: String) {
56+
repeat(iterations) { index ->
57+
58+
println("$prefix ==================== Iteration $index ====================")
59+
60+
println("$prefix [$op2Name]: duration=${op2Duration[index]} ns, cpuTime=${op2CpuTime[index]}, fps=${op2Fps[index]}, droppedFrames=${op2DroppedFrames[index]}")
61+
println("$prefix [$op1Name]: duration=${op1Duration[index]} ns, cpuTime=${op1CpuTime[index]}, fps=${op1Fps[index]}, droppedFrames=${op1DroppedFrames[index]}")
62+
println(
63+
"$prefix Duration increase: %.2f%% (%d ns = %d ms)".format(
64+
durationIncreasePercentage[index],
65+
durationIncreaseNanos[index],
66+
TimeUnit.NANOSECONDS.toMillis(durationIncreaseNanos[index])
67+
)
68+
)
69+
70+
println(
71+
"$prefix CPU time overhead, over $cores cores: %.2f%% (%d ms)".format(
72+
cpuTimeIncreasePercentages[index],
73+
cpuTimeIncreases[index]
74+
)
75+
)
76+
77+
println("$prefix FPS decrease: %.2f%% (%d fps)".format(fpsDecreasePercentages[index], fpsDecreases[index]))
78+
79+
val expectedFrames = TimeUnit.NANOSECONDS.toMillis(op2Duration[index]) * refreshRate / 1000
80+
println(
81+
"$prefix Frame drop increase, over $expectedFrames total frames, with $refreshRate hz: %.2f%% (%.2f)".format(
82+
droppedFramesIncreasePercentages[index],
83+
droppedFramesIncreases[index]
84+
)
85+
)
86+
}
87+
}
88+
89+
fun getSummaryResult() = BenchmarkSummaryResult(
90+
calculatePercentile(cpuTimeIncreases, 90),
91+
calculatePercentile(cpuTimeIncreasePercentages, 90),
92+
calculatePercentile(droppedFramesIncreases, 90),
93+
calculatePercentile(droppedFramesIncreasePercentages, 90),
94+
calculatePercentile(durationIncreaseNanos, 90),
95+
calculatePercentile(durationIncreasePercentage, 90),
96+
calculatePercentile(fpsDecreases, 90),
97+
calculatePercentile(fpsDecreasePercentages, 90)
98+
)
99+
100+
/** Calculate the [percentile] of the [list]. [percentile] should be in the range 0, 100. */
101+
private fun <T : Number> calculatePercentile(list: List<T>, percentile: Int): T {
102+
if (list.isEmpty()) {
103+
return 0 as T
104+
}
105+
val sortedList = list.sortedBy { it.toDouble() }
106+
val percentileIndex = (list.size * percentile / 100 - 1).coerceIn(0, list.size)
107+
return sortedList[percentileIndex]
108+
}
109+
}
110+
111+
/** Result of the [BenchmarkOperation] comparison. */
112+
internal data class BenchmarkSummaryResult(
113+
/**
114+
* Increase of cpu time in milliseconds.
115+
* It has no direct impact on performance of the app, but it has on battery usage, as the cpu is 'awaken' longer.
116+
*/
117+
val cpuTimeIncreaseMillis: Long,
118+
/** Increase of cpu time in percentage. */
119+
val cpuTimeIncreasePercentage: Double,
120+
/**
121+
* Increase of dropped frames.Very important, as it weights dropped frames based on the time
122+
* passed between each frame. This is the metric end users can perceive as 'performance' in app usage.
123+
*/
124+
val droppedFramesIncrease: Double,
125+
/** Increase of dropped frames in percentage. */
126+
val droppedFramesIncreasePercentage: Double,
127+
/** Increase of duration in nanoseconds. If it's low enough, no end user will ever realize it. */
128+
val durationIncreaseNanos: Long,
129+
/** Increase of duration in percentage. */
130+
val durationIncreasePercentage: Double,
131+
/**
132+
* Decrease of fps. Not really important, as even if fps are the same, the cpu could be
133+
* doing more work in the frame window, and it could be hidden by checking average fps only.
134+
*/
135+
val fpsDecrease: Int,
136+
/** Decrease of fps in percentage. */
137+
val fpsDecreasePercentage: Double
138+
) {
139+
140+
/** Prints the summary results of the comparison. */
141+
fun printResults() {
142+
println(
143+
"Duration increase: %.2f%% (%d ns = %d ms)".format(
144+
durationIncreasePercentage,
145+
durationIncreaseNanos,
146+
TimeUnit.NANOSECONDS.toMillis(durationIncreaseNanos)
147+
)
148+
)
149+
println("CPU time overhead: %.2f%% (%d ms)".format(cpuTimeIncreasePercentage, cpuTimeIncreaseMillis))
150+
println("FPS decrease: %.2f%% (%d fps)".format(fpsDecreasePercentage, fpsDecrease))
151+
println("Frame drop increase: %.2f%% (%.2f)".format(droppedFramesIncreasePercentage, droppedFramesIncrease))
152+
}
153+
}

0 commit comments

Comments
 (0)