Skip to content

Commit 769c552

Browse files
committed
update all benchmarks, fix bugs
1 parent 9ab9f87 commit 769c552

File tree

11 files changed

+177
-93
lines changed

11 files changed

+177
-93
lines changed

Diff for: benchmarks/BenchmarksBase.cs

+4-3
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,19 @@ public class BenchmarksBase
1515
{
1616
private class ConfigWithCustomEnvVars : ManualConfig
1717
{
18-
private const string JitNoInline = "COMPlus_TieredCompilation";
18+
private const string JitTieredCompilation = "COMPlus_TieredCompilation";
1919

2020
public ConfigWithCustomEnvVars()
2121
{
22-
Add(Job.Core.With(new[] { new EnvironmentVariable(JitNoInline, "1") }));
22+
Add(Job.ShortRun.With(new[] { new EnvironmentVariable(JitTieredCompilation, "1") }));
23+
Add(Job.ShortRun.With(new[] { new EnvironmentVariable(JitTieredCompilation, "0") }));
2324
}
2425
}
2526

2627
public IEnumerable<object[]> TestData()
2728
{
2829
var testDataDir = @"C:\prj\simdjsonsharp\jsonexamples"; // TODO: fix absolute path
29-
string[] files = Directory.GetFiles(testDataDir, "*.json", SearchOption.AllDirectories).Take(5).ToArray();
30+
string[] files = Directory.GetFiles(testDataDir, "*.json", SearchOption.AllDirectories).ToArray();
3031

3132
foreach (var file in files)
3233
{

Diff for: benchmarks/CountStrings.cs

+50-7
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ public class CountStrings : BenchmarksBase
1010
{
1111
[Benchmark(Baseline = true)]
1212
[ArgumentsSource(nameof(TestData))]
13-
public unsafe ulong _SimdJson(byte[] data, string fileName, string fileSize)
13+
public unsafe ulong SimdJsonUtf16(byte[] data, string fileName, string fileSize)
1414
{
1515
ulong wordsCount = 0;
1616
fixed (byte* dataPtr = data)
@@ -22,8 +22,6 @@ public unsafe ulong _SimdJson(byte[] data, string fileName, string fileSize)
2222
{
2323
if (iterator.IsString)
2424
{
25-
// count all strings starting with 'a'
26-
// NOTE: it could be much faster with direct UTF8 API: (*iterator.GetUtf8String()) == (byte)'a')
2725
if (iterator.GetUtf16String().StartsWith('a'))
2826
wordsCount++;
2927
}
@@ -36,7 +34,7 @@ public unsafe ulong _SimdJson(byte[] data, string fileName, string fileSize)
3634

3735
//[Benchmark]
3836
[ArgumentsSource(nameof(TestData))]
39-
public unsafe ulong _SimdJsonNative(byte[] data, string fileName, string fileSize)
37+
public unsafe ulong SimdJsonNUtf16(byte[] data, string fileName, string fileSize)
4038
{
4139
ulong wordsCount = 0;
4240
fixed (byte* dataPtr = data)
@@ -48,9 +46,54 @@ public unsafe ulong _SimdJsonNative(byte[] data, string fileName, string fileSiz
4846
{
4947
if (iterator.IsString)
5048
{
51-
// count all strings starting with 'a'
52-
// NOTE: it could be much faster with direct UTF8 API: (*iterator.GetUtf8String()) == (byte)'a')
53-
if (iterator.GetUtf16String().StartsWith('a'))
49+
if (iterator.GetUtf16String().StartsWith('a')) // UTF16 in SimdJsonN is expected to be slow for now (see https://github.com/lemire/simdjson/pull/101)
50+
wordsCount++;
51+
}
52+
}
53+
}
54+
}
55+
56+
return wordsCount;
57+
}
58+
[Benchmark]
59+
[ArgumentsSource(nameof(TestData))]
60+
public unsafe ulong SimdJsonUtf8(byte[] data, string fileName, string fileSize)
61+
{
62+
ulong wordsCount = 0;
63+
fixed (byte* dataPtr = data)
64+
{
65+
using (ParsedJson doc = SimdJson.ParseJson(dataPtr, data.Length))
66+
using (var iterator = new ParsedJsonIterator(doc))
67+
{
68+
while (iterator.MoveForward())
69+
{
70+
if (iterator.IsString)
71+
{
72+
if (*iterator.GetUtf8String() == (byte)'a')
73+
wordsCount++;
74+
}
75+
}
76+
}
77+
}
78+
79+
return wordsCount;
80+
}
81+
82+
//[Benchmark]
83+
[ArgumentsSource(nameof(TestData))]
84+
public unsafe ulong SimdJsonNUtf8(byte[] data, string fileName, string fileSize)
85+
{
86+
ulong wordsCount = 0;
87+
fixed (byte* dataPtr = data)
88+
{
89+
using (ParsedJsonN doc = SimdJsonN.ParseJson(dataPtr, data.Length))
90+
using (var iterator = new ParsedJsonIteratorN(doc))
91+
{
92+
while (iterator.MoveForward())
93+
{
94+
if (iterator.IsString)
95+
{
96+
if (*iterator.GetUtf8String() == (byte)'a')
5497
wordsCount++;
5598
}
5699
}

Diff for: benchmarks/CountTokens.cs

+8-27
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,17 @@ public unsafe ulong _SimdJson(byte[] data, string fileName, string fileSize)
1919
using (var iterator = new ParsedJsonIterator(doc))
2020
{
2121
while (iterator.MoveForward())
22-
{
2322
if (iterator.IsDouble || iterator.IsInteger)
24-
{
25-
if (iterator.GetDouble() > 42000)
26-
numbersCount++;
27-
}
28-
}
23+
numbersCount++;
2924
}
3025
}
3126

3227
return numbersCount;
3328
}
3429

35-
//[Benchmark]
30+
[Benchmark]
3631
[ArgumentsSource(nameof(TestData))]
37-
public unsafe ulong _SimdJsonNative(byte[] data, string fileName, string fileSize)
32+
public unsafe ulong _SimdJsonN(byte[] data, string fileName, string fileSize)
3833
{
3934
ulong numbersCount = 0;
4035
fixed (byte* dataPtr = data)
@@ -43,13 +38,8 @@ public unsafe ulong _SimdJsonNative(byte[] data, string fileName, string fileSiz
4338
using (var iterator = new ParsedJsonIteratorN(doc))
4439
{
4540
while (iterator.MoveForward())
46-
{
47-
if (iterator.IsDouble || iterator.IsInteger)
48-
{
49-
if (iterator.GetDouble() > 42000)
50-
numbersCount++;
51-
}
52-
}
41+
//if (iterator.IsDouble || iterator.IsInteger)
42+
numbersCount++;
5343
}
5444
}
5545

@@ -65,10 +55,7 @@ public ulong _Utf8JsonReader(byte[] data, string fileName, string fileSize)
6555
while (reader.Read())
6656
{
6757
if (reader.TokenType == JsonTokenType.Number)
68-
{
69-
if (reader.GetDouble() > 42000)
70-
numbersCount++;
71-
}
58+
numbersCount++;
7259
}
7360

7461
return numbersCount;
@@ -86,10 +73,7 @@ public ulong JsonNet(byte[] data, string fileName, string fileSize)
8673
{
8774
if (reader.TokenType == JsonToken.Float ||
8875
reader.TokenType == JsonToken.Integer)
89-
{
90-
if (reader.ReadAsDouble() > 42000)
91-
numbersCount++;
92-
}
76+
numbersCount++;
9377
}
9478
}
9579

@@ -106,10 +90,7 @@ public ulong SpanJsonUtf8(byte[] data, string fileName, string fileSize)
10690
while ((token = reader.ReadUtf8NextToken()) != SpanJson.JsonToken.None)
10791
{
10892
if (token == SpanJson.JsonToken.Number)
109-
{
110-
if (reader.ReadDouble() > 42000)
111-
numbersCount++;
112-
}
93+
numbersCount++;
11394
reader.SkipNextUtf8Value(token);
11495
}
11596

Diff for: benchmarks/MinifyBenchmarks.cs

+21-14
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,23 @@ public class MinifyBenchmarks : BenchmarksBase
1010
{
1111
[Benchmark(Baseline = true)]
1212
[ArgumentsSource(nameof(TestData))]
13-
public string _SimdJsonWithoutValidation(byte[] jsonData, string fileName, string fileSize)
13+
public string SimdJson_NoValidation(byte[] jsonData, string fileName, string fileSize)
1414
{
1515
string json = Encoding.UTF8.GetString(jsonData);
1616
return SimdJson.MinifyJson(json);
1717
}
1818

19-
//[Benchmark]
19+
[Benchmark]
2020
[ArgumentsSource(nameof(TestData))]
21-
public string _SimdJsonNativeWithoutValidation(byte[] jsonData, string fileName, string fileSize)
21+
public string SimdJsonN_NoValidation(byte[] jsonData, string fileName, string fileSize)
2222
{
2323
string json = Encoding.UTF8.GetString(jsonData);
2424
return SimdJsonN.MinifyJson(json);
2525
}
2626

2727
[Benchmark]
2828
[ArgumentsSource(nameof(TestData))]
29-
public unsafe string _SimdJson(byte[] jsonData, string fileName, string fileSize)
29+
public unsafe string SimdJson_Validation(byte[] jsonData, string fileName, string fileSize)
3030
{
3131
string json = Encoding.UTF8.GetString(jsonData);
3232

@@ -44,29 +44,36 @@ public unsafe string _SimdJson(byte[] jsonData, string fileName, string fileSize
4444

4545
[Benchmark]
4646
[ArgumentsSource(nameof(TestData))]
47-
public string JsonNet(byte[] jsonData, string fileName, string fileSize)
47+
public unsafe string SimdJsonN_Validation(byte[] jsonData, string fileName, string fileSize)
4848
{
4949
string json = Encoding.UTF8.GetString(jsonData);
50-
// let's benchmark string API.
51-
return JObject.Parse(json).ToString(Newtonsoft.Json.Formatting.None);
50+
51+
// Validate json first
52+
// this step is not required for minification, it's here because JSON.NET also does validation
53+
fixed (byte* dataPtr = jsonData)
54+
{
55+
using (var doc = SimdJsonN.ParseJson(dataPtr, jsonData.Length))
56+
if (!doc.IsValid)
57+
throw new InvalidOperationException("Json is invalid");
58+
}
59+
60+
return SimdJsonN.MinifyJson(json);
5261
}
5362

5463
[Benchmark]
5564
[ArgumentsSource(nameof(TestData))]
56-
public string SpanJsonUtf16(byte[] jsonData, string fileName, string fileSize)
65+
public string JsonNet(byte[] jsonData, string fileName, string fileSize)
5766
{
5867
string json = Encoding.UTF8.GetString(jsonData);
59-
// let's benchmark string API.
60-
return SpanJson.JsonSerializer.Minifier.Minify(json);
68+
return JObject.Parse(json).ToString(Newtonsoft.Json.Formatting.None);
6169
}
6270

63-
6471
[Benchmark]
6572
[ArgumentsSource(nameof(TestData))]
66-
public byte[] SpanJsonUtf8(byte[] jsonData, string fileName, string fileSize)
73+
public string SpanJsonUtf16(byte[] jsonData, string fileName, string fileSize)
6774
{
68-
// let's benchmark string API.
69-
return SpanJson.JsonSerializer.Minifier.Minify(jsonData);
75+
string json = Encoding.UTF8.GetString(jsonData);
76+
return SpanJson.JsonSerializer.Minifier.Minify(json);
7077
}
7178
}
7279
}

Diff for: benchmarks/Program.cs

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1-
using BenchmarkDotNet.Running;
1+
using System;
2+
using System.IO;
3+
using BenchmarkDotNet.Running;
24

35
namespace Benchmarks
46
{
57
public class Program
68
{
7-
public static void Main()
9+
public static void Main(string[] args)
810
{
9-
BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run();
11+
BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args);
1012
}
1113
}
1214
}

Diff for: src/BindingsForNativeLib/SimdJsonNative/SimdJsonNative.vcxproj

+1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@
156156
<ConformanceMode>true</ConformanceMode>
157157
<EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
158158
<LanguageStandard>stdcpp17</LanguageStandard>
159+
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
159160
</ClCompile>
160161
<Link>
161162
<SubSystem>Windows</SubSystem>

0 commit comments

Comments
 (0)