Skip to content

Commit

Permalink
Improved comparison for GEDCOM file synchronization (#583)
Browse files Browse the repository at this point in the history
  • Loading branch information
Serg-Norseman committed Jul 1, 2024
1 parent 905af92 commit bd3a87f
Show file tree
Hide file tree
Showing 9 changed files with 406 additions and 119 deletions.
14 changes: 14 additions & 0 deletions projects/GKCore/GDModel/GDMTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,20 @@ public List<T> GetRecords<T>() where T : GDMRecord
return result;
}

/// <summary>
/// Collects the records of this tree that have the requested record type.
/// </summary>
/// <param name="recType">
/// Record type to select; <c>GDMRecordType.rtNone</c> selects every record.
/// </param>
/// <returns>A new list with the matching records, in the order they are stored.</returns>
public List<GDMRecord> GetRecords(GDMRecordType recType)
{
    // rtNone works as a wildcard, so the type test can be skipped entirely.
    bool acceptAll = (recType == GDMRecordType.rtNone);
    var matches = new List<GDMRecord>();

    for (int idx = 0; idx < fRecords.Count; idx++) {
        var candidate = fRecords[idx];
        if (!acceptAll && candidate.RecordType != recType) {
            continue;
        }

        matches.Add(candidate);
    }

    return matches;
}

public IGDMTreeEnumerator GetEnumerator(GDMRecordType recType)
{
return new TreeEnumerator(this, recType);
Expand Down
1 change: 1 addition & 0 deletions projects/GKCore/GKCore.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@
<Compile Include="GKCore\Names\NameEntry.cs" />
<Compile Include="GKCore\Names\NamesTable.cs" />
<Compile Include="GKCore\NetDiff\DiffUtil.cs" />
<Compile Include="GKCore\HashCode.cs" />
<Compile Include="GKCore\Options\ListOptions.cs" />
<Compile Include="GKCore\Options\LocaleOptions.cs" />
<Compile Include="GKCore\Plugins\OrdinaryPlugin.cs" />
Expand Down
219 changes: 219 additions & 0 deletions projects/GKCore/GKCore/HashCode.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
/*
The xxHash32 implementation is based on the code published by Yann Collet:
https://raw.githubusercontent.com/Cyan4973/xxHash/5c174cfa4e45a42f94082dc0d4539b39696afea1/xxhash.c
xxHash - Fast Hash algorithm
Copyright (C) 2012-2016, Yann Collet
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash homepage: http://www.xxhash.com
- xxHash source repository : https://github.com/Cyan4973/xxHash
*/

using System.Runtime.CompilerServices;

namespace System
{
    /// <summary>
    /// Accumulates a sequence of 32-bit hash codes and mixes them into a single
    /// hash code using an xxHash32-based scheme.
    /// </summary>
    /// <remarks>
    /// The seed is generated randomly once per process, so resulting hash codes
    /// are only stable within a single run and must not be persisted.
    /// This is a mutable struct: add values to a local variable, then call
    /// <see cref="ToHashCode"/>.
    /// </remarks>
    public struct HashCode
    {
        private static readonly uint s_seed = GenerateGlobalSeed();

        private const uint Prime1 = 2654435761U;
        private const uint Prime2 = 2246822519U;
        private const uint Prime3 = 3266489917U;
        private const uint Prime4 = 668265263U;
        private const uint Prime5 = 374761393U;

        // Accumulators for full blocks of four values.
        private uint _v1, _v2, _v3, _v4;
        // Buffer for the (up to three) values of a not yet completed block.
        private uint _queue1, _queue2, _queue3;
        // Number of values added so far.
        private uint _length;

        #region Private and inlined

        /// <summary>
        /// Produces the per-process random seed.
        /// </summary>
        private static uint GenerateGlobalSeed()
        {
            // Random.Next() only returns non-negative values, which would leave
            // the top bit of the seed permanently cleared; filling four raw
            // bytes uses the full 32-bit range instead.
            var buffer = new byte[sizeof(uint)];
            new Random().NextBytes(buffer);
            return BitConverter.ToUInt32(buffer, 0);
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void Initialize(out uint v1, out uint v2, out uint v3, out uint v4)
        {
            v1 = s_seed + Prime1 + Prime2;
            v2 = s_seed + Prime2;
            v3 = s_seed;
            v4 = s_seed - Prime1;
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static uint RotateLeft(uint value, int offset)
        {
            return (value << offset) | (value >> (32 - offset));
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static uint Round(uint hash, uint input)
        {
            return RotateLeft(hash + input * Prime2, 13) * Prime1;
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static uint QueueRound(uint hash, uint queuedValue)
        {
            return RotateLeft(hash + queuedValue * Prime3, 17) * Prime4;
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static uint MixState(uint v1, uint v2, uint v3, uint v4)
        {
            return RotateLeft(v1, 1) + RotateLeft(v2, 7) + RotateLeft(v3, 12) + RotateLeft(v4, 18);
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static uint MixEmptyState()
        {
            return s_seed + Prime5;
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static uint MixFinal(uint hash)
        {
            hash ^= hash >> 15;
            hash *= Prime2;
            hash ^= hash >> 13;
            hash *= Prime3;
            hash ^= hash >> 16;
            return hash;
        }

        #endregion

        /// <summary>
        /// Adds the hash code of a value-type instance to the accumulated state.
        /// </summary>
        /// <param name="value">Value whose <c>GetHashCode()</c> result is added.</param>
        public void AddVal<T>(T value) where T : struct
        {
            AddHash(value.GetHashCode());
        }

        /// <summary>
        /// Adds the hash code of a reference-type instance to the accumulated state.
        /// </summary>
        /// <param name="value">
        /// Object whose <c>GetHashCode()</c> result is added; <c>null</c> contributes 0.
        /// </param>
        public void AddObj<T>(T value) where T : class
        {
            AddHash(value == null ? 0 : value.GetHashCode());
        }

        /// <summary>
        /// Adds a raw 32-bit hash value to the accumulated state.
        /// </summary>
        /// <param name="value">The hash value to mix in.</param>
        public void AddHash(int value)
        {
            // The original xxHash works as follows:
            // 0. Initialize immediately. We can't do this in a struct (no
            //    default ctor).
            // 1. Accumulate blocks of length 16 (4 uints) into 4 accumulators.
            // 2. Accumulate remaining blocks of length 4 (1 uint) into the
            //    hash.
            // 3. Accumulate remaining blocks of length 1 into the hash.

            // There is no need for #3 as this type only accepts ints. _queue1,
            // _queue2 and _queue3 are basically a buffer so that when
            // ToHashCode is called we can execute #2 correctly.

            // We need to initialize the xxHash32 state (_v1 to _v4) lazily (see
            // #0), and the last place that can be done is just before the first
            // block of four values is mixed in. The xxHash32 state is never
            // used for streams containing fewer than four values.

            uint val = (uint)value;

            // Storing the value of _length locally shaves off quite a few bytes
            // in the resulting machine code.
            uint previousLength = _length++;
            uint position = previousLength % 4;

            // Switch can't be inlined.

            if (position == 0)
                _queue1 = val;
            else if (position == 1)
                _queue2 = val;
            else if (position == 2)
                _queue3 = val;
            else // position == 3
            {
                // The fourth value ever added completes the first block:
                // this is the moment to set up the accumulators.
                if (previousLength == 3)
                    Initialize(out _v1, out _v2, out _v3, out _v4);

                _v1 = Round(_v1, _queue1);
                _v2 = Round(_v2, _queue2);
                _v3 = Round(_v3, _queue3);
                _v4 = Round(_v4, val);
            }
        }

        /// <summary>
        /// Computes the final hash code from everything added so far.
        /// </summary>
        /// <remarks>
        /// Does not modify the accumulated state, so it may be called repeatedly
        /// and further values may still be added afterwards.
        /// </remarks>
        /// <returns>The combined hash code.</returns>
        public int ToHashCode()
        {
            // Storing the value of _length locally shaves off quite a few bytes
            // in the resulting machine code.
            uint length = _length;

            // position refers to the *next* queue position in this method, so
            // position == 1 means that _queue1 is populated; _queue2 would have
            // been populated on the next call to AddHash.
            uint position = length % 4;

            // If the length is less than 4, _v1 to _v4 don't contain anything
            // yet. xxHash32 treats this differently.

            uint hash = length < 4 ? MixEmptyState() : MixState(_v1, _v2, _v3, _v4);

            // _length is incremented once per AddHash call and is therefore 4
            // times too small (xxHash length is in bytes, not ints).

            hash += length * 4;

            // Mix what remains in the queue

            // Switch can't be inlined right now, so use as few branches as
            // possible by manually excluding impossible scenarios (position > 1
            // is always false if position is not > 0).
            if (position > 0) {
                hash = QueueRound(hash, _queue1);
                if (position > 1) {
                    hash = QueueRound(hash, _queue2);
                    if (position > 2)
                        hash = QueueRound(hash, _queue3);
                }
            }

            hash = MixFinal(hash);
            return (int)hash;
        }
    }
}
8 changes: 4 additions & 4 deletions projects/GKCore/GKCore/NetDiff/DiffUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public class DiffResult<T>
{
public T Obj1 { get; private set; }
public T Obj2 { get; private set; }
public DiffStatus Status { get; private set; }
public DiffStatus Status { get; set; }

public DiffResult(T obj1, T obj2, DiffStatus status)
{
Expand Down Expand Up @@ -182,14 +182,14 @@ private static DiffStatus GetStatus(Point current, Point prev)
throw new Exception();
}

internal static char GetStatusChar(DiffStatus status)
public static char GetStatusChar(DiffStatus status)
{
switch (status)
{
case DiffStatus.Equal: return '=';
case DiffStatus.Deleted: return '-';
case DiffStatus.Inserted: return '+';
case DiffStatus.Modified: return 'M';
case DiffStatus.Modified: return '';
}

throw new System.Exception();
Expand All @@ -209,7 +209,7 @@ internal struct Point : IEquatable<Point>
public int X { get; private set; }
public int Y { get; private set; }

public Point(int x, int y)
public Point(int x, int y) : this()
{
X = x;
Y = y;
Expand Down
67 changes: 53 additions & 14 deletions projects/GKCore/GKCore/Tools/SyncTool.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,24 @@
*/

using System;
using System.Collections.Generic;
using System.Linq;
using GDModel;
using GDModel.Providers.GEDCOM;
using GKCore.NetDiff;

namespace GKCore.Tools
{
public enum RecordStatus
{
Unknown, // gray
Identical, // white
Changed, // yellow
Deleted, // orange or red
Added, // lightblue or cyan
}


/// <summary>
///
/// </summary>
public class SyncTool
{
private GDMTree fMainTree;
private GDMTree fOtherTree;

public List<DiffResult<GDMRecord>> Results;

public void LoadOtherFile(GDMTree mainTree, string fileName)
{
if (mainTree == null)
Expand All @@ -47,11 +45,52 @@ public void LoadOtherFile(GDMTree mainTree, string fileName)
if (string.IsNullOrEmpty(fileName))
throw new ArgumentNullException("fileName");

using (var extTree = new GDMTree()) {
var gedcomProvider = new GEDCOMProvider(extTree);
gedcomProvider.LoadFromFile(fileName);
fMainTree = mainTree;

fOtherTree = new GDMTree();
var gedcomProvider = new GEDCOMProvider(fOtherTree);
gedcomProvider.LoadFromFile(fileName);
}

/// <summary>
/// Compares the records of the given type between the main tree and the
/// other tree and stores the outcome in <see cref="Results"/>.
/// </summary>
/// <param name="recordType">Record type passed to <c>GDMTree.GetRecords</c> for both trees.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when no trees are available yet, i.e. <c>LoadOtherFile</c> has not
/// been called successfully.
/// </exception>
public void CompareRecords(GDMRecordType recordType)
{
    // Both trees are only assigned by LoadOtherFile; fail with a clear
    // message instead of a NullReferenceException further down.
    if (fMainTree == null || fOtherTree == null)
        throw new InvalidOperationException("LoadOtherFile must be called before CompareRecords");

    var records1 = fMainTree.GetRecords(recordType);
    var records2 = fOtherTree.GetRecords(recordType);

    // Stage 1: pair up records using Stage1Comparer.
    var option = new DiffOption<GDMRecord>();
    option.EqualityComparer = new Stage1Comparer();

    Results = DiffUtil.Diff(records1, records2, option).ToList();

    // Follow-up passes refine the statuses of the stage-1 results.
    CheckModified();
    CheckContents();
}

/// <summary>
/// Re-examines every result currently marked as equal and flags it as
/// modified when the paired records differ in XRef or in change timestamp.
/// </summary>
private void CheckModified()
{
    foreach (var entry in Results) {
        if (entry.Status == DiffStatus.Equal) {
            var mainRec = entry.Obj1;
            var otherRec = entry.Obj2;

            // The timestamp is only inspected when the XRefs already agree.
            bool unchanged = (mainRec.XRef == otherRec.XRef)
                && (mainRec.ChangeDate.ChangeDateTime == otherRec.ChangeDate.ChangeDateTime);

            if (!unchanged) {
                entry.Status = DiffStatus.Modified;
            }
        }
    }
}

/// <summary>
/// Placeholder for a content-level comparison pass; the body is currently
/// empty, so calling it has no effect.
/// </summary>
private void CheckContents()
{
// TODO: IEquatable<T> and GetHashCode() - on all records, structures and tags
// NOTE(review): presumably a prerequisite for comparing record contents here - confirm the intended design.
}

/// <summary>
/// First-stage comparer: two records are considered equal when their UIDs
/// are equal.
/// </summary>
internal class Stage1Comparer : IEqualityComparer<GDMRecord>
{
    /// <summary>
    /// Compares two records by UID.
    /// </summary>
    /// <returns>
    /// <c>true</c> when both arguments are the same instance (or both null)
    /// or their UIDs are equal; <c>false</c> when exactly one is null.
    /// </returns>
    public bool Equals(GDMRecord x, GDMRecord y)
    {
        if (ReferenceEquals(x, y)) return true;
        if (ReferenceEquals(x, null) || ReferenceEquals(y, null)) return false;

        return x.UID == y.UID;
    }

    /// <summary>
    /// Returns a hash code derived from the record's UID.
    /// </summary>
    /// <remarks>
    /// The hash must be based on the same key as <see cref="Equals(GDMRecord, GDMRecord)"/>:
    /// records that compare equal here are distinct objects, so the record's
    /// own hash code would not be guaranteed to match for them.
    /// </remarks>
    /// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
    public int GetHashCode(GDMRecord obj)
    {
        if (ReferenceEquals(obj, null))
            throw new ArgumentNullException("obj");

        // A record without UID hashes to 0, consistent with null == null above.
        var uid = obj.UID;
        return (uid == null) ? 0 : uid.GetHashCode();
    }
}
}
Expand Down
1 change: 1 addition & 0 deletions projects/GKTests/GKCore/ControllerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,7 @@ public void Test_TreeSplitController()
SubstituteControl<IButton>(view, "btnSelectFamily");
SubstituteControl<IButton>(view, "btnSelectAncestors");
SubstituteControl<IButton>(view, "btnSelectDescendants");
SubstituteControl<IButton>(view, "btnSelectList");
SubstituteControl<IButton>(view, "btnDelete");
SubstituteControl<IButton>(view, "btnSave");

Expand Down
Loading

0 comments on commit bd3a87f

Please sign in to comment.