Skip to content

Commit

Permalink
#236 : Changes to file name truncation and checksum calculation (#235)
Browse files Browse the repository at this point in the history
* Changes to file name truncation and checksum calculation

* fixing unit test failure

* Added test coverage, changed checksum version and minor fixes

* relaxing checksum match for older checksum versions

* abstracting logic to extract checksum version to helper method

* Resolving PR Comments
  • Loading branch information
suskumar-MSFT authored Jun 5, 2021
1 parent dc3117d commit bc17b24
Show file tree
Hide file tree
Showing 9 changed files with 132 additions and 110 deletions.
19 changes: 17 additions & 2 deletions src/PAModel/Checksum/ChecksumMaker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ namespace Microsoft.PowerPlatform.Formulas.Tools
public class ChecksumMaker
{
// Given checksum an easy prefix so that we can identify algorithm version changes.
public static string Version = "C6";
// The checksum is prefix with 'C' and this int value.
public static int Version = 7;

public const string ChecksumName = "checksum.json";

Expand Down Expand Up @@ -175,7 +176,19 @@ private static byte[] ChecksumJsonFile<T>(string filename, byte[] bytes)

internal static string ChecksumToString(byte[] bytes)
{
return Version + "_" + Convert.ToBase64String(bytes);
return "C" + Version.ToString() + "_" + Convert.ToBase64String(bytes);
}

/// <summary>
/// Checksum version is of the format: C<int version><hash>. Eg. C6_S88Ujgp7HEnrT7m55yNtgjimcgSoY70Krf8k2HecWYA=
/// This method extracts the int version of the checksum.
/// </summary>
/// <param name="checksum">The checksum.</param>
/// <returns>Returns the int version of the checksum</returns>
internal static int GetChecksumVersion(string checksum)
{
var version = checksum.Split('_').First();
return int.Parse(version.Substring(1));
}

/// <summary>
Expand All @@ -189,6 +202,8 @@ internal static string ChecksumToString(byte[] bytes)

foreach (var kv in _files.OrderBy(x => x.Key))
{
var fileNameInBytes = Encoding.ASCII.GetBytes(kv.Key);
hash.AppendData(fileNameInBytes);
hash.AppendData(kv.Value);
singleFileHash.AppendData(kv.Value);
var singleFileHashResult = singleFileHash.GetHashAndReset();
Expand Down
15 changes: 7 additions & 8 deletions src/PAModel/Serializers/MsAppSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using Microsoft.PowerPlatform.Formulas.Tools.Utility;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Linq;
Expand Down Expand Up @@ -63,7 +64,7 @@ public static CanvasDocument Load(Stream streamToMsapp, ErrorContainer errors)
{
foreach (var entry in z.Entries)
{
checksumMaker.AddFile(entry.FullName, entry.ToBytes());
checksumMaker.AddFile(FileEntry.FromZip(entry).Name.ToMsAppPath(), entry.ToBytes());

var fullName = entry.FullName;
var kind = FileEntry.TriageKind(FilePath.FromMsAppPath(fullName));
Expand Down Expand Up @@ -224,10 +225,9 @@ public static CanvasDocument Load(Stream streamToMsapp, ErrorContainer errors)
// Checksums?
var currentChecksum = checksumMaker.GetChecksum();

// This is debug only. The server checksum is out of date with the client checksum
// The main checksum validation that matters is the repack after unpack
#if DEBUG
if (app._checksum.ServerStampedChecksum != null && app._checksum.ServerStampedChecksum != currentChecksum.wholeChecksum)
// In case the server stamped checksum on the msapp is older than the client, treat it like its missing checksum.
var isNullOrOlderChecksum = app._checksum.ServerStampedChecksum == null || ChecksumMaker.GetChecksumVersion(app._checksum.ServerStampedChecksum) < ChecksumMaker.Version;
if (!isNullOrOlderChecksum && app._checksum.ServerStampedChecksum != currentChecksum.wholeChecksum)
{
// The server checksum doesn't match the actual contents.
// likely has been tampered.
Expand All @@ -240,7 +240,7 @@ public static CanvasDocument Load(Stream streamToMsapp, ErrorContainer errors)
{
errors.ChecksumMismatch("Missing file " + file.Key);
}
if (fileChecksum != file.Value)
else if (fileChecksum != file.Value)
{
errors.ChecksumMismatch($"File {file.Key} checksum does not match on extract");
}
Expand All @@ -254,7 +254,6 @@ public static CanvasDocument Load(Stream streamToMsapp, ErrorContainer errors)
}
}
}
#endif

app._checksum.ClientStampedChecksum = currentChecksum.wholeChecksum;
app._checksum.ClientPerFileChecksums = currentChecksum.perFileChecksum;
Expand Down Expand Up @@ -436,7 +435,7 @@ private static void ComputeAndWriteChecksum(CanvasDocument app, ChecksumMaker ch
{
errors.ChecksumMismatch("Missing file " + file.Key);
}
if (fileChecksum != file.Value)
else if (fileChecksum != file.Value)
{
errors.ChecksumMismatch($"File {file.Key} checksum does not match on extract");
}
Expand Down
12 changes: 7 additions & 5 deletions src/PAModel/Serializers/SourceSerializer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,7 @@ private static void RestoreViewName(DataSourceEntry ds, string dataSetName)
/// for a single top level control, such as the App object, a screen, or component
/// Name refers to the control name
/// Only in case of AppTest, the topParentName is passed down, since for AppTest the TestSuites are sharded into individual files.
/// We truncate the control names to limit it to 50 charactes length (escaped name).
private static void WriteTopParent(
DirectoryWriter dir,
CanvasDocument app,
Expand All @@ -816,10 +817,11 @@ private static void WriteTopParent(
var controlName = name;
var text = PAWriterVisitor.PrettyPrint(ir);

string filename = controlName + ".fx.yaml";
var newControlName = Utilities.TruncateNameIfTooLong(controlName);

string filename = newControlName + ".fx.yaml";

dir.WriteAllText(subDir, new FilePath(filename), text);
dir.WriteAllText(subDir, filename, text);

var extraData = new Dictionary<string, ControlState>();
foreach (var item in app._editorStateStore.GetControlsWithTopParent(topParentname ?? controlName))
Expand All @@ -828,19 +830,19 @@ private static void WriteTopParent(
}

// Write out of all the other state for roundtripping
string extraContent = (topParentname ?? controlName) + ".editorstate.json";
string extraContent = (topParentname ?? newControlName) + ".editorstate.json";

// We write editorstate.json file per top parent control, and hence for the TestSuite control since it is not a top parent
// use the top parent name (i.e. Test_7F478737223C4B69) to create the editorstate.json file.
if (!File.Exists(Path.Combine(subDir, extraContent)))
{
dir.WriteAllJson(EditorStateDir, new FilePath(extraContent), extraData);
dir.WriteAllJson(EditorStateDir, extraContent, extraData);
}

// Write out component templates next to the component
if (app._templateStore.TryGetTemplate(name, out var templateState))
{
dir.WriteAllJson(subDir, new FilePath(controlName + ".json"), templateState);
dir.WriteAllJson(subDir, newControlName + ".json", templateState);
}
}

Expand Down
16 changes: 16 additions & 0 deletions src/PAModel/Utility/DirectoryHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ public void WriteAllJson<T>(string subdir, FilePath filename, T obj)
WriteAllText(subdir, filename, text);
}

// Use this if the filename is already escaped.
public void WriteAllJson<T>(string subdir, string filename, T obj)
{
var text = JsonSerializer.Serialize<T>(obj, Utilities._jsonOpts);
text = JsonNormalizer.Normalize(text);
WriteAllText(subdir, filename, text);
}

public void WriteDoubleEncodedJson(string subdir, FilePath filename, string jsonStr)
{
if (!string.IsNullOrWhiteSpace(jsonStr) && jsonStr != "{}")
Expand All @@ -76,6 +84,14 @@ public void WriteAllXML(string subdir, FilePath filename, string xmlText)
public void WriteAllText(string subdir, FilePath filename, string text)
{
string path = Path.Combine(_directory, subdir, filename.ToPlatformPath());
EnsureFileDirExists(path);
File.WriteAllText(path, text);
}

// Use this if the filename is already escaped.
public void WriteAllText(string subdir, string filename, string text)
{
string path = Path.Combine(_directory, subdir, filename);

// Check for collision so that we don't overwrite an existing file.
if (File.Exists(path))
Expand Down
56 changes: 11 additions & 45 deletions src/PAModel/Utility/FilePath.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,21 @@ public FilePath(params string[] segments)

public string ToMsAppPath()
{
return string.Join("\\", _pathSegments);
var path = string.Join("\\", _pathSegments);

// Some paths mistakenly start with DirectorySepChar in the msapp,
// We replaced it with `_/` when writing, remove that now.
if (path.StartsWith(FileEntry.FilenameLeadingUnderscore.ToString()))
{
path = path.TrimStart(FileEntry.FilenameLeadingUnderscore);
}

return path;
}

/// <summary>
/// Performs escaping on the path, and also truncates the filename to a max length of 60, if longer.
/// </summary>
/// <returns></returns>
public string ToPlatformPath()
{
var originalFileName = this.GetFileName();
var newFileName = GetTruncatedFileNameIfTooLong(originalFileName);
var remainingPath = Path.Combine(_pathSegments.Where(x => x != originalFileName).Select(Utilities.EscapeFilename).ToArray());
return Path.Combine(remainingPath, newFileName);
return Path.Combine(_pathSegments.Select(Utilities.EscapeFilename).ToArray());
}

public static FilePath FromPlatformPath(string path)
Expand Down Expand Up @@ -150,42 +152,6 @@ public string HandleFileNameCollisions(string path)
return path;
}

/// <summary>
/// If the file name length is longer than 60, it is truncated and appended with a hash (to avoid collisions).
/// Checks the length of the escaped file name, since its possible that the length is under 60 before escaping but goes beyond 60 later.
/// We do modulo by 1000 of the hash to limit it to 3 characters.
/// </summary>
/// <returns></returns>
private string GetTruncatedFileNameIfTooLong(string fileName)
{
var newFileName = Utilities.EscapeFilename(fileName);
if (newFileName.Length > MaxFileNameLength)
{
var extension = GetCustomExtension(newFileName);

// limit the hash to 3 characters by doing a module by 1000
var hash = (GetHash(newFileName.Substring(0, newFileName.Length - extension.Length)) % 1000).ToString("x3");
newFileName = newFileName.Substring(0, MaxFileNameLength - extension.Length - hash.Length) + hash + extension;
}
return newFileName;
}

/// <summary>
/// djb2 algorithm to compute the hash of a string
/// </summary>
/// <param name="str"></param>
/// <returns></returns>
private ulong GetHash(string str)
{
ulong hash = 5381;
foreach (char c in str)
{
hash = ((hash << 5) + hash) + c;
}

return hash;
}

private string GetCustomExtension(string fileName)
{
var extension = fileName.EndsWith(yamlExtension, StringComparison.OrdinalIgnoreCase)
Expand Down
38 changes: 38 additions & 0 deletions src/PAModel/Utility/Utilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ namespace Microsoft.PowerPlatform.Formulas.Tools
// Various utility methods.
internal static class Utilities
{
public const int MaxNameLength = 50;

// Allows using with { } initializers, which require an Add() method.
public static void Add<T>(this Stack<T> stack, T item)
{
Expand Down Expand Up @@ -358,5 +360,41 @@ public static string GetResourceRelativePath(ContentKind contentType)
throw new NotSupportedException("Unrecognized Content Kind for local resource");
}
}

/// <summary>
/// If the name length is longer than 50, it is truncated and appended with a hash (to avoid collisions).
/// Checks the length of the escaped name, since its possible that the length is under 60 before escaping but goes beyond 60 later.
/// We do modulo by 1000 of the hash to limit it to 3 characters.
/// </summary>
/// <param name="name">The name that needs to be truncated.</param>
/// <returns>Returns the truncated name if the escaped version of it is longer than 50 characters.</returns>
public static string TruncateNameIfTooLong(string name)
{
var escapedName = EscapeFilename(name);
if (escapedName.Length > MaxNameLength)
{
// limit the hash to 3 characters by doing a module by 4096 (16^3)
var hash = (GetHash(escapedName) % 4096).ToString("x3");
escapedName = escapedName.Substring(0, MaxNameLength - (hash.Length + 1)) + "_" + hash;
}
return escapedName;
}

/// <summary>
/// djb2 algorithm to compute the hash of a string
/// This must be determinstic and stable since it's used in file names
/// </summary>
/// <param name="str">The string for which we need to compute the hash.</param>
/// <returns></returns>
public static ulong GetHash(string str)
{
ulong hash = 5381;
foreach (char c in str)
{
hash = ((hash << 5) + hash) + c;
}

return hash;
}
}
}
49 changes: 26 additions & 23 deletions src/PAModelTests/ChecksumTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace PAModelTests
public class ChecksumTests
{
[DataTestMethod]
[DataRow("MyWeather.msapp", "C6_UGmkj/aVNihiaLzi2BGj+AOXj6fsO/1GGSkEz/rVgk8=", 11, "References\\DataSources.json", "C6_2dpVudcymwNaHoHtQugF1MSpzsY1I6syuPiB0B+jTYc=")]
[DataRow("MyWeather.msapp", "C7_ZXZwZAG3P0lmCkNAGjsIjYb503akWCyudsk8DEi2aX0=", 11, "References\\DataSources.json", "C7_2dpVudcymwNaHoHtQugF1MSpzsY1I6syuPiB0B+jTYc=")]
public void TestChecksum(string filename, string expectedChecksum, int expectedFileCount, string file, string innerExpectedChecksum)
{
var root = Path.Combine(Environment.CurrentDirectory, "Apps", filename);
Expand All @@ -27,6 +27,9 @@ public void TestChecksum(string filename, string expectedChecksum, int expectedF
Assert.AreEqual(expectedFileCount, actualChecksum.perFileChecksum.Count);
Assert.IsTrue(actualChecksum.perFileChecksum.TryGetValue(file, out var perFileChecksum));
Assert.AreEqual(innerExpectedChecksum, perFileChecksum);

// Test checksum version
Assert.AreEqual(ChecksumMaker.GetChecksumVersion(expectedChecksum), ChecksumMaker.Version);
}

[DataTestMethod]
Expand Down Expand Up @@ -58,28 +61,28 @@ public void ChecksumsUnique()
// Physically check in the actual checksum results.
// Checksum must be very stable since these get checked into all our customers now.
// Use const fields to help for cases when checksums should be the same.
const string C1 = "C6_VrBVZxNCyyq6nNHBFhEi1p8/+LEgauFoOxISpTMfidA=";
const string C2 = "C6_bSgalCJTSBUuXr/l9syvLFQtQveX9OeUkRFO+bXaxsY=";
const string C3 = "C6_pV4gc7whiqiJA6qNnhtYNgnoy0AV19//Bs/JF+bwvks=";
const string C4 = "C6_XRxPTS3gnYmfjZL2jlOD/3STXSC8VniAOx8bfu0p3Bc=";
const string C5 = "C6_6mPsJ1PnWkhrz7kOmeIdrtdFtkUH+WrnfOrR/YWwB4Q=";
const string C6 = "C6_JpWWJaI11Wjp10J8M8WQ/ZKTOLDqyuh2XpuhrnjNIZI=";
const string C7 = "C6_SsPqlOsAkyDo7QmBft/8uwvbbDAEpX0rfiYqNUZdC/s=";
const string C8 = "C6_8ZdpKBDUV+KX/OnFZTsCWB/5mlCFI3DynX5f5H2dN+Y=";
const string C9 = "C6_mBjUrHTZUVjb7YgLtnZAWXjp25eh8h/uN1rgzH0qTmY=";
const string C10 = "C6_B0Pu6r/4VrrelJ4VYXbhTNJQfG6zem/OvUxIEweFAa4=";
const string C11 = "C6_rNmFqjFuTa2nSPd+9J11Dkq2CE1d3UHrdRPbakKNsz8=";

const string C12 = "C6_xinyFpAi3E1fug553zcwOQphnA26HzffwtFHFYvwBIQ=";
const string C13 = "C6_tLvF8n0SCv0h+OVpNzeBfjvnMsfWN2SvOttGXgkzud8=";
const string C14 = "C6_But9amnuGeX733SQGNPSq/oEvL0TZdsxLrhtxxaTibg=";
const string C15 = "C6_mF50+afGew2Ea527fnP0EhsQSunAh8GbgaBbxpNPKpM=";
const string C16 = "C6_tpIqQWd4zNcNsjI8xQKjvoocOT+cACVDU9oEmpw/I7Y=";
const string C17 = "C6_nxln5Ugv128wodPRdYyn8U9mXYzeCvPefCJwkLyof8A=";

const string C18 = "C6_HyC9hHMBR+0EPsD6yYguZ+cYv2ovwlgEfh3iygNbWHM=";
const string C19 = "C6_f8iCQ1KTyoVxp/xpbpwNXvjEZgIeNIaTXdzTe0q26ps=";
const string C20 = "C6_2YUFpLVLEYtdFvV9iLN8F6TM+cWczemMx4m0VEIpfrg=";
const string C1 = "C7_VrBVZxNCyyq6nNHBFhEi1p8/+LEgauFoOxISpTMfidA=";
const string C2 = "C7_bSgalCJTSBUuXr/l9syvLFQtQveX9OeUkRFO+bXaxsY=";
const string C3 = "C7_pV4gc7whiqiJA6qNnhtYNgnoy0AV19//Bs/JF+bwvks=";
const string C4 = "C7_XRxPTS3gnYmfjZL2jlOD/3STXSC8VniAOx8bfu0p3Bc=";
const string C5 = "C7_6mPsJ1PnWkhrz7kOmeIdrtdFtkUH+WrnfOrR/YWwB4Q=";
const string C6 = "C7_JpWWJaI11Wjp10J8M8WQ/ZKTOLDqyuh2XpuhrnjNIZI=";
const string C7 = "C7_SsPqlOsAkyDo7QmBft/8uwvbbDAEpX0rfiYqNUZdC/s=";
const string C8 = "C7_8ZdpKBDUV+KX/OnFZTsCWB/5mlCFI3DynX5f5H2dN+Y=";
const string C9 = "C7_mBjUrHTZUVjb7YgLtnZAWXjp25eh8h/uN1rgzH0qTmY=";
const string C10 = "C7_B0Pu6r/4VrrelJ4VYXbhTNJQfG6zem/OvUxIEweFAa4=";
const string C11 = "C7_rNmFqjFuTa2nSPd+9J11Dkq2CE1d3UHrdRPbakKNsz8=";

const string C12 = "C7_xinyFpAi3E1fug553zcwOQphnA26HzffwtFHFYvwBIQ=";
const string C13 = "C7_tLvF8n0SCv0h+OVpNzeBfjvnMsfWN2SvOttGXgkzud8=";
const string C14 = "C7_But9amnuGeX733SQGNPSq/oEvL0TZdsxLrhtxxaTibg=";
const string C15 = "C7_mF50+afGew2Ea527fnP0EhsQSunAh8GbgaBbxpNPKpM=";
const string C16 = "C7_tpIqQWd4zNcNsjI8xQKjvoocOT+cACVDU9oEmpw/I7Y=";
const string C17 = "C7_nxln5Ugv128wodPRdYyn8U9mXYzeCvPefCJwkLyof8A=";

const string C18 = "C7_HyC9hHMBR+0EPsD6yYguZ+cYv2ovwlgEfh3iygNbWHM=";
const string C19 = "C7_f8iCQ1KTyoVxp/xpbpwNXvjEZgIeNIaTXdzTe0q26ps=";
const string C20 = "C7_2YUFpLVLEYtdFvV9iLN8F6TM+cWczemMx4m0VEIpfrg=";


[DataTestMethod]
Expand Down
27 changes: 0 additions & 27 deletions src/PAModelTests/FilePathTests.cs

This file was deleted.

Loading

0 comments on commit bc17b24

Please sign in to comment.