diff --git a/OpenXmlPowerTools.Tests/OpenXmlPowerTools.Tests.csproj b/OpenXmlPowerTools.Tests/OpenXmlPowerTools.Tests.csproj index 6bb94044..dca4c8f9 100644 --- a/OpenXmlPowerTools.Tests/OpenXmlPowerTools.Tests.csproj +++ b/OpenXmlPowerTools.Tests/OpenXmlPowerTools.Tests.csproj @@ -1,7 +1,7 @@  - net452;net461;netcoreapp2.0 + net452;net461;net6.0 true true true @@ -19,7 +19,7 @@ - + diff --git a/OpenXmlPowerTools/Comparer/ComparisonUnit.cs b/OpenXmlPowerTools/Comparer/ComparisonUnit.cs new file mode 100644 index 00000000..67a0836f --- /dev/null +++ b/OpenXmlPowerTools/Comparer/ComparisonUnit.cs @@ -0,0 +1,67 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace OpenXmlPowerTools +{ + public abstract class ComparisonUnit + { + private int? _descendantContentAtomsCount; + + public CorrelationStatus CorrelationStatus { get; set; } + + public List Contents { get; protected set; } + + public string SHA1Hash { get; protected set; } + + public int DescendantContentAtomsCount + { + get + { + if (_descendantContentAtomsCount != null) return (int) _descendantContentAtomsCount; + + _descendantContentAtomsCount = DescendantContentAtoms().Count(); + return (int) _descendantContentAtomsCount; + } + } + + private IEnumerable Descendants() + { + var comparisonUnitList = new List(); + DescendantsInternal(this, comparisonUnitList); + return comparisonUnitList; + } + + public IEnumerable DescendantContentAtoms() + { + return Descendants().OfType(); + } + + private static void DescendantsInternal( + ComparisonUnit comparisonUnit, + List comparisonUnitList) + { + foreach (ComparisonUnit cu in comparisonUnit.Contents) + { + comparisonUnitList.Add(cu); + if (cu.Contents != null && cu.Contents.Any()) + DescendantsInternal(cu, comparisonUnitList); + } + } + + public abstract string ToString(int indent); + + internal static string ComparisonUnitListToString(ComparisonUnit[] cul) + { + var sb = new StringBuilder(); + sb.Append("Dump Comparision Unit List To String" + Environment.NewLine); + foreach (ComparisonUnit item in cul) sb.Append(item.ToString(2) + Environment.NewLine); + + return sb.ToString(); + } + } +} diff --git a/OpenXmlPowerTools/Comparer/ComparisonUnitAtom.cs b/OpenXmlPowerTools/Comparer/ComparisonUnitAtom.cs new file mode 100644 index 00000000..5046d4ad --- /dev/null +++ b/OpenXmlPowerTools/Comparer/ComparisonUnitAtom.cs @@ -0,0 +1,215 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Xml.Linq; +using DocumentFormat.OpenXml.Packaging; + +namespace OpenXmlPowerTools +{ + public class ComparisonUnitAtom : ComparisonUnit + { + public ComparisonUnitAtom( + XElement contentElement, + XElement[] ancestorElements, + OpenXmlPart part, + WmlComparerSettings settings) + { + ContentElement = contentElement; + AncestorElements = ancestorElements; + Part = part; + RevTrackElement = GetRevisionTrackingElementFromAncestors(contentElement, AncestorElements); + + if (RevTrackElement == null) + { + CorrelationStatus = CorrelationStatus.Equal; + } + else + { + if (RevTrackElement.Name == W.del) + { + CorrelationStatus = CorrelationStatus.Deleted; + } + else if (RevTrackElement.Name == W.ins) + { + CorrelationStatus = CorrelationStatus.Inserted; + } + } + + var sha1Hash = (string) contentElement.Attribute(PtOpenXml.SHA1Hash); + if (sha1Hash != null) + { + SHA1Hash = sha1Hash; + } + else + { + string shaHashString = GetSha1HashStringForElement(ContentElement, settings); + SHA1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaHashString); + } + } + + // AncestorElements are kept in order from the body to the leaf, because this is the order in which we need to access in order + // to reassemble the document. However, in many places in the code, it is necessary to find the nearest ancestor, i.e. cell + // so it is necessary to reverse the order when looking for it, i.e. look from the leaf back to the body element. + + public XElement[] AncestorElements { get; } + + public XElement ContentElement { get; } + + public XElement RevTrackElement { get; } + + public string[] AncestorUnids { get; set; } + + public ComparisonUnitAtom ComparisonUnitAtomBefore { get; set; } + + public XElement ContentElementBefore { get; set; } + + public OpenXmlPart Part { get; } + + private static string GetSha1HashStringForElement(XElement contentElement, WmlComparerSettings settings) + { + string text = contentElement.Value; + if (settings.CaseInsensitive) + { + text = text.ToUpper(settings.CultureInfo); + } + + return contentElement.Name.LocalName + text; + } + + private static XElement GetRevisionTrackingElementFromAncestors( + XElement contentElement, + IEnumerable ancestors) + { + return contentElement.Name == W.pPr + ? contentElement.Elements(W.rPr).Elements().FirstOrDefault(e => e.Name == W.del || e.Name == W.ins) + : ancestors.FirstOrDefault(a => a.Name == W.del || a.Name == W.ins); + } + + public override string ToString() + { + return ToString(0); + } + + public override string ToString(int indent) + { + const int xNamePad = 16; + string indentString = "".PadRight(indent); + + var sb = new StringBuilder(); + sb.Append(indentString); + + var correlationStatus = ""; + if (CorrelationStatus != CorrelationStatus.Nil) + { + correlationStatus = $"[{CorrelationStatus.ToString().PadRight(8)}] "; + } + + if (ContentElement.Name == W.t || ContentElement.Name == W.delText) + { + sb.AppendFormat( + "Atom {0}: {1} {2} SHA1:{3} ", + PadLocalName(xNamePad, this), + ContentElement.Value, + correlationStatus, + SHA1Hash.Substring(0, 8)); + + AppendAncestorsDump(sb, this); + } + else + { + sb.AppendFormat( + "Atom {0}: {1} SHA1:{2} ", + PadLocalName(xNamePad, this), + correlationStatus, + SHA1Hash.Substring(0, 8)); + + AppendAncestorsDump(sb, this); + } + + return sb.ToString(); + } + + public string ToStringAncestorUnids() + { + return ToStringAncestorUnids(0); + } + + private string ToStringAncestorUnids(int indent) + { + const int xNamePad = 16; + string indentString = "".PadRight(indent); + + var sb = new StringBuilder(); + sb.Append(indentString); + + var correlationStatus = ""; + if (CorrelationStatus != CorrelationStatus.Nil) + { + correlationStatus = $"[{CorrelationStatus.ToString().PadRight(8)}] "; + } + + if (ContentElement.Name == W.t || ContentElement.Name == W.delText) + { + sb.AppendFormat( + "Atom {0}: {1} {2} SHA1:{3} ", + PadLocalName(xNamePad, this), + ContentElement.Value, + correlationStatus, + SHA1Hash.Substring(0, 8)); + + AppendAncestorsUnidsDump(sb, this); + } + else + { + sb.AppendFormat( + "Atom {0}: {1} SHA1:{2} ", + PadLocalName(xNamePad, this), + correlationStatus, + SHA1Hash.Substring(0, 8)); + + AppendAncestorsUnidsDump(sb, this); + } + + return sb.ToString(); + } + + private static string PadLocalName(int xNamePad, ComparisonUnitAtom item) + { + return (item.ContentElement.Name.LocalName + " ").PadRight(xNamePad, '-') + " "; + } + + private static void AppendAncestorsDump(StringBuilder sb, ComparisonUnitAtom sr) + { + string s = sr + .AncestorElements.Select(p => p.Name.LocalName + GetUnid(p) + "/") + .StringConcatenate() + .TrimEnd('/'); + + sb.Append("Ancestors:" + s); + } + + private static void AppendAncestorsUnidsDump(StringBuilder sb, ComparisonUnitAtom sr) + { + var zipped = sr.AncestorElements.Zip(sr.AncestorUnids, (a, u) => new + { + AncestorElement = a, + AncestorUnid = u + }); + + string s = zipped + .Select(p => p.AncestorElement.Name.LocalName + "[" + p.AncestorUnid.Substring(0, 8) + "]/") + .StringConcatenate().TrimEnd('/'); + + sb.Append("Ancestors:" + s); + } + + private static string GetUnid(XElement p) + { + var unid = (string) p.Attribute(PtOpenXml.Unid); + return unid == null ? "" : "[" + unid.Substring(0, 8) + "]"; + } + } +} diff --git a/OpenXmlPowerTools/Comparer/ComparisonUnitGroup.cs b/OpenXmlPowerTools/Comparer/ComparisonUnitGroup.cs new file mode 100644 index 00000000..360f61f3 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/ComparisonUnitGroup.cs @@ -0,0 +1,77 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Xml.Linq; + +namespace OpenXmlPowerTools +{ + internal class ComparisonUnitGroup : ComparisonUnit + { + public ComparisonUnitGroup( + IEnumerable comparisonUnitList, + ComparisonUnitGroupType groupType, + int level) + { + Contents = comparisonUnitList.ToList(); + ComparisonUnitGroupType = groupType; + ComparisonUnit first = Contents.First(); + ComparisonUnitAtom comparisonUnitAtom = GetFirstComparisonUnitAtomOfGroup(first); + + XElement[] ancestorsToLookAt = comparisonUnitAtom + .AncestorElements + .Where(e => e.Name == W.tbl || e.Name == W.tr || e.Name == W.tc || e.Name == W.p || e.Name == W.txbxContent) + .ToArray(); + + XElement ancestor = ancestorsToLookAt[level]; + if (ancestor == null) throw new OpenXmlPowerToolsException("Internal error: ComparisonUnitGroup"); + + SHA1Hash = (string) ancestor.Attribute(PtOpenXml.SHA1Hash); + CorrelatedSHA1Hash = (string) ancestor.Attribute(PtOpenXml.CorrelatedSHA1Hash); + StructureSHA1Hash = (string) ancestor.Attribute(PtOpenXml.StructureSHA1Hash); + } + + public ComparisonUnitGroupType ComparisonUnitGroupType { get; } + + public string CorrelatedSHA1Hash { get; } + + public string StructureSHA1Hash { get; } + + private static ComparisonUnitAtom GetFirstComparisonUnitAtomOfGroup(ComparisonUnit group) + { + ComparisonUnit thisGroup = group; + while (true) + { + if (thisGroup is ComparisonUnitGroup tg) + { + thisGroup = tg.Contents.First(); + continue; + } + + if (!(thisGroup is ComparisonUnitWord tw)) + { + throw new OpenXmlPowerToolsException("Internal error: GetFirstComparisonUnitAtomOfGroup"); + } + + var ca = (ComparisonUnitAtom) tw.Contents.First(); + return ca; + } + } + + public override string ToString(int indent) + { + var sb = new StringBuilder(); + sb.Append("".PadRight(indent) + "Group Type: " + ComparisonUnitGroupType + " SHA1:" + SHA1Hash + Environment.NewLine); + + foreach (ComparisonUnit comparisonUnitAtom in Contents) + { + sb.Append(comparisonUnitAtom.ToString(indent + 2)); + } + + return sb.ToString(); + } + } +} diff --git a/OpenXmlPowerTools/Comparer/ComparisonUnitGroupType.cs b/OpenXmlPowerTools/Comparer/ComparisonUnitGroupType.cs new file mode 100644 index 00000000..8315264c --- /dev/null +++ b/OpenXmlPowerTools/Comparer/ComparisonUnitGroupType.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +namespace OpenXmlPowerTools +{ + internal enum ComparisonUnitGroupType + { + Paragraph, + Table, + Row, + Cell, + Textbox, + }; +} diff --git a/OpenXmlPowerTools/Comparer/ComparisonUnitWord.cs b/OpenXmlPowerTools/Comparer/ComparisonUnitWord.cs new file mode 100644 index 00000000..ed0dd38e --- /dev/null +++ b/OpenXmlPowerTools/Comparer/ComparisonUnitWord.cs @@ -0,0 +1,81 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Xml.Linq; + +namespace OpenXmlPowerTools +{ + internal class ComparisonUnitWord : ComparisonUnit + { + public static readonly XName[] ElementsWithRelationshipIds = + { + A.blip, + A.hlinkClick, + A.relIds, + C.chart, + C.externalData, + C.userShapes, + DGM.relIds, + O.OLEObject, + VML.fill, + VML.imagedata, + VML.stroke, + W.altChunk, + W.attachedTemplate, + W.control, + W.dataSource, + W.embedBold, + W.embedBoldItalic, + W.embedItalic, + W.embedRegular, + W.footerReference, + W.headerReference, + W.headerSource, + W.hyperlink, + W.printerSettings, + W.recipientData, + W.saveThroughXslt, + W.sourceFileName, + W.src, + W.subDoc, + WNE.toolbarData + }; + + public static readonly XName[] RelationshipAttributeNames = + { + R.embed, + R.link, + R.id, + R.cs, + R.dm, + R.lo, + R.qs, + R.href, + R.pict + }; + + public ComparisonUnitWord(IEnumerable comparisonUnitAtomList) + { + Contents = comparisonUnitAtomList.OfType().ToList(); + string sha1String = Contents.Select(c => c.SHA1Hash).StringConcatenate(); + SHA1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(sha1String); + } + + public override string ToString(int indent) + { + var sb = new StringBuilder(); + sb.Append("".PadRight(indent) + "Word SHA1:" + SHA1Hash.Substring(0, 8) + Environment.NewLine); + + foreach (ComparisonUnit comparisonUnitAtom in Contents) + { + sb.Append(comparisonUnitAtom.ToString(indent + 2) + Environment.NewLine); + } + + return sb.ToString(); + } + } +} diff --git a/OpenXmlPowerTools/Comparer/CorrelatedSequence.cs b/OpenXmlPowerTools/Comparer/CorrelatedSequence.cs new file mode 100644 index 00000000..0e9d4057 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/CorrelatedSequence.cs @@ -0,0 +1,70 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Text; + +namespace OpenXmlPowerTools +{ + internal class CorrelatedSequence + { +#if DEBUG + public string SourceFile; + public int SourceLine; +#endif + + public CorrelatedSequence() + { +#if DEBUG + SourceFile = new System.Diagnostics.StackTrace(true).GetFrame(1).GetFileName(); + SourceLine = new System.Diagnostics.StackTrace(true).GetFrame(1).GetFileLineNumber(); +#endif + } + + public CorrelationStatus CorrelationStatus { get; set; } + + // if ComparisonUnitList1 == null and ComparisonUnitList2 contains sequence, then inserted content. + // if ComparisonUnitList2 == null and ComparisonUnitList1 contains sequence, then deleted content. + // if ComparisonUnitList2 contains sequence and ComparisonUnitList1 contains sequence, then either is Unknown or Equal. + public ComparisonUnit[] ComparisonUnitArray1 { get; set; } + + public ComparisonUnit[] ComparisonUnitArray2 { get; set; } + + public override string ToString() + { + var sb = new StringBuilder(); + const string indentString = " "; + const string indentString4 = " "; + sb.Append("CorrelatedSequence =====" + Environment.NewLine); +#if DEBUG + sb.Append(indentString + "Created at Line: " + SourceLine + Environment.NewLine); +#endif + sb.Append(indentString + "CorrelatedItem =====" + Environment.NewLine); + sb.Append(indentString4 + "CorrelationStatus: " + CorrelationStatus + Environment.NewLine); + if (CorrelationStatus == CorrelationStatus.Equal) + { + sb.Append(indentString4 + "ComparisonUnitList =====" + Environment.NewLine); + foreach (ComparisonUnit item in ComparisonUnitArray2) + sb.Append(item.ToString(6) + Environment.NewLine); + } + else + { + if (ComparisonUnitArray1 != null) + { + sb.Append(indentString4 + "ComparisonUnitList1 =====" + Environment.NewLine); + foreach (ComparisonUnit item in ComparisonUnitArray1) + sb.Append(item.ToString(6) + Environment.NewLine); + } + + if (ComparisonUnitArray2 != null) + { + sb.Append(indentString4 + "ComparisonUnitList2 =====" + Environment.NewLine); + foreach (ComparisonUnit item in ComparisonUnitArray2) + sb.Append(item.ToString(6) + Environment.NewLine); + } + } + + return sb.ToString(); + } + } +} diff --git a/OpenXmlPowerTools/Comparer/CorrelationStatus.cs b/OpenXmlPowerTools/Comparer/CorrelationStatus.cs new file mode 100644 index 00000000..f3448dda --- /dev/null +++ b/OpenXmlPowerTools/Comparer/CorrelationStatus.cs @@ -0,0 +1,16 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +namespace OpenXmlPowerTools +{ + public enum CorrelationStatus + { + Nil, + Normal, + Unknown, + Inserted, + Deleted, + Equal, + Group, + } +} diff --git a/OpenXmlPowerTools/Comparer/PartSHA1HashAnnotation.cs b/OpenXmlPowerTools/Comparer/PartSHA1HashAnnotation.cs new file mode 100644 index 00000000..b127f86a --- /dev/null +++ b/OpenXmlPowerTools/Comparer/PartSHA1HashAnnotation.cs @@ -0,0 +1,15 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +namespace OpenXmlPowerTools +{ + internal class PartSHA1HashAnnotation + { + public string Hash; + + public PartSHA1HashAnnotation(string hash) + { + Hash = hash; + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WithHierarchicalGroupingKey.cs b/OpenXmlPowerTools/Comparer/WithHierarchicalGroupingKey.cs new file mode 100644 index 00000000..0e71efcb --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WithHierarchicalGroupingKey.cs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +namespace OpenXmlPowerTools +{ + internal class WithHierarchicalGroupingKey + { + public string[] HierarchicalGroupingArray { get; set; } + + public ComparisonUnitWord ComparisonUnitWord { get; set; } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Internal.Methods.ComparisonUnits.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Internal.Methods.ComparisonUnits.cs new file mode 100644 index 00000000..e35729e1 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Internal.Methods.ComparisonUnits.cs @@ -0,0 +1,401 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Xml.Linq; +using DocumentFormat.OpenXml.Packaging; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + #region CreateComparisonUnitAtomList + + internal static ComparisonUnitAtom[] CreateComparisonUnitAtomList( + OpenXmlPart part, + XElement contentParent, + WmlComparerSettings settings) + { + VerifyNoInvalidContent(contentParent); + AssignUnidToAllElements(contentParent); // add the Guid id to every element + MoveLastSectPrIntoLastParagraph(contentParent); + ComparisonUnitAtom[] cal = CreateComparisonUnitAtomListInternal(part, contentParent, settings).ToArray(); + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in cal) + sb.Append(item + Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + return cal; + } + + private static void VerifyNoInvalidContent(XElement contentParent) + { + XElement invalidElement = contentParent.Descendants().FirstOrDefault(d => InvalidElements.Contains(d.Name)); + if (invalidElement == null) + return; + + throw new NotSupportedException("Document contains " + invalidElement.Name.LocalName); + } + + private static void MoveLastSectPrIntoLastParagraph(XElement contentParent) + { + List lastSectPrList = contentParent.Elements(W.sectPr).ToList(); + if (lastSectPrList.Count() > 1) + throw new OpenXmlPowerToolsException("Invalid document"); + + XElement lastSectPr = lastSectPrList.FirstOrDefault(); + if (lastSectPr != null) + { + XElement lastParagraph = contentParent.Elements(W.p).LastOrDefault(); + if (lastParagraph == null) + throw new OpenXmlPowerToolsException("Invalid document"); + + XElement pPr = lastParagraph.Element(W.pPr); + if (pPr == null) + { + pPr = new XElement(W.pPr); + lastParagraph.AddFirst(W.pPr); + } + + pPr.Add(lastSectPr); + contentParent.Elements(W.sectPr).Remove(); + } + } + + private static List CreateComparisonUnitAtomListInternal( + OpenXmlPart part, + XElement contentParent, + WmlComparerSettings settings) + { + var comparisonUnitAtomList = new List(); + CreateComparisonUnitAtomListRecurse(part, contentParent, comparisonUnitAtomList, settings); + return comparisonUnitAtomList; + } + + private static void CreateComparisonUnitAtomListRecurse( + OpenXmlPart part, + XElement element, + List comparisonUnitAtomList, + WmlComparerSettings settings) + { + if (element.Name == W.body || element.Name == W.footnote || element.Name == W.endnote) + { + foreach (XElement item in element.Elements()) + CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings); + return; + } + + if (element.Name == W.p) + { + IEnumerable paraChildrenToProcess = element + .Elements() + .Where(e => e.Name != W.pPr); + foreach (XElement item in paraChildrenToProcess) + CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings); + XElement paraProps = element.Element(W.pPr); + if (paraProps == null) + { + var pPrComparisonUnitAtom = new ComparisonUnitAtom( + new XElement(W.pPr), + element.AncestorsAndSelf() + .TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes).Reverse() + .ToArray(), + part, + settings); + comparisonUnitAtomList.Add(pPrComparisonUnitAtom); + } + else + { + var pPrComparisonUnitAtom = new ComparisonUnitAtom( + paraProps, + element.AncestorsAndSelf() + .TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes).Reverse() + .ToArray(), + part, + settings); + comparisonUnitAtomList.Add(pPrComparisonUnitAtom); + } + + return; + } + + if (element.Name == W.r) + { + IEnumerable runChildrenToProcess = element + .Elements() + .Where(e => e.Name != W.rPr); + foreach (XElement item in runChildrenToProcess) + CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings); + return; + } + + if (element.Name == W.t || element.Name == W.delText) + { + string val = element.Value; + foreach (char ch in val) + { + var sr = new ComparisonUnitAtom( + new XElement(element.Name, ch), + element.AncestorsAndSelf() + .TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes).Reverse() + .ToArray(), + part, + settings); + comparisonUnitAtomList.Add(sr); + } + + return; + } + + if (AllowableRunChildren.Contains(element.Name) || element.Name == W._object) + { + var sr3 = new ComparisonUnitAtom( + element, + element.AncestorsAndSelf().TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes) + .Reverse().ToArray(), + part, + settings); + comparisonUnitAtomList.Add(sr3); + return; + } + + RecursionInfo re = RecursionElements.FirstOrDefault(z => z.ElementName == element.Name); + if (re != null) + { + AnnotateElementWithProps(part, element, comparisonUnitAtomList, re.ChildElementPropertyNames, settings); + return; + } + + if (ElementsToThrowAway.Contains(element.Name)) + return; + + AnnotateElementWithProps(part, element, comparisonUnitAtomList, null, settings); + } + + private static void AnnotateElementWithProps( + OpenXmlPart part, + XElement element, + List comparisonUnitAtomList, + XName[] childElementPropertyNames, + WmlComparerSettings settings) + { + IEnumerable runChildrenToProcess; + if (childElementPropertyNames == null) + runChildrenToProcess = element.Elements(); + else + runChildrenToProcess = element + .Elements() + .Where(e => !childElementPropertyNames.Contains(e.Name)); + + foreach (XElement item in runChildrenToProcess) + CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings); + } + + #endregion CreateComparisonUnitAtomList + + #region GetComparisonUnitList + + // The following method must be made internal if we ever turn this part of the partial class + // into its own class. + private static ComparisonUnit[] GetComparisonUnitList( + ComparisonUnitAtom[] comparisonUnitAtomList, + WmlComparerSettings settings) + { + var seed = new Atgbw + { + Key = null, + ComparisonUnitAtomMember = null, + NextIndex = 0 + }; + + IEnumerable groupingKey = comparisonUnitAtomList + .Rollup(seed, (sr, prevAtgbw, i) => + { + int? key; + int nextIndex = prevAtgbw.NextIndex; + if (sr.ContentElement.Name == W.t) + { + string chr = sr.ContentElement.Value; + char ch = chr[0]; + if (ch == '.' || ch == ',') + { + var beforeIsDigit = false; + if (i > 0) + { + ComparisonUnitAtom prev = comparisonUnitAtomList[i - 1]; + if (prev.ContentElement.Name == W.t && char.IsDigit(prev.ContentElement.Value[0])) + beforeIsDigit = true; + } + + var afterIsDigit = false; + if (i < comparisonUnitAtomList.Length - 1) + { + ComparisonUnitAtom next = comparisonUnitAtomList[i + 1]; + if (next.ContentElement.Name == W.t && char.IsDigit(next.ContentElement.Value[0])) + afterIsDigit = true; + } + + if (beforeIsDigit || afterIsDigit) + { + key = nextIndex; + } + else + { + nextIndex++; + key = nextIndex; + nextIndex++; + } + } + else if (settings.WordSeparators.Contains(ch)) + { + nextIndex++; + key = nextIndex; + nextIndex++; + } + else + { + key = nextIndex; + } + } + else if (WordBreakElements.Contains(sr.ContentElement.Name)) + { + nextIndex++; + key = nextIndex; + nextIndex++; + } + else + { + key = nextIndex; + } + + return new Atgbw + { + Key = key, + ComparisonUnitAtomMember = sr, + NextIndex = nextIndex + }; + }) + .ToArray(); + + if (False) + { + var sb = new StringBuilder(); + foreach (Atgbw item in groupingKey) + { + sb.Append(item.Key + Environment.NewLine); + sb.Append(" " + item.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine); + } + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + IEnumerable> groupedByWords = groupingKey + .GroupAdjacent(gc => gc.Key) + .ToArray(); + + if (False) + { + var sb = new StringBuilder(); + foreach (IGrouping group in groupedByWords) + { + sb.Append("Group ===== " + @group.Key + Environment.NewLine); + foreach (Atgbw gc in @group) + { + sb.Append(" " + gc.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine); + } + } + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + WithHierarchicalGroupingKey[] withHierarchicalGroupingKey = groupedByWords + .Select(g => + { + string[] hierarchicalGroupingArray = g + .First() + .ComparisonUnitAtomMember + .AncestorElements + .Where(a => ComparisonGroupingElements.Contains(a.Name)) + .Select(a => a.Name.LocalName + ":" + (string) a.Attribute(PtOpenXml.Unid)) + .ToArray(); + + return new WithHierarchicalGroupingKey + { + ComparisonUnitWord = new ComparisonUnitWord(g.Select(gc => gc.ComparisonUnitAtomMember)), + HierarchicalGroupingArray = hierarchicalGroupingArray + }; + } + ) + .ToArray(); + + if (False) + { + var sb = new StringBuilder(); + foreach (WithHierarchicalGroupingKey group in withHierarchicalGroupingKey) + { + sb.Append("Grouping Array: " + + @group.HierarchicalGroupingArray.Select(gam => gam + " - ").StringConcatenate() + + Environment.NewLine); + foreach (ComparisonUnit gc in @group.ComparisonUnitWord.Contents) + { + sb.Append(" " + gc.ToString(0) + Environment.NewLine); + } + } + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + ComparisonUnit[] cul = GetHierarchicalComparisonUnits(withHierarchicalGroupingKey, 0).ToArray(); + + if (False) + { + string str = ComparisonUnit.ComparisonUnitListToString(cul); + TestUtil.NotePad(str); + } + + return cul; + } + + private static IEnumerable GetHierarchicalComparisonUnits( + IEnumerable input, + int level) + { + IEnumerable> grouped = input + .GroupAdjacent( + whgk => level >= whgk.HierarchicalGroupingArray.Length ? "" : whgk.HierarchicalGroupingArray[level]); + + List retList = grouped + .Select(gc => + { + if (gc.Key == "") + { + return (IEnumerable) gc.Select(whgk => whgk.ComparisonUnitWord).ToList(); + } + + string[] spl = gc.Key.Split(':'); + ComparisonUnitGroupType groupType = WmlComparerUtil.ComparisonUnitGroupTypeFromLocalName(spl[0]); + IEnumerable childHierarchicalComparisonUnits = GetHierarchicalComparisonUnits(gc, level + 1); + var newCompUnitGroup = new ComparisonUnitGroup(childHierarchicalComparisonUnits, groupType, level); + + return new[] { newCompUnitGroup }; + }) + .SelectMany(m => m) + .ToList(); + + return retList; + } + + #endregion GetComparisonUnitList + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Private.Fields.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Fields.cs new file mode 100644 index 00000000..257ba2c8 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Fields.cs @@ -0,0 +1,276 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Xml.Linq; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { +#pragma warning disable 414 + private static readonly bool False = false; + private static readonly bool True = true; + private static readonly bool SaveIntermediateFilesForDebugging = false; +#pragma warning restore 414 + + private static readonly string NewLine = Environment.NewLine; + + private static readonly XAttribute[] NamespaceAttributes = + { + new XAttribute(XNamespace.Xmlns + "wpc", WPC.wpc), + new XAttribute(XNamespace.Xmlns + "mc", MC.mc), + new XAttribute(XNamespace.Xmlns + "o", O.o), + new XAttribute(XNamespace.Xmlns + "r", R.r), + new XAttribute(XNamespace.Xmlns + "m", M.m), + new XAttribute(XNamespace.Xmlns + "v", VML.vml), + new XAttribute(XNamespace.Xmlns + "wp14", WP14.wp14), + new XAttribute(XNamespace.Xmlns + "wp", WP.wp), + new XAttribute(XNamespace.Xmlns + "w10", W10.w10), + new XAttribute(XNamespace.Xmlns + "w", W.w), + new XAttribute(XNamespace.Xmlns + "w14", W14.w14), + new XAttribute(XNamespace.Xmlns + "wpg", WPG.wpg), + new XAttribute(XNamespace.Xmlns + "wpi", WPI.wpi), + new XAttribute(XNamespace.Xmlns + "wne", WNE.wne), + new XAttribute(XNamespace.Xmlns + "wps", WPS.wps), + new XAttribute(MC.Ignorable, "w14 wp14") + }; + + private static readonly XName[] RevElementsWithNoText = + { + M.oMath, + M.oMathPara, + W.drawing + }; + + private static readonly XName[] AttributesToTrimWhenCloning = + { + WP14.anchorId, + WP14.editId, + "ObjectID", + "ShapeID", + "id", + "type" + }; + + private static int _maxId; + + private static readonly XName[] WordBreakElements = + { + W.pPr, + W.tab, + W.br, + W.continuationSeparator, + W.cr, + W.dayLong, + W.dayShort, + W.drawing, + W.pict, + W.endnoteRef, + W.footnoteRef, + W.monthLong, + W.monthShort, + W.noBreakHyphen, + W._object, + W.ptab, + W.separator, + W.sym, + W.yearLong, + W.yearShort, + M.oMathPara, + M.oMath, + W.footnoteReference, + W.endnoteReference + }; + + private static readonly XName[] AllowableRunChildren = + { + W.br, + W.drawing, + W.cr, + W.dayLong, + W.dayShort, + W.footnoteReference, + W.endnoteReference, + W.monthLong, + W.monthShort, + W.noBreakHyphen, + + //W._object, + W.pgNum, + W.ptab, + W.softHyphen, + W.sym, + W.tab, + W.yearLong, + W.yearShort, + M.oMathPara, + M.oMath, + W.fldChar, + W.instrText + }; + + private static readonly XName[] ElementsToThrowAway = + { + W.bookmarkStart, + W.bookmarkEnd, + W.commentRangeStart, + W.commentRangeEnd, + W.lastRenderedPageBreak, + W.proofErr, + W.tblPr, + W.sectPr, + W.permEnd, + W.permStart, + W.footnoteRef, + W.endnoteRef, + W.separator, + W.continuationSeparator + }; + + private static readonly XName[] ElementsToHaveSha1Hash = + { + W.p, + W.tbl, + W.tr, + W.tc, + W.drawing, + W.pict, + W.txbxContent + }; + + private static readonly XName[] InvalidElements = + { + W.altChunk, + W.customXml, + W.customXmlDelRangeEnd, + W.customXmlDelRangeStart, + W.customXmlInsRangeEnd, + W.customXmlInsRangeStart, + W.customXmlMoveFromRangeEnd, + W.customXmlMoveFromRangeStart, + W.customXmlMoveToRangeEnd, + W.customXmlMoveToRangeStart, + W.moveFrom, + W.moveFromRangeStart, + W.moveFromRangeEnd, + W.moveTo, + W.moveToRangeStart, + W.moveToRangeEnd, + W.subDoc + }; + + private static readonly RecursionInfo[] RecursionElements = + { + new RecursionInfo + { + ElementName = W.del, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = W.ins, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = W.tbl, + ChildElementPropertyNames = new[] { W.tblPr, W.tblGrid, W.tblPrEx } + }, + new RecursionInfo + { + ElementName = W.tr, + ChildElementPropertyNames = new[] { W.trPr, W.tblPrEx } + }, + new RecursionInfo + { + ElementName = W.tc, + ChildElementPropertyNames = new[] { W.tcPr, W.tblPrEx } + }, + new RecursionInfo + { + ElementName = W.pict, + ChildElementPropertyNames = new[] { VML.shapetype } + }, + new RecursionInfo + { + ElementName = VML.group, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = VML.shape, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = VML.rect, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = VML.textbox, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = O._lock, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = W.txbxContent, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = W10.wrap, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = W.sdt, + ChildElementPropertyNames = new[] { W.sdtPr, W.sdtEndPr } + }, + new RecursionInfo + { + ElementName = W.sdtContent, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = W.hyperlink, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = W.fldSimple, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = VML.shapetype, + ChildElementPropertyNames = null + }, + new RecursionInfo + { + ElementName = W.smartTag, + ChildElementPropertyNames = new[] { W.smartTagPr } + }, + new RecursionInfo + { + ElementName = W.ruby, + ChildElementPropertyNames = new[] { W.rubyPr } + } + }; + + private static readonly XName[] ComparisonGroupingElements = + { + W.p, + W.tbl, + W.tr, + W.tc, + W.txbxContent + }; + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.Hashing.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.Hashing.cs new file mode 100644 index 00000000..14129746 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.Hashing.cs @@ -0,0 +1,328 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Xml.Linq; +using DocumentFormat.OpenXml.Packaging; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + private static WmlDocument HashBlockLevelContent( + WmlDocument source, + WmlDocument sourceAfterProc, + WmlComparerSettings settings) + { + using (var msSource = new MemoryStream()) + using (var msAfterProc = new MemoryStream()) + { + msSource.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length); + msAfterProc.Write(sourceAfterProc.DocumentByteArray, 0, sourceAfterProc.DocumentByteArray.Length); + + using (WordprocessingDocument wDocSource = WordprocessingDocument.Open(msSource, true)) + using (WordprocessingDocument wDocAfterProc = WordprocessingDocument.Open(msAfterProc, true)) + { + // create Unid dictionary for source + XDocument sourceMainXDoc = wDocSource.MainDocumentPart.GetXDocument(); + XElement sourceMainRoot = sourceMainXDoc.Root ?? throw new ArgumentException(); + Dictionary sourceUnidDict = sourceMainRoot + .Descendants() + .Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr) + .ToDictionary(d => (string) d.Attribute(PtOpenXml.Unid)); + + XDocument afterProcMainXDoc = wDocAfterProc.MainDocumentPart.GetXDocument(); + XElement afterProcMainRoot = afterProcMainXDoc.Root ?? throw new ArgumentException(); + IEnumerable blockLevelElements = afterProcMainRoot + .Descendants() + .Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr); + + foreach (XElement blockLevelContent in blockLevelElements) + { + var cloneBlockLevelContentForHashing = (XElement) CloneBlockLevelContentForHashing( + wDocAfterProc.MainDocumentPart, + blockLevelContent, + true, + settings); + + string shaString = cloneBlockLevelContentForHashing + .ToString(SaveOptions.DisableFormatting) + .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", ""); + + string sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaString); + var thisUnid = (string) blockLevelContent.Attribute(PtOpenXml.Unid); + if (thisUnid != null) + { + if (sourceUnidDict.ContainsKey(thisUnid)) + { + XElement correlatedBlockLevelContent = sourceUnidDict[thisUnid]; + correlatedBlockLevelContent.Add(new XAttribute(PtOpenXml.CorrelatedSHA1Hash, sha1Hash)); + } + } + } + + wDocSource.MainDocumentPart.PutXDocument(); + } + + var sourceWithCorrelatedSHA1Hash = new WmlDocument(source.FileName, msSource.ToArray()); + return sourceWithCorrelatedSHA1Hash; + } + } + + // prohibit + // - altChunk + // - subDoc + // - contentPart + + // This strips all text nodes from the XML tree, thereby leaving only the structure. + + private static object CloneBlockLevelContentForHashing( + OpenXmlPart mainDocumentPart, + XNode node, + bool includeRelatedParts, + WmlComparerSettings settings) + { + if (node is XElement element) + { + if (element.Name == W.bookmarkStart || + element.Name == W.bookmarkEnd || + element.Name == W.pPr || + element.Name == W.rPr) + { + return null; + } + + if (element.Name == W.p) + { + var clonedPara = new XElement(element.Name, + element.Attributes().Where(a => a.Name != W.rsid && + a.Name != W.rsidDel && + a.Name != W.rsidP && + a.Name != W.rsidR && + a.Name != W.rsidRDefault && + a.Name != W.rsidRPr && + a.Name != W.rsidSect && + a.Name != W.rsidTr && + a.Name.Namespace != PtOpenXml.pt), + element.Nodes().Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + + IEnumerable> groupedRuns = clonedPara + .Elements() + .GroupAdjacent(e => e.Name == W.r && + e.Elements().Count() == 1 && + e.Element(W.t) != null); + + var clonedParaWithGroupedRuns = new XElement(element.Name, + groupedRuns.Select(g => + { + if (g.Key) + { + string text = g.Select(t => t.Value).StringConcatenate(); + if (settings.CaseInsensitive) + text = text.ToUpper(settings.CultureInfo); + var newRun = (object) new XElement(W.r, + new XElement(W.t, + text)); + return newRun; + } + + return g; + })); + + return clonedParaWithGroupedRuns; + } + + if (element.Name == W.r) + { + IEnumerable clonedRuns = element + .Elements() + .Where(e => e.Name != W.rPr) + .Select(rc => new XElement(W.r, + CloneBlockLevelContentForHashing(mainDocumentPart, rc, includeRelatedParts, settings))); + return clonedRuns; + } + + if (element.Name == W.tbl) + { + var clonedTable = new XElement(W.tbl, + element.Elements(W.tr).Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return clonedTable; + } + + if (element.Name == W.tr) + { + var clonedRow = new XElement(W.tr, + element.Elements(W.tc).Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return clonedRow; + } + + if (element.Name == W.tc) + { + var clonedCell = new XElement(W.tc, + element.Elements().Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return clonedCell; + } + + if (element.Name == W.tcPr) + { + var clonedCellProps = new XElement(W.tcPr, + element.Elements(W.gridSpan).Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return clonedCellProps; + } + + if (element.Name == W.gridSpan) + { + var clonedGridSpan = new XElement(W.gridSpan, + new XAttribute("val", (string) element.Attribute(W.val))); + return clonedGridSpan; + } + + if (element.Name == W.txbxContent) + { + var clonedTextbox = new XElement(W.txbxContent, + element.Elements().Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return clonedTextbox; + } + + if (includeRelatedParts) + { + if (ComparisonUnitWord.ElementsWithRelationshipIds.Contains(element.Name)) + { + var newElement = new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt) + .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name)) + .Select(a => + { + if (!ComparisonUnitWord.RelationshipAttributeNames.Contains(a.Name)) + return a; + + var rId = (string) a; + + // could be an hyperlink relationship + try + { + OpenXmlPart oxp = mainDocumentPart.GetPartById(rId); + if (oxp == null) + throw new FileFormatException("Invalid WordprocessingML Document"); + + var anno = oxp.Annotation(); + if (anno != null) + return new XAttribute(a.Name, anno.Hash); + + if (!oxp.ContentType.EndsWith("xml")) + { + using (Stream str = oxp.GetStream()) + { + byte[] ba; + using (var br = new BinaryReader(str)) + { + ba = br.ReadBytes((int) str.Length); + } + + string sha1 = WmlComparerUtil.SHA1HashStringForByteArray(ba); + oxp.AddAnnotation(new PartSHA1HashAnnotation(sha1)); + return new XAttribute(a.Name, sha1); + } + } + } + catch (ArgumentOutOfRangeException) + { + HyperlinkRelationship hr = + mainDocumentPart.HyperlinkRelationships.FirstOrDefault(z => z.Id == rId); + if (hr != null) + { + string str = hr.Uri.ToString(); + return new XAttribute(a.Name, str); + } + + // could be an external relationship + ExternalRelationship er = + mainDocumentPart.ExternalRelationships.FirstOrDefault(z => z.Id == rId); + if (er != null) + { + string str = er.Uri.ToString(); + return new XAttribute(a.Name, str); + } + + return new XAttribute(a.Name, "NULL Relationship"); + } + + return null; + }), + element.Nodes().Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return newElement; + } + } + + if (element.Name == VML.shape) + { + return new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt) + .Where(a => a.Name != "style" && a.Name != "id" && a.Name != "type"), + element.Nodes().Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + } + + if (element.Name == O.OLEObject) + { + var o = new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt) + .Where(a => a.Name != "ObjectID" && a.Name != R.id), + element.Nodes().Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return o; + } + + if (element.Name == W._object) + { + var o = new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt), + element.Nodes().Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return o; + } + + if (element.Name == WP.docPr) + { + return new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt && a.Name != "id"), + element.Nodes().Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + } + + return new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt) + .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name)), + element.Nodes().Select(n => + CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + } + + if (settings.CaseInsensitive) + { + if (node is XText xt) + { + string newText = xt.Value.ToUpper(settings.CultureInfo); + return new XText(newText); + } + } + + return node; + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.Lcs.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.Lcs.cs new file mode 100644 index 00000000..61482068 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.Lcs.cs @@ -0,0 +1,1326 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Xml.Linq; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + private static List DoLcsAlgorithm(CorrelatedSequence unknown, WmlComparerSettings settings) + { + var newListOfCorrelatedSequence = new List(); + + ComparisonUnit[] cul1 = unknown.ComparisonUnitArray1; + ComparisonUnit[] cul2 = unknown.ComparisonUnitArray2; + + // first thing to do - if we have an unknown with zero length on left or right side, create appropriate + // this is a code optimization that enables easier processing of cases elsewhere. + if (cul1.Length > 0 && cul2.Length == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = cul1, + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + return newListOfCorrelatedSequence; + } + + if (cul1.Length == 0 && cul2.Length > 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = cul2 + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + return newListOfCorrelatedSequence; + } + + if (cul1.Length == 0 && cul2.Length == 0) + { + // this will effectively remove the unknown with no data on either side from the current data model. + return newListOfCorrelatedSequence; + } + + var currentLongestCommonSequenceLength = 0; + int currentI1 = -1; + int currentI2 = -1; + for (var i1 = 0; i1 < cul1.Length - currentLongestCommonSequenceLength; i1++) + { + for (var i2 = 0; i2 < cul2.Length - currentLongestCommonSequenceLength; i2++) + { + var thisSequenceLength = 0; + int thisI1 = i1; + int thisI2 = i2; + + while (true) + { + if (cul1[thisI1].SHA1Hash == cul2[thisI2].SHA1Hash) + { + thisI1++; + thisI2++; + thisSequenceLength++; + + if (thisI1 == cul1.Length || thisI2 == cul2.Length) + { + if (thisSequenceLength > currentLongestCommonSequenceLength) + { + currentLongestCommonSequenceLength = thisSequenceLength; + currentI1 = i1; + currentI2 = i2; + } + + break; + } + } + else + { + if (thisSequenceLength > currentLongestCommonSequenceLength) + { + currentLongestCommonSequenceLength = thisSequenceLength; + currentI1 = i1; + currentI2 = i2; + } + + break; + } + } + } + } + + // never start a common section with a paragraph mark. + while (true) + { + if (currentLongestCommonSequenceLength <= 1) + break; + + ComparisonUnit firstCommon = cul1[currentI1]; + + if (!(firstCommon is ComparisonUnitWord firstCommonWord)) + break; + + // if the word contains more than one atom, then not a paragraph mark + if (firstCommonWord.Contents.Count != 1) + break; + + if (!(firstCommonWord.Contents.First() is ComparisonUnitAtom firstCommonAtom)) + break; + + if (firstCommonAtom.ContentElement.Name != W.pPr) + break; + + --currentLongestCommonSequenceLength; + if (currentLongestCommonSequenceLength == 0) + { + currentI1 = -1; + currentI2 = -1; + } + else + { + ++currentI1; + ++currentI2; + } + } + + var isOnlyParagraphMark = false; + if (currentLongestCommonSequenceLength == 1) + { + ComparisonUnit firstCommon = cul1[currentI1]; + + if (firstCommon is ComparisonUnitWord firstCommonWord) + { + // if the word contains more than one atom, then not a paragraph mark + if (firstCommonWord.Contents.Count == 1) + { + if (firstCommonWord.Contents.First() is ComparisonUnitAtom firstCommonAtom) + { + if (firstCommonAtom.ContentElement.Name == W.pPr) + isOnlyParagraphMark = true; + } + } + } + } + + // don't match just a single character + if (currentLongestCommonSequenceLength == 1) + { + if (cul2[currentI2] is ComparisonUnitAtom cuw2) + { + if (cuw2.ContentElement.Name == W.t && cuw2.ContentElement.Value == " ") + { + currentI1 = -1; + currentI2 = -1; + currentLongestCommonSequenceLength = 0; + } + } + } + + // don't match only word break characters + if (currentLongestCommonSequenceLength > 0 && currentLongestCommonSequenceLength <= 3) + { + ComparisonUnit[] commonSequence = cul1.Skip(currentI1).Take(currentLongestCommonSequenceLength).ToArray(); + + // if they are all ComparisonUnitWord objects + bool oneIsNotWord = commonSequence.Any(cs => !(cs is ComparisonUnitWord)); + bool allAreWords = !oneIsNotWord; + if (allAreWords) + { + bool contentOtherThanWordSplitChars = commonSequence + .Cast() + .Any(cs => + { + bool otherThanText = cs.DescendantContentAtoms().Any(dca => dca.ContentElement.Name != W.t); + if (otherThanText) return true; + + bool otherThanWordSplit = cs + .DescendantContentAtoms() + .Any(dca => + { + string charValue = dca.ContentElement.Value; + bool isWordSplit = settings.WordSeparators.Contains(charValue[0]); + return !isWordSplit; + }); + + return otherThanWordSplit; + }); + if (!contentOtherThanWordSplitChars) + { + currentI1 = -1; + currentI2 = -1; + currentLongestCommonSequenceLength = 0; + } + } + } + + // if we are only looking at text, and if the longest common subsequence is less than 15% of the whole, then forget it, + // don't find that LCS. + if (!isOnlyParagraphMark && currentLongestCommonSequenceLength > 0) + { + bool anyButWord1 = cul1.Any(cu => !(cu is ComparisonUnitWord)); + bool anyButWord2 = cul2.Any(cu => !(cu is ComparisonUnitWord)); + + if (!anyButWord1 && !anyButWord2) + { + int maxLen = Math.Max(cul1.Length, cul2.Length); + if (currentLongestCommonSequenceLength / (double) maxLen < settings.DetailThreshold) + { + currentI1 = -1; + currentI2 = -1; + currentLongestCommonSequenceLength = 0; + } + } + } + + if (currentI1 == -1 && currentI2 == -1) + { + int leftLength = unknown.ComparisonUnitArray1.Length; + + int leftTables = unknown + .ComparisonUnitArray1 + .OfType() + .Count(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Table); + + int leftRows = unknown + .ComparisonUnitArray1 + .OfType() + .Count(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Row); + + int leftParagraphs = unknown + .ComparisonUnitArray1 + .OfType() + .Count(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph); + + int leftTextboxes = unknown + .ComparisonUnitArray1 + .OfType() + .Count(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox); + + int leftWords = unknown + .ComparisonUnitArray1 + .OfType() + .Count(); + + int rightLength = unknown.ComparisonUnitArray2.Length; + + int rightTables = unknown + .ComparisonUnitArray2 + .OfType() + .Count(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Table); + + int rightRows = unknown + .ComparisonUnitArray2 + .OfType() + .Count(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Row); + + int rightParagraphs = unknown + .ComparisonUnitArray2 + .OfType() + .Count(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph); + + int rightTextboxes = unknown + .ComparisonUnitArray2 + .OfType() + .Count(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox); + + int rightWords = unknown + .ComparisonUnitArray2 + .OfType() + .Count(); + + // if either side has both words, rows and text boxes, then we need to separate out into separate + // unknown correlated sequences + // group adjacent based on whether word, row, or textbox + // in most cases, the count of groups will be the same, but they may differ + // if the first group on either side is word, then create a deleted or inserted corr sequ for it. + // then have counter on both sides pointing to the first matched pairs of rows + // create an unknown corr sequ for it. + // increment both counters + // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done. + // if both are at the end, then done + // return the new list of corr sequ + + bool leftOnlyWordsRowsTextboxes = leftLength == leftWords + leftRows + leftTextboxes; + bool rightOnlyWordsRowsTextboxes = rightLength == rightWords + rightRows + rightTextboxes; + if ((leftWords > 0 || rightWords > 0) && + (leftRows > 0 || rightRows > 0 || leftTextboxes > 0 || rightTextboxes > 0) && + leftOnlyWordsRowsTextboxes && + rightOnlyWordsRowsTextboxes) + { + IGrouping[] leftGrouped = unknown + .ComparisonUnitArray1 + .GroupAdjacent(cu => + { + if (cu is ComparisonUnitWord) + { + return "Word"; + } + + var cug = cu as ComparisonUnitGroup; + if (cug?.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + { + return "Row"; + } + + if (cug?.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox) + { + return "Textbox"; + } + + throw new OpenXmlPowerToolsException("Internal error"); + }) + .ToArray(); + + IGrouping[] rightGrouped = unknown + .ComparisonUnitArray2 + .GroupAdjacent(cu => + { + if (cu is ComparisonUnitWord) + { + return "Word"; + } + + var cug = cu as ComparisonUnitGroup; + if (cug?.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + { + return "Row"; + } + + if (cug?.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox) + { + return "Textbox"; + } + + throw new OpenXmlPowerToolsException("Internal error"); + }) + .ToArray(); + + var iLeft = 0; + var iRight = 0; + + // create an unknown corr sequ for it. + // increment both counters + // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done. + // if both are at the end, then done + // return the new list of corr sequ + + while (true) + { + if (leftGrouped[iLeft].Key == rightGrouped[iRight].Key) + { + var unknownCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = leftGrouped[iLeft].ToArray(), + ComparisonUnitArray2 = rightGrouped[iRight].ToArray(), + CorrelationStatus = CorrelationStatus.Unknown + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + ++iLeft; + ++iRight; + } + + // have to decide which of the following two branches to do first based on whether the left contains a paragraph mark + // i.e. cant insert a string of deleted text right before a table. + + else if (leftGrouped[iLeft].Key == "Word" && + leftGrouped[iLeft] + .Select(lg => lg.DescendantContentAtoms()) + .SelectMany(m => m).Last() + .ContentElement + .Name != W.pPr && + rightGrouped[iRight].Key == "Row") + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = rightGrouped[iRight].ToArray(), + CorrelationStatus = CorrelationStatus.Inserted + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + ++iRight; + } + else if (rightGrouped[iRight].Key == "Word" && + rightGrouped[iRight] + .Select(lg => lg.DescendantContentAtoms()) + .SelectMany(m => m) + .Last() + .ContentElement + .Name != W.pPr && + leftGrouped[iLeft].Key == "Row") + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = leftGrouped[iLeft].ToArray(), + CorrelationStatus = CorrelationStatus.Inserted + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + ++iLeft; + } + + else if (leftGrouped[iLeft].Key == "Word" && rightGrouped[iRight].Key != "Word") + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = leftGrouped[iLeft].ToArray(), + ComparisonUnitArray2 = null, + CorrelationStatus = CorrelationStatus.Deleted + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + ++iLeft; + } + + else if (leftGrouped[iLeft].Key != "Word" && rightGrouped[iRight].Key == "Word") + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = rightGrouped[iRight].ToArray(), + CorrelationStatus = CorrelationStatus.Inserted + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + ++iRight; + } + + if (iLeft == leftGrouped.Length && iRight == rightGrouped.Length) + return newListOfCorrelatedSequence; + + // if there is content on the left, but not content on the right + if (iRight == rightGrouped.Length) + { + for (int j = iLeft; j < leftGrouped.Length; j++) + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = leftGrouped[j].ToArray(), + ComparisonUnitArray2 = null, + CorrelationStatus = CorrelationStatus.Deleted + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + + return newListOfCorrelatedSequence; + } + + // there is content on the right but not on the left + + if (iLeft == leftGrouped.Length) + { + for (int j = iRight; j < rightGrouped.Length; j++) + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = rightGrouped[j].ToArray(), + CorrelationStatus = CorrelationStatus.Inserted + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + + return newListOfCorrelatedSequence; + } + + // else continue on next round. + } + } + + // if both sides contain tables and paragraphs, then split into multiple unknown corr sequ + if (leftTables > 0 && rightTables > 0 && + leftParagraphs > 0 && rightParagraphs > 0 && + (leftLength > 1 || rightLength > 1)) + { + IGrouping[] leftGrouped = unknown + .ComparisonUnitArray1 + .GroupAdjacent(cu => + { + var cug = (ComparisonUnitGroup) cu; + return cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Table ? "Table" : "Para"; + }) + .ToArray(); + + IGrouping[] rightGrouped = unknown + .ComparisonUnitArray2 + .GroupAdjacent(cu => + { + var cug = (ComparisonUnitGroup) cu; + return cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Table ? "Table" : "Para"; + }) + .ToArray(); + + var iLeft = 0; + var iRight = 0; + + // create an unknown corr sequ for it. + // increment both counters + // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done. + // if both are at the end, then done + // return the new list of corr sequ + + while (true) + { + if (leftGrouped[iLeft].Key == "Table" && rightGrouped[iRight].Key == "Table" || + leftGrouped[iLeft].Key == "Para" && rightGrouped[iRight].Key == "Para") + { + var unknownCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = leftGrouped[iLeft].ToArray(), + ComparisonUnitArray2 = rightGrouped[iRight].ToArray(), + CorrelationStatus = CorrelationStatus.Unknown + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + ++iLeft; + ++iRight; + } + else if (leftGrouped[iLeft].Key == "Para" && rightGrouped[iRight].Key == "Table") + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = leftGrouped[iLeft].ToArray(), + ComparisonUnitArray2 = null, + CorrelationStatus = CorrelationStatus.Deleted + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + ++iLeft; + } + else if (leftGrouped[iLeft].Key == "Table" && rightGrouped[iRight].Key == "Para") + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = rightGrouped[iRight].ToArray(), + CorrelationStatus = CorrelationStatus.Inserted + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + ++iRight; + } + + if (iLeft == leftGrouped.Length && iRight == rightGrouped.Length) + return newListOfCorrelatedSequence; + + // if there is content on the left, but not content on the right + if (iRight == rightGrouped.Length) + { + for (int j = iLeft; j < leftGrouped.Length; j++) + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = leftGrouped[j].ToArray(), + ComparisonUnitArray2 = null, + CorrelationStatus = CorrelationStatus.Deleted + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + + return newListOfCorrelatedSequence; + } + + // there is content on the right but not on the left + + if (iLeft == leftGrouped.Length) + { + for (int j = iRight; j < rightGrouped.Length; j++) + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = rightGrouped[j].ToArray(), + CorrelationStatus = CorrelationStatus.Inserted + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + + return newListOfCorrelatedSequence; + } + + // else continue on next round. + } + } + + // If both sides consists of a single table, and if the table contains merged cells, then mark as deleted/inserted + if (leftTables == 1 && leftLength == 1 && + rightTables == 1 && rightLength == 1) + { + List result = DoLcsAlgorithmForTable(unknown); + if (result != null) + return result; + } + + // If either side contains only paras or tables, then flatten and iterate. + bool leftOnlyParasTablesTextboxes = leftLength == leftTables + leftParagraphs + leftTextboxes; + bool rightOnlyParasTablesTextboxes = rightLength == rightTables + rightParagraphs + rightTextboxes; + if (leftOnlyParasTablesTextboxes && rightOnlyParasTablesTextboxes) + { + // flatten paras and tables, and iterate + ComparisonUnit[] left = unknown + .ComparisonUnitArray1 + .Select(cu => cu.Contents) + .SelectMany(m => m) + .ToArray(); + + ComparisonUnit[] right = unknown + .ComparisonUnitArray2 + .Select(cu => cu.Contents) + .SelectMany(m => m) + .ToArray(); + + var unknownCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = left, + ComparisonUnitArray2 = right + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + + return newListOfCorrelatedSequence; + } + + // if first of left is a row and first of right is a row + // then flatten the row to cells and iterate. + + if (unknown.ComparisonUnitArray1.FirstOrDefault() is ComparisonUnitGroup firstLeft && + unknown.ComparisonUnitArray2.FirstOrDefault() is ComparisonUnitGroup firstRight) + { + if (firstLeft.ComparisonUnitGroupType == ComparisonUnitGroupType.Row && + firstRight.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + { + ComparisonUnit[] leftContent = firstLeft.Contents.ToArray(); + ComparisonUnit[] rightContent = firstRight.Contents.ToArray(); + + int lenLeft = leftContent.Length; + int lenRight = rightContent.Length; + + if (lenLeft < lenRight) + { + leftContent = leftContent + .Concat(Enumerable.Repeat(null, lenRight - lenLeft)) + .ToArray(); + } + else if (lenRight < lenLeft) + { + rightContent = rightContent + .Concat(Enumerable.Repeat(null, lenLeft - lenRight)) + .ToArray(); + } + + List newCs = leftContent.Zip(rightContent, (l, r) => + { + if (l != null && r != null) + { + var unknownCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = new[] { l }, + ComparisonUnitArray2 = new[] { r }, + CorrelationStatus = CorrelationStatus.Unknown + }; + return new[] { unknownCorrelatedSequence }; + } + + if (l == null) + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = r.Contents.ToArray(), + CorrelationStatus = CorrelationStatus.Inserted + }; + return new[] { insertedCorrelatedSequence }; + } + + var deletedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = l.Contents.ToArray(), + ComparisonUnitArray2 = null, + CorrelationStatus = CorrelationStatus.Deleted + }; + return new[] { deletedCorrelatedSequence }; + }) + .SelectMany(m => m) + .ToList(); + + foreach (CorrelatedSequence cs in newCs) + { + newListOfCorrelatedSequence.Add(cs); + } + + ComparisonUnit[] remainderLeft = unknown + .ComparisonUnitArray1 + .Skip(1) + .ToArray(); + + ComparisonUnit[] remainderRight = unknown + .ComparisonUnitArray2 + .Skip(1) + .ToArray(); + + if (remainderLeft.Length > 0 && remainderRight.Length == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = remainderLeft, + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (remainderRight.Length > 0 && remainderLeft.Length == 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = remainderRight + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (remainderLeft.Length > 0 && remainderRight.Length > 0) + { + var unknownCorrelatedSequence2 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = remainderLeft, + ComparisonUnitArray2 = remainderRight + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence2); + } + + if (False) + { + var sb = new StringBuilder(); + foreach (CorrelatedSequence item in newListOfCorrelatedSequence) + { + sb.Append(item).Append(Environment.NewLine); + } + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + return newListOfCorrelatedSequence; + } + + if (firstLeft.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell && + firstRight.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell) + { + ComparisonUnit[] left = firstLeft + .Contents + .ToArray(); + + ComparisonUnit[] right = firstRight + .Contents + .ToArray(); + + var unknownCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = left, + ComparisonUnitArray2 = right + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + + ComparisonUnit[] remainderLeft = unknown + .ComparisonUnitArray1 + .Skip(1) + .ToArray(); + + ComparisonUnit[] remainderRight = unknown + .ComparisonUnitArray2 + .Skip(1) + .ToArray(); + + if (remainderLeft.Length > 0 && remainderRight.Length == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = remainderLeft, + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (remainderRight.Length > 0 && remainderLeft.Length == 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = remainderRight + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (remainderLeft.Length > 0 && remainderRight.Length > 0) + { + var unknownCorrelatedSequence2 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = remainderLeft, + ComparisonUnitArray2 = remainderRight + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence2); + } + + return newListOfCorrelatedSequence; + } + } + + if (unknown.ComparisonUnitArray1.Any() && unknown.ComparisonUnitArray2.Any()) + { + if (unknown.ComparisonUnitArray1.First() is ComparisonUnitWord && + unknown.ComparisonUnitArray2.First() is ComparisonUnitGroup right && + right.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + { + var insertedCorrelatedSequence3 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = unknown.ComparisonUnitArray2 + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence3); + + var deletedCorrelatedSequence3 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = unknown.ComparisonUnitArray1, + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence3); + + return newListOfCorrelatedSequence; + } + + if (unknown.ComparisonUnitArray2.First() is ComparisonUnitWord && + unknown.ComparisonUnitArray1.First() is ComparisonUnitGroup left2 && + left2.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + { + var deletedCorrelatedSequence3 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = unknown.ComparisonUnitArray1, + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence3); + + var insertedCorrelatedSequence3 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = unknown.ComparisonUnitArray2 + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence3); + + return newListOfCorrelatedSequence; + } + + ComparisonUnitAtom lastContentAtomLeft = unknown + .ComparisonUnitArray1 + .Select(cu => cu.DescendantContentAtoms().Last()) + .LastOrDefault(); + + ComparisonUnitAtom lastContentAtomRight = unknown + .ComparisonUnitArray2 + .Select(cu => cu.DescendantContentAtoms().Last()) + .LastOrDefault(); + + if (lastContentAtomLeft != null && lastContentAtomRight != null) + { + if (lastContentAtomLeft.ContentElement.Name == W.pPr && + lastContentAtomRight.ContentElement.Name != W.pPr) + { + var insertedCorrelatedSequence5 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = unknown.ComparisonUnitArray2 + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence5); + + var deletedCorrelatedSequence5 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = unknown.ComparisonUnitArray1, + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence5); + + return newListOfCorrelatedSequence; + } + + if (lastContentAtomLeft.ContentElement.Name != W.pPr && + lastContentAtomRight.ContentElement.Name == W.pPr) + { + var deletedCorrelatedSequence5 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = unknown.ComparisonUnitArray1, + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence5); + + var insertedCorrelatedSequence5 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = unknown.ComparisonUnitArray2 + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence5); + + return newListOfCorrelatedSequence; + } + } + } + + var deletedCorrelatedSequence4 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = unknown.ComparisonUnitArray1, + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence4); + + var insertedCorrelatedSequence4 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = unknown.ComparisonUnitArray2 + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence4); + + return newListOfCorrelatedSequence; + } + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // here we have the longest common subsequence. + // but it may start in the middle of a paragraph. + // therefore need to dispose of the content from the beginning of the longest common subsequence to the + // beginning of the paragraph. + // this should be in a separate unknown region + // if countCommonAtEnd != 0, and if it contains a paragraph mark, then if there are comparison units in + // the same paragraph before the common at end (in either version) + // then we want to put all of those comparison units into a single unknown, where they must be resolved + // against each other. We don't want those comparison units to go into the middle unknown comparison unit. + + var remainingInLeftParagraph = 0; + var remainingInRightParagraph = 0; + if (currentLongestCommonSequenceLength != 0) + { + List commonSeq = unknown + .ComparisonUnitArray1 + .Skip(currentI1) + .Take(currentLongestCommonSequenceLength) + .ToList(); + + ComparisonUnit firstOfCommonSeq = commonSeq.First(); + if (firstOfCommonSeq is ComparisonUnitWord) + { + // are there any paragraph marks in the common seq at end? + if (commonSeq.Any(cu => + { + ComparisonUnitAtom firstComparisonUnitAtom = cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return false; + + return firstComparisonUnitAtom.ContentElement.Name == W.pPr; + })) + { + remainingInLeftParagraph = unknown + .ComparisonUnitArray1 + .Take(currentI1) + .Reverse() + .TakeWhile(cu => + { + if (!(cu is ComparisonUnitWord)) + return false; + + ComparisonUnitAtom firstComparisonUnitAtom = + cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return true; + + return firstComparisonUnitAtom.ContentElement.Name != W.pPr; + }) + .Count(); + remainingInRightParagraph = unknown + .ComparisonUnitArray2 + .Take(currentI2) + .Reverse() + .TakeWhile(cu => + { + if (!(cu is ComparisonUnitWord)) + return false; + + ComparisonUnitAtom firstComparisonUnitAtom = + cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return true; + + return firstComparisonUnitAtom.ContentElement.Name != W.pPr; + }) + .Count(); + } + } + } + + int countBeforeCurrentParagraphLeft = currentI1 - remainingInLeftParagraph; + int countBeforeCurrentParagraphRight = currentI2 - remainingInRightParagraph; + + if (countBeforeCurrentParagraphLeft > 0 && countBeforeCurrentParagraphRight == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = cul1 + .Take(countBeforeCurrentParagraphLeft) + .ToArray(), + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (countBeforeCurrentParagraphLeft == 0 && countBeforeCurrentParagraphRight > 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = cul2 + .Take(countBeforeCurrentParagraphRight) + .ToArray() + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (countBeforeCurrentParagraphLeft > 0 && countBeforeCurrentParagraphRight > 0) + { + var unknownCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = cul1 + .Take(countBeforeCurrentParagraphLeft) + .ToArray(), + ComparisonUnitArray2 = cul2 + .Take(countBeforeCurrentParagraphRight) + .ToArray() + }; + + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + } + else if (countBeforeCurrentParagraphLeft == 0 && countBeforeCurrentParagraphRight == 0) + { + // nothing to do + } + + if (remainingInLeftParagraph > 0 && remainingInRightParagraph == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = cul1 + .Skip(countBeforeCurrentParagraphLeft) + .Take(remainingInLeftParagraph) + .ToArray(), + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (remainingInLeftParagraph == 0 && remainingInRightParagraph > 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = cul2 + .Skip(countBeforeCurrentParagraphRight) + .Take(remainingInRightParagraph) + .ToArray() + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (remainingInLeftParagraph > 0 && remainingInRightParagraph > 0) + { + var unknownCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = cul1 + .Skip(countBeforeCurrentParagraphLeft) + .Take(remainingInLeftParagraph) + .ToArray(), + ComparisonUnitArray2 = cul2 + .Skip(countBeforeCurrentParagraphRight) + .Take(remainingInRightParagraph) + .ToArray() + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + } + else if (remainingInLeftParagraph == 0 && remainingInRightParagraph == 0) + { + // nothing to do + } + + var middleEqual = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Equal, + ComparisonUnitArray1 = cul1 + .Skip(currentI1) + .Take(currentLongestCommonSequenceLength) + .ToArray(), + ComparisonUnitArray2 = cul2 + .Skip(currentI2) + .Take(currentLongestCommonSequenceLength) + .ToArray() + }; + newListOfCorrelatedSequence.Add(middleEqual); + + + int endI1 = currentI1 + currentLongestCommonSequenceLength; + int endI2 = currentI2 + currentLongestCommonSequenceLength; + + ComparisonUnit[] remaining1 = cul1 + .Skip(endI1) + .ToArray(); + + ComparisonUnit[] remaining2 = cul2 + .Skip(endI2) + .ToArray(); + + // here is the point that we want to make a new unknown from this point to the end of the paragraph that + // contains the equal parts. + // this will never hurt anything, and will in many cases result in a better difference. + + if (middleEqual.ComparisonUnitArray1[middleEqual.ComparisonUnitArray1.Length - 1] is ComparisonUnitWord leftCuw) + { + ComparisonUnitAtom lastContentAtom = leftCuw.DescendantContentAtoms().LastOrDefault(); + + // if the middleEqual did not end with a paragraph mark + if (lastContentAtom != null && lastContentAtom.ContentElement.Name != W.pPr) + { + int idx1 = FindIndexOfNextParaMark(remaining1); + int idx2 = FindIndexOfNextParaMark(remaining2); + + var unknownCorrelatedSequenceRemaining = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = remaining1.Take(idx1).ToArray(), + ComparisonUnitArray2 = remaining2.Take(idx2).ToArray() + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequenceRemaining); + + var unknownCorrelatedSequenceAfter = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = remaining1.Skip(idx1).ToArray(), + ComparisonUnitArray2 = remaining2.Skip(idx2).ToArray() + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequenceAfter); + + return newListOfCorrelatedSequence; + } + } + + var unknownCorrelatedSequence20 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = remaining1, + ComparisonUnitArray2 = remaining2 + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence20); + + return newListOfCorrelatedSequence; + } + + private static List DoLcsAlgorithmForTable(CorrelatedSequence unknown) + { + var newListOfCorrelatedSequence = new List(); + + /////////////////////////////////////////////////////////////////////////////////////////////////////////// + // if we have a table with the same number of rows, and all rows have equal CorrelatedSHA1Hash, then we can + // flatten and compare every corresponding row. + // This is true regardless of whether there are horizontally or vertically merged cells, since that + // characteristic is incorporated into the CorrespondingSHA1Hash. This is probably not very common, but it + // will never do any harm. + var tblGroup1 = (ComparisonUnitGroup) unknown.ComparisonUnitArray1.First(); + var tblGroup2 = (ComparisonUnitGroup) unknown.ComparisonUnitArray2.First(); + + if (tblGroup1.Contents.Count == tblGroup2.Contents.Count) // if there are the same number of rows + { + var zipped = tblGroup1 + .Contents + .Zip( + tblGroup2.Contents, + (r1, r2) => new + { + Row1 = r1 as ComparisonUnitGroup, + Row2 = r2 as ComparisonUnitGroup + }) + .ToList(); + + bool canCollapse = zipped.All(z => z.Row1.CorrelatedSHA1Hash == z.Row2.CorrelatedSHA1Hash); + + if (canCollapse) + { + newListOfCorrelatedSequence = zipped + .Select(z => + { + var unknownCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = new ComparisonUnit[] { z.Row1 }, + ComparisonUnitArray2 = new ComparisonUnit[] { z.Row2 }, + CorrelationStatus = CorrelationStatus.Unknown + }; + return unknownCorrelatedSequence; + }) + .ToList(); + return newListOfCorrelatedSequence; + } + } + + ComparisonUnitAtom firstContentAtom1 = tblGroup1.DescendantContentAtoms().FirstOrDefault(); + if (firstContentAtom1 == null) + { + throw new OpenXmlPowerToolsException("Internal error"); + } + + XElement tblElement1 = firstContentAtom1 + .AncestorElements + .Reverse() + .First(a => a.Name == W.tbl); + + ComparisonUnitAtom firstContentAtom2 = tblGroup2.DescendantContentAtoms().FirstOrDefault(); + if (firstContentAtom2 == null) + { + throw new OpenXmlPowerToolsException("Internal error"); + } + + XElement tblElement2 = firstContentAtom2 + .AncestorElements + .Reverse() + .First(a => a.Name == W.tbl); + + bool leftContainsMerged = tblElement1 + .Descendants() + .Any(d => d.Name == W.vMerge || d.Name == W.gridSpan); + + bool rightContainsMerged = tblElement2 + .Descendants() + .Any(d => d.Name == W.vMerge || d.Name == W.gridSpan); + + if (leftContainsMerged || rightContainsMerged) + { + // If StructureSha1Hash is the same for both tables, then we know that the structure of the tables is + // identical, so we can break into correlated sequences for rows. + if (tblGroup1.StructureSHA1Hash != null && + tblGroup2.StructureSHA1Hash != null && + tblGroup1.StructureSHA1Hash == tblGroup2.StructureSHA1Hash) + { + var zipped = tblGroup1.Contents.Zip(tblGroup2.Contents, (r1, r2) => new + { + Row1 = r1 as ComparisonUnitGroup, + Row2 = r2 as ComparisonUnitGroup + }); + newListOfCorrelatedSequence = zipped + .Select(z => + { + var unknownCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = new ComparisonUnit[] { z.Row1 }, + ComparisonUnitArray2 = new ComparisonUnit[] { z.Row2 }, + CorrelationStatus = CorrelationStatus.Unknown + }; + return unknownCorrelatedSequence; + }) + .ToList(); + return newListOfCorrelatedSequence; + } + + // otherwise flatten to rows + var deletedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = unknown + .ComparisonUnitArray1 + .Select(z => z.Contents) + .SelectMany(m => m) + .ToArray(), + ComparisonUnitArray2 = null, + CorrelationStatus = CorrelationStatus.Deleted + }; + + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + + var insertedCorrelatedSequence = new CorrelatedSequence + { + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = unknown + .ComparisonUnitArray2 + .Select(z => z.Contents) + .SelectMany(m => m) + .ToArray(), + CorrelationStatus = CorrelationStatus.Inserted + }; + + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + + return newListOfCorrelatedSequence; + } + + return null; + } + + private static int FindIndexOfNextParaMark(ComparisonUnit[] cul) + { + for (var i = 0; i < cul.Length; i++) + { + var cuw = (ComparisonUnitWord) cul[i]; + ComparisonUnitAtom lastAtom = cuw.DescendantContentAtoms().LastOrDefault(); + if (lastAtom?.ContentElement.Name == W.pPr) + { + return i; + } + } + + return cul.Length; + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.PreProcessMarkup.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.PreProcessMarkup.cs new file mode 100644 index 00000000..99c46ab0 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.PreProcessMarkup.cs @@ -0,0 +1,331 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Linq; +using System.Xml.Linq; +using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Packaging; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + private static WmlDocument PreProcessMarkup(WmlDocument source, int startingIdForFootnotesEndnotes) + { + // open and close to get rid of MC content + using (var ms = new MemoryStream()) + { + ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length); + var os = new OpenSettings + { + MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings( + MarkupCompatibilityProcessMode.ProcessAllParts, + FileFormatVersions.Office2007) + }; + + using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true, os)) + { + OpenXmlPartRootElement unused = wDoc.MainDocumentPart.RootElement; + if (wDoc.MainDocumentPart.FootnotesPart != null) + { + // contrary to what you might think, looking at the API, it is necessary to access the root element of each part to cause + // the SDK to process MC markup. + OpenXmlPartRootElement unused1 = wDoc.MainDocumentPart.FootnotesPart.RootElement; + } + + if (wDoc.MainDocumentPart.EndnotesPart != null) + { + OpenXmlPartRootElement unused1 = wDoc.MainDocumentPart.EndnotesPart.RootElement; + } + } + + source = new WmlDocument(source.FileName, ms.ToArray()); + } + + // open and close to get rid of MC content + using (var ms = new MemoryStream()) + { + ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length); + var os = new OpenSettings + { + MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings( + MarkupCompatibilityProcessMode.ProcessAllParts, + FileFormatVersions.Office2007) + }; + + using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true, os)) + { + TestForInvalidContent(wDoc); + RemoveExistingPowerToolsMarkup(wDoc); + + // Removing content controls, field codes, and bookmarks is a no-no for many use cases. + // We need content controls, e.g., on the title page. Field codes are required for + // automatic cross-references, which require bookmarks. + // TODO: Revisit + var msSettings = new SimplifyMarkupSettings + { + RemoveBookmarks = true, + + AcceptRevisions = false, + RemoveComments = true, + + RemoveContentControls = true, + RemoveFieldCodes = true, + + RemoveGoBackBookmark = true, + RemoveLastRenderedPageBreak = true, + RemovePermissions = true, + RemoveProof = true, + RemoveSmartTags = true, + RemoveSoftHyphens = true, + RemoveHyperlinks = true + }; + MarkupSimplifier.SimplifyMarkup(wDoc, msSettings); + ChangeFootnoteEndnoteReferencesToUniqueRange(wDoc, startingIdForFootnotesEndnotes); + AddUnidsToMarkupInContentParts(wDoc); + AddFootnotesEndnotesParts(wDoc); + FillInEmptyFootnotesEndnotes(wDoc); + } + + return new WmlDocument(source.FileName, ms.ToArray()); + } + } + + private static void TestForInvalidContent(WordprocessingDocument wDoc) + { + foreach (OpenXmlPart part in wDoc.ContentParts()) + { + XDocument xDoc = part.GetXDocument(); + if (xDoc.Descendants(W.altChunk).Any()) + throw new OpenXmlPowerToolsException("Unsupported document, contains w:altChunk"); + if (xDoc.Descendants(W.subDoc).Any()) + throw new OpenXmlPowerToolsException("Unsupported document, contains w:subDoc"); + if (xDoc.Descendants(W.contentPart).Any()) + throw new OpenXmlPowerToolsException("Unsupported document, contains w:contentPart"); + } + } + + private static void RemoveExistingPowerToolsMarkup(WordprocessingDocument wDoc) + { + wDoc.MainDocumentPart + .GetXDocument() + .Root? + .Descendants() + .Attributes() + .Where(a => a.Name.Namespace == PtOpenXml.pt) + .Where(a => a.Name != PtOpenXml.Unid) + .Remove(); + + wDoc.MainDocumentPart.PutXDocument(); + + FootnotesPart fnPart = wDoc.MainDocumentPart.FootnotesPart; + if (fnPart != null) + { + XDocument fnXDoc = fnPart.GetXDocument(); + fnXDoc + .Root? + .Descendants() + .Attributes() + .Where(a => a.Name.Namespace == PtOpenXml.pt) + .Where(a => a.Name != PtOpenXml.Unid) + .Remove(); + + fnPart.PutXDocument(); + } + + EndnotesPart enPart = wDoc.MainDocumentPart.EndnotesPart; + if (enPart != null) + { + XDocument enXDoc = enPart.GetXDocument(); + enXDoc + .Root? + .Descendants() + .Attributes() + .Where(a => a.Name.Namespace == PtOpenXml.pt) + .Where(a => a.Name != PtOpenXml.Unid) + .Remove(); + + enPart.PutXDocument(); + } + } + + private static void ChangeFootnoteEndnoteReferencesToUniqueRange( + WordprocessingDocument wDoc, + int startingIdForFootnotesEndnotes) + { + MainDocumentPart mainDocPart = wDoc.MainDocumentPart; + FootnotesPart footnotesPart = wDoc.MainDocumentPart.FootnotesPart; + EndnotesPart endnotesPart = wDoc.MainDocumentPart.EndnotesPart; + + XElement document = + mainDocPart.GetXDocument().Root ?? throw new OpenXmlPowerToolsException("Invalid document."); + + XElement footnotes = footnotesPart?.GetXDocument().Root; + XElement endnotes = endnotesPart?.GetXDocument().Root; + + IEnumerable references = document + .Descendants() + .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference); + + foreach (XElement r in references) + { + var oldId = (string) r.Attribute(W.id); + string newId = startingIdForFootnotesEndnotes.ToString(); + startingIdForFootnotesEndnotes++; + r.SetAttributeValue(W.id, newId); + if (r.Name == W.footnoteReference) + { + XElement fn = footnotes? + .Elements() + .FirstOrDefault(e => (string) e.Attribute(W.id) == oldId); + + if (fn == null) + { + throw new OpenXmlPowerToolsException("Invalid document"); + } + + fn.SetAttributeValue(W.id, newId); + } + else + { + XElement en = endnotes? + .Elements() + .FirstOrDefault(e => (string) e.Attribute(W.id) == oldId); + + if (en == null) + { + throw new OpenXmlPowerToolsException("Invalid document"); + } + + en.SetAttributeValue(W.id, newId); + } + } + + mainDocPart.PutXDocument(); + footnotesPart?.PutXDocument(); + endnotesPart?.PutXDocument(); + } + + private static void AddUnidsToMarkupInContentParts(WordprocessingDocument wDoc) + { + XDocument mdp = wDoc.MainDocumentPart.GetXDocument(); + AssignUnidToAllElements(mdp.Root); + IgnorePt14Namespace(mdp.Root); + wDoc.MainDocumentPart.PutXDocument(); + + if (wDoc.MainDocumentPart.FootnotesPart != null) + { + XDocument p = wDoc.MainDocumentPart.FootnotesPart.GetXDocument(); + AssignUnidToAllElements(p.Root); + IgnorePt14Namespace(p.Root); + wDoc.MainDocumentPart.FootnotesPart.PutXDocument(); + } + + if (wDoc.MainDocumentPart.EndnotesPart != null) + { + XDocument p = wDoc.MainDocumentPart.EndnotesPart.GetXDocument(); + AssignUnidToAllElements(p.Root); + IgnorePt14Namespace(p.Root); + wDoc.MainDocumentPart.EndnotesPart.PutXDocument(); + } + } + + private static void AssignUnidToAllElements(XElement contentParent) + { + IEnumerable content = contentParent.Descendants(); + foreach (XElement d in content) + { + if (d.Attribute(PtOpenXml.Unid) == null) + { + string unid = Guid.NewGuid().ToString().Replace("-", ""); + var newAtt = new XAttribute(PtOpenXml.Unid, unid); + d.Add(newAtt); + } + } + } + + [SuppressMessage("ReSharper", "CoVariantArrayConversion")] + private static void AddFootnotesEndnotesParts(WordprocessingDocument wDoc) + { + MainDocumentPart mdp = wDoc.MainDocumentPart; + if (mdp.FootnotesPart == null) + { + mdp.AddNewPart(); + XDocument newFootnotes = wDoc.MainDocumentPart.FootnotesPart.GetXDocument(); + newFootnotes.Declaration.Standalone = "yes"; + newFootnotes.Declaration.Encoding = "UTF-8"; + newFootnotes.Add(new XElement(W.footnotes, NamespaceAttributes)); + mdp.FootnotesPart.PutXDocument(); + } + + if (mdp.EndnotesPart == null) + { + mdp.AddNewPart(); + XDocument newEndnotes = wDoc.MainDocumentPart.EndnotesPart.GetXDocument(); + newEndnotes.Declaration.Standalone = "yes"; + newEndnotes.Declaration.Encoding = "UTF-8"; + newEndnotes.Add(new XElement(W.endnotes, NamespaceAttributes)); + mdp.EndnotesPart.PutXDocument(); + } + } + + private static void FillInEmptyFootnotesEndnotes(WordprocessingDocument wDoc) + { + XElement emptyFootnote = XElement.Parse( + @" + + + + + + + + + +"); + + XElement emptyEndnote = XElement.Parse( + @" + + + + + + + + + +"); + + FootnotesPart footnotePart = wDoc.MainDocumentPart.FootnotesPart; + if (footnotePart != null) + { + XElement fnRoot = footnotePart.GetXDocument().Root ?? throw new ArgumentException(); + foreach (XElement fn in fnRoot.Elements(W.footnote)) + { + if (!fn.HasElements) + fn.Add(emptyFootnote); + } + + footnotePart.PutXDocument(); + } + + EndnotesPart endnotePart = wDoc.MainDocumentPart.EndnotesPart; + if (endnotePart != null) + { + XElement fnRoot = endnotePart.GetXDocument().Root ?? throw new ArgumentException(); + foreach (XElement fn in fnRoot.Elements(W.endnote)) + { + if (!fn.HasElements) + fn.Add(emptyEndnote); + } + + endnotePart.PutXDocument(); + } + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.ProduceDocument.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.ProduceDocument.cs new file mode 100644 index 00000000..e24d258c --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.ProduceDocument.cs @@ -0,0 +1,3232 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.IO.Packaging; +using System.Linq; +using System.Text; +using System.Xml.Linq; +using DocumentFormat.OpenXml.Packaging; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + private static WmlDocument ProduceDocumentWithTrackedRevisions( + WmlComparerSettings settings, + WmlDocument wmlResult, + WordprocessingDocument wDoc1, + WordprocessingDocument wDoc2) + { + // save away sectPr so that can set in the newly produced document. + XElement savedSectPr = wDoc1 + .MainDocumentPart + .GetXDocument() + .Root? + .Element(W.body)? + .Element(W.sectPr); + + XElement contentParent1 = wDoc1.MainDocumentPart.GetXDocument().Root?.Element(W.body); + AddSha1HashToBlockLevelContent(wDoc1.MainDocumentPart, contentParent1, settings); + + XElement contentParent2 = wDoc2.MainDocumentPart.GetXDocument().Root?.Element(W.body); + AddSha1HashToBlockLevelContent(wDoc2.MainDocumentPart, contentParent2, settings); + + ComparisonUnitAtom[] cal1 = CreateComparisonUnitAtomList( + wDoc1.MainDocumentPart, + wDoc1.MainDocumentPart.GetXDocument().Root?.Element(W.body), + settings); + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in cal1) + sb.Append(item + Environment.NewLine); + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + ComparisonUnit[] cus1 = GetComparisonUnitList(cal1, settings); + + if (False) + { + string sbs = ComparisonUnit.ComparisonUnitListToString(cus1); + TestUtil.NotePad(sbs); + } + + ComparisonUnitAtom[] cal2 = CreateComparisonUnitAtomList( + wDoc2.MainDocumentPart, + wDoc2.MainDocumentPart.GetXDocument().Root?.Element(W.body), + settings); + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in cal2) + sb.Append(item + Environment.NewLine); + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + ComparisonUnit[] cus2 = GetComparisonUnitList(cal2, settings); + + if (False) + { + string sbs = ComparisonUnit.ComparisonUnitListToString(cus2); + TestUtil.NotePad(sbs); + } + + if (False) + { + var sb3 = new StringBuilder(); + sb3.Append("ComparisonUnitList 1 =====" + Environment.NewLine + Environment.NewLine); + sb3.Append(ComparisonUnit.ComparisonUnitListToString(cus1)); + sb3.Append(Environment.NewLine); + sb3.Append("ComparisonUnitList 2 =====" + Environment.NewLine + Environment.NewLine); + sb3.Append(ComparisonUnit.ComparisonUnitListToString(cus2)); + string sbs3 = sb3.ToString(); + TestUtil.NotePad(sbs3); + } + + List correlatedSequence = Lcs(cus1, cus2, settings); + + if (False) + { + var sb = new StringBuilder(); + foreach (CorrelatedSequence item in correlatedSequence) + { + sb.Append(item + Environment.NewLine); + } + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // for any deleted or inserted rows, we go into the w:trPr properties, and add the appropriate w:ins or + // w:del element, and therefore when generating the document, the appropriate row will be marked as deleted + // or inserted. + MarkRowsAsDeletedOrInserted(settings, correlatedSequence); + + // the following gets a flattened list of ComparisonUnitAtoms, with status indicated in each + // ComparisonUnitAtom: Deleted, Inserted, or Equal + List listOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(correlatedSequence, settings); + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in listOfComparisonUnitAtoms) + { + sb.Append(item + Environment.NewLine); + } + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // note - we don't want to do the hack until after flattening all of the groups. At the end of the + // flattening, we should simply have a list of ComparisonUnitAtoms, appropriately marked as equal, + // inserted, or deleted. + + // the table id will be hacked in the normal course of events. + // in the case where a row is deleted, not necessary to hack - the deleted row ID will do. + // in the case where a row is inserted, not necessary to hack - the inserted row ID will do as well. + AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(listOfComparisonUnitAtoms); + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in listOfComparisonUnitAtoms) + sb.Append(item.ToStringAncestorUnids() + Environment.NewLine); + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // and then finally can generate the document with revisions + using (var ms = new MemoryStream()) + { + ms.Write(wmlResult.DocumentByteArray, 0, wmlResult.DocumentByteArray.Length); + using (WordprocessingDocument wDocWithRevisions = WordprocessingDocument.Open(ms, true)) + { + XDocument xDoc = wDocWithRevisions.MainDocumentPart.GetXDocument(); + List rootNamespaceAttributes = xDoc + .Root? + .Attributes() + .Where(a => a.IsNamespaceDeclaration || a.Name.Namespace == MC.mc) + .ToList(); + + // ====================================== + // The following produces a new valid WordprocessingML document from the listOfComparisonUnitAtoms + object newBodyChildren = ProduceNewWmlMarkupFromCorrelatedSequence( + wDocWithRevisions.MainDocumentPart, + listOfComparisonUnitAtoms, + settings); + + var newXDoc = new XDocument(); + newXDoc.Add( + new XElement(W.document, + rootNamespaceAttributes, + new XElement(W.body, newBodyChildren))); + + MarkContentAsDeletedOrInserted(newXDoc, settings); + CoalesceAdjacentRunsWithIdenticalFormatting(newXDoc); + IgnorePt14Namespace(newXDoc.Root); + + ProcessFootnoteEndnote(settings, + listOfComparisonUnitAtoms, + wDoc1.MainDocumentPart, + wDoc2.MainDocumentPart, + newXDoc); + + RectifyFootnoteEndnoteIds( + wDoc1.MainDocumentPart, + wDoc2.MainDocumentPart, + wDocWithRevisions.MainDocumentPart, + newXDoc, + settings); + + ConjoinDeletedInsertedParagraphMarks(wDocWithRevisions.MainDocumentPart, newXDoc); + + FixUpRevisionIds(wDocWithRevisions, newXDoc); + + // little bit of cleanup + MoveLastSectPrToChildOfBody(newXDoc); + var newXDoc2Root = (XElement) WordprocessingMLUtil.WmlOrderElementsPerStandard(newXDoc.Root); + xDoc.Root?.ReplaceWith(newXDoc2Root); + + /**********************************************************************************************/ + // temporary code to remove sections. When remove this code, get validation errors for some ITU documents. + // Note: This is a no-go for use cases in which documents have multiple sections, e.g., for title pages, + // front matter, and body matter. Another example is where you have to switch between portrait and + // landscape orientation, which requires sections. + // TODO: Revisit + xDoc.Root?.Descendants(W.sectPr).Remove(); + + // move w:sectPr from source document into newly generated document. + if (savedSectPr != null) + { + XDocument xd = wDocWithRevisions.MainDocumentPart.GetXDocument(); + + // add everything but headers/footers + var clonedSectPr = new XElement(W.sectPr, + savedSectPr.Attributes(), + savedSectPr.Element(W.type), + savedSectPr.Element(W.pgSz), + savedSectPr.Element(W.pgMar), + savedSectPr.Element(W.cols), + savedSectPr.Element(W.titlePg)); + xd.Root?.Element(W.body)?.Add(clonedSectPr); + } + /**********************************************************************************************/ + + wDocWithRevisions.MainDocumentPart.PutXDocument(); + + FixUpFootnotesEndnotesWithCustomMarkers(wDocWithRevisions); + FixUpRevMarkIds(wDocWithRevisions); + FixUpDocPrIds(wDocWithRevisions); + FixUpShapeIds(wDocWithRevisions); + FixUpShapeTypeIds(wDocWithRevisions); + AddFootnotesEndnotesStyles(wDocWithRevisions); + CopyMissingStylesFromOneDocToAnother(wDoc2, wDocWithRevisions); + DeleteFootnotePropertiesInSettings(wDocWithRevisions); + } + + foreach (OpenXmlPart part in wDoc1.ContentParts()) + { + part.PutXDocument(); + } + + foreach (OpenXmlPart part in wDoc2.ContentParts()) + { + part.PutXDocument(); + } + + var updatedWmlResult = new WmlDocument("Dummy.docx", ms.ToArray()); + return updatedWmlResult; + } + } + + private static void AddSha1HashToBlockLevelContent(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings) + { + IEnumerable blockLevelContentToAnnotate = contentParent + .Descendants() + .Where(d => ElementsToHaveSha1Hash.Contains(d.Name)); + + foreach (XElement blockLevelContent in blockLevelContentToAnnotate) + { + var cloneBlockLevelContentForHashing = + (XElement) CloneBlockLevelContentForHashing(part, blockLevelContent, true, settings); + string shaString = cloneBlockLevelContentForHashing.ToString(SaveOptions.DisableFormatting) + .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", ""); + string sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaString); + blockLevelContent.Add(new XAttribute(PtOpenXml.SHA1Hash, sha1Hash)); + + if (blockLevelContent.Name == W.tbl || + blockLevelContent.Name == W.tr) + { + var clonedForStructureHash = (XElement) CloneForStructureHash(cloneBlockLevelContentForHashing); + + // this is a convenient place to look at why tables are being compared as different. + + //if (blockLevelContent.Name == W.tbl) + // Console.WriteLine(); + + string shaString2 = clonedForStructureHash.ToString(SaveOptions.DisableFormatting) + .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", ""); + string sha1Hash2 = WmlComparerUtil.SHA1HashStringForUTF8String(shaString2); + blockLevelContent.Add(new XAttribute(PtOpenXml.StructureSHA1Hash, sha1Hash2)); + } + } + } + + private static List Lcs(ComparisonUnit[] cu1, ComparisonUnit[] cu2, WmlComparerSettings settings) + { + // set up initial state - one CorrelatedSequence, UnKnown, contents == entire sequences (both) + var cs = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = cu1, + ComparisonUnitArray2 = cu2 + }; + var csList = new List + { + cs + }; + + while (true) + { + if (False) + { + var sb = new StringBuilder(); + foreach (CorrelatedSequence item in csList) + sb.Append(item).Append(Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + CorrelatedSequence unknown = csList + .FirstOrDefault(z => z.CorrelationStatus == CorrelationStatus.Unknown); + + if (unknown != null) + { + // if unknown consists of a single group of the same type in each side, then can set some Unids in the 'after' document. + // if the unknown is a pair of single tables, then can set table Unid. + // if the unknown is a pair of single rows, then can set table and rows Unids. + // if the unknown is a pair of single cells, then can set table, row, and cell Unids. + // if the unknown is a pair of paragraphs, then can set paragraph (and all ancestor) Unids. + SetAfterUnids(unknown); + + if (False) + { + var sb = new StringBuilder(); + sb.Append(unknown); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + List newSequence = ProcessCorrelatedHashes(unknown, settings); + if (newSequence == null) + { + newSequence = FindCommonAtBeginningAndEnd(unknown, settings); + if (newSequence == null) + { + newSequence = DoLcsAlgorithm(unknown, settings); + } + } + + int indexOfUnknown = csList.IndexOf(unknown); + csList.Remove(unknown); + + newSequence.Reverse(); + foreach (CorrelatedSequence item in newSequence) + csList.Insert(indexOfUnknown, item); + + continue; + } + + return csList; + } + } + + private static void MarkRowsAsDeletedOrInserted(WmlComparerSettings settings, List correlatedSequence) + { + foreach (CorrelatedSequence dcs in correlatedSequence.Where(cs => + cs.CorrelationStatus == CorrelationStatus.Deleted || cs.CorrelationStatus == CorrelationStatus.Inserted)) + { + // iterate through all deleted/inserted items in dcs.ComparisonUnitArray1/ComparisonUnitArray2 + ComparisonUnit[] toIterateThrough = dcs.ComparisonUnitArray1; + if (dcs.CorrelationStatus == CorrelationStatus.Inserted) + toIterateThrough = dcs.ComparisonUnitArray2; + + foreach (ComparisonUnit ca in toIterateThrough) + { + var cug = ca as ComparisonUnitGroup; + + // this works because we will never see a table in this list, only rows. If tables were in this list, would need to recursively + // go into children, but tables are always flattened in the LCS process. + + // when we have a row, it is only necessary to find the first content atom of the row, then find the row ancestor, and then tweak + // the w:trPr + + if (cug != null && cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + { + ComparisonUnitAtom firstContentAtom = cug.DescendantContentAtoms().FirstOrDefault(); + if (firstContentAtom == null) + throw new OpenXmlPowerToolsException("Internal error"); + + XElement tr = firstContentAtom + .AncestorElements + .Reverse() + .FirstOrDefault(a => a.Name == W.tr); + + if (tr == null) + throw new OpenXmlPowerToolsException("Internal error"); + + XElement trPr = tr.Element(W.trPr); + if (trPr == null) + { + trPr = new XElement(W.trPr); + tr.AddFirst(trPr); + } + + XName revTrackElementName = null; + if (dcs.CorrelationStatus == CorrelationStatus.Deleted) + revTrackElementName = W.del; + else if (dcs.CorrelationStatus == CorrelationStatus.Inserted) + revTrackElementName = W.ins; + trPr.Add(new XElement(revTrackElementName, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, _maxId++), + new XAttribute(W.date, settings.DateTimeForRevisions))); + } + } + } + } + + private static List FlattenToComparisonUnitAtomList( + List correlatedSequence, + WmlComparerSettings settings) + { + List listOfComparisonUnitAtoms = correlatedSequence + .Select(cs => + { + // need to write some code here to find out if we are assembling a paragraph (or anything) that contains the following unid. + // why do are we dropping content??????? + //string searchFor = "0ecb9184"; + + + if (cs.CorrelationStatus == CorrelationStatus.Equal) + { + IEnumerable contentAtomsBefore = cs + .ComparisonUnitArray1 + .Select(ca => ca.DescendantContentAtoms()) + .SelectMany(m => m); + + IEnumerable contentAtomsAfter = cs + .ComparisonUnitArray2 + .Select(ca => ca.DescendantContentAtoms()) + .SelectMany(m => m); + + List comparisonUnitAtomList = contentAtomsBefore + .Zip(contentAtomsAfter, + (before, after) => new ComparisonUnitAtom( + after.ContentElement, + after.AncestorElements, + after.Part, + settings) + { + CorrelationStatus = CorrelationStatus.Equal, + ContentElementBefore = before.ContentElement, + ComparisonUnitAtomBefore = before + }) + .ToList(); + + return comparisonUnitAtomList; + } + + if (cs.CorrelationStatus == CorrelationStatus.Deleted) + { + IEnumerable comparisonUnitAtomList = cs + .ComparisonUnitArray1 + .Select(ca => ca.DescendantContentAtoms()) + .SelectMany(m => m) + .Select(ca => + new ComparisonUnitAtom(ca.ContentElement, ca.AncestorElements, ca.Part, settings) + { + CorrelationStatus = CorrelationStatus.Deleted + }); + + return comparisonUnitAtomList; + } + + if (cs.CorrelationStatus == CorrelationStatus.Inserted) + { + IEnumerable comparisonUnitAtomList = cs + .ComparisonUnitArray2 + .Select(ca => ca.DescendantContentAtoms()) + .SelectMany(m => m) + .Select(ca => + new ComparisonUnitAtom(ca.ContentElement, ca.AncestorElements, ca.Part, settings) + { + CorrelationStatus = CorrelationStatus.Inserted + }); + return comparisonUnitAtomList; + } + + throw new OpenXmlPowerToolsException("Internal error"); + }) + .SelectMany(m => m) + .ToList(); + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in listOfComparisonUnitAtoms) + sb.Append(item).Append(Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + return listOfComparisonUnitAtoms; + } + + /// Here is the crux of the fix to the algorithm. After assembling the entire list of ComparisonUnitAtoms, we do the following: + /// - First, figure out the maximum hierarchy depth, considering only paragraphs, txbx, txbxContent, tables, rows, cells, and content controls. + /// - For documents that do not contain tables, nor text boxes, this maximum hierarchy depth will always be 1. + /// - For atoms within a table, the depth will be 4. The first level is the table, the second level is row, third is cell, fourth is paragraph. + /// - For atoms within a nested table, the depth will be 7: Table / Row / Cell / Table / Row / Cell / Paragraph + /// - For atoms within a text box, the depth will be 3: Paragraph / txbxContent / Paragraph + /// - For atoms within a table in a text box, the depth will be 5: Paragraph / txbxContent / Table / Row / Cell / Paragraph + /// In any case, we figure out the maximum depth. + /// + /// Then we iterate through the list of content atoms backwards. We do this n times, where n is the maximum depth. + /// + /// At each level, we find a paragraph mark, and working backwards, we set the guids in the hierarchy so that the content will be assembled together correctly. + /// + /// For each iteration, we only set unids at the level that we are working at. + /// + /// So first we will set all unids at level 1. When we find a paragraph mark, we get the unid for that level, and then working backwards, until we find another + /// paragraph mark, we set all unids at level 1 to the same unid as level 1 of the paragraph mark. + /// + /// Then we set all unids at level 2. When we find a paragraph mark, we get the unid for that level, and then working backwards, until we find another paragraph + /// mark, we set all unids at level 2 to the same unid as level 2 of the paragraph mark. At some point, we will find a paragraph mark with no level 2. This is + /// not a problem. We stop setting anything until we find another paragraph mark that has a level 2, at which point we resume setting values at level 2. + /// + /// Same process for level 3, and so on, until we have processed to the maximum depth of the hierarchy. + /// + /// At the end of this process, we will be able to do the coalsce recurse algorithm, and the content atom list will be put back together into a beautiful tree, + /// where every element is correctly positioned in the hierarchy. + /// + /// This should also properly assemble the test where just the paragraph marks have been deleted for a range of paragraphs. + /// + /// There is an interesting thought - it is possible that I have set two runs of text that were initially in the same paragraph, but then after + /// processing, they match up to text in different paragraphs. Therefore this will not work. We need to actually keep a list of reconstructed ancestor + /// Unids, because the same paragraph would get set to two different IDs - two ComparisonUnitAtoms need to be in separate paragraphs in the reconstructed + /// document, but their ancestors actually point to the same paragraph. + /// + /// Fix this in the algorithm, and also keep the appropriate list in ComparisonUnitAtom class. + private static void AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(List comparisonUnitAtomList) + { + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in comparisonUnitAtomList) + sb.Append(item).Append(Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // the following loop sets all ancestor unids in the after document to the unids in the before document for all pPr where the status is equal. + // this should always be true. + + // one additional modification to make to this loop - where we find a pPr in a text box, we want to do this as well, regardless of whether the status is equal, inserted, or deleted. + // reason being that this module does not support insertion / deletion of text boxes themselves. If a text box is in the before or after document, it will be in the document that + // contains deltas. It may have inserted or deleted text, but regardless, it will be in the result document. + foreach (ComparisonUnitAtom cua in comparisonUnitAtomList) + { + var doSet = false; + if (cua.ContentElement.Name == W.pPr) + { + if (cua.AncestorElements.Any(ae => ae.Name == W.txbxContent)) + doSet = true; + if (cua.CorrelationStatus == CorrelationStatus.Equal) + doSet = true; + } + + if (doSet) + { + ComparisonUnitAtom cuaBefore = cua.ComparisonUnitAtomBefore; + XElement[] ancestorsAfter = cua.AncestorElements; + if (cuaBefore != null) + { + XElement[] ancestorsBefore = cuaBefore.AncestorElements; + if (ancestorsAfter.Length == ancestorsBefore.Length) + { + var zipped = ancestorsBefore.Zip(ancestorsAfter, (b, a) => + new + { + After = a, + Before = b + }); + + foreach (var z in zipped) + { + XAttribute afterUnidAtt = z.After.Attribute(PtOpenXml.Unid); + XAttribute beforeUnidAtt = z.Before.Attribute(PtOpenXml.Unid); + if (afterUnidAtt != null && beforeUnidAtt != null) + afterUnidAtt.Value = beforeUnidAtt.Value; + } + } + } + } + } + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in comparisonUnitAtomList) + sb.Append(item).Append(Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + List rComparisonUnitAtomList = + ((IEnumerable) comparisonUnitAtomList).Reverse().ToList(); + + // the following should always succeed, because there will always be at least one element in + // rComparisonUnitAtomList, and there will always be at least one ancestor in AncestorElements + XElement deepestAncestor = rComparisonUnitAtomList.First().AncestorElements.First(); + XName deepestAncestorName = deepestAncestor.Name; + string deepestAncestorUnid = null; + if (deepestAncestorName == W.footnote || deepestAncestorName == W.endnote) + { + deepestAncestorUnid = (string) deepestAncestor.Attribute(PtOpenXml.Unid); + } + + // If the following loop finds a pPr that is in a text box, then continue on, processing the pPr and all of its contents as though it were + // content in the containing text box. This is going to leave it after this loop where the AncestorUnids for the content in the text box will be + // incomplete. We then will need to go through the rComparisonUnitAtomList a second time, processing all of the text boxes. + + // Note that this makes the basic assumption that a text box can't be nested inside of a text box, which, as far as I know, is a good assumption. + + // This also makes the basic assumption that an endnote / footnote can't contain a text box, which I believe is a good assumption. + + + string[] currentAncestorUnids = null; + foreach (ComparisonUnitAtom cua in rComparisonUnitAtomList) + { + if (cua.ContentElement.Name == W.pPr) + { + bool pPr_inTextBox = cua + .AncestorElements + .Any(ae => ae.Name == W.txbxContent); + + if (!pPr_inTextBox) + { + // this will collect the ancestor unids for the paragraph. + // my hypothesis is that these ancestor unids should be the same for all content unit atoms within that paragraph. + currentAncestorUnids = cua + .AncestorElements + .Select(ae => + { + var thisUnid = (string) ae.Attribute(PtOpenXml.Unid); + if (thisUnid == null) + throw new OpenXmlPowerToolsException("Internal error"); + + return thisUnid; + }) + .ToArray(); + cua.AncestorUnids = currentAncestorUnids; + if (deepestAncestorUnid != null) + cua.AncestorUnids[0] = deepestAncestorUnid; + continue; + } + } + + int thisDepth = cua.AncestorElements.Length; + IEnumerable additionalAncestorUnids = cua + .AncestorElements + .Skip(currentAncestorUnids.Length) + .Select(ae => + { + var thisUnid = (string) ae.Attribute(PtOpenXml.Unid); + if (thisUnid == null) + Guid.NewGuid().ToString().Replace("-", ""); + return thisUnid; + }); + string[] thisAncestorUnids = currentAncestorUnids + .Concat(additionalAncestorUnids) + .ToArray(); + cua.AncestorUnids = thisAncestorUnids; + if (deepestAncestorUnid != null) + cua.AncestorUnids[0] = deepestAncestorUnid; + } + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in comparisonUnitAtomList) + sb.Append(item).Append(Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // this is the second loop that processes all text boxes. + currentAncestorUnids = null; + var skipUntilNextPpr = false; + foreach (ComparisonUnitAtom cua in rComparisonUnitAtomList) + { + if (currentAncestorUnids != null && cua.AncestorElements.Length < currentAncestorUnids.Length) + { + skipUntilNextPpr = true; + currentAncestorUnids = null; + continue; + } + + if (cua.ContentElement.Name == W.pPr) + { + //if (s_True) + //{ + // var sb = new StringBuilder(); + // foreach (var item in comparisonUnitAtomList) + // sb.Append(item.ToString()).Append(Environment.NewLine); + // var sbs = sb.ToString(); + // TestUtil.NotePad(sbs); + //} + + bool pPr_inTextBox = cua + .AncestorElements + .Any(ae => ae.Name == W.txbxContent); + + if (!pPr_inTextBox) + { + skipUntilNextPpr = true; + currentAncestorUnids = null; + continue; + } + + skipUntilNextPpr = false; + + currentAncestorUnids = cua + .AncestorElements + .Select(ae => + { + var thisUnid = (string) ae.Attribute(PtOpenXml.Unid); + if (thisUnid == null) + throw new OpenXmlPowerToolsException("Internal error"); + + return thisUnid; + }) + .ToArray(); + cua.AncestorUnids = currentAncestorUnids; + continue; + } + + if (skipUntilNextPpr) + continue; + + int thisDepth = cua.AncestorElements.Length; + IEnumerable additionalAncestorUnids = cua + .AncestorElements + .Skip(currentAncestorUnids.Length) + .Select(ae => + { + var thisUnid = (string) ae.Attribute(PtOpenXml.Unid); + if (thisUnid == null) + Guid.NewGuid().ToString().Replace("-", ""); + return thisUnid; + }); + string[] thisAncestorUnids = currentAncestorUnids + .Concat(additionalAncestorUnids) + .ToArray(); + cua.AncestorUnids = thisAncestorUnids; + } + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in comparisonUnitAtomList) + sb.Append(item.ToStringAncestorUnids()).Append(Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + } + + private static object ProduceNewWmlMarkupFromCorrelatedSequence( + OpenXmlPart part, + IEnumerable comparisonUnitAtomList, + WmlComparerSettings settings) + { + // fabricate new MainDocumentPart from correlatedSequence + _maxId = 0; + object newBodyChildren = CoalesceRecurse(part, comparisonUnitAtomList, 0, settings); + return newBodyChildren; + } + + private static void MarkContentAsDeletedOrInserted(XDocument newXDoc, WmlComparerSettings settings) + { + object newRoot = MarkContentAsDeletedOrInsertedTransform(newXDoc.Root, settings); + newXDoc.Root?.ReplaceWith(newRoot); + } + + private static object MarkContentAsDeletedOrInsertedTransform(XNode node, WmlComparerSettings settings) + { + if (node is XElement element) + { + if (element.Name == W.r) + { + List statusList = element + .DescendantsTrimmed(W.txbxContent) + .Where(d => d.Name == W.t || d.Name == W.delText || AllowableRunChildren.Contains(d.Name)) + .Attributes(PtOpenXml.Status) + .Select(a => (string) a) + .Distinct() + .ToList(); + + if (statusList.Count() > 1) + { + throw new OpenXmlPowerToolsException( + "Internal error - have both deleted and inserted text elements in the same run."); + } + + if (statusList.Count == 0) + { + return new XElement(W.r, + element.Attributes(), + element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings))); + } + + if (statusList.First() == "Deleted") + { + return new XElement(W.del, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, _maxId++), + new XAttribute(W.date, settings.DateTimeForRevisions), + new XElement(W.r, + element.Attributes(), + element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings)))); + } + + if (statusList.First() == "Inserted") + { + return new XElement(W.ins, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, _maxId++), + new XAttribute(W.date, settings.DateTimeForRevisions), + new XElement(W.r, + element.Attributes(), + element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings)))); + } + } + + if (element.Name == W.pPr) + { + var status = (string) element.Attribute(PtOpenXml.Status); + if (status == null) + return new XElement(W.pPr, + element.Attributes(), + element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings))); + + var pPr = new XElement(element); + if (status == "Deleted") + { + XElement rPr = pPr.Element(W.rPr); + if (rPr == null) + rPr = new XElement(W.rPr); + rPr.Add(new XElement(W.del, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, _maxId++), + new XAttribute(W.date, settings.DateTimeForRevisions))); + if (pPr.Element(W.rPr) != null) + pPr.Element(W.rPr).ReplaceWith(rPr); + else + pPr.AddFirst(rPr); + } + else if (status == "Inserted") + { + XElement rPr = pPr.Element(W.rPr); + if (rPr == null) + rPr = new XElement(W.rPr); + rPr.Add(new XElement(W.ins, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, _maxId++), + new XAttribute(W.date, settings.DateTimeForRevisions))); + if (pPr.Element(W.rPr) != null) + pPr.Element(W.rPr).ReplaceWith(rPr); + else + pPr.AddFirst(rPr); + } + else + { + throw new OpenXmlPowerToolsException("Internal error"); + } + + return pPr; + } + + return new XElement(element.Name, + element.Attributes(), + element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings))); + } + + return node; + } + + private static void CoalesceAdjacentRunsWithIdenticalFormatting(XDocument xDoc) + { + IEnumerable paras = xDoc.Root.DescendantsTrimmed(W.txbxContent).Where(d => d.Name == W.p); + foreach (XElement para in paras) + { + XElement newPara = WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(para); + para.ReplaceNodes(newPara.Nodes()); + } + } + + private static void IgnorePt14Namespace(XElement root) + { + if (root.Attribute(XNamespace.Xmlns + "pt14") == null) + { + root.Add(new XAttribute(XNamespace.Xmlns + "pt14", PtOpenXml.pt.NamespaceName)); + } + + var ignorable = (string) root.Attribute(MC.Ignorable); + if (ignorable != null) + { + string[] list = ignorable.Split(' '); + if (!list.Contains("pt14")) + { + ignorable += " pt14"; + root.Attribute(MC.Ignorable).Value = ignorable; + } + } + else + { + root.Add(new XAttribute(MC.Ignorable, "pt14")); + } + } + + private static void ProcessFootnoteEndnote( + WmlComparerSettings settings, + List listOfComparisonUnitAtoms, + MainDocumentPart mainDocumentPartBefore, + MainDocumentPart mainDocumentPartAfter, + XDocument mainDocumentXDoc) + { + FootnotesPart footnotesPartBefore = mainDocumentPartBefore.FootnotesPart; + EndnotesPart endnotesPartBefore = mainDocumentPartBefore.EndnotesPart; + FootnotesPart footnotesPartAfter = mainDocumentPartAfter.FootnotesPart; + EndnotesPart endnotesPartAfter = mainDocumentPartAfter.EndnotesPart; + + XDocument footnotesPartBeforeXDoc = null; + if (footnotesPartBefore != null) + footnotesPartBeforeXDoc = footnotesPartBefore.GetXDocument(); + XDocument footnotesPartAfterXDoc = null; + if (footnotesPartAfter != null) + footnotesPartAfterXDoc = footnotesPartAfter.GetXDocument(); + XDocument endnotesPartBeforeXDoc = null; + if (endnotesPartBefore != null) + endnotesPartBeforeXDoc = endnotesPartBefore.GetXDocument(); + XDocument endnotesPartAfterXDoc = null; + if (endnotesPartAfter != null) + endnotesPartAfterXDoc = endnotesPartAfter.GetXDocument(); + + List possiblyModifiedFootnotesEndNotes = listOfComparisonUnitAtoms + .Where(cua => + cua.ContentElement.Name == W.footnoteReference || + cua.ContentElement.Name == W.endnoteReference) + .ToList(); + + foreach (ComparisonUnitAtom fn in possiblyModifiedFootnotesEndNotes) + { + string beforeId = null; + if (fn.ContentElementBefore != null) + beforeId = (string) fn.ContentElementBefore.Attribute(W.id); + var afterId = (string) fn.ContentElement.Attribute(W.id); + + XElement footnoteEndnoteBefore = null; + XElement footnoteEndnoteAfter = null; + OpenXmlPart partToUseBefore = null; + OpenXmlPart partToUseAfter = null; + XDocument partToUseBeforeXDoc = null; + XDocument partToUseAfterXDoc = null; + + if (fn.CorrelationStatus == CorrelationStatus.Equal) + { + if (fn.ContentElement.Name == W.footnoteReference) + { + footnoteEndnoteBefore = footnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string) fnn.Attribute(W.id) == beforeId); + footnoteEndnoteAfter = footnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string) fnn.Attribute(W.id) == afterId); + partToUseBefore = footnotesPartBefore; + partToUseAfter = footnotesPartAfter; + partToUseBeforeXDoc = footnotesPartBeforeXDoc; + partToUseAfterXDoc = footnotesPartAfterXDoc; + } + else + { + footnoteEndnoteBefore = endnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string) fnn.Attribute(W.id) == beforeId); + footnoteEndnoteAfter = endnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string) fnn.Attribute(W.id) == afterId); + partToUseBefore = endnotesPartBefore; + partToUseAfter = endnotesPartAfter; + partToUseBeforeXDoc = endnotesPartBeforeXDoc; + partToUseAfterXDoc = endnotesPartAfterXDoc; + } + + AddSha1HashToBlockLevelContent(partToUseBefore, footnoteEndnoteBefore, settings); + AddSha1HashToBlockLevelContent(partToUseAfter, footnoteEndnoteAfter, settings); + + ComparisonUnitAtom[] fncal1 = CreateComparisonUnitAtomList(partToUseBefore, footnoteEndnoteBefore, settings); + ComparisonUnit[] fncus1 = GetComparisonUnitList(fncal1, settings); + + ComparisonUnitAtom[] fncal2 = CreateComparisonUnitAtomList(partToUseAfter, footnoteEndnoteAfter, settings); + ComparisonUnit[] fncus2 = GetComparisonUnitList(fncal2, settings); + + if (!(fncus1.Length == 0 && fncus2.Length == 0)) + { + List fnCorrelatedSequence = Lcs(fncus1, fncus2, settings); + + if (False) + { + var sb = new StringBuilder(); + foreach (CorrelatedSequence item in fnCorrelatedSequence) + sb.Append(item).Append(Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // for any deleted or inserted rows, we go into the w:trPr properties, and add the appropriate w:ins or w:del element, and therefore + // when generating the document, the appropriate row will be marked as deleted or inserted. + MarkRowsAsDeletedOrInserted(settings, fnCorrelatedSequence); + + // the following gets a flattened list of ComparisonUnitAtoms, with status indicated in each ComparisonUnitAtom: Deleted, Inserted, or Equal + List fnListOfComparisonUnitAtoms = + FlattenToComparisonUnitAtomList(fnCorrelatedSequence, settings); + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in fnListOfComparisonUnitAtoms) + sb.Append(item + Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // hack = set the guid ID of the table, row, or cell from the 'before' document to be equal to the 'after' document. + + // note - we don't want to do the hack until after flattening all of the groups. At the end of the flattening, we should simply + // have a list of ComparisonUnitAtoms, appropriately marked as equal, inserted, or deleted. + + // the table id will be hacked in the normal course of events. + // in the case where a row is deleted, not necessary to hack - the deleted row ID will do. + // in the case where a row is inserted, not necessary to hack - the inserted row ID will do as well. + AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(fnListOfComparisonUnitAtoms); + + object newFootnoteEndnoteChildren = + ProduceNewWmlMarkupFromCorrelatedSequence(partToUseAfter, fnListOfComparisonUnitAtoms, settings); + var tempElement = new XElement(W.body, newFootnoteEndnoteChildren); + bool hasFootnoteReference = tempElement.Descendants(W.r).Any(r => + { + var b = false; + if ((string) r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == + "FootnoteReference") + b = true; + if (r.Descendants(W.footnoteRef).Any()) + b = true; + return b; + }); + if (!hasFootnoteReference) + { + XElement firstPara = tempElement.Descendants(W.p).FirstOrDefault(); + if (firstPara != null) + { + XElement firstRun = firstPara.Element(W.r); + if (firstRun != null) + { + if (fn.ContentElement.Name == W.footnoteReference) + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "FootnoteReference"))), + new XElement(W.footnoteRef))); + else + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "EndnoteReference"))), + new XElement(W.endnoteRef))); + } + } + } + + var newTempElement = (XElement) WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement); + XElement newContentElement = newTempElement.Descendants() + .FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote); + if (newContentElement == null) + throw new OpenXmlPowerToolsException("Internal error"); + + footnoteEndnoteAfter.ReplaceNodes(newContentElement.Nodes()); + } + } + else if (fn.CorrelationStatus == CorrelationStatus.Inserted) + { + if (fn.ContentElement.Name == W.footnoteReference) + { + footnoteEndnoteAfter = footnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string) fnn.Attribute(W.id) == afterId); + partToUseAfter = footnotesPartAfter; + partToUseAfterXDoc = footnotesPartAfterXDoc; + } + else + { + footnoteEndnoteAfter = endnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string) fnn.Attribute(W.id) == afterId); + partToUseAfter = endnotesPartAfter; + partToUseAfterXDoc = endnotesPartAfterXDoc; + } + + AddSha1HashToBlockLevelContent(partToUseAfter, footnoteEndnoteAfter, settings); + + ComparisonUnitAtom[] fncal2 = CreateComparisonUnitAtomList(partToUseAfter, footnoteEndnoteAfter, settings); + ComparisonUnit[] fncus2 = GetComparisonUnitList(fncal2, settings); + + var insertedCorrSequ = new List + { + new CorrelatedSequence + { + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = fncus2, + CorrelationStatus = CorrelationStatus.Inserted + } + }; + + if (False) + { + var sb = new StringBuilder(); + foreach (CorrelatedSequence item in insertedCorrSequ) + sb.Append(item).Append(Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + MarkRowsAsDeletedOrInserted(settings, insertedCorrSequ); + + List fnListOfComparisonUnitAtoms = + FlattenToComparisonUnitAtomList(insertedCorrSequ, settings); + + AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(fnListOfComparisonUnitAtoms); + + object newFootnoteEndnoteChildren = ProduceNewWmlMarkupFromCorrelatedSequence(partToUseAfter, + fnListOfComparisonUnitAtoms, settings); + var tempElement = new XElement(W.body, newFootnoteEndnoteChildren); + bool hasFootnoteReference = tempElement.Descendants(W.r).Any(r => + { + var b = false; + if ((string) r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == + "FootnoteReference") + b = true; + if (r.Descendants(W.footnoteRef).Any()) + b = true; + return b; + }); + if (!hasFootnoteReference) + { + XElement firstPara = tempElement.Descendants(W.p).FirstOrDefault(); + if (firstPara != null) + { + XElement firstRun = firstPara.Descendants(W.r).FirstOrDefault(); + if (firstRun != null) + { + if (fn.ContentElement.Name == W.footnoteReference) + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "FootnoteReference"))), + new XElement(W.footnoteRef))); + else + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "EndnoteReference"))), + new XElement(W.endnoteRef))); + } + } + } + + var newTempElement = (XElement) WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement); + XElement newContentElement = newTempElement + .Descendants() + .FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote); + if (newContentElement == null) + throw new OpenXmlPowerToolsException("Internal error"); + + footnoteEndnoteAfter.ReplaceNodes(newContentElement.Nodes()); + } + else if (fn.CorrelationStatus == CorrelationStatus.Deleted) + { + if (fn.ContentElement.Name == W.footnoteReference) + { + footnoteEndnoteBefore = footnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string) fnn.Attribute(W.id) == afterId); + partToUseAfter = footnotesPartAfter; + partToUseAfterXDoc = footnotesPartAfterXDoc; + } + else + { + footnoteEndnoteBefore = endnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string) fnn.Attribute(W.id) == afterId); + partToUseBefore = endnotesPartBefore; + partToUseBeforeXDoc = endnotesPartBeforeXDoc; + } + + AddSha1HashToBlockLevelContent(partToUseBefore, footnoteEndnoteBefore, settings); + + ComparisonUnitAtom[] fncal2 = CreateComparisonUnitAtomList(partToUseBefore, footnoteEndnoteBefore, settings); + ComparisonUnit[] fncus2 = GetComparisonUnitList(fncal2, settings); + + var deletedCorrSequ = new List + { + new CorrelatedSequence + { + ComparisonUnitArray1 = fncus2, + ComparisonUnitArray2 = null, + CorrelationStatus = CorrelationStatus.Deleted + } + }; + + if (False) + { + var sb = new StringBuilder(); + foreach (CorrelatedSequence item in deletedCorrSequ) + sb.Append(item).Append(Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + MarkRowsAsDeletedOrInserted(settings, deletedCorrSequ); + + List fnListOfComparisonUnitAtoms = + FlattenToComparisonUnitAtomList(deletedCorrSequ, settings); + + if (fnListOfComparisonUnitAtoms.Any()) + { + AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(fnListOfComparisonUnitAtoms); + + object newFootnoteEndnoteChildren = ProduceNewWmlMarkupFromCorrelatedSequence(partToUseBefore, + fnListOfComparisonUnitAtoms, settings); + var tempElement = new XElement(W.body, newFootnoteEndnoteChildren); + bool hasFootnoteReference = tempElement.Descendants(W.r).Any(r => + { + var b = false; + if ((string) r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == + "FootnoteReference") + b = true; + if (r.Descendants(W.footnoteRef).Any()) + b = true; + return b; + }); + if (!hasFootnoteReference) + { + XElement firstPara = tempElement.Descendants(W.p).FirstOrDefault(); + if (firstPara != null) + { + XElement firstRun = firstPara.Descendants(W.r).FirstOrDefault(); + if (firstRun != null) + { + if (fn.ContentElement.Name == W.footnoteReference) + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "FootnoteReference"))), + new XElement(W.footnoteRef))); + else + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "EndnoteReference"))), + new XElement(W.endnoteRef))); + } + } + } + + var newTempElement = (XElement) WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement); + XElement newContentElement = newTempElement.Descendants() + .FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote); + if (newContentElement == null) + throw new OpenXmlPowerToolsException("Internal error"); + + footnoteEndnoteBefore.ReplaceNodes(newContentElement.Nodes()); + } + } + else + { + throw new OpenXmlPowerToolsException("Internal error"); + } + } + } + + private static void RectifyFootnoteEndnoteIds( + MainDocumentPart mainDocumentPartBefore, + MainDocumentPart mainDocumentPartAfter, + MainDocumentPart mainDocumentPartWithRevisions, + XDocument mainDocumentXDoc, + WmlComparerSettings settings) + { + FootnotesPart footnotesPartBefore = mainDocumentPartBefore.FootnotesPart; + EndnotesPart endnotesPartBefore = mainDocumentPartBefore.EndnotesPart; + FootnotesPart footnotesPartAfter = mainDocumentPartAfter.FootnotesPart; + EndnotesPart endnotesPartAfter = mainDocumentPartAfter.EndnotesPart; + FootnotesPart footnotesPartWithRevisions = mainDocumentPartWithRevisions.FootnotesPart; + EndnotesPart endnotesPartWithRevisions = mainDocumentPartWithRevisions.EndnotesPart; + + XDocument footnotesPartBeforeXDoc = null; + if (footnotesPartBefore != null) + footnotesPartBeforeXDoc = footnotesPartBefore.GetXDocument(); + + XDocument footnotesPartAfterXDoc = null; + if (footnotesPartAfter != null) + footnotesPartAfterXDoc = footnotesPartAfter.GetXDocument(); + + XDocument footnotesPartWithRevisionsXDoc = null; + if (footnotesPartWithRevisions != null) + { + footnotesPartWithRevisionsXDoc = footnotesPartWithRevisions.GetXDocument(); + footnotesPartWithRevisionsXDoc + .Root + .Elements(W.footnote) + .Where(e => (string) e.Attribute(W.id) != "-1" && (string) e.Attribute(W.id) != "0") + .Remove(); + } + + XDocument endnotesPartBeforeXDoc = null; + if (endnotesPartBefore != null) + endnotesPartBeforeXDoc = endnotesPartBefore.GetXDocument(); + + XDocument endnotesPartAfterXDoc = null; + if (endnotesPartAfter != null) + endnotesPartAfterXDoc = endnotesPartAfter.GetXDocument(); + + XDocument endnotesPartWithRevisionsXDoc = null; + if (endnotesPartWithRevisions != null) + { + endnotesPartWithRevisionsXDoc = endnotesPartWithRevisions.GetXDocument(); + endnotesPartWithRevisionsXDoc + .Root + .Elements(W.endnote) + .Where(e => (string) e.Attribute(W.id) != "-1" && (string) e.Attribute(W.id) != "0") + .Remove(); + } + + var footnotesRefs = mainDocumentXDoc + .Descendants(W.footnoteReference) + .Select((fn, idx) => + { + return new + { + FootNote = fn, + Idx = idx + }; + }); + + foreach (var fn in footnotesRefs) + { + var oldId = (string) fn.FootNote.Attribute(W.id); + string newId = (fn.Idx + 1).ToString(); + fn.FootNote.Attribute(W.id).Value = newId; + XElement footnote = footnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(e => (string) e.Attribute(W.id) == oldId); + if (footnote == null) + { + footnote = footnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(e => (string) e.Attribute(W.id) == oldId); + } + + if (footnote == null) + throw new OpenXmlPowerToolsException("Internal error"); + + var cloned = new XElement(footnote); + cloned.Attribute(W.id).Value = newId; + footnotesPartWithRevisionsXDoc + .Root + .Add(cloned); + } + + var endnotesRefs = mainDocumentXDoc + .Descendants(W.endnoteReference) + .Select((fn, idx) => + { + return new + { + Endnote = fn, + Idx = idx + }; + }); + + foreach (var fn in endnotesRefs) + { + var oldId = (string) fn.Endnote.Attribute(W.id); + string newId = (fn.Idx + 1).ToString(); + fn.Endnote.Attribute(W.id).Value = newId; + XElement endnote = endnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(e => (string) e.Attribute(W.id) == oldId); + if (endnote == null) + { + endnote = endnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(e => (string) e.Attribute(W.id) == oldId); + } + + if (endnote == null) + throw new OpenXmlPowerToolsException("Internal error"); + + var cloned = new XElement(endnote); + cloned.Attribute(W.id).Value = newId; + endnotesPartWithRevisionsXDoc + .Root + .Add(cloned); + } + + if (footnotesPartWithRevisionsXDoc != null) + { + MarkContentAsDeletedOrInserted(footnotesPartWithRevisionsXDoc, settings); + CoalesceAdjacentRunsWithIdenticalFormatting(footnotesPartWithRevisionsXDoc); + var newXDocRoot = + (XElement) WordprocessingMLUtil.WmlOrderElementsPerStandard(footnotesPartWithRevisionsXDoc.Root); + footnotesPartWithRevisionsXDoc.Root.ReplaceWith(newXDocRoot); + IgnorePt14Namespace(footnotesPartWithRevisionsXDoc.Root); + footnotesPartWithRevisions.PutXDocument(); + } + + if (endnotesPartWithRevisionsXDoc != null) + { + MarkContentAsDeletedOrInserted(endnotesPartWithRevisionsXDoc, settings); + CoalesceAdjacentRunsWithIdenticalFormatting(endnotesPartWithRevisionsXDoc); + var newXDocRoot = (XElement) WordprocessingMLUtil.WmlOrderElementsPerStandard(endnotesPartWithRevisionsXDoc.Root); + endnotesPartWithRevisionsXDoc.Root.ReplaceWith(newXDocRoot); + IgnorePt14Namespace(endnotesPartWithRevisionsXDoc.Root); + endnotesPartWithRevisions.PutXDocument(); + } + } + + private static void ConjoinDeletedInsertedParagraphMarks(MainDocumentPart mainDocumentPart, XDocument newXDoc) + { + ConjoinMultipleParagraphMarks(newXDoc); + if (mainDocumentPart.FootnotesPart != null) + { + XDocument fnXDoc = mainDocumentPart.FootnotesPart.GetXDocument(); + ConjoinMultipleParagraphMarks(fnXDoc); + mainDocumentPart.FootnotesPart.PutXDocument(); + } + + if (mainDocumentPart.EndnotesPart != null) + { + XDocument fnXDoc = mainDocumentPart.EndnotesPart.GetXDocument(); + ConjoinMultipleParagraphMarks(fnXDoc); + mainDocumentPart.EndnotesPart.PutXDocument(); + } + } + + // it is possible, per the algorithm, for the algorithm to find that the paragraph mark for a single paragraph has been + // inserted and deleted. If the algorithm sets them to equal, then sometimes it will equate paragraph marks that should + // not be equated. + + private static void ConjoinMultipleParagraphMarks(XDocument xDoc) + { + object newRoot = ConjoinTransform(xDoc.Root); + xDoc.Root?.ReplaceWith(newRoot); + } + + private static object ConjoinTransform(XNode node) + { + if (node is XElement element) + { + if (element.Name == W.p && element.Elements(W.pPr).Count() >= 2) + { + var pPr = new XElement(element.Elements(W.pPr).First()); + pPr.Elements(W.rPr).Elements().Where(r => r.Name == W.ins || r.Name == W.del).Remove(); + pPr.Attributes(PtOpenXml.Status).Remove(); + var newPara = new XElement(W.p, + element.Attributes(), + pPr, + element.Elements().Where(c => c.Name != W.pPr)); + return newPara; + } + + return new XElement(element.Name, + element.Attributes(), + element.Nodes().Select(ConjoinTransform)); + } + + return node; + } + + private static void FixUpRevisionIds(WordprocessingDocument wDocWithRevisions, XDocument newXDoc) + { + IEnumerable footnoteRevisions = Enumerable.Empty(); + if (wDocWithRevisions.MainDocumentPart.FootnotesPart != null) + { + XDocument fnxd = wDocWithRevisions.MainDocumentPart.FootnotesPart.GetXDocument(); + footnoteRevisions = fnxd + .Descendants() + .Where(d => d.Name == W.ins || d.Name == W.del); + } + + IEnumerable endnoteRevisions = Enumerable.Empty(); + if (wDocWithRevisions.MainDocumentPart.EndnotesPart != null) + { + XDocument fnxd = wDocWithRevisions.MainDocumentPart.EndnotesPart.GetXDocument(); + endnoteRevisions = fnxd + .Descendants() + .Where(d => d.Name == W.ins || d.Name == W.del); + } + + IEnumerable mainRevisions = newXDoc + .Descendants() + .Where(d => d.Name == W.ins || d.Name == W.del); + var allRevisions = mainRevisions + .Concat(footnoteRevisions) + .Concat(endnoteRevisions) + .Select((r, i) => + { + return new + { + Rev = r, + Idx = i + 1 + }; + }); + foreach (var item in allRevisions) + item.Rev.Attribute(W.id).Value = item.Idx.ToString(); + if (wDocWithRevisions.MainDocumentPart.FootnotesPart != null) + wDocWithRevisions.MainDocumentPart.FootnotesPart.PutXDocument(); + if (wDocWithRevisions.MainDocumentPart.EndnotesPart != null) + wDocWithRevisions.MainDocumentPart.EndnotesPart.PutXDocument(); + } + + private static void MoveLastSectPrToChildOfBody(XDocument newXDoc) + { + XElement lastParaWithSectPr = newXDoc + .Root + .Elements(W.body) + .Elements(W.p) + .Where(p => p.Elements(W.pPr).Elements(W.sectPr).Any()) + .LastOrDefault(); + if (lastParaWithSectPr != null) + { + newXDoc.Root.Element(W.body).Add(lastParaWithSectPr.Elements(W.pPr).Elements(W.sectPr)); + lastParaWithSectPr.Elements(W.pPr).Elements(W.sectPr).Remove(); + } + } + + private static void FixUpFootnotesEndnotesWithCustomMarkers(WordprocessingDocument wDocWithRevisions) + { +#if FALSE + +// this needs to change + + + + + + + + + + + + + + + + + + + + + + + + 1 + + + + // to this + + + + + + + + + + + + 1 + + +#endif + + // this is pretty random - a bug in Word prevents display of a document if the delText element does not immediately follow the footnoteReference element, in the same run. + XDocument mainXDoc = wDocWithRevisions.MainDocumentPart.GetXDocument(); + var newRoot = (XElement) FootnoteEndnoteReferenceCleanupTransform(mainXDoc.Root); + mainXDoc.Root?.ReplaceWith(newRoot); + wDocWithRevisions.MainDocumentPart.PutXDocument(); + } + + private static object FootnoteEndnoteReferenceCleanupTransform(XNode node) + { + var element = node as XElement; + if (element != null) + { + // small optimization to eliminate the work for most elements + if (element.Element(W.del) != null || element.Element(W.ins) != null) + { + bool hasFootnoteEndnoteReferencesThatNeedCleanedUp = element + .Elements() + .Where(e => e.Name == W.del || e.Name == W.ins) + .Elements(W.r) + .Elements() + .Where(e => e.Name == W.footnoteReference || e.Name == W.endnoteReference) + .Attributes(W.customMarkFollows) + .Any(); + + if (hasFootnoteEndnoteReferencesThatNeedCleanedUp) + { + var clone = new XElement(element.Name, + element.Attributes(), + element.Nodes().Select(n => FootnoteEndnoteReferenceCleanupTransform(n))); + IEnumerable footnoteEndnoteReferencesToAdjust = clone + .Descendants() + .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference) + .Where(d => d.Attribute(W.customMarkFollows) != null); + foreach (XElement fnenr in footnoteEndnoteReferencesToAdjust) + { + XElement par = fnenr.Parent; + XElement gp = fnenr.Parent.Parent; + if (par.Name == W.r && + gp.Name == W.del) + { + if (par.Element(W.delText) != null) + continue; + + XElement afterGp = gp.ElementsAfterSelf().FirstOrDefault(); + if (afterGp == null) + continue; + + IEnumerable afterGpDelText = afterGp.Elements(W.r).Elements(W.delText); + if (afterGpDelText.Any()) + { + par.Add(afterGpDelText); // this will clone and add to run that contains the reference + afterGpDelText.Remove(); // this leaves an empty run, does not matter. + } + } + + if (par.Name == W.r && + gp.Name == W.ins) + { + if (par.Element(W.t) != null) + continue; + + XElement afterGp = gp.ElementsAfterSelf().FirstOrDefault(); + if (afterGp == null) + continue; + + IEnumerable afterGpText = afterGp.Elements(W.r).Elements(W.t); + if (afterGpText.Any()) + { + par.Add(afterGpText); // this will clone and add to run that contains the reference + afterGpText.Remove(); // this leaves an empty run, does not matter. + } + } + } + + return clone; + } + } + else + { + return new XElement(element.Name, + element.Attributes(), + element.Nodes().Select(n => FootnoteEndnoteReferenceCleanupTransform(n))); + } + } + + return node; + } + + private static void FixUpRevMarkIds(WordprocessingDocument wDoc) + { + IEnumerable revMarksToChange = wDoc + .ContentParts() + .Select(cp => cp.GetXDocument()) + .Select(xd => xd.Descendants().Where(d => d.Name == W.ins || d.Name == W.del)) + .SelectMany(m => m); + var nextId = 0; + foreach (XElement item in revMarksToChange) + { + XAttribute idAtt = item.Attribute(W.id); + if (idAtt != null) + idAtt.Value = nextId++.ToString(); + } + + foreach (OpenXmlPart cp in wDoc.ContentParts()) + cp.PutXDocument(); + } + + private static void FixUpDocPrIds(WordprocessingDocument wDoc) + { + XName elementToFind = WP.docPr; + IEnumerable docPrToChange = wDoc + .ContentParts() + .Select(cp => cp.GetXDocument()) + .Select(xd => xd.Descendants().Where(d => d.Name == elementToFind)) + .SelectMany(m => m); + var nextId = 1; + foreach (XElement item in docPrToChange) + { + XAttribute idAtt = item.Attribute("id"); + if (idAtt != null) + idAtt.Value = nextId++.ToString(); + } + + foreach (OpenXmlPart cp in wDoc.ContentParts()) + cp.PutXDocument(); + } + + private static void FixUpShapeIds(WordprocessingDocument wDoc) + { + XName elementToFind = VML.shape; + IEnumerable shapeIdsToChange = wDoc + .ContentParts() + .Select(cp => cp.GetXDocument()) + .Select(xd => xd.Descendants().Where(d => d.Name == elementToFind)) + .SelectMany(m => m); + var nextId = 1; + foreach (XElement item in shapeIdsToChange) + { + int thisId = nextId++; + + XAttribute idAtt = item.Attribute("id"); + if (idAtt != null) + idAtt.Value = thisId.ToString(); + + XElement oleObject = item.Parent.Element(O.OLEObject); + if (oleObject != null) + { + XAttribute shapeIdAtt = oleObject.Attribute("ShapeID"); + if (shapeIdAtt != null) + shapeIdAtt.Value = thisId.ToString(); + } + } + + foreach (OpenXmlPart cp in wDoc.ContentParts()) + cp.PutXDocument(); + } + + private static void FixUpShapeTypeIds(WordprocessingDocument wDoc) + { + XName elementToFind = VML.shapetype; + IEnumerable shapeTypeIdsToChange = wDoc + .ContentParts() + .Select(cp => cp.GetXDocument()) + .Select(xd => xd.Descendants().Where(d => d.Name == elementToFind)) + .SelectMany(m => m); + var nextId = 1; + foreach (XElement item in shapeTypeIdsToChange) + { + int thisId = nextId++; + + XAttribute idAtt = item.Attribute("id"); + if (idAtt != null) + idAtt.Value = thisId.ToString(); + + XElement shape = item.Parent.Element(VML.shape); + if (shape != null) + { + XAttribute typeAtt = shape.Attribute("type"); + if (typeAtt != null) + typeAtt.Value = thisId.ToString(); + } + } + + foreach (OpenXmlPart cp in wDoc.ContentParts()) + cp.PutXDocument(); + } + + private static void AddFootnotesEndnotesStyles(WordprocessingDocument wDocWithRevisions) + { + XDocument mainXDoc = wDocWithRevisions.MainDocumentPart.GetXDocument(); + bool hasFootnotes = mainXDoc.Descendants(W.footnoteReference).Any(); + bool hasEndnotes = mainXDoc.Descendants(W.endnoteReference).Any(); + StyleDefinitionsPart styleDefinitionsPart = wDocWithRevisions.MainDocumentPart.StyleDefinitionsPart; + XDocument sXDoc = styleDefinitionsPart.GetXDocument(); + if (hasFootnotes) + { + XElement footnoteTextStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string) s.Attribute(W.styleId) == "FootnoteText"); + if (footnoteTextStyle == null) + { + var footnoteTextStyleMarkup = + @" + + + + + + + + + + + + + + "; + XElement ftsElement = XElement.Parse(footnoteTextStyleMarkup); + sXDoc.Root.Add(ftsElement); + } + + XElement footnoteTextCharStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string) s.Attribute(W.styleId) == "FootnoteTextChar"); + if (footnoteTextCharStyle == null) + { + var footnoteTextCharStyleMarkup = + @" + + + + + + + + + + "; + XElement fntcsElement = XElement.Parse(footnoteTextCharStyleMarkup); + sXDoc.Root.Add(fntcsElement); + } + + XElement footnoteReferenceStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string) s.Attribute(W.styleId) == "FootnoteReference"); + if (footnoteReferenceStyle == null) + { + var footnoteReferenceStyleMarkup = + @" + + + + + + + + + "; + XElement fnrsElement = XElement.Parse(footnoteReferenceStyleMarkup); + sXDoc.Root.Add(fnrsElement); + } + } + + if (hasEndnotes) + { + XElement endnoteTextStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string) s.Attribute(W.styleId) == "EndnoteText"); + if (endnoteTextStyle == null) + { + var endnoteTextStyleMarkup = + @" + + + + + + + + + + + + + + "; + XElement etsElement = XElement.Parse(endnoteTextStyleMarkup); + sXDoc.Root.Add(etsElement); + } + + XElement endnoteTextCharStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string) s.Attribute(W.styleId) == "EndnoteTextChar"); + if (endnoteTextCharStyle == null) + { + var endnoteTextCharStyleMarkup = + @" + + + + + + + + + + "; + XElement entcsElement = XElement.Parse(endnoteTextCharStyleMarkup); + sXDoc.Root.Add(entcsElement); + } + + XElement endnoteReferenceStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string) s.Attribute(W.styleId) == "EndnoteReference"); + if (endnoteReferenceStyle == null) + { + var endnoteReferenceStyleMarkup = + @" + + + + + + + + + "; + XElement enrsElement = XElement.Parse(endnoteReferenceStyleMarkup); + sXDoc.Root.Add(enrsElement); + } + } + + if (hasFootnotes || hasEndnotes) + { + styleDefinitionsPart.PutXDocument(); + } + } + + private static void CopyMissingStylesFromOneDocToAnother(WordprocessingDocument wDocFrom, WordprocessingDocument wDocTo) + { + XDocument revisionsStylesXDoc = wDocTo.MainDocumentPart.StyleDefinitionsPart.GetXDocument(); + XDocument afterStylesXDoc = wDocFrom.MainDocumentPart.StyleDefinitionsPart.GetXDocument(); + foreach (XElement style in afterStylesXDoc.Root.Elements(W.style)) + { + var type = (string) style.Attribute(W.type); + var styleId = (string) style.Attribute(W.styleId); + XElement styleInRevDoc = revisionsStylesXDoc + .Root + .Elements(W.style) + .FirstOrDefault(st => (string) st.Attribute(W.type) == type && + (string) st.Attribute(W.styleId) == styleId); + if (styleInRevDoc != null) + continue; + + var cloned = new XElement(style); + if (cloned.Attribute(W._default) != null) + cloned.Attribute(W._default).Remove(); + revisionsStylesXDoc.Root.Add(cloned); + } + + wDocTo.MainDocumentPart.StyleDefinitionsPart.PutXDocument(); + } + + private static void DeleteFootnotePropertiesInSettings(WordprocessingDocument wDocWithRevisions) + { + DocumentSettingsPart settingsPart = wDocWithRevisions.MainDocumentPart.DocumentSettingsPart; + if (settingsPart != null) + { + XDocument sxDoc = settingsPart.GetXDocument(); + sxDoc.Root?.Elements().Where(e => e.Name == W.footnotePr || e.Name == W.endnotePr).Remove(); + settingsPart.PutXDocument(); + } + } + + private static object CloneForStructureHash(XNode node) + { + if (node is XElement element) + { + return new XElement(element.Name, + element.Attributes(), + element.Elements().Select(CloneForStructureHash)); + } + + return null; + } + + private static List FindCommonAtBeginningAndEnd( + CorrelatedSequence unknown, + WmlComparerSettings settings) + { + int lengthToCompare = Math.Min(unknown.ComparisonUnitArray1.Length, unknown.ComparisonUnitArray2.Length); + + int countCommonAtBeginning = unknown + .ComparisonUnitArray1 + .Take(lengthToCompare) + .Zip(unknown.ComparisonUnitArray2, + (pu1, pu2) => new + { + Pu1 = pu1, + Pu2 = pu2 + }) + .TakeWhile(pair => pair.Pu1.SHA1Hash == pair.Pu2.SHA1Hash) + .Count(); + + if (countCommonAtBeginning != 0 && countCommonAtBeginning / (double) lengthToCompare < settings.DetailThreshold) + countCommonAtBeginning = 0; + + if (countCommonAtBeginning != 0) + { + var newSequence = new List(); + + var csEqual = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Equal, + ComparisonUnitArray1 = unknown + .ComparisonUnitArray1 + .Take(countCommonAtBeginning) + .ToArray(), + ComparisonUnitArray2 = unknown + .ComparisonUnitArray2 + .Take(countCommonAtBeginning) + .ToArray() + }; + newSequence.Add(csEqual); + + int remainingLeft = unknown.ComparisonUnitArray1.Length - countCommonAtBeginning; + int remainingRight = unknown.ComparisonUnitArray2.Length - countCommonAtBeginning; + + if (remainingLeft != 0 && remainingRight == 0) + { + var csDeleted = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(countCommonAtBeginning).ToArray(), + ComparisonUnitArray2 = null + }; + newSequence.Add(csDeleted); + } + else if (remainingLeft == 0 && remainingRight != 0) + { + var csInserted = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(countCommonAtBeginning).ToArray() + }; + newSequence.Add(csInserted); + } + else if (remainingLeft != 0 && remainingRight != 0) + { + if (unknown.ComparisonUnitArray1[0] is ComparisonUnitWord first1 && + unknown.ComparisonUnitArray2[0] is ComparisonUnitWord first2) + { + // if operating at the word level and + // if the last word on the left != pPr && last word on right != pPr + // then create an unknown for the rest of the paragraph, and create an unknown for the rest of the unknown + // if the last word on the left != pPr and last word on right == pPr + // then create deleted for the left, and create an unknown for the rest of the unknown + // if the last word on the left == pPr and last word on right != pPr + // then create inserted for the right, and create an unknown for the rest of the unknown + // if the last word on the left == pPr and last word on right == pPr + // then create an unknown for the rest of the unknown + + ComparisonUnit[] remainingInLeft = unknown + .ComparisonUnitArray1 + .Skip(countCommonAtBeginning) + .ToArray(); + + ComparisonUnit[] remainingInRight = unknown + .ComparisonUnitArray2 + .Skip(countCommonAtBeginning) + .ToArray(); + + ComparisonUnitAtom lastContentAtomLeft = unknown.ComparisonUnitArray1[countCommonAtBeginning - 1] + .DescendantContentAtoms() + .FirstOrDefault(); + + ComparisonUnitAtom lastContentAtomRight = unknown.ComparisonUnitArray2[countCommonAtBeginning - 1] + .DescendantContentAtoms() + .FirstOrDefault(); + + if (lastContentAtomLeft?.ContentElement.Name != W.pPr && lastContentAtomRight?.ContentElement.Name != W.pPr) + { + List split1 = SplitAtParagraphMark(remainingInLeft); + List split2 = SplitAtParagraphMark(remainingInRight); + if (split1.Count() == 1 && split2.Count() == 1) + { + var csUnknown2 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = split1.First(), + ComparisonUnitArray2 = split2.First() + }; + newSequence.Add(csUnknown2); + return newSequence; + } + + if (split1.Count == 2 && split2.Count == 2) + { + var csUnknown2 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = split1.First(), + ComparisonUnitArray2 = split2.First() + }; + newSequence.Add(csUnknown2); + + var csUnknown3 = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = split1.Skip(1).First(), + ComparisonUnitArray2 = split2.Skip(1).First() + }; + newSequence.Add(csUnknown3); + + return newSequence; + } + } + } + + var csUnknown = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(countCommonAtBeginning).ToArray(), + ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(countCommonAtBeginning).ToArray() + }; + newSequence.Add(csUnknown); + } + else if (remainingLeft == 0 && remainingRight == 0) + { + // nothing to do + } + + return newSequence; + } + + // if we get to here, then countCommonAtBeginning == 0 + + int countCommonAtEnd = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(lengthToCompare) + .Zip(unknown + .ComparisonUnitArray2 + .Reverse() + .Take(lengthToCompare), + (pu1, pu2) => new + { + Pu1 = pu1, + Pu2 = pu2 + }) + .TakeWhile(pair => pair.Pu1.SHA1Hash == pair.Pu2.SHA1Hash) + .Count(); + + // never start a common section with a paragraph mark. However, it is OK to set two paragraph marks as equal. + while (true) + { + if (countCommonAtEnd <= 1) + break; + + ComparisonUnit firstCommon = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(countCommonAtEnd) + .LastOrDefault(); + + if (!(firstCommon is ComparisonUnitWord firstCommonWord)) + break; + + // if the word contains more than one atom, then not a paragraph mark + if (firstCommonWord.Contents.Count() != 1) + break; + + if (!(firstCommonWord.Contents.First() is ComparisonUnitAtom firstCommonAtom)) + break; + + if (firstCommonAtom.ContentElement.Name != W.pPr) + break; + + countCommonAtEnd--; + } + + var isOnlyParagraphMark = false; + if (countCommonAtEnd == 1) + { + ComparisonUnit firstCommon = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(countCommonAtEnd) + .LastOrDefault(); + + if (firstCommon is ComparisonUnitWord firstCommonWord) + { + // if the word contains more than one atom, then not a paragraph mark + if (firstCommonWord.Contents.Count == 1) + { + if (firstCommonWord.Contents.First() is ComparisonUnitAtom firstCommonAtom) + { + if (firstCommonAtom.ContentElement.Name == W.pPr) + isOnlyParagraphMark = true; + } + } + } + } + + if (countCommonAtEnd == 2) + { + ComparisonUnit firstCommon = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(countCommonAtEnd) + .LastOrDefault(); + + ComparisonUnit secondCommon = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(countCommonAtEnd) + .FirstOrDefault(); + + if (firstCommon is ComparisonUnitWord firstCommonWord && secondCommon is ComparisonUnitWord secondCommonWord) + { + // if the word contains more than one atom, then not a paragraph mark + if (firstCommonWord.Contents.Count == 1 && secondCommonWord.Contents.Count == 1) + { + if (firstCommonWord.Contents.First() is ComparisonUnitAtom firstCommonAtom && + secondCommonWord.Contents.First() is ComparisonUnitAtom secondCommonAtom) + { + if (secondCommonAtom.ContentElement.Name == W.pPr) + isOnlyParagraphMark = true; + } + } + } + } + + if (!isOnlyParagraphMark && countCommonAtEnd != 0 && + countCommonAtEnd / (double) lengthToCompare < settings.DetailThreshold) + { + countCommonAtEnd = 0; + } + + // If the following test is not there, the test below sets the end paragraph mark of the entire document equal to the end paragraph + // mark of the first paragraph in the other document, causing lines to be out of order. + // [InlineData("WC010-Para-Before-Table-Unmodified.docx", "WC010-Para-Before-Table-Mod.docx", 3)] + if (isOnlyParagraphMark) + { + countCommonAtEnd = 0; + } + + if (countCommonAtEnd == 0) + { + return null; + } + + // if countCommonAtEnd != 0, and if it contains a paragraph mark, then if there are comparison units in the same paragraph before the common at end (in either version) + // then we want to put all of those comparison units into a single unknown, where they must be resolved against each other. We don't want those comparison units to go into the middle unknown comparison unit. + + if (countCommonAtEnd != 0) + { + var remainingInLeftParagraph = 0; + var remainingInRightParagraph = 0; + + List commonEndSeq = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(countCommonAtEnd) + .Reverse() + .ToList(); + + ComparisonUnit firstOfCommonEndSeq = commonEndSeq.First(); + if (firstOfCommonEndSeq is ComparisonUnitWord) + { + // are there any paragraph marks in the common seq at end? + //if (commonEndSeq.Any(cu => cu.Contents.OfType().First().ContentElement.Name == W.pPr)) + if (commonEndSeq.Any(cu => + { + ComparisonUnitAtom firstComparisonUnitAtom = cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return false; + + return firstComparisonUnitAtom.ContentElement.Name == W.pPr; + })) + { + remainingInLeftParagraph = unknown + .ComparisonUnitArray1 + .Reverse() + .Skip(countCommonAtEnd) + .TakeWhile(cu => + { + if (!(cu is ComparisonUnitWord)) + return false; + + ComparisonUnitAtom firstComparisonUnitAtom = + cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return true; + + return firstComparisonUnitAtom.ContentElement.Name != W.pPr; + }) + .Count(); + remainingInRightParagraph = unknown + .ComparisonUnitArray2 + .Reverse() + .Skip(countCommonAtEnd) + .TakeWhile(cu => + { + if (!(cu is ComparisonUnitWord)) + return false; + + ComparisonUnitAtom firstComparisonUnitAtom = + cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return true; + + return firstComparisonUnitAtom.ContentElement.Name != W.pPr; + }) + .Count(); + } + } + + var newSequence = new List(); + + int beforeCommonParagraphLeft = unknown.ComparisonUnitArray1.Length - remainingInLeftParagraph - countCommonAtEnd; + int beforeCommonParagraphRight = + unknown.ComparisonUnitArray2.Length - remainingInRightParagraph - countCommonAtEnd; + + if (beforeCommonParagraphLeft != 0 && beforeCommonParagraphRight == 0) + { + var csDeleted = new CorrelatedSequence(); + csDeleted.CorrelationStatus = CorrelationStatus.Deleted; + csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Take(beforeCommonParagraphLeft).ToArray(); + csDeleted.ComparisonUnitArray2 = null; + newSequence.Add(csDeleted); + } + else if (beforeCommonParagraphLeft == 0 && beforeCommonParagraphRight != 0) + { + var csInserted = new CorrelatedSequence(); + csInserted.CorrelationStatus = CorrelationStatus.Inserted; + csInserted.ComparisonUnitArray1 = null; + csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Take(beforeCommonParagraphRight).ToArray(); + newSequence.Add(csInserted); + } + else if (beforeCommonParagraphLeft != 0 && beforeCommonParagraphRight != 0) + { + var csUnknown = new CorrelatedSequence(); + csUnknown.CorrelationStatus = CorrelationStatus.Unknown; + csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Take(beforeCommonParagraphLeft).ToArray(); + csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Take(beforeCommonParagraphRight).ToArray(); + newSequence.Add(csUnknown); + } + else if (beforeCommonParagraphLeft == 0 && beforeCommonParagraphRight == 0) + { + // nothing to do + } + + if (remainingInLeftParagraph != 0 && remainingInRightParagraph == 0) + { + var csDeleted = new CorrelatedSequence(); + csDeleted.CorrelationStatus = CorrelationStatus.Deleted; + csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(beforeCommonParagraphLeft) + .Take(remainingInLeftParagraph).ToArray(); + csDeleted.ComparisonUnitArray2 = null; + newSequence.Add(csDeleted); + } + else if (remainingInLeftParagraph == 0 && remainingInRightParagraph != 0) + { + var csInserted = new CorrelatedSequence(); + csInserted.CorrelationStatus = CorrelationStatus.Inserted; + csInserted.ComparisonUnitArray1 = null; + csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(beforeCommonParagraphRight) + .Take(remainingInRightParagraph).ToArray(); + newSequence.Add(csInserted); + } + else if (remainingInLeftParagraph != 0 && remainingInRightParagraph != 0) + { + var csUnknown = new CorrelatedSequence(); + csUnknown.CorrelationStatus = CorrelationStatus.Unknown; + csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(beforeCommonParagraphLeft) + .Take(remainingInLeftParagraph).ToArray(); + csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(beforeCommonParagraphRight) + .Take(remainingInRightParagraph).ToArray(); + newSequence.Add(csUnknown); + } + else if (remainingInLeftParagraph == 0 && remainingInRightParagraph == 0) + { + // nothing to do + } + + var csEqual = new CorrelatedSequence(); + csEqual.CorrelationStatus = CorrelationStatus.Equal; + csEqual.ComparisonUnitArray1 = unknown.ComparisonUnitArray1 + .Skip(unknown.ComparisonUnitArray1.Length - countCommonAtEnd).ToArray(); + csEqual.ComparisonUnitArray2 = unknown.ComparisonUnitArray2 + .Skip(unknown.ComparisonUnitArray2.Length - countCommonAtEnd).ToArray(); + newSequence.Add(csEqual); + + return newSequence; + } + + return null; +#if false + var middleLeft = unknown + .ComparisonUnitArray1 + .Skip(countCommonAtBeginning) + .SkipLast(remainingInLeftParagraph) + .SkipLast(countCommonAtEnd) + .ToArray(); + + var middleRight = unknown + .ComparisonUnitArray2 + .Skip(countCommonAtBeginning) + .SkipLast(remainingInRightParagraph) + .SkipLast(countCommonAtEnd) + .ToArray(); + + if (middleLeft.Length > 0 && middleRight.Length == 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Deleted; + cs.ComparisonUnitArray1 = middleLeft; + cs.ComparisonUnitArray2 = null; + newSequence.Add(cs); + } + else if (middleLeft.Length == 0 && middleRight.Length > 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Inserted; + cs.ComparisonUnitArray1 = null; + cs.ComparisonUnitArray2 = middleRight; + newSequence.Add(cs); + } + else if (middleLeft.Length > 0 && middleRight.Length > 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Unknown; + cs.ComparisonUnitArray1 = middleLeft; + cs.ComparisonUnitArray2 = middleRight; + newSequence.Add(cs); + } + + var remainingInParaLeft = unknown + .ComparisonUnitArray1 + .Skip(countCommonAtBeginning) + .Skip(middleLeft.Length) + .Take(remainingInLeftParagraph) + .ToArray(); + + var remainingInParaRight = unknown + .ComparisonUnitArray2 + .Skip(countCommonAtBeginning) + .Skip(middleRight.Length) + .Take(remainingInRightParagraph) + .ToArray(); + + if (remainingInParaLeft.Length > 0 && remainingInParaRight.Length == 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Deleted; + cs.ComparisonUnitArray1 = remainingInParaLeft; + cs.ComparisonUnitArray2 = null; + newSequence.Add(cs); + } + else if (remainingInParaLeft.Length == 0 && remainingInParaRight.Length > 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Inserted; + cs.ComparisonUnitArray1 = null; + cs.ComparisonUnitArray2 = remainingInParaRight; + newSequence.Add(cs); + } + else if (remainingInParaLeft.Length > 0 && remainingInParaRight.Length > 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Unknown; + cs.ComparisonUnitArray1 = remainingInParaLeft; + cs.ComparisonUnitArray2 = remainingInParaRight; + newSequence.Add(cs); + } + + if (countCommonAtEnd != 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Equal; + + cs.ComparisonUnitArray1 = unknown + .ComparisonUnitArray1 + .Skip(countCommonAtBeginning + middleLeft.Length + remainingInParaLeft.Length) + .ToArray(); + + cs.ComparisonUnitArray2 = unknown + .ComparisonUnitArray2 + .Skip(countCommonAtBeginning + middleRight.Length + remainingInParaRight.Length) + .ToArray(); + + if (cs.ComparisonUnitArray1.Length != cs.ComparisonUnitArray2.Length) + throw new OpenXmlPowerToolsException("Internal error"); + + newSequence.Add(cs); + } + return newSequence; +#endif + } + + private static List SplitAtParagraphMark(ComparisonUnit[] cua) + { + int i; + for (i = 0; i < cua.Length; i++) + { + ComparisonUnitAtom atom = cua[i].DescendantContentAtoms().FirstOrDefault(); + if (atom != null && atom.ContentElement.Name == W.pPr) + break; + } + + if (i == cua.Length) + { + return new List + { + cua + }; + } + + return new List + { + cua.Take(i).ToArray(), + cua.Skip(i).ToArray() + }; + } + + private static object CoalesceRecurse( + OpenXmlPart part, + IEnumerable list, + int level, + WmlComparerSettings settings) + { + IEnumerable> grouped = list.GroupBy(ca => + { + if (level >= ca.AncestorElements.Length) + return ""; + + return ca.AncestorUnids[level]; + }) + .Where(g => g.Key != ""); + + // if there are no deeper children, then we're done. + if (!grouped.Any()) + return null; + + if (False) + { + var sb = new StringBuilder(); + foreach (IGrouping group in grouped) + { + sb.AppendFormat("Group Key: {0}", @group.Key); + sb.Append(Environment.NewLine); + foreach (ComparisonUnitAtom groupChildItem in @group) + { + sb.Append(" "); + sb.Append(groupChildItem.ToString(0)); + sb.Append(Environment.NewLine); + } + + sb.Append(Environment.NewLine); + } + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + List elementList = grouped + .Select(g => + { + XElement ancestorBeingConstructed = + g.First().AncestorElements[level]; // these will all be the same, by definition + + // need to group by corr stat + List> groupedChildren = g + .GroupAdjacent(gc => + { + var key = ""; + if (level < gc.AncestorElements.Length - 1) + { + key = gc.AncestorUnids[level + 1]; + } + + if (gc.AncestorElements.Skip(level).Any(ae => ae.Name == W.txbxContent)) + key += "|" + CorrelationStatus.Equal.ToString(); + else + key += "|" + gc.CorrelationStatus.ToString(); + return key; + }) + .ToList(); + + if (ancestorBeingConstructed.Name == W.p) + { + List newChildElements = groupedChildren + .Select(gc => + { + string[] spl = gc.Key.Split('|'); + if (spl[0] == "") + { + return (object) gc.Select(gcc => + { + var dup = new XElement(gcc.ContentElement); + if (spl[1] == "Deleted") + dup.Add(new XAttribute(PtOpenXml.Status, "Deleted")); + else if (spl[1] == "Inserted") + dup.Add(new XAttribute(PtOpenXml.Status, "Inserted")); + return dup; + }); + } + + return CoalesceRecurse(part, gc, level + 1, settings); + }) + .ToList(); + + var newPara = new XElement(W.p, + ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt), + new XAttribute(PtOpenXml.Unid, g.Key), + newChildElements); + + return newPara; + } + + if (ancestorBeingConstructed.Name == W.r) + { + List newChildElements = groupedChildren + .Select(gc => + { + string[] spl = gc.Key.Split('|'); + if (spl[0] == "") + { + return (object) gc.Select(gcc => + { + var dup = new XElement(gcc.ContentElement); + if (spl[1] == "Deleted") + dup.Add(new XAttribute(PtOpenXml.Status, "Deleted")); + else if (spl[1] == "Inserted") + dup.Add(new XAttribute(PtOpenXml.Status, "Inserted")); + return dup; + }); + } + + return CoalesceRecurse(part, gc, level + 1, settings); + }) + .ToList(); + + XElement rPr = ancestorBeingConstructed.Element(W.rPr); + var newRun = new XElement(W.r, + ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt), + rPr, + newChildElements); + return newRun; + } + + if (ancestorBeingConstructed.Name == W.t) + { + List newChildElements = groupedChildren + .Select(gc => + { + string textOfTextElement = gc.Select(gce => gce.ContentElement.Value).StringConcatenate(); + bool del = gc.First().CorrelationStatus == CorrelationStatus.Deleted; + bool ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted; + if (del) + return (object) new XElement(W.delText, + new XAttribute(PtOpenXml.Status, "Deleted"), + GetXmlSpaceAttribute(textOfTextElement), + textOfTextElement); + if (ins) + return (object) new XElement(W.t, + new XAttribute(PtOpenXml.Status, "Inserted"), + GetXmlSpaceAttribute(textOfTextElement), + textOfTextElement); + + return (object) new XElement(W.t, + GetXmlSpaceAttribute(textOfTextElement), + textOfTextElement); + }) + .ToList(); + return newChildElements; + } + + if (ancestorBeingConstructed.Name == W.drawing) + { + List newChildElements = groupedChildren + .Select(gc => + { + bool del = gc.First().CorrelationStatus == CorrelationStatus.Deleted; + if (del) + { + return (object) gc.Select(gcc => + { + var newDrawing = new XElement(gcc.ContentElement); + newDrawing.Add(new XAttribute(PtOpenXml.Status, "Deleted")); + + OpenXmlPart openXmlPartOfDeletedContent = gc.First().Part; + OpenXmlPart openXmlPartInNewDocument = part; + return gc.Select(gce => + { + Package packageOfDeletedContent = openXmlPartOfDeletedContent.OpenXmlPackage.Package; + Package packageOfNewContent = openXmlPartInNewDocument.OpenXmlPackage.Package; + PackagePart partInDeletedDocument = packageOfDeletedContent.GetPart(part.Uri); + PackagePart partInNewDocument = packageOfNewContent.GetPart(part.Uri); + + return MoveRelatedPartsToDestination( + partInDeletedDocument, + partInNewDocument, + newDrawing); + }); + }); + } + + bool ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted; + if (ins) + { + return gc.Select(gcc => + { + var newDrawing = new XElement(gcc.ContentElement); + newDrawing.Add(new XAttribute(PtOpenXml.Status, "Inserted")); + + OpenXmlPart openXmlPartOfInsertedContent = gc.First().Part; + OpenXmlPart openXmlPartInNewDocument = part; + return gc.Select(gce => + { + Package packageOfSourceContent = openXmlPartOfInsertedContent.OpenXmlPackage.Package; + Package packageOfNewContent = openXmlPartInNewDocument.OpenXmlPackage.Package; + PackagePart partInDeletedDocument = packageOfSourceContent.GetPart(part.Uri); + PackagePart partInNewDocument = packageOfNewContent.GetPart(part.Uri); + + return MoveRelatedPartsToDestination( + partInDeletedDocument, + partInNewDocument, + newDrawing); + }); + }); + } + + return gc.Select(gcc => gcc.ContentElement); + }) + .ToList(); + + return newChildElements; + } + + if (ancestorBeingConstructed.Name == M.oMath || ancestorBeingConstructed.Name == M.oMathPara) + { + List> newChildElements = groupedChildren + .Select(gc => + { + bool del = gc.First().CorrelationStatus == CorrelationStatus.Deleted; + if (del) + { + return gc.Select(gcc => + new XElement(W.del, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, _maxId++), + new XAttribute(W.date, settings.DateTimeForRevisions), + gcc.ContentElement)); + } + + bool ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted; + if (ins) + { + return gc.Select(gcc => + new XElement(W.ins, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, _maxId++), + new XAttribute(W.date, settings.DateTimeForRevisions), + gcc.ContentElement)); + } + + return gc.Select(gcc => gcc.ContentElement); + }) + .ToList(); + return newChildElements; + } + + if (AllowableRunChildren.Contains(ancestorBeingConstructed.Name)) + { + List> newChildElements = groupedChildren + .Select(gc => + { + bool del = gc.First().CorrelationStatus == CorrelationStatus.Deleted; + bool ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted; + if (del) + { + return gc.Select(gcc => + { + var dup = new XElement(ancestorBeingConstructed.Name, + ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt), + new XAttribute(PtOpenXml.Status, "Deleted")); + return dup; + }); + } + + if (ins) + { + return gc.Select(gcc => + { + var dup = new XElement(ancestorBeingConstructed.Name, + ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt), + new XAttribute(PtOpenXml.Status, "Inserted")); + return dup; + }); + } + + return gc.Select(gcc => gcc.ContentElement); + }) + .ToList(); + return newChildElements; + } + + if (ancestorBeingConstructed.Name == W.tbl) + return ReconstructElement(part, g, ancestorBeingConstructed, W.tblPr, W.tblGrid, null, level, settings); + if (ancestorBeingConstructed.Name == W.tr) + return ReconstructElement(part, g, ancestorBeingConstructed, W.trPr, null, null, level, settings); + if (ancestorBeingConstructed.Name == W.tc) + return ReconstructElement(part, g, ancestorBeingConstructed, W.tcPr, null, null, level, settings); + if (ancestorBeingConstructed.Name == W.sdt) + return ReconstructElement(part, g, ancestorBeingConstructed, W.sdtPr, W.sdtEndPr, null, level, settings); + if (ancestorBeingConstructed.Name == W.pict) + return ReconstructElement(part, g, ancestorBeingConstructed, VML.shapetype, null, null, level, settings); + if (ancestorBeingConstructed.Name == VML.shape) + return ReconstructElement(part, g, ancestorBeingConstructed, W10.wrap, null, null, level, settings); + if (ancestorBeingConstructed.Name == W._object) + return ReconstructElement(part, g, ancestorBeingConstructed, VML.shapetype, VML.shape, O.OLEObject, level, + settings); + if (ancestorBeingConstructed.Name == W.ruby) + return ReconstructElement(part, g, ancestorBeingConstructed, W.rubyPr, null, null, level, settings); + + return (object) ReconstructElement(part, g, ancestorBeingConstructed, null, null, null, level, settings); + }) + .ToList(); + return elementList; + } + + private static XElement ReconstructElement( + OpenXmlPart part, + IGrouping g, + XElement ancestorBeingConstructed, + XName props1XName, + XName props2XName, + XName props3XName, + int level, + WmlComparerSettings settings) + { + object newChildElements = CoalesceRecurse(part, g, level + 1, settings); + + object props1 = null; + if (props1XName != null) + props1 = ancestorBeingConstructed.Elements(props1XName); + + object props2 = null; + if (props2XName != null) + props2 = ancestorBeingConstructed.Elements(props2XName); + + object props3 = null; + if (props3XName != null) + props3 = ancestorBeingConstructed.Elements(props3XName); + + var reconstructedElement = new XElement(ancestorBeingConstructed.Name, + ancestorBeingConstructed.Attributes(), + props1, props2, props3, newChildElements); + + return reconstructedElement; + } + + private static void SetAfterUnids(CorrelatedSequence unknown) + { + if (unknown.ComparisonUnitArray1.Length == 1 && unknown.ComparisonUnitArray2.Length == 1) + { + if (unknown.ComparisonUnitArray1[0] is ComparisonUnitGroup cua1 && + unknown.ComparisonUnitArray2[0] is ComparisonUnitGroup cua2 && + cua1.ComparisonUnitGroupType == cua2.ComparisonUnitGroupType) + { + ComparisonUnitGroupType groupType = cua1.ComparisonUnitGroupType; + IEnumerable da1 = cua1.DescendantContentAtoms(); + IEnumerable da2 = cua2.DescendantContentAtoms(); + XName takeThruName = null; + switch (groupType) + { + case ComparisonUnitGroupType.Paragraph: + takeThruName = W.p; + break; + case ComparisonUnitGroupType.Table: + takeThruName = W.tbl; + break; + case ComparisonUnitGroupType.Row: + takeThruName = W.tr; + break; + case ComparisonUnitGroupType.Cell: + takeThruName = W.tc; + break; + case ComparisonUnitGroupType.Textbox: + takeThruName = W.txbxContent; + break; + } + + if (takeThruName == null) + throw new OpenXmlPowerToolsException("Internal error"); + + var relevantAncestors = new List(); + foreach (XElement ae in da1.First().AncestorElements) + { + if (ae.Name != takeThruName) + { + relevantAncestors.Add(ae); + continue; + } + + relevantAncestors.Add(ae); + break; + } + + string[] unidList = relevantAncestors + .Select(a => + { + var unid = (string) a.Attribute(PtOpenXml.Unid); + if (unid == null) + throw new OpenXmlPowerToolsException("Internal error"); + + return unid; + }) + .ToArray(); + + foreach (ComparisonUnitAtom da in da2) + { + IEnumerable ancestorsToSet = da.AncestorElements.Take(unidList.Length); + var zipped = ancestorsToSet.Zip(unidList, (a, u) => + new + { + Ancestor = a, + Unid = u + }); + + foreach (var z in zipped) + { + XAttribute unid = z.Ancestor.Attribute(PtOpenXml.Unid); + + if (z.Ancestor.Name == W.footnotes || z.Ancestor.Name == W.endnotes) + continue; + + if (unid == null) + throw new OpenXmlPowerToolsException("Internal error"); + + unid.Value = z.Unid; + } + } + } + } + } + + private static List ProcessCorrelatedHashes(CorrelatedSequence unknown, WmlComparerSettings settings) + { + // never attempt this optimization if there are less than 3 groups + int maxd = Math.Min(unknown.ComparisonUnitArray1.Length, unknown.ComparisonUnitArray2.Length); + if (maxd < 3) + return null; + + if (unknown.ComparisonUnitArray1.FirstOrDefault() is ComparisonUnitGroup firstInCu1 && + unknown.ComparisonUnitArray2.FirstOrDefault() is ComparisonUnitGroup firstInCu2) + { + if ((firstInCu1.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph || + firstInCu1.ComparisonUnitGroupType == ComparisonUnitGroupType.Table || + firstInCu1.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) && + (firstInCu2.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph || + firstInCu2.ComparisonUnitGroupType == ComparisonUnitGroupType.Table || + firstInCu2.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)) + { + ComparisonUnitGroupType groupType = firstInCu1.ComparisonUnitGroupType; + + // Next want to do the lcs algorithm on this. + // potentially, we will find all paragraphs are correlated, but they may not be for two reasons- + // - if there were changes that were not tracked + // - if the anomalies in the change tracking cause there to be a mismatch in the number of paragraphs + // therefore we are going to do the whole LCS algorithm thing and at the end of the process, we set + // up the correlated sequence list where correlated paragraphs are together in their own unknown + // correlated sequence. + + ComparisonUnit[] cul1 = unknown.ComparisonUnitArray1; + ComparisonUnit[] cul2 = unknown.ComparisonUnitArray2; + var currentLongestCommonSequenceLength = 0; + var currentLongestCommonSequenceAtomCount = 0; + int currentI1 = -1; + int currentI2 = -1; + for (var i1 = 0; i1 < cul1.Length; i1++) + { + for (var i2 = 0; i2 < cul2.Length; i2++) + { + var thisSequenceLength = 0; + var thisSequenceAtomCount = 0; + int thisI1 = i1; + int thisI2 = i2; + while (true) + { + bool match = cul1[thisI1] is ComparisonUnitGroup group1 && + cul2[thisI2] is ComparisonUnitGroup group2 && + group1.ComparisonUnitGroupType == group2.ComparisonUnitGroupType && + group1.CorrelatedSHA1Hash != null && + group2.CorrelatedSHA1Hash != null && + group1.CorrelatedSHA1Hash == group2.CorrelatedSHA1Hash; + + if (match) + { + thisSequenceAtomCount += cul1[thisI1].DescendantContentAtomsCount; + thisI1++; + thisI2++; + thisSequenceLength++; + if (thisI1 == cul1.Length || thisI2 == cul2.Length) + { + if (thisSequenceAtomCount > currentLongestCommonSequenceAtomCount) + { + currentLongestCommonSequenceLength = thisSequenceLength; + currentLongestCommonSequenceAtomCount = thisSequenceAtomCount; + currentI1 = i1; + currentI2 = i2; + } + + break; + } + } + else + { + if (thisSequenceAtomCount > currentLongestCommonSequenceAtomCount) + { + currentLongestCommonSequenceLength = thisSequenceLength; + currentLongestCommonSequenceAtomCount = thisSequenceAtomCount; + currentI1 = i1; + currentI2 = i2; + } + + break; + } + } + } + } + + // here we want to have some sort of threshold, and if the currentLongestCommonSequenceLength is not + // longer than the threshold, then don't do anything + var doCorrelation = false; + if (currentLongestCommonSequenceLength == 1) + { + int numberOfAtoms1 = unknown.ComparisonUnitArray1[currentI1].DescendantContentAtoms().Count(); + int numberOfAtoms2 = unknown.ComparisonUnitArray2[currentI2].DescendantContentAtoms().Count(); + if (numberOfAtoms1 > 16 && numberOfAtoms2 > 16) + { + doCorrelation = true; + } + } + else if (currentLongestCommonSequenceLength > 1 && currentLongestCommonSequenceLength <= 3) + { + int numberOfAtoms1 = unknown + .ComparisonUnitArray1 + .Skip(currentI1) + .Take(currentLongestCommonSequenceLength) + .Select(z => z.DescendantContentAtoms().Count()) + .Sum(); + + int numberOfAtoms2 = unknown + .ComparisonUnitArray2 + .Skip(currentI2) + .Take(currentLongestCommonSequenceLength) + .Select(z => z.DescendantContentAtoms().Count()) + .Sum(); + + if (numberOfAtoms1 > 32 && numberOfAtoms2 > 32) + { + doCorrelation = true; + } + } + else if (currentLongestCommonSequenceLength > 3) + { + doCorrelation = true; + } + + if (doCorrelation) + { + var newListOfCorrelatedSequence = new List(); + + if (currentI1 > 0 && currentI2 == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = cul1.Take(currentI1).ToArray(), + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (currentI1 == 0 && currentI2 > 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = cul2.Take(currentI2).ToArray() + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (currentI1 > 0 && currentI2 > 0) + { + var unknownCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = cul1.Take(currentI1).ToArray(), + ComparisonUnitArray2 = cul2.Take(currentI2).ToArray() + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + } + else if (currentI1 == 0 && currentI2 == 0) + { + // nothing to do + } + + for (var i = 0; i < currentLongestCommonSequenceLength; i++) + { + var unknownCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = cul1 + .Skip(currentI1) + .Skip(i) + .Take(1) + .ToArray(), + ComparisonUnitArray2 = cul2 + .Skip(currentI2) + .Skip(i) + .Take(1) + .ToArray() + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + } + + int endI1 = currentI1 + currentLongestCommonSequenceLength; + int endI2 = currentI2 + currentLongestCommonSequenceLength; + + if (endI1 < cul1.Length && endI2 == cul2.Length) + { + var deletedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Deleted, + ComparisonUnitArray1 = cul1.Skip(endI1).ToArray(), + ComparisonUnitArray2 = null + }; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (endI1 == cul1.Length && endI2 < cul2.Length) + { + var insertedCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Inserted, + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = cul2.Skip(endI2).ToArray() + }; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (endI1 < cul1.Length && endI2 < cul2.Length) + { + var unknownCorrelatedSequence = new CorrelatedSequence + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = cul1.Skip(endI1).ToArray(), + ComparisonUnitArray2 = cul2.Skip(endI2).ToArray() + }; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + } + else if (endI1 == cul1.Length && endI2 == cul2.Length) + { + // nothing to do + } + + return newListOfCorrelatedSequence; + } + + return null; + } + } + + return null; + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.Util.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.Util.cs new file mode 100644 index 00000000..050e9312 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Private.Methods.Util.cs @@ -0,0 +1,103 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.IO.Packaging; +using System.Linq; +using System.Xml.Linq; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + private static XElement MoveRelatedPartsToDestination( + PackagePart partOfDeletedContent, + PackagePart partInNewDocument, + XElement contentElement) + { + List elementsToUpdate = contentElement + .Descendants() + .Where(d => d.Attributes().Any(a => ComparisonUnitWord.RelationshipAttributeNames.Contains(a.Name))) + .ToList(); + + foreach (XElement element in elementsToUpdate) + { + List attributesToUpdate = element + .Attributes() + .Where(a => ComparisonUnitWord.RelationshipAttributeNames.Contains(a.Name)) + .ToList(); + + foreach (XAttribute att in attributesToUpdate) + { + var rId = (string) att; + + PackageRelationship relationshipForDeletedPart = partOfDeletedContent.GetRelationship(rId); + + Uri targetUri = PackUriHelper + .ResolvePartUri( + new Uri(partOfDeletedContent.Uri.ToString(), UriKind.Relative), + relationshipForDeletedPart.TargetUri); + + PackagePart relatedPackagePart = partOfDeletedContent.Package.GetPart(targetUri); + string[] uriSplit = relatedPackagePart.Uri.ToString().Split('/'); + string[] last = uriSplit[uriSplit.Length - 1].Split('.'); + string uriString; + if (last.Length == 2) + { + uriString = uriSplit.SkipLast(1).Select(p => p + "/").StringConcatenate() + + "P" + Guid.NewGuid().ToString().Replace("-", "") + "." + last[1]; + } + else + { + uriString = uriSplit.SkipLast(1).Select(p => p + "/").StringConcatenate() + + "P" + Guid.NewGuid().ToString().Replace("-", ""); + } + + Uri uri = relatedPackagePart.Uri.IsAbsoluteUri + ? new Uri(uriString, UriKind.Absolute) + : new Uri(uriString, UriKind.Relative); + + PackagePart newPart = partInNewDocument.Package.CreatePart(uri, relatedPackagePart.ContentType); + + // ReSharper disable once PossibleNullReferenceException + using (Stream oldPartStream = relatedPackagePart.GetStream()) + using (Stream newPartStream = newPart.GetStream()) + { + FileUtils.CopyStream(oldPartStream, newPartStream); + } + + string newRid = "R" + Guid.NewGuid().ToString().Replace("-", ""); + partInNewDocument.CreateRelationship(newPart.Uri, TargetMode.Internal, + relationshipForDeletedPart.RelationshipType, newRid); + att.Value = newRid; + + if (newPart.ContentType.EndsWith("xml")) + { + XDocument newPartXDoc; + using (Stream stream = newPart.GetStream()) + { + newPartXDoc = XDocument.Load(stream); + MoveRelatedPartsToDestination(relatedPackagePart, newPart, newPartXDoc.Root); + } + + using (Stream stream = newPart.GetStream()) + newPartXDoc.Save(stream); + } + } + } + + return contentElement; + } + + private static XAttribute GetXmlSpaceAttribute(string textOfTextElement) + { + if (char.IsWhiteSpace(textOfTextElement[0]) || + char.IsWhiteSpace(textOfTextElement[textOfTextElement.Length - 1])) + return new XAttribute(XNamespace.Xml + "space", "preserve"); + + return null; + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Private.NestedTypes.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Private.NestedTypes.cs new file mode 100644 index 00000000..136a5f3a --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Private.NestedTypes.cs @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System.Drawing; +using System.Xml.Linq; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + private class Atgbw + { + public int? Key; + public ComparisonUnitAtom ComparisonUnitAtomMember; + public int NextIndex; + } + + private class ConsolidationInfo + { + public string Revisor; + public Color Color; + public XElement RevisionElement; + public bool InsertBefore; + public string RevisionHash; + public XElement[] Footnotes; + public XElement[] Endnotes; + public string RevisionString; // for debugging purposes only + } + + private class RecursionInfo + { + public XName ElementName; + public XName[] ChildElementPropertyNames; + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Public.Methods.Compare.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Public.Methods.Compare.cs new file mode 100644 index 00000000..df1d10a2 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Public.Methods.Compare.cs @@ -0,0 +1,227 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System.IO; +using System.Linq; +using System.Xml.Linq; +using DocumentFormat.OpenXml.Packaging; + +// It is possible to optimize DescendantContentAtoms + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Currently, the unid is set at the beginning of the algorithm. It is used by the code that establishes correlation +// based on first rejecting// tracked revisions, then correlating paragraphs/tables. It is requred for this algorithm +// - after finding a correlated sequence in the document with rejected revisions, it uses the unid to find the same +// paragraph in the document without rejected revisions, then sets the correlated sha1 hash in that document. +// +// But then when accepting tracked revisions, for certain paragraphs (where there are deleted paragraph marks) it is +// going to lose the unids. But this isn't a problem because when paragraph marks are deleted, the correlation is +// definitely no longer possible. Any paragraphs that are in a range of paragraphs that are coalesced can't be +// correlated to paragraphs in the other document via their hash. At that point we no longer care what their unids +// are. +// +// But after that it is only used to reconstruct the tree. It is also used in the debugging code that +// prints the various correlated sequences and comparison units - this is display for debugging purposes only. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// The key idea here is that a given paragraph will always have the same ancestors, and it doesn't matter whether the +// content was deleted from the old document, inserted into the new document, or set as equal. At this point, we +// identify a paragraph as a sequential list of content atoms, terminated by a paragraph mark. This entire list will +// for a single paragraph, regardless of whether the paragraph is a child of the body, or if the paragraph is in a cell +// in a table, or if the paragraph is in a text box. The list of ancestors, from the paragraph to the root of the XML +// tree will be the same for all content atoms in the paragraph. +// +// Therefore: +// +// Iterate through the list of content atoms backwards. When the loop sees a paragraph mark, it gets the ancestor +// unids from the paragraph mark to the top of the tree, and sets this as the same for all content atoms in the +// paragraph. For descendants of the paragraph mark, it doesn't really matter if content is put into separate runs +// or what not. We don't need to be concerned about what the unids are for descendants of the paragraph. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + public static WmlDocument Compare(WmlDocument source1, WmlDocument source2, WmlComparerSettings settings) + { + return CompareInternal(source1, source2, settings, true); + } + + private static WmlDocument CompareInternal( + WmlDocument source1, + WmlDocument source2, + WmlComparerSettings settings, + bool preProcessMarkupInOriginal) + { + if (preProcessMarkupInOriginal) + { + source1 = PreProcessMarkup(source1, settings.StartingIdForFootnotesEndnotes + 1000); + } + + source2 = PreProcessMarkup(source2, settings.StartingIdForFootnotesEndnotes + 2000); + + SaveDocumentIfDesired(source1, "Source1-Step1-PreProcess.docx", settings); + SaveDocumentIfDesired(source2, "Source2-Step1-PreProcess.docx", settings); + + // at this point, both source1 and source2 have unid on every element. These are the values that will + // enable reassembly of the XML tree. But we need other values. + + // In source1: + // - accept tracked revisions + // - determine hash code for every block-level element + // - save as attribute on every element + + // - accept tracked revisions and reject tracked revisions leave the unids alone, where possible. + // - after accepting and calculating the hash, then can use the unids to find the right block-level + // element in the unmodified source1, and install the hash + + // In source2: + // - reject tracked revisions + // - determine hash code for every block-level element + // - save as an attribute on every element + + // - after rejecting and calculating the hash, then can use the unids to find the right block-level element + // in the unmodified source2, and install the hash + + // - sometimes after accepting or rejecting tracked revisions, several paragraphs will get coalesced into a + // single paragraph due to paragraph marks being inserted / deleted. + // - in this case, some paragraphs will not get a hash injected onto them. + // - if a paragraph doesn't have a hash, then it will never correspond to another paragraph, and such + // issues will need to be resolved in the normal execution of the LCS algorithm. + // - note that when we do propagate the unid through for the first paragraph. + + // Establish correlation between the two. + // Find the longest common sequence of block-level elements where hash codes are the same. + // this sometimes will be every block level element in the document. Or sometimes will be just a fair + // number of them. + + // at the start of doing the LCS algorithm, we will match up content, and put them in corresponding unknown + // correlated comparison units. Those paragraphs will only ever be matched to their corresponding paragraph. + // then the algorithm can proceed as usual. + + // need to call ChangeFootnoteEndnoteReferencesToUniqueRange before creating the wmlResult document, so that + // the same GUID ids are used for footnote and endnote references in both the 'after' document, and in the + // result document. + + WmlDocument source1AfterAccepting = RevisionProcessor.AcceptRevisions(source1); + WmlDocument source2AfterRejecting = RevisionProcessor.RejectRevisions(source2); + + SaveDocumentIfDesired(source1AfterAccepting, "Source1-Step2-AfterAccepting.docx", settings); + SaveDocumentIfDesired(source2AfterRejecting, "Source2-Step2-AfterRejecting.docx", settings); + + // this creates the correlated hash codes that enable us to match up ranges of paragraphs based on + // accepting in source1, rejecting in source2 + source1 = HashBlockLevelContent(source1, source1AfterAccepting, settings); + source2 = HashBlockLevelContent(source2, source2AfterRejecting, settings); + + SaveDocumentIfDesired(source1, "Source1-Step3-AfterHashing.docx", settings); + SaveDocumentIfDesired(source2, "Source2-Step3-AfterHashing.docx", settings); + + // Accept revisions in before, and after + source1 = RevisionProcessor.AcceptRevisions(source1); + source2 = RevisionProcessor.AcceptRevisions(source2); + + SaveDocumentIfDesired(source1, "Source1-Step4-AfterAccepting.docx", settings); + SaveDocumentIfDesired(source2, "Source2-Step4-AfterAccepting.docx", settings); + + // after accepting revisions, some unids may have been removed by revision accepter, along with the + // correlatedSHA1Hash codes, this is as it should be. + // but need to go back in and add guids to paragraphs that have had them removed. + + using (var ms = new MemoryStream()) + { + ms.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length); + using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true)) + { + AddUnidsToMarkupInContentParts(wDoc); + } + } + + var wmlResult = new WmlDocument(source1); + using (var ms1 = new MemoryStream()) + using (var ms2 = new MemoryStream()) + { + ms1.Write(source1.DocumentByteArray, 0, source1.DocumentByteArray.Length); + ms2.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length); + WmlDocument producedDocument; + + using (WordprocessingDocument wDoc1 = WordprocessingDocument.Open(ms1, true)) + using (WordprocessingDocument wDoc2 = WordprocessingDocument.Open(ms2, true)) + { + producedDocument = ProduceDocumentWithTrackedRevisions(settings, wmlResult, wDoc1, wDoc2); + } + + SaveDocumentsAfterProducingDocument(ms1, ms2, settings); + SaveCleanedDocuments(source1, producedDocument, settings); + + return producedDocument; + } + } + + private static void SaveDocumentIfDesired(WmlDocument source, string name, WmlComparerSettings settings) + { + if (SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + var fileInfo = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name)); + source.SaveAs(fileInfo.FullName); + } + } + + private static void SaveDocumentsAfterProducingDocument(MemoryStream ms1, MemoryStream ms2, WmlComparerSettings settings) + { + if (SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + SaveDocumentIfDesired(new WmlDocument("after1.docx", ms1), "Source1-Step5-AfterProducingDocument.docx", settings); + SaveDocumentIfDesired(new WmlDocument("after2.docx", ms2), "Source2-Step5-AfterProducingDocument.docx", settings); + } + } + + private static void SaveCleanedDocuments(WmlDocument source1, WmlDocument producedDocument, WmlComparerSettings settings) + { + if (SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + WmlDocument cleanedSource = CleanPowerToolsAndRsid(source1); + SaveDocumentIfDesired(cleanedSource, "Cleaned-Source.docx", settings); + + WmlDocument cleanedProduced = CleanPowerToolsAndRsid(producedDocument); + SaveDocumentIfDesired(cleanedProduced, "Cleaned-Produced.docx", settings); + } + } + + private static WmlDocument CleanPowerToolsAndRsid(WmlDocument producedDocument) + { + using (var ms = new MemoryStream()) + { + ms.Write(producedDocument.DocumentByteArray, 0, producedDocument.DocumentByteArray.Length); + using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true)) + { + foreach (OpenXmlPart cp in wDoc.ContentParts()) + { + XDocument xd = cp.GetXDocument(); + object newRoot = CleanPartTransform(xd.Root); + xd.Root?.ReplaceWith(newRoot); + cp.PutXDocument(); + } + } + + var cleaned = new WmlDocument("cleaned.docx", ms.ToArray()); + return cleaned; + } + } + + private static object CleanPartTransform(XNode node) + { + if (node is XElement element) + { + return new XElement(element.Name, + element.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt && + !a.Name.LocalName.ToLower().Contains("rsid")), + element.Nodes().Select(CleanPartTransform)); + } + + return node; + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Public.Methods.Consolidate.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Public.Methods.Consolidate.cs new file mode 100644 index 00000000..9bea4238 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Public.Methods.Consolidate.cs @@ -0,0 +1,1002 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.IO.Packaging; +using System.Linq; +using System.Text; +using System.Xml.Linq; +using DocumentFormat.OpenXml.Packaging; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + /*****************************************************************************************************************/ + // Consolidate processes footnotes and endnotes in a particular fashion - if the unmodified document has a footnote + // reference, and a delta has a footnote reference, we end up with two footnotes - one is unmodified, and is referred to + // from the unmodified content. The footnote reference in the delta refers to the modified footnote. This is as it + // should be. + /*****************************************************************************************************************/ + + public static WmlDocument Consolidate( + WmlDocument original, + List revisedDocumentInfoList, + WmlComparerSettings settings) + { + var consolidateSettings = new WmlComparerConsolidateSettings(); + return Consolidate(original, revisedDocumentInfoList, settings, consolidateSettings); + } + + public static WmlDocument Consolidate( + WmlDocument original, + List revisedDocumentInfoList, + WmlComparerSettings settings, + WmlComparerConsolidateSettings consolidateSettings) + { + // pre-process the original, so that it already has unids for all elements + // then when comparing all documents to the original, each one will have the unid as appropriate + // for all revision block-level content + // set unid to look for + // while true + // determine where to insert + // get the unid for the revision + // look it up in the original. if find it, then insert after that element + // if not in the original + // look backwards in revised document, set unid to look for, do the loop again + // if get to the beginning of the document + // insert at beginning of document + + settings.StartingIdForFootnotesEndnotes = 3000; + WmlDocument originalWithUnids = PreProcessMarkup(original, settings.StartingIdForFootnotesEndnotes); + var consolidated = new WmlDocument(originalWithUnids); + + if (SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + var name1 = "Original-with-Unids.docx"; + var preProcFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1)); + originalWithUnids.SaveAs(preProcFi1.FullName); + } + + int revisedDocumentInfoListCount = revisedDocumentInfoList.Count(); + + using (var consolidatedMs = new MemoryStream()) + { + consolidatedMs.Write(consolidated.DocumentByteArray, 0, consolidated.DocumentByteArray.Length); + using (WordprocessingDocument consolidatedWDoc = WordprocessingDocument.Open(consolidatedMs, true)) + { + MainDocumentPart consolidatedMainDocPart = consolidatedWDoc.MainDocumentPart; + XDocument consolidatedMainDocPartXDoc = consolidatedMainDocPart.GetXDocument(); + XElement consolidatedMainDocPartRoot = consolidatedMainDocPartXDoc.Root ?? throw new ArgumentException(); + + // save away last sectPr + XElement savedSectPr = consolidatedMainDocPartRoot + .Elements(W.body) + .Elements(W.sectPr) + .LastOrDefault(); + + consolidatedMainDocPartRoot + .Elements(W.body) + .Elements(W.sectPr) + .Remove(); + + Dictionary consolidatedByUnid = consolidatedMainDocPartXDoc + .Descendants() + .Where(d => (d.Name == W.p || d.Name == W.tbl) && d.Attribute(PtOpenXml.Unid) != null) + .ToDictionary(d => (string) d.Attribute(PtOpenXml.Unid)); + + var deltaNbr = 1; + foreach (WmlRevisedDocumentInfo revisedDocumentInfo in revisedDocumentInfoList) + { + settings.StartingIdForFootnotesEndnotes = deltaNbr * 2000 + 3000; + WmlDocument delta = CompareInternal(originalWithUnids, revisedDocumentInfo.RevisedDocument, settings, + false); + + if (SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + string name1 = string.Format("Delta-{0}.docx", deltaNbr++); + var deltaFi = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1)); + delta.SaveAs(deltaFi.FullName); + } + + using (var msOriginalWithUnids = new MemoryStream()) + using (var msDelta = new MemoryStream()) + { + msOriginalWithUnids.Write( + originalWithUnids.DocumentByteArray, + 0, + originalWithUnids.DocumentByteArray.Length); + + msDelta.Write(delta.DocumentByteArray, 0, delta.DocumentByteArray.Length); + + using (WordprocessingDocument wDocOriginalWithUnids = WordprocessingDocument.Open(msOriginalWithUnids, true)) + using (WordprocessingDocument wDocDelta = WordprocessingDocument.Open(msDelta, true)) + { + MainDocumentPart modMainDocPart = wDocDelta.MainDocumentPart; + XDocument modMainDocPartXDoc = modMainDocPart.GetXDocument(); + List blockLevelContentToMove = modMainDocPartXDoc + .Root + .DescendantsTrimmed(d => d.Name == W.txbxContent || d.Name == W.tr) + .Where(d => d.Name == W.p || d.Name == W.tbl) + .Where(d => d.Descendants().Any(z => z.Name == W.ins || z.Name == W.del) || + ContentContainsFootnoteEndnoteReferencesThatHaveRevisions(d, wDocDelta)) + .ToList(); + + foreach (XElement revision in blockLevelContentToMove) + { + XElement elementLookingAt = revision; + while (true) + { + var unid = (string) elementLookingAt.Attribute(PtOpenXml.Unid); + if (unid == null) + throw new OpenXmlPowerToolsException("Internal error"); + + XElement elementToInsertAfter = null; + if (consolidatedByUnid.ContainsKey(unid)) + elementToInsertAfter = consolidatedByUnid[unid]; + + if (elementToInsertAfter != null) + { + var ci = new ConsolidationInfo(); + ci.Revisor = revisedDocumentInfo.Revisor; + ci.Color = revisedDocumentInfo.Color; + ci.RevisionElement = revision; + ci.Footnotes = revision + .Descendants(W.footnoteReference) + .Select(fr => + { + var id = (int) fr.Attribute(W.id); + XDocument fnXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument(); + XElement footnote = fnXDoc.Root.Elements(W.footnote) + .FirstOrDefault(fn => (int) fn.Attribute(W.id) == id); + if (footnote == null) + throw new OpenXmlPowerToolsException("Internal Error"); + + return footnote; + }) + .ToArray(); + ci.Endnotes = revision + .Descendants(W.endnoteReference) + .Select(er => + { + var id = (int) er.Attribute(W.id); + XDocument enXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument(); + XElement endnote = enXDoc.Root.Elements(W.endnote) + .FirstOrDefault(en => (int) en.Attribute(W.id) == id); + if (endnote == null) + throw new OpenXmlPowerToolsException("Internal Error"); + + return endnote; + }) + .ToArray(); + AddToAnnotation( + wDocDelta, + consolidatedWDoc, + elementToInsertAfter, + ci, + settings); + break; + } + + // find an element to insert after + XElement elementBeforeRevision = elementLookingAt + .SiblingsBeforeSelfReverseDocumentOrder() + .FirstOrDefault(e => e.Attribute(PtOpenXml.Unid) != null); + if (elementBeforeRevision == null) + { + XElement firstElement = consolidatedMainDocPartXDoc + .Root + .Element(W.body) + .Elements() + .FirstOrDefault(e => e.Name == W.p || e.Name == W.tbl); + + var ci = new ConsolidationInfo(); + ci.Revisor = revisedDocumentInfo.Revisor; + ci.Color = revisedDocumentInfo.Color; + ci.RevisionElement = revision; + ci.InsertBefore = true; + ci.Footnotes = revision + .Descendants(W.footnoteReference) + .Select(fr => + { + var id = (int) fr.Attribute(W.id); + XDocument fnXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument(); + XElement footnote = fnXDoc.Root.Elements(W.footnote) + .FirstOrDefault(fn => (int) fn.Attribute(W.id) == id); + if (footnote == null) + throw new OpenXmlPowerToolsException("Internal Error"); + + return footnote; + }) + .ToArray(); + ci.Endnotes = revision + .Descendants(W.endnoteReference) + .Select(er => + { + var id = (int) er.Attribute(W.id); + XDocument enXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument(); + XElement endnote = enXDoc.Root.Elements(W.endnote) + .FirstOrDefault(en => (int) en.Attribute(W.id) == id); + if (endnote == null) + throw new OpenXmlPowerToolsException("Internal Error"); + + return endnote; + }) + .ToArray(); + AddToAnnotation( + wDocDelta, + consolidatedWDoc, + firstElement, + ci, + settings); + break; + } + + elementLookingAt = elementBeforeRevision; + } + } + + CopyMissingStylesFromOneDocToAnother(wDocDelta, consolidatedWDoc); + } + } + } + + // at this point, everything is added as an annotation, from all documents to be merged. + // so now the process is to go through and add the annotations to the document + List elementsToProcess = consolidatedMainDocPartXDoc + .Root + .Descendants() + .Where(d => d.Annotation>() != null) + .ToList(); + + var emptyParagraph = new XElement(W.p, + new XElement(W.pPr, + new XElement(W.spacing, + new XAttribute(W.after, "0"), + new XAttribute(W.line, "240"), + new XAttribute(W.lineRule, "auto")))); + + foreach (XElement ele in elementsToProcess) + { + var lci = ele.Annotation>(); + + // process before + IEnumerable contentToAddBefore = lci + .Where(ci => ci.InsertBefore) + .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString()) + .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, + consolidatedWDoc, consolidateSettings)); + ele.AddBeforeSelf(contentToAddBefore); + + // process after + // if all revisions from all revisors are exactly the same, then instead of adding multiple tables after + // that contains the revisions, then simply replace the paragraph with the one with the revisions. + // RC004 documents contain the test data to exercise this. + + int lciCount = lci.Where(ci => ci.InsertBefore == false).Count(); + + if (lciCount > 1 && lciCount == revisedDocumentInfoListCount) + { + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // This is the code that determines if revisions should be consolidated into one. + + List> uniqueRevisions = lci + .Where(ci => ci.InsertBefore == false) + .GroupBy(ci => + { + // Get a hash after first accepting revisions and compressing the text. + XElement acceptedRevisionElement = + RevisionProcessor.AcceptRevisionsForElement(ci.RevisionElement); + string sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(acceptedRevisionElement.Value + .Replace(" ", "").Replace(" ", "").Replace(" ", "").Replace("\n", "").Replace(".", "") + .Replace(",", "").ToUpper()); + return sha1Hash; + }) + .OrderByDescending(g => g.Count()) + .ToList(); + int uniqueRevisionCount = uniqueRevisions.Count(); + + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + if (uniqueRevisionCount == 1) + { + MoveFootnotesEndnotesForConsolidatedRevisions(lci.First(), consolidatedWDoc); + + var dummyElement = new XElement("dummy", lci.First().RevisionElement); + + foreach (XElement rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null)) + { + XAttribute aut = rev.Attribute(W.author); + aut.Value = "ITU"; + } + + ele.ReplaceWith(dummyElement.Elements()); + continue; + } + + // this is the location where we have determined that there are the same number of revisions for this paragraph as there are revision documents. + // however, the hash for all of them were not the same. + // therefore, they would be added to the consolidated document as separate revisions. + + // create a log that shows what is different, in detail. + if (settings.LogCallback != null) + { + var sb = new StringBuilder(); + sb.Append( + "====================================================================================================" + + NewLine); + sb.Append("Non-Consolidated Revision" + NewLine); + sb.Append( + "====================================================================================================" + + NewLine); + foreach (IGrouping urList in uniqueRevisions) + { + string revisorList = urList.Select(ur => ur.Revisor + " : ").StringConcatenate() + .TrimEnd(' ', ':'); + sb.Append("Revisors: " + revisorList + NewLine); + string str = RevisionToLogFormTransform(urList.First().RevisionElement, 0, false); + sb.Append(str); + sb.Append("=========================" + NewLine); + } + + sb.Append(NewLine); + settings.LogCallback(sb.ToString()); + } + } + + IEnumerable contentToAddAfter = lci + .Where(ci => ci.InsertBefore == false) + .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString()) + .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, + consolidatedWDoc, consolidateSettings)); + ele.AddAfterSelf(contentToAddAfter); + } + +#if false + +// old code + foreach (var ele in elementsToProcess) + { + var lci = ele.Annotation>(); + + // if all revisions from all revisors are exactly the same, then instead of adding multiple tables after + // that contains the revisions, then simply replace the paragraph with the one with the revisions. + // RC004 documents contain the test data to exercise this. + + var lciCount = lci.Count(); + + if (lci.Count() > 1 && lciCount == revisedDocumentInfoListCount) + { + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // This is the code that determines if revisions should be consolidated into one. + + var uniqueRevisions = lci + .GroupBy(ci => + { + // Get a hash after first accepting revisions and compressing the text. + var ciz = ci; + + var acceptedRevisionElement = RevisionProcessor.AcceptRevisionsForElement(ci.RevisionElement); + var text = acceptedRevisionElement.Value + .Replace(" ", "") + .Replace(" ", "") + .Replace(" ", "") + .Replace("\n", ""); + var sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(text); + return ci.InsertBefore.ToString() + sha1Hash; + }) + .OrderByDescending(g => g.Count()) + .ToList(); + var uniqueRevisionCount = uniqueRevisions.Count(); + + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + if (uniqueRevisionCount == 1) + { + MoveFootnotesEndnotesForConsolidatedRevisions(lci.First(), consolidatedWDoc); + + var dummyElement = new XElement("dummy", lci.First().RevisionElement); + + foreach(var rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null)) + { + var aut = rev.Attribute(W.author); + aut.Value = "ITU"; + } + + ele.ReplaceWith(dummyElement.Elements()); + continue; + } + + // this is the location where we have determined that there are the same number of revisions for this paragraph as there are revision documents. + // however, the hash for all of them were not the same. + // therefore, they would be added to the consolidated document as separate revisions. + + // create a log that shows what is different, in detail. + if (settings.LogCallback != null) + { + StringBuilder sb = new StringBuilder(); + sb.Append("====================================================================================================" + nl); + sb.Append("Non-Consolidated Revision" + nl); + sb.Append("====================================================================================================" + nl); + foreach (var urList in uniqueRevisions) + { + var revisorList = + urList.Select(ur => ur.Revisor + " : ").StringConcatenate().TrimEnd(' ', ':'); + sb.Append("Revisors: " + revisorList + nl); + var str = RevisionToLogFormTransform(urList.First().RevisionElement, 0, false); + sb.Append(str); + sb.Append("=========================" + nl); + } + sb.Append(nl); + settings.LogCallback(sb.ToString()); + } + } + + var contentToAddBefore = lci + .Where(ci => ci.InsertBefore == true) + .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString()) + .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings)); + var contentToAddAfter = lci + .Where(ci => ci.InsertBefore == false) + .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString()) + .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings)); + ele.AddBeforeSelf(contentToAddBefore); + ele.AddAfterSelf(contentToAddAfter); + } +#endif + + consolidatedMainDocPartXDoc + .Root? + .Element(W.body)? + .Add(savedSectPr); + + AddTableGridStyleToStylesPart(consolidatedWDoc.MainDocumentPart.StyleDefinitionsPart); + FixUpRevisionIds(consolidatedWDoc, consolidatedMainDocPartXDoc); + IgnorePt14NamespaceForFootnotesEndnotes(consolidatedWDoc); + FixUpDocPrIds(consolidatedWDoc); + FixUpShapeIds(consolidatedWDoc); + FixUpGroupIds(consolidatedWDoc); + FixUpShapeTypeIds(consolidatedWDoc); + IgnorePt14Namespace(consolidatedMainDocPartXDoc.Root); + consolidatedWDoc.MainDocumentPart.PutXDocument(); + AddFootnotesEndnotesStyles(consolidatedWDoc); + } + + var newConsolidatedDocument = new WmlDocument("consolidated.docx", consolidatedMs.ToArray()); + return newConsolidatedDocument; + } + } + + private static void MoveFootnotesEndnotesForConsolidatedRevisions( + ConsolidationInfo ci, + WordprocessingDocument wDocConsolidated) + { + XDocument consolidatedFootnoteXDoc = wDocConsolidated.MainDocumentPart.FootnotesPart.GetXDocument(); + XDocument consolidatedEndnoteXDoc = wDocConsolidated.MainDocumentPart.EndnotesPart.GetXDocument(); + + var maxFootnoteId = 1; + if (consolidatedFootnoteXDoc.Root?.Elements(W.footnote).Any() == true) + maxFootnoteId = consolidatedFootnoteXDoc.Root.Elements(W.footnote).Select(e => (int) e.Attribute(W.id)).Max(); + + var maxEndnoteId = 1; + if (consolidatedEndnoteXDoc.Root?.Elements(W.endnote).Any() == true) + maxEndnoteId = consolidatedEndnoteXDoc.Root.Elements(W.endnote).Select(e => (int) e.Attribute(W.id)).Max(); + + // At this point, content might contain a footnote or endnote reference. + // Need to add the footnote / endnote into the consolidated document (with the same guid id) + // Because of preprocessing of the documents, all footnote and endnote references will be unique at this point + + if (ci.RevisionElement.Descendants(W.footnoteReference).Any()) + { + XDocument footnoteXDoc = wDocConsolidated.MainDocumentPart.FootnotesPart.GetXDocument(); + foreach (XElement footnoteReference in ci.RevisionElement.Descendants(W.footnoteReference)) + { + var id = (int) footnoteReference.Attribute(W.id); + XElement footnote = ci.Footnotes.FirstOrDefault(fn => (int) fn.Attribute(W.id) == id); + if (footnote != null) + { + int newId = ++maxFootnoteId; + footnoteReference.SetAttributeValue(W.id, newId); + + var clonedFootnote = new XElement(footnote); + clonedFootnote.SetAttributeValue(W.id, newId); + footnoteXDoc.Root?.Add(clonedFootnote); + } + } + + wDocConsolidated.MainDocumentPart.FootnotesPart.PutXDocument(); + } + + if (ci.RevisionElement.Descendants(W.endnoteReference).Any()) + { + XDocument endnoteXDoc = wDocConsolidated.MainDocumentPart.EndnotesPart.GetXDocument(); + foreach (XElement endnoteReference in ci.RevisionElement.Descendants(W.endnoteReference)) + { + var id = (int) endnoteReference.Attribute(W.id); + XElement endnote = ci.Endnotes.FirstOrDefault(fn => (int) fn.Attribute(W.id) == id); + if (endnote != null) + { + int newId = ++maxEndnoteId; + endnoteReference.SetAttributeValue(W.id, newId); + + var clonedEndnote = new XElement(endnote); + clonedEndnote.SetAttributeValue(W.id, newId); + endnoteXDoc.Root?.Add(clonedEndnote); + } + } + + wDocConsolidated.MainDocumentPart.EndnotesPart.PutXDocument(); + } + } + + private static void FixUpGroupIds(WordprocessingDocument wDoc) + { + XName elementToFind = VML.@group; + IEnumerable groupIdsToChange = wDoc + .ContentParts() + .Select(cp => cp.GetXDocument()) + .Select(xd => xd.Descendants().Where(d => d.Name == elementToFind)) + .SelectMany(m => m); + var nextId = 1; + foreach (XElement item in groupIdsToChange) + { + int thisId = nextId++; + + XAttribute idAtt = item.Attribute("id"); + if (idAtt != null) + idAtt.Value = thisId.ToString(); + } + + foreach (OpenXmlPart cp in wDoc.ContentParts()) + cp.PutXDocument(); + } + + private static bool ContentContainsFootnoteEndnoteReferencesThatHaveRevisions( + XElement element, + WordprocessingDocument wDocDelta) + { + IEnumerable footnoteEndnoteReferences = element + .Descendants() + .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference) + .ToList(); + + if (!footnoteEndnoteReferences.Any()) + return false; + + XDocument footnoteXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument(); + XDocument endnoteXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument(); + + foreach (XElement note in footnoteEndnoteReferences) + { + XElement fnen; + if (note.Name == W.footnoteReference) + { + var id = (int) note.Attribute(W.id); + fnen = footnoteXDoc + .Root? + .Elements(W.footnote) + .FirstOrDefault(n => (int) n.Attribute(W.id) == id); + + if (fnen?.Descendants().Any(d => d.Name == W.ins || d.Name == W.del) == true) + return true; + } + + if (note.Name == W.endnoteReference) + { + var id = (int) note.Attribute(W.id); + fnen = endnoteXDoc + .Root? + .Elements(W.endnote) + .FirstOrDefault(n => (int) n.Attribute(W.id) == id); + + if (fnen?.Descendants().Any(d => d.Name == W.ins || d.Name == W.del) == true) + return true; + } + } + + return false; + } + + private static string RevisionToLogFormTransform(XElement element, int depth, bool inserting) + { + if (element.Name == W.p) + return "Paragraph" + NewLine + element.Elements().Select(e => RevisionToLogFormTransform(e, depth + 2, false)) + .StringConcatenate(); + if (element.Name == W.pPr || element.Name == W.rPr) + return ""; + if (element.Name == W.r) + return element.Elements().Select(e => RevisionToLogFormTransform(e, depth, inserting)).StringConcatenate(); + + if (element.Name == W.t) + { + if (inserting) + return "".PadRight(depth) + "Inserted Text:" + QuoteIt((string) element) + NewLine; + + return "".PadRight(depth) + "Text:" + QuoteIt((string) element) + NewLine; + } + + if (element.Name == W.delText) + return "".PadRight(depth) + "Deleted Text:" + QuoteIt((string) element) + NewLine; + if (element.Name == W.ins) + return element.Elements().Select(e => RevisionToLogFormTransform(e, depth, true)).StringConcatenate(); + if (element.Name == W.del) + return element.Elements().Select(e => RevisionToLogFormTransform(e, depth, false)).StringConcatenate(); + + return ""; + } + + private static string QuoteIt(string str) + { + var quoteString = "\""; + if (str.Contains('\"')) + { + quoteString = "\'"; + } + + return quoteString + str + quoteString; + } + + private static void IgnorePt14NamespaceForFootnotesEndnotes(WordprocessingDocument wDoc) + { + FootnotesPart footnotesPart = wDoc.MainDocumentPart.FootnotesPart; + EndnotesPart endnotesPart = wDoc.MainDocumentPart.EndnotesPart; + + if (footnotesPart != null) + { + XDocument footnotesPartXDoc = footnotesPart.GetXDocument(); + IgnorePt14Namespace(footnotesPartXDoc.Root); + } + + if (endnotesPart != null) + { + XDocument endnotesPartXDoc = endnotesPart.GetXDocument(); + IgnorePt14Namespace(endnotesPartXDoc.Root); + } + + footnotesPart?.PutXDocument(); + endnotesPart?.PutXDocument(); + } + + private static XElement[] AssembledConjoinedRevisionContent( + XElement emptyParagraph, + IGrouping groupedCi, + int idx, + WordprocessingDocument wDocConsolidated, + WmlComparerConsolidateSettings consolidateSettings) + { + XDocument consolidatedFootnoteXDoc = wDocConsolidated.MainDocumentPart.FootnotesPart.GetXDocument(); + XDocument consolidatedEndnoteXDoc = wDocConsolidated.MainDocumentPart.EndnotesPart.GetXDocument(); + + var maxFootnoteId = 1; + if (consolidatedFootnoteXDoc.Root?.Elements(W.footnote).Any() == true) + { + maxFootnoteId = consolidatedFootnoteXDoc.Root.Elements(W.footnote).Select(e => (int) e.Attribute(W.id)).Max(); + } + + var maxEndnoteId = 1; + if (consolidatedEndnoteXDoc.Root?.Elements(W.endnote).Any() == true) + { + maxEndnoteId = consolidatedEndnoteXDoc.Root.Elements(W.endnote).Select(e => (int) e.Attribute(W.id)).Max(); + } + + string revisor = groupedCi.First().Revisor; + + var captionParagraph = new XElement(W.p, + new XElement(W.pPr, + new XElement(W.jc, new XAttribute(W.val, "both")), + new XElement(W.rPr, + new XElement(W.b), + new XElement(W.bCs))), + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.b), + new XElement(W.bCs)), + new XElement(W.t, revisor))); + + int colorRgb = groupedCi.First().Color.ToArgb(); + string colorString = colorRgb.ToString("X"); + if (colorString.Length == 8) + { + colorString = colorString.Substring(2); + } + + if (consolidateSettings.ConsolidateWithTable) + { + var table = new XElement(W.tbl, + new XElement(W.tblPr, + new XElement(W.tblStyle, new XAttribute(W.val, "TableGridForRevisions")), + new XElement(W.tblW, + new XAttribute(W._w, "0"), + new XAttribute(W.type, "auto")), + new XElement(W.shd, + new XAttribute(W.val, "clear"), + new XAttribute(W.color, "auto"), + new XAttribute(W.fill, colorString)), + new XElement(W.tblLook, + new XAttribute(W.firstRow, "0"), + new XAttribute(W.lastRow, "0"), + new XAttribute(W.firstColumn, "0"), + new XAttribute(W.lastColumn, "0"), + new XAttribute(W.noHBand, "0"), + new XAttribute(W.noVBand, "0"))), + new XElement(W.tblGrid, + new XElement(W.gridCol, new XAttribute(W._w, "9576"))), + new XElement(W.tr, + new XElement(W.tc, + new XElement(W.tcPr, + new XElement(W.shd, + new XAttribute(W.val, "clear"), + new XAttribute(W.color, "auto"), + new XAttribute(W.fill, colorString))), + captionParagraph, + groupedCi.Select(ci => + { + XElement paraAfter = null; + if (ci.RevisionElement.Name == W.tbl) + paraAfter = emptyParagraph; + XElement[] revisionInTable = + { + ci.RevisionElement, + paraAfter + }; + + // At this point, content might contain a footnote or endnote reference. + // Need to add the footnote / endnote into the consolidated document (with the same + // guid id). Because of preprocessing of the documents, all footnote and endnote + // references will be unique at this point + + AddFootnotes(ci, wDocConsolidated, ref maxFootnoteId); + AddEndnotes(ci, wDocConsolidated, ref maxEndnoteId); + + return revisionInTable; + })))); + + // if the last paragraph has a deleted paragraph mark, then remove the deletion from the paragraph mark. + // This is to prevent Word from misbehaving. the last paragraph in a cell must not have a deleted + // paragraph mark. + XElement theCell = table.Descendants(W.tc).FirstOrDefault(); + XElement lastPara = theCell?.Elements(W.p).LastOrDefault(); + + if (lastPara != null) + { + bool isDeleted = lastPara + .Elements(W.pPr) + .Elements(W.rPr) + .Elements(W.del) + .Any(); + + if (isDeleted) + { + lastPara + .Elements(W.pPr) + .Elements(W.rPr) + .Elements(W.del) + .Remove(); + } + } + + XElement[] content = + { + idx == 0 ? emptyParagraph : null, + table, + emptyParagraph + }; + + return content; + } + else + { + IEnumerable content = groupedCi.Select(ci => + { + XElement paraAfter = null; + if (ci.RevisionElement.Name == W.tbl) + { + paraAfter = emptyParagraph; + } + + XElement[] revisionInTable = + { + ci.RevisionElement, + paraAfter + }; + + // At this point, content might contain a footnote or endnote reference. + // Need to add the footnote / endnote into the consolidated document (with the same + // guid id). Because of preprocessing of the documents, all footnote and endnote + // references will be unique at this point + + AddFootnotes(ci, wDocConsolidated, ref maxFootnoteId); + AddEndnotes(ci, wDocConsolidated, ref maxEndnoteId); + + return revisionInTable; + }); + + var dummyElement = new XElement("dummy", content.SelectMany(m => m)); + foreach (XElement rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null)) + { + rev.SetAttributeValue(W.author, revisor); + } + + return dummyElement.Elements().ToArray(); + } + } + + private static void AddFootnotes(ConsolidationInfo ci, WordprocessingDocument wDocConsolidated, ref int maxFootnoteId) + { + if (ci.RevisionElement.Descendants(W.footnoteReference).Any()) + { + XDocument footnoteXDoc = wDocConsolidated.MainDocumentPart.FootnotesPart.GetXDocument(); + foreach (XElement footnoteReference in ci.RevisionElement.Descendants(W.footnoteReference)) + { + var id = (int) footnoteReference.Attribute(W.id); + XElement footnote = ci.Footnotes.FirstOrDefault(fn => (int) fn.Attribute(W.id) == id); + if (footnote != null) + { + int newId = ++maxFootnoteId; + footnoteReference.SetAttributeValue(W.id, newId); + + var clonedFootnote = new XElement(footnote); + clonedFootnote.SetAttributeValue(W.id, newId); + footnoteXDoc.Root?.Add(clonedFootnote); + } + } + + wDocConsolidated.MainDocumentPart.FootnotesPart.PutXDocument(); + } + } + + private static void AddEndnotes(ConsolidationInfo ci, WordprocessingDocument wDocConsolidated, ref int maxEndnoteId) + { + if (ci.RevisionElement.Descendants(W.endnoteReference).Any()) + { + XDocument endnoteXDoc = wDocConsolidated.MainDocumentPart.EndnotesPart.GetXDocument(); + foreach (XElement endnoteReference in ci.RevisionElement.Descendants(W.endnoteReference)) + { + var id = (int) endnoteReference.Attribute(W.id); + XElement endnote = ci.Endnotes.FirstOrDefault(fn => (int) fn.Attribute(W.id) == id); + if (endnote != null) + { + int newId = ++maxEndnoteId; + endnoteReference.SetAttributeValue(W.id, newId); + + var clonedEndnote = new XElement(endnote); + clonedEndnote.SetAttributeValue(W.id, newId); + endnoteXDoc.Root?.Add(clonedEndnote); + } + } + + wDocConsolidated.MainDocumentPart.EndnotesPart.PutXDocument(); + } + } + + private static void AddToAnnotation( + WordprocessingDocument wDocDelta, + WordprocessingDocument consolidatedWDoc, + XElement elementToInsertAfter, + ConsolidationInfo consolidationInfo, + WmlComparerSettings settings) + { + Package packageOfDeletedContent = wDocDelta.MainDocumentPart.OpenXmlPackage.Package; + Package packageOfNewContent = consolidatedWDoc.MainDocumentPart.OpenXmlPackage.Package; + PackagePart partInDeletedDocument = packageOfDeletedContent.GetPart(wDocDelta.MainDocumentPart.Uri); + PackagePart partInNewDocument = packageOfNewContent.GetPart(consolidatedWDoc.MainDocumentPart.Uri); + consolidationInfo.RevisionElement = + MoveRelatedPartsToDestination(partInDeletedDocument, partInNewDocument, consolidationInfo.RevisionElement); + + var clonedForHashing = (XElement) CloneBlockLevelContentForHashing(consolidatedWDoc.MainDocumentPart, + consolidationInfo.RevisionElement, false, settings); + clonedForHashing.Descendants().Where(d => d.Name == W.ins || d.Name == W.del).Attributes(W.id).Remove(); + string shaString = clonedForHashing.ToString(SaveOptions.DisableFormatting) + .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", ""); + string sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaString); + consolidationInfo.RevisionString = shaString; + consolidationInfo.RevisionHash = sha1Hash; + + var annotationList = elementToInsertAfter.Annotation>(); + if (annotationList == null) + { + annotationList = new List(); + elementToInsertAfter.AddAnnotation(annotationList); + } + + annotationList.Add(consolidationInfo); + } + + private static void AddTableGridStyleToStylesPart(StyleDefinitionsPart styleDefinitionsPart) + { + XDocument sXDoc = styleDefinitionsPart.GetXDocument(); + XElement tableGridStyle = sXDoc + .Root? + .Elements(W.style) + .FirstOrDefault(s => (string) s.Attribute(W.styleId) == "TableGridForRevisions"); + + if (tableGridStyle == null) + { + var tableGridForRevisionsStyleMarkup = + @" + + + + + + + + + + + + + + + + + + +"; + XElement tgsElement = XElement.Parse(tableGridForRevisionsStyleMarkup); + sXDoc.Root.Add(tgsElement); + } + + XElement tableNormalStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string) s.Attribute(W.styleId) == "TableNormal"); + if (tableNormalStyle == null) + { + var tableNormalStyleMarkup = + @" + + + + + + + + + + + + + + "; + XElement tnsElement = XElement.Parse(tableNormalStyleMarkup); + sXDoc.Root.Add(tnsElement); + } + + styleDefinitionsPart.PutXDocument(); + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Public.Methods.GetRevisions.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Public.Methods.GetRevisions.cs new file mode 100644 index 00000000..04ce548d --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Public.Methods.GetRevisions.cs @@ -0,0 +1,214 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Text; +using System.Xml.Linq; +using DocumentFormat.OpenXml.Packaging; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + // the following gets a flattened list of ComparisonUnitAtoms, with status indicated in each ComparisonUnitAtom: Deleted, Inserted, or Equal + + // for any deleted or inserted rows, we go into the w:trPr properties, and add the appropriate w:ins or w:del element, and therefore + // when generating the document, the appropriate row will be marked as deleted or inserted. + + public static List GetRevisions(WmlDocument source, WmlComparerSettings settings) + { + using (var ms = new MemoryStream()) + { + ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length); + using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true)) + { + TestForInvalidContent(wDoc); + RemoveExistingPowerToolsMarkup(wDoc); + + XElement contentParent = wDoc.MainDocumentPart.GetXDocument().Root?.Element(W.body); + ComparisonUnitAtom[] atomList = + CreateComparisonUnitAtomList(wDoc.MainDocumentPart, contentParent, settings).ToArray(); + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in atomList) + sb.Append(item + Environment.NewLine); + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + List> grouped = atomList + .GroupAdjacent(a => + { + string key = a.CorrelationStatus.ToString(); + if (a.CorrelationStatus != CorrelationStatus.Equal) + { + var rt = new XElement(a.RevTrackElement.Name, + new XAttribute(XNamespace.Xmlns + "w", + "http://schemas.openxmlformats.org/wordprocessingml/2006/main"), + a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid)); + key += rt.ToString(SaveOptions.DisableFormatting); + } + + return key; + }) + .ToList(); + + List> revisions = grouped + .Where(k => k.Key != "Equal") + .ToList(); + + if (False) + { + var sb = new StringBuilder(); + foreach (IGrouping item in revisions) + { + sb.Append(item.Key + Environment.NewLine); + } + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + List mainDocPartRevisionList = revisions + .Select(rg => + { + var rev = new WmlComparerRevision(); + if (rg.Key.StartsWith("Inserted")) + { + rev.RevisionType = WmlComparerRevisionType.Inserted; + } + else if (rg.Key.StartsWith("Deleted")) + { + rev.RevisionType = WmlComparerRevisionType.Deleted; + } + + XElement revTrackElement = rg.First().RevTrackElement; + rev.RevisionXElement = revTrackElement; + rev.Author = (string) revTrackElement.Attribute(W.author); + rev.ContentXElement = rg.First().ContentElement; + rev.Date = (string) revTrackElement.Attribute(W.date); + rev.PartUri = wDoc.MainDocumentPart.Uri; + rev.PartContentType = wDoc.MainDocumentPart.ContentType; + + if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name)) + { + rev.Text = rg + .Select(rgc => rgc.ContentElement.Name == W.pPr ? NewLine : rgc.ContentElement.Value) + .StringConcatenate(); + } + + return rev; + }) + .ToList(); + + IEnumerable footnotesRevisionList = + GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.FootnotesPart, W.footnote, settings); + IEnumerable endnotesRevisionList = + GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.EndnotesPart, W.endnote, settings); + + List finalRevisionList = mainDocPartRevisionList + .Concat(footnotesRevisionList) + .Concat(endnotesRevisionList) + .ToList(); + + return finalRevisionList; + } + } + } + + private static IEnumerable GetFootnoteEndnoteRevisionList( + OpenXmlPart footnotesEndnotesPart, + XName footnoteEndnoteElementName, + WmlComparerSettings settings) + { + if (footnotesEndnotesPart == null) + { + return Enumerable.Empty(); + } + + XDocument xDoc = footnotesEndnotesPart.GetXDocument(); + IEnumerable footnotesEndnotes = + xDoc.Root?.Elements(footnoteEndnoteElementName) ?? throw new OpenXmlPowerToolsException("Invalid document."); + + var revisionsForPart = new List(); + foreach (XElement fn in footnotesEndnotes) + { + ComparisonUnitAtom[] atomList = CreateComparisonUnitAtomList(footnotesEndnotesPart, fn, settings).ToArray(); + + if (False) + { + var sb = new StringBuilder(); + foreach (ComparisonUnitAtom item in atomList) + { + sb.Append(item + Environment.NewLine); + } + + string sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + List> grouped = atomList + .GroupAdjacent(a => + { + string key = a.CorrelationStatus.ToString(); + if (a.CorrelationStatus != CorrelationStatus.Equal) + { + var rt = new XElement(a.RevTrackElement.Name, + new XAttribute(XNamespace.Xmlns + "w", + "http://schemas.openxmlformats.org/wordprocessingml/2006/main"), + a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid)); + + key += rt.ToString(SaveOptions.DisableFormatting); + } + + return key; + }) + .ToList(); + + List> revisions = grouped + .Where(k => k.Key != "Equal") + .ToList(); + + IEnumerable thisNoteRevisionList = revisions + .Select(rg => + { + var rev = new WmlComparerRevision(); + if (rg.Key.StartsWith("Inserted")) + { + rev.RevisionType = WmlComparerRevisionType.Inserted; + } + else if (rg.Key.StartsWith("Deleted")) + { + rev.RevisionType = WmlComparerRevisionType.Deleted; + } + + XElement revTrackElement = rg.First().RevTrackElement; + rev.RevisionXElement = revTrackElement; + rev.Author = (string) revTrackElement.Attribute(W.author); + rev.ContentXElement = rg.First().ContentElement; + rev.Date = (string) revTrackElement.Attribute(W.date); + rev.PartUri = footnotesEndnotesPart.Uri; + rev.PartContentType = footnotesEndnotesPart.ContentType; + + if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name)) + { + rev.Text = rg + .Select(rgc => rgc.ContentElement.Name == W.pPr ? NewLine : rgc.ContentElement.Value) + .StringConcatenate(); + } + + return rev; + }); + + revisionsForPart.AddRange(thisNoteRevisionList); + } + + return revisionsForPart; + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparer.Public.NestedTypes.cs b/OpenXmlPowerTools/Comparer/WmlComparer.Public.NestedTypes.cs new file mode 100644 index 00000000..5d7c09b4 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparer.Public.NestedTypes.cs @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Xml.Linq; + +namespace OpenXmlPowerTools +{ + public static partial class WmlComparer + { + public class WmlComparerRevision + { + public WmlComparerRevisionType RevisionType; + public string Text; + public string Author; + public string Date; + public XElement ContentXElement; + public XElement RevisionXElement; + public Uri PartUri; + public string PartContentType; + } + + public enum WmlComparerRevisionType + { + Inserted, + Deleted + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparerConsolidateSettings.cs b/OpenXmlPowerTools/Comparer/WmlComparerConsolidateSettings.cs new file mode 100644 index 00000000..bc583f45 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparerConsolidateSettings.cs @@ -0,0 +1,10 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +namespace OpenXmlPowerTools +{ + public class WmlComparerConsolidateSettings + { + public bool ConsolidateWithTable = true; + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparerExtensions.cs b/OpenXmlPowerTools/Comparer/WmlComparerExtensions.cs new file mode 100644 index 00000000..f4546548 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparerExtensions.cs @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Xml.Linq; +using DocumentFormat.OpenXml.Packaging; + +namespace OpenXmlPowerTools +{ + public static class WmlComparerExtensions + { + public static XElement GetMainDocumentBody(this WordprocessingDocument wordDocument) + { + return wordDocument.GetMainDocumentRoot().Element(W.body) ?? throw new ArgumentException("Invalid document."); + } + + public static XElement GetMainDocumentRoot(this WordprocessingDocument wordDocument) + { + return wordDocument.MainDocumentPart?.GetXElement() ?? throw new ArgumentException("Invalid document."); + } + + public static XElement GetXElement(this OpenXmlPart part) + { + return part.GetXDocument()?.Root ?? throw new ArgumentException("Invalid document."); + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparerSettings.cs b/OpenXmlPowerTools/Comparer/WmlComparerSettings.cs new file mode 100644 index 00000000..7183464a --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparerSettings.cs @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Globalization; +using System.IO; + +namespace OpenXmlPowerTools +{ + public class WmlComparerSettings + { + public char[] WordSeparators; + public string AuthorForRevisions = "Open-Xml-PowerTools"; + public string DateTimeForRevisions = DateTime.Now.ToString("o"); + public double DetailThreshold = 0.15; + public bool CaseInsensitive = false; + public CultureInfo CultureInfo = null; + public Action LogCallback = null; + public int StartingIdForFootnotesEndnotes = 1; + + public DirectoryInfo DebugTempFileDi; + + public WmlComparerSettings() + { + // note that , and . are processed explicitly to handle cases where they are in a number or word + WordSeparators = new[] { ' ', '-', ')', '(', ';', ',' }; // todo need to fix this for complete list + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlComparerUtil.cs b/OpenXmlPowerTools/Comparer/WmlComparerUtil.cs new file mode 100644 index 00000000..e0d48e08 --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlComparerUtil.cs @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Security.Cryptography; +using System.Text; + +namespace OpenXmlPowerTools +{ + internal static class WmlComparerUtil + { + public static string SHA1HashStringForUTF8String(string s) + { + var bytes = Encoding.UTF8.GetBytes(s); + var sha1 = SHA1.Create(); + var hashBytes = sha1.ComputeHash(bytes); + return HexStringFromBytes(hashBytes); + } + + public static string SHA1HashStringForByteArray(byte[] bytes) + { + var sha1 = SHA1.Create(); + var hashBytes = sha1.ComputeHash(bytes); + return HexStringFromBytes(hashBytes); + } + + public static string HexStringFromBytes(byte[] bytes) + { + var sb = new StringBuilder(); + foreach (var b in bytes) + { + var hex = b.ToString("x2"); + sb.Append(hex); + } + + return sb.ToString(); + } + + public static ComparisonUnitGroupType ComparisonUnitGroupTypeFromLocalName(string localName) + { + switch (localName) + { + case "p": + return ComparisonUnitGroupType.Paragraph; + case "tbl": + return ComparisonUnitGroupType.Table; + case "tr": + return ComparisonUnitGroupType.Row; + case "tc": + return ComparisonUnitGroupType.Cell; + case "txbxContent": + return ComparisonUnitGroupType.Textbox; + default: + throw new ArgumentOutOfRangeException(nameof(localName), + $@"Unsupported localName: '{localName}'."); + } + } + } +} diff --git a/OpenXmlPowerTools/Comparer/WmlRevisedDocumentInfo.cs b/OpenXmlPowerTools/Comparer/WmlRevisedDocumentInfo.cs new file mode 100644 index 00000000..c0d8000e --- /dev/null +++ b/OpenXmlPowerTools/Comparer/WmlRevisedDocumentInfo.cs @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System.Drawing; + +namespace OpenXmlPowerTools +{ + public class WmlRevisedDocumentInfo + { + public WmlDocument RevisedDocument; + public string Revisor; + public Color Color; + } +} diff --git a/OpenXmlPowerTools/DocumentBuilder.cs b/OpenXmlPowerTools/DocumentBuilder.cs index 18114cc6..8e139a91 100644 --- a/OpenXmlPowerTools/DocumentBuilder.cs +++ b/OpenXmlPowerTools/DocumentBuilder.cs @@ -483,6 +483,7 @@ are there any PtOpenXml.Insert elements in any of them? } else { + FixUpSectionProperties(output); // Any sectPr elements that do not have headers and footers should take their headers and footers from the *next* section, @@ -1145,6 +1146,7 @@ private class CachedHeaderFooter private static void ProcessSectionsForLinkToPreviousHeadersAndFooters(WordprocessingDocument doc) { + CachedHeaderFooter[] cachedHeaderFooter = new[] { new CachedHeaderFooter() { Ref = W.headerReference, Type = "first" }, @@ -1269,6 +1271,7 @@ private static void AddReferenceToExistingHeaderOrFooter(MainDocumentPart mainDo private static void InitEmptyHeaderOrFooter(MainDocumentPart mainDocPart, XElement sect, XName referenceXName, string toType) { + XDocument xDoc = null; if (referenceXName == W.headerReference) { diff --git a/OpenXmlPowerTools/FieldRetriever.cs b/OpenXmlPowerTools/FieldRetriever.cs index c45fed7c..054d7bdd 100644 --- a/OpenXmlPowerTools/FieldRetriever.cs +++ b/OpenXmlPowerTools/FieldRetriever.cs @@ -371,9 +371,7 @@ public static FieldInfo ParseField(string field) if (fieldType.ToUpper() != "HYPERLINK" && fieldType.ToUpper() != "REF" && fieldType.ToUpper() != "SEQ" && - fieldType.ToUpper() != "STYLEREF" && - fieldType.ToUpper() != "LISTNUM" && - fieldType.ToUpper() != "PAGE") + fieldType.ToUpper() != "STYLEREF") return emptyField; string[] tokens = GetTokens(field); if (tokens.Length == 0) diff --git a/OpenXmlPowerTools/FormattingAssembler.cs b/OpenXmlPowerTools/FormattingAssembler.cs index 9786c11c..f42d6e8a 100644 --- a/OpenXmlPowerTools/FormattingAssembler.cs +++ b/OpenXmlPowerTools/FormattingAssembler.cs @@ -232,7 +232,6 @@ private static object NormalizeListItemsTransform(FormattingAssemblerInfo fai, W ); XElement listItemRunProps = null; - List listItemHtmlAttributes = new List(); int? abstractNumId = null; if (listItemInfo != null) { @@ -284,33 +283,9 @@ private static object NormalizeListItemsTransform(FormattingAssemblerInfo fai, W var listItemLvlRunProps = listItemLvl.Elements(W.rPr).FirstOrDefault(); listItemRunProps = MergeStyleElement(listItemLvlRunProps, mergedRunProps); - string numFmt = null; - string format = null; - numFmt = (string)listItemLvl.Elements(W.numFmt).Attributes(W.val).FirstOrDefault(); - - if (numFmt == null) - { - var mcAlternativeContent = listItemLvl.Descendants(MC.AlternateContent).FirstOrDefault(); - if (mcAlternativeContent != null) - { - var choice = mcAlternativeContent.Element(MC.Choice); - if (choice != null) - { - numFmt = (string)choice.Elements(W.numFmt).Attributes(W.val).FirstOrDefault(); - format = (string)choice.Elements(W.numFmt).Attributes(W.format).FirstOrDefault(); - } - } - } - - if (numFmt == "bullet") + if ((string)listItemLvl.Elements(W.numFmt).Attributes(W.val).FirstOrDefault() == "bullet") { listItemRunProps.Elements(W.rtl).Remove(); - listItemHtmlAttributes.Add( - new XAttribute(PtOpenXml.HtmlStructure, "ul") - ); - listItemHtmlAttributes.Add( - new XAttribute(PtOpenXml.HtmlStyle, "list-style-type: circle;") - ); } else { @@ -325,46 +300,6 @@ private static object NormalizeListItemsTransform(FormattingAssemblerInfo fai, W new XElement(W.rtl)), listItemRunProps); } } - listItemHtmlAttributes.Add( - new XAttribute(PtOpenXml.HtmlStructure, "ol") - ); - - if (numFmt == "decimal") - { - listItemHtmlAttributes.Add( - new XAttribute(PtOpenXml.HtmlStyle, "list-style-type: decimal;") - ); - } - else if (numFmt == "lowerLetter") - { - listItemHtmlAttributes.Add( - new XAttribute(PtOpenXml.HtmlStyle, "list-style-type: lower-alpha;") - ); - } - else if (numFmt == "lowerRoman") - { - listItemHtmlAttributes.Add( - new XAttribute(PtOpenXml.HtmlStyle, "list-style-type: lower-roman;") - ); - } - else if (numFmt == "upperLetter") - { - listItemHtmlAttributes.Add( - new XAttribute(PtOpenXml.HtmlStyle, "list-style-type: upper-alpha;") - ); - } - else if (numFmt == "upperRoman") - { - listItemHtmlAttributes.Add( - new XAttribute(PtOpenXml.HtmlStyle, "list-style-type: upper-roman;") - ); - } - else if (numFmt == "custom" && format.StartsWith("0")) - { - listItemHtmlAttributes.Add( - new XAttribute(PtOpenXml.HtmlStyle, "list-style-type: decimal-leading-zero;") - ); - } } } @@ -377,11 +312,8 @@ private static object NormalizeListItemsTransform(FormattingAssemblerInfo fai, W .StringConcatenate() .TrimEnd('.'); - ListItemRetriever.ListItemInfo listItemInfo2 = element.Annotation(); - var listItemRun = new XElement(W.r, new XAttribute(PtOpenXml.ListItemRun, levelNumsString), - listItemInfo2 != null ? new XAttribute(PtOpenXml.AbstractNumId, listItemInfo2.AbstractNumId) : null, element.Attribute(PtOpenXml.FontName), element.Attribute(PtOpenXml.LanguageType), listItemRunProps, @@ -620,7 +552,6 @@ private static object NormalizeListItemsTransform(FormattingAssemblerInfo fai, W element.Attribute(PtOpenXml.LanguageType), element.Attribute(PtOpenXml.Unid), new XAttribute(PtOpenXml.AbstractNumId, abstractNumId), - listItemHtmlAttributes, newParaProps, listItemRun, tabRun, @@ -666,7 +597,7 @@ private static void AddTabAtLeftIndent(XElement pPr) tabs = new XElement(W.tabs); pPr.Add(tabs); } - var tabAtLeft = tabs.Elements(W.tab).FirstOrDefault(t => WordprocessingMLUtil.StringToTwips((string)t.Attribute(W.pos)) == left); + var tabAtLeft = tabs.Elements(W.tab).FirstOrDefault(t => (int)t.Attribute(W.pos) == left); if (tabAtLeft == null) { tabs.Add( @@ -679,8 +610,6 @@ private static void AddTabAtLeftIndent(XElement pPr) public static XName[] PtNamesToKeep = new[] { PtOpenXml.FontName, PtOpenXml.AbstractNumId, - PtOpenXml.HtmlStructure, - PtOpenXml.HtmlStyle, PtOpenXml.StyleName, PtOpenXml.LanguageType, PtOpenXml.ListItemRun, @@ -1960,7 +1889,7 @@ private static XElement TabsMerge(XElement higherPriorityElement, XElement lower var hps = higherPriorityElement.Elements().Select(e => new { - Pos = WordprocessingMLUtil.StringToTwips((string)e.Attribute(W.pos)), + Pos = (int)e.Attribute(W.pos), Pri = 1, Element = e, } @@ -1968,7 +1897,7 @@ private static XElement TabsMerge(XElement higherPriorityElement, XElement lower var lps = lowerPriorityElement.Elements().Select(e => new { - Pos = WordprocessingMLUtil.StringToTwips((string)e.Attribute(W.pos)), + Pos = (int)e.Attribute(W.pos), Pri = 2, Element = e, } @@ -1977,7 +1906,7 @@ private static XElement TabsMerge(XElement higherPriorityElement, XElement lower .GroupBy(s => s.Pos) .Select(g => g.OrderBy(s => s.Pri).First().Element) .Where(e => (string)e.Attribute(W.val) != "clear") - .OrderBy(e => WordprocessingMLUtil.StringToTwips((string)e.Attribute(W.pos))); + .OrderBy(e => (int)e.Attribute(W.pos)); var newTabs = new XElement(W.tabs, newTabElements); return newTabs; } diff --git a/OpenXmlPowerTools/ListItemRetriever.cs b/OpenXmlPowerTools/ListItemRetriever.cs index 75ee0311..2fcb1625 100644 --- a/OpenXmlPowerTools/ListItemRetriever.cs +++ b/OpenXmlPowerTools/ListItemRetriever.cs @@ -136,18 +136,6 @@ public ListItemSource(XDocument numXDoc, XDocument stylesXDoc, int numId) public XElement Lvl(int ilvl) { - var lvl2 = Main.Lvl(ilvl); - if (lvl2 == null) - { - for (int i = ilvl - 1; i >= 0; i--) - { - lvl2 = Main.Lvl(i); - if (lvl2 != null) - break; - } - } - if (lvl2 != null) - return lvl2; if (NumStyleLink != null) { var lvl = NumStyleLink.Lvl(ilvl); @@ -162,7 +150,17 @@ public XElement Lvl(int ilvl) } return lvl; } - return null; + var lvl2 = Main.Lvl(ilvl); + if (lvl2 == null) + { + for (int i = ilvl - 1; i >= 0; i--) + { + lvl2 = Main.Lvl(i); + if (lvl2 != null) + break; + } + } + return lvl2; } public int? StartOverride(int ilvl) @@ -761,8 +759,7 @@ public static string RetrieveListItem(WordprocessingDocument wordDoc, XElement p var numXDoc = numberingDefinitionsPart.GetXDocument(); var stylesXDoc = styleDefinitionsPart.GetXDocument(); - var paragraphLevel = GetParagraphLevel(paragraph); - var lvl = listItemInfo.Lvl(paragraphLevel); + var lvl = listItemInfo.Lvl(GetParagraphLevel(paragraph)); string lvlText = (string)lvl.Elements(W.lvlText).Attributes(W.val).FirstOrDefault(); if (lvlText == null) diff --git a/OpenXmlPowerTools/MetricsGetter.cs b/OpenXmlPowerTools/MetricsGetter.cs index 86445ef9..597d6a74 100644 --- a/OpenXmlPowerTools/MetricsGetter.cs +++ b/OpenXmlPowerTools/MetricsGetter.cs @@ -821,7 +821,7 @@ private static object GetStyleHierarchy(WordprocessingDocument document) { var styleChain = item.Split('/'); XElement elementToAddTo = styleHierarchy; - foreach (var inChain in styleChain.PtSkipLast(1)) + foreach (var inChain in styleChain.SkipLast(1)) elementToAddTo = elementToAddTo.Elements(H.Style).FirstOrDefault(z => z.Attribute(H.Id).Value == inChain); var styleToAdd = styleChain.Last(); elementToAddTo.Add( diff --git a/OpenXmlPowerTools/OpenXmlPowerTools.csproj b/OpenXmlPowerTools/OpenXmlPowerTools.csproj index c7220506..b0a84db8 100644 --- a/OpenXmlPowerTools/OpenXmlPowerTools.csproj +++ b/OpenXmlPowerTools/OpenXmlPowerTools.csproj @@ -1,24 +1,102 @@ - + + + - net45;net46;netstandard2.0 + Debug + AnyCPU + {6F957FF3-AFCC-4D69-8FBC-71AE21BC45C9} + Library + Properties + OpenXmlPowerTools + OpenXmlPowerTools + v4.5 + 512 + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + 7 + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 - - - - - + + False + OpenXmlSdk\DocumentFormat.OpenXml.dll + + + + - + + + + + + - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/OpenXmlPowerTools/OpenXmlPowerTools.sln b/OpenXmlPowerTools/OpenXmlPowerTools.sln new file mode 100644 index 00000000..8d9a48c8 --- /dev/null +++ b/OpenXmlPowerTools/OpenXmlPowerTools.sln @@ -0,0 +1,22 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 15 +VisualStudioVersion = 15.0.26228.98 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "OpenXmlPowerTools", "OpenXmlPowerTools.csproj", "{B9C353A0-D182-46D3-94E0-C11D2A9F2FAD}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {B9C353A0-D182-46D3-94E0-C11D2A9F2FAD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B9C353A0-D182-46D3-94E0-C11D2A9F2FAD}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B9C353A0-D182-46D3-94E0-C11D2A9F2FAD}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B9C353A0-D182-46D3-94E0-C11D2A9F2FAD}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/OpenXmlPowerTools/Previous/WmlComparer.cs b/OpenXmlPowerTools/Previous/WmlComparer.cs new file mode 100644 index 00000000..1a02bb7e --- /dev/null +++ b/OpenXmlPowerTools/Previous/WmlComparer.cs @@ -0,0 +1,7400 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Globalization; +using System.IO; +using System.IO.Packaging; +using System.Text; +using System.Xml.Linq; +using DocumentFormat.OpenXml.Packaging; +using System.Drawing; +using System.Security.Cryptography; +using OpenXmlPowerTools; + +// It is possible to optimize DescendantContentAtoms + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// Currently, the unid is set at the beginning of the algorithm. It is used by the code that establishes correlation based on first rejecting +/// tracked revisions, then correlating paragraphs/tables. It is requred for this algorithm - after finding a correlated sequence in the document with rejected +/// revisions, it uses the unid to find the same paragraph in the document without rejected revisions, then sets the correlated sha1 hash in that document. +/// +/// But then when accepting tracked revisions, for certain paragraphs (where there are deleted paragraph marks) it is going to lose the unids. But this isn't a +/// problem because when paragraph marks are deleted, the correlation is definitely no longer possible. Any paragraphs that are in a range of paragraphs that +/// are coalesced can't be correlated to paragraphs in the other document via their hash. At that point we no longer care what their unids are. +/// +/// But after that it is only used to reconstruct the tree. It is also used in the debugging code that +/// prints the various correlated sequences and comparison units - this is display for debugging purposes only. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// The key idea here is that a given paragraph will always have the same ancestors, and it doesn't matter whether the content was deleted from the old document, +/// inserted into the new document, or set as equal. At this point, we identify a paragraph as a sequential list of content atoms, terminated by a paragraph mark. +/// This entire list will for a single paragraph, regardless of whether the paragraph is a child of the body, or if the paragraph is in a cell in a table, or if +/// the paragraph is in a text box. The list of ancestors, from the paragraph to the root of the XML tree will be the same for all content atoms in the paragraph. +/// +/// Therefore: +/// +/// Iterate through the list of content atoms backwards. When the loop sees a paragraph mark, it gets the ancestor unids from the paragraph mark to the top of the +/// tree, and sets this as the same for all content atoms in the paragraph. For descendants of the paragraph mark, it doesn't really matter if content is put into +/// separate runs or what not. We don't need to be concerned about what the unids are for descendants of the paragraph. +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +namespace OpenXmlPowerTools.Previous +{ + public class WmlComparerSettings + { + public char[] WordSeparators; + public string AuthorForRevisions = "Open-Xml-PowerTools"; + public string DateTimeForRevisions = DateTime.Now.ToString("o"); + public double DetailThreshold = 0.15; + public bool CaseInsensitive = false; + public CultureInfo CultureInfo = null; + public Action LogCallback = null; + public int StartingIdForFootnotesEndnotes = 1; + + public DirectoryInfo DebugTempFileDi; + + public WmlComparerSettings() + { + // note that , and . are processed explicitly to handle cases where they are in a number or word + WordSeparators = new[] { ' ', '-', ')', '(', ';', ',' }; // todo need to fix this for complete list + } + } + + public class WmlComparerConsolidateSettings + { + public bool ConsolidateWithTable = true; + } + + public class WmlRevisedDocumentInfo + { + public WmlDocument RevisedDocument; + public string Revisor; + public Color Color; + } + + public static class WmlComparer + { + public static bool s_False = false; + public static bool s_True = true; + public static bool s_SaveIntermediateFilesForDebugging = false; + + public static WmlDocument Compare(WmlDocument source1, WmlDocument source2, WmlComparerSettings settings) + { + return CompareInternal(source1, source2, settings, true); + } + + private static WmlDocument CompareInternal(WmlDocument source1, WmlDocument source2, WmlComparerSettings settings, + bool preProcessMarkupInOriginal) + { + if (preProcessMarkupInOriginal) + source1 = PreProcessMarkup(source1, settings.StartingIdForFootnotesEndnotes + 1000); + source2 = PreProcessMarkup(source2, settings.StartingIdForFootnotesEndnotes + 2000); + + if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + var name1 = "Source1-Step1-PreProcess.docx"; + var name2 = "Source2-Step1-PreProcess.docx"; + var preProcFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1)); + source1.SaveAs(preProcFi1.FullName); + var preProcFi2 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2)); + source2.SaveAs(preProcFi2.FullName); + } + + // at this point, both source1 and source2 have unid on every element. These are the values that will enable reassembly of the XML tree. + // but we need other values. + + // In source1: + // - accept tracked revisions + // - determine hash code for every block-level element + // - save as attribute on every element + + // - accept tracked revisions and reject tracked revisions leave the unids alone, where possible. + // - after accepting and calculating the hash, then can use the unids to find the right block-level element in the unmodified source1, and install the hash + + // In source2: + // - reject tracked revisions + // - determine hash code for every block-level element + // - save as an attribute on every element + + // - after rejecting and calculating the hash, then can use the unids to find the right block-level element in the unmodified source2, and install the hash + + // - sometimes after accepting or rejecting tracked revisions, several paragraphs will get coalesced into a single paragraph due to paragraph marks being inserted / deleted. + // - in this case, some paragraphs will not get a hash injected onto them. + // - if a paragraph doesn't have a hash, then it will never correspond to another paragraph, and such issues will need to be resolved in the normal execution of the LCS algorithm. + // - note that when we do propagate the unid through for the first paragraph. + + // Establish correlation between the two. + // Find the longest common sequence of block-level elements where hash codes are the same. + // this sometimes will be every block level element in the document. Or sometimes will be just a fair number of them. + + // at the start of doing the LCS algorithm, we will match up content, and put them in corresponding unknown correlated comparison units. Those paragraphs will only ever be matched to their corresponding paragraph. + // then the algorithm can proceed as usual. + + // need to call ChangeFootnoteEndnoteReferencesToUniqueRange before creating the wmlResult document, so that + // the same GUID ids are used for footnote and endnote references in both the 'after' document, and in the + // result document. + + var source1afterAccepting = RevisionProcessor.AcceptRevisions(source1); + var source2afterRejecting = RevisionProcessor.RejectRevisions(source2); + + if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + var name1 = "Source1-Step2-AfterAccepting.docx"; + var name2 = "Source2-Step2-AfterRejecting.docx"; + var afterAcceptingFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1)); + source1afterAccepting.SaveAs(afterAcceptingFi1.FullName); + var afterRejectingFi2 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2)); + source2afterRejecting.SaveAs(afterRejectingFi2.FullName); + } + + // this creates the correlated hash codes that enable us to match up ranges of paragraphs based on + // accepting in source1, rejecting in source2 + source1 = HashBlockLevelContent(source1, source1afterAccepting, settings); + source2 = HashBlockLevelContent(source2, source2afterRejecting, settings); + + if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + var name1 = "Source1-Step3-AfterHashing.docx"; + var name2 = "Source2-Step3-AfterHashing.docx"; + var afterHashingFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1)); + source1.SaveAs(afterHashingFi1.FullName); + var afterHashingFi2 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2)); + source2.SaveAs(afterHashingFi2.FullName); + } + + // Accept revisions in before, and after + source1 = RevisionProcessor.AcceptRevisions(source1); + source2 = RevisionProcessor.AcceptRevisions(source2); + + if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + var name1 = "Source1-Step4-AfterAccepting.docx"; + var name2 = "Source2-Step4-AfterAccepting.docx"; + var afterAcceptingFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1)); + source1.SaveAs(afterAcceptingFi1.FullName); + var afterAcceptingFi2 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2)); + source2.SaveAs(afterAcceptingFi2.FullName); + } + + // after accepting revisions, some unids may have been removed by revision accepter, along with the correlatedSHA1Hash codes, + // this is as it should be. + // but need to go back in and add guids to paragraphs that have had them removed. + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length); + using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true)) + { + AddUnidsToMarkupInContentParts(wDoc); + } + } + + WmlDocument wmlResult = new WmlDocument(source1); + using (MemoryStream ms1 = new MemoryStream()) + using (MemoryStream ms2 = new MemoryStream()) + { + ms1.Write(source1.DocumentByteArray, 0, source1.DocumentByteArray.Length); + ms2.Write(source2.DocumentByteArray, 0, source2.DocumentByteArray.Length); + WmlDocument producedDocument; + using (WordprocessingDocument wDoc1 = WordprocessingDocument.Open(ms1, true)) + using (WordprocessingDocument wDoc2 = WordprocessingDocument.Open(ms2, true)) + { + producedDocument = ProduceDocumentWithTrackedRevisions(settings, wmlResult, wDoc1, wDoc2); + } + + if (s_False && settings.DebugTempFileDi != null) + { + var name1 = "Source1-Step5-AfterProducingDocWithRevTrk.docx"; + var name2 = "Source2-Step5-AfterProducingDocWithRevTrk.docx"; + var afterProducingFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1)); + var afterProducingWml1 = new WmlDocument("after1.docx", ms1.ToArray()); + afterProducingWml1.SaveAs(afterProducingFi1.FullName); + var afterProducingFi2 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2)); + var afterProducingWml2 = new WmlDocument("after2.docx", ms2.ToArray()); + afterProducingWml2.SaveAs(afterProducingFi2.FullName); + } + + if (s_False && settings.DebugTempFileDi != null) + { + var cleanedSource = CleanPowerToolsAndRsid(source1); + var name1 = "Cleaned-Source.docx"; + var cleanedSourceFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1)); + cleanedSource.SaveAs(cleanedSourceFi1.FullName); + + var cleanedProduced = CleanPowerToolsAndRsid(producedDocument); + var name2 = "Cleaned-Produced.docx"; + var cleanedProducedFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name2)); + cleanedProduced.SaveAs(cleanedProducedFi1.FullName); + } + + return producedDocument; + } + } + + private static WmlDocument CleanPowerToolsAndRsid(WmlDocument producedDocument) + { + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(producedDocument.DocumentByteArray, 0, producedDocument.DocumentByteArray.Length); + using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true)) + { + foreach (var cp in wDoc.ContentParts()) + { + var xd = cp.GetXDocument(); + var newRoot = CleanPartTransform(xd.Root); + xd.Root.ReplaceWith(newRoot); + cp.PutXDocument(); + } + } + var cleaned = new WmlDocument("cleaned.docx", ms.ToArray()); + return cleaned; + } + } + + private static WmlDocument HashBlockLevelContent(WmlDocument source, WmlDocument source1afterProcessingRevTracking, WmlComparerSettings settings) + { + using (MemoryStream msSource = new MemoryStream()) + using (MemoryStream msAfterProc = new MemoryStream()) + { + msSource.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length); + msAfterProc.Write(source1afterProcessingRevTracking.DocumentByteArray, 0, source1afterProcessingRevTracking.DocumentByteArray.Length); + using (WordprocessingDocument wDocSource = WordprocessingDocument.Open(msSource, true)) + using (WordprocessingDocument wDocAfterProc = WordprocessingDocument.Open(msAfterProc, true)) + { + // create Unid dictionary for source + var sourceMainXDoc = wDocSource + .MainDocumentPart + .GetXDocument(); + + var sourceUnidDict = sourceMainXDoc + .Root + .Descendants() + .Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr) + .ToDictionary(d => (string)d.Attribute(PtOpenXml.Unid)); + + var afterProcMainXDoc = wDocAfterProc + .MainDocumentPart + .GetXDocument(); + + foreach (var blockLevelContent in afterProcMainXDoc.Root.Descendants().Where(d => d.Name == W.p || d.Name == W.tbl || d.Name == W.tr)) + { + var cloneBlockLevelContentForHashing = (XElement)CloneBlockLevelContentForHashing(wDocAfterProc.MainDocumentPart, blockLevelContent, true, settings); + var shaString = cloneBlockLevelContentForHashing.ToString(SaveOptions.DisableFormatting) + .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", ""); + var sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaString); + var thisUnid = (string)blockLevelContent.Attribute(PtOpenXml.Unid); + if (thisUnid != null) + { + if (sourceUnidDict.ContainsKey(thisUnid)) + { + var correlatedBlockLevelContent = sourceUnidDict[thisUnid]; + correlatedBlockLevelContent.Add(new XAttribute(PtOpenXml.CorrelatedSHA1Hash, sha1Hash)); + } + } + } + + wDocSource.MainDocumentPart.PutXDocument(); + } + WmlDocument sourceWithCorrelatedSHA1Hash = new WmlDocument(source.FileName, msSource.ToArray()); + return sourceWithCorrelatedSHA1Hash; + } + } + + private static WmlDocument PreProcessMarkup(WmlDocument source, int startingIdForFootnotesEndnotes) + { + // open and close to get rid of MC content + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length); + OpenSettings os = new OpenSettings(); + os.MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(MarkupCompatibilityProcessMode.ProcessAllParts, + DocumentFormat.OpenXml.FileFormatVersions.Office2007); + using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true, os)) + { + var doc = wDoc.MainDocumentPart.RootElement; + if (wDoc.MainDocumentPart.FootnotesPart != null) + { + // contrary to what you might think, looking at the API, it is necessary to access the root element of each part to cause + // the SDK to process MC markup. + var fn = wDoc.MainDocumentPart.FootnotesPart.RootElement; + } + if (wDoc.MainDocumentPart.EndnotesPart != null) + { + var en = wDoc.MainDocumentPart.EndnotesPart.RootElement; + } + } + source = new WmlDocument(source.FileName, ms.ToArray()); + } + + // open and close to get rid of MC content + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length); + OpenSettings os = new OpenSettings(); + os.MarkupCompatibilityProcessSettings = new MarkupCompatibilityProcessSettings(MarkupCompatibilityProcessMode.ProcessAllParts, + DocumentFormat.OpenXml.FileFormatVersions.Office2007); + using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true, os)) + { + TestForInvalidContent(wDoc); + RemoveExistingPowerToolsMarkup(wDoc); + SimplifyMarkupSettings msSettings = new SimplifyMarkupSettings() + { + RemoveBookmarks = true, + AcceptRevisions = false, + RemoveComments = true, + RemoveContentControls = true, + RemoveFieldCodes = true, + RemoveGoBackBookmark = true, + RemoveLastRenderedPageBreak = true, + RemovePermissions = true, + RemoveProof = true, + RemoveSmartTags = true, + RemoveSoftHyphens = true, + RemoveHyperlinks = true, + }; + MarkupSimplifier.SimplifyMarkup(wDoc, msSettings); + ChangeFootnoteEndnoteReferencesToUniqueRange(wDoc, startingIdForFootnotesEndnotes); + AddUnidsToMarkupInContentParts(wDoc); + AddFootnotesEndnotesParts(wDoc); + FillInEmptyFootnotesEndnotes(wDoc); + } + return new WmlDocument(source.FileName, ms.ToArray()); + } + } + + // somehow, sometimes a footnote or endnote contains absolutely nothing - no paragraph - nothing. + // This messes up the algorithm, so in this case, insert an empty paragraph. + // This is pretty wacky markup to find, and I don't know how this markup comes into existence, but this is an innocuous fix. + private static void FillInEmptyFootnotesEndnotes(WordprocessingDocument wDoc) + { + XElement emptyFootnote = XElement.Parse( +@" + + + + + + + + + +"); + + XElement emptyEndnote = XElement.Parse( +@" + + + + + + + + + +"); + + var footnotePart = wDoc.MainDocumentPart.FootnotesPart; + if (footnotePart != null) + { + var fnXDoc = footnotePart.GetXDocument(); + foreach (var fn in fnXDoc.Root.Elements(W.footnote)) + { + if (!fn.HasElements) + fn.Add(emptyFootnote); + } + footnotePart.PutXDocument(); + } + + var endnotePart = wDoc.MainDocumentPart.EndnotesPart; + if (endnotePart != null) + { + var fnXDoc = endnotePart.GetXDocument(); + foreach (var fn in fnXDoc.Root.Elements(W.endnote)) + { + if (!fn.HasElements) + fn.Add(emptyEndnote); + } + endnotePart.PutXDocument(); + } + } + + private static bool ContentContainsFootnoteEndnoteReferencesThatHaveRevisions(XElement element, WordprocessingDocument wDocDelta) + { + var footnoteEndnoteReferences = element.Descendants().Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference); + if (!footnoteEndnoteReferences.Any()) + return false; + var footnoteXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument(); + var endnoteXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument(); + foreach (var note in footnoteEndnoteReferences) + { + XElement fnen = null; + if (note.Name == W.footnoteReference) + { + var id = (int)note.Attribute(W.id); + fnen = footnoteXDoc + .Root + .Elements(W.footnote) + .FirstOrDefault(n => (int)n.Attribute(W.id) == id); + if (fnen.Descendants().Where(d => d.Name == W.ins || d.Name == W.del).Any()) + return true; + } + if (note.Name == W.endnoteReference) + { + var id = (int)note.Attribute(W.id); + fnen = endnoteXDoc + .Root + .Elements(W.endnote) + .FirstOrDefault(n => (int)n.Attribute(W.id) == id); + if (fnen.Descendants().Where(d => d.Name == W.ins || d.Name == W.del).Any()) + return true; + } + } + return false; + } + + private static void AddUnidsToMarkupInContentParts(WordprocessingDocument wDoc) + { + var mdp = wDoc.MainDocumentPart.GetXDocument(); + AssignUnidToAllElements(mdp.Root); + IgnorePt14Namespace(mdp.Root); + wDoc.MainDocumentPart.PutXDocument(); + + if (wDoc.MainDocumentPart.FootnotesPart != null) + { + var p = wDoc.MainDocumentPart.FootnotesPart.GetXDocument(); + AssignUnidToAllElements(p.Root); + IgnorePt14Namespace(p.Root); + wDoc.MainDocumentPart.FootnotesPart.PutXDocument(); + } + + if (wDoc.MainDocumentPart.EndnotesPart != null) + { + var p = wDoc.MainDocumentPart.EndnotesPart.GetXDocument(); + AssignUnidToAllElements(p.Root); + IgnorePt14Namespace(p.Root); + wDoc.MainDocumentPart.EndnotesPart.PutXDocument(); + } + } + + private class ConsolidationInfo + { + public string Revisor; + public Color Color; + public XElement RevisionElement; + public bool InsertBefore = false; + public string RevisionHash; + public XElement[] Footnotes; + public XElement[] Endnotes; + public string RevisionString; // for debugging purposes only + } + + private static string nl = Environment.NewLine; + + /*****************************************************************************************************************/ + // Consolidate processes footnotes and endnotes in a particular fashion - if the unmodified document has a footnote + // reference, and a delta has a footnote reference, we end up with two footnotes - one is unmodified, and is refered to + // from the unmodified content. The footnote reference in the delta refers to the modified footnote. This is as it + // should be. + /*****************************************************************************************************************/ + public static WmlDocument Consolidate(WmlDocument original, + List revisedDocumentInfoList, + WmlComparerSettings settings) + { + var consolidateSettings = new WmlComparerConsolidateSettings(); + return Consolidate(original, revisedDocumentInfoList, settings, consolidateSettings); + } + + public static WmlDocument Consolidate(WmlDocument original, + List revisedDocumentInfoList, + WmlComparerSettings settings, WmlComparerConsolidateSettings consolidateSettings) + { + // pre-process the original, so that it already has unids for all elements + // then when comparing all documents to the original, each one will have the unid as appropriate + // for all revision block-level content + // set unid to look for + // while true + // determine where to insert + // get the unid for the revision + // look it up in the original. if find it, then insert after that element + // if not in the original + // look backwards in revised document, set unid to look for, do the loop again + // if get to the beginning of the document + // insert at beginning of document + + settings.StartingIdForFootnotesEndnotes = 3000; + var originalWithUnids = PreProcessMarkup(original, settings.StartingIdForFootnotesEndnotes); + WmlDocument consolidated = new WmlDocument(originalWithUnids); + + if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + var name1 = "Original-with-Unids.docx"; + var preProcFi1 = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1)); + originalWithUnids.SaveAs(preProcFi1.FullName); + } + + var revisedDocumentInfoListCount = revisedDocumentInfoList.Count(); + + using (MemoryStream consolidatedMs = new MemoryStream()) + { + consolidatedMs.Write(consolidated.DocumentByteArray, 0, consolidated.DocumentByteArray.Length); + using (WordprocessingDocument consolidatedWDoc = WordprocessingDocument.Open(consolidatedMs, true)) + { + var consolidatedMainDocPart = consolidatedWDoc.MainDocumentPart; + var consolidatedMainDocPartXDoc = consolidatedMainDocPart.GetXDocument(); + + // save away last sectPr + XElement savedSectPr = consolidatedMainDocPartXDoc + .Root + .Element(W.body) + .Elements(W.sectPr) + .LastOrDefault(); + consolidatedMainDocPartXDoc + .Root + .Element(W.body) + .Elements(W.sectPr) + .Remove(); + + var consolidatedByUnid = consolidatedMainDocPartXDoc + .Descendants() + .Where(d => (d.Name == W.p || d.Name == W.tbl) && d.Attribute(PtOpenXml.Unid) != null) + .ToDictionary(d => (string)d.Attribute(PtOpenXml.Unid)); + + int deltaNbr = 1; + foreach (var revisedDocumentInfo in revisedDocumentInfoList) + { + settings.StartingIdForFootnotesEndnotes = (deltaNbr * 2000) + 3000; + var delta = WmlComparer.CompareInternal(originalWithUnids, revisedDocumentInfo.RevisedDocument, settings, false); + + if (s_SaveIntermediateFilesForDebugging && settings.DebugTempFileDi != null) + { + var name1 = string.Format("Delta-{0}.docx", deltaNbr++); + var deltaFi = new FileInfo(Path.Combine(settings.DebugTempFileDi.FullName, name1)); + delta.SaveAs(deltaFi.FullName); + } + + var colorRgb = revisedDocumentInfo.Color.ToArgb(); + var colorString = colorRgb.ToString("X"); + if (colorString.Length == 8) + colorString = colorString.Substring(2); + + using (MemoryStream msOriginalWithUnids = new MemoryStream()) + using (MemoryStream msDelta = new MemoryStream()) + { + msOriginalWithUnids.Write(originalWithUnids.DocumentByteArray, 0, originalWithUnids.DocumentByteArray.Length); + msDelta.Write(delta.DocumentByteArray, 0, delta.DocumentByteArray.Length); + using (WordprocessingDocument wDocOriginalWithUnids = WordprocessingDocument.Open(msOriginalWithUnids, true)) + using (WordprocessingDocument wDocDelta = WordprocessingDocument.Open(msDelta, true)) + { + var modMainDocPart = wDocDelta.MainDocumentPart; + var modMainDocPartXDoc = modMainDocPart.GetXDocument(); + var blockLevelContentToMove = modMainDocPartXDoc + .Root + .DescendantsTrimmed(d => d.Name == W.txbxContent || d.Name == W.tr) + .Where(d => d.Name == W.p || d.Name == W.tbl) + .Where(d => d.Descendants().Any(z => z.Name == W.ins || z.Name == W.del) || + ContentContainsFootnoteEndnoteReferencesThatHaveRevisions(d, wDocDelta)) + .ToList(); + + foreach (var revision in blockLevelContentToMove) + { + var elementLookingAt = revision; + while (true) + { + var unid = (string)elementLookingAt.Attribute(PtOpenXml.Unid); + if (unid == null) + throw new OpenXmlPowerToolsException("Internal error"); + + XElement elementToInsertAfter = null; + if (consolidatedByUnid.ContainsKey(unid)) + elementToInsertAfter = consolidatedByUnid[unid]; + + if (elementToInsertAfter != null) + { + ConsolidationInfo ci = new ConsolidationInfo(); + ci.Revisor = revisedDocumentInfo.Revisor; + ci.Color = revisedDocumentInfo.Color; + ci.RevisionElement = revision; + ci.Footnotes = revision + .Descendants(W.footnoteReference) + .Select(fr => + { + var id = (int)fr.Attribute(W.id); + var fnXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument(); + var footnote = fnXDoc.Root.Elements(W.footnote).FirstOrDefault(fn => (int)fn.Attribute(W.id) == id); + if (footnote == null) + throw new OpenXmlPowerToolsException("Internal Error"); + return footnote; + }) + .ToArray(); + ci.Endnotes = revision + .Descendants(W.endnoteReference) + .Select(er => + { + var id = (int)er.Attribute(W.id); + var enXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument(); + var endnote = enXDoc.Root.Elements(W.endnote).FirstOrDefault(en => (int)en.Attribute(W.id) == id); + if (endnote == null) + throw new OpenXmlPowerToolsException("Internal Error"); + return endnote; + }) + .ToArray(); + AddToAnnotation( + wDocDelta, + consolidatedWDoc, + elementToInsertAfter, + ci, + settings); + break; + } + else + { + // find an element to insert after + var elementBeforeRevision = elementLookingAt + .SiblingsBeforeSelfReverseDocumentOrder() + .FirstOrDefault(e => e.Attribute(PtOpenXml.Unid) != null); + if (elementBeforeRevision == null) + { + var firstElement = consolidatedMainDocPartXDoc + .Root + .Element(W.body) + .Elements() + .FirstOrDefault(e => e.Name == W.p || e.Name == W.tbl); + + ConsolidationInfo ci = new ConsolidationInfo(); + ci.Revisor = revisedDocumentInfo.Revisor; + ci.Color = revisedDocumentInfo.Color; + ci.RevisionElement = revision; + ci.InsertBefore = true; + ci.Footnotes = revision + .Descendants(W.footnoteReference) + .Select(fr => + { + var id = (int)fr.Attribute(W.id); + var fnXDoc = wDocDelta.MainDocumentPart.FootnotesPart.GetXDocument(); + var footnote = fnXDoc.Root.Elements(W.footnote).FirstOrDefault(fn => (int)fn.Attribute(W.id) == id); + if (footnote == null) + throw new OpenXmlPowerToolsException("Internal Error"); + return footnote; + }) + .ToArray(); + ci.Endnotes = revision + .Descendants(W.endnoteReference) + .Select(er => + { + var id = (int)er.Attribute(W.id); + var enXDoc = wDocDelta.MainDocumentPart.EndnotesPart.GetXDocument(); + var endnote = enXDoc.Root.Elements(W.endnote).FirstOrDefault(en => (int)en.Attribute(W.id) == id); + if (endnote == null) + throw new OpenXmlPowerToolsException("Internal Error"); + return endnote; + }) + .ToArray(); + AddToAnnotation( + wDocDelta, + consolidatedWDoc, + firstElement, + ci, + settings); + break; + } + else + { + elementLookingAt = elementBeforeRevision; + continue; + } + } + } + } + CopyMissingStylesFromOneDocToAnother(wDocDelta, consolidatedWDoc); + } + } + } + + // at this point, everything is added as an annotation, from all documents to be merged. + // so now the process is to go through and add the annotations to the document + var elementsToProcess = consolidatedMainDocPartXDoc + .Root + .Descendants() + .Where(d => d.Annotation>() != null) + .ToList(); + + var emptyParagraph = new XElement(W.p, + new XElement(W.pPr, + new XElement(W.spacing, + new XAttribute(W.after, "0"), + new XAttribute(W.line, "240"), + new XAttribute(W.lineRule, "auto")))); + + foreach (var ele in elementsToProcess) + { + var lci = ele.Annotation>(); + + // process before + var contentToAddBefore = lci + .Where(ci => ci.InsertBefore == true) + .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString()) + .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings)); + ele.AddBeforeSelf(contentToAddBefore); + + // process after + // if all revisions from all revisors are exactly the same, then instead of adding multiple tables after + // that contains the revisions, then simply replace the paragraph with the one with the revisions. + // RC004 documents contain the test data to exercise this. + + var lciCount = lci.Where(ci => ci.InsertBefore == false).Count(); + + if (lciCount > 1 && lciCount == revisedDocumentInfoListCount) + { + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // This is the code that determines if revisions should be consolidated into one. + + var uniqueRevisions = lci + .Where(ci => ci.InsertBefore == false) + .GroupBy(ci => + { + // Get a hash after first accepting revisions and compressing the text. + var acceptedRevisionElement = RevisionProcessor.AcceptRevisionsForElement(ci.RevisionElement); + var sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(acceptedRevisionElement.Value.Replace(" ", "").Replace(" ", "").Replace(" ", "").Replace("\n", "").Replace(".", "").Replace(",", "").ToUpper()); + return sha1Hash; + }) + .OrderByDescending(g => g.Count()) + .ToList(); + var uniqueRevisionCount = uniqueRevisions.Count(); + + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + if (uniqueRevisionCount == 1) + { + MoveFootnotesEndnotesForConsolidatedRevisions(lci.First(), consolidatedWDoc); + + var dummyElement = new XElement("dummy", lci.First().RevisionElement); + + foreach (var rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null)) + { + var aut = rev.Attribute(W.author); + aut.Value = "ITU"; + } + + ele.ReplaceWith(dummyElement.Elements()); + continue; + } + + // this is the location where we have determined that there are the same number of revisions for this paragraph as there are revision documents. + // however, the hash for all of them were not the same. + // therefore, they would be added to the consolidated document as separate revisions. + + // create a log that shows what is different, in detail. + if (settings.LogCallback != null) + { + StringBuilder sb = new StringBuilder(); + sb.Append("====================================================================================================" + nl); + sb.Append("Non-Consolidated Revision" + nl); + sb.Append("====================================================================================================" + nl); + foreach (var urList in uniqueRevisions) + { + var revisorList = urList.Select(ur => ur.Revisor + " : ").StringConcatenate().TrimEnd(' ', ':'); + sb.Append("Revisors: " + revisorList + nl); + var str = RevisionToLogFormTransform(urList.First().RevisionElement, 0, false); + sb.Append(str); + sb.Append("=========================" + nl); + } + sb.Append(nl); + settings.LogCallback(sb.ToString()); + } + } + + var contentToAddAfter = lci + .Where(ci => ci.InsertBefore == false) + .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString()) + .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings)); + ele.AddAfterSelf(contentToAddAfter); + } + +#if false + // old code + foreach (var ele in elementsToProcess) + { + var lci = ele.Annotation>(); + + // if all revisions from all revisors are exactly the same, then instead of adding multiple tables after + // that contains the revisions, then simply replace the paragraph with the one with the revisions. + // RC004 documents contain the test data to exercise this. + + var lciCount = lci.Count(); + + if (lci.Count() > 1 && lciCount == revisedDocumentInfoListCount) + { + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // This is the code that determines if revisions should be consolidated into one. + + var uniqueRevisions = lci + .GroupBy(ci => + { + // Get a hash after first accepting revisions and compressing the text. + var ciz = ci; + + var acceptedRevisionElement = RevisionProcessor.AcceptRevisionsForElement(ci.RevisionElement); + var text = acceptedRevisionElement.Value + .Replace(" ", "") + .Replace(" ", "") + .Replace(" ", "") + .Replace("\n", ""); + var sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(text); + return ci.InsertBefore.ToString() + sha1Hash; + }) + .OrderByDescending(g => g.Count()) + .ToList(); + var uniqueRevisionCount = uniqueRevisions.Count(); + + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + if (uniqueRevisionCount == 1) + { + MoveFootnotesEndnotesForConsolidatedRevisions(lci.First(), consolidatedWDoc); + + var dummyElement = new XElement("dummy", lci.First().RevisionElement); + + foreach(var rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null)) + { + var aut = rev.Attribute(W.author); + aut.Value = "ITU"; + } + + ele.ReplaceWith(dummyElement.Elements()); + continue; + } + + // this is the location where we have determined that there are the same number of revisions for this paragraph as there are revision documents. + // however, the hash for all of them were not the same. + // therefore, they would be added to the consolidated document as separate revisions. + + // create a log that shows what is different, in detail. + if (settings.LogCallback != null) + { + StringBuilder sb = new StringBuilder(); + sb.Append("====================================================================================================" + nl); + sb.Append("Non-Consolidated Revision" + nl); + sb.Append("====================================================================================================" + nl); + foreach (var urList in uniqueRevisions) + { + var revisorList = urList.Select(ur => ur.Revisor + " : ").StringConcatenate().TrimEnd(' ', ':'); + sb.Append("Revisors: " + revisorList + nl); + var str = RevisionToLogFormTransform(urList.First().RevisionElement, 0, false); + sb.Append(str); + sb.Append("=========================" + nl); + } + sb.Append(nl); + settings.LogCallback(sb.ToString()); + } + } + + var contentToAddBefore = lci + .Where(ci => ci.InsertBefore == true) + .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString()) + .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings)); + var contentToAddAfter = lci + .Where(ci => ci.InsertBefore == false) + .GroupAdjacent(ci => ci.Revisor + ci.Color.ToString()) + .Select((groupedCi, idx) => AssembledConjoinedRevisionContent(emptyParagraph, groupedCi, idx, consolidatedWDoc, consolidateSettings)); + ele.AddBeforeSelf(contentToAddBefore); + ele.AddAfterSelf(contentToAddAfter); + } +#endif + + consolidatedMainDocPartXDoc + .Root + .Element(W.body) + .Add(savedSectPr); + + AddTableGridStyleToStylesPart(consolidatedWDoc.MainDocumentPart.StyleDefinitionsPart); + FixUpRevisionIds(consolidatedWDoc, consolidatedMainDocPartXDoc); + IgnorePt14NamespaceForFootnotesEndnotes(consolidatedWDoc); + FixUpDocPrIds(consolidatedWDoc); + FixUpShapeIds(consolidatedWDoc); + FixUpGroupIds(consolidatedWDoc); + FixUpShapeTypeIds(consolidatedWDoc); + WmlComparer.IgnorePt14Namespace(consolidatedMainDocPartXDoc.Root); + consolidatedWDoc.MainDocumentPart.PutXDocument(); + AddFootnotesEndnotesStyles(consolidatedWDoc); + } + + var newConsolidatedDocument = new WmlDocument("consolidated.docx", consolidatedMs.ToArray()); + return newConsolidatedDocument; + } + } + + private static void MoveFootnotesEndnotesForConsolidatedRevisions(ConsolidationInfo ci, WordprocessingDocument wDocConsolidated) + { + var consolidatedFootnoteXDoc = wDocConsolidated.MainDocumentPart.FootnotesPart.GetXDocument(); + var consolidatedEndnoteXDoc = wDocConsolidated.MainDocumentPart.EndnotesPart.GetXDocument(); + + int maxFootnoteId = 1; + if (consolidatedFootnoteXDoc.Root.Elements(W.footnote).Any()) + maxFootnoteId = consolidatedFootnoteXDoc.Root.Elements(W.footnote).Select(e => (int)e.Attribute(W.id)).Max(); + int maxEndnoteId = 1; + if (consolidatedEndnoteXDoc.Root.Elements(W.endnote).Any()) + maxEndnoteId = consolidatedEndnoteXDoc.Root.Elements(W.endnote).Select(e => (int)e.Attribute(W.id)).Max(); ; + + /// At this point, content might contain a footnote or endnote reference. + /// Need to add the footnote / endnote into the consolidated document (with the same guid id) + /// Because of preprocessing of the documents, all footnote and endnote references will be unique at this point + + if (ci.RevisionElement.Descendants(W.footnoteReference).Any()) + { + var footnoteXDoc = wDocConsolidated.MainDocumentPart.FootnotesPart.GetXDocument(); + foreach (var footnoteReference in ci.RevisionElement.Descendants(W.footnoteReference)) + { + var id = (int)footnoteReference.Attribute(W.id); + var footnote = ci.Footnotes.FirstOrDefault(fn => (int)fn.Attribute(W.id) == id); + var newId = maxFootnoteId + 1; + maxFootnoteId++; + footnoteReference.Attribute(W.id).Value = newId.ToString(); + var clonedFootnote = new XElement(footnote); + clonedFootnote.Attribute(W.id).Value = newId.ToString(); + footnoteXDoc.Root.Add(clonedFootnote); + } + wDocConsolidated.MainDocumentPart.FootnotesPart.PutXDocument(); + } + + if (ci.RevisionElement.Descendants(W.endnoteReference).Any()) + { + var endnoteXDoc = wDocConsolidated.MainDocumentPart.EndnotesPart.GetXDocument(); + foreach (var endnoteReference in ci.RevisionElement.Descendants(W.endnoteReference)) + { + var id = (int)endnoteReference.Attribute(W.id); + var endnote = ci.Endnotes.FirstOrDefault(fn => (int)fn.Attribute(W.id) == id); + var newId = maxEndnoteId + 1; + maxEndnoteId++; + endnoteReference.Attribute(W.id).Value = newId.ToString(); + var clonedEndnote = new XElement(endnote); + clonedEndnote.Attribute(W.id).Value = newId.ToString(); + endnoteXDoc.Root.Add(clonedEndnote); + } + wDocConsolidated.MainDocumentPart.EndnotesPart.PutXDocument(); + } + } + + private static object CleanPartTransform(XNode node) + { + var element = node as XElement; + if (element != null) + { + return new XElement(element.Name, + element.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt && + !a.Name.LocalName.ToLower().Contains("rsid")), + element.Nodes().Select(n => CleanPartTransform(n))); + } + return node; + } + + private static string RevisionToLogFormTransform(XElement element, int depth, bool inserting) + { + if (element.Name == W.p) + return "Paragraph" + nl + element.Elements().Select(e => RevisionToLogFormTransform(e, depth + 2, false)).StringConcatenate(); + if (element.Name == W.pPr || element.Name == W.rPr) + return ""; + if (element.Name == W.r) + return element.Elements().Select(e => RevisionToLogFormTransform(e, depth, inserting)).StringConcatenate(); + if (element.Name == W.t) + { + if (inserting) + return "".PadRight(depth) + "Inserted Text:" + QuoteIt((string)element) + nl; + else + return "".PadRight(depth) + "Text:" + QuoteIt((string)element) + nl; + } + if (element.Name == W.delText) + return "".PadRight(depth) + "Deleted Text:" + QuoteIt((string)element) + nl; + if (element.Name == W.ins) + return element.Elements().Select(e => RevisionToLogFormTransform(e, depth, true)).StringConcatenate(); + if (element.Name == W.del) + return element.Elements().Select(e => RevisionToLogFormTransform(e, depth, false)).StringConcatenate(); + return ""; + } + + private static string QuoteIt(string str) + { + var quoteString = "\""; + if (str.Contains('\"')) + quoteString = "\'"; + return quoteString + str + quoteString; + } + + private static void IgnorePt14NamespaceForFootnotesEndnotes(WordprocessingDocument wDoc) + { + var footnotesPart = wDoc.MainDocumentPart.FootnotesPart; + var endnotesPart = wDoc.MainDocumentPart.EndnotesPart; + + XDocument footnotesPartXDoc = null; + if (footnotesPart != null) + { + footnotesPartXDoc = footnotesPart.GetXDocument(); + WmlComparer.IgnorePt14Namespace(footnotesPartXDoc.Root); + } + + XDocument endnotesPartXDoc = null; + if (endnotesPart != null) + { + endnotesPartXDoc = endnotesPart.GetXDocument(); + WmlComparer.IgnorePt14Namespace(endnotesPartXDoc.Root); + } + + if (footnotesPart != null) + footnotesPart.PutXDocument(); + + if (endnotesPart != null) + endnotesPart.PutXDocument(); + } + + private static XElement[] AssembledConjoinedRevisionContent(XElement emptyParagraph, IGrouping groupedCi, int idx, WordprocessingDocument wDocConsolidated, + WmlComparerConsolidateSettings consolidateSettings) + { + var consolidatedFootnoteXDoc = wDocConsolidated.MainDocumentPart.FootnotesPart.GetXDocument(); + var consolidatedEndnoteXDoc = wDocConsolidated.MainDocumentPart.EndnotesPart.GetXDocument(); + + int maxFootnoteId = 1; + if (consolidatedFootnoteXDoc.Root.Elements(W.footnote).Any()) + maxFootnoteId = consolidatedFootnoteXDoc.Root.Elements(W.footnote).Select(e => (int)e.Attribute(W.id)).Max(); + int maxEndnoteId = 1; + if (consolidatedEndnoteXDoc.Root.Elements(W.endnote).Any()) + maxEndnoteId = consolidatedEndnoteXDoc.Root.Elements(W.endnote).Select(e => (int)e.Attribute(W.id)).Max(); ; + + var revisor = groupedCi.First().Revisor; + + var captionParagraph = new XElement(W.p, + new XElement(W.pPr, + new XElement(W.jc, new XAttribute(W.val, "both")), + new XElement(W.rPr, + new XElement(W.b), + new XElement(W.bCs))), + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.b), + new XElement(W.bCs)), + new XElement(W.t, revisor))); + + var colorRgb = groupedCi.First().Color.ToArgb(); + var colorString = colorRgb.ToString("X"); + if (colorString.Length == 8) + colorString = colorString.Substring(2); + + if (consolidateSettings.ConsolidateWithTable) + { + var table = new XElement(W.tbl, + new XElement(W.tblPr, + new XElement(W.tblStyle, new XAttribute(W.val, "TableGridForRevisions")), + new XElement(W.tblW, + new XAttribute(W._w, "0"), + new XAttribute(W.type, "auto")), + new XElement(W.shd, + new XAttribute(W.val, "clear"), + new XAttribute(W.color, "auto"), + new XAttribute(W.fill, colorString)), + new XElement(W.tblLook, + new XAttribute(W.firstRow, "0"), + new XAttribute(W.lastRow, "0"), + new XAttribute(W.firstColumn, "0"), + new XAttribute(W.lastColumn, "0"), + new XAttribute(W.noHBand, "0"), + new XAttribute(W.noVBand, "0"))), + new XElement(W.tblGrid, + new XElement(W.gridCol, new XAttribute(W._w, "9576"))), + new XElement(W.tr, + new XElement(W.tc, + new XElement(W.tcPr, + new XElement(W.shd, + new XAttribute(W.val, "clear"), + new XAttribute(W.color, "auto"), + new XAttribute(W.fill, colorString))), + captionParagraph, + groupedCi.Select(ci => + { + XElement paraAfter = null; + if (ci.RevisionElement.Name == W.tbl) + paraAfter = emptyParagraph; + var revisionInTable = new[] { + ci.RevisionElement, + paraAfter, + }; + + /// At this point, content might contain a footnote or endnote reference. + /// Need to add the footnote / endnote into the consolidated document (with the same guid id) + /// Because of preprocessing of the documents, all footnote and endnote references will be unique at this point + + if (ci.RevisionElement.Descendants(W.footnoteReference).Any()) + { + var footnoteXDoc = wDocConsolidated.MainDocumentPart.FootnotesPart.GetXDocument(); + foreach (var footnoteReference in ci.RevisionElement.Descendants(W.footnoteReference)) + { + var id = (int)footnoteReference.Attribute(W.id); + var footnote = ci.Footnotes.FirstOrDefault(fn => (int)fn.Attribute(W.id) == id); + var newId = maxFootnoteId + 1; + maxFootnoteId++; + footnoteReference.Attribute(W.id).Value = newId.ToString(); + var clonedFootnote = new XElement(footnote); + clonedFootnote.Attribute(W.id).Value = newId.ToString(); + footnoteXDoc.Root.Add(clonedFootnote); + } + wDocConsolidated.MainDocumentPart.FootnotesPart.PutXDocument(); + } + + if (ci.RevisionElement.Descendants(W.endnoteReference).Any()) + { + var endnoteXDoc = wDocConsolidated.MainDocumentPart.EndnotesPart.GetXDocument(); + foreach (var endnoteReference in ci.RevisionElement.Descendants(W.endnoteReference)) + { + var id = (int)endnoteReference.Attribute(W.id); + var endnote = ci.Endnotes.FirstOrDefault(fn => (int)fn.Attribute(W.id) == id); + var newId = maxEndnoteId + 1; + maxEndnoteId++; + endnoteReference.Attribute(W.id).Value = newId.ToString(); + var clonedEndnote = new XElement(endnote); + clonedEndnote.Attribute(W.id).Value = newId.ToString(); + endnoteXDoc.Root.Add(clonedEndnote); + } + wDocConsolidated.MainDocumentPart.EndnotesPart.PutXDocument(); + } + + return revisionInTable; + })))); + + // if the last paragraph has a deleted paragraph mark, then remove the deletion from the paragraph mark. This is to prevent Word from misbehaving. + // the last paragraph in a cell must not have a deleted paragraph mark. + var theCell = table + .Descendants(W.tc) + .FirstOrDefault(); + var lastPara = theCell + .Elements(W.p) + .LastOrDefault(); + if (lastPara != null) + { + var isDeleted = lastPara + .Elements(W.pPr) + .Elements(W.rPr) + .Elements(W.del) + .Any(); + if (isDeleted) + lastPara + .Elements(W.pPr) + .Elements(W.rPr) + .Elements(W.del) + .Remove(); + } + + var content = new[] { + idx == 0 ? emptyParagraph : null, + table, + emptyParagraph, + }; + return content; + } + else + { + var content = groupedCi.Select(ci => + { + XElement paraAfter = null; + if (ci.RevisionElement.Name == W.tbl) + paraAfter = emptyParagraph; + var revisionInTable = new[] { + ci.RevisionElement, + paraAfter, + }; + + /// At this point, content might contain a footnote or endnote reference. + /// Need to add the footnote / endnote into the consolidated document (with the same guid id) + /// Because of preprocessing of the documents, all footnote and endnote references will be unique at this point + + if (ci.RevisionElement.Descendants(W.footnoteReference).Any()) + { + var footnoteXDoc = wDocConsolidated.MainDocumentPart.FootnotesPart.GetXDocument(); + foreach (var footnoteReference in ci.RevisionElement.Descendants(W.footnoteReference)) + { + var id = (int)footnoteReference.Attribute(W.id); + var footnote = ci.Footnotes.FirstOrDefault(fn => (int)fn.Attribute(W.id) == id); + var newId = maxFootnoteId + 1; + maxFootnoteId++; + footnoteReference.Attribute(W.id).Value = newId.ToString(); + var clonedFootnote = new XElement(footnote); + clonedFootnote.Attribute(W.id).Value = newId.ToString(); + footnoteXDoc.Root.Add(clonedFootnote); + } + wDocConsolidated.MainDocumentPart.FootnotesPart.PutXDocument(); + } + + if (ci.RevisionElement.Descendants(W.endnoteReference).Any()) + { + var endnoteXDoc = wDocConsolidated.MainDocumentPart.EndnotesPart.GetXDocument(); + foreach (var endnoteReference in ci.RevisionElement.Descendants(W.endnoteReference)) + { + var id = (int)endnoteReference.Attribute(W.id); + var endnote = ci.Endnotes.FirstOrDefault(fn => (int)fn.Attribute(W.id) == id); + var newId = maxEndnoteId + 1; + maxEndnoteId++; + endnoteReference.Attribute(W.id).Value = newId.ToString(); + var clonedEndnote = new XElement(endnote); + clonedEndnote.Attribute(W.id).Value = newId.ToString(); + endnoteXDoc.Root.Add(clonedEndnote); + } + wDocConsolidated.MainDocumentPart.EndnotesPart.PutXDocument(); + } + + return revisionInTable; + }); + + var dummyElement = new XElement("dummy", + content.SelectMany(m => m)); + + foreach (var rev in dummyElement.Descendants().Where(d => d.Attribute(W.author) != null)) + { + var aut = rev.Attribute(W.author); + aut.Value = revisor; + } + + return dummyElement.Elements().ToArray(); + } + } + + private static void AddToAnnotation( + WordprocessingDocument wDocDelta, + WordprocessingDocument consolidatedWDoc, + XElement elementToInsertAfter, + ConsolidationInfo consolidationInfo, + WmlComparerSettings settings) + { + Package packageOfDeletedContent = wDocDelta.MainDocumentPart.OpenXmlPackage.Package; + Package packageOfNewContent = consolidatedWDoc.MainDocumentPart.OpenXmlPackage.Package; + PackagePart partInDeletedDocument = packageOfDeletedContent.GetPart(wDocDelta.MainDocumentPart.Uri); + PackagePart partInNewDocument = packageOfNewContent.GetPart(consolidatedWDoc.MainDocumentPart.Uri); + consolidationInfo.RevisionElement = MoveRelatedPartsToDestination(partInDeletedDocument, partInNewDocument, consolidationInfo.RevisionElement); + + var clonedForHashing = (XElement)CloneBlockLevelContentForHashing(consolidatedWDoc.MainDocumentPart, consolidationInfo.RevisionElement, false, settings); + clonedForHashing.Descendants().Where(d => d.Name == W.ins || d.Name == W.del).Attributes(W.id).Remove(); + var shaString = clonedForHashing.ToString(SaveOptions.DisableFormatting) + .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", ""); + var sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaString); + consolidationInfo.RevisionString = shaString; + consolidationInfo.RevisionHash = sha1Hash; + + var annotationList = elementToInsertAfter.Annotation>(); + if (annotationList == null) + { + annotationList = new List(); + elementToInsertAfter.AddAnnotation(annotationList); + } + annotationList.Add(consolidationInfo); + } + + private static void AddTableGridStyleToStylesPart(StyleDefinitionsPart styleDefinitionsPart) + { + var sXDoc = styleDefinitionsPart.GetXDocument(); + var tableGridStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string)s.Attribute(W.styleId) == "TableGridForRevisions"); + if (tableGridStyle == null) + { + var tableGridForRevisionsStyleMarkup = +@" + + + + + + + + + + + + + + + + + + +"; + var tgsElement = XElement.Parse(tableGridForRevisionsStyleMarkup); + sXDoc.Root.Add(tgsElement); + } + var tableNormalStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string)s.Attribute(W.styleId) == "TableNormal"); + if (tableNormalStyle == null) + { + var tableNormalStyleMarkup = +@" + + + + + + + + + + + + + + "; + var tnsElement = XElement.Parse(tableNormalStyleMarkup); + sXDoc.Root.Add(tnsElement); + } + styleDefinitionsPart.PutXDocument(); + } + + private static XAttribute[] NamespaceAttributes = + { + new XAttribute(XNamespace.Xmlns + "wpc", WPC.wpc), + new XAttribute(XNamespace.Xmlns + "mc", MC.mc), + new XAttribute(XNamespace.Xmlns + "o", O.o), + new XAttribute(XNamespace.Xmlns + "r", R.r), + new XAttribute(XNamespace.Xmlns + "m", M.m), + new XAttribute(XNamespace.Xmlns + "v", VML.vml), + new XAttribute(XNamespace.Xmlns + "wp14", WP14.wp14), + new XAttribute(XNamespace.Xmlns + "wp", WP.wp), + new XAttribute(XNamespace.Xmlns + "w10", W10.w10), + new XAttribute(XNamespace.Xmlns + "w", W.w), + new XAttribute(XNamespace.Xmlns + "w14", W14.w14), + new XAttribute(XNamespace.Xmlns + "wpg", WPG.wpg), + new XAttribute(XNamespace.Xmlns + "wpi", WPI.wpi), + new XAttribute(XNamespace.Xmlns + "wne", WNE.wne), + new XAttribute(XNamespace.Xmlns + "wps", WPS.wps), + new XAttribute(MC.Ignorable, "w14 wp14"), + }; + + private static void AddFootnotesEndnotesParts(WordprocessingDocument wDoc) + { + var mdp = wDoc.MainDocumentPart; + if (mdp.FootnotesPart == null) + { + mdp.AddNewPart(); + var newFootnotes = wDoc.MainDocumentPart.FootnotesPart.GetXDocument(); + newFootnotes.Declaration.Standalone = "yes"; + newFootnotes.Declaration.Encoding = "UTF-8"; + newFootnotes.Add(new XElement(W.footnotes, NamespaceAttributes)); + mdp.FootnotesPart.PutXDocument(); + } + if (mdp.EndnotesPart == null) + { + mdp.AddNewPart(); + var newEndnotes = wDoc.MainDocumentPart.EndnotesPart.GetXDocument(); + newEndnotes.Declaration.Standalone = "yes"; + newEndnotes.Declaration.Encoding = "UTF-8"; + newEndnotes.Add(new XElement(W.endnotes, NamespaceAttributes)); + mdp.EndnotesPart.PutXDocument(); + } + } + + private static void ChangeFootnoteEndnoteReferencesToUniqueRange(WordprocessingDocument wDoc, int startingIdForFootnotesEndnotes) + { + var mainDocPart = wDoc.MainDocumentPart; + var footnotesPart = wDoc.MainDocumentPart.FootnotesPart; + var endnotesPart = wDoc.MainDocumentPart.EndnotesPart; + + var mainDocumentXDoc = mainDocPart.GetXDocument(); + XDocument footnotesPartXDoc = null; + if (footnotesPart != null) + footnotesPartXDoc = footnotesPart.GetXDocument(); + XDocument endnotesPartXDoc = null; + if (endnotesPart != null) + endnotesPartXDoc = endnotesPart.GetXDocument(); + + var references = mainDocumentXDoc + .Root + .Descendants() + .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference); + + var rnd = new Random(); + foreach (var r in references) + { + var oldId = (string)r.Attribute(W.id); + var newId = startingIdForFootnotesEndnotes.ToString(); + startingIdForFootnotesEndnotes++; + r.Attribute(W.id).Value = newId; + if (r.Name == W.footnoteReference) + { + var fn = footnotesPartXDoc + .Root + .Elements() + .FirstOrDefault(e => (string)e.Attribute(W.id) == oldId); + if (fn == null) + throw new OpenXmlPowerToolsException("Invalid document"); + fn.Attribute(W.id).Value = newId; + } + else + { + var en = endnotesPartXDoc + .Root + .Elements() + .FirstOrDefault(e => (string)e.Attribute(W.id) == oldId); + if (en == null) + throw new OpenXmlPowerToolsException("Invalid document"); + en.Attribute(W.id).Value = newId; + } + } + + mainDocPart.PutXDocument(); + if (footnotesPart != null) + footnotesPart.PutXDocument(); + if (endnotesPart != null) + endnotesPart.PutXDocument(); + } + + private static WmlDocument ProduceDocumentWithTrackedRevisions(WmlComparerSettings settings, WmlDocument wmlResult, WordprocessingDocument wDoc1, WordprocessingDocument wDoc2) + { + // save away sectPr so that can set in the newly produced document. + var savedSectPr = wDoc1 + .MainDocumentPart + .GetXDocument() + .Root + .Element(W.body) + .Element(W.sectPr); + + var contentParent1 = wDoc1.MainDocumentPart.GetXDocument().Root.Element(W.body); + AddSha1HashToBlockLevelContent(wDoc1.MainDocumentPart, contentParent1, settings); + var contentParent2 = wDoc2.MainDocumentPart.GetXDocument().Root.Element(W.body); + AddSha1HashToBlockLevelContent(wDoc2.MainDocumentPart, contentParent2, settings); + + var cal1 = WmlComparer.CreateComparisonUnitAtomList(wDoc1.MainDocumentPart, wDoc1.MainDocumentPart.GetXDocument().Root.Element(W.body), settings); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in cal1) + sb.Append(item.ToString() + Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var cus1 = GetComparisonUnitList(cal1, settings); + + if (s_False) + { + var sbs = ComparisonUnit.ComparisonUnitListToString(cus1); + TestUtil.NotePad(sbs); + } + + var cal2 = WmlComparer.CreateComparisonUnitAtomList(wDoc2.MainDocumentPart, wDoc2.MainDocumentPart.GetXDocument().Root.Element(W.body), settings); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in cal2) + sb.Append(item.ToString() + Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var cus2 = GetComparisonUnitList(cal2, settings); + + if (s_False) + { + var sbs = ComparisonUnit.ComparisonUnitListToString(cus2); + TestUtil.NotePad(sbs); + } + + if (s_False) + { + var sb3 = new StringBuilder(); + sb3.Append("ComparisonUnitList 1 =====" + Environment.NewLine + Environment.NewLine); + sb3.Append(ComparisonUnit.ComparisonUnitListToString(cus1)); + sb3.Append(Environment.NewLine); + sb3.Append("ComparisonUnitList 2 =====" + Environment.NewLine + Environment.NewLine); + sb3.Append(ComparisonUnit.ComparisonUnitListToString(cus2)); + var sbs3 = sb3.ToString(); + TestUtil.NotePad(sbs3); + } + + var correlatedSequence = Lcs(cus1, cus2, settings); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in correlatedSequence) + sb.Append(item.ToString() + Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // for any deleted or inserted rows, we go into the w:trPr properties, and add the appropriate w:ins or w:del element, and therefore + // when generating the document, the appropriate row will be marked as deleted or inserted. + MarkRowsAsDeletedOrInserted(settings, correlatedSequence); + + // the following gets a flattened list of ComparisonUnitAtoms, with status indicated in each ComparisonUnitAtom: Deleted, Inserted, or Equal + var listOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(correlatedSequence, settings); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in listOfComparisonUnitAtoms) + sb.Append(item.ToString() + Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // note - we don't want to do the hack until after flattening all of the groups. At the end of the flattening, we should simply + // have a list of ComparisonUnitAtoms, appropriately marked as equal, inserted, or deleted. + + // the table id will be hacked in the normal course of events. + // in the case where a row is deleted, not necessary to hack - the deleted row ID will do. + // in the case where a row is inserted, not necessary to hack - the inserted row ID will do as well. + AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(listOfComparisonUnitAtoms); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in listOfComparisonUnitAtoms) + sb.Append(item.ToStringAncestorUnids() + Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // and then finally can generate the document with revisions + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(wmlResult.DocumentByteArray, 0, wmlResult.DocumentByteArray.Length); + using (WordprocessingDocument wDocWithRevisions = WordprocessingDocument.Open(ms, true)) + { + var xDoc = wDocWithRevisions.MainDocumentPart.GetXDocument(); + var rootNamespaceAttributes = xDoc + .Root + .Attributes() + .Where(a => a.IsNamespaceDeclaration || a.Name.Namespace == MC.mc) + .ToList(); + + // ====================================== + // The following produces a new valid WordprocessingML document from the listOfComparisonUnitAtoms + var newBodyChildren = ProduceNewWmlMarkupFromCorrelatedSequence(wDocWithRevisions.MainDocumentPart, + listOfComparisonUnitAtoms, settings); + + XDocument newXDoc = new XDocument(); + newXDoc.Add( + new XElement(W.document, + rootNamespaceAttributes, + new XElement(W.body, newBodyChildren))); + MarkContentAsDeletedOrInserted(newXDoc, settings); + CoalesceAdjacentRunsWithIdenticalFormatting(newXDoc); + IgnorePt14Namespace(newXDoc.Root); + + ProcessFootnoteEndnote(settings, + listOfComparisonUnitAtoms, + wDoc1.MainDocumentPart, + wDoc2.MainDocumentPart, + newXDoc); + + RectifyFootnoteEndnoteIds( + wDoc1.MainDocumentPart, + wDoc2.MainDocumentPart, + wDocWithRevisions.MainDocumentPart, + newXDoc, + settings); + + ConjoinDeletedInsertedParagraphMarks(wDocWithRevisions.MainDocumentPart, newXDoc); + + FixUpRevisionIds(wDocWithRevisions, newXDoc); + + // little bit of cleanup + MoveLastSectPrToChildOfBody(newXDoc); + XElement newXDoc2Root = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(newXDoc.Root); + xDoc.Root.ReplaceWith(newXDoc2Root); + + /**********************************************************************************************/ + // temporary code to remove sections. When remove this code, get validation errors for some ITU documents. + xDoc.Root.Descendants(W.sectPr).Remove(); + + // move w:sectPr from source document into newly generated document. + if (savedSectPr != null) + { + var xd = wDocWithRevisions.MainDocumentPart.GetXDocument(); + // add everything but headers/footers + var clonedSectPr = new XElement(W.sectPr, + savedSectPr.Attributes(), + savedSectPr.Element(W.type), + savedSectPr.Element(W.pgSz), + savedSectPr.Element(W.pgMar), + savedSectPr.Element(W.cols), + savedSectPr.Element(W.titlePg)); + xd.Root.Element(W.body).Add(clonedSectPr); + } + /**********************************************************************************************/ + + wDocWithRevisions.MainDocumentPart.PutXDocument(); + FixUpFootnotesEndnotesWithCustomMarkers(wDocWithRevisions); + FixUpRevMarkIds(wDocWithRevisions); + FixUpDocPrIds(wDocWithRevisions); + FixUpShapeIds(wDocWithRevisions); + FixUpShapeTypeIds(wDocWithRevisions); + AddFootnotesEndnotesStyles(wDocWithRevisions); + CopyMissingStylesFromOneDocToAnother(wDoc2, wDocWithRevisions); + DeleteFootnotePropertiesInSettings(wDocWithRevisions); + } + foreach (var part in wDoc1.ContentParts()) + part.PutXDocument(); + foreach (var part in wDoc2.ContentParts()) + part.PutXDocument(); + var updatedWmlResult = new WmlDocument("Dummy.docx", ms.ToArray()); + return updatedWmlResult; + } + } + + private static void DeleteFootnotePropertiesInSettings(WordprocessingDocument wDocWithRevisions) + { + var settingsPart = wDocWithRevisions.MainDocumentPart.DocumentSettingsPart; + if (settingsPart != null) + { + var sxDoc = settingsPart.GetXDocument(); + sxDoc.Root.Elements().Where(e => e.Name == W.footnotePr || e.Name == W.endnotePr).Remove(); + settingsPart.PutXDocument(); + } + } + + private static void FixUpFootnotesEndnotesWithCustomMarkers(WordprocessingDocument wDocWithRevisions) + { +#if FALSE + // this needs to change + + + + + + + + + + + + + + + + + + + + + + + + 1 + + + + // to this + + + + + + + + + + + + 1 + + +#endif + // this is pretty random - a bug in Word prevents display of a document if the delText element does not immediately follow the footnoteReference element, in the same run. + var mainXDoc = wDocWithRevisions.MainDocumentPart.GetXDocument(); + var newRoot = (XElement)FootnoteEndnoteReferenceCleanupTransform(mainXDoc.Root); + mainXDoc.Root.ReplaceWith(newRoot); + wDocWithRevisions.MainDocumentPart.PutXDocument(); + } + + private static object FootnoteEndnoteReferenceCleanupTransform(XNode node) + { + var element = node as XElement; + if (element != null) + { + // small optimization to eliminate the work for most elements + if (element.Element(W.del) != null || element.Element(W.ins) != null) + { + var hasFootnoteEndnoteReferencesThatNeedCleanedUp = element + .Elements() + .Where(e => e.Name == W.del || e.Name == W.ins) + .Elements(W.r) + .Elements() + .Where(e => e.Name == W.footnoteReference || e.Name == W.endnoteReference) + .Attributes(W.customMarkFollows) + .Any(); + + if (hasFootnoteEndnoteReferencesThatNeedCleanedUp) + { + var clone = new XElement(element.Name, + element.Attributes(), + element.Nodes().Select(n => FootnoteEndnoteReferenceCleanupTransform(n))); + var footnoteEndnoteReferencesToAdjust = clone + .Descendants() + .Where(d => d.Name == W.footnoteReference || d.Name == W.endnoteReference) + .Where(d => d.Attribute(W.customMarkFollows) != null); + foreach (var fnenr in footnoteEndnoteReferencesToAdjust) + { + var par = fnenr.Parent; + var gp = fnenr.Parent.Parent; + if (par.Name == W.r && + gp.Name == W.del) + { + if (par.Element(W.delText) != null) + continue; + var afterGp = gp.ElementsAfterSelf().FirstOrDefault(); + if (afterGp == null) + continue; + var afterGpDelText = afterGp.Elements(W.r).Elements(W.delText); + if (afterGpDelText.Any()) + { + par.Add(afterGpDelText); // this will clone and add to run that contains the reference + afterGpDelText.Remove(); // this leaves an empty run, does not matter. + } + } + if (par.Name == W.r && + gp.Name == W.ins) + { + if (par.Element(W.t) != null) + continue; + var afterGp = gp.ElementsAfterSelf().FirstOrDefault(); + if (afterGp == null) + continue; + var afterGpText = afterGp.Elements(W.r).Elements(W.t); + if (afterGpText.Any()) + { + par.Add(afterGpText); // this will clone and add to run that contains the reference + afterGpText.Remove(); // this leaves an empty run, does not matter. + } + } + } + return clone; + } + } + else + { + return new XElement(element.Name, + element.Attributes(), + element.Nodes().Select(n => FootnoteEndnoteReferenceCleanupTransform(n))); + } + } + return node; + } + + private static void CopyMissingStylesFromOneDocToAnother(WordprocessingDocument wDocFrom, WordprocessingDocument wDocTo) + { + var revisionsStylesXDoc = wDocTo.MainDocumentPart.StyleDefinitionsPart.GetXDocument(); + var afterStylesXDoc = wDocFrom.MainDocumentPart.StyleDefinitionsPart.GetXDocument(); + foreach (var style in afterStylesXDoc.Root.Elements(W.style)) + { + var type = (string)style.Attribute(W.type); + var styleId = (string)style.Attribute(W.styleId); + var styleInRevDoc = revisionsStylesXDoc + .Root + .Elements(W.style) + .FirstOrDefault(st => (string)st.Attribute(W.type) == type && + (string)st.Attribute(W.styleId) == styleId); + if (styleInRevDoc != null) + continue; + var cloned = new XElement(style); + if (cloned.Attribute(W._default) != null) + cloned.Attribute(W._default).Remove(); + revisionsStylesXDoc.Root.Add(cloned); + } + wDocTo.MainDocumentPart.StyleDefinitionsPart.PutXDocument(); + } + + private static void AddFootnotesEndnotesStyles(WordprocessingDocument wDocWithRevisions) + { + var mainXDoc = wDocWithRevisions.MainDocumentPart.GetXDocument(); + var hasFootnotes = mainXDoc.Descendants(W.footnoteReference).Any(); + var hasEndnotes = mainXDoc.Descendants(W.endnoteReference).Any(); + var styleDefinitionsPart = wDocWithRevisions.MainDocumentPart.StyleDefinitionsPart; + var sXDoc = styleDefinitionsPart.GetXDocument(); + if (hasFootnotes) + { + var footnoteTextStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string)s.Attribute(W.styleId) == "FootnoteText"); + if (footnoteTextStyle == null) + { + var footnoteTextStyleMarkup = +@" + + + + + + + + + + + + + + "; + var ftsElement = XElement.Parse(footnoteTextStyleMarkup); + sXDoc.Root.Add(ftsElement); + } + var footnoteTextCharStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string)s.Attribute(W.styleId) == "FootnoteTextChar"); + if (footnoteTextCharStyle == null) + { + var footnoteTextCharStyleMarkup = +@" + + + + + + + + + + "; + var fntcsElement = XElement.Parse(footnoteTextCharStyleMarkup); + sXDoc.Root.Add(fntcsElement); + } + var footnoteReferenceStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string)s.Attribute(W.styleId) == "FootnoteReference"); + if (footnoteReferenceStyle == null) + { + var footnoteReferenceStyleMarkup = +@" + + + + + + + + + "; + var fnrsElement = XElement.Parse(footnoteReferenceStyleMarkup); + sXDoc.Root.Add(fnrsElement); + } + } + if (hasEndnotes) + { + var endnoteTextStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string)s.Attribute(W.styleId) == "EndnoteText"); + if (endnoteTextStyle == null) + { + var endnoteTextStyleMarkup = +@" + + + + + + + + + + + + + + "; + var etsElement = XElement.Parse(endnoteTextStyleMarkup); + sXDoc.Root.Add(etsElement); + } + var endnoteTextCharStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string)s.Attribute(W.styleId) == "EndnoteTextChar"); + if (endnoteTextCharStyle == null) + { + var endnoteTextCharStyleMarkup = +@" + + + + + + + + + + "; + var entcsElement = XElement.Parse(endnoteTextCharStyleMarkup); + sXDoc.Root.Add(entcsElement); + } + var endnoteReferenceStyle = sXDoc + .Root + .Elements(W.style) + .FirstOrDefault(s => (string)s.Attribute(W.styleId) == "EndnoteReference"); + if (endnoteReferenceStyle == null) + { + var endnoteReferenceStyleMarkup = +@" + + + + + + + + + "; + var enrsElement = XElement.Parse(endnoteReferenceStyleMarkup); + sXDoc.Root.Add(enrsElement); + } + } + if (hasFootnotes || hasEndnotes) + { + styleDefinitionsPart.PutXDocument(); + } + } + + // it is possible, per the algorithm, for the algorithm to find that the paragraph mark for a single paragraph has been + // inserted and deleted. If the algorithm sets them to equal, then sometimes it will equate paragraph marks that should + // not be equated. + private static void ConjoinDeletedInsertedParagraphMarks(MainDocumentPart mainDocumentPart, XDocument newXDoc) + { + ConjoinMultipleParagraphMarks(newXDoc); + if (mainDocumentPart.FootnotesPart != null) + { + var fnXDoc = mainDocumentPart.FootnotesPart.GetXDocument(); + ConjoinMultipleParagraphMarks(fnXDoc); + mainDocumentPart.FootnotesPart.PutXDocument(); + } + if (mainDocumentPart.EndnotesPart != null) + { + var fnXDoc = mainDocumentPart.EndnotesPart.GetXDocument(); + ConjoinMultipleParagraphMarks(fnXDoc); + mainDocumentPart.EndnotesPart.PutXDocument(); + } + } + + private static void ConjoinMultipleParagraphMarks(XDocument xDoc) + { + var newRoot = ConjoinTransform(xDoc.Root); + xDoc.Root.ReplaceWith(newRoot); + } + + private static object ConjoinTransform(XNode node) + { + var element = node as XElement; + if (element != null) + { + if (element.Name == W.p && element.Elements(W.pPr).Count() >= 2) + { + var pPr = new XElement(element.Element(W.pPr)); + pPr.Elements(W.rPr).Elements().Where(r => r.Name == W.ins || r.Name == W.del).Remove(); + pPr.Attributes(PtOpenXml.Status).Remove(); + var newPara = new XElement(W.p, + element.Attributes(), + pPr, + element.Elements().Where(c => c.Name != W.pPr)); + return newPara; + } + + return new XElement(element.Name, + element.Attributes(), + element.Nodes().Select(n => ConjoinTransform(n))); + } + return node; + } + + private static void MarkContentAsDeletedOrInserted(XDocument newXDoc, WmlComparerSettings settings) + { + var newRoot = MarkContentAsDeletedOrInsertedTransform(newXDoc.Root, settings); + newXDoc.Root.ReplaceWith(newRoot); + } + + private static object MarkContentAsDeletedOrInsertedTransform(XNode node, WmlComparerSettings settings) + { + XElement element = node as XElement; + if (element != null) + { + if (element.Name == W.r) + { + var statusList = element + .DescendantsTrimmed(W.txbxContent) + .Where(d => d.Name == W.t || d.Name == W.delText || AllowableRunChildren.Contains(d.Name)) + .Attributes(PtOpenXml.Status) + .Select(a => (string)a) + .Distinct() + .ToList(); + if (statusList.Count() > 1) + throw new OpenXmlPowerToolsException("Internal error - have both deleted and inserted text elements in the same run."); + if (statusList.Count() == 0) + return new XElement(W.r, + element.Attributes(), + element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings))); + if (statusList.First() == "Deleted") + { + return new XElement(W.del, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, s_MaxId++), + new XAttribute(W.date, settings.DateTimeForRevisions), + new XElement(W.r, + element.Attributes(), + element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings)))); + } + else if (statusList.First() == "Inserted") + { + return new XElement(W.ins, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, s_MaxId++), + new XAttribute(W.date, settings.DateTimeForRevisions), + new XElement(W.r, + element.Attributes(), + element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings)))); + } + } + + if (element.Name == W.pPr) + { + var status = (string)element.Attribute(PtOpenXml.Status); + if (status == null) + return new XElement(W.pPr, + element.Attributes(), + element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings))); + var pPr = new XElement(element); + if (status == "Deleted") + { + XElement rPr = pPr.Element(W.rPr); + if (rPr == null) + rPr = new XElement(W.rPr); + rPr.Add(new XElement(W.del, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, s_MaxId++), + new XAttribute(W.date, settings.DateTimeForRevisions))); + if (pPr.Element(W.rPr) != null) + pPr.Element(W.rPr).ReplaceWith(rPr); + else + pPr.AddFirst(rPr); + } + else if (status == "Inserted") + { + XElement rPr = pPr.Element(W.rPr); + if (rPr == null) + rPr = new XElement(W.rPr); + rPr.Add(new XElement(W.ins, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, s_MaxId++), + new XAttribute(W.date, settings.DateTimeForRevisions))); + if (pPr.Element(W.rPr) != null) + pPr.Element(W.rPr).ReplaceWith(rPr); + else + pPr.AddFirst(rPr); + } + else + throw new OpenXmlPowerToolsException("Internal error"); + return pPr; + } + + return new XElement(element.Name, + element.Attributes(), + element.Nodes().Select(n => MarkContentAsDeletedOrInsertedTransform(n, settings))); + } + return node; + } + + private static void FixUpRevisionIds(WordprocessingDocument wDocWithRevisions, XDocument newXDoc) + { + IEnumerable footnoteRevisions = Enumerable.Empty(); + if (wDocWithRevisions.MainDocumentPart.FootnotesPart != null) + { + var fnxd = wDocWithRevisions.MainDocumentPart.FootnotesPart.GetXDocument(); + footnoteRevisions = fnxd + .Descendants() + .Where(d => d.Name == W.ins || d.Name == W.del); + } + IEnumerable endnoteRevisions = Enumerable.Empty(); + if (wDocWithRevisions.MainDocumentPart.EndnotesPart != null) + { + var fnxd = wDocWithRevisions.MainDocumentPart.EndnotesPart.GetXDocument(); + endnoteRevisions = fnxd + .Descendants() + .Where(d => d.Name == W.ins || d.Name == W.del); + } + var mainRevisions = newXDoc + .Descendants() + .Where(d => d.Name == W.ins || d.Name == W.del); + var allRevisions = mainRevisions + .Concat(footnoteRevisions) + .Concat(endnoteRevisions) + .Select((r, i) => + { + return new + { + Rev = r, + Idx = i + 1, + }; + }); + foreach (var item in allRevisions) + item.Rev.Attribute(W.id).Value = item.Idx.ToString(); + if (wDocWithRevisions.MainDocumentPart.FootnotesPart != null) + wDocWithRevisions.MainDocumentPart.FootnotesPart.PutXDocument(); + if (wDocWithRevisions.MainDocumentPart.EndnotesPart != null) + wDocWithRevisions.MainDocumentPart.EndnotesPart.PutXDocument(); + } + + private static void IgnorePt14Namespace(XElement root) + { + if (root.Attribute(XNamespace.Xmlns + "pt14") == null) + { + root.Add(new XAttribute(XNamespace.Xmlns + "pt14", PtOpenXml.pt.NamespaceName)); + } + var ignorable = (string)root.Attribute(MC.Ignorable); + if (ignorable != null) + { + var list = ignorable.Split(' '); + if (!list.Contains("pt14")) + { + ignorable += " pt14"; + root.Attribute(MC.Ignorable).Value = ignorable; + } + } + else + { + root.Add(new XAttribute(MC.Ignorable, "pt14")); + } + } + + private static void CoalesceAdjacentRunsWithIdenticalFormatting(XDocument xDoc) + { + var paras = xDoc.Root.DescendantsTrimmed(W.txbxContent).Where(d => d.Name == W.p); + foreach (var para in paras) + { + var newPara = WordprocessingMLUtil.CoalesceAdjacentRunsWithIdenticalFormatting(para); + para.ReplaceNodes(newPara.Nodes()); + } + } + + private static void ProcessFootnoteEndnote( + WmlComparerSettings settings, + List listOfComparisonUnitAtoms, + MainDocumentPart mainDocumentPartBefore, + MainDocumentPart mainDocumentPartAfter, + XDocument mainDocumentXDoc) + { + var footnotesPartBefore = mainDocumentPartBefore.FootnotesPart; + var endnotesPartBefore = mainDocumentPartBefore.EndnotesPart; + var footnotesPartAfter = mainDocumentPartAfter.FootnotesPart; + var endnotesPartAfter = mainDocumentPartAfter.EndnotesPart; + + XDocument footnotesPartBeforeXDoc = null; + if (footnotesPartBefore != null) + footnotesPartBeforeXDoc = footnotesPartBefore.GetXDocument(); + XDocument footnotesPartAfterXDoc = null; + if (footnotesPartAfter != null) + footnotesPartAfterXDoc = footnotesPartAfter.GetXDocument(); + XDocument endnotesPartBeforeXDoc = null; + if (endnotesPartBefore != null) + endnotesPartBeforeXDoc = endnotesPartBefore.GetXDocument(); + XDocument endnotesPartAfterXDoc = null; + if (endnotesPartAfter != null) + endnotesPartAfterXDoc = endnotesPartAfter.GetXDocument(); + + var possiblyModifiedFootnotesEndNotes = listOfComparisonUnitAtoms + .Where(cua => + cua.ContentElement.Name == W.footnoteReference || + cua.ContentElement.Name == W.endnoteReference) + .ToList(); + + foreach (var fn in possiblyModifiedFootnotesEndNotes) + { + string beforeId = null; + if (fn.ContentElementBefore != null) + beforeId = (string)fn.ContentElementBefore.Attribute(W.id); + var afterId = (string)fn.ContentElement.Attribute(W.id); + + XElement footnoteEndnoteBefore = null; + XElement footnoteEndnoteAfter = null; + OpenXmlPart partToUseBefore = null; + OpenXmlPart partToUseAfter = null; + XDocument partToUseBeforeXDoc = null; + XDocument partToUseAfterXDoc = null; + + if (fn.CorrelationStatus == CorrelationStatus.Equal) + { + if (fn.ContentElement.Name == W.footnoteReference) + { + footnoteEndnoteBefore = footnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == beforeId); + footnoteEndnoteAfter = footnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId); + partToUseBefore = footnotesPartBefore; + partToUseAfter = footnotesPartAfter; + partToUseBeforeXDoc = footnotesPartBeforeXDoc; + partToUseAfterXDoc = footnotesPartAfterXDoc; + } + else + { + footnoteEndnoteBefore = endnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == beforeId); + footnoteEndnoteAfter = endnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId); + partToUseBefore = endnotesPartBefore; + partToUseAfter = endnotesPartAfter; + partToUseBeforeXDoc = endnotesPartBeforeXDoc; + partToUseAfterXDoc = endnotesPartAfterXDoc; + } + AddSha1HashToBlockLevelContent(partToUseBefore, footnoteEndnoteBefore, settings); + AddSha1HashToBlockLevelContent(partToUseAfter, footnoteEndnoteAfter, settings); + + var fncal1 = WmlComparer.CreateComparisonUnitAtomList(partToUseBefore, footnoteEndnoteBefore, settings); + var fncus1 = GetComparisonUnitList(fncal1, settings); + + var fncal2 = WmlComparer.CreateComparisonUnitAtomList(partToUseAfter, footnoteEndnoteAfter, settings); + var fncus2 = GetComparisonUnitList(fncal2, settings); + + if (! (fncus1.Length == 0 && fncus2.Length == 0)) + { + var fnCorrelatedSequence = Lcs(fncus1, fncus2, settings); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in fnCorrelatedSequence) + sb.Append(item.ToString()).Append(Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // for any deleted or inserted rows, we go into the w:trPr properties, and add the appropriate w:ins or w:del element, and therefore + // when generating the document, the appropriate row will be marked as deleted or inserted. + MarkRowsAsDeletedOrInserted(settings, fnCorrelatedSequence); + + // the following gets a flattened list of ComparisonUnitAtoms, with status indicated in each ComparisonUnitAtom: Deleted, Inserted, or Equal + var fnListOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(fnCorrelatedSequence, settings); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in fnListOfComparisonUnitAtoms) + sb.Append(item.ToString() + Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // hack = set the guid ID of the table, row, or cell from the 'before' document to be equal to the 'after' document. + + // note - we don't want to do the hack until after flattening all of the groups. At the end of the flattening, we should simply + // have a list of ComparisonUnitAtoms, appropriately marked as equal, inserted, or deleted. + + // the table id will be hacked in the normal course of events. + // in the case where a row is deleted, not necessary to hack - the deleted row ID will do. + // in the case where a row is inserted, not necessary to hack - the inserted row ID will do as well. + AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(fnListOfComparisonUnitAtoms); + + var newFootnoteEndnoteChildren = ProduceNewWmlMarkupFromCorrelatedSequence(partToUseAfter, fnListOfComparisonUnitAtoms, settings); + var tempElement = new XElement(W.body, newFootnoteEndnoteChildren); + var hasFootnoteReference = tempElement.Descendants(W.r).Any(r => + { + var b = false; + if ((string)r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == "FootnoteReference") + b = true; + if (r.Descendants(W.footnoteRef).Any()) + b = true; + return b; + }); + if (!hasFootnoteReference) + { + var firstPara = tempElement.Descendants(W.p).FirstOrDefault(); + if (firstPara != null) + { + var firstRun = firstPara.Element(W.r); + if (firstRun != null) + { + if (fn.ContentElement.Name == W.footnoteReference) + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "FootnoteReference"))), + new XElement(W.footnoteRef))); + else + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "EndnoteReference"))), + new XElement(W.endnoteRef))); + } + } + } + XElement newTempElement = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement); + var newContentElement = newTempElement.Descendants().FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote); + if (newContentElement == null) + throw new OpenXmlPowerToolsException("Internal error"); + footnoteEndnoteAfter.ReplaceNodes(newContentElement.Nodes()); + } + } + else if (fn.CorrelationStatus == CorrelationStatus.Inserted) + { + if (fn.ContentElement.Name == W.footnoteReference) + { + footnoteEndnoteAfter = footnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId); + partToUseAfter = footnotesPartAfter; + partToUseAfterXDoc = footnotesPartAfterXDoc; + } + else + { + footnoteEndnoteAfter = endnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId); + partToUseAfter = endnotesPartAfter; + partToUseAfterXDoc = endnotesPartAfterXDoc; + } + + AddSha1HashToBlockLevelContent(partToUseAfter, footnoteEndnoteAfter, settings); + + var fncal2 = WmlComparer.CreateComparisonUnitAtomList(partToUseAfter, footnoteEndnoteAfter, settings); + var fncus2 = GetComparisonUnitList(fncal2, settings); + + var insertedCorrSequ = new List() { + new CorrelatedSequence() + { + ComparisonUnitArray1 = null, + ComparisonUnitArray2 = fncus2, + CorrelationStatus = CorrelationStatus.Inserted, + }, + }; + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in insertedCorrSequ) + sb.Append(item.ToString()).Append(Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + MarkRowsAsDeletedOrInserted(settings, insertedCorrSequ); + + var fnListOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(insertedCorrSequ, settings); + + AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(fnListOfComparisonUnitAtoms); + + var newFootnoteEndnoteChildren = ProduceNewWmlMarkupFromCorrelatedSequence(partToUseAfter, + fnListOfComparisonUnitAtoms, settings); + var tempElement = new XElement(W.body, newFootnoteEndnoteChildren); + var hasFootnoteReference = tempElement.Descendants(W.r).Any(r => + { + var b = false; + if ((string)r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == "FootnoteReference") + b = true; + if (r.Descendants(W.footnoteRef).Any()) + b = true; + return b; + }); + if (!hasFootnoteReference) + { + var firstPara = tempElement.Descendants(W.p).FirstOrDefault(); + if (firstPara != null) + { + var firstRun = firstPara.Descendants(W.r).FirstOrDefault(); + if (firstRun != null) + { + if (fn.ContentElement.Name == W.footnoteReference) + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "FootnoteReference"))), + new XElement(W.footnoteRef))); + else + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "EndnoteReference"))), + new XElement(W.endnoteRef))); + } + } + } + XElement newTempElement = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement); + var newContentElement = newTempElement + .Descendants() + .FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote); + if (newContentElement == null) + throw new OpenXmlPowerToolsException("Internal error"); + footnoteEndnoteAfter.ReplaceNodes(newContentElement.Nodes()); + } + else if (fn.CorrelationStatus == CorrelationStatus.Deleted) + { + if (fn.ContentElement.Name == W.footnoteReference) + { + footnoteEndnoteBefore = footnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId); + partToUseAfter = footnotesPartAfter; + partToUseAfterXDoc = footnotesPartAfterXDoc; + } + else + { + footnoteEndnoteBefore = endnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(fnn => (string)fnn.Attribute(W.id) == afterId); + partToUseBefore = endnotesPartBefore; + partToUseBeforeXDoc = endnotesPartBeforeXDoc; + } + + AddSha1HashToBlockLevelContent(partToUseBefore, footnoteEndnoteBefore, settings); + + var fncal2 = WmlComparer.CreateComparisonUnitAtomList(partToUseBefore, footnoteEndnoteBefore, settings); + var fncus2 = GetComparisonUnitList(fncal2, settings); + + var deletedCorrSequ = new List() { + new CorrelatedSequence() + { + ComparisonUnitArray1 = fncus2, + ComparisonUnitArray2 = null, + CorrelationStatus = CorrelationStatus.Deleted, + }, + }; + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in deletedCorrSequ) + sb.Append(item.ToString()).Append(Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + MarkRowsAsDeletedOrInserted(settings, deletedCorrSequ); + + var fnListOfComparisonUnitAtoms = FlattenToComparisonUnitAtomList(deletedCorrSequ, settings); + + if (fnListOfComparisonUnitAtoms.Any()) + { + AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(fnListOfComparisonUnitAtoms); + + var newFootnoteEndnoteChildren = ProduceNewWmlMarkupFromCorrelatedSequence(partToUseBefore, + fnListOfComparisonUnitAtoms, settings); + var tempElement = new XElement(W.body, newFootnoteEndnoteChildren); + var hasFootnoteReference = tempElement.Descendants(W.r).Any(r => + { + var b = false; + if ((string)r.Elements(W.rPr).Elements(W.rStyle).Attributes(W.val).FirstOrDefault() == "FootnoteReference") + b = true; + if (r.Descendants(W.footnoteRef).Any()) + b = true; + return b; + }); + if (!hasFootnoteReference) + { + var firstPara = tempElement.Descendants(W.p).FirstOrDefault(); + if (firstPara != null) + { + var firstRun = firstPara.Descendants(W.r).FirstOrDefault(); + if (firstRun != null) + { + if (fn.ContentElement.Name == W.footnoteReference) + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "FootnoteReference"))), + new XElement(W.footnoteRef))); + else + firstRun.AddBeforeSelf( + new XElement(W.r, + new XElement(W.rPr, + new XElement(W.rStyle, + new XAttribute(W.val, "EndnoteReference"))), + new XElement(W.endnoteRef))); + } + } + } + XElement newTempElement = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(tempElement); + var newContentElement = newTempElement.Descendants().FirstOrDefault(d => d.Name == W.footnote || d.Name == W.endnote); + if (newContentElement == null) + throw new OpenXmlPowerToolsException("Internal error"); + footnoteEndnoteBefore.ReplaceNodes(newContentElement.Nodes()); + } + } + else + throw new OpenXmlPowerToolsException("Internal error"); + } + } + + private static void RectifyFootnoteEndnoteIds( + MainDocumentPart mainDocumentPartBefore, + MainDocumentPart mainDocumentPartAfter, + MainDocumentPart mainDocumentPartWithRevisions, + XDocument mainDocumentXDoc, + WmlComparerSettings settings) + { + var footnotesPartBefore = mainDocumentPartBefore.FootnotesPart; + var endnotesPartBefore = mainDocumentPartBefore.EndnotesPart; + var footnotesPartAfter = mainDocumentPartAfter.FootnotesPart; + var endnotesPartAfter = mainDocumentPartAfter.EndnotesPart; + var footnotesPartWithRevisions = mainDocumentPartWithRevisions.FootnotesPart; + var endnotesPartWithRevisions = mainDocumentPartWithRevisions.EndnotesPart; + + XDocument footnotesPartBeforeXDoc = null; + if (footnotesPartBefore != null) + footnotesPartBeforeXDoc = footnotesPartBefore.GetXDocument(); + + XDocument footnotesPartAfterXDoc = null; + if (footnotesPartAfter != null) + footnotesPartAfterXDoc = footnotesPartAfter.GetXDocument(); + + XDocument footnotesPartWithRevisionsXDoc = null; + if (footnotesPartWithRevisions != null) + { + footnotesPartWithRevisionsXDoc = footnotesPartWithRevisions.GetXDocument(); + footnotesPartWithRevisionsXDoc + .Root + .Elements(W.footnote) + .Where(e => (string)e.Attribute(W.id) != "-1" && (string)e.Attribute(W.id) != "0") + .Remove(); + } + + XDocument endnotesPartBeforeXDoc = null; + if (endnotesPartBefore != null) + endnotesPartBeforeXDoc = endnotesPartBefore.GetXDocument(); + + XDocument endnotesPartAfterXDoc = null; + if (endnotesPartAfter != null) + endnotesPartAfterXDoc = endnotesPartAfter.GetXDocument(); + + XDocument endnotesPartWithRevisionsXDoc = null; + if (endnotesPartWithRevisions != null) + { + endnotesPartWithRevisionsXDoc = endnotesPartWithRevisions.GetXDocument(); + endnotesPartWithRevisionsXDoc + .Root + .Elements(W.endnote) + .Where(e => (string)e.Attribute(W.id) != "-1" && (string)e.Attribute(W.id) != "0") + .Remove(); + } + + var footnotesRefs = mainDocumentXDoc + .Descendants(W.footnoteReference) + .Select((fn, idx) => + { + return new + { + FootNote = fn, + Idx = idx, + }; + }); + + foreach (var fn in footnotesRefs) + { + var oldId = (string)fn.FootNote.Attribute(W.id); + var newId = (fn.Idx + 1).ToString(); + fn.FootNote.Attribute(W.id).Value = newId; + var footnote = footnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(e => (string)e.Attribute(W.id) == oldId); + if (footnote == null) + { + footnote = footnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(e => (string)e.Attribute(W.id) == oldId); + } + if (footnote == null) + throw new OpenXmlPowerToolsException("Internal error"); + var cloned = new XElement(footnote); + cloned.Attribute(W.id).Value = newId; + footnotesPartWithRevisionsXDoc + .Root + .Add(cloned); + } + + var endnotesRefs = mainDocumentXDoc + .Descendants(W.endnoteReference) + .Select((fn, idx) => + { + return new + { + Endnote = fn, + Idx = idx, + }; + }); + + foreach (var fn in endnotesRefs) + { + var oldId = (string)fn.Endnote.Attribute(W.id); + var newId = (fn.Idx + 1).ToString(); + fn.Endnote.Attribute(W.id).Value = newId; + var endnote = endnotesPartAfterXDoc + .Root + .Elements() + .FirstOrDefault(e => (string)e.Attribute(W.id) == oldId); + if (endnote == null) + { + endnote = endnotesPartBeforeXDoc + .Root + .Elements() + .FirstOrDefault(e => (string)e.Attribute(W.id) == oldId); + } + if (endnote == null) + throw new OpenXmlPowerToolsException("Internal error"); + var cloned = new XElement(endnote); + cloned.Attribute(W.id).Value = newId; + endnotesPartWithRevisionsXDoc + .Root + .Add(cloned); + } + + if (footnotesPartWithRevisionsXDoc != null) + { + MarkContentAsDeletedOrInserted(footnotesPartWithRevisionsXDoc, settings); + CoalesceAdjacentRunsWithIdenticalFormatting(footnotesPartWithRevisionsXDoc); + XElement newXDocRoot = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(footnotesPartWithRevisionsXDoc.Root); + footnotesPartWithRevisionsXDoc.Root.ReplaceWith(newXDocRoot); + IgnorePt14Namespace(footnotesPartWithRevisionsXDoc.Root); + footnotesPartWithRevisions.PutXDocument(); + } + if (endnotesPartWithRevisionsXDoc != null) + { + MarkContentAsDeletedOrInserted(endnotesPartWithRevisionsXDoc, settings); + CoalesceAdjacentRunsWithIdenticalFormatting(endnotesPartWithRevisionsXDoc); + XElement newXDocRoot = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(endnotesPartWithRevisionsXDoc.Root); + endnotesPartWithRevisionsXDoc.Root.ReplaceWith(newXDocRoot); + IgnorePt14Namespace(endnotesPartWithRevisionsXDoc.Root); + endnotesPartWithRevisions.PutXDocument(); + } + } + + /// Here is the crux of the fix to the algorithm. After assembling the entire list of ComparisonUnitAtoms, we do the following: + /// - First, figure out the maximum hierarchy depth, considering only paragraphs, txbx, txbxContent, tables, rows, cells, and content controls. + /// - For documents that do not contain tables, nor text boxes, this maximum hierarchy depth will always be 1. + /// - For atoms within a table, the depth will be 4. The first level is the table, the second level is row, third is cell, fourth is paragraph. + /// - For atoms within a nested table, the depth will be 7: Table / Row / Cell / Table / Row / Cell / Paragraph + /// - For atoms within a text box, the depth will be 3: Paragraph / txbxContent / Paragraph + /// - For atoms within a table in a text box, the depth will be 5: Paragraph / txbxContent / Table / Row / Cell / Paragraph + /// In any case, we figure out the maximum depth. + /// + /// Then we iterate through the list of content atoms backwards. We do this n times, where n is the maximum depth. + /// + /// At each level, we find a paragraph mark, and working backwards, we set the guids in the hierarchy so that the content will be assembled together correctly. + /// + /// For each iteration, we only set unids at the level that we are working at. + /// + /// So first we will set all unids at level 1. When we find a paragraph mark, we get the unid for that level, and then working backwards, until we find another + /// paragraph mark, we set all unids at level 1 to the same unid as level 1 of the paragraph mark. + /// + /// Then we set all unids at level 2. When we find a paragraph mark, we get the unid for that level, and then working backwards, until we find another paragraph + /// mark, we set all unids at level 2 to the same unid as level 2 of the paragraph mark. At some point, we will find a paragraph mark with no level 2. This is + /// not a problem. We stop setting anything until we find another paragraph mark that has a level 2, at which point we resume setting values at level 2. + /// + /// Same process for level 3, and so on, until we have processed to the maximum depth of the hierarchy. + /// + /// At the end of this process, we will be able to do the coalsce recurse algorithm, and the content atom list will be put back together into a beautiful tree, + /// where every element is correctly positioned in the hierarchy. + /// + /// This should also properly assemble the test where just the paragraph marks have been deleted for a range of paragraphs. + /// + /// There is an interesting thought - it is possible that I have set two runs of text that were initially in the same paragraph, but then after + /// processing, they match up to text in different paragraphs. Therefore this will not work. We need to actually keep a list of reconstructed ancestor + /// Unids, because the same paragraph would get set to two different IDs - two ComparisonUnitAtoms need to be in separate paragraphs in the reconstructed + /// document, but their ancestors actually point to the same paragraph. + /// + /// Fix this in the algorithm, and also keep the appropriate list in ComparisonUnitAtom class. + + private static void AssembleAncestorUnidsInOrderToRebuildXmlTreeProperly(List comparisonUnitAtomList) + { + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in comparisonUnitAtomList) + sb.Append(item.ToString()).Append(Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // the following loop sets all ancestor unids in the after document to the unids in the before document for all pPr where the status is equal. + // this should always be true. + + // one additional modification to make to this loop - where we find a pPr in a text box, we want to do this as well, regardless of whether the status is equal, inserted, or deleted. + // reason being that this module does not support insertion / deletion of text boxes themselves. If a text box is in the before or after document, it will be in the document that + // contains deltas. It may have inserted or deleted text, but regardless, it will be in the result document. + foreach (var cua in comparisonUnitAtomList) + { + var doSet = false; + if (cua.ContentElement.Name == W.pPr) + { + if (cua.AncestorElements.Any(ae => ae.Name == W.txbxContent)) + doSet = true; + if (cua.CorrelationStatus == CorrelationStatus.Equal) + doSet = true; + } + if (doSet) + { + var cuaBefore = cua.ComparisonUnitAtomBefore; + var ancestorsAfter = cua.AncestorElements; + if (cuaBefore != null) + { + var ancestorsBefore = cuaBefore.AncestorElements; + if (ancestorsAfter.Length == ancestorsBefore.Length) + { + var zipped = ancestorsBefore.Zip(ancestorsAfter, (b, a) => + new + { + After = a, + Before = b, + }); + + foreach (var z in zipped) + { + var afterUnidAtt = z.After.Attribute(PtOpenXml.Unid); + var beforeUnidAtt = z.Before.Attribute(PtOpenXml.Unid); + if (afterUnidAtt != null && beforeUnidAtt != null) + afterUnidAtt.Value = beforeUnidAtt.Value; + } + } + } + } + } + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in comparisonUnitAtomList) + sb.Append(item.ToString()).Append(Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var rComparisonUnitAtomList = ((IEnumerable)comparisonUnitAtomList).Reverse().ToList(); + + // the following should always succeed, because there will always be at least one element in rComparisonUnitAtomList, and there will always be at least one + // ancestor in AncestorElements + var deepestAncestor = rComparisonUnitAtomList.First().AncestorElements.First(); + var deepestAncestorName = deepestAncestor.Name; + string deepestAncestorUnid = null; + if (deepestAncestorName == W.footnote || deepestAncestorName == W.endnote) + { + deepestAncestorUnid = (string)deepestAncestor.Attribute(PtOpenXml.Unid); + } + + /// If the following loop finds a pPr that is in a text box, then continue on, processing the pPr and all of its contents as though it were + /// content in the containing text box. This is going to leave it after this loop where the AncestorUnids for the content in the text box will be + /// incomplete. We then will need to go through the rComparisonUnitAtomList a second time, processing all of the text boxes. + + /// Note that this makes the basic assumption that a text box can't be nested inside of a text box, which, as far as I know, is a good assumption. + + /// This also makes the basic assumption that an endnote / footnote can't contain a text box, which I believe is a good assumption. + + + string[] currentAncestorUnids = null; + foreach (var cua in rComparisonUnitAtomList) + { + if (cua.ContentElement.Name == W.pPr) + { + var pPr_inTextBox = cua + .AncestorElements + .Any(ae => ae.Name == W.txbxContent); + + if (!pPr_inTextBox) + { + // this will collect the ancestor unids for the paragraph. + // my hypothesis is that these ancestor unids should be the same for all content unit atoms within that paragraph. + currentAncestorUnids = cua + .AncestorElements + .Select(ae => + { + var thisUnid = (string)ae.Attribute(PtOpenXml.Unid); + if (thisUnid == null) + throw new OpenXmlPowerToolsException("Internal error"); + return thisUnid; + }) + .ToArray(); + cua.AncestorUnids = currentAncestorUnids; + if (deepestAncestorUnid != null) + cua.AncestorUnids[0] = deepestAncestorUnid; + continue; + } + } + + var thisDepth = cua.AncestorElements.Length; + var additionalAncestorUnids = cua + .AncestorElements + .Skip(currentAncestorUnids.Length) + .Select(ae => + { + var thisUnid = (string)ae.Attribute(PtOpenXml.Unid); + if (thisUnid == null) + Guid.NewGuid().ToString().Replace("-", ""); + return thisUnid; + }); + var thisAncestorUnids = currentAncestorUnids + .Concat(additionalAncestorUnids) + .ToArray(); + cua.AncestorUnids = thisAncestorUnids; + if (deepestAncestorUnid != null) + cua.AncestorUnids[0] = deepestAncestorUnid; + } + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in comparisonUnitAtomList) + sb.Append(item.ToString()).Append(Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + // this is the second loop that processes all text boxes. + currentAncestorUnids = null; + bool skipUntilNextPpr = false; + foreach (var cua in rComparisonUnitAtomList) + { + if (currentAncestorUnids != null && cua.AncestorElements.Length < currentAncestorUnids.Length) + { + skipUntilNextPpr = true; + currentAncestorUnids = null; + continue; + } + if (cua.ContentElement.Name == W.pPr) + { + //if (s_True) + //{ + // var sb = new StringBuilder(); + // foreach (var item in comparisonUnitAtomList) + // sb.Append(item.ToString()).Append(Environment.NewLine); + // var sbs = sb.ToString(); + // TestUtil.NotePad(sbs); + //} + + var pPr_inTextBox = cua + .AncestorElements + .Any(ae => ae.Name == W.txbxContent); + + if (!pPr_inTextBox) + { + skipUntilNextPpr = true; + currentAncestorUnids = null; + continue; + } + else + { + skipUntilNextPpr = false; + + currentAncestorUnids = cua + .AncestorElements + .Select(ae => + { + var thisUnid = (string)ae.Attribute(PtOpenXml.Unid); + if (thisUnid == null) + throw new OpenXmlPowerToolsException("Internal error"); + return thisUnid; + }) + .ToArray(); + cua.AncestorUnids = currentAncestorUnids; + continue; + } + } + + if (skipUntilNextPpr) + continue; + + var thisDepth = cua.AncestorElements.Length; + var additionalAncestorUnids = cua + .AncestorElements + .Skip(currentAncestorUnids.Length) + .Select(ae => + { + var thisUnid = (string)ae.Attribute(PtOpenXml.Unid); + if (thisUnid == null) + Guid.NewGuid().ToString().Replace("-", ""); + return thisUnid; + }); + var thisAncestorUnids = currentAncestorUnids + .Concat(additionalAncestorUnids) + .ToArray(); + cua.AncestorUnids = thisAncestorUnids; + } + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in comparisonUnitAtomList) + sb.Append(item.ToStringAncestorUnids()).Append(Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + } + + // the following gets a flattened list of ComparisonUnitAtoms, with status indicated in each ComparisonUnitAtom: Deleted, Inserted, or Equal + private static List FlattenToComparisonUnitAtomList(List correlatedSequence, WmlComparerSettings settings) + { + var listOfComparisonUnitAtoms = correlatedSequence + .Select(cs => + { + + // need to write some code here to find out if we are assembling a paragraph (or anything) that contains the following unid. + // why do are we dropping content??????? + //string searchFor = "0ecb9184"; + + + + + + + + + + + + if (cs.CorrelationStatus == CorrelationStatus.Equal) + { + var contentAtomsBefore = cs + .ComparisonUnitArray1 + .Select(ca => ca.DescendantContentAtoms()) + .SelectMany(m => m); + + var contentAtomsAfter = cs + .ComparisonUnitArray2 + .Select(ca => ca.DescendantContentAtoms()) + .SelectMany(m => m); + + var comparisonUnitAtomList = contentAtomsBefore + .Zip(contentAtomsAfter, + (before, after) => + { + return new ComparisonUnitAtom(after.ContentElement, after.AncestorElements, after.Part, settings) + { + CorrelationStatus = CorrelationStatus.Equal, + ContentElementBefore = before.ContentElement, + ComparisonUnitAtomBefore = before, + }; + }) + .ToList(); + return comparisonUnitAtomList; + } + else if (cs.CorrelationStatus == CorrelationStatus.Deleted) + { + var comparisonUnitAtomList = cs + .ComparisonUnitArray1 + .Select(ca => ca.DescendantContentAtoms()) + .SelectMany(m => m) + .Select(ca => + new ComparisonUnitAtom(ca.ContentElement, ca.AncestorElements, ca.Part, settings) + { + CorrelationStatus = CorrelationStatus.Deleted, + }); + + return comparisonUnitAtomList; + } + else if (cs.CorrelationStatus == CorrelationStatus.Inserted) + { + var comparisonUnitAtomList = cs + .ComparisonUnitArray2 + .Select(ca => ca.DescendantContentAtoms()) + .SelectMany(m => m) + .Select(ca => + new ComparisonUnitAtom(ca.ContentElement, ca.AncestorElements, ca.Part, settings) + { + CorrelationStatus = CorrelationStatus.Inserted, + }); + return comparisonUnitAtomList; + } + else + throw new OpenXmlPowerToolsException("Internal error"); + }) + .SelectMany(m => m) + .ToList(); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in listOfComparisonUnitAtoms) + sb.Append(item.ToString()).Append(Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + return listOfComparisonUnitAtoms; + } + + // for any deleted or inserted rows, we go into the w:trPr properties, and add the appropriate w:ins or w:del element, and therefore + // when generating the document, the appropriate row will be marked as deleted or inserted. + private static void MarkRowsAsDeletedOrInserted(WmlComparerSettings settings, List correlatedSequence) + { + foreach (var dcs in correlatedSequence.Where(cs => + cs.CorrelationStatus == CorrelationStatus.Deleted || cs.CorrelationStatus == CorrelationStatus.Inserted)) + { + // iterate through all deleted/inserted items in dcs.ComparisonUnitArray1/ComparisonUnitArray2 + var toIterateThrough = dcs.ComparisonUnitArray1; + if (dcs.CorrelationStatus == CorrelationStatus.Inserted) + toIterateThrough = dcs.ComparisonUnitArray2; + + foreach (var ca in toIterateThrough) + { + var cug = ca as ComparisonUnitGroup; + + // this works because we will never see a table in this list, only rows. If tables were in this list, would need to recursively + // go into children, but tables are always flattened in the LCS process. + + // when we have a row, it is only necessary to find the first content atom of the row, then find the row ancestor, and then tweak + // the w:trPr + + if (cug != null && cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + { + var firstContentAtom = cug.DescendantContentAtoms().FirstOrDefault(); + if (firstContentAtom == null) + throw new OpenXmlPowerToolsException("Internal error"); + var tr = firstContentAtom + .AncestorElements + .Reverse() + .FirstOrDefault(a => a.Name == W.tr); + + if (tr == null) + throw new OpenXmlPowerToolsException("Internal error"); + var trPr = tr.Element(W.trPr); + if (trPr == null) + { + trPr = new XElement(W.trPr); + tr.AddFirst(trPr); + } + XName revTrackElementName = null; + if (dcs.CorrelationStatus == CorrelationStatus.Deleted) + revTrackElementName = W.del; + else if (dcs.CorrelationStatus == CorrelationStatus.Inserted) + revTrackElementName = W.ins; + trPr.Add(new XElement(revTrackElementName, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, s_MaxId++), + new XAttribute(W.date, settings.DateTimeForRevisions))); + } + } + } + } + + public enum WmlComparerRevisionType + { + Inserted, + Deleted, + } + + public class WmlComparerRevision + { + public WmlComparerRevisionType RevisionType; + public string Text; + public string Author; + public string Date; + public XElement ContentXElement; + public XElement RevisionXElement; + public Uri PartUri; + public string PartContentType; + } + + private static XName[] RevElementsWithNoText = new XName[] { + M.oMath, + M.oMathPara, + W.drawing, + }; + + public static List GetRevisions(WmlDocument source, WmlComparerSettings settings) + { + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(source.DocumentByteArray, 0, source.DocumentByteArray.Length); + using (WordprocessingDocument wDoc = WordprocessingDocument.Open(ms, true)) + { + TestForInvalidContent(wDoc); + RemoveExistingPowerToolsMarkup(wDoc); + + var contentParent = wDoc.MainDocumentPart.GetXDocument().Root.Element(W.body); + var atomList = WmlComparer.CreateComparisonUnitAtomList(wDoc.MainDocumentPart, contentParent, settings).ToArray(); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in atomList) + sb.Append(item.ToString() + Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var grouped = atomList + .GroupAdjacent(a => + { + var key = a.CorrelationStatus.ToString(); + if (a.CorrelationStatus != CorrelationStatus.Equal) + { + var rt = new XElement(a.RevTrackElement.Name, + new XAttribute(XNamespace.Xmlns + "w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"), + a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid)); + key += rt.ToString(SaveOptions.DisableFormatting); + } + return key; + }) + .ToList(); + + var revisions = grouped + .Where(k => k.Key != "Equal") + .ToList(); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in revisions) + sb.Append(item.Key + Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var mainDocPartRevisionList = revisions + .Select(rg => + { + var rev = new WmlComparerRevision(); + if (rg.Key.StartsWith("Inserted")) + rev.RevisionType = WmlComparerRevisionType.Inserted; + else if (rg.Key.StartsWith("Deleted")) + rev.RevisionType = WmlComparerRevisionType.Deleted; + var revTrackElement = rg.First().RevTrackElement; + rev.RevisionXElement = revTrackElement; + rev.Author = (string)revTrackElement.Attribute(W.author); + rev.ContentXElement = rg.First().ContentElement; + rev.Date = (string)revTrackElement.Attribute(W.date); + rev.PartUri = wDoc.MainDocumentPart.Uri; + rev.PartContentType = wDoc.MainDocumentPart.ContentType; + if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name)) + { + rev.Text = rg + .Select(rgc => + { + if (rgc.ContentElement.Name == W.pPr) + return Environment.NewLine; + return rgc.ContentElement.Value; + }) + .StringConcatenate(); + } + return rev; + }) + .ToList(); + + var footnotesRevisionList = GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.FootnotesPart, W.footnote, settings); + var endnotesRevisionList = GetFootnoteEndnoteRevisionList(wDoc.MainDocumentPart.EndnotesPart, W.endnote, settings); + var finalRevisionList = mainDocPartRevisionList.Concat(footnotesRevisionList).Concat(endnotesRevisionList).ToList(); + return finalRevisionList; + } + } + } + + private static IEnumerable GetFootnoteEndnoteRevisionList(OpenXmlPart footnotesEndnotesPart, + XName footnoteEndnoteElementName, + WmlComparerSettings settings) + { + if (footnotesEndnotesPart == null) + return Enumerable.Empty(); + + var xDoc = footnotesEndnotesPart.GetXDocument(); + var footnotesEndnotes = xDoc.Root.Elements(footnoteEndnoteElementName); + List revisionsForPart = new List(); + foreach (var fn in footnotesEndnotes) + { + var atomList = WmlComparer.CreateComparisonUnitAtomList(footnotesEndnotesPart, fn, settings).ToArray(); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in atomList) + sb.Append(item.ToString() + Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var grouped = atomList + .GroupAdjacent(a => + { + var key = a.CorrelationStatus.ToString(); + if (a.CorrelationStatus != CorrelationStatus.Equal) + { + var rt = new XElement(a.RevTrackElement.Name, + new XAttribute(XNamespace.Xmlns + "w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main"), + a.RevTrackElement.Attributes().Where(a2 => a2.Name != W.id && a2.Name != PtOpenXml.Unid)); + key += rt.ToString(SaveOptions.DisableFormatting); + } + return key; + }) + .ToList(); + + var revisions = grouped + .Where(k => k.Key != "Equal") + .ToList(); + + var thisNoteRevisionList = revisions + .Select(rg => + { + var rev = new WmlComparerRevision(); + if (rg.Key.StartsWith("Inserted")) + rev.RevisionType = WmlComparerRevisionType.Inserted; + else if (rg.Key.StartsWith("Deleted")) + rev.RevisionType = WmlComparerRevisionType.Deleted; + var revTrackElement = rg.First().RevTrackElement; + rev.RevisionXElement = revTrackElement; + rev.Author = (string)revTrackElement.Attribute(W.author); + rev.ContentXElement = rg.First().ContentElement; + rev.Date = (string)revTrackElement.Attribute(W.date); + rev.PartUri = footnotesEndnotesPart.Uri; + rev.PartContentType = footnotesEndnotesPart.ContentType; + if (!RevElementsWithNoText.Contains(rev.ContentXElement.Name)) + { + rev.Text = rg + .Select(rgc => + { + if (rgc.ContentElement.Name == W.pPr) + return Environment.NewLine; + return rgc.ContentElement.Value; + }) + .StringConcatenate(); + } + return rev; + }); + + foreach (var item in thisNoteRevisionList) + revisionsForPart.Add(item); + } + return revisionsForPart; + } + + // prohibit + // - altChunk + // - subDoc + // - contentPart + private static void TestForInvalidContent(WordprocessingDocument wDoc) + { + foreach (var part in wDoc.ContentParts()) + { + var xDoc = part.GetXDocument(); + if (xDoc.Descendants(W.altChunk).Any()) + throw new OpenXmlPowerToolsException("Unsupported document, contains w:altChunk"); + if (xDoc.Descendants(W.subDoc).Any()) + throw new OpenXmlPowerToolsException("Unsupported document, contains w:subDoc"); + if (xDoc.Descendants(W.contentPart).Any()) + throw new OpenXmlPowerToolsException("Unsupported document, contains w:contentPart"); + } + } + + private static void RemoveExistingPowerToolsMarkup(WordprocessingDocument wDoc) + { + wDoc.MainDocumentPart + .GetXDocument() + .Root + .Descendants() + .Attributes() + .Where(a => a.Name.Namespace == PtOpenXml.pt) + .Where(a => a.Name != PtOpenXml.Unid) + .Remove(); + wDoc.MainDocumentPart.PutXDocument(); + + var fnPart = wDoc.MainDocumentPart.FootnotesPart; + if (fnPart != null) + { + var fnXDoc = fnPart.GetXDocument(); + fnXDoc + .Root + .Descendants() + .Attributes() + .Where(a => a.Name.Namespace == PtOpenXml.pt) + .Where(a => a.Name != PtOpenXml.Unid) + .Remove(); + fnPart.PutXDocument(); + } + + var enPart = wDoc.MainDocumentPart.EndnotesPart; + if (enPart != null) + { + var enXDoc = enPart.GetXDocument(); + enXDoc + .Root + .Descendants() + .Attributes() + .Where(a => a.Name.Namespace == PtOpenXml.pt) + .Where(a => a.Name != PtOpenXml.Unid) + .Remove(); + enPart.PutXDocument(); + } + } + + private static void AddSha1HashToBlockLevelContent(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings) + { + var blockLevelContentToAnnotate = contentParent + .Descendants() + .Where(d => ElementsToHaveSha1Hash.Contains(d.Name)); + + foreach (var blockLevelContent in blockLevelContentToAnnotate) + { + var cloneBlockLevelContentForHashing = (XElement)CloneBlockLevelContentForHashing(part, blockLevelContent, true, settings); + var shaString = cloneBlockLevelContentForHashing.ToString(SaveOptions.DisableFormatting) + .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", ""); + var sha1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaString); + blockLevelContent.Add(new XAttribute(PtOpenXml.SHA1Hash, sha1Hash)); + + if (blockLevelContent.Name == W.tbl || + blockLevelContent.Name == W.tr) + { + var clonedForStructureHash = (XElement)CloneForStructureHash(cloneBlockLevelContentForHashing); + + // this is a convenient place to look at why tables are being compared as different. + + //if (blockLevelContent.Name == W.tbl) + // Console.WriteLine(); + + var shaString2 = clonedForStructureHash.ToString(SaveOptions.DisableFormatting) + .Replace(" xmlns=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\"", ""); + var sha1Hash2 = WmlComparerUtil.SHA1HashStringForUTF8String(shaString2); + blockLevelContent.Add(new XAttribute(PtOpenXml.StructureSHA1Hash, sha1Hash2)); + } + } + } + + // This strips all text nodes from the XML tree, thereby leaving only the structure. + private static object CloneForStructureHash(XNode node) + { + XElement element = node as XElement; + if (element != null) + { + return new XElement(element.Name, + element.Attributes(), + element.Elements().Select(e => CloneForStructureHash(e))); + } + return null; + } + + static XName[] AttributesToTrimWhenCloning = new XName[] { + WP14.anchorId, + WP14.editId, + "ObjectID", + "ShapeID", + "id", + "type", + }; + + private static object CloneBlockLevelContentForHashing(OpenXmlPart mainDocumentPart, XNode node, bool includeRelatedParts, WmlComparerSettings settings) + { + var element = node as XElement; + if (element != null) + { + if (element.Name == W.bookmarkStart || + element.Name == W.bookmarkEnd || + element.Name == W.pPr || + element.Name == W.rPr) + return null; + + if (element.Name == W.p) + { + var clonedPara = new XElement(element.Name, + element.Attributes().Where(a => a.Name != W.rsid && + a.Name != W.rsidDel && + a.Name != W.rsidP && + a.Name != W.rsidR && + a.Name != W.rsidRDefault && + a.Name != W.rsidRPr && + a.Name != W.rsidSect && + a.Name != W.rsidTr && + a.Name.Namespace != PtOpenXml.pt), + element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + + var groupedRuns = clonedPara + .Elements() + .GroupAdjacent(e => e.Name == W.r && + e.Elements().Count() == 1 && + e.Element(W.t) != null); + + var clonedParaWithGroupedRuns = new XElement(element.Name, + groupedRuns.Select(g => + { + if (g.Key) + { + var text = g.Select(t => t.Value).StringConcatenate(); + if (settings.CaseInsensitive) + text = text.ToUpper(settings.CultureInfo); + var newRun = (object)new XElement(W.r, + new XElement(W.t, + text)); + return newRun; + } + return g; + })); + + return clonedParaWithGroupedRuns; + } + + if (element.Name == W.r) + { + var clonedRuns = element + .Elements() + .Where(e => e.Name != W.rPr) + .Select(rc => new XElement(W.r, CloneBlockLevelContentForHashing(mainDocumentPart, rc, includeRelatedParts, settings))); + return clonedRuns; + } + + if (element.Name == W.tbl) + { + var clonedTable = new XElement(W.tbl, + element.Elements(W.tr).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return clonedTable; + } + + if (element.Name == W.tr) + { + var clonedRow = new XElement(W.tr, + element.Elements(W.tc).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return clonedRow; + } + + if (element.Name == W.tc) + { + var clonedCell = new XElement(W.tc, + element.Elements().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return clonedCell; + } + + if (element.Name == W.tcPr) + { + var clonedCellProps = new XElement(W.tcPr, + element.Elements(W.gridSpan).Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return clonedCellProps; + } + + if (element.Name == W.gridSpan) + { + var clonedGridSpan = new XElement(W.gridSpan, + new XAttribute("val", (string)element.Attribute(W.val))); + return clonedGridSpan; + } + + if (element.Name == W.txbxContent) + { + var clonedTextbox = new XElement(W.txbxContent, + element.Elements().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return clonedTextbox; + } + + if (includeRelatedParts) + { + if (ComparisonUnitWord.s_ElementsWithRelationshipIds.Contains(element.Name)) + { + var newElement = new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt) + .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name)) + .Select(a => + { + if (!ComparisonUnitWord.s_RelationshipAttributeNames.Contains(a.Name)) + return a; + var rId = (string)a; + + // could be an hyperlink relationship + try + { + OpenXmlPart oxp = mainDocumentPart.GetPartById(rId); + if (oxp == null) + throw new FileFormatException("Invalid WordprocessingML Document"); + var anno = oxp.Annotation(); + if (anno != null) + return new XAttribute(a.Name, anno.Hash); + + if (!oxp.ContentType.EndsWith("xml")) + { + using (var str = oxp.GetStream()) + { + byte[] ba; + using (BinaryReader br = new BinaryReader(str)) + { + ba = br.ReadBytes((int)str.Length); + } + var sha1 = WmlComparerUtil.SHA1HashStringForByteArray(ba); + oxp.AddAnnotation(new PartSHA1HashAnnotation(sha1)); + return new XAttribute(a.Name, sha1); + } + } + } + catch (ArgumentOutOfRangeException) + { + HyperlinkRelationship hr = mainDocumentPart.HyperlinkRelationships.FirstOrDefault(z => z.Id == rId); + if (hr != null) + { + var str = hr.Uri.ToString(); + return new XAttribute(a.Name, str); + } + // could be an external relationship + ExternalRelationship er = mainDocumentPart.ExternalRelationships.FirstOrDefault(z => z.Id == rId); + if (er != null) + { + var str = er.Uri.ToString(); + return new XAttribute(a.Name, str); + } + return new XAttribute(a.Name, "NULL Relationship"); + } + + return null; + }), + element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return newElement; + } + } + + if (element.Name == VML.shape) + { + return new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt) + .Where(a => a.Name != "style" && a.Name != "id" && a.Name != "type"), + element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + } + + if (element.Name == O.OLEObject) + { + var o = new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt) + .Where(a => a.Name != "ObjectID" && a.Name != R.id), + element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return o; + } + + if (element.Name == W._object) + { + var o = new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt), + element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + return o; + } + + if (element.Name == WP.docPr) + { + return new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt && a.Name != "id"), + element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + } + + return new XElement(element.Name, + element.Attributes() + .Where(a => a.Name.Namespace != PtOpenXml.pt) + .Where(a => !AttributesToTrimWhenCloning.Contains(a.Name)), + element.Nodes().Select(n => CloneBlockLevelContentForHashing(mainDocumentPart, n, includeRelatedParts, settings))); + } + if (settings.CaseInsensitive) + { + var xt = node as XText; + if (xt != null) + { + var newText = xt.Value.ToUpper(settings.CultureInfo); + return new XText(newText); + } + } + return node; + } + + private static List FindCommonAtBeginningAndEnd(CorrelatedSequence unknown, WmlComparerSettings settings) + { + int lengthToCompare = Math.Min(unknown.ComparisonUnitArray1.Length, unknown.ComparisonUnitArray2.Length); + + var countCommonAtBeginning = unknown + .ComparisonUnitArray1 + .Take(lengthToCompare) + .Zip(unknown.ComparisonUnitArray2, + (pu1, pu2) => + { + return new + { + Pu1 = pu1, + Pu2 = pu2, + }; + }) + .TakeWhile(pair => pair.Pu1.SHA1Hash == pair.Pu2.SHA1Hash) + .Count(); + + if (countCommonAtBeginning != 0 && ((double)countCommonAtBeginning / (double)lengthToCompare) < settings.DetailThreshold) + countCommonAtBeginning = 0; + + if (countCommonAtBeginning != 0) + { + var newSequence = new List(); + + CorrelatedSequence csEqual = new CorrelatedSequence(); + csEqual.CorrelationStatus = CorrelationStatus.Equal; + csEqual.ComparisonUnitArray1 = unknown + .ComparisonUnitArray1 + .Take(countCommonAtBeginning) + .ToArray(); + csEqual.ComparisonUnitArray2 = unknown + .ComparisonUnitArray2 + .Take(countCommonAtBeginning) + .ToArray(); + newSequence.Add(csEqual); + + var remainingLeft = unknown.ComparisonUnitArray1.Length - countCommonAtBeginning; + var remainingRight = unknown.ComparisonUnitArray2.Length - countCommonAtBeginning; + + if (remainingLeft != 0 && remainingRight == 0) + { + CorrelatedSequence csDeleted = new CorrelatedSequence(); + csDeleted.CorrelationStatus = CorrelationStatus.Deleted; + csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(countCommonAtBeginning).ToArray(); + csDeleted.ComparisonUnitArray2 = null; + newSequence.Add(csDeleted); + } + else if (remainingLeft == 0 && remainingRight != 0) + { + CorrelatedSequence csInserted = new CorrelatedSequence(); + csInserted.CorrelationStatus = CorrelationStatus.Inserted; + csInserted.ComparisonUnitArray1 = null; + csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(countCommonAtBeginning).ToArray(); + newSequence.Add(csInserted); + } + else if (remainingLeft != 0 && remainingRight != 0) + { + var first1 = unknown.ComparisonUnitArray1[0] as ComparisonUnitWord; + var first2 = unknown.ComparisonUnitArray2[0] as ComparisonUnitWord; + + if (first1 != null && first2 != null) + { + // if operating at the word level and + // if the last word on the left != pPr && last word on right != pPr + // then create an unknown for the rest of the paragraph, and create an unknown for the rest of the unknown + // if the last word on the left != pPr and last word on right == pPr + // then create deleted for the left, and create an unknown for the rest of the unknown + // if the last word on the left == pPr and last word on right != pPr + // then create inserted for the right, and create an unknown for the rest of the unknown + // if the last word on the left == pPr and last word on right == pPr + // then create an unknown for the rest of the unknown + + var remainingInLeft = unknown + .ComparisonUnitArray1 + .Skip(countCommonAtBeginning) + .ToArray(); + + var remainingInRight = unknown + .ComparisonUnitArray2 + .Skip(countCommonAtBeginning) + .ToArray(); + + var lastContentAtomLeft = unknown.ComparisonUnitArray1[countCommonAtBeginning - 1].DescendantContentAtoms().FirstOrDefault(); + var lastContentAtomRight = unknown.ComparisonUnitArray2[countCommonAtBeginning - 1].DescendantContentAtoms().FirstOrDefault(); + + if (lastContentAtomLeft.ContentElement.Name != W.pPr && lastContentAtomRight.ContentElement.Name != W.pPr) + { + var split1 = SplitAtParagraphMark(remainingInLeft); + var split2 = SplitAtParagraphMark(remainingInRight); + if (split1.Count() == 1 && split2.Count() == 1) + { + CorrelatedSequence csUnknown2 = new CorrelatedSequence(); + csUnknown2.CorrelationStatus = CorrelationStatus.Unknown; + csUnknown2.ComparisonUnitArray1 = split1.First(); + csUnknown2.ComparisonUnitArray2 = split2.First(); + newSequence.Add(csUnknown2); + return newSequence; + } + else if (split1.Count == 2 && split2.Count == 2) + { + CorrelatedSequence csUnknown2 = new CorrelatedSequence(); + csUnknown2.CorrelationStatus = CorrelationStatus.Unknown; + csUnknown2.ComparisonUnitArray1 = split1.First(); + csUnknown2.ComparisonUnitArray2 = split2.First(); + newSequence.Add(csUnknown2); + + CorrelatedSequence csUnknown3 = new CorrelatedSequence(); + csUnknown3.CorrelationStatus = CorrelationStatus.Unknown; + csUnknown3.ComparisonUnitArray1 = split1.Skip(1).First(); + csUnknown3.ComparisonUnitArray2 = split2.Skip(1).First(); + newSequence.Add(csUnknown3); + + return newSequence; + } + } + } + + CorrelatedSequence csUnknown = new CorrelatedSequence(); + csUnknown.CorrelationStatus = CorrelationStatus.Unknown; + csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(countCommonAtBeginning).ToArray(); + csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(countCommonAtBeginning).ToArray(); + newSequence.Add(csUnknown); + } + else if (remainingLeft == 0 && remainingRight == 0) + { + // nothing to do + } + return newSequence; + } + + // if we get to here, then countCommonAtBeginning == 0 + + var countCommonAtEnd = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(lengthToCompare) + .Zip(unknown + .ComparisonUnitArray2 + .Reverse() + .Take(lengthToCompare), + (pu1, pu2) => + { + return new + { + Pu1 = pu1, + Pu2 = pu2, + }; + }) + .TakeWhile(pair => pair.Pu1.SHA1Hash == pair.Pu2.SHA1Hash) + .Count(); + + // never start a common section with a paragraph mark. However, it is OK to set two paragraph marks as equal. + while (true) + { + if (countCommonAtEnd <= 1) + break; + + var firstCommon = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(countCommonAtEnd) + .LastOrDefault(); + + var firstCommonWord = firstCommon as ComparisonUnitWord; + if (firstCommonWord == null) + break; + + // if the word contains more than one atom, then not a paragraph mark + if (firstCommonWord.Contents.Count() != 1) + break; + + var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom; + if (firstCommonAtom == null) + break; + + if (firstCommonAtom.ContentElement.Name != W.pPr) + break; + + countCommonAtEnd--; + } + + bool isOnlyParagraphMark = false; + if (countCommonAtEnd == 1) + { + var firstCommon = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(countCommonAtEnd) + .LastOrDefault(); + + var firstCommonWord = firstCommon as ComparisonUnitWord; + if (firstCommonWord != null) + { + // if the word contains more than one atom, then not a paragraph mark + if (firstCommonWord.Contents.Count() == 1) + { + var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom; + if (firstCommonAtom != null) + { + if (firstCommonAtom.ContentElement.Name == W.pPr) + isOnlyParagraphMark = true; + } + } + } + } + if (countCommonAtEnd == 2) + { + var firstCommon = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(countCommonAtEnd) + .LastOrDefault(); + + var secondCommon = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(countCommonAtEnd) + .FirstOrDefault(); + + var firstCommonWord = firstCommon as ComparisonUnitWord; + var secondCommonWord = secondCommon as ComparisonUnitWord; + if (firstCommonWord != null && secondCommonWord != null) + { + // if the word contains more than one atom, then not a paragraph mark + if (firstCommonWord.Contents.Count() == 1 && secondCommonWord.Contents.Count() == 1) + { + var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom; + var secondCommonAtom = secondCommonWord.Contents.First() as ComparisonUnitAtom; + if (firstCommonAtom != null && secondCommonAtom != null) + { + if (secondCommonAtom.ContentElement.Name == W.pPr) + isOnlyParagraphMark = true; + } + } + } + } + + if (!isOnlyParagraphMark && countCommonAtEnd != 0 && ((double)countCommonAtEnd / (double)lengthToCompare) < settings.DetailThreshold) + countCommonAtEnd = 0; + + // If the following test is not there, the test below sets the end paragraph mark of the entire document equal to the end paragraph + // mark of the first paragraph in the other document, causing lines to be out of order. + // [InlineData("WC010-Para-Before-Table-Unmodified.docx", "WC010-Para-Before-Table-Mod.docx", 3)] + if (isOnlyParagraphMark) + countCommonAtEnd = 0; + + if (countCommonAtEnd == 0) + return null; + + // if countCommonAtEnd != 0, and if it contains a paragraph mark, then if there are comparison units in the same paragraph before the common at end (in either version) + // then we want to put all of those comparison units into a single unknown, where they must be resolved against each other. We don't want those comparison units to go into the middle unknown comparison unit. + + if (countCommonAtEnd != 0) + { + int remainingInLeftParagraph = 0; + int remainingInRightParagraph = 0; + + var commonEndSeq = unknown + .ComparisonUnitArray1 + .Reverse() + .Take(countCommonAtEnd) + .Reverse() + .ToList(); + var firstOfCommonEndSeq = commonEndSeq.First(); + if (firstOfCommonEndSeq is ComparisonUnitWord) + { + // are there any paragraph marks in the common seq at end? + //if (commonEndSeq.Any(cu => cu.Contents.OfType().First().ContentElement.Name == W.pPr)) + if (commonEndSeq.Any(cu => + { + var firstComparisonUnitAtom = cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return false; + return firstComparisonUnitAtom.ContentElement.Name == W.pPr; + })) + { + remainingInLeftParagraph = unknown + .ComparisonUnitArray1 + .Reverse() + .Skip(countCommonAtEnd) + .TakeWhile(cu => + { + if (!(cu is ComparisonUnitWord)) + return false; + var firstComparisonUnitAtom = cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return true; + return firstComparisonUnitAtom.ContentElement.Name != W.pPr; + }) + .Count(); + remainingInRightParagraph = unknown + .ComparisonUnitArray2 + .Reverse() + .Skip(countCommonAtEnd) + .TakeWhile(cu => + { + if (!(cu is ComparisonUnitWord)) + return false; + var firstComparisonUnitAtom = cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return true; + return firstComparisonUnitAtom.ContentElement.Name != W.pPr; + }) + .Count(); + } + } + + var newSequence = new List(); + + int beforeCommonParagraphLeft = unknown.ComparisonUnitArray1.Length - remainingInLeftParagraph - countCommonAtEnd; + int beforeCommonParagraphRight = unknown.ComparisonUnitArray2.Length - remainingInRightParagraph - countCommonAtEnd; + + if (beforeCommonParagraphLeft != 0 && beforeCommonParagraphRight == 0) + { + CorrelatedSequence csDeleted = new CorrelatedSequence(); + csDeleted.CorrelationStatus = CorrelationStatus.Deleted; + csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Take(beforeCommonParagraphLeft).ToArray(); + csDeleted.ComparisonUnitArray2 = null; + newSequence.Add(csDeleted); + } + else if (beforeCommonParagraphLeft == 0 && beforeCommonParagraphRight != 0) + { + CorrelatedSequence csInserted = new CorrelatedSequence(); + csInserted.CorrelationStatus = CorrelationStatus.Inserted; + csInserted.ComparisonUnitArray1 = null; + csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Take(beforeCommonParagraphRight).ToArray(); + newSequence.Add(csInserted); + } + else if (beforeCommonParagraphLeft != 0 && beforeCommonParagraphRight != 0) + { + CorrelatedSequence csUnknown = new CorrelatedSequence(); + csUnknown.CorrelationStatus = CorrelationStatus.Unknown; + csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Take(beforeCommonParagraphLeft).ToArray(); + csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Take(beforeCommonParagraphRight).ToArray(); + newSequence.Add(csUnknown); + } + else if (beforeCommonParagraphLeft == 0 && beforeCommonParagraphRight == 0) + { + // nothing to do + } + + if (remainingInLeftParagraph != 0 && remainingInRightParagraph == 0) + { + CorrelatedSequence csDeleted = new CorrelatedSequence(); + csDeleted.CorrelationStatus = CorrelationStatus.Deleted; + csDeleted.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(beforeCommonParagraphLeft).Take(remainingInLeftParagraph).ToArray(); + csDeleted.ComparisonUnitArray2 = null; + newSequence.Add(csDeleted); + } + else if (remainingInLeftParagraph == 0 && remainingInRightParagraph != 0) + { + CorrelatedSequence csInserted = new CorrelatedSequence(); + csInserted.CorrelationStatus = CorrelationStatus.Inserted; + csInserted.ComparisonUnitArray1 = null; + csInserted.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(beforeCommonParagraphRight).Take(remainingInRightParagraph).ToArray(); + newSequence.Add(csInserted); + } + else if (remainingInLeftParagraph != 0 && remainingInRightParagraph != 0) + { + CorrelatedSequence csUnknown = new CorrelatedSequence(); + csUnknown.CorrelationStatus = CorrelationStatus.Unknown; + csUnknown.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(beforeCommonParagraphLeft).Take(remainingInLeftParagraph).ToArray(); + csUnknown.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(beforeCommonParagraphRight).Take(remainingInRightParagraph).ToArray(); + newSequence.Add(csUnknown); + } + else if (remainingInLeftParagraph == 0 && remainingInRightParagraph == 0) + { + // nothing to do + } + + CorrelatedSequence csEqual = new CorrelatedSequence(); + csEqual.CorrelationStatus = CorrelationStatus.Equal; + csEqual.ComparisonUnitArray1 = unknown.ComparisonUnitArray1.Skip(unknown.ComparisonUnitArray1.Length - countCommonAtEnd).ToArray(); + csEqual.ComparisonUnitArray2 = unknown.ComparisonUnitArray2.Skip(unknown.ComparisonUnitArray2.Length - countCommonAtEnd).ToArray(); + newSequence.Add(csEqual); + + return newSequence; + } + + return null; +#if false + var middleLeft = unknown + .ComparisonUnitArray1 + .Skip(countCommonAtBeginning) + .SkipLast(remainingInLeftParagraph) + .SkipLast(countCommonAtEnd) + .ToArray(); + + var middleRight = unknown + .ComparisonUnitArray2 + .Skip(countCommonAtBeginning) + .SkipLast(remainingInRightParagraph) + .SkipLast(countCommonAtEnd) + .ToArray(); + + if (middleLeft.Length > 0 && middleRight.Length == 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Deleted; + cs.ComparisonUnitArray1 = middleLeft; + cs.ComparisonUnitArray2 = null; + newSequence.Add(cs); + } + else if (middleLeft.Length == 0 && middleRight.Length > 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Inserted; + cs.ComparisonUnitArray1 = null; + cs.ComparisonUnitArray2 = middleRight; + newSequence.Add(cs); + } + else if (middleLeft.Length > 0 && middleRight.Length > 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Unknown; + cs.ComparisonUnitArray1 = middleLeft; + cs.ComparisonUnitArray2 = middleRight; + newSequence.Add(cs); + } + + var remainingInParaLeft = unknown + .ComparisonUnitArray1 + .Skip(countCommonAtBeginning) + .Skip(middleLeft.Length) + .Take(remainingInLeftParagraph) + .ToArray(); + + var remainingInParaRight = unknown + .ComparisonUnitArray2 + .Skip(countCommonAtBeginning) + .Skip(middleRight.Length) + .Take(remainingInRightParagraph) + .ToArray(); + + if (remainingInParaLeft.Length > 0 && remainingInParaRight.Length == 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Deleted; + cs.ComparisonUnitArray1 = remainingInParaLeft; + cs.ComparisonUnitArray2 = null; + newSequence.Add(cs); + } + else if (remainingInParaLeft.Length == 0 && remainingInParaRight.Length > 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Inserted; + cs.ComparisonUnitArray1 = null; + cs.ComparisonUnitArray2 = remainingInParaRight; + newSequence.Add(cs); + } + else if (remainingInParaLeft.Length > 0 && remainingInParaRight.Length > 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Unknown; + cs.ComparisonUnitArray1 = remainingInParaLeft; + cs.ComparisonUnitArray2 = remainingInParaRight; + newSequence.Add(cs); + } + + if (countCommonAtEnd != 0) + { + CorrelatedSequence cs = new CorrelatedSequence(); + cs.CorrelationStatus = CorrelationStatus.Equal; + + cs.ComparisonUnitArray1 = unknown + .ComparisonUnitArray1 + .Skip(countCommonAtBeginning + middleLeft.Length + remainingInParaLeft.Length) + .ToArray(); + + cs.ComparisonUnitArray2 = unknown + .ComparisonUnitArray2 + .Skip(countCommonAtBeginning + middleRight.Length + remainingInParaRight.Length) + .ToArray(); + + if (cs.ComparisonUnitArray1.Length != cs.ComparisonUnitArray2.Length) + throw new OpenXmlPowerToolsException("Internal error"); + + newSequence.Add(cs); + } + return newSequence; +#endif + } + + private static List SplitAtParagraphMark(ComparisonUnit[] cua) + { + int i; + for (i = 0; i < cua.Length; i++) + { + var atom = cua[i].DescendantContentAtoms().FirstOrDefault(); + if (atom != null && atom.ContentElement.Name == W.pPr) + break; + } + if (i == cua.Length) + { + return new List() + { + cua + }; + } + return new List() + { + cua.Take(i).ToArray(), + cua.Skip(i).ToArray(), + }; + } + + private static void MoveLastSectPrToChildOfBody(XDocument newXDoc) + { + var lastParaWithSectPr = newXDoc + .Root + .Elements(W.body) + .Elements(W.p) + .Where(p => p.Elements(W.pPr).Elements(W.sectPr).Any()) + .LastOrDefault(); + if (lastParaWithSectPr != null) + { + newXDoc.Root.Element(W.body).Add(lastParaWithSectPr.Elements(W.pPr).Elements(W.sectPr)); + lastParaWithSectPr.Elements(W.pPr).Elements(W.sectPr).Remove(); + } + } + + private static int s_MaxId = 0; + + private static object ProduceNewWmlMarkupFromCorrelatedSequence(OpenXmlPart part, + IEnumerable comparisonUnitAtomList, + WmlComparerSettings settings) + { + // fabricate new MainDocumentPart from correlatedSequence + s_MaxId = 0; + var newBodyChildren = CoalesceRecurse(part, comparisonUnitAtomList, 0, settings); + return newBodyChildren; + } + + private static void FixUpDocPrIds(WordprocessingDocument wDoc) + { + var elementToFind = WP.docPr; + var docPrToChange = wDoc + .ContentParts() + .Select(cp => cp.GetXDocument()) + .Select(xd => xd.Descendants().Where(d => d.Name == elementToFind)) + .SelectMany(m => m); + var nextId = 1; + foreach (var item in docPrToChange) + { + var idAtt = item.Attribute("id"); + if (idAtt != null) + idAtt.Value = (nextId++).ToString(); + } + foreach (var cp in wDoc.ContentParts()) + cp.PutXDocument(); + } + + private static void FixUpRevMarkIds(WordprocessingDocument wDoc) + { + var revMarksToChange = wDoc + .ContentParts() + .Select(cp => cp.GetXDocument()) + .Select(xd => xd.Descendants().Where(d => d.Name == W.ins || d.Name == W.del)) + .SelectMany(m => m); + var nextId = 0; + foreach (var item in revMarksToChange) + { + var idAtt = item.Attribute(W.id); + if (idAtt != null) + idAtt.Value = (nextId++).ToString(); + } + foreach (var cp in wDoc.ContentParts()) + cp.PutXDocument(); + } + + private static void FixUpShapeIds(WordprocessingDocument wDoc) + { + var elementToFind = VML.shape; + var shapeIdsToChange = wDoc + .ContentParts() + .Select(cp => cp.GetXDocument()) + .Select(xd => xd.Descendants().Where(d => d.Name == elementToFind)) + .SelectMany(m => m); + var nextId = 1; + foreach (var item in shapeIdsToChange) + { + var thisId = nextId++; + + var idAtt = item.Attribute("id"); + if (idAtt != null) + idAtt.Value = thisId.ToString(); + + var oleObject = item.Parent.Element(O.OLEObject); + if (oleObject != null) + { + var shapeIdAtt = oleObject.Attribute("ShapeID"); + if (shapeIdAtt != null) + shapeIdAtt.Value = thisId.ToString(); + } + } + foreach (var cp in wDoc.ContentParts()) + cp.PutXDocument(); + } + + private static void FixUpGroupIds(WordprocessingDocument wDoc) + { + var elementToFind = VML.group; + var groupIdsToChange = wDoc + .ContentParts() + .Select(cp => cp.GetXDocument()) + .Select(xd => xd.Descendants().Where(d => d.Name == elementToFind)) + .SelectMany(m => m); + var nextId = 1; + foreach (var item in groupIdsToChange) + { + var thisId = nextId++; + + var idAtt = item.Attribute("id"); + if (idAtt != null) + idAtt.Value = thisId.ToString(); + } + foreach (var cp in wDoc.ContentParts()) + cp.PutXDocument(); + } + + private static void FixUpShapeTypeIds(WordprocessingDocument wDoc) + { + var elementToFind = VML.shapetype; + var shapeTypeIdsToChange = wDoc + .ContentParts() + .Select(cp => cp.GetXDocument()) + .Select(xd => xd.Descendants().Where(d => d.Name == elementToFind)) + .SelectMany(m => m); + var nextId = 1; + foreach (var item in shapeTypeIdsToChange) + { + var thisId = nextId++; + + var idAtt = item.Attribute("id"); + if (idAtt != null) + idAtt.Value = thisId.ToString(); + + var shape = item.Parent.Element(VML.shape); + if (shape != null) + { + var typeAtt = shape.Attribute("type"); + if (typeAtt != null) + typeAtt.Value = thisId.ToString(); + } + } + foreach (var cp in wDoc.ContentParts()) + cp.PutXDocument(); + } + + private static object CoalesceRecurse(OpenXmlPart part, IEnumerable list, int level, WmlComparerSettings settings) + { + var grouped = list.GroupBy(ca => + { + if (level >= ca.AncestorElements.Length) + return ""; + return ca.AncestorUnids[level]; + }) + .Where(g => g.Key != ""); + + // if there are no deeper children, then we're done. + if (!grouped.Any()) + return null; + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var group in grouped) + { + sb.AppendFormat("Group Key: {0}", group.Key); + sb.Append(Environment.NewLine); + foreach (var groupChildItem in group) + { + sb.Append(" "); + sb.Append(groupChildItem.ToString(0)); + sb.Append(Environment.NewLine); + } + sb.Append(Environment.NewLine); + } + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var elementList = grouped + .Select(g => + { + var ancestorBeingConstructed = g.First().AncestorElements[level]; // these will all be the same, by definition + + // need to group by corr stat + var groupedChildren = g + .GroupAdjacent(gc => + { + var key = ""; + if (level < (gc.AncestorElements.Length - 1)) + { + key = gc.AncestorUnids[level + 1]; + } + if (gc.AncestorElements.Skip(level).Any(ae => ae.Name == W.txbxContent)) + key += "|" + CorrelationStatus.Equal.ToString(); + else + key += "|" + gc.CorrelationStatus.ToString(); + return key; + }) + .ToList(); + + if (ancestorBeingConstructed.Name == W.p) + { + var newChildElements = groupedChildren + .Select(gc => + { + var spl = gc.Key.Split('|'); + if (spl[0] == "") + return (object)gc.Select(gcc => + { + var dup = new XElement(gcc.ContentElement); + if (spl[1] == "Deleted") + dup.Add(new XAttribute(PtOpenXml.Status, "Deleted")); + else if (spl[1] == "Inserted") + dup.Add(new XAttribute(PtOpenXml.Status, "Inserted")); + return dup; + }); + else + { + return CoalesceRecurse(part, gc, level + 1, settings); + } + }) + .ToList(); + + var newPara = new XElement(W.p, + ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt), + new XAttribute(PtOpenXml.Unid, g.Key), + newChildElements); + + return newPara; + } + + if (ancestorBeingConstructed.Name == W.r) + { + var newChildElements = groupedChildren + .Select(gc => + { + var spl = gc.Key.Split('|'); + if (spl[0] == "") + return (object)gc.Select(gcc => + { + var dup = new XElement(gcc.ContentElement); + if (spl[1] == "Deleted") + dup.Add(new XAttribute(PtOpenXml.Status, "Deleted")); + else if (spl[1] == "Inserted") + dup.Add(new XAttribute(PtOpenXml.Status, "Inserted")); + return dup; + }); + else + { + return CoalesceRecurse(part, gc, level + 1, settings); + } + }) + .ToList(); + + XElement rPr = ancestorBeingConstructed.Element(W.rPr); + var newRun = new XElement(W.r, + ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt), + rPr, + newChildElements); + return newRun; + } + + if (ancestorBeingConstructed.Name == W.t) + { + var newChildElements = groupedChildren + .Select(gc => + { + var textOfTextElement = gc.Select(gce => gce.ContentElement.Value).StringConcatenate(); + var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted; + var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted; + if (del) + return (object)(new XElement(W.delText, + new XAttribute(PtOpenXml.Status, "Deleted"), + GetXmlSpaceAttribute(textOfTextElement), + textOfTextElement)); + else if (ins) + return (object)(new XElement(W.t, + new XAttribute(PtOpenXml.Status, "Inserted"), + GetXmlSpaceAttribute(textOfTextElement), + textOfTextElement)); + else + return (object)(new XElement(W.t, + GetXmlSpaceAttribute(textOfTextElement), + textOfTextElement)); + }) + .ToList(); + return newChildElements; + } + + if (ancestorBeingConstructed.Name == W.drawing) + { + var newChildElements = groupedChildren + .Select(gc => + { + var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted; + var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted; + if (del) + { + return (object)gc.Select(gcc => + { + var newDrawing = new XElement(gcc.ContentElement); + newDrawing.Add(new XAttribute(PtOpenXml.Status, "Deleted")); + + var openXmlPartOfDeletedContent = gc.First().Part; + var openXmlPartInNewDocument = part; + return gc.Select(gce => + { + Package packageOfDeletedContent = openXmlPartOfDeletedContent.OpenXmlPackage.Package; + Package packageOfNewContent = openXmlPartInNewDocument.OpenXmlPackage.Package; + PackagePart partInDeletedDocument = packageOfDeletedContent.GetPart(part.Uri); + PackagePart partInNewDocument = packageOfNewContent.GetPart(part.Uri); + return MoveRelatedPartsToDestination(partInDeletedDocument, partInNewDocument, newDrawing); + }); + }); + } + else if (ins) + { + return gc.Select(gcc => + { + var newDrawing = new XElement(gcc.ContentElement); + newDrawing.Add(new XAttribute(PtOpenXml.Status, "Inserted")); + + var openXmlPartOfInsertedContent = gc.First().Part; + var openXmlPartInNewDocument = part; + return gc.Select(gce => + { + Package packageOfSourceContent = openXmlPartOfInsertedContent.OpenXmlPackage.Package; + Package packageOfNewContent = openXmlPartInNewDocument.OpenXmlPackage.Package; + PackagePart partInDeletedDocument = packageOfSourceContent.GetPart(part.Uri); + PackagePart partInNewDocument = packageOfNewContent.GetPart(part.Uri); + return MoveRelatedPartsToDestination(partInDeletedDocument, partInNewDocument, newDrawing); + }); + }); + } + else + { + return gc.Select(gcc => + { + return gcc.ContentElement; + }); + } + }) + .ToList(); + return newChildElements; + } + + if (ancestorBeingConstructed.Name == M.oMath || ancestorBeingConstructed.Name == M.oMathPara) + { + var newChildElements = groupedChildren + .Select(gc => + { + var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted; + var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted; + if (del) + { + return gc.Select(gcc => + { + return new XElement(W.del, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, s_MaxId++), + new XAttribute(W.date, settings.DateTimeForRevisions), + gcc.ContentElement); + }); + } + else if (ins) + { + return gc.Select(gcc => + { + return new XElement(W.ins, + new XAttribute(W.author, settings.AuthorForRevisions), + new XAttribute(W.id, s_MaxId++), + new XAttribute(W.date, settings.DateTimeForRevisions), + gcc.ContentElement); + }); + } + else + { + return gc.Select(gcc => gcc.ContentElement); + } + }) + .ToList(); + return newChildElements; + } + + if (AllowableRunChildren.Contains(ancestorBeingConstructed.Name)) + { + var newChildElements = groupedChildren + .Select(gc => + { + var del = gc.First().CorrelationStatus == CorrelationStatus.Deleted; + var ins = gc.First().CorrelationStatus == CorrelationStatus.Inserted; + if (del) + { + return gc.Select(gcc => + { + var dup = new XElement(ancestorBeingConstructed.Name, + ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt), + new XAttribute(PtOpenXml.Status, "Deleted")); + return dup; + }); + } + else if (ins) + { + return gc.Select(gcc => + { + var dup = new XElement(ancestorBeingConstructed.Name, + ancestorBeingConstructed.Attributes().Where(a => a.Name.Namespace != PtOpenXml.pt), + new XAttribute(PtOpenXml.Status, "Inserted")); + return dup; + }); + } + else + { + return gc.Select(gcc => gcc.ContentElement); + } + }) + .ToList(); + return newChildElements; + } + + if (ancestorBeingConstructed.Name == W.tbl) + return ReconstructElement(part, g, ancestorBeingConstructed, W.tblPr, W.tblGrid, null, level, settings); + if (ancestorBeingConstructed.Name == W.tr) + return ReconstructElement(part, g, ancestorBeingConstructed, W.trPr, null, null, level, settings); + if (ancestorBeingConstructed.Name == W.tc) + return ReconstructElement(part, g, ancestorBeingConstructed, W.tcPr, null, null, level, settings); + if (ancestorBeingConstructed.Name == W.sdt) + return ReconstructElement(part, g, ancestorBeingConstructed, W.sdtPr, W.sdtEndPr, null, level, settings); + if (ancestorBeingConstructed.Name == W.pict) + return ReconstructElement(part, g, ancestorBeingConstructed, VML.shapetype, null, null, level, settings); + if (ancestorBeingConstructed.Name == VML.shape) + return ReconstructElement(part, g, ancestorBeingConstructed, W10.wrap, null, null, level, settings); + if (ancestorBeingConstructed.Name == W._object) + return ReconstructElement(part, g, ancestorBeingConstructed, VML.shapetype, VML.shape, O.OLEObject, level, settings); + if (ancestorBeingConstructed.Name == W.ruby) + return ReconstructElement(part, g, ancestorBeingConstructed, W.rubyPr, null, null, level, settings); + return (object)ReconstructElement(part, g, ancestorBeingConstructed, null, null, null, level, settings); + }) + .ToList(); + return elementList; + } + + private static XElement MoveRelatedPartsToDestination(PackagePart partOfDeletedContent, PackagePart partInNewDocument, + XElement contentElement) + { + var elementsToUpdate = contentElement + .Descendants() + .Where(d => d.Attributes().Any(a => ComparisonUnitWord.s_RelationshipAttributeNames.Contains(a.Name))) + .ToList(); + foreach (var element in elementsToUpdate) + { + var attributesToUpdate = element + .Attributes() + .Where(a => ComparisonUnitWord.s_RelationshipAttributeNames.Contains(a.Name)) + .ToList(); + foreach (var att in attributesToUpdate) + { + var rId = (string)att; + + var relationshipForDeletedPart = partOfDeletedContent.GetRelationship(rId); + if (relationshipForDeletedPart == null) + throw new FileFormatException("Invalid document"); + + Uri targetUri = PackUriHelper + .ResolvePartUri( + new Uri(partOfDeletedContent.Uri.ToString(), UriKind.Relative), + relationshipForDeletedPart.TargetUri); + + var relatedPackagePart = partOfDeletedContent.Package.GetPart(targetUri); + var uriSplit = relatedPackagePart.Uri.ToString().Split('/'); + var last = uriSplit[uriSplit.Length - 1].Split('.'); + string uriString = null; + if (last.Length == 2) + { + uriString = uriSplit.SkipLast(1).Select(p => p + "/").StringConcatenate() + + "P" + Guid.NewGuid().ToString().Replace("-", "") + "." + last[1]; + } + else + { + uriString = uriSplit.SkipLast(1).Select(p => p + "/").StringConcatenate() + + "P" + Guid.NewGuid().ToString().Replace("-", ""); + } + Uri uri = null; + if (relatedPackagePart.Uri.IsAbsoluteUri) + uri = new Uri(uriString, UriKind.Absolute); + else + uri = new Uri(uriString, UriKind.Relative); + + var newPart = partInNewDocument.Package.CreatePart(uri, relatedPackagePart.ContentType); + using (var oldPartStream = relatedPackagePart.GetStream()) + using (var newPartStream = newPart.GetStream()) + FileUtils.CopyStream(oldPartStream, newPartStream); + + var newRid = "R" + Guid.NewGuid().ToString().Replace("-", ""); + partInNewDocument.CreateRelationship(newPart.Uri, TargetMode.Internal, relationshipForDeletedPart.RelationshipType, newRid); + att.Value = newRid; + + if (newPart.ContentType.EndsWith("xml")) + { + XDocument newPartXDoc = null; + using (var stream = newPart.GetStream()) + { + newPartXDoc = XDocument.Load(stream); + MoveRelatedPartsToDestination(relatedPackagePart, newPart, newPartXDoc.Root); + } + using (var stream = newPart.GetStream()) + newPartXDoc.Save(stream); + } + } + } + return contentElement; + } + + private static XAttribute GetXmlSpaceAttribute(string textOfTextElement) + { + if (char.IsWhiteSpace(textOfTextElement[0]) || + char.IsWhiteSpace(textOfTextElement[textOfTextElement.Length - 1])) + return new XAttribute(XNamespace.Xml + "space", "preserve"); + return null; + } + + private static XElement ReconstructElement(OpenXmlPart part, IGrouping g, XElement ancestorBeingConstructed, XName props1XName, + XName props2XName, XName props3XName, int level, WmlComparerSettings settings) + { + var newChildElements = CoalesceRecurse(part, g, level + 1, settings); + + object props1 = null; + if (props1XName != null) + props1 = ancestorBeingConstructed.Elements(props1XName); + + object props2 = null; + if (props2XName != null) + props2 = ancestorBeingConstructed.Elements(props2XName); + + object props3 = null; + if (props3XName != null) + props3 = ancestorBeingConstructed.Elements(props3XName); + + var reconstructedElement = new XElement(ancestorBeingConstructed.Name, + ancestorBeingConstructed.Attributes(), + props1, props2, props3, newChildElements); + + return reconstructedElement; + } + + private static List Lcs(ComparisonUnit[] cu1, ComparisonUnit[] cu2, WmlComparerSettings settings) + { + // set up initial state - one CorrelatedSequence, UnKnown, contents == entire sequences (both) + CorrelatedSequence cs = new CorrelatedSequence() + { + CorrelationStatus = CorrelationStatus.Unknown, + ComparisonUnitArray1 = cu1, + ComparisonUnitArray2 = cu2, + }; + List csList = new List() + { + cs + }; + + while (true) + { + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in csList) + sb.Append(item.ToString()).Append(Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var unknown = csList + .FirstOrDefault(z => z.CorrelationStatus == CorrelationStatus.Unknown); + + if (unknown != null) + { + // if unknown consists of a single group of the same type in each side, then can set some Unids in the 'after' document. + // if the unknown is a pair of single tables, then can set table Unid. + // if the unknown is a pair of single rows, then can set table and rows Unids. + // if the unknown is a pair of single cells, then can set table, row, and cell Unids. + // if the unknown is a pair of paragraphs, then can set paragraph (and all ancestor) Unids. + SetAfterUnids(unknown); + + if (s_False) + { + var sb = new StringBuilder(); + sb.Append(unknown.ToString()); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + List newSequence = ProcessCorrelatedHashes(unknown, settings); + if (newSequence == null) + { + newSequence = FindCommonAtBeginningAndEnd(unknown, settings); + if (newSequence == null) + { + newSequence = DoLcsAlgorithm(unknown, settings); + } + } + + var indexOfUnknown = csList.IndexOf(unknown); + csList.Remove(unknown); + + newSequence.Reverse(); + foreach (var item in newSequence) + csList.Insert(indexOfUnknown, item); + + continue; + } + + return csList; + } + } + + private static void SetAfterUnids(CorrelatedSequence unknown) + { + if (unknown.ComparisonUnitArray1.Length == 1 && unknown.ComparisonUnitArray2.Length == 1) + { + var cua1 = unknown.ComparisonUnitArray1[0] as ComparisonUnitGroup; + var cua2 = unknown.ComparisonUnitArray2[0] as ComparisonUnitGroup; + if (cua1 != null && + cua2 != null && + cua1.ComparisonUnitGroupType == cua2.ComparisonUnitGroupType) + { + var groupType = cua1.ComparisonUnitGroupType; + var da1 = cua1.DescendantContentAtoms(); + var da2 = cua2.DescendantContentAtoms(); + XName takeThruName = null; + switch (groupType) + { + case ComparisonUnitGroupType.Paragraph: + takeThruName = W.p; + break; + case ComparisonUnitGroupType.Table: + takeThruName = W.tbl; + break; + case ComparisonUnitGroupType.Row: + takeThruName = W.tr; + break; + case ComparisonUnitGroupType.Cell: + takeThruName = W.tc; + break; + case ComparisonUnitGroupType.Textbox: + takeThruName = W.txbxContent; + break; + } + if (takeThruName == null) + throw new OpenXmlPowerToolsException("Internal error"); + var relevantAncestors = new List(); + foreach (var ae in da1.First().AncestorElements) + { + if (ae.Name != takeThruName) + { + relevantAncestors.Add(ae); + continue; + } + relevantAncestors.Add(ae); + break; + } + var unidList = relevantAncestors + .Select(a => + { + var unid = (string)a.Attribute(PtOpenXml.Unid); + if (unid == null) + throw new OpenXmlPowerToolsException("Internal error"); + return unid; + }) + .ToArray(); + foreach (var da in da2) + { + var ancestorsToSet = da.AncestorElements.Take(unidList.Length); + var zipped = ancestorsToSet.Zip(unidList, (a, u) => + new + { + Ancestor = a, + Unid = u, + }); + + foreach (var z in zipped) + { + var unid = z.Ancestor.Attribute(PtOpenXml.Unid); + + if (z.Ancestor.Name == W.footnotes || z.Ancestor.Name == W.endnotes) + continue; + + if (unid == null) + throw new OpenXmlPowerToolsException("Internal error"); + unid.Value = z.Unid; + } + } + } + } + } + + private static List ProcessCorrelatedHashes(CorrelatedSequence unknown, WmlComparerSettings settings) + { + // never attempt this optimization if there are less than 3 groups + var maxd = Math.Min(unknown.ComparisonUnitArray1.Length, unknown.ComparisonUnitArray2.Length); + if (maxd < 3) + return null; + + var firstInCu1 = unknown.ComparisonUnitArray1.FirstOrDefault() as ComparisonUnitGroup; + var firstInCu2 = unknown.ComparisonUnitArray2.FirstOrDefault() as ComparisonUnitGroup; + if (firstInCu1 != null && firstInCu2 != null) + { + if ((firstInCu1.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph || + firstInCu1.ComparisonUnitGroupType == ComparisonUnitGroupType.Table || + firstInCu1.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) && + (firstInCu2.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph || + firstInCu2.ComparisonUnitGroupType == ComparisonUnitGroupType.Table || + firstInCu2.ComparisonUnitGroupType == ComparisonUnitGroupType.Row)) + { + var groupType = firstInCu1.ComparisonUnitGroupType; + + // Next want to do the lcs algorithm on this. + // potentially, we will find all paragraphs are correlated, but they may not be for two reasons- + // - if there were changes that were not tracked + // - if the anomolies in the change tracking cause there to be a mismatch in the number of paragraphs + // therefore we are going to do the whole LCS algorithm thing + // and at the end of the process, we set up the correlated sequence list where correlated paragraphs are together in their + // own unknown correlated sequence. + + var cul1 = unknown.ComparisonUnitArray1; + var cul2 = unknown.ComparisonUnitArray2; + int currentLongestCommonSequenceLength = 0; + int currentLongestCommonSequenceAtomCount = 0; + int currentI1 = -1; + int currentI2 = -1; + for (int i1 = 0; i1 < cul1.Length; i1++) + { + for (int i2 = 0; i2 < cul2.Length; i2++) + { + var thisSequenceLength = 0; + var thisSequenceAtomCount = 0; + var thisI1 = i1; + var thisI2 = i2; + while (true) + { + var group1 = cul1[thisI1] as ComparisonUnitGroup; + var group2 = cul2[thisI2] as ComparisonUnitGroup; + bool match = group1 != null && + group2 != null && + group1.ComparisonUnitGroupType == group2.ComparisonUnitGroupType && + group1.CorrelatedSHA1Hash != null && + group2.CorrelatedSHA1Hash != null && + group1.CorrelatedSHA1Hash == group2.CorrelatedSHA1Hash; + + if (match) + { + thisSequenceAtomCount += cul1[thisI1].DescendantContentAtomsCount; + thisI1++; + thisI2++; + thisSequenceLength++; + if (thisI1 == cul1.Length || thisI2 == cul2.Length) + { + if (thisSequenceAtomCount > currentLongestCommonSequenceAtomCount) + { + currentLongestCommonSequenceLength = thisSequenceLength; + currentLongestCommonSequenceAtomCount = thisSequenceAtomCount; + currentI1 = i1; + currentI2 = i2; + } + break; + } + continue; + } + else + { + if (thisSequenceAtomCount > currentLongestCommonSequenceAtomCount) + { + currentLongestCommonSequenceLength = thisSequenceLength; + currentLongestCommonSequenceAtomCount = thisSequenceAtomCount; + currentI1 = i1; + currentI2 = i2; + } + break; + } + } + } + } + + // here we want to have some sort of threshold, and if the currentLongestCommonSequenceLength is not longer than the threshold, then don't do anything + bool doCorrelation = false; + if (currentLongestCommonSequenceLength == 1) + { + var numberOfAtoms1 = unknown.ComparisonUnitArray1[currentI1].DescendantContentAtoms().Count(); + var numberOfAtoms2 = unknown.ComparisonUnitArray2[currentI2].DescendantContentAtoms().Count(); + if (numberOfAtoms1 > 16 && numberOfAtoms2 > 16) + doCorrelation = true; + } + else if (currentLongestCommonSequenceLength > 1 && currentLongestCommonSequenceLength <= 3) + { + var numberOfAtoms1 = unknown.ComparisonUnitArray1.Skip(currentI1).Take(currentLongestCommonSequenceLength).Select(z => z.DescendantContentAtoms().Count()).Sum(); + var numberOfAtoms2 = unknown.ComparisonUnitArray2.Skip(currentI2).Take(currentLongestCommonSequenceLength).Select(z => z.DescendantContentAtoms().Count()).Sum(); + if (numberOfAtoms1 > 32 && numberOfAtoms2 > 32) + doCorrelation = true; + } + else if (currentLongestCommonSequenceLength > 3) + doCorrelation = true; + if (doCorrelation) + { + var newListOfCorrelatedSequence = new List(); + + if (currentI1 > 0 && currentI2 == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence.ComparisonUnitArray1 = cul1 + .Take(currentI1) + .ToArray(); + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (currentI1 == 0 && currentI2 > 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = cul2 + .Take(currentI2) + .ToArray(); + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (currentI1 > 0 && currentI2 > 0) + { + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequence.ComparisonUnitArray1 = cul1 + .Take(currentI1) + .ToArray(); + unknownCorrelatedSequence.ComparisonUnitArray2 = cul2 + .Take(currentI2) + .ToArray(); + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + } + else if (currentI1 == 0 && currentI2 == 0) + { + // nothing to do + } + + for (int i = 0; i < currentLongestCommonSequenceLength; i++) + { + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequence.ComparisonUnitArray1 = cul1 + .Skip(currentI1) + .Skip(i) + .Take(1) + .ToArray(); + unknownCorrelatedSequence.ComparisonUnitArray2 = cul2 + .Skip(currentI2) + .Skip(i) + .Take(1) + .ToArray(); + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + } + + int endI1 = currentI1 + currentLongestCommonSequenceLength; + int endI2 = currentI2 + currentLongestCommonSequenceLength; + + if (endI1 < cul1.Length && endI2 == cul2.Length) + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence.ComparisonUnitArray1 = cul1 + .Skip(endI1) + .ToArray(); + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (endI1 == cul1.Length && endI2 < cul2.Length) + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = cul2 + .Skip(endI2) + .ToArray(); + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (endI1 < cul1.Length && endI2 < cul2.Length) + { + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequence.ComparisonUnitArray1 = cul1 + .Skip(endI1) + .ToArray(); + unknownCorrelatedSequence.ComparisonUnitArray2 = cul2 + .Skip(endI2) + .ToArray(); + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + } + else if (endI1 == cul1.Length && endI2 == cul2.Length) + { + // nothing to do + } + return newListOfCorrelatedSequence; + } + return null; + } + } + return null; + } + + private static List DoLcsAlgorithm(CorrelatedSequence unknown, WmlComparerSettings settings) + { + var newListOfCorrelatedSequence = new List(); + + var cul1 = unknown.ComparisonUnitArray1; + var cul2 = unknown.ComparisonUnitArray2; + + // first thing to do - if we have an unknown with zero length on left or right side, create appropriate + // this is a code optimization that enables easier processing of cases elsewhere. + if (cul1.Length > 0 && cul2.Length == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence.ComparisonUnitArray1 = cul1; + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + return newListOfCorrelatedSequence; + } + else if (cul1.Length == 0 && cul2.Length > 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = cul2; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + return newListOfCorrelatedSequence; + } + else if (cul1.Length == 0 && cul2.Length == 0) + { + return newListOfCorrelatedSequence; // this will effectively remove the unknown with no data on either side from the current data model. + } + + int currentLongestCommonSequenceLength = 0; + int currentI1 = -1; + int currentI2 = -1; + for (int i1 = 0; i1 < cul1.Length - currentLongestCommonSequenceLength; i1++) + { + for (int i2 = 0; i2 < cul2.Length - currentLongestCommonSequenceLength; i2++) + { + var thisSequenceLength = 0; + var thisI1 = i1; + var thisI2 = i2; + while (true) + { + if (cul1[thisI1].SHA1Hash == cul2[thisI2].SHA1Hash) + { + thisI1++; + thisI2++; + thisSequenceLength++; + if (thisI1 == cul1.Length || thisI2 == cul2.Length) + { + if (thisSequenceLength > currentLongestCommonSequenceLength) + { + currentLongestCommonSequenceLength = thisSequenceLength; + currentI1 = i1; + currentI2 = i2; + } + break; + } + continue; + } + else + { + if (thisSequenceLength > currentLongestCommonSequenceLength) + { + currentLongestCommonSequenceLength = thisSequenceLength; + currentI1 = i1; + currentI2 = i2; + } + break; + } + } + } + } + + // never start a common section with a paragraph mark. + while (true) + { + if (currentLongestCommonSequenceLength <= 1) + break; + + var firstCommon = cul1[currentI1]; + + var firstCommonWord = firstCommon as ComparisonUnitWord; + if (firstCommonWord == null) + break; + + // if the word contains more than one atom, then not a paragraph mark + if (firstCommonWord.Contents.Count() != 1) + break; + + var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom; + if (firstCommonAtom == null) + break; + + if (firstCommonAtom.ContentElement.Name != W.pPr) + break; + + --currentLongestCommonSequenceLength; + if (currentLongestCommonSequenceLength == 0) + { + currentI1 = -1; + currentI2 = -1; + } + else + { + ++currentI1; + ++currentI2; + } + } + + bool isOnlyParagraphMark = false; + if (currentLongestCommonSequenceLength == 1) + { + var firstCommon = cul1[currentI1]; + + var firstCommonWord = firstCommon as ComparisonUnitWord; + if (firstCommonWord != null) + { + // if the word contains more than one atom, then not a paragraph mark + if (firstCommonWord.Contents.Count() == 1) + { + var firstCommonAtom = firstCommonWord.Contents.First() as ComparisonUnitAtom; + if (firstCommonAtom != null) + { + if (firstCommonAtom.ContentElement.Name == W.pPr) + isOnlyParagraphMark = true; + } + } + } + } + + // don't match just a single character + if (currentLongestCommonSequenceLength == 1) + { + var cuw2 = cul2[currentI2] as ComparisonUnitAtom; + if (cuw2 != null) + { + if (cuw2.ContentElement.Name == W.t && cuw2.ContentElement.Value == " ") + { + currentI1 = -1; + currentI2 = -1; + currentLongestCommonSequenceLength = 0; + } + } + } + + // don't match only word break characters + if (currentLongestCommonSequenceLength > 0 && currentLongestCommonSequenceLength <= 3) + { + var commonSequence = cul1.Skip(currentI1).Take(currentLongestCommonSequenceLength).ToArray(); + // if they are all ComparisonUnitWord objects + var oneIsNotWord = commonSequence.Any(cs => (cs as ComparisonUnitWord) == null); + var allAreWords = !oneIsNotWord; + if (allAreWords) + { + var contentOtherThanWordSplitChars = commonSequence + .Cast() + .Any(cs => + { + var otherThanText = cs.DescendantContentAtoms().Any(dca => dca.ContentElement.Name != W.t); + if (otherThanText) + return true; + var otherThanWordSplit = cs + .DescendantContentAtoms() + .Any(dca => + { + var charValue = dca.ContentElement.Value; + var isWordSplit = settings.WordSeparators.Contains(charValue[0]); + if (isWordSplit) + return false; + return true; + }); + return otherThanWordSplit; + }); + if (! contentOtherThanWordSplitChars) + { + currentI1 = -1; + currentI2 = -1; + currentLongestCommonSequenceLength = 0; + } + } + } + + // if we are only looking at text, and if the longest common subsequence is less than 15% of the whole, then forget it, + // don't find that LCS. + if (!isOnlyParagraphMark && currentLongestCommonSequenceLength > 0) + { + var anyButWord1 = cul1.Any(cu => (cu as ComparisonUnitWord) == null); + var anyButWord2 = cul2.Any(cu => (cu as ComparisonUnitWord) == null); + if (!anyButWord1 && !anyButWord2) + { + var maxLen = Math.Max(cul1.Length, cul2.Length); + if (((double)currentLongestCommonSequenceLength / (double)maxLen) < settings.DetailThreshold) + { + currentI1 = -1; + currentI2 = -1; + currentLongestCommonSequenceLength = 0; + } + } + } + + if (currentI1 == -1 && currentI2 == -1) + { + var leftLength = unknown.ComparisonUnitArray1.Length; + var leftTables = unknown.ComparisonUnitArray1.OfType().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Table).Count(); + var leftRows = unknown.ComparisonUnitArray1.OfType().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Row).Count(); + var leftCells = unknown.ComparisonUnitArray1.OfType().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell).Count(); + var leftParagraphs = unknown.ComparisonUnitArray1.OfType().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph).Count(); + var leftTextboxes = unknown.ComparisonUnitArray1.OfType().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox).Count(); + var leftWords = unknown.ComparisonUnitArray1.OfType().Count(); + + var rightLength = unknown.ComparisonUnitArray2.Length; + var rightTables = unknown.ComparisonUnitArray2.OfType().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Table).Count(); + var rightRows = unknown.ComparisonUnitArray2.OfType().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Row).Count(); + var rightCells = unknown.ComparisonUnitArray2.OfType().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell).Count(); + var rightParagraphs = unknown.ComparisonUnitArray2.OfType().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Paragraph).Count(); + var rightTextboxes = unknown.ComparisonUnitArray2.OfType().Where(l => l.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox).Count(); + var rightWords = unknown.ComparisonUnitArray2.OfType().Count(); + + // if either side has both words, rows and text boxes, then we need to separate out into separate unknown correlated sequences + // group adjacent based on whether word, row, or textbox + // in most cases, the count of groups will be the same, but they may differ + // if the first group on either side is word, then create a deleted or inserted corr sequ for it. + // then have counter on both sides pointing to the first matched pairs of rows + // create an unknown corr sequ for it. + // increment both counters + // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done. + // if both are at the end, then done + // return the new list of corr sequ + + var leftOnlyWordsRowsTextboxes = leftLength == leftWords + leftRows + leftTextboxes; + var rightOnlyWordsRowsTextboxes = rightLength == rightWords + rightRows + rightTextboxes; + if ((leftWords > 0 || rightWords > 0) && + (leftRows > 0 || rightRows > 0 || leftTextboxes > 0 || rightTextboxes > 0) && + (leftOnlyWordsRowsTextboxes && rightOnlyWordsRowsTextboxes)) + { + var leftGrouped = unknown + .ComparisonUnitArray1 + .GroupAdjacent(cu => + { + if (cu is ComparisonUnitWord) + { + return "Word"; + } + else + { + var cug = cu as ComparisonUnitGroup; + if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + return "Row"; + if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox) + return "Textbox"; + throw new OpenXmlPowerToolsException("Internal error"); + } + }) + .ToArray(); + var rightGrouped = unknown + .ComparisonUnitArray2 + .GroupAdjacent(cu => + { + if (cu is ComparisonUnitWord) + { + return "Word"; + } + else + { + var cug = cu as ComparisonUnitGroup; + if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + return "Row"; + if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Textbox) + return "Textbox"; + throw new OpenXmlPowerToolsException("Internal error"); + } + }) + .ToArray(); + int iLeft = 0; + int iRight = 0; + + // create an unknown corr sequ for it. + // increment both counters + // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done. + // if both are at the end, then done + // return the new list of corr sequ + + while (true) + { + if (leftGrouped[iLeft].Key == rightGrouped[iRight].Key) + { + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray(); + unknownCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray(); + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + ++iLeft; + ++iRight; + } + + // have to decide which of the following two branches to do first based on whether the left contains a paragraph mark + // i.e. cant insert a string of deleted text right before a table. + + else if (leftGrouped[iLeft].Key == "Word" && + leftGrouped[iLeft].Select(lg => lg.DescendantContentAtoms()).SelectMany(m => m).Last().ContentElement.Name != W.pPr && + rightGrouped[iRight].Key == "Row") + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + ++iRight; + } + else if (rightGrouped[iRight].Key == "Word" && + rightGrouped[iRight].Select(lg => lg.DescendantContentAtoms()).SelectMany(m => m).Last().ContentElement.Name != W.pPr && + leftGrouped[iLeft].Key == "Row") + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = leftGrouped[iLeft].ToArray(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + ++iLeft; + } + + else if (leftGrouped[iLeft].Key == "Word" && rightGrouped[iRight].Key != "Word") + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray(); + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + ++iLeft; + } + + else if (leftGrouped[iLeft].Key != "Word" && rightGrouped[iRight].Key == "Word") + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + ++iRight; + } + + if (iLeft == leftGrouped.Length && iRight == rightGrouped.Length) + return newListOfCorrelatedSequence; + + // if there is content on the left, but not content on the right + if (iRight == rightGrouped.Length) + { + for (int j = iLeft; j < leftGrouped.Length; j++) + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[j].ToArray(); + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + return newListOfCorrelatedSequence; + } + // there is content on the right but not on the left + else if (iLeft == leftGrouped.Length) + { + for (int j = iRight; j < rightGrouped.Length; j++) + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[j].ToArray(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + return newListOfCorrelatedSequence; + } + // else continue on next round. + } + } + + // if both sides contain tables and paragraphs, then split into multiple unknown corr sequ + if (leftTables > 0 && rightTables > 0 && + leftParagraphs > 0 && rightParagraphs > 0 && + (leftLength > 1 || rightLength > 1)) + { + var leftGrouped = unknown + .ComparisonUnitArray1 + .GroupAdjacent(cu => + { + var cug = cu as ComparisonUnitGroup; + if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Table) + return "Table"; + else + return "Para"; + }) + .ToArray(); + var rightGrouped = unknown + .ComparisonUnitArray2 + .GroupAdjacent(cu => + { + var cug = cu as ComparisonUnitGroup; + if (cug.ComparisonUnitGroupType == ComparisonUnitGroupType.Table) + return "Table"; + else + return "Para"; + }) + .ToArray(); + int iLeft = 0; + int iRight = 0; + + // create an unknown corr sequ for it. + // increment both counters + // if one is at end but the other is not, then tag the remaining content as inserted or deleted, and done. + // if both are at the end, then done + // return the new list of corr sequ + + while (true) + { + if ((leftGrouped[iLeft].Key == "Table" && rightGrouped[iRight].Key == "Table") || + (leftGrouped[iLeft].Key == "Para" && rightGrouped[iRight].Key == "Para")) + { + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray(); + unknownCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray(); + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + ++iLeft; + ++iRight; + } + else if (leftGrouped[iLeft].Key == "Para" && rightGrouped[iRight].Key == "Table") + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[iLeft].ToArray(); + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + ++iLeft; + } + else if (leftGrouped[iLeft].Key == "Table" && rightGrouped[iRight].Key == "Para") + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[iRight].ToArray(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + ++iRight; + } + + if (iLeft == leftGrouped.Length && iRight == rightGrouped.Length) + return newListOfCorrelatedSequence; + + // if there is content on the left, but not content on the right + if (iRight == rightGrouped.Length) + { + for (int j = iLeft; j < leftGrouped.Length; j++) + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.ComparisonUnitArray1 = leftGrouped[j].ToArray(); + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + return newListOfCorrelatedSequence; + } + // there is content on the right but not on the left + else if (iLeft == leftGrouped.Length) + { + for (int j = iRight; j < rightGrouped.Length; j++) + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = rightGrouped[j].ToArray(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + return newListOfCorrelatedSequence; + } + // else continue on next round. + } + } + + // If both sides consists of a single table, and if the table contains merged cells, then mark as deleted/inserted + if (leftTables == 1 && leftLength == 1 && + rightTables == 1 && rightLength == 1) + { + var result = DoLcsAlgorithmForTable(unknown, settings); + if (result != null) + return result; + } + + // If either side contains only paras or tables, then flatten and iterate. + var leftOnlyParasTablesTextboxes = leftLength == leftTables + leftParagraphs + leftTextboxes; + var rightOnlyParasTablesTextboxes = rightLength == rightTables + rightParagraphs + rightTextboxes; + if (leftOnlyParasTablesTextboxes && rightOnlyParasTablesTextboxes) + { + // flatten paras and tables, and iterate + var left = unknown + .ComparisonUnitArray1 + .Select(cu => cu.Contents) + .SelectMany(m => m) + .ToArray(); + + var right = unknown + .ComparisonUnitArray2 + .Select(cu => cu.Contents) + .SelectMany(m => m) + .ToArray(); + + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequence.ComparisonUnitArray1 = left; + unknownCorrelatedSequence.ComparisonUnitArray2 = right; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + + return newListOfCorrelatedSequence; + } + + // if first of left is a row and first of right is a row + // then flatten the row to cells and iterate. + + var firstLeft = unknown + .ComparisonUnitArray1 + .FirstOrDefault() as ComparisonUnitGroup; + + var firstRight = unknown + .ComparisonUnitArray2 + .FirstOrDefault() as ComparisonUnitGroup; + + if (firstLeft != null && firstRight != null) + { + if (firstLeft.ComparisonUnitGroupType == ComparisonUnitGroupType.Row && + firstRight.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + { + ComparisonUnit[] leftContent = firstLeft.Contents.ToArray(); + ComparisonUnit[] rightContent = firstRight.Contents.ToArray(); + + var lenLeft = leftContent.Length; + var lenRight = rightContent.Length; + + if (lenLeft < lenRight) + leftContent = leftContent.Concat(Enumerable.Repeat(null, lenRight - lenLeft)).ToArray(); + else if (lenRight < lenLeft) + rightContent = rightContent.Concat(Enumerable.Repeat(null, lenLeft - lenRight)).ToArray(); + + List newCs = leftContent.Zip(rightContent, (l, r) => + { + if (l != null && r != null) + { + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.ComparisonUnitArray1 = new[] { l }; + unknownCorrelatedSequence.ComparisonUnitArray2 = new[] { r }; + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + return new[] { unknownCorrelatedSequence }; + } + if (l == null) + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = r.Contents.ToArray(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + return new[] { insertedCorrelatedSequence }; + } + else if (r == null) + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.ComparisonUnitArray1 = l.Contents.ToArray(); + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + return new[] { deletedCorrelatedSequence }; + } + else + throw new OpenXmlPowerToolsException("Internal error"); + }) + .SelectMany(m => m) + .ToList(); + + foreach (var cs in newCs) + newListOfCorrelatedSequence.Add(cs); + + var remainderLeft = unknown + .ComparisonUnitArray1 + .Skip(1) + .ToArray(); + + var remainderRight = unknown + .ComparisonUnitArray2 + .Skip(1) + .ToArray(); + + if (remainderLeft.Length > 0 && remainderRight.Length == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence.ComparisonUnitArray1 = remainderLeft; + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (remainderRight.Length > 0 && remainderLeft.Length == 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = remainderRight; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (remainderLeft.Length > 0 && remainderRight.Length > 0) + { + var unknownCorrelatedSequence2 = new CorrelatedSequence(); + unknownCorrelatedSequence2.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequence2.ComparisonUnitArray1 = remainderLeft; + unknownCorrelatedSequence2.ComparisonUnitArray2 = remainderRight; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence2); + } + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in newListOfCorrelatedSequence) + sb.Append(item.ToString()).Append(Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + return newListOfCorrelatedSequence; + } + if (firstLeft.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell && + firstRight.ComparisonUnitGroupType == ComparisonUnitGroupType.Cell) + { + var left = firstLeft + .Contents + .ToArray(); + + var right = firstRight + .Contents + .ToArray(); + + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequence.ComparisonUnitArray1 = left; + unknownCorrelatedSequence.ComparisonUnitArray2 = right; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + + var remainderLeft = unknown + .ComparisonUnitArray1 + .Skip(1) + .ToArray(); + + var remainderRight = unknown + .ComparisonUnitArray2 + .Skip(1) + .ToArray(); + + if (remainderLeft.Length > 0 && remainderRight.Length == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence.ComparisonUnitArray1 = remainderLeft; + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (remainderRight.Length > 0 && remainderLeft.Length == 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = remainderRight; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (remainderLeft.Length > 0 && remainderRight.Length > 0) + { + var unknownCorrelatedSequence2 = new CorrelatedSequence(); + unknownCorrelatedSequence2.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequence2.ComparisonUnitArray1 = remainderLeft; + unknownCorrelatedSequence2.ComparisonUnitArray2 = remainderRight; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence2); + } + + return newListOfCorrelatedSequence; + } + } + + if (unknown.ComparisonUnitArray1.Any() && unknown.ComparisonUnitArray2.Any()) + { + var left = unknown.ComparisonUnitArray1.First() as ComparisonUnitWord; + var right = unknown.ComparisonUnitArray2.First() as ComparisonUnitGroup; + if (left != null && + right != null && + right.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + { + var insertedCorrelatedSequence3 = new CorrelatedSequence(); + insertedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence3.ComparisonUnitArray1 = null; + insertedCorrelatedSequence3.ComparisonUnitArray2 = unknown.ComparisonUnitArray2; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence3); + + var deletedCorrelatedSequence3 = new CorrelatedSequence(); + deletedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence3.ComparisonUnitArray1 = unknown.ComparisonUnitArray1; + deletedCorrelatedSequence3.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence3); + + return newListOfCorrelatedSequence; + } + + var left2 = unknown.ComparisonUnitArray1.First() as ComparisonUnitGroup; + var right2 = unknown.ComparisonUnitArray2.First() as ComparisonUnitWord; + if (right2 != null && + left2 != null && + left2.ComparisonUnitGroupType == ComparisonUnitGroupType.Row) + { + var deletedCorrelatedSequence3 = new CorrelatedSequence(); + deletedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence3.ComparisonUnitArray1 = unknown.ComparisonUnitArray1; + deletedCorrelatedSequence3.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence3); + + var insertedCorrelatedSequence3 = new CorrelatedSequence(); + insertedCorrelatedSequence3.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence3.ComparisonUnitArray1 = null; + insertedCorrelatedSequence3.ComparisonUnitArray2 = unknown.ComparisonUnitArray2; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence3); + + return newListOfCorrelatedSequence; + } + + var lastContentAtomLeft = unknown.ComparisonUnitArray1.Select(cu => cu.DescendantContentAtoms().Last()).LastOrDefault(); + var lastContentAtomRight = unknown.ComparisonUnitArray2.Select(cu => cu.DescendantContentAtoms().Last()).LastOrDefault(); + if (lastContentAtomLeft != null && lastContentAtomRight != null) + { + if (lastContentAtomLeft.ContentElement.Name == W.pPr && + lastContentAtomRight.ContentElement.Name != W.pPr) + { + var insertedCorrelatedSequence5 = new CorrelatedSequence(); + insertedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence5.ComparisonUnitArray1 = null; + insertedCorrelatedSequence5.ComparisonUnitArray2 = unknown.ComparisonUnitArray2; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence5); + + var deletedCorrelatedSequence5 = new CorrelatedSequence(); + deletedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence5.ComparisonUnitArray1 = unknown.ComparisonUnitArray1; + deletedCorrelatedSequence5.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence5); + + return newListOfCorrelatedSequence; + } + else if (lastContentAtomLeft.ContentElement.Name != W.pPr && + lastContentAtomRight.ContentElement.Name == W.pPr) + { + var deletedCorrelatedSequence5 = new CorrelatedSequence(); + deletedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence5.ComparisonUnitArray1 = unknown.ComparisonUnitArray1; + deletedCorrelatedSequence5.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence5); + + var insertedCorrelatedSequence5 = new CorrelatedSequence(); + insertedCorrelatedSequence5.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence5.ComparisonUnitArray1 = null; + insertedCorrelatedSequence5.ComparisonUnitArray2 = unknown.ComparisonUnitArray2; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence5); + + return newListOfCorrelatedSequence; + } + } + } + + var deletedCorrelatedSequence4 = new CorrelatedSequence(); + deletedCorrelatedSequence4.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence4.ComparisonUnitArray1 = unknown.ComparisonUnitArray1; + deletedCorrelatedSequence4.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence4); + + var insertedCorrelatedSequence4 = new CorrelatedSequence(); + insertedCorrelatedSequence4.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence4.ComparisonUnitArray1 = null; + insertedCorrelatedSequence4.ComparisonUnitArray2 = unknown.ComparisonUnitArray2; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence4); + + return newListOfCorrelatedSequence; + } + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // here we have the longest common subsequence. + // but it may start in the middle of a paragraph. + // therefore need to dispose of the content from the beginning of the longest common subsequence to the beginning of the paragraph. + // this should be in a separate unknown region + // if countCommonAtEnd != 0, and if it contains a paragraph mark, then if there are comparison units in the same paragraph before the common at end (in either version) + // then we want to put all of those comparison units into a single unknown, where they must be resolved against each other. We don't want those + // comparison units to go into the middle unknown comparison unit. + + int remainingInLeftParagraph = 0; + int remainingInRightParagraph = 0; + if (currentLongestCommonSequenceLength != 0) + { + var commonSeq = unknown + .ComparisonUnitArray1 + .Skip(currentI1) + .Take(currentLongestCommonSequenceLength) + .ToList(); + var firstOfCommonSeq = commonSeq.First(); + if (firstOfCommonSeq is ComparisonUnitWord) + { + // are there any paragraph marks in the common seq at end? + if (commonSeq.Any(cu => + { + var firstComparisonUnitAtom = cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return false; + return firstComparisonUnitAtom.ContentElement.Name == W.pPr; + })) + { + remainingInLeftParagraph = unknown + .ComparisonUnitArray1 + .Take(currentI1) + .Reverse() + .TakeWhile(cu => + { + if (!(cu is ComparisonUnitWord)) + return false; + var firstComparisonUnitAtom = cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return true; + return firstComparisonUnitAtom.ContentElement.Name != W.pPr; + }) + .Count(); + remainingInRightParagraph = unknown + .ComparisonUnitArray2 + .Take(currentI2) + .Reverse() + .TakeWhile(cu => + { + if (!(cu is ComparisonUnitWord)) + return false; + var firstComparisonUnitAtom = cu.Contents.OfType().FirstOrDefault(); + if (firstComparisonUnitAtom == null) + return true; + return firstComparisonUnitAtom.ContentElement.Name != W.pPr; + }) + .Count(); + } + } + } + + var countBeforeCurrentParagraphLeft = currentI1 - remainingInLeftParagraph; + var countBeforeCurrentParagraphRight = currentI2 - remainingInRightParagraph; + + if (countBeforeCurrentParagraphLeft > 0 && countBeforeCurrentParagraphRight == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence.ComparisonUnitArray1 = cul1 + .Take(countBeforeCurrentParagraphLeft) + .ToArray(); + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (countBeforeCurrentParagraphLeft == 0 && countBeforeCurrentParagraphRight > 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = cul2 + .Take(countBeforeCurrentParagraphRight) + .ToArray(); + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (countBeforeCurrentParagraphLeft > 0 && countBeforeCurrentParagraphRight > 0) + { + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequence.ComparisonUnitArray1 = cul1 + .Take(countBeforeCurrentParagraphLeft) + .ToArray(); + unknownCorrelatedSequence.ComparisonUnitArray2 = cul2 + .Take(countBeforeCurrentParagraphRight) + .ToArray(); + + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + } + else if (countBeforeCurrentParagraphLeft == 0 && countBeforeCurrentParagraphRight == 0) + { + // nothing to do + } + + if (remainingInLeftParagraph > 0 && remainingInRightParagraph == 0) + { + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + deletedCorrelatedSequence.ComparisonUnitArray1 = cul1 + .Skip(countBeforeCurrentParagraphLeft) + .Take(remainingInLeftParagraph) + .ToArray(); + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + } + else if (remainingInLeftParagraph == 0 && remainingInRightParagraph > 0) + { + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = cul2 + .Skip(countBeforeCurrentParagraphRight) + .Take(remainingInRightParagraph) + .ToArray(); + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + } + else if (remainingInLeftParagraph > 0 && remainingInRightParagraph > 0) + { + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequence.ComparisonUnitArray1 = cul1 + .Skip(countBeforeCurrentParagraphLeft) + .Take(remainingInLeftParagraph) + .ToArray(); + unknownCorrelatedSequence.ComparisonUnitArray2 = cul2 + .Skip(countBeforeCurrentParagraphRight) + .Take(remainingInRightParagraph) + .ToArray(); + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence); + } + else if (remainingInLeftParagraph == 0 && remainingInRightParagraph == 0) + { + // nothing to do + } + + var middleEqual = new CorrelatedSequence(); + middleEqual.CorrelationStatus = CorrelationStatus.Equal; + middleEqual.ComparisonUnitArray1 = cul1 + .Skip(currentI1) + .Take(currentLongestCommonSequenceLength) + .ToArray(); + middleEqual.ComparisonUnitArray2 = cul2 + .Skip(currentI2) + .Take(currentLongestCommonSequenceLength) + .ToArray(); + newListOfCorrelatedSequence.Add(middleEqual); + + + int endI1 = currentI1 + currentLongestCommonSequenceLength; + int endI2 = currentI2 + currentLongestCommonSequenceLength; + + var remaining1 = cul1 + .Skip(endI1) + .ToArray(); + + var remaining2 = cul2 + .Skip(endI2) + .ToArray(); + + // here is the point that we want to make a new unknown from this point to the end of the paragraph that contains the equal parts. + // this will never hurt anything, and will in many cases result in a better difference. + + var leftCuw = middleEqual.ComparisonUnitArray1[middleEqual.ComparisonUnitArray1.Length - 1] as ComparisonUnitWord; + if (leftCuw != null) + { + var lastContentAtom = leftCuw.DescendantContentAtoms().LastOrDefault(); + // if the middleEqual did not end with a paragraph mark + if (lastContentAtom != null && lastContentAtom.ContentElement.Name != W.pPr) + { + int idx1 = FindIndexOfNextParaMark(remaining1); + int idx2 = FindIndexOfNextParaMark(remaining2); + + var unknownCorrelatedSequenceRemaining = new CorrelatedSequence(); + unknownCorrelatedSequenceRemaining.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequenceRemaining.ComparisonUnitArray1 = remaining1.Take(idx1).ToArray(); + unknownCorrelatedSequenceRemaining.ComparisonUnitArray2 = remaining2.Take(idx2).ToArray(); + newListOfCorrelatedSequence.Add(unknownCorrelatedSequenceRemaining); + + var unknownCorrelatedSequenceAfter = new CorrelatedSequence(); + unknownCorrelatedSequenceAfter.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequenceAfter.ComparisonUnitArray1 = remaining1.Skip(idx1).ToArray(); + unknownCorrelatedSequenceAfter.ComparisonUnitArray2 = remaining2.Skip(idx2).ToArray(); + newListOfCorrelatedSequence.Add(unknownCorrelatedSequenceAfter); + + return newListOfCorrelatedSequence; + } + } + + var unknownCorrelatedSequence20 = new CorrelatedSequence(); + unknownCorrelatedSequence20.CorrelationStatus = CorrelationStatus.Unknown; + unknownCorrelatedSequence20.ComparisonUnitArray1 = remaining1; + unknownCorrelatedSequence20.ComparisonUnitArray2 = remaining2; + newListOfCorrelatedSequence.Add(unknownCorrelatedSequence20); + + return newListOfCorrelatedSequence; + } + + private static int FindIndexOfNextParaMark(ComparisonUnit[] cul) + { + for (int i = 0; i < cul.Length; i++) + { + var cuw = cul[i] as ComparisonUnitWord; + var lastAtom = cuw.DescendantContentAtoms().LastOrDefault(); + if (lastAtom.ContentElement.Name == W.pPr) + return i; + } + return cul.Length; + } + + private static List DoLcsAlgorithmForTable(CorrelatedSequence unknown, WmlComparerSettings settings) + { + List newListOfCorrelatedSequence = new List(); + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // if we have a table with the same number of rows, and all rows have equal CorrelatedSHA1Hash, then we can flatten and compare every corresponding row. + // This is true regardless of whether there are horizontally or vertically merged cells, since that characteristic is incorporated into the CorrespondingSHA1Hash. + // This is probably not very common, but it will never do any harm. + var tblGroup1 = unknown.ComparisonUnitArray1.First() as ComparisonUnitGroup; + var tblGroup2 = unknown.ComparisonUnitArray2.First() as ComparisonUnitGroup; + if (tblGroup1.Contents.Count() == tblGroup2.Contents.Count()) // if there are the same number of rows + { + var zipped = tblGroup1.Contents.Zip(tblGroup2.Contents, (r1, r2) => new + { + Row1 = r1 as ComparisonUnitGroup, + Row2 = r2 as ComparisonUnitGroup, + }); + var canCollapse = true; + if (zipped.Any(z => z.Row1.CorrelatedSHA1Hash != z.Row2.CorrelatedSHA1Hash)) + canCollapse = false; + if (canCollapse) + { + newListOfCorrelatedSequence = zipped + .Select(z => + { + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.ComparisonUnitArray1 = new[] { z.Row1 }; + unknownCorrelatedSequence.ComparisonUnitArray2 = new[] { z.Row2 }; + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + return unknownCorrelatedSequence; + }) + .ToList(); + return newListOfCorrelatedSequence; + } + } + + var firstContentAtom1 = tblGroup1.DescendantContentAtoms().FirstOrDefault(); + if (firstContentAtom1 == null) + throw new OpenXmlPowerToolsException("Internal error"); + var tblElement1 = firstContentAtom1 + .AncestorElements + .Reverse() + .FirstOrDefault(a => a.Name == W.tbl); + + var firstContentAtom2 = tblGroup2.DescendantContentAtoms().FirstOrDefault(); + if (firstContentAtom2 == null) + throw new OpenXmlPowerToolsException("Internal error"); + var tblElement2 = firstContentAtom2 + .AncestorElements + .Reverse() + .FirstOrDefault(a => a.Name == W.tbl); + + var leftContainsMerged = tblElement1 + .Descendants() + .Any(d => d.Name == W.vMerge || d.Name == W.gridSpan); + + var rightContainsMerged = tblElement2 + .Descendants() + .Any(d => d.Name == W.vMerge || d.Name == W.gridSpan); + + if (leftContainsMerged || rightContainsMerged) + { + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // If StructureSha1Hash is the same for both tables, then we know that the structure of the tables is identical, so we can break into correlated sequences for rows. + if (tblGroup1.StructureSHA1Hash != null && + tblGroup2.StructureSHA1Hash != null && + tblGroup1.StructureSHA1Hash == tblGroup2.StructureSHA1Hash) + { + var zipped = tblGroup1.Contents.Zip(tblGroup2.Contents, (r1, r2) => new + { + Row1 = r1 as ComparisonUnitGroup, + Row2 = r2 as ComparisonUnitGroup, + }); + newListOfCorrelatedSequence = zipped + .Select(z => + { + var unknownCorrelatedSequence = new CorrelatedSequence(); + unknownCorrelatedSequence.ComparisonUnitArray1 = new[] { z.Row1 }; + unknownCorrelatedSequence.ComparisonUnitArray2 = new[] { z.Row2 }; + unknownCorrelatedSequence.CorrelationStatus = CorrelationStatus.Unknown; + return unknownCorrelatedSequence; + }) + .ToList(); + return newListOfCorrelatedSequence; + } + + // otherwise flatten to rows + var deletedCorrelatedSequence = new CorrelatedSequence(); + deletedCorrelatedSequence.ComparisonUnitArray1 = unknown + .ComparisonUnitArray1 + .Select(z => z.Contents) + .SelectMany(m => m) + .ToArray(); + deletedCorrelatedSequence.ComparisonUnitArray2 = null; + deletedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Deleted; + newListOfCorrelatedSequence.Add(deletedCorrelatedSequence); + + var insertedCorrelatedSequence = new CorrelatedSequence(); + insertedCorrelatedSequence.ComparisonUnitArray1 = null; + insertedCorrelatedSequence.ComparisonUnitArray2 = unknown + .ComparisonUnitArray2 + .Select(z => z.Contents) + .SelectMany(m => m) + .ToArray(); + insertedCorrelatedSequence.CorrelationStatus = CorrelationStatus.Inserted; + newListOfCorrelatedSequence.Add(insertedCorrelatedSequence); + + return newListOfCorrelatedSequence; + } + return null; + } + + private static XName[] WordBreakElements = new XName[] { + W.pPr, + W.tab, + W.br, + W.continuationSeparator, + W.cr, + W.dayLong, + W.dayShort, + W.drawing, + W.pict, + W.endnoteRef, + W.footnoteRef, + W.monthLong, + W.monthShort, + W.noBreakHyphen, + W._object, + W.ptab, + W.separator, + W.sym, + W.yearLong, + W.yearShort, + M.oMathPara, + M.oMath, + W.footnoteReference, + W.endnoteReference, + }; + + private class Atgbw + { + public int? Key; + public ComparisonUnitAtom ComparisonUnitAtomMember; + public int NextIndex; + } + + private static ComparisonUnit[] GetComparisonUnitList(ComparisonUnitAtom[] comparisonUnitAtomList, WmlComparerSettings settings) + { + var seed = new Atgbw() + { + Key = null, + ComparisonUnitAtomMember = null, + NextIndex = 0, + }; + + var groupingKey = comparisonUnitAtomList + .Rollup(seed, (sr, prevAtgbw, i) => + { + int? key = null; + var nextIndex = prevAtgbw.NextIndex; + if (sr.ContentElement.Name == W.t) + { + string chr = sr.ContentElement.Value; + var ch = chr[0]; + if (ch == '.' || ch == ',') + { + bool beforeIsDigit = false; + if (i > 0) + { + var prev = comparisonUnitAtomList[i - 1]; + if (prev.ContentElement.Name == W.t && char.IsDigit(prev.ContentElement.Value[0])) + beforeIsDigit = true; + } + bool afterIsDigit = false; + if (i < comparisonUnitAtomList.Length - 1) + { + var next = comparisonUnitAtomList[i + 1]; + if (next.ContentElement.Name == W.t && char.IsDigit(next.ContentElement.Value[0])) + afterIsDigit = true; + } + if (beforeIsDigit || afterIsDigit) + { + key = nextIndex; + } + else + { + nextIndex++; + key = nextIndex; + nextIndex++; + } + } + else if (settings.WordSeparators.Contains(ch)) + { + nextIndex++; + key = nextIndex; + nextIndex++; + } + else + { + key = nextIndex; + } + } + else if (WordBreakElements.Contains(sr.ContentElement.Name)) + { + nextIndex++; + key = nextIndex; + nextIndex++; + } + else + { + key = nextIndex; + } + return new Atgbw() + { + Key = key, + ComparisonUnitAtomMember = sr, + NextIndex = nextIndex, + }; + }); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in groupingKey) + { + sb.Append(item.Key + Environment.NewLine); + sb.Append(" " + item.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine); + } + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var groupedByWords = groupingKey + .GroupAdjacent(gc => gc.Key); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var group in groupedByWords) + { + sb.Append("Group ===== " + group.Key + Environment.NewLine); + foreach (var gc in group) + { + sb.Append(" " + gc.ComparisonUnitAtomMember.ToString(0) + Environment.NewLine); + } + } + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var withHierarchicalGroupingKey = groupedByWords + .Select(g => + { + var hierarchicalGroupingArray = g + .First() + .ComparisonUnitAtomMember + .AncestorElements + .Where(a => ComparisonGroupingElements.Contains(a.Name)) + .Select(a => a.Name.LocalName + ":" + (string)a.Attribute(PtOpenXml.Unid)) + .ToArray(); + + return new WithHierarchicalGroupingKey() + { + ComparisonUnitWord = new ComparisonUnitWord(g.Select(gc => gc.ComparisonUnitAtomMember)), + HierarchicalGroupingArray = hierarchicalGroupingArray, + }; + } + ) + .ToArray(); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var group in withHierarchicalGroupingKey) + { + sb.Append("Grouping Array: " + group.HierarchicalGroupingArray.Select(gam => gam + " - ").StringConcatenate() + Environment.NewLine); + foreach (var gc in group.ComparisonUnitWord.Contents) + { + sb.Append(" " + gc.ToString(0) + Environment.NewLine); + } + } + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + var cul = GetHierarchicalComparisonUnits(withHierarchicalGroupingKey, 0).ToArray(); + + if (s_False) + { + var str = ComparisonUnit.ComparisonUnitListToString(cul); + TestUtil.NotePad(str); + } + + return cul; + } + + private static IEnumerable GetHierarchicalComparisonUnits(IEnumerable input, int level) + { + var grouped = input + .GroupAdjacent(whgk => + { + if (level >= whgk.HierarchicalGroupingArray.Length) + return ""; + return whgk.HierarchicalGroupingArray[level]; + }); + var retList = grouped + .Select(gc => + { + if (gc.Key == "") + { + return (IEnumerable)gc.Select(whgk => whgk.ComparisonUnitWord).ToList(); + } + else + { + ComparisonUnitGroupType? group = null; + var spl = gc.Key.Split(':'); + if (spl[0] == "p") + group = ComparisonUnitGroupType.Paragraph; + else if (spl[0] == "tbl") + group = ComparisonUnitGroupType.Table; + else if (spl[0] == "tr") + group = ComparisonUnitGroupType.Row; + else if (spl[0] == "tc") + group = ComparisonUnitGroupType.Cell; + else if (spl[0] == "txbxContent") + group = ComparisonUnitGroupType.Textbox; + var childHierarchicalComparisonUnits = GetHierarchicalComparisonUnits(gc, level + 1); + var newCompUnitGroup = new ComparisonUnitGroup(childHierarchicalComparisonUnits, (ComparisonUnitGroupType)group, level); + return new[] { newCompUnitGroup }; + } + }) + .SelectMany(m => m) + .ToList(); + return retList; + } + + private static XName[] AllowableRunChildren = new XName[] { + W.br, + W.drawing, + W.cr, + W.dayLong, + W.dayShort, + W.footnoteReference, + W.endnoteReference, + W.monthLong, + W.monthShort, + W.noBreakHyphen, + //W._object, + W.pgNum, + W.ptab, + W.softHyphen, + W.sym, + W.tab, + W.yearLong, + W.yearShort, + M.oMathPara, + M.oMath, + W.fldChar, + W.instrText, + }; + + private static XName[] ElementsToThrowAway = new XName[] { + W.bookmarkStart, + W.bookmarkEnd, + W.commentRangeStart, + W.commentRangeEnd, + W.lastRenderedPageBreak, + W.proofErr, + W.tblPr, + W.sectPr, + W.permEnd, + W.permStart, + W.footnoteRef, + W.endnoteRef, + W.separator, + W.continuationSeparator, + }; + + private static XName[] ElementsToHaveSha1Hash = new XName[] + { + W.p, + W.tbl, + W.tr, + W.tc, + W.drawing, + W.pict, + W.txbxContent, + }; + + private static XName[] InvalidElements = new XName[] + { + W.altChunk, + W.customXml, + W.customXmlDelRangeEnd, + W.customXmlDelRangeStart, + W.customXmlInsRangeEnd, + W.customXmlInsRangeStart, + W.customXmlMoveFromRangeEnd, + W.customXmlMoveFromRangeStart, + W.customXmlMoveToRangeEnd, + W.customXmlMoveToRangeStart, + W.moveFrom, + W.moveFromRangeStart, + W.moveFromRangeEnd, + W.moveTo, + W.moveToRangeStart, + W.moveToRangeEnd, + W.subDoc, + }; + + private class RecursionInfo + { + public XName ElementName; + public XName[] ChildElementPropertyNames; + } + + private static RecursionInfo[] RecursionElements = new RecursionInfo[] + { + new RecursionInfo() + { + ElementName = W.del, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = W.ins, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = W.tbl, + ChildElementPropertyNames = new[] { W.tblPr, W.tblGrid, W.tblPrEx }, + }, + new RecursionInfo() + { + ElementName = W.tr, + ChildElementPropertyNames = new[] { W.trPr, W.tblPrEx }, + }, + new RecursionInfo() + { + ElementName = W.tc, + ChildElementPropertyNames = new[] { W.tcPr, W.tblPrEx }, + }, + new RecursionInfo() + { + ElementName = W.pict, + ChildElementPropertyNames = new[] { VML.shapetype }, + }, + new RecursionInfo() + { + ElementName = VML.group, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = VML.shape, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = VML.rect, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = VML.textbox, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = O._lock, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = W.txbxContent, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = W10.wrap, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = W.sdt, + ChildElementPropertyNames = new[] { W.sdtPr, W.sdtEndPr }, + }, + new RecursionInfo() + { + ElementName = W.sdtContent, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = W.hyperlink, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = W.fldSimple, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = VML.shapetype, + ChildElementPropertyNames = null, + }, + new RecursionInfo() + { + ElementName = W.smartTag, + ChildElementPropertyNames = new[] { W.smartTagPr }, + }, + new RecursionInfo() + { + ElementName = W.ruby, + ChildElementPropertyNames = new[] { W.rubyPr }, + }, + }; + + internal static ComparisonUnitAtom[] CreateComparisonUnitAtomList(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings) + { + VerifyNoInvalidContent(contentParent); + AssignUnidToAllElements(contentParent); // add the Guid id to every element + MoveLastSectPrIntoLastParagraph(contentParent); + var cal = CreateComparisonUnitAtomListInternal(part, contentParent, settings).ToArray(); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var item in cal) + sb.Append(item.ToString() + Environment.NewLine); + var sbs = sb.ToString(); + TestUtil.NotePad(sbs); + } + + return cal; + } + + private static void VerifyNoInvalidContent(XElement contentParent) + { + var invalidElement = contentParent.Descendants().FirstOrDefault(d => InvalidElements.Contains(d.Name)); + if (invalidElement == null) + return; + throw new NotSupportedException("Document contains " + invalidElement.Name.LocalName); + } + + internal static XDocument Coalesce(ComparisonUnitAtom[] comparisonUnitAtomList) + { + XDocument newXDoc = new XDocument(); + var newBodyChildren = CoalesceRecurse(comparisonUnitAtomList, 0); + newXDoc.Add(new XElement(W.document, + new XAttribute(XNamespace.Xmlns + "w", W.w.NamespaceName), + new XAttribute(XNamespace.Xmlns + "pt14", PtOpenXml.pt.NamespaceName), + new XElement(W.body, newBodyChildren))); + + // little bit of cleanup + MoveLastSectPrToChildOfBody(newXDoc); + XElement newXDoc2Root = (XElement)WordprocessingMLUtil.WmlOrderElementsPerStandard(newXDoc.Root); + newXDoc.Root.ReplaceWith(newXDoc2Root); + return newXDoc; + } + + private static object CoalesceRecurse(IEnumerable list, int level) + { + var grouped = list + .GroupBy(sr => + { + // per the algorithm, The following condition will never evaluate to true + // if it evaluates to true, then the basic mechanism for breaking a hierarchical structure into flat and back is broken. + + // for a table, we initially get all ComparisonUnitAtoms for the entire table, then process. When processing a row, + // no ComparisonUnitAtoms will have ancestors outside the row. Ditto for cells, and on down the tree. + if (level >= sr.AncestorElements.Length) + throw new OpenXmlPowerToolsException("Internal error 4 - why do we have ComparisonUnitAtom objects with fewer ancestors than its siblings?"); + + var unid = (string)sr.AncestorElements[level].Attribute(PtOpenXml.Unid); + return unid; + }); + + if (s_False) + { + var sb = new StringBuilder(); + foreach (var group in grouped) + { + sb.AppendFormat("Group Key: {0}", group.Key); + sb.Append(Environment.NewLine); + foreach (var groupChildItem in group) + { + sb.Append(" "); + sb.Append(groupChildItem.ToString(0)); + sb.Append(Environment.NewLine); + } + sb.Append(Environment.NewLine); + } + var sbs = sb.ToString(); + } + + var elementList = grouped + .Select(g => + { + // see the comment above at the beginning of CoalesceRecurse + if (level >= g.First().AncestorElements.Length) + throw new OpenXmlPowerToolsException("Internal error 3 - why do we have ComparisonUnitAtom objects with fewer ancestors than its siblings?"); + + var ancestorBeingConstructed = g.First().AncestorElements[level]; + + if (ancestorBeingConstructed.Name == W.p) + { + var groupedChildren = g + .GroupAdjacent(gc => gc.ContentElement.Name.ToString()); + var newChildElements = groupedChildren + .Where(gc => gc.First().ContentElement.Name != W.pPr) + .Select(gc => + { + return CoalesceRecurse(gc, level + 1); + }); + var newParaProps = groupedChildren + .Where(gc => gc.First().ContentElement.Name == W.pPr) + .Select(gc => gc.Select(gce => gce.ContentElement)); + return new XElement(W.p, + ancestorBeingConstructed.Attributes(), + newParaProps, newChildElements); + } + + if (ancestorBeingConstructed.Name == W.r) + { + var groupedChildren = g + .GroupAdjacent(gc => gc.ContentElement.Name.ToString()); + var newChildElements = groupedChildren + .Select(gc => + { + var name = gc.First().ContentElement.Name; + if (name == W.t || name == W.delText) + { + var textOfTextElement = gc.Select(gce => gce.ContentElement.Value).StringConcatenate(); + return (object)(new XElement(name, + GetXmlSpaceAttribute(textOfTextElement), + textOfTextElement)); + } + else + return gc.Select(gce => gce.ContentElement); + }); + var runProps = ancestorBeingConstructed.Elements(W.rPr); + return new XElement(W.r, runProps, newChildElements); + } + + var re = RecursionElements.FirstOrDefault(z => z.ElementName == ancestorBeingConstructed.Name); + if (re != null) + { + return ReconstructElement(g, ancestorBeingConstructed, re.ChildElementPropertyNames, level); + } + + var newElement = new XElement(ancestorBeingConstructed.Name, + ancestorBeingConstructed.Attributes(), + CoalesceRecurse(g, level + 1)); + + return newElement; + }) + .ToList(); + return elementList; + } + + private static XElement ReconstructElement(IGrouping g, XElement ancestorBeingConstructed, XName[] childPropElementNames, int level) + { + var newChildElements = CoalesceRecurse(g, level + 1); + IEnumerable childProps = null; + if (childPropElementNames != null) + childProps = ancestorBeingConstructed.Elements() + .Where(a => childPropElementNames.Contains(a.Name)); + + var reconstructedElement = new XElement(ancestorBeingConstructed.Name, childProps, newChildElements); + return reconstructedElement; + } + + private static void MoveLastSectPrIntoLastParagraph(XElement contentParent) + { + var lastSectPrList = contentParent.Elements(W.sectPr).ToList(); + if (lastSectPrList.Count() > 1) + throw new OpenXmlPowerToolsException("Invalid document"); + var lastSectPr = lastSectPrList.FirstOrDefault(); + if (lastSectPr != null) + { + var lastParagraph = contentParent.Elements(W.p).LastOrDefault(); + if (lastParagraph == null) + throw new OpenXmlPowerToolsException("Invalid document"); + var pPr = lastParagraph.Element(W.pPr); + if (pPr == null) + { + pPr = new XElement(W.pPr); + lastParagraph.AddFirst(W.pPr); + } + pPr.Add(lastSectPr); + contentParent.Elements(W.sectPr).Remove(); + } + } + + private static List CreateComparisonUnitAtomListInternal(OpenXmlPart part, XElement contentParent, WmlComparerSettings settings) + { + var comparisonUnitAtomList = new List(); + CreateComparisonUnitAtomListRecurse(part, contentParent, comparisonUnitAtomList, settings); + return comparisonUnitAtomList; + } + + private static XName[] ComparisonGroupingElements = new[] { + W.p, + W.tbl, + W.tr, + W.tc, + W.txbxContent, + }; + + private static void CreateComparisonUnitAtomListRecurse(OpenXmlPart part, XElement element, List comparisonUnitAtomList, WmlComparerSettings settings) + { + if (element.Name == W.body || element.Name == W.footnote || element.Name == W.endnote) + { + foreach (var item in element.Elements()) + CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings); + return; + } + + if (element.Name == W.p) + { + var paraChildrenToProcess = element + .Elements() + .Where(e => e.Name != W.pPr); + foreach (var item in paraChildrenToProcess) + CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings); + var paraProps = element.Element(W.pPr); + if (paraProps == null) + { + ComparisonUnitAtom pPrComparisonUnitAtom = new ComparisonUnitAtom( + new XElement(W.pPr), + element.AncestorsAndSelf().TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes).Reverse().ToArray(), + part, + settings); + comparisonUnitAtomList.Add(pPrComparisonUnitAtom); + } + else + { + ComparisonUnitAtom pPrComparisonUnitAtom = new ComparisonUnitAtom( + paraProps, + element.AncestorsAndSelf().TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes).Reverse().ToArray(), + part, + settings); + comparisonUnitAtomList.Add(pPrComparisonUnitAtom); + } + return; + } + + if (element.Name == W.r) + { + var runChildrenToProcess = element + .Elements() + .Where(e => e.Name != W.rPr); + foreach (var item in runChildrenToProcess) + CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings); + return; + } + + if (element.Name == W.t || element.Name == W.delText) + { + var val = element.Value; + foreach (var ch in val) + { + ComparisonUnitAtom sr = new ComparisonUnitAtom( + new XElement(element.Name, ch), + element.AncestorsAndSelf().TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes).Reverse().ToArray(), + part, + settings); + comparisonUnitAtomList.Add(sr); + } + return; + } + + if (AllowableRunChildren.Contains(element.Name) || element.Name == W._object) + { + ComparisonUnitAtom sr3 = new ComparisonUnitAtom( + element, + element.AncestorsAndSelf().TakeWhile(a => a.Name != W.body && a.Name != W.footnotes && a.Name != W.endnotes).Reverse().ToArray(), + part, + settings); + comparisonUnitAtomList.Add(sr3); + return; + } + + var re = RecursionElements.FirstOrDefault(z => z.ElementName == element.Name); + if (re != null) + { + AnnotateElementWithProps(part, element, comparisonUnitAtomList, re.ChildElementPropertyNames, settings); + return; + } + + if (ElementsToThrowAway.Contains(element.Name)) + return; + + AnnotateElementWithProps(part, element, comparisonUnitAtomList, null, settings); + } + + private static void AnnotateElementWithProps(OpenXmlPart part, XElement element, List comparisonUnitAtomList, XName[] childElementPropertyNames, WmlComparerSettings settings) + { + IEnumerable runChildrenToProcess = null; + if (childElementPropertyNames == null) + runChildrenToProcess = element.Elements(); + else + runChildrenToProcess = element + .Elements() + .Where(e => !childElementPropertyNames.Contains(e.Name)); + + foreach (var item in runChildrenToProcess) + CreateComparisonUnitAtomListRecurse(part, item, comparisonUnitAtomList, settings); + } + + + + private static void AssignUnidToAllElements(XElement contentParent) + { + var content = contentParent.Descendants(); + foreach (var d in content) + { + if (d.Attribute(PtOpenXml.Unid) == null) + { + string unid = Guid.NewGuid().ToString().Replace("-", ""); + var newAtt = new XAttribute(PtOpenXml.Unid, unid); + d.Add(newAtt); + } + } + } + } + + internal class WithHierarchicalGroupingKey + { + public string[] HierarchicalGroupingArray; + public ComparisonUnitWord ComparisonUnitWord; + } + + public abstract class ComparisonUnit + { + public List Contents; + public string SHA1Hash; + public CorrelationStatus CorrelationStatus; + + public IEnumerable Descendants() + { + List comparisonUnitList = new List(); + DescendantsInternal(this, comparisonUnitList); + return comparisonUnitList; + } + + public IEnumerable DescendantContentAtoms() + { + return Descendants().OfType(); + } + + private int? m_DescendantContentAtomsCount = null; + + public int DescendantContentAtomsCount + { + get + { + if (m_DescendantContentAtomsCount != null) + return (int)m_DescendantContentAtomsCount; + m_DescendantContentAtomsCount = this.DescendantContentAtoms().Count(); + return (int)m_DescendantContentAtomsCount; + } + } + + private void DescendantsInternal(ComparisonUnit comparisonUnit, List comparisonUnitList) + { + foreach (var cu in comparisonUnit.Contents) + { + comparisonUnitList.Add(cu); + if (cu.Contents != null && cu.Contents.Any()) + DescendantsInternal(cu, comparisonUnitList); + } + } + + public abstract string ToString(int indent); + + internal static string ComparisonUnitListToString(ComparisonUnit[] cul) + { + var sb = new StringBuilder(); + sb.Append("Dump Comparision Unit List To String" + Environment.NewLine); + foreach (var item in cul) + { + sb.Append(item.ToString(2) + Environment.NewLine); + } + return sb.ToString(); + } + } + + internal class ComparisonUnitWord : ComparisonUnit + { + public ComparisonUnitWord(IEnumerable comparisonUnitAtomList) + { + Contents = comparisonUnitAtomList.OfType().ToList(); + var sha1String = Contents + .Select(c => c.SHA1Hash) + .StringConcatenate(); + SHA1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(sha1String); + } + + public static XName[] s_ElementsWithRelationshipIds = new XName[] { + A.blip, + A.hlinkClick, + A.relIds, + C.chart, + C.externalData, + C.userShapes, + DGM.relIds, + O.OLEObject, + VML.fill, + VML.imagedata, + VML.stroke, + W.altChunk, + W.attachedTemplate, + W.control, + W.dataSource, + W.embedBold, + W.embedBoldItalic, + W.embedItalic, + W.embedRegular, + W.footerReference, + W.headerReference, + W.headerSource, + W.hyperlink, + W.printerSettings, + W.recipientData, + W.saveThroughXslt, + W.sourceFileName, + W.src, + W.subDoc, + WNE.toolbarData, + }; + + public static XName[] s_RelationshipAttributeNames = new XName[] { + R.embed, + R.link, + R.id, + R.cs, + R.dm, + R.lo, + R.qs, + R.href, + R.pict, + }; + + public override string ToString(int indent) + { + var sb = new StringBuilder(); + sb.Append("".PadRight(indent) + "Word SHA1:" + this.SHA1Hash.Substring(0, 8) + Environment.NewLine); + foreach (var comparisonUnitAtom in Contents) + sb.Append(comparisonUnitAtom.ToString(indent + 2) + Environment.NewLine); + return sb.ToString(); + } + } + + class WmlComparerUtil + { + public static string SHA1HashStringForUTF8String(string s) + { + byte[] bytes = Encoding.UTF8.GetBytes(s); + var sha1 = SHA1.Create(); + byte[] hashBytes = sha1.ComputeHash(bytes); + return HexStringFromBytes(hashBytes); + } + + public static string SHA1HashStringForByteArray(byte[] bytes) + { + var sha1 = SHA1.Create(); + byte[] hashBytes = sha1.ComputeHash(bytes); + return HexStringFromBytes(hashBytes); + } + + public static string HexStringFromBytes(byte[] bytes) + { + var sb = new StringBuilder(); + foreach (byte b in bytes) + { + var hex = b.ToString("x2"); + sb.Append(hex); + } + return sb.ToString(); + } + } + + public class ComparisonUnitAtom : ComparisonUnit + { + // AncestorElements are kept in order from the body to the leaf, because this is the order in which we need to access in order + // to reassemble the document. However, in many places in the code, it is necessary to find the nearest ancestor, i.e. cell + // so it is necessary to reverse the order when looking for it, i.e. look from the leaf back to the body element. + + public XElement[] AncestorElements; + public string[] AncestorUnids; + public XElement ContentElement; + public XElement ContentElementBefore; + public ComparisonUnitAtom ComparisonUnitAtomBefore; + public OpenXmlPart Part; + public XElement RevTrackElement; + + public ComparisonUnitAtom(XElement contentElement, XElement[] ancestorElements, OpenXmlPart part, WmlComparerSettings settings) + { + ContentElement = contentElement; + AncestorElements = ancestorElements; + Part = part; + RevTrackElement = GetRevisionTrackingElementFromAncestors(contentElement, AncestorElements); + if (RevTrackElement == null) + { + CorrelationStatus = CorrelationStatus.Equal; + } + else + { + if (RevTrackElement.Name == W.del) + CorrelationStatus = CorrelationStatus.Deleted; + else if (RevTrackElement.Name == W.ins) + CorrelationStatus = CorrelationStatus.Inserted; + } + string sha1Hash = (string)contentElement.Attribute(PtOpenXml.SHA1Hash); + if (sha1Hash != null) + { + SHA1Hash = sha1Hash; + } + else + { + var shaHashString = GetSha1HashStringForElement(ContentElement, settings); + SHA1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaHashString); + } + } + + private string GetSha1HashStringForElement(XElement contentElement, WmlComparerSettings settings) + { + var text = contentElement.Value; + if (settings.CaseInsensitive) + text = text.ToUpper(settings.CultureInfo); + return contentElement.Name.LocalName + text; + } + + private static XElement GetRevisionTrackingElementFromAncestors(XElement contentElement, XElement[] ancestors) + { + XElement revTrackElement = null; + + if (contentElement.Name == W.pPr) + { + revTrackElement = contentElement + .Elements(W.rPr) + .Elements() + .FirstOrDefault(e => e.Name == W.del || e.Name == W.ins); + return revTrackElement; + } + + revTrackElement = ancestors.FirstOrDefault(a => a.Name == W.del || a.Name == W.ins); + return revTrackElement; + } + + public override string ToString(int indent) + { + int xNamePad = 16; + var indentString = "".PadRight(indent); + + var sb = new StringBuilder(); + sb.Append(indentString); + string correlationStatus = ""; + if (CorrelationStatus != CorrelationStatus.Nil) + correlationStatus = string.Format("[{0}] ", CorrelationStatus.ToString().PadRight(8)); + if (ContentElement.Name == W.t || ContentElement.Name == W.delText) + { + sb.AppendFormat("Atom {0}: {1} {2} SHA1:{3} ", PadLocalName(xNamePad, this), ContentElement.Value, correlationStatus, this.SHA1Hash.Substring(0, 8)); + AppendAncestorsDump(sb, this); + } + else + { + sb.AppendFormat("Atom {0}: {1} SHA1:{2} ", PadLocalName(xNamePad, this), correlationStatus, this.SHA1Hash.Substring(0, 8)); + AppendAncestorsDump(sb, this); + } + return sb.ToString(); + } + + public string ToStringAncestorUnids(int indent) + { + int xNamePad = 16; + var indentString = "".PadRight(indent); + + var sb = new StringBuilder(); + sb.Append(indentString); + string correlationStatus = ""; + if (CorrelationStatus != CorrelationStatus.Nil) + correlationStatus = string.Format("[{0}] ", CorrelationStatus.ToString().PadRight(8)); + if (ContentElement.Name == W.t || ContentElement.Name == W.delText) + { + sb.AppendFormat("Atom {0}: {1} {2} SHA1:{3} ", PadLocalName(xNamePad, this), ContentElement.Value, correlationStatus, this.SHA1Hash.Substring(0, 8)); + AppendAncestorsUnidsDump(sb, this); + } + else + { + sb.AppendFormat("Atom {0}: {1} SHA1:{2} ", PadLocalName(xNamePad, this), correlationStatus, this.SHA1Hash.Substring(0, 8)); + AppendAncestorsUnidsDump(sb, this); + } + return sb.ToString(); + } + + public override string ToString() + { + return ToString(0); + } + + public string ToStringAncestorUnids() + { + return ToStringAncestorUnids(0); + } + + private static string PadLocalName(int xNamePad, ComparisonUnitAtom item) + { + return (item.ContentElement.Name.LocalName + " ").PadRight(xNamePad, '-') + " "; + } + + private void AppendAncestorsDump(StringBuilder sb, ComparisonUnitAtom sr) + { + var s = sr.AncestorElements.Select(p => p.Name.LocalName + GetUnid(p) + "/").StringConcatenate().TrimEnd('/'); + sb.Append("Ancestors:" + s); + } + + private void AppendAncestorsUnidsDump(StringBuilder sb, ComparisonUnitAtom sr) + { + var zipped = sr.AncestorElements.Zip(sr.AncestorUnids, (a, u) => new + { + AncestorElement = a, + AncestorUnid = u, + }); + var s = zipped.Select(p => p.AncestorElement.Name.LocalName + "[" + p.AncestorUnid.Substring(0, 8) + "]/").StringConcatenate().TrimEnd('/'); + sb.Append("Ancestors:" + s); + } + + private string GetUnid(XElement p) + { + var unid = (string)p.Attribute(PtOpenXml.Unid); + if (unid == null) + return ""; + return "[" + unid.Substring(0, 8) + "]"; + } + + public static string ComparisonUnitAtomListToString(List comparisonUnitAtomList, int indent) + { + StringBuilder sb = new StringBuilder(); + var cal = comparisonUnitAtomList + .Select((ca, i) => new + { + ComparisonUnitAtom = ca, + Index = i, + }); + foreach (var item in cal) + sb.Append("".PadRight(indent)) + .AppendFormat("[{0:000000}] ", item.Index + 1) + .Append(item.ComparisonUnitAtom.ToString(0) + Environment.NewLine); + return sb.ToString(); + } + } + + internal enum ComparisonUnitGroupType + { + Paragraph, + Table, + Row, + Cell, + Textbox, + }; + + internal class ComparisonUnitGroup : ComparisonUnit + { + public ComparisonUnitGroupType ComparisonUnitGroupType; + public string CorrelatedSHA1Hash; + public string StructureSHA1Hash; + + public ComparisonUnitGroup(IEnumerable comparisonUnitList, ComparisonUnitGroupType groupType, int level) + { + Contents = comparisonUnitList.ToList(); + ComparisonUnitGroupType = groupType; + var first = comparisonUnitList.First(); + ComparisonUnitAtom comparisonUnitAtom = GetFirstComparisonUnitAtomOfGroup(first); + XName ancestorName = null; + if (groupType == ComparisonUnitGroupType.Table) + ancestorName = W.tbl; + else if (groupType == ComparisonUnitGroupType.Row) + ancestorName = W.tr; + else if (groupType == ComparisonUnitGroupType.Cell) + ancestorName = W.tc; + else if (groupType == ComparisonUnitGroupType.Paragraph) + ancestorName = W.p; + else if (groupType == ComparisonUnitGroupType.Textbox) + ancestorName = W.txbxContent; + + var ancestorsToLookAt = comparisonUnitAtom.AncestorElements.Where(ae => ae.Name == W.tbl || ae.Name == W.tr || ae.Name == W.tc || ae.Name == W.p || ae.Name == W.txbxContent).ToArray(); ; + var ancestor = ancestorsToLookAt[level]; + + if (ancestor == null) + throw new OpenXmlPowerToolsException("Internal error: ComparisonUnitGroup"); + SHA1Hash = (string)ancestor.Attribute(PtOpenXml.SHA1Hash); + CorrelatedSHA1Hash = (string)ancestor.Attribute(PtOpenXml.CorrelatedSHA1Hash); + StructureSHA1Hash = (string)ancestor.Attribute(PtOpenXml.StructureSHA1Hash); + } + + public static ComparisonUnitAtom GetFirstComparisonUnitAtomOfGroup(ComparisonUnit group) + { + var thisGroup = group; + while (true) + { + var tg = thisGroup as ComparisonUnitGroup; + if (tg != null) + { + thisGroup = tg.Contents.First(); + continue; + } + var tw = thisGroup as ComparisonUnitWord; + if (tw == null) + throw new OpenXmlPowerToolsException("Internal error: GetFirstComparisonUnitAtomOfGroup"); + var ca = (ComparisonUnitAtom)tw.Contents.First(); + return ca; + } + } + + public override string ToString(int indent) + { + var sb = new StringBuilder(); + sb.Append("".PadRight(indent) + "Group Type: " + ComparisonUnitGroupType.ToString() + " SHA1:" + SHA1Hash + Environment.NewLine); + foreach (var comparisonUnitAtom in Contents) + sb.Append(comparisonUnitAtom.ToString(indent + 2)); + return sb.ToString(); + } + } + + public enum CorrelationStatus + { + Nil, + Normal, + Unknown, + Inserted, + Deleted, + Equal, + Group, + } + + class PartSHA1HashAnnotation + { + public string Hash; + + public PartSHA1HashAnnotation(string hash) + { + Hash = hash; + } + } + + class CorrelatedSequence + { + public CorrelationStatus CorrelationStatus; + + // if ComparisonUnitList1 == null and ComparisonUnitList2 contains sequence, then inserted content. + // if ComparisonUnitList2 == null and ComparisonUnitList1 contains sequence, then deleted content. + // if ComparisonUnitList2 contains sequence and ComparisonUnitList1 contains sequence, then either is Unknown or Equal. + public ComparisonUnit[] ComparisonUnitArray1; + public ComparisonUnit[] ComparisonUnitArray2; +#if DEBUG + public string SourceFile; + public int SourceLine; +#endif + + public CorrelatedSequence() + { +#if DEBUG + SourceFile = new System.Diagnostics.StackTrace(true).GetFrame(1).GetFileName(); + SourceLine = new System.Diagnostics.StackTrace(true).GetFrame(1).GetFileLineNumber(); +#endif + } + + public override string ToString() + { + var sb = new StringBuilder(); + var indentString = " "; + var indentString4 = " "; + sb.Append("CorrelatedSequence =====" + Environment.NewLine); +#if DEBUG + sb.Append(indentString + "Created at Line: " + SourceLine.ToString() + Environment.NewLine); +#endif + sb.Append(indentString + "CorrelatedItem =====" + Environment.NewLine); + sb.Append(indentString4 + "CorrelationStatus: " + CorrelationStatus.ToString() + Environment.NewLine); + if (CorrelationStatus == CorrelationStatus.Equal) + { + sb.Append(indentString4 + "ComparisonUnitList =====" + Environment.NewLine); + foreach (var item in ComparisonUnitArray2) + sb.Append(item.ToString(6) + Environment.NewLine); + } + else + { + if (ComparisonUnitArray1 != null) + { + sb.Append(indentString4 + "ComparisonUnitList1 =====" + Environment.NewLine); + foreach (var item in ComparisonUnitArray1) + sb.Append(item.ToString(6) + Environment.NewLine); + } + if (ComparisonUnitArray2 != null) + { + sb.Append(indentString4 + "ComparisonUnitList2 =====" + Environment.NewLine); + foreach (var item in ComparisonUnitArray2) + sb.Append(item.ToString(6) + Environment.NewLine); + } + } + return sb.ToString(); + } + } +} + diff --git a/OpenXmlPowerTools/Properties/AssemblyInfo.cs b/OpenXmlPowerTools/Properties/AssemblyInfo.cs index 7967d2c0..feebdd2b 100644 --- a/OpenXmlPowerTools/Properties/AssemblyInfo.cs +++ b/OpenXmlPowerTools/Properties/AssemblyInfo.cs @@ -1,6 +1,36 @@ -// Copyright (c) Microsoft. All rights reserved. -// Licensed under the MIT license. See LICENSE file in the project root for full license information. - +using System.Reflection; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("OpenXmlPowerTools")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("OpenXmlPowerTools")] +[assembly: AssemblyCopyright("Copyright © 2020")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. +[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("b9c353a0-d182-46d3-94e0-c11d2a9f2fad")] -[assembly: InternalsVisibleTo("OpenXmlPowerTools.Tests")] +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +// You can specify all the values or you can default the Build and Revision Numbers +// by using the '*' as shown below: +// [assembly: AssemblyVersion("1.0.*")] +[assembly: AssemblyVersion("1.0.0.0")] +[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/OpenXmlPowerTools/PtOpenXmlUtil.cs b/OpenXmlPowerTools/PtOpenXmlUtil.cs index c64fc7f4..8e80aa00 100644 --- a/OpenXmlPowerTools/PtOpenXmlUtil.cs +++ b/OpenXmlPowerTools/PtOpenXmlUtil.cs @@ -742,35 +742,6 @@ public static bool GetBoolProp(XElement runProps, XName xName) return false; } - public static int StringToTwips(string twipsOrPoints) - { - // if the pos value is in points, not twips - if (twipsOrPoints.EndsWith("pt")) - { - decimal decimalValue = decimal.Parse(twipsOrPoints.Substring(0, twipsOrPoints.Length - 2)); - return (int)(decimalValue * 20); - } - return int.Parse(twipsOrPoints); - } - - public static int? AttributeToTwips(XAttribute attribute) - { - if (attribute == null) - { - return null; - } - - string twipsOrPoints = (string)attribute; - - // if the pos value is in points, not twips - if (twipsOrPoints.EndsWith("pt")) - { - decimal decimalValue = decimal.Parse(twipsOrPoints.Substring(0, twipsOrPoints.Length - 2)); - return (int)(decimalValue * 20); - } - return int.Parse(twipsOrPoints); - } - private static readonly List AdditionalRunContainerNames = new List { W.w + "bdo", @@ -797,9 +768,6 @@ public static XElement CoalesceAdjacentRunsWithIdenticalFormatting(XElement runC if (ce.Elements().Count(e => e.Name != W.rPr) != 1) return dontConsolidate; - if (ce.Attribute(PtOpenXml.AbstractNumId) != null) - return dontConsolidate; - XElement rPr = ce.Element(W.rPr); string rPrString = rPr != null ? rPr.ToString(SaveOptions.None) : string.Empty; @@ -919,39 +887,42 @@ public static XElement CoalesceAdjacentRunsWithIdenticalFormatting(XElement runC IEnumerable> statusAtt = g.Select(r => r.Descendants(W.t).Take(1).Attributes(PtOpenXml.Status)); return new XElement(W.r, - g.First().Attributes(), g.First().Elements(W.rPr), new XElement(W.t, statusAtt, xs, textValue)); } if (g.First().Element(W.instrText) != null) return new XElement(W.r, - g.First().Attributes(), g.First().Elements(W.rPr), new XElement(W.instrText, xs, textValue)); } if (g.First().Name == W.ins) { - XElement firstR = g.First().Element(W.r); +#if false + if (g.First().Elements(W.del).Any()) + return new XElement(W.ins, + g.First().Attributes(), + new XElement(W.del, + g.First().Elements(W.del).Attributes(), + new XElement(W.r, + g.First().Elements(W.del).Elements(W.r).Elements(W.rPr), + new XElement(W.delText, xs, textValue)))); +#endif return new XElement(W.ins, g.First().Attributes(), new XElement(W.r, - firstR?.Attributes(), g.First().Elements(W.r).Elements(W.rPr), new XElement(W.t, xs, textValue))); } if (g.First().Name == W.del) - { - XElement firstR = g.First().Element(W.r); return new XElement(W.del, g.First().Attributes(), new XElement(W.r, - firstR?.Attributes(), g.First().Elements(W.r).Elements(W.rPr), new XElement(W.delText, xs, textValue))); - } + return g; })); @@ -5860,8 +5831,6 @@ public static class PtOpenXml public static XName FontName = pt + "FontName"; public static XName LanguageType = pt + "LanguageType"; public static XName AbstractNumId = pt + "AbstractNumId"; - public static XName HtmlStructure = pt + "HtmlStructure"; - public static XName HtmlStyle = pt + "HtmlStyle"; public static XName StyleName = pt + "StyleName"; public static XName TabWidth = pt + "TabWidth"; public static XName Leader = pt + "Leader"; @@ -5881,13 +5850,6 @@ public static class Xhtml public static readonly XName div = xhtml + "div"; public static readonly XName h1 = xhtml + "h1"; public static readonly XName h2 = xhtml + "h2"; - public static readonly XName h3 = xhtml + "h3"; - public static readonly XName h4 = xhtml + "h4"; - public static readonly XName h5 = xhtml + "h5"; - public static readonly XName h6 = xhtml + "h6"; - public static readonly XName h7 = xhtml + "h7"; - public static readonly XName h8 = xhtml + "h8"; - public static readonly XName h9 = xhtml + "h9"; public static readonly XName head = xhtml + "head"; public static readonly XName html = xhtml + "html"; public static readonly XName i = xhtml + "i"; diff --git a/OpenXmlPowerTools/PtUtil.cs b/OpenXmlPowerTools/PtUtil.cs index e5e96fff..e955d7d6 100644 --- a/OpenXmlPowerTools/PtUtil.cs +++ b/OpenXmlPowerTools/PtUtil.cs @@ -8,7 +8,6 @@ using System.Diagnostics.CodeAnalysis; using System.IO; using System.Linq; -using System.Security.Cryptography; using System.Text; using System.Xml; using System.Xml.Linq; @@ -18,32 +17,6 @@ namespace OpenXmlPowerTools { public static class PtUtils { - public static string SHA1HashStringForUTF8String(string s) - { - byte[] bytes = Encoding.UTF8.GetBytes(s); - var sha1 = SHA1.Create(); - byte[] hashBytes = sha1.ComputeHash(bytes); - return HexStringFromBytes(hashBytes); - } - - public static string SHA1HashStringForByteArray(byte[] bytes) - { - var sha1 = SHA1.Create(); - byte[] hashBytes = sha1.ComputeHash(bytes); - return HexStringFromBytes(hashBytes); - } - - public static string HexStringFromBytes(byte[] bytes) - { - var sb = new StringBuilder(); - foreach (byte b in bytes) - { - var hex = b.ToString("x2"); - sb.Append(hex); - } - return sb.ToString(); - } - public static string NormalizeDirName(string dirName) { string d = dirName.Replace('\\', '/'); @@ -797,7 +770,7 @@ public static IEnumerable SequenceAt(this TSource[] source, in yield return source[i++]; } - public static IEnumerable PtSkipLast(this IEnumerable source, int count) + public static IEnumerable SkipLast(this IEnumerable source, int count) { var saveList = new Queue(); var saved = 0; @@ -1169,128 +1142,6 @@ IEnumerator IEnumerable.GetEnumerator() } } - // this is fundamentally the same as PtBucketTimer, except that it is instance based, - // not a static class. - public class BucketTimer - { - public BucketTimer() - { - Buckets = new Dictionary(); - } - - private class BucketInfo - { - public int Count; - public TimeSpan Time; - } - - private string LastBucket = null; - private DateTime LastTime; - private Dictionary Buckets; - - public void Bucket(string bucket) - { - DateTime now = DateTime.Now; - if (LastBucket != null) - AddToBuckets(now); - LastBucket = bucket; - LastTime = now; - } - - public void End() - { - DateTime now = DateTime.Now; - if (LastBucket != null) - AddToBuckets(now); - LastBucket = null; - } - - private void AddToBuckets(DateTime now) - { - TimeSpan d = now - LastTime; - var bucketParts = LastBucket.Split('/'); - var bucketList = bucketParts.Select((t, i) => bucketParts - .Take(i + 1) - .Select(z => z + "/") - .StringConcatenate() - .Trim('/')) - .ToList(); - - foreach (var b in bucketList) - { - if (Buckets.ContainsKey(b)) - { - Buckets[b].Count = Buckets[b].Count + 1; - Buckets[b].Time += d; - } - else - { - Buckets.Add(b, new BucketInfo() - { - Count = 1, - Time = d, - }); - } - } - LastTime = now; - } - - public string DumpBucketsByKey() - { - StringBuilder sb = new StringBuilder(); - foreach (var bucket in Buckets.OrderBy(b => b.Key)) - { - string ts = bucket.Value.Time.ToString(); - if (ts.Contains('.')) - ts = ts.Substring(0, ts.Length - 5); - string s = bucket.Key.PadRight(60, '-') + " " + string.Format("{0:00000000}", bucket.Value.Count) + " " + ts; - sb.Append(s + Environment.NewLine); - } - TimeSpan total = Buckets - .Aggregate(TimeSpan.Zero, (t, b) => t + b.Value.Time); - var tz = total.ToString(); - sb.Append(string.Format("Total: {0}", tz.Substring(0, tz.Length - 5))); - return sb.ToString(); - } - - public string DumpBucketsToCsvByKey() - { - StringBuilder sb = new StringBuilder(); - foreach (var bucket in Buckets.OrderBy(b => b.Key)) - { - string ts = bucket.Value.Time.ToString(); - if (ts.Contains('.')) - ts = ts.Substring(0, ts.Length - 5); - string s = bucket.Key + "," + bucket.Value.Count.ToString() + "," + ts; - sb.Append(s + Environment.NewLine); - } - return sb.ToString(); - } - - public string DumpBucketsByTime() - { - StringBuilder sb = new StringBuilder(); - foreach (var bucket in Buckets.OrderBy(b => b.Value.Time)) - { - string ts = bucket.Value.Time.ToString(); - if (ts.Contains('.')) - ts = ts.Substring(0, ts.Length - 5); - string s = bucket.Key.PadRight(60, '-') + " " + string.Format("{0:00000000}", bucket.Value.Count) + " " + ts; - sb.Append(s + Environment.NewLine); - } - TimeSpan total = Buckets - .Aggregate(TimeSpan.Zero, (t, b) => t + b.Value.Time); - var tz = total.ToString(); - sb.Append(string.Format("Total: {0}", tz.Substring(0, tz.Length - 5))); - return sb.ToString(); - } - - public void Init() - { - Buckets = new Dictionary(); - } - } - public static class PtBucketTimer { private class BucketInfo @@ -1396,7 +1247,7 @@ public static void Init() Buckets = new Dictionary(); } } - + public class XEntity : XText { public override void WriteTo(XmlWriter writer) diff --git a/OpenXmlPowerTools/WmlComparer.cs b/OpenXmlPowerTools/WmlComparer.cs index 9eed4d51..8b5a1631 100644 --- a/OpenXmlPowerTools/WmlComparer.cs +++ b/OpenXmlPowerTools/WmlComparer.cs @@ -3697,6 +3697,7 @@ private static object CloneBlockLevelContentForHashingInternal(OpenXmlPart mainD } var sha1 = PtUtils.SHA1HashStringForByteArray(ba); oxp.AddAnnotation(new PartSHA1HashAnnotation(sha1)); + return new XAttribute(a.Name, sha1); } } @@ -4817,8 +4818,9 @@ private static XElement MoveRelatedPartsToDestination(PackagePart partOfDeletedC newPartXDoc = XDocument.Load(stream); MoveRelatedPartsToDestination(relatedPackagePart, newPart, newPartXDoc.Root); } - using (var stream = newPart.GetStream()) - newPartXDoc.Save(stream); + + using (var partStream = newPart.GetStream()) + newPartXDoc.Save(partStream); } } } diff --git a/OpenXmlPowerTools/WmlToHtmlConverter.cs b/OpenXmlPowerTools/WmlToHtmlConverter.cs index 3d1cb496..8c908cfd 100644 --- a/OpenXmlPowerTools/WmlToHtmlConverter.cs +++ b/OpenXmlPowerTools/WmlToHtmlConverter.cs @@ -1210,7 +1210,7 @@ private static void CreateStyleFromInd(Dictionary style, XElemen : "0"); } - var firstLine = WordprocessingMLUtil.AttributeToTwips(ind.Attribute(W.firstLine)); + var firstLine = (decimal?) ind.Attribute(W.firstLine); if (firstLine != null && elementName != Xhtml.span) { var firstLineInInches = (decimal) firstLine/1440m; @@ -1218,7 +1218,7 @@ private static void CreateStyleFromInd(Dictionary style, XElemen string.Format(NumberFormatInfo.InvariantInfo, "{0:0.00}in", firstLineInInches)); } - var hanging = WordprocessingMLUtil.AttributeToTwips(ind.Attribute(W.hanging)); + var hanging = (decimal?) ind.Attribute(W.hanging); if (hanging != null && elementName != Xhtml.span) { var hangingInInches = (decimal) -hanging/1440m; @@ -1279,7 +1279,7 @@ private static void CreateStyleFromSpacing(Dictionary style, XEl style.Add("line-height", string.Format(NumberFormatInfo.InvariantInfo, "{0:0.0}pt", points)); } - var spacingAfter = suppressTrailingWhiteSpace ? 0 : WordprocessingMLUtil.AttributeToTwips(spacing.Attribute(W.after)); + var spacingAfter = suppressTrailingWhiteSpace ? 0m : (decimal?) spacing.Attribute(W.after); if (spacingAfter != null) style.AddIfMissing("margin-bottom", spacingAfter > 0m @@ -1837,8 +1837,7 @@ private static void CalculateSpanWidthForTabs(WordprocessingDocument wordDoc) // w:defaultTabStop in settings var sxd = wordDoc.MainDocumentPart.DocumentSettingsPart.GetXDocument(); - var defaultTabStopValue = (string)sxd.Descendants(W.defaultTabStop).Attributes(W.val).FirstOrDefault(); - var defaultTabStop = defaultTabStopValue != null ? WordprocessingMLUtil.StringToTwips(defaultTabStopValue) : 720; + var defaultTabStop = (int?)sxd.Descendants(W.defaultTabStop).Attributes(W.val).FirstOrDefault() ?? 720; var pxd = wordDoc.MainDocumentPart.GetXDocument(); var root = pxd.Root; @@ -1877,16 +1876,16 @@ private static object CalculateSpanWidthTransform(XNode node, int defaultTabStop { // todo need to handle start and end attributes - var left = WordprocessingMLUtil.AttributeToTwips(ind.Attribute(W.left)); + var left = (int?)ind.Attribute(W.left); if (left != null) leftInTwips = (int)left; var firstLine = 0; - var firstLineAtt = WordprocessingMLUtil.AttributeToTwips(ind.Attribute(W.firstLine)); + var firstLineAtt = (int?)ind.Attribute(W.firstLine); if (firstLineAtt != null) firstLine = (int)firstLineAtt; - var hangingAtt = WordprocessingMLUtil.AttributeToTwips(ind.Attribute(W.hanging)); + var hangingAtt = (int?)ind.Attribute(W.hanging); if (hangingAtt != null) firstLine = -(int)hangingAtt; @@ -1960,7 +1959,7 @@ private static object CalculateSpanWidthTransform(XNode node, int defaultTabStop var tabAfterText = tabs .Elements(W.tab) - .FirstOrDefault(t => WordprocessingMLUtil.StringToTwips((string)t.Attribute(W.pos)) > testAmount); + .FirstOrDefault(t => (int)t.Attribute(W.pos) > testAmount); if (tabAfterText == null) { @@ -1996,14 +1995,14 @@ private static object CalculateSpanWidthTransform(XNode node, int defaultTabStop new XElement(W.t, textAfterTab)); var widthOfTextAfterTab = CalcWidthOfRunInTwips(dummyRun2); - var delta2 = WordprocessingMLUtil.StringToTwips((string)tabAfterText.Attribute(W.pos)) - widthOfTextAfterTab - twipCounter; + var delta2 = (int)tabAfterText.Attribute(W.pos) - widthOfTextAfterTab - twipCounter; if (delta2 < 0) delta2 = 0; currentElement.Add( new XAttribute(PtOpenXml.TabWidth, string.Format(NumberFormatInfo.InvariantInfo, "{0:0.000}", (decimal)delta2 / 1440m)), GetLeader(tabAfterText)); - twipCounter = Math.Max(WordprocessingMLUtil.StringToTwips((string)tabAfterText.Attribute(W.pos)), twipCounter + widthOfTextAfterTab); + twipCounter = Math.Max((int)tabAfterText.Attribute(W.pos), twipCounter + widthOfTextAfterTab); var lastElement = textElementsToMeasure.LastOrDefault(); if (lastElement == null) @@ -2043,7 +2042,7 @@ private static object CalculateSpanWidthTransform(XNode node, int defaultTabStop new XElement(W.t, mantissa)); var widthOfMantissa = CalcWidthOfRunInTwips(dummyRun4); - var delta2 = WordprocessingMLUtil.StringToTwips((string)tabAfterText.Attribute(W.pos)) - widthOfMantissa - twipCounter; + var delta2 = (int)tabAfterText.Attribute(W.pos) - widthOfMantissa - twipCounter; if (delta2 < 0) delta2 = 0; currentElement.Add( @@ -2058,7 +2057,7 @@ private static object CalculateSpanWidthTransform(XNode node, int defaultTabStop new XElement(W.t, decims)); var widthOfDecims = CalcWidthOfRunInTwips(dummyRun4); - twipCounter = Math.Max(WordprocessingMLUtil.StringToTwips((string)tabAfterText.Attribute(W.pos)) + widthOfDecims, twipCounter + widthOfMantissa + widthOfDecims); + twipCounter = Math.Max((int)tabAfterText.Attribute(W.pos) + widthOfDecims, twipCounter + widthOfMantissa + widthOfDecims); var lastElement = textElementsToMeasure.LastOrDefault(); if (lastElement == null) @@ -2078,14 +2077,14 @@ private static object CalculateSpanWidthTransform(XNode node, int defaultTabStop new XElement(W.t, textAfterTab)); var widthOfTextAfterTab = CalcWidthOfRunInTwips(dummyRun2); - var delta2 = WordprocessingMLUtil.StringToTwips((string)tabAfterText.Attribute(W.pos)) - widthOfTextAfterTab - twipCounter; + var delta2 = (int)tabAfterText.Attribute(W.pos) - widthOfTextAfterTab - twipCounter; if (delta2 < 0) delta2 = 0; currentElement.Add( new XAttribute(PtOpenXml.TabWidth, string.Format(NumberFormatInfo.InvariantInfo, "{0:0.000}", (decimal)delta2 / 1440m)), GetLeader(tabAfterText)); - twipCounter = Math.Max(WordprocessingMLUtil.StringToTwips((string)tabAfterText.Attribute(W.pos)), twipCounter + widthOfTextAfterTab); + twipCounter = Math.Max((int)tabAfterText.Attribute(W.pos), twipCounter + widthOfTextAfterTab); var lastElement = textElementsToMeasure.LastOrDefault(); if (lastElement == null) @@ -2122,14 +2121,14 @@ private static object CalculateSpanWidthTransform(XNode node, int defaultTabStop new XElement(W.t, textAfterTab)); var widthOfText = CalcWidthOfRunInTwips(dummyRun4); - var delta2 = WordprocessingMLUtil.StringToTwips((string)tabAfterText.Attribute(W.pos)) - (widthOfText / 2) - twipCounter; + var delta2 = (int)tabAfterText.Attribute(W.pos) - (widthOfText / 2) - twipCounter; if (delta2 < 0) delta2 = 0; currentElement.Add( new XAttribute(PtOpenXml.TabWidth, string.Format(NumberFormatInfo.InvariantInfo, "{0:0.000}", (decimal)delta2 / 1440m)), GetLeader(tabAfterText)); - twipCounter = Math.Max(WordprocessingMLUtil.StringToTwips((string)tabAfterText.Attribute(W.pos)) + widthOfText / 2, twipCounter + widthOfText); + twipCounter = Math.Max((int)tabAfterText.Attribute(W.pos) + widthOfText / 2, twipCounter + widthOfText); var lastElement = textElementsToMeasure.LastOrDefault(); if (lastElement == null) @@ -2143,12 +2142,12 @@ private static object CalculateSpanWidthTransform(XNode node, int defaultTabStop } if (tabVal == "left" || tabVal == "start" || tabVal == "num") { - var delta = WordprocessingMLUtil.StringToTwips((string)tabAfterText.Attribute(W.pos)) - twipCounter; + var delta = (int)tabAfterText.Attribute(W.pos) - twipCounter; currentElement.Add( new XAttribute(PtOpenXml.TabWidth, string.Format(NumberFormatInfo.InvariantInfo, "{0:0.000}", (decimal)delta / 1440m)), GetLeader(tabAfterText)); - twipCounter = WordprocessingMLUtil.StringToTwips((string)tabAfterText.Attribute(W.pos)); + twipCounter = (int)tabAfterText.Attribute(W.pos); currentElementIdx++; if (currentElementIdx >= contentToMeasure.Length) @@ -2210,13 +2209,13 @@ private static XElement AddDefaultTabsAfterLastTab(XElement tabs, int defaultTab var lastTabElement = tabs .Elements(W.tab) .Where(t => (string)t.Attribute(W.val) != "clear" && (string)t.Attribute(W.val) != "bar") - .OrderBy(t => WordprocessingMLUtil.StringToTwips((string)t.Attribute(W.pos))) + .OrderBy(t => (int)t.Attribute(W.pos)) .LastOrDefault(); if (lastTabElement != null) { if (defaultTabStop == 0) defaultTabStop = 720; - var rangeStart = WordprocessingMLUtil.StringToTwips((string)lastTabElement.Attribute(W.pos)) / defaultTabStop + 1; + var rangeStart = (int)lastTabElement.Attribute(W.pos) / defaultTabStop + 1; var tempTabs = new XElement(W.tabs, tabs.Elements().Where(t => (string)t.Attribute(W.val) != "clear" && (string)t.Attribute(W.val) != "bar"), Enumerable.Range(rangeStart, 100) @@ -2224,7 +2223,7 @@ private static XElement AddDefaultTabsAfterLastTab(XElement tabs, int defaultTab new XAttribute(W.val, "left"), new XAttribute(W.pos, r * defaultTabStop)))); tempTabs = new XElement(W.tabs, - tempTabs.Elements().OrderBy(t => WordprocessingMLUtil.StringToTwips((string)t.Attribute(W.pos)))); + tempTabs.Elements().OrderBy(t => (int)t.Attribute(W.pos))); return tempTabs; } else diff --git a/OpenXmlPowerTools/XlsxTables.cs b/OpenXmlPowerTools/XlsxTables.cs index 135b689e..e33225ad 100644 --- a/OpenXmlPowerTools/XlsxTables.cs +++ b/OpenXmlPowerTools/XlsxTables.cs @@ -60,7 +60,7 @@ public IEnumerable TableRows() return Parent .Rows() .Skip(headerRowsCount) - .PtSkipLast(totalRowsCount) + .SkipLast(totalRowsCount) .Where(r => { int rowId = Int32.Parse(r.RowId); diff --git a/OpenXmlPowerToolsExamples/ChartUpdater01/ChartUpdater01.csproj b/OpenXmlPowerToolsExamples/ChartUpdater01/ChartUpdater01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/ChartUpdater01/ChartUpdater01.csproj +++ b/OpenXmlPowerToolsExamples/ChartUpdater01/ChartUpdater01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/DocumentAssembler/DocumentAssembler.csproj b/OpenXmlPowerToolsExamples/DocumentAssembler/DocumentAssembler.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/DocumentAssembler/DocumentAssembler.csproj +++ b/OpenXmlPowerToolsExamples/DocumentAssembler/DocumentAssembler.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/DocumentAssembler01/DocumentAssembler01.csproj b/OpenXmlPowerToolsExamples/DocumentAssembler01/DocumentAssembler01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/DocumentAssembler01/DocumentAssembler01.csproj +++ b/OpenXmlPowerToolsExamples/DocumentAssembler01/DocumentAssembler01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/DocumentAssembler02/DocumentAssembler02.csproj b/OpenXmlPowerToolsExamples/DocumentAssembler02/DocumentAssembler02.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/DocumentAssembler02/DocumentAssembler02.csproj +++ b/OpenXmlPowerToolsExamples/DocumentAssembler02/DocumentAssembler02.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/DocumentAssembler03/DocumentAssembler03.csproj b/OpenXmlPowerToolsExamples/DocumentAssembler03/DocumentAssembler03.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/DocumentAssembler03/DocumentAssembler03.csproj +++ b/OpenXmlPowerToolsExamples/DocumentAssembler03/DocumentAssembler03.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/DocumentBuilder01/DocumentBuilder01.csproj b/OpenXmlPowerToolsExamples/DocumentBuilder01/DocumentBuilder01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/DocumentBuilder01/DocumentBuilder01.csproj +++ b/OpenXmlPowerToolsExamples/DocumentBuilder01/DocumentBuilder01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/DocumentBuilder02/DocumentBuilder02.csproj b/OpenXmlPowerToolsExamples/DocumentBuilder02/DocumentBuilder02.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/DocumentBuilder02/DocumentBuilder02.csproj +++ b/OpenXmlPowerToolsExamples/DocumentBuilder02/DocumentBuilder02.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/DocumentBuilder03/DocumentBuilder03.csproj b/OpenXmlPowerToolsExamples/DocumentBuilder03/DocumentBuilder03.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/DocumentBuilder03/DocumentBuilder03.csproj +++ b/OpenXmlPowerToolsExamples/DocumentBuilder03/DocumentBuilder03.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/DocumentBuilder04/DocumentBuilder04.csproj b/OpenXmlPowerToolsExamples/DocumentBuilder04/DocumentBuilder04.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/DocumentBuilder04/DocumentBuilder04.csproj +++ b/OpenXmlPowerToolsExamples/DocumentBuilder04/DocumentBuilder04.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/FieldRetriever01/FieldRetriever01.csproj b/OpenXmlPowerToolsExamples/FieldRetriever01/FieldRetriever01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/FieldRetriever01/FieldRetriever01.csproj +++ b/OpenXmlPowerToolsExamples/FieldRetriever01/FieldRetriever01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/FormattingAssembler01/FormattingAssembler01.csproj b/OpenXmlPowerToolsExamples/FormattingAssembler01/FormattingAssembler01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/FormattingAssembler01/FormattingAssembler01.csproj +++ b/OpenXmlPowerToolsExamples/FormattingAssembler01/FormattingAssembler01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/Formulas01/Formulas01.csproj b/OpenXmlPowerToolsExamples/Formulas01/Formulas01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/Formulas01/Formulas01.csproj +++ b/OpenXmlPowerToolsExamples/Formulas01/Formulas01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/HtmlConverter01/HtmlConverter01.csproj b/OpenXmlPowerToolsExamples/HtmlConverter01/HtmlConverter01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/HtmlConverter01/HtmlConverter01.csproj +++ b/OpenXmlPowerToolsExamples/HtmlConverter01/HtmlConverter01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/HtmlToWmlConverter01/HtmlToWmlConverter01.csproj b/OpenXmlPowerToolsExamples/HtmlToWmlConverter01/HtmlToWmlConverter01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/HtmlToWmlConverter01/HtmlToWmlConverter01.csproj +++ b/OpenXmlPowerToolsExamples/HtmlToWmlConverter01/HtmlToWmlConverter01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/HtmlToWmlConverter02/HtmlToWmlConverter02.csproj b/OpenXmlPowerToolsExamples/HtmlToWmlConverter02/HtmlToWmlConverter02.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/HtmlToWmlConverter02/HtmlToWmlConverter02.csproj +++ b/OpenXmlPowerToolsExamples/HtmlToWmlConverter02/HtmlToWmlConverter02.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/ListItemRetriever01/ListItemRetriever01.csproj b/OpenXmlPowerToolsExamples/ListItemRetriever01/ListItemRetriever01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/ListItemRetriever01/ListItemRetriever01.csproj +++ b/OpenXmlPowerToolsExamples/ListItemRetriever01/ListItemRetriever01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/MetricsGetter01/MetricsGetter01.csproj b/OpenXmlPowerToolsExamples/MetricsGetter01/MetricsGetter01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/MetricsGetter01/MetricsGetter01.csproj +++ b/OpenXmlPowerToolsExamples/MetricsGetter01/MetricsGetter01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/OpenXmlRegex01/OpenXmlRegex01.csproj b/OpenXmlPowerToolsExamples/OpenXmlRegex01/OpenXmlRegex01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/OpenXmlRegex01/OpenXmlRegex01.csproj +++ b/OpenXmlPowerToolsExamples/OpenXmlRegex01/OpenXmlRegex01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/PivotTables01/PivotTables01.csproj b/OpenXmlPowerToolsExamples/PivotTables01/PivotTables01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/PivotTables01/PivotTables01.csproj +++ b/OpenXmlPowerToolsExamples/PivotTables01/PivotTables01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/PresentationBuilder01/PresentationBuilder01.csproj b/OpenXmlPowerToolsExamples/PresentationBuilder01/PresentationBuilder01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/PresentationBuilder01/PresentationBuilder01.csproj +++ b/OpenXmlPowerToolsExamples/PresentationBuilder01/PresentationBuilder01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/PresentationBuilder02/PresentationBuilder02.csproj b/OpenXmlPowerToolsExamples/PresentationBuilder02/PresentationBuilder02.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/PresentationBuilder02/PresentationBuilder02.csproj +++ b/OpenXmlPowerToolsExamples/PresentationBuilder02/PresentationBuilder02.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/ReferenceAdder01/ReferenceAdder01.csproj b/OpenXmlPowerToolsExamples/ReferenceAdder01/ReferenceAdder01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/ReferenceAdder01/ReferenceAdder01.csproj +++ b/OpenXmlPowerToolsExamples/ReferenceAdder01/ReferenceAdder01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/RevisionAccepter01/RevisionAccepter01.csproj b/OpenXmlPowerToolsExamples/RevisionAccepter01/RevisionAccepter01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/RevisionAccepter01/RevisionAccepter01.csproj +++ b/OpenXmlPowerToolsExamples/RevisionAccepter01/RevisionAccepter01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/SmlDataRetriever01/SmlDataRetriever01.csproj b/OpenXmlPowerToolsExamples/SmlDataRetriever01/SmlDataRetriever01.csproj index e188d465..a56b9a71 100644 --- a/OpenXmlPowerToolsExamples/SmlDataRetriever01/SmlDataRetriever01.csproj +++ b/OpenXmlPowerToolsExamples/SmlDataRetriever01/SmlDataRetriever01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net60 diff --git a/OpenXmlPowerToolsExamples/SpreadsheetWriter01/SpreadsheetWriter01.csproj b/OpenXmlPowerToolsExamples/SpreadsheetWriter01/SpreadsheetWriter01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/SpreadsheetWriter01/SpreadsheetWriter01.csproj +++ b/OpenXmlPowerToolsExamples/SpreadsheetWriter01/SpreadsheetWriter01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/SpreadsheetWriter02/SpreadsheetWriter02.csproj b/OpenXmlPowerToolsExamples/SpreadsheetWriter02/SpreadsheetWriter02.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/SpreadsheetWriter02/SpreadsheetWriter02.csproj +++ b/OpenXmlPowerToolsExamples/SpreadsheetWriter02/SpreadsheetWriter02.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/TextReplacer01/TextReplacer01.csproj b/OpenXmlPowerToolsExamples/TextReplacer01/TextReplacer01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/TextReplacer01/TextReplacer01.csproj +++ b/OpenXmlPowerToolsExamples/TextReplacer01/TextReplacer01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/TextReplacer02/TextReplacer02.csproj b/OpenXmlPowerToolsExamples/TextReplacer02/TextReplacer02.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/TextReplacer02/TextReplacer02.csproj +++ b/OpenXmlPowerToolsExamples/TextReplacer02/TextReplacer02.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/WmlComparer01/WmlComparer01.csproj b/OpenXmlPowerToolsExamples/WmlComparer01/WmlComparer01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/WmlComparer01/WmlComparer01.csproj +++ b/OpenXmlPowerToolsExamples/WmlComparer01/WmlComparer01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/WmlComparer02/WmlComparer02.csproj b/OpenXmlPowerToolsExamples/WmlComparer02/WmlComparer02.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/WmlComparer02/WmlComparer02.csproj +++ b/OpenXmlPowerToolsExamples/WmlComparer02/WmlComparer02.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/WmlToHtmlConverter01/WmlToHtmlConverter01.csproj b/OpenXmlPowerToolsExamples/WmlToHtmlConverter01/WmlToHtmlConverter01.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/WmlToHtmlConverter01/WmlToHtmlConverter01.csproj +++ b/OpenXmlPowerToolsExamples/WmlToHtmlConverter01/WmlToHtmlConverter01.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0 diff --git a/OpenXmlPowerToolsExamples/WmlToHtmlConverter02/WmlToHtmlConverter02.csproj b/OpenXmlPowerToolsExamples/WmlToHtmlConverter02/WmlToHtmlConverter02.csproj index e188d465..32f8f4f2 100644 --- a/OpenXmlPowerToolsExamples/WmlToHtmlConverter02/WmlToHtmlConverter02.csproj +++ b/OpenXmlPowerToolsExamples/WmlToHtmlConverter02/WmlToHtmlConverter02.csproj @@ -1,7 +1,7 @@ Exe - net45;net46;netcoreapp2.0 + net452;net461;net6.0