Skip to content

Commit 1a0fc48

Browse files
authored
Enabled page breaks
2 parents 7d93a34 + bb515c0 commit 1a0fc48

11 files changed

+174
-53
lines changed

OpenXmlToHtml/OpenXmlToHtml.cs

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ public static async Task ConvertToHtmlAsync(string sourceOpenXmlFilePath, string
3333
await html.CopyToAsync(destinationHtmlFile).ConfigureAwait(false);
3434
}
3535

36-
3736
/// <summary>
3837
/// Converts docx to html
3938
/// </summary>
@@ -73,7 +72,7 @@ private static async Task<Stream> ConvertToHtmlInternalAsync(Stream sourceOpenXm
7372

7473
var htmlElement = WmlToHtmlConverter.ConvertToHtml(wordProcessingDocument, CreateHtmlConverterSettings(pageTitle));
7574

76-
var html = new XDocument(new XDocumentType("html", String.Empty, String.Empty, String.Empty), htmlElement);
75+
var html = new XDocument(new XDocumentType("html", string.Empty, string.Empty, string.Empty), htmlElement);
7776

7877
var memoryStreamHtml = new MemoryStream();
7978
html.Save(memoryStreamHtml);
@@ -83,16 +82,8 @@ private static async Task<Stream> ConvertToHtmlInternalAsync(Stream sourceOpenXm
8382

8483
private static WmlToHtmlConverterSettings CreateHtmlConverterSettings(string pageTitle)
8584
{
86-
var settings = new WmlToHtmlConverterSettings(new DefaultImageHandler(), new WordprocessingTextSymbolToUnicodeHandler(), new SymbolHandler())
87-
{
88-
GeneralCss = string.Empty,
89-
AdditionalCss = "@page { size: A4 } body { margin: 1cm auto; max-width: 20cm; padding: 0; }",
90-
PageTitle = pageTitle,
91-
FabricateCssClasses = true,
92-
CssClassPrefix = "Codeuctivity-",
93-
RestrictToSupportedLanguages = false,
94-
RestrictToSupportedNumberingFormats = false
95-
};
85+
var settings = new WmlToHtmlConverterSettings(pageTitle, new ImageHandler(), new TextSymbolToUnicodeHandler(), new SymbolHandler(), new PageBreakHandler(new BreakHandler()), true, string.Empty, "@page { size: A4 } body { margin: 1cm auto; max-width: 20cm; padding: 0; }", "Codeuctivity-");
86+
9687
return settings;
9788
}
9889
}

OpenXmlToHtml/OpenXmlToHtml.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@
3535

3636
<ItemGroup>
3737
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.0.0" PrivateAssets="all" />
38-
<PackageReference Include="OpenXmlPowerToolsStandard" Version="5.0.57-prerelease" />
39-
<PackageReference Include="SonarAnalyzer.CSharp" Version="8.17.0.26580">
38+
<PackageReference Include="OpenXmlPowerToolsStandard" Version="5.0.65-prerelease" />
39+
<PackageReference Include="SonarAnalyzer.CSharp" Version="8.18.0.27296">
4040
<PrivateAssets>all</PrivateAssets>
4141
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
4242
</PackageReference>

OpenXmlToHtml/PageBreakHandler.cs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
using OpenXmlPowerTools;
2+
using OpenXmlPowerTools.OpenXMLWordprocessingMLToHtmlConverter;
3+
using System.Collections.Generic;
4+
using System.Xml.Linq;
5+
6+
namespace Codeuctivity.OpenXmlToHtml
7+
{
8+
/// <summary>
9+
/// Break handler that renders breaks of type "page" as a div styled with "break-before: page;" and passes every other break on to a wrapped handler
10+
/// </summary>
11+
public class PageBreakHandler : IBreakHandler
12+
{
13+
/// <summary>
14+
/// Handler that TransformBreak delegates to when the break is not a page break
15+
/// </summary>
16+
public IBreakHandler DefaultBreakHandler { get; }
17+
18+
/// <summary>
19+
/// Creates a page break handler that wraps the given fallback handler
20+
/// </summary>
21+
/// <param name="defaultBreakHandler">Handler used for every break whose type is not "page"</param>
22+
public PageBreakHandler(IBreakHandler defaultBreakHandler)
23+
{
24+
DefaultBreakHandler = defaultBreakHandler;
25+
}
26+
27+
/// <summary>
28+
/// Transforms a page break into an empty div carrying the CSS "break-before: page;" style; any other break is transformed by DefaultBreakHandler
29+
/// </summary>
30+
/// <param name="element">Break element whose type attribute decides which transformation is applied</param>
31+
/// <returns>A single div node for a page break, otherwise whatever DefaultBreakHandler returns</returns>
32+
public IEnumerable<XNode> TransformBreak(XElement element)
33+
{
34+
if (element.Attribute(W.type)?.Value == "page")
35+
{
36+
var pageBreakDiv = new XElement(Xhtml.div);
37+
pageBreakDiv.Add(new XAttribute(H.Style, "break-before: page;"));
38+
return new XNode[] { pageBreakDiv };
39+
}
40+
41+
return DefaultBreakHandler.TransformBreak(element);
42+
}
43+
}
44+
}

OpenXmlToHtml/SymbolHandler.cs

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,14 @@
66
namespace Codeuctivity.OpenXmlToHtml
77
{
88
/// <summary>
9-
/// Default handler that transforms every symbol into some html encoded font specific char
9+
/// Handler that transforms every symbol into some HTML encoded font specific char
1010
/// </summary>
11-
public class SymbolHandler : IWordprocessingSymbolHandler
11+
public class SymbolHandler : ISymbolHandler
1212
{
13+
private OpenXmlPowerTools.OpenXMLWordprocessingMLToHtmlConverter.SymbolHandler DefaultSymbolHandler { get; set; }
1314

14-
15-
private DefaultSymbolHandler DefaultSymbolHandler { get; set; }
1615
/// <summary>
17-
/// Dictonary codes from symbol char to unicode
16+
/// Dictionary codes from symbol char to Unicode
1817
/// </summary>
1918
public static Dictionary<string, string> SymbolCharDictonary => new Dictionary<string, string>
2019
{
@@ -82,7 +81,7 @@ public class SymbolHandler : IWordprocessingSymbolHandler
8281
{"F05D","]"},
8382
{"F05E","⊥"},
8483
{"F05F","_"},
85-
// Not sure, but I think that overline has a context specific effect on the following char
84+
// Not sure, but I think that overline has a context specific effect on the following char
8685
{"F060","\u0305"},
8786
{"F061","α"},
8887
{"F062","β"},
@@ -244,8 +243,9 @@ public class SymbolHandler : IWordprocessingSymbolHandler
244243
{"F0FE","⎭"},
245244
{"F0FF","□"}
246245
};
246+
247247
/// <summary>
248-
/// Dictonary codes from wingdings char to unicode
248+
/// Dictionary codes from Wingdings char to Unicode
249249
/// </summary>
250250
public static Dictionary<string, string> WingdingsCharDictonary => new Dictionary<string, string>
251251
{
@@ -474,13 +474,12 @@ public class SymbolHandler : IWordprocessingSymbolHandler
474474
{"F0FE","🗹"},
475475
};
476476

477-
478477
/// <summary>
479478
/// Default ctor
480479
/// </summary>
481480
public SymbolHandler()
482481
{
483-
DefaultSymbolHandler = new DefaultSymbolHandler();
482+
DefaultSymbolHandler = new OpenXmlPowerTools.OpenXMLWordprocessingMLToHtmlConverter.SymbolHandler();
484483
}
485484

486485
/// <summary>

OpenXmlToHtml/WordprocessingTextSymbolToUnicodeHandler.cs renamed to OpenXmlToHtml/TextSymbolToUnicodeHandler.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ namespace Codeuctivity.OpenXmlToHtml
66
/// <summary>
77
/// Replaces any char of wingdings with the Unicode equivalent
88
/// </summary>
9-
public class WordprocessingTextSymbolToUnicodeHandler : IWordprocessingTextHandler
9+
public class TextSymbolToUnicodeHandler : ITextHandler
1010
{
1111
/// <summary>
12-
/// Dictonary used to translate symbol chars to unicode
12+
/// Dictionary used to translate symbol chars to Unicode
1313
/// </summary>
1414
private static readonly Dictionary<char, char> SymbolToUnicode = new Dictionary<char, char>
1515
{
@@ -28,6 +28,7 @@ public string TransformText(string text, Dictionary<string, string> fontFamily)
2828
text = text.Replace(item.Key, item.Value);
2929
}
3030
}
31+
3132
return text;
3233
}
3334
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
using Codeuctivity.OpenXmlToHtml;
2+
using Moq;
3+
using OpenXmlPowerTools;
4+
using OpenXmlPowerTools.OpenXMLWordprocessingMLToHtmlConverter;
5+
using System.Linq;
6+
using System.Xml.Linq;
7+
using Xunit;
8+
9+
namespace OpenXmlToHtmlTests
10+
{
11+
// Unit tests for PageBreakHandler, using a mocked IBreakHandler as the wrapped fallback handler.
public class BreakHandlerAdapterTests
12+
{
13+
// A break with type "page" must yield exactly one styled div and must never reach the fallback handler.
[Fact]
14+
public void ShouldTranslatePageBreaks()
15+
{
16+
var breakHandler = new Mock<IBreakHandler>();
17+
var breakHandlerAdapter = new PageBreakHandler(breakHandler.Object);
18+
19+
var element = new XElement("br", new XAttribute(W.type, "page"));
20+
21+
var actual = breakHandlerAdapter.TransformBreak(element);
22+
23+
Assert.Equal("<div Style=\"break-before: page;\" xmlns=\"http://www.w3.org/1999/xhtml\" />", actual.Single().ToString());
24+
breakHandler.Verify(m => m.TransformBreak(It.IsAny<XElement>()), Times.Never());
25+
}
26+
27+
// A break without a type attribute must be delegated to the fallback handler exactly once, with the same element.
[Fact]
28+
public void ShouldTranslatePage()
29+
{
30+
var breakHandler = new Mock<IBreakHandler>();
31+
var breakHandlerAdapter = new PageBreakHandler(breakHandler.Object);
32+
33+
var element = new XElement("br");
34+
35+
breakHandlerAdapter.TransformBreak(element);
36+
37+
breakHandler.Verify(m => m.TransformBreak(element), Times.Once);
38+
}
39+
}
40+
}
Lines changed: 22 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
using Codeuctivity.OpenXmlToHtml;
2-
using OpenXmlPowerTools;
3-
using System.Collections.Generic;
2+
using Codeuctivity.PuppeteerSharp;
3+
using PdfSharp.Pdf.IO;
44
using System.IO;
55
using System.Threading.Tasks;
6-
using System.Xml.Linq;
76
using Xunit;
87

98
namespace OpenXmlToHtmlTests
@@ -13,9 +12,9 @@ public class OpenXmlToHtmlTests
1312
[Theory]
1413
[InlineData("EmptyDocument.docx", 0)]
1514
[InlineData("WingdingsSymbols.docx", 71000)]
16-
[InlineData("BasicTextFormated.docx", 0)]
17-
[InlineData("Images.docx", 0)]
18-
public async Task ShouldConvertDocumentIntegrativeTest(string testFileName, int allowedPixelErrorCount)
15+
[InlineData("BasicTextFormated.docx", 50)]
16+
[InlineData("Images.docx", 5)]
17+
public async Task ShouldConvertDocumentIntegrativeWithKnownAberrancyTest(string testFileName, int allowedPixelErrorCount)
1918
{
2019
var sourceOpenXmlFilePath = $"../../../TestInput/{testFileName}";
2120
var actualHtmlFilePath = Path.Combine(Path.GetTempPath(), $"Actual{testFileName}.html");
@@ -30,35 +29,30 @@ public async Task ShouldConvertDocumentIntegrativeTest(string testFileName, int
3029
await DocumentAsserter.AssertRenderedHtmlIsEqual(actualHtmlFilePath, expectedHtmlFilePath, allowedPixelErrorCount);
3130
}
3231

33-
34-
[Theory]
35-
[InlineData("1", "•1", "Symbol")]
36-
[InlineData("1", "1", "arial")]
37-
public void ShouldTranslateTextWithCustomGlyphToUnicode(string original, string expectedEquivalent, string fontFamily)
32+
[Fact]
33+
public async Task ShouldConvertDocumentIntegrativeWithToExpectedPageQuantityTest()
3834
{
39-
var currentStyle = new Dictionary<string, string> { { "font-family", fontFamily } };
35+
var testFileName = "TwoPages.docx";
36+
var sourceOpenXmlFilePath = $"../../../TestInput/{testFileName}";
37+
var actualHtmlFilePath = Path.Combine(Path.GetTempPath(), $"Actual{testFileName}.html");
4038

41-
var WordprocessingTextSymbolToUnicodeHandler = new WordprocessingTextSymbolToUnicodeHandler();
39+
if (File.Exists(actualHtmlFilePath))
40+
{
41+
File.Delete(actualHtmlFilePath);
42+
}
4243

43-
var actual = WordprocessingTextSymbolToUnicodeHandler.TransformText(original, currentStyle);
44+
await OpenXmlToHtml.ConvertToHtmlAsync(sourceOpenXmlFilePath, actualHtmlFilePath);
4445

45-
Assert.Equal(expectedEquivalent, actual);
46+
await using var chromiumRenderer = await Renderer.CreateAsync();
47+
var pathPdfizedHtml = actualHtmlFilePath + ".pdf";
48+
await chromiumRenderer.ConvertHtmlToPdf(actualHtmlFilePath, pathPdfizedHtml);
49+
AssertPdfPageCount(pathPdfizedHtml, 2);
4650
}
4751

48-
[Fact]
49-
public void ShouldTranslateSymbolsToUnicode()
52+
private static void AssertPdfPageCount(string pathPdfizedHtml, int expectePageQuantity)
5053
{
51-
var fontFamily = new Dictionary<string, string>
52-
{
53-
{ "font-family", "Symbol" }
54-
};
55-
56-
var symbolHandler = new SymbolHandler();
57-
var element = new XElement("symbol", new XAttribute(W._char, "F0D7"));
58-
59-
var actual = symbolHandler.TransformSymbol(element, fontFamily);
60-
61-
Assert.Equal("<span xmlns=\"http://www.w3.org/1999/xhtml\">⋅</span>", actual.ToString());
54+
var pdfReader = PdfReader.Open(pathPdfizedHtml, PdfDocumentOpenMode.ReadOnly);
55+
Assert.Equal(expectePageQuantity, pdfReader.PageCount);
6256
}
6357
}
6458
}

OpenXmlToHtmlTests/OpenXmlToHtmlTests.csproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
<ItemGroup>
1313
<PackageReference Include="ImageSharpCompare" Version="1.1.30" />
14+
<PackageReference Include="Moq" Version="4.16.0" />
15+
<PackageReference Include="PdfSharp.netstandard" Version="1.3.2" />
1416
<PackageReference Include="PuppeteerSharp.Renderer" Version="1.0.8-prerelease" />
1517
<PackageReference Include="SonarAnalyzer.CSharp" Version="8.17.0.26580">
1618
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
using Codeuctivity.OpenXmlToHtml;
2+
using OpenXmlPowerTools;
3+
using System.Collections.Generic;
4+
using System.Xml.Linq;
5+
using Xunit;
6+
7+
namespace OpenXmlToHtmlTests
8+
{
9+
// Unit tests for SymbolHandler.
public class SymbolHandlerTests
10+
{
11+
// A symbol element with char code F0D7 and font-family "Symbol" must be rendered as a span containing "⋅".
[Fact]
12+
public void ShouldTranslateSymbolsToUnicode()
13+
{
14+
var fontFamily = new Dictionary<string, string>
15+
{
16+
{ "font-family", "Symbol" }
17+
};
18+
19+
var symbolHandler = new SymbolHandler();
20+
var element = new XElement("symbol", new XAttribute(W._char, "F0D7"));
21+
22+
var actual = symbolHandler.TransformSymbol(element, fontFamily);
23+
24+
Assert.Equal("<span xmlns=\"http://www.w3.org/1999/xhtml\">⋅</span>", actual.ToString());
25+
}
26+
}
27+
}
4.2 KB
Binary file not shown.

0 commit comments

Comments
 (0)