Skip to content

Commit

Permalink
Add parsing of html image in base64 format (PHPOffice#1382)
Browse files Browse the repository at this point in the history
* increased test coverage of new lines
* added exception control to file_get_contents error
* update changelog
  • Loading branch information
javier authored and troosan committed May 30, 2018
1 parent 82f3a2a commit 1a06173
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ v0.15.0 (?? ??? 2018)
- Add support for table indent (tblInd) @Trainmaster #1343
- Added parsing of internal links in HTML reader @lalop #1336
- Several improvements to charts @JAEK-S #1332
- Add parsing of html image in base64 format @jgpATs2w #1382

### Fixed
- Fix reading of docx default style - @troosan #1238
Expand Down
10 changes: 10 additions & 0 deletions samples/resources/Sample_30_ReadHTML.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,15 @@ <h1>Adding element via HTML</h1>
<ul><li>Item 1</li><li>Item 2</li><ul><li>Item 2.1</li><li>Item 2.1</li></ul></ul>
<p>Ordered (numbered) list:</p>
<ol><li>Item 1</li><li>Item 2</li></ol>

<p style="line-height:2">Double height</p>

<h2>Includes images</h2>
<img src="https://phpword.readthedocs.io/en/latest/_images/phpword.png" alt=""/>

<img src="https://localhost/gev/desarrollo/actividades/pruebas_14/5b064503587f7.jpeg" name="Imagen 12" align="bottom" width="208" height="183" border="0"/>
<img src="http://localhost/gev/desarrollo/actividades/pruebas_14/5b064503589db.png" name="Imagen 13" align="bottom" width="143" height="202" border="0"/>
<img src="http://localhost/gev/desarrollo/actividades/pruebas_14/5b0645035aac8.jpeg" name="Imagen 14" align="bottom" width="194" height="188" border="0"/>

</body>
</html>
Empty file modified samples/results/.gitignore
100644 → 100755
Empty file.
56 changes: 54 additions & 2 deletions src/PhpWord/Shared/Html.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use PhpOffice\PhpWord\Element\AbstractContainer;
use PhpOffice\PhpWord\Element\Row;
use PhpOffice\PhpWord\Element\Table;
use PhpOffice\PhpWord\Settings;
use PhpOffice\PhpWord\SimpleType\Jc;
use PhpOffice\PhpWord\SimpleType\NumberFormat;

Expand All @@ -32,6 +33,7 @@ class Html
{
private static $listIndex = 0;
private static $xpath;
private static $options;

/**
* Add HTML parts.
Expand All @@ -44,13 +46,17 @@ class Html
* @param string $html The code to parse
* @param bool $fullHTML If it's a full HTML, no need to add 'body' tag
* @param bool $preserveWhiteSpace If false, the whitespaces between nodes will be removed
* @param array|null $options Optional settings:
* + IMG_SRC_SEARCH: string searched for in an image src that is not a local file (e.g. a remote URL prefix)
* + IMG_SRC_REPLACE: string substituted for IMG_SRC_SEARCH (e.g. a local path), so images available locally need not be downloaded; only applied when both keys are set
*/
public static function addHtml($element, $html, $fullHTML = false, $preserveWhiteSpace = true)
public static function addHtml($element, $html, $fullHTML = false, $preserveWhiteSpace = true, $options = null)
{
/*
* @todo parse $stylesheet for default styles. Should result in an array based on id, class and element,
* which could be applied when such an element occurs in the parseNode function.
*/
self::$options = $options;

// Preprocess: remove all line ends, decode HTML entity,
// fix ampersand and angle brackets and add body tag for HTML fragments
Expand Down Expand Up @@ -141,6 +147,7 @@ protected static function parseNode($node, $element, $styles = array(), $data =
'sup' => array('Property', null, null, $styles, null, 'superScript', true),
'sub' => array('Property', null, null, $styles, null, 'subScript', true),
'span' => array('Span', $node, null, $styles, null, null, null),
'font' => array('Span', $node, null, $styles, null, null, null),
'table' => array('Table', $node, $element, $styles, null, null, null),
'tr' => array('Row', $node, $element, $styles, null, null, null),
'td' => array('Cell', $node, $element, $styles, null, null, null),
Expand Down Expand Up @@ -648,7 +655,52 @@ private static function parseImage($node, $element)
break;
}
}
$newElement = $element->addImage($src, $style);
$originSrc = $src;
if (strpos($src, 'data:image') !== false) {
$tmpDir = Settings::getTempDir() . '/';

$match = array();
preg_match('/data:image\/(\w+);base64,(.+)/', $src, $match);

$src = $imgFile = $tmpDir . uniqid() . '.' . $match[1];

$ifp = fopen($imgFile, 'wb');

if ($ifp !== false) {
fwrite($ifp, base64_decode($match[2]));
fclose($ifp);
}
}
$src = urldecode($src);

if (!is_file($src)
&& !is_null(self::$options)
&& isset(self::$options['IMG_SRC_SEARCH'])
&& isset(self::$options['IMG_SRC_REPLACE'])) {
$src = str_replace(self::$options['IMG_SRC_SEARCH'], self::$options['IMG_SRC_REPLACE'], $src);
}

if (!is_file($src)) {
if ($imgBlob = @file_get_contents($src)) {
$tmpDir = Settings::getTempDir() . '/';
$match = array();
preg_match('/.+\.(\w+)$/', $src, $match);
$src = $tmpDir . uniqid() . '.' . $match[1];

$ifp = fopen($src, 'wb');

if ($ifp !== false) {
fwrite($ifp, $imgBlob);
fclose($ifp);
}
}
}

if (is_file($src)) {
$newElement = $element->addImage($src, $style);
} else {
throw new \Exception("Could not load image $originSrc");
}

return $newElement;
}
Expand Down
87 changes: 87 additions & 0 deletions tests/PhpWord/Shared/HtmlTest.php

Large diffs are not rendered by default.

0 comments on commit 1a06173

Please sign in to comment.