Merge pull request #23824 from woocommerce/fix/23799

Update Emogrifier library
This commit is contained in:
Claudio Sanches 2019-06-18 16:03:40 -03:00 committed by GitHub
commit e18481e795
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 132 additions and 287 deletions

View File

@ -2,6 +2,7 @@
namespace Pelago\Emogrifier;
use Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor;
use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
@ -21,7 +22,7 @@ use Symfony\Component\CssSelector\Exception\SyntaxErrorException;
* @author Sander Kruger <s.kruger@invessel.com>
* @author Zoli Szabó <zoli.szabo+github@gmail.com>
*/
class CssInliner
class CssInliner extends AbstractHtmlProcessor
{
/**
* @var int
@ -53,26 +54,6 @@ class CssInliner
*/
const PSEUDO_CLASS_MATCHER = '\\S+\\-(?:child|type\\()|not\\([[:ascii:]]*\\)';
/**
* @var string
*/
const CONTENT_TYPE_META_TAG = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
/**
* @var string
*/
const DEFAULT_DOCUMENT_TYPE = '<!DOCTYPE html>';
/**
* @var \DOMDocument
*/
protected $domDocument = null;
/**
* @var string
*/
private $css = '';
/**
* @var bool[]
*/
@ -81,7 +62,7 @@ class CssInliner
/**
* @var string[]
*/
private $unprocessableHtmlTags = ['wbr'];
private $unprocessableHtmlTags = [];
/**
* @var bool[]
@ -170,90 +151,15 @@ class CssInliner
private $debug = false;
/**
* @param string $unprocessedHtml raw HTML, must be UTF-encoded, must not be empty
*
* @throws \InvalidArgumentException if $unprocessedHtml is anything other than a non-empty string
* @return CssSelectorConverter
*/
public function __construct($unprocessedHtml)
private function getCssSelectorConverter()
{
if (!\is_string($unprocessedHtml)) {
throw new \InvalidArgumentException('The provided HTML must be a string.', 1540403176);
}
if ($unprocessedHtml === '') {
throw new \InvalidArgumentException('The provided HTML must not be empty.', 1540403181);
if ($this->cssSelectorConverter === null) {
$this->cssSelectorConverter = new CssSelectorConverter();
}
$this->cssSelectorConverter = new CssSelectorConverter();
$this->setHtml($unprocessedHtml);
}
/**
* Sets the HTML to process.
*
* @param string $html the HTML to process, must be UTF-8-encoded
*
* @return void
*/
private function setHtml($html)
{
$this->createUnifiedDomDocument($html);
}
/**
* Provides access to the internal DOMDocument representation of the HTML in its current state.
*
* @return \DOMDocument
*/
public function getDomDocument()
{
return $this->domDocument;
}
/**
* Sets the CSS to merge with the HTML.
*
* @param string $css the CSS to merge, must be UTF-8-encoded
*
* @return void
*/
public function setCss($css)
{
$this->css = $css;
}
/**
* Renders the normalized and processed HTML.
*
* @return string
*/
public function render()
{
return $this->domDocument->saveHTML();
}
/**
* Renders the content of the BODY element of the normalized and processed HTML.
*
* @return string
*/
public function renderBodyContent()
{
$bodyNodeHtml = $this->domDocument->saveHTML($this->getBodyElement());
return \str_replace(['<body>', '</body>'], '', $bodyNodeHtml);
}
/**
* Returns the BODY element.
*
* This method assumes that there always is a BODY element.
*
* @return \DOMElement
*/
private function getBodyElement()
{
return $this->domDocument->getElementsByTagName('body')->item(0);
return $this->cssSelectorConverter;
}
/**
@ -269,101 +175,15 @@ class CssInliner
}
/**
* Applies $this->css to the given HTML and returns the HTML with the CSS
* applied.
* Inlines the given CSS into the existing HTML.
*
* This method places the CSS inline.
* @param string $css the CSS to inline, must be UTF-8-encoded
*
* @return string
* @return CssInliner fluent interface
*
* @throws SyntaxErrorException
*/
public function emogrify()
{
$this->process();
return $this->render();
}
/**
* Applies $this->css to the given HTML and returns only the HTML content
* within the <body> tag.
*
* This method places the CSS inline.
*
* @return string
*
* @throws SyntaxErrorException
*/
public function emogrifyBodyContent()
{
$this->process();
return $this->renderBodyContent();
}
/**
* Creates a DOM document from the given HTML and stores it in $this->domDocument.
*
* The DOM document will always have a BODY element and a document type.
*
* @param string $html
*
* @return void
*/
private function createUnifiedDomDocument($html)
{
$this->createRawDomDocument($html);
$this->ensureExistenceOfBodyElement();
}
/**
* Creates a DOMDocument instance from the given HTML and stores it in $this->domDocument.
*
* @param string $html
*
* @return void
*/
private function createRawDomDocument($html)
{
$domDocument = new \DOMDocument();
$domDocument->encoding = 'UTF-8';
$domDocument->strictErrorChecking = false;
$domDocument->formatOutput = true;
$libXmlState = \libxml_use_internal_errors(true);
$domDocument->loadHTML($this->prepareHtmlForDomConversion($html));
\libxml_clear_errors();
\libxml_use_internal_errors($libXmlState);
$domDocument->normalizeDocument();
$this->domDocument = $domDocument;
}
/**
* Returns the HTML with added document type and Content-Type meta tag if needed,
* ensuring that the HTML will be good for creating a DOM document from it.
*
* @param string $html
*
* @return string the unified HTML
*/
private function prepareHtmlForDomConversion($html)
{
$htmlWithDocumentType = $this->ensureDocumentType($html);
return $this->addContentTypeMetaTag($htmlWithDocumentType);
}
/**
* Applies $this->css to $this->domDocument.
*
* This method places the CSS inline.
*
* @return void
*
* @throws SyntaxErrorException
*/
protected function process()
public function inlineCss($css)
{
$this->clearAllCaches();
$this->purgeVisitedNodes();
@ -372,18 +192,19 @@ class CssInliner
$this->removeUnprocessableTags();
$this->normalizeStyleAttributesOfAllNodes($xPath);
// grab any existing style blocks from the html and append them to the existing CSS
$combinedCss = $css;
// grab any existing style blocks from the HTML and append them to the existing CSS
// (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS)
$allCss = $this->css;
if ($this->isStyleBlocksParsingEnabled) {
$allCss .= $this->getCssFromAllStyleNodes($xPath);
$combinedCss .= $this->getCssFromAllStyleNodes($xPath);
}
$excludedNodes = $this->getNodesToExclude($xPath);
$cssRules = $this->parseCssRules($allCss);
$cssRules = $this->parseCssRules($combinedCss);
$cssSelectorConverter = $this->getCssSelectorConverter();
foreach ($cssRules['inlineable'] as $cssRule) {
try {
$nodesMatchingCssSelectors = $xPath->query($this->cssSelectorConverter->toXPath($cssRule['selector']));
$nodesMatchingCssSelectors = $xPath->query($cssSelectorConverter->toXPath($cssRule['selector']));
} catch (SyntaxErrorException $e) {
if ($this->debug) {
throw $e;
@ -408,6 +229,8 @@ class CssInliner
$this->removeImportantAnnotationFromAllInlineStyles($xPath);
$this->copyUninlineableCssToStyleNode($xPath, $cssRules['uninlineable']);
return $this;
}
/**
@ -609,9 +432,9 @@ class CssInliner
/**
* Disables the removal of elements with `display: none` properties.
*
* @deprecated will be removed in Emogrifier 3.0
*
* @return void
*
* @deprecated will be removed in Emogrifier 3.0
*/
public function disableInvisibleNodeRemoval()
{
@ -997,7 +820,7 @@ class CssInliner
private function existsMatchForCssSelector(\DOMXPath $xPath, $cssSelector)
{
try {
$nodesMatchingSelector = $xPath->query($this->cssSelectorConverter->toXPath($cssSelector));
$nodesMatchingSelector = $xPath->query($this->getCssSelectorConverter()->toXPath($cssSelector));
} catch (SyntaxErrorException $e) {
if ($this->debug) {
throw $e;
@ -1038,7 +861,7 @@ class CssInliner
*
* This method is protected to allow overriding.
*
* @see https://github.com/jjriv/emogrifier/issues/103
* @see https://github.com/MyIntervals/emogrifier/issues/103
*
* @param string $css
*
@ -1055,21 +878,6 @@ class CssInliner
$headElement->appendChild($styleElement);
}
/**
* Checks that $this->domDocument has a BODY element and adds it if it is missing.
*
* @return void
*/
private function ensureExistenceOfBodyElement()
{
if ($this->domDocument->getElementsByTagName('body')->item(0) !== null) {
return;
}
$htmlElement = $this->domDocument->getElementsByTagName('html')->item(0);
$htmlElement->appendChild($this->domDocument->createElement('body'));
}
/**
* Splits input CSS code into an array of parts for different media queries, in order.
* Each part is an array where:
@ -1154,7 +962,11 @@ class CssInliner
private function removeUnprocessableTags()
{
foreach ($this->unprocessableHtmlTags as $tagName) {
$nodes = $this->domDocument->getElementsByTagName($tagName);
// Deleting nodes from a 'live' NodeList invalidates iteration on it, so a copy must be made to iterate.
$nodes = [];
foreach ($this->domDocument->getElementsByTagName($tagName) as $node) {
$nodes[] = $node;
}
/** @var \DOMNode $node */
foreach ($nodes as $node) {
$hasContent = $node->hasChildNodes() || $node->hasChildNodes();
@ -1165,59 +977,6 @@ class CssInliner
}
}
/**
* Makes sure that the passed HTML has a document type.
*
* @param string $html
*
* @return string HTML with document type
*/
private function ensureDocumentType($html)
{
$hasDocumentType = \stripos($html, '<!DOCTYPE') !== false;
if ($hasDocumentType) {
return $html;
}
return static::DEFAULT_DOCUMENT_TYPE . $html;
}
/**
* Adds a Content-Type meta tag for the charset.
*
* This method also ensures that there is a HEAD element.
*
* @param string $html
*
* @return string the HTML with the meta tag added
*/
private function addContentTypeMetaTag($html)
{
$hasContentTypeMetaTag = \stripos($html, 'Content-Type') !== false;
if ($hasContentTypeMetaTag) {
return $html;
}
// We are trying to insert the meta tag to the right spot in the DOM.
// If we just prepended it to the HTML, we would lose attributes set to the HTML tag.
$hasHeadTag = \stripos($html, '<head') !== false;
$hasHtmlTag = \stripos($html, '<html') !== false;
if ($hasHeadTag) {
$reworkedHtml = \preg_replace('/<head(.*?)>/i', '<head$1>' . static::CONTENT_TYPE_META_TAG, $html);
} elseif ($hasHtmlTag) {
$reworkedHtml = \preg_replace(
'/<html(.*?)>/i',
'<html$1><head>' . static::CONTENT_TYPE_META_TAG . '</head>',
$html
);
} else {
$reworkedHtml = static::CONTENT_TYPE_META_TAG . $html;
}
return $reworkedHtml;
}
/**
* @param string[] $a
* @param string[] $b
@ -1317,7 +1076,7 @@ class CssInliner
$excludedNodes = [];
foreach (\array_keys($this->excludedSelectors) as $selectorToExclude) {
try {
$matchingNodes = $xPath->query($this->cssSelectorConverter->toXPath($selectorToExclude));
$matchingNodes = $xPath->query($this->getCssSelectorConverter()->toXPath($selectorToExclude));
} catch (SyntaxErrorException $e) {
if ($this->debug) {
throw $e;

View File

@ -23,6 +23,15 @@ abstract class AbstractHtmlProcessor
*/
const CONTENT_TYPE_META_TAG = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
/**
* @var string Regular expression part to match tag names that PHP's DOMDocument implementation is not aware are
* self-closing. These are mostly HTML5 elements, but for completeness <command> (obsolete) and <keygen>
* (deprecated) are also included.
*
* @see https://bugs.php.net/bug.php?id=73175
*/
const PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER = '(?:command|embed|keygen|source|track|wbr)';
/**
* @var \DOMDocument
*/
@ -74,7 +83,9 @@ abstract class AbstractHtmlProcessor
*/
public function render()
{
return $this->domDocument->saveHTML();
$htmlWithPossibleErroneousClosingTags = $this->domDocument->saveHTML();
return $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags);
}
/**
@ -84,11 +95,24 @@ abstract class AbstractHtmlProcessor
*/
public function renderBodyContent()
{
$bodyNodeHtml = $this->domDocument->saveHTML($this->getBodyElement());
$htmlWithPossibleErroneousClosingTags = $this->domDocument->saveHTML($this->getBodyElement());
$bodyNodeHtml = $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags);
return \str_replace(['<body>', '</body>'], '', $bodyNodeHtml);
}
/**
 * Strips stray closing tags of void elements (e.g. `</wbr>`) from serialized HTML.
 *
 * Only closing tags whose name is listed in PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER are removed. The match is
 * case-sensitive and expects the exact form `</name>`.
 *
 * @param string $html the HTML to clean up
 *
 * @return string the HTML without those closing tags
 */
private function removeSelfClosingTagsClosingTags($html)
{
    $voidElementClosingTagPattern = '%</' . static::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '>%';

    return \preg_replace($voidElementClosingTagPattern, '', $html);
}
/**
* Returns the BODY element.
*
@ -137,7 +161,7 @@ abstract class AbstractHtmlProcessor
}
/**
* Returns the HTML with added document type and Content-Type meta tag if needed,
* Returns the HTML with added document type, Content-Type meta tag, and self-closing slashes, if needed,
* ensuring that the HTML will be good for creating a DOM document from it.
*
* @param string $html
@ -146,7 +170,8 @@ abstract class AbstractHtmlProcessor
*/
private function prepareHtmlForDomConversion($html)
{
$htmlWithDocumentType = $this->ensureDocumentType($html);
$htmlWithSelfClosingSlashes = $this->ensurePhpUnrecognizedSelfClosingTagsAreXml($html);
$htmlWithDocumentType = $this->ensureDocumentType($htmlWithSelfClosingSlashes);
return $this->addContentTypeMetaTag($htmlWithDocumentType);
}
@ -172,7 +197,7 @@ abstract class AbstractHtmlProcessor
* Adds a Content-Type meta tag for the charset.
*
* This method also ensures that there is a HEAD element.
*
* @param string $html
*
* @return string the HTML with the meta tag added
@ -204,6 +229,23 @@ abstract class AbstractHtmlProcessor
return $reworkedHtml;
}
/**
 * Appends a self-closing slash to every opening tag named in PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER that does
 * not already end in one (e.g. `<wbr>` becomes `<wbr/>`).
 *
 * Attributes inside the tag are kept as they are; the slash is inserted directly before the closing `>`.
 * Tag names are matched case-sensitively.
 *
 * @param string $html the HTML to convert
 *
 * @return string the HTML with the affected tags written in self-closing form
 */
private function ensurePhpUnrecognizedSelfClosingTagsAreXml($html)
{
    // Matches the tag up to (but not including) its ">", and only if the last character before it is not "/".
    $unclosedVoidTagPattern = '%<' . static::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '\\b[^>]*+(?<!/)(?=>)%';

    return \preg_replace($unclosedVoidTagPattern, '$0/', $html);
}
/**
* Checks that $this->domDocument has a BODY element and adds it if it is missing.
*

View File

@ -85,6 +85,15 @@ class Emogrifier
*/
const DEFAULT_DOCUMENT_TYPE = '<!DOCTYPE html>';
/**
* @var string Regular expression part to match tag names that PHP's DOMDocument implementation is not aware are
* self-closing. These are mostly HTML5 elements, but for completeness <command> (obsolete) and <keygen>
* (deprecated) are also included.
*
* @see https://bugs.php.net/bug.php?id=73175
*/
const PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER = '(?:command|embed|keygen|source|track|wbr)';
/**
* @var \DOMDocument
*/
@ -321,7 +330,9 @@ class Emogrifier
*/
protected function render()
{
return $this->domDocument->saveHTML();
$htmlWithPossibleErroneousClosingTags = $this->domDocument->saveHTML();
return $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags);
}
/**
@ -331,11 +342,24 @@ class Emogrifier
*/
protected function renderBodyContent()
{
$bodyNodeHtml = $this->domDocument->saveHTML($this->getBodyElement());
$htmlWithPossibleErroneousClosingTags = $this->domDocument->saveHTML($this->getBodyElement());
$bodyNodeHtml = $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags);
return \str_replace(['<body>', '</body>'], '', $bodyNodeHtml);
}
/**
 * Strips stray closing tags of void elements (e.g. `</wbr>`) from serialized HTML.
 *
 * Only closing tags whose name is listed in PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER are removed. The match is
 * case-sensitive and expects the exact form `</name>`.
 *
 * @param string $html the HTML to clean up
 *
 * @return string the HTML without those closing tags
 */
private function removeSelfClosingTagsClosingTags($html)
{
    $voidElementClosingTagPattern = '%</' . static::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '>%';

    return \preg_replace($voidElementClosingTagPattern, '', $html);
}
/**
* Returns the BODY element.
*
@ -437,20 +461,18 @@ class Emogrifier
private function createRawDomDocument($html)
{
$domDocument = new \DOMDocument();
$domDocument->encoding = 'UTF-8';
$domDocument->strictErrorChecking = false;
$domDocument->formatOutput = true;
$libXmlState = \libxml_use_internal_errors(true);
$domDocument->loadHTML($this->prepareHtmlForDomConversion($html));
\libxml_clear_errors();
\libxml_use_internal_errors($libXmlState);
$domDocument->normalizeDocument();
$this->domDocument = $domDocument;
}
/**
* Returns the HTML with added document type and Content-Type meta tag if needed,
* Returns the HTML with added document type, Content-Type meta tag, and self-closing slashes, if needed,
* ensuring that the HTML will be good for creating a DOM document from it.
*
* @param string $html
@ -459,7 +481,8 @@ class Emogrifier
*/
private function prepareHtmlForDomConversion($html)
{
$htmlWithDocumentType = $this->ensureDocumentType($html);
$htmlWithSelfClosingSlashes = $this->ensurePhpUnrecognizedSelfClosingTagsAreXml($html);
$htmlWithDocumentType = $this->ensureDocumentType($htmlWithSelfClosingSlashes);
return $this->addContentTypeMetaTag($htmlWithDocumentType);
}
@ -1402,7 +1425,7 @@ class Emogrifier
*
* This method is protected to allow overriding.
*
* @see https://github.com/jjriv/emogrifier/issues/103
* @see https://github.com/MyIntervals/emogrifier/issues/103
*
* @param string $css
*
@ -1518,7 +1541,11 @@ class Emogrifier
private function removeUnprocessableTags()
{
foreach ($this->unprocessableHtmlTags as $tagName) {
$nodes = $this->domDocument->getElementsByTagName($tagName);
// Deleting nodes from a 'live' NodeList invalidates iteration on it, so a copy must be made to iterate.
$nodes = [];
foreach ($this->domDocument->getElementsByTagName($tagName) as $node) {
$nodes[] = $node;
}
/** @var \DOMNode $node */
foreach ($nodes as $node) {
$hasContent = $node->hasChildNodes() || $node->hasChildNodes();
@ -1582,6 +1609,23 @@ class Emogrifier
return $reworkedHtml;
}
/**
 * Appends a self-closing slash to every opening tag named in PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER that does
 * not already end in one (e.g. `<wbr>` becomes `<wbr/>`).
 *
 * Attributes inside the tag are kept as they are; the slash is inserted directly before the closing `>`.
 * Tag names are matched case-sensitively.
 *
 * @param string $html the HTML to convert
 *
 * @return string the HTML with the affected tags written in self-closing form
 */
private function ensurePhpUnrecognizedSelfClosingTagsAreXml($html)
{
    // Matches the tag up to (but not including) its ">", and only if the last character before it is not "/".
    $unclosedVoidTagPattern = '%<' . static::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '\\b[^>]*+(?<!/)(?=>)%';

    return \preg_replace($unclosedVoidTagPattern, '$0/', $html);
}
/**
* @param string[] $a
* @param string[] $b
@ -1784,21 +1828,21 @@ class Emogrifier
if ($parseResult[static::MULTIPLIER] < 0) {
$parseResult[static::MULTIPLIER] = \abs($parseResult[static::MULTIPLIER]);
$xPathExpression = \sprintf(
'*[(last() - position()) mod %1%u = %2$u]/static::%3$s',
'*[(last() - position()) mod %1%u = %2$u]/self::%3$s',
$parseResult[static::MULTIPLIER],
$parseResult[static::INDEX],
$match[1]
);
} else {
$xPathExpression = \sprintf(
'*[position() mod %1$u = %2$u]/static::%3$s',
'*[position() mod %1$u = %2$u]/self::%3$s',
$parseResult[static::MULTIPLIER],
$parseResult[static::INDEX],
$match[1]
);
}
} else {
$xPathExpression = \sprintf('*[%1$u]/static::%2$s', $parseResult[static::INDEX], $match[1]);
$xPathExpression = \sprintf('*[%1$u]/self::%2$s', $parseResult[static::INDEX], $match[1]);
}
return $xPathExpression;