diff --git a/includes/libraries/Emogrifier/CssInliner.php b/includes/libraries/Emogrifier/CssInliner.php index 847767ea414..d765e844875 100755 --- a/includes/libraries/Emogrifier/CssInliner.php +++ b/includes/libraries/Emogrifier/CssInliner.php @@ -2,6 +2,7 @@ namespace Pelago\Emogrifier; +use Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor; use Symfony\Component\CssSelector\CssSelectorConverter; use Symfony\Component\CssSelector\Exception\SyntaxErrorException; @@ -21,7 +22,7 @@ use Symfony\Component\CssSelector\Exception\SyntaxErrorException; * @author Sander Kruger * @author Zoli Szabó */ -class CssInliner +class CssInliner extends AbstractHtmlProcessor { /** * @var int @@ -53,26 +54,6 @@ class CssInliner */ const PSEUDO_CLASS_MATCHER = '\\S+\\-(?:child|type\\()|not\\([[:ascii:]]*\\)'; - /** - * @var string - */ - const CONTENT_TYPE_META_TAG = ''; - - /** - * @var string - */ - const DEFAULT_DOCUMENT_TYPE = ''; - - /** - * @var \DOMDocument - */ - protected $domDocument = null; - - /** - * @var string - */ - private $css = ''; - /** * @var bool[] */ @@ -81,7 +62,7 @@ class CssInliner /** * @var string[] */ - private $unprocessableHtmlTags = ['wbr']; + private $unprocessableHtmlTags = []; /** * @var bool[] @@ -170,90 +151,15 @@ class CssInliner private $debug = false; /** - * @param string $unprocessedHtml raw HTML, must be UTF-encoded, must not be empty - * - * @throws \InvalidArgumentException if $unprocessedHtml is anything other than a non-empty string + * @return CssSelectorConverter */ - public function __construct($unprocessedHtml) + private function getCssSelectorConverter() { - if (!\is_string($unprocessedHtml)) { - throw new \InvalidArgumentException('The provided HTML must be a string.', 1540403176); - } - if ($unprocessedHtml === '') { - throw new \InvalidArgumentException('The provided HTML must not be empty.', 1540403181); + if ($this->cssSelectorConverter === null) { + $this->cssSelectorConverter = new CssSelectorConverter(); } - $this->cssSelectorConverter = new CssSelectorConverter(); - - $this->setHtml($unprocessedHtml); - } - - /** - * Sets the HTML to process. - * - * @param string $html the HTML to process, must be UTF-8-encoded - * - * @return void - */ - private function setHtml($html) - { - $this->createUnifiedDomDocument($html); - } - - /** - * Provides access to the internal DOMDocument representation of the HTML in its current state. - * - * @return \DOMDocument - */ - public function getDomDocument() - { - return $this->domDocument; - } - - /** - * Sets the CSS to merge with the HTML. - * - * @param string $css the CSS to merge, must be UTF-8-encoded - * - * @return void - */ - public function setCss($css) - { - $this->css = $css; - } - - /** - * Renders the normalized and processed HTML. - * - * @return string - */ - public function render() - { - return $this->domDocument->saveHTML(); - } - - /** - * Renders the content of the BODY element of the normalized and processed HTML. - * - * @return string - */ - public function renderBodyContent() - { - $bodyNodeHtml = $this->domDocument->saveHTML($this->getBodyElement()); - - return \str_replace(['', ''], '', $bodyNodeHtml); - } - - /** - * Returns the BODY element. - * - * This method assumes that there always is a BODY element. - * - * @return \DOMElement - */ - private function getBodyElement() - { - return $this->domDocument->getElementsByTagName('body')->item(0); + return $this->cssSelectorConverter; } /** @@ -269,101 +175,15 @@ class CssInliner } /** - * Applies $this->css to the given HTML and returns the HTML with the CSS - * applied. + * Inlines the given CSS into the existing HTML. * - * This method places the CSS inline. + * @param string $css the CSS to inline, must be UTF-8-encoded * - * @return string + * @return CssInliner fluent interface * * @throws SyntaxErrorException */ - public function emogrify() - { - $this->process(); - - return $this->render(); - } - - /** - * Applies $this->css to the given HTML and returns only the HTML content - * within the tag. - * - * This method places the CSS inline. - * - * @return string - * - * @throws SyntaxErrorException - */ - public function emogrifyBodyContent() - { - $this->process(); - - return $this->renderBodyContent(); - } - - /** - * Creates a DOM document from the given HTML and stores it in $this->domDocument. - * - * The DOM document will always have a BODY element and a document type. - * - * @param string $html - * - * @return void - */ - private function createUnifiedDomDocument($html) - { - $this->createRawDomDocument($html); - $this->ensureExistenceOfBodyElement(); - } - - /** - * Creates a DOMDocument instance from the given HTML and stores it in $this->domDocument. - * - * @param string $html - * - * @return void - */ - private function createRawDomDocument($html) - { - $domDocument = new \DOMDocument(); - $domDocument->encoding = 'UTF-8'; - $domDocument->strictErrorChecking = false; - $domDocument->formatOutput = true; - $libXmlState = \libxml_use_internal_errors(true); - $domDocument->loadHTML($this->prepareHtmlForDomConversion($html)); - \libxml_clear_errors(); - \libxml_use_internal_errors($libXmlState); - $domDocument->normalizeDocument(); - - $this->domDocument = $domDocument; - } - - /** - * Returns the HTML with added document type and Content-Type meta tag if needed, - * ensuring that the HTML will be good for creating a DOM document from it. - * - * @param string $html - * - * @return string the unified HTML - */ - private function prepareHtmlForDomConversion($html) - { - $htmlWithDocumentType = $this->ensureDocumentType($html); - - return $this->addContentTypeMetaTag($htmlWithDocumentType); - } - - /** - * Applies $this->css to $this->domDocument. - * - * This method places the CSS inline. - * - * @return void - * - * @throws SyntaxErrorException - */ - protected function process() + public function inlineCss($css) { $this->clearAllCaches(); $this->purgeVisitedNodes(); @@ -372,18 +192,19 @@ class CssInliner $this->removeUnprocessableTags(); $this->normalizeStyleAttributesOfAllNodes($xPath); - // grab any existing style blocks from the html and append them to the existing CSS + $combinedCss = $css; + // grab any existing style blocks from the HTML and append them to the existing CSS // (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS) - $allCss = $this->css; if ($this->isStyleBlocksParsingEnabled) { - $allCss .= $this->getCssFromAllStyleNodes($xPath); + $combinedCss .= $this->getCssFromAllStyleNodes($xPath); } $excludedNodes = $this->getNodesToExclude($xPath); - $cssRules = $this->parseCssRules($allCss); + $cssRules = $this->parseCssRules($combinedCss); + $cssSelectorConverter = $this->getCssSelectorConverter(); foreach ($cssRules['inlineable'] as $cssRule) { try { - $nodesMatchingCssSelectors = $xPath->query($this->cssSelectorConverter->toXPath($cssRule['selector'])); + $nodesMatchingCssSelectors = $xPath->query($cssSelectorConverter->toXPath($cssRule['selector'])); } catch (SyntaxErrorException $e) { if ($this->debug) { throw $e; @@ -408,6 +229,8 @@ class CssInliner $this->removeImportantAnnotationFromAllInlineStyles($xPath); $this->copyUninlineableCssToStyleNode($xPath, $cssRules['uninlineable']); + + return $this; } /** @@ -609,9 +432,9 @@ class CssInliner /** * Disables the removal of elements with `display: none` properties. * - * @deprecated will be removed in Emogrifier 3.0 - * * @return void + * + * @deprecated will be removed in Emogrifier 3.0 */ public function disableInvisibleNodeRemoval() { @@ -997,7 +820,7 @@ class CssInliner private function existsMatchForCssSelector(\DOMXPath $xPath, $cssSelector) { try { - $nodesMatchingSelector = $xPath->query($this->cssSelectorConverter->toXPath($cssSelector)); + $nodesMatchingSelector = $xPath->query($this->getCssSelectorConverter()->toXPath($cssSelector)); } catch (SyntaxErrorException $e) { if ($this->debug) { throw $e; @@ -1038,7 +861,7 @@ class CssInliner * * This method is protected to allow overriding. * - * @see https://github.com/jjriv/emogrifier/issues/103 + * @see https://github.com/MyIntervals/emogrifier/issues/103 * * @param string $css * @@ -1055,21 +878,6 @@ class CssInliner $headElement->appendChild($styleElement); } - /** - * Checks that $this->domDocument has a BODY element and adds it if it is missing. - * - * @return void - */ - private function ensureExistenceOfBodyElement() - { - if ($this->domDocument->getElementsByTagName('body')->item(0) !== null) { - return; - } - - $htmlElement = $this->domDocument->getElementsByTagName('html')->item(0); - $htmlElement->appendChild($this->domDocument->createElement('body')); - } - /** * Splits input CSS code into an array of parts for different media querues, in order. * Each part is an array where: @@ -1154,7 +962,11 @@ class CssInliner private function removeUnprocessableTags() { foreach ($this->unprocessableHtmlTags as $tagName) { - $nodes = $this->domDocument->getElementsByTagName($tagName); + // Deleting nodes from a 'live' NodeList invalidates iteration on it, so a copy must be made to iterate. + $nodes = []; + foreach ($this->domDocument->getElementsByTagName($tagName) as $node) { + $nodes[] = $node; + } /** @var \DOMNode $node */ foreach ($nodes as $node) { $hasContent = $node->hasChildNodes() || $node->hasChildNodes(); @@ -1165,59 +977,6 @@ class CssInliner } } - /** - * Makes sure that the passed HTML has a document type. - * - * @param string $html - * - * @return string HTML with document type - */ - private function ensureDocumentType($html) - { - $hasDocumentType = \stripos($html, '/i', '' . static::CONTENT_TYPE_META_TAG, $html); - } elseif ($hasHtmlTag) { - $reworkedHtml = \preg_replace( - '//i', - '' . static::CONTENT_TYPE_META_TAG . '', - $html - ); - } else { - $reworkedHtml = static::CONTENT_TYPE_META_TAG . $html; - } - - return $reworkedHtml; - } - /** * @param string[] $a * @param string[] $b @@ -1317,7 +1076,7 @@ class CssInliner $excludedNodes = []; foreach (\array_keys($this->excludedSelectors) as $selectorToExclude) { try { - $matchingNodes = $xPath->query($this->cssSelectorConverter->toXPath($selectorToExclude)); + $matchingNodes = $xPath->query($this->getCssSelectorConverter()->toXPath($selectorToExclude)); } catch (SyntaxErrorException $e) { if ($this->debug) { throw $e; diff --git a/includes/libraries/Emogrifier/HtmlProcessor/AbstractHtmlProcessor.php b/includes/libraries/Emogrifier/HtmlProcessor/AbstractHtmlProcessor.php index c5a25eefa6b..6c8b0e094e5 100755 --- a/includes/libraries/Emogrifier/HtmlProcessor/AbstractHtmlProcessor.php +++ b/includes/libraries/Emogrifier/HtmlProcessor/AbstractHtmlProcessor.php @@ -23,6 +23,15 @@ abstract class AbstractHtmlProcessor */ const CONTENT_TYPE_META_TAG = ''; + /** + * @var string Regular expression part to match tag names that PHP's DOMDocument implementation is not aware are + * self-closing. These are mostly HTML5 elements, but for completeness (obsolete) and + * (deprecated) are also included. + * + * @see https://bugs.php.net/bug.php?id=73175 + */ + const PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER = '(?:command|embed|keygen|source|track|wbr)'; + /** * @var \DOMDocument */ @@ -74,7 +83,9 @@ abstract class AbstractHtmlProcessor */ public function render() { - return $this->domDocument->saveHTML(); + $htmlWithPossibleErroneousClosingTags = $this->domDocument->saveHTML(); + + return $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags); } /** @@ -84,11 +95,24 @@ abstract class AbstractHtmlProcessor */ public function renderBodyContent() { - $bodyNodeHtml = $this->domDocument->saveHTML($this->getBodyElement()); + $htmlWithPossibleErroneousClosingTags = $this->domDocument->saveHTML($this->getBodyElement()); + $bodyNodeHtml = $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags); return \str_replace(['', ''], '', $bodyNodeHtml); } + /** + * Eliminates any invalid closing tags for void elements from the given HTML. + * + * @param string $html + * + * @return string + */ + private function removeSelfClosingTagsClosingTags($html) + { + return \preg_replace('%%', '', $html); + } + /** * Returns the BODY element. * @@ -137,7 +161,7 @@ abstract class AbstractHtmlProcessor } /** - * Returns the HTML with added document type and Content-Type meta tag if needed, + * Returns the HTML with added document type, Content-Type meta tag, and self-closing slashes, if needed, * ensuring that the HTML will be good for creating a DOM document from it. * * @param string $html @@ -146,7 +170,8 @@ abstract class AbstractHtmlProcessor */ private function prepareHtmlForDomConversion($html) { - $htmlWithDocumentType = $this->ensureDocumentType($html); + $htmlWithSelfClosingSlashes = $this->ensurePhpUnrecognizedSelfClosingTagsAreXml($html); + $htmlWithDocumentType = $this->ensureDocumentType($htmlWithSelfClosingSlashes); return $this->addContentTypeMetaTag($htmlWithDocumentType); } @@ -172,7 +197,7 @@ abstract class AbstractHtmlProcessor * Adds a Content-Type meta tag for the charset. * * This method also ensures that there is a HEAD element. - + * * @param string $html * * @return string the HTML with the meta tag added @@ -204,6 +229,23 @@ abstract class AbstractHtmlProcessor return $reworkedHtml; } + /** + * Makes sure that any self-closing tags not recognized as such by PHP's DOMDocument implementation have a + * self-closing slash. + * + * @param string $html + * + * @return string HTML with problematic tags converted. + */ + private function ensurePhpUnrecognizedSelfClosingTagsAreXml($html) + { + return \preg_replace( + '%<' . static::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '\\b[^>]*+(?)%', + '$0/', + $html + ); + } + /** * Checks that $this->domDocument has a BODY element and adds it if it is missing. * diff --git a/includes/libraries/class-emogrifier.php b/includes/libraries/class-emogrifier.php index 08f5eb6af42..5bca94ef90d 100644 --- a/includes/libraries/class-emogrifier.php +++ b/includes/libraries/class-emogrifier.php @@ -85,6 +85,15 @@ class Emogrifier */ const DEFAULT_DOCUMENT_TYPE = ''; + /** + * @var string Regular expression part to match tag names that PHP's DOMDocument implementation is not aware are + * self-closing. These are mostly HTML5 elements, but for completeness (obsolete) and + * (deprecated) are also included. + * + * @see https://bugs.php.net/bug.php?id=73175 + */ + const PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER = '(?:command|embed|keygen|source|track|wbr)'; + /** * @var \DOMDocument */ @@ -321,7 +330,9 @@ class Emogrifier */ protected function render() { - return $this->domDocument->saveHTML(); + $htmlWithPossibleErroneousClosingTags = $this->domDocument->saveHTML(); + + return $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags); } /** @@ -331,11 +342,24 @@ class Emogrifier */ protected function renderBodyContent() { - $bodyNodeHtml = $this->domDocument->saveHTML($this->getBodyElement()); + $htmlWithPossibleErroneousClosingTags = $this->domDocument->saveHTML($this->getBodyElement()); + $bodyNodeHtml = $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags); return \str_replace(['', ''], '', $bodyNodeHtml); } + /** + * Eliminates any invalid closing tags for void elements from the given HTML. + * + * @param string $html + * + * @return string + */ + private function removeSelfClosingTagsClosingTags($html) + { + return \preg_replace('%%', '', $html); + } + /** * Returns the BODY element. * @@ -437,20 +461,18 @@ class Emogrifier private function createRawDomDocument($html) { $domDocument = new \DOMDocument(); - $domDocument->encoding = 'UTF-8'; $domDocument->strictErrorChecking = false; $domDocument->formatOutput = true; $libXmlState = \libxml_use_internal_errors(true); $domDocument->loadHTML($this->prepareHtmlForDomConversion($html)); \libxml_clear_errors(); \libxml_use_internal_errors($libXmlState); - $domDocument->normalizeDocument(); $this->domDocument = $domDocument; } /** - * Returns the HTML with added document type and Content-Type meta tag if needed, + * Returns the HTML with added document type, Content-Type meta tag, and self-closing slashes, if needed, * ensuring that the HTML will be good for creating a DOM document from it. * * @param string $html @@ -459,7 +481,8 @@ class Emogrifier */ private function prepareHtmlForDomConversion($html) { - $htmlWithDocumentType = $this->ensureDocumentType($html); + $htmlWithSelfClosingSlashes = $this->ensurePhpUnrecognizedSelfClosingTagsAreXml($html); + $htmlWithDocumentType = $this->ensureDocumentType($htmlWithSelfClosingSlashes); return $this->addContentTypeMetaTag($htmlWithDocumentType); } @@ -1402,7 +1425,7 @@ class Emogrifier * * This method is protected to allow overriding. * - * @see https://github.com/jjriv/emogrifier/issues/103 + * @see https://github.com/MyIntervals/emogrifier/issues/103 * * @param string $css * @@ -1518,7 +1541,11 @@ class Emogrifier private function removeUnprocessableTags() { foreach ($this->unprocessableHtmlTags as $tagName) { - $nodes = $this->domDocument->getElementsByTagName($tagName); + // Deleting nodes from a 'live' NodeList invalidates iteration on it, so a copy must be made to iterate. + $nodes = []; + foreach ($this->domDocument->getElementsByTagName($tagName) as $node) { + $nodes[] = $node; + } /** @var \DOMNode $node */ foreach ($nodes as $node) { $hasContent = $node->hasChildNodes() || $node->hasChildNodes(); @@ -1582,6 +1609,23 @@ class Emogrifier return $reworkedHtml; } + /** + * Makes sure that any self-closing tags not recognized as such by PHP's DOMDocument implementation have a + * self-closing slash. + * + * @param string $html + * + * @return string HTML with problematic tags converted. + */ + private function ensurePhpUnrecognizedSelfClosingTagsAreXml($html) + { + return \preg_replace( + '%<' . static::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '\\b[^>]*+(?)%', + '$0/', + $html + ); + } + /** * @param string[] $a * @param string[] $b @@ -1784,21 +1828,21 @@ class Emogrifier if ($parseResult[static::MULTIPLIER] < 0) { $parseResult[static::MULTIPLIER] = \abs($parseResult[static::MULTIPLIER]); $xPathExpression = \sprintf( - '*[(last() - position()) mod %1%u = %2$u]/static::%3$s', + '*[(last() - position()) mod %1%u = %2$u]/self::%3$s', $parseResult[static::MULTIPLIER], $parseResult[static::INDEX], $match[1] ); } else { $xPathExpression = \sprintf( - '*[position() mod %1$u = %2$u]/static::%3$s', + '*[position() mod %1$u = %2$u]/self::%3$s', $parseResult[static::MULTIPLIER], $parseResult[static::INDEX], $match[1] ); } } else { - $xPathExpression = \sprintf('*[%1$u]/static::%2$s', $parseResult[static::INDEX], $match[1]); + $xPathExpression = \sprintf('*[%1$u]/self::%2$s', $parseResult[static::INDEX], $match[1]); } return $xPathExpression;