Update Emogrifier

This commit is contained in:
Mike Jolley 2019-01-04 14:47:49 +00:00
parent cea9b59523
commit 2edf517b2e
6 changed files with 2681 additions and 416 deletions

View File

@ -0,0 +1,154 @@
<?php
namespace Pelago\Emogrifier;
/**
* Facilitates building a CSS string by appending rule blocks one at a time, checking whether the media query,
* selectors, or declarations block are the same as those from the preceding block and combining blocks in such cases.
*
* Example:
* $concatenator = new CssConcatenator();
* $concatenator->append(['body'], 'color: blue;');
* $concatenator->append(['body'], 'font-size: 16px;');
* $concatenator->append(['p'], 'margin: 1em 0;');
* $concatenator->append(['ul', 'ol'], 'margin: 1em 0;');
* $concatenator->append(['body'], 'font-size: 14px;', '@media screen and (max-width: 400px)');
* $concatenator->append(['ul', 'ol'], 'margin: 0.75em 0;', '@media screen and (max-width: 400px)');
* $css = $concatenator->getCss();
*
* `$css` (if unminified) would contain the following CSS:
* ` body {
* ` color: blue;
* ` font-size: 16px;
* ` }
* ` p, ul, ol {
* ` margin: 1em 0;
* ` }
* ` @media screen and (max-width: 400px) {
* ` body {
* ` font-size: 14px;
* ` }
* ` ul, ol {
* ` margin: 0.75em 0;
* ` }
* ` }
*
* @author Jake Hotson <jake.github@qzdesign.co.uk>
*/
class CssConcatenator
{
/**
* Array of media rules in order. Each element is an object with the following properties:
* - string `media` - The media query string, e.g. "@media screen and (max-width:639px)", or an empty string for
* rules not within a media query block;
* - \stdClass[] `ruleBlocks` - Array of rule blocks in order, where each element is an object with the following
* properties:
* - mixed[] `selectorsAsKeys` - Array whose keys are selectors for the rule block (values are of no
* significance);
* - string `declarationsBlock` - The property declarations, e.g. "margin-top: 0.5em; padding: 0".
*
* @var \stdClass[]
*/
private $mediaRules = [];
/**
* Appends a declaration block to the CSS.
*
* @param string[] $selectors Array of selectors for the rule, e.g. ["ul", "ol", "p:first-child"].
* @param string $declarationsBlock The property declarations, e.g. "margin-top: 0.5em; padding: 0".
* @param string $media The media query for the rule, e.g. "@media screen and (max-width:639px)",
* or an empty string if none.
*/
public function append(array $selectors, $declarationsBlock, $media = '')
{
$selectorsAsKeys = \array_flip($selectors);
$mediaRule = $this->getOrCreateMediaRuleToAppendTo($media);
$lastRuleBlock = \end($mediaRule->ruleBlocks);
$hasSameDeclarationsAsLastRule = $lastRuleBlock !== false
&& $declarationsBlock === $lastRuleBlock->declarationsBlock;
if ($hasSameDeclarationsAsLastRule) {
$lastRuleBlock->selectorsAsKeys += $selectorsAsKeys;
} else {
$hasSameSelectorsAsLastRule = $lastRuleBlock !== false
&& static::hasEquivalentSelectors($selectorsAsKeys, $lastRuleBlock->selectorsAsKeys);
if ($hasSameSelectorsAsLastRule) {
$lastDeclarationsBlockWithoutSemicolon = \rtrim(\rtrim($lastRuleBlock->declarationsBlock), ';');
$lastRuleBlock->declarationsBlock = $lastDeclarationsBlockWithoutSemicolon . ';' . $declarationsBlock;
} else {
$mediaRule->ruleBlocks[] = (object)\compact('selectorsAsKeys', 'declarationsBlock');
}
}
}
/**
* @return string
*/
public function getCss()
{
return \implode('', \array_map([$this, 'getMediaRuleCss'], $this->mediaRules));
}
/**
* @param string $media The media query for rules to be appended, e.g. "@media screen and (max-width:639px)",
* or an empty string if none.
*
* @return \stdClass Object with properties as described for elements of `$mediaRules`.
*/
private function getOrCreateMediaRuleToAppendTo($media)
{
$lastMediaRule = \end($this->mediaRules);
if ($lastMediaRule !== false && $media === $lastMediaRule->media) {
return $lastMediaRule;
}
$newMediaRule = (object)[
'media' => $media,
'ruleBlocks' => [],
];
$this->mediaRules[] = $newMediaRule;
return $newMediaRule;
}
/**
* Tests if two sets of selectors are equivalent (i.e. the same selectors, possibly in a different order).
*
* @param mixed[] $selectorsAsKeys1 Array in which the selectors are the keys, and the values are of no
* significance.
* @param mixed[] $selectorsAsKeys2 Another such array.
*
* @return bool
*/
private static function hasEquivalentSelectors(array $selectorsAsKeys1, array $selectorsAsKeys2)
{
return \count($selectorsAsKeys1) === \count($selectorsAsKeys2)
&& \count($selectorsAsKeys1) === \count($selectorsAsKeys1 + $selectorsAsKeys2);
}
/**
* @param \stdClass $mediaRule Object with properties as described for elements of `$mediaRules`.
*
* @return string CSS for the media rule.
*/
private static function getMediaRuleCss(\stdClass $mediaRule)
{
$css = \implode('', \array_map([static::class, 'getRuleBlockCss'], $mediaRule->ruleBlocks));
if ($mediaRule->media !== '') {
$css = $mediaRule->media . '{' . $css . '}';
}
return $css;
}
/**
* @param \stdClass $ruleBlock Object with properties as described for elements of the `ruleBlocks` property of
* elements of `$mediaRules`.
*
* @return string CSS for the rule block.
*/
private static function getRuleBlockCss(\stdClass $ruleBlock)
{
$selectors = \array_keys($ruleBlock->selectorsAsKeys);
return \implode(',', $selectors) . '{' . $ruleBlock->declarationsBlock . '}';
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,221 @@
<?php
namespace Pelago\Emogrifier\HtmlProcessor;
/**
* Base class for HTML processor that e.g., can remove, add or modify nodes or attributes.
*
* The "vanilla" subclass is the HtmlNormalizer.
*
* @internal This class currently is a new technology preview, and its API is still in flux. Don't use it in production.
*
* @author Oliver Klee <github@oliverklee.de>
*/
abstract class AbstractHtmlProcessor
{
/**
* @var string
*/
const DEFAULT_DOCUMENT_TYPE = '<!DOCTYPE html>';
/**
* @var string
*/
const CONTENT_TYPE_META_TAG = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
/**
* @var \DOMDocument
*/
protected $domDocument = null;
/**
* @param string $unprocessedHtml raw HTML, must be UTF-encoded, must not be empty
*
* @throws \InvalidArgumentException if $unprocessedHtml is anything other than a non-empty string
*/
public function __construct($unprocessedHtml)
{
if (!\is_string($unprocessedHtml)) {
throw new \InvalidArgumentException('The provided HTML must be a string.', 1515459744);
}
if ($unprocessedHtml === '') {
throw new \InvalidArgumentException('The provided HTML must not be empty.', 1515763647);
}
$this->setHtml($unprocessedHtml);
}
/**
* Sets the HTML to process.
*
* @param string $html the HTML to process, must be UTF-8-encoded
*
* @return void
*/
private function setHtml($html)
{
$this->createUnifiedDomDocument($html);
}
/**
* Provides access to the internal DOMDocument representation of the HTML in its current state.
*
* @return \DOMDocument
*/
public function getDomDocument()
{
return $this->domDocument;
}
/**
* Renders the normalized and processed HTML.
*
* @return string
*/
public function render()
{
return $this->domDocument->saveHTML();
}
/**
* Renders the content of the BODY element of the normalized and processed HTML.
*
* @return string
*/
public function renderBodyContent()
{
$bodyNodeHtml = $this->domDocument->saveHTML($this->getBodyElement());
return \str_replace(['<body>', '</body>'], '', $bodyNodeHtml);
}
/**
* Returns the BODY element.
*
* This method assumes that there always is a BODY element.
*
* @return \DOMElement
*/
private function getBodyElement()
{
return $this->domDocument->getElementsByTagName('body')->item(0);
}
/**
* Creates a DOM document from the given HTML and stores it in $this->domDocument.
*
* The DOM document will always have a BODY element and a document type.
*
* @param string $html
*
* @return void
*/
private function createUnifiedDomDocument($html)
{
$this->createRawDomDocument($html);
$this->ensureExistenceOfBodyElement();
}
/**
* Creates a DOMDocument instance from the given HTML and stores it in $this->domDocument.
*
* @param string $html
*
* @return void
*/
private function createRawDomDocument($html)
{
$domDocument = new \DOMDocument();
$domDocument->strictErrorChecking = false;
$domDocument->formatOutput = true;
$libXmlState = \libxml_use_internal_errors(true);
$domDocument->loadHTML($this->prepareHtmlForDomConversion($html));
\libxml_clear_errors();
\libxml_use_internal_errors($libXmlState);
$this->domDocument = $domDocument;
}
/**
* Returns the HTML with added document type and Content-Type meta tag if needed,
* ensuring that the HTML will be good for creating a DOM document from it.
*
* @param string $html
*
* @return string the unified HTML
*/
private function prepareHtmlForDomConversion($html)
{
$htmlWithDocumentType = $this->ensureDocumentType($html);
return $this->addContentTypeMetaTag($htmlWithDocumentType);
}
/**
* Makes sure that the passed HTML has a document type.
*
* @param string $html
*
* @return string HTML with document type
*/
private function ensureDocumentType($html)
{
$hasDocumentType = \stripos($html, '<!DOCTYPE') !== false;
if ($hasDocumentType) {
return $html;
}
return static::DEFAULT_DOCUMENT_TYPE . $html;
}
/**
* Adds a Content-Type meta tag for the charset.
*
* This method also ensures that there is a HEAD element.
* @param string $html
*
* @return string the HTML with the meta tag added
*/
private function addContentTypeMetaTag($html)
{
$hasContentTypeMetaTag = \stripos($html, 'Content-Type') !== false;
if ($hasContentTypeMetaTag) {
return $html;
}
// We are trying to insert the meta tag to the right spot in the DOM.
// If we just prepended it to the HTML, we would lose attributes set to the HTML tag.
$hasHeadTag = \stripos($html, '<head') !== false;
$hasHtmlTag = \stripos($html, '<html') !== false;
if ($hasHeadTag) {
$reworkedHtml = \preg_replace('/<head(.*?)>/i', '<head$1>' . static::CONTENT_TYPE_META_TAG, $html);
} elseif ($hasHtmlTag) {
$reworkedHtml = \preg_replace(
'/<html(.*?)>/i',
'<html$1><head>' . static::CONTENT_TYPE_META_TAG . '</head>',
$html
);
} else {
$reworkedHtml = static::CONTENT_TYPE_META_TAG . $html;
}
return $reworkedHtml;
}
/**
* Checks that $this->domDocument has a BODY element and adds it if it is missing.
*
* @return void
*/
private function ensureExistenceOfBodyElement()
{
if ($this->domDocument->getElementsByTagName('body')->item(0) !== null) {
return;
}
$htmlElement = $this->domDocument->getElementsByTagName('html')->item(0);
$htmlElement->appendChild($this->domDocument->createElement('body'));
}
}

View File

@ -0,0 +1,320 @@
<?php
namespace Pelago\Emogrifier\HtmlProcessor;
/**
* This HtmlProcessor can convert style HTML attributes to the corresponding other visual HTML attributes,
* e.g. it converts style="width: 100px" to width="100".
*
* It will only add attributes, but leaves the style attribute untouched.
*
* To trigger the conversion, call the convertCssToVisualAttributes method.
*
* @internal This class currently is a new technology preview, and its API is still in flux. Don't use it in production.
*
* @author Oliver Klee <github@oliverklee.de>
*/
class CssToAttributeConverter extends AbstractHtmlProcessor
{
/**
* This multi-level array contains simple mappings of CSS properties to
* HTML attributes. If a mapping only applies to certain HTML nodes or
* only for certain values, the mapping is an object with a whitelist
* of nodes and values.
*
* @var mixed[][]
*/
private $cssToHtmlMap = [
'background-color' => [
'attribute' => 'bgcolor',
],
'text-align' => [
'attribute' => 'align',
'nodes' => ['p', 'div', 'td'],
'values' => ['left', 'right', 'center', 'justify'],
],
'float' => [
'attribute' => 'align',
'nodes' => ['table', 'img'],
'values' => ['left', 'right'],
],
'border-spacing' => [
'attribute' => 'cellspacing',
'nodes' => ['table'],
],
];
/**
* @var string[][]
*/
private static $parsedCssCache = [];
/**
* Maps the CSS from the style nodes to visual HTML attributes.
*
* @return CssToAttributeConverter fluent interface
*/
public function convertCssToVisualAttributes()
{
/** @var \DOMElement $node */
foreach ($this->getAllNodesWithStyleAttribute() as $node) {
$inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$this->mapCssToHtmlAttributes($inlineStyleDeclarations, $node);
}
return $this;
}
/**
* Returns a list with all DOM nodes that have a style attribute.
*
* @return \DOMNodeList
*/
private function getAllNodesWithStyleAttribute()
{
$xPath = new \DOMXPath($this->domDocument);
return $xPath->query('//*[@style]');
}
/**
* Parses a CSS declaration block into property name/value pairs.
*
* Example:
*
* The declaration block
*
* "color: #000; font-weight: bold;"
*
* will be parsed into the following array:
*
* "color" => "#000"
* "font-weight" => "bold"
*
* @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
*
* @return string[]
* the CSS declarations with the property names as array keys and the property values as array values
*/
private function parseCssDeclarationsBlock($cssDeclarationsBlock)
{
if (isset(self::$parsedCssCache[$cssDeclarationsBlock])) {
return self::$parsedCssCache[$cssDeclarationsBlock];
}
$properties = [];
$declarations = \preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock);
foreach ($declarations as $declaration) {
$matches = [];
if (!\preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($declaration), $matches)) {
continue;
}
$propertyName = \strtolower($matches[1]);
$propertyValue = $matches[2];
$properties[$propertyName] = $propertyValue;
}
self::$parsedCssCache[$cssDeclarationsBlock] = $properties;
return $properties;
}
/**
* Applies $styles to $node.
*
* This method maps CSS styles to HTML attributes and adds those to the
* node.
*
* @param string[] $styles the new CSS styles taken from the global styles to be applied to this node
* @param \DOMElement $node node to apply styles to
*
* @return void
*/
private function mapCssToHtmlAttributes(array $styles, \DOMElement $node)
{
foreach ($styles as $property => $value) {
// Strip !important indicator
$value = \trim(\str_replace('!important', '', $value));
$this->mapCssToHtmlAttribute($property, $value, $node);
}
}
/**
* Tries to apply the CSS style to $node as an attribute.
*
* This method maps a CSS rule to HTML attributes and adds those to the node.
*
* @param string $property the name of the CSS property to map
* @param string $value the value of the style rule to map
* @param \DOMElement $node node to apply styles to
*
* @return void
*/
private function mapCssToHtmlAttribute($property, $value, \DOMElement $node)
{
if (!$this->mapSimpleCssProperty($property, $value, $node)) {
$this->mapComplexCssProperty($property, $value, $node);
}
}
/**
* Looks up the CSS property in the mapping table and maps it if it matches the conditions.
*
* @param string $property the name of the CSS property to map
* @param string $value the value of the style rule to map
* @param \DOMElement $node node to apply styles to
*
* @return bool true if the property can be mapped using the simple mapping table
*/
private function mapSimpleCssProperty($property, $value, \DOMElement $node)
{
if (!isset($this->cssToHtmlMap[$property])) {
return false;
}
$mapping = $this->cssToHtmlMap[$property];
$nodesMatch = !isset($mapping['nodes']) || \in_array($node->nodeName, $mapping['nodes'], true);
$valuesMatch = !isset($mapping['values']) || \in_array($value, $mapping['values'], true);
if (!$nodesMatch || !$valuesMatch) {
return false;
}
$node->setAttribute($mapping['attribute'], $value);
return true;
}
/**
* Maps CSS properties that need special transformation to an HTML attribute.
*
* @param string $property the name of the CSS property to map
* @param string $value the value of the style rule to map
* @param \DOMElement $node node to apply styles to
*
* @return void
*/
private function mapComplexCssProperty($property, $value, \DOMElement $node)
{
switch ($property) {
case 'background':
$this->mapBackgroundProperty($node, $value);
break;
case 'width':
// intentional fall-through
case 'height':
$this->mapWidthOrHeightProperty($node, $value, $property);
break;
case 'margin':
$this->mapMarginProperty($node, $value);
break;
case 'border':
$this->mapBorderProperty($node, $value);
break;
default:
}
}
/**
* @param \DOMElement $node node to apply styles to
* @param string $value the value of the style rule to map
*
* @return void
*/
private function mapBackgroundProperty(\DOMElement $node, $value)
{
// parse out the color, if any
$styles = \explode(' ', $value);
$first = $styles[0];
if (!\is_numeric($first[0]) && \strpos($first, 'url') !== 0) {
// as this is not a position or image, assume it's a color
$node->setAttribute('bgcolor', $first);
}
}
/**
* @param \DOMElement $node node to apply styles to
* @param string $value the value of the style rule to map
* @param string $property the name of the CSS property to map
*
* @return void
*/
private function mapWidthOrHeightProperty(\DOMElement $node, $value, $property)
{
// only parse values in px and %, but not values like "auto"
if (!\preg_match('/^(\\d+)(px|%)$/', $value)) {
return;
}
$number = \preg_replace('/[^0-9.%]/', '', $value);
$node->setAttribute($property, $number);
}
/**
* @param \DOMElement $node node to apply styles to
* @param string $value the value of the style rule to map
*
* @return void
*/
private function mapMarginProperty(\DOMElement $node, $value)
{
if (!$this->isTableOrImageNode($node)) {
return;
}
$margins = $this->parseCssShorthandValue($value);
if ($margins['left'] === 'auto' && $margins['right'] === 'auto') {
$node->setAttribute('align', 'center');
}
}
/**
* @param \DOMElement $node node to apply styles to
* @param string $value the value of the style rule to map
*
* @return void
*/
private function mapBorderProperty(\DOMElement $node, $value)
{
if (!$this->isTableOrImageNode($node)) {
return;
}
if ($value === 'none' || $value === '0') {
$node->setAttribute('border', '0');
}
}
/**
* @param \DOMElement $node
*
* @return bool
*/
private function isTableOrImageNode(\DOMElement $node)
{
return $node->nodeName === 'table' || $node->nodeName === 'img';
}
/**
* Parses a shorthand CSS value and splits it into individual values
*
* @param string $value a string of CSS value with 1, 2, 3 or 4 sizes
* For example: padding: 0 auto;
* '0 auto' is split into top: 0, left: auto, bottom: 0,
* right: auto.
*
* @return string[] an array of values for top, right, bottom and left (using these as associative array keys)
*/
private function parseCssShorthandValue($value)
{
$values = \preg_split('/\\s+/', $value);
$css = [];
$css['top'] = $values[0];
$css['right'] = (\count($values) > 1) ? $values[1] : $css['top'];
$css['bottom'] = (\count($values) > 2) ? $values[2] : $css['top'];
$css['left'] = (\count($values) > 3) ? $values[3] : $css['right'];
return $css;
}
}

View File

@ -0,0 +1,18 @@
<?php
namespace Pelago\Emogrifier\HtmlProcessor;
/**
* Normalizes HTML:
* - add a document type (HTML5) if missing
* - disentangle incorrectly nested tags
* - add HEAD and BODY elements (if they are missing)
* - reformat the HTML
*
* @internal This class currently is a new technology preview, and its API is still in flux. Don't use it in production.
*
* @author Oliver Klee <github@oliverklee.de>
*/
class HtmlNormalizer extends AbstractHtmlProcessor
{
}

File diff suppressed because it is too large Load Diff