woocommerce/apigen/libs/Texy/texy/modules/TexyHtmlModule.php

282 lines
6.6 KiB
PHP
Raw Normal View History

2014-02-14 14:29:30 +00:00
<?php
/**
* Texy! - human-readable text to HTML converter.
*
* @copyright Copyright (c) 2004, 2010 David Grudl
* @license GNU GENERAL PUBLIC LICENSE version 2 or 3
* @link http://texy.info
* @package Texy
*/
/**
* Html tags module.
*
* @copyright Copyright (c) 2004, 2010 David Grudl
* @package Texy
*/
final class TexyHtmlModule extends TexyModule
{
    /** @var bool pass HTML comments to output? */
    public $passComment = TRUE;


    /**
     * Attaches the module to a Texy instance: registers the default
     * 'htmlComment' / 'htmlTag' handlers and the two line patterns that
     * recognise raw HTML tags and HTML comments in the input.
     *
     * @param  object  Texy instance (must provide addHandler() and registerLinePattern())
     */
    public function __construct($texy)
    {
        $this->texy = $texy;

        $texy->addHandler('htmlComment', array($this, 'solveComment'));
        $texy->addHandler('htmlTag', array($this, 'solveTag'));

        $texy->registerLinePattern(
            array($this, 'patternTag'),
            '#<(/?)([a-z][a-z0-9_:-]*)((?:\s+[a-z0-9:-]+|=\s*"[^"'.TEXY_MARK.']*"|=\s*\'[^\''.TEXY_MARK.']*\'|=[^\s>'.TEXY_MARK.']+)*)\s*(/?)>#isu',
            'html/tag'
        );

        $texy->registerLinePattern(
            array($this, 'patternComment'),
            '#<!--([^'.TEXY_MARK.']*?)-->#is',
            'html/comment'
        );
    }


    /**
     * Callback for: <!-- comment -->.
     *
     * Forwards the comment text to the 'htmlComment' handler chain.
     *
     * @param  TexyLineParser
     * @param  array  regexp matches ([1] => comment content)
     * @return TexyHtml|string|FALSE
     */
    public function patternComment($parser, $matches)
    {
        list(, $mComment) = $matches;
        return $this->texy->invokeAroundHandlers('htmlComment', $parser, array($mComment));
    }


    /**
     * Callback for: <tag attr="...">.
     *
     * Splits the matched tag into name / attributes / start-end-empty flags,
     * builds a TexyHtml element and forwards it to the 'htmlTag' handler chain.
     * Returns FALSE for malformed tags (an end tag that is also marked empty,
     * or an end tag carrying attributes).
     *
     * @param  TexyLineParser
     * @param  array  regexp matches
     * @return TexyHtml|string|FALSE
     */
    public function patternTag($parser, $matches)
    {
        list(, $mEnd, $mTag, $mAttr, $mEmpty) = $matches;
        // [1] => /
        // [2] => tag
        // [3] => attributes
        // [4] => /

        $tx = $this->texy;

        $isStart = $mEnd !== '/';
        $isEmpty = $mEmpty === '/';

        // the closing slash may have been swallowed by the attribute group
        // (e.g. an unquoted value directly followed by "/>")
        if (!$isEmpty && substr($mAttr, -1) === '/') {
            $mAttr = substr($mAttr, 0, -1);
            $isEmpty = TRUE;
        }

        // error - can't close empty element
        if ($isEmpty && !$isStart) {
            return FALSE;
        }

        // error - end element with attributes
        $mAttr = trim(strtr($mAttr, "\n", ' '));
        if ($mAttr && !$isStart) {
            return FALSE;
        }

        $el = TexyHtml::el($mTag);

        if ($isStart) {
            // parse attributes; the trailing empty group () forces index 2 to be
            // present in every match, even for attributes without a value
            $matches2 = array();
            preg_match_all(
                '#([a-z0-9:-]+)\s*(?:=\s*(\'[^\']*\'|"[^"]*"|[^\'"\s]+))?()#isu',
                $mAttr,
                $matches2,
                PREG_SET_ORDER
            );

            foreach ($matches2 as $m) {
                $key = strtolower($m[1]);
                $value = $m[2];

                if ($value === '') {
                    // attribute without a value, e.g. <input disabled>
                    $el->attrs[$key] = TRUE;

                } elseif ($value[0] === '\'' || $value[0] === '"') {
                    // quoted value: strip the surrounding quotes.
                    // NOTE: square-bracket offsets are used on purpose; the
                    // curly-brace form $value{0} is a parse error since PHP 8.0.
                    $el->attrs[$key] = Texy::unescapeHtml(substr($value, 1, -1));

                } else {
                    // unquoted value
                    $el->attrs[$key] = Texy::unescapeHtml($value);
                }
            }
        }

        $res = $tx->invokeAroundHandlers('htmlTag', $parser, array($el, $isStart, $isEmpty));

        if ($res instanceof TexyHtml) {
            // handlers returned an element: emit only the relevant half of the tag
            return $tx->protect($isStart ? $res->startTag() : $res->endTag(), $res->getContentType());
        }

        return $res;
    }


    /**
     * Finish invocation: default 'htmlTag' handler.
     *
     * Normalises the tag name, then filters the element against the Texy
     * configuration (allowedTags, allowed classes/IDs, allowed styles), checks
     * URLs of <img> and <a>, records images/links in the summary and registers
     * HTML headings in the heading module's TOC.
     *
     * @param  TexyHandlerInvocation  handler invocation
     * @param  TexyHtml  element
     * @param  bool  is start tag?
     * @param  bool  is empty?
     * @return TexyHtml|string|FALSE  the (modified) element, or FALSE when the tag is rejected
     */
    public function solveTag($invocation, TexyHtml $el, $isStart, $forceEmpty = NULL)
    {
        $tx = $this->texy;

        // tag & attributes
        $allowedTags = $tx->allowedTags; // speed-up
        if (!$allowedTags) {
            return FALSE; // all tags are disabled
        }

        // convert case: known DTD elements and completely UPPER-CASE names become lower-case
        $name = $el->getName();
        $lower = strtolower($name);
        if (isset($tx->dtd[$lower]) || $name === strtoupper($name)) {
            $name = $lower;
            $el->setName($name);
        }

        if (is_array($allowedTags)) {
            if (!isset($allowedTags[$name])) {
                return FALSE;
            }
            $allowedAttrs = $allowedTags[$name]; // allowed attrs

        } else {
            // allowedTags === Texy::ALL
            if ($forceEmpty) {
                $el->setName($name, TRUE);
            }
            $allowedAttrs = Texy::ALL; // all attrs are allowed
        }

        // end tag? we are finished
        if (!$isStart) {
            return $el;
        }

        $elAttrs = & $el->attrs;

        // process attributes
        if (!$allowedAttrs) {
            $elAttrs = array();

        } elseif (is_array($allowedAttrs)) {
            // skip disabled
            $allowedAttrs = array_flip($allowedAttrs);
            foreach ($elAttrs as $key => $foo) {
                if (!isset($allowedAttrs[$key])) {
                    unset($elAttrs[$key]);
                }
            }
        }

        // apply allowedClasses
        $allowedClasses = $tx->_classes; // speed-up
        if (isset($elAttrs['class'])) {
            if (is_array($allowedClasses)) {
                $elAttrs['class'] = explode(' ', $elAttrs['class']);
                foreach ($elAttrs['class'] as $key => $value) {
                    // id & class are case-sensitive
                    if (!isset($allowedClasses[$value])) {
                        unset($elAttrs['class'][$key]);
                    }
                }

            } elseif ($allowedClasses !== Texy::ALL) {
                $elAttrs['class'] = NULL;
            }
        }

        // apply allowedClasses for ID (IDs are looked up with a '#' prefix)
        if (isset($elAttrs['id'])) {
            if (is_array($allowedClasses)) {
                if (!isset($allowedClasses['#' . $elAttrs['id']])) {
                    $elAttrs['id'] = NULL;
                }

            } elseif ($allowedClasses !== Texy::ALL) {
                $elAttrs['id'] = NULL;
            }
        }

        // apply allowedStyles
        if (isset($elAttrs['style'])) {
            $allowedStyles = $tx->_styles; // speed-up
            if (is_array($allowedStyles)) {
                $declarations = explode(';', $elAttrs['style']);
                $elAttrs['style'] = NULL;
                foreach ($declarations as $declaration) {
                    $pair = explode(':', $declaration, 2);
                    $prop = trim($pair[0]);
                    // CSS is case-insensitive
                    if (isset($pair[1]) && isset($allowedStyles[strtolower($prop)])) {
                        $elAttrs['style'][$prop] = $pair[1];
                    }
                }

            } elseif ($allowedStyles !== Texy::ALL) {
                $elAttrs['style'] = NULL;
            }
        }

        if ($name === 'img') {
            // image must have a source that passes the URL filter
            if (!isset($elAttrs['src'])) {
                return FALSE;
            }
            if (!$tx->checkURL($elAttrs['src'], Texy::FILTER_IMAGE)) {
                return FALSE;
            }
            $tx->summary['images'][] = $elAttrs['src'];

        } elseif ($name === 'a') {
            // anchor must be either a link or a named/identified target
            if (!isset($elAttrs['href']) && !isset($elAttrs['name']) && !isset($elAttrs['id'])) {
                return FALSE;
            }

            if (isset($elAttrs['href'])) {
                // links containing '//' get rel="nofollow" when forced by the link module
                if ($tx->linkModule->forceNoFollow && strpos($elAttrs['href'], '//') !== FALSE) {
                    if (isset($elAttrs['rel'])) {
                        $elAttrs['rel'] = (array) $elAttrs['rel'];
                    }
                    $elAttrs['rel'][] = 'nofollow';
                }

                if (!$tx->checkURL($elAttrs['href'], Texy::FILTER_ANCHOR)) {
                    return FALSE;
                }

                $tx->summary['links'][] = $elAttrs['href'];
            }

        } elseif (preg_match('#^h[1-6]\z#i', $name)) {
            // exactly h1..h6; the pattern is anchored at the end so that other
            // names merely starting with "h1".."h6" are not treated as headings
            $tx->headingModule->TOC[] = array(
                'el' => $el,
                'level' => (int) substr($name, 1),
                'type' => 'html',
            );
        }

        $el->validateAttrs($tx->dtd);

        return $el;
    }


    /**
     * Finish invocation: default 'htmlComment' handler.
     *
     * Returns an empty string when $passComment is disabled; otherwise returns
     * the sanitized comment wrapped in <!-- --> and passed through protect().
     *
     * @param  TexyHandlerInvocation  handler invocation
     * @param  string  comment content (without the <!-- --> delimiters)
     * @return string
     */
    public function solveComment($invocation, $content)
    {
        if (!$this->passComment) {
            return '';
        }

        // sanitize comment: collapse runs of dashes and strip leading/trailing
        // ones so the content cannot contain "--"
        $content = preg_replace('#-{2,}#', '-', $content);
        $content = trim($content, '-');

        return $this->texy->protect('<!--' . $content . '-->', Texy::CONTENT_MARKUP);
    }
}