diff --git a/AvocadoEdition_Light/plugin/htmlpurifier/HTMLPurifier.standalone.php b/AvocadoEdition_Light/plugin/htmlpurifier/HTMLPurifier.standalone.php index 467218a..5e42bfe 100644 --- a/AvocadoEdition_Light/plugin/htmlpurifier/HTMLPurifier.standalone.php +++ b/AvocadoEdition_Light/plugin/htmlpurifier/HTMLPurifier.standalone.php @@ -7,7 +7,7 @@ * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS * FILE, changes will be overwritten the next time the script is run. * - * @version 4.8.0 + * @version 4.14.0 * * @warning * You must *not* include any other HTML Purifier files before this file, @@ -39,7 +39,7 @@ */ /* - HTML Purifier 4.8.0 - Standards Compliant HTML Filtering + HTML Purifier 4.14.0 - Standards Compliant HTML Filtering Copyright (C) 2006-2008 Edward Z. Yang This library is free software; you can redistribute it and/or @@ -78,12 +78,12 @@ class HTMLPurifier * Version of HTML Purifier. * @type string */ - public $version = '4.8.0'; + public $version = '4.14.0'; /** * Constant with version of HTML Purifier. */ - const VERSION = '4.8.0'; + const VERSION = '4.14.0'; /** * Global configuration object. @@ -260,12 +260,17 @@ class HTMLPurifier public function purifyArray($array_of_html, $config = null) { $context_array = array(); - foreach ($array_of_html as $key => $html) { - $array_of_html[$key] = $this->purify($html, $config); + $array = array(); + foreach($array_of_html as $key=>$value){ + if (is_array($value)) { + $array[$key] = $this->purifyArray($value, $config); + } else { + $array[$key] = $this->purify($value, $config); + } $context_array[$key] = $this->context; } $this->context = $context_array; - return $array_of_html; + return $array; } /** @@ -332,8 +337,8 @@ class HTMLPurifier_Arborize if ($token instanceof HTMLPurifier_Token_End) { $token->start = null; // [MUT] $r = array_pop($stack); - assert($r->name === $token->name); - assert(empty($token->attr)); + //assert($r->name === $token->name); + //assert(empty($token->attr)); $r->endCol = $token->col; $r->endLine = $token->line; $r->endArmor = $token->armor; @@ -345,7 +350,7 @@ class HTMLPurifier_Arborize $stack[] = $node; } } - assert(count($stack) == 1); + //assert(count($stack) == 1); return $stack[0]; } @@ -620,7 +625,13 @@ abstract class HTMLPurifier_AttrDef */ protected function mungeRgb($string) { - return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string); + $p = '\s*(\d+(\.\d+)?([%]?))\s*'; + + if (preg_match('/(rgba|hsla)\(/', $string)) { + return preg_replace('/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8,\11)', $string); + } + + return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string); } /** @@ -1301,6 +1312,22 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition ); $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition(); + $this->info['background-size'] = new HTMLPurifier_AttrDef_CSS_Composite( + array( + new HTMLPurifier_AttrDef_Enum( + array( + 'auto', + 'cover', + 'contain', + 'initial', + 'inherit', + ) + ), + new HTMLPurifier_AttrDef_CSS_Percentage(), + new HTMLPurifier_AttrDef_CSS_Length() + ) + ); + $border_color = $this->info['border-top-color'] = $this->info['border-bottom-color'] = @@ -1412,7 +1439,21 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true), - new HTMLPurifier_AttrDef_Enum(array('auto')) + new HTMLPurifier_AttrDef_Enum(array('auto', 'initial', 'inherit')) + ) + ); + $trusted_min_wh = new HTMLPurifier_AttrDef_CSS_Composite( + array( + new HTMLPurifier_AttrDef_CSS_Length('0'), + new HTMLPurifier_AttrDef_CSS_Percentage(true), + new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit')) + ) + ); + $trusted_max_wh = new HTMLPurifier_AttrDef_CSS_Composite( + array( + new HTMLPurifier_AttrDef_CSS_Length('0'), + new HTMLPurifier_AttrDef_CSS_Percentage(true), + new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit')) ) ); $max = $config->get('CSS.MaxImgLength'); @@ -1433,6 +1474,38 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition // For everyone else: $trusted_wh ); + $this->info['min-width'] = + $this->info['min-height'] = + $max === null ? + $trusted_min_wh : + new HTMLPurifier_AttrDef_Switch( + 'img', + // For img tags: + new HTMLPurifier_AttrDef_CSS_Composite( + array( + new HTMLPurifier_AttrDef_CSS_Length('0', $max), + new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit')) + ) + ), + // For everyone else: + $trusted_min_wh + ); + $this->info['max-width'] = + $this->info['max-height'] = + $max === null ? + $trusted_max_wh : + new HTMLPurifier_AttrDef_Switch( + 'img', + // For img tags: + new HTMLPurifier_AttrDef_CSS_Composite( + array( + new HTMLPurifier_AttrDef_CSS_Length('0', $max), + new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit')) + ) + ), + // For everyone else: + $trusted_max_wh + ); $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration(); @@ -1754,7 +1827,7 @@ class HTMLPurifier_Config * HTML Purifier's version * @type string */ - public $version = '4.8.0'; + public $version = '4.14.0'; /** * Whether or not to automatically finalize @@ -2066,7 +2139,7 @@ class HTMLPurifier_Config } // Raw type might be negative when using the fully optimized form - // of stdclass, which indicates allow_null == true + // of stdClass, which indicates allow_null == true $rtype = is_int($def) ? $def : $def->type; if ($rtype < 0) { $type = -$rtype; @@ -2141,7 +2214,7 @@ class HTMLPurifier_Config * maybeGetRawHTMLDefinition, which is more explicitly * named, instead. * - * @return HTMLPurifier_HTMLDefinition + * @return HTMLPurifier_HTMLDefinition|null */ public function getHTMLDefinition($raw = false, $optimized = false) { @@ -2160,7 +2233,7 @@ class HTMLPurifier_Config * maybeGetRawCSSDefinition, which is more explicitly * named, instead. * - * @return HTMLPurifier_CSSDefinition + * @return HTMLPurifier_CSSDefinition|null */ public function getCSSDefinition($raw = false, $optimized = false) { @@ -2179,7 +2252,7 @@ class HTMLPurifier_Config * maybeGetRawURIDefinition, which is more explicitly * named, instead. * - * @return HTMLPurifier_URIDefinition + * @return HTMLPurifier_URIDefinition|null */ public function getURIDefinition($raw = false, $optimized = false) { @@ -2201,7 +2274,7 @@ class HTMLPurifier_Config * maybe semantics is the "right thing to do." * * @throws HTMLPurifier_Exception - * @return HTMLPurifier_Definition + * @return HTMLPurifier_Definition|null */ public function getDefinition($type, $raw = false, $optimized = false) { @@ -2380,7 +2453,7 @@ class HTMLPurifier_Config } /** - * @return HTMLPurifier_HTMLDefinition + * @return HTMLPurifier_HTMLDefinition|null */ public function maybeGetRawHTMLDefinition() { @@ -2388,7 +2461,7 @@ class HTMLPurifier_Config } /** - * @return HTMLPurifier_CSSDefinition + * @return HTMLPurifier_CSSDefinition|null */ public function maybeGetRawCSSDefinition() { @@ -2396,7 +2469,7 @@ class HTMLPurifier_Config } /** - * @return HTMLPurifier_URIDefinition + * @return HTMLPurifier_URIDefinition|null */ public function maybeGetRawURIDefinition() { @@ -2536,7 +2609,7 @@ class HTMLPurifier_Config if ($index !== false) { $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); } - $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc(); + $mq = $mq_fix && version_compare(PHP_VERSION, '7.4.0', '<') && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc(); $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema); $ret = array(); @@ -2623,7 +2696,7 @@ class HTMLPurifier_Config // zip(tail(trace), trace) -- but PHP is not Haskell har har for ($i = 0, $c = count($trace); $i < $c - 1; $i++) { // XXX this is not correct on some versions of HTML Purifier - if ($trace[$i + 1]['class'] === 'HTMLPurifier_Config') { + if (isset($trace[$i + 1]['class']) && $trace[$i + 1]['class'] === 'HTMLPurifier_Config') { continue; } $frame = $trace[$i]; @@ -2678,11 +2751,11 @@ class HTMLPurifier_ConfigSchema * * array( * 'Namespace' => array( - * 'Directive' => new stdclass(), + * 'Directive' => new stdClass(), * ) * ) * - * The stdclass may have the following properties: + * The stdClass may have the following properties: * * - If isAlias isn't set: * - type: Integer type of directive, see HTMLPurifier_VarParser for definitions @@ -2693,8 +2766,8 @@ class HTMLPurifier_ConfigSchema * - namespace: Namespace this directive aliases to * - name: Directive name this directive aliases to * - * In certain degenerate cases, stdclass will actually be an integer. In - * that case, the value is equivalent to an stdclass with the type + * In certain degenerate cases, stdClass will actually be an integer. In + * that case, the value is equivalent to an stdClass with the type * property set to the integer. If the integer is negative, type is * equal to the absolute value of integer, and allow_null is true. * @@ -2754,12 +2827,12 @@ class HTMLPurifier_ConfigSchema * @param string $key Name of directive * @param mixed $default Default value of directive * @param string $type Allowed type of the directive. See - * HTMLPurifier_DirectiveDef::$type for allowed values + * HTMLPurifier_VarParser::$types for allowed values * @param bool $allow_null Whether or not to allow null values */ public function add($key, $default, $type, $allow_null) { - $obj = new stdclass(); + $obj = new stdClass(); $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type]; if ($allow_null) { $obj->allow_null = true; @@ -2806,14 +2879,14 @@ class HTMLPurifier_ConfigSchema */ public function addAlias($key, $new_key) { - $obj = new stdclass; + $obj = new stdClass; $obj->key = $new_key; $obj->isAlias = true; $this->info[$key] = $obj; } /** - * Replaces any stdclass that only has the type property with type integer. + * Replaces any stdClass that only has the type property with type integer. */ public function postProcess() { @@ -3728,7 +3801,7 @@ class HTMLPurifier_ElementDef if (!empty($def->content_model)) { $this->content_model = - str_replace("#SUPER", $this->content_model, $def->content_model); + str_replace("#SUPER", (string)$this->content_model, $def->content_model); $this->child = false; } if (!empty($def->content_model_type)) { @@ -3870,6 +3943,14 @@ class HTMLPurifier_Encoder * It will parse according to UTF-8 and return a valid UTF8 string, with * non-SGML codepoints excluded. * + * Specifically, it will permit: + * \x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF} + * Source: https://www.w3.org/TR/REC-xml/#NT-Char + * Arguably this function should be modernized to the HTML5 set + * of allowed characters: + * https://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream + * which simultaneously expand and restrict the set of allowed characters. + * * @param string $str The string to clean * @param bool $force_php * @return string @@ -3891,15 +3972,12 @@ class HTMLPurifier_Encoder * function that needs to be able to understand UTF-8 characters. * As of right now, only smart lossless character encoding converters * would need that, and I'm probably not going to implement them. - * Once again, PHP 6 should solve all our problems. */ public static function cleanUTF8($str, $force_php = false) { // UTF-8 validity is checked since PHP 4.3.5 // This is an optimization: if the string is already valid UTF-8, no // need to do PHP stuff. 99% of the time, this will be the case. - // The regexp matches the XML char production, as well as well as excluding - // non-SGML codepoints U+007F to U+009F if (preg_match( '/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str @@ -3923,7 +4001,7 @@ class HTMLPurifier_Encoder $len = strlen($str); for ($i = 0; $i < $len; $i++) { - $in = ord($str{$i}); + $in = ord($str[$i]); $char .= $str[$i]; // append byte to char if (0 == $mState) { // When mState is zero we expect either a US-ASCII character @@ -4024,6 +4102,7 @@ class HTMLPurifier_Encoder // 7F-9F is not strictly prohibited by XML, // but it is non-SGML, and thus we don't allow it (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) || + (0xE000 <= $mUcs4 && 0xFFFD >= $mUcs4) || (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4) ) ) { @@ -4446,6 +4525,138 @@ class HTMLPurifier_EntityParser */ protected $_entity_lookup; + /** + * Callback regex string for entities in text. + * @type string + */ + protected $_textEntitiesRegex; + + /** + * Callback regex string for entities in attributes. + * @type string + */ + protected $_attrEntitiesRegex; + + /** + * Tests if the beginning of a string is a semi-optional regex + */ + protected $_semiOptionalPrefixRegex; + + public function __construct() { + // From + // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon + $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml"; + + // NB: three empty captures to put the fourth match in the right + // place + $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/"; + + $this->_textEntitiesRegex = + '/&(?:'. + // hex + '[#]x([a-fA-F0-9]+);?|'. + // dec + '[#]0*(\d+);?|'. + // string (mandatory semicolon) + // NB: order matters: match semicolon preferentially + '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'. + // string (optional semicolon) + "($semi_optional)". + ')/'; + + $this->_attrEntitiesRegex = + '/&(?:'. + // hex + '[#]x([a-fA-F0-9]+);?|'. + // dec + '[#]0*(\d+);?|'. + // string (mandatory semicolon) + // NB: order matters: match semicolon preferentially + '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'. + // string (optional semicolon) + // don't match if trailing is equals or alphanumeric (URL + // like) + "($semi_optional)(?![=;A-Za-z0-9])". + ')/'; + + } + + /** + * Substitute entities with the parsed equivalents. Use this on + * textual data in an HTML document (as opposed to attributes.) + * + * @param string $string String to have entities parsed. + * @return string Parsed string. + */ + public function substituteTextEntities($string) + { + return preg_replace_callback( + $this->_textEntitiesRegex, + array($this, 'entityCallback'), + $string + ); + } + + /** + * Substitute entities with the parsed equivalents. Use this on + * attribute contents in documents. + * + * @param string $string String to have entities parsed. + * @return string Parsed string. + */ + public function substituteAttrEntities($string) + { + return preg_replace_callback( + $this->_attrEntitiesRegex, + array($this, 'entityCallback'), + $string + ); + } + + /** + * Callback function for substituteNonSpecialEntities() that does the work. + * + * @param array $matches PCRE matches array, with 0 the entire match, and + * either index 1, 2 or 3 set with a hex value, dec value, + * or string (respectively). + * @return string Replacement string. + */ + + protected function entityCallback($matches) + { + $entity = $matches[0]; + $hex_part = @$matches[1]; + $dec_part = @$matches[2]; + $named_part = empty($matches[3]) ? (empty($matches[4]) ? "" : $matches[4]) : $matches[3]; + if ($hex_part !== NULL && $hex_part !== "") { + return HTMLPurifier_Encoder::unichr(hexdec($hex_part)); + } elseif ($dec_part !== NULL && $dec_part !== "") { + return HTMLPurifier_Encoder::unichr((int) $dec_part); + } else { + if (!$this->_entity_lookup) { + $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); + } + if (isset($this->_entity_lookup->table[$named_part])) { + return $this->_entity_lookup->table[$named_part]; + } else { + // exact match didn't match anything, so test if + // any of the semicolon optional match the prefix. + // Test that this is an EXACT match is important to + // prevent infinite loop + if (!empty($matches[3])) { + return preg_replace_callback( + $this->_semiOptionalPrefixRegex, + array($this, 'entityCallback'), + $entity + ); + } + return $entity; + } + } + } + + // LEGACY CODE BELOW + /** * Callback regex string for parsing entities. * @type string @@ -4574,7 +4785,7 @@ class HTMLPurifier_EntityParser $entity; } else { return isset($this->_special_ent2dec[$matches[3]]) ? - $this->_special_ent2dec[$matches[3]] : + $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] : $entity; } } @@ -5120,7 +5331,7 @@ class HTMLPurifier_Generator $attr = $this->generateAttributes($token->attr, $token->name); if ($this->_flashCompat) { if ($token->name == "object") { - $flash = new stdclass(); + $flash = new stdClass(); $flash->attr = $token->attr; $flash->param = array(); $this->_flashStack[] = $flash; @@ -5887,9 +6098,9 @@ class HTMLPurifier_HTMLModule * @param string $element Name of element to add * @param string|bool $type What content set should element be registered to? * Set as false to skip this step. - * @param string $contents Allowed children in form of: + * @param string|HTMLPurifier_ChildDef $contents Allowed children in form of: * "$content_model_type: $content_model" - * @param array $attr_includes What attribute collections to register to + * @param array|string $attr_includes What attribute collections to register to * element? * @param array $attr What unique attributes does the element define? * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters. @@ -6012,8 +6223,9 @@ class HTMLPurifier_HTMLModule */ public function makeLookup($list) { + $args = func_get_args(); if (is_string($list)) { - $list = func_get_args(); + $list = $args; } $ret = array(); foreach ($list as $value) { @@ -6311,11 +6523,14 @@ class HTMLPurifier_HTMLModuleManager if ($config->get('HTML.TargetBlank')) { $modules[] = 'TargetBlank'; } - // NB: HTML.TargetNoreferrer must be AFTER HTML.TargetBlank + // NB: HTML.TargetNoreferrer and HTML.TargetNoopener must be AFTER HTML.TargetBlank // so that its post-attr-transform gets run afterwards. if ($config->get('HTML.TargetNoreferrer')) { $modules[] = 'TargetNoreferrer'; } + if ($config->get('HTML.TargetNoopener')) { + $modules[] = 'TargetNoopener'; + } // merge in custom modules $modules = array_merge($modules, $this->userModules); @@ -6720,11 +6935,13 @@ abstract class HTMLPurifier_Injector return false; } // check for exclusion - for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { - $node = $this->currentNesting[$i]; - $def = $this->htmlDefinition->info[$node->name]; - if (isset($def->excludes[$name])) { - return false; + if (!empty($this->currentNesting)) { + for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { + $node = $this->currentNesting[$i]; + $def = $this->htmlDefinition->info[$node->name]; + if (isset($def->excludes[$name])) { + return false; + } } } return true; @@ -7286,12 +7503,14 @@ class HTMLPurifier_Length protected $isValid; /** - * Array Lookup array of units recognized by CSS 2.1 + * Array Lookup array of units recognized by CSS 3 * @type array */ protected static $allowedUnits = array( 'em' => true, 'ex' => true, 'px' => true, 'in' => true, - 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true + 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true, + 'ch' => true, 'rem' => true, 'vw' => true, 'vh' => true, + 'vmin' => true, 'vmax' => true ); /** @@ -7336,7 +7555,7 @@ class HTMLPurifier_Length if ($this->n === '0' && $this->unit === false) { return true; } - if (!ctype_lower($this->unit)) { + if ($this->unit === false || !ctype_lower($this->unit)) { $this->unit = strtolower($this->unit); } if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) { @@ -7517,7 +7736,7 @@ class HTMLPurifier_Lexer break; } - if (class_exists('DOMDocument') && + if (class_exists('DOMDocument', false) && method_exists('DOMDocument', 'loadHTML') && !extension_loaded('domxml') ) { @@ -7590,21 +7809,24 @@ class HTMLPurifier_Lexer ''' => "'" ); + public function parseText($string, $config) { + return $this->parseData($string, false, $config); + } + + public function parseAttr($string, $config) { + return $this->parseData($string, true, $config); + } + /** * Parses special entities into the proper characters. * * This string will translate escaped versions of the special characters * into the correct ones. * - * @warning - * You should be able to treat the output of this function as - * completely parsed, but that's only because all other entities should - * have been handled previously in substituteNonSpecialEntities() - * * @param string $string String character data to be parsed. * @return string Parsed character data. */ - public function parseData($string) + public function parseData($string, $is_attr, $config) { // following functions require at least one character if ($string === '') { @@ -7630,7 +7852,15 @@ class HTMLPurifier_Lexer } // hmm... now we have some uncommon entities. Use the callback. - $string = $this->_entity_parser->substituteSpecialEntities($string); + if ($config->get('Core.LegacyEntityDecoder')) { + $string = $this->_entity_parser->substituteSpecialEntities($string); + } else { + if ($is_attr) { + $string = $this->_entity_parser->substituteAttrEntities($string); + } else { + $string = $this->_entity_parser->substituteTextEntities($string); + } + } return $string; } @@ -7716,8 +7946,8 @@ class HTMLPurifier_Lexer { // normalize newlines to \n if ($config->get('Core.NormalizeNewlines')) { - $html = str_replace("\r\n", "\n", $html); - $html = str_replace("\r", "\n", $html); + $html = str_replace("\r\n", "\n", (string)$html); + $html = str_replace("\r", "\n", (string)$html); } if ($config->get('HTML.Trusted')) { @@ -7744,7 +7974,9 @@ class HTMLPurifier_Lexer } // expand entities that aren't the big five - $html = $this->_entity_parser->substituteNonSpecialEntities($html); + if ($config->get('Core.LegacyEntityDecoder')) { + $html = $this->_entity_parser->substituteNonSpecialEntities($html); + } // clean into wellformed UTF-8 string for an SGML context: this has // to be done after entity expansion because the entities sometimes @@ -7756,6 +7988,13 @@ class HTMLPurifier_Lexer $html = preg_replace('#<\?.+?\?>#s', '', $html); } + $hidden_elements = $config->get('Core.HiddenElements'); + if ($config->get('Core.AggressivelyRemoveScript') && + !($config->get('HTML.Trusted') || !$config->get('Core.RemoveScriptContents') + || empty($hidden_elements["script"]))) { + $html = preg_replace('#]*>.*?#i', '', $html); + } + return $html; } @@ -8097,7 +8336,10 @@ class HTMLPurifier_PropertyListIterator extends FilterIterator /** * @return bool + * + * {@inheritdoc} */ + #[\ReturnTypeWillChange] public function accept() { $key = $this->getInnerIterator()->key(); @@ -8215,7 +8457,10 @@ class HTMLPurifier_StringHash extends ArrayObject * Retrieves a value, and logs the access. * @param mixed $index * @return mixed + * + * {@inheritdoc} */ + #[\ReturnTypeWillChange] public function offsetGet($index) { $this->accessed[$index] = true; @@ -8445,7 +8690,7 @@ abstract class HTMLPurifier_Token public $armor = array(); /** - * Used during MakeWellFormed. + * Used during MakeWellFormed. See Note [Injector skips] * @type */ public $skip; @@ -8724,11 +8969,13 @@ class HTMLPurifier_URI $def = $config->getDefinition('URI'); $scheme_obj = $def->getDefaultScheme($config, $context); if (!$scheme_obj) { - // something funky happened to the default scheme object - trigger_error( - 'Default scheme object "' . $def->defaultScheme . '" was not readable', - E_USER_WARNING - ); + if ($def->defaultScheme !== null) { + // something funky happened to the default scheme object + trigger_error( + 'Default scheme object "' . $def->defaultScheme . '" was not readable', + E_USER_WARNING + ); + } // suppress error if it's null return false; } } @@ -9714,34 +9961,34 @@ class HTMLPurifier_UnitConverter class HTMLPurifier_VarParser { - const STRING = 1; + const C_STRING = 1; const ISTRING = 2; const TEXT = 3; const ITEXT = 4; - const INT = 5; - const FLOAT = 6; - const BOOL = 7; + const C_INT = 5; + const C_FLOAT = 6; + const C_BOOL = 7; const LOOKUP = 8; const ALIST = 9; const HASH = 10; - const MIXED = 11; + const C_MIXED = 11; /** * Lookup table of allowed types. Mainly for backwards compatibility, but * also convenient for transforming string type names to the integer constants. */ public static $types = array( - 'string' => self::STRING, + 'string' => self::C_STRING, 'istring' => self::ISTRING, 'text' => self::TEXT, 'itext' => self::ITEXT, - 'int' => self::INT, - 'float' => self::FLOAT, - 'bool' => self::BOOL, + 'int' => self::C_INT, + 'float' => self::C_FLOAT, + 'bool' => self::C_BOOL, 'lookup' => self::LOOKUP, 'list' => self::ALIST, 'hash' => self::HASH, - 'mixed' => self::MIXED + 'mixed' => self::C_MIXED ); /** @@ -9749,7 +9996,7 @@ class HTMLPurifier_VarParser * allowed value lists. */ public static $stringTypes = array( - self::STRING => true, + self::C_STRING => true, self::ISTRING => true, self::TEXT => true, self::ITEXT => true, @@ -9781,7 +10028,7 @@ class HTMLPurifier_VarParser // These are basic checks, to make sure nothing horribly wrong // happened in our implementations. switch ($type) { - case (self::STRING): + case (self::C_STRING): case (self::ISTRING): case (self::TEXT): case (self::ITEXT): @@ -9792,17 +10039,17 @@ class HTMLPurifier_VarParser $var = strtolower($var); } return $var; - case (self::INT): + case (self::C_INT): if (!is_int($var)) { break; } return $var; - case (self::FLOAT): + case (self::C_FLOAT): if (!is_float($var)) { break; } return $var; - case (self::BOOL): + case (self::C_BOOL): if (!is_bool($var)) { break; } @@ -9826,7 +10073,7 @@ class HTMLPurifier_VarParser } } return $var; - case (self::MIXED): + case (self::C_MIXED): return $var; default: $this->errorInconsistent(get_class($this), $type); @@ -10103,13 +10350,38 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef $definition = $config->getCSSDefinition(); $allow_duplicates = $config->get("CSS.AllowDuplicates"); - // we're going to break the spec and explode by semicolons. - // This is because semicolon rarely appears in escaped form - // Doing this is generally flaky but fast - // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI - // for details - $declarations = explode(';', $css); + // According to the CSS2.1 spec, the places where a + // non-delimiting semicolon can appear are in strings + // escape sequences. So here is some dumb hack to + // handle quotes. + $len = strlen($css); + $accum = ""; + $declarations = array(); + $quoted = false; + for ($i = 0; $i < $len; $i++) { + $c = strcspn($css, ";'\"", $i); + $accum .= substr($css, $i, $c); + $i += $c; + if ($i == $len) break; + $d = $css[$i]; + if ($quoted) { + $accum .= $d; + if ($d == $quoted) { + $quoted = false; + } + } else { + if ($d == ";") { + $declarations[] = $accum; + $accum = ""; + } else { + $accum .= $d; + $quoted = $d; + } + } + } + if ($accum != "") $declarations[] = $accum; + $propvalues = array(); $new_declarations = ''; @@ -10743,7 +11015,13 @@ class HTMLPurifier_AttrDef_CSS_Number extends HTMLPurifier_AttrDef return false; } - $left = ltrim($left, '0'); + // Remove leading zeros until positive number or a zero stays left + if (ltrim($left, '0') != '') { + $left = ltrim($left, '0'); + } else { + $left = '0'; + } + $right = rtrim($right, '0'); if ($right === '') { @@ -10819,6 +11097,7 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef $this->info['background-repeat'] = $def->info['background-repeat']; $this->info['background-attachment'] = $def->info['background-attachment']; $this->info['background-position'] = $def->info['background-position']; + $this->info['background-size'] = $def->info['background-size']; } /** @@ -10847,6 +11126,7 @@ class HTMLPurifier_AttrDef_CSS_Background extends HTMLPurifier_AttrDef $caught['repeat'] = false; $caught['attachment'] = false; $caught['position'] = false; + $caught['size'] = false; $i = 0; // number of catches @@ -11127,6 +11407,16 @@ class HTMLPurifier_AttrDef_CSS_Border extends HTMLPurifier_AttrDef class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef { + /** + * @type HTMLPurifier_AttrDef_CSS_AlphaValue + */ + protected $alpha; + + public function __construct() + { + $this->alpha = new HTMLPurifier_AttrDef_CSS_AlphaValue(); + } + /** * @param string $color * @param HTMLPurifier_Config $config @@ -11150,59 +11440,104 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef return $colors[$lower]; } - if (strpos($color, 'rgb(') !== false) { - // rgb literal handling + if (preg_match('#(rgb|rgba|hsl|hsla)\(#', $color, $matches) === 1) { $length = strlen($color); if (strpos($color, ')') !== $length - 1) { return false; } - $triad = substr($color, 4, $length - 4 - 1); - $parts = explode(',', $triad); - if (count($parts) !== 3) { + + // get used function : rgb, rgba, hsl or hsla + $function = $matches[1]; + + $parameters_size = 3; + $alpha_channel = false; + if (substr($function, -1) === 'a') { + $parameters_size = 4; + $alpha_channel = true; + } + + /* + * Allowed types for values : + * parameter_position => [type => max_value] + */ + $allowed_types = array( + 1 => array('percentage' => 100, 'integer' => 255), + 2 => array('percentage' => 100, 'integer' => 255), + 3 => array('percentage' => 100, 'integer' => 255), + ); + $allow_different_types = false; + + if (strpos($function, 'hsl') !== false) { + $allowed_types = array( + 1 => array('integer' => 360), + 2 => array('percentage' => 100), + 3 => array('percentage' => 100), + ); + $allow_different_types = true; + } + + $values = trim(str_replace($function, '', $color), ' ()'); + + $parts = explode(',', $values); + if (count($parts) !== $parameters_size) { return false; } - $type = false; // to ensure that they're all the same type + + $type = false; $new_parts = array(); + $i = 0; + foreach ($parts as $part) { + $i++; $part = trim($part); + if ($part === '') { return false; } - $length = strlen($part); - if ($part[$length - 1] === '%') { - // handle percents - if (!$type) { - $type = 'percentage'; - } elseif ($type !== 'percentage') { + + // different check for alpha channel + if ($alpha_channel === true && $i === count($parts)) { + $result = $this->alpha->validate($part, $config, $context); + + if ($result === false) { return false; } - $num = (float)substr($part, 0, $length - 1); - if ($num < 0) { - $num = 0; - } - if ($num > 100) { - $num = 100; - } - $new_parts[] = "$num%"; + + $new_parts[] = (string)$result; + continue; + } + + if (substr($part, -1) === '%') { + $current_type = 'percentage'; } else { - // handle integers - if (!$type) { - $type = 'integer'; - } elseif ($type !== 'integer') { - return false; - } - $num = (int)$part; - if ($num < 0) { - $num = 0; - } - if ($num > 255) { - $num = 255; - } - $new_parts[] = (string)$num; + $current_type = 'integer'; + } + + if (!array_key_exists($current_type, $allowed_types[$i])) { + return false; + } + + if (!$type) { + $type = $current_type; + } + + if ($allow_different_types === false && $type != $current_type) { + return false; + } + + $max_value = $allowed_types[$i][$current_type]; + + if ($current_type == 'integer') { + // Return value between range 0 -> $max_value + $new_parts[] = (int)max(min($part, $max_value), 0); + } elseif ($current_type == 'percentage') { + $new_parts[] = (float)max(min(rtrim($part, '%'), $max_value), 0) . '%'; } } - $new_triad = implode(',', $new_parts); - $color = "rgb($new_triad)"; + + $new_values = implode(',', $new_parts); + + $color = $function . '(' . $new_values . ')'; } else { // hexadecimal handling if ($color[0] === '#') { @@ -11221,6 +11556,7 @@ class HTMLPurifier_AttrDef_CSS_Color extends HTMLPurifier_AttrDef } return $color; } + } @@ -12336,7 +12672,7 @@ class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef { /** - * @type bool + * @type string */ protected $name; @@ -12346,7 +12682,7 @@ class HTMLPurifier_AttrDef_HTML_Bool extends HTMLPurifier_AttrDef public $minimized = true; /** - * @param bool $name + * @param bool|string $name */ public function __construct($name = false) { @@ -13089,7 +13425,11 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef // PHP 5.3 and later support this functionality natively if (function_exists('idn_to_ascii')) { - return idn_to_ascii($string); + if (defined('IDNA_NONTRANSITIONAL_TO_ASCII') && defined('INTL_IDNA_VARIANT_UTS46')) { + $string = idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46); + } else { + $string = idn_to_ascii($string); + } // If we have Net_IDNA2 support, we can support IRIs by // punycoding them. (This is the most portable thing to do, @@ -13115,13 +13455,14 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef } } $string = implode('.', $new_parts); - if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { - return $string; - } } catch (Exception $e) { // XXX error reporting } } + // Try again + if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { + return $string; + } return false; } } @@ -14105,6 +14446,44 @@ class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform +// must be called POST validation + +/** + * Adds rel="noopener" to any links which target a different window + * than the current one. This is used to prevent malicious websites + * from silently replacing the original window, which could be used + * to do phishing. + * This transform is controlled by %HTML.TargetNoopener. + */ +class HTMLPurifier_AttrTransform_TargetNoopener extends HTMLPurifier_AttrTransform +{ + /** + * @param array $attr + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return array + */ + public function transform($attr, $config, $context) + { + if (isset($attr['rel'])) { + $rels = explode(' ', $attr['rel']); + } else { + $rels = array(); + } + if (isset($attr['target']) && !in_array('noopener', $rels)) { + $rels[] = 'noopener'; + } + if (!empty($rels) || isset($attr['rel'])) { + $attr['rel'] = implode(' ', $rels); + } + + return $attr; + } +} + + + + // must be called POST validation /** @@ -14284,7 +14663,7 @@ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef protected function _compileRegex() { $raw = str_replace(' ', '', $this->dtd_regex); - if ($raw{0} != '(') { + if ($raw[0] != '(') { $raw = "($raw)"; } $el = '[#a-zA-Z0-9_.-]+'; @@ -14431,7 +14810,7 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef // a little sanity check to make sure it's not ALL whitespace $all_whitespace = true; - $current_li = false; + $current_li = null; foreach ($children as $node) { if (!empty($node->is_whitespace)) { @@ -14452,7 +14831,7 @@ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef // to handle non-list elements; non-list elements should // not be appended to an existing li; only li created // for non-list. This distinction is not currently made. - if ($current_li === false) { + if ($current_li === null) { $current_li = new HTMLPurifier_Node_Element('li'); $result[] = $current_li; } @@ -14914,7 +15293,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef } } - if (empty($content)) { + if (empty($content) && $thead === false && $tfoot === false) { return false; } @@ -14953,7 +15332,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef $current_tr_tbody->children[] = $node; break; case '#PCDATA': - assert($node->is_whitespace); + //assert($node->is_whitespace); if ($current_tr_tbody === null) { $ret[] = $node; } else { @@ -15277,6 +15656,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac } unlink($dir . '/' . $filename); } + closedir($dh); return true; } @@ -15307,6 +15687,7 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac unlink($dir . '/' . $filename); } } + closedir($dh); return true; } @@ -15363,11 +15744,8 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac if ($result !== false) { // set permissions of the new file (no execute) $chmod = $config->get('Cache.SerializerPermissions'); - if ($chmod === null) { - // don't do anything - } else { - $chmod = $chmod & 0666; - chmod($file, $chmod); + if ($chmod !== null) { + chmod($file, $chmod & 0666); } } return $result; @@ -15382,6 +15760,16 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac { $directory = $this->generateDirectoryPath($config); $chmod = $config->get('Cache.SerializerPermissions'); + if ($chmod === null) { + if (!@mkdir($directory) && !is_dir($directory)) { + trigger_error( + 'Could not create directory ' . $directory . '', + E_USER_WARNING + ); + return false; + } + return true; + } if (!is_dir($directory)) { $base = $this->generateBaseDirectoryPath($config); if (!is_dir($base)) { @@ -15394,25 +15782,14 @@ class HTMLPurifier_DefinitionCache_Serializer extends HTMLPurifier_DefinitionCac } elseif (!$this->_testPermissions($base, $chmod)) { return false; } - if ($chmod === null) { + if (!@mkdir($directory, $chmod) && !is_dir($directory)) { trigger_error( - 'Base directory ' . $base . ' does not exist, - please create or change using %Cache.SerializerPath', + 'Could not create directory ' . $directory . '', E_USER_WARNING ); return false; } - if ($chmod !== null) { - mkdir($directory, $chmod); - } else { - mkdir($directory); - } if (!$this->_testPermissions($directory, $chmod)) { - trigger_error( - 'Base directory ' . $base . ' does not exist, - please create or change using %Cache.SerializerPath', - E_USER_WARNING - ); return false; } } elseif (!$this->_testPermissions($directory, $chmod)) { @@ -15805,6 +16182,10 @@ class HTMLPurifier_HTMLModule_Forms extends HTMLPurifier_HTMLModule */ public function setup($config) { + if ($config->get('HTML.Forms')) { + $this->safe = true; + } + $form = $this->addElement( 'form', 'Form', @@ -16735,13 +17116,13 @@ class HTMLPurifier_HTMLModule_SafeScripting extends HTMLPurifier_HTMLModule $script = $this->addElement( 'script', 'Inline', - 'Empty', + 'Optional:', // Not `Empty` to not allow to autoclose the