From 7798c2771142fd67dd37a45106fe2b6cdfb150d5 Mon Sep 17 00:00:00 2001 From: Felipe Nunes Azambuja Date: Wed, 8 Apr 2020 21:25:25 -0300 Subject: [PATCH] update to php 7.3 regex update --- .../simplehtmldom_1_5/simple_html_dom.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php b/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php index 279d589..6fa1363 100644 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php @@ -73,7 +73,7 @@ // ----------------------------------------------------------------------------- // get html dom from file // $maxlen is defined in the code as PHP_STREAM_COPY_ALL which is defined as -1. -function file_get_html($url, $use_include_path = false, $context=null, $offset=0, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) +function file_get_html($url, $use_include_path = false, $context=null, $offset = -1, $maxLen=-1, $lowercase = true, $forceTagsClosed=true, $target_charset = DEFAULT_TARGET_CHARSET, $stripRN=true, $defaultBRText=DEFAULT_BR_TEXT, $defaultSpanText=DEFAULT_SPAN_TEXT) { // We DO force the tags to be terminated. $dom = new simple_html_dom(null, $lowercase, $forceTagsClosed, $target_charset, $stripRN, $defaultBRText, $defaultSpanText); @@ -693,7 +693,7 @@ protected function parse_selector($selector_string) { // This implies that an html attribute specifier may start with an @ sign that is NOT captured by the expression. // farther study is required to determine of this should be documented or removed. // $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; - $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; + $pattern = "/([\w\-:\*]*)(?:\#([\w\-]+)|\.([\w\-]+))?(?:\[@?(!?[\w\-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER); if (is_object($debugObject)) {$debugObject->debugLog(2, "Matches Array: ", $matches);} @@ -899,7 +899,7 @@ function get_display_size() { // Thanks to user gnarf from stackoverflow for this regular expression. $attributes = array(); - preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER); + preg_match_all("/([\w\-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER); foreach ($matches as $match) { $attributes[$match[1]] = $match[2]; } @@ -952,8 +952,8 @@ function get_display_size() } // camel naming conventions - function getAllAttributes() {return array_map('html_entity_decode', $this->attr);} - function getAttribute($name) {return html_entity_decode($this->__get($name));} + function getAllAttributes() {return $this->attr;} + function getAttribute($name) {return $this->__get($name);} function setAttribute($name, $value) {$this->__set($name, $value);} function hasAttribute($name) {return $this->__isset($name);} function removeAttribute($name) {$this->__set($name, null);} @@ -1245,7 +1245,7 @@ protected function parse_charset() if (empty($charset)) { // Have php try to detect the encoding from the text given to us. - $charset = (function_exists('mb_detect_encoding')) ? mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) ) : false; + $charset = mb_detect_encoding($this->root->plaintext . "ascii", $encoding_list = array( "UTF-8", "CP1252" ) ); if (is_object($debugObject)) {$debugObject->debugLog(2, 'mb_detect found: ' . $charset);} // and if this doesn't work... then we need to just wrongheadedly assume it's UTF-8 so that we can move on - cause this will usually give us most of what we need... @@ -1375,7 +1375,7 @@ protected function read_tag() return true; } - if (!preg_match("/^[\w-:]+$/", $tag)) { + if (!preg_match("/^[\w\-:]+$/", $tag)) { $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>'); if ($this->char==='<') { $this->link_nodes($node, false);