From 4d5f99c4f93985def30563c66d1bd6dd0a242b20 Mon Sep 17 00:00:00 2001 From: bmooreitul Date: Tue, 28 Mar 2023 15:03:56 -0700 Subject: [PATCH] Update regex patterns Fix legacy pattern matching to be compatible with PHP 7.4+. These changes still work with older versions of php (like 5.2+). PHP 7.4 requires escaping dashes (\-). The only changes are from \w- to \w\- This modernizes while maintaining legacy support. --- Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php b/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php index 279d589..075c746 100644 --- a/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php +++ b/Src/Sunra/PhpSimple/simplehtmldom_1_5/simple_html_dom.php @@ -692,8 +692,8 @@ protected function parse_selector($selector_string) { // Notice the \[ starting the attbute? and the @? following? This implies that an attribute can begin with an @ sign that is not captured. // This implies that an html attribute specifier may start with an @ sign that is NOT captured by the expression. // farther study is required to determine of this should be documented or removed. -// $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; - $pattern = "/([\w-:\*]*)(?:\#([\w-]+)|\.([\w-]+))?(?:\[@?(!?[\w-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; +// $pattern = "/([\w\-:\*]*)(?:\#([\w\-]+)|\.([\w\-]+))?(?:\[@?(!?[\w\-]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; + $pattern = "/([\w\-:\*]*)(?:\#([\w\-]+)|\.([\w\-]+))?(?:\[@?(!?[\w\-:]+)(?:([!*^$]?=)[\"']?(.*?)[\"']?)?\])?([\/, ]+)/is"; preg_match_all($pattern, trim($selector_string).' ', $matches, PREG_SET_ORDER); if (is_object($debugObject)) {$debugObject->debugLog(2, "Matches Array: ", $matches);} @@ -899,7 +899,7 @@ function get_display_size() { // Thanks to user gnarf from stackoverflow for this regular expression. $attributes = array(); - preg_match_all("/([\w-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER); + preg_match_all("/([\w\-]+)\s*:\s*([^;]+)\s*;?/", $this->attr['style'], $matches, PREG_SET_ORDER); foreach ($matches as $match) { $attributes[$match[1]] = $match[2]; } @@ -1375,7 +1375,7 @@ protected function read_tag() return true; } - if (!preg_match("/^[\w-:]+$/", $tag)) { + if (!preg_match("/^[\w\-:]+$/", $tag)) { $node->_[HDOM_INFO_TEXT] = '<' . $tag . $this->copy_until('<>'); if ($this->char==='<') { $this->link_nodes($node, false);