Skip to content

Commit 5194408

Browse files
Verbum Comments: optimize block filtering performance with Block_Scanner pre-filtering (#44996)
1 parent 233e85b commit 5194408

File tree

8 files changed

+350
-9
lines changed

8 files changed

+350
-9
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Significance: patch
2+
Type: changed
3+
4+
Verbum Comments: optimize block filtering performance using Block_Scanner for fast pre-filtering when all blocks are allowed.

projects/packages/jetpack-mu-wpcom/composer.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"automattic/scheduled-updates": "@dev",
1818
"automattic/jetpack-compat": "@dev",
1919
"automattic/jetpack-google-analytics": "@dev",
20+
"automattic/block-delimiter": "@dev",
2021
"scssphp/scssphp": "1.13.0",
2122
"automattic/jetpack-subscribers-dashboard": "@dev"
2223
},

projects/packages/jetpack-mu-wpcom/src/features/verbum-comments/assets/class-verbum-block-utils.php

Lines changed: 62 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@
1010
*/
1111
class Verbum_Block_Utils {
1212
/**
13-
* Remove blocks that aren't allowed
13+
* Remove blocks that aren't allowed using hybrid Block_Scanner optimization
14+
*
15+
* Uses Block_Scanner for fast pre-filtering when possible, falling back to
16+
* parse_blocks approach only when disallowed blocks are detected.
1417
*
1518
* @param string $content - Text of the comment.
1619
* @return string
@@ -21,12 +24,57 @@ public static function remove_blocks( $content ) {
2124
}
2225

2326
// The block attributes come slashed and `parse_blocks` won't be able to parse them.
24-
$content = wp_unslash( $content );
25-
$blocks = parse_blocks( $content );
27+
$unslashed_content = wp_unslash( $content );
2628

27-
$filtered_blocks = self::filter_blocks_recursive( $blocks );
29+
if ( ! self::has_disallowed_blocks( $unslashed_content ) ) {
30+
return $unslashed_content;
31+
}
32+
33+
return self::remove_blocks_with_parse_blocks( $unslashed_content );
34+
}
35+
36+
/**
37+
* Quick verification using Block_Scanner to detect if content contains disallowed blocks
38+
*
39+
* This method provides significant performance benefits by avoiding expensive
40+
* parse_blocks() processing when all blocks are allowed (the common case).
41+
*
42+
* @param string $content Unslashed content to scan.
43+
* @return bool True if disallowed blocks found, false if all blocks are allowed.
44+
*/
45+
private static function has_disallowed_blocks( $content ) {
46+
if ( ! class_exists( '\\Automattic\\Block_Scanner' ) ) {
47+
return true;
48+
}
49+
50+
try {
51+
$scanner = \Automattic\Block_Scanner::create( $content );
52+
$allowed_blocks = self::get_allowed_blocks();
53+
54+
while ( $scanner->next_delimiter() ) {
55+
if ( $scanner->opens_block() ) {
56+
$block_type = $scanner->get_block_type();
57+
if ( ! in_array( $block_type, $allowed_blocks, true ) ) {
58+
return true; // Found disallowed block
59+
}
60+
}
61+
}
2862

29-
// Convert the filtered blocks back to string
63+
return false; // All blocks are allowed
64+
} catch ( \Exception $e ) {
65+
return true;
66+
}
67+
}
68+
69+
/**
70+
* Remove disallowed blocks using parse_blocks
71+
*
72+
* @param string $unslashed_content Content with blocks (already unslashed).
73+
* @return string Filtered content with disallowed blocks removed.
74+
*/
75+
private static function remove_blocks_with_parse_blocks( $unslashed_content ) {
76+
$blocks = parse_blocks( $unslashed_content );
77+
$filtered_blocks = self::filter_blocks_recursive( $blocks );
3078
return serialize_blocks( $filtered_blocks );
3179
}
3280

@@ -62,7 +110,8 @@ private static function filter_blocks_recursive( $blocks ) {
62110
$block['innerContent'] = $inner_content;
63111
}
64112

65-
$block['innerHTML'] ??= '';
113+
$block['innerHTML'] = isset( $block['innerHTML'] ) && is_string( $block['innerHTML'] ) ? $block['innerHTML'] : '';
114+
66115
if ( empty( $block['innerContent'] ) ) {
67116
$block['innerContent'] = array( $block['innerHTML'] );
68117
}
@@ -127,6 +176,12 @@ public static function render_verbum_blocks( $comment_content ) {
127176
public static function get_allowed_blocks() {
128177
global $allowedtags;
129178

179+
// Validate $allowedtags integrity - use local variable to avoid override warning
180+
$validated_allowedtags = $allowedtags;
181+
if ( ! is_array( $validated_allowedtags ) ) {
182+
$validated_allowedtags = wp_kses_allowed_html( 'post' );
183+
}
184+
130185
$allowed_blocks = array( 'core/paragraph', 'core/list', 'core/code', 'core/list-item', 'core/quote', 'core/image', 'core/embed' );
131186
$convert = array(
132187
'blockquote' => 'core/quote',
@@ -139,7 +194,7 @@ public static function get_allowed_blocks() {
139194
'pre' => 'core/code',
140195
);
141196

142-
foreach ( array_keys( $allowedtags ) as $tag ) {
197+
foreach ( array_keys( $validated_allowedtags ) as $tag ) {
143198
if ( isset( $convert[ $tag ] ) ) {
144199
$allowed_blocks[] = $convert[ $tag ];
145200
}

projects/packages/jetpack-mu-wpcom/tests/php/features/verbum-comments/Verbum_Block_Utils_Test.php

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,147 @@ public function test_pre_comment_content_inner_blocks() {
101101
$expected_content = preg_replace( '/\s+/', ' ', $expected_content );
102102
$this->assertSame( $expected_content, $filtered_content );
103103
}
104+
105+
/**
106+
* Test security: malformed block delimiters should be handled safely
107+
*/
108+
public function test_security_malformed_block_delimiters() {
109+
$malformed_inputs = array(
110+
'<!-- wp:paragraph', // Incomplete opening
111+
'<!-- wp:paragraph <!-- wp:nested --> -->', // Nested comments
112+
'<!-- wp:paragraph {"unclosed": "string -->', // Invalid JSON
113+
"<!-- wp:paragraph -->\x00<!-- /wp:paragraph -->", // Null bytes
114+
'<!-- wp:paragraph -->content<!-- /wp:invalid -->', // Mismatched closing
115+
'<!--wp:paragraph -->', // No space after comment start
116+
'<!-- w:paragraph -->', // Incomplete wp prefix
117+
);
118+
119+
foreach ( $malformed_inputs as $input ) {
120+
$result = Verbum_Block_Utils::remove_blocks( $input );
121+
// Should not cause errors and should return safe content
122+
$this->assertIsString( $result );
123+
124+
// For truly malformed inputs that don't have proper block structure,
125+
// they should be returned as-is since has_blocks() would return false
126+
// or they would be treated as plain text content
127+
if ( ! has_blocks( $input ) ) {
128+
$this->assertEquals( $input, $result );
129+
} else {
130+
// If it's detected as having blocks but they're malformed,
131+
// the parser should handle them gracefully
132+
$this->assertIsString( $result );
133+
}
134+
}
135+
}
136+
137+
/**
138+
* Test security: Unicode and encoding edge cases
139+
*/
140+
public function test_security_unicode_handling() {
141+
$unicode_inputs = array(
142+
'<!-- wp:paragraph -->🔥💯<!-- /wp:paragraph -->', // Emoji
143+
'<!-- wp:paragraph -->مرحبا<!-- /wp:paragraph -->', // RTL text
144+
'<!-- wp:paragraph -->' . chr( 0xC2 ) . chr( 0x85 ) . '<!-- /wp:paragraph -->', // UTF-8 sequences
145+
'<!-- wp:paragraph -->café<!-- /wp:paragraph -->', // Accented characters
146+
);
147+
148+
foreach ( $unicode_inputs as $input ) {
149+
$result = Verbum_Block_Utils::remove_blocks( $input );
150+
// Should handle unicode properly without corruption
151+
$this->assertIsString( $result );
152+
// Allowed blocks should be preserved with unicode content
153+
$this->assertStringContainsString( 'wp:paragraph', $result );
154+
}
155+
}
156+
157+
/**
158+
* Test security: Block type spoofing attempts
159+
*/
160+
public function test_security_block_type_spoofing() {
161+
$spoofing_attempts = array(
162+
'<!-- wp:core/paragraph -->content<!-- /wp:core/paragraph -->', // Namespace confusion
163+
'<!-- wp:paragraph/evil -->content<!-- /wp:paragraph/evil -->', // Fake subtype
164+
'<!-- wp: paragraph -->content<!-- /wp: paragraph -->', // Extra space
165+
'<!-- wp:PARAGRAPH -->content<!-- /wp:PARAGRAPH -->', // Case variation
166+
);
167+
168+
foreach ( $spoofing_attempts as $input ) {
169+
$result = Verbum_Block_Utils::remove_blocks( $input );
170+
// Should properly identify and handle spoofing attempts
171+
$this->assertIsString( $result );
172+
}
173+
}
174+
175+
/**
176+
* Test security: Input validation consistency between fast and slow paths
177+
*/
178+
public function test_security_input_validation_consistency() {
179+
$test_inputs = array(
180+
'<!-- wp:paragraph -->Allowed content<!-- /wp:paragraph -->',
181+
'<!-- wp:paragraph -->Allowed<!-- /wp:paragraph --><!-- wp:latest-posts -->',
182+
'<!-- wp:quote --><!-- wp:paragraph -->Nested allowed<!-- /wp:paragraph --><!-- /wp:quote -->',
183+
'<!-- wp:quote --><!-- wp:button -->Nested disallowed<!-- /wp:button --><!-- /wp:quote -->',
184+
);
185+
186+
foreach ( $test_inputs as $input ) {
187+
$slashed_input = wp_slash( $input );
188+
$result = Verbum_Block_Utils::remove_blocks( $slashed_input );
189+
190+
// Result should be unslashed content (consistent processing)
191+
$this->assertIsString( $result );
192+
193+
// Should not contain slashes that would indicate inconsistent processing
194+
// This tests the fix for the critical input validation inconsistency
195+
if ( strpos( $result, 'wp:' ) !== false ) {
196+
$this->assertStringNotContainsString( '\\"', $result, 'Result should not contain escaped quotes from slashing' );
197+
}
198+
}
199+
}
200+
201+
/**
202+
* Test security: Resource exhaustion scenarios
203+
*/
204+
public function test_security_resource_limits() {
205+
// Test deeply nested blocks
206+
$deeply_nested = str_repeat( '<!-- wp:quote -->', 50 ) . 'content' . str_repeat( '<!-- /wp:quote -->', 50 );
207+
$result = Verbum_Block_Utils::remove_blocks( $deeply_nested );
208+
$this->assertIsString( $result );
209+
210+
// Test very long content
211+
$long_content = '<!-- wp:paragraph -->' . str_repeat( 'a', 10000 ) . '<!-- /wp:paragraph -->';
212+
$result = Verbum_Block_Utils::remove_blocks( $long_content );
213+
$this->assertIsString( $result );
214+
215+
// Test many blocks
216+
$many_blocks = str_repeat( '<!-- wp:paragraph -->content<!-- /wp:paragraph -->', 100 );
217+
$result = Verbum_Block_Utils::remove_blocks( $many_blocks );
218+
$this->assertIsString( $result );
219+
}
220+
221+
/**
222+
* Test that Block_Scanner and parse_blocks produce consistent security results
223+
*/
224+
public function test_consistency_between_scanner_and_parse_blocks() {
225+
$test_cases = array(
226+
'<!-- wp:paragraph -->Safe content<!-- /wp:paragraph -->',
227+
'<!-- wp:paragraph -->Safe<!-- /wp:paragraph --><!-- wp:latest-posts -->Unsafe<!-- /wp:latest-posts -->',
228+
'<!-- wp:quote --><!-- wp:paragraph -->Nested safe<!-- /wp:paragraph --><!-- /wp:quote -->',
229+
'<!-- wp:quote --><!-- wp:button -->Nested unsafe<!-- /wp:button --><!-- /wp:quote -->',
230+
);
231+
232+
foreach ( $test_cases as $test_case ) {
233+
$result = Verbum_Block_Utils::remove_blocks( $test_case );
234+
235+
// Verify that allowed blocks are preserved
236+
if ( strpos( $test_case, 'wp:paragraph' ) !== false || strpos( $test_case, 'wp:quote' ) !== false ) {
237+
$this->assertStringContainsString( 'wp:', $result, 'Allowed blocks should be preserved' );
238+
}
239+
240+
// Verify that disallowed blocks are removed
241+
if ( strpos( $test_case, 'wp:latest-posts' ) !== false || strpos( $test_case, 'wp:button' ) !== false ) {
242+
$this->assertStringNotContainsString( 'latest-posts', $result, 'Disallowed blocks should be removed' );
243+
$this->assertStringNotContainsString( 'button', $result, 'Disallowed blocks should be removed' );
244+
}
245+
}
246+
}
104247
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Significance: patch
2+
Type: changed
3+
Comment: Verbum Comments: optimize block filtering performance with Block_Scanner pre-filtering
4+
5+

projects/plugins/mu-wpcom-plugin/composer.lock

Lines changed: 65 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Significance: patch
2+
Type: added
3+
Comment: Add block-delimiter package to jetpack-mu-wpcom package
4+
5+

0 commit comments

Comments
 (0)