Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: patch
Type: changed

Verbum Comments: optimize block filtering performance using Block_Scanner for fast pre-filtering when all blocks are allowed.
1 change: 1 addition & 0 deletions projects/packages/jetpack-mu-wpcom/composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"automattic/scheduled-updates": "@dev",
"automattic/jetpack-compat": "@dev",
"automattic/jetpack-google-analytics": "@dev",
"automattic/block-delimiter": "@dev",
"scssphp/scssphp": "1.13.0",
"automattic/jetpack-subscribers-dashboard": "@dev"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
*/
class Verbum_Block_Utils {
/**
* Remove blocks that aren't allowed
* Remove blocks that aren't allowed using hybrid Block_Scanner optimization
*
* Uses Block_Scanner for fast pre-filtering when possible, falling back to
* parse_blocks approach only when disallowed blocks are detected.
*
* @param string $content - Text of the comment.
* @return string
Expand All @@ -21,12 +24,57 @@ public static function remove_blocks( $content ) {
}

// The block attributes come slashed and `parse_blocks` won't be able to parse them.
$content = wp_unslash( $content );
$blocks = parse_blocks( $content );
$unslashed_content = wp_unslash( $content );

$filtered_blocks = self::filter_blocks_recursive( $blocks );
if ( ! self::has_disallowed_blocks( $unslashed_content ) ) {
return $unslashed_content;
}

return self::remove_blocks_with_parse_blocks( $unslashed_content );
}

/**
* Quick verification using Block_Scanner to detect if content contains disallowed blocks
*
* This method provides significant performance benefits by avoiding expensive
* parse_blocks() processing when all blocks are allowed (the common case).
*
* @param string $content Unslashed content to scan.
* @return bool True if disallowed blocks found, false if all blocks are allowed.
*/
private static function has_disallowed_blocks( $content ) {
if ( ! class_exists( '\\Automattic\\Block_Scanner' ) ) {
return true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going back and forth about the function returning true (has disallowed blocks) but I think it makes sense. This way we default back to always using parse blocks if we can't use the Block_Scanner.

}

try {
$scanner = \Automattic\Block_Scanner::create( $content );
$allowed_blocks = self::get_allowed_blocks();

while ( $scanner->next_delimiter() ) {
if ( $scanner->opens_block() ) {
$block_type = $scanner->get_block_type();
if ( ! in_array( $block_type, $allowed_blocks, true ) ) {
return true; // Found disallowed block
}
}
}

// Convert the filtered blocks back to string
return false; // All blocks are allowed
} catch ( \Exception $e ) {
return true;
}
}

/**
* Remove disallowed blocks using parse_blocks
*
* @param string $unslashed_content Content with blocks (already unslashed).
* @return string Filtered content with disallowed blocks removed.
*/
private static function remove_blocks_with_parse_blocks( $unslashed_content ) {
$blocks = parse_blocks( $unslashed_content );
$filtered_blocks = self::filter_blocks_recursive( $blocks );
return serialize_blocks( $filtered_blocks );
}

Expand Down Expand Up @@ -62,7 +110,8 @@ private static function filter_blocks_recursive( $blocks ) {
$block['innerContent'] = $inner_content;
}

$block['innerHTML'] ??= '';
$block['innerHTML'] = isset( $block['innerHTML'] ) && is_string( $block['innerHTML'] ) ? $block['innerHTML'] : '';

if ( empty( $block['innerContent'] ) ) {
$block['innerContent'] = array( $block['innerHTML'] );
}
Expand Down Expand Up @@ -127,6 +176,12 @@ public static function render_verbum_blocks( $comment_content ) {
public static function get_allowed_blocks() {
global $allowedtags;

// Validate $allowedtags integrity - use local variable to avoid override warning
$validated_allowedtags = $allowedtags;
if ( ! is_array( $validated_allowedtags ) ) {
$validated_allowedtags = wp_kses_allowed_html( 'post' );
}
Comment on lines +179 to +183
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to just make sure that we're working with correct data, right? Just making sure 😅

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup! Correct, just an extra defensive measure.


$allowed_blocks = array( 'core/paragraph', 'core/list', 'core/code', 'core/list-item', 'core/quote', 'core/image', 'core/embed' );
$convert = array(
'blockquote' => 'core/quote',
Expand All @@ -139,7 +194,7 @@ public static function get_allowed_blocks() {
'pre' => 'core/code',
);

foreach ( array_keys( $allowedtags ) as $tag ) {
foreach ( array_keys( $validated_allowedtags ) as $tag ) {
if ( isset( $convert[ $tag ] ) ) {
$allowed_blocks[] = $convert[ $tag ];
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,147 @@ public function test_pre_comment_content_inner_blocks() {
$expected_content = preg_replace( '/\s+/', ' ', $expected_content );
$this->assertSame( $expected_content, $filtered_content );
}

/**
* Test security: malformed block delimiters should be handled safely
*/
public function test_security_malformed_block_delimiters() {
$malformed_inputs = array(
'<!-- wp:paragraph', // Incomplete opening
'<!-- wp:paragraph <!-- wp:nested --> -->', // Nested comments
'<!-- wp:paragraph {"unclosed": "string -->', // Invalid JSON
"<!-- wp:paragraph -->\x00<!-- /wp:paragraph -->", // Null bytes
'<!-- wp:paragraph -->content<!-- /wp:invalid -->', // Mismatched closing
'<!--wp:paragraph -->', // No space after comment start
'<!-- w:paragraph -->', // Incomplete wp prefix
);

foreach ( $malformed_inputs as $input ) {
$result = Verbum_Block_Utils::remove_blocks( $input );
// Should not cause errors and should return safe content
$this->assertIsString( $result );

// For truly malformed inputs that don't have proper block structure,
// they should be returned as-is since has_blocks() would return false
// or they would be treated as plain text content
if ( ! has_blocks( $input ) ) {
$this->assertEquals( $input, $result );
} else {
// If it's detected as having blocks but they're malformed,
// the parser should handle them gracefully
$this->assertIsString( $result );
}
}
}

/**
* Test security: Unicode and encoding edge cases
*/
public function test_security_unicode_handling() {
$unicode_inputs = array(
'<!-- wp:paragraph -->🔥💯<!-- /wp:paragraph -->', // Emoji
'<!-- wp:paragraph -->مرحبا<!-- /wp:paragraph -->', // RTL text
'<!-- wp:paragraph -->' . chr( 0xC2 ) . chr( 0x85 ) . '<!-- /wp:paragraph -->', // UTF-8 sequences
'<!-- wp:paragraph -->café<!-- /wp:paragraph -->', // Accented characters
);

foreach ( $unicode_inputs as $input ) {
$result = Verbum_Block_Utils::remove_blocks( $input );
// Should handle unicode properly without corruption
$this->assertIsString( $result );
// Allowed blocks should be preserved with unicode content
$this->assertStringContainsString( 'wp:paragraph', $result );
}
}

/**
* Test security: Block type spoofing attempts
*/
public function test_security_block_type_spoofing() {
$spoofing_attempts = array(
'<!-- wp:core/paragraph -->content<!-- /wp:core/paragraph -->', // Namespace confusion
'<!-- wp:paragraph/evil -->content<!-- /wp:paragraph/evil -->', // Fake subtype
'<!-- wp: paragraph -->content<!-- /wp: paragraph -->', // Extra space
'<!-- wp:PARAGRAPH -->content<!-- /wp:PARAGRAPH -->', // Case variation
);

foreach ( $spoofing_attempts as $input ) {
$result = Verbum_Block_Utils::remove_blocks( $input );
// Should properly identify and handle spoofing attempts
$this->assertIsString( $result );
}
}

/**
* Test security: Input validation consistency between fast and slow paths
*/
public function test_security_input_validation_consistency() {
$test_inputs = array(
'<!-- wp:paragraph -->Allowed content<!-- /wp:paragraph -->',
'<!-- wp:paragraph -->Allowed<!-- /wp:paragraph --><!-- wp:latest-posts -->',
'<!-- wp:quote --><!-- wp:paragraph -->Nested allowed<!-- /wp:paragraph --><!-- /wp:quote -->',
'<!-- wp:quote --><!-- wp:button -->Nested disallowed<!-- /wp:button --><!-- /wp:quote -->',
);

foreach ( $test_inputs as $input ) {
$slashed_input = wp_slash( $input );
$result = Verbum_Block_Utils::remove_blocks( $slashed_input );

// Result should be unslashed content (consistent processing)
$this->assertIsString( $result );

// Should not contain slashes that would indicate inconsistent processing
// This tests the fix for the critical input validation inconsistency
if ( strpos( $result, 'wp:' ) !== false ) {
$this->assertStringNotContainsString( '\\"', $result, 'Result should not contain escaped quotes from slashing' );
}
}
}

/**
* Test security: Resource exhaustion scenarios
*/
public function test_security_resource_limits() {
// Test deeply nested blocks
$deeply_nested = str_repeat( '<!-- wp:quote -->', 50 ) . 'content' . str_repeat( '<!-- /wp:quote -->', 50 );
$result = Verbum_Block_Utils::remove_blocks( $deeply_nested );
$this->assertIsString( $result );

// Test very long content
$long_content = '<!-- wp:paragraph -->' . str_repeat( 'a', 10000 ) . '<!-- /wp:paragraph -->';
$result = Verbum_Block_Utils::remove_blocks( $long_content );
$this->assertIsString( $result );

// Test many blocks
$many_blocks = str_repeat( '<!-- wp:paragraph -->content<!-- /wp:paragraph -->', 100 );
$result = Verbum_Block_Utils::remove_blocks( $many_blocks );
$this->assertIsString( $result );
}

/**
* Test that Block_Scanner and parse_blocks produce consistent security results
*/
public function test_consistency_between_scanner_and_parse_blocks() {
$test_cases = array(
'<!-- wp:paragraph -->Safe content<!-- /wp:paragraph -->',
'<!-- wp:paragraph -->Safe<!-- /wp:paragraph --><!-- wp:latest-posts -->Unsafe<!-- /wp:latest-posts -->',
'<!-- wp:quote --><!-- wp:paragraph -->Nested safe<!-- /wp:paragraph --><!-- /wp:quote -->',
'<!-- wp:quote --><!-- wp:button -->Nested unsafe<!-- /wp:button --><!-- /wp:quote -->',
);

foreach ( $test_cases as $test_case ) {
$result = Verbum_Block_Utils::remove_blocks( $test_case );

// Verify that allowed blocks are preserved
if ( strpos( $test_case, 'wp:paragraph' ) !== false || strpos( $test_case, 'wp:quote' ) !== false ) {
$this->assertStringContainsString( 'wp:', $result, 'Allowed blocks should be preserved' );
}

// Verify that disallowed blocks are removed
if ( strpos( $test_case, 'wp:latest-posts' ) !== false || strpos( $test_case, 'wp:button' ) !== false ) {
$this->assertStringNotContainsString( 'latest-posts', $result, 'Disallowed blocks should be removed' );
$this->assertStringNotContainsString( 'button', $result, 'Disallowed blocks should be removed' );
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Significance: patch
Type: changed
Comment: Verbum Comments: optimize block filtering performance with Block_Scanner pre-filtering


66 changes: 65 additions & 1 deletion projects/plugins/mu-wpcom-plugin/composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Significance: patch
Type: added
Comment: Add block-delimiter package to jetpack-mu-wpcom package


Loading