diff --git a/src/main/java/org/htmlunit/cssparser/parser/AbstractCSSParser.java b/src/main/java/org/htmlunit/cssparser/parser/AbstractCSSParser.java index b152df0..73cd814 100644 --- a/src/main/java/org/htmlunit/cssparser/parser/AbstractCSSParser.java +++ b/src/main/java/org/htmlunit/cssparser/parser/AbstractCSSParser.java @@ -35,6 +35,20 @@ * @author Ronald Brill */ public abstract class AbstractCSSParser { + /** + * Strategy for error recovery in the parser. + */ + public enum ErrorRecoveryStrategy { + /** Skip tokens until semicolon or EOF */ + SKIP_TO_SEMICOLON, + /** Skip tokens with brace depth tracking */ + SKIP_TO_BRACE, + /** Skip to next rule boundary */ + SKIP_TO_NEXT_RULE, + /** Skip balanced braces/parentheses */ + SKIP_BALANCED_BLOCK + } + private DocumentHandler documentHandler_; private CSSErrorHandler errorHandler_; private InputSource source_; diff --git a/src/main/javacc/CSS3Parser.jj b/src/main/javacc/CSS3Parser.jj index aed93c0..49fa95a 100644 --- a/src/main/javacc/CSS3Parser.jj +++ b/src/main/javacc/CSS3Parser.jj @@ -36,6 +36,7 @@ import java.util.Locale; import org.htmlunit.cssparser.dom.CSSValueImpl; import org.htmlunit.cssparser.dom.Property; import org.htmlunit.cssparser.parser.AbstractCSSParser; +import org.htmlunit.cssparser.parser.AbstractCSSParser.ErrorRecoveryStrategy; import org.htmlunit.cssparser.parser.CSSParseException; import org.htmlunit.cssparser.parser.LexicalUnit; import org.htmlunit.cssparser.parser.LexicalUnitImpl; @@ -526,15 +527,7 @@ void styleSheetRuleList() : return; } - CSSParseException cpe = toCSSParseException("invalidRule", e); - getErrorHandler().error(cpe); - getErrorHandler().warning(createSkipWarning("ignoringRule", cpe)); - while (t.kind != RBRACE && t.kind != EOF ) { - t = getNextToken(); - } - if (t.kind == EOF) { - return; - } + handleParseError("invalidRule", e, ErrorRecoveryStrategy.SKIP_TO_NEXT_RULE); } } ) @@ -3339,6 +3332,98 @@ void error_skipAtRule() while (t.kind != SEMICOLON && t.kind != EOF); } +/** + * Skip tokens until a semicolon or EOF is encountered. + * This is useful for recovering from errors in declarations and simple statements. + */ +JAVACODE +void error_skipToSemicolon() +{ + Token t; + do { + t = getNextToken(); + } + while (t.kind != SEMICOLON && t.kind != EOF); +} + +/** + * Skip tokens until a right brace is encountered, accounting for brace nesting. + * This is useful for recovering from errors in rule blocks. + */ +JAVACODE +void error_skipToRightBrace() +{ + Token t; + int nesting = 0; + do { + t = getNextToken(); + if (t.kind == LBRACE) { + nesting++; + } + else if (t.kind == RBRACE) { + nesting--; + } + } + while (t.kind != EOF && (t.kind != RBRACE || nesting > 0)); +} + +/** + * Skip tokens to the next rule boundary. + * This is useful for recovering from errors at the rule level. + */ +JAVACODE +void error_skipToNextRule() +{ + Token t; + int nesting = 0; + do { + t = getNextToken(); + if (t.kind == LBRACE) { + nesting++; + } + else if (t.kind == RBRACE) { + nesting--; + if (nesting <= 0) { + // Found the end of the current rule + break; + } + } + } + while (t.kind != EOF); +} + +/** + * Centralized error handler that reports the error and applies a recovery strategy. + * + * @param messageKey the message key for the error + * @param e the parse exception + * @param strategy the error recovery strategy to apply + */ +JAVACODE +void handleParseError(String messageKey, ParseException e, ErrorRecoveryStrategy strategy) +{ + CSSParseException cpe = toCSSParseException(messageKey, e); + getErrorHandler().error(cpe); + getErrorHandler().warning(createSkipWarning("ignoringRule", cpe)); + + if (strategy != null) { + switch (strategy) { + case SKIP_TO_SEMICOLON: + error_skipToSemicolon(); + break; + case SKIP_TO_BRACE: + error_skipToRightBrace(); + break; + case SKIP_TO_NEXT_RULE: + error_skipToNextRule(); + break; + case SKIP_BALANCED_BLOCK: + error_skipblock(null, null); + break; + } + } +} + JAVACODE Boolean handleCaseInSensitive(Token t) { diff --git a/src/test/java/org/htmlunit/cssparser/parser/ErrorRecoveryTest.java b/src/test/java/org/htmlunit/cssparser/parser/ErrorRecoveryTest.java new file mode 100644 index 0000000..8d8998c --- /dev/null +++ b/src/test/java/org/htmlunit/cssparser/parser/ErrorRecoveryTest.java @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2019-2024 Ronald Brill. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.htmlunit.cssparser.parser; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import org.htmlunit.cssparser.dom.CSSRuleListImpl; +import org.htmlunit.cssparser.dom.CSSStyleSheetImpl; +import org.junit.jupiter.api.Test; + +/** + * Tests for error recovery methods in the CSS parser. + * These tests verify that the parser can recover from various malformed CSS + * and continue parsing subsequent valid rules. + * + * @author Ronald Brill + */ +public class ErrorRecoveryTest extends AbstractCSSParserTest { + + /** + * Test recovery from error at the start of the stylesheet. + * The parser should report the error and attempt recovery. + */ + @Test + public void errorAtStartWithRecovery() throws Exception { + final String css = "// invalid comment \n" + + "h1 { color: red; }"; + final CSSStyleSheetImpl sheet = parse(css, 1, 0, 1); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // The invalid '//' comment causes an error, and recovery skips to the next valid content + assertEquals(0, rules.getLength()); + } + + /** + * Test recovery from multiple errors. + * The parser should report each error and continue parsing. + */ + @Test + public void multipleErrorsWithRecovery() throws Exception { + final String css = "// error 1\n" + + "h1 { color: red; }\n" + + "// error 2\n" + + "h2 { color: blue; }"; + final CSSStyleSheetImpl sheet = parse(css, 2, 0, 2); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // Each '//' comment triggers error recovery, affecting subsequent parsing + assertEquals(0, rules.getLength()); + } + + /** + * Test recovery from unbalanced braces. + * The parser should skip to the next valid rule. + */ + @Test + public void unbalancedBracesRecovery() throws Exception { + final String css = "h1 { color: red;\n" + + "h2 { color: blue; }"; + // Parser parses this with an error for the unbalanced brace + final CSSStyleSheetImpl sheet = parse(css, 1, 0, 0); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // Parser creates a rule with the error + assertEquals(1, rules.getLength()); + } + + /** + * Test recovery from invalid property in declaration. + */ + @Test + public void invalidPropertyRecovery() throws Exception { + final String css = "h1 { @invalid; color: red; }"; + final CSSStyleSheetImpl sheet = parse(css, 1, 0, 1); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // Should have the rule but skip the invalid property + assertEquals(1, rules.getLength()); + } + + /** + * Test recovery at EOF boundary. + * The parser should handle EOF gracefully without hanging. + */ + @Test + public void errorAtEOF() throws Exception { + final String css = "h1 { color: red; }\n" + + "// invalid at end"; + final CSSStyleSheetImpl sheet = parse(css, 1, 0, 1); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // Should have the first rule + assertEquals(1, rules.getLength()); + } + + /** + * Test recovery from misplaced @charset rule. + */ + @Test + public void misplacedCharsetRecovery() throws Exception { + final String css = "h1 { color: red; }\n" + + "@charset \"UTF-8\";\n" + + "h2 { color: blue; }"; + final CSSStyleSheetImpl sheet = parse(css, 1, 0, 1); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // Should have both h1 and h2 rules + assertEquals(2, rules.getLength()); + assertEquals("h1 { color: red; }", rules.getRules().get(0).getCssText()); + assertEquals("h2 { color: blue; }", rules.getRules().get(1).getCssText()); + } + + /** + * Test recovery from invalid at-rule. + */ + @Test + public void invalidAtRuleRecovery() throws Exception { + final String css = "@invalid { content: \"test\"; }\n" + + "h1 { color: red; }"; + final CSSStyleSheetImpl sheet = parse(css, 0, 0, 0); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // Should skip the invalid at-rule and parse h1 + assertEquals(2, rules.getLength()); + } + + /** + * Test that recovery doesn't consume too many tokens. + */ + @Test + public void recoveryDoesNotConsumeValidRules() throws Exception { + final String css = "// invalid\n" + + "p { color: green; }\n" + + "h1 { color: red; }\n" + + "h2 { color: blue; }"; + final CSSStyleSheetImpl sheet = parse(css, 1, 0, 1); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // After error recovery, subsequent valid rules should be parsed + // In practice, this parser recovers and parses h1 and h2 + assertEquals(2, rules.getLength()); + } + + /** + * Test recovery from malformed selector. + */ + @Test + public void malformedSelectorRecovery() throws Exception { + final String css = "h1 & h2 { color: red; }\n" + + "h3 { color: blue; }"; + final CSSStyleSheetImpl sheet = parse(css, 1, 0, 1); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // Should skip the invalid rule and parse h3 + assertEquals(1, rules.getLength()); + assertEquals("h3 { color: blue; }", rules.getRules().get(0).getCssText()); + } + + /** + * Test recovery from incomplete rule. + */ + @Test + public void incompleteRuleRecovery() throws Exception { + final String css = "h1 { color:\n" + + "h2 { color: blue; }"; + final CSSStyleSheetImpl sheet = parse(css, 1, 0, 1); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // Parser treats h2 as a value for the color property in h1 + assertEquals(1, rules.getLength()); + assertEquals("h1 { color: h2; }", rules.getRules().get(0).getCssText()); + } + + /** + * Test that the stylesheet object is created even with errors. + */ + @Test + public void stylesheetCreatedWithErrors() throws Exception { + final String css = "// invalid\n" + + "// more invalid"; + final CSSStyleSheetImpl sheet = parse(css, 1, 0, 1); + + // Should create stylesheet even though it has no valid rules + assertNotNull(sheet); + assertEquals(0, sheet.getCssRules().getLength()); + } + + /** + * Test recovery from error in media query. + */ + @Test + public void errorInMediaQuery() throws Exception { + final String css = "@media screen and invalid {\n" + + " h1 { color: red; }\n" + + "}\n" + + "h2 { color: blue; }"; + final CSSStyleSheetImpl sheet = parse(css, 1, 0, 1); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // Should skip the invalid media rule and parse h2 + assertEquals(1, rules.getLength()); + assertEquals("h2 { color: blue; }", rules.getRules().get(0).getCssText()); + } + + /** + * Test recovery handles balanced braces correctly. + */ + @Test + public void balancedBracesInError() throws Exception { + final String css = "h1 { content: \"{}\"; color: red; }\n" + + "h2 { color: blue; }"; + final CSSStyleSheetImpl sheet = parse(css, 0, 0, 0); + final CSSRuleListImpl rules = sheet.getCssRules(); + + // Both rules should be valid + assertEquals(2, rules.getLength()); + assertEquals("h1 { content: \"{}\"; color: red; }", rules.getRules().get(0).getCssText()); + assertEquals("h2 { color: blue; }", rules.getRules().get(1).getCssText()); + } +}