diff --git a/acdc-parser/CHANGELOG.md b/acdc-parser/CHANGELOG.md index 353dbc69..2ecfbd22 100644 --- a/acdc-parser/CHANGELOG.md +++ b/acdc-parser/CHANGELOG.md @@ -9,10 +9,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Expand attributes inside `pass:a[]` content when macros are disabled via `subs=-macros`, + matching asciidoctor behavior +- Fixed passthrough preprocessor bypassing `subs=-macros` gating — `pass:[]` macros and + inline passthrough syntax (`+...+`, `++...++`, `+++...+++`) are now treated as literal + text when macros are disabled, matching asciidoctor behavior - Fixed non-monotonic inline positions for subscript/superscript text preceded by short plain text ### Added +- **`subs=macros` substitution type** — `[subs=-macros]` and explicit lists without `macros` + now gate macro grammar rules at parse time. When macros are disabled, inline macros + (links, xrefs, images, footnotes, index terms, etc.) are treated as plain text. + Requires the `pre-spec-subs` feature flag. - **Include `indent` attribute** — `include::file.rb[indent=2]` now re-indents included content to the specified level, matching asciidoctor behavior. Strips existing leading whitespace and prepends the specified number of spaces. `indent=0` removes all leading whitespace. diff --git a/acdc-parser/fixtures/tests/subs_explicit_no_macros.adoc b/acdc-parser/fixtures/tests/subs_explicit_no_macros.adoc new file mode 100644 index 00000000..1745b044 --- /dev/null +++ b/acdc-parser/fixtures/tests/subs_explicit_no_macros.adoc @@ -0,0 +1,2 @@ +[subs=specialchars] +This has link:https://example.com[text] and macro text. 
diff --git a/acdc-parser/fixtures/tests/subs_explicit_no_macros.json b/acdc-parser/fixtures/tests/subs_explicit_no_macros.json new file mode 100644 index 00000000..5305f42f --- /dev/null +++ b/acdc-parser/fixtures/tests/subs_explicit_no_macros.json @@ -0,0 +1,52 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "This has link:https://example.com[text] and macro text.", + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 2, + "col": 55 + } + ] + } + ], + "metadata": { + "substitutions": [ + "special_chars" + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 2, + "col": 55 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 2, + "col": 55 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/subs_macros_disabled.adoc b/acdc-parser/fixtures/tests/subs_macros_disabled.adoc new file mode 100644 index 00000000..250f616d --- /dev/null +++ b/acdc-parser/fixtures/tests/subs_macros_disabled.adoc @@ -0,0 +1,2 @@ +[subs=-macros] +This has link:https://example.com[a link] and xref:section[cross ref] and image:photo.png[alt] and footnote:[a note] and [[myanchor]]anchor and [[[bibref]]]biblio as plain text. 
diff --git a/acdc-parser/fixtures/tests/subs_macros_disabled.json b/acdc-parser/fixtures/tests/subs_macros_disabled.json new file mode 100644 index 00000000..5d97f270 --- /dev/null +++ b/acdc-parser/fixtures/tests/subs_macros_disabled.json @@ -0,0 +1,52 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "This has link:https://example.com[a link] and xref:section[cross ref] and image:photo.png[alt] and footnote:[a note] and [[myanchor]]anchor and [[[bibref]]]biblio as plain text.", + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 2, + "col": 177 + } + ] + } + ], + "metadata": { + "substitutions": [ + "-macros" + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 2, + "col": 177 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 2, + "col": 177 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/subs_pass_attributes_macros_disabled.adoc b/acdc-parser/fixtures/tests/subs_pass_attributes_macros_disabled.adoc new file mode 100644 index 00000000..c85e9253 --- /dev/null +++ b/acdc-parser/fixtures/tests/subs_pass_attributes_macros_disabled.adoc @@ -0,0 +1,4 @@ +:version: 1.0.0 + +[subs=-macros] +Use pass:a[{version}] here. 
diff --git a/acdc-parser/fixtures/tests/subs_pass_attributes_macros_disabled.json b/acdc-parser/fixtures/tests/subs_pass_attributes_macros_disabled.json new file mode 100644 index 00000000..0f2f4f88 --- /dev/null +++ b/acdc-parser/fixtures/tests/subs_pass_attributes_macros_disabled.json @@ -0,0 +1,55 @@ +{ + "name": "document", + "type": "block", + "attributes": { + "version": "1.0.0" + }, + "blocks": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Use pass:a[1.0.0] here.", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 27 + } + ] + } + ], + "metadata": { + "substitutions": [ + "-macros" + ] + }, + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 4, + "col": 27 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 4, + "col": 27 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/subs_pass_macros_disabled.adoc b/acdc-parser/fixtures/tests/subs_pass_macros_disabled.adoc new file mode 100644 index 00000000..def1b8a9 --- /dev/null +++ b/acdc-parser/fixtures/tests/subs_pass_macros_disabled.adoc @@ -0,0 +1,2 @@ +[subs=-macros] +Use pass:[<b>bold</b>] for raw HTML. 
diff --git a/acdc-parser/fixtures/tests/subs_pass_macros_disabled.json b/acdc-parser/fixtures/tests/subs_pass_macros_disabled.json new file mode 100644 index 00000000..fff5f477 --- /dev/null +++ b/acdc-parser/fixtures/tests/subs_pass_macros_disabled.json @@ -0,0 +1,52 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Use pass:[<b>bold</b>] for raw HTML.", + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 2, + "col": 36 + } + ] + } + ], + "metadata": { + "substitutions": [ + "-macros" + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 2, + "col": 36 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 2, + "col": 36 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/subs_specialchars_no_attributes.adoc b/acdc-parser/fixtures/tests/subs_specialchars_no_attributes.adoc new file mode 100644 index 00000000..19ee4e8e --- /dev/null +++ b/acdc-parser/fixtures/tests/subs_specialchars_no_attributes.adoc @@ -0,0 +1,4 @@ +:version: 1.0.0 + +[subs=specialchars] +The version is {version} here. 
diff --git a/acdc-parser/fixtures/tests/subs_specialchars_no_attributes.json b/acdc-parser/fixtures/tests/subs_specialchars_no_attributes.json new file mode 100644 index 00000000..4a0fdca2 --- /dev/null +++ b/acdc-parser/fixtures/tests/subs_specialchars_no_attributes.json @@ -0,0 +1,55 @@ +{ + "name": "document", + "type": "block", + "attributes": { + "version": "1.0.0" + }, + "blocks": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "The version is {version} here.", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 30 + } + ] + } + ], + "metadata": { + "substitutions": [ + "special_chars" + ] + }, + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 4, + "col": 30 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 4, + "col": 30 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/src/grammar/document.rs b/acdc-parser/src/grammar/document.rs index 28abf6ad..810f6003 100644 --- a/acdc-parser/src/grammar/document.rs +++ b/acdc-parser/src/grammar/document.rs @@ -60,12 +60,26 @@ enum HeaderMetadataLine { Attributes((bool, Box)), } -#[derive(Debug, Default)] +#[derive(Debug)] // Used purely in the grammar to represent the parsed block details pub(crate) struct BlockParsingMetadata { pub(crate) metadata: BlockMetadata, title: Title, parent_section_level: Option, + pub(crate) macros_enabled: bool, + pub(crate) attributes_enabled: bool, +} + +impl Default for BlockParsingMetadata { + fn default() -> Self { + Self { + metadata: BlockMetadata::default(), + title: Title::default(), + parent_section_level: None, + macros_enabled: true, + attributes_enabled: true, + } + } } #[derive(Debug)] @@ -1623,10 +1637,20 @@ peg::parser! 
{ if meta_start != meta_end { metadata.location = Some(state.create_block_location(meta_start, meta_end, offset)); } + let (macros_enabled, attributes_enabled) = if cfg!(feature = "pre-spec-subs") { + ( + metadata.substitutions.as_ref().is_none_or(|spec| !spec.macros_disabled()), + metadata.substitutions.as_ref().is_none_or(|spec| !spec.attributes_disabled()), + ) + } else { + (true, true) + }; Ok(BlockParsingMetadata { metadata, title, parent_section_level, + macros_enabled, + attributes_enabled, }) } @@ -3765,30 +3789,30 @@ peg::parser! { // Escaped syntax must come next - backslash prevents any following syntax from being parsed / escaped_syntax:escaped_syntax(offset) { escaped_syntax } // Index terms: concealed (triple parens) must come before flow (double parens) - / index_term:index_term_concealed(offset) { index_term } - / index_term:index_term_flow(offset) { index_term } - / indexterm:indexterm_macro(offset) { indexterm } - / indexterm2:indexterm2_macro(offset) { indexterm2 } + / check_macros(block_metadata) index_term:index_term_concealed(offset) { index_term } + / check_macros(block_metadata) index_term:index_term_flow(offset) { index_term } + / check_macros(block_metadata) indexterm:indexterm_macro(offset) { indexterm } + / check_macros(block_metadata) indexterm2:indexterm2_macro(offset) { indexterm2 } // Bibliography anchor (triple brackets) must come before inline anchor (double brackets) - / bibliography_anchor:bibliography_anchor(offset) { bibliography_anchor } - / inline_anchor:inline_anchor(offset) { inline_anchor } - / cross_reference_shorthand:cross_reference_shorthand(offset, block_metadata) { cross_reference_shorthand } - / cross_reference_macro:cross_reference_macro(offset, block_metadata) { cross_reference_macro } + / check_macros(block_metadata) bibliography_anchor:bibliography_anchor(offset) { bibliography_anchor } + / check_macros(block_metadata) inline_anchor:inline_anchor(offset) { inline_anchor } + / check_macros(block_metadata) 
cross_reference_shorthand:cross_reference_shorthand(offset, block_metadata) { cross_reference_shorthand } + / check_macros(block_metadata) cross_reference_macro:cross_reference_macro(offset, block_metadata) { cross_reference_macro } / hard_wrap:hard_wrap(offset) { hard_wrap } - / &"footnote:" footnote:footnote(offset, block_metadata) { footnote } - / stem:inline_stem(offset) { stem } - / image:inline_image(offset, block_metadata) { image } - / icon:inline_icon(offset, block_metadata) { icon } - / keyboard:inline_keyboard(offset) { keyboard } - / button:inline_button(offset) { button } - / menu:inline_menu(offset) { menu } + / check_macros(block_metadata) &"footnote:" footnote:footnote(offset, block_metadata) { footnote } + / check_macros(block_metadata) stem:inline_stem(offset) { stem } + / check_macros(block_metadata) image:inline_image(offset, block_metadata) { image } + / check_macros(block_metadata) icon:inline_icon(offset, block_metadata) { icon } + / check_macros(block_metadata) keyboard:inline_keyboard(offset) { keyboard } + / check_macros(block_metadata) button:inline_button(offset) { button } + / check_macros(block_metadata) menu:inline_menu(offset) { menu } // mailto has to come before the url_macro because url_macro calls url() which // also matches against mailto: - / mailto_macro:mailto_macro(offset, block_metadata) { mailto_macro } - / url_macro:url_macro(offset, block_metadata) { url_macro } - / pass:inline_pass(offset) { pass } - / link_macro:link_macro(offset) { link_macro } - / check_autolinks(allow_autolinks) inline_autolink:inline_autolink(offset) { inline_autolink } + / check_macros(block_metadata) mailto_macro:mailto_macro(offset, block_metadata) { mailto_macro } + / check_macros(block_metadata) url_macro:url_macro(offset, block_metadata) { url_macro } + / check_macros(block_metadata) pass:inline_pass(offset) { pass } + / check_macros(block_metadata) link_macro:link_macro(offset) { link_macro } + / check_macros(block_metadata) 
check_autolinks(allow_autolinks) inline_autolink:inline_autolink(offset) { inline_autolink } / inline_line_break:inline_line_break(offset) { inline_line_break } / bold_text_unconstrained:bold_text_unconstrained(offset, block_metadata) { bold_text_unconstrained } / bold_text_constrained:bold_text_constrained(offset, block_metadata) { bold_text_constrained } @@ -4288,6 +4312,9 @@ peg::parser! { rule check_autolinks(allow: bool) -> () = {? if allow { Ok(()) } else { Err("autolinks suppressed") } } + rule check_macros(block_metadata: &BlockParsingMetadata) -> () + = {? if block_metadata.macros_enabled { Ok(()) } else { Err("macros disabled") } } + rule inline_autolink(offset: usize) -> InlineNode = start:position!() @@ -5106,7 +5133,7 @@ peg::parser! { // a complete pattern (those are handled by escaped_superscript_subscript rule) "\\" "^" !([^'^' | ' ' | '\t' | '\n']+ "^") / "\\" "~" !([^'~' | ' ' | '\t' | '\n']+ "~") - / (!(eol()*<2,> / ![_] / escaped_syntax_match() / index_term_match() / inline_anchor_match() / cross_reference_shorthand_match() / cross_reference_macro_match() / hard_wrap(offset) / footnote_match(offset, block_metadata) / inline_image(start_pos, block_metadata) / inline_icon(start_pos, block_metadata) / inline_stem(start_pos) / inline_keyboard(start_pos) / inline_button(start_pos) / inline_menu(start_pos) / mailto_macro(start_pos, block_metadata) / url_macro(start_pos, block_metadata) / inline_pass(start_pos) / link_macro(start_pos) / (check_autolinks(allow_autolinks) inline_autolink(start_pos)) / inline_line_break(start_pos) / bold_text_unconstrained(start_pos, block_metadata) / bold_text_constrained_match() / italic_text_unconstrained(start_pos, block_metadata) / italic_text_constrained_match() / monospace_text_unconstrained(start_pos, block_metadata) / monospace_text_constrained_match() / highlight_text_unconstrained(start_pos, block_metadata) / highlight_text_constrained_match() / superscript_text(start_pos, block_metadata) / 
subscript_text(start_pos, block_metadata) / curved_quotation_text(start_pos, block_metadata) / curved_apostrophe_text(start_pos, block_metadata) / standalone_curved_apostrophe(start_pos, block_metadata)) [_]) + / (!(eol()*<2,> / ![_] / escaped_syntax_match() / hard_wrap(offset) / (check_macros(block_metadata) (inline_anchor_match() / index_term_match() / cross_reference_shorthand_match() / cross_reference_macro_match() / footnote_match(offset, block_metadata) / inline_image(start_pos, block_metadata) / inline_icon(start_pos, block_metadata) / inline_stem(start_pos) / inline_keyboard(start_pos) / inline_button(start_pos) / inline_menu(start_pos) / mailto_macro(start_pos, block_metadata) / url_macro(start_pos, block_metadata) / inline_pass(start_pos) / link_macro(start_pos))) / (check_macros(block_metadata) check_autolinks(allow_autolinks) inline_autolink(start_pos)) / inline_line_break(start_pos) / bold_text_unconstrained(start_pos, block_metadata) / bold_text_constrained_match() / italic_text_unconstrained(start_pos, block_metadata) / italic_text_constrained_match() / monospace_text_unconstrained(start_pos, block_metadata) / monospace_text_constrained_match() / highlight_text_unconstrained(start_pos, block_metadata) / highlight_text_constrained_match() / superscript_text(start_pos, block_metadata) / subscript_text(start_pos, block_metadata) / curved_quotation_text(start_pos, block_metadata) / curved_apostrophe_text(start_pos, block_metadata) / standalone_curved_apostrophe(start_pos, block_metadata)) [_]) )+) end:position!() { @@ -5150,6 +5177,8 @@ peg::parser! { attr_end_offset, offset, &attr_str, + block_metadata.macros_enabled, + true, )?; let attr_inlines = parse_inlines(&attr_processed, state, block_metadata, &attr_location)?; let attr_inlines = map_inline_locations(state, &attr_processed, &attr_inlines, &attr_location)?; @@ -5168,6 +5197,8 @@ peg::parser! 
{ cite_raw_start + cite.len(), offset, cite, + block_metadata.macros_enabled, + true, )?; let inlines = parse_inlines(&cite_processed, state, block_metadata, &cite_location)?; Some(map_inline_locations(state, &cite_processed, &inlines, &cite_location)?) @@ -5235,6 +5266,8 @@ peg::parser! { attr_end_offset, offset, &author, + block_metadata.macros_enabled, + true, )?; let attr_inlines = parse_inlines(&attr_processed, state, block_metadata, &attr_location)?; let attr_inlines = map_inline_locations(state, &attr_processed, &attr_inlines, &attr_location)?; @@ -5252,6 +5285,8 @@ peg::parser! { cite_start + cite.len(), offset, &cite, + block_metadata.macros_enabled, + true, )?; let cite_inlines = parse_inlines(&cite_processed, state, block_metadata, &cite_location)?; let cite_inlines = map_inline_locations(state, &cite_processed, &cite_inlines, &cite_location)?; @@ -5338,7 +5373,7 @@ peg::parser! { return Ok(get_literal_paragraph(state, content, start, end, offset, block_metadata)); } - let (location, processed) = preprocess_inline_content(state, &content_start, end, offset, content)?; + let (location, processed) = preprocess_inline_content(state, &content_start, end, offset, content, block_metadata.macros_enabled, block_metadata.attributes_enabled)?; let content = parse_inlines(&processed, state, block_metadata, &location)?; let content = map_inline_locations(state, &processed, &content, &location)?; @@ -5875,7 +5910,7 @@ peg::parser! { /// Excludes '[' and ']' to respect AsciiDoc macro/attribute boundaries rule url_path() -> String = path:$(['A'..='Z' | 'a'..='z' | '0'..='9' | '-' | '.' | '_' | '~' | ':' | '/' | '?' | '#' | '@' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '%' | '\\' ]+) {? - let inline_state = InlinePreprocessorParserState::new( + let inline_state = InlinePreprocessorParserState::new_all_enabled( path, state.line_map.clone(), &state.input, @@ -5912,7 +5947,7 @@ peg::parser! { )* ) {? 
- let inline_state = InlinePreprocessorParserState::new( + let inline_state = InlinePreprocessorParserState::new_all_enabled( path, state.line_map.clone(), &state.input, @@ -5962,7 +5997,7 @@ peg::parser! { /// Includes '{' and '}' for `AsciiDoc` attribute substitution pub rule path() -> String = path:$(['A'..='Z' | 'a'..='z' | '0'..='9' | '{' | '}' | '_' | '-' | '.' | '/' | '\\' ]+) {? - let inline_state = InlinePreprocessorParserState::new( + let inline_state = InlinePreprocessorParserState::new_all_enabled( path, state.line_map.clone(), &state.input, diff --git a/acdc-parser/src/grammar/inline_preprocessor.rs b/acdc-parser/src/grammar/inline_preprocessor.rs index c5874323..7dd48862 100644 --- a/acdc-parser/src/grammar/inline_preprocessor.rs +++ b/acdc-parser/src/grammar/inline_preprocessor.rs @@ -36,6 +36,12 @@ pub(crate) struct InlinePreprocessorParserState<'a> { /// Warnings collected during PEG parsing for post-parse emission. /// Uses `RefCell` for interior mutability in PEG action blocks. pub(crate) warnings: RefCell>, + /// Whether macro substitutions are enabled for this block. + /// When `false`, `pass:[]` macros are not extracted by the preprocessor. + pub(crate) macros_enabled: bool, + /// Whether attribute substitutions are enabled for this block. + /// When `false`, `{attribute}` references are not expanded by the preprocessor. 
+ pub(crate) attributes_enabled: bool, } impl<'a> InlinePreprocessorParserState<'a> { @@ -45,7 +51,15 @@ impl<'a> InlinePreprocessorParserState<'a> { /// * `input` - The substring to parse /// * `line_map` - Pre-computed line map for the full document /// * `full_input` - The full document input (for position lookups) - pub(crate) fn new(input: &'a str, line_map: LineMap, full_input: &'a str) -> Self { + /// * `macros_enabled` - Whether macro substitutions are active + /// * `attributes_enabled` - Whether attribute substitutions are active + pub(crate) fn new( + input: &'a str, + line_map: LineMap, + full_input: &'a str, + macros_enabled: bool, + attributes_enabled: bool, + ) -> Self { Self { pass_found_count: Cell::new(0), passthroughs: RefCell::new(Vec::new()), @@ -57,9 +71,16 @@ impl<'a> InlinePreprocessorParserState<'a> { input: RefCell::new(input), substring_start_offset: Cell::new(0), warnings: RefCell::new(Vec::new()), + macros_enabled, + attributes_enabled, } } + /// Create a new state with all substitutions enabled (macros + attributes). + pub(crate) fn new_all_enabled(input: &'a str, line_map: LineMap, full_input: &'a str) -> Self { + Self::new(input, line_map, full_input, true, true) + } + /// Set the initial position for parsing a substring within the document. pub(crate) fn set_initial_position(&mut self, _location: &Location, absolute_offset: usize) { self.substring_start_offset.set(absolute_offset); @@ -100,6 +121,63 @@ impl<'a> InlinePreprocessorParserState<'a> { self.warnings.borrow_mut().drain(..).collect() } + /// Extract the subs-spec string, content, and parsed substitutions from + /// a matched `pass:SUBS[CONTENT]` string. 
+ fn parse_pass_macro_parts(full: &str) -> (&str, &str, Vec<Substitution>) { + let subs_end = full[5..].find('[').unwrap_or(0); + let subs_str = &full[5..5 + subs_end]; + let content = &full[5 + subs_end + 1..full.len() - 1]; + let substitutions = if subs_str.is_empty() { + Vec::new() + } else { + subs_str + .split(',') + .filter_map(|s| parse_substitution(s.trim())) + .collect() + }; + (subs_str, content, substitutions) + } + + /// When macros are disabled, a `pass:SUBS[CONTENT]` macro is treated as literal text. + /// However, if its sub-spec includes attributes (`a` or `n`), we still expand + /// attribute references in the content — matching asciidoctor behavior. + fn expand_disabled_pass_macro( + &self, + full: &str, + document_attributes: &DocumentAttributes, + ) -> String { + let (subs_str, content, substitutions) = Self::parse_pass_macro_parts(full); + + let has_attr_subs = substitutions + .iter() + .any(|s| matches!(s, Substitution::Attributes | Substitution::Normal)); + + if !has_attr_subs { + self.advance(full); + return full.to_string(); + } + + let expanded = inline_preprocessing::attribute_reference_substitutions( + content, + document_attributes, + self, + ) + .unwrap_or_else(|_| content.to_string()); + let reconstructed = format!("pass:{subs_str}[{expanded}]"); + + let absolute_start = self.get_offset(); + self.advance(full); + if reconstructed.chars().count() != full.chars().count() { + self.source_map.borrow_mut().add_replacement( + absolute_start, + absolute_start + full.len(), + reconstructed.chars().count(), + ProcessedKind::Attribute, + ); + } + reconstructed + } + /// Calculate location for a matched construct. 
/// /// Advances the offset by `content.len() + padding` and returns a Location @@ -293,6 +371,12 @@ parser!( rule attribute_reference() -> String = start:position() "{" attribute_name:attribute_name() "}" { + if !state.attributes_enabled { + let text = format!("{{{attribute_name}}}"); + state.advance(&text); + return text; + } + let location = state.calculate_location(start, attribute_name, 2); // Special handling for character reference attributes that need passthrough behavior. @@ -358,6 +442,12 @@ parser!( content:$(![(' '|'\t'|'\n'|'\r')] (!("+" &([' '|'\t'|'\n'|'\r'|','|';'|'"'|'.'|'?'|'!'|':'|')'|']'|'}'|'/'|'-'|'<'|'>'] / ![_])) [_])*) "+" { + if !state.macros_enabled { + let text = format!("+{content}+"); + state.advance(&text); + return text; + } + // Check if we're at start OR preceded by word boundary character // Convert absolute offset to relative offset within the substring let substring_start = state.substring_start_offset.get(); @@ -432,6 +522,10 @@ parser!( rule double_plus_passthrough() -> String = start:position() "++" content:$((!"++" [_])+) "++" { + if !state.macros_enabled { + state.advance(&format!("++{content}++")); + return format!("++{content}++"); + } let location = state.calculate_location(start, content, 4); state.passthroughs.borrow_mut().push(Pass { text: Some(content.to_string()), @@ -455,6 +549,11 @@ parser!( rule triple_plus_passthrough() -> String = start:position() "+++" content:$((!"+++" [_])+) "+++" { + if !state.macros_enabled { + let text = format!("+++{content}+++"); + state.advance(&text); + return text; + } let location = state.calculate_location(start, content, 6); state.passthroughs.borrow_mut().push(Pass { text: Some(content.to_string()), @@ -475,16 +574,16 @@ parser!( } rule pass_macro() -> String - = start:position() "pass:" substitutions:substitutions() "[" content:$([^']']*) "]" { + = start:position() full:$("pass:" substitutions() "[" [^']']* "]") { + if !state.macros_enabled { + return 
state.expand_disabled_pass_macro(full, document_attributes); + } + + let (subs_str, content, substitutions) = + InlinePreprocessorParserState::parse_pass_macro_parts(full); + // For pass macro: "pass:" (5) + substitutions + "[" (1) + "]" (1) - // Calculate approximate substitutions length - let subs_len = if substitutions.is_empty() { - 0 - } else { - // Each substitution is 1 char + commas between them - substitutions.len() + (substitutions.len().saturating_sub(1)) - }; - let padding = 5 + subs_len + 1 + 1; // "pass:" + subs + "[" + "]" + let padding = 5 + subs_str.len() + 1 + 1; // "pass:" + subs + "[" + "]" let location = state.calculate_location(start, content, padding); // Normal substitution group includes Attributes, so check for both let content = if substitutions.contains(&Substitution::Attributes) @@ -607,6 +706,8 @@ mod tests { input: RefCell::new(content), substring_start_offset: Cell::new(0), warnings: RefCell::new(Vec::new()), + macros_enabled: true, + attributes_enabled: true, } } @@ -1255,4 +1356,65 @@ mod tests { ); Ok(()) } + + fn setup_state_macros_disabled(content: &str) -> InlinePreprocessorParserState<'_> { + InlinePreprocessorParserState { + pass_found_count: Cell::new(0), + passthroughs: RefCell::new(Vec::new()), + attributes: RefCell::new(HashMap::new()), + current_offset: Cell::new(0), + line_map: LineMap::new(content), + full_input: content, + source_map: RefCell::new(SourceMap::default()), + input: RefCell::new(content), + substring_start_offset: Cell::new(0), + warnings: RefCell::new(Vec::new()), + macros_enabled: false, + attributes_enabled: true, + } + } + + #[test] + fn test_pass_macro_a_with_macros_disabled_expands_attributes() -> Result<(), Error> { + let attributes = setup_attributes(); + let input = "pass:a[{version}]"; + let state = setup_state_macros_disabled(input); + let result = inline_preprocessing::run(input, &attributes, &state)?; + assert_eq!(result.text, "pass:a[1.0]"); + assert!(state.passthroughs.borrow().is_empty()); 
+ Ok(()) + } + + #[test] + fn test_pass_macro_no_subs_with_macros_disabled_preserves_attributes() -> Result<(), Error> { + let attributes = setup_attributes(); + let input = "pass:[{version}]"; + let state = setup_state_macros_disabled(input); + let result = inline_preprocessing::run(input, &attributes, &state)?; + assert_eq!(result.text, "pass:[{version}]"); + assert!(state.passthroughs.borrow().is_empty()); + Ok(()) + } + + #[test] + fn test_pass_macro_q_with_macros_disabled_preserves_content() -> Result<(), Error> { + let attributes = setup_attributes(); + let input = "pass:q[text]"; + let state = setup_state_macros_disabled(input); + let result = inline_preprocessing::run(input, &attributes, &state)?; + assert_eq!(result.text, "pass:q[text]"); + assert!(state.passthroughs.borrow().is_empty()); + Ok(()) + } + + #[test] + fn test_pass_macro_a_q_with_macros_disabled_expands_attributes() -> Result<(), Error> { + let attributes = setup_attributes(); + let input = "pass:a,q[{version}]"; + let state = setup_state_macros_disabled(input); + let result = inline_preprocessing::run(input, &attributes, &state)?; + assert_eq!(result.text, "pass:a,q[1.0]"); + assert!(state.passthroughs.borrow().is_empty()); + Ok(()) + } } diff --git a/acdc-parser/src/grammar/inline_processing.rs b/acdc-parser/src/grammar/inline_processing.rs index a160b7e8..98085832 100644 --- a/acdc-parser/src/grammar/inline_processing.rs +++ b/acdc-parser/src/grammar/inline_processing.rs @@ -91,6 +91,8 @@ pub(crate) fn preprocess_inline_content( end: usize, offset: usize, content: &str, + macros_enabled: bool, + attributes_enabled: bool, ) -> Result<(Location, ProcessedContent), Error> { // First, ensure the end position is on a valid UTF-8 boundary let mut adjusted_end = end + offset; @@ -101,8 +103,13 @@ pub(crate) fn preprocess_inline_content( } } - let mut inline_state = - InlinePreprocessorParserState::new(content, state.line_map.clone(), &state.input); + let mut inline_state = 
InlinePreprocessorParserState::new( + content, + state.line_map.clone(), + &state.input, + macros_enabled, + attributes_enabled, + ); // We adjust the start and end positions to account for the content start offset let content_end_offset = if adjusted_end == 0 { @@ -217,8 +224,15 @@ pub(crate) fn process_inlines( offset: usize, content: &str, ) -> Result, Error> { - let (location, processed) = - preprocess_inline_content(state, content_start, end, offset, content)?; + let (location, processed) = preprocess_inline_content( + state, + content_start, + end, + offset, + content, + block_metadata.macros_enabled, + block_metadata.attributes_enabled, + )?; // After preprocessing, attribute substitution may result in empty content // (e.g., {empty} -> ""). In this case, return empty vec without parsing. if processed.text.trim().is_empty() { @@ -241,8 +255,15 @@ pub(crate) fn process_inlines_no_autolinks( offset: usize, content: &str, ) -> Result, Error> { - let (location, processed) = - preprocess_inline_content(state, content_start, end, offset, content)?; + let (location, processed) = preprocess_inline_content( + state, + content_start, + end, + offset, + content, + block_metadata.macros_enabled, + block_metadata.attributes_enabled, + )?; if processed.text.trim().is_empty() { return Ok(Vec::new()); } diff --git a/acdc-parser/src/model/substitution.rs b/acdc-parser/src/model/substitution.rs index 40c412b7..e381abda 100644 --- a/acdc-parser/src/model/substitution.rs +++ b/acdc-parser/src/model/substitution.rs @@ -27,7 +27,10 @@ //! - **Callouts** - Already parsed into [`crate::CalloutRef`] nodes by the grammar. //! Converters render the callout markers. //! -//! - **Macros** / **`PostReplacements`** - Not yet implemented. +//! - **Macros** - Handled at the grammar level: when macros are disabled via `subs`, +//! macro grammar rules are gated by a predicate and macro-like text becomes plain text. +//! +//! - **`PostReplacements`** - Not yet implemented. //! //! 
## Why this split? //! @@ -198,6 +201,32 @@ impl SubstitutionSpec { result } + /// Check if macros are disabled by this spec. + /// - Explicit list without Macros → disabled + /// - Modifiers with Remove(Macros) → disabled + #[must_use] + pub fn macros_disabled(&self) -> bool { + match self { + Self::Explicit(subs) => !subs.contains(&Substitution::Macros), + Self::Modifiers(ops) => ops + .iter() + .any(|op| matches!(op, SubstitutionOp::Remove(Substitution::Macros))), + } + } + + /// Check if attribute substitution is disabled by this spec. + /// - Explicit list without Attributes → disabled + /// - Modifiers with Remove(Attributes) → disabled + #[must_use] + pub fn attributes_disabled(&self) -> bool { + match self { + Self::Explicit(subs) => !subs.contains(&Substitution::Attributes), + Self::Modifiers(ops) => ops + .iter() + .any(|op| matches!(op, SubstitutionOp::Remove(Substitution::Attributes))), + } + } + /// Resolve the substitution spec to a concrete list of substitutions. /// /// - For `Explicit`, returns the list directly @@ -896,4 +925,76 @@ mod tests { assert_eq!(resolved, value); } } + + #[test] + fn test_macros_disabled_explicit_without_macros() { + let spec = parse_subs_attribute("specialchars"); + assert!(spec.macros_disabled()); + } + + #[test] + fn test_macros_disabled_explicit_with_macros() { + let spec = parse_subs_attribute("macros"); + assert!(!spec.macros_disabled()); + } + + #[test] + fn test_macros_disabled_explicit_normal_includes_macros() { + let spec = parse_subs_attribute("normal"); + assert!(!spec.macros_disabled()); + } + + #[test] + fn test_macros_disabled_modifier_remove() { + let spec = parse_subs_attribute("-macros"); + assert!(spec.macros_disabled()); + } + + #[test] + fn test_macros_disabled_modifier_add() { + let spec = parse_subs_attribute("+macros"); + assert!(!spec.macros_disabled()); + } + + #[test] + fn test_macros_disabled_explicit_none() { + let spec = parse_subs_attribute("none"); + assert!(spec.macros_disabled()); + } + + 
#[test] + fn test_attributes_disabled_explicit_without_attributes() { + let spec = parse_subs_attribute("specialchars"); + assert!(spec.attributes_disabled()); + } + + #[test] + fn test_attributes_disabled_explicit_with_attributes() { + let spec = parse_subs_attribute("attributes"); + assert!(!spec.attributes_disabled()); + } + + #[test] + fn test_attributes_disabled_explicit_normal_includes_attributes() { + let spec = parse_subs_attribute("normal"); + assert!(!spec.attributes_disabled()); + } + + #[test] + fn test_attributes_disabled_modifier_remove() { + let spec = parse_subs_attribute("-attributes"); + assert!(spec.attributes_disabled()); + } + + #[test] + fn test_attributes_disabled_modifier_add() { + let spec = parse_subs_attribute("+attributes"); + assert!(!spec.attributes_disabled()); + } + + #[test] + fn test_attributes_disabled_explicit_none() { + let spec = parse_subs_attribute("none"); + assert!(spec.attributes_disabled()); + } } diff --git a/converters/html/CHANGELOG.md b/converters/html/CHANGELOG.md index 7ab4a0f4..5396c7ce 100644 --- a/converters/html/CHANGELOG.md +++ b/converters/html/CHANGELOG.md @@ -133,6 +133,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- **`subs=` overrides on passthrough blocks** — passthrough blocks (`++++`) with a `subs=` + attribute now apply the specified substitutions instead of emitting raw content. Supports + all substitution types including `normal`, `specialchars`, `quotes`, `attributes`, and + explicit combinations. - **Em-dash patterns now match asciidoctor** — spaced pattern emits ` — `, word-bounded (`word--word`) emits `—​`. 
- **Em-dash inside inline formatting** — `--` inside bold, italic, monospace, highlight, diff --git a/converters/html/src/delimited.rs b/converters/html/src/delimited.rs index da411611..47c97ba2 100644 --- a/converters/html/src/delimited.rs +++ b/converters/html/src/delimited.rs @@ -5,8 +5,8 @@ use acdc_converters_core::{ visitor::{WritableVisitor, WritableVisitorExt}, }; use acdc_parser::{ - AttributeValue, Block, BlockMetadata, DelimitedBlock, DelimitedBlockType, InlineNode, - StemContent, StemNotation, + AttributeValue, Block, BlockMetadata, DelimitedBlock, DelimitedBlockType, InlineNode, Location, + Plain, StemContent, StemNotation, Substitution, SubstitutionSpec, substitute, }; use crate::{ @@ -665,17 +665,74 @@ fn render_listing_block_semantic>( Ok(()) } +/// Render a passthrough block with `subs=` override applied. +/// +/// Passthrough blocks default to no substitutions, emitting raw content. +/// When `subs=` is specified, the raw content is processed through the +/// substitution pipeline (attributes, quotes, specialchars, replacements). +fn render_pass_block_with_subs>( + inlines: &[InlineNode], + spec: &SubstitutionSpec, + visitor: &mut V, + processor: &Processor, + options: &RenderOptions, +) -> Result<(), Error> { + // Passthrough blocks default to no subs, so the baseline is empty. + let effective = spec.resolve(&[]); + + let mut content = String::new(); + for node in inlines { + if let InlineNode::RawText(r) = node { + content.push_str(&r.content); + } + } + + // Apply attribute substitution if enabled (not done by parser for + // passthrough blocks) + if effective.contains(&Substitution::Attributes) { + content = substitute( + &content, + &[Substitution::Attributes], + processor.document_attributes(), + ); + } + + // If quotes substitution is enabled, parse the content for inline + // formatting (bold, italic, etc.) and render each node with the full + // effective subs. 
This mirrors VerbatimText rendering which passes full + // subs to avoid PlainText's no_quotes_subs optimization that would + // prevent Bold/Italic nodes from rendering as HTML. + if effective.contains(&Substitution::Quotes) { + let parsed_nodes = acdc_parser::parse_text_for_quotes(&content); + for node in &parsed_nodes { + crate::inlines::visit_inline_node(node, visitor, processor, options, &effective)?; + } + } else { + let plain = InlineNode::PlainText(Plain { + content, + location: Location::default(), + escaped: false, + }); + crate::inlines::visit_inline_node(&plain, visitor, processor, options, &effective)?; + } + Ok(()) +} + fn render_delimited_block_inner>( inner: &DelimitedBlockType, title: &[InlineNode], metadata: &BlockMetadata, visitor: &mut V, processor: &Processor, - _options: &RenderOptions, + options: &RenderOptions, ) -> Result<(), Error> { match inner { DelimitedBlockType::DelimitedPass(inlines) => { - visitor.visit_inline_nodes(inlines)?; + if let Some(spec) = &metadata.substitutions { + render_pass_block_with_subs(inlines, spec, visitor, processor, options)?; + } else { + visitor.visit_inline_nodes(inlines)?; + } } DelimitedBlockType::DelimitedListing(inlines) => { render_listing_block(inlines, title, metadata, visitor, processor)?; diff --git a/converters/html/tests/fixtures/expected/html/embedded/passthrough_block_subs_override.html b/converters/html/tests/fixtures/expected/html/embedded/passthrough_block_subs_override.html new file mode 100644 index 00000000..d7ca0506 --- /dev/null +++ b/converters/html/tests/fixtures/expected/html/embedded/passthrough_block_subs_override.html @@ -0,0 +1 @@ +This has bold and italic and <html> and hello world.This has *bold* and and {my-attr}.This has *bold* and <html> and {my-attr}.This has bold and and {my-attr}.This has *bold* and <html> and hello world.Default passthrough: *bold* and and {my-attr}. 
\ No newline at end of file diff --git a/converters/html/tests/fixtures/expected/html/embedded/subs_explicit_no_macros.html b/converters/html/tests/fixtures/expected/html/embedded/subs_explicit_no_macros.html new file mode 100644 index 00000000..5300b2fb --- /dev/null +++ b/converters/html/tests/fixtures/expected/html/embedded/subs_explicit_no_macros.html @@ -0,0 +1,3 @@ +
+

This has link:https://example.com[text] and macro text.

+
diff --git a/converters/html/tests/fixtures/expected/html/embedded/subs_macros_disabled.html b/converters/html/tests/fixtures/expected/html/embedded/subs_macros_disabled.html new file mode 100644 index 00000000..a08f8729 --- /dev/null +++ b/converters/html/tests/fixtures/expected/html/embedded/subs_macros_disabled.html @@ -0,0 +1,3 @@ +
+

This has link:https://example.com[a link] and xref:section[cross ref] and image:photo.png[alt] and footnote:[a note] and [[myanchor]]anchor and [[[bibref]]]biblio as plain text.

+
diff --git a/converters/html/tests/fixtures/source/html/embedded/passthrough_block_subs_override.adoc b/converters/html/tests/fixtures/source/html/embedded/passthrough_block_subs_override.adoc new file mode 100644 index 00000000..0d057880 --- /dev/null +++ b/converters/html/tests/fixtures/source/html/embedded/passthrough_block_subs_override.adoc @@ -0,0 +1,30 @@ +:my-attr: hello world + +[subs=normal] +++++ +This has *bold* and _italic_ and and {my-attr}. +++++ + +[subs=none] +++++ +This has *bold* and and {my-attr}. +++++ + +[subs=specialchars] +++++ +This has *bold* and and {my-attr}. +++++ + +[subs=quotes] +++++ +This has *bold* and and {my-attr}. +++++ + +[subs="attributes,specialchars"] +++++ +This has *bold* and and {my-attr}. +++++ + +++++ +Default passthrough: *bold* and and {my-attr}. +++++ diff --git a/converters/html/tests/fixtures/source/html/embedded/subs_explicit_no_macros.adoc b/converters/html/tests/fixtures/source/html/embedded/subs_explicit_no_macros.adoc new file mode 100644 index 00000000..1745b044 --- /dev/null +++ b/converters/html/tests/fixtures/source/html/embedded/subs_explicit_no_macros.adoc @@ -0,0 +1,2 @@ +[subs=specialchars] +This has link:https://example.com[text] and macro text. diff --git a/converters/html/tests/fixtures/source/html/embedded/subs_macros_disabled.adoc b/converters/html/tests/fixtures/source/html/embedded/subs_macros_disabled.adoc new file mode 100644 index 00000000..250f616d --- /dev/null +++ b/converters/html/tests/fixtures/source/html/embedded/subs_macros_disabled.adoc @@ -0,0 +1,2 @@ +[subs=-macros] +This has link:https://example.com[a link] and xref:section[cross ref] and image:photo.png[alt] and footnote:[a note] and [[myanchor]]anchor and [[[bibref]]]biblio as plain text.