From bea2a3c0a201fc8b913b47c0a4148504ebeea17c Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sat, 17 Jan 2026 16:03:56 +0000 Subject: [PATCH 01/18] chore(tests): add table cell fixtures --- .../fixtures/tests/table_cell_colspan.adoc | 7 + .../fixtures/tests/table_cell_colspan.json | 345 ++++++++++++ .../fixtures/tests/table_cell_rowspan.adoc | 7 + .../fixtures/tests/table_cell_rowspan.json | 449 +++++++++++++++ .../tests/table_cell_span_combined.adoc | 7 + .../tests/table_cell_span_combined.json | 520 ++++++++++++++++++ 6 files changed, 1335 insertions(+) create mode 100644 acdc-parser/fixtures/tests/table_cell_colspan.adoc create mode 100644 acdc-parser/fixtures/tests/table_cell_colspan.json create mode 100644 acdc-parser/fixtures/tests/table_cell_rowspan.adoc create mode 100644 acdc-parser/fixtures/tests/table_cell_rowspan.json create mode 100644 acdc-parser/fixtures/tests/table_cell_span_combined.adoc create mode 100644 acdc-parser/fixtures/tests/table_cell_span_combined.json diff --git a/acdc-parser/fixtures/tests/table_cell_colspan.adoc b/acdc-parser/fixtures/tests/table_cell_colspan.adoc new file mode 100644 index 0000000..0b11f96 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_colspan.adoc @@ -0,0 +1,7 @@ +[cols="3*"] +|=== +| A | B | C + +2+| Spans two columns | D +| E | F | G +|=== diff --git a/acdc-parser/fixtures/tests/table_cell_colspan.json b/acdc-parser/fixtures/tests/table_cell_colspan.json new file mode 100644 index 0000000..f285245 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_colspan.json @@ -0,0 +1,345 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "cols": "3*" + } + }, + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 3, + 
"col": 3 + }, + { + "line": 3, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 11 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Spans two columns", + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 21 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 21 + } + ] + } + ], + "colspan": 2 + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "D", + "location": [ + { + "line": 5, + "col": 25 + }, + { + "line": 5, + "col": 25 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 25 + }, + { + "line": 5, + "col": 25 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "E", + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + 
"inlines": [ + { + "name": "text", + "type": "string", + "value": "F", + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "G", + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_cell_rowspan.adoc b/acdc-parser/fixtures/tests/table_cell_rowspan.adoc new file mode 100644 index 0000000..c34e08b --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_rowspan.adoc @@ -0,0 +1,7 @@ +|=== +| A | B | C + +.2+| Spans rows | D | E +| F | G +| H | I | J +|=== diff --git a/acdc-parser/fixtures/tests/table_cell_rowspan.json b/acdc-parser/fixtures/tests/table_cell_rowspan.json new file mode 100644 index 0000000..42cd39a --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_rowspan.json @@ -0,0 +1,449 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ] + 
}, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Spans rows", + "location": [ + { + "line": 4, + "col": 6 + }, + { + "line": 4, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 6 + }, + { + "line": 4, + "col": 15 + } + ] + } + ], + "rowspan": 2 + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "D", + "location": [ + { + "line": 4, + "col": 19 + }, + { + "line": 4, + "col": 19 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 19 + }, + { + "line": 4, + "col": 19 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "E", + "location": [ + { + "line": 4, + "col": 23 + }, + { + "line": 4, + "col": 23 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 23 + }, + { + "line": 4, + "col": 23 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "F", + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 3 + } + ] 
+ } + ], + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "G", + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 7 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "H", + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "I", + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "J", + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_cell_span_combined.adoc b/acdc-parser/fixtures/tests/table_cell_span_combined.adoc new file mode 100644 index 0000000..6251a31 --- /dev/null +++ 
b/acdc-parser/fixtures/tests/table_cell_span_combined.adoc @@ -0,0 +1,7 @@ +|=== +| A | B | C | D + +2.2+| Big cell | E | F +| G | H +| I | J | K | L +|=== diff --git a/acdc-parser/fixtures/tests/table_cell_span_combined.json b/acdc-parser/fixtures/tests/table_cell_span_combined.json new file mode 100644 index 0000000..c7fb41c --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_span_combined.json @@ -0,0 +1,520 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "D", + "location": [ + { + "line": 2, + "col": 15 + }, + { + "line": 2, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 15 + }, + { + "line": 2, + "col": 15 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ 
+ { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Big cell", + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 14 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 14 + } + ] + } + ], + "colspan": 2, + "rowspan": 2 + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "E", + "location": [ + { + "line": 4, + "col": 18 + }, + { + "line": 4, + "col": 18 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 18 + }, + { + "line": 4, + "col": 18 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "F", + "location": [ + { + "line": 4, + "col": 22 + }, + { + "line": 4, + "col": 22 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 22 + }, + { + "line": 4, + "col": 22 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "G", + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "H", + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 7 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "I", + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": 
[ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "J", + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "K", + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "L", + "location": [ + { + "line": 6, + "col": 15 + }, + { + "line": 6, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 15 + }, + { + "line": 6, + "col": 15 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] +} \ No newline at end of file From e16cec09679321b7fce11c020733e580269461af Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sat, 17 Jan 2026 16:04:23 +0000 Subject: [PATCH 02/18] feat(parser): add table colspand and rowspan --- acdc-parser/src/blocks/table.rs | 159 +++++++++++++++++++++++++--- acdc-parser/src/grammar/document.rs | 35 ++++-- acdc-parser/src/grammar/table.rs | 24 +++-- acdc-parser/src/model/tables.rs | 35 +++++- 4 files changed, 217 insertions(+), 36 deletions(-) diff --git a/acdc-parser/src/blocks/table.rs b/acdc-parser/src/blocks/table.rs index bca8cb3..a93be2c 100644 --- a/acdc-parser/src/blocks/table.rs 
+++ b/acdc-parser/src/blocks/table.rs @@ -1,12 +1,106 @@ use crate::Table; +/// Represents a parsed cell specifier with span information. +/// +/// In `AsciiDoc`, cell specifiers appear before the cell separator: +/// - `2+|content` → colspan=2 +/// - `.3+|content` → rowspan=3 +/// - `2.3+|content` → colspan=2, rowspan=3 +#[derive(Debug, Clone, Copy)] +pub(crate) struct CellSpecifier { + pub colspan: usize, + pub rowspan: usize, +} + +impl Default for CellSpecifier { + fn default() -> Self { + Self { + colspan: 1, + rowspan: 1, + } + } +} + +impl CellSpecifier { + /// Parse a cell specifier from the beginning of cell content. + /// + /// Returns the specifier and the offset where actual content begins. + /// Pattern: `(\d+)?(\.\d+)?\+` + /// + /// Examples: + /// - `"2+rest"` → `(CellSpecifier { colspan: 2, rowspan: 1 }, 2)` + /// - `".3+rest"` → `(CellSpecifier { colspan: 1, rowspan: 3 }, 3)` + /// - `"2.3+rest"` → `(CellSpecifier { colspan: 2, rowspan: 3 }, 4)` + /// - `"plain"` → `(CellSpecifier { colspan: 1, rowspan: 1 }, 0)` + #[must_use] + pub fn parse(content: &str) -> (Self, usize) { + let bytes = content.as_bytes(); + let mut pos = 0; + let mut colspan: Option = None; + let mut rowspan: Option = None; + + // Parse optional colspan (digits before optional dot) + let colspan_start = pos; + while bytes.get(pos).is_some_and(u8::is_ascii_digit) { + pos += 1; + } + if pos > colspan_start { + if let Some(n) = content + .get(colspan_start..pos) + .and_then(|s| s.parse::().ok()) + { + colspan = Some(n); + } + } + + // Parse optional rowspan (dot followed by digits) + if bytes.get(pos) == Some(&b'.') { + let dot_pos = pos; + pos += 1; + let rowspan_start = pos; + while bytes.get(pos).is_some_and(u8::is_ascii_digit) { + pos += 1; + } + if pos > rowspan_start { + if let Some(n) = content + .get(rowspan_start..pos) + .and_then(|s| s.parse::().ok()) + { + rowspan = Some(n); + } + } else { + // Dot without following digits - not a span specifier + pos = dot_pos; + } + } 
+ + // Must end with '+' to be a valid span specifier + if bytes.get(pos) == Some(&b'+') && (colspan.is_some() || rowspan.is_some()) { + pos += 1; + ( + Self { + colspan: colspan.unwrap_or(1), + rowspan: rowspan.unwrap_or(1), + }, + pos, + ) + } else { + // No valid span specifier found + (Self::default(), 0) + } + } +} + +/// A parsed table cell with position and span information. +pub(crate) type ParsedCell = (String, usize, usize, usize, usize); // (content, start, end, colspan, rowspan) + impl Table { pub(crate) fn parse_rows_with_positions( text: &str, separator: &str, has_header: &mut bool, base_offset: usize, - ) -> Vec> { + ) -> Vec> { let mut rows = Vec::new(); let mut current_offset = base_offset; let lines: Vec<&str> = text.lines().collect(); @@ -36,9 +130,10 @@ impl Table { // Check if this is a single-line-per-row table (line has multiple separators) // vs multi-line-per-row table (one cell per line, rows separated by empty lines) + // A line is single-line row if it has multiple separators (handles both `| a | b` + // and `2+| a | b` formats) let first_line = line_ref.trim_end(); - let is_single_line_row = - first_line.starts_with(separator) && first_line.matches(separator).count() > 1; + let is_single_line_row = first_line.matches(separator).count() > 1; if is_single_line_row { // Single-line row format: each line is a complete row @@ -89,13 +184,13 @@ impl Table { row_lines: &[&str], separator: &str, row_start_offset: usize, - ) -> Vec<(String, usize, usize)> { + ) -> Vec { let mut columns = Vec::new(); let mut current_offset = row_start_offset; for line in row_lines { - // Skip lines that don't start with the separator - if !line.starts_with(separator) { + // Check if line contains the separator at all + if !line.contains(separator) { current_offset += line.len() + 1; // +1 for newline continue; } @@ -106,28 +201,66 @@ impl Table { // Track position within the line let mut line_offset = current_offset; - // Skip the first empty part (before the 
first |) + // Handle span specifier at the start of line (before first separator) + // e.g., "2+| content" -> part 0 is "2+", applies to part 1 + let mut pending_spec: Option = None; + for (i, part) in parts.iter().enumerate() { if i == 0 { - // First part is always empty (before first |) - line_offset += separator.len(); + // First part is before first separator + let trimmed = part.trim(); + if trimmed.is_empty() { + // Normal case: line starts with separator + line_offset += separator.len(); + } else { + // Span specifier before first separator: "2+| content" + let (spec, spec_len) = CellSpecifier::parse(trimmed); + if spec_len > 0 { + pending_spec = Some(spec); + } + // Move past the specifier and the separator + line_offset += part.len() + separator.len(); + } continue; } let cell_content_with_spaces = part; - let cell_content = cell_content_with_spaces.trim(); + let cell_content_trimmed = cell_content_with_spaces.trim(); + + // Use pending specifier if we have one, otherwise parse from content + let (spec, spec_offset) = if let Some(pending) = pending_spec.take() { + (pending, 0) + } else { + CellSpecifier::parse(cell_content_trimmed) + }; + + // The actual cell content starts after the specifier + let cell_content = if spec_offset > 0 { + cell_content_trimmed + .get(spec_offset..) 
+ .unwrap_or("") + .trim_start() + } else { + cell_content_trimmed + }; - // Find where the actual content starts (after leading spaces) + // Find where the actual content starts (after leading spaces and specifier) let leading_spaces = cell_content_with_spaces.len() - cell_content_with_spaces.trim_start().len(); - let cell_start = line_offset + leading_spaces; + let cell_start = line_offset + leading_spaces + spec_offset; let cell_end = if cell_content.is_empty() { cell_start } else { cell_start + cell_content.len() - 1 // -1 for inclusive end }; - columns.push((cell_content.to_string(), cell_start, cell_end)); + columns.push(( + cell_content.to_string(), + cell_start, + cell_end, + spec.colspan, + spec.rowspan, + )); // Move offset past this cell and its separator line_offset += part.len(); diff --git a/acdc-parser/src/grammar/document.rs b/acdc-parser/src/grammar/document.rs index 7e6e80e..7e20054 100644 --- a/acdc-parser/src/grammar/document.rs +++ b/acdc-parser/src/grammar/document.rs @@ -1515,8 +1515,10 @@ peg::parser! { for (i, row) in raw_rows.iter().enumerate() { let columns = row .iter() - .filter(|(cell, _, _)| !cell.is_empty()) - .map(|(cell, start, _end)| parse_table_cell(cell, state, *start, block_metadata.parent_section_level)) + .filter(|(cell, _, _, _, _)| !cell.is_empty()) + .map(|(cell, start, _end, colspan, rowspan)| { + parse_table_cell(cell, state, *start, block_metadata.parent_section_level, *colspan, *rowspan) + }) .collect::, _>>()?; // Calculate row line number from first cell for better error reporting @@ -1526,16 +1528,29 @@ peg::parser! 
{ table_location.start.line // Fallback if row is empty (shouldn't happen) }; - // validate that if we have ncols we have the same number of columns in each row + // validate that if we have ncols the logical column count matches + // Logical column count = sum of colspans for all cells + let logical_col_count: usize = columns.iter().map(|c| c.colspan).sum(); if let Some(ncols) = ncols - && columns.len() != ncols + && logical_col_count != ncols { - tracing::warn!( - actual = columns.len(), - expected = ncols, - line = row_line, - "table row has incorrect column count, skipping row" - ); + // Check if any cell's colspan exceeds the table width + let has_overflow = columns.iter().any(|c| c.colspan > ncols); + if has_overflow { + tracing::error!( + actual = logical_col_count, + expected = ncols, + line = row_line, + "dropping cell because it exceeds specified number of columns" + ); + } else { + tracing::warn!( + actual = logical_col_count, + expected = ncols, + line = row_line, + "table row has incorrect column count, skipping row" + ); + } continue; } diff --git a/acdc-parser/src/grammar/table.rs b/acdc-parser/src/grammar/table.rs index c9521bf..d3afb7c 100644 --- a/acdc-parser/src/grammar/table.rs +++ b/acdc-parser/src/grammar/table.rs @@ -7,17 +7,19 @@ pub(crate) fn parse_table_cell( state: &mut ParserState, cell_start_offset: usize, parent_section_level: Option, + colspan: usize, + rowspan: usize, ) -> Result { - let content = document_parser::blocks(content, state, cell_start_offset, parent_section_level) + let blocks = document_parser::blocks(content, state, cell_start_offset, parent_section_level) .unwrap_or_else(|error| { - adjust_and_log_parse_error( - &error, - content, - cell_start_offset, - state, - "Failed parsing table cell content as blocks", - ); - Ok(Vec::new()) - })?; - Ok(TableColumn { content }) + adjust_and_log_parse_error( + &error, + content, + cell_start_offset, + state, + "Failed parsing table cell content as blocks", + ); + Ok(Vec::new()) + 
})?; + Ok(TableColumn::with_spans(blocks, colspan, rowspan)) } diff --git a/acdc-parser/src/model/tables.rs b/acdc-parser/src/model/tables.rs index 1f9a7c5..257dbf2 100644 --- a/acdc-parser/src/model/tables.rs +++ b/acdc-parser/src/model/tables.rs @@ -223,12 +223,43 @@ impl TableRow { #[non_exhaustive] pub struct TableColumn { pub content: Vec, + /// Number of columns this cell spans (default 1). + /// Specified in `AsciiDoc` with `n+|` syntax (e.g., `2+|` for colspan=2). + #[serde(default = "default_span", skip_serializing_if = "is_default_span")] + pub colspan: usize, + /// Number of rows this cell spans (default 1). + /// Specified in `AsciiDoc` with `.n+|` syntax (e.g., `.2+|` for rowspan=2). + #[serde(default = "default_span", skip_serializing_if = "is_default_span")] + pub rowspan: usize, +} + +const fn default_span() -> usize { + 1 +} + +#[allow(clippy::trivially_copy_pass_by_ref)] +const fn is_default_span(span: &usize) -> bool { + *span == 1 } impl TableColumn { - /// Create a new table column with the given content. + /// Create a new table column with the given content and default spans (1). #[must_use] pub fn new(content: Vec) -> Self { - Self { content } + Self { + content, + colspan: 1, + rowspan: 1, + } + } + + /// Create a new table column with content and explicit span values. 
+ #[must_use] + pub fn with_spans(content: Vec, colspan: usize, rowspan: usize) -> Self { + Self { + content, + colspan, + rowspan, + } } } From 6b8bc949690e7182f4beacf87860c271eec2af31 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sat, 17 Jan 2026 16:04:37 +0000 Subject: [PATCH 03/18] feat(html): add table colspand and rowspan --- converters/html/src/table.rs | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/converters/html/src/table.rs b/converters/html/src/table.rs index 031bd19..e0aa173 100644 --- a/converters/html/src/table.rs +++ b/converters/html/src/table.rs @@ -2,7 +2,7 @@ use acdc_converters_core::table::calculate_column_widths; use acdc_converters_core::visitor::{WritableVisitor, WritableVisitorExt}; use acdc_parser::{ AttributeValue, Block, BlockMetadata, ColumnFormat, HorizontalAlignment, InlineNode, Table, - VerticalAlignment, + TableColumn, VerticalAlignment, }; use crate::{Error, Processor, RenderOptions}; @@ -30,6 +30,20 @@ fn get_column_format(columns: &[ColumnFormat], col_index: usize) -> ColumnFormat columns.get(col_index).cloned().unwrap_or_default() } +/// Format colspan/rowspan attributes for a table cell. +/// Returns an empty string if both are 1 (default). +fn format_span_attrs(cell: &TableColumn) -> String { + use std::fmt::Write; + let mut attrs = String::new(); + if cell.colspan > 1 { + let _ = write!(attrs, " colspan=\"{}\"", cell.colspan); + } + if cell.rowspan > 1 { + let _ = write!(attrs, " rowspan=\"{}\"", cell.rowspan); + } + attrs +} + /// Render cell content with support for nested blocks /// `wrap_paragraph` controls whether paragraphs get

wrappers. /// Headers should NOT have wrappers, body cells should have them. @@ -186,8 +200,12 @@ where let spec = get_column_format(&table.columns, col_index); let halign = halign_class(spec.halign); let valign = valign_class(spec.valign); + let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); - write!(writer, "")?; + write!( + writer, + "" + )?; let _ = writer; render_cell_content(&cell.content, visitor, processor, options, false)?; let writer = visitor.writer_mut(); @@ -210,8 +228,12 @@ where let spec = get_column_format(&table.columns, col_index); let halign = halign_class(spec.halign); let valign = valign_class(spec.valign); + let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); - write!(writer, "")?; + write!( + writer, + "" + )?; let _ = writer; render_cell_content(&cell.content, visitor, processor, options, true)?; let writer = visitor.writer_mut(); @@ -233,8 +255,12 @@ where let spec = get_column_format(&table.columns, col_index); let halign = halign_class(spec.halign); let valign = valign_class(spec.valign); + let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); - write!(writer, "")?; + write!( + writer, + "" + )?; let _ = writer; render_cell_content(&cell.content, visitor, processor, options, true)?; let writer = visitor.writer_mut(); From 2ecc6cbe836f558de72e8b2627d9586312d26c3b Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sat, 17 Jan 2026 16:43:35 +0000 Subject: [PATCH 04/18] feat(parser): complete cell specifier support for tables Tables now properly handle cell-level formatting that was previously only supported at the column level. This brings parity with asciidoctor for common table patterns. 
Key changes: - Cell alignment overrides column defaults (^|, >|, .<|, .^|, .>|) - Cell duplication creates N identical cells (3*| Same) - Rowspan tracking prevents cell overlap in subsequent rows --- .../fixtures/tests/table_cell_alignment.adoc | 6 + .../fixtures/tests/table_cell_alignment.html | 475 ++++++++++++++++++ .../fixtures/tests/table_cell_alignment.json | 378 ++++++++++++++ .../tests/table_cell_duplication.adoc | 7 + .../tests/table_cell_duplication.json | 374 ++++++++++++++ .../tests/table_rowspan_with_cols.adoc | 8 + .../tests/table_rowspan_with_cols.json | 454 +++++++++++++++++ acdc-parser/src/blocks/mod.rs | 2 +- acdc-parser/src/blocks/table.rs | 271 ++++++++-- acdc-parser/src/grammar/document.rs | 65 ++- acdc-parser/src/grammar/table.rs | 14 +- acdc-parser/src/model/tables.rs | 38 ++ converters/html/src/table.rs | 42 +- 13 files changed, 2058 insertions(+), 76 deletions(-) create mode 100644 acdc-parser/fixtures/tests/table_cell_alignment.adoc create mode 100644 acdc-parser/fixtures/tests/table_cell_alignment.html create mode 100644 acdc-parser/fixtures/tests/table_cell_alignment.json create mode 100644 acdc-parser/fixtures/tests/table_cell_duplication.adoc create mode 100644 acdc-parser/fixtures/tests/table_cell_duplication.json create mode 100644 acdc-parser/fixtures/tests/table_rowspan_with_cols.adoc create mode 100644 acdc-parser/fixtures/tests/table_rowspan_with_cols.json diff --git a/acdc-parser/fixtures/tests/table_cell_alignment.adoc b/acdc-parser/fixtures/tests/table_cell_alignment.adoc new file mode 100644 index 0000000..b0f055f --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_alignment.adoc @@ -0,0 +1,6 @@ +|=== +| Default | Centered | Right + +^| Center | Default | > Right +| Default | .^ Middle | .> Bottom +|=== diff --git a/acdc-parser/fixtures/tests/table_cell_alignment.html b/acdc-parser/fixtures/tests/table_cell_alignment.html new file mode 100644 index 0000000..d4d72ab --- /dev/null +++ 
b/acdc-parser/fixtures/tests/table_cell_alignment.html @@ -0,0 +1,475 @@ + + + + + + + + + + + +

+ +++++ + + + + + + + + + + + + + + + + + + + +
DefaultCenteredRight

Center

Default

Right

Default

Middle

Bottom

+
+ + + diff --git a/acdc-parser/fixtures/tests/table_cell_alignment.json b/acdc-parser/fixtures/tests/table_cell_alignment.json new file mode 100644 index 0000000..c9cdc64 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_alignment.json @@ -0,0 +1,378 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Default", + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 9 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 9 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Centered", + "location": [ + { + "line": 2, + "col": 13 + }, + { + "line": 2, + "col": 20 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 13 + }, + { + "line": 2, + "col": 20 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Right", + "location": [ + { + "line": 2, + "col": 24 + }, + { + "line": 2, + "col": 28 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 24 + }, + { + "line": 2, + "col": 28 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Center", + "location": [ + { + "line": 4, + "col": 4 + }, + { + "line": 4, + "col": 9 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 4 + }, + { + "line": 4, + "col": 9 + } + ] + } + ], + "halign": "center" + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": 
"string", + "value": "Default", + "location": [ + { + "line": 4, + "col": 13 + }, + { + "line": 4, + "col": 19 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 13 + }, + { + "line": 4, + "col": 19 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Right", + "location": [ + { + "line": 4, + "col": 24 + }, + { + "line": 4, + "col": 28 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 24 + }, + { + "line": 4, + "col": 28 + } + ] + } + ], + "halign": "right" + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Default", + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 9 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 9 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Middle", + "location": [ + { + "line": 5, + "col": 15 + }, + { + "line": 5, + "col": 20 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 15 + }, + { + "line": 5, + "col": 20 + } + ] + } + ], + "valign": "middle" + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Bottom", + "location": [ + { + "line": 5, + "col": 27 + }, + { + "line": 5, + "col": 32 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 27 + }, + { + "line": 5, + "col": 32 + } + ] + } + ], + "valign": "bottom" + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] +} \ No newline at end of file diff --git 
a/acdc-parser/fixtures/tests/table_cell_duplication.adoc b/acdc-parser/fixtures/tests/table_cell_duplication.adoc new file mode 100644 index 0000000..e3c05cc --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_duplication.adoc @@ -0,0 +1,7 @@ +|=== +| A | B | C + +3*| Same + +| X | Y | Z +|=== diff --git a/acdc-parser/fixtures/tests/table_cell_duplication.json b/acdc-parser/fixtures/tests/table_cell_duplication.json new file mode 100644 index 0000000..b7259b6 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_duplication.json @@ -0,0 +1,374 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Same", + "location": [ + { + "line": 4, + "col": 5 + }, + { + 
"line": 4, + "col": 8 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Same", + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Same", + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "X", + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Y", + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Z", + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + 
"col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_rowspan_with_cols.adoc b/acdc-parser/fixtures/tests/table_rowspan_with_cols.adoc new file mode 100644 index 0000000..55f0b92 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_rowspan_with_cols.adoc @@ -0,0 +1,8 @@ +[cols="1,1,1"] +|=== +| A | B | C + +.2+| Spans rows | D | E +| F | G +| H | I | J +|=== diff --git a/acdc-parser/fixtures/tests/table_rowspan_with_cols.json b/acdc-parser/fixtures/tests/table_rowspan_with_cols.json new file mode 100644 index 0000000..05dc72c --- /dev/null +++ b/acdc-parser/fixtures/tests/table_rowspan_with_cols.json @@ -0,0 +1,454 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "cols": "1,1,1" + } + }, + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 11 + } + ] + } + ] 
+ } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Spans rows", + "location": [ + { + "line": 5, + "col": 6 + }, + { + "line": 5, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 6 + }, + { + "line": 5, + "col": 15 + } + ] + } + ], + "rowspan": 2 + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "D", + "location": [ + { + "line": 5, + "col": 19 + }, + { + "line": 5, + "col": 19 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 19 + }, + { + "line": 5, + "col": 19 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "E", + "location": [ + { + "line": 5, + "col": 23 + }, + { + "line": 5, + "col": 23 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 23 + }, + { + "line": 5, + "col": 23 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "F", + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "G", + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "H", + "location": [ + { + "line": 7, + "col": 3 + }, + { + 
"line": 7, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 7, + "col": 3 + }, + { + "line": 7, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "I", + "location": [ + { + "line": 7, + "col": 7 + }, + { + "line": 7, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 7, + "col": 7 + }, + { + "line": 7, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "J", + "location": [ + { + "line": 7, + "col": 11 + }, + { + "line": 7, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 7, + "col": 11 + }, + { + "line": 7, + "col": 11 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 8, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 8, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 8, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/src/blocks/mod.rs b/acdc-parser/src/blocks/mod.rs index dfc7a2c..07db398 100644 --- a/acdc-parser/src/blocks/mod.rs +++ b/acdc-parser/src/blocks/mod.rs @@ -1,2 +1,2 @@ mod list; -mod table; +pub(crate) mod table; diff --git a/acdc-parser/src/blocks/table.rs b/acdc-parser/src/blocks/table.rs index a93be2c..50f5c69 100644 --- a/acdc-parser/src/blocks/table.rs +++ b/acdc-parser/src/blocks/table.rs @@ -1,15 +1,27 @@ -use crate::Table; +use crate::{ColumnStyle, HorizontalAlignment, Table, VerticalAlignment}; -/// Represents a parsed cell specifier with span information. +/// Represents a parsed cell specifier with span, alignment, and style information. 
/// -/// In `AsciiDoc`, cell specifiers appear before the cell separator: +/// In `AsciiDoc`, cell specifiers appear before the cell separator with format: +/// `[halign][valign][colspan][.rowspan][op][style]|` +/// +/// Examples: /// - `2+|content` → colspan=2 /// - `.3+|content` → rowspan=3 /// - `2.3+|content` → colspan=2, rowspan=3 +/// - `^.>2+s|content` → center, bottom, colspan=2, strong style +/// - `3*|content` → duplicate cell 3 times #[derive(Debug, Clone, Copy)] pub(crate) struct CellSpecifier { pub colspan: usize, pub rowspan: usize, + pub halign: Option, + pub valign: Option, + pub style: Option, + /// If true, this is a duplication specifier (`*`) rather than a span (`+`). + pub is_duplication: bool, + /// For duplication, this is the count (e.g., `3*` means 3 copies). + pub duplication_count: usize, } impl Default for CellSpecifier { @@ -17,82 +29,240 @@ impl Default for CellSpecifier { Self { colspan: 1, rowspan: 1, + halign: None, + valign: None, + style: None, + is_duplication: false, + duplication_count: 1, } } } +/// Parse a single style letter into a `ColumnStyle`. +fn parse_style_byte(byte: u8) -> Option { + match byte { + b'a' => Some(ColumnStyle::AsciiDoc), + b'd' => Some(ColumnStyle::Default), + b'e' => Some(ColumnStyle::Emphasis), + b'h' => Some(ColumnStyle::Header), + b'l' => Some(ColumnStyle::Literal), + b'm' => Some(ColumnStyle::Monospace), + b's' => Some(ColumnStyle::Strong), + _ => None, + } +} + impl CellSpecifier { /// Parse a cell specifier from the beginning of cell content. /// /// Returns the specifier and the offset where actual content begins. 
- /// Pattern: `(\d+)?(\.\d+)?\+` + /// Full pattern: `[halign][valign][colspan][.rowspan][+|*][style]` /// /// Examples: - /// - `"2+rest"` → `(CellSpecifier { colspan: 2, rowspan: 1 }, 2)` - /// - `".3+rest"` → `(CellSpecifier { colspan: 1, rowspan: 3 }, 3)` - /// - `"2.3+rest"` → `(CellSpecifier { colspan: 2, rowspan: 3 }, 4)` - /// - `"plain"` → `(CellSpecifier { colspan: 1, rowspan: 1 }, 0)` + /// - `"2+rest"` → colspan=2 + /// - `".3+rest"` → rowspan=3 + /// - `"2.3+rest"` → colspan=2, rowspan=3 + /// - `"^.>2+srest"` → center, bottom, colspan=2, strong style + /// - `"3*rest"` → `duplication_count`=3 + /// - `"plain"` → defaults (no specifier found) #[must_use] pub fn parse(content: &str) -> (Self, usize) { let bytes = content.as_bytes(); let mut pos = 0; - let mut colspan: Option = None; - let mut rowspan: Option = None; - // Parse optional colspan (digits before optional dot) - let colspan_start = pos; + // Phase 1: Parse optional alignment markers + let (halign, valign, align_end) = Self::parse_alignments(bytes, pos); + pos = align_end; + + // Phase 2: Parse optional colspan (digits) + let (colspan, colspan_end) = Self::parse_number(content, bytes, pos); + pos = colspan_end; + + // Phase 3: Parse optional rowspan (dot followed by digits) + let (rowspan, rowspan_end) = Self::parse_rowspan(content, bytes, pos); + pos = rowspan_end; + + // Phase 4: Check for operator and build result + Self::build_result(bytes, pos, colspan, rowspan, halign, valign) + } + + /// Parse alignment markers at the current position. + /// Returns `(halign, valign, new_position)`. 
+ fn parse_alignments( + bytes: &[u8], + mut pos: usize, + ) -> ( + Option, + Option, + usize, + ) { + let mut halign: Option = None; + let mut valign: Option = None; + + loop { + match bytes.get(pos) { + Some(b'<') => { + halign = Some(HorizontalAlignment::Left); + pos += 1; + } + Some(b'^') => { + halign = Some(HorizontalAlignment::Center); + pos += 1; + } + Some(b'>') => { + halign = Some(HorizontalAlignment::Right); + pos += 1; + } + Some(b'.') => { + // Could be vertical alignment (.< .^ .>) or rowspan (.N) + match bytes.get(pos + 1) { + Some(b'<') => { + valign = Some(VerticalAlignment::Top); + pos += 2; + } + Some(b'^') => { + valign = Some(VerticalAlignment::Middle); + pos += 2; + } + Some(b'>') => { + valign = Some(VerticalAlignment::Bottom); + pos += 2; + } + _ => break, // Not vertical alignment, might be rowspan + } + } + _ => break, + } + } + + (halign, valign, pos) + } + + /// Parse a number (for colspan) at the current position. + /// Returns `(parsed_value, new_position)`. + fn parse_number(content: &str, bytes: &[u8], mut pos: usize) -> (Option, usize) { + let start = pos; while bytes.get(pos).is_some_and(u8::is_ascii_digit) { pos += 1; } - if pos > colspan_start { - if let Some(n) = content - .get(colspan_start..pos) + let value = if pos > start { + content + .get(start..pos) .and_then(|s| s.parse::().ok()) - { - colspan = Some(n); - } + } else { + None + }; + (value, pos) + } + + /// Parse rowspan (dot followed by digits) at the current position. + /// Returns `(parsed_value, new_position)`. 
+ fn parse_rowspan(content: &str, bytes: &[u8], mut pos: usize) -> (Option, usize) { + if bytes.get(pos) != Some(&b'.') { + return (None, pos); } - // Parse optional rowspan (dot followed by digits) - if bytes.get(pos) == Some(&b'.') { - let dot_pos = pos; + let dot_pos = pos; + pos += 1; + let start = pos; + while bytes.get(pos).is_some_and(u8::is_ascii_digit) { pos += 1; - let rowspan_start = pos; - while bytes.get(pos).is_some_and(u8::is_ascii_digit) { + } + + if pos > start { + let value = content + .get(start..pos) + .and_then(|s| s.parse::().ok()); + (value, pos) + } else { + // Dot without following digits - not a rowspan specifier + (None, dot_pos) + } + } + + /// Build the final result based on parsed components. + fn build_result( + bytes: &[u8], + mut pos: usize, + colspan: Option, + rowspan: Option, + halign: Option, + valign: Option, + ) -> (Self, usize) { + let has_span_or_dup = colspan.is_some() || rowspan.is_some(); + let is_duplication = bytes.get(pos) == Some(&b'*'); + let is_span = bytes.get(pos) == Some(&b'+'); + + if (is_span || is_duplication) && has_span_or_dup { + pos += 1; + + // Parse optional style letter after operator + let style = bytes.get(pos).and_then(|&b| parse_style_byte(b)); + if style.is_some() { pos += 1; } - if pos > rowspan_start { - if let Some(n) = content - .get(rowspan_start..pos) - .and_then(|s| s.parse::().ok()) - { - rowspan = Some(n); + + let spec = if is_duplication { + Self { + colspan: 1, + rowspan: 1, + halign, + valign, + style, + is_duplication: true, + duplication_count: colspan.unwrap_or(1), } } else { - // Dot without following digits - not a span specifier - pos = dot_pos; - } - } - - // Must end with '+' to be a valid span specifier - if bytes.get(pos) == Some(&b'+') && (colspan.is_some() || rowspan.is_some()) { - pos += 1; - ( Self { colspan: colspan.unwrap_or(1), rowspan: rowspan.unwrap_or(1), + halign, + valign, + style, + is_duplication: false, + duplication_count: 1, + } + }; + (spec, pos) + } else if 
halign.is_some() || valign.is_some() { + // Alignment without span operator - still valid + let style = bytes.get(pos).and_then(|&b| parse_style_byte(b)); + if style.is_some() { + pos += 1; + } + ( + Self { + colspan: 1, + rowspan: 1, + halign, + valign, + style, + is_duplication: false, + duplication_count: 1, }, pos, ) } else { - // No valid span specifier found + // No valid specifier found (Self::default(), 0) } } } -/// A parsed table cell with position and span information. -pub(crate) type ParsedCell = (String, usize, usize, usize, usize); // (content, start, end, colspan, rowspan) +/// A parsed table cell with position, span, alignment, and style information. +#[derive(Debug, Clone)] +pub(crate) struct ParsedCell { + pub content: String, + pub start: usize, + pub end: usize, + pub colspan: usize, + pub rowspan: usize, + pub halign: Option, + pub valign: Option, + pub style: Option, + pub is_duplication: bool, + pub duplication_count: usize, +} impl Table { pub(crate) fn parse_rows_with_positions( @@ -254,13 +424,18 @@ impl Table { cell_start + cell_content.len() - 1 // -1 for inclusive end }; - columns.push(( - cell_content.to_string(), - cell_start, - cell_end, - spec.colspan, - spec.rowspan, - )); + columns.push(ParsedCell { + content: cell_content.to_string(), + start: cell_start, + end: cell_end, + colspan: spec.colspan, + rowspan: spec.rowspan, + halign: spec.halign, + valign: spec.valign, + style: spec.style, + is_duplication: spec.is_duplication, + duplication_count: spec.duplication_count, + }); // Move offset past this cell and its separator line_offset += part.len(); diff --git a/acdc-parser/src/grammar/document.rs b/acdc-parser/src/grammar/document.rs index 7e20054..4bb3dad 100644 --- a/acdc-parser/src/grammar/document.rs +++ b/acdc-parser/src/grammar/document.rs @@ -1512,25 +1512,39 @@ peg::parser! { let mut footer = None; let mut rows = Vec::new(); + // Track rowspan state: maps column positions to remaining rowspan count. 
+ // When a cell has rowspan > 1, we track how many more rows it occupies. + // Each entry: (column_position, remaining_rows, colspan_width) + let mut active_rowspans: Vec<(usize, usize, usize)> = Vec::new(); + for (i, row) in raw_rows.iter().enumerate() { - let columns = row - .iter() - .filter(|(cell, _, _, _, _)| !cell.is_empty()) - .map(|(cell, start, _end, colspan, rowspan)| { - parse_table_cell(cell, state, *start, block_metadata.parent_section_level, *colspan, *rowspan) - }) - .collect::, _>>()?; + // Process cells, handling duplication + let mut columns = Vec::new(); + for cell in row.iter().filter(|c| !c.content.is_empty()) { + let parsed = parse_table_cell(&cell.content, state, cell.start, block_metadata.parent_section_level, cell)?; + if cell.is_duplication && cell.duplication_count > 1 { + // Duplicate the cell N times + for _ in 0..cell.duplication_count { + columns.push(parsed.clone()); + } + } else { + columns.push(parsed); + } + } // Calculate row line number from first cell for better error reporting let row_line = if let Some(first) = row.first() { - state.create_location(first.1, first.2).start.line + state.create_location(first.start, first.end).start.line } else { table_location.start.line // Fallback if row is empty (shouldn't happen) }; - // validate that if we have ncols the logical column count matches - // Logical column count = sum of colspans for all cells - let logical_col_count: usize = columns.iter().map(|c| c.colspan).sum(); + // Calculate occupied columns from active rowspans + let occupied_from_rowspans: usize = active_rowspans.iter().map(|(_pos, _remaining, width)| *width).sum(); + + // Logical column count = columns occupied by rowspans + colspans of new cells + let logical_col_count: usize = occupied_from_rowspans + columns.iter().map(|c| c.colspan).sum::(); + if let Some(ncols) = ncols && logical_col_count != ncols { @@ -1547,6 +1561,7 @@ peg::parser! 
{ tracing::warn!( actual = logical_col_count, expected = ncols, + occupied_from_rowspans, line = row_line, "table row has incorrect column count, skipping row" ); @@ -1554,6 +1569,34 @@ peg::parser! { continue; } + // Update active rowspans for this row: + // 1. Decrement remaining count for existing rowspans + // 2. Remove rowspans that are now exhausted + active_rowspans.retain_mut(|(_pos, remaining, _width)| { + *remaining -= 1; + *remaining > 0 + }); + + // 3. Add new rowspans from current row's cells + let mut col_position = 0; + for (_, active_pos, _, colspan) in active_rowspans.iter().map(|(p, r, c)| (*p, *p, *r, *c)) { + if col_position == active_pos { + col_position += colspan; + } + } + for cell in &columns { + // Skip over positions occupied by rowspans + while active_rowspans.iter().any(|(pos, _, width)| col_position >= *pos && col_position < pos + width) { + if let Some((_, _, width)) = active_rowspans.iter().find(|(pos, _, w)| col_position >= *pos && col_position < pos + w) { + col_position += width; + } + } + if cell.rowspan > 1 { + active_rowspans.push((col_position, cell.rowspan - 1, cell.colspan)); + } + col_position += cell.colspan; + } + // if we have a header, we need to add the columns we have to the header if has_header { header = Some(TableRow { columns }); diff --git a/acdc-parser/src/grammar/table.rs b/acdc-parser/src/grammar/table.rs index d3afb7c..00ef4b5 100644 --- a/acdc-parser/src/grammar/table.rs +++ b/acdc-parser/src/grammar/table.rs @@ -1,4 +1,4 @@ -use crate::{Error, TableColumn, model::SectionLevel}; +use crate::{Error, TableColumn, blocks::table::ParsedCell, model::SectionLevel}; use super::{ParserState, document_parser, inline_processing::adjust_and_log_parse_error}; @@ -7,8 +7,7 @@ pub(crate) fn parse_table_cell( state: &mut ParserState, cell_start_offset: usize, parent_section_level: Option, - colspan: usize, - rowspan: usize, + cell: &ParsedCell, ) -> Result { let blocks = document_parser::blocks(content, state, 
cell_start_offset, parent_section_level) .unwrap_or_else(|error| { @@ -21,5 +20,12 @@ pub(crate) fn parse_table_cell( ); Ok(Vec::new()) })?; - Ok(TableColumn::with_spans(blocks, colspan, rowspan)) + Ok(TableColumn::with_format( + blocks, + cell.colspan, + cell.rowspan, + cell.halign, + cell.valign, + cell.style, + )) } diff --git a/acdc-parser/src/model/tables.rs b/acdc-parser/src/model/tables.rs index 257dbf2..04c459a 100644 --- a/acdc-parser/src/model/tables.rs +++ b/acdc-parser/src/model/tables.rs @@ -231,6 +231,18 @@ pub struct TableColumn { /// Specified in `AsciiDoc` with `.n+|` syntax (e.g., `.2+|` for rowspan=2). #[serde(default = "default_span", skip_serializing_if = "is_default_span")] pub rowspan: usize, + /// Cell-level horizontal alignment override. + /// Specified with `<`, `^`, or `>` in cell specifier (e.g., `^|` for center). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub halign: Option, + /// Cell-level vertical alignment override. + /// Specified with `.<`, `.^`, or `.>` in cell specifier (e.g., `.>|` for bottom). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub valign: Option, + /// Cell-level style override. + /// Specified with style letter after operator (e.g., `s|` for strong/bold). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub style: Option, } const fn default_span() -> usize { @@ -250,6 +262,9 @@ impl TableColumn { content, colspan: 1, rowspan: 1, + halign: None, + valign: None, + style: None, } } @@ -260,6 +275,29 @@ impl TableColumn { content, colspan, rowspan, + halign: None, + valign: None, + style: None, + } + } + + /// Create a new table column with full cell specifier options. 
+ #[must_use] + pub fn with_format( + content: Vec, + colspan: usize, + rowspan: usize, + halign: Option, + valign: Option, + style: Option, + ) -> Self { + Self { + content, + colspan, + rowspan, + halign, + valign, + style, } } } diff --git a/converters/html/src/table.rs b/converters/html/src/table.rs index e0aa173..49d0430 100644 --- a/converters/html/src/table.rs +++ b/converters/html/src/table.rs @@ -25,9 +25,30 @@ fn valign_class(valign: VerticalAlignment) -> &'static str { } } -/// Get column format for a given column index, defaulting to left/top if not specified -fn get_column_format(columns: &[ColumnFormat], col_index: usize) -> ColumnFormat { - columns.get(col_index).cloned().unwrap_or_default() +/// Get effective alignment for a cell, considering cell-level overrides. +fn get_effective_halign( + columns: &[ColumnFormat], + col_index: usize, + cell: &TableColumn, +) -> HorizontalAlignment { + cell.halign.unwrap_or_else(|| { + columns + .get(col_index) + .map_or_else(HorizontalAlignment::default, |c| c.halign) + }) +} + +/// Get effective vertical alignment for a cell, considering cell-level overrides. +fn get_effective_valign( + columns: &[ColumnFormat], + col_index: usize, + cell: &TableColumn, +) -> VerticalAlignment { + cell.valign.unwrap_or_else(|| { + columns + .get(col_index) + .map_or_else(VerticalAlignment::default, |c| c.valign) + }) } /// Format colspan/rowspan attributes for a table cell. 
@@ -197,9 +218,8 @@ where writeln!(writer, "")?; let _ = writer; for (col_index, cell) in header.columns.iter().enumerate() { - let spec = get_column_format(&table.columns, col_index); - let halign = halign_class(spec.halign); - let valign = valign_class(spec.valign); + let halign = halign_class(get_effective_halign(&table.columns, col_index, cell)); + let valign = valign_class(get_effective_valign(&table.columns, col_index, cell)); let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); write!( @@ -225,9 +245,8 @@ where writeln!(writer, "")?; let _ = writer; for (col_index, cell) in row.columns.iter().enumerate() { - let spec = get_column_format(&table.columns, col_index); - let halign = halign_class(spec.halign); - let valign = valign_class(spec.valign); + let halign = halign_class(get_effective_halign(&table.columns, col_index, cell)); + let valign = valign_class(get_effective_valign(&table.columns, col_index, cell)); let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); write!( @@ -252,9 +271,8 @@ where writeln!(writer, "")?; let _ = writer; for (col_index, cell) in footer.columns.iter().enumerate() { - let spec = get_column_format(&table.columns, col_index); - let halign = halign_class(spec.halign); - let valign = valign_class(spec.valign); + let halign = halign_class(get_effective_halign(&table.columns, col_index, cell)); + let valign = valign_class(get_effective_valign(&table.columns, col_index, cell)); let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); write!( From 440ccd9e22d558c100d1f3cd6c674034a45016ae Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sat, 17 Jan 2026 18:37:49 +0000 Subject: [PATCH 05/18] feat(html): support table visual attributes Tables now render with proper CSS classes for frame, grid, stripes, width, and custom roles - matching asciidoctor output. 
Also consolidates attribute string extraction into ElementAttributes methods (get_string) removing duplicate helper functions from icon.rs and table.rs. --- acdc-parser/src/model/attributes.rs | 4 +- converters/html/src/icon.rs | 38 ++++------ converters/html/src/table.rs | 104 +++++++++++++++++++++++----- 3 files changed, 102 insertions(+), 44 deletions(-) diff --git a/acdc-parser/src/model/attributes.rs b/acdc-parser/src/model/attributes.rs index 74508ad..ccbc6ee 100644 --- a/acdc-parser/src/model/attributes.rs +++ b/acdc-parser/src/model/attributes.rs @@ -247,9 +247,9 @@ impl ElementAttributes { self.0.merge(other.0); } - /// Helper to get a string value. + /// Get a string attribute value as an owned `String`. /// - /// Strips surrounding quotes from the value if present (parser quirk workaround). + /// Strips surrounding quotes from the value if present. #[must_use] pub fn get_string(&self, name: &str) -> Option { self.get(name).and_then(|v| match v { diff --git a/converters/html/src/icon.rs b/converters/html/src/icon.rs index 3d51e8a..6a768be 100644 --- a/converters/html/src/icon.rs +++ b/converters/html/src/icon.rs @@ -5,20 +5,6 @@ use acdc_parser::{AttributeValue, ElementAttributes, ICON_SIZES, Icon, Source}; use crate::Processor; -/// Helper to get a string value from `ElementAttributes`. -/// -/// Strips surrounding quotes from the value if present (parser quirk workaround). -fn get_attr_string(attrs: &ElementAttributes, name: &str) -> Option { - attrs.get(name).and_then(|v| match v { - AttributeValue::String(s) => { - // Strip surrounding quotes if present (parser includes them for quoted values) - let trimmed = s.trim_matches('"'); - Some(trimmed.to_string()) - } - AttributeValue::None | AttributeValue::Bool(_) | _ => None, - }) -} - /// Check if a positional attribute exists (stored as key with `AttributeValue::None`). 
fn has_positional_attr(attrs: &ElementAttributes, name: &str) -> bool { matches!(attrs.get(name), Some(AttributeValue::None)) @@ -27,7 +13,7 @@ fn has_positional_attr(attrs: &ElementAttributes, name: &str) -> bool { /// Get the icon size from attributes (either named `size=...` or positional like `2x`). fn get_icon_size(attrs: &ElementAttributes) -> Option { // First check named attribute - if let Some(size) = get_attr_string(attrs, "size") { + if let Some(size) = attrs.get_string("size") { return Some(size); } // Then check for positional size values @@ -54,7 +40,7 @@ pub(crate) fn write_icon( let attrs = &icon.attributes; // Build span class with optional role - let span_class = match get_attr_string(attrs, "role") { + let span_class = match attrs.get_string("role") { Some(role) => format!("icon {role}"), None => "icon".to_string(), }; @@ -89,14 +75,15 @@ fn write_font_icon( } // flip takes precedence over rotate (matches asciidoctor behavior) - if let Some(flip) = get_attr_string(attrs, "flip") { + if let Some(flip) = attrs.get_string("flip") { let _ = write!(classes, " fa-flip-{flip}"); - } else if let Some(rotate) = get_attr_string(attrs, "rotate") { + } else if let Some(rotate) = attrs.get_string("rotate") { let _ = write!(classes, " fa-rotate-{rotate}"); } // Build title attribute - let title_attr = get_attr_string(attrs, "title") + let title_attr = attrs + .get_string("title") .map(|t| format!(" title=\"{t}\"")) .unwrap_or_default(); @@ -124,16 +111,18 @@ fn write_image_icon( .map_or_else(|| "./images/icons".to_string(), ToString::to_string); // Build alt attribute (use custom alt or target name) - let alt = get_attr_string(attrs, "alt").unwrap_or_else(|| target.to_string()); + let alt = attrs + .get_string("alt") + .unwrap_or_else(|| target.to_string()); // Build img attributes let mut img_attrs = format!("src=\"{iconsdir}/{target}.png\" alt=\"{alt}\""); - if let Some(width) = get_attr_string(attrs, "width") { + if let Some(width) = 
attrs.get_string("width") { let _ = write!(img_attrs, " width=\"{width}\""); } - if let Some(title) = get_attr_string(attrs, "title") { + if let Some(title) = attrs.get_string("title") { let _ = write!(img_attrs, " title=\"{title}\""); } @@ -148,10 +137,11 @@ fn write_image_icon( /// Wrap icon content with a link if the `link` attribute is present. fn wrap_icon_with_link(content: &str, attrs: &ElementAttributes) -> String { - if let Some(link) = get_attr_string(attrs, "link") { + if let Some(link) = attrs.get_string("link") { // HTML-escape ampersands in URLs for valid HTML let escaped_link = link.replace('&', "&"); - let window_attrs = get_attr_string(attrs, "window") + let window_attrs = attrs + .get_string("window") .map(|w| format!(" target=\"{w}\" rel=\"noopener\"")) .unwrap_or_default(); format!("{content}") diff --git a/converters/html/src/table.rs b/converters/html/src/table.rs index 49d0430..bdc73b8 100644 --- a/converters/html/src/table.rs +++ b/converters/html/src/table.rs @@ -1,8 +1,8 @@ use acdc_converters_core::table::calculate_column_widths; use acdc_converters_core::visitor::{WritableVisitor, WritableVisitorExt}; use acdc_parser::{ - AttributeValue, Block, BlockMetadata, ColumnFormat, HorizontalAlignment, InlineNode, Table, - TableColumn, VerticalAlignment, + Block, BlockMetadata, ColumnFormat, HorizontalAlignment, InlineNode, Table, TableColumn, + VerticalAlignment, }; use crate::{Error, Processor, RenderOptions}; @@ -112,14 +112,8 @@ where if !title.is_empty() { let count = processor.table_counter.get() + 1; processor.table_counter.set(count); - let caption = processor - .document_attributes - .get("table-caption") - .and_then(|v| match v { - AttributeValue::String(s) => Some(s.as_str()), - AttributeValue::Bool(_) | AttributeValue::None | _ => None, - }) - .unwrap_or("Table"); + let caption_owned = processor.document_attributes.get_string("table-caption"); + let caption = caption_owned.unwrap_or(String::from("Table")); 
visitor.render_title_with_wrapper( title, &format!("{caption} {count}. "), @@ -136,12 +130,7 @@ fn render_colgroup( metadata: &BlockMetadata, ) -> Result<(), Error> { // Generate colgroup - either from cols attribute or inferred from table structure - let col_count = if let Some(cols_value) = metadata.attributes.get("cols") { - let cols_str = match cols_value { - AttributeValue::String(s) => s.trim_matches('"'), - AttributeValue::Bool(_) | AttributeValue::None | _ => "", - }; - + let col_count = if let Some(cols_str) = metadata.attributes.get_string("cols") { // Handle multiplier syntax like "3*" or "2*~" if let Some(asterisk_pos) = cols_str.find('*') { let count_str = &cols_str[..asterisk_pos]; @@ -187,6 +176,63 @@ fn render_colgroup( Ok(()) } +/// Get frame class from metadata (default: all). +fn get_frame_class(metadata: &BlockMetadata) -> &'static str { + metadata + .attributes + .get_string("frame") + .map_or("frame-all", |frame| match frame.as_str() { + "ends" | "topbot" => "frame-ends", + "sides" => "frame-sides", + "none" => "frame-none", + _ => "frame-all", + }) +} + +/// Get grid class from metadata (default: all). +fn get_grid_class(metadata: &BlockMetadata) -> &'static str { + metadata + .attributes + .get_string("grid") + .map_or("grid-all", |grid| match grid.as_str() { + "rows" => "grid-rows", + "cols" => "grid-cols", + "none" => "grid-none", + _ => "grid-all", + }) +} + +/// Get stripes class from metadata (only if specified). +fn get_stripes_class(metadata: &BlockMetadata) -> Option<&'static str> { + metadata + .attributes + .get_string("stripes") + .and_then(|stripes| match stripes.as_str() { + "even" => Some("stripes-even"), + "odd" => Some("stripes-odd"), + "all" => Some("stripes-all"), + "hover" => Some("stripes-hover"), + _ => None, + }) +} + +/// Get width style from metadata (returns empty string if not specified). 
+fn get_width_style(metadata: &BlockMetadata) -> String { + metadata + .attributes + .get_string("width") + .map_or_else(String::new, |w| format!(" style=\"width: {w};\"")) +} + +/// Get sizing class based on %autowidth option. +fn get_sizing_class(metadata: &BlockMetadata) -> &'static str { + if metadata.options.contains(&"autowidth".to_string()) { + "fit-content" + } else { + "stretch" + } +} + /// Render table with support for nested blocks in cells pub(crate) fn render_table( table: &Table, @@ -200,9 +246,31 @@ where V: WritableVisitor, { let writer = visitor.writer_mut(); - let classes = ["tableblock", "frame-all", "grid-all", "stretch"]; - writeln!(writer, "", classes.join(" "))?; + // Build table classes + let frame = get_frame_class(metadata); + let grid = get_grid_class(metadata); + let sizing = get_sizing_class(metadata); + + // Start with base classes, add optional ones + let mut class_parts = format!("tableblock {frame} {grid} {sizing}"); + + // Add stripes class if specified + if let Some(stripes) = get_stripes_class(metadata) { + class_parts.push(' '); + class_parts.push_str(stripes); + } + + // Add custom roles/classes from metadata + for role in &metadata.roles { + class_parts.push(' '); + class_parts.push_str(role); + } + + // Get width style + let width_style = get_width_style(metadata); + + writeln!(writer, "
")?; // Render caption with table number if title exists let _ = writer; From 9cce8535954298ff4e0a694978e62d56e712e702 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sat, 17 Jan 2026 20:10:54 +0000 Subject: [PATCH 06/18] fix(README): use all features and targets during nextest --- README.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.adoc b/README.adoc index 00d4fdd..224cba7 100644 --- a/README.adoc +++ b/README.adoc @@ -87,7 +87,7 @@ See link:./acdc-parser/README.adoc[acdc-parser README] for detailed feature supp cargo build --all # Run tests with detailed output -RUST_LOG=error cargo nextest run --no-fail-fast +RUST_LOG=error cargo nextest run --no-fail-fast --all-features --all-targets # Run clippy with pedantic lints cargo clippy --all-targets --all-features -- --deny clippy::pedantic From 2296b95962ca9a09db654d58ba5c9e2ebe41cf09 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sat, 17 Jan 2026 20:11:52 +0000 Subject: [PATCH 07/18] chore(parser): show line when include directive can't be processed --- acdc-parser/src/preprocessor/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acdc-parser/src/preprocessor/mod.rs b/acdc-parser/src/preprocessor/mod.rs index cf439cf..8650075 100644 --- a/acdc-parser/src/preprocessor/mod.rs +++ b/acdc-parser/src/preprocessor/mod.rs @@ -181,7 +181,7 @@ impl Preprocessor { return Ok(Some(include.lines()?)); } } else { - tracing::error!("file parent is missing - include directive cannot be processed"); + tracing::error!(%line, "file parent is missing - include directive cannot be processed"); } Ok(None) } From 52b7e609f56309ae2bff0c755aea15d8e6dbbf86 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sat, 17 Jan 2026 21:04:05 +0000 Subject: [PATCH 08/18] fix(parser): correct table cell content position tracking Cell positions were off by one column because the calculation used part boundaries directly instead of accounting for whitespace and cell specifiers. 
The fix calculates actual content offsets by tracking leading whitespace, specifier length, and post-specifier whitespace before computing positions. --- .../fixtures/tests/table_cell_alignment.json | 24 +- acdc-parser/src/blocks/table.rs | 208 +++++++++++++++--- 2 files changed, 191 insertions(+), 41 deletions(-) diff --git a/acdc-parser/fixtures/tests/table_cell_alignment.json b/acdc-parser/fixtures/tests/table_cell_alignment.json index c9cdc64..0e8492a 100644 --- a/acdc-parser/fixtures/tests/table_cell_alignment.json +++ b/acdc-parser/fixtures/tests/table_cell_alignment.json @@ -205,11 +205,11 @@ "location": [ { "line": 4, - "col": 24 + "col": 25 }, { "line": 4, - "col": 28 + "col": 29 } ] } @@ -217,11 +217,11 @@ "location": [ { "line": 4, - "col": 24 + "col": 25 }, { "line": 4, - "col": 28 + "col": 29 } ] } @@ -280,11 +280,11 @@ "location": [ { "line": 5, - "col": 15 + "col": 16 }, { "line": 5, - "col": 20 + "col": 21 } ] } @@ -292,11 +292,11 @@ "location": [ { "line": 5, - "col": 15 + "col": 16 }, { "line": 5, - "col": 20 + "col": 21 } ] } @@ -316,11 +316,11 @@ "location": [ { "line": 5, - "col": 27 + "col": 28 }, { "line": 5, - "col": 32 + "col": 33 } ] } @@ -328,11 +328,11 @@ "location": [ { "line": 5, - "col": 27 + "col": 28 }, { "line": 5, - "col": 32 + "col": 33 } ] } diff --git a/acdc-parser/src/blocks/table.rs b/acdc-parser/src/blocks/table.rs index 50f5c69..4552c92 100644 --- a/acdc-parser/src/blocks/table.rs +++ b/acdc-parser/src/blocks/table.rs @@ -1,5 +1,152 @@ use crate::{ColumnStyle, HorizontalAlignment, Table, VerticalAlignment}; +/// A cell part with its unescaped content and original start position. +struct CellPart { + /// Unescaped content (e.g., `\|` becomes `|`) + content: String, + /// Start position in the original line + start: usize, +} + +/// Split a line by separator, respecting backslash escapes. +/// +/// For PSV (`|`) and DSV (`:`), a backslash before the separator escapes it. 
+/// Returns parts with their original byte positions for accurate source mapping. +fn split_escaped(line: &str, separator: char) -> Vec { + let mut parts = Vec::new(); + let mut current_content = String::new(); + let mut part_start = 0; + let mut chars = line.char_indices().peekable(); + + while let Some((byte_idx, ch)) = chars.next() { + if ch == '\\' { + // Check if next char is the separator + if let Some(&(_, next_ch)) = chars.peek() { + if next_ch == separator { + // Escaped separator - add literal separator, skip the backslash + current_content.push(separator); + chars.next(); // consume the separator + continue; + } + } + // Not an escape - add backslash literally + current_content.push(ch); + } else if ch == separator { + // Unescaped separator - end current part + parts.push(CellPart { + content: std::mem::take(&mut current_content), + start: part_start, + }); + part_start = byte_idx + ch.len_utf8(); + } else { + current_content.push(ch); + } + } + + // Add final part + parts.push(CellPart { + content: current_content, + start: part_start, + }); + + parts +} + +/// Split a CSV line, respecting quoted fields (RFC 4180). 
+/// +/// - Fields enclosed in double quotes can contain commas +/// - Double-double-quotes (`""`) inside quoted fields become a single quote +fn split_csv(line: &str) -> Vec { + let mut parts = Vec::new(); + let mut current_content = String::new(); + let mut part_start = 0; + let mut in_quotes = false; + let mut chars = line.char_indices().peekable(); + + while let Some((byte_idx, ch)) = chars.next() { + if in_quotes { + if ch == '"' { + // Check for escaped quote ("") + if let Some(&(_, next_ch)) = chars.peek() { + if next_ch == '"' { + // Escaped quote - add one quote, skip both + current_content.push('"'); + chars.next(); // consume the second quote + continue; + } + } + // End of quoted field + in_quotes = false; + } else { + current_content.push(ch); + } + } else if ch == '"' { + // Start of quoted field + in_quotes = true; + } else if ch == ',' { + // Field separator + parts.push(CellPart { + content: std::mem::take(&mut current_content), + start: part_start, + }); + part_start = byte_idx + 1; // comma is always 1 byte + } else { + current_content.push(ch); + } + } + + // Add final part + parts.push(CellPart { + content: current_content, + start: part_start, + }); + + parts +} + +/// Determine if this is a CSV format table. +fn is_csv_format(separator: &str) -> bool { + separator == "," +} + +/// Split a line into cell parts using the appropriate method for the separator. +fn split_line(line: &str, separator: &str) -> Vec { + if is_csv_format(separator) { + split_csv(line) + } else if let Some(sep_char) = separator.chars().next() { + if separator.len() == 1 { + split_escaped(line, sep_char) + } else { + // Multi-char separator - no escape handling + split_multi_char(line, separator) + } + } else { + // Empty separator - return whole line as one part + vec![CellPart { + content: line.to_string(), + start: 0, + }] + } +} + +/// Split by multi-character separator (no escape handling). 
+fn split_multi_char(line: &str, separator: &str) -> Vec { + let mut parts = Vec::new(); + let mut last_end = 0; + for (idx, _) in line.match_indices(separator) { + parts.push(CellPart { + content: line.get(last_end..idx).unwrap_or("").to_string(), + start: last_end, + }); + last_end = idx + separator.len(); + } + parts.push(CellPart { + content: line.get(last_end..).unwrap_or("").to_string(), + start: last_end, + }); + parts +} + /// Represents a parsed cell specifier with span, alignment, and style information. /// /// In `AsciiDoc`, cell specifiers appear before the cell separator with format: @@ -365,37 +512,36 @@ impl Table { continue; } - // Split the line by separator to get all cells - let parts: Vec<&str> = line.split(separator).collect(); - - // Track position within the line - let mut line_offset = current_offset; + // Split the line by separator, handling escapes appropriately + let parts = split_line(line, separator); // Handle span specifier at the start of line (before first separator) // e.g., "2+| content" -> part 0 is "2+", applies to part 1 let mut pending_spec: Option = None; + // Determine if first part should be treated as content (CSV) or specifier/skip (PSV/DSV) + // For CSV: first part is actual content + // For PSV/DSV: first part is either empty, whitespace, or a cell specifier + let is_csv = is_csv_format(separator); + for (i, part) in parts.iter().enumerate() { - if i == 0 { - // First part is before first separator - let trimmed = part.trim(); - if trimmed.is_empty() { - // Normal case: line starts with separator - line_offset += separator.len(); - } else { - // Span specifier before first separator: "2+| content" + if i == 0 && !is_csv { + // First part is before first separator (PSV/DSV format) + let trimmed = part.content.trim(); + if !trimmed.is_empty() { + // Check if this looks like a specifier (e.g., "2+", "3*", "^.>") let (spec, spec_len) = CellSpecifier::parse(trimmed); - if spec_len > 0 { + if spec_len > 0 && spec_len == 
trimmed.len() { + // Entire first part is a specifier, apply to next cell pending_spec = Some(spec); } - // Move past the specifier and the separator - line_offset += part.len() + separator.len(); + // If not a complete specifier, it's just content before first separator + // which we skip for PSV/DSV } continue; } - let cell_content_with_spaces = part; - let cell_content_trimmed = cell_content_with_spaces.trim(); + let cell_content_trimmed = part.content.trim(); // Use pending specifier if we have one, otherwise parse from content let (spec, spec_offset) = if let Some(pending) = pending_spec.take() { @@ -414,14 +560,24 @@ impl Table { cell_content_trimmed }; - // Find where the actual content starts (after leading spaces and specifier) - let leading_spaces = - cell_content_with_spaces.len() - cell_content_with_spaces.trim_start().len(); - let cell_start = line_offset + leading_spaces + spec_offset; + // Calculate where cell_content starts within part.content + // Pattern: leading_ws + spec_offset + post_spec_ws + let leading_ws = part.content.len() - part.content.trim_start().len(); + let post_spec_ws = if spec_offset > 0 { + let after_spec = cell_content_trimmed.get(spec_offset..).unwrap_or(""); + after_spec.len() - after_spec.trim_start().len() + } else { + 0 + }; + let content_start_offset = leading_ws + spec_offset + post_spec_ws; + + // Calculate positions using actual content boundaries + let cell_start = current_offset + part.start + content_start_offset; let cell_end = if cell_content.is_empty() { cell_start } else { - cell_start + cell_content.len() - 1 // -1 for inclusive end + // End is start + content length - 1 (inclusive end position) + cell_start + cell_content.len().saturating_sub(1) }; columns.push(ParsedCell { @@ -436,12 +592,6 @@ impl Table { is_duplication: spec.is_duplication, duplication_count: spec.duplication_count, }); - - // Move offset past this cell and its separator - line_offset += part.len(); - if i < parts.len() - 1 { - line_offset 
+= separator.len(); - } } current_offset += line.len() + 1; // +1 for newline From 22dd7c6d115b7e70a88c85dcbcb3d7dd147554e2 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sat, 17 Jan 2026 21:05:31 +0000 Subject: [PATCH 09/18] chore(parser): colspan/rowspan don't need to have a default serde --- acdc-parser/src/model/tables.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/acdc-parser/src/model/tables.rs b/acdc-parser/src/model/tables.rs index 04c459a..e085a3c 100644 --- a/acdc-parser/src/model/tables.rs +++ b/acdc-parser/src/model/tables.rs @@ -225,11 +225,11 @@ pub struct TableColumn { pub content: Vec, /// Number of columns this cell spans (default 1). /// Specified in `AsciiDoc` with `n+|` syntax (e.g., `2+|` for colspan=2). - #[serde(default = "default_span", skip_serializing_if = "is_default_span")] + #[serde(skip_serializing_if = "is_default_span")] pub colspan: usize, /// Number of rows this cell spans (default 1). /// Specified in `AsciiDoc` with `.n+|` syntax (e.g., `.2+|` for rowspan=2). - #[serde(default = "default_span", skip_serializing_if = "is_default_span")] + #[serde(skip_serializing_if = "is_default_span")] pub rowspan: usize, /// Cell-level horizontal alignment override. /// Specified with `<`, `^`, or `>` in cell specifier (e.g., `^|` for center). 
@@ -245,10 +245,6 @@ pub struct TableColumn { pub style: Option, } -const fn default_span() -> usize { - 1 -} - #[allow(clippy::trivially_copy_pass_by_ref)] const fn is_default_span(span: &usize) -> bool { *span == 1 From 4673844ab4a999742a6315530389369e2cb54770 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sat, 17 Jan 2026 21:41:59 +0000 Subject: [PATCH 10/18] chore(parser): removed unnecessary pub functions --- CHANGELOG.md | 21 +++++++++ acdc-parser/src/model/tables.rs | 28 +----------- converters/terminal/src/table.rs | 75 +++++++++++++++----------------- 3 files changed, 57 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 817f56e..35fc466 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Table colspan and rowspan rendering (`colspan="n"` and `rowspan="n"` attributes on `\n", )?; } @@ -274,7 +289,7 @@ where // Render caption with table number if title exists let _ = writer; - render_table_caption(visitor, title, processor)?; + render_table_caption(visitor, title, processor, metadata)?; // Render colgroup with column widths render_colgroup(visitor.writer_mut(), table, metadata)?; From cde88ce7bd988b79261f1339894f962942f169f3 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Mon, 19 Jan 2026 06:15:30 +0000 Subject: [PATCH 14/18] chore(html): simplify captions + support disabling them --- converters/html/src/delimited.rs | 20 ++++++++------------ converters/html/src/image.rs | 17 +++++------------ converters/html/src/lib.rs | 31 +++++++++++++++++++++++++++++++ converters/html/src/table.rs | 22 +++++++++++----------- 4 files changed, 55 insertions(+), 35 deletions(-) diff --git a/converters/html/src/delimited.rs b/converters/html/src/delimited.rs index a301315..f67f075 100644 --- a/converters/html/src/delimited.rs +++ b/converters/html/src/delimited.rs @@ -40,19 +40,15 @@ fn write_example_block>( let _ = writer; // Render 
title with caption prefix if title exists + // Caption can be disabled with :example-caption!: if !block.title.is_empty() { - let count = processor.example_counter.get() + 1; - processor.example_counter.set(count); - let caption = processor - .document_attributes - .get("example-caption") - .and_then(|v| match v { - AttributeValue::String(s) => Some(s.as_str()), - AttributeValue::Bool(_) | AttributeValue::None | _ => None, - }) - .unwrap_or("Example"); - let prefix = format!("
{caption} {count}. "); - visitor.render_title_with_wrapper(&block.title, &prefix, "
\n")?; + let prefix = + processor.caption_prefix("example-caption", &processor.example_counter, "Example"); + visitor.render_title_with_wrapper( + &block.title, + &format!("
{prefix}"), + "
\n", + )?; } writer = visitor.writer_mut(); diff --git a/converters/html/src/image.rs b/converters/html/src/image.rs index 03868d4..65fc6ef 100644 --- a/converters/html/src/image.rs +++ b/converters/html/src/image.rs @@ -1,5 +1,5 @@ use acdc_converters_core::visitor::{WritableVisitor, WritableVisitorExt}; -use acdc_parser::{AttributeValue, Image}; +use acdc_parser::Image; use crate::{ Error, Processor, @@ -56,21 +56,14 @@ pub(crate) fn visit_image>( write!(w, "")?; // close content // Render title with figure caption if title exists + // Caption can be disabled with :figure-caption!: if !img.title.is_empty() { - let count = processor.figure_counter.get() + 1; - processor.figure_counter.set(count); - let caption = processor - .document_attributes - .get("figure-caption") - .and_then(|v| match v { - AttributeValue::String(s) => Some(s.as_str()), - AttributeValue::Bool(_) | AttributeValue::None | _ => None, - }) - .unwrap_or("Figure"); + let prefix = + processor.caption_prefix("figure-caption", &processor.figure_counter, "Figure"); let _ = w; visitor.render_title_with_wrapper( &img.title, - &format!("
{caption} {count}. "), + &format!("
{prefix}"), "
", )?; w = visitor.writer_mut(); diff --git a/converters/html/src/lib.rs b/converters/html/src/lib.rs index 8c76e9b..ac6b899 100644 --- a/converters/html/src/lib.rs +++ b/converters/html/src/lib.rs @@ -70,6 +70,37 @@ impl Processor { self.has_valid_index_section } + /// Generate a caption prefix based on document attributes. + /// + /// Returns the caption prefix string. If captions are disabled via `:X-caption!:`, + /// returns an empty string. Otherwise increments the counter and returns + /// "Caption N. " format. + #[must_use] + pub(crate) fn caption_prefix( + &self, + attribute_name: &str, + counter: &Rc>, + default_text: &str, + ) -> String { + match self.document_attributes.get(attribute_name) { + Some(AttributeValue::Bool(false)) => { + // Disabled via :X-caption!: + String::new() + } + Some(AttributeValue::String(s)) => { + let count = counter.get() + 1; + counter.set(count); + let caption = s.trim_matches('"'); + format!("{caption} {count}. ") + } + _ => { + let count = counter.get() + 1; + counter.set(count); + format!("{default_text} {count}. ") + } + } + } + /// Generate a unique anchor ID for an index term and collect the entry. #[must_use] pub fn add_index_entry(&self, kind: IndexTermKind) -> String { diff --git a/converters/html/src/table.rs b/converters/html/src/table.rs index 8ea6c7b..756b69e 100644 --- a/converters/html/src/table.rs +++ b/converters/html/src/table.rs @@ -104,6 +104,10 @@ where /// /// Per-block `[caption="..."]` attribute overrides the prefix entirely and does NOT increment /// the table counter (following `AsciiDoc` specification). 
+/// +/// Caption can be disabled with: +/// - `:table-caption!:` at document level (disables for all tables) +/// - `[caption=""]` at block level (disables for specific table) fn render_table_caption( visitor: &mut V, title: &[InlineNode], @@ -114,19 +118,15 @@ where V: WritableVisitor, { if !title.is_empty() { - // Check for per-block caption override + // Check for per-block caption override (does NOT increment counter) let prefix = if let Some(custom_caption) = metadata.attributes.get_string("caption") { - // Per-block caption replaces entire prefix and does NOT increment internal counter. - custom_caption + if custom_caption.is_empty() { + String::new() + } else { + custom_caption + } } else { - // Default: "Table N. " format - increment counter - let count = processor.table_counter.get() + 1; - processor.table_counter.set(count); - let caption = processor - .document_attributes - .get_string("table-caption") - .unwrap_or_else(|| String::from("Table")); - format!("{caption} {count}. ") + processor.caption_prefix("table-caption", &processor.table_counter, "Table") }; visitor.render_title_with_wrapper( From 9c75b5060eadf4d3f74ebadab3fa17c892164238 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Tue, 20 Jan 2026 22:15:05 +0000 Subject: [PATCH 15/18] refactor(parser): replace bool with ParseContext enum in CellSpecifier::parse Makes call sites self-documenting - FirstPart vs InlineContent instead of cryptic true/false. --- acdc-parser/src/blocks/table.rs | 52 +++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/acdc-parser/src/blocks/table.rs b/acdc-parser/src/blocks/table.rs index 302af03..ebc6275 100644 --- a/acdc-parser/src/blocks/table.rs +++ b/acdc-parser/src/blocks/table.rs @@ -232,6 +232,15 @@ fn split_multi_char(line: &str, separator: &str) -> Vec { parts } +/// Context for parsing cell specifiers, controlling which specifier types are valid. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum ParseContext { + /// First part before separator in PSV tables - style-only specifiers allowed (e.g., `s|`) + FirstPart, + /// Inline cell content - style-only specifiers NOT allowed (prevents "another" → 'a' style) + InlineContent, +} + /// Represents a parsed cell specifier with span, alignment, and style information. /// /// In `AsciiDoc`, cell specifiers appear before the cell separator with format: @@ -290,6 +299,11 @@ impl CellSpecifier { /// Returns the specifier and the offset where actual content begins. /// Full pattern: `[halign][valign][colspan][.rowspan][+|*][style]` /// + /// The `mode` parameter controls whether style-only specifiers + /// (e.g., `s|` for strong without any alignment or span) are accepted: + /// - `ParseContext::FirstPart`: Accept style-only specifiers (first part before separator) + /// - `ParseContext::InlineContent`: Reject style-only (prevents "another" → 'a' style) + /// /// Examples: /// - `"2+rest"` → colspan=2 /// - `".3+rest"` → rowspan=3 @@ -298,7 +312,7 @@ impl CellSpecifier { /// - `"3*rest"` → `duplication_count`=3 /// - `"plain"` → defaults (no specifier found) #[must_use] - pub fn parse(content: &str) -> (Self, usize) { + pub fn parse(content: &str, mode: ParseContext) -> (Self, usize) { let bytes = content.as_bytes(); let mut pos = 0; @@ -315,7 +329,7 @@ impl CellSpecifier { pos = rowspan_end; // Phase 4: Check for operator and build result - Self::build_result(bytes, pos, colspan, rowspan, halign, valign) + Self::build_result(bytes, pos, colspan, rowspan, halign, valign, mode) } /// Parse alignment markers at the current position. 
@@ -420,6 +434,7 @@ impl CellSpecifier { rowspan: Option, halign: Option, valign: Option, + context: ParseContext, ) -> (Self, usize) { let has_span_or_dup = colspan.is_some() || rowspan.is_some(); let is_duplication = bytes.get(pos) == Some(&b'*'); @@ -474,6 +489,27 @@ impl CellSpecifier { }, pos, ) + } else if context == ParseContext::FirstPart { + // Check for style-only specifier (e.g., `s|` for strong) + // Only accepted in FirstPart context (first-part in PSV tables) + let style = bytes.get(pos).and_then(|&b| parse_style_byte(b)); + if let Some(style) = style { + pos += 1; + ( + Self { + colspan: 1, + rowspan: 1, + halign: None, + valign: None, + style: Some(style), + is_duplication: false, + duplication_count: 1, + }, + pos, + ) + } else { + (Self::default(), 0) + } } else { // No valid specifier found (Self::default(), 0) @@ -676,8 +712,10 @@ impl Table { // First part is before first separator (PSV format only) let trimmed = part.content.trim(); if !trimmed.is_empty() { - // Check if this looks like a specifier (e.g., "2+", "3*", "^.>") - let (spec, spec_len) = CellSpecifier::parse(trimmed); + // Check if this looks like a specifier (e.g., "2+", "3*", "^.>", "s") + // Style-only specifiers (e.g., "s" for strong) are valid here + let (spec, spec_len) = + CellSpecifier::parse(trimmed, ParseContext::FirstPart); if spec_len > 0 && spec_len == trimmed.len() { // Entire first part is a specifier, apply to next cell pending_spec = Some(spec); @@ -690,11 +728,13 @@ impl Table { let cell_content_trimmed = part.content.trim(); - // Use pending specifier if we have one, otherwise parse from content + // Use pending specifier if we have one, otherwise parse from content. + // Style-only specifiers are NOT valid from inline content parsing - + // this prevents treating content like "another" as having an 'a' (AsciiDoc) style. 
let (spec, spec_offset) = if let Some(pending) = pending_spec.take() { (pending, 0) } else { - CellSpecifier::parse(cell_content_trimmed) + CellSpecifier::parse(cell_content_trimmed, ParseContext::InlineContent) }; // The actual cell content starts after the specifier From 12a283c6f212e5e161ff25f880f82ec37a927e79 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Tue, 20 Jan 2026 22:15:15 +0000 Subject: [PATCH 16/18] fix(html): HTML5 compliance - remove self-closing syntax and deprecated attrs -
`<img />` and `<col />` now use `<img>` and `<col>` - Removed deprecated frameborder attribute from iframes --- converters/html/src/image.rs | 2 +- converters/html/src/table.rs | 159 +++++++++++++++--- converters/html/src/video.rs | 2 +- .../fixtures/expected/basic_image_block.html | 2 +- .../expected/image_block_attributes.html | 10 +- .../macros_with_quoted_attributes.html | 2 +- .../expected/table_multi_cell_per_line.html | 8 +- 7 files changed, 148 insertions(+), 37 deletions(-) diff --git a/converters/html/src/image.rs b/converters/html/src/image.rs index 65fc6ef..4d4c7ec 100644 --- a/converters/html/src/image.rs +++ b/converters/html/src/image.rs @@ -48,7 +48,7 @@ pub(crate) fn visit_image>( write!(w, "\"{alt_text}\"",")?; + write!(w, ">")?; if link.is_some() { write!(w, "")?; diff --git a/converters/html/src/table.rs b/converters/html/src/table.rs index 756b69e..788552e 100644 --- a/converters/html/src/table.rs +++ b/converters/html/src/table.rs @@ -1,8 +1,8 @@ use acdc_converters_core::table::calculate_column_widths; use acdc_converters_core::visitor::{WritableVisitor, WritableVisitorExt}; use acdc_parser::{ - Block, BlockMetadata, ColumnFormat, HorizontalAlignment, InlineNode, Table, TableColumn, - VerticalAlignment, + Block, BlockMetadata, ColumnFormat, ColumnStyle, HorizontalAlignment, InlineNode, Table, + TableColumn, VerticalAlignment, }; use crate::{Error, Processor, RenderOptions}; @@ -51,6 +51,24 @@ fn get_effective_valign( }) } +/// Get effective style for a cell, considering cell-level overrides. +/// Returns `None` if the effective style is `Default` (no wrapper needed). +fn get_effective_style( + columns: &[ColumnFormat], + col_index: usize, + cell: &TableColumn, +) -> Option { + cell.style.or_else(|| { + columns.get(col_index).and_then(|c| { + if c.style == ColumnStyle::Default { + None + } else { + Some(c.style) + } + }) + }) +} + /// Format colspan/rowspan attributes for a table cell. /// Returns an empty string if both are 1 (default). 
fn format_span_attrs(cell: &TableColumn) -> String { @@ -65,15 +83,20 @@ fn format_span_attrs(cell: &TableColumn) -> String { attrs } -/// Render cell content with support for nested blocks -/// `wrap_paragraph` controls whether paragraphs get

wrappers. -/// Headers should NOT have wrappers, body cells should have them. +/// Render cell content with support for nested blocks and cell styles. +/// +/// # Arguments +/// * `blocks` - The content blocks to render +/// * `visitor` - The HTML visitor +/// * `wrap_paragraph` - Whether paragraphs get `

` wrappers +/// * `style` - Optional cell style (Strong, Emphasis, Monospace, Literal, Header, `AsciiDoc`) fn render_cell_content( blocks: &[Block], visitor: &mut V, _processor: &Processor, _options: &RenderOptions, wrap_paragraph: bool, + style: Option, ) -> Result<(), Error> where V: WritableVisitor, @@ -81,16 +104,27 @@ where for block in blocks { // For paragraphs in table cells, use

for body cells only if let Block::Paragraph(para) = block { - if wrap_paragraph { + // Literal style uses different structure entirely + if style == Some(ColumnStyle::Literal) { let writer = visitor.writer_mut(); - write!(writer, "

")?; + write!(writer, "

")?;
                 let _ = writer;
                 visitor.visit_inline_nodes(¶.content)?;
+                let writer = visitor.writer_mut();
+                write!(writer, "
")?; + } else if wrap_paragraph { + let writer = visitor.writer_mut(); + write!(writer, "

")?; + let _ = writer; + + // Apply style wrapper inside the paragraph + render_styled_content(visitor, ¶.content, style)?; + let writer = visitor.writer_mut(); write!(writer, "

")?; } else { // Header cells: output content directly without

wrapper - visitor.visit_inline_nodes(¶.content)?; + render_styled_content(visitor, ¶.content, style)?; } } else { // For other block types, use visitor @@ -100,6 +134,56 @@ where Ok(()) } +/// Render inline content with optional style wrappers. +fn render_styled_content( + visitor: &mut V, + content: &[InlineNode], + style: Option, +) -> Result<(), Error> +where + V: WritableVisitor, +{ + match style { + Some(ColumnStyle::Strong) => { + let writer = visitor.writer_mut(); + write!(writer, "")?; + let _ = writer; + visitor.visit_inline_nodes(content)?; + let writer = visitor.writer_mut(); + write!(writer, "")?; + } + Some(ColumnStyle::Emphasis) => { + let writer = visitor.writer_mut(); + write!(writer, "")?; + let _ = writer; + visitor.visit_inline_nodes(content)?; + let writer = visitor.writer_mut(); + write!(writer, "")?; + } + Some(ColumnStyle::Monospace) => { + let writer = visitor.writer_mut(); + write!(writer, "")?; + let _ = writer; + visitor.visit_inline_nodes(content)?; + let writer = visitor.writer_mut(); + write!(writer, "")?; + } + // Default, Header, AsciiDoc, Literal (handled elsewhere) - no content wrapper + // Wildcard handles any future non-exhaustive variants + Some( + ColumnStyle::Default + | ColumnStyle::Header + | ColumnStyle::AsciiDoc + | ColumnStyle::Literal + | _, + ) + | None => { + visitor.visit_inline_nodes(content)?; + } + } + Ok(()) +} + /// Render table caption with number if title exists. /// /// Per-block `[caption="..."]` attribute overrides the prefix entirely and does NOT increment @@ -180,9 +264,9 @@ fn render_colgroup( for width in widths { // Match asciidoctor's 4-decimal precision for non-round percentages if (width - width.round()).abs() < 0.0001 { - writeln!(writer, "

")?; + writeln!(writer, "")?; } else { - writeln!(writer, "")?; + writeln!(writer, "")?; } } writeln!(writer, "")?; @@ -248,6 +332,42 @@ fn get_sizing_class(metadata: &BlockMetadata) -> &'static str { } } +/// Render a single body cell with appropriate tag and style. +fn render_body_cell( + cell: &TableColumn, + col_index: usize, + columns: &[ColumnFormat], + visitor: &mut V, + processor: &Processor, + options: &RenderOptions, +) -> Result<(), Error> +where + V: WritableVisitor, +{ + let halign = halign_class(get_effective_halign(columns, col_index, cell)); + let valign = valign_class(get_effective_valign(columns, col_index, cell)); + let style = get_effective_style(columns, col_index, cell); + let span_attrs = format_span_attrs(cell); + + // Header-styled cells in body use ")?; } @@ -328,18 +449,7 @@ where writeln!(writer, "")?; let _ = writer; for (col_index, cell) in row.columns.iter().enumerate() { - let halign = halign_class(get_effective_halign(&table.columns, col_index, cell)); - let valign = valign_class(get_effective_valign(&table.columns, col_index, cell)); - let span_attrs = format_span_attrs(cell); - let writer = visitor.writer_mut(); - write!( - writer, - "")?; + render_body_cell(cell, col_index, &table.columns, visitor, processor, options)?; } let writer = visitor.writer_mut(); writeln!(writer, "")?; @@ -356,6 +466,7 @@ where for (col_index, cell) in footer.columns.iter().enumerate() { let halign = halign_class(get_effective_halign(&table.columns, col_index, cell)); let valign = valign_class(get_effective_valign(&table.columns, col_index, cell)); + let style = get_effective_style(&table.columns, col_index, cell); let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); write!( @@ -363,7 +474,7 @@ where "")?; } diff --git a/converters/html/src/video.rs b/converters/html/src/video.rs index b8a8fe0..7d2e736 100644 --- a/converters/html/src/video.rs +++ b/converters/html/src/video.rs @@ -71,7 +71,7 @@ fn render_iframe_video(video: 
&Video, w: &mut W) -> Result<() write!(w, " height=\"{height}\"")?; } - write!(w, " src=\"{url}\" frameborder=\"0\"")?; + write!(w, " src=\"{url}\"")?; if allow_fullscreen { write!(w, " allowfullscreen")?; diff --git a/converters/html/tests/fixtures/expected/basic_image_block.html b/converters/html/tests/fixtures/expected/basic_image_block.html index e872de1..6e9e04e 100644 --- a/converters/html/tests/fixtures/expected/basic_image_block.html +++ b/converters/html/tests/fixtures/expected/basic_image_block.html @@ -439,7 +439,7 @@
-
Sunset
Sunset
+
Sunset
Sunset

Float

-
float left
float right
+
float left
float right

Role

-
thumb
+
thumb

Combined

-
combined
+
combined

Named Attributes

-
A photo
+
A photo

Inline Macros

diff --git a/converters/html/tests/fixtures/expected/table_multi_cell_per_line.html b/converters/html/tests/fixtures/expected/table_multi_cell_per_line.html index 7f3c867..997378e 100644 --- a/converters/html/tests/fixtures/expected/table_multi_cell_per_line.html +++ b/converters/html/tests/fixtures/expected/table_multi_cell_per_line.html @@ -448,10 +448,10 @@

The table

`/``) +- Table visual attribute support: + - `frame` attribute - controls outer border (`all`, `ends`/`topbot`, `sides`, `none`) + - `grid` attribute - controls inner gridlines (`all`, `rows`, `cols`, `none`) + - `stripes` attribute - controls row striping (`even`, `odd`, `all`, `hover`) + - `width` attribute - sets explicit table width (e.g., `width=75%`) + - `%autowidth` option - uses `fit-content` sizing instead of `stretch` + - Custom roles from metadata applied as CSS classes +- Cell-level alignment overrides are now respected, falling back to column-level defaults - Initial support for `[subs=...]` attribute on verbatim blocks (listing, literal) - `subs=none` - disables all substitutions, outputs raw content - `subs=specialchars` - only escapes HTML special characters @@ -80,6 +89,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Complete cell specifier support for tables: + - Colspan: `2+|` spans 2 columns + - Rowspan: `.2+|` spans 2 rows + - Combined: `2.3+|` spans 2 columns and 3 rows + - Cell duplication: `3*|` duplicates cell content 3 times + - Cell-level horizontal alignment: `<|` (left), `^|` (center), `>|` (right) + - Cell-level vertical alignment: `.<|` (top), `.^|` (middle), `.>|` (bottom) + - Cell-level style: `s|` (strong), `e|` (emphasis), `m|` (monospace), etc. + - All specifiers can be combined (e.g., `2.3+^.^s|` for colspan=2, rowspan=3, centered, strong) - Tag filtering for include directives ([#279]) - `tag=name` - include a specific tagged region - `tags=a;b;c` - include multiple tags (semicolon or comma delimited) @@ -98,6 +116,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Table cell content now has correct source position tracking for multi-line cells - Description lists with terms starting with `#` (e.g., `#issue-123:: definition`) are no longer incorrectly parsed as section boundaries inside sections. 
The section boundary detection now requires a space after the level marker. @@ -123,6 +142,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- **BREAKING**: `TableColumn` struct now includes `colspan`, `rowspan`, `halign`, `valign`, + and `style` fields. - **BREAKING**: `BlockMetadata.substitutions` changed from `Option>` to `Option`. New types `SubstitutionSpec` and `SubstitutionOp` are now public exports. Modifier syntax (`+quotes`, `-callouts`) is now stored as operations diff --git a/acdc-parser/src/model/tables.rs b/acdc-parser/src/model/tables.rs index e085a3c..9a4b4d2 100644 --- a/acdc-parser/src/model/tables.rs +++ b/acdc-parser/src/model/tables.rs @@ -251,35 +251,9 @@ const fn is_default_span(span: &usize) -> bool { } impl TableColumn { - /// Create a new table column with the given content and default spans (1). - #[must_use] - pub fn new(content: Vec) -> Self { - Self { - content, - colspan: 1, - rowspan: 1, - halign: None, - valign: None, - style: None, - } - } - - /// Create a new table column with content and explicit span values. - #[must_use] - pub fn with_spans(content: Vec, colspan: usize, rowspan: usize) -> Self { - Self { - content, - colspan, - rowspan, - halign: None, - valign: None, - style: None, - } - } - /// Create a new table column with full cell specifier options. 
#[must_use] - pub fn with_format( + pub(crate) fn with_format( content: Vec, colspan: usize, rowspan: usize, diff --git a/converters/terminal/src/table.rs b/converters/terminal/src/table.rs index efb99ae..d3b1dae 100644 --- a/converters/terminal/src/table.rs +++ b/converters/terminal/src/table.rs @@ -117,17 +117,24 @@ pub(crate) fn visit_table>( mod tests { use super::*; use acdc_converters_core::Options; - use acdc_parser::{ - Block, DocumentAttributes, InlineNode, Location, Paragraph, Plain, TableColumn, TableRow, - }; - - /// Create simple plain text inline nodes for testing - fn create_test_inlines(content: &str) -> Vec { - vec![InlineNode::PlainText(Plain { - content: content.to_string(), - location: Location::default(), - escaped: false, - })] + use acdc_parser::{Block, DelimitedBlockType, DocumentAttributes}; + + /// Parse an `AsciiDoc` string and extract the first table from the document. + #[allow(clippy::expect_used)] + fn parse_table(adoc: &str) -> acdc_parser::Table { + let options = acdc_parser::Options::default(); + let doc = acdc_parser::parse(adoc, &options).expect("Failed to parse AsciiDoc"); + doc.blocks + .into_iter() + .find_map(|block| { + if let Block::DelimitedBlock(db) = block + && let DelimitedBlockType::DelimitedTable(table) = db.inner + { + return Some(table); + } + None + }) + .expect("No table found in document") } /// Create test processor with default options @@ -146,31 +153,19 @@ mod tests { } } - /// Helper to create a paragraph block with plain text content - fn create_paragraph_block(text: &str) -> Block { - Block::Paragraph(Paragraph::new( - create_test_inlines(text), - Location::default(), - )) - } - #[test] fn test_table_with_footer() -> Result<(), Error> { - let table = acdc_parser::Table::new( - vec![TableRow::new(vec![ - TableColumn::new(vec![create_paragraph_block("Cell 1")]), - TableColumn::new(vec![create_paragraph_block("Cell 2")]), - ])], - Location::default(), - ) - .with_header(Some(TableRow::new(vec![ - 
TableColumn::new(vec![create_paragraph_block("Header 1")]), - TableColumn::new(vec![create_paragraph_block("Header 2")]), - ]))) - .with_footer(Some(TableRow::new(vec![ - TableColumn::new(vec![create_paragraph_block("Footer 1")]), - TableColumn::new(vec![create_paragraph_block("Footer 2")]), - ]))); + let adoc = r" +[%header%footer] +|=== +| Header 1 | Header 2 + +| Cell 1 | Cell 2 + +| Footer 1 | Footer 2 +|=== +"; + let table = parse_table(adoc); let buffer = Vec::new(); let processor = create_test_processor(); @@ -198,12 +193,12 @@ mod tests { #[test] fn test_table_without_footer() -> Result<(), Error> { - let table = acdc_parser::Table::new( - vec![TableRow::new(vec![TableColumn::new(vec![ - create_paragraph_block("Cell"), - ])])], - Location::default(), - ); + let adoc = r" +|=== +| Cell +|=== +"; + let table = parse_table(adoc); let buffer = Vec::new(); let processor = create_test_processor(); From 14405b32396cc1d719919ce810e786c6faa4c141 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sun, 18 Jan 2026 09:09:00 +0000 Subject: [PATCH 11/18] fix(parser): DSV tables no longer drop the first cell The condition for skipping the first part after splitting was checking `!is_csv` when it should've been `separator == "|"`. PSV format has an empty first part (before the leading `|`), but DSV format doesn't - the first part is actual content. Also added unit tests for the split_escaped function and fixtures for both PSV and DSV escape handling. 
--- CHANGELOG.md | 3 + .../fixtures/tests/table_dsv_basic.adoc | 5 + .../fixtures/tests/table_dsv_basic.json | 271 ++++++++++++++++++ .../tests/table_dsv_escaped_separator.adoc | 5 + .../tests/table_dsv_escaped_separator.json | 201 +++++++++++++ .../tests/table_psv_escaped_separator.adoc | 4 + .../tests/table_psv_escaped_separator.json | 196 +++++++++++++ acdc-parser/src/blocks/table.rs | 95 +++++- 8 files changed, 773 insertions(+), 7 deletions(-) create mode 100644 acdc-parser/fixtures/tests/table_dsv_basic.adoc create mode 100644 acdc-parser/fixtures/tests/table_dsv_basic.json create mode 100644 acdc-parser/fixtures/tests/table_dsv_escaped_separator.adoc create mode 100644 acdc-parser/fixtures/tests/table_dsv_escaped_separator.json create mode 100644 acdc-parser/fixtures/tests/table_psv_escaped_separator.adoc create mode 100644 acdc-parser/fixtures/tests/table_psv_escaped_separator.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 35fc466..0a4b96d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -116,6 +116,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- DSV tables now correctly preserve the first cell. Previously, DSV format (`cell1:cell2`) + was incorrectly treated like PSV (`| cell1 | cell2 |`), causing the first cell to be + dropped. Escape handling (`\:` → literal `:`) also works correctly now. - Table cell content now has correct source position tracking for multi-line cells - Description lists with terms starting with `#` (e.g., `#issue-123:: definition`) are no longer incorrectly parsed as section boundaries inside sections. 
The diff --git a/acdc-parser/fixtures/tests/table_dsv_basic.adoc b/acdc-parser/fixtures/tests/table_dsv_basic.adoc new file mode 100644 index 0000000..69fd0a2 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_dsv_basic.adoc @@ -0,0 +1,5 @@ +[format=dsv] +|=== +cell1:cell2:cell3 +A:B:C +|=== diff --git a/acdc-parser/fixtures/tests/table_dsv_basic.json b/acdc-parser/fixtures/tests/table_dsv_basic.json new file mode 100644 index 0000000..695fb50 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_dsv_basic.json @@ -0,0 +1,271 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "dsv" + } + }, + "content": { + "header": null, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "cell1", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "cell2", + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 11 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "cell3", + "location": [ + { + "line": 3, + "col": 13 + }, + { + "line": 3, + "col": 17 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 13 + }, + { + "line": 3, + "col": 17 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + 
"location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 1 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 1 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 4, + "col": 3 + }, + { + "line": 4, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 3 + }, + { + "line": 4, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 5 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_dsv_escaped_separator.adoc b/acdc-parser/fixtures/tests/table_dsv_escaped_separator.adoc new file mode 100644 index 0000000..487db87 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_dsv_escaped_separator.adoc @@ -0,0 +1,5 @@ +[format=dsv] +|=== +cell with \: colon:normal cell +another \: escaped:plain +|=== diff --git a/acdc-parser/fixtures/tests/table_dsv_escaped_separator.json b/acdc-parser/fixtures/tests/table_dsv_escaped_separator.json new file mode 100644 index 0000000..a9d1932 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_dsv_escaped_separator.json @@ -0,0 +1,201 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "dsv" + } 
+ }, + "content": { + "header": null, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "cell with : colon", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 17 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 17 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "normal cell", + "location": [ + { + "line": 3, + "col": 20 + }, + { + "line": 3, + "col": 30 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 20 + }, + { + "line": 3, + "col": 30 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "another : escaped", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 17 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 17 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "plain", + "location": [ + { + "line": 4, + "col": 20 + }, + { + "line": 4, + "col": 24 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 20 + }, + { + "line": 4, + "col": 24 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_psv_escaped_separator.adoc b/acdc-parser/fixtures/tests/table_psv_escaped_separator.adoc new file mode 100644 index 0000000..ef0c5c8 --- /dev/null 
+++ b/acdc-parser/fixtures/tests/table_psv_escaped_separator.adoc @@ -0,0 +1,4 @@ +|=== +| cell with \| pipe | normal cell +| another \| escaped | plain +|=== diff --git a/acdc-parser/fixtures/tests/table_psv_escaped_separator.json b/acdc-parser/fixtures/tests/table_psv_escaped_separator.json new file mode 100644 index 0000000..a3ccef2 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_psv_escaped_separator.json @@ -0,0 +1,196 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "content": { + "header": null, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "cell with | pipe", + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 18 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 18 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "normal cell", + "location": [ + { + "line": 2, + "col": 23 + }, + { + "line": 2, + "col": 33 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 23 + }, + { + "line": 2, + "col": 33 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "another | escaped", + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 19 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 19 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "plain", + "location": [ + { + "line": 3, + "col": 24 + }, + { + "line": 3, + "col": 28 + } + ] + } + ], + "location": [ + { + "line": 
3, + "col": 24 + }, + { + "line": 3, + "col": 28 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 4, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 4, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 4, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/src/blocks/table.rs b/acdc-parser/src/blocks/table.rs index 4552c92..ee210a8 100644 --- a/acdc-parser/src/blocks/table.rs +++ b/acdc-parser/src/blocks/table.rs @@ -519,14 +519,13 @@ impl Table { // e.g., "2+| content" -> part 0 is "2+", applies to part 1 let mut pending_spec: Option = None; - // Determine if first part should be treated as content (CSV) or specifier/skip (PSV/DSV) - // For CSV: first part is actual content - // For PSV/DSV: first part is either empty, whitespace, or a cell specifier - let is_csv = is_csv_format(separator); + // Determine if first part should be treated as content or specifier/skip + // For PSV (|): first part is before the leading separator, skip it or treat as specifier + // For CSV (,) and DSV (:): first part is actual cell content for (i, part) in parts.iter().enumerate() { - if i == 0 && !is_csv { - // First part is before first separator (PSV/DSV format) + if i == 0 && separator == "|" { + // First part is before first separator (PSV format only) let trimmed = part.content.trim(); if !trimmed.is_empty() { // Check if this looks like a specifier (e.g., "2+", "3*", "^.>") @@ -536,7 +535,7 @@ impl Table { pending_spec = Some(spec); } // If not a complete specifier, it's just content before first separator - // which we skip for PSV/DSV + // which we skip for PSV } continue; } @@ -600,3 +599,85 @@ impl Table { columns } } + +#[cfg(test)] +#[allow(clippy::panic, clippy::indexing_slicing)] +mod tests { + use super::*; + + #[test] + fn split_escaped_psv_no_escapes() { + let parts = split_escaped("| cell1 | cell2 |", '|'); + let [p0, 
p1, p2, p3] = parts.as_slice() else { + panic!("expected 4 parts, got {}", parts.len()); + }; + assert_eq!(p0.content, ""); + assert_eq!(p1.content, " cell1 "); + assert_eq!(p2.content, " cell2 "); + assert_eq!(p3.content, ""); + } + + #[test] + fn split_escaped_psv_with_escape() { + let parts = split_escaped(r"| cell with \| pipe | normal |", '|'); + let [p0, p1, p2, p3] = parts.as_slice() else { + panic!("expected 4 parts, got {}", parts.len()); + }; + assert_eq!(p0.content, ""); + assert_eq!(p1.content, " cell with | pipe "); + assert_eq!(p2.content, " normal "); + assert_eq!(p3.content, ""); + } + + #[test] + fn split_escaped_dsv_no_escapes() { + let parts = split_escaped("cell1:cell2:cell3", ':'); + let [p0, p1, p2] = parts.as_slice() else { + panic!("expected 3 parts, got {}", parts.len()); + }; + assert_eq!(p0.content, "cell1"); + assert_eq!(p1.content, "cell2"); + assert_eq!(p2.content, "cell3"); + } + + #[test] + fn split_escaped_dsv_with_escape() { + let parts = split_escaped(r"cell with \: colon:normal", ':'); + let [p0, p1] = parts.as_slice() else { + panic!("expected 2 parts, got {}", parts.len()); + }; + assert_eq!(p0.content, "cell with : colon"); + assert_eq!(p1.content, "normal"); + } + + #[test] + fn split_escaped_backslash_not_before_separator() { + // Backslash before non-separator should be preserved + let parts = split_escaped(r"cell\n with backslash|next", '|'); + let [p0, p1] = parts.as_slice() else { + panic!("expected 2 parts, got {}", parts.len()); + }; + assert_eq!(p0.content, r"cell\n with backslash"); + assert_eq!(p1.content, "next"); + } + + #[test] + fn split_escaped_multiple_escapes() { + let parts = split_escaped(r"\|start\|middle\|end", '|'); + let [p0] = parts.as_slice() else { + panic!("expected 1 part, got {}", parts.len()); + }; + assert_eq!(p0.content, "|start|middle|end"); + } + + #[test] + fn split_escaped_positions_tracked() { + let parts = split_escaped("ab|cd|ef", '|'); + let [p0, p1, p2] = parts.as_slice() else { + 
panic!("expected 3 parts, got {}", parts.len()); + }; + assert_eq!(p0.start, 0); + assert_eq!(p1.start, 3); // after "ab|" + assert_eq!(p2.start, 6); // after "ab|cd|" + } +} From 63f310e4e2077367723ad72ab6c9d880c5e983f5 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sun, 18 Jan 2026 10:10:44 +0000 Subject: [PATCH 12/18] fix(parser): accurate source positions for CSV multiline quoted values The csv crate strips quotes and normalizes content, so we need to scan the raw text to find where fields actually start. Now "Hello\nWorld" correctly points to line 4:8 (inside the quotes) instead of some approximated offset that ignored quote characters entirely. Added proper RFC 4180 handling for escaped quotes too. --- CHANGELOG.md | 4 + Cargo.lock | 211 +++++----- acdc-parser/Cargo.toml | 1 + .../fixtures/tests/table_csv_basic.adoc | 6 + .../fixtures/tests/table_csv_basic.json | 382 ++++++++++++++++++ .../fixtures/tests/table_csv_multiline.adoc | 9 + .../fixtures/tests/table_csv_multiline.json | 277 +++++++++++++ .../fixtures/tests/table_csv_no_header.adoc | 6 + .../fixtures/tests/table_csv_no_header.json | 380 +++++++++++++++++ .../fixtures/tests/table_tsv_basic.adoc | 6 + .../fixtures/tests/table_tsv_basic.json | 382 ++++++++++++++++++ acdc-parser/src/blocks/table.rs | 236 +++++++++-- 12 files changed, 1765 insertions(+), 135 deletions(-) create mode 100644 acdc-parser/fixtures/tests/table_csv_basic.adoc create mode 100644 acdc-parser/fixtures/tests/table_csv_basic.json create mode 100644 acdc-parser/fixtures/tests/table_csv_multiline.adoc create mode 100644 acdc-parser/fixtures/tests/table_csv_multiline.json create mode 100644 acdc-parser/fixtures/tests/table_csv_no_header.adoc create mode 100644 acdc-parser/fixtures/tests/table_csv_no_header.json create mode 100644 acdc-parser/fixtures/tests/table_tsv_basic.adoc create mode 100644 acdc-parser/fixtures/tests/table_tsv_basic.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a4b96d..d6ffbf3 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -120,6 +120,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 was incorrectly treated like PSV (`| cell1 | cell2 |`), causing the first cell to be dropped. Escape handling (`\:` → literal `:`) also works correctly now. - Table cell content now has correct source position tracking for multi-line cells +- CSV tables with quoted multiline values now have accurate source positions. Previously, + positions were approximated without accounting for quote characters, so `"Hello\nWorld"` + would report incorrect line/column. Now positions point to actual content start (inside + the quotes), with proper handling for RFC 4180 escaped quotes (`""`). - Description lists with terms starting with `#` (e.g., `#issue-123:: definition`) are no longer incorrectly parsed as section boundaries inside sections. The section boundary detection now requires a space after the level marker. diff --git a/Cargo.lock b/Cargo.lock index e8dfa91..e63c659 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -108,6 +108,7 @@ version = "0.1.4" dependencies = [ "criterion", "crossterm", + "csv", "encoding_rs", "evalexpr", "peg", @@ -477,9 +478,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.49" +version = "1.2.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" +checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932" dependencies = [ "find-msvc-tools", "jobserver", @@ -495,9 +496,9 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = 
[ "iana-time-zone", "js-sys", @@ -569,9 +570,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" [[package]] name = "color_quant" @@ -587,9 +588,9 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comfy-table" -version = "7.2.1" +version = "7.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" dependencies = [ "crossterm", "unicode-segmentation", @@ -734,6 +735,27 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + [[package]] name = "dashmap" version = "5.5.3" @@ -772,18 +794,18 @@ dependencies = [ [[package]] name = "derive_more" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" dependencies = [ "derive_more-impl", ] [[package]] name = "derive_more-impl" -version = "2.1.0" +version 
= "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" dependencies = [ "convert_case", "proc-macro2", @@ -932,15 +954,15 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.5" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" [[package]] name = "flate2" -version = "1.1.5" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" dependencies = [ "crc32fast", "miniz_oxide", @@ -1046,9 +1068,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "libc", @@ -1279,8 +1301,8 @@ dependencies = [ "ravif", "rgb", "tiff", - "zune-core 0.5.0", - "zune-jpeg 0.5.7", + "zune-core 0.5.1", + "zune-jpeg 0.5.9", ] [[package]] @@ -1301,9 +1323,9 @@ checksum = "e7c5cedc30da3a610cac6b4ba17597bdf7152cf974e8aab3afb3d54455e371c8" [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -1352,9 +1374,9 @@ dependencies = [ [[package]] name = "itoa" -version 
= "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jobserver" @@ -1368,9 +1390,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -1390,9 +1412,9 @@ checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8" [[package]] name = "libc" -version = "0.2.178" +version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] name = "libfuzzer-sys" @@ -1907,9 +1929,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" dependencies = [ "unicode-ident", ] @@ -1993,9 +2015,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.42" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" dependencies = [ "proc-macro2", ] @@ -2028,9 +2050,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -2174,7 +2196,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -2211,9 +2233,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" [[package]] name = "rustc-hash" @@ -2232,9 +2254,9 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ "bitflags 2.10.0", "errno", @@ -2245,9 +2267,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "log", "once_cell", @@ -2260,18 +2282,18 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.103.8" +version = "0.103.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "ring", "rustls-pki-types", @@ -2296,6 +2318,12 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "ryu" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" + [[package]] name = "same-file" version = "1.0.6" @@ -2409,10 +2437,11 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.7" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] @@ -2494,9 +2523,9 @@ checksum = "b7401a30af6cb5818bb64852270bb722533397edcfc7344954a38f420819ece2" [[package]] name = "syn" -version = "2.0.111" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -2537,9 +2566,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.23.0" +version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ "fastrand", "getrandom 0.3.4", @@ -2622,30 +2651,30 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd" dependencies = [ "deranged", "itoa", "num-conv", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd" dependencies = [ "num-conv", "time-core", @@ -2701,9 +2730,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.17" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -2714,9 +2743,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.4+spec-1.0.0" +version = "0.7.5+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe3cea6b2aa3b910092f6abd4053ea464fab5f9c170ba5e9a6aead16ec4af2b6" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" dependencies = [ "serde_core", ] @@ -2735,9 +2764,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.5+spec-1.0.0" +version = "1.0.6+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c03bee5ce3696f31250db0bbaff18bc43301ce0e8db2ed1f07cbb2acf89984c" +checksum = 
"a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" dependencies = [ "winnow", ] @@ -3045,18 +3074,18 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", @@ -3067,9 +3096,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3077,9 +3106,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", @@ -3090,18 +3119,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = 
"1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", @@ -3109,9 +3138,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" dependencies = [ "rustls-pki-types", ] @@ -3379,9 +3408,9 @@ dependencies = [ [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "writeable" @@ -3435,18 +3464,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", @@ -3515,9 +3544,9 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.2" +version = "1.0.15" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f4a4e8e9dc5c62d159f04fcdbe07f4c3fb710415aab4754bf11505501e3251d" +checksum = "94f63c051f4fe3c1509da62131a678643c5b6fbdc9273b2b79d4378ebda003d2" [[package]] name = "zune-core" @@ -3527,9 +3556,9 @@ checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" [[package]] name = "zune-core" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "111f7d9820f05fd715df3144e254d6fc02ee4088b0644c0ffd0efc9e6d9d2773" +checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9" [[package]] name = "zune-inflate" @@ -3551,9 +3580,9 @@ dependencies = [ [[package]] name = "zune-jpeg" -version = "0.5.7" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d915729b0e7d5fe35c2f294c5dc10b30207cc637920e5b59077bfa3da63f28" +checksum = "87c86acb70a85b2c16f071f171847d1945e8f44812630463cd14ec83900ad01c" dependencies = [ - "zune-core 0.5.0", + "zune-core 0.5.1", ] diff --git a/acdc-parser/Cargo.toml b/acdc-parser/Cargo.toml index cab7a26..dbd6f51 100644 --- a/acdc-parser/Cargo.toml +++ b/acdc-parser/Cargo.toml @@ -19,6 +19,7 @@ setext = [] # Enable Setext-style (underlined) header parsing pre-spec-subs = [] [dependencies] +csv = "1.4" encoding_rs = "0.8" evalexpr = "13" peg = "0.8" diff --git a/acdc-parser/fixtures/tests/table_csv_basic.adoc b/acdc-parser/fixtures/tests/table_csv_basic.adoc new file mode 100644 index 0000000..5ec585a --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_basic.adoc @@ -0,0 +1,6 @@ +[%header,format=csv] +|=== +name,role,department +Alice,Developer,Engineering +Bob,Designer,Marketing +|=== diff --git a/acdc-parser/fixtures/tests/table_csv_basic.json b/acdc-parser/fixtures/tests/table_csv_basic.json new file mode 100644 index 0000000..84f45e0 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_basic.json @@ -0,0 +1,382 @@ +{ + "name": 
"document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "csv" + }, + "options": [ + "header" + ] + }, + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "name", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "role", + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 9 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 9 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "department", + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 20 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 20 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Alice", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Developer", + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 15 + } + ] + } 
+ ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Engineering", + "location": [ + { + "line": 4, + "col": 17 + }, + { + "line": 4, + "col": 27 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 17 + }, + { + "line": 4, + "col": 27 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Bob", + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Designer", + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 12 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 12 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Marketing", + "location": [ + { + "line": 5, + "col": 14 + }, + { + "line": 5, + "col": 22 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 14 + }, + { + "line": 5, + "col": 22 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_csv_multiline.adoc b/acdc-parser/fixtures/tests/table_csv_multiline.adoc new file mode 100644 index 0000000..0ce9bfc --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_multiline.adoc @@ -0,0 +1,9 @@ +[%header,format=csv] +|=== 
+name,description +Alice,"Hello +World" +Bob,"Line 1 +Line 2 +Line 3" +|=== diff --git a/acdc-parser/fixtures/tests/table_csv_multiline.json b/acdc-parser/fixtures/tests/table_csv_multiline.json new file mode 100644 index 0000000..cf3c69a --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_multiline.json @@ -0,0 +1,277 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "csv" + }, + "options": [ + "header" + ] + }, + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "name", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "description", + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 16 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 16 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Alice", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Hello\nWorld", + "location": [ + { + "line": 4, + "col": 8 + }, + { + "line": 5, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 8 + }, + { + "line": 5, + "col": 5 + 
} + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Bob", + "location": [ + { + "line": 6, + "col": 1 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 1 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Line 1\nLine 2\nLine 3", + "location": [ + { + "line": 6, + "col": 6 + }, + { + "line": 8, + "col": 6 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 6 + }, + { + "line": 8, + "col": 6 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 9, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 9, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 9, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_csv_no_header.adoc b/acdc-parser/fixtures/tests/table_csv_no_header.adoc new file mode 100644 index 0000000..f351a78 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_no_header.adoc @@ -0,0 +1,6 @@ +[format=csv] +|=== +Alice,Developer,Engineering +Bob,Designer,Marketing +Carol,Manager,Operations +|=== diff --git a/acdc-parser/fixtures/tests/table_csv_no_header.json b/acdc-parser/fixtures/tests/table_csv_no_header.json new file mode 100644 index 0000000..07878fe --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_no_header.json @@ -0,0 +1,380 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "csv" + } + }, + "content": { + "header": null, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": 
"block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Alice", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Developer", + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 15 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Engineering", + "location": [ + { + "line": 3, + "col": 17 + }, + { + "line": 3, + "col": 27 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 17 + }, + { + "line": 3, + "col": 27 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Bob", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Designer", + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 12 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 12 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Marketing", + "location": [ + { + "line": 4, + "col": 14 + }, + { + "line": 4, + "col": 22 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 14 + }, + { + "line": 4, + "col": 22 + } + ] + } + ] + 
} + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Carol", + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Manager", + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 13 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 13 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Operations", + "location": [ + { + "line": 5, + "col": 15 + }, + { + "line": 5, + "col": 24 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 15 + }, + { + "line": 5, + "col": 24 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_tsv_basic.adoc b/acdc-parser/fixtures/tests/table_tsv_basic.adoc new file mode 100644 index 0000000..9a7a0d1 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_tsv_basic.adoc @@ -0,0 +1,6 @@ +[%header,format=tsv] +|=== +name role department +Alice Developer Engineering +Bob Designer Marketing +|=== diff --git a/acdc-parser/fixtures/tests/table_tsv_basic.json b/acdc-parser/fixtures/tests/table_tsv_basic.json new file mode 100644 index 0000000..9376524 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_tsv_basic.json @@ -0,0 +1,382 @@ +{ + "name": "document", + "type": "block", + 
"blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "tsv" + }, + "options": [ + "header" + ] + }, + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "name", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "role", + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 9 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 9 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "department", + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 20 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 20 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Alice", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Developer", + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 15 + } + ] + } + ] + }, + { + "content": [ + { 
+ "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Engineering", + "location": [ + { + "line": 4, + "col": 17 + }, + { + "line": 4, + "col": 27 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 17 + }, + { + "line": 4, + "col": 27 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Bob", + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Designer", + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 12 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 12 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Marketing", + "location": [ + { + "line": 5, + "col": 14 + }, + { + "line": 5, + "col": 22 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 14 + }, + { + "line": 5, + "col": 22 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/src/blocks/table.rs b/acdc-parser/src/blocks/table.rs index ee210a8..302af03 100644 --- a/acdc-parser/src/blocks/table.rs +++ b/acdc-parser/src/blocks/table.rs @@ -52,56 +52,141 @@ fn split_escaped(line: &str, separator: char) -> Vec { parts } -/// Split a CSV line, respecting quoted fields (RFC 
4180). +/// Parse a CSV table body using the `csv` crate for full RFC 4180 compliance. /// -/// - Fields enclosed in double quotes can contain commas -/// - Double-double-quotes (`""`) inside quoted fields become a single quote -fn split_csv(line: &str) -> Vec { - let mut parts = Vec::new(); - let mut current_content = String::new(); - let mut part_start = 0; - let mut in_quotes = false; - let mut chars = line.char_indices().peekable(); +/// This handles multi-line quoted values, escaped quotes, and all CSV edge cases. +/// Returns rows with cells containing their content and accurate byte positions. +fn parse_csv_table(text: &str, base_offset: usize) -> Vec> { + let text_bytes = text.as_bytes(); + let mut reader = csv::ReaderBuilder::new() + .has_headers(false) + .flexible(true) // allow variable column counts + .from_reader(text_bytes); + + let mut rows = Vec::new(); + + for result in reader.records() { + let Ok(record) = result else { + continue; + }; - while let Some((byte_idx, ch)) = chars.next() { - if in_quotes { - if ch == '"' { - // Check for escaped quote ("") - if let Some(&(_, next_ch)) = chars.peek() { - if next_ch == '"' { - // Escaped quote - add one quote, skip both - current_content.push('"'); - chars.next(); // consume the second quote - continue; - } - } - // End of quoted field - in_quotes = false; + // Get the byte position where this record starts in the input + let record_start = record + .position() + .map_or(0, |p| usize::try_from(p.byte()).unwrap_or(0)); + + let mut cells = Vec::new(); + let mut scan_pos = record_start; + + for field in &record { + // Find actual field position by scanning the original text + let (field_content_start, next_pos) = + find_csv_field_position(text_bytes, scan_pos, field); + + cells.push(CellPart { + content: field.to_string(), + start: base_offset + field_content_start, + }); + + scan_pos = next_pos; + } + + rows.push(cells); + } + + rows +} + +/// Find the actual byte position of a CSV field's content in the 
original text. +/// +/// Returns `(content_start, next_scan_position)` where: +/// - `content_start` is where the field's actual content begins (after opening quote if quoted) +/// - `next_scan_position` is where to start scanning for the next field +fn find_csv_field_position(text: &[u8], start: usize, expected_content: &str) -> (usize, usize) { + let Some(&first_byte) = text.get(start) else { + return (start, start); + }; + + if first_byte == b'"' { + // Quoted field: content starts after the opening quote + let content_start = start + 1; + // Find the closing quote (handle escaped quotes "") + let end_pos = find_closing_quote(text, start + 1); + // Next field starts after closing quote and comma (or newline) + let next_pos = skip_to_next_field(text, end_pos); + (content_start, next_pos) + } else { + // Unquoted field: content starts at current position + let content_start = start; + // Find end of field (comma or newline) + let end_pos = find_unquoted_field_end(text, start, expected_content.len()); + // Next field starts after the separator + let next_pos = skip_to_next_field(text, end_pos); + (content_start, next_pos) + } +} + +/// Find the closing quote of a quoted CSV field, handling escaped quotes (`""`). 
+fn find_closing_quote(text: &[u8], start: usize) -> usize { + let mut pos = start; + while let Some(&byte) = text.get(pos) { + if byte == b'"' { + // Check if this is an escaped quote ("") + if text.get(pos + 1) == Some(&b'"') { + // Escaped quote - skip both and continue + pos += 2; } else { - current_content.push(ch); + // Closing quote found + return pos; } - } else if ch == '"' { - // Start of quoted field - in_quotes = true; - } else if ch == ',' { - // Field separator - parts.push(CellPart { - content: std::mem::take(&mut current_content), - start: part_start, - }); - part_start = byte_idx + 1; // comma is always 1 byte } else { - current_content.push(ch); + pos += 1; } } + // No closing quote found - return end of text + text.len() +} - // Add final part - parts.push(CellPart { - content: current_content, - start: part_start, - }); +/// Find the end of an unquoted CSV field. +fn find_unquoted_field_end(text: &[u8], start: usize, content_len: usize) -> usize { + // The field ends at comma, CR, LF, or content_len bytes (whichever comes first) + let mut pos = start; + let mut remaining = content_len; + while let Some(&byte) = text.get(pos) { + if byte == b',' || byte == b'\n' || byte == b'\r' { + return pos; + } + if remaining == 0 { + return pos; + } + remaining = remaining.saturating_sub(1); + pos += 1; + } + text.len() +} - parts +/// Skip past the current field separator to find the start of the next field. +fn skip_to_next_field(text: &[u8], pos: usize) -> usize { + let mut pos = pos; + // Skip closing quote if present + if text.get(pos) == Some(&b'"') { + pos += 1; + } + // Skip comma or newline characters + while let Some(&byte) = text.get(pos) { + if byte == b',' { + return pos + 1; + } + if byte == b'\r' || byte == b'\n' { + // Skip CRLF or just LF + if byte == b'\r' && text.get(pos + 1) == Some(&b'\n') { + return pos + 2; + } + return pos + 1; + } + pos += 1; + } + pos } /// Determine if this is a CSV format table. 
@@ -110,10 +195,10 @@ fn is_csv_format(separator: &str) -> bool { } /// Split a line into cell parts using the appropriate method for the separator. +/// +/// Note: CSV format is handled separately via `parse_csv_table()` for multi-line support. fn split_line(line: &str, separator: &str) -> Vec { - if is_csv_format(separator) { - split_csv(line) - } else if let Some(sep_char) = separator.chars().next() { + if let Some(sep_char) = separator.chars().next() { if separator.len() == 1 { split_escaped(line, sep_char) } else { @@ -418,6 +503,11 @@ impl Table { has_header: &mut bool, base_offset: usize, ) -> Vec> { + // CSV format needs special handling for multi-line quoted values + if is_csv_format(separator) { + return Self::parse_csv_rows_with_positions(text, has_header, base_offset); + } + let mut rows = Vec::new(); let mut current_offset = base_offset; let lines: Vec<&str> = text.lines().collect(); @@ -497,6 +587,64 @@ impl Table { rows } + /// Parse CSV table rows using the `csv` crate for RFC 4180 compliance. + /// + /// This handles multi-line quoted values correctly by processing the entire + /// table body at once rather than line-by-line. + fn parse_csv_rows_with_positions( + text: &str, + has_header: &mut bool, + base_offset: usize, + ) -> Vec> { + // Check for header indicator: first row followed by blank line + // For CSV, we need to detect this before parsing since the csv crate + // consumes the text as a stream. 
+ let lines: Vec<&str> = text.lines().collect(); + if lines.len() >= 2 { + // Find where first CSV record ends - look for first complete record + // A simple heuristic: if line 1 (0-indexed) is empty, we have a header + if let Some(&line) = lines.get(1) { + if line.trim().is_empty() { + *has_header = true; + } + } + } + + let csv_rows = parse_csv_table(text, base_offset); + let mut rows = Vec::new(); + + for csv_row in csv_rows { + let mut cells = Vec::new(); + for part in csv_row { + let content = part.content.trim(); + let start = part.start; + let end = if content.is_empty() { + start + } else { + start + content.len().saturating_sub(1) + }; + + cells.push(ParsedCell { + content: content.to_string(), + start, + end, + colspan: 1, + rowspan: 1, + halign: None, + valign: None, + style: None, + is_duplication: false, + duplication_count: 1, + }); + } + if !cells.is_empty() { + rows.push(cells); + } + } + + rows + } + fn parse_row_with_positions( row_lines: &[&str], separator: &str, From f317146608dbca775988755305694b5ed482bb1e Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Sun, 18 Jan 2026 23:35:51 +0000 Subject: [PATCH 13/18] chore(parser): detect unsupported counter and warn about it It seems counters are not so hot (https://asciidoc.zulipchat.com/#narrow/channel/335219-asciidoc-lang/topic/counters/near/568691273) so I've decided to just add a warning if we detect them but not actually do anything with them. --- acdc-parser/src/grammar/document.rs | 4 +-- .../src/grammar/inline_preprocessor.rs | 36 ++++++++++++++++++- converters/html/src/table.rs | 29 +++++++++++---- 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/acdc-parser/src/grammar/document.rs b/acdc-parser/src/grammar/document.rs index 4bb3dad..5c2ae79 100644 --- a/acdc-parser/src/grammar/document.rs +++ b/acdc-parser/src/grammar/document.rs @@ -4990,7 +4990,7 @@ peg::parser! 
{ /// Excludes '[' and ']' to respect AsciiDoc macro/attribute boundaries rule url_path() -> String = path:$(['A'..='Z' | 'a'..='z' | '0'..='9' | '-' | '.' | '_' | '~' | ':' | '/' | '?' | '#' | '@' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '%' | '\\' ]+) {? - let mut inline_state = InlinePreprocessorParserState::new( + let inline_state = InlinePreprocessorParserState::new( path, state.line_map.clone(), &state.input, @@ -5009,7 +5009,7 @@ peg::parser! { /// Includes '{' and '}' for `AsciiDoc` attribute substitution pub rule path() -> String = path:$(['A'..='Z' | 'a'..='z' | '0'..='9' | '{' | '}' | '_' | '-' | '.' | '/' | '\\' ]+) {? - let mut inline_state = InlinePreprocessorParserState::new( + let inline_state = InlinePreprocessorParserState::new( path, state.line_map.clone(), &state.input, diff --git a/acdc-parser/src/grammar/inline_preprocessor.rs b/acdc-parser/src/grammar/inline_preprocessor.rs index e53f856..c4b3d11 100644 --- a/acdc-parser/src/grammar/inline_preprocessor.rs +++ b/acdc-parser/src/grammar/inline_preprocessor.rs @@ -230,6 +230,10 @@ parser!( kbd_macro() / monospace() / passthrough() + // counter_reference must come BEFORE attribute_reference because counters + // have a colon in the name (e.g., {counter:num}) which is not valid in + // standard attribute names + / counter_reference() / attribute_reference() / unprocessed_text() } / expected!("inlines parser failed") @@ -250,6 +254,33 @@ parser!( text.to_string() } + /// Counter reference: `{counter:name}`, `{counter:name:initial}`, `{counter2:name}` + /// + /// Counters are not supported. Per asciidoctor maintainer feedback, counters are + /// "a disaster" that they want to redesign or remove. We detect them, emit a warning, + /// and return empty string (the counter syntax is silently removed from output). 
+ rule counter_reference() -> String + = start:position() "{" + counter_type:$("counter2" / "counter") ":" + name:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-']+) + (":" ['a'..='z' | 'A'..='Z' | '0'..='9']+)? + "}" + { + tracing::warn!( + counter_type, + name, + "Counters ({{{counter_type}:{name}}}) are not supported and will be removed from output" + ); + + // Calculate total length for position tracking + // We capture the full match including any optional initial value + let total_len = counter_type.len() + 1 + name.len() + 2; // "{" + counter_type + ":" + name + "}" + let _location = state.calculate_location(start, "", total_len); + + // Return empty string - counter is removed from output + String::new() + } + rule attribute_reference() -> String = start:position() "{" attribute_name:attribute_name() "}" { let location = state.calculate_location(start, attribute_name, 2); @@ -485,11 +516,14 @@ parser!( = $(['a'..='z' | 'A'..='Z' | '0'..='9']+) rule unprocessed_text() -> String - = text:$((!(passthrough_pattern() / attribute_reference_pattern() / kbd_macro_pattern() / monospace_pattern()) [_])+) { + = text:$((!(passthrough_pattern() / counter_reference_pattern() / attribute_reference_pattern() / kbd_macro_pattern() / monospace_pattern()) [_])+) { state.advance(text); text.to_string() } + /// Pattern for counter references: {counter:name} or {counter:name:initial} or {counter2:...} + rule counter_reference_pattern() = "{" ("counter2" / "counter") ":" ['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-']+ (":" ['a'..='z' | 'A'..='Z' | '0'..='9']+)? 
"}" + rule attribute_reference_pattern() = "{" attribute_name_pattern() "}" rule attribute_name_pattern() = ['a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_']+ diff --git a/converters/html/src/table.rs b/converters/html/src/table.rs index bdc73b8..8ea6c7b 100644 --- a/converters/html/src/table.rs +++ b/converters/html/src/table.rs @@ -100,23 +100,38 @@ where Ok(()) } -/// Render table caption with number if title exists +/// Render table caption with number if title exists. +/// +/// Per-block `[caption="..."]` attribute overrides the prefix entirely and does NOT increment +/// the table counter (following `AsciiDoc` specification). fn render_table_caption( visitor: &mut V, title: &[InlineNode], processor: &Processor, + metadata: &BlockMetadata, ) -> Result<(), Error> where V: WritableVisitor, { if !title.is_empty() { - let count = processor.table_counter.get() + 1; - processor.table_counter.set(count); - let caption_owned = processor.document_attributes.get_string("table-caption"); - let caption = caption_owned.unwrap_or(String::from("Table")); + // Check for per-block caption override + let prefix = if let Some(custom_caption) = metadata.attributes.get_string("caption") { + // Per-block caption replaces entire prefix and does NOT increment internal counter. + custom_caption + } else { + // Default: "Table N. " format - increment counter + let count = processor.table_counter.get() + 1; + processor.table_counter.set(count); + let caption = processor + .document_attributes + .get_string("table-caption") + .unwrap_or_else(|| String::from("Table")); + format!("{caption} {count}. ") + }; + visitor.render_title_with_wrapper( title, - &format!("
{caption} {count}. "), + &format!("{prefix}"), "
instead of + let tag = if style == Some(ColumnStyle::Header) { + "th" + } else { + "td" + }; + + let writer = visitor.writer_mut(); + write!( + writer, + "<{tag} class=\"tableblock {halign} {valign}\"{span_attrs}>" + )?; + let _ = writer; + render_cell_content(&cell.content, visitor, processor, options, true, style)?; + let writer = visitor.writer_mut(); + writeln!(writer, "")?; + Ok(()) +} + /// Render table with support for nested blocks in cells pub(crate) fn render_table( table: &Table, @@ -303,6 +423,7 @@ where for (col_index, cell) in header.columns.iter().enumerate() { let halign = halign_class(get_effective_halign(&table.columns, col_index, cell)); let valign = valign_class(get_effective_valign(&table.columns, col_index, cell)); + let style = get_effective_style(&table.columns, col_index, cell); let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); write!( @@ -310,7 +431,7 @@ where "" )?; let _ = writer; - render_cell_content(&cell.content, visitor, processor, options, false)?; + render_cell_content(&cell.content, visitor, processor, options, false, style)?; let writer = visitor.writer_mut(); writeln!(writer, "
" - )?; - let _ = writer; - render_cell_content(&cell.content, visitor, processor, options, true)?; - let writer = visitor.writer_mut(); - writeln!(writer, "
" )?; let _ = writer; - render_cell_content(&cell.content, visitor, processor, options, true)?; + render_cell_content(&cell.content, visitor, processor, options, true, style)?; let writer = visitor.writer_mut(); writeln!(writer, "
----++++ From 9e7c4ad2f13796e62a08a00d67cac4ddcd3635cf Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Wed, 21 Jan 2026 15:39:04 +0000 Subject: [PATCH 17/18] chore(html): add fixture for table cell styles --- .../fixtures/expected/table_cell_styles.html | 468 ++++++++++++++++++ .../fixtures/source/table_cell_styles.adoc | 9 + 2 files changed, 477 insertions(+) create mode 100644 converters/html/tests/fixtures/expected/table_cell_styles.html create mode 100644 converters/html/tests/fixtures/source/table_cell_styles.adoc diff --git a/converters/html/tests/fixtures/expected/table_cell_styles.html b/converters/html/tests/fixtures/expected/table_cell_styles.html new file mode 100644 index 0000000..25449f6 --- /dev/null +++ b/converters/html/tests/fixtures/expected/table_cell_styles.html @@ -0,0 +1,468 @@ + + + + + + + + + + + +
+
++++++++ + + + + + + + + + + +

normal

strong

emphasis

monospace

literal

header

+ + + + diff --git a/converters/html/tests/fixtures/source/table_cell_styles.adoc b/converters/html/tests/fixtures/source/table_cell_styles.adoc new file mode 100644 index 0000000..a8e2325 --- /dev/null +++ b/converters/html/tests/fixtures/source/table_cell_styles.adoc @@ -0,0 +1,9 @@ +[cols="1,1,1,1,1,1"] +|=== +|normal +s|strong +e|emphasis +m|monospace +l|literal +h|header +|=== From d91aad0c1246609c6c14ac0e95d61c55e91f1d90 Mon Sep 17 00:00:00 2001 From: Norberto Lopes Date: Wed, 21 Jan 2026 22:52:13 +0000 Subject: [PATCH 18/18] fix(parser): restrict block types in default table cells to match asciidoctor Default table cells now treat list markers, delimited blocks, toc macros, and page breaks as literal text instead of parsing them as blocks. Only cells with the 'a' (AsciiDoc) style get full block parsing. This fixes the mismatch where `| * item` was parsed as a list instead of literal text "* item". --- CHANGELOG.md | 5 + .../fixtures/tests/table_cell_alignment.html | 475 ------------------ .../fixtures/tests/table_cell_alignment.json | 27 +- acdc-parser/src/blocks/table.rs | 4 +- acdc-parser/src/grammar/document.rs | 36 ++ acdc-parser/src/grammar/table.rs | 17 +- 6 files changed, 69 insertions(+), 495 deletions(-) delete mode 100644 acdc-parser/fixtures/tests/table_cell_alignment.html diff --git a/CHANGELOG.md b/CHANGELOG.md index d6ffbf3..5b6ec2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 the parser based on each passthrough's own substitution settings. ([#291]) - Verbatim blocks (listing/literal) now correctly skip typography replacements by default, matching asciidoctor behavior. Previously, smart quotes were incorrectly applied. +- HTML5 compliance: removed self-closing syntax (`` → ``, `` → ``) + and deprecated `frameborder` attribute from iframes. 
### Changed @@ -146,6 +148,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Paragraphs no longer incorrectly split when a line starts with inline passthrough syntax like `+>+`. The list continuation lookahead now only matches actual continuation markers (standalone `+` followed by whitespace/EOL/EOF). +- Default table cells now treat list markers, delimited blocks, toc macros, and page + breaks as literal text instead of parsing them as blocks. Only cells with the `a` + (AsciiDoc) style get full block parsing, matching asciidoctor behavior. ### Changed diff --git a/acdc-parser/fixtures/tests/table_cell_alignment.html b/acdc-parser/fixtures/tests/table_cell_alignment.html deleted file mode 100644 index d4d72ab..0000000 --- a/acdc-parser/fixtures/tests/table_cell_alignment.html +++ /dev/null @@ -1,475 +0,0 @@ - - - - - - - - - - - -
- ----- - - - - - - - - - - - - - - - - - - - -
DefaultCenteredRight

Center

Default

Right

Default

Middle

Bottom

-
- - - diff --git a/acdc-parser/fixtures/tests/table_cell_alignment.json b/acdc-parser/fixtures/tests/table_cell_alignment.json index 0e8492a..3aba82f 100644 --- a/acdc-parser/fixtures/tests/table_cell_alignment.json +++ b/acdc-parser/fixtures/tests/table_cell_alignment.json @@ -201,11 +201,11 @@ { "name": "text", "type": "string", - "value": "Right", + "value": "> Right", "location": [ { "line": 4, - "col": 25 + "col": 23 }, { "line": 4, @@ -217,7 +217,7 @@ "location": [ { "line": 4, - "col": 25 + "col": 23 }, { "line": 4, @@ -225,8 +225,7 @@ } ] } - ], - "halign": "right" + ] } ] }, @@ -276,11 +275,11 @@ { "name": "text", "type": "string", - "value": "Middle", + "value": ".^ Middle", "location": [ { "line": 5, - "col": 16 + "col": 13 }, { "line": 5, @@ -292,7 +291,7 @@ "location": [ { "line": 5, - "col": 16 + "col": 13 }, { "line": 5, @@ -300,8 +299,7 @@ } ] } - ], - "valign": "middle" + ] }, { "content": [ @@ -312,11 +310,11 @@ { "name": "text", "type": "string", - "value": "Bottom", + "value": ".> Bottom", "location": [ { "line": 5, - "col": 28 + "col": 25 }, { "line": 5, @@ -328,7 +326,7 @@ "location": [ { "line": 5, - "col": 28 + "col": 25 }, { "line": 5, @@ -336,8 +334,7 @@ } ] } - ], - "valign": "bottom" + ] } ] } diff --git a/acdc-parser/src/blocks/table.rs b/acdc-parser/src/blocks/table.rs index ebc6275..9dafaf6 100644 --- a/acdc-parser/src/blocks/table.rs +++ b/acdc-parser/src/blocks/table.rs @@ -471,8 +471,8 @@ impl CellSpecifier { } }; (spec, pos) - } else if halign.is_some() || valign.is_some() { - // Alignment without span operator - still valid + } else if (halign.is_some() || valign.is_some()) && context == ParseContext::FirstPart { + // Alignment without span operator - still valid (only in FirstPart context) let style = bytes.get(pos).and_then(|&b| parse_style_byte(b)); if style.is_some() { pos += 1; diff --git a/acdc-parser/src/grammar/document.rs b/acdc-parser/src/grammar/document.rs index 5c2ae79..cdd5253 100644 --- 
a/acdc-parser/src/grammar/document.rs +++ b/acdc-parser/src/grammar/document.rs @@ -585,6 +585,18 @@ peg::parser! { blocks.into_iter().collect::, Error>>() } + /// Blocks for table cells without `AsciiDoc` style - excludes block types that require full parsing. + /// Table cells use a simplified block parser that excludes sections, document attributes, + /// and block types like lists, delimited blocks, toc, page breaks, and markdown blockquotes. + pub(crate) rule blocks_for_table_cell(offset: usize, parent_section_level: Option) -> Result, Error> + = eol()* + blocks:( + comment_line_block(offset) / + block_generic_for_table_cell(offset, parent_section_level) + )* + { + blocks.into_iter().collect::, Error>>() + } pub(crate) rule block(offset: usize, parent_section_level: Option) -> Result = eol()* @@ -1074,6 +1086,30 @@ peg::parser! { block } + /// Block parsing for table cells without `AsciiDoc` style - excludes block types that require full parsing. + /// Only `a` (`AsciiDoc`) style cells should have full block parsing. + /// Excluded: delimited_block, list, toc, page_break, markdown_blockquote + rule block_generic_for_table_cell(offset: usize, parent_section_level: Option) -> Result + = start:position!() + block_metadata:(bm:block_metadata(offset, parent_section_level) {? 
+ bm.map_err(|e| { + tracing::error!(?e, "error parsing block metadata in block_generic_for_table_cell"); + "block metadata parse error" + }) + }) + block:( + // NOTE: delimited_block is intentionally excluded - only valid with 'a' cell style + image:image(start, offset, &block_metadata) { image } + / audio:audio(start, offset, &block_metadata) { audio } + / video:video(start, offset, &block_metadata) { video } + / thematic_break:thematic_break(start, offset, &block_metadata) { thematic_break } + / quoted_paragraph:quoted_paragraph(start, offset, &block_metadata) { quoted_paragraph } + // NOTE: toc, page_break, list, markdown_blockquote are excluded - only valid with 'a' cell style + / paragraph:paragraph(start, offset, &block_metadata) { paragraph } + ) { + block + } + rule delimited_block( start: usize, offset: usize, diff --git a/acdc-parser/src/grammar/table.rs b/acdc-parser/src/grammar/table.rs index 00ef4b5..beeab7c 100644 --- a/acdc-parser/src/grammar/table.rs +++ b/acdc-parser/src/grammar/table.rs @@ -1,4 +1,4 @@ -use crate::{Error, TableColumn, blocks::table::ParsedCell, model::SectionLevel}; +use crate::{ColumnStyle, Error, TableColumn, blocks::table::ParsedCell, model::SectionLevel}; use super::{ParserState, document_parser, inline_processing::adjust_and_log_parse_error}; @@ -9,8 +9,19 @@ pub(crate) fn parse_table_cell( parent_section_level: Option, cell: &ParsedCell, ) -> Result { - let blocks = document_parser::blocks(content, state, cell_start_offset, parent_section_level) - .unwrap_or_else(|error| { + // Markdown blockquotes are only parsed when cell has AsciiDoc style ('a' prefix). + // This matches asciidoctor behavior where `> text` is only a blockquote in 'a' style cells. 
+ let blocks = if cell.style == Some(ColumnStyle::AsciiDoc) { + document_parser::blocks(content, state, cell_start_offset, parent_section_level) + } else { + document_parser::blocks_for_table_cell( + content, + state, + cell_start_offset, + parent_section_level, + ) + } + .unwrap_or_else(|error| { adjust_and_log_parse_error( &error, content,