diff --git a/CHANGELOG.md b/CHANGELOG.md index 817f56e2..d6ffbf36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Table colspan and rowspan rendering (`colspan="n"` and `rowspan="n"` attributes on `<td>`/`<th>`) +- Table visual attribute support: + - `frame` attribute - controls outer border (`all`, `ends`/`topbot`, `sides`, `none`) + - `grid` attribute - controls inner gridlines (`all`, `rows`, `cols`, `none`) + - `stripes` attribute - controls row striping (`even`, `odd`, `all`, `hover`) + - `width` attribute - sets explicit table width (e.g., `width=75%`) + - `%autowidth` option - uses `fit-content` sizing instead of `stretch` + - Custom roles from metadata applied as CSS classes +- Cell-level alignment overrides are now respected, falling back to column-level defaults - Initial support for `[subs=...]` attribute on verbatim blocks (listing, literal) - `subs=none` - disables all substitutions, outputs raw content - `subs=specialchars` - only escapes HTML special characters @@ -80,6 +89,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Complete cell specifier support for tables: + - Colspan: `2+|` spans 2 columns + - Rowspan: `.2+|` spans 2 rows + - Combined: `2.3+|` spans 2 columns and 3 rows + - Cell duplication: `3*|` duplicates cell content 3 times + - Cell-level horizontal alignment: `<|` (left), `^|` (center), `>|` (right) + - Cell-level vertical alignment: `.<|` (top), `.^|` (middle), `.>|` (bottom) + - Cell-level style: `s|` (strong), `e|` (emphasis), `m|` (monospace), etc. 
+ - All specifiers can be combined (e.g., `2.3+^.^s|` for colspan=2, rowspan=3, centered, strong) - Tag filtering for include directives ([#279]) - `tag=name` - include a specific tagged region - `tags=a;b;c` - include multiple tags (semicolon or comma delimited) @@ -98,6 +116,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- DSV tables now correctly preserve the first cell. Previously, DSV format (`cell1:cell2`) + was incorrectly treated like PSV (`| cell1 | cell2 |`), causing the first cell to be + dropped. Escape handling (`\:` → literal `:`) also works correctly now. +- Table cell content now has correct source position tracking for multi-line cells +- CSV tables with quoted multiline values now have accurate source positions. Previously, + positions were approximated without accounting for quote characters, so `"Hello\nWorld"` + would report incorrect line/column. Now positions point to actual content start (inside + the quotes), with proper handling for RFC 4180 escaped quotes (`""`). - Description lists with terms starting with `#` (e.g., `#issue-123:: definition`) are no longer incorrectly parsed as section boundaries inside sections. The section boundary detection now requires a space after the level marker. @@ -123,6 +149,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- **BREAKING**: `TableColumn` struct now includes `colspan`, `rowspan`, `halign`, `valign`, + and `style` fields. - **BREAKING**: `BlockMetadata.substitutions` changed from `Option<Vec<Substitution>>` to `Option<SubstitutionSpec>`. New types `SubstitutionSpec` and `SubstitutionOp` are now public exports. 
Modifier syntax (`+quotes`, `-callouts`) is now stored as operations diff --git a/Cargo.lock b/Cargo.lock index e8dfa918..e63c6599 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -108,6 +108,7 @@ version = "0.1.4" dependencies = [ "criterion", "crossterm", + "csv", "encoding_rs", "evalexpr", "peg", @@ -477,9 +478,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.49" +version = "1.2.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215" +checksum = "755d2fce177175ffca841e9a06afdb2c4ab0f593d53b4dee48147dfaade85932" dependencies = [ "find-msvc-tools", "jobserver", @@ -495,9 +496,9 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "js-sys", @@ -569,9 +570,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" [[package]] name = "color_quant" @@ -587,9 +588,9 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comfy-table" -version = "7.2.1" +version = "7.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" dependencies = [ "crossterm", "unicode-segmentation", @@ -734,6 +735,27 @@ version 
= "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "csv" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde_core", +] + +[[package]] +name = "csv-core" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" +dependencies = [ + "memchr", +] + [[package]] name = "dashmap" version = "5.5.3" @@ -772,18 +794,18 @@ dependencies = [ [[package]] name = "derive_more" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b768e943bed7bf2cab53df09f4bc34bfd217cdb57d971e769874c9a6710618" +checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134" dependencies = [ "derive_more-impl", ] [[package]] name = "derive_more-impl" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d286bfdaf75e988b4a78e013ecd79c581e06399ab53fbacd2d916c2f904f30b" +checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb" dependencies = [ "convert_case", "proc-macro2", @@ -932,15 +954,15 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.5" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" [[package]] name = "flate2" -version = "1.1.5" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = 
"b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" dependencies = [ "crc32fast", "miniz_oxide", @@ -1046,9 +1068,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "libc", @@ -1279,8 +1301,8 @@ dependencies = [ "ravif", "rgb", "tiff", - "zune-core 0.5.0", - "zune-jpeg 0.5.7", + "zune-core 0.5.1", + "zune-jpeg 0.5.9", ] [[package]] @@ -1301,9 +1323,9 @@ checksum = "e7c5cedc30da3a610cac6b4ba17597bdf7152cf974e8aab3afb3d54455e371c8" [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -1352,9 +1374,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jobserver" @@ -1368,9 +1390,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -1390,9 +1412,9 @@ checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8" [[package]] name = "libc" -version = "0.2.178" +version = "0.2.180" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] name = "libfuzzer-sys" @@ -1907,9 +1929,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.103" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" dependencies = [ "unicode-ident", ] @@ -1993,9 +2015,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.42" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" dependencies = [ "proc-macro2", ] @@ -2028,9 +2050,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -2174,7 +2196,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -2211,9 +2233,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" [[package]] name = "rustc-hash" @@ -2232,9 +2254,9 @@ dependencies = [ [[package]] name 
= "rustix" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ "bitflags 2.10.0", "errno", @@ -2245,9 +2267,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "log", "once_cell", @@ -2260,18 +2282,18 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.103.8" +version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "ring", "rustls-pki-types", @@ -2296,6 +2318,12 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "ryu" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" + [[package]] name = "same-file" version = "1.0.6" @@ -2409,10 +2437,11 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.7" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" +checksum = 
"c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] @@ -2494,9 +2523,9 @@ checksum = "b7401a30af6cb5818bb64852270bb722533397edcfc7344954a38f420819ece2" [[package]] name = "syn" -version = "2.0.111" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -2537,9 +2566,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.23.0" +version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ "fastrand", "getrandom 0.3.4", @@ -2622,30 +2651,30 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "f9e442fc33d7fdb45aa9bfeb312c095964abdf596f7567261062b2a7107aaabd" dependencies = [ "deranged", "itoa", "num-conv", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "8b36ee98fd31ec7426d599183e8fe26932a8dc1fb76ddb6214d05493377d34ca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "71e552d1249bf61ac2a52db88179fd0673def1e1ad8243a00d9ec9ed71fee3dd" dependencies = [ "num-conv", "time-core", @@ 
-2701,9 +2730,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.17" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -2714,9 +2743,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.4+spec-1.0.0" +version = "0.7.5+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe3cea6b2aa3b910092f6abd4053ea464fab5f9c170ba5e9a6aead16ec4af2b6" +checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" dependencies = [ "serde_core", ] @@ -2735,9 +2764,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.5+spec-1.0.0" +version = "1.0.6+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c03bee5ce3696f31250db0bbaff18bc43301ce0e8db2ed1f07cbb2acf89984c" +checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" dependencies = [ "winnow", ] @@ -3045,18 +3074,18 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", @@ -3067,9 +3096,9 @@ dependencies = [ [[package]] name = 
"wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3077,9 +3106,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", @@ -3090,18 +3119,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", @@ -3109,9 +3138,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" dependencies = [ "rustls-pki-types", ] @@ -3379,9 +3408,9 @@ dependencies = [ [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "writeable" @@ -3435,18 +3464,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", @@ -3515,9 +3544,9 @@ dependencies = [ [[package]] name = "zmij" -version = "1.0.2" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f4a4e8e9dc5c62d159f04fcdbe07f4c3fb710415aab4754bf11505501e3251d" +checksum = "94f63c051f4fe3c1509da62131a678643c5b6fbdc9273b2b79d4378ebda003d2" [[package]] name = "zune-core" @@ -3527,9 +3556,9 @@ checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" [[package]] name = "zune-core" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "111f7d9820f05fd715df3144e254d6fc02ee4088b0644c0ffd0efc9e6d9d2773" +checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9" [[package]] name = "zune-inflate" @@ -3551,9 +3580,9 @@ dependencies = [ [[package]] name = "zune-jpeg" -version = "0.5.7" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d915729b0e7d5fe35c2f294c5dc10b30207cc637920e5b59077bfa3da63f28" 
+checksum = "87c86acb70a85b2c16f071f171847d1945e8f44812630463cd14ec83900ad01c" dependencies = [ - "zune-core 0.5.0", + "zune-core 0.5.1", ] diff --git a/README.adoc b/README.adoc index 00d4fdd3..224cba7f 100644 --- a/README.adoc +++ b/README.adoc @@ -87,7 +87,7 @@ See link:./acdc-parser/README.adoc[acdc-parser README] for detailed feature supp cargo build --all # Run tests with detailed output -RUST_LOG=error cargo nextest run --no-fail-fast +RUST_LOG=error cargo nextest run --no-fail-fast --all-features --all-targets # Run clippy with pedantic lints cargo clippy --all-targets --all-features -- --deny clippy::pedantic diff --git a/acdc-parser/Cargo.toml b/acdc-parser/Cargo.toml index cab7a269..dbd6f513 100644 --- a/acdc-parser/Cargo.toml +++ b/acdc-parser/Cargo.toml @@ -19,6 +19,7 @@ setext = [] # Enable Setext-style (underlined) header parsing pre-spec-subs = [] [dependencies] +csv = "1.4" encoding_rs = "0.8" evalexpr = "13" peg = "0.8" diff --git a/acdc-parser/fixtures/tests/table_cell_alignment.adoc b/acdc-parser/fixtures/tests/table_cell_alignment.adoc new file mode 100644 index 00000000..b0f055fa --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_alignment.adoc @@ -0,0 +1,6 @@ +|=== +| Default | Centered | Right + +^| Center | Default | > Right +| Default | .^ Middle | .> Bottom +|=== diff --git a/acdc-parser/fixtures/tests/table_cell_alignment.html b/acdc-parser/fixtures/tests/table_cell_alignment.html new file mode 100644 index 00000000..d4d72ab6 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_alignment.html @@ -0,0 +1,475 @@ + + + + + + + + + + + +
+ +++++ + + + + + + + + + + + + + + + + + + + +
DefaultCenteredRight

Center

Default

Right

Default

Middle

Bottom

+
+ + + diff --git a/acdc-parser/fixtures/tests/table_cell_alignment.json b/acdc-parser/fixtures/tests/table_cell_alignment.json new file mode 100644 index 00000000..0e8492a9 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_alignment.json @@ -0,0 +1,378 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Default", + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 9 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 9 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Centered", + "location": [ + { + "line": 2, + "col": 13 + }, + { + "line": 2, + "col": 20 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 13 + }, + { + "line": 2, + "col": 20 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Right", + "location": [ + { + "line": 2, + "col": 24 + }, + { + "line": 2, + "col": 28 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 24 + }, + { + "line": 2, + "col": 28 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Center", + "location": [ + { + "line": 4, + "col": 4 + }, + { + "line": 4, + "col": 9 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 4 + }, + { + "line": 4, + "col": 9 + } + ] + } + ], + "halign": "center" + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": 
"string", + "value": "Default", + "location": [ + { + "line": 4, + "col": 13 + }, + { + "line": 4, + "col": 19 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 13 + }, + { + "line": 4, + "col": 19 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Right", + "location": [ + { + "line": 4, + "col": 25 + }, + { + "line": 4, + "col": 29 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 25 + }, + { + "line": 4, + "col": 29 + } + ] + } + ], + "halign": "right" + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Default", + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 9 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 9 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Middle", + "location": [ + { + "line": 5, + "col": 16 + }, + { + "line": 5, + "col": 21 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 16 + }, + { + "line": 5, + "col": 21 + } + ] + } + ], + "valign": "middle" + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Bottom", + "location": [ + { + "line": 5, + "col": 28 + }, + { + "line": 5, + "col": 33 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 28 + }, + { + "line": 5, + "col": 33 + } + ] + } + ], + "valign": "bottom" + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] +} \ No newline at end of file diff --git 
a/acdc-parser/fixtures/tests/table_cell_colspan.adoc b/acdc-parser/fixtures/tests/table_cell_colspan.adoc new file mode 100644 index 00000000..0b11f96f --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_colspan.adoc @@ -0,0 +1,7 @@ +[cols="3*"] +|=== +| A | B | C + +2+| Spans two columns | D +| E | F | G +|=== diff --git a/acdc-parser/fixtures/tests/table_cell_colspan.json b/acdc-parser/fixtures/tests/table_cell_colspan.json new file mode 100644 index 00000000..f2852451 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_colspan.json @@ -0,0 +1,345 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "cols": "3*" + } + }, + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 11 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + 
"value": "Spans two columns", + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 21 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 21 + } + ] + } + ], + "colspan": 2 + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "D", + "location": [ + { + "line": 5, + "col": 25 + }, + { + "line": 5, + "col": 25 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 25 + }, + { + "line": 5, + "col": 25 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "E", + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "F", + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "G", + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_cell_duplication.adoc 
b/acdc-parser/fixtures/tests/table_cell_duplication.adoc new file mode 100644 index 00000000..e3c05cc6 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_duplication.adoc @@ -0,0 +1,7 @@ +|=== +| A | B | C + +3*| Same + +| X | Y | Z +|=== diff --git a/acdc-parser/fixtures/tests/table_cell_duplication.json b/acdc-parser/fixtures/tests/table_cell_duplication.json new file mode 100644 index 00000000..b7259b6b --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_duplication.json @@ -0,0 +1,374 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Same", + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ], + "location": 
[ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Same", + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Same", + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 8 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "X", + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Y", + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Z", + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, 
+ "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_cell_rowspan.adoc b/acdc-parser/fixtures/tests/table_cell_rowspan.adoc new file mode 100644 index 00000000..c34e08b1 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_rowspan.adoc @@ -0,0 +1,7 @@ +|=== +| A | B | C + +.2+| Spans rows | D | E +| F | G +| H | I | J +|=== diff --git a/acdc-parser/fixtures/tests/table_cell_rowspan.json b/acdc-parser/fixtures/tests/table_cell_rowspan.json new file mode 100644 index 00000000..42cd39a7 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_rowspan.json @@ -0,0 +1,449 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": 
"text", + "type": "string", + "value": "Spans rows", + "location": [ + { + "line": 4, + "col": 6 + }, + { + "line": 4, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 6 + }, + { + "line": 4, + "col": 15 + } + ] + } + ], + "rowspan": 2 + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "D", + "location": [ + { + "line": 4, + "col": 19 + }, + { + "line": 4, + "col": 19 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 19 + }, + { + "line": 4, + "col": 19 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "E", + "location": [ + { + "line": 4, + "col": 23 + }, + { + "line": 4, + "col": 23 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 23 + }, + { + "line": 4, + "col": 23 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "F", + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "G", + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 7 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "H", + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + 
"name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "I", + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "J", + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_cell_span_combined.adoc b/acdc-parser/fixtures/tests/table_cell_span_combined.adoc new file mode 100644 index 00000000..6251a31f --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_span_combined.adoc @@ -0,0 +1,7 @@ +|=== +| A | B | C | D + +2.2+| Big cell | E | F +| G | H +| I | J | K | L +|=== diff --git a/acdc-parser/fixtures/tests/table_cell_span_combined.json b/acdc-parser/fixtures/tests/table_cell_span_combined.json new file mode 100644 index 00000000..c7fb41cf --- /dev/null +++ b/acdc-parser/fixtures/tests/table_cell_span_combined.json @@ -0,0 +1,520 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ], + 
"location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 7 + }, + { + "line": 2, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 11 + }, + { + "line": 2, + "col": 11 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "D", + "location": [ + { + "line": 2, + "col": 15 + }, + { + "line": 2, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 15 + }, + { + "line": 2, + "col": 15 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Big cell", + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 14 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 14 + } + ] + } + ], + "colspan": 2, + "rowspan": 2 + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "E", + "location": [ + { + "line": 4, + "col": 18 + }, + { + "line": 4, + "col": 18 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 18 + }, + { + "line": 4, + "col": 18 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "F", + 
"location": [ + { + "line": 4, + "col": 22 + }, + { + "line": 4, + "col": 22 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 22 + }, + { + "line": 4, + "col": 22 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "G", + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 3 + }, + { + "line": 5, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "H", + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 7 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "I", + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "J", + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "K", + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 11 + }, + { + "line": 6, + "col": 11 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": 
"text", + "type": "string", + "value": "L", + "location": [ + { + "line": 6, + "col": 15 + }, + { + "line": 6, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 15 + }, + { + "line": 6, + "col": 15 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 7, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_csv_basic.adoc b/acdc-parser/fixtures/tests/table_csv_basic.adoc new file mode 100644 index 00000000..5ec585a7 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_basic.adoc @@ -0,0 +1,6 @@ +[%header,format=csv] +|=== +name,role,department +Alice,Developer,Engineering +Bob,Designer,Marketing +|=== diff --git a/acdc-parser/fixtures/tests/table_csv_basic.json b/acdc-parser/fixtures/tests/table_csv_basic.json new file mode 100644 index 00000000..84f45e0d --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_basic.json @@ -0,0 +1,382 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "csv" + }, + "options": [ + "header" + ] + }, + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "name", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "role", + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 9 + } + ] + } + ], + 
"location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 9 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "department", + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 20 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 20 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Alice", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Developer", + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 15 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Engineering", + "location": [ + { + "line": 4, + "col": 17 + }, + { + "line": 4, + "col": 27 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 17 + }, + { + "line": 4, + "col": 27 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Bob", + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + 
"value": "Designer", + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 12 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 12 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Marketing", + "location": [ + { + "line": 5, + "col": 14 + }, + { + "line": 5, + "col": 22 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 14 + }, + { + "line": 5, + "col": 22 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_csv_multiline.adoc b/acdc-parser/fixtures/tests/table_csv_multiline.adoc new file mode 100644 index 00000000..0ce9bfc5 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_multiline.adoc @@ -0,0 +1,9 @@ +[%header,format=csv] +|=== +name,description +Alice,"Hello +World" +Bob,"Line 1 +Line 2 +Line 3" +|=== diff --git a/acdc-parser/fixtures/tests/table_csv_multiline.json b/acdc-parser/fixtures/tests/table_csv_multiline.json new file mode 100644 index 00000000..cf3c69ac --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_multiline.json @@ -0,0 +1,277 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "csv" + }, + "options": [ + "header" + ] + }, + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "name", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ], + 
"location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "description", + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 16 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 16 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Alice", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Hello\nWorld", + "location": [ + { + "line": 4, + "col": 8 + }, + { + "line": 5, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 8 + }, + { + "line": 5, + "col": 5 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Bob", + "location": [ + { + "line": 6, + "col": 1 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 1 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Line 1\nLine 2\nLine 3", + "location": [ + { + "line": 6, + "col": 6 + }, + { + "line": 8, + "col": 6 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 6 + }, + { + "line": 8, + "col": 6 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 9, + "col": 4 + } + ] + }, + "location": [ + 
{ + "line": 1, + "col": 1 + }, + { + "line": 9, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 9, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_csv_no_header.adoc b/acdc-parser/fixtures/tests/table_csv_no_header.adoc new file mode 100644 index 00000000..f351a782 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_no_header.adoc @@ -0,0 +1,6 @@ +[format=csv] +|=== +Alice,Developer,Engineering +Bob,Designer,Marketing +Carol,Manager,Operations +|=== diff --git a/acdc-parser/fixtures/tests/table_csv_no_header.json b/acdc-parser/fixtures/tests/table_csv_no_header.json new file mode 100644 index 00000000..07878fef --- /dev/null +++ b/acdc-parser/fixtures/tests/table_csv_no_header.json @@ -0,0 +1,380 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "csv" + } + }, + "content": { + "header": null, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Alice", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Developer", + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 15 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Engineering", + "location": [ + { + "line": 3, + "col": 17 + }, + { + "line": 3, + "col": 27 + } 
+ ] + } + ], + "location": [ + { + "line": 3, + "col": 17 + }, + { + "line": 3, + "col": 27 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Bob", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Designer", + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 12 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 12 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Marketing", + "location": [ + { + "line": 4, + "col": 14 + }, + { + "line": 4, + "col": 22 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 14 + }, + { + "line": 4, + "col": 22 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Carol", + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Manager", + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 13 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 7 + }, + { + "line": 5, + "col": 13 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Operations", 
+ "location": [ + { + "line": 5, + "col": 15 + }, + { + "line": 5, + "col": 24 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 15 + }, + { + "line": 5, + "col": 24 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_dsv_basic.adoc b/acdc-parser/fixtures/tests/table_dsv_basic.adoc new file mode 100644 index 00000000..69fd0a26 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_dsv_basic.adoc @@ -0,0 +1,5 @@ +[format=dsv] +|=== +cell1:cell2:cell3 +A:B:C +|=== diff --git a/acdc-parser/fixtures/tests/table_dsv_basic.json b/acdc-parser/fixtures/tests/table_dsv_basic.json new file mode 100644 index 00000000..695fb50d --- /dev/null +++ b/acdc-parser/fixtures/tests/table_dsv_basic.json @@ -0,0 +1,271 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "dsv" + } + }, + "content": { + "header": null, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "cell1", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "cell2", + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 11 + } + ] + } + ] + 
}, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "cell3", + "location": [ + { + "line": 3, + "col": 13 + }, + { + "line": 3, + "col": 17 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 13 + }, + { + "line": 3, + "col": 17 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 1 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 1 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 4, + "col": 3 + }, + { + "line": 4, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 3 + }, + { + "line": 4, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 5 + }, + { + "line": 4, + "col": 5 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_dsv_escaped_separator.adoc b/acdc-parser/fixtures/tests/table_dsv_escaped_separator.adoc new file mode 100644 index 00000000..487db874 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_dsv_escaped_separator.adoc @@ -0,0 +1,5 @@ +[format=dsv] +|=== +cell with \: colon:normal cell 
+another \: escaped:plain +|=== diff --git a/acdc-parser/fixtures/tests/table_dsv_escaped_separator.json b/acdc-parser/fixtures/tests/table_dsv_escaped_separator.json new file mode 100644 index 00000000..a9d19321 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_dsv_escaped_separator.json @@ -0,0 +1,201 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "dsv" + } + }, + "content": { + "header": null, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "cell with : colon", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 17 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 17 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "normal cell", + "location": [ + { + "line": 3, + "col": 20 + }, + { + "line": 3, + "col": 30 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 20 + }, + { + "line": 3, + "col": 30 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "another : escaped", + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 17 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 17 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "plain", + "location": [ + { + "line": 4, + "col": 20 + }, + { + "line": 4, + "col": 24 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 20 + }, + { + "line": 4, + "col": 24 + } + ] + } + ] 
+ } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 5, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_psv_escaped_separator.adoc b/acdc-parser/fixtures/tests/table_psv_escaped_separator.adoc new file mode 100644 index 00000000..ef0c5c86 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_psv_escaped_separator.adoc @@ -0,0 +1,4 @@ +|=== +| cell with \| pipe | normal cell +| another \| escaped | plain +|=== diff --git a/acdc-parser/fixtures/tests/table_psv_escaped_separator.json b/acdc-parser/fixtures/tests/table_psv_escaped_separator.json new file mode 100644 index 00000000..a3ccef26 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_psv_escaped_separator.json @@ -0,0 +1,196 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "content": { + "header": null, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "cell with | pipe", + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 18 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 3 + }, + { + "line": 2, + "col": 18 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "normal cell", + "location": [ + { + "line": 2, + "col": 23 + }, + { + "line": 2, + "col": 33 + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 23 + }, + { + "line": 2, + "col": 33 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + 
"type": "string", + "value": "another | escaped", + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 19 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 19 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "plain", + "location": [ + { + "line": 3, + "col": 24 + }, + { + "line": 3, + "col": 28 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 24 + }, + { + "line": 3, + "col": 28 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 4, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 4, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 4, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_rowspan_with_cols.adoc b/acdc-parser/fixtures/tests/table_rowspan_with_cols.adoc new file mode 100644 index 00000000..55f0b922 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_rowspan_with_cols.adoc @@ -0,0 +1,8 @@ +[cols="1,1,1"] +|=== +| A | B | C + +.2+| Spans rows | D | E +| F | G +| H | I | J +|=== diff --git a/acdc-parser/fixtures/tests/table_rowspan_with_cols.json b/acdc-parser/fixtures/tests/table_rowspan_with_cols.json new file mode 100644 index 00000000..05dc72c8 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_rowspan_with_cols.json @@ -0,0 +1,454 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "cols": "1,1,1" + } + }, + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "A", + "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 3 + } + ] + } + ], 
+ "location": [ + { + "line": 3, + "col": 3 + }, + { + "line": 3, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "B", + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 7 + }, + { + "line": 3, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "C", + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 11 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Spans rows", + "location": [ + { + "line": 5, + "col": 6 + }, + { + "line": 5, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 6 + }, + { + "line": 5, + "col": 15 + } + ] + } + ], + "rowspan": 2 + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "D", + "location": [ + { + "line": 5, + "col": 19 + }, + { + "line": 5, + "col": 19 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 19 + }, + { + "line": 5, + "col": 19 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "E", + "location": [ + { + "line": 5, + "col": 23 + }, + { + "line": 5, + "col": 23 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 23 + }, + { + "line": 5, + "col": 23 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + 
"value": "F", + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 3 + }, + { + "line": 6, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "G", + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 6, + "col": 7 + }, + { + "line": 6, + "col": 7 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "H", + "location": [ + { + "line": 7, + "col": 3 + }, + { + "line": 7, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 7, + "col": 3 + }, + { + "line": 7, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "I", + "location": [ + { + "line": 7, + "col": 7 + }, + { + "line": 7, + "col": 7 + } + ] + } + ], + "location": [ + { + "line": 7, + "col": 7 + }, + { + "line": 7, + "col": 7 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "J", + "location": [ + { + "line": 7, + "col": 11 + }, + { + "line": 7, + "col": 11 + } + ] + } + ], + "location": [ + { + "line": 7, + "col": 11 + }, + { + "line": 7, + "col": 11 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 8, + "col": 4 + } + ] + }, + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 8, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 8, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/fixtures/tests/table_tsv_basic.adoc b/acdc-parser/fixtures/tests/table_tsv_basic.adoc new file mode 
100644 index 00000000..9a7a0d1f --- /dev/null +++ b/acdc-parser/fixtures/tests/table_tsv_basic.adoc @@ -0,0 +1,6 @@ +[%header,format=tsv] +|=== +name role department +Alice Developer Engineering +Bob Designer Marketing +|=== diff --git a/acdc-parser/fixtures/tests/table_tsv_basic.json b/acdc-parser/fixtures/tests/table_tsv_basic.json new file mode 100644 index 00000000..93765247 --- /dev/null +++ b/acdc-parser/fixtures/tests/table_tsv_basic.json @@ -0,0 +1,382 @@ +{ + "name": "document", + "type": "block", + "blocks": [ + { + "name": "table", + "type": "block", + "form": "delimited", + "delimiter": "|===", + "metadata": { + "attributes": { + "format": "tsv" + }, + "options": [ + "header" + ] + }, + "content": { + "header": { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "name", + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 1 + }, + { + "line": 3, + "col": 4 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "role", + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 9 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 6 + }, + { + "line": 3, + "col": 9 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "department", + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 20 + } + ] + } + ], + "location": [ + { + "line": 3, + "col": 11 + }, + { + "line": 3, + "col": 20 + } + ] + } + ] + } + ] + }, + "footer": null, + "rows": [ + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Alice", + "location": [ + { + "line": 4, + "col": 
1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 1 + }, + { + "line": 4, + "col": 5 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Developer", + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 15 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 7 + }, + { + "line": 4, + "col": 15 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Engineering", + "location": [ + { + "line": 4, + "col": 17 + }, + { + "line": 4, + "col": 27 + } + ] + } + ], + "location": [ + { + "line": 4, + "col": 17 + }, + { + "line": 4, + "col": 27 + } + ] + } + ] + } + ] + }, + { + "columns": [ + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Bob", + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 3 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 1 + }, + { + "line": 5, + "col": 3 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Designer", + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 12 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 5 + }, + { + "line": 5, + "col": 12 + } + ] + } + ] + }, + { + "content": [ + { + "name": "paragraph", + "type": "block", + "inlines": [ + { + "name": "text", + "type": "string", + "value": "Marketing", + "location": [ + { + "line": 5, + "col": 14 + }, + { + "line": 5, + "col": 22 + } + ] + } + ], + "location": [ + { + "line": 5, + "col": 14 + }, + { + "line": 5, + "col": 22 + } + ] + } + ] + } + ] + } + ], + "location": [ + { + "line": 2, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + }, + "location": [ + { + 
"line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] + } + ], + "location": [ + { + "line": 1, + "col": 1 + }, + { + "line": 6, + "col": 4 + } + ] +} \ No newline at end of file diff --git a/acdc-parser/src/blocks/mod.rs b/acdc-parser/src/blocks/mod.rs index dfc7a2cf..07db3980 100644 --- a/acdc-parser/src/blocks/mod.rs +++ b/acdc-parser/src/blocks/mod.rs @@ -1,2 +1,2 @@ mod list; -mod table; +pub(crate) mod table; diff --git a/acdc-parser/src/blocks/table.rs b/acdc-parser/src/blocks/table.rs index bca8cb36..302af03d 100644 --- a/acdc-parser/src/blocks/table.rs +++ b/acdc-parser/src/blocks/table.rs @@ -1,4 +1,500 @@ -use crate::Table; +use crate::{ColumnStyle, HorizontalAlignment, Table, VerticalAlignment}; + +/// A cell part with its unescaped content and original start position. +struct CellPart { + /// Unescaped content (e.g., `\|` becomes `|`) + content: String, + /// Start position in the original line + start: usize, +} + +/// Split a line by separator, respecting backslash escapes. +/// +/// For PSV (`|`) and DSV (`:`), a backslash before the separator escapes it. +/// Returns parts with their original byte positions for accurate source mapping. 
+fn split_escaped(line: &str, separator: char) -> Vec { + let mut parts = Vec::new(); + let mut current_content = String::new(); + let mut part_start = 0; + let mut chars = line.char_indices().peekable(); + + while let Some((byte_idx, ch)) = chars.next() { + if ch == '\\' { + // Check if next char is the separator + if let Some(&(_, next_ch)) = chars.peek() { + if next_ch == separator { + // Escaped separator - add literal separator, skip the backslash + current_content.push(separator); + chars.next(); // consume the separator + continue; + } + } + // Not an escape - add backslash literally + current_content.push(ch); + } else if ch == separator { + // Unescaped separator - end current part + parts.push(CellPart { + content: std::mem::take(&mut current_content), + start: part_start, + }); + part_start = byte_idx + ch.len_utf8(); + } else { + current_content.push(ch); + } + } + + // Add final part + parts.push(CellPart { + content: current_content, + start: part_start, + }); + + parts +} + +/// Parse a CSV table body using the `csv` crate for full RFC 4180 compliance. +/// +/// This handles multi-line quoted values, escaped quotes, and all CSV edge cases. +/// Returns rows with cells containing their content and accurate byte positions. 
+fn parse_csv_table(text: &str, base_offset: usize) -> Vec> { + let text_bytes = text.as_bytes(); + let mut reader = csv::ReaderBuilder::new() + .has_headers(false) + .flexible(true) // allow variable column counts + .from_reader(text_bytes); + + let mut rows = Vec::new(); + + for result in reader.records() { + let Ok(record) = result else { + continue; + }; + + // Get the byte position where this record starts in the input + let record_start = record + .position() + .map_or(0, |p| usize::try_from(p.byte()).unwrap_or(0)); + + let mut cells = Vec::new(); + let mut scan_pos = record_start; + + for field in &record { + // Find actual field position by scanning the original text + let (field_content_start, next_pos) = + find_csv_field_position(text_bytes, scan_pos, field); + + cells.push(CellPart { + content: field.to_string(), + start: base_offset + field_content_start, + }); + + scan_pos = next_pos; + } + + rows.push(cells); + } + + rows +} + +/// Find the actual byte position of a CSV field's content in the original text. 
+/// +/// Returns `(content_start, next_scan_position)` where: +/// - `content_start` is where the field's actual content begins (after opening quote if quoted) +/// - `next_scan_position` is where to start scanning for the next field +fn find_csv_field_position(text: &[u8], start: usize, expected_content: &str) -> (usize, usize) { + let Some(&first_byte) = text.get(start) else { + return (start, start); + }; + + if first_byte == b'"' { + // Quoted field: content starts after the opening quote + let content_start = start + 1; + // Find the closing quote (handle escaped quotes "") + let end_pos = find_closing_quote(text, start + 1); + // Next field starts after closing quote and comma (or newline) + let next_pos = skip_to_next_field(text, end_pos); + (content_start, next_pos) + } else { + // Unquoted field: content starts at current position + let content_start = start; + // Find end of field (comma or newline) + let end_pos = find_unquoted_field_end(text, start, expected_content.len()); + // Next field starts after the separator + let next_pos = skip_to_next_field(text, end_pos); + (content_start, next_pos) + } +} + +/// Find the closing quote of a quoted CSV field, handling escaped quotes (`""`). +fn find_closing_quote(text: &[u8], start: usize) -> usize { + let mut pos = start; + while let Some(&byte) = text.get(pos) { + if byte == b'"' { + // Check if this is an escaped quote ("") + if text.get(pos + 1) == Some(&b'"') { + // Escaped quote - skip both and continue + pos += 2; + } else { + // Closing quote found + return pos; + } + } else { + pos += 1; + } + } + // No closing quote found - return end of text + text.len() +} + +/// Find the end of an unquoted CSV field. 
+fn find_unquoted_field_end(text: &[u8], start: usize, content_len: usize) -> usize { + // The field ends at comma, CR, LF, or content_len bytes (whichever comes first) + let mut pos = start; + let mut remaining = content_len; + while let Some(&byte) = text.get(pos) { + if byte == b',' || byte == b'\n' || byte == b'\r' { + return pos; + } + if remaining == 0 { + return pos; + } + remaining = remaining.saturating_sub(1); + pos += 1; + } + text.len() +} + +/// Skip past the current field separator to find the start of the next field. +fn skip_to_next_field(text: &[u8], pos: usize) -> usize { + let mut pos = pos; + // Skip closing quote if present + if text.get(pos) == Some(&b'"') { + pos += 1; + } + // Skip comma or newline characters + while let Some(&byte) = text.get(pos) { + if byte == b',' { + return pos + 1; + } + if byte == b'\r' || byte == b'\n' { + // Skip CRLF or just LF + if byte == b'\r' && text.get(pos + 1) == Some(&b'\n') { + return pos + 2; + } + return pos + 1; + } + pos += 1; + } + pos +} + +/// Determine if this is a CSV format table. +fn is_csv_format(separator: &str) -> bool { + separator == "," +} + +/// Split a line into cell parts using the appropriate method for the separator. +/// +/// Note: CSV format is handled separately via `parse_csv_table()` for multi-line support. +fn split_line(line: &str, separator: &str) -> Vec { + if let Some(sep_char) = separator.chars().next() { + if separator.len() == 1 { + split_escaped(line, sep_char) + } else { + // Multi-char separator - no escape handling + split_multi_char(line, separator) + } + } else { + // Empty separator - return whole line as one part + vec![CellPart { + content: line.to_string(), + start: 0, + }] + } +} + +/// Split by multi-character separator (no escape handling). 
+fn split_multi_char(line: &str, separator: &str) -> Vec { + let mut parts = Vec::new(); + let mut last_end = 0; + for (idx, _) in line.match_indices(separator) { + parts.push(CellPart { + content: line.get(last_end..idx).unwrap_or("").to_string(), + start: last_end, + }); + last_end = idx + separator.len(); + } + parts.push(CellPart { + content: line.get(last_end..).unwrap_or("").to_string(), + start: last_end, + }); + parts +} + +/// Represents a parsed cell specifier with span, alignment, and style information. +/// +/// In `AsciiDoc`, cell specifiers appear before the cell separator with format: +/// `[halign][valign][colspan][.rowspan][op][style]|` +/// +/// Examples: +/// - `2+|content` → colspan=2 +/// - `.3+|content` → rowspan=3 +/// - `2.3+|content` → colspan=2, rowspan=3 +/// - `^.>2+s|content` → center, bottom, colspan=2, strong style +/// - `3*|content` → duplicate cell 3 times +#[derive(Debug, Clone, Copy)] +pub(crate) struct CellSpecifier { + pub colspan: usize, + pub rowspan: usize, + pub halign: Option, + pub valign: Option, + pub style: Option, + /// If true, this is a duplication specifier (`*`) rather than a span (`+`). + pub is_duplication: bool, + /// For duplication, this is the count (e.g., `3*` means 3 copies). + pub duplication_count: usize, +} + +impl Default for CellSpecifier { + fn default() -> Self { + Self { + colspan: 1, + rowspan: 1, + halign: None, + valign: None, + style: None, + is_duplication: false, + duplication_count: 1, + } + } +} + +/// Parse a single style letter into a `ColumnStyle`. +fn parse_style_byte(byte: u8) -> Option { + match byte { + b'a' => Some(ColumnStyle::AsciiDoc), + b'd' => Some(ColumnStyle::Default), + b'e' => Some(ColumnStyle::Emphasis), + b'h' => Some(ColumnStyle::Header), + b'l' => Some(ColumnStyle::Literal), + b'm' => Some(ColumnStyle::Monospace), + b's' => Some(ColumnStyle::Strong), + _ => None, + } +} + +impl CellSpecifier { + /// Parse a cell specifier from the beginning of cell content. 
+ /// + /// Returns the specifier and the offset where actual content begins. + /// Full pattern: `[halign][valign][colspan][.rowspan][+|*][style]` + /// + /// Examples: + /// - `"2+rest"` → colspan=2 + /// - `".3+rest"` → rowspan=3 + /// - `"2.3+rest"` → colspan=2, rowspan=3 + /// - `"^.>2+srest"` → center, bottom, colspan=2, strong style + /// - `"3*rest"` → `duplication_count`=3 + /// - `"plain"` → defaults (no specifier found) + #[must_use] + pub fn parse(content: &str) -> (Self, usize) { + let bytes = content.as_bytes(); + let mut pos = 0; + + // Phase 1: Parse optional alignment markers + let (halign, valign, align_end) = Self::parse_alignments(bytes, pos); + pos = align_end; + + // Phase 2: Parse optional colspan (digits) + let (colspan, colspan_end) = Self::parse_number(content, bytes, pos); + pos = colspan_end; + + // Phase 3: Parse optional rowspan (dot followed by digits) + let (rowspan, rowspan_end) = Self::parse_rowspan(content, bytes, pos); + pos = rowspan_end; + + // Phase 4: Check for operator and build result + Self::build_result(bytes, pos, colspan, rowspan, halign, valign) + } + + /// Parse alignment markers at the current position. + /// Returns `(halign, valign, new_position)`. 
+ fn parse_alignments( + bytes: &[u8], + mut pos: usize, + ) -> ( + Option, + Option, + usize, + ) { + let mut halign: Option = None; + let mut valign: Option = None; + + loop { + match bytes.get(pos) { + Some(b'<') => { + halign = Some(HorizontalAlignment::Left); + pos += 1; + } + Some(b'^') => { + halign = Some(HorizontalAlignment::Center); + pos += 1; + } + Some(b'>') => { + halign = Some(HorizontalAlignment::Right); + pos += 1; + } + Some(b'.') => { + // Could be vertical alignment (.< .^ .>) or rowspan (.N) + match bytes.get(pos + 1) { + Some(b'<') => { + valign = Some(VerticalAlignment::Top); + pos += 2; + } + Some(b'^') => { + valign = Some(VerticalAlignment::Middle); + pos += 2; + } + Some(b'>') => { + valign = Some(VerticalAlignment::Bottom); + pos += 2; + } + _ => break, // Not vertical alignment, might be rowspan + } + } + _ => break, + } + } + + (halign, valign, pos) + } + + /// Parse a number (for colspan) at the current position. + /// Returns `(parsed_value, new_position)`. + fn parse_number(content: &str, bytes: &[u8], mut pos: usize) -> (Option, usize) { + let start = pos; + while bytes.get(pos).is_some_and(u8::is_ascii_digit) { + pos += 1; + } + let value = if pos > start { + content + .get(start..pos) + .and_then(|s| s.parse::().ok()) + } else { + None + }; + (value, pos) + } + + /// Parse rowspan (dot followed by digits) at the current position. + /// Returns `(parsed_value, new_position)`. + fn parse_rowspan(content: &str, bytes: &[u8], mut pos: usize) -> (Option, usize) { + if bytes.get(pos) != Some(&b'.') { + return (None, pos); + } + + let dot_pos = pos; + pos += 1; + let start = pos; + while bytes.get(pos).is_some_and(u8::is_ascii_digit) { + pos += 1; + } + + if pos > start { + let value = content + .get(start..pos) + .and_then(|s| s.parse::().ok()); + (value, pos) + } else { + // Dot without following digits - not a rowspan specifier + (None, dot_pos) + } + } + + /// Build the final result based on parsed components. 
+ fn build_result( + bytes: &[u8], + mut pos: usize, + colspan: Option, + rowspan: Option, + halign: Option, + valign: Option, + ) -> (Self, usize) { + let has_span_or_dup = colspan.is_some() || rowspan.is_some(); + let is_duplication = bytes.get(pos) == Some(&b'*'); + let is_span = bytes.get(pos) == Some(&b'+'); + + if (is_span || is_duplication) && has_span_or_dup { + pos += 1; + + // Parse optional style letter after operator + let style = bytes.get(pos).and_then(|&b| parse_style_byte(b)); + if style.is_some() { + pos += 1; + } + + let spec = if is_duplication { + Self { + colspan: 1, + rowspan: 1, + halign, + valign, + style, + is_duplication: true, + duplication_count: colspan.unwrap_or(1), + } + } else { + Self { + colspan: colspan.unwrap_or(1), + rowspan: rowspan.unwrap_or(1), + halign, + valign, + style, + is_duplication: false, + duplication_count: 1, + } + }; + (spec, pos) + } else if halign.is_some() || valign.is_some() { + // Alignment without span operator - still valid + let style = bytes.get(pos).and_then(|&b| parse_style_byte(b)); + if style.is_some() { + pos += 1; + } + ( + Self { + colspan: 1, + rowspan: 1, + halign, + valign, + style, + is_duplication: false, + duplication_count: 1, + }, + pos, + ) + } else { + // No valid specifier found + (Self::default(), 0) + } + } +} + +/// A parsed table cell with position, span, alignment, and style information. 
+#[derive(Debug, Clone)] +pub(crate) struct ParsedCell { + pub content: String, + pub start: usize, + pub end: usize, + pub colspan: usize, + pub rowspan: usize, + pub halign: Option, + pub valign: Option, + pub style: Option, + pub is_duplication: bool, + pub duplication_count: usize, +} impl Table { pub(crate) fn parse_rows_with_positions( @@ -6,7 +502,12 @@ impl Table { separator: &str, has_header: &mut bool, base_offset: usize, - ) -> Vec> { + ) -> Vec> { + // CSV format needs special handling for multi-line quoted values + if is_csv_format(separator) { + return Self::parse_csv_rows_with_positions(text, has_header, base_offset); + } + let mut rows = Vec::new(); let mut current_offset = base_offset; let lines: Vec<&str> = text.lines().collect(); @@ -36,9 +537,10 @@ impl Table { // Check if this is a single-line-per-row table (line has multiple separators) // vs multi-line-per-row table (one cell per line, rows separated by empty lines) + // A line is single-line row if it has multiple separators (handles both `| a | b` + // and `2+| a | b` formats) let first_line = line_ref.trim_end(); - let is_single_line_row = - first_line.starts_with(separator) && first_line.matches(separator).count() > 1; + let is_single_line_row = first_line.matches(separator).count() > 1; if is_single_line_row { // Single-line row format: each line is a complete row @@ -85,55 +587,158 @@ impl Table { rows } + /// Parse CSV table rows using the `csv` crate for RFC 4180 compliance. + /// + /// This handles multi-line quoted values correctly by processing the entire + /// table body at once rather than line-by-line. + fn parse_csv_rows_with_positions( + text: &str, + has_header: &mut bool, + base_offset: usize, + ) -> Vec> { + // Check for header indicator: first row followed by blank line + // For CSV, we need to detect this before parsing since the csv crate + // consumes the text as a stream. 
+ let lines: Vec<&str> = text.lines().collect(); + if lines.len() >= 2 { + // Find where first CSV record ends - look for first complete record + // A simple heuristic: if line 1 (0-indexed) is empty, we have a header + if let Some(&line) = lines.get(1) { + if line.trim().is_empty() { + *has_header = true; + } + } + } + + let csv_rows = parse_csv_table(text, base_offset); + let mut rows = Vec::new(); + + for csv_row in csv_rows { + let mut cells = Vec::new(); + for part in csv_row { + let content = part.content.trim(); + let start = part.start; + let end = if content.is_empty() { + start + } else { + start + content.len().saturating_sub(1) + }; + + cells.push(ParsedCell { + content: content.to_string(), + start, + end, + colspan: 1, + rowspan: 1, + halign: None, + valign: None, + style: None, + is_duplication: false, + duplication_count: 1, + }); + } + if !cells.is_empty() { + rows.push(cells); + } + } + + rows + } + fn parse_row_with_positions( row_lines: &[&str], separator: &str, row_start_offset: usize, - ) -> Vec<(String, usize, usize)> { + ) -> Vec { let mut columns = Vec::new(); let mut current_offset = row_start_offset; for line in row_lines { - // Skip lines that don't start with the separator - if !line.starts_with(separator) { + // Check if line contains the separator at all + if !line.contains(separator) { current_offset += line.len() + 1; // +1 for newline continue; } - // Split the line by separator to get all cells - let parts: Vec<&str> = line.split(separator).collect(); + // Split the line by separator, handling escapes appropriately + let parts = split_line(line, separator); - // Track position within the line - let mut line_offset = current_offset; + // Handle span specifier at the start of line (before first separator) + // e.g., "2+| content" -> part 0 is "2+", applies to part 1 + let mut pending_spec: Option = None; + + // Determine if first part should be treated as content or specifier/skip + // For PSV (|): first part is before the leading 
separator, skip it or treat as specifier + // For CSV (,) and DSV (:): first part is actual cell content - // Skip the first empty part (before the first |) for (i, part) in parts.iter().enumerate() { - if i == 0 { - // First part is always empty (before first |) - line_offset += separator.len(); + if i == 0 && separator == "|" { + // First part is before first separator (PSV format only) + let trimmed = part.content.trim(); + if !trimmed.is_empty() { + // Check if this looks like a specifier (e.g., "2+", "3*", "^.>") + let (spec, spec_len) = CellSpecifier::parse(trimmed); + if spec_len > 0 && spec_len == trimmed.len() { + // Entire first part is a specifier, apply to next cell + pending_spec = Some(spec); + } + // If not a complete specifier, it's just content before first separator + // which we skip for PSV + } continue; } - let cell_content_with_spaces = part; - let cell_content = cell_content_with_spaces.trim(); + let cell_content_trimmed = part.content.trim(); + + // Use pending specifier if we have one, otherwise parse from content + let (spec, spec_offset) = if let Some(pending) = pending_spec.take() { + (pending, 0) + } else { + CellSpecifier::parse(cell_content_trimmed) + }; + + // The actual cell content starts after the specifier + let cell_content = if spec_offset > 0 { + cell_content_trimmed + .get(spec_offset..) 
+ .unwrap_or("") + .trim_start() + } else { + cell_content_trimmed + }; - // Find where the actual content starts (after leading spaces) - let leading_spaces = - cell_content_with_spaces.len() - cell_content_with_spaces.trim_start().len(); - let cell_start = line_offset + leading_spaces; + // Calculate where cell_content starts within part.content + // Pattern: leading_ws + spec_offset + post_spec_ws + let leading_ws = part.content.len() - part.content.trim_start().len(); + let post_spec_ws = if spec_offset > 0 { + let after_spec = cell_content_trimmed.get(spec_offset..).unwrap_or(""); + after_spec.len() - after_spec.trim_start().len() + } else { + 0 + }; + let content_start_offset = leading_ws + spec_offset + post_spec_ws; + + // Calculate positions using actual content boundaries + let cell_start = current_offset + part.start + content_start_offset; let cell_end = if cell_content.is_empty() { cell_start } else { - cell_start + cell_content.len() - 1 // -1 for inclusive end + // End is start + content length - 1 (inclusive end position) + cell_start + cell_content.len().saturating_sub(1) }; - columns.push((cell_content.to_string(), cell_start, cell_end)); - - // Move offset past this cell and its separator - line_offset += part.len(); - if i < parts.len() - 1 { - line_offset += separator.len(); - } + columns.push(ParsedCell { + content: cell_content.to_string(), + start: cell_start, + end: cell_end, + colspan: spec.colspan, + rowspan: spec.rowspan, + halign: spec.halign, + valign: spec.valign, + style: spec.style, + is_duplication: spec.is_duplication, + duplication_count: spec.duplication_count, + }); } current_offset += line.len() + 1; // +1 for newline @@ -142,3 +747,85 @@ impl Table { columns } } + +#[cfg(test)] +#[allow(clippy::panic, clippy::indexing_slicing)] +mod tests { + use super::*; + + #[test] + fn split_escaped_psv_no_escapes() { + let parts = split_escaped("| cell1 | cell2 |", '|'); + let [p0, p1, p2, p3] = parts.as_slice() else { + panic!("expected 
4 parts, got {}", parts.len()); + }; + assert_eq!(p0.content, ""); + assert_eq!(p1.content, " cell1 "); + assert_eq!(p2.content, " cell2 "); + assert_eq!(p3.content, ""); + } + + #[test] + fn split_escaped_psv_with_escape() { + let parts = split_escaped(r"| cell with \| pipe | normal |", '|'); + let [p0, p1, p2, p3] = parts.as_slice() else { + panic!("expected 4 parts, got {}", parts.len()); + }; + assert_eq!(p0.content, ""); + assert_eq!(p1.content, " cell with | pipe "); + assert_eq!(p2.content, " normal "); + assert_eq!(p3.content, ""); + } + + #[test] + fn split_escaped_dsv_no_escapes() { + let parts = split_escaped("cell1:cell2:cell3", ':'); + let [p0, p1, p2] = parts.as_slice() else { + panic!("expected 3 parts, got {}", parts.len()); + }; + assert_eq!(p0.content, "cell1"); + assert_eq!(p1.content, "cell2"); + assert_eq!(p2.content, "cell3"); + } + + #[test] + fn split_escaped_dsv_with_escape() { + let parts = split_escaped(r"cell with \: colon:normal", ':'); + let [p0, p1] = parts.as_slice() else { + panic!("expected 2 parts, got {}", parts.len()); + }; + assert_eq!(p0.content, "cell with : colon"); + assert_eq!(p1.content, "normal"); + } + + #[test] + fn split_escaped_backslash_not_before_separator() { + // Backslash before non-separator should be preserved + let parts = split_escaped(r"cell\n with backslash|next", '|'); + let [p0, p1] = parts.as_slice() else { + panic!("expected 2 parts, got {}", parts.len()); + }; + assert_eq!(p0.content, r"cell\n with backslash"); + assert_eq!(p1.content, "next"); + } + + #[test] + fn split_escaped_multiple_escapes() { + let parts = split_escaped(r"\|start\|middle\|end", '|'); + let [p0] = parts.as_slice() else { + panic!("expected 1 part, got {}", parts.len()); + }; + assert_eq!(p0.content, "|start|middle|end"); + } + + #[test] + fn split_escaped_positions_tracked() { + let parts = split_escaped("ab|cd|ef", '|'); + let [p0, p1, p2] = parts.as_slice() else { + panic!("expected 3 parts, got {}", parts.len()); + }; + 
assert_eq!(p0.start, 0); + assert_eq!(p1.start, 3); // after "ab|" + assert_eq!(p2.start, 6); // after "ab|cd|" + } +} diff --git a/acdc-parser/src/grammar/document.rs b/acdc-parser/src/grammar/document.rs index 7e6e80e5..5c2ae796 100644 --- a/acdc-parser/src/grammar/document.rs +++ b/acdc-parser/src/grammar/document.rs @@ -1512,33 +1512,91 @@ peg::parser! { let mut footer = None; let mut rows = Vec::new(); + // Track rowspan state: maps column positions to remaining rowspan count. + // When a cell has rowspan > 1, we track how many more rows it occupies. + // Each entry: (column_position, remaining_rows, colspan_width) + let mut active_rowspans: Vec<(usize, usize, usize)> = Vec::new(); + for (i, row) in raw_rows.iter().enumerate() { - let columns = row - .iter() - .filter(|(cell, _, _)| !cell.is_empty()) - .map(|(cell, start, _end)| parse_table_cell(cell, state, *start, block_metadata.parent_section_level)) - .collect::, _>>()?; + // Process cells, handling duplication + let mut columns = Vec::new(); + for cell in row.iter().filter(|c| !c.content.is_empty()) { + let parsed = parse_table_cell(&cell.content, state, cell.start, block_metadata.parent_section_level, cell)?; + if cell.is_duplication && cell.duplication_count > 1 { + // Duplicate the cell N times + for _ in 0..cell.duplication_count { + columns.push(parsed.clone()); + } + } else { + columns.push(parsed); + } + } // Calculate row line number from first cell for better error reporting let row_line = if let Some(first) = row.first() { - state.create_location(first.1, first.2).start.line + state.create_location(first.start, first.end).start.line } else { table_location.start.line // Fallback if row is empty (shouldn't happen) }; - // validate that if we have ncols we have the same number of columns in each row + // Calculate occupied columns from active rowspans + let occupied_from_rowspans: usize = active_rowspans.iter().map(|(_pos, _remaining, width)| *width).sum(); + + // Logical column count = columns 
occupied by rowspans + colspans of new cells + let logical_col_count: usize = occupied_from_rowspans + columns.iter().map(|c| c.colspan).sum::(); + if let Some(ncols) = ncols - && columns.len() != ncols + && logical_col_count != ncols { - tracing::warn!( - actual = columns.len(), - expected = ncols, - line = row_line, - "table row has incorrect column count, skipping row" - ); + // Check if any cell's colspan exceeds the table width + let has_overflow = columns.iter().any(|c| c.colspan > ncols); + if has_overflow { + tracing::error!( + actual = logical_col_count, + expected = ncols, + line = row_line, + "dropping cell because it exceeds specified number of columns" + ); + } else { + tracing::warn!( + actual = logical_col_count, + expected = ncols, + occupied_from_rowspans, + line = row_line, + "table row has incorrect column count, skipping row" + ); + } continue; } + // Update active rowspans for this row: + // 1. Decrement remaining count for existing rowspans + // 2. Remove rowspans that are now exhausted + active_rowspans.retain_mut(|(_pos, remaining, _width)| { + *remaining -= 1; + *remaining > 0 + }); + + // 3. 
Add new rowspans from current row's cells + let mut col_position = 0; + for (_, active_pos, _, colspan) in active_rowspans.iter().map(|(p, r, c)| (*p, *p, *r, *c)) { + if col_position == active_pos { + col_position += colspan; + } + } + for cell in &columns { + // Skip over positions occupied by rowspans + while active_rowspans.iter().any(|(pos, _, width)| col_position >= *pos && col_position < pos + width) { + if let Some((_, _, width)) = active_rowspans.iter().find(|(pos, _, w)| col_position >= *pos && col_position < pos + w) { + col_position += width; + } + } + if cell.rowspan > 1 { + active_rowspans.push((col_position, cell.rowspan - 1, cell.colspan)); + } + col_position += cell.colspan; + } + // if we have a header, we need to add the columns we have to the header if has_header { header = Some(TableRow { columns }); @@ -4932,7 +4990,7 @@ peg::parser! { /// Excludes '[' and ']' to respect AsciiDoc macro/attribute boundaries rule url_path() -> String = path:$(['A'..='Z' | 'a'..='z' | '0'..='9' | '-' | '.' | '_' | '~' | ':' | '/' | '?' | '#' | '@' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '%' | '\\' ]+) {? - let mut inline_state = InlinePreprocessorParserState::new( + let inline_state = InlinePreprocessorParserState::new( path, state.line_map.clone(), &state.input, @@ -4951,7 +5009,7 @@ peg::parser! { /// Includes '{' and '}' for `AsciiDoc` attribute substitution pub rule path() -> String = path:$(['A'..='Z' | 'a'..='z' | '0'..='9' | '{' | '}' | '_' | '-' | '.' | '/' | '\\' ]+) {? 
- let mut inline_state = InlinePreprocessorParserState::new( + let inline_state = InlinePreprocessorParserState::new( path, state.line_map.clone(), &state.input, diff --git a/acdc-parser/src/grammar/inline_preprocessor.rs b/acdc-parser/src/grammar/inline_preprocessor.rs index e53f8567..c4b3d113 100644 --- a/acdc-parser/src/grammar/inline_preprocessor.rs +++ b/acdc-parser/src/grammar/inline_preprocessor.rs @@ -230,6 +230,10 @@ parser!( kbd_macro() / monospace() / passthrough() + // counter_reference must come BEFORE attribute_reference because counters + // have a colon in the name (e.g., {counter:num}) which is not valid in + // standard attribute names + / counter_reference() / attribute_reference() / unprocessed_text() } / expected!("inlines parser failed") @@ -250,6 +254,33 @@ parser!( text.to_string() } + /// Counter reference: `{counter:name}`, `{counter:name:initial}`, `{counter2:name}` + /// + /// Counters are not supported. Per asciidoctor maintainer feedback, counters are + /// "a disaster" that they want to redesign or remove. We detect them, emit a warning, + /// and return empty string (the counter syntax is silently removed from output). + rule counter_reference() -> String + = start:position() "{" + counter_type:$("counter2" / "counter") ":" + name:$(['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-']+) + (":" ['a'..='z' | 'A'..='Z' | '0'..='9']+)? 
+ "}" + { + tracing::warn!( + counter_type, + name, + "Counters ({{{counter_type}:{name}}}) are not supported and will be removed from output" + ); + + // Calculate total length for position tracking + // We capture the full match including any optional initial value + let total_len = counter_type.len() + 1 + name.len() + 2; // "{" + counter_type + ":" + name + "}" + let _location = state.calculate_location(start, "", total_len); + + // Return empty string - counter is removed from output + String::new() + } + rule attribute_reference() -> String = start:position() "{" attribute_name:attribute_name() "}" { let location = state.calculate_location(start, attribute_name, 2); @@ -485,11 +516,14 @@ parser!( = $(['a'..='z' | 'A'..='Z' | '0'..='9']+) rule unprocessed_text() -> String - = text:$((!(passthrough_pattern() / attribute_reference_pattern() / kbd_macro_pattern() / monospace_pattern()) [_])+) { + = text:$((!(passthrough_pattern() / counter_reference_pattern() / attribute_reference_pattern() / kbd_macro_pattern() / monospace_pattern()) [_])+) { state.advance(text); text.to_string() } + /// Pattern for counter references: {counter:name} or {counter:name:initial} or {counter2:...} + rule counter_reference_pattern() = "{" ("counter2" / "counter") ":" ['a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-']+ (":" ['a'..='z' | 'A'..='Z' | '0'..='9']+)? 
"}" + rule attribute_reference_pattern() = "{" attribute_name_pattern() "}" rule attribute_name_pattern() = ['a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_']+ diff --git a/acdc-parser/src/grammar/table.rs b/acdc-parser/src/grammar/table.rs index c9521bf9..00ef4b56 100644 --- a/acdc-parser/src/grammar/table.rs +++ b/acdc-parser/src/grammar/table.rs @@ -1,4 +1,4 @@ -use crate::{Error, TableColumn, model::SectionLevel}; +use crate::{Error, TableColumn, blocks::table::ParsedCell, model::SectionLevel}; use super::{ParserState, document_parser, inline_processing::adjust_and_log_parse_error}; @@ -7,17 +7,25 @@ pub(crate) fn parse_table_cell( state: &mut ParserState, cell_start_offset: usize, parent_section_level: Option, + cell: &ParsedCell, ) -> Result { - let content = document_parser::blocks(content, state, cell_start_offset, parent_section_level) + let blocks = document_parser::blocks(content, state, cell_start_offset, parent_section_level) .unwrap_or_else(|error| { - adjust_and_log_parse_error( - &error, - content, - cell_start_offset, - state, - "Failed parsing table cell content as blocks", - ); - Ok(Vec::new()) - })?; - Ok(TableColumn { content }) + adjust_and_log_parse_error( + &error, + content, + cell_start_offset, + state, + "Failed parsing table cell content as blocks", + ); + Ok(Vec::new()) + })?; + Ok(TableColumn::with_format( + blocks, + cell.colspan, + cell.rowspan, + cell.halign, + cell.valign, + cell.style, + )) } diff --git a/acdc-parser/src/model/attributes.rs b/acdc-parser/src/model/attributes.rs index 74508ad1..ccbc6ee0 100644 --- a/acdc-parser/src/model/attributes.rs +++ b/acdc-parser/src/model/attributes.rs @@ -247,9 +247,9 @@ impl ElementAttributes { self.0.merge(other.0); } - /// Helper to get a string value. + /// Get a string attribute value as an owned `String`. /// - /// Strips surrounding quotes from the value if present (parser quirk workaround). + /// Strips surrounding quotes from the value if present. 
#[must_use] pub fn get_string(&self, name: &str) -> Option { self.get(name).and_then(|v| match v { diff --git a/acdc-parser/src/model/tables.rs b/acdc-parser/src/model/tables.rs index 1f9a7c54..9a4b4d28 100644 --- a/acdc-parser/src/model/tables.rs +++ b/acdc-parser/src/model/tables.rs @@ -223,12 +223,51 @@ impl TableRow { #[non_exhaustive] pub struct TableColumn { pub content: Vec, + /// Number of columns this cell spans (default 1). + /// Specified in `AsciiDoc` with `n+|` syntax (e.g., `2+|` for colspan=2). + #[serde(skip_serializing_if = "is_default_span")] + pub colspan: usize, + /// Number of rows this cell spans (default 1). + /// Specified in `AsciiDoc` with `.n+|` syntax (e.g., `.2+|` for rowspan=2). + #[serde(skip_serializing_if = "is_default_span")] + pub rowspan: usize, + /// Cell-level horizontal alignment override. + /// Specified with `<`, `^`, or `>` in cell specifier (e.g., `^|` for center). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub halign: Option, + /// Cell-level vertical alignment override. + /// Specified with `.<`, `.^`, or `.>` in cell specifier (e.g., `.>|` for bottom). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub valign: Option, + /// Cell-level style override. + /// Specified with style letter after operator (e.g., `s|` for strong/bold). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub style: Option, +} + +#[allow(clippy::trivially_copy_pass_by_ref)] +const fn is_default_span(span: &usize) -> bool { + *span == 1 } impl TableColumn { - /// Create a new table column with the given content. + /// Create a new table column with full cell specifier options. 
#[must_use] - pub fn new(content: Vec) -> Self { - Self { content } + pub(crate) fn with_format( + content: Vec, + colspan: usize, + rowspan: usize, + halign: Option, + valign: Option, + style: Option, + ) -> Self { + Self { + content, + colspan, + rowspan, + halign, + valign, + style, + } } } diff --git a/acdc-parser/src/preprocessor/mod.rs b/acdc-parser/src/preprocessor/mod.rs index cf439cf2..86500758 100644 --- a/acdc-parser/src/preprocessor/mod.rs +++ b/acdc-parser/src/preprocessor/mod.rs @@ -181,7 +181,7 @@ impl Preprocessor { return Ok(Some(include.lines()?)); } } else { - tracing::error!("file parent is missing - include directive cannot be processed"); + tracing::error!(%line, "file parent is missing - include directive cannot be processed"); } Ok(None) } diff --git a/converters/html/src/delimited.rs b/converters/html/src/delimited.rs index a301315f..f67f075a 100644 --- a/converters/html/src/delimited.rs +++ b/converters/html/src/delimited.rs @@ -40,19 +40,15 @@ fn write_example_block>( let _ = writer; // Render title with caption prefix if title exists + // Caption can be disabled with :example-caption!: if !block.title.is_empty() { - let count = processor.example_counter.get() + 1; - processor.example_counter.set(count); - let caption = processor - .document_attributes - .get("example-caption") - .and_then(|v| match v { - AttributeValue::String(s) => Some(s.as_str()), - AttributeValue::Bool(_) | AttributeValue::None | _ => None, - }) - .unwrap_or("Example"); - let prefix = format!("
{caption} {count}. "); - visitor.render_title_with_wrapper(&block.title, &prefix, "
\n")?; + let prefix = + processor.caption_prefix("example-caption", &processor.example_counter, "Example"); + visitor.render_title_with_wrapper( + &block.title, + &format!("
{prefix}"), + "
\n", + )?; } writer = visitor.writer_mut(); diff --git a/converters/html/src/icon.rs b/converters/html/src/icon.rs index 3d51e8a1..6a768be1 100644 --- a/converters/html/src/icon.rs +++ b/converters/html/src/icon.rs @@ -5,20 +5,6 @@ use acdc_parser::{AttributeValue, ElementAttributes, ICON_SIZES, Icon, Source}; use crate::Processor; -/// Helper to get a string value from `ElementAttributes`. -/// -/// Strips surrounding quotes from the value if present (parser quirk workaround). -fn get_attr_string(attrs: &ElementAttributes, name: &str) -> Option { - attrs.get(name).and_then(|v| match v { - AttributeValue::String(s) => { - // Strip surrounding quotes if present (parser includes them for quoted values) - let trimmed = s.trim_matches('"'); - Some(trimmed.to_string()) - } - AttributeValue::None | AttributeValue::Bool(_) | _ => None, - }) -} - /// Check if a positional attribute exists (stored as key with `AttributeValue::None`). fn has_positional_attr(attrs: &ElementAttributes, name: &str) -> bool { matches!(attrs.get(name), Some(AttributeValue::None)) @@ -27,7 +13,7 @@ fn has_positional_attr(attrs: &ElementAttributes, name: &str) -> bool { /// Get the icon size from attributes (either named `size=...` or positional like `2x`). 
fn get_icon_size(attrs: &ElementAttributes) -> Option { // First check named attribute - if let Some(size) = get_attr_string(attrs, "size") { + if let Some(size) = attrs.get_string("size") { return Some(size); } // Then check for positional size values @@ -54,7 +40,7 @@ pub(crate) fn write_icon( let attrs = &icon.attributes; // Build span class with optional role - let span_class = match get_attr_string(attrs, "role") { + let span_class = match attrs.get_string("role") { Some(role) => format!("icon {role}"), None => "icon".to_string(), }; @@ -89,14 +75,15 @@ fn write_font_icon( } // flip takes precedence over rotate (matches asciidoctor behavior) - if let Some(flip) = get_attr_string(attrs, "flip") { + if let Some(flip) = attrs.get_string("flip") { let _ = write!(classes, " fa-flip-{flip}"); - } else if let Some(rotate) = get_attr_string(attrs, "rotate") { + } else if let Some(rotate) = attrs.get_string("rotate") { let _ = write!(classes, " fa-rotate-{rotate}"); } // Build title attribute - let title_attr = get_attr_string(attrs, "title") + let title_attr = attrs + .get_string("title") .map(|t| format!(" title=\"{t}\"")) .unwrap_or_default(); @@ -124,16 +111,18 @@ fn write_image_icon( .map_or_else(|| "./images/icons".to_string(), ToString::to_string); // Build alt attribute (use custom alt or target name) - let alt = get_attr_string(attrs, "alt").unwrap_or_else(|| target.to_string()); + let alt = attrs + .get_string("alt") + .unwrap_or_else(|| target.to_string()); // Build img attributes let mut img_attrs = format!("src=\"{iconsdir}/{target}.png\" alt=\"{alt}\""); - if let Some(width) = get_attr_string(attrs, "width") { + if let Some(width) = attrs.get_string("width") { let _ = write!(img_attrs, " width=\"{width}\""); } - if let Some(title) = get_attr_string(attrs, "title") { + if let Some(title) = attrs.get_string("title") { let _ = write!(img_attrs, " title=\"{title}\""); } @@ -148,10 +137,11 @@ fn write_image_icon( /// Wrap icon content with a link if the `link` 
attribute is present. fn wrap_icon_with_link(content: &str, attrs: &ElementAttributes) -> String { - if let Some(link) = get_attr_string(attrs, "link") { + if let Some(link) = attrs.get_string("link") { // HTML-escape ampersands in URLs for valid HTML let escaped_link = link.replace('&', "&"); - let window_attrs = get_attr_string(attrs, "window") + let window_attrs = attrs + .get_string("window") .map(|w| format!(" target=\"{w}\" rel=\"noopener\"")) .unwrap_or_default(); format!("{content}") diff --git a/converters/html/src/image.rs b/converters/html/src/image.rs index 03868d4d..65fc6ef2 100644 --- a/converters/html/src/image.rs +++ b/converters/html/src/image.rs @@ -1,5 +1,5 @@ use acdc_converters_core::visitor::{WritableVisitor, WritableVisitorExt}; -use acdc_parser::{AttributeValue, Image}; +use acdc_parser::Image; use crate::{ Error, Processor, @@ -56,21 +56,14 @@ pub(crate) fn visit_image>( write!(w, "")?; // close content // Render title with figure caption if title exists + // Caption can be disabled with :figure-caption!: if !img.title.is_empty() { - let count = processor.figure_counter.get() + 1; - processor.figure_counter.set(count); - let caption = processor - .document_attributes - .get("figure-caption") - .and_then(|v| match v { - AttributeValue::String(s) => Some(s.as_str()), - AttributeValue::Bool(_) | AttributeValue::None | _ => None, - }) - .unwrap_or("Figure"); + let prefix = + processor.caption_prefix("figure-caption", &processor.figure_counter, "Figure"); let _ = w; visitor.render_title_with_wrapper( &img.title, - &format!("
{caption} {count}. "), + &format!("
{prefix}"), "
", )?; w = visitor.writer_mut(); diff --git a/converters/html/src/lib.rs b/converters/html/src/lib.rs index 8c76e9b3..ac6b899f 100644 --- a/converters/html/src/lib.rs +++ b/converters/html/src/lib.rs @@ -70,6 +70,37 @@ impl Processor { self.has_valid_index_section } + /// Generate a caption prefix based on document attributes. + /// + /// Returns the caption prefix string. If captions are disabled via `:X-caption!:`, + /// returns an empty string. Otherwise increments the counter and returns + /// "Caption N. " format. + #[must_use] + pub(crate) fn caption_prefix( + &self, + attribute_name: &str, + counter: &Rc>, + default_text: &str, + ) -> String { + match self.document_attributes.get(attribute_name) { + Some(AttributeValue::Bool(false)) => { + // Disabled via :X-caption!: + String::new() + } + Some(AttributeValue::String(s)) => { + let count = counter.get() + 1; + counter.set(count); + let caption = s.trim_matches('"'); + format!("{caption} {count}. ") + } + _ => { + let count = counter.get() + 1; + counter.set(count); + format!("{default_text} {count}. ") + } + } + } + /// Generate a unique anchor ID for an index term and collect the entry. 
#[must_use] pub fn add_index_entry(&self, kind: IndexTermKind) -> String { diff --git a/converters/html/src/table.rs b/converters/html/src/table.rs index 031bd19d..756b69ef 100644 --- a/converters/html/src/table.rs +++ b/converters/html/src/table.rs @@ -1,7 +1,7 @@ use acdc_converters_core::table::calculate_column_widths; use acdc_converters_core::visitor::{WritableVisitor, WritableVisitorExt}; use acdc_parser::{ - AttributeValue, Block, BlockMetadata, ColumnFormat, HorizontalAlignment, InlineNode, Table, + Block, BlockMetadata, ColumnFormat, HorizontalAlignment, InlineNode, Table, TableColumn, VerticalAlignment, }; @@ -25,9 +25,44 @@ fn valign_class(valign: VerticalAlignment) -> &'static str { } } -/// Get column format for a given column index, defaulting to left/top if not specified -fn get_column_format(columns: &[ColumnFormat], col_index: usize) -> ColumnFormat { - columns.get(col_index).cloned().unwrap_or_default() +/// Get effective alignment for a cell, considering cell-level overrides. +fn get_effective_halign( + columns: &[ColumnFormat], + col_index: usize, + cell: &TableColumn, +) -> HorizontalAlignment { + cell.halign.unwrap_or_else(|| { + columns + .get(col_index) + .map_or_else(HorizontalAlignment::default, |c| c.halign) + }) +} + +/// Get effective vertical alignment for a cell, considering cell-level overrides. +fn get_effective_valign( + columns: &[ColumnFormat], + col_index: usize, + cell: &TableColumn, +) -> VerticalAlignment { + cell.valign.unwrap_or_else(|| { + columns + .get(col_index) + .map_or_else(VerticalAlignment::default, |c| c.valign) + }) +} + +/// Format colspan/rowspan attributes for a table cell. +/// Returns an empty string if both are 1 (default). 
+fn format_span_attrs(cell: &TableColumn) -> String { + use std::fmt::Write; + let mut attrs = String::new(); + if cell.colspan > 1 { + let _ = write!(attrs, " colspan=\"{}\"", cell.colspan); + } + if cell.rowspan > 1 { + let _ = write!(attrs, " rowspan=\"{}\"", cell.rowspan); + } + attrs } /// Render cell content with support for nested blocks @@ -65,29 +100,38 @@ where Ok(()) } -/// Render table caption with number if title exists +/// Render table caption with number if title exists. +/// +/// Per-block `[caption="..."]` attribute overrides the prefix entirely and does NOT increment +/// the table counter (following `AsciiDoc` specification). +/// +/// Caption can be disabled with: +/// - `:table-caption!:` at document level (disables for all tables) +/// - `[caption=""]` at block level (disables for specific table) fn render_table_caption( visitor: &mut V, title: &[InlineNode], processor: &Processor, + metadata: &BlockMetadata, ) -> Result<(), Error> where V: WritableVisitor, { if !title.is_empty() { - let count = processor.table_counter.get() + 1; - processor.table_counter.set(count); - let caption = processor - .document_attributes - .get("table-caption") - .and_then(|v| match v { - AttributeValue::String(s) => Some(s.as_str()), - AttributeValue::Bool(_) | AttributeValue::None | _ => None, - }) - .unwrap_or("Table"); + // Check for per-block caption override (does NOT increment counter) + let prefix = if let Some(custom_caption) = metadata.attributes.get_string("caption") { + if custom_caption.is_empty() { + String::new() + } else { + custom_caption + } + } else { + processor.caption_prefix("table-caption", &processor.table_counter, "Table") + }; + visitor.render_title_with_wrapper( title, - &format!("{caption} {count}. 
"), + &format!("{prefix}"), "\n", )?; } @@ -101,12 +145,7 @@ fn render_colgroup( metadata: &BlockMetadata, ) -> Result<(), Error> { // Generate colgroup - either from cols attribute or inferred from table structure - let col_count = if let Some(cols_value) = metadata.attributes.get("cols") { - let cols_str = match cols_value { - AttributeValue::String(s) => s.trim_matches('"'), - AttributeValue::Bool(_) | AttributeValue::None | _ => "", - }; - + let col_count = if let Some(cols_str) = metadata.attributes.get_string("cols") { // Handle multiplier syntax like "3*" or "2*~" if let Some(asterisk_pos) = cols_str.find('*') { let count_str = &cols_str[..asterisk_pos]; @@ -152,6 +191,63 @@ fn render_colgroup( Ok(()) } +/// Get frame class from metadata (default: all). +fn get_frame_class(metadata: &BlockMetadata) -> &'static str { + metadata + .attributes + .get_string("frame") + .map_or("frame-all", |frame| match frame.as_str() { + "ends" | "topbot" => "frame-ends", + "sides" => "frame-sides", + "none" => "frame-none", + _ => "frame-all", + }) +} + +/// Get grid class from metadata (default: all). +fn get_grid_class(metadata: &BlockMetadata) -> &'static str { + metadata + .attributes + .get_string("grid") + .map_or("grid-all", |grid| match grid.as_str() { + "rows" => "grid-rows", + "cols" => "grid-cols", + "none" => "grid-none", + _ => "grid-all", + }) +} + +/// Get stripes class from metadata (only if specified). +fn get_stripes_class(metadata: &BlockMetadata) -> Option<&'static str> { + metadata + .attributes + .get_string("stripes") + .and_then(|stripes| match stripes.as_str() { + "even" => Some("stripes-even"), + "odd" => Some("stripes-odd"), + "all" => Some("stripes-all"), + "hover" => Some("stripes-hover"), + _ => None, + }) +} + +/// Get width style from metadata (returns empty string if not specified). 
+fn get_width_style(metadata: &BlockMetadata) -> String { + metadata + .attributes + .get_string("width") + .map_or_else(String::new, |w| format!(" style=\"width: {w};\"")) +} + +/// Get sizing class based on %autowidth option. +fn get_sizing_class(metadata: &BlockMetadata) -> &'static str { + if metadata.options.contains(&"autowidth".to_string()) { + "fit-content" + } else { + "stretch" + } +} + /// Render table with support for nested blocks in cells pub(crate) fn render_table( table: &Table, @@ -165,13 +261,35 @@ where V: WritableVisitor, { let writer = visitor.writer_mut(); - let classes = ["tableblock", "frame-all", "grid-all", "stretch"]; - writeln!(writer, "", classes.join(" "))?; + // Build table classes + let frame = get_frame_class(metadata); + let grid = get_grid_class(metadata); + let sizing = get_sizing_class(metadata); + + // Start with base classes, add optional ones + let mut class_parts = format!("tableblock {frame} {grid} {sizing}"); + + // Add stripes class if specified + if let Some(stripes) = get_stripes_class(metadata) { + class_parts.push(' '); + class_parts.push_str(stripes); + } + + // Add custom roles/classes from metadata + for role in &metadata.roles { + class_parts.push(' '); + class_parts.push_str(role); + } + + // Get width style + let width_style = get_width_style(metadata); + + writeln!(writer, "
")?; // Render caption with table number if title exists let _ = writer; - render_table_caption(visitor, title, processor)?; + render_table_caption(visitor, title, processor, metadata)?; // Render colgroup with column widths render_colgroup(visitor.writer_mut(), table, metadata)?; @@ -183,11 +301,14 @@ where writeln!(writer, "")?; let _ = writer; for (col_index, cell) in header.columns.iter().enumerate() { - let spec = get_column_format(&table.columns, col_index); - let halign = halign_class(spec.halign); - let valign = valign_class(spec.valign); + let halign = halign_class(get_effective_halign(&table.columns, col_index, cell)); + let valign = valign_class(get_effective_valign(&table.columns, col_index, cell)); + let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); - write!(writer, "")?; let _ = writer; for (col_index, cell) in row.columns.iter().enumerate() { - let spec = get_column_format(&table.columns, col_index); - let halign = halign_class(spec.halign); - let valign = valign_class(spec.valign); + let halign = halign_class(get_effective_halign(&table.columns, col_index, cell)); + let valign = valign_class(get_effective_valign(&table.columns, col_index, cell)); + let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); - write!(writer, "")?; let _ = writer; for (col_index, cell) in footer.columns.iter().enumerate() { - let spec = get_column_format(&table.columns, col_index); - let halign = halign_class(spec.halign); - let valign = valign_class(spec.valign); + let halign = halign_class(get_effective_halign(&table.columns, col_index, cell)); + let valign = valign_class(get_effective_valign(&table.columns, col_index, cell)); + let span_attrs = format_span_attrs(cell); let writer = visitor.writer_mut(); - write!(writer, "
")?; + write!( + writer, + "" + )?; let _ = writer; render_cell_content(&cell.content, visitor, processor, options, false)?; let writer = visitor.writer_mut(); @@ -207,11 +328,14 @@ where writeln!(writer, "
")?; + write!( + writer, + "" + )?; let _ = writer; render_cell_content(&cell.content, visitor, processor, options, true)?; let writer = visitor.writer_mut(); @@ -230,11 +354,14 @@ where writeln!(writer, "
")?; + write!( + writer, + "" + )?; let _ = writer; render_cell_content(&cell.content, visitor, processor, options, true)?; let writer = visitor.writer_mut(); diff --git a/converters/terminal/src/table.rs b/converters/terminal/src/table.rs index efb99ae7..d3b1dae9 100644 --- a/converters/terminal/src/table.rs +++ b/converters/terminal/src/table.rs @@ -117,17 +117,24 @@ pub(crate) fn visit_table>( mod tests { use super::*; use acdc_converters_core::Options; - use acdc_parser::{ - Block, DocumentAttributes, InlineNode, Location, Paragraph, Plain, TableColumn, TableRow, - }; - - /// Create simple plain text inline nodes for testing - fn create_test_inlines(content: &str) -> Vec { - vec![InlineNode::PlainText(Plain { - content: content.to_string(), - location: Location::default(), - escaped: false, - })] + use acdc_parser::{Block, DelimitedBlockType, DocumentAttributes}; + + /// Parse an `AsciiDoc` string and extract the first table from the document. + #[allow(clippy::expect_used)] + fn parse_table(adoc: &str) -> acdc_parser::Table { + let options = acdc_parser::Options::default(); + let doc = acdc_parser::parse(adoc, &options).expect("Failed to parse AsciiDoc"); + doc.blocks + .into_iter() + .find_map(|block| { + if let Block::DelimitedBlock(db) = block + && let DelimitedBlockType::DelimitedTable(table) = db.inner + { + return Some(table); + } + None + }) + .expect("No table found in document") } /// Create test processor with default options @@ -146,31 +153,19 @@ mod tests { } } - /// Helper to create a paragraph block with plain text content - fn create_paragraph_block(text: &str) -> Block { - Block::Paragraph(Paragraph::new( - create_test_inlines(text), - Location::default(), - )) - } - #[test] fn test_table_with_footer() -> Result<(), Error> { - let table = acdc_parser::Table::new( - vec![TableRow::new(vec![ - TableColumn::new(vec![create_paragraph_block("Cell 1")]), - TableColumn::new(vec![create_paragraph_block("Cell 2")]), - ])], - Location::default(), - ) - 
.with_header(Some(TableRow::new(vec![ - TableColumn::new(vec![create_paragraph_block("Header 1")]), - TableColumn::new(vec![create_paragraph_block("Header 2")]), - ]))) - .with_footer(Some(TableRow::new(vec![ - TableColumn::new(vec![create_paragraph_block("Footer 1")]), - TableColumn::new(vec![create_paragraph_block("Footer 2")]), - ]))); + let adoc = r" +[%header%footer] +|=== +| Header 1 | Header 2 + +| Cell 1 | Cell 2 + +| Footer 1 | Footer 2 +|=== +"; + let table = parse_table(adoc); let buffer = Vec::new(); let processor = create_test_processor(); @@ -198,12 +193,12 @@ mod tests { #[test] fn test_table_without_footer() -> Result<(), Error> { - let table = acdc_parser::Table::new( - vec![TableRow::new(vec![TableColumn::new(vec![ - create_paragraph_block("Cell"), - ])])], - Location::default(), - ); + let adoc = r" +|=== +| Cell +|=== +"; + let table = parse_table(adoc); let buffer = Vec::new(); let processor = create_test_processor();