From d5e0f786c7a4e329073c8752aef9e35c5a2cf591 Mon Sep 17 00:00:00 2001 From: QuietMisdreavus Date: Fri, 31 Dec 2021 16:58:26 -0700 Subject: [PATCH 1/2] wip: half of the 'superscript' extension --- extensions/CMakeLists.txt | 1 + extensions/core-extensions.c | 2 + extensions/superscript.c | 141 +++++++++++++++++++++++++++++++++++ extensions/superscript.h | 9 +++ test/spec.txt | 44 +++++++++++ 5 files changed, 197 insertions(+) create mode 100644 extensions/superscript.c create mode 100644 extensions/superscript.h diff --git a/extensions/CMakeLists.txt b/extensions/CMakeLists.txt index 9bbec1f47..982bb4a83 100644 --- a/extensions/CMakeLists.txt +++ b/extensions/CMakeLists.txt @@ -11,6 +11,7 @@ set(LIBRARY_SOURCES ext_scanners.re ext_scanners.h tasklist.c + superscript.c ) include_directories( diff --git a/extensions/core-extensions.c b/extensions/core-extensions.c index 131cdf402..1456116bf 100644 --- a/extensions/core-extensions.c +++ b/extensions/core-extensions.c @@ -7,6 +7,7 @@ #include "tasklist.h" #include "registry.h" #include "plugin.h" +#include "superscript.h" static int core_extensions_registration(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, create_table_extension()); @@ -15,6 +16,7 @@ static int core_extensions_registration(cmark_plugin *plugin) { cmark_plugin_register_syntax_extension(plugin, create_autolink_extension()); cmark_plugin_register_syntax_extension(plugin, create_tagfilter_extension()); cmark_plugin_register_syntax_extension(plugin, create_tasklist_extension()); + cmark_plugin_register_syntax_extension(plugin, create_superscript_extension()); return 1; } diff --git a/extensions/superscript.c b/extensions/superscript.c new file mode 100644 index 000000000..64112cb9d --- /dev/null +++ b/extensions/superscript.c @@ -0,0 +1,141 @@ +#include "superscript.h" +#include +#include +#include + +cmark_node_type CMARK_NODE_SUPERSCRIPT; + +static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser, + cmark_node 
*parent, unsigned char character, + cmark_inline_parser *inline_parser) { + cmark_node *res = NULL; + int startpos = cmark_inline_parser_get_offset(inline_parser) + 1; + int endpos = startpos; + + if (character != '^') + return NULL; + + // TODO: long-form parsing with parens + if (cmark_inline_parser_peek_at(inline_parser, endpos) == '(') + return NULL; + + cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); + bufsize_t len = chunk->len; + + while (endpos < len && !cmark_isspace(cmark_inline_parser_peek_at(inline_parser, endpos))) + endpos++; + + int nodelen = endpos - startpos; + + // don't emit an empty node + if (nodelen == 0) + return NULL; + + cmark_inline_parser_set_offset(inline_parser, startpos); + + res = cmark_node_new_with_mem_and_ext(CMARK_NODE_SUPERSCRIPT, parser->mem, self); + res->as.literal = cmark_chunk_dup(chunk, startpos, nodelen); + res->start_line = cmark_inline_parser_get_line(inline_parser); + res->start_column = cmark_inline_parser_get_column(inline_parser); + + cmark_inline_parser_set_offset(inline_parser, endpos); + + res->end_line = cmark_inline_parser_get_line(inline_parser); + res->end_column = cmark_inline_parser_get_column(inline_parser); + + const char *text = cmark_chunk_to_cstr(parser->mem, &res->as.literal); + cmark_node_set_string_content(res, text); + + cmark_parse_inlines(parser, res, parser->refmap, parser->options); + + return res; +} + +static const char *get_type_string(cmark_syntax_extension *extension, + cmark_node *node) { + return node->type == CMARK_NODE_SUPERSCRIPT ? 
"superscript" : ""; +} + +static int can_contain(cmark_syntax_extension *extension, cmark_node *node, + cmark_node_type child_type) { + if (node->type != CMARK_NODE_SUPERSCRIPT) + return false; + + return CMARK_NODE_TYPE_INLINE_P(child_type); +} + +static void commonmark_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool should_wrap = (cmark_strbuf_strchr(&node->content, ' ', 0) != -1); + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + if (should_wrap) + renderer->out(renderer, node, "^(", false, LITERAL); + else + renderer->out(renderer, node, "^", false, LITERAL); + } else if (!entering && should_wrap) { + renderer->out(renderer, node, ")", false, LITERAL); + } +} + +static void latex_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + renderer->out(renderer, node, "^{", false, LITERAL); + } else { + renderer->out(renderer, node, "}", false, LITERAL); + } +} + +static void man_render(cmark_syntax_extension *extension, + cmark_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + // requires MOM + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + renderer->cr(renderer); + renderer->out(renderer, node, "\\*[SUP]", false, LITERAL); + } else { + renderer->out(renderer, node, "\\*[SUPX]", false, LITERAL); + renderer->cr(renderer); + } +} + +static void html_render(cmark_syntax_extension *extension, + cmark_html_renderer *renderer, cmark_node *node, + cmark_event_type ev_type, int options) { + bool entering = (ev_type == CMARK_EVENT_ENTER); + if (entering) { + cmark_strbuf_puts(renderer->html, ""); + } else { + cmark_strbuf_puts(renderer->html, ""); + } +} + +cmark_syntax_extension *create_superscript_extension(void) { + cmark_syntax_extension *ext = 
cmark_syntax_extension_new("superscript"); + cmark_llist *special_chars = NULL; + + cmark_syntax_extension_set_get_type_string_func(ext, get_type_string); + cmark_syntax_extension_set_can_contain_func(ext, can_contain); + cmark_syntax_extension_set_commonmark_render_func(ext, commonmark_render); + cmark_syntax_extension_set_plaintext_render_func(ext, commonmark_render); + cmark_syntax_extension_set_latex_render_func(ext, latex_render); + cmark_syntax_extension_set_man_render_func(ext, man_render); + cmark_syntax_extension_set_html_render_func(ext, html_render); + CMARK_NODE_SUPERSCRIPT = cmark_syntax_extension_add_node(1); + + cmark_syntax_extension_set_match_inline_func(ext, match); +// cmark_syntax_extension_set_inline_from_delim_func(ext, insert); + + cmark_mem *mem = cmark_get_default_mem_allocator(); + special_chars = cmark_llist_append(mem, special_chars, (void *)'^'); + cmark_syntax_extension_set_special_inline_chars(ext, special_chars); + + cmark_syntax_extension_set_emphasis(ext, 1); + + return ext; +} diff --git a/extensions/superscript.h b/extensions/superscript.h new file mode 100644 index 000000000..465e1a2ab --- /dev/null +++ b/extensions/superscript.h @@ -0,0 +1,9 @@ +#ifndef CMARK_GFM_SUPERSCRIPT_H +#define CMARK_GFM_SUPERSCRIPT_H + +#include "cmark-gfm-core-extensions.h" + +extern cmark_node_type CMARK_NODE_SUPERSCRIPT; +cmark_syntax_extension *create_superscript_extension(void); + +#endif /* CMARK_GFM_SUPERSCRIPT_H */ diff --git a/test/spec.txt b/test/spec.txt index 170276156..10438e615 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -7734,6 +7734,50 @@ new paragraph~~. +
+ +## Superscript (extension) + +GFM provides the `superscript` extension, which adds "superscript" spans. + +There are two ways to write superscripts. For simple uses, you can use a +caret (`^`) to style the text between it and the next space (or the end of +the line): + +```````````````````````````````` example superscript +y = x^2 + 2 + +Superscripting a whole ^word +. +

<p>y = x<sup>2</sup> + 2</p>

+

<p>Superscripting a whole <sup>word</sup></p>

+```````````````````````````````` + +In addition, if you would like to raise more than one word, you can add +parentheses around the text you would like to style in a superscript: + +```````````````````````````````` example superscript disabled +I would like to ^(raise this whole phrase), please. +. +

<p>I would like to <sup>raise this whole phrase</sup>, please.</p>

+```````````````````````````````` + +Superscripts can be nested, by adding additional carets: + +```````````````````````````````` example superscript +z = t^x^2 +. +

<p>z = t<sup>x<sup>2</sup></sup></p>

+```````````````````````````````` + +```````````````````````````````` example superscript disabled +For my next trick, I will ^(raise my text ^(twice)), at the same time! +. +

<p>For my next trick, I will <sup>raise my text<sup>twice</sup></sup>, at the same time!</p>

+```````````````````````````````` + +
+ ## Links A link contains [link text] (the visible text), a [link destination] From c2b9ecb97186c4caf7d5be87cf21bfbfe85590db Mon Sep 17 00:00:00 2001 From: QuietMisdreavus Date: Fri, 31 Dec 2021 20:42:22 -0700 Subject: [PATCH 2/2] add parenthesis version of superscript --- extensions/superscript.c | 165 ++++++++++++++++++++++++++++++++------- test/spec.txt | 6 +- 2 files changed, 138 insertions(+), 33 deletions(-) diff --git a/extensions/superscript.c b/extensions/superscript.c index 64112cb9d..f6c8cc009 100644 --- a/extensions/superscript.c +++ b/extensions/superscript.c @@ -3,50 +3,154 @@ #include #include +static cmark_chunk *S_get_node_literal_chunk(cmark_node *node) { + if (node == NULL) { + return NULL; + } + + switch (node->type) { + case CMARK_NODE_HTML_BLOCK: + case CMARK_NODE_TEXT: + case CMARK_NODE_HTML_INLINE: + case CMARK_NODE_CODE: + case CMARK_NODE_FOOTNOTE_REFERENCE: + return &node->as.literal; + + case CMARK_NODE_CODE_BLOCK: + return &node->as.code.literal; + + default: + break; + } + + return NULL; +} + +static bool S_node_contains_space(cmark_node *node) { + cmark_chunk *chunk = S_get_node_literal_chunk(node); + if (chunk) + return (cmark_chunk_strchr(chunk, ' ', 0) != chunk->len); + else + return false; +} + +static bool S_children_contain_space(cmark_node *parent) { + cmark_node *node = parent->first_child; + while (node) { + if (S_node_contains_space(node)) { + return true; + } + node = node->next; + } + + return false; +} + cmark_node_type CMARK_NODE_SUPERSCRIPT; static cmark_node *match(cmark_syntax_extension *self, cmark_parser *parser, cmark_node *parent, unsigned char character, cmark_inline_parser *inline_parser) { cmark_node *res = NULL; - int startpos = cmark_inline_parser_get_offset(inline_parser) + 1; - int endpos = startpos; - - if (character != '^') - return NULL; - - // TODO: long-form parsing with parens - if (cmark_inline_parser_peek_at(inline_parser, endpos) == '(') - return NULL; + int initpos = 
cmark_inline_parser_get_offset(inline_parser); + + if (character == '^') { + if (cmark_inline_parser_peek_at(inline_parser, initpos + 1) == '(') { + res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + cmark_node_set_literal(res, "^("); + res->start_line = res->end_line = cmark_inline_parser_get_line(inline_parser); + res->start_column = cmark_inline_parser_get_column(inline_parser); + res->end_column = res->start_column + 2; + + cmark_inline_parser_set_offset(inline_parser, initpos + 2); + cmark_inline_parser_push_delimiter(inline_parser, '^', true, false, res); + } else { + int startpos = initpos + 1; + int endpos = startpos; + + cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); + bufsize_t len = chunk->len; + + while (endpos < len) { + unsigned char seekchar = cmark_inline_parser_peek_at(inline_parser, endpos); + if (cmark_isspace(seekchar) || (cmark_ispunct(seekchar) && seekchar != '^')) + break; + endpos++; + } + + int nodelen = endpos - startpos; + + // don't emit an empty node + if (nodelen == 0) + return NULL; + + cmark_inline_parser_set_offset(inline_parser, startpos); + + res = cmark_node_new_with_mem_and_ext(CMARK_NODE_SUPERSCRIPT, parser->mem, self); + res->as.literal = cmark_chunk_dup(chunk, startpos, nodelen); + res->start_line = cmark_inline_parser_get_line(inline_parser); + res->start_column = cmark_inline_parser_get_column(inline_parser); + + cmark_inline_parser_set_offset(inline_parser, endpos); + + res->end_line = cmark_inline_parser_get_line(inline_parser); + res->end_column = cmark_inline_parser_get_column(inline_parser); + + const char *text = cmark_chunk_to_cstr(parser->mem, &res->as.literal); + cmark_node_set_string_content(res, text); + + cmark_parse_inlines(parser, res, parser->refmap, parser->options); + } + } else if (character == ')') { + res = cmark_node_new_with_mem(CMARK_NODE_TEXT, parser->mem); + cmark_node_set_literal(res, ")"); + res->start_line = res->end_line = 
cmark_inline_parser_get_line(inline_parser); + res->start_column = cmark_inline_parser_get_column(inline_parser); + res->end_column = res->start_column + 1; + + cmark_inline_parser_set_offset(inline_parser, initpos + 1); + cmark_inline_parser_push_delimiter(inline_parser, '^', false, true, res); + } - cmark_chunk *chunk = cmark_inline_parser_get_chunk(inline_parser); - bufsize_t len = chunk->len; + return res; +} - while (endpos < len && !cmark_isspace(cmark_inline_parser_peek_at(inline_parser, endpos))) - endpos++; +static delimiter *insert(cmark_syntax_extension *self, cmark_parser *parser, + cmark_inline_parser *inline_parser, delimiter *opener, + delimiter *closer) { + cmark_node *superscript; + cmark_node *tmp, *next; + delimiter *delim, *tmp_delim; + delimiter *res = closer->next; - int nodelen = endpos - startpos; + superscript = opener->inl_text; - // don't emit an empty node - if (nodelen == 0) - return NULL; + if (!cmark_node_set_type(superscript, CMARK_NODE_SUPERSCRIPT)) + return res; - cmark_inline_parser_set_offset(inline_parser, startpos); + cmark_node_set_syntax_extension(superscript, self); - res = cmark_node_new_with_mem_and_ext(CMARK_NODE_SUPERSCRIPT, parser->mem, self); - res->as.literal = cmark_chunk_dup(chunk, startpos, nodelen); - res->start_line = cmark_inline_parser_get_line(inline_parser); - res->start_column = cmark_inline_parser_get_column(inline_parser); + tmp = cmark_node_next(opener->inl_text); - cmark_inline_parser_set_offset(inline_parser, endpos); + while (tmp) { + if (tmp == closer->inl_text) + break; + next = cmark_node_next(tmp); + cmark_node_append_child(superscript, tmp); + tmp = next; + } - res->end_line = cmark_inline_parser_get_line(inline_parser); - res->end_column = cmark_inline_parser_get_column(inline_parser); + superscript->end_column = closer->inl_text->start_column + closer->inl_text->as.literal.len - 1; + cmark_node_free(closer->inl_text); - const char *text = cmark_chunk_to_cstr(parser->mem, &res->as.literal); - 
cmark_node_set_string_content(res, text); + delim = closer; + while (delim != NULL && delim != opener) { + tmp_delim = delim->previous; + cmark_inline_parser_remove_delimiter(inline_parser, delim); + delim = tmp_delim; + } - cmark_parse_inlines(parser, res, parser->refmap, parser->options); + cmark_inline_parser_remove_delimiter(inline_parser, opener); return res; } @@ -67,7 +171,7 @@ static int can_contain(cmark_syntax_extension *extension, cmark_node *node, static void commonmark_render(cmark_syntax_extension *extension, cmark_renderer *renderer, cmark_node *node, cmark_event_type ev_type, int options) { - bool should_wrap = (cmark_strbuf_strchr(&node->content, ' ', 0) != -1); + bool should_wrap = S_children_contain_space(node); bool entering = (ev_type == CMARK_EVENT_ENTER); if (entering) { if (should_wrap) @@ -129,10 +233,11 @@ cmark_syntax_extension *create_superscript_extension(void) { CMARK_NODE_SUPERSCRIPT = cmark_syntax_extension_add_node(1); cmark_syntax_extension_set_match_inline_func(ext, match); -// cmark_syntax_extension_set_inline_from_delim_func(ext, insert); + cmark_syntax_extension_set_inline_from_delim_func(ext, insert); cmark_mem *mem = cmark_get_default_mem_allocator(); special_chars = cmark_llist_append(mem, special_chars, (void *)'^'); + special_chars = cmark_llist_append(mem, special_chars, (void *)')'); cmark_syntax_extension_set_special_inline_chars(ext, special_chars); cmark_syntax_extension_set_emphasis(ext, 1); diff --git a/test/spec.txt b/test/spec.txt index 10438e615..a335f0bfe 100644 --- a/test/spec.txt +++ b/test/spec.txt @@ -7756,7 +7756,7 @@ Superscripting a whole ^word In addition, if you would like to raise more than one word, you can add parentheses around the text you would like to style in a superscript: -```````````````````````````````` example superscript disabled +```````````````````````````````` example superscript I would like to ^(raise this whole phrase), please. .

<p>I would like to <sup>raise this whole phrase</sup>, please.</p>

@@ -7770,10 +7770,10 @@ z = t^x^2

<p>z = t<sup>x<sup>2</sup></sup></p>

```````````````````````````````` -```````````````````````````````` example superscript disabled +```````````````````````````````` example superscript For my next trick, I will ^(raise my text ^(twice)), at the same time! . -

<p>For my next trick, I will <sup>raise my text<sup>twice</sup></sup>, at the same time!</p>

+

<p>For my next trick, I will <sup>raise my text <sup>twice</sup></sup>, at the same time!</p>

````````````````````````````````