Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

format antlr4 grammars using antlr4Formatter #40

Merged
merged 4 commits into from
Jul 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,5 @@ private static void showHelpAndExit(Options options) {
System.exit(1);
}

private StandaloneFormatter() {
}
private StandaloneFormatter() {}
}
87 changes: 70 additions & 17 deletions antlr4-formatter/src/main/antlr4/ANTLRv4Lexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,19 @@ lexer grammar ANTLRv4Lexer;
options { superClass = LexerAdaptor; }
import LexBasic;
// Standard set of fragments

tokens { TOKEN_REF , RULE_REF , LEXER_CHAR_SET }
channels { OFF_CHANNEL , COMMENT }
// ======================================================

// Lexer specification

//

// -------------------------

// Comments

DOC_COMMENT
: DocComment
;
Expand All @@ -59,20 +65,28 @@ LINE_COMMENT
: LineComment -> channel (COMMENT)
;
// -------------------------

// Integer

//

INT
: DecimalNumeral
;
// -------------------------

// Literal string

//

// ANTLR makes no distinction between a single character literal and a

// multi-character string. All literals are single quote delimited and

// may contain unicode escape sequences of the form \uxxxx, where x

// is a valid hexadecimal number (per Unicode standard).

STRING_LITERAL
: SQuoteLiteral
;
Expand All @@ -81,29 +95,40 @@ UNTERMINATED_STRING_LITERAL
: USQuoteLiteral
;
// -------------------------

// Arguments

//

// Certain argument lists, such as those specifying call parameters

// to a rule invocation, or input parameters to a rule specification

// are contained within square brackets.

// Matches the opening square bracket (LBrack) of an argument list and runs the
// embedded action handleBeginArgument(). That helper is not defined in this
// grammar; presumably it comes from the LexerAdaptor superclass named in the
// options block -- TODO confirm.
BEGIN_ARGUMENT
: LBrack
{ handleBeginArgument(); }
;
// -------------------------

// Actions

// Matches the opening brace (LBrace) of an action block and pushes the Action
// lexer mode, so the following input is tokenized by the rules declared under
// "mode Action;" until that mode is popped.
BEGIN_ACTION
: LBrace -> pushMode (Action)
;
// -------------------------

// Keywords

//

// Keywords may not be used as labels for rules or in any other context where

// they would be ambiguous with the keyword vs some other identifier. OPTIONS,

// TOKENS, & CHANNELS blocks are handled idiomatically in dedicated lexical modes.

OPTIONS
: 'options' -> pushMode (Options)
;
Expand Down Expand Up @@ -172,8 +197,9 @@ MODE
: 'mode'
;
// -------------------------

// Punctuation

COLON
: Colon
;
Expand Down Expand Up @@ -266,40 +292,57 @@ NOT
: Tilde
;
// -------------------------

// Identifiers - allows unicode rule/token names

ID
: Id
;
// -------------------------

// Whitespace

// One or more consecutive Ws matches (Ws is defined outside this view) form a
// single WS token, which is emitted on OFF_CHANNEL, one of the channels
// declared in the channels block at the top of the grammar.
WS
: Ws+ -> channel (OFF_CHANNEL)
;
// -------------------------

// Illegal Characters

//

// This is an illegal character trap which is always the last rule in the

// lexer specification. It matches a single character of any value and being

// the last rule in the file will match when no other rule knows what to do

// about the character. It is reported as an error but is not passed on to the

// parser. This means that the parser gets to deal with the grammar file anyway

// but we will not try to analyse or code generate from a file with lexical

// errors.

//

// Comment this rule out to allow the error to be propagated to the parser

ERRCHAR
: . -> channel (HIDDEN)
;
// ======================================================

// Lexer modes

// -------------------------

// Arguments

mode Argument;
// E.g., [int x, List<String> a[]]

NESTED_ARGUMENT
: LBrack -> type (ARGUMENT_CONTENT) , pushMode (Argument)
;
Expand All @@ -321,7 +364,7 @@ END_ARGUMENT
{ handleEndArgument(); }
;
// added this to return non-EOF token type here. EOF does something weird

UNTERMINATED_ARGUMENT
: EOF -> popMode
;
Expand All @@ -330,15 +373,23 @@ ARGUMENT_CONTENT
: .
;
// -------------------------

// Actions

//

// Many language targets use {} as block delimiters and so we

// must recursively match {} delimited blocks to balance the

// braces. Additionally, we must make some assumptions about

// literal string representation in the target language. We assume

// that they are delimited by ' or " and so consume these

// in their own alts so as not to inadvertently match {}.

mode Action;
NESTED_ACTION
: LBrace -> type (ACTION_CONTENT) , pushMode (Action)
Expand Down Expand Up @@ -381,7 +432,7 @@ ACTION_CONTENT
: .
;
// -------------------------

mode Options;
OPT_DOC_COMMENT
: DocComment -> type (DOC_COMMENT) , channel (COMMENT)
Expand Down Expand Up @@ -435,7 +486,7 @@ OPT_WS
: Ws+ -> type (WS) , channel (OFF_CHANNEL)
;
// -------------------------

mode Tokens;
TOK_DOC_COMMENT
: DocComment -> type (DOC_COMMENT) , channel (COMMENT)
Expand Down Expand Up @@ -473,9 +524,10 @@ TOK_WS
: Ws+ -> type (WS) , channel (OFF_CHANNEL)
;
// -------------------------

mode Channels;
// currently same as Tokens mode; distinguished by keyword

CHN_DOC_COMMENT
: DocComment -> type (DOC_COMMENT) , channel (COMMENT)
;
Expand Down Expand Up @@ -512,7 +564,7 @@ CHN_WS
: Ws+ -> type (WS) , channel (OFF_CHANNEL)
;
// -------------------------

mode LexerCharSet;
LEXER_CHAR_SET_BODY
: (~ [\]\\] | EscAny)+ -> more
Expand All @@ -526,8 +578,9 @@ UNTERMINATED_CHAR_SET
: EOF -> popMode
;
// ------------------------------------------------------------------------------

// Grammar specific Keywords, Punctuation, etc.

fragment Id
: NameStartChar NameChar*
;
Expand Down
Loading