@@ -16,6 +16,8 @@ pub enum Token {
16
16
Param ( String ) ,
17
17
18
18
Range {
19
+ /// Whether the left side of the range is bound by the previous token
20
+ /// (but it's not contained in this token)
19
21
bind_left : bool ,
20
22
bind_right : bool ,
21
23
} ,
@@ -39,12 +41,16 @@ pub enum Token {
39
41
Annotate , // @
40
42
}
41
43
44
+ /// Lex chars to tokens until the end of the input
42
45
pub fn lexer ( ) -> impl Parser < char , Vec < TokenSpan > , Error = Cheap < char > > {
43
- let whitespace = filter ( | x : & char | x . is_inline_whitespace ( ) )
46
+ lex_token ( )
44
47
. repeated ( )
45
- . at_least ( 1 )
46
- . ignored ( ) ;
48
+ . then_ignore ( ignored ( ) )
49
+ . then_ignore ( end ( ) )
50
+ }
47
51
52
+ /// Lex chars to a single token
53
+ pub fn lex_token ( ) -> impl Parser < char , TokenSpan , Error = Cheap < char > > {
48
54
let control_multi = choice ( (
49
55
just ( "->" ) . to ( Token :: ArrowThin ) ,
50
56
just ( "=>" ) . to ( Token :: ArrowFat ) ,
@@ -109,41 +115,55 @@ pub fn lexer() -> impl Parser<char, Vec<TokenSpan>, Error = Cheap<char>> {
109
115
) )
110
116
. recover_with ( skip_then_retry_until ( [ ] ) . skip_start ( ) ) ;
111
117
112
- let comment = just ( '#' )
113
- . then ( newline. not ( ) . repeated ( ) )
114
- . separated_by ( newline. then ( whitespace. or_not ( ) ) )
115
- . at_least ( 1 )
116
- . ignored ( ) ;
117
-
118
- let range = ( whitespace. or_not ( ) )
118
+ let range = ( whitespace ( ) . or_not ( ) )
119
119
. then_ignore ( just ( ".." ) )
120
- . then ( whitespace. or_not ( ) )
120
+ . then ( whitespace ( ) . or_not ( ) )
121
121
. map ( |( left, right) | Token :: Range {
122
+ // If there was no whitespace before (after), then we mark the range
123
+ // as bound on the left (right).
122
124
bind_left : left. is_none ( ) ,
123
125
bind_right : right. is_none ( ) ,
124
126
} )
125
127
. map_with_span ( TokenSpan ) ;
126
128
127
- let line_wrap = newline
129
+ choice ( ( range, ignored ( ) . ignore_then ( token. map_with_span ( TokenSpan ) ) ) )
130
+ }
131
+
132
+ fn ignored ( ) -> impl Parser < char , ( ) , Error = Cheap < char > > {
133
+ choice ( ( comment ( ) , whitespace ( ) , line_wrap ( ) ) )
134
+ . repeated ( )
135
+ . ignored ( )
136
+ }
137
+
138
+ fn whitespace ( ) -> impl Parser < char , ( ) , Error = Cheap < char > > {
139
+ filter ( |x : & char | x. is_inline_whitespace ( ) )
140
+ . repeated ( )
141
+ . at_least ( 1 )
142
+ . ignored ( )
143
+ }
144
+
145
+ fn line_wrap ( ) -> impl Parser < char , ( ) , Error = Cheap < char > > {
146
+ newline ( )
128
147
. then (
129
148
// We can optionally have an empty line, or a line with a comment,
130
149
// between the initial line and the continued line
131
- whitespace
150
+ whitespace ( )
132
151
. or_not ( )
133
- . then ( comment. or_not ( ) )
134
- . then ( newline)
152
+ . then ( comment ( ) . or_not ( ) )
153
+ . then ( newline ( ) )
135
154
. repeated ( ) ,
136
155
)
137
- . then ( whitespace. repeated ( ) )
156
+ . then ( whitespace ( ) . repeated ( ) )
138
157
. then ( just ( '\\' ) )
139
- . ignored ( ) ;
140
-
141
- let ignored = choice ( ( comment, whitespace, line_wrap) ) . repeated ( ) ;
158
+ . ignored ( )
159
+ }
142
160
143
- choice ( ( range, ignored. ignore_then ( token. map_with_span ( TokenSpan ) ) ) )
144
- . repeated ( )
145
- . then_ignore ( ignored)
146
- . then_ignore ( end ( ) )
161
+ fn comment ( ) -> impl Parser < char , ( ) , Error = Cheap < char > > {
162
+ just ( '#' )
163
+ . then ( newline ( ) . not ( ) . repeated ( ) )
164
+ . separated_by ( newline ( ) . then ( whitespace ( ) . or_not ( ) ) )
165
+ . at_least ( 1 )
166
+ . ignored ( )
147
167
}
148
168
149
169
pub fn ident_part ( ) -> impl Parser < char , String , Error = Cheap < char > > + Clone {
@@ -625,4 +645,37 @@ mod test {
625
645
// Unicode escape
626
646
assert_snapshot ! ( quoted_string( true ) . parse( r"'\u{01f422}'" ) . unwrap( ) , @"🐢" ) ;
627
647
}
648
+
649
+ #[ test]
650
+ fn range ( ) {
651
+ assert_debug_snapshot ! ( TokenVec ( lexer( ) . parse( "1..2" ) . unwrap( ) ) , @r###"
652
+ TokenVec (
653
+ 0..1: Literal(Integer(1)),
654
+ 1..3: Range { bind_left: true, bind_right: true },
655
+ 3..4: Literal(Integer(2)),
656
+ )
657
+ "### ) ;
658
+
659
+ assert_debug_snapshot ! ( TokenVec ( lexer( ) . parse( "..2" ) . unwrap( ) ) , @r###"
660
+ TokenVec (
661
+ 0..2: Range { bind_left: true, bind_right: true },
662
+ 2..3: Literal(Integer(2)),
663
+ )
664
+ "### ) ;
665
+
666
+ assert_debug_snapshot ! ( TokenVec ( lexer( ) . parse( "1.." ) . unwrap( ) ) , @r###"
667
+ TokenVec (
668
+ 0..1: Literal(Integer(1)),
669
+ 1..3: Range { bind_left: true, bind_right: true },
670
+ )
671
+ "### ) ;
672
+
673
+ assert_debug_snapshot ! ( TokenVec ( lexer( ) . parse( "in ..5" ) . unwrap( ) ) , @r###"
674
+ TokenVec (
675
+ 0..2: Ident("in"),
676
+ 2..5: Range { bind_left: false, bind_right: true },
677
+ 5..6: Literal(Integer(5)),
678
+ )
679
+ "### ) ;
680
+ }
628
681
}
0 commit comments