-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrexp.h
247 lines (211 loc) · 6.23 KB
/
rexp.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
/********************************************
rexp.h
copyright 2008-2012,2014, Thomas E. Dickey
copyright 2010, Jonathan Nieder
copyright 1991,2014, Michael D. Brennan
This is a source file for mawk, an implementation of
the AWK programming language.
Mawk is distributed without warranty under the terms of
the GNU General Public License, version 2, 1991.
********************************************/
/*
* $MawkId: rexp.h,v 1.26 2014/08/22 00:00:17 tom Exp $
* @Log: rexp.h,v @
* Revision 1.2 1993/07/23 13:21:35 mike
* cleanup rexp code
*
* Revision 1.1.1.1 1993/07/03 18:58:27 mike
* move source to cvs
*
* Revision 3.6 1992/01/21 17:31:45 brennan
* moved ison() macro out of rexp[23].c
*
* Revision 3.5 91/10/29 10:53:55 brennan
* SIZE_T
*
* Revision 3.4 91/08/13 09:10:02 brennan
* VERSION .9994
*
* Revision 3.3 91/06/15 09:40:25 brennan
* gcc defines __STDC__ but might not have stdlib.h
*
* Revision 3.2 91/06/10 16:18:19 brennan
* changes for V7
*
* Revision 3.1 91/06/07 10:33:18 brennan
* VERSION 0.995
*
* Revision 1.3 91/06/05 08:57:57 brennan
* removed RE_xmalloc()
*
* Revision 1.2 91/06/03 07:23:26 brennan
* changed type of RE_error_trap
*
* Revision 1.1 91/06/03 07:05:41 brennan
* Initial revision
*
*/
#ifndef REXP_H
#define REXP_H
#include "nstd.h"
#include "types.h"
#include <stdio.h>
#include <setjmp.h>
extern PTR RE_malloc(size_t);
extern PTR RE_realloc(void *, size_t);
#ifdef NO_LEAKS
extern void RE_free(void *);
#else
#define RE_free(p) free(p)
#endif
/* finite machine state types */
#define M_STR 0 /* matching a literal string */
#define M_CLASS 1 /* character class */
#define M_ANY 2 /* arbitrary character (.) */
#define M_START 3 /* start of string (^) */
#define M_END 4 /* end of string ($) */
#define M_U 5 /* arbitrary string (.*) */
#define M_1J 6 /* mandatory jump */
#define M_2JA 7 /* optional (undesirable) jump */
#define M_2JB 8 /* optional (desirable) jump */
#define M_SAVE_POS 9 /* push position onto stack */
#define M_2JC 10 /* pop pos'n, optional jump if advanced */
#define M_ACCEPT 11 /* end of match */
#define U_ON 12
#define U_OFF 0
#define END_OFF 0
#define END_ON (2*U_ON)
typedef UChar BV[32]; /* bit vector */
typedef struct {
SType s_type;
SLen s_len; /* used for M_STR */
union {
char *str; /* string */
BV *bvp; /* class */
int jump;
} s_data;
} STATE;
#define STATESZ (sizeof(STATE))
typedef struct {
STATE *start, *stop;
} MACHINE;
/* tokens */
#define T_NONE 0 /* no token */
#define T_OR 1 /* | */
#define T_CAT 2 /* binary operator */
#define T_STAR 3 /* * */
#define T_PLUS 4 /* + */
#define T_Q 5 /* ? */
#define T_LP 6 /* ( */
#define T_RP 7 /* ) */
#define T_START 8 /* ^ */
#define T_END 9 /* $ */
#define T_ANY 10 /* . */
#define T_CLASS 11 /* starts with [ */
#define T_SLASH 12 /* \ */
#define T_CHAR 13 /* all the rest */
#define T_STR 14 /* string built of other tokens */
#define T_U 15
/* precedences and error codes */
#define L 0
#define EQ 1
#define G 2
#define E1 (-1)
#define E2 (-2)
#define E3 (-3)
#define E4 (-4)
#define E5 (-5)
#define E6 (-6)
#define E7 (-7)
#define MEMORY_FAILURE 5
#define ison(b,x) ((b)[((UChar)(x)) >> 3] & (1 << ((x) & 7)))
/* struct for the run time stack */
typedef struct {
STATE *m; /* save the machine ptr */
int u; /* save the u_flag */
char *s; /* save the active string ptr */
int sp; /* size of position stack */
int tp; /* offset to top entry of position stack */
char *ss; /* save the match start -- only used by REmatch */
} RT_STATE; /* run time state */
/* entry for the position stack */
typedef struct {
/* if we have not advanced beyond this character,
* do not bother trying another round.
*/
const char *pos;
/* run time stack frame responsible for removing this node */
int owner;
/* previous node is this - this->prev_offset. See RE_pos_pop() */
int prev_offset;
} RT_POS_ENTRY;
/* error trap */
extern int REerrno;
void RE_error_trap(int);
#ifndef GCC_NORETURN
#define GCC_NORETURN /* nothing */
#endif
extern MACHINE RE_u(void);
extern MACHINE RE_start(void);
extern MACHINE RE_end(void);
extern MACHINE RE_any(void);
extern MACHINE RE_str(char *, size_t);
extern MACHINE RE_class(BV *);
extern void RE_cat(MACHINE *, MACHINE *);
extern void RE_or(MACHINE *, MACHINE *);
extern void RE_close(MACHINE *);
extern void RE_poscl(MACHINE *);
extern void RE_01(MACHINE *);
extern void RE_panic(const char *) GCC_NORETURN;
#ifndef MAWK_H
extern char *str_str(char *, size_t, char *, size_t);
#endif
extern void RE_lex_init(char *, size_t);
extern int RE_lex(MACHINE *);
extern void RE_run_stack_init(void);
extern void RE_pos_stack_init(void);
extern RT_STATE *RE_new_run_stack(void);
extern RT_POS_ENTRY *RE_new_pos_stack(void);
extern RT_STATE *RE_run_stack_base;
extern RT_STATE *RE_run_stack_limit;
extern RT_STATE *RE_run_stack_empty;
extern RT_POS_ENTRY *RE_pos_stack_base;
extern RT_POS_ENTRY *RE_pos_stack_limit;
extern RT_POS_ENTRY *RE_pos_stack_empty;
#ifdef LOCAL_REGEXP
static /* inline */ RT_POS_ENTRY *
RE_pos_push(RT_POS_ENTRY * head, const RT_STATE * owner, const char *s)
{
head->pos = s;
head->owner = (int) (owner - RE_run_stack_base);
if (++head == RE_pos_stack_limit)
head = RE_new_pos_stack();
head->prev_offset = 1;
return head;
}
#if 0
static /* inline */ const char *
RE_pos_peek(const RT_POS_ENTRY * head)
{
const RT_POS_ENTRY *prev = head - head->prev_offset;
/* peeking below the bottom node can be useful when debugging,
* so we allow it. See RE_pos_stack_init().
*/
return prev->pos;
}
#endif
static /* inline */ const char *
RE_pos_pop(RT_POS_ENTRY ** head, const RT_STATE * current)
{
RT_POS_ENTRY *prev = *head - (*head)->prev_offset;
if (prev->owner == current - RE_run_stack_base) /* likely */
/* no need to preserve intervening nodes */
*head = prev;
else if (*head == prev)
RE_panic("unbalanced M_SAVE_POS and M_2JC");
else
(*head)->prev_offset += prev->prev_offset;
return prev->pos;
}
#endif /* LOCAL_REGEXP */
#endif /* REXP_H */