-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlex-io.cpp
254 lines (202 loc) · 5.62 KB
/
lex-io.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
/* PET
* Platform for Experimentation with efficient HPSG processing Techniques
* (C) 1999 - 2002 Ulrich Callmeier uc@coli.uni-sb.de
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* fast memory mapped I/O for lexer - implemented with high lexer
throughput in mind */
/* functionality provided:
- stack of input files to handle include files transparently
- efficient arbitrary lookahead, efficient buffer access via mark()
*/
/* when mmap(2) is available (HAVE_MMAP, typically unix) we essentially just
map the whole file, which gives both good performance (minimizes copying)
and easy use; otherwise (e.g. windows) the whole file is read into a
malloc'ed buffer
*/
#include "pet-config.h"
#include "lex-io.h"
#include "errors.h"
#include "options.h"
#include "logging.h"
#include <cassert>
#include <cstring>
#include <cstdlib>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#ifdef HAVE_MMAP
#include <sys/mman.h>
#endif
using std::string;
// Stack of nested lexer inputs: push_file() and push_string() add an entry on
// top, pop_file() removes the top entry again.
static lex_file file_stack[MAX_LEX_NEST];
// Number of entries currently on file_stack.
static int file_nest = 0;
// Input currently being lexed: the top entry of file_stack, or NULL once
// pop_file() has removed the last entry.
lex_file *CURR;
// Running count of newline characters passed over by LConsume(), summed over
// all inputs.
int total_lexed_lines = 0;
struct lex_location *new_location(const char *fname, int linenr, int colnr)
{
struct lex_location *loc = (struct lex_location *) malloc(sizeof(struct lex_location));
loc->fname = fname;
loc->linenr = linenr;
loc->colnr = colnr;
return loc;
}
/**
 * Open the file \a fname and make it the current lexer input by pushing it
 * onto the stack of nested inputs.
 *
 * With HAVE_MMAP the file is mapped read-only into memory; otherwise it is
 * read completely into a malloc'ed, NUL-terminated buffer.
 * NOTE(review): the mapped buffer is not NUL-terminated, unlike the malloc'ed
 * one, and mmap() of an empty file is expected to fail (POSIX rejects
 * zero-length mappings) - confirm both are acceptable for callers.
 *
 * \param fname name of the file to open
 * \param info  optional label (may be NULL); a private copy is kept and
 *              logged together with the file name by the first LConsume()
 *              on this input
 * \throw tError if the nesting limit is reached or the file cannot be
 *        opened, inspected, mapped/read, or recorded; in that case nothing
 *        is pushed and everything acquired so far is released again
 */
void push_file(const string &fname, const char *info) {
  lex_file f;
  struct stat statbuf;

  if(file_nest >= MAX_LEX_NEST)
    throw tError(string("too many nested includes (in ")
                 + fname + ") - giving up");

#ifndef WINDOWS
  f.fd = open(fname.c_str(), O_RDONLY);
#else
  f.fd = open(fname.c_str(), O_RDONLY | O_BINARY);
#endif

  if(f.fd < 0)
    throw tError("error opening `" + fname + "': " + string(strerror(errno)));

  // From here on the descriptor (and later the buffer) must not leak when
  // one of the remaining steps fails. The error text is built before the
  // exception is thrown, so the cleanup below cannot disturb errno for it.
  bool have_buff = false;
  f.buff = NULL;
  try {
    if(fstat(f.fd, &statbuf) < 0)
      throw tError("couldn't fstat `" + fname + "': "
                   + string(strerror(errno)));

    f.len = statbuf.st_size;

#ifdef HAVE_MMAP
    f.buff = (char *) mmap(0, f.len, PROT_READ, MAP_SHARED, f.fd, 0);

    if(f.buff == MAP_FAILED)
      throw tError("couldn't mmap `" + fname + "': "
                   + string(strerror(errno)));
    have_buff = true;
#else
    f.buff = (char *) malloc(f.len + 1);
    if(f.buff == 0)
      throw tError("couldn't malloc for `" + fname + "': "
                   + string(strerror(errno)));
    have_buff = true;

    if((size_t) read(f.fd,f.buff,f.len) != f.len)
      throw tError("couldn't read from `" + fname + "': "
                   + string(strerror(errno)));

    f.buff[f.len] = '\0';
#endif

    // pop_file() uses a non-NULL fname to tell files from string inputs, so
    // a failed copy must not be pushed silently.
    f.fname = strdup(fname.c_str());
    if(f.fname == NULL)
      throw tError("couldn't strdup file name `" + fname + "': "
                   + string(strerror(errno)));
  }
  catch(...) {
    if(have_buff) {
#ifdef HAVE_MMAP
      munmap(f.buff, f.len);
#else
      free(f.buff);
#endif
    }
    close(f.fd);
    throw;
  }

  f.pos = 0;
  f.linenr = 1; f.colnr = 1;
  f.info = (info != NULL ? strdup(info) : NULL);

  file_stack[file_nest++] = f;

  CURR = &(file_stack[file_nest-1]);
}
void push_string(const string &input, const char *info) {
lex_file f;
if(file_nest >= MAX_LEX_NEST)
throw tError("too many nested includes (in string) - giving up");
f.buff = strdup(input.c_str());
if(f.buff == 0)
throw tError("couldn't strdup for string include: "
+ string(strerror(errno)));
f.len = strlen(f.buff);
f.fname = NULL;
f.fd = -1;
f.pos = 0;
f.linenr = 1; f.colnr = 1;
f.info = (info != NULL ? strdup(info) : NULL);
file_stack[file_nest++] = f;
CURR = &(file_stack[file_nest-1]);
} // push_string()
int pop_file() {
lex_file f;
if(file_nest <= 0) return 0;
f = file_stack[--file_nest];
if(file_nest > 0)
CURR = &(file_stack[file_nest-1]);
else
CURR = NULL;
#ifdef HAVE_MMAP
if(f.fname) {
if(munmap(f.buff, f.len) != 0)
throw tError("couldn't munmap `" + string(f.fname)
+ "': " + string(strerror(errno)));
} // if
else {
//
// even when mmap() is in use, includes from strings were directly copied
// into the input buffer.
//
free(f.buff);
} // else
#else
free(f.buff);
#endif
if(f.fname) {
if(close(f.fd) != 0)
throw tError("couldn't close from `" + string(f.fname)
+ "': " + string(strerror(errno)));
} // if
return 1;
}
int curr_line()
{
assert(file_nest > 0);
return CURR->linenr;
}
int curr_col()
{
assert(file_nest > 0);
return CURR->colnr;
}
char *curr_fname()
{
assert(file_nest > 0);
return CURR->fname;
}
// Info label of the input most recently announced by LConsume(); stays 0
// until the first announcement.
char *last_info = 0;
int LConsume(int n)
// consume lexical input
{
int i;
assert(n >= 0);
if(CURR->pos + n > CURR->len)
{
LOG(logSyntax, ERROR, "nothing to consume...");
return 0;
}
if(CURR->info)
{
{
if(last_info != CURR->info) {
LOG(logApplC, INFO,
CURR->info << " `" << CURR->fname << "'... ");
}
else {
LOG(logApplC, INFO, "`" << CURR->fname << "'... ");
}
}
last_info = CURR->info;
CURR->info = NULL;
}
for(i = 0; i < n; ++i)
{
CURR->colnr++;
if(CURR->buff[CURR->pos + i] == '\n')
{
CURR->colnr = 1;
CURR->linenr++;
total_lexed_lines ++;
}
}
CURR->pos += n;
return 1;
}
/**
 * Pointer to the next unconsumed character of the current input, giving
 * direct access to the input buffer.
 *
 * \return the address of the character at the current position, or NULL
 *         when the position has reached the end of the input
 */
char *LMark()
{
  if(CURR->pos < CURR->len)
    return CURR->buff + CURR->pos;

  return NULL;
}