-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathindex.js
190 lines (167 loc) · 6.61 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
var got = require('got');
var url = require('url');
var stream = require('stream');
var crypto = require('crypto');
var parallel = require('parallel-stream');
// Public API of this module: the three stream factories defined below.
module.exports = {};
module.exports.RequestStream = RequestStream;
module.exports.GeneratePath = GeneratePath;
module.exports.SampleStream = SampleStream;
// HTTP methods GeneratePath will emit; log lines whose method field contains
// none of these strings are dropped.
const allowedMethods = ['GET', 'HEAD'];
/**
 * Decode a path according to the CloudFront access-log character encoding spec.
 * http://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html
 *
 * Only the escapes CloudFront adds when writing a log line are reversed:
 * control characters (codes below 32), codes 127 and above, and the explicit
 * list of punctuation below. Any other %XX sequence (for example `%2F`) is
 * left untouched. Matching is case-sensitive: only upper-case hex digits are
 * recognised.
 *
 * Each escape is decoded independently into a single character via
 * String.fromCharCode; multi-byte UTF-8 sequences are not reassembled.
 *
 * @param {string} path - a CloudFront-encoded path, optionally with a query string
 * @returns {string} the path with CloudFront's log encoding removed
 */
function cloudFrontDecode(path) {
  // Hex codes of the printable characters CloudFront escapes in log values.
  const encodedByCloudFront = ['3C', '3E', '22', '23', '25', '7B', '7D', '7C', '5C', '5E', '7E', '5B', '5D', '60', '27', '20'];

  // A single left-to-right pass: `%252F` becomes `%2F` and is not decoded again.
  return path.replace(/%([\dA-F]{2})/g, (match, hex) => {
    const code = parseInt(hex, 16);
    // 127 (DEL) is included: the spec covers character codes 127 and above.
    const isLogEscape = code < 32 || code >= 127 || encodedByCloudFront.includes(hex);
    return isLogEscape ? String.fromCharCode(code) : match;
  });
}
/**
 * Transform stream for converting a log line into a request descriptor.
 * Expects a line-oriented stream of log lines (strings or Buffers) and emits
 * objects of the form `{ path, method }`, plus `referer` for CloudFront lines
 * when `keepReferer` is set. Lines that cannot be parsed, or whose method is
 * not one of `allowedMethods`, are dropped silently.
 *
 * @param {string} type - log format, compared case-insensitively: 'cloudfront' or 'lb'.
 *   Any other value makes the stream drop every line.
 * @param {boolean} keepReferer - set to true if using cloudfront logs and you want to include the referer in the request
 * @returns {stream.Transform} an object-mode transform stream
 */
function GeneratePath(type, keepReferer = false) {
  const generatePath = new stream.Transform({ objectMode: true });

  // Substring match, because load balancer logs store the method with a leading quote (`"GET`).
  const isAllowed = (method) => Boolean(method) && allowedMethods.some((m) => method.includes(m));

  generatePath._transform = function(line, enc, callback) {
    if (!line) return callback();
    if (Buffer.isBuffer(line)) line = line.toString('utf-8');
    const logType = type.toLowerCase();

    if (logType === 'cloudfront') {
      const parts = line.split(/\s+/g);
      if (parts.length > 7) {
        // Field 7 is the URI stem, field 11 the query string ('-' when absent).
        const hasQuery = parts[11] && parts[11] !== '-';
        const path = cloudFrontDecode(hasQuery ? parts[7] + '?' + parts[11] : parts[7]);
        if (!path) return callback();

        const method = parts[5];
        if (isAllowed(method)) {
          const obj = { path, method };
          if (keepReferer && parts[9] && parts[9] !== '-') obj.referer = parts[9];
          generatePath.push(obj);
        }
      }
    } else if (logType === 'lb') {
      if (line.includes('Amazon Route 53 Health Check Service')) return callback();
      const parts = line.split(/\s+/g);
      if (parts.length < 12) return callback();

      // NOTE(review): 18-field lines presumably lack a leading type column, which
      // shifts the request fields one position to the left - confirm against the log formats.
      const methodIndex = parts.length === 18 ? 11 : 12;
      const rawMethod = parts[methodIndex];
      const rawUrl = parts[methodIndex + 1];

      // Short lines may not contain the URL field at all; url.parse(undefined)
      // would throw, so drop them like any other unparseable line.
      if (!rawUrl) return callback();
      const path = url.parse(rawUrl).path;
      if (!path) return callback();

      // The method is usually stored as `"GET`; the regex strips the non-alphabetical characters.
      if (isAllowed(rawMethod)) generatePath.push({ path, method: rawMethod.match(/[a-zA-Z]+/g)[0] });
    }
    callback();
  };

  return generatePath;
}
/**
 * Transform stream for replaying requests from a log of paths against a specified
 * host. The writable side expects objects of the form `{ path, method, referer }`
 * (string or Buffer values); entries whose path is missing or does not start with
 * '/' are skipped. For every request made, the readable side emits
 * `{ url, elapsedTime, statusCode, body }`.
 *
 * The returned stream has a `close()` method; once called, remaining input is
 * consumed without issuing further requests.
 *
 * @param {object} options
 * @param {string} options.baseurl - Required. An http or https url prepended to paths when making requests.
 * @param {boolean} options.strictSSL - Optional. If true (default), requires SSL/TLS certificates to be valid
 * @param {object} options.headers - Optional. Headers applied to every request; they override the replayed referer on conflict.
 * @param {number} options.hwm - Optional. Number of concurrent requests (default 100).
 * @param {object} options.agent - Optional. Agent handed to got; used as the https agent when its
 *   `protocol` contains 'https', otherwise as the http agent.
 * @returns {object} the parallel transform stream
 * @throws {Error} if options.baseurl is not provided
 */
function RequestStream(options) {
  options = options || {};
  if (!options.baseurl) throw new Error('options.baseurl should be an http:// or https:// baseurl for replay requests');

  // Resolve the default locally instead of writing it back onto the caller's options object.
  const concurrency = options.hwm || 100;

  // Log values may arrive as Buffers; got and URL need strings.
  const asString = (value) => (value && typeof value !== 'string' ? value.toString('utf8') : value);

  // Must stay a regular function: `this` is the stream it runs on.
  function transform(data, enc, callback) {
    if (this._closed) return setImmediate(callback);

    const method = data['method'];
    const referer = asString(data['referer']);
    const pathname = asString(data['path']);
    if (!pathname || pathname.indexOf('/') !== 0) return callback();

    const target = new URL(pathname, options.baseurl);
    const gotOptions = {
      method: method || 'GET',
      prefixUrl: options.baseurl,
      https: {
        rejectUnauthorized: options.strictSSL === false ? false : true
      },
      responseType: 'buffer',
      time: true,
      retry: { limit: 5 },
      // Non-2xx responses are results to record, not failures.
      throwHttpErrors: false
    };

    if (options.agent) {
      const scheme = options.agent.protocol.includes('https') ? 'https' : 'http';
      gotOptions.agent = { [scheme]: options.agent };
    }

    if (referer) {
      gotOptions.headers = { referer };
    }
    if (options.headers) {
      gotOptions.headers = { ...gotOptions.headers, ...options.headers };
    }

    got(target, gotOptions)
      .then(({ statusCode, body, timings }) => {
        this.push({
          url: target.toString(),
          elapsedTime: timings.phases.total,
          statusCode,
          body
        });
        callback();
      })
      .catch((error) => {
        return callback(error);
      });
  }

  const requestStream = parallel.transform(transform, { concurrency, objectMode: true });
  requestStream.close = function() {
    requestStream._closed = true;
  };
  return requestStream;
}
/**
 * Emit lines at a specified sample rate & with optional filter regex.
 * Behaves deterministically so given param/input combinations will return
 * reliably identical results: the keep/drop decision for the Nth accepted line
 * is derived from an MD5 hash of a fixed salt plus N, not from a random source.
 *
 * @param {object} options
 * @param {number|string} options.rate - Required. Sample rate, strictly between 0 and 1.
 * @param {string} options.filter - Optional. Regex pre-filter applied to input; lines that
 *   do not match are dropped and do not advance the sample counter.
 * @returns {stream.Transform} an object-mode transform stream
 * @throws {Error} if the rate is missing, not a number, or outside (0, 1)
 */
function SampleStream(options) {
  options = options || {};
  if (!options.rate) throw new Error('must specify a sample rate (0 < sample < 1)');

  const rate = parseFloat(options.rate);
  // Written as a negated range check so that NaN (a non-numeric rate) is rejected
  // too, instead of producing a stream that silently emits nothing.
  if (!(rate > 0 && rate < 1)) throw new Error('rate must be between 0 and 1');

  const sampleStream = new stream.Transform({ objectMode: true });
  sampleStream.count = 0;
  // The hash below is a 16-bit unsigned integer, so scale the rate onto that range.
  sampleStream.threshold = Math.round(rate * Math.pow(2, 16));
  if (options.filter) {
    sampleStream.filterFunction = new RegExp(options.filter);
  }

  sampleStream._transform = function(line, enc, callback) {
    if (!line) return callback();
    if (sampleStream.filterFunction && !sampleStream.filterFunction.test(line)) return callback();

    const hash = crypto
      .createHash('md5')
      .update('cloudfront-log-read-salt-' + sampleStream.count)
      .digest()
      .readUInt16LE(0);
    if (hash < sampleStream.threshold) sampleStream.push(line);

    sampleStream.count++;
    callback();
  };

  return sampleStream;
}