Skip to content

Commit 79ce66f

Browse files
authored
Merge pull request #5 from kevinmarks/patch-2
handle images without extensions, don't look for links in zip, pdf
2 parents 0789f77 + 0ffbcce commit 79ce66f

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

spider.js

+12-6
Original file line numberDiff line numberDiff line change
@@ -116,13 +116,15 @@ function process_link(current) {
116116
}
117117

118118
var page_url = url.parse(current);
119-
120-
// Add a slash if the path is not a file (does not end in a slash and does not have a dot)
121-
var components = page_url.path.split("/");
122-
if(!page_url.path.match(/\/$/) && !components[components.length-1].match(/\./)) {
123-
page_url.path += "/";
119+
if(response.headers['content-type'] && response.headers['content-type'].match(/image/)) {
120+
console.log("not making this a directory: "+response.headers['content-type'])
121+
} else {
122+
// Add a slash if the path is not a file (does not end in a slash and does not have a dot)
123+
var components = page_url.path.split("/");
124+
if(!page_url.path.match(/\/$/) && !components[components.length-1].match(/\./)) {
125+
page_url.path += "/";
126+
}
124127
}
125-
126128
// Add "index.html" if the path ends in a slash
127129
if(page_url.path.match(/\/$/)) {
128130
page_url.path += "index.html";
@@ -158,6 +160,10 @@ function process_link(current) {
158160
});
159161
} else if(response.headers['content-type'] && response.headers['content-type'].match(/javascript/)) {
160162

163+
} else if(response.headers['content-type'] && response.headers['content-type'].match(/zip/)) {
164+
165+
} else if(response.headers['content-type'] && response.headers['content-type'].match(/pdf/)) {
166+
161167
} else {
162168
// assume HTML if it's not JS or CSS
163169
var links = $("a");

0 commit comments

Comments
 (0)