Commit eba51b2

Test factored out function
1 parent: d86d8f4

1 file changed (+26, -2)

src/robot.rs

@@ -144,9 +144,8 @@ http_request_handler!(robots_access_handler, |request: &mut http::Request| {
         Some(user_agent) =>
             match user_agent.to_str() {
                 Ok(ua) => {
-                    let mut matcher = DefaultMatcher::default();
                     ngx_log_debug_http!(request, "matching user agent {} and path {} against robots.txt contents: \n{}", ua, path, co.robots_txt_contents);
-                    let allowed = matcher.one_agent_allowed_by_robots(&co.robots_txt_contents, extract_user_agent(ua), path);
+                    let allowed = allow_access(&co.robots_txt_contents, ua, path);
                     if allowed {
                         ngx_log_debug_http!(request, "robots.txt allowed");
                         core::Status::NGX_DECLINED
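
This hunk is the whole of the change on the call side: constructing the DefaultMatcher and extracting the agent token move out of the request handler and into the new allow_access helper below, so that the matching logic can be unit-tested without standing up an nginx request, which is the point of this commit.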
@@ -176,6 +175,20 @@ http_request_handler!(robots_access_handler, |request: &mut http::Request| {
     }
 });
 
+// Determine whether the given user agent is allowed to access the given path according
+// to the given content of robots.txt. Access is allowed if and only if true is returned.
+fn allow_access(robots_txt_contents: &str, user_agent: &str, path: &str) -> bool {
+    // Always allow robots.txt to be accessed -- this gives web crawlers the option
+    // of obeying robots.txt. (Any other files which should always be accessed should
+    // be allowed via robots.txt.)
+    if path == ROBOTS_TXT_REQUEST_PATH {
+        true
+    } else {
+        let mut matcher = DefaultMatcher::default();
+        matcher.one_agent_allowed_by_robots(&robots_txt_contents, extract_user_agent(user_agent), path)
+    }
+}
+
 #[no_mangle]
 extern "C" fn ngx_http_robots_commands_set_robots_txt_path(
     cf: *mut ngx_conf_t,
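
For readers who want to poke at the helper outside nginx, the sketch below restates it as a standalone program. Treat it as a hedged reconstruction, not repository code: it assumes DefaultMatcher is the one from the robotstxt crate (a Rust port of Google's robots.txt matcher), that ROBOTS_TXT_REQUEST_PATH is "/robots.txt", and it guesses a body for extract_user_agent from the behaviour the existing tests pin down ("curl/8.7.1" -> "curl", "Goo1glebot_2.1" -> "Goo").

use robotstxt::DefaultMatcher;

// Assumed value; the real constant is defined elsewhere in robot.rs.
const ROBOTS_TXT_REQUEST_PATH: &str = "/robots.txt";

// Assumed reconstruction: keep the leading run of letters, '-' and '_',
// which reproduces the existing extract_user_agent test expectations.
fn extract_user_agent(user_agent: &str) -> &str {
    let end = user_agent
        .find(|c: char| !(c.is_ascii_alphabetic() || c == '-' || c == '_'))
        .unwrap_or(user_agent.len());
    &user_agent[..end]
}

// The factored-out function, as added by this commit.
fn allow_access(robots_txt_contents: &str, user_agent: &str, path: &str) -> bool {
    // robots.txt itself must stay reachable so crawlers can read the rules.
    if path == ROBOTS_TXT_REQUEST_PATH {
        true
    } else {
        let mut matcher = DefaultMatcher::default();
        matcher.one_agent_allowed_by_robots(robots_txt_contents, extract_user_agent(user_agent), path)
    }
}

fn main() {
    let robots = "User-agent: Xbot\nDisallow: /z";
    assert!(allow_access(robots, "XBot/3.2.1", "/robots.txt")); // always reachable
    assert!(!allow_access(robots, "XBot/3.2.1", "/z")); // blocked by the prefix rule
    assert!(allow_access(robots, "YBot/3.2.1", "/z")); // other agents are unaffected
    println!("allow_access behaves as the new tests expect");
}

Note that the matcher is only constructed on the else path, so the robots.txt short-circuit never pays for building one; and since one_agent_allowed_by_robots takes the matcher mutably, a fresh DefaultMatcher per call is the simple, safe choice.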
@@ -224,4 +237,15 @@ mod test {
         assert_eq!("Goo", extract_user_agent("Goo1glebot_2.1"));
         assert_eq!("curl", extract_user_agent("curl/8.7.1"));
     }
+
+    #[test]
+    fn test_allow_access() {
+        assert_eq!(true, allow_access("User-agent: Xbot\nDisallow: /", "XBot/3.2.1", "/robots.txt"));
+        assert_eq!(false, allow_access("User-agent: Xbot\nDisallow: /", "XBot/3.2.1", "/"));
+        assert_eq!(true, allow_access("User-agent: Xbot\nDisallow: /", "YBot/3.2.1", "/"));
+        assert_eq!(false, allow_access("User-agent: Xbot\nDisallow: /z", "XBot/3.2.1", "/z"));
+        assert_eq!(true, allow_access("User-agent: Xbot\nDisallow: /z", "XBot/3.2.1", "/"));
+        assert_eq!(true, allow_access("User-agent: Xbot\nDisallow: /z", "XBot/3.2.1", "/w"));
+        assert_eq!(false, allow_access("User-agent: Xbot\nDisallow: /", "XBot", "/"));
+    }
 }
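
Taken together, the new assertions pin down both branches of allow_access: /robots.txt stays reachable even under a blanket Disallow: /; a Disallow: /z rule blocks exactly the paths under that prefix ("/z" is denied, "/" and "/w" are not); and matching keys on the extracted, case-insensitive agent name, so "XBot/3.2.1" and a bare "XBot" are both treated as Xbot while "YBot" falls through and is allowed. Running cargo test test_allow_access exercises just this case.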
