@@ -144,9 +144,8 @@ http_request_handler!(robots_access_handler, |request: &mut http::Request| {
         Some(user_agent) =>
             match user_agent.to_str() {
                 Ok(ua) => {
-                    let mut matcher = DefaultMatcher::default();
                     ngx_log_debug_http!(request, "matching user agent {} and path {} against robots.txt contents:\n{}", ua, path, co.robots_txt_contents);
-                    let allowed = matcher.one_agent_allowed_by_robots(&co.robots_txt_contents, extract_user_agent(ua), path);
+                    let allowed = allow_access(&co.robots_txt_contents, ua, path);
                     if allowed {
                         ngx_log_debug_http!(request, "robots.txt allowed");
                         core::Status::NGX_DECLINED
@@ -176,6 +175,20 @@ http_request_handler!(robots_access_handler, |request: &mut http::Request| {
     }
 });
 
+// Determine whether the given user agent is allowed to access the given path according
+// to the given contents of robots.txt. Access is allowed if and only if true is returned.
+fn allow_access(robots_txt_contents: &str, user_agent: &str, path: &str) -> bool {
+    // Always allow robots.txt itself to be accessed -- this gives web crawlers the option
+    // of obeying robots.txt. (Any other files that should always be accessible should
+    // be allowed via robots.txt.)
+    if path == ROBOTS_TXT_REQUEST_PATH {
+        true
+    } else {
+        let mut matcher = DefaultMatcher::default();
+        matcher.one_agent_allowed_by_robots(robots_txt_contents, extract_user_agent(user_agent), path)
+    }
+}
+
 #[no_mangle]
 extern "C" fn ngx_http_robots_commands_set_robots_txt_path(
     cf: *mut ngx_conf_t,
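
For context, `DefaultMatcher` and its `one_agent_allowed_by_robots` method come from the `robotstxt` crate, a Rust port of Google's robots.txt parser. A minimal standalone sketch of the same call outside the nginx module (the robots.txt body and paths below are invented for illustration):

```rust
use robotstxt::DefaultMatcher;

fn main() {
    let robots_txt = "User-agent: Xbot\nDisallow: /private";
    // The matcher carries mutable parse state, hence `let mut`.
    let mut matcher = DefaultMatcher::default();
    assert!(matcher.one_agent_allowed_by_robots(robots_txt, "Xbot", "/public/page"));
    assert!(!matcher.one_agent_allowed_by_robots(robots_txt, "Xbot", "/private/page"));
}
```
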
@@ -224,4 +237,15 @@ mod test {
         assert_eq!("Goo", extract_user_agent("Goo1glebot_2.1"));
         assert_eq!("curl", extract_user_agent("curl/8.7.1"));
     }
+
+    #[test]
+    fn test_allow_access() {
+        assert!(allow_access("User-agent: Xbot\nDisallow: /", "XBot/3.2.1", "/robots.txt"));
+        assert!(!allow_access("User-agent: Xbot\nDisallow: /", "XBot/3.2.1", "/"));
+        assert!(allow_access("User-agent: Xbot\nDisallow: /", "YBot/3.2.1", "/"));
+        assert!(!allow_access("User-agent: Xbot\nDisallow: /z", "XBot/3.2.1", "/z"));
+        assert!(allow_access("User-agent: Xbot\nDisallow: /z", "XBot/3.2.1", "/"));
+        assert!(allow_access("User-agent: Xbot\nDisallow: /z", "XBot/3.2.1", "/w"));
+        assert!(!allow_access("User-agent: Xbot\nDisallow: /", "XBot", "/"));
+    }
 }
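
`extract_user_agent` is defined elsewhere in this file; judging from the tests above, it keeps only the leading alphabetic product token of the `User-Agent` value (e.g. `"curl/8.7.1"` becomes `"curl"`). A hypothetical reconstruction consistent with those tests, not necessarily the actual definition:

```rust
// Hypothetical sketch of extract_user_agent, inferred from the tests above:
// truncate the User-Agent value at the first non-alphabetic character.
fn extract_user_agent(user_agent: &str) -> &str {
    let end = user_agent
        .find(|c: char| !c.is_ascii_alphabetic())
        .unwrap_or(user_agent.len());
    &user_agent[..end]
}
```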