Skip to content

Commit 01e059f

Browse files
committed
add dump_all_hosts_of_file
1 parent 285ae95 commit 01e059f

File tree

1 file changed

+93
-0
lines changed

1 file changed

+93
-0
lines changed

tools/dump_all_hosts_of_file.rb

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/usr/bin/env ruby
2+
# Dump every subdomain of each domain listed in the input file, one subdomain per line.
3+
require 'yaml'
4+
require 'json'
5+
require 'erb'
6+
require 'active_record'
7+
require 'thinking_sphinx'
8+
9+
# Absolute path of the directory containing this script. All project files
# and config files below are located relative to it.
@root_path = File.expand_path(File.dirname(__FILE__))
require "#{@root_path}/../app/workers/module/httpmodule.rb"
require "#{@root_path}/../app/workers/module/webdb2_class.rb"

require "#{@root_path}/../app/helpers/search_helper.rb"
include SearchHelper
require "#{@root_path}/../app/models/subdomain.rb"
require "#{@root_path}/../app/models/rule.rb"
require "#{@root_path}/../app/models/charts.rb"

# Run from the directory one level above this script.
Dir.chdir "#{@root_path}/../"
#puts "working dir: #{Dir.pwd}"

# Both config files are keyed by environment name; default to development.
rails_env = ENV['RAILS_ENV'] || 'development'
# YAML.load_file opens, parses and closes the file, so no handle is leaked.
thinking_config = YAML.load_file("#{@root_path}/../config/thinking_sphinx.yml")[rails_env]

config = YAML.load_file("#{@root_path}/../config/database.yml")[rails_env]
ActiveRecord::Base.establish_connection(config)

# Switch between going through ThinkingSphinx (true) and a raw Mysql2 client
# pointed at the address/port from thinking_sphinx.yml (false).
USE_THINKING_SPHINX = true
if USE_THINKING_SPHINX
  ThinkingSphinx::SphinxQL.functions!
  #ThinkingSphinx::Middlewares::DEFAULT.delete ThinkingSphinx::Middlewares::UTF8
  # Point ThinkingSphinx at the searchd instance named in thinking_sphinx.yml.
  ThinkingSphinx::Configuration.instance.searchd.address = thinking_config['address']
  ThinkingSphinx::Configuration.instance.searchd.port = thinking_config['port']
else
  # NOTE(review): Mysql2 is not required anywhere in this script; this branch
  # presumably relies on another require having loaded it -- confirm before
  # flipping USE_THINKING_SPHINX to false.
  @mysql ||= Mysql2::Client.new(:host => thinking_config['address'],
                                :username => thinking_config['connection_options']['username'],
                                :password => thinking_config['connection_options']['password'],
                                :database => thinking_config['connection_options']['database'],
                                :port => thinking_config['mysql41'],
                                :encoding => 'utf8', :reconnect => true)
end
42+
43+
# Fetches one page of hosts from the `subdomain_core` index.
#
# The page holds at most 1000 rows whose id is >= max_id, ordered by id
# ascending, so the caller can resume from the returned id.
#
# @param query_info [String] query text handed to SphinxProcessor.parse
#   (defined outside this file -- presumably by the required search helper)
# @param max_id [Integer] lowest id to include in this page
# @return [Array] two elements: the list of `host` values in result order,
#   and the id of the last row seen as an Integer (max_id itself, converted
#   with to_i, when the page is empty)
def query(query_info, max_id)
  match_query = SphinxProcessor.parse(query_info)

  # Upper bound of the id filter; simply a value larger than any expected id.
  id_ceiling = 9_999_999_999
  options = {:match_mode => :extended, :index => 'subdomain_core',
             :with => {:id => max_id..id_ceiling},
             :sql => {:select => 'id,host'}, :per_page => 1000,
             :page => 1, :order => "id asc"}

  hosts = []
  # Track the cursor in its own local instead of overwriting the parameter.
  last_id = max_id
  ThinkingSphinx.search(match_query, options).each do |record|
    hosts << record.host
    last_id = record.id
  end

  [hosts, last_id.to_i]
end
57+
58+
# Yields every host matching query_info, one at a time.
#
# Pages through the results by calling `query` repeatedly: each call starts
# at the id right after the last one returned by the previous call, and
# iteration stops at the first empty page.
#
# @param query_info [String] query text passed through to `query`
# @yield [host] each host value, in the order `query` returns them
# @return [nil, Enumerator] nil once all pages are consumed; an Enumerator
#   over the hosts when called without a block
def query_all(query_info)
  return enum_for(:query_all, query_info) unless block_given?

  next_id = 0
  loop do
    hosts, last_id = query(query_info, next_id)
    break if hosts.empty?

    hosts.each { |host| yield host }
    # Resume just past the last row already yielded so it is not fetched again.
    next_id = last_id + 1
  end
end
73+
74+
require 'domainatrix'
75+
76+
# Reduces a host name to "<domain>.<public_suffix>" using Domainatrix.
#
# @param host [String] host name (or URL) to parse
# @return [String, nil] the root domain, or nil when Domainatrix raises or
#   yields no domain / public suffix
def get_root_of_host(host)
  parsed = Domainatrix.parse(host)
  # to_s covers both nil and empty-string parts; Domainatrix appears to
  # report missing parts as "" for blank input, which would otherwise make
  # this method return "." -- TODO confirm against the installed gem version.
  domain = parsed.domain.to_s
  suffix = parsed.public_suffix.to_s
  return nil if domain.empty? || suffix.empty?

  "#{domain}.#{suffix}"
rescue StandardError
  # Deliberately best-effort: an unparseable host is reported as nil.
  nil
end
86+
87+
# Entry point. Reads the file named by the first command-line argument, one
# host per line. For each line it prints "==>" plus the root domain, then
# every host returned by query_all for that root domain.
input_path = ARGV[0]
abort "usage: #{File.basename($PROGRAM_NAME)} <file-with-one-host-per-line>" unless input_path

# File.foreach closes the file when iteration ends, even on error.
File.foreach(input_path) do |line|
  candidate = line.strip
  next if candidate.empty?

  host = get_root_of_host(candidate)
  unless host
    # Report on stderr so stdout stays a clean list of hosts.
    warn "skipping unparseable host: #{candidate}"
    next
  end

  puts "==>#{host}"
  query_all("host=\"#{host}\"") { |h| puts h }
end

0 commit comments

Comments
 (0)