|
| 1 | +#!/usr/bin/env ruby |
# Print every subdomain found for each domain listed in the input file, one subdomain per line.
| 3 | +require 'yaml' |
| 4 | +require 'json' |
| 5 | +require 'erb' |
| 6 | +require 'active_record' |
| 7 | +require 'thinking_sphinx' |
| 8 | + |
# Bootstrap: locate this script's directory, load the application's worker
# modules, search helper and models relative to it, then configure the
# database and search connections for the selected environment.
@root_path = File.expand_path(File.dirname(__FILE__))
require @root_path + "/../app/workers/module/httpmodule.rb"
require @root_path + "/../app/workers/module/webdb2_class.rb"

require @root_path + "/../app/helpers/search_helper.rb"
include SearchHelper
require @root_path + "/../app/models/subdomain.rb"
require @root_path + "/../app/models/rule.rb"
require @root_path + "/../app/models/charts.rb"

# Work from the parent of this script's directory so relative paths resolve there.
Dir.chdir @root_path + "/../"
#puts "working dir: #{Dir.pwd}"

# Environment section to read from both YAML files; defaults to 'development'.
rails_env = ENV['RAILS_ENV'] || 'development'

# YAML.load_file opens and closes the file itself; the previous
# YAML::load(File.open(...)) form never closed the handle.
thinking_config = YAML.load_file(@root_path + "/../config/thinking_sphinx.yml")[rails_env]

config = YAML.load_file(@root_path + "/../config/database.yml")[rails_env]
ActiveRecord::Base.establish_connection(config)

# Compile-time switch between the ThinkingSphinx client and a raw Mysql2 client.
USE_THINKING_SPHINX = true
if USE_THINKING_SPHINX
  ThinkingSphinx::SphinxQL.functions!
  #ThinkingSphinx::Middlewares::DEFAULT.delete ThinkingSphinx::Middlewares::UTF8
  ThinkingSphinx::Configuration.instance.searchd.address = thinking_config['address']
  ThinkingSphinx::Configuration.instance.searchd.port = thinking_config['port']
else
  # NOTE(review): Mysql2 is not required anywhere in the visible part of this
  # file; confirm it is loaded before enabling this branch.
  # The 'mysql41' entry of thinking_sphinx.yml is used as the port here.
  @mysql ||= Mysql2::Client.new(:host => thinking_config['address'],
                                :username => thinking_config['connection_options']['username'],
                                :password => thinking_config['connection_options']['password'],
                                :database => thinking_config['connection_options']['database'],
                                :port => thinking_config['mysql41'],
                                :encoding => 'utf8', :reconnect => true)
end
| 42 | + |
# Fetches a single page of hosts from the 'subdomain_core' index.
#
# The search is restricted to records whose id is at least +max_id+ and is
# ordered by ascending id, so the id of the last record can be used by the
# caller as the starting point for the following page.
#
# @param query_info [String] search expression, passed through SphinxProcessor.parse
# @param max_id [Integer] lowest id to include in this page
# @param per_page [Integer] maximum number of records requested (defaults to
#   1000, the value that used to be hard-coded)
# @return [Array] two elements: the array of hosts found, and the id of the
#   last record seen converted with to_i (+max_id+ itself when nothing matched)
def query(query_info, max_id, per_page = 1000)
  match_query = SphinxProcessor.parse(query_info)

  # Upper bound of the id filter; the range needs an explicit end value.
  id_ceiling = 9_999_999_999

  options = { :match_mode => :extended, :index => 'subdomain_core',
              :with => { :id => max_id..id_ceiling },
              :sql => { :select => 'id,host' }, :per_page => per_page,
              :page => 1, :order => "id asc" }

  hosts = []
  # Track the last id in its own local instead of overwriting the argument.
  last_id = max_id
  ThinkingSphinx.search(match_query, options).each do |record|
    hosts << record.host
    last_id = record.id
  end

  [hosts, last_id.to_i]
end
| 57 | + |
# Yields every host matching +query_info+, walking the result set page by
# page through +query+.
#
# Paging starts at id 0. After each non-empty page the next request starts
# one past the last id that +query+ reported, and iteration stops at the
# first empty page.
#
# @param query_info [String] search expression forwarded unchanged to +query+
# @yieldparam host each host returned by +query+, in the order received
# @return [nil, Enumerator] nil once all pages were yielded; an Enumerator
#   over the same hosts when no block is given
def query_all(query_info)
  # Without a block the bare `yield` below would raise LocalJumpError.
  return enum_for(:query_all, query_info) unless block_given?

  next_id = 0
  loop do
    hosts, last_id = query(query_info, next_id)
    break if hosts.empty?

    hosts.each { |host| yield host }
    # Resume strictly after the last id already delivered.
    next_id = last_id + 1
  end
end
| 73 | + |
| 74 | +require 'domainatrix' |
| 75 | + |
# Reduces a host name to "<domain>.<public_suffix>" using Domainatrix.
#
# @param host [String, nil] host name to reduce; surrounding whitespace is ignored
# @return [String, nil] the joined domain and public suffix, or nil when the
#   input is nil/blank, when Domainatrix raises, or when either part comes
#   back nil or empty
def get_root_of_host(host)
  name = host.to_s.strip
  return nil if name.empty?

  url = Domainatrix.parse(name)
  domain = url.domain.to_s
  suffix = url.public_suffix.to_s
  # An empty string is truthy in Ruby, so a plain `domain && suffix` check
  # would let empty parts through and produce ".".
  return nil if domain.empty? || suffix.empty?

  "#{domain}.#{suffix}"
rescue StandardError
  # Best effort: an unparseable host is reported as nil rather than raised.
  nil
end
| 86 | + |
# Entry point: read the file named by the first command-line argument and,
# for each non-blank line, print "==><root domain>" followed by every host
# that query_all yields for the query host="<root domain>".
abort "usage: #{File.basename($PROGRAM_NAME)} DOMAIN_FILE" if ARGV.empty?

# File.foreach closes the file when iteration ends; a bare File.open(...).each
# left the handle open.
File.foreach(ARGV[0]) do |line|
  name = line.strip
  next if name.empty?

  host = get_root_of_host(name)
  if host.nil?
    # get_root_of_host returns nil for hosts it cannot reduce; concatenating
    # nil onto a String would raise TypeError and stop the whole run.
    warn "skipping unparseable host: #{name}"
    next
  end

  puts "==>#{host}"
  query_all("host=\"#{host}\"") { |h| puts h }
end
0 commit comments