def bale_silk(harvest, output_file):
    """Write the IPv4 indicators in *harvest* out as SiLK IPSet files.

    One IPSet is built per (source host, source name, direction) combination
    and saved as ``<silk_output_path>/<host>_<name>_<direction>.set``, where
    ``silk_output_path`` is read from the ``[Baler]`` section of
    ``combine.cfg``.

    Args:
        harvest: iterable of indicator records, indexed as
            ``[0]`` value, ``[1]`` type, ``[2]`` direction, ``[3]`` reference.
        output_file: unused here; accepted so this function can be called
            with the same ``(harvest, output_file)`` arguments as the other
            balers.

    Raises:
        ImportError: the ``silk`` Python bindings cannot be imported.
        ValueError: ``silk_output_path`` is missing from the ``[Baler]``
            section of ``combine.cfg``.
    """
    try:
        from silk import IPSet
    except ImportError:
        # Only a failed import is translated; anything else (e.g. Ctrl-C)
        # must propagate untouched.
        raise ImportError('Failed to import IPSet from silk package. '
                          'You probably need to install silk or reconfigure '
                          'virtualenv with: --system-site-packages')

    config = ConfigParser.SafeConfigParser()
    cfg_success = config.read('combine.cfg')
    if not cfg_success:
        logger.error('bale_SiLK: Could not read combine.cfg.\n')
        logger.error('HINT: edit combine-example.cfg and save as combine.cfg.\n')
        return

    if not config.has_option('Baler', 'silk_output_path'):
        # A bare string is not a valid exception object; raise a real
        # exception so the hint actually reaches the user.
        raise ValueError('Please check the combine.cfg file for the '
                         'silk_output_path field in the [Baler] section')
    path = config.get('Baler', 'silk_output_path')

    # Ugly regex to parse out source host and filename
    # might be nice to rewrite the URL lists as config files, e.g.
    # [source_name]
    # url =
    # direction =
    # confidence =
    # NOTE(review): group 2 captures the text immediately after the first '/'
    # following the host, up to the next '/' or '.', i.e. the first path
    # segment rather than the last one. Confirm that this is intended.
    source_pattern = re.compile('(?:https?|file)://(.*?)/.*?([^/.]*)')

    ipsets = {}
    for indicator in harvest:
        value, kind = indicator[0], indicator[1]

        if kind == "FQDN":
            # IPSets can only hold addresses; FQDNs are skipped on purpose.
            continue
        if kind != 'IPv4':
            logger.info("don't yet know what to do with: %s[%s]" % (kind, value))
            continue

        direction, reference = indicator[2], indicator[3]
        source_match = source_pattern.match(reference)
        if not source_match:
            logger.info("can't determine source from ref (%s) so don't know what to do with: %s[%s]" % (reference, kind, value))
            continue

        # One IPSet per direction, per source.
        setname = str(source_match.group(1) + "_" + source_match.group(2) + "_" + direction)
        if setname not in ipsets:
            ipsets[setname] = IPSet()
        ipsets[setname].add(value)

    for setname, ipset in ipsets.items():
        outfile = path + "/" + setname + ".set"
        logger.info("saving IPSet to: %s" % outfile)
        if os.path.isfile(outfile):
            # Replace an existing set by writing a sibling temp file and
            # renaming it over the old one.
            ipset.save(outfile + ".tmp")
            os.rename(outfile + ".tmp", outfile)
        else:
            ipset.save(outfile)