From 13d6724bac22d79cd89059ea56890376e1ff1ace Mon Sep 17 00:00:00 2001 From: Blake Anderson Date: Fri, 11 Mar 2016 10:48:00 -0800 Subject: [PATCH] added support for a compact byte distribution. Given a mapping file for byte values 256->16, export a length-16 array of the byte counts --- compact_bd.txt | 256 ++++++++++++++++++++++++++++++++++++++++++++++++ src/config.c | 5 + src/config.h | 1 + src/p2f.c | 43 ++++++++ src/p2f.h | 3 + src/pcap2flow.c | 27 +++++ src/pkt_proc.c | 4 + 7 files changed, 339 insertions(+) create mode 100644 compact_bd.txt diff --git a/compact_bd.txt b/compact_bd.txt new file mode 100644 index 00000000..f7a25ae1 --- /dev/null +++ b/compact_bd.txt @@ -0,0 +1,256 @@ +0 0 +1 12 +2 11 +3 11 +4 12 +5 12 +6 12 +7 2 +8 11 +9 2 +10 1 +11 12 +12 12 +13 1 +14 13 +15 12 +16 12 +17 13 +18 12 +19 12 +20 2 +21 11 +22 12 +23 12 +24 12 +25 2 +26 12 +27 12 +28 2 +29 12 +30 12 +31 2 +32 3 +33 12 +34 11 +35 12 +36 12 +37 13 +38 12 +39 12 +40 11 +41 11 +42 2 +43 11 +44 7 +45 3 +46 11 +47 0 +48 0 +49 4 +50 4 +51 11 +52 0 +53 7 +54 2 +55 2 +56 7 +57 0 +58 11 +59 12 +60 13 +61 7 +62 12 +63 10 +64 2 +65 0 +66 12 +67 4 +68 2 +69 0 +70 12 +71 11 +72 11 +73 2 +74 2 +75 7 +76 11 +77 0 +78 11 +79 11 +80 2 +81 12 +82 11 +83 11 +84 5 +85 10 +86 2 +87 12 +88 12 +89 10 +90 12 +91 12 +92 12 +93 10 +94 12 +95 10 +96 12 +97 3 +98 7 +99 1 +100 5 +101 6 +102 2 +103 11 +104 7 +105 7 +106 11 +107 12 +108 7 +109 0 +110 11 +111 4 +112 0 +113 10 +114 4 +115 7 +116 4 +117 0 +118 7 +119 11 +120 11 +121 11 +122 11 +123 13 +124 12 +125 12 +126 2 +127 12 +128 12 +129 12 +130 2 +131 10 +132 12 +133 12 +134 12 +135 12 +136 12 +137 10 +138 12 +139 2 +140 12 +141 12 +142 13 +143 12 +144 12 +145 13 +146 9 +147 9 +148 12 +149 13 +150 10 +151 10 +152 12 +153 13 +154 12 +155 12 +156 12 +157 12 +158 13 +159 9 +160 12 +161 12 +162 7 +163 12 +164 12 +165 12 +166 12 +167 13 +168 11 +169 12 +170 13 +171 9 +172 12 +173 12 +174 9 +175 9 +176 12 +177 12 +178 11 +179 13 +180 12 +181 9 +182 11 +183 13 +184 
7 +185 10 +186 10 +187 2 +188 12 +189 12 +190 10 +191 13 +192 12 +193 9 +194 7 +195 13 +196 13 +197 9 +198 9 +199 9 +200 13 +201 8 +202 11 +203 8 +204 11 +205 13 +206 10 +207 11 +208 9 +209 2 +210 2 +211 10 +212 13 +213 2 +214 11 +215 2 +216 13 +217 9 +218 13 +219 12 +220 12 +221 10 +222 13 +223 13 +224 10 +225 12 +226 9 +227 10 +228 2 +229 2 +230 13 +231 9 +232 13 +233 8 +234 9 +235 9 +236 10 +237 10 +238 2 +239 11 +240 10 +241 13 +242 10 +243 12 +244 10 +245 10 +246 2 +247 13 +248 11 +249 12 +250 12 +251 13 +252 2 +253 2 +254 14 +255 15 diff --git a/src/config.c b/src/config.c index 880348f8..fce585ab 100644 --- a/src/config.c +++ b/src/config.c @@ -172,6 +172,9 @@ int config_parse_command(struct configuration *config, } else if (match(command, "dist")) { parse_check(parse_bool(&config->byte_distribution, arg, num)); + } else if (match(command, "cdist")) { + parse_check(parse_string(&config->compact_byte_distribution, arg, num)); + } else if (match(command, "entropy")) { parse_check(parse_bool(&config->report_entropy, arg, num)); @@ -388,6 +391,7 @@ void config_print(FILE *f, const struct configuration *c) { fprintf(f, "type = %u\n", c->type); fprintf(f, "zeros = %u\n", c->include_zeroes); fprintf(f, "dist = %u\n", c->byte_distribution); + fprintf(f, "cdist = %s\n", val(c->compact_byte_distribution)); fprintf(f, "entropy = %u\n", c->report_entropy); fprintf(f, "wht = %u\n", c->report_wht); fprintf(f, "hd = %u\n", c->report_hd); @@ -428,6 +432,7 @@ void config_print_json(FILE *f, const struct configuration *c) { fprintf(f, "\t\"type\": %u,\n", c->type); fprintf(f, "\t\"zeros\": %u,\n", c->include_zeroes); fprintf(f, "\t\"dist\": %u,\n", c->byte_distribution); + fprintf(f, "\t\"cdist\": \"%s\",\n", val(c->compact_byte_distribution)); fprintf(f, "\t\"entropy\": %u,\n", c->report_entropy); fprintf(f, "\t\"wht\": %u,\n", c->report_wht); fprintf(f, "\t\"hd\": %u,\n", c->report_hd); diff --git a/src/config.h b/src/config.h index b7989e38..1e8fe912 100644 --- 
a/src/config.h +++ b/src/config.h @@ -66,6 +66,7 @@ struct configuration { unsigned int output_level; unsigned int nfv9_capture_port; unsigned int flow_key_match_method; + char *compact_byte_distribution; char *interface; char *filename; /* output file, if not NULL */ char *outputdir; /* directory to write output files */ diff --git a/src/p2f.c b/src/p2f.c index d9c89fc4..4ed5b980 100644 --- a/src/p2f.c +++ b/src/p2f.c @@ -163,6 +163,8 @@ unsigned int include_zeroes = 0; unsigned int byte_distribution = 0; +char *compact_byte_distribution = 0; + unsigned int report_entropy = 0; unsigned int report_wht = 0; @@ -185,6 +187,8 @@ FILE *info = NULL; unsigned int records_in_file = 0; +unsigned short compact_bd_mapping[256]; /* indexed by byte value (0-255); holds the compact bin (0-15) */ + /* * config is the global configuration */ @@ -391,6 +395,7 @@ void flow_record_init(/* @out@ */ struct flow_record *record, timer_clear(&record->end); record->last_pkt_len = 0; memset(record->byte_count, 0, sizeof(record->byte_count)); + memset(record->compact_byte_count, 0, sizeof(record->compact_byte_count)); memset(record->pkt_len, 0, sizeof(record->pkt_len)); memset(record->pkt_time, 0, sizeof(record->pkt_time)); memset(record->pkt_flags, 0, sizeof(record->pkt_flags)); @@ -806,6 +811,17 @@ void flow_record_update_byte_count(struct flow_record *f, const void *x, unsigne } +void flow_record_update_compact_byte_count(struct flow_record *f, const void *x, unsigned int len) { + const unsigned char *data = x; + unsigned int i; + /* when cdist is configured, count each of the len bytes at x in the bin given by compact_bd_mapping */ + if (compact_byte_distribution) { + for (i=0; i<len; i++) { + f->compact_byte_count[compact_bd_mapping[data[i]]]++; + } + } +} + void flow_record_update_byte_dist_mean_var(struct flow_record *f, const void *x, unsigned int len) { const unsigned char *data = x; double delta; @@ -898,6 +914,15 @@ void flow_record_print(const struct flow_record *record) { fprintf(output, "%u ]\n", record->byte_count[i]); } } + if (compact_byte_distribution) { + if (record->ob != 0) { + fprintf(output, "\tcompact_bd: [ "); + for (i = 0; i < 15; i++) { + fprintf(output, 
"%u, ", record->compact_byte_count[i]); + } + fprintf(output, "%u ]\n", record->compact_byte_count[i]); + } + } if (report_entropy) { if (record->ob != 0) { fprintf(output, "\tbe: %f\n", @@ -1271,7 +1296,9 @@ void flow_record_print_json(const struct flow_record *record) { if (byte_distribution || report_entropy) { const unsigned int *array; + const unsigned int *compact_array; unsigned int tmp[256]; + unsigned int compact_tmp[16]; unsigned int num_bytes; double mean = 0.0, variance = 0.0; @@ -1281,6 +1308,7 @@ void flow_record_print_json(const struct flow_record *record) { */ if (rec->twin == NULL) { array = rec->byte_count; + compact_array = rec->compact_byte_count; num_bytes = rec->ob; if (rec->num_bytes != 0) { @@ -1295,7 +1323,11 @@ void flow_record_print_json(const struct flow_record *record) { for (i=0; i<256; i++) { tmp[i] = rec->byte_count[i] + rec->twin->byte_count[i]; } + for (i=0; i<16; i++) { + compact_tmp[i] = rec->compact_byte_count[i] + rec->twin->compact_byte_count[i]; + } array = tmp; + compact_array = compact_tmp; num_bytes = rec->ob + rec->twin->ob; if (rec->num_bytes + rec->twin->num_bytes != 0) { @@ -1329,6 +1361,17 @@ void flow_record_print_json(const struct flow_record *record) { } + if (compact_byte_distribution) { + fprintf(output, ",\n\t\t\t\"compact_bd\": [ "); + for (i = 0; i < 15; i++) { + if ((i % 16) == 0) { + fprintf(output, "\n\t\t\t "); + } + fprintf(output, "%3u, ", compact_array[i]); + } + fprintf(output, "%3u\n\t\t\t]", compact_array[i]); + } + if (report_entropy) { if (num_bytes != 0) { double entropy = flow_record_get_byte_count_entropy(array, num_bytes); diff --git a/src/p2f.h b/src/p2f.h index 3029b664..0ea0e54c 100644 --- a/src/p2f.h +++ b/src/p2f.h @@ -92,6 +92,7 @@ struct flow_record { struct timeval pkt_time[MAX_NUM_PKT_LEN]; /* array of arrival times */ unsigned char pkt_flags[MAX_NUM_PKT_LEN]; /* array of packet flags */ unsigned int byte_count[256]; /* number of occurences of each byte */ + unsigned int 
compact_byte_count[16]; /* number of occurrences of each byte, mapped to compact form (16 bins) */ unsigned long int num_bytes; double bd_mean; double bd_variance; @@ -206,6 +207,8 @@ void flow_record_print_json(const struct flow_record *record); void flow_record_update_byte_count(struct flow_record *f, const void *x, unsigned int len); +void flow_record_update_compact_byte_count(struct flow_record *f, const void *x, unsigned int len); + void flow_record_update_byte_dist_mean_var(struct flow_record *f, const void *x, unsigned int len); void flow_record_delete(struct flow_record *r); diff --git a/src/pcap2flow.c b/src/pcap2flow.c index 492f7e55..8f47da87 100644 --- a/src/pcap2flow.c +++ b/src/pcap2flow.c @@ -95,6 +95,8 @@ extern struct timeval active_timeout; extern unsigned int active_max; +extern unsigned short compact_bd_mapping[256]; + /* configuration state */ extern unsigned int bidir; @@ -103,6 +105,8 @@ extern unsigned int include_zeroes; extern unsigned int byte_distribution; +extern char *compact_byte_distribution; + extern unsigned int report_entropy; extern unsigned int report_wht; @@ -304,6 +308,7 @@ int usage(char *s) { " zeros=1 include zero-length data (e.g. 
ACKs) in packet list\n" " bidir=1 merge unidirectional flows into bidirectional ones\n" " dist=1 include byte distribution array\n" + " cdist=F include compact byte distribution array using the mapping file, F\n" " entropy=1 include byte entropy\n" " tls=1 include TLS data (ciphersuites, record lengths and times, ...)\n" " exe=1 include information about host process associated with flow\n" @@ -442,6 +447,7 @@ int main(int argc, char **argv) { bidir = config.bidir; include_zeroes = config.include_zeroes; byte_distribution = config.byte_distribution; + compact_byte_distribution = config.compact_byte_distribution; report_entropy = config.report_entropy; report_wht = config.report_wht; report_hd = config.report_hd; @@ -543,6 +549,27 @@ int main(int argc, char **argv) { } } + if (config.compact_byte_distribution) { + FILE *fp; + int count = 0; + unsigned short b_value, map_b_value; + + memset(compact_bd_mapping, 0, sizeof(compact_bd_mapping)); + + /* stop at the first malformed pair; skip pairs that would index past the 256-entry map or the 16 bins */ + fp = fopen(config.compact_byte_distribution,"r"); + if (fp != NULL) { + while (fscanf(fp, "%hu\t%hu", &b_value, &map_b_value) == 2) { + if (b_value < 256 && map_b_value < 16) { compact_bd_mapping[b_value] = map_b_value; } + count++; + if (count >= 256) { + break; + } + } + fclose(fp); + } + } + /* * configure labeled subnets (which uses a radix trie to identify * addresses that match subnets associated with labels) diff --git a/src/pkt_proc.c b/src/pkt_proc.c index 119ef675..ed6faf37 100644 --- a/src/pkt_proc.c +++ b/src/pkt_proc.c @@ -576,6 +576,7 @@ process_tcp(const struct pcap_pkthdr *h, const void *tcp_start, int tcp_len, str record->ob += payload_len; flow_record_update_byte_count(record, payload, payload_len); + flow_record_update_compact_byte_count(record, payload, payload_len); flow_record_update_byte_dist_mean_var(record, payload, payload_len); wht_update(&record->wht, payload, payload_len, report_wht); @@ -657,6 +658,7 @@ process_udp(const struct pcap_pkthdr *h, const void *udp_start, int udp_len, str record->ob += size_payload; 
flow_record_update_byte_count(record, payload, size_payload); + flow_record_update_compact_byte_count(record, payload, size_payload); flow_record_update_byte_dist_mean_var(record, payload, size_payload); wht_update(&record->wht, payload, size_payload, report_wht); @@ -726,6 +728,7 @@ process_icmp(const struct pcap_pkthdr *h, const void *start, int len, struct flo record->ob += size_payload; flow_record_update_byte_count(record, payload, size_payload); + flow_record_update_compact_byte_count(record, payload, size_payload); flow_record_update_byte_dist_mean_var(record, payload, size_payload); wht_update(&record->wht, payload, size_payload, report_wht); @@ -775,6 +778,7 @@ process_ip(const struct pcap_pkthdr *h, const void *ip_start, int ip_len, struct record->ob += size_payload; flow_record_update_byte_count(record, payload, size_payload); + flow_record_update_compact_byte_count(record, payload, size_payload); flow_record_update_byte_dist_mean_var(record, payload, size_payload); wht_update(&record->wht, payload, size_payload, report_wht);