From 57eae0f967197cbf6f7195031c28d7740efbdfab Mon Sep 17 00:00:00 2001 From: chb2mn Date: Thu, 9 Jul 2015 09:55:52 -0400 Subject: [PATCH 1/4] closes issues #83 --- pythonwhois/parse.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 66217d6..f33e0e6 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -513,7 +513,10 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_ # SIDN isn't very standard either. And EURid uses a similar format. match = re.search("Registrar:\n\s+(?:Name:\s*)?(\S.*)", segment) if match is not None: - data["registrar"].insert(0, match.group(1).strip()) + try: + data["registrar"].insert(0, match.group(1).strip()) + except KeyError as e: + data["registrar"] = [match.group(1).strip()] match = re.search("(?:Domain nameservers|Name servers):([\s\S]*?\n)\n", segment) if match is not None: chunk = match.group(1) From c9c209d7972f16ffe253227a283e3e8f3ba877c6 Mon Sep 17 00:00:00 2001 From: chb2mn Date: Thu, 9 Jul 2015 11:25:27 -0400 Subject: [PATCH 2/4] closes #53 by ignoring lines with 0's for the date --- pwhois | 2 +- pythonwhois/net.py | 2 +- pythonwhois/parse.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pwhois b/pwhois index 09563f4..af631f3 100755 --- a/pwhois +++ b/pwhois @@ -35,7 +35,7 @@ else: parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True) if args.json == True: - print(json.dumps(parsed, default=json_fallback)) + print(json.dumps(parsed, default=json_fallback)) else: data_map = OrderedDict({}) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 53f87ba..888657f 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -91,4 +91,4 @@ def whois_request(domain, server, port=43): if len(data) == 0: break buff += data - return buff.decode("utf-8") + return buff.decode("utf-8") diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index f33e0e6..0cf22b3 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -806,7 +806,7 @@ def parse_dates(dates): second = 0 print(e.message) # FIXME: This should have proper logging of some sort...? try: - if year > 0: + if year > 0 and (year == 2000 and month > 0): try: parsed_dates.append(datetime.datetime(year, month, day, hour, minute, second)) except ValueError as e: From 75f6820d47db54a33c551042f47ee09b265d74e1 Mon Sep 17 00:00:00 2001 From: chb2mn Date: Thu, 9 Jul 2015 12:33:58 -0400 Subject: [PATCH 3/4] closes #90 --- pythonwhois/parse.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 0cf22b3..4946b3c 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -437,7 +437,7 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_ raw_data = [segment.replace("\r", "") for segment in raw_data] # Carriage returns are the devil for segment in raw_data: - for rule_key, rule_regexes in grammar['_data'].items(): + for rule_key, rule_regexes in grammar['_data'].items(): if (rule_key in data) == False: for line in segment.splitlines(): for regex in rule_regexes: @@ -455,10 +455,10 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_ match = re.search("^\s?Name\s?[Ss]ervers:?\s*\n((?:\s*.+\n)+?\s?)\n", segment, re.MULTILINE) if match is not None: chunk = match.group(1) - for match in re.findall("[ ]*(.+)\n", chunk): + for match in re.findall("[ ]*(.+)\n", chunk): if match.strip() != "": if not re.match("^[a-zA-Z]+:", match): - try: + try: data["nameservers"].append(match.strip()) except KeyError as e: data["nameservers"] = [match.strip()] @@ -519,10 +519,14 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_ data["registrar"] = [match.group(1).strip()] match = re.search("(?:Domain nameservers|Name servers):([\s\S]*?\n)\n", segment) if match is not None: - chunk = match.group(1) + chunk = match.group(1) for match in re.findall("\s+?(.+)\n", chunk): - match = match.split()[0] - # Prevent nameserver aliases from being picked up. + try: + match = match.split()[0] + #prevents a crash in the case that chunk contains a blank string entry + except IndexError: + match = "[]" + # Prevent nameserver aliases from being picked up. if not match.startswith("[") and not match.endswith("]"): try: data["nameservers"].append(match.strip()) From 5ca86504d2a2ff26ded9d8cf236391bd7bdd962d Mon Sep 17 00:00:00 2001 From: chb2mn Date: Thu, 9 Jul 2015 14:51:05 -0400 Subject: [PATCH 4/4] changing spaces to tabs --- pwhois | 2 +- pythonwhois/net.py | 2 +- pythonwhois/parse.py | 28 ++++++++++++++-------------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pwhois b/pwhois index af631f3..09563f4 100755 --- a/pwhois +++ b/pwhois @@ -35,7 +35,7 @@ else: parsed = pythonwhois.parse.parse_raw_whois(data, normalized=True) if args.json == True: - print(json.dumps(parsed, default=json_fallback)) + print(json.dumps(parsed, default=json_fallback)) else: data_map = OrderedDict({}) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 888657f..53f87ba 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -91,4 +91,4 @@ def whois_request(domain, server, port=43): if len(data) == 0: break buff += data - return buff.decode("utf-8") + return buff.decode("utf-8") diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 4946b3c..c97286b 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -437,7 +437,7 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_ raw_data = [segment.replace("\r", "") for segment in raw_data] # Carriage returns are the devil for segment in raw_data: - for rule_key, rule_regexes in grammar['_data'].items(): + for rule_key, rule_regexes in grammar['_data'].items(): if (rule_key in data) == False: for line in segment.splitlines(): for regex in rule_regexes: @@ -455,10 +455,10 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_ match = re.search("^\s?Name\s?[Ss]ervers:?\s*\n((?:\s*.+\n)+?\s?)\n", segment, re.MULTILINE) if match is not None: chunk = match.group(1) - for match in re.findall("[ ]*(.+)\n", chunk): + for match in re.findall("[ ]*(.+)\n", chunk): if match.strip() != "": if not re.match("^[a-zA-Z]+:", match): - try: + try: data["nameservers"].append(match.strip()) except KeyError as e: data["nameservers"] = [match.strip()] @@ -513,20 +513,20 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_ # SIDN isn't very standard either. And EURid uses a similar format. match = re.search("Registrar:\n\s+(?:Name:\s*)?(\S.*)", segment) if match is not None: - try: - data["registrar"].insert(0, match.group(1).strip()) - except KeyError as e: - data["registrar"] = [match.group(1).strip()] + try: + data["registrar"].insert(0, match.group(1).strip()) + except KeyError as e: + data["registrar"] = [match.group(1).strip()] match = re.search("(?:Domain nameservers|Name servers):([\s\S]*?\n)\n", segment) if match is not None: - chunk = match.group(1) + chunk = match.group(1) for match in re.findall("\s+?(.+)\n", chunk): - try: - match = match.split()[0] - #prevents a crash in the case that chunk contains a blank string entry - except IndexError: - match = "[]" - # Prevent nameserver aliases from being picked up. + try: + match = match.split()[0] + #prevents a crash in the case that chunk contains a blank string entry + except IndexError: + match = "[]" + #Prevent nameserver aliases from being picked up. if not match.startswith("[") and not match.endswith("]"): try: data["nameservers"].append(match.strip())