From 89b4903d514a9f01456a17376a4d8bb9a7602ea5 Mon Sep 17 00:00:00 2001
From: jogeo
Date: Wed, 15 Sep 2021 15:52:09 -0700
Subject: [PATCH] Add issue_finder module.

---
 issue_finder.py              |  62 ++++++++++++++++++
 issues/issue_OCPQE_5204.yaml |   4 ++
 issues/issue_OCPQE_5743.yaml |   5 ++
 issues/issues.py             |  51 +++++++++++++
 parse_ci_monitor_json.py     | 127 ++++++++++++++++++++++++++-----------
 5 files changed, 212 insertions(+), 37 deletions(-)
 create mode 100755 issue_finder.py
 create mode 100644 issues/issue_OCPQE_5204.yaml
 create mode 100644 issues/issue_OCPQE_5743.yaml
 create mode 100755 issues/issues.py

diff --git a/issue_finder.py b/issue_finder.py
new file mode 100755
index 0000000..1e1f561
--- /dev/null
+++ b/issue_finder.py
@@ -0,0 +1,62 @@
+from fnmatch import fnmatch
+from http.client import IncompleteRead
+import os
+import pathlib
+from urllib.error import HTTPError
+import urllib.request
+import yaml
+
+from issues.issues import Issues
+
+
+class IssuesFinder:
+    """Find known OCP CI issues in test logs."""
+
+    def __init__(self):
+        self.issues_found = []
+        self.issues = Issues()
+        self.issue_yamls = self.read_issue_yaml()
+
+    def read_issue_yaml(self):
+        """Return a list of issue definitions read from the issue yaml files."""
+        issue_yamls = []
+        script_dir_path = pathlib.Path(__file__).parent.resolve()
+        for (dirpath, dirnames, filenames) in os.walk(
+            os.path.join(script_dir_path, "issues")
+        ):
+            for name in filenames:
+                if fnmatch(name, "issue_*.yaml"):
+                    yaml_path = os.path.join(dirpath, name)
+                    with open(yaml_path) as yaml_file:
+                        issue_yamls.append(yaml.safe_load(yaml_file))
+        return issue_yamls
+
+    def find_issues(self, logs):
+        """Return a list of known issues found in the test logs at the given URL."""
+        if logs is not None:
+            log_text = self.get_file_from_url(logs)
+            if log_text is not None:
+                for issue_def in self.issue_yamls:
+                    # This could take a while; print a dot per issue
+                    # definition so the user knows something is happening.
+                    print(" .")
+                    if self.issues.search(log_text, issue_def):
+                        self.issues_found.append(issue_def["description"])
+        return self.issues_found
+
+    def get_file_from_url(self, url: str):
+        """Return the file content downloaded from url."""
+        # Try three times to help with intermittent connection issues.
+        for i in range(3):
+            content = None
+            try:
+                content = urllib.request.urlopen(url).read().decode("utf8")
+            except IncompleteRead:
+                print(f"Caught IncompleteRead in iteration {i}.")
+                continue
+            except HTTPError:
+                print(f"Skipping download due to HTTPError: {url}")
+                break
+            else:
+                break
+        return content
diff --git a/issues/issue_OCPQE_5204.yaml b/issues/issue_OCPQE_5204.yaml
new file mode 100644
index 0000000..365a403
--- /dev/null
+++ b/issues/issue_OCPQE_5204.yaml
@@ -0,0 +1,4 @@
+description: 'https://issues.redhat.com/browse/OCPQE-5204'
+match_ordered_strings:
+- 'Scenario: Etcd basic verification ==='
+- 'RuntimeError: See log, waiting for labeled pods futile: k8s-app=etcd'
diff --git a/issues/issue_OCPQE_5743.yaml b/issues/issue_OCPQE_5743.yaml
new file mode 100644
index 0000000..4c370d2
--- /dev/null
+++ b/issues/issue_OCPQE_5743.yaml
@@ -0,0 +1,5 @@
+description: 'https://issues.redhat.com/browse/OCPQE-5743'
+match_ordered_strings:
+- '=== After Scenario:'
+- 'the server is currently unable to handle the request \(get projects.project.openshift.io\)'
+- 'RuntimeError: error getting projects by user'
diff --git a/issues/issues.py b/issues/issues.py
new file mode 100755
index 0000000..7d11f9b
--- /dev/null
+++ b/issues/issues.py
@@ -0,0 +1,51 @@
+import re
+
+
+class Issues:
+    """Issue matching methods used by the issue yaml definitions."""
+
+    def match_ordered_strings(self, log: str, targets: list):
+        """Return True if all the regex strings in targets are found in order."""
+        lines = log.splitlines()
+        # Index of the line just after the previous target's match.
+        start = 0
+        for target in targets:
+            found = False
+            for offset, line in enumerate(lines[start:]):
+                if re.search(target, line):
+                    # Resume the next search after this match so the
+                    # targets must appear in the given order.
+                    start += offset + 1
+                    found = True
+                    break
+            if not found:
+                return False
+        return True
+
+    def get_method_refs(self, names):
+        """Return a dict of callable method refs, keyed by each name in names."""
+        refs = dict()
+        for name in names:
+            if name == "description":
+                continue
+            refs[name] = getattr(self, name)
+        return refs
+
+    def search(self, log_text, issue_def):
+        """
+        Return True if log_text matches all the criteria in issue_def.
+
+        issue_def: A dict whose keys, apart from "description", are
+            expected to match method names defined in this class. Each
+            matching method is called with log_text and the
+            corresponding issue_def value as arguments; that value must
+            contain all the structured data required by the called
+            method. The "description" key only labels the issue and is
+            skipped.
+        """
+
+        found_all = set()
+        method_refs = self.get_method_refs(issue_def.keys())
+        for name, ref in method_refs.items():
+            found_all.add(ref(log_text, issue_def[name]))
+        return all(found_all)
diff --git a/parse_ci_monitor_json.py b/parse_ci_monitor_json.py
index c079d6a..59817cc 100755
--- a/parse_ci_monitor_json.py
+++ b/parse_ci_monitor_json.py
@@ -6,10 +6,11 @@
 import sys
 import os
 import subprocess
-import pprint
 from datetime import datetime
 from typing import List
 
+from issue_finder import IssuesFinder
+
 
 def argparser():
     parser = argparse.ArgumentParser(
@@ -17,10 +18,10 @@ def argparser():
     )
 
     parser.add_argument(
-        "-f",
-        "--files",
+        "-r",
+        "--runs",
         nargs="+",
-        help="space seperated values of test run JSON files generated from ci_monitor tool",
+        help="space separated list of test run IDs.",
     )
     parser.add_argument(
         "-o",
@@ -28,14 +29,31 @@ def argparser():
         help="location to write output file",
         default=f"./{datetime.today().strftime('%Y%m%d')}.json",
     )
-    parser.add_argument("-v", "--version", help="Specify OCP version", required=True)
+    parser.add_argument(
+        "-i",
+        "--issues",
+        action="store_true",
+        help="Search the test logs for known issues.",
+    )
+    parser.add_argument(
+        "-v",
+        "--version",
+        help="Specify OCP version",
+        required=True,
+    )
     return parser.parse_args()
 
 
-def get_test_failure_profile(content: str, profile: str):
+def get_link_to_log(content: str):
     content = re.search("(http.*)", content)
     if content:
-        linkto_logs = "[Link to logs|" + content.groups()[0] + "]|" + profile
+        return content.groups()[0]
+
+
+def get_test_failure_profile(content: str, profile: str):
+    content = get_link_to_log(content)
+    if content:
+        linkto_logs = "[Link to logs|" + content + "]|" + profile
     else:
         linkto_logs = f"not found|{profile}"
     return linkto_logs
@@ -67,6 +85,32 @@ def get_owner(ascript: str, testid: str):
     return owner
 
 
+def get_testrun_json(run_id: str):
+    """Download the test run JSON data from polarshift."""
+
+    # Call $BUSHSLICER_HOME/tools/polarshift.rb get-run RUN_ID to download the JSON describing the test run.
+    BUSHSLICER_HOME = os.getenv("BUSHSLICER_HOME")
+    # Use -o to write clean JSON to a file; stdout also carries non-JSON output.
+    cmd = [
+        f"{BUSHSLICER_HOME}/tools/polarshift.rb",
+        "get-run",
+        f"{run_id}",
+        "-o",
+        f"{run_id}.json",
+    ]
+    subprocess.check_output(cmd)
+    run_json = get_json_from_file(f"{run_id}.json")
+    return run_json
+
+
+def get_json_from_file(file_path: str):
+    """Read JSON content from file_path."""
+
+    with open(file_path, "r") as f:
+        content = json.load(f)
+    return content
+
+
 def write_output(data: dict, ofile: str):
     with open(ofile, "w") as outfile:
         json.dump(data, outfile, indent=4, sort_keys=True)
@@ -74,40 +118,49 @@ def main():
     args = argparser()
-    logs_files = set(args.files)
     report_struct = {"version": args.version}
-    for file in logs_files:
-        with open(file) as fh:
-            output = json.load(fh)
-            profile = output["title"]
-            profile = re.search(".* - (.*)$", profile)
-            profile = profile.groups()[0]
-            for record in output["records"]["TestRecord"]:
-                if record["result"] == "Failed":
-                    linkto_logs = get_test_failure_profile(
-                        record["comment"]["content"], profile
-                    )
-                    automation_script = get_automation_script(
-                        record["test_case"]["customFields"]["Custom"]
-                    )
-                    id = record["test_case"]["id"]
-                    owner = get_owner(automation_script, id)
-                    if report_struct.get(owner, 0):
-                        if report_struct[owner].get(automation_script, 0):
-                            if report_struct[owner][automation_script].get(id, 0):
-                                report_struct[owner][automation_script][id].append(
-                                    linkto_logs
-                                )
-                            else:
-                                report_struct[owner][automation_script].update(
-                                    {id: [linkto_logs]}
-                                )
+    for run in args.runs:
+        output = get_testrun_json(run)
+        profile = output["title"]
+        profile = re.search(".* - (.*)$", profile)
+        profile = profile.groups()[0]
+        for record in output["records"]["TestRecord"]:
+            if record["result"] == "Failed":
+                linkto_logs = get_test_failure_profile(
+                    record["comment"]["content"], profile
+                )
+                automation_script = get_automation_script(
+                    record["test_case"]["customFields"]["Custom"]
+                )
+                id = record["test_case"]["id"]
+                owner = get_owner(automation_script, id)
+                if report_struct.get(owner, 0):
+                    if report_struct[owner].get(automation_script, 0):
+                        if report_struct[owner][automation_script].get(id, 0):
+                            report_struct[owner][automation_script][id].append(
+                                linkto_logs
+                            )
                         else:
-                            report_struct[owner].update(
-                                {automation_script: {id: [linkto_logs]}}
+                            report_struct[owner][automation_script].update(
+                                {id: [linkto_logs]}
                             )
                     else:
-                        report_struct[owner] = {automation_script: {id: [linkto_logs]}}
+                        report_struct[owner].update(
+                            {automation_script: {id: [linkto_logs]}}
+                        )
+                else:
+                    report_struct[owner] = {automation_script: {id: [linkto_logs]}}
+                # Optionally search the linked test logs for known issues.
+                if args.issues:
+                    issue_finder = IssuesFinder()
+                    issues = issue_finder.find_issues(
+                        get_link_to_log(record["comment"]["content"])
+                    )
+                    if issues:
+                        print(f"Found issues for {id}: {issues}")
+                        report_struct[owner][automation_script][id].append(
+                            {"known_issues": issues}
+                        )
 
     write_output(report_struct, args.output)
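
A quick sketch of how the new pieces fit together, for anyone reviewing the matching logic: IssuesFinder loads every issues/issue_*.yaml definition once, and Issues.search dispatches each key of a definition (except "description") to the Issues method of the same name. The snippet below is illustrative only; the log text and the inline issue definition are made up, and it bypasses the URL download that find_issues normally performs.

from issues.issues import Issues

# Hypothetical log text and issue definition, for illustration only.
log_text = "\n".join(
    [
        "Scenario: Etcd basic verification ===",
        "unrelated output in between",
        "RuntimeError: See log, waiting for labeled pods futile: k8s-app=etcd",
    ]
)
issue_def = {
    "description": "https://issues.redhat.com/browse/OCPQE-5204",
    "match_ordered_strings": [
        "Scenario: Etcd basic verification ===",
        "RuntimeError: See log, waiting for labeled pods futile: k8s-app=etcd",
    ],
}

# Issues.search looks up match_ordered_strings via getattr and calls it
# with (log_text, targets); both targets match in order, so this prints True.
print(Issues().search(log_text, issue_def))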