Skip to content

Commit

Permalink
Refactored the release_frequency data gathering to use Augur instead of the GitHub API, which removed the dependence on the GitHub API entirely, so the functions making the GH API calls were also removed.
Browse files Browse the repository at this point in the history
  • Loading branch information
geekygirldawn committed Aug 17, 2023
1 parent b8b26bc commit 91c2d0c
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 93 deletions.
24 changes: 2 additions & 22 deletions health_by_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,10 @@
}
Replace the 'x's with values to connect to your Augur database
api_key
A text file containing one GitHub personal access token
Usage
-----
health_by_repo.py [-h] -o ORG_NAME -r REPO_NAME [-y YEARS] -c AUGUR_CONFIG -t API_KEY
health_by_repo.py [-h] -o ORG_NAME -r REPO_NAME [-y YEARS] -c AUGUR_CONFIG
options:
-h, --help show this help message and exit
Expand All @@ -46,8 +43,6 @@
The number of years of data to collect (default to 1)
-c AUGUR_CONFIG, --configfile AUGUR_CONFIG
The full file path to an Augur config.json file (required)
-t API_KEY, --token API_KEY
The file where your GitHub personal access token can be found (required)
Output
------
Expand Down Expand Up @@ -75,14 +70,12 @@
parser.add_argument("-r", "--repo", required=True, dest = "repo_name", help="The name of a GitHub repository in that org where your PRs can be found (required)")
parser.add_argument("-y", "--years", required=False, dest = "years", type=int, default=1, help="The number of years of data to collect (default to 1)")
parser.add_argument("-c", "--configfile", required=True, dest = "augur_config", help="The full file path to an Augur config.json file (required)")
parser.add_argument("-t", "--token", required=True, dest = "api_key", help="The file where your GitHub personal access token can be found (required)")

args = parser.parse_args()
org_name = args.org_name
repo_name = args.repo_name
years = args.years
augur_config = args.augur_config
api_key = args.api_key

# Get the dates for the analysis using the years argument if provided
days = 365 * years
Expand All @@ -99,23 +92,10 @@
is_forked, is_archived = fork_archive(repo_name, org_name, engine)
print('Forked:', str(is_forked), '\nArchived:', str(is_archived))

# Get the GitHub API repository object used to gather release data.
repo_api = repo_api_call(repo_name, org_name, api_key)

# This section compares the Augur org / repo and renames them for repos that have been redirected
# using the GH API as the canonical source of data for the org and repo, rather than what's in Augur
# This fixes the problem of repos being renamed, but not updated in Augur.
full_name = org_name + '/' + repo_name
api_name = repo_api.full_name

if full_name.lower() != api_name.lower():
org_name = api_name.split("/")[0]
repo_name = api_name.split("/")[1]

# This section collects all of the data using the functions for each graph
# found in common_functions.py and creates the graphs for each metric

activity_release_graph(repo_name, org_name, start_date, end_date, repo_api)
activity_release_graph(repo_id, repo_name, org_name, start_date, end_date, engine)

sustain_prs_by_repo_graph(repo_id, repo_name, org_name, start_date, end_date, engine)

Expand Down
48 changes: 30 additions & 18 deletions metrics/release_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,49 @@
""" Contains functions used to gather data and graph the Release Frequency metric
"""

def get_release_data(repo_api):
""" Uses the GitHub REST API repository object to collect release data
def get_release_data(repo_id, start_date, end_date, engine):
""" Get release data from the Augur database
Parameters
----------
repo_api : GitHub repository object
repo_id : str
start_date : str
end_date : str
engine : sqlalchemy object
Returns
-------
releases_df : dataframe
"""
import pandas as pd

releases = repo_api.get_releases()
releases_df = pd.DataFrame()

releases_df = pd.DataFrame(
[x, x.tag_name, x.published_at, 1] for x in releases
)
releases_df.columns = ['release', 'name', 'date', 'releases']
release_query = f"""
SELECT
release_published_at as date
FROM
releases
WHERE
repo_id = {repo_id}
AND release_published_at > {start_date}
AND release_published_at <= {end_date}
"""
releases_df = pd.read_sql_query(release_query, con=engine)

return releases_df

def activity_release_data(repo_name, org_name, start_date, end_date, repo_api):
""" Gathers release data from the GitHub API
def activity_release_data(repo_id, repo_name, org_name, start_date, end_date, engine):
""" Takes release data and does some reformatting before graphing
Parameters
----------
repo_id : str
repo_name : str
org_name : str
start_date : str
end_date : str
repo_api : GitHub repository object
engine : sqlalchemy object
Returns
-------
Expand All @@ -53,11 +64,11 @@ def activity_release_data(repo_name, org_name, start_date, end_date, repo_api):
from utils.date_calcs import convert_dates

try:
releases_df = get_release_data(repo_api)
releases_df = get_release_data(repo_id, start_date, end_date, engine)
error_num = 0
error_text = None
except:
return -1, 'NO DATA', None, None, None, None, None, None, None, None
return -1, 'NO DATA', None, None, None, None, None, None

start_dt, end_dt = convert_dates(start_date, end_date)
six_mos_dt = end_dt - datetime.timedelta(days=180)
Expand All @@ -69,24 +80,25 @@ def activity_release_data(repo_name, org_name, start_date, end_date, repo_api):

# return before creating plots if no release data in past 6 months
if release_num == 0:
return -1, 'NO DATA', None, None, None, None, None, None, None, None
return -1, 'NO DATA', None, None, None, None, None, None

title = org_name + "/" + repo_name + "\n" + str(release_num) + " releases in the past 6 months."

interpretation = 'Interpretation: Healthy projects will have frequent releases with security updates, bug fixes, and features.'

return error_num, error_text, releases_df, start_dt, end_dt, title, interpretation, release_num

def activity_release_graph(repo_name, org_name, start_date, end_date, repo_api):
def activity_release_graph(repo_id, repo_name, org_name, start_date, end_date, engine):
""" Graphs the release data returned from the activity_release_data function
Parameters
----------
repo_id : str
repo_name : str
org_name : str
start_date : str
end_date : str
repo_api : GitHub repository object
engine : sqlalchemy object
Output
------
Expand All @@ -99,7 +111,7 @@ def activity_release_graph(repo_name, org_name, start_date, end_date, repo_api):
import matplotlib.ticker as ticker
from utils.file_operations import output_filename

error_num, error_text, releases_df, start_dt, end_dt, title, interpretation, release_num = activity_release_data(repo_name, org_name, start_date, end_date, repo_api)
error_num, error_text, releases_df, start_dt, end_dt, title, interpretation, release_num = activity_release_data(repo_id, repo_name, org_name, start_date, end_date, engine)

if error_num == -1:
return -1, 'NO DATA'
Expand All @@ -117,7 +129,7 @@ def activity_release_graph(repo_name, org_name, start_date, end_date, repo_api):
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
ax.set(yticklabels=[])

plottermonth = sns.lineplot(y='releases', x='date', data=releases_df, marker="X", linewidth=0, markersize=20).set_title(title, fontsize=30)
plottermonth = sns.lineplot(y=1, x='date', data=releases_df, marker="X", linewidth=0, markersize=20).set_title(title, fontsize=30)
xlabel_str = 'Year Month\n\n' + interpretation
plottermonthlabels = ax.set_xlabel(xlabel_str)

Expand Down
53 changes: 0 additions & 53 deletions utils/github_api.py

This file was deleted.

0 comments on commit 91c2d0c

Please sign in to comment.