Skip to content

Commit

Permalink
Merge pull request #5 from geekygirldawn/csv_file
Browse files Browse the repository at this point in the history
Added Summary CSV file
  • Loading branch information
geekygirldawn authored Dec 1, 2023
2 parents ace0308 + 806ac42 commit b455049
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 18 deletions.
28 changes: 24 additions & 4 deletions health_by_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,12 @@
"""
import argparse
import sys
import pandas as pd
from utils.augur_connect import augur_db_connect
from utils.date_calcs import get_dates
from utils.repo_info import get_repo_info, fork_archive, get_org_repos
from utils.file_operations import create_path_str
from metrics.release_frequency import activity_release_graph
from metrics.closure_ratio import sustain_prs_by_repo_graph
from metrics.first_response import response_time_graph
Expand Down Expand Up @@ -101,6 +103,18 @@
# This is the case where data is gathered on all repos from an org
repoDF = get_org_repos(org_name, engine)
print("multiple repos")

# When gathering data on an org, it can be helpful to have a summary CSV
path = create_path_str(org_name)
output_filename = path + '/_' + org_name + '_output_yr_' + str(years) + '_bdays_' + str(bus_days) + '.csv'

try:
csv_output = open(output_filename, 'w')
csv_output.write('org_name,repo_name,releases,first_resp_mos,closure_ratio_mos,bus_factor,bus_factor_percents,fork,archive\n')
except:
print('Could not write to csv file. Exiting')
sys.exit(1)

else:
# This is the case where data is gathered on a single org / repo combo
repo_id = get_repo_info(engine, org_name, repo_name)
Expand All @@ -121,14 +135,20 @@

# This section collects all of the data using the functions for each graph
# found in common_functions.py and creates the graphs for each metric
# Skips archived repos

activity_release_graph(repo_id, repo_name, org_name, start_date, end_date, engine, years)
if is_archived == False:
releases = activity_release_graph(repo_id, repo_name, org_name, start_date, end_date, engine, years)

sustain_prs_by_repo_graph(repo_id, repo_name, org_name, start_date, end_date, engine, years)
closure_ratio_mos = sustain_prs_by_repo_graph(repo_id, repo_name, org_name, start_date, end_date, engine, years)

contributor_risk_graph(repo_id, repo_name, org_name, start_date, end_date, engine, years)
bus_factor, bus_factor_percents = contributor_risk_graph(repo_id, repo_name, org_name, start_date, end_date, engine, years)

response_time_graph(repo_id, repo_name, org_name, start_date, end_date, engine, bus_days, years)
first_resp_mos = response_time_graph(repo_id, repo_name, org_name, start_date, end_date, engine, bus_days, years)

if len(repoDF) > 1:
csv_line = org_name + ',' + repo_name + ',' + releases + ',' + first_resp_mos + ',' + closure_ratio_mos + ',' + bus_factor + ',' + bus_factor_percents + ',' + str(is_forked) + ',' + str(is_archived) + '\n'
csv_output.write(csv_line)

# Print a separator between repos
print('-------------')
5 changes: 4 additions & 1 deletion metrics/bus_factor.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def contributor_risk_graph(repo_id, repo_name, org_name, start_date, end_date, e
error_num, error_text, names, percents, commits, title, interpretation, num_people = contributor_risk_data(repo_id, repo_name, org_name, start_date, end_date, engine)

if error_num == -1:
return
return "Error","Error"

matplotlib.use('Agg') #prevents from trying to send plot to screen
sns.set_style('ticks')
Expand Down Expand Up @@ -204,3 +204,6 @@ def contributor_risk_graph(repo_id, repo_name, org_name, start_date, end_date, e

print('Bus Factor / Contributor Risk for', org_name, '/', repo_name, 'from', start_date, 'to', end_date, '\nsaved as', filename)
print(num_people, 'people make up > 70% of the commits in the past year.')

percent_str = '--'.join(str(x) for x in percents)
return str(num_people), percent_str
4 changes: 3 additions & 1 deletion metrics/closure_ratio.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ def sustain_prs_by_repo_graph(repo_id, repo_name, org_name, start_date, end_date

if error_num == -1:
print("Closure Ratio: Too few PRs to calculate")
return
return "Too Few PRs"

matplotlib.use('Agg') #prevents from trying to send plot to screen
sns.set_style('ticks')
Expand Down Expand Up @@ -326,3 +326,5 @@ def sustain_prs_by_repo_graph(repo_id, repo_name, org_name, start_date, end_date

print('Change Request Closure Ratio (keeping up with contributions) for', org_name, '/', repo_name, 'from', start_date, 'to', end_date, '\nsaved as', filename)
print('Number of months in the past 6 months with > 15% of PRs not closed:', month_num)

return str(month_num)
4 changes: 2 additions & 2 deletions metrics/first_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def response_time_graph(repo_id, repo_name, org_name, start_date, end_date, engi
# Don't gather data if less than 24 PRs
if error_num == -1:
print("First Response: Too few PRs to calculate")
return
return "Too Few PRs"

sns.set_style('ticks')
sns.set(style="whitegrid", font_scale=2)
Expand Down Expand Up @@ -253,5 +253,5 @@ def response_time_graph(repo_id, repo_name, org_name, start_date, end_date, engi
print('Time to first response for', org_name, '/', repo_name, 'from', start_date, 'to', end_date, '\nsaved as', filename)
print(month_num, 'months with more than 10% of pull requests not responded to within specified business days in the past 6 months')


return str(month_num)

6 changes: 4 additions & 2 deletions metrics/release_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def activity_release_graph(repo_id, repo_name, org_name, start_date, end_date, e
error_num, error_text, releases_df, start_dt, end_dt, title, interpretation, release_num = activity_release_data(repo_id, repo_name, org_name, start_date, end_date, engine)

if error_num == -1:
return
return "0"

matplotlib.use('Agg') #prevents from trying to send plot to screen
sns.set(style="whitegrid", font_scale=2)
Expand All @@ -140,4 +140,6 @@ def activity_release_graph(repo_id, repo_name, org_name, start_date, end_date, e
plt.close(fig)

print('Release Frequency for', org_name, '/', repo_name, 'from', start_date, 'to', end_date, '\nsaved as', filename)
print(release_num, 'releases in the past 6 months')
print(release_num, 'releases in the past 6 months')

return str(release_num)
41 changes: 33 additions & 8 deletions utils/file_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,38 @@
""" This file contains several functions that perform various file operations
"""

def output_path(repo_name, org_name):
def create_path_str(org_name):
    """ Creates the org-level output directory and returns its path

    The path is built as <parent of this file's directory>/./output/<YYYY-MM>/<org_name>,
    where YYYY-MM comes from the date returned by get_last_month(). The
    directory (and any missing parents) is created if it does not already
    exist, so callers can write files into it immediately.

    Parameters
    ----------
    org_name : str

    Returns
    -------
    path_str : str
    """
    from os.path import dirname, join
    from pathlib import Path
    from utils.date_calcs import get_last_month

    # Output is grouped by the year-month that get_last_month() returns,
    # zero-padding the month so the directory names are always YYYY-MM
    last_month = get_last_month()
    current_year_month = str(last_month.year) + '-' + '{:02d}'.format(last_month.month)

    current_dir = dirname(dirname(__file__)) # the double dirname is equivalent to ../
    rel_path = './output/' + current_year_month + '/' + org_name
    path_str = join(current_dir, rel_path)

    # exist_ok=True makes repeated calls for the same org safe
    Path(path_str).mkdir(parents=True, exist_ok=True)

    return path_str


def output_path(repo_name, org_name):
""" Creates the path string including repo where .png files will be located
Parameters
----------
repo_name : str
Expand All @@ -17,17 +46,13 @@ def output_path(repo_name, org_name):
path : str
"""
import datetime
from os.path import dirname, join
from pathlib import Path
from utils.date_calcs import get_last_month

today = datetime.date.today()
last_month = get_last_month()
current_year_month = str(last_month.year) + '-' + '{:02d}'.format(last_month.month)
path_str = create_path_str(org_name)

current_dir = dirname(dirname(__file__)) # the double dirname is equivalent to ../
rel_path = './output/' + current_year_month + '/' + org_name + '/' + repo_name
rel_path = path_str + '/' + repo_name
path = join(current_dir, rel_path)
Path(path).mkdir(parents=True, exist_ok=True)

Expand All @@ -51,4 +76,4 @@ def output_filename(repo_name, org_name, metric_string):

filename = path + '/' + repo_name + '_' + metric_string + '.png'

return filename
return filename

0 comments on commit b455049

Please sign in to comment.