Skip to content

Commit

Permalink
Added the data_flags files
Browse files Browse the repository at this point in the history
- Added data_flags1 and data_flags2 xls files, which are one large table split up into two parts so they could be uploaded to Git
- Altered loading_data.py to demonstrate loading these new files into a single dataframe
  • Loading branch information
KVSRoyal committed Mar 16, 2019
1 parent e954946 commit 1f42523
Show file tree
Hide file tree
Showing 9 changed files with 240 additions and 9 deletions.
4 changes: 4 additions & 0 deletions .idea/encodings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions .idea/funding-analysis.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

180 changes: 180 additions & 0 deletions .idea/workspace.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 27 additions & 9 deletions funding_analysis/loading_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,36 @@
assert len(nonexhibit_items_df.index) == 14325
assert len(nonexhibit_items_df.columns) == 141


# The data flags table is too large to push to GitHub as a single workbook,
# so it is stored as two workbooks (data_flags1.xls and data_flags2.xls).
# Each part is loaded and checked on its own, then both are combined into
# a single data frame.

# Load the first part of the data flags table as a pandas data frame
data_flags1_workbook_path = workbooks_directory_path / 'data_flags1.xls'
data_flags1_df = pandas.read_excel(data_flags1_workbook_path)

# Do some shallow testing to verify everything went well
col1_title = data_flags1_df.columns[0]
assert col1_title == 'STATE'
assert len(data_flags1_df.index) == 7160
assert len(data_flags1_df.columns) == 130


# Load the second part of the data flags table as a pandas data frame
data_flags2_workbook_path = workbooks_directory_path / 'data_flags2.xls'
data_flags2_df = pandas.read_excel(data_flags2_workbook_path)

# Do some shallow testing to verify everything went well
col1_title = data_flags2_df.columns[0]
assert col1_title == 'STATE'
assert len(data_flags2_df.index) == 7165
assert len(data_flags2_df.columns) == 130

# Combine the two parts into one large data frame. pandas.concat is used
# because DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# ignore_index=True renumbers the rows 0..n-1 across both parts.
data_flags_df = pandas.concat(
    [data_flags1_df, data_flags2_df],
    ignore_index=True,
)

# Do some shallow testing to verify everything went well
col1_title = data_flags_df.columns[0]
assert col1_title == 'STATE'
# No data row may hold the header text itself: that would mean a header row
# from one of the parts was read in as data.
assert (data_flags_df[col1_title] != 'STATE').all()
# 7160 + 7165 rows from the two parts, with the same 130 columns
assert len(data_flags_df.index) == 14325
assert len(data_flags_df.columns) == 130


# Load the relevant raw data table as a pandas data frame
Expand Down
Binary file added resources/raw_data/data_flags1.xls
Binary file not shown.
Binary file added resources/raw_data/data_flags2.xls
Binary file not shown.

0 comments on commit 1f42523

Please sign in to comment.