Skip to content

Commit

Permalink
Now also analyze the decadal survey document itself
Browse files Browse the repository at this point in the history
  • Loading branch information
starfleetjames committed Dec 19, 2024
1 parent 6e88a8b commit 4f0ea84
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 4 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,5 @@ cython_debug/
*.pdf
.DS_Store
*.png
Helio Decadal 2024.mp3
Helio Decadal 2024.wav
27 changes: 23 additions & 4 deletions generate_visualizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def read_white_paper_titles():


def read_white_paper_pdfs():
filenames = glob.glob("white_papers/*.pdf")
filenames = glob.glob("heliodecadal2024.pdf")

text = ''
for file in filenames:
Expand All @@ -44,6 +44,18 @@ def read_white_paper_pdfs():
return text


def read_decadal():
    """Return the concatenated text of the decadal survey PDF.

    Globs for ``heliodecadal2024.pdf`` relative to the current working
    directory and extracts the text of every page of each match, in page
    order.

    Returns:
        The extracted text as one string. If the glob matches nothing, the
        result is an empty string rather than an error.
    """
    filenames = glob.glob("heliodecadal2024.pdf")

    # Collect per-page text and join once instead of growing a string
    # with repeated ``+=`` inside the nested loop.
    page_texts = []
    for file in filenames:
        reader = PdfReader(file)
        page_texts.extend(page.extract_text() for page in reader.pages)

    return ''.join(page_texts)


def plot_data(df):
sns.set_theme()
p = sns.lineplot(data=df, x="Date Submitted", y="Response ID")
Expand Down Expand Up @@ -71,7 +83,11 @@ def make_pie_chart(df):
def make_word_cloud(text, shape='fas fa-sun'):
stop_words = get_stop_words('english')
stop_words.extend(list(string.ascii_lowercase))
stop_words.extend(['et al', 'et', 'al', 'et al.', 'physic', 'geophys', 'doi', 'two', 'thu', 'space physic', 'res lett', 'provide', 'can', 'th', 'de', 'also', 're', 'res', 'lett', 'res lett', 'will', 'however', 'org', 'well', 'within', 'white paper', 'doi', 'http', 'https', 'figure', 'observation', 'observations', 'measurement', 'understanding', 'journal'])
stop_words.extend(['et al', 'et', 'al', 'et al.', 'physic', 'geophys', 'doi', 'two', 'thu', 'space physic', 'res lett', 'provide',
'can', 'th', 'de', 'also', 're', 'res', 'lett', 'res lett', 'will', 'however', 'org', 'well', 'within', 'white paper',
'doi', 'http', 'https', 'figure', 'observation', 'observations', 'measurement', 'understanding', 'copyright','journal',
'http', 'https', 'rights reserved', 'rights', 'reserved', 'prepublication copy', 'prepublication', 'copy',
'editorial correction', 'editorial', 'national', 'academy', 'national academy'])
stylecloud.gen_stylecloud(text=text,
icon_name=shape, # To select the shape, pick a name from https://fontawesome.com/icons?d=gallery&m=free
palette='colorbrewer.diverging.Spectral_11',
Expand All @@ -89,8 +105,11 @@ def make_word_cloud(text, shape='fas fa-sun'):
if do_download:
download_white_paper_pdfs(df_titles)

text_papers = read_white_paper_pdfs()
#text_papers = read_white_paper_pdfs()
#plot_data(df_titles)
generate_stats(df_titles, text_papers)
#generate_stats(df_titles, text_papers)

text_decadal = read_decadal()
make_word_cloud(text_decadal)


0 comments on commit 4f0ea84

Please sign in to comment.