diff --git a/download_repo_info.py b/download_repo_info.py
index 000ad46..f55792e 100644
--- a/download_repo_info.py
+++ b/download_repo_info.py
@@ -7,6 +7,7 @@
 The output is github_repositories.csv
 '''
 
+import logging
 import os
 import json
 import time
@@ -31,7 +32,7 @@
 def save_ckpt(lower_bound: int, upper_bound: int):
     global repo_list
     repo_list = list(set(repo_list)) # remove duplicates
-    print(f"Saving checkpoint {lower_bound, upper_bound}...")
+    logging.info(f"Saving checkpoint {lower_bound, upper_bound}...")
     with open('repo_ckpt.pkl', 'wb') as f:
         pickle.dump((lower_bound, upper_bound, repo_list), f)
 
@@ -45,9 +46,9 @@ def get_request(lower_bound: int, upper_bound: int, page: int = 1):
            )
 
     if r.status_code == 403:
-            print('API rate limit exceeded.')
+            logging.error('API rate limit exceeded.')
             save_ckpt(lower_bound, upper_bound, repo_list)
-            print('Exiting program.')
+            logging.info('Exiting program.')
             exit()
     elif r.status_code == 422:
         # No more pages available
@@ -56,16 +57,16 @@ def get_request(lower_bound: int, upper_bound: int, page: int = 1):
     try:
         assert r.status_code == 200
     except:
-        print(f'Unexpected status code. Status code returned is {r.status_code}')
-        print(r.text)
+        logging.error(f'Unexpected status code. Status code returned is {r.status_code}')
+        logging.info(r.text)
         save_ckpt(lower_bound, upper_bound)
-        print("Exiting program.")
+        logging.info("Exiting program.")
         exit()
     
     REMAINING_REQUESTS -= 1
 
     if REMAINING_REQUESTS == 0:
-        print("Sleeping 60 seconds to stay under GitHub API rate limit...")
+        logging.info("Sleeping 60 seconds to stay under GitHub API rate limit...")
         time.sleep(60)
         save_ckpt(lower_bound, upper_bound)
         REMAINING_REQUESTS = 30
@@ -102,14 +103,14 @@ def download_range(lower_bound, upper_bound):
         # Load checkpoint
         with open('repo_ckpt.pkl', 'rb') as f:
             lower_bound, upper_bound, repo_list = pickle.load(f)
-        print(f"Loading from {lower_bound}..{upper_bound}")
+        logging.info(f"Loading from {lower_bound}..{upper_bound}")
     else:
         lower_bound = 0
         upper_bound = 5
         repo_list = []
 
     if lower_bound >= 10000000:
-        print('''
+        logging.info('''
 Checkpoint is for an already completed download of GitHub repository information.
 Please delete `repo_ckpt.pkl` to restart and try again.
             ''')
@@ -140,13 +141,13 @@ def download_range(lower_bound, upper_bound):
             # Update the slope of our linear approximation
             slope = n_results/(upper_bound - lower_bound)
 
-            print(f'size {lower_bound}..{upper_bound} ~> {n_results} results')
+            logging.info(f'size {lower_bound}..{upper_bound} ~> {n_results} results')
             # If we get <= 1000 results over the range, exit the search loop
             # and download all repository names over the range
             if n_results <= 1000:
                 break
 
-        print(f"Downloading repositories in size range {lower_bound}..{upper_bound}")
+        logging.info(f"Downloading repositories in size range {lower_bound}..{upper_bound}")
         download_range(lower_bound, upper_bound)
         lower_bound = upper_bound + 1
 
diff --git a/download_repo_text.py b/download_repo_text.py
index 41aefdb..564abb9 100644
--- a/download_repo_text.py
+++ b/download_repo_text.py
@@ -1,3 +1,4 @@
+import logging
 import chardet
 import magic
 import lm_dataformat as lmd
@@ -77,7 +78,7 @@ def get_content(f):
             # something went horribly wrong!
             ...
     except:
-        print(type, f, enc)
+        logging.info("%s %s %s", type, f, enc)
         traceback.print_exc()
         time.sleep(0.1)
         return
diff --git a/download_repos.py b/download_repos.py
index d9658b4..4be01b4 100644
--- a/download_repos.py
+++ b/download_repos.py
@@ -3,6 +3,7 @@
 Downloads all the repositories listed in repo_names.csv
 '''
 
+import logging
 import os
 import csv
 from tqdm import tqdm
@@ -13,7 +14,7 @@ def download_repo(repo):
     if file_name not in os.listdir("output/"):
         os.system(f'git clone --depth 1 --single-branch https://github.com/{repo} output/{file_name}')
     else:
-        print(f"Already downloaded {repo}")
+        logging.info(f"Already downloaded {repo}")
 
 with open('github_repositories.csv', 'r') as f:
     csv_reader = csv.reader(f)