sd17spring · jeffBP · May 1, 2017
diff --git a/frequency.py b/frequency.py
@@ -2,6 +2,7 @@
 Project Gutenberg """
 
 import string
+import operator
 
 
 def get_word_list(file_name):
@@ -10,7 +11,37 @@ def get_word_list(file_name):
     returns a list of the words used in the book as a list.
     All words are converted to lower case.
     """
-    pass
+    # Read the whole book; `with` closes the file even if reading fails.
+    with open(file_name, 'r') as f:
+        lines = f.readlines()
+
+    # Skip the Project Gutenberg header: the book text begins on the line
+    # after the START marker. If the marker is missing, keep every line
+    # rather than indexing past the end of the list (IndexError).
+    start_marker = 'START OF THIS PROJECT GUTENBERG EBOOK'
+    for index, line in enumerate(lines):
+        if start_marker in line:
+            lines = lines[index + 1:]
+            break
+
+    # Map each word to the number of times it occurs.
+    word_counts = {}
+    for line in lines:
+        for token in line.split():
+            # Strip surrounding punctuation and lower-case the word, as
+            # the docstring promises.
+            word = token.strip(string.punctuation).lower()
+            # A token made only of punctuation (e.g. "--") is not a word.
+            if not word:
+                continue
+            # The first occurrence must count as 1, not 0.
+            word_counts[word] = word_counts.get(word, 0) + 1
+
+    # Return (word, count) pairs as a real list, ready for sorting by
+    # count.
+    return list(word_counts.items())
+
+
 
 
 def get_top_n_words(word_list, n):
@@ -23,8 +54,14 @@ def get_top_n_words(word_list, n):
     returns: a list of n most frequently occurring words ordered from most
     frequently to least frequentlyoccurring
     """
-    pass
+    # Sort on the count (second tuple item), most frequent first; the
+    # stable reverse sort leaves tied words in their original order.
+    ranked = sorted(word_list, key=operator.itemgetter(1), reverse=True)
+    # Return the first n pairs (n, not a fixed hundred). max() guards a
+    # negative n, which a bare slice would read as "all but the last".
+    return ranked[:max(n, 0)]
 
 if __name__ == "__main__":
     print("Running WordFrequency Toolbox")
-    print(string.punctuation)
+    top_words = get_top_n_words(get_word_list("pg32325.txt"), 100)
+    print(top_words)