Skip to content

Commit

Permalink
Extract vector helpers to a file
Browse files Browse the repository at this point in the history
  • Loading branch information
mkonicek committed Jan 27, 2018
1 parent 44d8162 commit 80f4065
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 30 deletions.
35 changes: 7 additions & 28 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,14 @@
import math
from typing import Any, Iterable, List, Optional, Set, Tuple

from load import load_words
from word import Word, Vector
import math
from operator import itemgetter
from typing import Any, Iterable, List, Optional, Set, Tuple

def vector_len(v: Vector) -> float:
    """Return the Euclidean length (L2 norm) of ``v``.

    The length is the square root of the sum of the squared components.
    An empty vector has length ``0.0``.
    """
    # A generator expression avoids building a temporary list just to sum it.
    return math.sqrt(sum(x * x for x in v))

def dot_product(v1: Vector, v2: Vector) -> float:
    """Return the dot product of ``v1`` and ``v2``.

    Both vectors must have the same number of components; this is checked
    with an assertion because ``zip`` would otherwise silently truncate to
    the shorter vector. Two empty vectors yield ``0``.
    """
    assert len(v1) == len(v2)
    return sum(x * y for x, y in zip(v1, v2))

def add_vectors(v1: Vector, v2: Vector) -> Vector:
    """Return the component-wise sum of ``v1`` and ``v2`` as a new list.

    Both vectors must have the same number of components; this is checked
    with an assertion because ``zip`` would otherwise silently truncate to
    the shorter vector. Neither input is modified.
    """
    assert len(v1) == len(v2)
    return [x + y for x, y in zip(v1, v2)]

def sub_vectors(v1: Vector, v2: Vector) -> Vector:
    """Return the component-wise difference ``v1 - v2`` as a new list.

    Both vectors must have the same number of components; this is checked
    with an assertion because ``zip`` would otherwise silently truncate to
    the shorter vector. Neither input is modified.
    """
    assert len(v1) == len(v2)
    return [x - y for x, y in zip(v1, v2)]

def cosine_similarity(v1: Vector, v2: Vector) -> float:
    """Return the cosine of the angle between the two vectors.

    Results range from -1 (very different) to 1 (very similar).

    Raises:
        ZeroDivisionError: if either vector has length zero, because the
            product of the two lengths is then ``0.0``.
    """
    # vector_len already returns a float, so no extra float() cast is needed.
    return dot_product(v1, v2) / (vector_len(v1) * vector_len(v2))
import vectors as v
from word import Word, Vector

def most_similar(base_vector: Vector, words: List[Word]) -> List[Tuple[float, Word]]:
    """Rank every word by cosine similarity to ``base_vector``.

    Returns a list of ``(similarity, word)`` pairs, one per entry in
    ``words``, ordered from most similar to least similar. The full ranking
    is returned; callers slice it to keep only the top entries.
    """
    # Compute each similarity exactly once, via the extracted vectors module.
    words_with_similarity = [
        (v.cosine_similarity(base_vector, w.vector), w) for w in words
    ]
    # We want cosine similarity to be as large as possible (close to 1).
    # Sorting on the score alone avoids ever comparing Word objects on ties.
    return sorted(words_with_similarity, key=lambda pair: pair[0], reverse=True)
Expand Down Expand Up @@ -64,8 +43,8 @@ def closest_analogies(
word_right2 = find_word(right2, words)
if (not word_left1) or (not word_left2) or (not word_right2):
return []
vector = add_vectors(
sub_vectors(word_left1.vector, word_left2.vector),
vector = v.add(
v.sub(word_left1.vector, word_left2.vector),
word_right2.vector)
closest = most_similar(vector, words)[:10]
def is_redundant(word: str) -> bool:
Expand Down
3 changes: 1 addition & 2 deletions word.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from typing import List

Vector = List[float]
from vectors import Vector

class Word:
"""A single word (one line of the input file)"""
Expand Down

0 comments on commit 80f4065

Please sign in to comment.