Skip to content

pep8/pycodestyle fixes for hanging indents in Summarization module #1202

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gensim/summarization/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

# bring model classes directly into package namespace, to save some typing
from .summarizer import summarize, summarize_corpus
- from .keywords import keywords
+ from .keywords import keywords
6 changes: 3 additions & 3 deletions gensim/summarization/bm25.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,16 @@ def initialize(self):
self.df[word] += 1

for word, freq in iteritems(self.df):
- self.idf[word] = math.log(self.corpus_size-freq+0.5) - math.log(freq+0.5)
+ self.idf[word] = math.log(self.corpus_size - freq + 0.5) - math.log(freq + 0.5)

def get_score(self, document, index, average_idf):
score = 0
for word in document:
if word not in self.f[index]:
continue
idf = self.idf[word] if self.idf[word] >= 0 else EPSILON * average_idf
- score += (idf*self.f[index][word]*(PARAM_K1+1)
- / (self.f[index][word] + PARAM_K1*(1 - PARAM_B+PARAM_B*self.corpus_size / self.avgdl)))
+ score += (idf * self.f[index][word] * (PARAM_K1 + 1)
+ / (self.f[index][word] + PARAM_K1 * (1 - PARAM_B + PARAM_B * self.corpus_size / self.avgdl)))
return score

def get_scores(self, document, average_idf):
Expand Down
2 changes: 1 addition & 1 deletion gensim/summarization/keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def _get_combined_keywords(_keywords, split_text):
result.append(word) # appends last word if keyword and doesn't iterate
for j in xrange(i + 1, len_text):
other_word = _strip_word(split_text[j])
- if other_word in _keywords and other_word == split_text[j] and not other_word in combined_word:
+ if other_word in _keywords and other_word == split_text[j] and other_word not in combined_word:
combined_word.append(other_word)
else:
for keyword in combined_word:
Expand Down
4 changes: 2 additions & 2 deletions gensim/summarization/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,10 @@ def summarize(text, ratio=0.2, word_count=None, split=False):
logger.warning("Input text is empty.")
return

- # If only one sentence is present, the function raises an error (Avoids ZeroDivisionError).
+ # If only one sentence is present, the function raises an error (Avoids ZeroDivisionError).
if len(sentences) == 1:
raise ValueError("input must have more than one sentence")

# Warns if the text is too short.
if len(sentences) < INPUT_MIN_LENGTH:
logger.warning("Input text is expected to have at least " + str(INPUT_MIN_LENGTH) + " sentences.")
Expand Down