Skip to content

Commit 32f3389

Browse files
author
Jonathan Spalink
committed
commented out zlog references
1 parent 5f47752 commit 32f3389

File tree

1 file changed

+11
-10
lines changed

1 file changed

+11
-10
lines changed

readability/readability.py

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -197,6 +197,7 @@ def addMeta(self, dedupe, base=None):
197197
meta = fragment_fromstring(u'<p class="econtextmax meta {}">{}</p>'.format(prop, re.sub("<.*?>", '', metacontent)))
198198
except:
199199
#zlog.debug(u"metacontent {}: {}".format(prop, metacontent))
200+
pass
200201
base.insert(0, meta)
201202
#zlog.debug(u" ** Found meta: {}".format(tounicode(meta)))
202203
dedupe[prop[prop.find(':')+1:]] = metacontent
@@ -332,10 +333,10 @@ def select_best_candidate(self, candidates):
332333
for candidate in sorted_candidates[:5]:
333334
elem = candidate['elem']
334335
#zlog.debug(u"Top 5 : %6.3f %s" % (candidate['content_score'], describe(elem)))
335-
336+
336337
if len(sorted_candidates) == 0:
337338
return None
338-
339+
339340
best_candidate = sorted_candidates[0]
340341
return best_candidate
341342

@@ -395,7 +396,7 @@ def score_paragraphs(self, ):
395396
score = candidate['content_score']
396397
#zlog.debug(u"Candid: %6.3f %s link density %.3f -> %6.3f" % (score, describe(elem), ld, score * (1 - ld)))
397398
candidate['content_score'] *= (1 - ld)
398-
399+
399400
return candidates
400401

401402
def class_weight(self, e):
@@ -404,18 +405,18 @@ def class_weight(self, e):
404405
if REGEXES['negativeRe'].search(e.get('class')):
405406
#zlog.debug(u"debiting score for negativeRe in class {}".format(describe(e)))
406407
weight -= 35 * len(REGEXES['negativeRe'].findall(e.get('class')))
407-
408+
408409
if REGEXES['positiveRe'].search(e.get('class')):
409410
weight += 25 * len(REGEXES['positiveRe'].findall(e.get('class')))
410-
411+
411412
if e.get('id', None):
412413
if REGEXES['negativeRe'].search(e.get('id')):
413414
#zlog.debug(u"debiting score for negativeRe in id {}".format(describe(e)))
414415
weight -= 35 * len(REGEXES['negativeRe'].findall(e.get('id')))
415-
416+
416417
if REGEXES['positiveRe'].search(e.get('id')):
417418
weight += 25 * len(REGEXES['positiveRe'].findall(e.get('id')))
418-
419+
419420
return weight
420421

421422
def score_node(self, elem):
@@ -457,7 +458,7 @@ def remove_unlikely_candidates(self):
457458
#zlog.debug(u"Removing hidden content - %s" % describe(elem))
458459
to_remove.append(elem)
459460
continue
460-
461+
461462
for elem in to_remove:
462463
elem.drop_tree()
463464

@@ -544,7 +545,7 @@ def sanitize(self, node, candidates):
544545
for kind in ['p', 'img', 'li', 'a', 'embed', 'input']:
545546
counts[kind] = len(el.findall('.//%s' % kind))
546547
counts["li"] -= 100
547-
548+
548549
# Count the text length excluding any surrounding whitespace
549550
content_length = text_length(el)
550551
link_density = self.get_link_density(el)
@@ -633,7 +634,7 @@ def sanitize(self, node, candidates):
633634
#zlog.debug(u"Allowing %s" % describe(el))
634635
for desnode in self.tags(el, "table", "ul", "div"):
635636
allowed[desnode] = True
636-
637+
637638
if to_remove:
638639
#zlog.debug(u"Cleaned %6.3f %s with weight %s cause it has %s." % (content_score, describe(el), weight, reason))
639640
#print tounicode(el)

0 commit comments

Comments
 (0)