Skip to content

Commit

Permalink
disables NER use by default #9
Browse files Browse the repository at this point in the history
  • Loading branch information
ceteri committed Sep 15, 2017
1 parent 0f4429c commit 24d5d28
Showing 1 changed file with 9 additions and 8 deletions.
17 changes: 9 additions & 8 deletions pytextrank/pytextrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ def calc_rms (values):
return max(values)


- def normalize_key_phrases (path, ranks, stopwords=None, spacy_nlp=None):
+ def normalize_key_phrases (path, ranks, stopwords=None, spacy_nlp=None, skip_ner=True):
"""
collect keyphrases, named entities, etc., while removing stop words
"""
Expand Down Expand Up @@ -578,14 +578,15 @@ def normalize_key_phrases (path, ranks, stopwords=None, spacy_nlp=None):
prev_lex = single_lex[id]
single_lex[id] = rl._replace(count = prev_lex.count + 1)

- for rl in collect_entities(sent, ranks, stopwords, spacy_nlp):
-     id = str(rl.ids)
+ if not skip_ner:
+     for rl in collect_entities(sent, ranks, stopwords, spacy_nlp):
+         id = str(rl.ids)

-     if id not in phrase_lex:
-         phrase_lex[id] = rl
-     else:
-         prev_lex = phrase_lex[id]
-         phrase_lex[id] = rl._replace(count = prev_lex.count + 1)
+         if id not in phrase_lex:
+             phrase_lex[id] = rl
+         else:
+             prev_lex = phrase_lex[id]
+             phrase_lex[id] = rl._replace(count = prev_lex.count + 1)

for rl in collect_phrases(sent, ranks, spacy_nlp):
id = str(rl.ids)
Expand Down

0 comments on commit 24d5d28

Please sign in to comment.