Commit 8f59d6b4 authored by Kathryn Elliott

Debugging code:

* Removed `progress_per=100000` because the code was faulting at this
point
* Small edit to trigram comment
parent 9a829cf2
......@@ -29,9 +29,9 @@ LOG_LEVEL = logging.INFO
LOG_FORMAT = '%(asctime)s --- %(levelname)5s: [%(filename)s:%(lineno)d] %(message)s'
ALLOWED_POS_TAGS = ['NOUN', 'ADJ', 'VERB', 'ADV']
N_GRAM_OPTIONS = dict(min_count=5, threshold=2, delimiter=b' ', progress_per=100000)
N_GRAM_OPTIONS = dict(min_count=5, threshold=2, delimiter=b' ')
LDA_OPTIONS = dict(random_state=100, update_every=0, chunksize=100,
passes=10, alpha='auto', per_word_topics=True, progress_per=100000)
passes=10, alpha='auto', per_word_topics=True)
class TopicModel:
......@@ -113,7 +113,7 @@ class TopicModel:
""" Generate tri-grams for each document. The resultant output is a
concatenation of each tri-gram into a single token using an
underscore. For example: "the cat sat on the mat" would be:
"the_cat_sat, cat_sat_on ..." -- this may be wrong.
"the_cat_sat, cat_sat_on ..." -- although this may be wrong.
doc -- [String]
return -- [String]
"""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment