Commit a6e26b6d authored by Kathryn Elliott

Simplify stopwords

This makes stopwords non-optional, and the only extra stopwords added are
'coles' and 'woolworths'.
parent eb2f7808
......@@ -40,8 +40,6 @@ logging.basicConfig(level=logging.DEBUG, format='%(relativeCreated)6d %(threadNa
# Additional words appended to the base English stopword list
# (used via `self._stopwords.extend(extra_stopwords)` in TopicModel).
extra_stopwords = ['would', 'be', 're', 'edu', 'use', 'get', 'say', 'do', 'could']
# Words that are removed from the stopword list when present
# (see the `extra_non_stopwords` loop in TopicModel).
extra_non_stopwords = ['to', 'go', 'not']
def load_data(dataset_file):
    """Load a JSON dataset into a pandas object.

    Args:
        dataset_file: Anything `pandas.read_json` accepts as its first
            argument (for example a path or an open file-like object).

    Returns:
        Whatever `pandas.read_json` returns for the input (a DataFrame
        with the default arguments used here).
    """
    return pandas.read_json(dataset_file)
......
......@@ -57,10 +57,7 @@ class TopicModel:
self._logging.info("Adding extra stopwords: {0}".format(extra_stopwords))
self._stopwords = stopwords.words('english')
self._stopwords.extend(extra_stopwords)
for word in extra_non_stopwords:
if word in self._stopwords:
self._stopwords.remove(word)
self._stopwords.extend(['woolworths', 'coles'])
self._nlp = spacy.load('en', disable=['parser', 'ner'])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment