Commit a6e26b6d authored by Kathryn Elliott

Simplify stopwords

This basically makes stopwords non-optional and only includes Coles & Woolworths.
parent eb2f7808
......@@ -40,8 +40,6 @@ logging.basicConfig(level=logging.DEBUG, format='%(relativeCreated)6d %(threadNa
# Additional words to treat as stopwords; presumably appended to the base
# stopword list where the model is set up (the use site is only partly
# visible here — confirm).
extra_stopwords = ['would', 'be', 're', 'edu', 'use', 'get', 'say', 'do', 'could']
# Words that are looked up in the stopword list; presumably removed from it so
# they survive filtering (the removal itself is not visible here — confirm).
extra_non_stopwords = ['to', 'go', 'not']
def load_data(dataset_file):
    """Load the dataset from JSON using pandas.

    Args:
        dataset_file: Passed straight through to ``pandas.read_json``
            (for example a file path or an open file-like object).

    Returns:
        Whatever ``pandas.read_json`` returns for the input — a
        ``DataFrame`` with its default arguments.
    """
    return pandas.read_json(dataset_file)
......@@ -57,10 +57,7 @@ class TopicModel:"Adding extra stopwords: {0}".format(extra_stopwords))
self._stopwords = stopwords.words('english')
for word in extra_non_stopwords:
if word in self._stopwords:
self._stopwords.extend(['woolworths', 'coles'])
self._nlp = spacy.load('en', disable=['parser', 'ner'])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment