Commit 1dc1b02b authored by Kathryn Elliott

Add poorly thought out persistence class

The intent here is to keep caching and persistence separate from the
main code but I'm not sure how successful I've been: it's a bit of a
prick to use!
parent 6c6f65ac
......@@ -204,3 +204,79 @@ class TopicModel:
logging.info("{:20} {:.3f}".format(term, round(frequency, 3)))
logging.info("")
# Simple class to hide the details of saving data to the filesystem.
class TopicPersistance:
    """Save topic-model artefacts beneath a per-model directory tree.

    Layout under ``base_dir``::

        meta                              str() of the run parameters
        <model_id>/model                  written by ``model.save``
        <model_id>/visualisation.html     written by ``pyLDAvis.save_html``
        <model_id>/topic-documents.csv    topic/document CSV

    Directories are created on demand (``mkdir`` is asked for mode 0o700 and
    tolerates directories that already exist).
    """

    TOPICS_DATA_DIR = "/tmp/LDA/"

    def __init__(self, base_dir=TOPICS_DATA_DIR):
        """Remember ``base_dir`` as a ``Path`` and make sure it exists."""
        self._base_dir = Path(base_dir)
        self._mkdir(self._base_dir)

    def _mk_model_part(self, model_id):
        """Create (if needed) and return the directory for ``model_id``."""
        path = self._base_dir.joinpath(str(model_id))
        self._mkdir(path)
        return path

    def _mk_part(self, model_id, part, dir=False):
        """Return ``<base_dir>/<model_id>/<part>``, creating its parents.

        The model directory is always created. When ``dir`` is true, ``part``
        itself is created as a directory as well; otherwise ``part`` is left
        for the caller to create (typically as a file).
        """
        path = self._mk_model_part(model_id).joinpath(part)
        if dir:
            self._mkdir(path)
        return path

    def _mk_path(self, model_id, part):
        """Return ``<base_dir>/<model_id>/<part>`` without touching the disk."""
        return self._base_dir.joinpath(str(model_id)).joinpath(part)

    def _mkdir(self, path):
        """Create ``path`` and any missing parents; a no-op if it exists."""
        path.mkdir(mode=0o700, exist_ok=True, parents=True)

    def save_params(self, args):
        """Write ``str(args)`` to the ``meta`` file directly under ``base_dir``."""
        path = str(self._base_dir.joinpath("meta"))
        with open(path, "w") as fd:
            fd.write(str(args))

    def save_model(self, model, model_id):
        """Save ``model`` to ``<base_dir>/<model_id>/model``.

        ``model`` only needs a ``save(path)`` method that accepts a string
        path.
        """
        # Previously the literal "corpus" was passed instead of model_id, so
        # every model was written to the same location and overwrote the last.
        path = str(self._mk_part(model_id, "model"))
        logging.info("Saving the model to: %s", path)
        model.save(path)

    def save_visualisation(self, vis, model_id):
        """Export ``vis`` via ``pyLDAvis.save_html`` into the model directory."""
        path = self._mk_model_part(model_id).joinpath("visualisation.html")
        logging.info(
            "Exporting html visualisation for topic %s to: %s", model_id, path
        )
        pyLDAvis.save_html(vis, str(path))

    def csv_file_path(self, model_id):
        """Return the topic-documents CSV path, creating the model directory."""
        return self._mk_part(model_id, "topic-documents.csv")

    def open_topic_documents_csv(self, model_id):
        """Yield the topic-documents CSV for ``model_id``, opened for writing.

        The file is opened with ``newline=''`` as the ``csv`` module expects,
        at the same location ``csv_file_path`` reports. It is closed when the
        generator is resumed, closed, or garbage-collected.

        NOTE(review): this is a plain generator function, so ``with`` on its
        return value does not work; drive it with ``for``/``next`` or wrap it
        with ``contextlib.contextmanager``. It was probably meant to carry the
        ``@contextmanager`` decorator -- confirm against the callers.
        """
        with open(self.csv_file_path(model_id), "w", newline="") as csvfile:
            yield csvfile
def export_topic_visualisation_data(self, topic_id):
    """Render the pyLDAvis view of one model and save it as an HTML file.

    The model is looked up in ``self._models`` by ``topic_id``. The output
    file name is ``TOPICS_VISUALISATION_PREFIX`` followed by ``"."``, the
    topic id and ``".html"``.
    """
    lda_model = self._models[topic_id]
    target = TOPICS_VISUALISATION_PREFIX + "." + str(topic_id) + ".html"

    message = "Exporting html visualisation for topic {0} to: {1}"
    logging.info(message.format(topic_id, target))

    # prepare() is fed the model together with self._bow and self._dictionary.
    prepared = pyLDAvis.gensim.prepare(lda_model, self._bow, self._dictionary)
    pyLDAvis.save_html(prepared, target)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment