Commit eb2f7808 authored by Kathryn Elliott

Save visualisations & topics documents to the filesystem.

parent 1dc1b02b
......@@ -18,6 +18,10 @@ from gensim.models.coherencemodel import CoherenceModel
from nltk.corpus import stopwords # stopwords library from nltk
import pyLDAvis
import pyLDAvis.gensim
import matplotlib.pyplot as plt
from pprint import pformat
import logging
......@@ -189,6 +193,40 @@ class TopicModel:
return((max_n, max_model))
def export_topics_per_documents(self, topic_id):
    """Write the per-document topic probabilities of one model to a file.

    The model is looked up in ``self._models`` under ``topic_id`` and the
    output path is obtained from ``self._persistance.csv_file_path``.  The
    file is tab-delimited with the header ``topic_id``, ``document_id``,
    ``probability``; one row is written for every (topic, probability)
    pair that ``model.get_document_topics`` returns for each document
    index in ``range(self._corpus_size)``.

    Args:
        topic_id: Key of the model in ``self._models``; also handed to the
            persistence helper to obtain the output path.
    """
    fields = ["topic_id", "document_id", "probability"]
    model = self._models[topic_id]
    path = self._persistance.csv_file_path(topic_id)

    self._logging.info("Exporting documents for topic {0} to: {1}".format(topic_id, path))

    # newline='' lets the csv writer control line endings itself.
    with open(path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields, delimiter="\t")
        writer.writeheader()

        for document_id in range(self._corpus_size):
            topic_probability_pairs = model.get_document_topics(self._bow[document_id])

            # Use a distinct loop name so the ``topic_id`` parameter (the
            # model key) is not silently rebound to a per-document topic.
            for (document_topic_id, probability) in topic_probability_pairs:
                writer.writerow({
                    "topic_id": document_topic_id,
                    "document_id": document_id,
                    "probability": probability,
                })
def export_all_topics_per_documents(self):
    """Run ``export_topics_per_documents`` for every entry of the topics generator.

    ``self._topics_generator()`` yields two-element items; only the first
    element is used here, as the argument to the per-model export.
    """
    self._logging.info("Exporting documents for all topics.")

    export_one = self.export_topics_per_documents
    for model_key, _unused in self._topics_generator():
        export_one(model_key)
# Print, for each document index in range(self._corpus_size), whatever
# get_document_topics returns for that document's entry in self._bow, using
# the model stored in self._models under `topic_id`.
def print_topics_per_documents(self, topic_id):
# Select the model to query.
model = self._models[topic_id]
for n in range(self._corpus_size):
print(model.get_document_topics(self._bow[n]))
# NOTE(review): indentation was lost in this excerpt, so it is unclear whether
# this blank line is printed after every document or once after the loop --
# confirm against the original file.
print()
def print_topics(self, topic_id, topn=10, compact=True):
""" Print the top terms for each topic.
"""
......@@ -206,6 +244,21 @@ class TopicModel:
logging.info("")
def export_all_topic_visualisation_data(self):
    """Run ``export_topic_visualisation_data`` for every entry of the topics generator.

    ``self._topics_generator()`` yields two-element items; only the first
    element is used here, as the argument to the per-model export.
    """
    self._logging.info("Exporting topic visualisations for all topics.")

    export_one = self.export_topic_visualisation_data
    for model_key, _unused in self._topics_generator():
        export_one(model_key)
def export_topic_visualisation_data(self, topic_id):
    """Prepare the pyLDAvis data for one model and hand it to the persistence layer.

    The model is looked up in ``self._models`` under ``topic_id``; the
    prepared visualisation is built from that model together with
    ``self._bow`` and ``self._dictionary`` and then passed to
    ``self._persistance.save_visualisation`` along with ``topic_id``.

    Args:
        topic_id: Key of the model in ``self._models``; also forwarded to
            the persistence helper.
    """
    model = self._models[topic_id]

    # No filename is built here: the persistence helper receives only the
    # prepared data and ``topic_id`` (the previous local filename was never used).
    vis = pyLDAvis.gensim.prepare(model, self._bow, self._dictionary)
    self._persistance.save_visualisation(vis, topic_id)
# Simple class to hide the details of saving data to the filesystem.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment