Commit 4016c00b authored by Kathryn Elliott

Add the run id to the csv files.

parent 1fe9a228
......@@ -63,14 +63,14 @@ class TopicModel:
log_to_console=True, log_level=logging.DEBUG, bigrams=False, trigrams=False):
current_time = datetime.now()
unique_run_id = str(current_time.strftime("%s.%f"))
self._unique_run_id = str(current_time.strftime("%s.%f"))
self._persistance = TopicPersistance(base_dir = "/tmp/LDA/{0}".format(unique_run_id))
self._persistance = TopicPersistance(base_dir = "/tmp/LDA/{0}".format(self._unique_run_id))
if log_to_console:
logging.basicConfig(format=LOG_FORMAT, level=LOG_LEVEL)
else:
log_file = "/tmp/LDA/{0}/topic-model.log".format(unique_run_id)
log_file = "/tmp/LDA/{0}/topic-model.log".format(self._unique_run_id)
print("Saving log file to: " + log_file)
logging.basicConfig(format=LOG_FORMAT, level=LOG_LEVEL, filename=log_file)
......@@ -300,13 +300,13 @@ class TopicModel:
self._logging.info("Topic coherence: {0}".format(self._coherence_models[topic_number].get_coherence()))
def export_topics_per_documents(self, topic_id):
fields = ["topic_id", "document_id", "probability"]
def export_topics_per_documents(self, model_id):
fields = ["run", "model_id", "topic_id", "document_id", "probability"]
model = self._models[topic_id]
model = self._models[model_id]
path = self._persistance.csv_file_path(topic_id)
self._logging.info("Exporting documents for topic {0} to: {1}".format(topic_id, path))
path = self._persistance.csv_file_path(model_id)
self._logging.info("Exporting documents for topic {0} to: {1}".format(model_id, path))
with open(str(path), 'w', newline='') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fields, delimiter="\t")
......@@ -316,7 +316,7 @@ class TopicModel:
topic_probability_pairs = model.get_document_topics(self._bow[document_id])
for (topic_id, probability) in topic_probability_pairs:
writer.writerow({"topic_id": topic_id, "document_id": document_id, "probability": probability})
writer.writerow({"run": self._unique_run_id, "model_id": model_id, "topic_id": topic_id, "document_id": document_id, "probability": probability})
def export_all_topics_per_documents(self):
......@@ -327,7 +327,7 @@ class TopicModel:
def export_document_id_map(self):
fields = ["document_id", "document_name"]
fields = ["run", "document_id", "document_name"]
path = self._persistance.document_map_csv_file_path()
self._logging.info("Exporting document to document_id map")
......@@ -337,7 +337,7 @@ class TopicModel:
writer.writeheader()
for (k, v) in self._document_docid_map:
writer.writerow({"document_id": k, "document_name": v})
writer.writerow({"run": self._unique_run_id, "document_id": k, "document_name": v})
def print_topics_per_documents(self, topic_id):
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment