Commit 91e6aba2 authored by Kathryn Elliott's avatar Kathryn Elliott

Add the sha256 hash to the meta file.

parent a5ef3072
......@@ -9,6 +9,7 @@ import csv
import time
import json
import pandas
import hashlib
import spacy
from spacy.tokens import Doc
......@@ -74,7 +75,8 @@ class TopicModel:
self._persistance = TopicPersistance(base_dir = "/tmp/LDA/{0}".format(unique_run_id))
self._persistance.save_params(dict(input_filename=filename, min_topic_count=min_topic_count, max_topic_count=max_topic_count,
self._persistance.save_params(dict(input_filename=filename, input_file_sha256=hash_input_data(self._json)
min_topic_count=min_topic_count, max_topic_count=max_topic_count,
topic_step=topic_step, tfidf=tfidf, trigrams=trigrams, pos_tags=pos_tags,
timestamp=current_time.isoformat(), git=self.repo_info()))
......@@ -343,6 +345,12 @@ class TopicModel:
return subprocess.check_output(["git", "describe", "--always"]).decode("utf-8").strip()
def hash_input_data(self, data):
    """Return the SHA-256 hex digest of the input data.

    The digest is recorded in the run parameters under the
    ``input_file_sha256`` key, so the algorithm used here must really be
    SHA-256 (the previous implementation used SHA-224, which produced a
    56-character digest stored under a misleading name).

    Args:
        data: The input contents, either as ``str`` (encoded as UTF-8
            before hashing) or as ``bytes`` (hashed as-is).

    Returns:
        The 64-character lowercase hexadecimal SHA-256 digest.
    """
    logging.info("Hashing the input file.")
    # Text is hashed via its UTF-8 encoding; raw bytes need no conversion.
    payload = data.encode("utf-8") if isinstance(data, str) else data
    return hashlib.sha256(payload).hexdigest()
# Simple class to hide the details of saving data to the filesystem.
class TopicPersistance:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment