Commit 1bd710c0 authored by Derek Rodriguez
Browse files

finished branch feature

parent 856e886d
......@@ -55,23 +55,18 @@ async def topic_search(
:returns: List of topically similar papers
::todo allow handling of unknown IDs
"""
# TODO: move this if-else to the search-runtime
if query_id in search_runtime.meta_df["id"].tolist():
reply_json = {"results": []} # type: Dict[str, List[Dict[str, str]]]
for paper in search_runtime.topic_mat_search(query_id, top_n=10).itertuples():
reply_json["results"].append(
{
"title": paper.title,
"authors": paper.authors.tolist(),
"categories": paper.categories.tolist(),
"abstract": paper.abstract,
"url": "https://arxiv.org/abs/" + paper.id,
}
)
return reply_json
else:
return {"results": [{"error": "unclassified"}]}
reply_json = {"results": []} # type: Dict[str, List[Dict[str, str]]]
for paper in search_runtime.topic_mat_search(query_id, top_n=10).itertuples():
reply_json["results"].append(
{
"title": paper.title,
"authors": paper.authors.tolist(),
"categories": paper.categories.tolist(),
"abstract": paper.abstract,
"url": "https://arxiv.org/abs/" + paper.id,
}
)
return reply_json
@backend.get("/search/tfidf/{query_id}")
......
......@@ -23,10 +23,10 @@ from typing import Union, List, Dict, Any
######################################
META_DF_FILENAME = "../arXiv_dataset.parquet"
TOPIC_MAT_FILENAME = "../chris_test_doc_topic_matrix2020-08-22T22-12-33.npy"
TERM_MAT_FILENAME = "../term_mats/chris_test_term_mat_2020-08-22T22-12-33.npz"
MODEL_PICKLE_FILENAME = "../pickles/chris_test_nmf_2020-08-22T22-12-33"
VECTORIZER_FILE_NAME = "../pickles/chris_test_tfidf_vec_2020-08-22T22-12-33"
TOPIC_MAT_FILENAME = "../semiprod_doc_topic_matrix2020-08-21T02-12-21.npy"
TERM_MAT_FILENAME = "../term_mats/semiprod_term_mat_2020-08-21T02-12-21.npz"
MODEL_PICKLE_FILENAME = "../pickles/semiprod_lda_2020-08-21T02-12-21"
VECTORIZER_FILE_NAME = "../pickles/semiprod_tfidf_vec_2020-08-21T02-12-21"
# This is the classifier object used for inferring new papers
with open(MODEL_PICKLE_FILENAME, "rb") as fh:
......@@ -124,7 +124,7 @@ def _add_new_metadata_to_global_state(query_id: str):
elif isinstance(topic_model, LDA):
gensim_doc_wrapper = Sparse2Corpus(term_vector, documents_columns=False)
topic_vector = np.array([x[1] for x in topic_model[gensim_doc_wrapper[0]]])
topic_model.update(gensim_doc_wrapper) # yay, online learning!
# topic_model.update(gensim_doc_wrapper) # yay, online learning!
doc_topic_mat = np.vstack((doc_topic_mat, topic_vector))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment