Commit eea9987b authored by Benjamin Winger

migrate: ignore duplicates when fuzzy matching and properly return results

parent f5f6493d
......@@ -297,10 +297,13 @@ def find_fuzzy_mod(path):
"""
name = os.path.basename(path).rstrip('"')
modq = PriorityQueue()
seen = set()
for mod in load_all():
match = fuzz.token_set_ratio(name, mod.NAME)
# PriorityQueues work on lowest items, so reverse the value
modq.put(PrioritizedItem(100 - match, mod))
if mod.ATOM.CM not in seen:
modq.put(PrioritizedItem(100 - match, mod))
seen.add(mod.ATOM.CM)
# If any matches are greater than 90%, return them
results = PriorityQueue()
......@@ -315,18 +318,20 @@ def find_fuzzy_mod(path):
# Otherwise, return best 5 that are greater than 50%
# to avoid overwhelming user with weak matches
if results.empty() == 0:
threshold = 50
if results.empty():
i = 5
if nextentry.priority <= 50:
if nextentry.priority <= threshold:
results.put(nextentry)
nextentry = modq.get()
i -= 1
while i > 0 and nextentry.priority <= 50 and not modq.empty():
while i > 0 and nextentry.priority <= threshold and not modq.empty():
results.put(nextentry)
nextentry = modq.get()
i -= 1
if i > 0 and modq.empty():
if i > 0 and nextentry.priority <= threshold and modq.empty():
results.put(nextentry)
return results
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment