Commit 7e3bcd48 authored by Rob Tomsick

Bound fuzzy hash len

parent bd6ae6e6
......@@ -76,7 +76,15 @@ public class NDCDictionaryService
implements DictionaryService
{
private static final int RESULT_LIMIT = 1000;
/**
* Distance that we will fuzz hashes.
*/
private static final int HASH_DISTANCE = 3;
/**
* Minimum length of hashes generated as part of our fuzzing process.
*/
private static final int MIN_FUZZY_HASH_LEN = 3;
private final DSLContext ctx;
private final Dictionary dictionary;
......@@ -172,20 +180,12 @@ implements DictionaryService
/* fuzzy hash search */
final Iterator<List<String>> hi =
partitionByLength(hashFuzz(query, HASH_DISTANCE)).iterator();
partitionByLength(hashFuzz(query, HASH_DISTANCE, MIN_FUZZY_HASH_LEN)).iterator();
while (drugs.size() < limit && hi.hasNext())
{
List<String> hashes = hi.next();
/* let's ignore anything < 3 chars since hashing gets useless at
* that point
*/
if (hashes.get(0).length() < 3)
{
break;
}
this.ctx
.select(nf)
.from(N_TABLE)
......@@ -365,23 +365,23 @@ implements DictionaryService
}
/**
 * Produces the phonetic hash of {@code str} together with its fuzzed
 * variants, as a stream of distinct strings.
 *
 * NOTE(review): this span is a diff rendering, so both the old
 * two-argument and the new three-argument signature and {@code concat}
 * lines appear below; the parameters documented here are those of the
 * three-argument version.
 *
 * @param str        text that is hashed via {@code PhoneticHash.hash}
 * @param distance   fuzzing distance forwarded to {@code permute}
 * @param minHashLen minimum hash length forwarded to {@code permute}
 * @return the unmodified hash followed by whatever {@code permute} yields,
 *         each converted to a {@code String}, with duplicates removed
 */
private static final Stream<String>
hashFuzz(final String str, final int distance)
hashFuzz(final String str, final int distance, final int minHashLen)
{
final char[] fh = PhoneticHash.hash(str).toCharArray();
/* Stream.of(fh) is a one-element stream holding the unfuzzed hash; it is
 * concatenated ahead of the permutations and is not itself subject to
 * the minHashLen check.
 */
return Stream.concat(Stream.of(fh), permute(fh, distance))
return Stream.concat(Stream.of(fh), permute(fh, distance, minHashLen))
.map(String :: valueOf)
.distinct();
}
private static final Stream<char[]>
permute(char[] input, int distance)
permute(char[] input, int distance, int minLen)
{
if (input.length < distance)
if (input.length < distance || input.length < minLen)
{
return Stream.empty();
}
else if (distance < 1)
else if (distance < 1 || input.length == minLen)
{
return Stream.of(input);
}
......@@ -389,7 +389,7 @@ implements DictionaryService
return IntStream.range(0, input.length)
.mapToObj(i -> ArrayUtils.remove(input, i))
.flatMap(h -> Stream.concat(Stream.of(input),
permute(h, distance - 1)));
permute(h, distance - 1, minLen)));
}
private static final List<List<String>>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment