Commit dbe7bcd0 authored by Rob Tomsick

Bound min query length, add base cases for scoring + shingle sizing

parent 9c0b5b7a
......@@ -40,7 +40,6 @@ import static org.jooq.impl.DSL.name;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
......@@ -48,7 +47,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
......@@ -74,6 +72,8 @@ import edu.unc.cscc.crxrest.model.product.ProductComponent;
public class NDCDictionaryService
implements DictionaryService
{
public static final int MIN_QUERY_LEN = 2;
private static final int RESULT_LIMIT = 1000;
/**
......@@ -113,6 +113,11 @@ implements DictionaryService
findTradeNames(final String query, int limit)
{
limit = limit > 0 ? limit : this.resultLimit();
if (query.length() < MIN_QUERY_LEN)
{
return Collections.emptyList();
}
/* weirdness warning: we limit twice. Once in the query, then once
* again in the result processing. This is because trade names -> drug
......@@ -165,6 +170,11 @@ implements DictionaryService
{
limit = limit > 0 ? limit : this.resultLimit();
if (query.length() < MIN_QUERY_LEN)
{
return Collections.emptyList();
}
final Field<String> nf = field(name("name"), String.class);
final Set<Drug> drugs = this.ctx
......@@ -213,6 +223,11 @@ implements DictionaryService
{
limit = limit > 0 ? limit : this.resultLimit();
if (query.length() < MIN_QUERY_LEN)
{
return Collections.emptyList();
}
final LinkedHashSet<UUID> ids = new LinkedHashSet<>();
final Field<String> hashField = field(name("phonetic_hash"), String.class);
......@@ -467,10 +482,17 @@ implements DictionaryService
private static final double
score(String a, String b)
{
if (a.length() == 0 || b.length() == 0)
{
return 0.0d;
}
int shingleSize = Math.min(a.length(), b.length());
shingleSize = Math.min(shingleSize, 3);
/* TODO a.length < n-gram size */
List<String> ang = shingle(a, 2);
List<String> bng = shingle(b, 2);
List<String> ang = shingle(a, 3);
List<String> bng = shingle(b, 3);
/* jaccard is size of intersection / size of union */
Set<String> intersection = new HashSet<>(ang);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment