Commit 548fecf1 authored by Rob Tomsick

Improve sorting

parent ade953a6
......@@ -224,32 +224,7 @@ implements DictionaryService
final List<List<String>> hashes =
partitionByLength(hashFuzz(query, HASH_DISTANCE, MIN_FUZZY_HASH_LEN));
// /* first pass for exact matches for non-wildcard hashes */
// for (List<String> hashGroup : hashes)
// {
// if (ids.size() >= limit)
// {
// break;
// }
// Condition cond = DSL.falseCondition();
// for (String hash : hashGroup)
// {
// if (! hash.contains("_"))
// {
// cond = cond.or(hashField.eq(hash));
// }
// }
// ids.addAll(this.ctx
// .select(field(name("id"), UUID.class))
// .from(D_TABLE)
// .where(cond)
// .fetch(field(name("id"), UUID.class)));
// }
for (List<String> hashGroup : hashes)
for (final List<String> hashGroup : hashes)
{
if (ids.size() >= limit)
{
......@@ -287,16 +262,29 @@ implements DictionaryService
List<NDCProduct> products = this.loadProducts(new ArrayList<>(ids));
final String ucQuery = query.toUpperCase();
products.sort(comparatorFor(query, p -> {
/* prop name and NPNs - use whatever's closest to query */
List<String> names = new ArrayList<>(p.nonProprietaryNames());
names.add(p.name());
if (0.2d < StringUtils.getJaroWinklerDistance(ucQuery,
p.name().toUpperCase())
|| p.nonProprietaryNames().isEmpty())
{
return p.name().toUpperCase();
}
/* use npns if the product name is totally dissimilar to the query */
final List<String> names =
p.nonProprietaryNames()
.stream()
.map(s -> s.toUpperCase())
.collect(Collectors.toList());
names.sort((a, b) ->
(int) ((StringUtils.getJaroWinklerDistance(query, b)
(int) ((StringUtils.getJaroWinklerDistance(ucQuery, b)
-
StringUtils.getJaroWinklerDistance(query, a)) * 1000d));
StringUtils.getJaroWinklerDistance(ucQuery, a)) * 1000d));
return names.get(0);
......@@ -416,33 +404,75 @@ implements DictionaryService
{
final String hash = PhoneticHash.hash(reference);
return (a, b) ->
{
/*
* TODO?
*
* We get JW distance, and use that to adjust the weight of
* the lev distance between the hashes. The idea is that
* the distance in hashes becomes more important for strings
* that are further from reference.
*/
return (a, b) ->
{
final String ah = PhoneticHash.hash(accessor.apply(a));
final String bh = PhoneticHash.hash(accessor.apply(b));
double jwa = StringUtils.getJaroWinklerDistance(reference, accessor.apply(a));
double jwb = StringUtils.getJaroWinklerDistance(reference, accessor.apply(b));
double leva = StringUtils.getJaroWinklerDistance(hash, ah);
double levb = StringUtils.getJaroWinklerDistance(hash, bh);
double ascore = jwa;
double bscore = jwb;
return (int) (bscore * 1000d) - (int) (ascore * 1000d);
return (StringUtils.getLevenshteinDistance(reference.toUpperCase(), accessor.apply(a).toUpperCase())
+
StringUtils.getLevenshteinDistance(hash, ah))
-
(StringUtils.getLevenshteinDistance(reference.toUpperCase(), accessor.apply(b).toUpperCase())
+
StringUtils.getLevenshteinDistance(hash, bh));
};
// return (a, b) ->
// {
// /*
// * TODO?
// *
// * We get JW distance, and use that to adjust the weight of
// * the lev distance between the hashes. The idea is that
// * the distance in hashes becomes more important for strings
// * that are further from reference.
// */
// final String ah = PhoneticHash.hash(accessor.apply(a));
// final String bh = PhoneticHash.hash(accessor.apply(b));
// double jwa =
// StringUtils.getJaroWinklerDistance(reference.toUpperCase(),
// accessor.apply(a).toUpperCase());
// double jwb =
// StringUtils.getJaroWinklerDistance(reference.toUpperCase(),
// accessor.apply(b).toUpperCase());
// double hsa = StringUtils.getJaroWinklerDistance(hash, ah);
// double hsb = StringUtils.getJaroWinklerDistance(hash, bh);
// double ascore;
// double bscore;
// if (0.25d > jwa)
// {
// /* use hash as primary score */
// ascore = hsa * (1.0d - jwa) + jwa;
// }
// else
// {
// /* use normal dist as primary */
// ascore = jwa * (1.0d - hsa) + hsa;
// }
// if (0.25d > jwb)
// {
// /* use hash as primary score */
// bscore = hsb * (1.0d - jwb) + jwb;
// }
// else
// {
// /* use normal dist as primary */
// bscore = jwb * (1.0d - hsb) + hsb;
// }
// return (int) (bscore * 1000d) - (int) (ascore * 1000d);
// };
}
private static final Stream<String>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment