Commit 0658d99f authored by Rob Tomsick

Implement barcode support, start phonetic hashing

* Implement barcode support (via NDC->UPN conversion).
* Add support for generating phonetic hashes (no search yet)
parent fb114c81
......@@ -114,6 +114,12 @@
<version>1.5</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.11</version>
</dependency>
<dependency>
<groupId>org.jooq</groupId>
<artifactId>jooq</artifactId>
......
......@@ -39,6 +39,7 @@ import static org.jooq.impl.DSL.field;
import static org.jooq.impl.DSL.name;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
......@@ -47,6 +48,7 @@ import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.ArrayUtils;
import org.jooq.DSLContext;
......@@ -60,6 +62,7 @@ import edu.unc.cscc.crxrest.model.NDC;
import edu.unc.cscc.crxrest.model.TradeName;
import edu.unc.cscc.crxrest.model.UnitOfMeasure;
import edu.unc.cscc.crxrest.model.barcode.Barcode;
import edu.unc.cscc.crxrest.model.barcode.UPCA;
import edu.unc.cscc.crxrest.model.product.Product;
import edu.unc.cscc.crxrest.model.product.ProductComponent;
......@@ -245,7 +248,29 @@ implements DictionaryService
public List<Product>
findByBarcode(Barcode<?> barcode, int limit)
{
/*
 * Look up products by barcode.  Only UPC-A barcodes are handled; any
 * other barcode type yields an empty list.  The UPC is expanded into its
 * candidate NDCs, each candidate is reduced to labeler + product code,
 * and the entries whose "ndc" column matches one of those values are
 * loaded as products.
 *
 * NOTE(review): `limit` is never read in this body, so the result size is
 * not capped here -- confirm whether the limit is meant to be applied to
 * the ID query or to the loaded products.
 *
 * NOTE(review): the unconditional return directly below looks like the
 * pre-change stub shown by the diff view (removed by this commit) rather
 * than part of the new implementation -- confirm against the real file.
 */
return Collections.emptyList();
/* barcode types other than UPC-A are not supported by this lookup */
if (! (barcode instanceof UPCA))
{
return Collections.emptyList();
}
final UPCA upc = (UPCA) barcode;
/* candidate NDCs for the UPC, rebuilt from labeler and product code only */
final List<NDC> ndcs =
Stream.of(NDC.possibleNDCs(upc))
/* strip packaging code */
.map(ndc -> new NDC(ndc.labelerCode(), ndc.productCode()))
.collect(Collectors.toList());
/*
 * Fetch the IDs of all rows in D_TABLE whose "ndc" value equals the
 * string form of any candidate.
 * NOTE(review): the match relies on String.valueOf(NDC) producing the
 * same format as the stored column -- verify against NDC#toString.
 */
List<UUID> ids =
this.ctx.select(field(name("id"), UUID.class))
.from(D_TABLE)
.where(field(name("ndc"), String.class)
.in(ndcs.stream()
.map(String :: valueOf)
.collect(Collectors.toList())))
.fetch(field(name("id"), UUID.class));
/* turn the matched entry IDs into Product instances */
return this.loadProducts(ids);
}
private final List<Product>
......
......@@ -140,6 +140,8 @@ implements DBDictionaryProvider
e.getDosageFormName())
.set(field(name("proprietary_name"), String.class),
e.getProprietaryName())
.set(field(name("phonetic_hash"), String.class),
PhoneticHash.hash(e.getProprietaryName()))
.execute();
DSL.using(config)
......@@ -148,7 +150,9 @@ implements DBDictionaryProvider
.map(name ->
DSL.using(config).insertInto(N_TABLE)
.set(field(name("id__entries"), UUID.class), id)
.set(field(name("name"), String.class), name))
.set(field(name("name"), String.class), name)
.set(field(name("phonetic_hash"), String.class),
PhoneticHash.hash(name)))
.collect(Collectors.toList()))
.execute();
......@@ -158,7 +162,9 @@ implements DBDictionaryProvider
.map(name ->
DSL.using(config).insertInto(S_TABLE)
.set(field(name("id__entries"), UUID.class), id)
.set(field(name("name"), String.class), name))
.set(field(name("name"), String.class), name)
.set(field(name("phonetic_hash"), String.class),
PhoneticHash.hash(name)))
.collect(Collectors.toList()))
.execute();
}
......
/*-
* ========================LICENSE_START=================================
* ndc-dictionary
* %%
* Copyright (C) 2017 - 2018 CSCC - University of North Carolina
* %%
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the CSCC - University of North Carolina nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* =========================LICENSE_END==================================
*/
package edu.unc.cscc.crxrest.providers.ndc;
import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.lang3.StringUtils;
/**
 * Static helper that produces phonetic hashes of names.  The hashing
 * algorithm is isolated in this class so that it can be swapped out in one
 * place.
 */
class PhoneticHash
{

	/* static utility holder; there is no reason to create instances */
	private PhoneticHash()
	{
	}

	/**
	 * Hash the given input string using a phonetic hashing algorithm.
	 * Extracted here so as to allow for experimentation with various hash
	 * algorithms.  The current implementation uses a default-configured
	 * Apache Commons Codec {@link Metaphone} encoder.
	 *
	 * <p>NOTE(review): per the commons-codec documentation a
	 * default-constructed <code>Metaphone</code> caps the code length
	 * (at 4 characters) -- confirm that this truncation is acceptable
	 * for the names being hashed.</p>
	 *
	 * @param input input string to hash; <code>null</code>, empty and
	 * whitespace-only input are all tolerated and produce an empty hash
	 * @return phonetic hash, not <code>null</code>, may be empty depending
	 * on input and implementation
	 */
	public static final String
	hash(String input)
	{
		/* isBlank covers null, "" and whitespace-only input in one check */
		if (StringUtils.isBlank(input))
		{
			return "";
		}

		return new Metaphone().metaphone(input);
	}

}
......@@ -3,30 +3,37 @@ DROP TABLE IF EXISTS "substances";
DROP TABLE IF EXISTS "entries";
-- Dictionary entries: one row per entry, keyed by UUID, carrying the
-- product ID, NDC, dosage form, proprietary name and a phonetic hash.
-- NOTE(review): adjacent duplicate column lines below appear to be the
-- before/after sides of the diff view, not real duplicate columns.
CREATE TABLE "entries" (
"id" UUID PRIMARY KEY NOT NULL,
"id" UUID PRIMARY KEY NOT NULL,
"product_id" VARCHAR(256) NOT NULL,
"ndc" VARCHAR(14) NOT NULL,
"ndc" VARCHAR(14) NOT NULL,
"dosage_form" VARCHAR(256) NOT NULL,
"proprietary_name" VARCHAR(512) NOT NULL
"proprietary_name" VARCHAR(512) NOT NULL,
-- presumably filled from PhoneticHash.hash(proprietary name) at import time; verify against the provider
"phonetic_hash" VARCHAR(32) NOT NULL
);
-- Single-column lookup indexes on NDC, dosage form, proprietary name and phonetic hash.
CREATE INDEX IF NOT EXISTS "entries_n_idx" ON "entries" ("ndc");
CREATE INDEX IF NOT EXISTS "entries_d_idx" ON "entries" ("dosage_form");
CREATE INDEX IF NOT EXISTS "entries_pn_idx" ON "entries" ("proprietary_name");
CREATE INDEX IF NOT EXISTS "entries_pn_hash" ON "entries" ("phonetic_hash");
-- Non-proprietary names attached to an entry; each row references its
-- owning "entries" row through "id__entries".
-- NOTE(review): the duplicated "name" line appears to be the before/after
-- sides of the diff view, not a real duplicate column.
CREATE TABLE "non_proprietary_names" (
"id__entries" UUID NOT NULL,
"name" VARCHAR(512) NOT NULL,
"name" VARCHAR(512) NOT NULL,
-- presumably filled from PhoneticHash.hash(name) at import time; verify against the provider
"phonetic_hash" VARCHAR(32) NOT NULL,
FOREIGN KEY ("id__entries") REFERENCES "entries" ("id")
);
-- Lookup indexes on the owning entry, the name and the phonetic hash.
CREATE INDEX IF NOT EXISTS "npn_id" ON "non_proprietary_names" ("id__entries");
CREATE INDEX IF NOT EXISTS "npn_name" ON "non_proprietary_names" ("name");
CREATE INDEX IF NOT EXISTS "npn_pn_hash" ON "non_proprietary_names" ("phonetic_hash");
-- Substance names attached to an entry; each row references its owning
-- "entries" row through "id__entries".
-- NOTE(review): the duplicated "name" line appears to be the before/after
-- sides of the diff view, not a real duplicate column.
CREATE TABLE "substances" (
"id__entries" UUID NOT NULL,
"name" VARCHAR(512) NOT NULL,
"name" VARCHAR(512) NOT NULL,
-- presumably filled from PhoneticHash.hash(name) at import time; verify against the provider
"phonetic_hash" VARCHAR(32) NOT NULL,
FOREIGN KEY ("id__entries") REFERENCES "entries" ("id")
);
-- Lookup indexes on the owning entry, the name and the phonetic hash.
CREATE INDEX IF NOT EXISTS "substance_entry_id_fkey_idx" ON "substances" ("id__entries");
CREATE INDEX IF NOT EXISTS "substance_name_idx" ON "substances" ("name");
CREATE INDEX IF NOT EXISTS "substance_pn_hash" ON "substances" ("phonetic_hash");
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment