Commit 0658d99f authored by Rob Tomsick

Implement barcode support, start phonetic hashing

* Implement barcode support (via NDC->UPN conversion).
* Add support for generating phonetic hashes (no search yet)
parent fb114c81
...@@ -114,6 +114,12 @@ ...@@ -114,6 +114,12 @@
<version>1.5</version> <version>1.5</version>
</dependency> </dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.11</version>
</dependency>
<dependency> <dependency>
<groupId>org.jooq</groupId> <groupId>org.jooq</groupId>
<artifactId>jooq</artifactId> <artifactId>jooq</artifactId>
......
...@@ -39,6 +39,7 @@ import static org.jooq.impl.DSL.field; ...@@ -39,6 +39,7 @@ import static org.jooq.impl.DSL.field;
import static org.jooq.impl.DSL.name; import static org.jooq.impl.DSL.name;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
...@@ -47,6 +48,7 @@ import java.util.Map; ...@@ -47,6 +48,7 @@ import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.UUID; import java.util.UUID;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.ArrayUtils;
import org.jooq.DSLContext; import org.jooq.DSLContext;
...@@ -60,6 +62,7 @@ import edu.unc.cscc.crxrest.model.NDC; ...@@ -60,6 +62,7 @@ import edu.unc.cscc.crxrest.model.NDC;
import edu.unc.cscc.crxrest.model.TradeName; import edu.unc.cscc.crxrest.model.TradeName;
import edu.unc.cscc.crxrest.model.UnitOfMeasure; import edu.unc.cscc.crxrest.model.UnitOfMeasure;
import edu.unc.cscc.crxrest.model.barcode.Barcode; import edu.unc.cscc.crxrest.model.barcode.Barcode;
import edu.unc.cscc.crxrest.model.barcode.UPCA;
import edu.unc.cscc.crxrest.model.product.Product; import edu.unc.cscc.crxrest.model.product.Product;
import edu.unc.cscc.crxrest.model.product.ProductComponent; import edu.unc.cscc.crxrest.model.product.ProductComponent;
...@@ -245,7 +248,29 @@ implements DictionaryService ...@@ -245,7 +248,29 @@ implements DictionaryService
public List<Product> public List<Product>
findByBarcode(Barcode<?> barcode, int limit) findByBarcode(Barcode<?> barcode, int limit)
{ {
return Collections.emptyList(); if (! (barcode instanceof UPCA))
{
return Collections.emptyList();
}
final UPCA upc = (UPCA) barcode;
final List<NDC> ndcs =
Stream.of(NDC.possibleNDCs(upc))
/* strip packaging code */
.map(ndc -> new NDC(ndc.labelerCode(), ndc.productCode()))
.collect(Collectors.toList());
List<UUID> ids =
this.ctx.select(field(name("id"), UUID.class))
.from(D_TABLE)
.where(field(name("ndc"), String.class)
.in(ndcs.stream()
.map(String :: valueOf)
.collect(Collectors.toList())))
.fetch(field(name("id"), UUID.class));
return this.loadProducts(ids);
} }
private final List<Product> private final List<Product>
......
...@@ -140,6 +140,8 @@ implements DBDictionaryProvider ...@@ -140,6 +140,8 @@ implements DBDictionaryProvider
e.getDosageFormName()) e.getDosageFormName())
.set(field(name("proprietary_name"), String.class), .set(field(name("proprietary_name"), String.class),
e.getProprietaryName()) e.getProprietaryName())
.set(field(name("phonetic_hash"), String.class),
PhoneticHash.hash(e.getProprietaryName()))
.execute(); .execute();
DSL.using(config) DSL.using(config)
...@@ -148,7 +150,9 @@ implements DBDictionaryProvider ...@@ -148,7 +150,9 @@ implements DBDictionaryProvider
.map(name -> .map(name ->
DSL.using(config).insertInto(N_TABLE) DSL.using(config).insertInto(N_TABLE)
.set(field(name("id__entries"), UUID.class), id) .set(field(name("id__entries"), UUID.class), id)
.set(field(name("name"), String.class), name)) .set(field(name("name"), String.class), name)
.set(field(name("phonetic_hash"), String.class),
PhoneticHash.hash(name)))
.collect(Collectors.toList())) .collect(Collectors.toList()))
.execute(); .execute();
...@@ -158,7 +162,9 @@ implements DBDictionaryProvider ...@@ -158,7 +162,9 @@ implements DBDictionaryProvider
.map(name -> .map(name ->
DSL.using(config).insertInto(S_TABLE) DSL.using(config).insertInto(S_TABLE)
.set(field(name("id__entries"), UUID.class), id) .set(field(name("id__entries"), UUID.class), id)
.set(field(name("name"), String.class), name)) .set(field(name("name"), String.class), name)
.set(field(name("phonetic_hash"), String.class),
PhoneticHash.hash(name)))
.collect(Collectors.toList())) .collect(Collectors.toList()))
.execute(); .execute();
} }
......
/*-
* ========================LICENSE_START=================================
* ndc-dictionary
* %%
* Copyright (C) 2017 - 2018 CSCC - University of North Carolina
* %%
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the CSCC - University of North Carolina nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* =========================LICENSE_END==================================
*/
package edu.unc.cscc.crxrest.providers.ndc;
import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.lang3.StringUtils;
/**
 * Static helper that produces phonetic hashes of names.  The hashing
 * algorithm is kept behind this single entry point so that it can be swapped
 * out without touching callers.
 */
final class PhoneticHash
{

	/* static helper only; never instantiated */
	private PhoneticHash()
	{
	}

	/**
	 * Hash the given input string using a phonetic hashing algorithm
	 * (currently Commons Codec's {@link Metaphone}).
	 * Extracted here so as to allow for experimentation with various hash
	 * algorithms.
	 *
	 * @param input input string to hash; <code>null</code>, empty and
	 * whitespace-only values are tolerated and yield an empty hash
	 * @return phonetic hash, not <code>null</code>, may be empty depending
	 * on input and implementation
	 */
	public static final String
	hash(String input)
	{
		/* isBlank covers null, "" and whitespace-only input in one check */
		if (StringUtils.isBlank(input))
		{
			return "";
		}

		return new Metaphone().metaphone(input);
	}

}
...@@ -3,30 +3,37 @@ DROP TABLE IF EXISTS "substances"; ...@@ -3,30 +3,37 @@ DROP TABLE IF EXISTS "substances";
DROP TABLE IF EXISTS "entries"; DROP TABLE IF EXISTS "entries";
CREATE TABLE "entries" ( CREATE TABLE "entries" (
"id" UUID PRIMARY KEY NOT NULL, "id" UUID PRIMARY KEY NOT NULL,
"product_id" VARCHAR(256) NOT NULL, "product_id" VARCHAR(256) NOT NULL,
"ndc" VARCHAR(14) NOT NULL, "ndc" VARCHAR(14) NOT NULL,
"dosage_form" VARCHAR(256) NOT NULL, "dosage_form" VARCHAR(256) NOT NULL,
"proprietary_name" VARCHAR(512) NOT NULL "proprietary_name" VARCHAR(512) NOT NULL,
"phonetic_hash" VARCHAR(32) NOT NULL
); );
CREATE INDEX IF NOT EXISTS "entries_n_idx" ON "entries" ("ndc"); CREATE INDEX IF NOT EXISTS "entries_n_idx" ON "entries" ("ndc");
CREATE INDEX IF NOT EXISTS "entries_d_idx" ON "entries" ("dosage_form"); CREATE INDEX IF NOT EXISTS "entries_d_idx" ON "entries" ("dosage_form");
CREATE INDEX IF NOT EXISTS "entries_pn_idx" ON "entries" ("proprietary_name"); CREATE INDEX IF NOT EXISTS "entries_pn_idx" ON "entries" ("proprietary_name");
CREATE INDEX IF NOT EXISTS "entries_pn_hash" ON "entries" ("phonetic_hash");
CREATE TABLE "non_proprietary_names" ( CREATE TABLE "non_proprietary_names" (
"id__entries" UUID NOT NULL, "id__entries" UUID NOT NULL,
"name" VARCHAR(512) NOT NULL, "name" VARCHAR(512) NOT NULL,
"phonetic_hash" VARCHAR(32) NOT NULL,
FOREIGN KEY ("id__entries") REFERENCES "entries" ("id") FOREIGN KEY ("id__entries") REFERENCES "entries" ("id")
); );
CREATE INDEX IF NOT EXISTS "npn_id" ON "non_proprietary_names" ("id__entries"); CREATE INDEX IF NOT EXISTS "npn_id" ON "non_proprietary_names" ("id__entries");
CREATE INDEX IF NOT EXISTS "npn_name" ON "non_proprietary_names" ("name"); CREATE INDEX IF NOT EXISTS "npn_name" ON "non_proprietary_names" ("name");
CREATE INDEX IF NOT EXISTS "npn_pn_hash" ON "non_proprietary_names" ("phonetic_hash");
CREATE TABLE "substances" ( CREATE TABLE "substances" (
"id__entries" UUID NOT NULL, "id__entries" UUID NOT NULL,
"name" VARCHAR(512) NOT NULL, "name" VARCHAR(512) NOT NULL,
"phonetic_hash" VARCHAR(32) NOT NULL,
FOREIGN KEY ("id__entries") REFERENCES "entries" ("id") FOREIGN KEY ("id__entries") REFERENCES "entries" ("id")
); );
CREATE INDEX IF NOT EXISTS "substance_entry_id_fkey_idx" ON "substances" ("id__entries"); CREATE INDEX IF NOT EXISTS "substance_entry_id_fkey_idx" ON "substances" ("id__entries");
CREATE INDEX IF NOT EXISTS "substance_name_idx" ON "substances" ("name"); CREATE INDEX IF NOT EXISTS "substance_name_idx" ON "substances" ("name");
CREATE INDEX IF NOT EXISTS "substance_pn_hash" ON "substances" ("phonetic_hash");
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment