Route NPM Data Based on Source to Specific Database Tables in License DB
Problem
Our License DB Processor does not currently distinguish between different data sources for NPM data, treating all incoming NPM data the same regardless of its origin.
Solution
Implement enhancements in the License DB Processor to route NPM data based on its source. Specifically, NPM data identified as coming from `deps.dev` should be directed to newly designated tables prefixed with `deps_dev_`, while data from the current source (CouchDB) should continue to be handled by the existing tables.
Implementation Plan
- Update the `License` struct in the data layer to include a `Source` field to identify data origins.
- Adjust the database interaction functions in the `Database` module to dynamically select target tables based on the `Source` field, using a prefix for `deps.dev`-sourced data.
- Modify logging within the database functions to include the source of the data for better traceability and monitoring.
- Remove the temporary exclusion introduced by Temporarily Exclude deps.dev Source from Proces... (#459070 - closed) • Philip Cunningham • 17.0
- Create a new release.
- Deploy on dev and prod.
Outline
The following diff outlines the essential code changes and can serve as a starting point for the implementation:
diff --git a/data/license.go b/data/license.go
index 8c92e3c..0797f1c 100644
--- a/data/license.go
+++ b/data/license.go
@@ -37,6 +37,7 @@ type License struct {
APIVersion string `json:"api_version"`
PackageRegistry string `json:"package_registry"`
Versions []*Version `json:"versions"`
+ Source string `json:"source"` // repo or deps.dev
}
// NewLicense creates a new license for the specified packageRegistry
diff --git a/store/database.go b/store/database.go
index 0680e62..c4042f3 100644
--- a/store/database.go
+++ b/store/database.go
@@ -285,12 +285,12 @@ func (d *Database) Insert(data ...*data.License) error {
registriesUpdated[packages.PackageRegistry] = struct{}{}
// Insert component names first
- if err := d.insertComponentNames(timeoutCtx, packages.Versions, packages.PackageRegistry); err != nil {
+ if err := d.insertComponentNames(timeoutCtx, packages.Versions, packages.PackageRegistry, packages.Source); err != nil {
return fmt.Errorf("failed to insert components for package %s: %s", packages.PackageRegistry, err)
}
// Then insert license versions so they can reference component names
- if err := d.insertLicenseVersions(timeoutCtx, packages.Versions, packages.PackageRegistry); err != nil {
+ if err := d.insertLicenseVersions(timeoutCtx, packages.Versions, packages.PackageRegistry, packages.Source); err != nil {
return fmt.Errorf("failed to insert versions for package %s: %s", packages.PackageRegistry, err)
}
}
@@ -323,7 +323,12 @@ func (d *Database) updateRegistryTimestamp(timeoutCtx context.Context, registrie
return nil
}
-func (d *Database) insertComponentNames(timeoutCtx context.Context, versions []*data.Version, packageRegistryName string) error {
+func (d *Database) insertComponentNames(timeoutCtx context.Context, versions []*data.Version, packageRegistryName string, source string) error {
+ if source == "deps.dev" {
+ // Add prefix so e.g. npm becomes deps_dev_npm
+ packageRegistryName = fmt.Sprintf("deps_dev_%s", packageRegistryName)
+ }
+
tempTableName := fmt.Sprintf(TempComponentTableNameFmt, packageRegistryName)
tempTableSchema := fmt.Sprintf(TempComponentSchemaFmt, tempTableName)
copyFromTempToComponent := fmt.Sprintf(CopyFromTempToComponentFmt, packageRegistryName, packageRegistryName)
@@ -378,7 +383,12 @@ func (d *Database) insertComponentNames(timeoutCtx context.Context, versions []*
return nil
}
-func (d *Database) insertLicenseVersions(timeoutCtx context.Context, versions []*data.Version, packageRegistryName string) error {
+func (d *Database) insertLicenseVersions(timeoutCtx context.Context, versions []*data.Version, packageRegistryName string, source string) error {
+ if source == "deps.dev" {
+ // Add prefix so e.g. npm becomes deps_dev_npm
+ packageRegistryName = fmt.Sprintf("deps_dev_%s", packageRegistryName)
+ }
+
tempTableName := fmt.Sprintf(TempPackageVersionTableNameFmt, packageRegistryName)
tempTableSchema := fmt.Sprintf(TempPackageVersionsSchemaFmt, tempTableName)
copyFromTempToLicense := fmt.Sprintf(CopyFromTempToLicenseFmt, packageRegistryName, packageRegistryName, packageRegistryName, packageRegistryName)
@@ -411,6 +421,7 @@ func (d *Database) insertLicenseVersions(timeoutCtx context.Context, versions []
if d.isUnknownLicense(licenseID, licenseName) {
d.logger.Warn().Str("package_registry", packageRegistryName).
Str("package", version.PackageName).
+ Str("source", source).
Str("version", version.Version).
Str("license", licenseName).
Msg("unknown license detected")
Edited by Nick Ilieskou