Skip to content

New query for group-level dependency list page sorting by 'license'

What does this MR do and why?

This is the final sort being modified for the epic Group-level dependency list page performance im... (&12371)


This mirrors the work done in:

  • MR 154054 (added sorting based on highest_severity)
  • MR 154779 (added sorting based on component_name)
  • MR 154863 (added sorting based on package_manager)

Originally, the implementation plan of sorting by license called for adding a new column to sort on.

In the MR to add the column, a reviewer mentioned we could use the existing jsonb data for sorting.

This involves modifying the AggregationsFinder to be able to generate SQL to use jsonb to access the first spdx_identifier in the licenses column

Database

Diff of the raw sql

This is a diff between doing the sort with component_name (not a jsonb column) and licenses (a jsonb column)

diff --git a/home/butter/original_sql_on_component_name.sql b/home/butter/updated_code_on_licenses.sql
index 1bb00c078b48..76d59947aa39 100644
--- a/home/butter/original_sql_on_component_name.sql
+++ b/home/butter/updated_code_on_licenses.sql
@@ -16,7 +16,7 @@ WITH "namespaces" AS MATERIALIZED (
 )
 ))
 SELECT
-    "outer_occurrences"."component_name",
+    ("outer_occurrences"."licenses" -> 0 ->> 'spdx_identifier')::text,
     "outer_occurrences"."component_id",
     "outer_occurrences"."component_version_id",
     MIN(outer_occurrences.id)::bigint AS id,
@@ -26,7 +26,7 @@ SELECT
     SUM(counts.occurrence_count)::integer AS occurrence_count,
     SUM(counts.vulnerability_count)::integer AS vulnerability_count,
     SUM(counts.project_count)::integer AS project_count
-FROM ( SELECT DISTINCT ON ("inner_occurrences"."component_name", "inner_occurrences"."component_id", "inner_occurrences"."component_version_id")
+FROM ( SELECT DISTINCT ON (("inner_occurrences"."licenses" -> 0 ->> 'spdx_identifier')::text, "inner_occurrences"."component_id", "inner_occurrences"."component_version_id")
 "inner_occurrences"."id",
     "inner_occurrences"."created_at",
     "inner_occurrences"."updated_at",
@@ -49,7 +49,7 @@ FROM ( SELECT DISTINCT ON ("inner_occurrences"."component_name", "inner_occurren
     "inner_occurrences"."ancestors"
 FROM
     namespaces,
-        LATERAL ( SELECT DISTINCT ON ("sbom_occurrences"."component_name", "sbom_occurrences"."component_id", "sbom_occurrences"."component_version_id")
+    LATERAL ( SELECT DISTINCT ON (("sbom_occurrences"."licenses" -> 0 ->> 'spdx_identifier')::text,"sbom_occurrences"."component_id", "sbom_occurrences"."component_version_id")
     "sbom_occurrences"."id",
         "sbom_occurrences"."created_at",
         "sbom_occurrences"."updated_at",
@@ -75,14 +75,14 @@ FROM ( SELECT DISTINCT ON ("inner_occurrences"."component_name", "inner_occurren
     WHERE (sbom_occurrences.traversal_ids = namespaces.traversal_ids::bigint[])
     AND "sbom_occurrences"."archived" = FALSE
 ORDER BY
-            "sbom_occurrences"."component_name" DESC,
+    ("sbom_occurrences"."licenses" -> 0 ->> 'spdx_identifier')::text DESC,
     "sbom_occurrences"."component_id" DESC,
     "sbom_occurrences"."component_version_id" DESC
 LIMIT 21) inner_occurrences
 ORDER BY
-    inner_occurrences.component_name DESC,
-    inner_occurrences.component_id DESC,
-    inner_occurrences.component_version_id DESC
+    ("inner_occurrences"."licenses" -> 0 ->> 'spdx_identifier')::text DESC,
+    "inner_occurrences"."component_id" DESC,
+    "inner_occurrences"."component_version_id" DESC
 LIMIT 21) outer_occurrences,
     LATERAL (
         SELECT
@@ -96,11 +96,11 @@ LIMIT 21) outer_occurrences,
         AND "sbom_occurrences"."archived" = FALSE
         AND (sbom_occurrences.component_version_id = outer_occurrences.component_version_id)) counts
 GROUP BY
-    "outer_occurrences"."component_name",
+    ("outer_occurrences"."licenses" -> 0 ->> 'spdx_identifier')::text,
     "outer_occurrences"."component_id",
     "outer_occurrences"."component_version_id"
 ORDER BY
-    MIN(outer_occurrences.component_name) DESC,
-    MIN(outer_occurrences.component_id) DESC,
-    MIN(outer_occurrences.component_version_id) DESC
+    MIN(("outer_occurrences"."licenses" -> 0 ->> 'spdx_identifier')::text) DESC,
+    MIN("outer_occurrences"."component_id") DESC,
+    MIN("outer_occurrences"."component_version_id") DESC
 LIMIT 21

Ascending order

SQL
WITH "namespaces" AS MATERIALIZED (
    SELECT
        "namespaces"."traversal_ids"
    FROM
        "namespaces"
    WHERE
        "namespaces"."type" = 'Group'
        AND (
            traversal_ids >= (
                '{9970}'
)
)
        AND (
            traversal_ids < (
                '{9971}'
)
))
SELECT
    MIN(outer_occurrences.id)::bigint AS id,
    "outer_occurrences"."component_id",
    "outer_occurrences"."component_version_id",
    MIN(outer_occurrences.package_manager) AS package_manager,
    MIN(outer_occurrences.input_file_path) AS input_file_path,
    JSONB_AGG(outer_occurrences.licenses -> 0) AS licenses,
    SUM(counts.occurrence_count)::integer AS occurrence_count,
    SUM(counts.vulnerability_count)::integer AS vulnerability_count,
    SUM(counts.project_count)::integer AS project_count
FROM ( SELECT DISTINCT ON (("inner_occurrences"."licenses" -> 0 ->> 'spdx_identifier')
,
        "inner_occurrences"."component_id",
        "inner_occurrences"."component_version_id") "inner_occurrences"."id",
    "inner_occurrences"."created_at",
    "inner_occurrences"."updated_at",
    "inner_occurrences"."component_version_id",
    "inner_occurrences"."project_id",
    "inner_occurrences"."pipeline_id",
    "inner_occurrences"."source_id",
    "inner_occurrences"."commit_sha",
    "inner_occurrences"."component_id",
    "inner_occurrences"."uuid",
    "inner_occurrences"."package_manager",
    "inner_occurrences"."component_name",
    "inner_occurrences"."input_file_path",
    "inner_occurrences"."licenses",
    "inner_occurrences"."highest_severity",
    "inner_occurrences"."vulnerability_count",
    "inner_occurrences"."source_package_id",
    "inner_occurrences"."archived",
    "inner_occurrences"."traversal_ids",
    "inner_occurrences"."ancestors"
FROM
    namespaces,
    LATERAL ( SELECT DISTINCT ON (("sbom_occurrences"."licenses" -> 0 ->> 'spdx_identifier')
,
            "sbom_occurrences"."component_id",
            "sbom_occurrences"."component_version_id") "sbom_occurrences"."id",
        "sbom_occurrences"."created_at",
        "sbom_occurrences"."updated_at",
        "sbom_occurrences"."component_version_id",
        "sbom_occurrences"."project_id",
        "sbom_occurrences"."pipeline_id",
        "sbom_occurrences"."source_id",
        "sbom_occurrences"."commit_sha",
        "sbom_occurrences"."component_id",
        "sbom_occurrences"."uuid",
        "sbom_occurrences"."package_manager",
        "sbom_occurrences"."component_name",
        "sbom_occurrences"."input_file_path",
        "sbom_occurrences"."licenses",
        "sbom_occurrences"."highest_severity",
        "sbom_occurrences"."vulnerability_count",
        "sbom_occurrences"."source_package_id",
        "sbom_occurrences"."archived",
        "sbom_occurrences"."traversal_ids",
        "sbom_occurrences"."ancestors"
    FROM
        "sbom_occurrences"
    WHERE (sbom_occurrences.traversal_ids = namespaces.traversal_ids::bigint[])
    AND "sbom_occurrences"."archived" = FALSE
ORDER BY
    ("sbom_occurrences"."licenses" -> 0 ->> 'spdx_identifier') ASC,
    "sbom_occurrences"."component_id" ASC,
    "sbom_occurrences"."component_version_id" ASC
LIMIT 21) inner_occurrences
ORDER BY
    (inner_occurrences."licenses" -> 0 ->> 'spdx_identifier') ASC,
    inner_occurrences.component_id ASC,
    inner_occurrences.component_version_id ASC
LIMIT 21) outer_occurrences,
    LATERAL (
        SELECT
            COUNT(project_id) AS occurrence_count,
            COUNT(DISTINCT project_id) project_count,
            SUM(vulnerability_count) vulnerability_count
        FROM
            "sbom_occurrences"
        WHERE (traversal_ids >= ('{9970}'))
        AND (traversal_ids < ('{9971}'))
        AND "sbom_occurrences"."archived" = FALSE
        AND (sbom_occurrences.component_version_id = outer_occurrences.component_version_id)) counts
GROUP BY
    ("outer_occurrences"."licenses" -> 0 ->> 'spdx_identifier'),
"outer_occurrences".component_id,
"outer_occurrences".component_version_id
ORDER BY
    MIN((outer_occurrences.licenses -> 0 ->> 'spdx_identifier')::text) ASC,
    MIN(outer_occurrences.component_id) ASC,
    MIN(outer_occurrences.component_version_id) ASC
LIMIT 21

Descending order

SQL
WITH "namespaces" AS MATERIALIZED (
    SELECT
        "namespaces"."traversal_ids"
    FROM
        "namespaces"
    WHERE
        "namespaces"."type" = 'Group'
        AND (
            traversal_ids >= (
                '{9970}'
)
)
        AND (
            traversal_ids < (
                '{9971}'
)
))
SELECT
    MIN(outer_occurrences.id)::bigint AS id,
    "outer_occurrences"."component_id",
    "outer_occurrences"."component_version_id",
    MIN(outer_occurrences.package_manager) AS package_manager,
    MIN(outer_occurrences.input_file_path) AS input_file_path,
    JSONB_AGG(outer_occurrences.licenses -> 0) AS licenses,
    SUM(counts.occurrence_count)::integer AS occurrence_count,
    SUM(counts.vulnerability_count)::integer AS vulnerability_count,
    SUM(counts.project_count)::integer AS project_count
FROM ( SELECT DISTINCT ON (("inner_occurrences"."licenses" -> 0 ->> 'spdx_identifier')
,
        "inner_occurrences"."component_id",
        "inner_occurrences"."component_version_id") "inner_occurrences"."id",
    "inner_occurrences"."created_at",
    "inner_occurrences"."updated_at",
    "inner_occurrences"."component_version_id",
    "inner_occurrences"."project_id",
    "inner_occurrences"."pipeline_id",
    "inner_occurrences"."source_id",
    "inner_occurrences"."commit_sha",
    "inner_occurrences"."component_id",
    "inner_occurrences"."uuid",
    "inner_occurrences"."package_manager",
    "inner_occurrences"."component_name",
    "inner_occurrences"."input_file_path",
    "inner_occurrences"."licenses",
    "inner_occurrences"."highest_severity",
    "inner_occurrences"."vulnerability_count",
    "inner_occurrences"."source_package_id",
    "inner_occurrences"."archived",
    "inner_occurrences"."traversal_ids",
    "inner_occurrences"."ancestors"
FROM
    namespaces,
    LATERAL ( SELECT DISTINCT ON (("sbom_occurrences"."licenses" -> 0 ->> 'spdx_identifier')
,
            "sbom_occurrences"."component_id",
            "sbom_occurrences"."component_version_id") "sbom_occurrences"."id",
        "sbom_occurrences"."created_at",
        "sbom_occurrences"."updated_at",
        "sbom_occurrences"."component_version_id",
        "sbom_occurrences"."project_id",
        "sbom_occurrences"."pipeline_id",
        "sbom_occurrences"."source_id",
        "sbom_occurrences"."commit_sha",
        "sbom_occurrences"."component_id",
        "sbom_occurrences"."uuid",
        "sbom_occurrences"."package_manager",
        "sbom_occurrences"."component_name",
        "sbom_occurrences"."input_file_path",
        "sbom_occurrences"."licenses",
        "sbom_occurrences"."highest_severity",
        "sbom_occurrences"."vulnerability_count",
        "sbom_occurrences"."source_package_id",
        "sbom_occurrences"."archived",
        "sbom_occurrences"."traversal_ids",
        "sbom_occurrences"."ancestors"
    FROM
        "sbom_occurrences"
    WHERE (sbom_occurrences.traversal_ids = namespaces.traversal_ids::bigint[])
    AND "sbom_occurrences"."archived" = FALSE
ORDER BY
    ("sbom_occurrences"."licenses" -> 0 ->> 'spdx_identifier') DESC,
    "sbom_occurrences"."component_id" DESC,
    "sbom_occurrences"."component_version_id" DESC
LIMIT 21) inner_occurrences
ORDER BY
    (inner_occurrences."licenses" -> 0 ->> 'spdx_identifier') DESC,
    inner_occurrences.component_id DESC,
    inner_occurrences.component_version_id DESC
LIMIT 21) outer_occurrences,
    LATERAL (
        SELECT
            COUNT(project_id) AS occurrence_count,
            COUNT(DISTINCT project_id) project_count,
            SUM(vulnerability_count) vulnerability_count
        FROM
            "sbom_occurrences"
        WHERE (traversal_ids >= ('{9970}'))
        AND (traversal_ids < ('{9971}'))
        AND "sbom_occurrences"."archived" = FALSE
        AND (sbom_occurrences.component_version_id = outer_occurrences.component_version_id)) counts
GROUP BY
    ("outer_occurrences"."licenses" -> 0 ->> 'spdx_identifier'),
"outer_occurrences".component_id,
"outer_occurrences".component_version_id
ORDER BY
    MIN((outer_occurrences.licenses -> 0 ->> 'spdx_identifier')::text) DESC,
    MIN(outer_occurrences.component_id) DESC,
    MIN(outer_occurrences.component_version_id) DESC
LIMIT 21

MR acceptance checklist

Please evaluate this MR against the MR acceptance checklist. It helps you analyze changes to reduce risks in quality, performance, reliability, security, and maintainability.


Changelog: added
related to: #450922

Edited by Michael Becker

Merge request reports