Skip to content

Vulnerability Namespace Statistics Update Service

What does this MR do and why?

This MR implements a requirement to maintain a recursive count of vulnerabilities, broken down by severity, aggregated up each group hierarchy.
For example:

.
└── group1/
    ├── project1/
    │   ├── 1 critical vuln
    │   └── 2 high vuln
    └── group2/
        ├── project2/
        │   ├── 1 critical vuln
        │   └── 1 low vuln
        └── project3/
            └── 1 medium vuln

we would like to have these counts in vulnerability_namespace_statistics:

namespace_id critical high medium low
group1 2 2 1 1
group2 1 0 1 1

This MR introduces Vulnerabilities::NamespaceStatistics::UpdateService which is a new service that updates the vulnerability_namespace_statistics table with the recursive counts. This service is executed every time a row is added/updated in the vulnerability_statistics table. The service is diff-based to improve performance.

More information in this issue:
#523419 (closed)

Changelog: added

Database Changes

ee/app/services/vulnerabilities/statistics/adjustment_service.rb - update query

This MR changes the query in this file.

Original Query
-- Upserts per-project severity counts into vulnerability_statistics.
-- The project ids and attributes (278964, archived flag, traversal_ids) are
-- interpolated by the service; shown here with sample values.
INSERT INTO vulnerability_statistics
  (project_id, archived, traversal_ids, total, info, unknown, low, medium, high, critical, letter_grade, created_at, updated_at)
  (SELECT
  project_ids.project_id AS project_id,
  project_attributes.archived AS archived,
  project_attributes.traversal_ids AS traversal_ids,
  -- COALESCE to 0 covers projects with no matching vulnerability_reads rows
  -- (the LEFT OUTER JOIN below yields NULL counts for them).
  COALESCE(severity_counts.total, 0) AS total,
  COALESCE(severity_counts.info, 0) AS info,
  COALESCE(severity_counts.unknown, 0) AS unknown,
  COALESCE(severity_counts.low, 0) AS low,
  COALESCE(severity_counts.medium, 0) AS medium,
  COALESCE(severity_counts.high, 0) AS high,
  COALESCE(severity_counts.critical, 0) AS critical,
  (
    -- Highest severity present decides the grade; NULL counts (no vulns)
    -- make every comparison UNKNOWN and fall through to 0.
    -- NOTE(review): unknown is graded with high (3) — presumably intentional; confirm.
    CASE
    WHEN severity_counts.critical > 0 THEN
      4
    WHEN severity_counts.high > 0 OR severity_counts.unknown > 0 THEN
      3
    WHEN severity_counts.medium > 0 THEN
      2
    WHEN severity_counts.low > 0 THEN
      1
    ELSE
      0
    END
  ) AS letter_grade,
  now() AS created_at,
  now() AS updated_at
-- Drive the statement from the list of requested project ids.
FROM unnest(ARRAY[278964]) project_ids(project_id)
JOIN (VALUES (278964, FALSE, ARRAY[9970]::bigint[])) project_attributes(project_id, archived, traversal_ids)
  ON project_attributes.project_id = project_ids.project_id
LEFT OUTER JOIN(
  -- Per-project severity breakdown. Severity codes (per the aliases):
  -- 1=info, 2=unknown, 4=low, 5=medium, 6=high, 7=critical.
  SELECT
    vulnerability_reads.project_id AS project_id,
    COUNT(*) AS total,
    COUNT(*) FILTER (WHERE severity = 1) as info,
    COUNT(*) FILTER (WHERE severity = 2) as unknown,
    COUNT(*) FILTER (WHERE severity = 4) as low,
    COUNT(*) FILTER (WHERE severity = 5) as medium,
    COUNT(*) FILTER (WHERE severity = 6) as high,
    COUNT(*) FILTER (WHERE severity = 7) as critical
  FROM vulnerability_reads
  WHERE
    vulnerability_reads.project_id IN (278964) AND
    -- NOTE(review): states 1 and 4 — presumably detected/confirmed; verify against the state enum.
    vulnerability_reads.state IN (1, 4)
  GROUP BY vulnerability_reads.project_id
) AS severity_counts ON severity_counts.project_id = project_ids.project_id
)
-- project_id is unique, so a conflicting row is refreshed in place;
-- created_at is intentionally not updated.
ON CONFLICT (project_id)
DO UPDATE SET
  total = EXCLUDED.total,
  info = EXCLUDED.info,
  unknown = EXCLUDED.unknown,
  low = EXCLUDED.low,
  medium = EXCLUDED.medium,
  high = EXCLUDED.high,
  critical = EXCLUDED.critical,
  letter_grade = EXCLUDED.letter_grade,
  updated_at = EXCLUDED.updated_at
New Query
-- Upserts per-project severity counts into vulnerability_statistics and
-- returns per-namespace diffs (new - old) for rows that actually changed.
-- Project ids / attributes (278964, archived, traversal_ids) are interpolated
-- by the service; shown here with sample values.
WITH new_values AS (
  -- Freshly computed statistics for each requested project.
  SELECT
    project_ids.project_id AS project_id,
    project_attributes.archived AS archived,
    project_attributes.traversal_ids AS traversal_ids,
    -- COALESCE to 0 covers projects with no matching vulnerability_reads rows
    -- (the LEFT OUTER JOIN below yields NULL counts for them).
    COALESCE(severity_counts.total, 0) AS total,
    COALESCE(severity_counts.info, 0) AS info,
    COALESCE(severity_counts.unknown, 0) AS unknown,
    COALESCE(severity_counts.low, 0) AS low,
    COALESCE(severity_counts.medium, 0) AS medium,
    COALESCE(severity_counts.high, 0) AS high,
    COALESCE(severity_counts.critical, 0) AS critical,
    (
      -- Highest severity present decides the grade; NULL counts (no vulns)
      -- fall through to 0. NOTE(review): unknown is graded with high (3).
      CASE
      WHEN severity_counts.critical > 0 THEN
        4
      WHEN severity_counts.high > 0 OR severity_counts.unknown > 0 THEN
        3
      WHEN severity_counts.medium > 0 THEN
        2
      WHEN severity_counts.low > 0 THEN
        1
      ELSE
        0
      END
    ) AS letter_grade,
    now() AS created_at,
    now() AS updated_at
  FROM unnest(ARRAY[278964]) project_ids(project_id)
  JOIN (VALUES (278964, FALSE, ARRAY[9970]::bigint[])) project_attributes(project_id, archived, traversal_ids)
    ON project_attributes.project_id = project_ids.project_id
  LEFT OUTER JOIN(
    -- Per-project severity breakdown. Severity codes (per the aliases):
    -- 1=info, 2=unknown, 4=low, 5=medium, 6=high, 7=critical.
    SELECT
      vulnerability_reads.project_id AS project_id,
      COUNT(*) AS total,
      COUNT(*) FILTER (WHERE severity = 1) as info,
      COUNT(*) FILTER (WHERE severity = 2) as unknown,
      COUNT(*) FILTER (WHERE severity = 4) as low,
      COUNT(*) FILTER (WHERE severity = 5) as medium,
      COUNT(*) FILTER (WHERE severity = 6) as high,
      COUNT(*) FILTER (WHERE severity = 7) as critical
    FROM vulnerability_reads
    WHERE
      -- Fixed: was IN (9970), which is the namespace id from traversal_ids,
      -- not the project id; the original query and the attached query plan
      -- both filter on project_id = 278964.
      vulnerability_reads.project_id IN (278964) AND
      vulnerability_reads.state IN (1, 4)
    GROUP BY vulnerability_reads.project_id
  ) AS severity_counts ON severity_counts.project_id = project_ids.project_id
), old_values AS (
  -- Snapshot of the current statistics, read before the upsert so the
  -- diff below compares against the pre-update state.
  SELECT
  project_id,
  traversal_ids,
  total,
  critical,
  high,
  medium,
  low,
  unknown,
  info
FROM vulnerability_statistics
WHERE project_id IN (SELECT project_id FROM new_values)
), upserted AS (
   -- Same upsert as the original query; RETURNING limits the diff to rows
   -- that were actually inserted or updated.
   INSERT INTO vulnerability_statistics
   (project_id, archived, traversal_ids, total, info, unknown, low, medium, high, critical, letter_grade, created_at, updated_at)
   (SELECT project_id, archived, traversal_ids, total, info, unknown, low, medium, high, critical, letter_grade, created_at, updated_at
FROM new_values
)
 ON CONFLICT (project_id)
 DO UPDATE SET
   total = EXCLUDED.total,
   info = EXCLUDED.info,
   unknown = EXCLUDED.unknown,
   low = EXCLUDED.low,
   medium = EXCLUDED.medium,
   high = EXCLUDED.high,
   critical = EXCLUDED.critical,
   letter_grade = EXCLUDED.letter_grade,
   updated_at = EXCLUDED.updated_at
RETURNING project_id

), diff_values AS (
  -- Per-namespace deltas (new - old), keyed by the project's direct parent
  -- namespace (the last element of traversal_ids). Missing old rows count as 0.
  SELECT
    new_values.traversal_ids[array_length(new_values.traversal_ids, 1)] AS namespace_id,
    new_values.traversal_ids,
    SUM(new_values.total - COALESCE(old_values.total, 0)) AS total,
    SUM(new_values.info - COALESCE(old_values.info, 0)) AS info,
    SUM(new_values.unknown - COALESCE(old_values.unknown, 0)) AS unknown,
    SUM(new_values.low - COALESCE(old_values.low, 0)) AS low,
    SUM(new_values.medium - COALESCE(old_values.medium, 0)) AS medium,
    SUM(new_values.high - COALESCE(old_values.high, 0)) AS high,
    SUM(new_values.critical - COALESCE(old_values.critical, 0)) AS critical
  FROM new_values
  LEFT JOIN old_values
      ON new_values.project_id = old_values.project_id
  WHERE EXISTS (
      SELECT 1
      FROM upserted
      WHERE upserted.project_id = new_values.project_id
  )
  GROUP BY namespace_id, new_values.traversal_ids
)
-- Only surface namespaces whose counts actually changed; the service feeds
-- these diffs to Vulnerabilities::NamespaceStatistics::UpdateService.
SELECT *
FROM diff_values
WHERE
  total != 0 OR
  info != 0 OR
  unknown != 0 OR
  low != 0 OR
  medium != 0 OR
  high != 0 OR
  critical != 0
Old Query Plan

Link to postgres.ai

 ModifyTable on public.vulnerability_statistics  (cost=0.57..6277.67 rows=0 width=0) (actual time=4.532..4.534 rows=0 loops=1)
   Buffers: shared hit=413 read=4 dirtied=11
   WAL: records=15 fpi=9 bytes=50749
   I/O Timings: read=3.611 write=0.000
   ->  Nested Loop Left Join  (cost=0.57..6277.67 rows=1 width=103) (actual time=0.629..0.632 rows=1 loops=1)
         Buffers: shared hit=370 dirtied=1
         WAL: records=1 fpi=0 bytes=99
         I/O Timings: read=0.000 write=0.000
         ->  Function Scan on unnest project_ids  (cost=0.00..0.02 rows=1 width=4) (actual time=0.010..0.011 rows=1 loops=1)
               Filter: (278964 = project_ids.project_id)
               Rows Removed by Filter: 0
               I/O Timings: read=0.000 write=0.000
         ->  Aggregate  (cost=0.57..6277.59 rows=1 width=64) (actual time=0.528..0.529 rows=1 loops=1)
               Buffers: shared hit=356
               I/O Timings: read=0.000 write=0.000
               ->  Index Only Scan using index_vuln_reads_on_project_id_state_severity_and_vuln_id on public.vulnerability_reads  (cost=0.57..3746.38 rows=77883 width=10) (actual time=0.047..0.509 rows=148 loops=1)
                     Index Cond: ((vulnerability_reads.project_id = 278964) AND (vulnerability_reads.state = ANY ('{1,4}'::integer[])))
                     Heap Fetches: 960
                     Buffers: shared hit=356
                     I/O Timings: read=0.000 write=0.000
Settings: work_mem = '100MB', effective_cache_size = '472585MB', jit = 'off', random_page_cost = '1.5', seq_page_cost = '4'
New Query Plan

Link to postgres.ai

 Aggregate  (cost=3.55..3.64 rows=1 width=264) (actual time=433.596..433.604 rows=0 loops=1)
   Group Key: (new_values.traversal_ids[array_length(new_values.traversal_ids, 1)]), new_values.traversal_ids
   Filter: ((sum((new_values.total - COALESCE(vulnerability_statistics.total, 0))) <> '0'::numeric) OR (sum((new_values.info - COALESCE(vulnerability_statistics.info, 0))) <> '0'::numeric) OR (sum((new_values.unknown - COALESCE(vulnerability_statistics.unknown, 0))) <> '0'::numeric) OR (sum((new_values.low - COALESCE(vulnerability_statistics.low, 0))) <> '0'::numeric) OR (sum((new_values.medium - COALESCE(vulnerability_statistics.medium, 0))) <> '0'::numeric) OR (sum((new_values.high - COALESCE(vulnerability_statistics.high, 0))) <> '0'::numeric) OR (sum((new_values.critical - COALESCE(vulnerability_statistics.critical, 0))) <> '0'::numeric))
   Rows Removed by Filter: 1
   Buffers: shared hit=342 read=347 dirtied=182
   WAL: records=226 fpi=180 bytes=1352058
   I/O Timings: read=421.532 write=0.000
   CTE new_values
     ->  Nested Loop Left Join  (cost=0.57..6277.64 rows=1 width=113) (actual time=405.369..405.374 rows=1 loops=1)
           Buffers: shared hit=304 read=317 dirtied=172
           WAL: records=214 fpi=172 bytes=1309640
           I/O Timings: read=394.172 write=0.000
           ->  Function Scan on unnest project_ids  (cost=0.00..0.02 rows=1 width=4) (actual time=0.009..0.011 rows=1 loops=1)
                 Filter: (278964 = project_ids.project_id)
                 Rows Removed by Filter: 0
                 I/O Timings: read=0.000 write=0.000
           ->  Aggregate  (cost=0.57..6277.59 rows=1 width=64) (actual time=405.353..405.355 rows=1 loops=1)
                 Buffers: shared hit=304 read=317 dirtied=172
                 WAL: records=214 fpi=172 bytes=1309640
                 I/O Timings: read=394.172 write=0.000
                 ->  Index Only Scan using index_vuln_reads_on_project_id_state_severity_and_vuln_id on public.vulnerability_reads  (cost=0.57..3746.38 rows=77883 width=10) (actual time=59.120..405.067 rows=148 loops=1)
                       Index Cond: ((vulnerability_reads.project_id = 278964) AND (vulnerability_reads.state = ANY ('{1,4}'::integer[])))
                       Heap Fetches: 2400
                       Buffers: shared hit=304 read=317 dirtied=172
                       WAL: records=214 fpi=172 bytes=1309640
                       I/O Timings: read=394.172 write=0.000
   CTE upserted
     ->  ModifyTable on public.vulnerability_statistics vulnerability_statistics_1  (cost=0.00..0.04 rows=1 width=103) (actual time=23.927..23.932 rows=1 loops=1)
           Buffers: shared hit=30 read=26 dirtied=9
           WAL: records=11 fpi=7 bytes=34193
           I/O Timings: read=23.232 write=0.000
           ->  CTE Scan on new_values new_values_2  (cost=0.00..0.04 rows=1 width=103) (actual time=2.766..2.768 rows=1 loops=1)
                 Buffers: shared hit=13 read=5 dirtied=1
                 WAL: records=2 fpi=0 bytes=254
                 I/O Timings: read=2.496 write=0.000
   ->  Sort  (cost=3.55..3.55 rows=1 width=124) (actual time=433.576..433.580 rows=1 loops=1)
         Sort Key: (new_values.traversal_ids[array_length(new_values.traversal_ids, 1)]), new_values.traversal_ids
         Sort Method: quicksort  Memory: 25kB
         Buffers: shared hit=342 read=347 dirtied=182
         WAL: records=226 fpi=180 bytes=1352058
         I/O Timings: read=421.532 write=0.000
         ->  Nested Loop Semi Join  (cost=0.42..3.54 rows=1 width=124) (actual time=433.530..433.544 rows=1 loops=1)
               Buffers: shared hit=334 read=347 dirtied=182
               WAL: records=226 fpi=180 bytes=1352058
               I/O Timings: read=421.532 write=0.000
               ->  Nested Loop Left Join  (cost=0.42..3.50 rows=1 width=120) (actual time=409.587..409.600 rows=1 loops=1)
                     Buffers: shared hit=304 read=321 dirtied=173
                     WAL: records=215 fpi=173 bytes=1317865
                     I/O Timings: read=398.301 write=0.000
                     ->  CTE Scan on new_values  (cost=0.00..0.02 rows=1 width=92) (actual time=405.372..405.377 rows=1 loops=1)
                           Buffers: shared hit=304 read=317 dirtied=172
                           WAL: records=214 fpi=172 bytes=1309640
                           I/O Timings: read=394.172 write=0.000
                     ->  Nested Loop Semi Join  (cost=0.42..3.47 rows=1 width=36) (actual time=4.208..4.214 rows=1 loops=1)
                           Buffers: shared read=4 dirtied=1
                           WAL: records=1 fpi=1 bytes=8225
                           I/O Timings: read=4.129 write=0.000
                           ->  Index Scan using index_vulnerability_statistics_on_unique_project_id on public.vulnerability_statistics  (cost=0.42..3.44 rows=1 width=36) (actual time=4.190..4.195 rows=1 loops=1)
                                 Index Cond: (vulnerability_statistics.project_id = new_values.project_id)
                                 Buffers: shared read=4 dirtied=1
                                 WAL: records=1 fpi=1 bytes=8225
                                 I/O Timings: read=4.129 write=0.000
                           ->  CTE Scan on new_values new_values_1  (cost=0.00..0.02 rows=1 width=4) (actual time=0.007..0.007 rows=1 loops=1)
                                 I/O Timings: read=0.000 write=0.000
               ->  CTE Scan on upserted  (cost=0.00..0.02 rows=1 width=8) (actual time=23.932..23.932 rows=1 loops=1)
                     Buffers: shared hit=30 read=26 dirtied=9
                     WAL: records=11 fpi=7 bytes=34193
                     I/O Timings: read=23.232 write=0.000
Settings: work_mem = '100MB', effective_cache_size = '472585MB', jit = 'off', random_page_cost = '1.5', seq_page_cost = '4'

ee/app/services/vulnerabilities/namespace_statistics/update_service.rb - new query

This MR adds a new query in this file.

New Query
-- Propagates per-namespace severity-count diffs up the group hierarchy into
-- vulnerability_namespace_statistics. The diff rows are interpolated by the
-- service; shown here with a sample (+2 low for namespace 94).
WITH diffs(namespace_id, traversal_ids, total, critical, high, medium, low, unknown, info) AS (
  VALUES (94, '{94}'::bigint[], 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0)
), expanded_paths AS (
  SELECT
  -- For each position in the array, select all elements up to that position
  -- i.e. emit one row per ancestor on the traversal path, so the same diff
  -- is applied at every level of the hierarchy.
  d.traversal_ids[i] AS namespace_id,
  d.traversal_ids[1:i] AS traversal_ids,
  d.total,
  d.critical,
  d.high,
  d.medium,
  d.low,
  d.unknown,
  d.info
FROM
  diffs d,
  generate_series(1, array_length(d.traversal_ids, 1)) AS i

), aggregated_paths AS (
  -- Collapse multiple diff rows that target the same ancestor namespace
  -- into a single delta per (namespace_id, traversal_ids).
  SELECT
  namespace_id,
  traversal_ids,
  SUM(total) AS total,
  SUM(critical) AS critical,
  SUM(high) AS high,
  SUM(medium) AS medium,
  SUM(low) AS low,
  SUM(unknown) AS unknown,
  SUM(info) AS info
FROM expanded_paths
GROUP BY namespace_id, traversal_ids

)

INSERT INTO vulnerability_namespace_statistics AS vt (
  namespace_id, traversal_ids, total, critical, high, medium, low, unknown, info, created_at, updated_at
)
SELECT
  ap.namespace_id,
  ap.traversal_ids,
  ap.total,
  ap.critical,
  ap.high,
  ap.medium,
  ap.low,
  ap.unknown,
  ap.info,
  now(),
  now()
FROM aggregated_paths ap
-- namespace_id is unique: existing rows accumulate the (possibly negative)
-- diff. GREATEST(..., 0) clamps at zero — presumably a guard against
-- counts drifting negative; confirm intended semantics.
ON CONFLICT (namespace_id)
DO UPDATE SET
  traversal_ids = excluded.traversal_ids,
  total = GREATEST(vt.total + excluded.total, 0),
  critical = GREATEST(vt.critical + excluded.critical, 0),
  high = GREATEST(vt.high + excluded.high, 0),
  medium = GREATEST(vt.medium + excluded.medium, 0),
  low = GREATEST(vt.low + excluded.low, 0),
  unknown = GREATEST(vt.unknown + excluded.unknown, 0),
  info = GREATEST(vt.info + excluded.info, 0),
  updated_at = excluded.updated_at
New Query Plan

link to postgres when this query inserted data
link to postgres when this query updated data \

link to postgres when this query inserted data - with more traversal ids
link to postgres when this query updated data - with more traversal ids \

 ModifyTable on public.vulnerability_namespace_statistics vt  (cost=0.02..0.07 rows=0 width=0) (actual time=0.246..0.247 rows=0 loops=1)
   Buffers: shared hit=23 dirtied=6
   WAL: records=6 fpi=4 bytes=1142
   I/O Timings: read=0.000 write=0.000
   ->  Subquery Scan on ap  (cost=0.02..0.07 rows=1 width=92) (actual time=0.111..0.113 rows=1 loops=1)
         Buffers: shared hit=14 dirtied=1
         WAL: records=1 fpi=0 bytes=99
         I/O Timings: read=0.000 write=0.000
         ->  HashAggregate  (cost=0.02..0.04 rows=1 width=264) (actual time=0.024..0.026 rows=1 loops=1)
               Group Key: ('{9970}'::bigint[])[i.i], ('{9970}'::bigint[])[1:i.i]
               I/O Timings: read=0.000 write=0.000
               ->  Function Scan on generate_series i  (cost=0.00..0.01 rows=1 width=40) (actual time=0.013..0.013 rows=1 loops=1)
                     I/O Timings: read=0.000 write=0.000
Settings: random_page_cost = '1.5', seq_page_cost = '4', work_mem = '100MB', effective_cache_size = '472585MB', jit = 'off'
Edited by Yuval Siev

Merge request reports

Loading