Vulnerability Namespace Statistics Update Service
What does this MR do and why?
This MR implements a requirement to have a recursive count of all vulnerabilities and their severities recursively.
For example:
.
└── group1/
├── project1/
│ ├── 1 critical vuln
│ └── 2 high vuln
└── group2/
├── project2/
│ ├── 1 critical vuln
│ └── 1 low vuln
└── project3/
└── 1 medium vuln
we would like to have these counts in vulnerbaility_namespace_statistics
:
namespace_id | critical | high | medium | low |
---|---|---|---|---|
group1 | 2 | 2 | 1 | 1 |
group2 | 1 | 0 | 1 | 1 |
This MR introduces Vulnerabilities::NamespaceStatistics::UpdateService
which is a new service that updates the vulnerabilities_namespace_statistics
with the recursive counts. This service is executed every time a row is added/updated in the vulnerability_statistics
table.
The service is diff-based to improve performance.
More information in this issue:
#523419 (closed)
Changelog: added
Database Changes
ee/app/services/vulnerabilities/statistics/adjustment_service.rb - update query
This MR changes the query in this file.
Original Query
INSERT INTO vulnerability_statistics
(project_id, archived, traversal_ids, total, info, unknown, low, medium, high, critical, letter_grade, created_at, updated_at)
(SELECT
project_ids.project_id AS project_id,
project_attributes.archived AS archived,
project_attributes.traversal_ids AS traversal_ids,
COALESCE(severity_counts.total, 0) AS total,
COALESCE(severity_counts.info, 0) AS info,
COALESCE(severity_counts.unknown, 0) AS unknown,
COALESCE(severity_counts.low, 0) AS low,
COALESCE(severity_counts.medium, 0) AS medium,
COALESCE(severity_counts.high, 0) AS high,
COALESCE(severity_counts.critical, 0) AS critical,
(
CASE
WHEN severity_counts.critical > 0 THEN
4
WHEN severity_counts.high > 0 OR severity_counts.unknown > 0 THEN
3
WHEN severity_counts.medium > 0 THEN
2
WHEN severity_counts.low > 0 THEN
1
ELSE
0
END
) AS letter_grade,
now() AS created_at,
now() AS updated_at
FROM unnest(ARRAY[278964]) project_ids(project_id)
JOIN (VALUES (278964, FALSE, ARRAY[9970]::bigint[])) project_attributes(project_id, archived, traversal_ids)
ON project_attributes.project_id = project_ids.project_id
LEFT OUTER JOIN(
SELECT
vulnerability_reads.project_id AS project_id,
COUNT(*) AS total,
COUNT(*) FILTER (WHERE severity = 1) as info,
COUNT(*) FILTER (WHERE severity = 2) as unknown,
COUNT(*) FILTER (WHERE severity = 4) as low,
COUNT(*) FILTER (WHERE severity = 5) as medium,
COUNT(*) FILTER (WHERE severity = 6) as high,
COUNT(*) FILTER (WHERE severity = 7) as critical
FROM vulnerability_reads
WHERE
vulnerability_reads.project_id IN (278964) AND
vulnerability_reads.state IN (1, 4)
GROUP BY vulnerability_reads.project_id
) AS severity_counts ON severity_counts.project_id = project_ids.project_id
)
ON CONFLICT (project_id)
DO UPDATE SET
total = EXCLUDED.total,
info = EXCLUDED.info,
unknown = EXCLUDED.unknown,
low = EXCLUDED.low,
medium = EXCLUDED.medium,
high = EXCLUDED.high,
critical = EXCLUDED.critical,
letter_grade = EXCLUDED.letter_grade,
updated_at = EXCLUDED.updated_at
New Query
WITH new_values AS (
SELECT
project_ids.project_id AS project_id,
project_attributes.archived AS archived,
project_attributes.traversal_ids AS traversal_ids,
COALESCE(severity_counts.total, 0) AS total,
COALESCE(severity_counts.info, 0) AS info,
COALESCE(severity_counts.unknown, 0) AS unknown,
COALESCE(severity_counts.low, 0) AS low,
COALESCE(severity_counts.medium, 0) AS medium,
COALESCE(severity_counts.high, 0) AS high,
COALESCE(severity_counts.critical, 0) AS critical,
(
CASE
WHEN severity_counts.critical > 0 THEN
4
WHEN severity_counts.high > 0 OR severity_counts.unknown > 0 THEN
3
WHEN severity_counts.medium > 0 THEN
2
WHEN severity_counts.low > 0 THEN
1
ELSE
0
END
) AS letter_grade,
now() AS created_at,
now() AS updated_at
FROM unnest(ARRAY[278964]) project_ids(project_id)
JOIN (VALUES (278964, FALSE, ARRAY[9970]::bigint[])) project_attributes(project_id, archived, traversal_ids)
ON project_attributes.project_id = project_ids.project_id
LEFT OUTER JOIN(
SELECT
vulnerability_reads.project_id AS project_id,
COUNT(*) AS total,
COUNT(*) FILTER (WHERE severity = 1) as info,
COUNT(*) FILTER (WHERE severity = 2) as unknown,
COUNT(*) FILTER (WHERE severity = 4) as low,
COUNT(*) FILTER (WHERE severity = 5) as medium,
COUNT(*) FILTER (WHERE severity = 6) as high,
COUNT(*) FILTER (WHERE severity = 7) as critical
FROM vulnerability_reads
WHERE
vulnerability_reads.project_id IN (9970) AND
vulnerability_reads.state IN (1, 4)
GROUP BY vulnerability_reads.project_id
) AS severity_counts ON severity_counts.project_id = project_ids.project_id
), old_values AS (
SELECT
project_id,
traversal_ids,
total,
critical,
high,
medium,
low,
unknown,
info
FROM vulnerability_statistics
WHERE project_id IN (SELECT project_id FROM new_values)
), upserted AS (
INSERT INTO vulnerability_statistics
(project_id, archived, traversal_ids, total, info, unknown, low, medium, high, critical, letter_grade, created_at, updated_at)
(SELECT project_id, archived, traversal_ids, total, info, unknown, low, medium, high, critical, letter_grade, created_at, updated_at
FROM new_values
)
ON CONFLICT (project_id)
DO UPDATE SET
total = EXCLUDED.total,
info = EXCLUDED.info,
unknown = EXCLUDED.unknown,
low = EXCLUDED.low,
medium = EXCLUDED.medium,
high = EXCLUDED.high,
critical = EXCLUDED.critical,
letter_grade = EXCLUDED.letter_grade,
updated_at = EXCLUDED.updated_at
RETURNING project_id
), diff_values AS (
SELECT
new_values.traversal_ids[array_length(new_values.traversal_ids, 1)] AS namespace_id,
new_values.traversal_ids,
SUM(new_values.total - COALESCE(old_values.total, 0)) AS total,
SUM(new_values.info - COALESCE(old_values.info, 0)) AS info,
SUM(new_values.unknown - COALESCE(old_values.unknown, 0)) AS unknown,
SUM(new_values.low - COALESCE(old_values.low, 0)) AS low,
SUM(new_values.medium - COALESCE(old_values.medium, 0)) AS medium,
SUM(new_values.high - COALESCE(old_values.high, 0)) AS high,
SUM(new_values.critical - COALESCE(old_values.critical, 0)) AS critical
FROM new_values
LEFT JOIN old_values
ON new_values.project_id = old_values.project_id
WHERE EXISTS (
SELECT 1
FROM upserted
WHERE upserted.project_id = new_values.project_id
)
GROUP BY namespace_id, new_values.traversal_ids
)
SELECT *
FROM diff_values
WHERE
total != 0 OR
info != 0 OR
unknown != 0 OR
low != 0 OR
medium != 0 OR
high != 0 OR
critical != 0
Old Query Plan
ModifyTable on public.vulnerability_statistics (cost=0.57..6277.67 rows=0 width=0) (actual time=4.532..4.534 rows=0 loops=1)
Buffers: shared hit=413 read=4 dirtied=11
WAL: records=15 fpi=9 bytes=50749
I/O Timings: read=3.611 write=0.000
-> Nested Loop Left Join (cost=0.57..6277.67 rows=1 width=103) (actual time=0.629..0.632 rows=1 loops=1)
Buffers: shared hit=370 dirtied=1
WAL: records=1 fpi=0 bytes=99
I/O Timings: read=0.000 write=0.000
-> Function Scan on unnest project_ids (cost=0.00..0.02 rows=1 width=4) (actual time=0.010..0.011 rows=1 loops=1)
Filter: (278964 = project_ids.project_id)
Rows Removed by Filter: 0
I/O Timings: read=0.000 write=0.000
-> Aggregate (cost=0.57..6277.59 rows=1 width=64) (actual time=0.528..0.529 rows=1 loops=1)
Buffers: shared hit=356
I/O Timings: read=0.000 write=0.000
-> Index Only Scan using index_vuln_reads_on_project_id_state_severity_and_vuln_id on public.vulnerability_reads (cost=0.57..3746.38 rows=77883 width=10) (actual time=0.047..0.509 rows=148 loops=1)
Index Cond: ((vulnerability_reads.project_id = 278964) AND (vulnerability_reads.state = ANY ('{1,4}'::integer[])))
Heap Fetches: 960
Buffers: shared hit=356
I/O Timings: read=0.000 write=0.000
Settings: work_mem = '100MB', effective_cache_size = '472585MB', jit = 'off', random_page_cost = '1.5', seq_page_cost = '4'
New Query Plan
Aggregate (cost=3.55..3.64 rows=1 width=264) (actual time=433.596..433.604 rows=0 loops=1)
Group Key: (new_values.traversal_ids[array_length(new_values.traversal_ids, 1)]), new_values.traversal_ids
Filter: ((sum((new_values.total - COALESCE(vulnerability_statistics.total, 0))) <> '0'::numeric) OR (sum((new_values.info - COALESCE(vulnerability_statistics.info, 0))) <> '0'::numeric) OR (sum((new_values.unknown - COALESCE(vulnerability_statistics.unknown, 0))) <> '0'::numeric) OR (sum((new_values.low - COALESCE(vulnerability_statistics.low, 0))) <> '0'::numeric) OR (sum((new_values.medium - COALESCE(vulnerability_statistics.medium, 0))) <> '0'::numeric) OR (sum((new_values.high - COALESCE(vulnerability_statistics.high, 0))) <> '0'::numeric) OR (sum((new_values.critical - COALESCE(vulnerability_statistics.critical, 0))) <> '0'::numeric))
Rows Removed by Filter: 1
Buffers: shared hit=342 read=347 dirtied=182
WAL: records=226 fpi=180 bytes=1352058
I/O Timings: read=421.532 write=0.000
CTE new_values
-> Nested Loop Left Join (cost=0.57..6277.64 rows=1 width=113) (actual time=405.369..405.374 rows=1 loops=1)
Buffers: shared hit=304 read=317 dirtied=172
WAL: records=214 fpi=172 bytes=1309640
I/O Timings: read=394.172 write=0.000
-> Function Scan on unnest project_ids (cost=0.00..0.02 rows=1 width=4) (actual time=0.009..0.011 rows=1 loops=1)
Filter: (278964 = project_ids.project_id)
Rows Removed by Filter: 0
I/O Timings: read=0.000 write=0.000
-> Aggregate (cost=0.57..6277.59 rows=1 width=64) (actual time=405.353..405.355 rows=1 loops=1)
Buffers: shared hit=304 read=317 dirtied=172
WAL: records=214 fpi=172 bytes=1309640
I/O Timings: read=394.172 write=0.000
-> Index Only Scan using index_vuln_reads_on_project_id_state_severity_and_vuln_id on public.vulnerability_reads (cost=0.57..3746.38 rows=77883 width=10) (actual time=59.120..405.067 rows=148 loops=1)
Index Cond: ((vulnerability_reads.project_id = 278964) AND (vulnerability_reads.state = ANY ('{1,4}'::integer[])))
Heap Fetches: 2400
Buffers: shared hit=304 read=317 dirtied=172
WAL: records=214 fpi=172 bytes=1309640
I/O Timings: read=394.172 write=0.000
CTE upserted
-> ModifyTable on public.vulnerability_statistics vulnerability_statistics_1 (cost=0.00..0.04 rows=1 width=103) (actual time=23.927..23.932 rows=1 loops=1)
Buffers: shared hit=30 read=26 dirtied=9
WAL: records=11 fpi=7 bytes=34193
I/O Timings: read=23.232 write=0.000
-> CTE Scan on new_values new_values_2 (cost=0.00..0.04 rows=1 width=103) (actual time=2.766..2.768 rows=1 loops=1)
Buffers: shared hit=13 read=5 dirtied=1
WAL: records=2 fpi=0 bytes=254
I/O Timings: read=2.496 write=0.000
-> Sort (cost=3.55..3.55 rows=1 width=124) (actual time=433.576..433.580 rows=1 loops=1)
Sort Key: (new_values.traversal_ids[array_length(new_values.traversal_ids, 1)]), new_values.traversal_ids
Sort Method: quicksort Memory: 25kB
Buffers: shared hit=342 read=347 dirtied=182
WAL: records=226 fpi=180 bytes=1352058
I/O Timings: read=421.532 write=0.000
-> Nested Loop Semi Join (cost=0.42..3.54 rows=1 width=124) (actual time=433.530..433.544 rows=1 loops=1)
Buffers: shared hit=334 read=347 dirtied=182
WAL: records=226 fpi=180 bytes=1352058
I/O Timings: read=421.532 write=0.000
-> Nested Loop Left Join (cost=0.42..3.50 rows=1 width=120) (actual time=409.587..409.600 rows=1 loops=1)
Buffers: shared hit=304 read=321 dirtied=173
WAL: records=215 fpi=173 bytes=1317865
I/O Timings: read=398.301 write=0.000
-> CTE Scan on new_values (cost=0.00..0.02 rows=1 width=92) (actual time=405.372..405.377 rows=1 loops=1)
Buffers: shared hit=304 read=317 dirtied=172
WAL: records=214 fpi=172 bytes=1309640
I/O Timings: read=394.172 write=0.000
-> Nested Loop Semi Join (cost=0.42..3.47 rows=1 width=36) (actual time=4.208..4.214 rows=1 loops=1)
Buffers: shared read=4 dirtied=1
WAL: records=1 fpi=1 bytes=8225
I/O Timings: read=4.129 write=0.000
-> Index Scan using index_vulnerability_statistics_on_unique_project_id on public.vulnerability_statistics (cost=0.42..3.44 rows=1 width=36) (actual time=4.190..4.195 rows=1 loops=1)
Index Cond: (vulnerability_statistics.project_id = new_values.project_id)
Buffers: shared read=4 dirtied=1
WAL: records=1 fpi=1 bytes=8225
I/O Timings: read=4.129 write=0.000
-> CTE Scan on new_values new_values_1 (cost=0.00..0.02 rows=1 width=4) (actual time=0.007..0.007 rows=1 loops=1)
I/O Timings: read=0.000 write=0.000
-> CTE Scan on upserted (cost=0.00..0.02 rows=1 width=8) (actual time=23.932..23.932 rows=1 loops=1)
Buffers: shared hit=30 read=26 dirtied=9
WAL: records=11 fpi=7 bytes=34193
I/O Timings: read=23.232 write=0.000
Settings: work_mem = '100MB', effective_cache_size = '472585MB', jit = 'off', random_page_cost = '1.5', seq_page_cost = '4'
ee/app/services/vulnerabilities/namespace_statistics/update_service.rb - new query
This MR adds a new query in this file.
New Query
WITH diffs(namespace_id, traversal_ids, total, critical, high, medium, low, unknown, info) AS (
VALUES (94, '{94}'::bigint[], 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0)
), expanded_paths AS (
SELECT
-- For each position in the array, select all elements up to that position
d.traversal_ids[i] AS namespace_id,
d.traversal_ids[1:i] AS traversal_ids,
d.total,
d.critical,
d.high,
d.medium,
d.low,
d.unknown,
d.info
FROM
diffs d,
generate_series(1, array_length(d.traversal_ids, 1)) AS i
), aggregated_paths AS (
SELECT
namespace_id,
traversal_ids,
SUM(total) AS total,
SUM(critical) AS critical,
SUM(high) AS high,
SUM(medium) AS medium,
SUM(low) AS low,
SUM(unknown) AS unknown,
SUM(info) AS info
FROM expanded_paths
GROUP BY namespace_id, traversal_ids
)
INSERT INTO vulnerability_namespace_statistics AS vt (
namespace_id, traversal_ids, total, critical, high, medium, low, unknown, info, created_at, updated_at
)
SELECT
ap.namespace_id,
ap.traversal_ids,
ap.total,
ap.critical,
ap.high,
ap.medium,
ap.low,
ap.unknown,
ap.info,
now(),
now()
FROM aggregated_paths ap
ON CONFLICT (namespace_id)
DO UPDATE SET
traversal_ids = excluded.traversal_ids,
total = GREATEST(vt.total + excluded.total, 0),
critical = GREATEST(vt.critical + excluded.critical, 0),
high = GREATEST(vt.high + excluded.high, 0),
medium = GREATEST(vt.medium + excluded.medium, 0),
low = GREATEST(vt.low + excluded.low, 0),
unknown = GREATEST(vt.unknown + excluded.unknown, 0),
info = GREATEST(vt.info + excluded.info, 0),
updated_at = excluded.updated_at
New Query Plan
link to postgres when this query inserted data
link to postgres when this query updated data \
link to postgres when this query inserted data - with more traversal ids
link to postgres when this query updated data - with more traversal ids \
ModifyTable on public.vulnerability_namespace_statistics vt (cost=0.02..0.07 rows=0 width=0) (actual time=0.246..0.247 rows=0 loops=1)
Buffers: shared hit=23 dirtied=6
WAL: records=6 fpi=4 bytes=1142
I/O Timings: read=0.000 write=0.000
-> Subquery Scan on ap (cost=0.02..0.07 rows=1 width=92) (actual time=0.111..0.113 rows=1 loops=1)
Buffers: shared hit=14 dirtied=1
WAL: records=1 fpi=0 bytes=99
I/O Timings: read=0.000 write=0.000
-> HashAggregate (cost=0.02..0.04 rows=1 width=264) (actual time=0.024..0.026 rows=1 loops=1)
Group Key: ('{9970}'::bigint[])[i.i], ('{9970}'::bigint[])[1:i.i]
I/O Timings: read=0.000 write=0.000
-> Function Scan on generate_series i (cost=0.00..0.01 rows=1 width=40) (actual time=0.013..0.013 rows=1 loops=1)
I/O Timings: read=0.000 write=0.000
Settings: random_page_cost = '1.5', seq_page_cost = '4', work_mem = '100MB', effective_cache_size = '472585MB', jit = 'off'