Phase 1 - Store group/project traversal paths on CH and create new materialized views for events and contributions
- Create new materialized view to store traversal paths of groups
CREATE TABLE namespace_traversal_paths (
id Int64 DEFAULT 0,
traversal_path String DEFAULT '0/',
version DateTime64(6, 'UTC') DEFAULT NOW(),
deleted Boolean DEFAULT false
)
ENGINE=ReplacingMergeTree(version, deleted)
PRIMARY KEY id
SETTINGS index_granularity = 512;
CREATE MATERIALIZED VIEW namespace_traversal_paths_mv
TO namespace_traversal_paths
AS
SELECT
id,
-- Convert organization_id to string and prepend it to traversal_ids
concat(toString(organization_id), '/',
if(length(traversal_ids) = 0, '', arrayStringConcat(traversal_ids, '/')),
'/') AS path, as path,
_siphon_replicated_at AS version,
_siphon_deleted AS deleted
FROM siphon_namespaces;
- Create materialized view to store traversal paths of projects
CREATE TABLE project_namespace_traversal_paths (
id Int64 DEFAULT 0,
path String DEFAULT '0/',
version DateTime64(6, 'UTC') DEFAULT NOW(),
deleted Boolean DEFAULT false
)
ENGINE=ReplacingMergeTree(version, deleted)
PRIMARY KEY id
SETTINGS index_granularity = 512;
CREATE MATERIALIZED VIEW project_namespace_traversal_paths_mv
TO project_namespace_traersal_paths
AS
WITH cte AS (
SELECT id, project_namespace_id FROM siphon_projects
), namespaces_cte AS (
SELECT traversal_path, id
FROM group_namespace_paths
WHERE id IN (SELECT project_namespace_id FROM cte)
)
SELECT cte.id, namespaces_cte.traversal_path
FROM cte
INNER JOIN namespaces_cte ON namespaces_cte.id = cte.project_namespace_id
- Create
rebuilt_eventstable and materialized views, later we will use it in place of oldeventstable.
CREATE TABLE rebuilt_events (
id Int64 DEFAULT 0,
path String DEFAULT '0/',
action Int8 DEFAULT 0,
target_type LowCardinality(String) DEFAULT '',
target_id Int64 DEFAULT 0,
version DateTime64(6, 'UTC') DEFAULT NOW(),
deleted Boolean DEFAULT false
)
ENGINE=ReplacingMergeTree(version, deleted)
PRIMARY KEY id
CREATE MATERIALIZED VIEW rebuilt_events_mv
TO rebuilt_events
AS
WITH cte AS (
SELECT * FROM siphon_events
), group_lookups AS (
SELECT id, path
FROM namespace_paths_mv
WHERE id IN (SELECT DISTINCT group_id FROM cte)
), project_lookups AS (
SELECT id, path
FROM project_namespace_paths_mv
WHERE id IN (SELECT DISTINCT project_id FROM cte)
)
SELECT
cte.id AS id,
CASE
WHEN cte.project_id != 0 THEN project_lookups.path
WHEN cte.group_id != 0 THEN group_lookups.path
ELSE '0/'
END AS path,
cte.action AS action,
cte.target_type AS target_type,
cte.target_id AS target_id,
cte._siphon_replicated_at AS version,
cte._siphon_deleted AS deleted
FROM cte
LEFT JOIN group_lookups ON group_lookups.id=cte.group_id
LEFT JOIN project_lookups on project_lookups.id=cte.project_id
- Recreate the existing
contributionstable using the newrebuilt_eventstable.
execute <<~SQL
CREATE MATERIALIZED VIEW IF NOT EXISTS rebuilt_contributions_mv
TO rebuilt_contributions
AS
SELECT
id,
argMax(path, events.updated_at) as path,
argMax(author_id, events.updated_at) as author_id,
argMax(target_type, events.updated_at) as target_type,
argMax(action, events.updated_at) as action,
argMax(date(created_at), events.updated_at) as created_at,
max(events.updated_at) as updated_at
FROM rebuilt_events
WHERE (("rebuilt_events"."action" IN (5, 6) AND "rebuilt_events"."target_type" = '')
OR ("rebuilt_events"."action" IN (1, 3, 7, 12)
AND "rebuilt_events"."target_type" IN ('MergeRequest', 'Issue', 'WorkItem')))
GROUP BY id
SQL
Edited by Felipe Cardozo