Phase 1 - Store group/project traversal paths on CH and create new materialized views for events and contributions

  1. Create new materialized view to store traversal paths of groups
CREATE TABLE namespace_traversal_paths (
  id Int64 DEFAULT 0,
  traversal_path String DEFAULT '0/',
  version DateTime64(6, 'UTC') DEFAULT NOW(),
  deleted Boolean DEFAULT false
) 
ENGINE=ReplacingMergeTree(version, deleted)
PRIMARY KEY id
SETTINGS index_granularity = 512;


CREATE MATERIALIZED VIEW namespace_traversal_paths_mv
TO namespace_traversal_paths
AS
SELECT
    id,
    -- Convert organization_id to string and prepend it to traversal_ids
    concat(toString(organization_id), '/', 
           if(length(traversal_ids) = 0, '', arrayStringConcat(traversal_ids, '/')), 
           '/') AS path, as path,
    _siphon_replicated_at AS version,
    _siphon_deleted AS deleted
FROM siphon_namespaces;
  1. Create materialized view to store traversal paths of projects
CREATE TABLE project_namespace_traversal_paths (
  id Int64 DEFAULT 0,
  path String DEFAULT '0/',
  version DateTime64(6, 'UTC') DEFAULT NOW(),
  deleted Boolean DEFAULT false
) 
ENGINE=ReplacingMergeTree(version, deleted)
PRIMARY KEY id
SETTINGS index_granularity = 512;


CREATE MATERIALIZED VIEW project_namespace_traversal_paths_mv
TO project_namespace_traersal_paths
AS
WITH cte AS (
  SELECT id, project_namespace_id FROM siphon_projects
), namespaces_cte AS (
  SELECT traversal_path, id
  FROM group_namespace_paths
  WHERE id IN (SELECT project_namespace_id FROM cte)
)
SELECT cte.id, namespaces_cte.traversal_path
FROM cte
INNER JOIN namespaces_cte ON namespaces_cte.id = cte.project_namespace_id
  1. Create rebuilt_events table and materialized views, later we will use it in place of old events table.
CREATE TABLE rebuilt_events (
  id Int64 DEFAULT 0,
  path String DEFAULT '0/',
  action Int8 DEFAULT 0,
  target_type LowCardinality(String) DEFAULT '',
  target_id Int64 DEFAULT 0,
  version DateTime64(6, 'UTC') DEFAULT NOW(),
  deleted Boolean DEFAULT false
) 
ENGINE=ReplacingMergeTree(version, deleted)
PRIMARY KEY id


CREATE MATERIALIZED VIEW rebuilt_events_mv
TO rebuilt_events
AS
WITH cte AS (
SELECT * FROM siphon_events
), group_lookups AS (
  SELECT id, path
  FROM namespace_paths_mv
  WHERE id IN (SELECT DISTINCT group_id FROM cte)
), project_lookups AS (
  SELECT id, path 
  FROM project_namespace_paths_mv
  WHERE id IN (SELECT DISTINCT project_id FROM cte)
)
SELECT 
    cte.id AS id,
    CASE
        WHEN cte.project_id != 0 THEN project_lookups.path
        WHEN cte.group_id != 0 THEN group_lookups.path
        ELSE '0/'
    END AS path,
    cte.action AS action,
    cte.target_type AS target_type,
    cte.target_id AS target_id,
    cte._siphon_replicated_at AS version,
    cte._siphon_deleted AS deleted
FROM cte
LEFT JOIN group_lookups ON group_lookups.id=cte.group_id
LEFT JOIN project_lookups on project_lookups.id=cte.project_id
  1. Recreate the existing contributions table using the new rebuilt_events table.
    execute <<~SQL
      CREATE MATERIALIZED VIEW IF NOT EXISTS rebuilt_contributions_mv
      TO rebuilt_contributions
      AS
      SELECT
        id,
        argMax(path, events.updated_at) as path,
        argMax(author_id, events.updated_at) as author_id,
        argMax(target_type, events.updated_at) as target_type,
        argMax(action, events.updated_at) as action,
        argMax(date(created_at), events.updated_at) as created_at,
        max(events.updated_at) as updated_at
      FROM rebuilt_events
      WHERE (("rebuilt_events"."action" IN (5, 6) AND "rebuilt_events"."target_type" = '')
        OR ("rebuilt_events"."action" IN (1, 3, 7, 12)
          AND "rebuilt_events"."target_type" IN ('MergeRequest', 'Issue', 'WorkItem')))
      GROUP BY id
    SQL
Edited by Felipe Cardozo