Skip to content
Snippets Groups Projects

ClickHouse data collector

Merged charlie ablett requested to merge 414610-cablett-clickhouse-data-collector into master
All threads resolved!
Compare and Show latest version
8 files
+ 242
171
Compare changes
  • Side-by-side
  • Inline
Files
8
@@ -2,7 +2,10 @@
module Gitlab
module ContributionAnalytics
class ClickHouseDataCollector < PostgresqlDataCollector
class ClickHouseDataCollector
# Use AR module for preventing SQL injection
include ActiveRecord::ConnectionAdapters::Quoting
attr_reader :group, :from, :to
def initialize(group:, from:, to:)
@@ -11,28 +14,40 @@ def initialize(group:, from:, to:)
@to = to
end
def base_query
def totals_by_author_target_type_action
clickhouse_query = <<~CLICKHOUSE
SELECT count(*) as count_all,
"contribution_analytics_events"."author_id" AS events_author_id,
"contribution_analytics_events"."target_type" AS events_target_type,
"contribution_analytics_events"."action" AS events_action
SELECT count(*) AS count,
"contribution_analytics_events"."author_id" AS author_id,
"contribution_analytics_events"."target_type" AS target_type,
"contribution_analytics_events"."action" AS action
FROM (
SELECT
id,
argMax(author_id, contribution_analytics_events.updated_at) as author_id,
argMax(target_type, contribution_analytics_events.updated_at) as target_type,
argMax(action, contribution_analytics_events.updated_at) as action
argMax(author_id, contribution_analytics_events.updated_at) AS author_id,
argMax(target_type, contribution_analytics_events.updated_at) AS target_type,
argMax(action, contribution_analytics_events.updated_at) AS action
FROM contribution_analytics_events
WHERE startsWith(path, '24/')
AND "contribution_analytics_events"."created_at" >= '2022-05-01'
AND "contribution_analytics_events"."created_at" <= '2023-05-07'
WHERE startsWith(path, '#{group_path}/')
AND "contribution_analytics_events"."created_at" >= '#{format_date(from)}'
AND "contribution_analytics_events"."created_at" <= '#{format_date(to)}'
GROUP BY id
) contribution_analytics_events
GROUP BY "contribution_analytics_events"."action","contribution_analytics_events"."target_type","contribution_analytics_events"."author_id"
CLICKHOUSE
ClickHouse::Client.select(clickhouse_query, :main)
ClickHouse::Client.select(clickhouse_query, :main).each_with_object({}) do |row, hash|
hash[[row['author_id'], row['target_type'], row['action']]] = row['count']
end
end
private
def group_path
@group_path ||= quote(group.traversal_ids.join('/'))
end
def format_date(date)
quote(date.utc.strftime('%Y-%m-%d'))
end
end
end
Loading