Commit dcf3db5b authored by Kathleen Tam's avatar Kathleen Tam

Merge branch '2341-convert-manage-smau-dashboard-into-a-dbt-model-2' into 'master'

Resolve "Convert Manage SMAU Dashboard into a dbt Model"

Closes #2341

See merge request !1558
parents e8c397d1 fd7b4e44
......@@ -9,3 +9,16 @@ From gitlab_dotcom database, at the moment we track the following events:
* open a merge request
{% enddocs %}
{% docs manage_gitlab_dotcom_smau_events %}
This model provides a summary of relevant actions for the Manage Stage coming from the gitlab_dotcom database.
We currently track the following events:
* Project Creation
* User Creation
{% enddocs %}
{#
  Manage-stage SMAU events sourced from the gitlab_dotcom database.

  Emits one row per row of gitlab_dotcom_projects_xf (project creation) and
  per row of gitlab_dotcom_users_xf (user creation) dated 2015-01-01 or later.
  Output columns: user_id, event_date, event_type, sk_id.
  sk_id is a surrogate key over (event_date, event_type, source id) and is the
  model's configured unique_key.
#}
{{ config({
    "unique_key": "sk_id"
    })
}}

{#- CTE names listed here are stacked together by the `unioned` CTE below. -#}
{%- set event_ctes = ["project_created",
                      "user_created"
                      ]
-%}

WITH project_created AS (

    -- The project's creator is reported as the acting user.
    SELECT
      creator_id                  AS user_id,
      TO_DATE(project_created_at) AS event_date,
      -- Label equals the CTE name, mirroring 'user_created' below. It was
      -- previously 'project_created_at' (the timestamp column's name).
      -- event_type is an input of sk_id, so sk_id values for these rows
      -- change along with the label.
      'project_created'           AS event_type,
      {{ dbt_utils.surrogate_key('event_date', 'event_type', 'project_id') }}
                                  AS sk_id
    FROM {{ref('gitlab_dotcom_projects_xf')}}
    WHERE project_created_at >= '2015-01-01'

)

, user_created AS (

    -- The newly created user is reported as the acting user.
    SELECT
      user_id,
      TO_DATE(user_created_at) AS event_date,
      'user_created'           AS event_type,
      {{ dbt_utils.surrogate_key('event_date', 'event_type', 'user_id') }}
                               AS sk_id
    FROM {{ref('gitlab_dotcom_users_xf')}}
    WHERE user_created_at >= '2015-01-01'

)

, unioned AS (

  {% for event_cte in event_ctes %}

    (
      SELECT
        *
      FROM {{ event_cte }}
    )

    {%- if not loop.last -%}
        UNION
    {%- endif %}

  {% endfor -%}

)

SELECT *
FROM unioned
......@@ -14,4 +14,17 @@ models:
tests:
- not_null
- unique
- name: manage_gitlab_dotcom_smau_events
description: '{{ doc("manage_gitlab_dotcom_smau_events") }}'
columns:
- name: user_id
tests:
- not_null
- name: event_date
tests:
- not_null
- name: sk_id
tests:
- not_null
- unique
{#
  All Manage-stage SMAU events, combining the snowplow page-view model with
  the gitlab_dotcom backend model into one column layout:
  user_snowplow_domain_id, gitlab_user_id, event_date, event_type, sk_id,
  source_type.

  NOTE(review): the schema entry for this model documents a `data_source`
  column, while the column emitted here is `source_type` - confirm which
  name is intended.
#}
WITH snowplow_events AS (

    -- Frontend events: the key is derived from the page view and event type.
    SELECT
      user_snowplow_domain_id,
      CAST(user_custom_id AS INTEGER) AS gitlab_user_id,
      event_date,
      event_type,
      {{ dbt_utils.surrogate_key('page_view_id', 'event_type') }}
                                      AS sk_id,
      'snowplow_frontend'             AS source_type
    FROM {{ ref('manage_snowplow_smau_events') }}

)

, gitlab_dotcom_events AS (

    -- Backend events carry no snowplow cookie id; their sk_id is reused as is.
    SELECT
      NULL                     AS user_snowplow_domain_id,
      CAST(user_id AS INTEGER) AS gitlab_user_id,
      event_date,
      event_type,
      sk_id,
      'gitlab_backend'         AS source_type
    FROM {{ ref('manage_gitlab_dotcom_smau_events') }}

)

, unioned AS (

    SELECT
      user_snowplow_domain_id,
      gitlab_user_id,
      event_date,
      event_type,
      sk_id,
      source_type
    FROM snowplow_events

    UNION

    SELECT
      user_snowplow_domain_id,
      gitlab_user_id,
      event_date,
      event_type,
      sk_id,
      source_type
    FROM gitlab_dotcom_events

)

SELECT
  user_snowplow_domain_id,
  gitlab_user_id,
  event_date,
  event_type,
  sk_id,
  source_type
FROM unioned
......@@ -18,6 +18,23 @@ models:
- name: data_source
description: This is the source where the data has been collected
- name: manage_smau_events
description: '{{ doc("manage_smau_events") }}'
columns:
- name: user_snowplow_domain_id
description: UUID which is generated by the Javascript tracker and stored in a (first party) cookie
- name: gitlab_user_id
description: either null or the user_id from the gitlab_dotcom database
- name: event_type
tests:
- not_null
- name: sk_id
tests:
- not_null
- unique
- name: data_source
description: This is the source where the data has been collected
- name: plan_smau_events
description: '{{ doc("plan_smau_events") }}'
columns:
......
......@@ -7,6 +7,13 @@ For more documentation on which event is tracked by each data source for this st
{% enddocs %}
{% docs manage_smau_events %}
This model encapsulates all activation events for the Manage Stage as defined in this GitLab [issue](https://gitlab.com/gitlab-org/telemetry/issues/47).
{% enddocs %}
{% docs plan_smau_events %}
This model encapsulates all activation events for the Plan Stage as defined in this GitLab [issue](https://gitlab.com/gitlab-org/telemetry/issues/48).
......
......@@ -43,7 +43,7 @@ WITH snowplow_page_views AS (
FROM snowplow_page_views
WHERE page_url_path RLIKE '(\/([a-zA-Z-])*){2}\/merge_requests/[0-9]*'
WHERE page_url_path RLIKE '(\/([0-9A-Za-z_.-])*){2}\/merge_requests/[0-9]*'
AND page_url_path NOT REGEXP '/-/ide/(.)*'
)
......@@ -75,10 +75,10 @@ WITH snowplow_page_views AS (
FROM snowplow_page_views
WHERE page_url_path REGEXP '(\/([a-zA-Z-])*){2,}\/tree\/(.)*'
WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/tree\/(.)*'
AND page_url_path NOT REGEXP '/-/ide/(.)*'
AND page_url_path NOT REGEXP '(\/([a-zA-Z-])*){2,}\/wiki\/tree\/(.)*'
AND page_url_path NOT REGEXP '((\/([a-zA-Z-])*){2,})?\/snippets/[0-9]{1,}'
AND page_url_path NOT REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/wiki\/tree\/(.)*'
AND page_url_path NOT REGEXP '((\/([0-9A-Za-z_.-])*){2,})?\/snippets/[0-9]{1,}'
)
......@@ -108,7 +108,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path RLIKE '((\/([a-zA-Z-])*){2,})?\/snippets/new'
WHERE page_url_path RLIKE '((\/([0-9A-Za-z_.-])*){2,})?\/snippets/new'
)
, snippet_edited AS (
......@@ -122,7 +122,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path RLIKE '((\/([a-zA-Z-])*){2,})?\/snippets/[0-9]*/edit'
WHERE page_url_path RLIKE '((\/([0-9A-Za-z_.-])*){2,})?\/snippets/[0-9]*/edit'
)
, snippet_viewed AS (
......@@ -136,7 +136,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path RLIKE '((\/([a-zA-Z-])*){2,})?\/snippets/[0-9]{1,}'
WHERE page_url_path RLIKE '((\/([0-9A-Za-z_.-])*){2,})?\/snippets/[0-9]{1,}'
)
......@@ -151,7 +151,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path RLIKE '(\/([a-zA-Z-])*){2,}\/wiki\/tree\/.*'
WHERE page_url_path RLIKE '(\/([0-9A-Za-z_.-])*){2,}\/wikis(\/(([0-9A-Za-z_.-]|\%))*){1,}'
AND page_url_path NOT REGEXP '/-/ide/(.)*'
)
......
{#
  Manage-stage SMAU events derived from snowplow page views (app_id 'gitlab').

  Each event CTE below keeps the page views that satisfy one pattern and tags
  them with an event_type; `unioned` stacks every CTE named in event_ctes.
  Output columns: user_snowplow_domain_id, user_custom_id, event_date,
  page_url_path, event_type, page_view_id.

  NOTE(review): unique_key is page_view_id, yet one page view can satisfy
  more than one event CTE (user_authenticated filters on the referrer while
  the other CTEs filter on the page path), which would produce several rows
  sharing a page_view_id - confirm whether the key should also cover
  event_type.
#}
{{ config({
    "materialized": "incremental",
    "unique_key": "page_view_id"
    })
}}

{%- set event_ctes = ["audit_events_viewed",
                      "cycle_analytics_viewed",
                      "insights_viewed",
                      "group_analytics_viewed",
                      "group_created",
                      "user_authenticated"
                      ]
-%}

WITH snowplow_page_views AS (

    -- Shared base: only the columns the event CTEs read.
    SELECT
      user_snowplow_domain_id,
      user_custom_id,
      page_view_start,
      page_url_path,
      page_view_id,
      referer_url_path
    FROM {{ ref('snowplow_page_views') }}
    WHERE TRUE
      AND app_id = 'gitlab'
    {% if is_incremental() %}
      -- Incremental runs re-read page views from the latest loaded event_date onward.
      AND page_view_start >= (SELECT MAX(event_date) FROM {{ this }})
    {% endif %}

)

, audit_events_viewed AS (

    SELECT
      user_snowplow_domain_id,
      user_custom_id,
      TO_DATE(page_view_start) AS event_date,
      page_url_path,
      'audit_events_viewed'    AS event_type,
      page_view_id
    FROM snowplow_page_views
    WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){1,}\/audit_events'

)

, cycle_analytics_viewed AS (

    -- Unlike the neighbouring patterns, this one requires at least two
    -- path segments before /cycle_analytics.
    SELECT
      user_snowplow_domain_id,
      user_custom_id,
      TO_DATE(page_view_start)  AS event_date,
      page_url_path,
      'cycle_analytics_viewed'  AS event_type,
      page_view_id
    FROM snowplow_page_views
    WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/cycle_analytics'

)

, insights_viewed AS (

    SELECT
      user_snowplow_domain_id,
      user_custom_id,
      TO_DATE(page_view_start) AS event_date,
      page_url_path,
      'insights_viewed'        AS event_type,
      page_view_id
    FROM snowplow_page_views
    WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){1,}\/insights'

)

, group_analytics_viewed AS (

    SELECT
      user_snowplow_domain_id,
      user_custom_id,
      TO_DATE(page_view_start)  AS event_date,
      page_url_path,
      'group_analytics_viewed'  AS event_type,
      page_view_id
    FROM snowplow_page_views
    WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){1,}\/analytics'

)

, group_created AS (

    SELECT
      user_snowplow_domain_id,
      user_custom_id,
      TO_DATE(page_view_start) AS event_date,
      page_url_path,
      'group_created'          AS event_type,
      page_view_id
    FROM snowplow_page_views
    WHERE page_url_path REGEXP '\/groups\/new'

)

/*
  Sign-in detection uses the referrer as well as the page path:
  a view counts as a successful authentication when the user arrived
  from /users/sign_in and landed on any page other than /users/sign_in.
*/
, user_authenticated AS (

    SELECT
      user_snowplow_domain_id,
      user_custom_id,
      TO_DATE(page_view_start) AS event_date,
      page_url_path,
      'user_authenticated'     AS event_type,
      page_view_id
    FROM snowplow_page_views
    WHERE referer_url_path REGEXP '\/users\/sign_in'
      AND page_url_path NOT REGEXP '\/users\/sign_in'

)

, unioned AS (

  {% for event_cte in event_ctes %}

    (
      SELECT
        *
      FROM {{ event_cte }}
    )

    {%- if not loop.last -%}
        UNION
    {%- endif %}

  {% endfor -%}

)

SELECT
  user_snowplow_domain_id,
  user_custom_id,
  event_date,
  page_url_path,
  event_type,
  page_view_id
FROM unioned
......@@ -45,7 +45,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path REGEXP '(\/([a-zA-Z-])*){2,}\/boards\/[0-9]{1,}'
WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/boards\/[0-9]{1,}'
)
......@@ -60,7 +60,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path REGEXP '(\/([a-zA-Z-])*){2,}\/epics(\/)?'
WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/epics(\/)?'
)
......@@ -75,7 +75,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path REGEXP '(\/([a-zA-Z-])*){2,}\/epics\/[0-9]{1,}'
WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/epics\/[0-9]{1,}'
)
, issue_list_viewed AS (
......@@ -90,7 +90,7 @@ WITH snowplow_page_views AS (
FROM snowplow_page_views
WHERE page_url_path REGEXP '(\/([a-zA-Z-])*){2,}\/issues(\/)?'
WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/issues(\/)?'
)
......@@ -106,7 +106,7 @@ WITH snowplow_page_views AS (
FROM snowplow_page_views
WHERE page_url_path REGEXP '(\/([a-zA-Z-])*){2,}\/issues\/[0-9]{1,}'
WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/issues\/[0-9]{1,}'
)
......@@ -121,7 +121,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path REGEXP '(\/([a-zA-Z-])*){2,}\/labels(\/)?'
WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/labels(\/)?'
)
......@@ -136,7 +136,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path REGEXP '(\/([a-zA-Z-])*){2,}\/milestones(\/)?'
WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/milestones(\/)?'
)
......@@ -151,7 +151,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path REGEXP '(\/([a-zA-Z-])*){2,}\/milestones\/[0-9]{1,}'
WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/milestones\/[0-9]{1,}'
)
......@@ -196,7 +196,7 @@ WITH snowplow_page_views AS (
page_view_id
FROM snowplow_page_views
WHERE page_url_path REGEXP '(\/([a-zA-Z-])*){2,}\/roadmap(\/)?'
WHERE page_url_path REGEXP '(\/([0-9A-Za-z_.-])*){2,}\/roadmap(\/)?'
)
......
......@@ -17,7 +17,22 @@ models:
tests:
- not_null
- unique
- name: manage_snowplow_smau_events
description: '{{ doc("manage_snowplow_smau_events") }}'
columns:
- name: user_snowplow_domain_id
description: UUID which is generated by the Javascript tracker and stored in a (first party) cookie
tests:
- not_null
- name: user_custom_id
description: either null or the user_id from the gitlab_dotcom database
- name: event_type
tests:
- not_null
- name: page_view_id
tests:
- not_null
- name: plan_snowplow_smau_events
description: '{{ doc("plan_snowplow_smau_events") }}'
......@@ -35,4 +50,3 @@ models:
tests:
- not_null
- unique
......@@ -6,6 +6,22 @@ For more documentation on which event is tracked by each data source for this st
{% enddocs %}
{% docs manage_snowplow_smau_events %}
This model provides a summary of relevant actions for the Manage Stage coming from snowplow.
The snowplow events currently included for the Manage Stage are:
* audit_events_viewed
* cycle_analytics_viewed
* insights_viewed
* group_analytics_viewed
* group_created
* user_authenticated
{% enddocs %}
{% docs plan_snowplow_smau_events %}
This model provides a summary of relevant activation events for Plan Stage coming from snowplow frontend events (pageviews and events). A summary of all activation events is at the moment defined in this [issue](https://gitlab.com/gitlab-org/telemetry/issues/48).
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment