Update Cube to read funnels from Clickhouse
Problem to solve
With Create new configurator endpoint and migration ... (#438033 - moved) and Add background worker to push funnel configurat... (#438035 - closed) we should now have funnel configurations being stored in Clickhouse. With this change, we now need to update Cube to use the Clickhouse stored funnel configurations rather than trying to retrieve them via the GitLab API.
Proposed solution
Update Cube to retrieve user defined schemas from Clickhouse - specifically of the type funnels.
Example code changes
Index: cube/dynamic_schema/funnels.js
<+>UTF-8
===================================================================
diff --git a/cube/dynamic_schema/funnels.js b/cube/dynamic_schema/funnels.js
new file mode 100644
--- /dev/null (revision Staged)
+++ b/cube/dynamic_schema/funnels.js (revision Staged)
@@ -0,0 +1,28 @@
+/**
+ * Custom values are replaced in cube/utils/user_defined_schemas.js:getFunnelsSchemaFiles()
+ *
+ * Supported values which can be used and will be dynamically replaced are:
+ * - FUNNEL_NAME
+ * - FUNNEL_SQL
+ */
+cube('FUNNEL_NAME', {
+ sql: `FUNNEL_SQL
+ WHERE ${FILTER_PARAMS['FUNNEL_NAME']['date'].filter('derived_tstamp')}
+ GROUP BY user_id`,
+ measures: {
+ count: {
+ sql: 'count()',
+ type: 'number',
+ },
+ },
+ dimensions: {
+ step: {
+ sql: 'step',
+ type: 'number',
+ },
+ date: {
+ sql: 'x', // This is a hack to allow the date filter to work
+ type: 'time',
+ },
+ },
+});
\ No newline at end of file
Index: cube/schema/Funnels.js
===================================================================
diff --git a/cube/schema/Funnels.js b/cube/schema/Funnels.js
deleted file mode 100644
--- a/cube/schema/Funnels.js (revision HEAD)
+++ /dev/null (revision HEAD)
@@ -1,55 +0,0 @@
-const fetch = require("node-fetch");
-import { gitlabBaseURI } from "../utils/gitlab_api";
-
-asyncModule(async () => {
- const {
- securityContext: { appId, gitlabToken },
- } = COMPILE_CONTEXT;
-
- if (!appId) {
- return;
- }
-
- let funnels;
-
- try {
- funnels = await (
- await fetch(`${gitlabBaseURI()}/api/v4/projects/${appId.split("gitlab_project_")[1]}/product_analytics/funnels`, {
- headers: {
- "PRIVATE-TOKEN": gitlabToken,
- },
- })
- ).json();
- } catch(e) {
- console.info(e);
- funnels = [];
- }
-
- if (!funnels.length) {
- return;
- }
-
- funnels.forEach((funnel) => {
- cube(`${funnel.name}`, {
- sql: `${funnel.sql}
- WHERE ${FILTER_PARAMS[funnel.name]["date"].filter("derived_tstamp")}
- GROUP BY user_id`,
- measures: {
- count: {
- sql: `count()`,
- type: `number`,
- },
- },
- dimensions: {
- step: {
- sql: `step`,
- type: `number`,
- },
- date: {
- sql: `x`, // This is a hack to allow the date filter to work
- type: `time`,
- },
- },
- });
- });
-});
Index: cube/utils/constants.js
<+>UTF-8
===================================================================
diff --git a/cube/utils/constants.js b/cube/utils/constants.js
new file mode 100644
--- /dev/null (revision Staged)
+++ b/cube/utils/constants.js (revision Staged)
@@ -0,0 +1,10 @@
+const CLICKHOUSE_FORMAT = 'JSONEachRow';
+
+const SCHEMA_FILES_LOCATION = 'schema';
+const FUNNELS_SCHEMA_FILE_LOCATION = `${__dirname}/../dynamic_schema/funnels.js`;
+
+module.exports = {
+ CLICKHOUSE_FORMAT,
+ SCHEMA_FILES_LOCATION,
+ FUNNELS_SCHEMA_FILE_LOCATION,
+};
\ No newline at end of file
Index: cube/utils/gitlab_api.js
===================================================================
diff --git a/cube/utils/gitlab_api.js b/cube/utils/gitlab_api.js
deleted file mode 100644
--- a/cube/utils/gitlab_api.js (revision HEAD)
+++ /dev/null (revision HEAD)
@@ -1,1 +0,0 @@
-exports.gitlabBaseURI = () => process.env.GITLAB_BASE_URI;
\ No newline at end of file
Index: cube/utils/user_defined_schemas.js
<+>UTF-8
===================================================================
diff --git a/cube/utils/user_defined_schemas.js b/cube/utils/user_defined_schemas.js
new file mode 100644
--- /dev/null (revision Staged)
+++ b/cube/utils/user_defined_schemas.js (revision Staged)
@@ -0,0 +1,50 @@
+const fs = require("fs");
+
+const { FUNNELS_SCHEMA_FILE_LOCATION, CLICKHOUSE_FORMAT } = require('./utils/constants');
+const SCHEMA_TYPES = {
+ FUNNEL: 'funnel'
+}
+
+const userDefinedSchemasQuery = (appId, schemaType) => `
+ SELECT name, type, JSONExtractString(contents, 'sql') AS sql
+ FROM ${appId}.user_defined_schemas
+ WHERE type = ${schemaType};
+`;
+
+const getUserDefinedSchemas = async (appId, clickhouseClient, schemaType) => {
+ try {
+ const rows = await clickhouseClient.query({
+ query: userDefinedSchemasQuery(appId, schemaType),
+ format: CLICKHOUSE_FORMAT,
+ });
+
+ return rows.json();
+ } catch(e) {
+ console.info(e);
+
+ return Promise.resolve([]);
+ }
+};
+
+const getFunnelsSchemaFiles = async (appId, clickhouseClient) => {
+ const funnelsSchema = fs.readFileSync(FUNNELS_SCHEMA_FILE_LOCATION, 'utf8');
+
+ let funnels = Promise.resolve([]);
+
+ if (!appId) {
+ return funnels;
+ }
+
+ funnels = await getUserDefinedSchemas(appId, clickhouseClient, SCHEMA_TYPES.FUNNEL);
+
+ return funnels.map((funnel) => ({
+ fileName: `${funnel.name}.js`,
+ content: `${funnelsSchema}`
+ .replaceAll('FUNNEL_NAME', funnel.name)
+ .replaceAll('FUNNEL_SQL', funnel.sql)
+ }), []);
+};
+
+module.exports = {
+ getFunnelsSchemaFiles
+};
\ No newline at end of file
Index: cube/cube.js
<+>UTF-8
===================================================================
diff --git a/cube/cube.js b/cube/cube.js
--- a/cube/cube.js (revision HEAD)
+++ b/cube/cube.js (revision Staged)
@@ -9,10 +9,12 @@
} = process.env;
const useSSL = CUBEJS_DB_SSL?.toLocaleLowerCase() === 'true';
+const { FileRepository } = require('@cubejs-backend/server-core');
const ClickHouseDriver = require('@cubejs-backend/clickhouse-driver');
const { createClient } = require('@clickhouse/client');
+const { getFunnelsSchemaFiles } = require('./utils/user_defined_schemas');
+const { SCHEMA_FILES_LOCATION, CLICKHOUSE_FORMAT } = require('./utils/constants');
-const CLICKHOUSE_FORMAT = 'JSONEachRow';
const clickhouseClient = createClient({
host: `${useSSL ? 'https://' : 'http://'}${CUBEJS_DB_HOST}:${CUBEJS_DB_PORT}`,
username: CUBEJS_DB_USER,
@@ -33,6 +35,16 @@
}),
scheduledRefreshTimer: false,
scheduledRefreshContexts: () => ({}),
+ repositoryFactory: ({ securityContext: { appId, gitlabToken = null } }) => ({
+ async dataSchemaFiles () {
+ const localFiles = new FileRepository(SCHEMA_FILES_LOCATION);
+
+ return [
+ ...await localFiles.dataSchemaFiles(),
+ ...await getFunnelsSchemaFiles(appId, clickhouseClient)
+ ]
+ }
+ }),
};
if (CUBEJS_REFRESH_WORKER || CUBEJS_DEV_MODE) {
3️⃣
Implementation plan - Following the above example code:
- Update
cube.js
to build a list of schema files. - Create a method to retrieve the Clickhouse funnels data.
- Create a new dynamics schema area to store the dynamic schema structures.
- Create tests.
- Add to the devkit.
- Add to the analytics stack.
- Deploy to all environments.
Edited by Robert Hunt