From 6ea2282f827e6273c3af282ee0f3576830583c10 Mon Sep 17 00:00:00 2001 From: Brian Cao Date: Wed, 20 Jul 2022 21:31:26 -0700 Subject: [PATCH] convert analytics queries --- lib/constants.js | 1 + lib/db.js | 94 +++++++++++++++++-- queries/analytics/event/getEventMetrics.js | 34 ++++--- queries/analytics/event/getEvents.js | 35 ++++++- .../analytics/pageview/getPageviewMetrics.js | 43 ++++++++- .../analytics/pageview/getPageviewStats.js | 56 ++++++++++- queries/analytics/pageview/getPageviews.js | 34 ++++++- queries/analytics/session/getSessionByUuid.js | 36 ++++++- .../analytics/session/getSessionMetrics.js | 42 ++++++++- queries/analytics/session/getSessions.js | 40 +++++++- queries/analytics/stats/getActiveVisitors.js | 28 +++++- queries/analytics/stats/getWebsiteStats.js | 79 +++++++++++++--- 12 files changed, 470 insertions(+), 52 deletions(-) diff --git a/lib/constants.js b/lib/constants.js index a8c872e2..53c8bd25 100644 --- a/lib/constants.js +++ b/lib/constants.js @@ -64,6 +64,7 @@ export const EVENT_COLORS = [ '#ffec16', ]; +export const RELATIONAL = 'relational'; export const POSTGRESQL = 'postgresql'; export const MYSQL = 'mysql'; export const CLICKHOUSE = 'clickhouse'; diff --git a/lib/db.js b/lib/db.js index 3d89210c..9409178a 100644 --- a/lib/db.js +++ b/lib/db.js @@ -7,8 +7,10 @@ import { POSTGRESQL, POSTGRESQL_DATE_FORMATS, CLICKHOUSE, + RELATIONAL, } from 'lib/constants'; import moment from 'moment-timezone'; +import { CLICKHOUSE_DATE_FORMATS } from './constants'; BigInt.prototype.toJSON = function () { return Number(this); @@ -48,19 +50,48 @@ function initializePrisma(options) { } function initializeClickhouse() { - if (process.env.ANALYTICS_URL) { + if (!process.env.ANALYTICS_URL) { return null; } + const url = new URL(process.env.ANALYTICS_URL); + + const database = url.pathname.replace('/', ''); + return new ClickHouse({ - url: process.env.ANALYTICS_URL, + url: url.hostname, + port: Number(url.port), + basicAuth: url.password + ? { + username: url.username || 'default', + password: url.password, + } + : null, format: 'json', + config: { + database, + }, }); + + // return new ClickHouse({ + // url: 'http://164.92.95.2', + // port: 8123, + // basicAuth: { + // username: 'default', + // password: 'shhhthisissupersecret!', + // }, + // format: 'json', + // config: { + // database: 'umami_dev', + // }, + // }); } const prisma = initializePrisma(options); const clickhouse = initializeClickhouse(); +console.log('clickhouse1: ', clickhouse); + export { prisma, clickhouse }; export function getDatabase() { @@ -103,6 +134,10 @@ export function getDateStringQuery(data, unit) { } } +export function getDateStringQueryClickhouse(data, unit) { + return `formatDateTime(${data}, '${CLICKHOUSE_DATE_FORMATS[unit]}')`; +} + export function getDateQuery(field, unit, timezone) { const db = getDatabase(); @@ -124,15 +159,31 @@ export function getDateQuery(field, unit, timezone) { } } -export function getTimestampInterval(field) { +export function getDateQueryClickhouse(field, unit, timezone) { + if (timezone) { + return `date_trunc('${unit}', ${field},'${timezone}')`; + } + return `date_trunc('${unit}', ${field})`; +} + +export function getDateFormatClickhouse(date) { + return `parseDateTimeBestEffort('${date.toUTCString()}')`; +} + +export function getBetweenDatesClickhouse(field, start_at, end_at) { + return `${field} between ${getDateFormatClickhouse(start_at)} + and ${getDateFormatClickhouse(end_at)}`; +} + +export function getTimestampInterval(maxColumn, minColumn) { const db = getDatabase(); if (db === POSTGRESQL) { - return `floor(extract(epoch from max(${field}) - min(${field})))`; + return `floor(extract(epoch from max(${maxColumn}) - min(${minColumn})))`; } if (db === MYSQL) { - return `floor(unix_timestamp(max(${field})) - unix_timestamp(min(${field})))`; + return `floor(unix_timestamp(max(${maxColumn})) - unix_timestamp(min(${minColumn})))`; } } @@ -149,6 +200,7 @@ export function getFilterQuery(table, filters = {}, params = []) { if (table === 'pageview' || table === 'event') { arr.push(`and ${table}.${key}=$${params.length + 1}`); params.push(decodeURIComponent(value)); + console.log(params); } break; @@ -213,6 +265,22 @@ export function parseFilters(table, filters = {}, params = []) { }; } +export function replaceQueryClickhouse(string, params = []) { + let formattedString = string; + + params.forEach((a, i) => { + let replace = a; + + if (typeof a === 'string' || a instanceof String) { + replace = `'${replace}'`; + } + + formattedString = formattedString.replace(`$${i + 1}`, replace); + }); + + return formattedString; +} + export async function runQuery(query) { return query.catch(e => { throw e; @@ -231,14 +299,24 @@ export async function rawQuery(query, params = []) { return runQuery(prisma.$queryRawUnsafe.apply(prisma, [sql, ...params])); } -export function runAnalyticsQuery(relational, clickhouse) { +export async function rawQueryClickhouse(query, params = [], debug = false) { + let formattedQuery = replaceQueryClickhouse(query, params); + + if (debug || process.env.LOG_QUERY) { + console.log(formattedQuery); + } + + return clickhouse.query(formattedQuery).toPromise(); +} + +export async function runAnalyticsQuery(queries) { const db = getAnalyticsDatabase(); if (db === POSTGRESQL || db === MYSQL) { - return relational(); + return queries[`${RELATIONAL}`](); } if (db === CLICKHOUSE) { - return runQuery(clickhouse()); + return queries[`${CLICKHOUSE}`](); } } diff --git a/queries/analytics/event/getEventMetrics.js b/queries/analytics/event/getEventMetrics.js index 52be39c0..59e55dcf 100644 --- a/queries/analytics/event/getEventMetrics.js +++ b/queries/analytics/event/getEventMetrics.js @@ -1,17 +1,23 @@ +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; import { + rawQueryClickhouse, + getBetweenDatesClickhouse, getDateQuery, + getDateQueryClickhouse, getDateStringQuery, getFilterQuery, rawQuery, runAnalyticsQuery, - clickhouse, } from 'lib/db'; -export function getEventMetrics(...args) { - return runAnalyticsQuery(relationalQuery(...args), clickhouseQuery(...args)); +export async function getEventMetrics(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); } -function relationalQuery( +async function relationalQuery( website_id, start_at, end_at, @@ -38,28 +44,28 @@ function relationalQuery( ); } -function clickhouseQuery( +async function clickhouseQuery( website_id, start_at, end_at, - timezone = 'utc', + timezone = 'UTC', unit = 'day', filters = {}, ) { - const params = [website_id, start_at, end_at]; + const params = [website_id]; - return clickhouse.query( + return rawQueryClickhouse( ` select event_value x, - ${getDateStringQuery(getDateQuery('created_at', unit, timezone), unit)} t, + ${getDateQueryClickhouse('created_at', unit, timezone)} t, count(*) y from event - where website_id=$1 - and created_at between $2 and $3 - ${getFilterQuery('event', filters, params)} - group by 1, 2 - order by 2 + where website_id= $1 + and ${getBetweenDatesClickhouse('created_at', start_at, end_at)} + ${getFilterQuery('event', filters, params)} + group by x, t + order by t `, params, ); diff --git a/queries/analytics/event/getEvents.js b/queries/analytics/event/getEvents.js index f1a318bc..059c8014 100644 --- a/queries/analytics/event/getEvents.js +++ b/queries/analytics/event/getEvents.js @@ -1,6 +1,20 @@ -import { prisma, runQuery } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; +import { + rawQueryClickhouse, + getDateFormatClickhouse, + prisma, + runAnalyticsQuery, + runQuery, +} from 'lib/db'; -export async function getEvents(websites, start_at) { +export function getEvents(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} + +function relationalQuery(websites, start_at) { return runQuery( prisma.event.findMany({ where: { @@ -16,3 +30,20 @@ export async function getEvents(websites, start_at) { }), ); } + +function clickhouseQuery(websites, start_at) { + return rawQueryClickhouse( + ` + select + event_id, + website_id, + session_id, + created_at, + url, + event_type + from event + where website_id in (${websites.join[',']} + and created_at >= ${getDateFormatClickhouse(start_at)}) + `, + ); +} diff --git a/queries/analytics/pageview/getPageviewMetrics.js b/queries/analytics/pageview/getPageviewMetrics.js index 1772bce2..fb2cf8a5 100644 --- a/queries/analytics/pageview/getPageviewMetrics.js +++ b/queries/analytics/pageview/getPageviewMetrics.js @@ -1,6 +1,20 @@ -import { parseFilters, rawQuery } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; +import { + rawQueryClickhouse, + runAnalyticsQuery, + parseFilters, + rawQuery, + getBetweenDatesClickhouse, +} from 'lib/db'; -export function getPageviewMetrics(website_id, start_at, end_at, field, table, filters = {}) { +export async function getPageviewMetrics(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(website_id, start_at, end_at, field, table, filters = {}) { const params = [website_id, start_at, end_at]; const { pageviewQuery, sessionQuery, eventQuery, joinSession } = parseFilters( table, @@ -24,3 +38,28 @@ export function getPageviewMetrics(website_id, start_at, end_at, field, table, f params, ); } + +async function clickhouseQuery(website_id, start_at, end_at, field, table, filters = {}) { + const params = [website_id]; + const { pageviewQuery, sessionQuery, eventQuery, joinSession } = parseFilters( + table, + filters, + params, + ); + + return rawQueryClickhouse( + ` + select ${field} x, count(*) y + from ${table} + ${joinSession} + where ${table}.website_id= $1 + and ${getBetweenDatesClickhouse(table + '.created_at', start_at, end_at)} + ${pageviewQuery} + ${joinSession && sessionQuery} + ${eventQuery} + group by x + order by y desc + `, + params, + ); +} diff --git a/queries/analytics/pageview/getPageviewStats.js b/queries/analytics/pageview/getPageviewStats.js index eccf5dc7..8a26280c 100644 --- a/queries/analytics/pageview/getPageviewStats.js +++ b/queries/analytics/pageview/getPageviewStats.js @@ -1,6 +1,24 @@ -import { parseFilters, rawQuery, getDateQuery, getDateStringQuery } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; +import { + rawQueryClickhouse, + getBetweenDatesClickhouse, + getDateQuery, + getDateQueryClickhouse, + getDateStringQuery, + getDateStringQueryClickhouse, + parseFilters, + rawQuery, + runAnalyticsQuery, +} from 'lib/db'; -export function getPageviewStats( +export async function getPageviewStats(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery( website_id, start_at, end_at, @@ -32,3 +50,37 @@ export function getPageviewStats( params, ); } + +async function clickhouseQuery( + website_id, + start_at, + end_at, + timezone = 'UTC', + unit = 'day', + count = '*', + filters = {}, +) { + const params = [website_id]; + const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); + + return rawQueryClickhouse( + ` + select + ${getDateStringQueryClickhouse('g.t', unit)} as t, + g.y as y + from + (select + ${getDateQueryClickhouse('created_at', unit, timezone)} t, + count(${count}) y + from pageview + ${joinSession} + where pageview.website_id= $1 + and ${getBetweenDatesClickhouse('pageview.created_at', start_at, end_at)} + ${pageviewQuery} + ${sessionQuery} + group by t) g + order by t + `, + params, + ); +} diff --git a/queries/analytics/pageview/getPageviews.js b/queries/analytics/pageview/getPageviews.js index 09efbea1..d7b28611 100644 --- a/queries/analytics/pageview/getPageviews.js +++ b/queries/analytics/pageview/getPageviews.js @@ -1,6 +1,20 @@ -import { prisma, runQuery } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; +import { + rawQueryClickhouse, + getDateFormatClickhouse, + prisma, + runAnalyticsQuery, + runQuery, +} from 'lib/db'; -export async function getPageviews(websites, start_at) { +export async function getPageviews(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websites, start_at) { return runQuery( prisma.pageview.findMany({ where: { @@ -16,3 +30,19 @@ export async function getPageviews(websites, start_at) { }), ); } + +async function clickhouseQuery(websites, start_at) { + return rawQueryClickhouse( + ` + select + view_id, + website_id, + session_id, + created_at, + url + from pageview + where website_id in (${websites.join[',']} + and created_at >= ${getDateFormatClickhouse(start_at)}) + `, + ); +} diff --git a/queries/analytics/session/getSessionByUuid.js b/queries/analytics/session/getSessionByUuid.js index 96d27a85..042b4914 100644 --- a/queries/analytics/session/getSessionByUuid.js +++ b/queries/analytics/session/getSessionByUuid.js @@ -1,6 +1,14 @@ -import { prisma, runQuery } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; +import { rawQueryClickhouse, prisma, runAnalyticsQuery, runQuery } from 'lib/db'; -export async function getSessionByUuid(session_uuid) { +export async function getSessionByUuid(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(session_uuid) { return runQuery( prisma.session.findUnique({ where: { @@ -9,3 +17,27 @@ export async function getSessionByUuid(session_uuid) { }), ); } + +async function clickhouseQuery(session_uuid) { + const params = [session_uuid]; + + return rawQueryClickhouse( + ` + select + session_id, + session_uuid, + website_id, + created_at, + hostname, + browser, + os, + device, + screen, + "language", + country + from session + where session_id = $1 + `, + params, + ); +} diff --git a/queries/analytics/session/getSessionMetrics.js b/queries/analytics/session/getSessionMetrics.js index 53294757..432b696c 100644 --- a/queries/analytics/session/getSessionMetrics.js +++ b/queries/analytics/session/getSessionMetrics.js @@ -1,6 +1,20 @@ -import { parseFilters, rawQuery } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; +import { + getBetweenDatesClickhouse, + parseFilters, + rawQuery, + rawQueryClickhouse, + runAnalyticsQuery, +} from 'lib/db'; -export function getSessionMetrics(website_id, start_at, end_at, field, filters = {}) { +export async function getSessionMetrics(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(website_id, start_at, end_at, field, filters = {}) { const params = [website_id, start_at, end_at]; const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); @@ -23,3 +37,27 @@ export function getSessionMetrics(website_id, start_at, end_at, field, filters = params, ); } + +async function clickhouseQuery(website_id, start_at, end_at, field, filters = {}) { + const params = [website_id]; + const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); + + return rawQueryClickhouse( + ` + select ${field} x, count(*) y + from session as x + where x.session_id in ( + select pageview.session_id + from pageview + ${joinSession} + where pageview.website_id=$1 + and ${getBetweenDatesClickhouse('pageview.created_at', start_at, end_at)} + ${pageviewQuery} + ${sessionQuery} + ) + group by x + order by y desc + `, + params, + ); +} diff --git a/queries/analytics/session/getSessions.js b/queries/analytics/session/getSessions.js index 4679670c..09b55b8d 100644 --- a/queries/analytics/session/getSessions.js +++ b/queries/analytics/session/getSessions.js @@ -1,6 +1,20 @@ -import { prisma, runQuery } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; +import { + getDateFormatClickhouse, + prisma, + rawQueryClickhouse, + runAnalyticsQuery, + runQuery, +} from 'lib/db'; -export async function getSessions(websites, start_at) { +export async function getSessions(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websites, start_at) { return runQuery( prisma.session.findMany({ where: { @@ -16,3 +30,25 @@ export async function getSessions(websites, start_at) { }), ); } + +async function clickhouseQuery(websites, start_at) { + return rawQueryClickhouse( + ` + select + session_id, + session_uuid, + website_id, + created_at, + hostname, + browser, + os, + device, + screen, + "language", + country + from session + where website_id in (${websites.join[',']} + and created_at >= ${getDateFormatClickhouse(start_at)}) + `, + ); +} diff --git a/queries/analytics/stats/getActiveVisitors.js b/queries/analytics/stats/getActiveVisitors.js index 042e364a..2789f769 100644 --- a/queries/analytics/stats/getActiveVisitors.js +++ b/queries/analytics/stats/getActiveVisitors.js @@ -1,7 +1,15 @@ -import { rawQuery } from 'lib/db'; import { subMinutes } from 'date-fns'; +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; +import { getDateFormatClickhouse, rawQuery, rawQueryClickhouse, runAnalyticsQuery } from 'lib/db'; -export function getActiveVisitors(website_id) { +export async function getActiveVisitors(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(website_id) { const date = subMinutes(new Date(), 5); const params = [website_id, date]; @@ -9,9 +17,23 @@ export function getActiveVisitors(website_id) { ` select count(distinct session_id) x from pageview - where website_id=$1 + where website_id = $1 and created_at >= $2 `, params, ); } + +async function clickhouseQuery(website_id) { + const params = [website_id]; + + return rawQueryClickhouse( + ` + select count(distinct session_id) x + from pageview + where website_id = $1 + and created_at >= ${getDateFormatClickhouse(subMinutes(new Date(), 5))} + `, + params, + ); +} diff --git a/queries/analytics/stats/getWebsiteStats.js b/queries/analytics/stats/getWebsiteStats.js index de43aa16..f1ee921a 100644 --- a/queries/analytics/stats/getWebsiteStats.js +++ b/queries/analytics/stats/getWebsiteStats.js @@ -1,6 +1,23 @@ -import { parseFilters, rawQuery, getDateQuery, getTimestampInterval } from 'lib/db'; +import { CLICKHOUSE, RELATIONAL } from 'lib/constants'; +import { + getDateQuery, + getBetweenDatesClickhouse, + getDateQueryClickhouse, + getTimestampInterval, + parseFilters, + rawQuery, + rawQueryClickhouse, + runAnalyticsQuery, +} from 'lib/db'; -export function getWebsiteStats(website_id, start_at, end_at, filters = {}) { +export async function getWebsiteStats(...args) { + return runAnalyticsQuery({ + [`${RELATIONAL}`]: () => relationalQuery(...args), + [`${CLICKHOUSE}`]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(website_id, start_at, end_at, filters = {}) { const params = [website_id, start_at, end_at]; const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); @@ -9,21 +26,57 @@ export function getWebsiteStats(website_id, start_at, end_at, filters = {}) { select sum(t.c) as "pageviews", count(distinct t.session_id) as "uniques", sum(case when t.c = 1 then 1 else 0 end) as "bounces", + sum(case when m2 < m1 + interval '1 hour' then ${getTimestampInterval( + 'm2', + 'm1', + )} else 0 end) as "totaltime" sum(t.time) as "totaltime" from ( - select pageview.session_id, - ${getDateQuery('pageview.created_at', 'hour')}, - count(*) c, - ${getTimestampInterval('pageview.created_at')} as "time" - from pageview - ${joinSession} - where pageview.website_id=$1 - and pageview.created_at between $2 and $3 - ${pageviewQuery} - ${sessionQuery} - group by 1, 2 + select + pageview.session_id, + ${getDateQuery('pageview.created_at', 'hour')}, + count(*) c, + min(created_at) m1, + max(created_at) m2 + from pageview + ${joinSession} + where pageview.website_id=$1 + and pageview.created_at between $2 and $3 + ${pageviewQuery} + ${sessionQuery} + group by 1, 2 ) t `, params, ); } + +async function clickhouseQuery(website_id, start_at, end_at, filters = {}) { + const params = [website_id]; + const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); + + return rawQueryClickhouse( + ` + select + sum(t.c) as "pageviews", + count(distinct t.session_id) as "uniques", + sum(if(t.c = 1, 1, 0)) as "bounces", + sum(if(max_time < min_time + interval 1 hour, max_time-min_time, 0)) as "totaltime" + from ( + select pageview.session_id, + ${getDateQueryClickhouse('pageview.created_at', 'day')} time_series, + count(*) c, + min(created_at) min_time, + max(created_at) max_time + from pageview + ${joinSession} + where pageview.website_id = $1 + and ${getBetweenDatesClickhouse('pageview.created_at', start_at, end_at)} + ${pageviewQuery} + ${sessionQuery} + group by pageview.session_id, time_series + ) t; + `, + params, + ); +}