From befc5cf6c0050ad37aef4ca290bd410544e32004 Mon Sep 17 00:00:00 2001 From: Brian Cao Date: Thu, 12 Jan 2023 12:06:25 -0800 Subject: [PATCH] Re-write CH queries to use query params. --- lib/{clickhouse.js => clickhouse.ts} | 70 +++++++------------ queries/analytics/event/getEventData.ts | 6 +- queries/analytics/event/getEventMetrics.ts | 6 +- .../event/{getEvents.js => getEvents.ts} | 25 +++---- .../analytics/pageview/getPageviewMetrics.ts | 12 ++-- .../analytics/pageview/getPageviewStats.ts | 6 +- queries/analytics/pageview/getPageviews.js | 43 ------------ queries/analytics/pageview/getPageviews.ts | 44 ++++++++++++ queries/analytics/session/getSession.ts | 4 +- .../analytics/session/getSessionMetrics.ts | 9 +-- queries/analytics/stats/getActiveVisitors.ts | 8 +-- queries/analytics/stats/getWebsiteStats.ts | 6 +- 12 files changed, 114 insertions(+), 125 deletions(-) rename lib/{clickhouse.js => clickhouse.ts} (69%) rename queries/analytics/event/{getEvents.js => getEvents.ts} (50%) delete mode 100644 queries/analytics/pageview/getPageviews.js create mode 100644 queries/analytics/pageview/getPageviews.ts diff --git a/lib/clickhouse.js b/lib/clickhouse.ts similarity index 69% rename from lib/clickhouse.js rename to lib/clickhouse.ts index b28694b6..f59e04c6 100644 --- a/lib/clickhouse.js +++ b/lib/clickhouse.ts @@ -14,7 +14,7 @@ export const CLICKHOUSE_DATE_FORMATS = { const log = debug('umami:clickhouse'); -let clickhouse; +let clickhouse: ClickHouse; const enabled = Boolean(process.env.CLICKHOUSE_URL); function getClient() { @@ -49,7 +49,7 @@ function getDateStringQuery(data, unit) { return `formatDateTime(${data}, '${CLICKHOUSE_DATE_FORMATS[unit]}')`; } -function getDateQuery(field, unit, timezone) { +function getDateQuery(field, unit, timezone?) { if (timezone) { return `date_trunc('${unit}', ${field}, '${timezone}')`; } @@ -60,12 +60,8 @@ function getDateFormat(date) { return `'${dateFormat(date, 'UTC:yyyy-mm-dd HH:MM:ss')}'`; } -function getCommaSeparatedStringFormat(data) { - return data.map(a => `'${a}'`).join(',') || ''; -} - -function getBetweenDates(field, start_at, end_at) { - return `${field} between ${getDateFormat(start_at)} and ${getDateFormat(end_at)}`; +function getBetweenDates(field, startAt, endAt) { + return `${field} between ${getDateFormat(startAt)} and ${getDateFormat(endAt)}`; } function getJsonField(column, property) { @@ -106,7 +102,7 @@ function getEventDataFilterQuery(column, filters) { return query.join('\nand '); } -function getFilterQuery(filters = {}, params = []) { +function getFilterQuery(filters = {}, params = {}) { const query = Object.keys(filters).reduce((arr, key) => { const filter = filters[key]; @@ -120,20 +116,24 @@ function getFilterQuery(filters = {}, params = []) { case 'browser': case 'device': case 'country': - case 'event_name': - arr.push(`and ${key}=$${params.length + 1}`); - params.push(decodeURIComponent(filter)); + arr.push(`and ${key} = {${key}:String}`); + params[key] = filter; + break; + + case 'eventName': + arr.push(`and event_name = {${key}:String}`); + params[key] = filter; break; case 'referrer': - arr.push(`and referrer like $${params.length + 1}`); - params.push(`%${decodeURIComponent(filter)}%`); + arr.push(`and referrer ILIKE {${key}:String}`); + params[key] = `%${filter}`; break; case 'domain': - arr.push(`and referrer not like $${params.length + 1}`); - arr.push(`and referrer not like '/%'`); - params.push(`%://${filter}/%`); + arr.push(`and referrer NOT ILIKE {${key}:String}`); + arr.push(`and referrer NOT ILIKE '/%'`); + params[key] = `%://${filter}/%`; break; case 'query': @@ -146,49 +146,32 @@ function getFilterQuery(filters = {}, params = []) { return query.join('\n'); } -function parseFilters(filters = {}, params = []) { - const { domain, url, event_url, referrer, os, browser, device, country, event_name, query } = +function parseFilters(filters: any = {}, params: any = {}) { + const { domain, url, eventUrl, referrer, os, browser, device, country, eventName, query } = filters; const pageviewFilters = { domain, url, referrer, query }; const sessionFilters = { os, browser, device, country }; - const eventFilters = { url: event_url, event_name }; + const eventFilters = { url: eventUrl, eventName }; return { pageviewFilters, sessionFilters, eventFilters, - event: { event_name }, + event: { eventName }, filterQuery: getFilterQuery(filters, params), }; } -function formatQuery(str, params = []) { - let formattedString = str; - - params.forEach((param, i) => { - let replace = param; - - if (typeof param === 'string' || param instanceof String) { - replace = `'${replace}'`; - } - - formattedString = formattedString.replace(`$${i + 1}`, replace); - }); - - return formattedString; -} - -async function rawQuery(query, params = []) { - let formattedQuery = formatQuery(query, params); - +async function rawQuery(query, params = {}) { if (process.env.LOG_QUERY) { - log(formattedQuery); + log(query); + log(params); } await connect(); - return clickhouse.query(formattedQuery).toPromise(); + return clickhouse.query(query, { params }).toPromise(); } async function findUnique(data) { @@ -204,7 +187,7 @@ async function findFirst(data) { } async function connect() { - if (!clickhouse) { + if (enabled && !clickhouse) { clickhouse = process.env.CLICKHOUSE_URL && (global[CLICKHOUSE] || getClient()); } @@ -219,7 +202,6 @@ export default { getDateStringQuery, getDateQuery, getDateFormat, - getCommaSeparatedStringFormat, getBetweenDates, getEventDataColumnsQuery, getEventDataFilterQuery, diff --git a/queries/analytics/event/getEventData.ts b/queries/analytics/event/getEventData.ts index 545c5112..103c3184 100644 --- a/queries/analytics/event/getEventData.ts +++ b/queries/analytics/event/getEventData.ts @@ -72,14 +72,14 @@ async function clickhouseQuery( const { rawQuery, getBetweenDates, getEventDataColumnsQuery, getEventDataFilterQuery } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0]; + const params = { websiteId, revId: website?.revId || 0 }; return rawQuery( `select ${getEventDataColumnsQuery('event_data', columns)} from event - where website_id = $1 - and rev_id = $2 + where website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} and event_type = ${EVENT_TYPE.customEvent} ${eventName ? `and eventName = ${eventName}` : ''} and ${getBetweenDates('created_at', startDate, endDate)} diff --git a/queries/analytics/event/getEventMetrics.ts b/queries/analytics/event/getEventMetrics.ts index 42bc9fd8..31cfe327 100644 --- a/queries/analytics/event/getEventMetrics.ts +++ b/queries/analytics/event/getEventMetrics.ts @@ -85,7 +85,7 @@ async function clickhouseQuery( ) { const { rawQuery, getDateQuery, getBetweenDates, getFilterQuery } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0]; + const params = { websiteId, revId: website?.revId || 0 }; return rawQuery( `select @@ -93,8 +93,8 @@ async function clickhouseQuery( ${getDateQuery('created_at', unit, timezone)} t, count(*) y from event - where website_id = $1 - and rev_id = $2 + where website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} and event_type = ${EVENT_TYPE.customEvent} and ${getBetweenDates('created_at', startDate, endDate)} ${getFilterQuery(filters, params)} diff --git a/queries/analytics/event/getEvents.js b/queries/analytics/event/getEvents.ts similarity index 50% rename from queries/analytics/event/getEvents.js rename to queries/analytics/event/getEvents.ts index 81a187ce..5d01824c 100644 --- a/queries/analytics/event/getEvents.js +++ b/queries/analytics/event/getEvents.ts @@ -1,29 +1,30 @@ import prisma from 'lib/prisma'; import clickhouse from 'lib/clickhouse'; import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; +import { EVENT_TYPE } from 'lib/constants'; -export function getEvents(...args) { +export function getEvents(...args: [websites: string[], startAt: Date]) { return runQuery({ [PRISMA]: () => relationalQuery(...args), [CLICKHOUSE]: () => clickhouseQuery(...args), }); } -function relationalQuery(websites, start_at) { +function relationalQuery(websites: string[], startAt: Date) { return prisma.client.event.findMany({ where: { websiteId: { in: websites, }, createdAt: { - gte: start_at, + gte: startAt, }, }, }); } -function clickhouseQuery(websites, start_at) { - const { rawQuery, getDateFormat, getCommaSeparatedStringFormat } = clickhouse; +function clickhouseQuery(websites: string[], startAt: Date) { + const { rawQuery } = clickhouse; return rawQuery( `select @@ -34,12 +35,12 @@ function clickhouseQuery(websites, start_at) { url, event_name from event - where event_name != '' - and ${ - websites && websites.length > 0 - ? `website_id in (${getCommaSeparatedStringFormat(websites)})` - : '0 = 0' - } - and created_at >= ${getDateFormat(start_at)}`, + where event_type = ${EVENT_TYPE.customEvent} + and ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'} + and created_at >= {startAt:DateTime('UTC')}`, + { + websites, + startAt, + }, ); } diff --git a/queries/analytics/pageview/getPageviewMetrics.ts b/queries/analytics/pageview/getPageviewMetrics.ts index 275c2d10..02df2877 100644 --- a/queries/analytics/pageview/getPageviewMetrics.ts +++ b/queries/analytics/pageview/getPageviewMetrics.ts @@ -68,15 +68,19 @@ async function clickhouseQuery( const { startDate, endDate, column, filters = {} } = data; const { rawQuery, parseFilters, getBetweenDates } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0, EVENT_TYPE.pageView]; + const params = { + websiteId, + revId: website?.revId || 0, + eventType: EVENT_TYPE.pageView, + }; const { filterQuery } = parseFilters(filters, params); return rawQuery( `select ${column} x, count(*) y from event - where website_id = $1 - and rev_id = $2 - and event_type = $3 + where website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} + and event_type = {eventType:UInt32} and ${getBetweenDates('created_at', startDate, endDate)} ${filterQuery} group by x diff --git a/queries/analytics/pageview/getPageviewStats.ts b/queries/analytics/pageview/getPageviewStats.ts index ab487b18..b2d86b33 100644 --- a/queries/analytics/pageview/getPageviewStats.ts +++ b/queries/analytics/pageview/getPageviewStats.ts @@ -78,7 +78,7 @@ async function clickhouseQuery( const { startDate, endDate, timezone = 'UTC', unit = 'day', count = '*', filters = {} } = data; const { parseFilters, rawQuery, getDateStringQuery, getDateQuery, getBetweenDates } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0]; + const params = { websiteId, revId: website?.revId || 0 }; const { filterQuery } = parseFilters(filters, params); return rawQuery( @@ -90,8 +90,8 @@ async function clickhouseQuery( ${getDateQuery('created_at', unit, timezone)} t, count(${count !== '*' ? 'distinct session_id' : count}) y from event - where website_id = $1 - and rev_id = $2 + where website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} and event_type = ${EVENT_TYPE.pageView} and ${getBetweenDates('created_at', startDate, endDate)} ${filterQuery} diff --git a/queries/analytics/pageview/getPageviews.js b/queries/analytics/pageview/getPageviews.js deleted file mode 100644 index 2bf41b0b..00000000 --- a/queries/analytics/pageview/getPageviews.js +++ /dev/null @@ -1,43 +0,0 @@ -import prisma from 'lib/prisma'; -import clickhouse from 'lib/clickhouse'; -import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; - -export async function getPageviews(...args) { - return runQuery({ - [PRISMA]: () => relationalQuery(...args), - [CLICKHOUSE]: () => clickhouseQuery(...args), - }); -} - -async function relationalQuery(websites, start_at) { - return prisma.client.pageview.findMany({ - where: { - websiteId: { - in: websites, - }, - createdAt: { - gte: start_at, - }, - }, - }); -} - -async function clickhouseQuery(websites, start_at) { - const { rawQuery, getCommaSeparatedStringFormat } = clickhouse; - - return rawQuery( - `select - website_id, - session_id, - created_at, - url - from event - where event_name = '' - and ${ - websites && websites.length > 0 - ? `website_id in (${getCommaSeparatedStringFormat(websites)})` - : '0 = 0' - } - and created_at >= ${clickhouse.getDateFormat(start_at)}`, - ); -} diff --git a/queries/analytics/pageview/getPageviews.ts b/queries/analytics/pageview/getPageviews.ts new file mode 100644 index 00000000..eb60a1f5 --- /dev/null +++ b/queries/analytics/pageview/getPageviews.ts @@ -0,0 +1,44 @@ +import prisma from 'lib/prisma'; +import clickhouse from 'lib/clickhouse'; +import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; +import { EVENT_TYPE } from 'lib/constants'; + +export async function getPageviews(...args: [websites: string[], startAt: Date]) { + return runQuery({ + [PRISMA]: () => relationalQuery(...args), + [CLICKHOUSE]: () => clickhouseQuery(...args), + }); +} + +async function relationalQuery(websites: string[], startAt: Date) { + return prisma.client.pageview.findMany({ + where: { + websiteId: { + in: websites, + }, + createdAt: { + gte: startAt, + }, + }, + }); +} + +async function clickhouseQuery(websites: string[], startAt: Date) { + const { rawQuery } = clickhouse; + + return rawQuery( + `select + website_id, + session_id, + created_at, + url + from event + where event_type = ${EVENT_TYPE.pageView} + and ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'} + and created_at >= {startAt:DateTime('UTC')}`, + { + websites, + startAt, + }, + ); +} diff --git a/queries/analytics/session/getSession.ts b/queries/analytics/session/getSession.ts index 19875117..09b238f2 100644 --- a/queries/analytics/session/getSession.ts +++ b/queries/analytics/session/getSession.ts @@ -18,7 +18,7 @@ async function relationalQuery(where: Prisma.SessionWhereUniqueInput) { async function clickhouseQuery({ id: sessionId }: { id: string }) { const { rawQuery, findFirst } = clickhouse; - const params = [sessionId]; + const params = { sessionId }; return rawQuery( `select @@ -33,7 +33,7 @@ async function clickhouseQuery({ id: sessionId }: { id: string }) { language, country from event - where session_id = $1 + where session_id = {sessionId:UUID} limit 1`, params, ).then(result => findFirst(result)); diff --git a/queries/analytics/session/getSessionMetrics.ts b/queries/analytics/session/getSessionMetrics.ts index e003ebd4..3465967e 100644 --- a/queries/analytics/session/getSessionMetrics.ts +++ b/queries/analytics/session/getSessionMetrics.ts @@ -2,6 +2,7 @@ import prisma from 'lib/prisma'; import clickhouse from 'lib/clickhouse'; import { runQuery, CLICKHOUSE, PRISMA } from 'lib/db'; import cache from 'lib/cache'; +import { EVENT_TYPE } from 'lib/constants'; export async function getSessionMetrics( ...args: [ @@ -50,15 +51,15 @@ async function clickhouseQuery( const { startDate, endDate, field, filters = {} } = data; const { parseFilters, getBetweenDates, rawQuery } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0]; + const params = { websiteId, revId: website?.revId || 0 }; const { filterQuery } = parseFilters(filters, params); return rawQuery( `select ${field} x, count(distinct session_id) y from event as x - where website_id = $1 - and rev_id = $2 - and event_name = '' + where website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} + and event_type = ${EVENT_TYPE.pageView} and ${getBetweenDates('created_at', startDate, endDate)} ${filterQuery} group by x diff --git a/queries/analytics/stats/getActiveVisitors.ts b/queries/analytics/stats/getActiveVisitors.ts index 6c8a5b4f..60a1e5c1 100644 --- a/queries/analytics/stats/getActiveVisitors.ts +++ b/queries/analytics/stats/getActiveVisitors.ts @@ -28,14 +28,14 @@ async function relationalQuery(websiteId: string) { } async function clickhouseQuery(websiteId: string) { - const { rawQuery, getDateFormat } = clickhouse; - const params = [websiteId]; + const { rawQuery } = clickhouse; + const params = { websiteId, startAt: subMinutes(new Date(), 5) }; return rawQuery( `select count(distinct session_id) x from event - where website_id = $1 - and created_at >= ${getDateFormat(subMinutes(new Date(), 5))}`, + where website_id = {websiteId:UUID} + and created_at >= {startAt:DateTime('UTC')}`, params, ); } diff --git a/queries/analytics/stats/getWebsiteStats.ts b/queries/analytics/stats/getWebsiteStats.ts index 5a83cceb..73d28fb6 100644 --- a/queries/analytics/stats/getWebsiteStats.ts +++ b/queries/analytics/stats/getWebsiteStats.ts @@ -52,7 +52,7 @@ async function clickhouseQuery( const { startDate, endDate, filters = {} } = data; const { rawQuery, getDateQuery, getBetweenDates, parseFilters } = clickhouse; const website = await cache.fetchWebsite(websiteId); - const params = [websiteId, website?.revId || 0]; + const params = { websiteId, revId: website?.revId || 0 }; const { filterQuery } = parseFilters(filters, params); return rawQuery( @@ -69,8 +69,8 @@ async function clickhouseQuery( max(created_at) max_time from event where event_type = ${EVENT_TYPE.pageView} - and website_id = $1 - and rev_id = $2 + and website_id = {websiteId:UUID} + and rev_id = {revId:UInt32} and ${getBetweenDates('created_at', startDate, endDate)} ${filterQuery} group by session_id, time_series