convert analytics queries

pull/1330/head
Brian Cao 2022-07-20 21:31:26 -07:00
parent 4f12933f42
commit 6ea2282f82
12 changed files with 470 additions and 52 deletions

View File

@ -64,6 +64,7 @@ export const EVENT_COLORS = [
'#ffec16', '#ffec16',
]; ];
export const RELATIONAL = 'relational';
export const POSTGRESQL = 'postgresql'; export const POSTGRESQL = 'postgresql';
export const MYSQL = 'mysql'; export const MYSQL = 'mysql';
export const CLICKHOUSE = 'clickhouse'; export const CLICKHOUSE = 'clickhouse';

View File

@ -7,8 +7,10 @@ import {
POSTGRESQL, POSTGRESQL,
POSTGRESQL_DATE_FORMATS, POSTGRESQL_DATE_FORMATS,
CLICKHOUSE, CLICKHOUSE,
RELATIONAL,
} from 'lib/constants'; } from 'lib/constants';
import moment from 'moment-timezone'; import moment from 'moment-timezone';
import { CLICKHOUSE_DATE_FORMATS } from './constants';
BigInt.prototype.toJSON = function () { BigInt.prototype.toJSON = function () {
return Number(this); return Number(this);
@ -48,19 +50,48 @@ function initializePrisma(options) {
} }
function initializeClickhouse() { function initializeClickhouse() {
if (process.env.ANALYTICS_URL) { if (!process.env.ANALYTICS_URL) {
return null; return null;
} }
const url = new URL(process.env.ANALYTICS_URL);
const database = url.pathname.replace('/', '');
return new ClickHouse({ return new ClickHouse({
url: process.env.ANALYTICS_URL, url: url.hostname,
port: Number(url.port),
basicAuth: url.password
? {
username: url.username || 'default',
password: url.password,
}
: null,
format: 'json', format: 'json',
config: {
database,
},
}); });
// return new ClickHouse({
// url: 'http://164.92.95.2',
// port: 8123,
// basicAuth: {
// username: 'default',
// password: 'shhhthisissupersecret!',
// },
// format: 'json',
// config: {
// database: 'umami_dev',
// },
// });
} }
const prisma = initializePrisma(options); const prisma = initializePrisma(options);
const clickhouse = initializeClickhouse(); const clickhouse = initializeClickhouse();
console.log('clickhouse1: ', clickhouse);
export { prisma, clickhouse }; export { prisma, clickhouse };
export function getDatabase() { export function getDatabase() {
@ -103,6 +134,10 @@ export function getDateStringQuery(data, unit) {
} }
} }
export function getDateStringQueryClickhouse(data, unit) {
return `formatDateTime(${data}, '${CLICKHOUSE_DATE_FORMATS[unit]}')`;
}
export function getDateQuery(field, unit, timezone) { export function getDateQuery(field, unit, timezone) {
const db = getDatabase(); const db = getDatabase();
@ -124,15 +159,31 @@ export function getDateQuery(field, unit, timezone) {
} }
} }
export function getTimestampInterval(field) { export function getDateQueryClickhouse(field, unit, timezone) {
if (timezone) {
return `date_trunc('${unit}', ${field},'${timezone}')`;
}
return `date_trunc('${unit}', ${field})`;
}
export function getDateFormatClickhouse(date) {
return `parseDateTimeBestEffort('${date.toUTCString()}')`;
}
export function getBetweenDatesClickhouse(field, start_at, end_at) {
return `${field} between ${getDateFormatClickhouse(start_at)}
and ${getDateFormatClickhouse(end_at)}`;
}
export function getTimestampInterval(maxColumn, minColumn) {
const db = getDatabase(); const db = getDatabase();
if (db === POSTGRESQL) { if (db === POSTGRESQL) {
return `floor(extract(epoch from max(${field}) - min(${field})))`; return `floor(extract(epoch from max(${maxColumn}) - min(${minColumn})))`;
} }
if (db === MYSQL) { if (db === MYSQL) {
return `floor(unix_timestamp(max(${field})) - unix_timestamp(min(${field})))`; return `floor(unix_timestamp(max(${maxColumn})) - unix_timestamp(min(${minColumn})))`;
} }
} }
@ -149,6 +200,7 @@ export function getFilterQuery(table, filters = {}, params = []) {
if (table === 'pageview' || table === 'event') { if (table === 'pageview' || table === 'event') {
arr.push(`and ${table}.${key}=$${params.length + 1}`); arr.push(`and ${table}.${key}=$${params.length + 1}`);
params.push(decodeURIComponent(value)); params.push(decodeURIComponent(value));
console.log(params);
} }
break; break;
@ -213,6 +265,22 @@ export function parseFilters(table, filters = {}, params = []) {
}; };
} }
export function replaceQueryClickhouse(string, params = []) {
let formattedString = string;
params.forEach((a, i) => {
let replace = a;
if (typeof a === 'string' || a instanceof String) {
replace = `'${replace}'`;
}
formattedString = formattedString.replace(`$${i + 1}`, replace);
});
return formattedString;
}
export async function runQuery(query) { export async function runQuery(query) {
return query.catch(e => { return query.catch(e => {
throw e; throw e;
@ -231,14 +299,24 @@ export async function rawQuery(query, params = []) {
return runQuery(prisma.$queryRawUnsafe.apply(prisma, [sql, ...params])); return runQuery(prisma.$queryRawUnsafe.apply(prisma, [sql, ...params]));
} }
export function runAnalyticsQuery(relational, clickhouse) { export async function rawQueryClickhouse(query, params = [], debug = false) {
let formattedQuery = replaceQueryClickhouse(query, params);
if (debug || process.env.LOG_QUERY) {
console.log(formattedQuery);
}
return clickhouse.query(formattedQuery).toPromise();
}
export async function runAnalyticsQuery(queries) {
const db = getAnalyticsDatabase(); const db = getAnalyticsDatabase();
if (db === POSTGRESQL || db === MYSQL) { if (db === POSTGRESQL || db === MYSQL) {
return relational(); return queries[`${RELATIONAL}`]();
} }
if (db === CLICKHOUSE) { if (db === CLICKHOUSE) {
return runQuery(clickhouse()); return queries[`${CLICKHOUSE}`]();
} }
} }

View File

@ -1,17 +1,23 @@
import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
import { import {
rawQueryClickhouse,
getBetweenDatesClickhouse,
getDateQuery, getDateQuery,
getDateQueryClickhouse,
getDateStringQuery, getDateStringQuery,
getFilterQuery, getFilterQuery,
rawQuery, rawQuery,
runAnalyticsQuery, runAnalyticsQuery,
clickhouse,
} from 'lib/db'; } from 'lib/db';
export function getEventMetrics(...args) { export async function getEventMetrics(...args) {
return runAnalyticsQuery(relationalQuery(...args), clickhouseQuery(...args)); return runAnalyticsQuery({
[`${RELATIONAL}`]: () => relationalQuery(...args),
[`${CLICKHOUSE}`]: () => clickhouseQuery(...args),
});
} }
function relationalQuery( async function relationalQuery(
website_id, website_id,
start_at, start_at,
end_at, end_at,
@ -38,28 +44,28 @@ function relationalQuery(
); );
} }
function clickhouseQuery( async function clickhouseQuery(
website_id, website_id,
start_at, start_at,
end_at, end_at,
timezone = 'utc', timezone = 'UTC',
unit = 'day', unit = 'day',
filters = {}, filters = {},
) { ) {
const params = [website_id, start_at, end_at]; const params = [website_id];
return clickhouse.query( return rawQueryClickhouse(
` `
select select
event_value x, event_value x,
${getDateStringQuery(getDateQuery('created_at', unit, timezone), unit)} t, ${getDateQueryClickhouse('created_at', unit, timezone)} t,
count(*) y count(*) y
from event from event
where website_id=$1 where website_id= $1
and created_at between $2 and $3 and ${getBetweenDatesClickhouse('created_at', start_at, end_at)}
${getFilterQuery('event', filters, params)} ${getFilterQuery('event', filters, params)}
group by 1, 2 group by x, t
order by 2 order by t
`, `,
params, params,
); );

View File

@ -1,6 +1,20 @@
import { prisma, runQuery } from 'lib/db'; import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
import {
rawQueryClickhouse,
getDateFormatClickhouse,
prisma,
runAnalyticsQuery,
runQuery,
} from 'lib/db';
export async function getEvents(websites, start_at) { export function getEvents(...args) {
return runAnalyticsQuery({
[`${RELATIONAL}`]: () => relationalQuery(...args),
[`${CLICKHOUSE}`]: () => clickhouseQuery(...args),
});
}
function relationalQuery(websites, start_at) {
return runQuery( return runQuery(
prisma.event.findMany({ prisma.event.findMany({
where: { where: {
@ -16,3 +30,20 @@ export async function getEvents(websites, start_at) {
}), }),
); );
} }
function clickhouseQuery(websites, start_at) {
return rawQueryClickhouse(
`
select
event_id,
website_id,
session_id,
created_at,
url,
event_type
from event
where website_id in (${websites.join[',']}
and created_at >= ${getDateFormatClickhouse(start_at)})
`,
);
}

View File

@ -1,6 +1,20 @@
import { parseFilters, rawQuery } from 'lib/db'; import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
import {
rawQueryClickhouse,
runAnalyticsQuery,
parseFilters,
rawQuery,
getBetweenDatesClickhouse,
} from 'lib/db';
export function getPageviewMetrics(website_id, start_at, end_at, field, table, filters = {}) { export async function getPageviewMetrics(...args) {
return runAnalyticsQuery({
[`${RELATIONAL}`]: () => relationalQuery(...args),
[`${CLICKHOUSE}`]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(website_id, start_at, end_at, field, table, filters = {}) {
const params = [website_id, start_at, end_at]; const params = [website_id, start_at, end_at];
const { pageviewQuery, sessionQuery, eventQuery, joinSession } = parseFilters( const { pageviewQuery, sessionQuery, eventQuery, joinSession } = parseFilters(
table, table,
@ -24,3 +38,28 @@ export function getPageviewMetrics(website_id, start_at, end_at, field, table, f
params, params,
); );
} }
async function clickhouseQuery(website_id, start_at, end_at, field, table, filters = {}) {
const params = [website_id];
const { pageviewQuery, sessionQuery, eventQuery, joinSession } = parseFilters(
table,
filters,
params,
);
return rawQueryClickhouse(
`
select ${field} x, count(*) y
from ${table}
${joinSession}
where ${table}.website_id= $1
and ${getBetweenDatesClickhouse(table + '.created_at', start_at, end_at)}
${pageviewQuery}
${joinSession && sessionQuery}
${eventQuery}
group by x
order by y desc
`,
params,
);
}

View File

@ -1,6 +1,24 @@
import { parseFilters, rawQuery, getDateQuery, getDateStringQuery } from 'lib/db'; import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
import {
rawQueryClickhouse,
getBetweenDatesClickhouse,
getDateQuery,
getDateQueryClickhouse,
getDateStringQuery,
getDateStringQueryClickhouse,
parseFilters,
rawQuery,
runAnalyticsQuery,
} from 'lib/db';
export function getPageviewStats( export async function getPageviewStats(...args) {
return runAnalyticsQuery({
[`${RELATIONAL}`]: () => relationalQuery(...args),
[`${CLICKHOUSE}`]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(
website_id, website_id,
start_at, start_at,
end_at, end_at,
@ -32,3 +50,37 @@ export function getPageviewStats(
params, params,
); );
} }
async function clickhouseQuery(
website_id,
start_at,
end_at,
timezone = 'UTC',
unit = 'day',
count = '*',
filters = {},
) {
const params = [website_id];
const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params);
return rawQueryClickhouse(
`
select
${getDateStringQueryClickhouse('g.t', unit)} as t,
g.y as y
from
(select
${getDateQueryClickhouse('created_at', unit, timezone)} t,
count(${count}) y
from pageview
${joinSession}
where pageview.website_id= $1
and ${getBetweenDatesClickhouse('pageview.created_at', start_at, end_at)}
${pageviewQuery}
${sessionQuery}
group by t) g
order by t
`,
params,
);
}

View File

@ -1,6 +1,20 @@
import { prisma, runQuery } from 'lib/db'; import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
import {
rawQueryClickhouse,
getDateFormatClickhouse,
prisma,
runAnalyticsQuery,
runQuery,
} from 'lib/db';
export async function getPageviews(websites, start_at) { export async function getPageviews(...args) {
return runAnalyticsQuery({
[`${RELATIONAL}`]: () => relationalQuery(...args),
[`${CLICKHOUSE}`]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(websites, start_at) {
return runQuery( return runQuery(
prisma.pageview.findMany({ prisma.pageview.findMany({
where: { where: {
@ -16,3 +30,19 @@ export async function getPageviews(websites, start_at) {
}), }),
); );
} }
async function clickhouseQuery(websites, start_at) {
return rawQueryClickhouse(
`
select
view_id,
website_id,
session_id,
created_at,
url
from pageview
where website_id in (${websites.join[',']}
and created_at >= ${getDateFormatClickhouse(start_at)})
`,
);
}

View File

@ -1,6 +1,14 @@
import { prisma, runQuery } from 'lib/db'; import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
import { rawQueryClickhouse, prisma, runAnalyticsQuery, runQuery } from 'lib/db';
export async function getSessionByUuid(session_uuid) { export async function getSessionByUuid(...args) {
return runAnalyticsQuery({
[`${RELATIONAL}`]: () => relationalQuery(...args),
[`${CLICKHOUSE}`]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(session_uuid) {
return runQuery( return runQuery(
prisma.session.findUnique({ prisma.session.findUnique({
where: { where: {
@ -9,3 +17,27 @@ export async function getSessionByUuid(session_uuid) {
}), }),
); );
} }
async function clickhouseQuery(session_uuid) {
const params = [session_uuid];
return rawQueryClickhouse(
`
select
session_id,
session_uuid,
website_id,
created_at,
hostname,
browser,
os,
device,
screen,
"language",
country
from session
where session_id = $1
`,
params,
);
}

View File

@ -1,6 +1,20 @@
import { parseFilters, rawQuery } from 'lib/db'; import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
import {
getBetweenDatesClickhouse,
parseFilters,
rawQuery,
rawQueryClickhouse,
runAnalyticsQuery,
} from 'lib/db';
export function getSessionMetrics(website_id, start_at, end_at, field, filters = {}) { export async function getSessionMetrics(...args) {
return runAnalyticsQuery({
[`${RELATIONAL}`]: () => relationalQuery(...args),
[`${CLICKHOUSE}`]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(website_id, start_at, end_at, field, filters = {}) {
const params = [website_id, start_at, end_at]; const params = [website_id, start_at, end_at];
const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params);
@ -23,3 +37,27 @@ export function getSessionMetrics(website_id, start_at, end_at, field, filters =
params, params,
); );
} }
async function clickhouseQuery(website_id, start_at, end_at, field, filters = {}) {
const params = [website_id];
const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params);
return rawQueryClickhouse(
`
select ${field} x, count(*) y
from session as x
where x.session_id in (
select pageview.session_id
from pageview
${joinSession}
where pageview.website_id=$1
and ${getBetweenDatesClickhouse('pageview.created_at', start_at, end_at)}
${pageviewQuery}
${sessionQuery}
)
group by x
order by y desc
`,
params,
);
}

View File

@ -1,6 +1,20 @@
import { prisma, runQuery } from 'lib/db'; import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
import {
getDateFormatClickhouse,
prisma,
rawQueryClickhouse,
runAnalyticsQuery,
runQuery,
} from 'lib/db';
export async function getSessions(websites, start_at) { export async function getSessions(...args) {
return runAnalyticsQuery({
[`${RELATIONAL}`]: () => relationalQuery(...args),
[`${CLICKHOUSE}`]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(websites, start_at) {
return runQuery( return runQuery(
prisma.session.findMany({ prisma.session.findMany({
where: { where: {
@ -16,3 +30,25 @@ export async function getSessions(websites, start_at) {
}), }),
); );
} }
async function clickhouseQuery(websites, start_at) {
return rawQueryClickhouse(
`
select
session_id,
session_uuid,
website_id,
created_at,
hostname,
browser,
os,
device,
screen,
"language",
country
from session
where website_id in (${websites.join[',']}
and created_at >= ${getDateFormatClickhouse(start_at)})
`,
);
}

View File

@ -1,7 +1,15 @@
import { rawQuery } from 'lib/db';
import { subMinutes } from 'date-fns'; import { subMinutes } from 'date-fns';
import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
import { getDateFormatClickhouse, rawQuery, rawQueryClickhouse, runAnalyticsQuery } from 'lib/db';
export function getActiveVisitors(website_id) { export async function getActiveVisitors(...args) {
return runAnalyticsQuery({
[`${RELATIONAL}`]: () => relationalQuery(...args),
[`${CLICKHOUSE}`]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(website_id) {
const date = subMinutes(new Date(), 5); const date = subMinutes(new Date(), 5);
const params = [website_id, date]; const params = [website_id, date];
@ -9,9 +17,23 @@ export function getActiveVisitors(website_id) {
` `
select count(distinct session_id) x select count(distinct session_id) x
from pageview from pageview
where website_id=$1 where website_id = $1
and created_at >= $2 and created_at >= $2
`, `,
params, params,
); );
} }
async function clickhouseQuery(website_id) {
const params = [website_id];
return rawQueryClickhouse(
`
select count(distinct session_id) x
from pageview
where website_id = $1
and created_at >= ${getDateFormatClickhouse(subMinutes(new Date(), 5))}
`,
params,
);
}

View File

@ -1,6 +1,23 @@
import { parseFilters, rawQuery, getDateQuery, getTimestampInterval } from 'lib/db'; import { CLICKHOUSE, RELATIONAL } from 'lib/constants';
import {
getDateQuery,
getBetweenDatesClickhouse,
getDateQueryClickhouse,
getTimestampInterval,
parseFilters,
rawQuery,
rawQueryClickhouse,
runAnalyticsQuery,
} from 'lib/db';
export function getWebsiteStats(website_id, start_at, end_at, filters = {}) { export async function getWebsiteStats(...args) {
return runAnalyticsQuery({
[`${RELATIONAL}`]: () => relationalQuery(...args),
[`${CLICKHOUSE}`]: () => clickhouseQuery(...args),
});
}
async function relationalQuery(website_id, start_at, end_at, filters = {}) {
const params = [website_id, start_at, end_at]; const params = [website_id, start_at, end_at];
const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params); const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params);
@ -9,21 +26,57 @@ export function getWebsiteStats(website_id, start_at, end_at, filters = {}) {
select sum(t.c) as "pageviews", select sum(t.c) as "pageviews",
count(distinct t.session_id) as "uniques", count(distinct t.session_id) as "uniques",
sum(case when t.c = 1 then 1 else 0 end) as "bounces", sum(case when t.c = 1 then 1 else 0 end) as "bounces",
sum(case when m2 < m1 + interval '1 hour' then ${getTimestampInterval(
'm2',
'm1',
)} else 0 end) as "totaltime"
sum(t.time) as "totaltime" sum(t.time) as "totaltime"
from ( from (
select pageview.session_id, select
${getDateQuery('pageview.created_at', 'hour')}, pageview.session_id,
count(*) c, ${getDateQuery('pageview.created_at', 'hour')},
${getTimestampInterval('pageview.created_at')} as "time" count(*) c,
from pageview min(created_at) m1,
${joinSession} max(created_at) m2
where pageview.website_id=$1 from pageview
and pageview.created_at between $2 and $3 ${joinSession}
${pageviewQuery} where pageview.website_id=$1
${sessionQuery} and pageview.created_at between $2 and $3
group by 1, 2 ${pageviewQuery}
${sessionQuery}
group by 1, 2
) t ) t
`, `,
params, params,
); );
} }
async function clickhouseQuery(website_id, start_at, end_at, filters = {}) {
const params = [website_id];
const { pageviewQuery, sessionQuery, joinSession } = parseFilters('pageview', filters, params);
return rawQueryClickhouse(
`
select
sum(t.c) as "pageviews",
count(distinct t.session_id) as "uniques",
sum(if(t.c = 1, 1, 0)) as "bounces",
sum(if(max_time < min_time + interval 1 hour, max_time-min_time, 0)) as "totaltime"
from (
select pageview.session_id,
${getDateQueryClickhouse('pageview.created_at', 'day')} time_series,
count(*) c,
min(created_at) min_time,
max(created_at) max_time
from pageview
${joinSession}
where pageview.website_id = $1
and ${getBetweenDatesClickhouse('pageview.created_at', start_at, end_at)}
${pageviewQuery}
${sessionQuery}
group by pageview.session_id, time_series
) t;
`,
params,
);
}