From 55a586fe27a946995cbf2a26ee987a64d1973f57 Mon Sep 17 00:00:00 2001 From: Francis Cao Date: Mon, 20 Feb 2023 09:04:20 -0800 Subject: [PATCH] add subdivision1/2, cities to query logic --- db/clickhouse/schema.sql | 2 +- lib/clickhouse.ts | 22 ++++++-- lib/detect.js | 6 +-- lib/prisma.ts | 24 +++++++-- lib/session.js | 9 +++- pages/api/collect.ts | 3 ++ pages/api/websites/[id]/metrics.ts | 12 +++++ pages/api/websites/[id]/pageviews.ts | 12 +++++ pages/api/websites/[id]/stats.ts | 24 ++++++++- queries/analytics/event/saveEvent.ts | 61 +++++++++++----------- queries/analytics/pageview/savePageView.ts | 28 +++++++--- queries/analytics/session/createSession.ts | 21 +++++++- queries/analytics/session/getSession.ts | 5 +- queries/analytics/session/getSessions.ts | 5 +- scripts/build-geo.js | 2 +- scripts/test.js | 5 +- 16 files changed, 184 insertions(+), 57 deletions(-) diff --git a/db/clickhouse/schema.sql b/db/clickhouse/schema.sql index 15a9c33e..3a04aea7 100644 --- a/db/clickhouse/schema.sql +++ b/db/clickhouse/schema.sql @@ -15,7 +15,7 @@ CREATE TABLE event screen LowCardinality(String), language LowCardinality(String), country LowCardinality(String), - subdivision LowCardinality(String), + subdivision1 LowCardinality(String), subdivision2 LowCardinality(String), city String, --pageview diff --git a/lib/clickhouse.ts b/lib/clickhouse.ts index f59e04c6..5420e969 100644 --- a/lib/clickhouse.ts +++ b/lib/clickhouse.ts @@ -115,6 +115,9 @@ function getFilterQuery(filters = {}, params = {}) { case 'os': case 'browser': case 'device': + case 'subdivision1': + case 'subdivision2': + case 'city': case 'country': arr.push(`and ${key} = {${key}:String}`); params[key] = filter; @@ -147,11 +150,24 @@ function getFilterQuery(filters = {}, params = {}) { } function parseFilters(filters: any = {}, params: any = {}) { - const { domain, url, eventUrl, referrer, os, browser, device, country, eventName, query } = - filters; + const { + domain, + url, + eventUrl, + referrer, + os, + browser, + device, + country, + subdivision1, + subdivision2, + city, + eventName, + query, + } = filters; const pageviewFilters = { domain, url, referrer, query }; - const sessionFilters = { os, browser, device, country }; + const sessionFilters = { os, browser, device, country, subdivision1, subdivision2, city }; const eventFilters = { url: eventUrl, eventName }; return { diff --git a/lib/detect.js b/lib/detect.js index 74d95daa..508f6f3c 100644 --- a/lib/detect.js +++ b/lib/detect.js @@ -68,17 +68,17 @@ export async function getLocation(ip) { const result = lookup.get(ip); const country = result?.country?.iso_code ?? result?.registered_country?.iso_code; - const subdivision1 = result?.subdivisions[0].iso_code; - const subdivision2 = result?.subdivisions[1].iso_code; + const subdivision1 = result?.subdivisions[0]?.iso_code; + const subdivision2 = result?.subdivisions[1]?.iso_code; const city = result?.city?.names?.en; return { country, subdivision1, subdivision2, city }; } export async function getClientInfo(req, { screen }) { - const location = await getLocation(ip); const userAgent = req.headers['user-agent']; const ip = getIpAddress(req); + const location = await getLocation(ip); const country = location.country; const subdivision1 = location.subdivision1; const subdivision2 = location.subdivision2; diff --git a/lib/prisma.ts b/lib/prisma.ts index 5136958c..11db678b 100644 --- a/lib/prisma.ts +++ b/lib/prisma.ts @@ -135,6 +135,9 @@ function getFilterQuery(filters = {}, params = []): string { case 'os': case 'browser': case 'device': + case 'subdivision1': + case 'subdivision2': + case 'city': case 'country': arr.push(`and ${key}=$${params.length + 1}`); params.push(decodeURIComponent(filter)); @@ -171,11 +174,24 @@ function parseFilters( params = [], sessionKey = 'session_id', ) { - const { domain, url, eventUrl, referrer, os, browser, device, country, eventName, query } = - filters; + const { + domain, + url, + eventUrl, + referrer, + os, + browser, + device, + country, + subdivision1, + subdivision2, + city, + eventName, + query, + } = filters; const pageviewFilters = { domain, url, referrer, query }; - const sessionFilters = { os, browser, device, country }; + const sessionFilters = { os, browser, device, country, subdivision1, subdivision2, city }; const eventFilters = { url: eventUrl, eventName }; return { @@ -184,7 +200,7 @@ function parseFilters( eventFilters, event: { eventName }, joinSession: - os || browser || device || country + os || browser || device || country || subdivision1 || subdivision2 || city ? `inner join session on website_event.${sessionKey} = session.${sessionKey}` : '', filterQuery: getFilterQuery(filters, params), diff --git a/lib/session.js b/lib/session.js index f6698480..f8b35a76 100644 --- a/lib/session.js +++ b/lib/session.js @@ -44,7 +44,8 @@ export async function findSession(req) { throw new Error(`Website not found: ${websiteId}`); } - const { userAgent, browser, os, ip, country, device } = await getClientInfo(req, payload); + const { userAgent, browser, os, ip, country, subdivision1, subdivision2, city, device } = + await getClientInfo(req, payload); const sessionId = uuid(websiteId, hostname, ip, userAgent); // Clickhouse does not require session lookup @@ -59,6 +60,9 @@ export async function findSession(req) { screen, language, country, + subdivision1, + subdivision2, + city, }; } @@ -84,6 +88,9 @@ export async function findSession(req) { screen, language, country, + subdivision1, + subdivision2, + city, }); } catch (e) { if (!e.message.toLowerCase().includes('unique constraint')) { diff --git a/pages/api/collect.ts b/pages/api/collect.ts index 63bbdf1b..24dcbee1 100644 --- a/pages/api/collect.ts +++ b/pages/api/collect.ts @@ -19,6 +19,9 @@ export interface NextApiRequestCollect extends NextApiRequest { screen: string; language: string; country: string; + subdivision1: string; + subdivision2: string; + city: string; }; } diff --git a/pages/api/websites/[id]/metrics.ts b/pages/api/websites/[id]/metrics.ts index 94358076..70dcd7a6 100644 --- a/pages/api/websites/[id]/metrics.ts +++ b/pages/api/websites/[id]/metrics.ts @@ -46,6 +46,9 @@ export interface WebsiteMetricsRequestQuery { browser: string; device: string; country: string; + subdivision1: string; + subdivision2: string; + city: string; } export default async ( @@ -66,6 +69,9 @@ export default async ( browser, device, country, + subdivision1, + subdivision2, + city, } = req.query; if (req.method === 'GET') { @@ -86,6 +92,9 @@ export default async ( browser, device, country, + subdivision1, + subdivision2, + city, }, }); @@ -131,6 +140,9 @@ export default async ( browser: type !== 'browser' ? browser : undefined, device: type !== 'device' ? device : undefined, country: type !== 'country' ? country : undefined, + subdivision1: type !== 'subdivision1' ? subdivision1 : undefined, + subdivision2: type !== 'subdivision2' ? subdivision2 : undefined, + city: type !== 'city' ? city : undefined, eventUrl: type !== 'url' && table === 'event' ? url : undefined, query: type === 'query' && table !== 'event' ? true : undefined, }; diff --git a/pages/api/websites/[id]/pageviews.ts b/pages/api/websites/[id]/pageviews.ts index 3264cb29..f2176049 100644 --- a/pages/api/websites/[id]/pageviews.ts +++ b/pages/api/websites/[id]/pageviews.ts @@ -21,6 +21,9 @@ export interface WebsitePageviewRequestQuery { browser?: string; device?: string; country?: string; + subdivision1?: string; + subdivision2?: string; + city?: string; } export default async ( @@ -42,6 +45,9 @@ export default async ( browser, device, country, + subdivision1, + subdivision2, + city, } = req.query; if (req.method === 'GET') { @@ -70,6 +76,9 @@ export default async ( browser, device, country, + subdivision1, + subdivision2, + city, }, }), getPageviewStats(websiteId, { @@ -84,6 +93,9 @@ export default async ( browser, device, country, + subdivision1, + subdivision2, + city, }, }), ]); diff --git a/pages/api/websites/[id]/stats.ts b/pages/api/websites/[id]/stats.ts index 27262615..138ede60 100644 --- a/pages/api/websites/[id]/stats.ts +++ b/pages/api/websites/[id]/stats.ts @@ -17,6 +17,9 @@ export interface WebsiteStatsRequestQuery { browser: string; device: string; country: string; + subdivision1: string; + subdivision2: string; + city: string; } export default async ( @@ -26,7 +29,20 @@ export default async ( await useCors(req, res); await useAuth(req, res); - const { id: websiteId, startAt, endAt, url, referrer, os, browser, device, country } = req.query; + const { + id: websiteId, + startAt, + endAt, + url, + referrer, + os, + browser, + device, + country, + subdivision1, + subdivision2, + city, + } = req.query; if (req.method === 'GET') { if (!(await canViewWebsite(req.auth, websiteId))) { @@ -50,6 +66,9 @@ export default async ( browser, device, country, + subdivision1, + subdivision2, + city, }, }); const prevPeriod = await getWebsiteStats(websiteId, { @@ -62,6 +81,9 @@ export default async ( browser, device, country, + subdivision1, + subdivision2, + city, }, }); diff --git a/queries/analytics/event/saveEvent.ts b/queries/analytics/event/saveEvent.ts index 430ffd63..81cb10b4 100644 --- a/queries/analytics/event/saveEvent.ts +++ b/queries/analytics/event/saveEvent.ts @@ -19,6 +19,9 @@ export async function saveEvent(args: { screen?: string; language?: string; country?: string; + subdivision1?: string; + subdivision2?: string; + city?: string; }) { return runQuery({ [PRISMA]: () => relationalQuery(args), @@ -36,38 +39,33 @@ async function relationalQuery(data: { }) { const { websiteId, id: sessionId, url, eventName, eventData, referrer } = data; - const params = { - id: uuid(), - websiteId, - sessionId, - url: url?.substring(0, URL_LENGTH), - referrer: referrer?.substring(0, URL_LENGTH), - eventType: EVENT_TYPE.customEvent, - eventName: eventName?.substring(0, EVENT_NAME_LENGTH), - eventData, - }; - return prisma.client.websiteEvent.create({ - data: params, + data: { + id: uuid(), + websiteId, + sessionId, + url: url?.substring(0, URL_LENGTH), + referrer: referrer?.substring(0, URL_LENGTH), + eventType: EVENT_TYPE.customEvent, + eventName: eventName?.substring(0, EVENT_NAME_LENGTH), + eventData, + }, }); } -async function clickhouseQuery(data: { - id: string; - websiteId: string; - url: string; - referrer?: string; - eventName?: string; - eventData?: any; - hostname?: string; - browser?: string; - os?: string; - device?: string; - screen?: string; - language?: string; - country?: string; -}) { - const { websiteId, id: sessionId, url, eventName, eventData, country, ...args } = data; +async function clickhouseQuery(data) { + const { + websiteId, + id: sessionId, + url, + eventName, + eventData, + country, + subdivision1, + subdivision2, + city, + ...args + } = data; const { getDateFormat, sendMessage } = kafka; const website = await cache.fetchWebsite(websiteId); @@ -75,13 +73,16 @@ async function clickhouseQuery(data: { website_id: websiteId, session_id: sessionId, event_id: uuid(), + rev_id: website?.revId || 0, + country: country ? country : null, + subdivision1: subdivision1 ? subdivision1 : null, + subdivision2: subdivision2 ? subdivision2 : null, + city: city ? city : null, url: url?.substring(0, URL_LENGTH), event_type: EVENT_TYPE.customEvent, event_name: eventName?.substring(0, EVENT_NAME_LENGTH), event_data: eventData ? JSON.stringify(eventData) : null, - rev_id: website?.revId || 0, created_at: getDateFormat(new Date()), - country: country ? country : null, ...args, }; diff --git a/queries/analytics/pageview/savePageView.ts b/queries/analytics/pageview/savePageView.ts index ae702211..6682edad 100644 --- a/queries/analytics/pageview/savePageView.ts +++ b/queries/analytics/pageview/savePageView.ts @@ -17,6 +17,9 @@ export async function savePageView(args: { screen?: string; language?: string; country?: string; + subdivision1?: string; + subdivision2?: string; + city?: string; }) { return runQuery({ [PRISMA]: () => relationalQuery(args), @@ -45,19 +48,32 @@ async function relationalQuery(data: { } async function clickhouseQuery(data) { - const { websiteId, id: sessionId, url, referrer, country, ...args } = data; - const website = await cache.fetchWebsite(websiteId); + const { + websiteId, + id: sessionId, + url, + referrer, + country, + subdivision1, + subdivision2, + city, + ...args + } = data; const { getDateFormat, sendMessage } = kafka; + const website = await cache.fetchWebsite(websiteId); const message = { - session_id: sessionId, website_id: websiteId, + session_id: sessionId, + rev_id: website?.revId || 0, + country: country ? country : null, + subdivision1: subdivision1 ? subdivision1 : null, + subdivision2: subdivision2 ? subdivision2 : null, + city: city ? city : null, url: url?.substring(0, URL_LENGTH), referrer: referrer?.substring(0, URL_LENGTH), - rev_id: website?.revId || 0, - created_at: getDateFormat(new Date()), - country: country ? country : null, event_type: EVENT_TYPE.pageView, + created_at: getDateFormat(new Date()), ...args, }; diff --git a/queries/analytics/session/createSession.ts b/queries/analytics/session/createSession.ts index fe15f11c..3afa9c9a 100644 --- a/queries/analytics/session/createSession.ts +++ b/queries/analytics/session/createSession.ts @@ -31,8 +31,24 @@ async function clickhouseQuery(data: { screen?: string; language?: string; country?: string; + subdivision1?: string; + subdivision2?: string; + city?: string; }) { - const { id, websiteId, hostname, browser, os, device, screen, language, country } = data; + const { + id, + websiteId, + hostname, + browser, + os, + device, + screen, + language, + country, + subdivision1, + subdivision2, + city, + } = data; const { getDateFormat, sendMessage } = kafka; const website = await cache.fetchWebsite(websiteId); @@ -46,6 +62,9 @@ async function clickhouseQuery(data: { screen, language, country, + subdivision1, + subdivision2, + city, rev_id: website?.revId || 0, created_at: getDateFormat(new Date()), }; diff --git a/queries/analytics/session/getSession.ts b/queries/analytics/session/getSession.ts index 09b238f2..a3400d26 100644 --- a/queries/analytics/session/getSession.ts +++ b/queries/analytics/session/getSession.ts @@ -31,7 +31,10 @@ async function clickhouseQuery({ id: sessionId }: { id: string }) { device, screen, language, - country + country, + subdivision1, + subdivision2, + city from event where session_id = {sessionId:UUID} limit 1`, diff --git a/queries/analytics/session/getSessions.ts b/queries/analytics/session/getSessions.ts index 46057277..910efc30 100644 --- a/queries/analytics/session/getSessions.ts +++ b/queries/analytics/session/getSessions.ts @@ -40,7 +40,10 @@ async function clickhouseQuery(websites: string[], startAt: Date) { device, screen, language, - country + country, + subdivision1, + subdivision2, + city from event where ${websites && websites.length > 0 ? `website_id in {websites:Array(UUID)}` : '0 = 0'} and created_at >= {startAt:DateTime('UTC')}`, diff --git a/scripts/build-geo.js b/scripts/build-geo.js index e84c1564..8a096f4b 100644 --- a/scripts/build-geo.js +++ b/scripts/build-geo.js @@ -12,7 +12,7 @@ let url = if (process.env.MAXMIND_LICENSE_KEY) { url = `https://download.maxmind.com/app/geoip_download` + - `?edition_id=GeoLite2-Country&license_key=${process.env.MAXMIND_LICENSE_KEY}&suffix=tar.gz`; + `?edition_id=GeoLite2-City&license_key=${process.env.MAXMIND_LICENSE_KEY}&suffix=tar.gz`; } const dest = path.resolve(__dirname, '../node_modules/.geo'); diff --git a/scripts/test.js b/scripts/test.js index a913f1b0..c1eb9b3b 100644 --- a/scripts/test.js +++ b/scripts/test.js @@ -1,10 +1,7 @@ /* eslint-disable no-console */ require('dotenv').config(); -const fs = require('fs'); const path = require('path'); -const https = require('https'); -const zlib = require('zlib'); -const tar = require('tar'); + const maxmind = require('maxmind'); async function getLocation() {