-
Notifications
You must be signed in to change notification settings - Fork 587
Expand file tree
/
Copy pathhackathonTrafficSources.ts
More file actions
149 lines (133 loc) · 5.43 KB
/
hackathonTrafficSources.ts
File metadata and controls
149 lines (133 loc) · 5.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import { runHogQL } from "@/lib/posthog-query";
const POSTHOG_BUILDER_HUB_PROJECT_ID = process.env.POSTHOG_PROJECT_ID;
const HOGQL_HOST_FILTER =
"properties.$host IN ('build.avax.network', 'www.build.avax.network')";
/**
* HogQL expression that buckets every pageview into a single `source` string.
*
* Channel-mix only — no handle/page extraction. X (t.co) and LinkedIn strip
* the originating tweet/post from the Referer header, so organic social can
* only be attributed at the channel level. UTM-tagged links keep full
* granularity via the first branch (use utm_content for the poster handle).
*
* Priority: explicit UTM (excluding PostHog's '$direct' sentinel) →
* sign-in/OAuth redirects → broad channel → bare domain → "Direct".
*/
const SOURCE_BUCKET_EXPR = `
multiIf(
notEmpty(properties.utm_source) AND properties.utm_source != '$direct',
concat(properties.utm_source, ' / ', coalesce(properties.utm_campaign, '(no campaign)')),
properties.$referring_domain IN (
'accounts.google.com', 'login.microsoftonline.com', 'github.com'
),
'Sign-in redirect',
properties.$referring_domain IN ('x.com', 'twitter.com', 't.co'),
'X (untagged)',
properties.$referring_domain = 'linkedin.com'
OR endsWith(properties.$referring_domain, '.linkedin.com'),
'LinkedIn (untagged)',
properties.$referring_domain IN ('youtube.com', 'www.youtube.com', 'youtu.be'),
'YouTube',
properties.$referring_domain IN ('discord.com', 'discord.gg'),
'Discord',
endsWith(properties.$referring_domain, 't.me'),
'Telegram',
properties.$referring_domain IN ('build.avax.network', 'www.build.avax.network'),
'BuildersHub (internal)',
notEmpty(properties.$referring_domain) AND properties.$referring_domain != '$direct',
properties.$referring_domain,
'Direct'
)
`.trim();
export interface HackathonTrafficSource {
source: string;
visitors: number;
reachedRegister: number;
}
interface RawRow {
source: string;
visitors: number | string | null;
reachedRegister: number | string | null;
}
function toNumber(value: number | string | null | undefined): number {
if (value === null || value === undefined) return 0;
return typeof value === "number" ? value : Number(value) || 0;
}
/**
* UUIDs only. Hackathon ids in this codebase are uuid v4 (see prisma/schema.prisma),
* so we restrict to that shape rather than escape-quoting. Anything else returns
* an empty result rather than risk a query injection through PostHog.
*/
function isSafeHackathonId(id: string): boolean {
return /^[a-zA-Z0-9_-]{1,64}$/.test(id);
}
export interface TopTrafficSourcesOptions {
/** Lookback window in days. Default 90. */
days?: number;
/** Number of source buckets to return. Default 3. */
limit?: number;
}
interface BatchRow extends RawRow {
hackathon_id: string | null;
}
/**
* Batched variant — top-N traffic sources for a list of hackathons in a single
* HogQL query. Used by the Builder Insights event-history view where we'd
* otherwise make one HTTP roundtrip per row. Returns a map keyed by
* hackathonId; events missing from the result are returned as empty arrays.
*/
export async function getTopHackathonTrafficSourcesBatch(
hackathonIds: string[],
{ days = 90, limit = 3 }: TopTrafficSourcesOptions = {},
): Promise<Map<string, HackathonTrafficSource[]>> {
const safeIds = Array.from(new Set(hackathonIds.filter(isSafeHackathonId)));
const result = new Map<string, HackathonTrafficSource[]>();
for (const id of safeIds) result.set(id, []);
if (safeIds.length === 0) return result;
const safeDays = Math.max(1, Math.min(365, Math.floor(days)));
const safeLimit = Math.max(1, Math.min(20, Math.floor(limit)));
const idList = safeIds.map((id) => `'${id}'`).join(", ");
// Hackathon attribution: extract the UUID from /events/<id> or /hackathons/<id>
// URLs. Both routes serve the same hackathon today.
//
// `LIMIT N BY column` is ClickHouse syntax: keep the first N rows per group
// after ORDER BY — gives top-N per hackathon in one query. We pre-filter
// pageviews that would bucket as "Direct" (no referrer + no real UTM) so
// they never compete for a top-N slot.
const HACKATHON_ID_FROM_PATH =
"extract(properties.$pathname, '^/(?:hackathons|events)/([a-fA-F0-9-]{36})')";
const query = `
SELECT
${HACKATHON_ID_FROM_PATH} AS hackathon_id,
${SOURCE_BUCKET_EXPR} AS source,
count(DISTINCT distinct_id) AS visitors,
countIf(properties.$pathname LIKE '%/registration-form%') AS reachedRegister
FROM events
WHERE event = '$pageview'
AND ${HOGQL_HOST_FILTER}
AND timestamp >= now() - INTERVAL ${safeDays} DAY
AND ${HACKATHON_ID_FROM_PATH} IN (${idList})
AND (
(notEmpty(properties.$referring_domain) AND properties.$referring_domain != '$direct')
OR (notEmpty(properties.utm_source) AND properties.utm_source != '$direct')
)
GROUP BY hackathon_id, source
ORDER BY hackathon_id, visitors DESC
LIMIT ${safeLimit} BY hackathon_id
`.trim();
const rows = await runHogQL<BatchRow>({
projectId: POSTHOG_BUILDER_HUB_PROJECT_ID,
query,
});
for (const row of rows) {
if (!row.hackathon_id) continue;
const bucket = result.get(row.hackathon_id);
if (!bucket) continue;
bucket.push({
source: row.source ?? "Direct",
visitors: toNumber(row.visitors),
reachedRegister: toNumber(row.reachedRegister),
});
}
return result;
}