|
| 1 | +import { runHogQL } from "@/lib/posthog-query"; |
| 2 | + |
/**
 * PostHog project id handed to `runHogQL`, taken from the
 * `POSTHOG_PROJECT_ID` environment variable (undefined when it is unset).
 */
const { POSTHOG_PROJECT_ID: POSTHOG_BUILDER_HUB_PROJECT_ID } = process.env;

/** HogQL predicate that keeps only events recorded on the two hosts listed. */
const HOGQL_HOST_FILTER = `properties.$host IN ('build.avax.network', 'www.build.avax.network')`;
| 7 | + |
/**
 * HogQL expression that buckets every pageview into a single `source` string.
 *
 * Channel-mix only — no handle/page extraction. X (t.co) and LinkedIn strip
 * the originating tweet/post from the Referer header, so organic social can
 * only be attributed at the channel level. UTM-tagged links keep full
 * granularity via the first branch (use utm_content for the poster handle).
 *
 * Priority: explicit UTM (excluding PostHog's '$direct' sentinel) →
 * sign-in/OAuth redirects → broad channel → bare domain → "Direct".
 *
 * Domain-family branches (LinkedIn, Telegram) match either the bare domain or
 * a dot-prefixed suffix. A plain `endsWith(domain, 't.me')` would also match
 * unrelated domains such as `about.me`, so the leading dot is required.
 */
const SOURCE_BUCKET_EXPR = `
  multiIf(
    notEmpty(properties.utm_source) AND properties.utm_source != '$direct',
    concat(properties.utm_source, ' / ', coalesce(properties.utm_campaign, '(no campaign)')),
    properties.$referring_domain IN (
      'accounts.google.com', 'login.microsoftonline.com', 'github.com'
    ),
    'Sign-in redirect',
    properties.$referring_domain IN ('x.com', 'twitter.com', 't.co'),
    'X (untagged)',
    properties.$referring_domain = 'linkedin.com'
      OR endsWith(properties.$referring_domain, '.linkedin.com'),
    'LinkedIn (untagged)',
    properties.$referring_domain IN ('youtube.com', 'www.youtube.com', 'youtu.be'),
    'YouTube',
    properties.$referring_domain IN ('discord.com', 'discord.gg'),
    'Discord',
    properties.$referring_domain = 't.me'
      OR endsWith(properties.$referring_domain, '.t.me'),
    'Telegram',
    properties.$referring_domain IN ('build.avax.network', 'www.build.avax.network'),
    'BuildersHub (internal)',
    notEmpty(properties.$referring_domain) AND properties.$referring_domain != '$direct',
    properties.$referring_domain,
    'Direct'
  )
`.trim();
| 45 | + |
/**
 * One traffic-source bucket for a hackathon, i.e. the element type of the
 * arrays returned by `getTopHackathonTrafficSourcesBatch`.
 */
export interface HackathonTrafficSource {
  /** Bucket label produced by the source-bucketing expression (falls back to "Direct"). */
  source: string;
  /** Number of distinct `distinct_id` values seen in this bucket. */
  visitors: number;
  /** Number of pageviews in this bucket whose pathname contains `/registration-form`. */
  reachedRegister: number;
}
| 51 | + |
/**
 * Aggregate columns of a query result row before normalisation.
 *
 * The two counters are typed loosely (number, string or null) and are passed
 * through `toNumber` before being exposed to callers.
 */
interface RawRow {
  /** Bucket label computed by the query. */
  source: string;
  /** Raw distinct-visitor count. */
  visitors: number | string | null;
  /** Raw count of registration-form pageviews. */
  reachedRegister: number | string | null;
}
| 57 | + |
/**
 * Normalises a raw counter value to a finite number.
 *
 * @param value Counter as received from the query result (number, numeric
 *   string, null or undefined).
 * @returns The numeric value, or 0 when the input is null/undefined, cannot be
 *   parsed, or is not finite (NaN / ±Infinity) — regardless of whether it
 *   arrived as a number or as a string.
 */
function toNumber(value: number | string | null | undefined): number {
  if (value === null || value === undefined) return 0;
  const parsed = typeof value === "number" ? value : Number(value);
  // Treat NaN and ±Infinity the same way on both the number and string paths.
  return Number.isFinite(parsed) ? parsed : 0;
}
| 62 | + |
/**
 * UUIDs only. Hackathon ids are expected to be UUIDs (the original note points
 * at prisma/schema.prisma), so the id is restricted to the canonical
 * 8-4-4-4-12 hexadecimal shape rather than escape-quoted. Either letter case
 * is accepted and the version nibble is not checked.
 *
 * The shape contains no quote or backslash characters, so an accepted id can
 * be placed inside a single-quoted HogQL string literal as-is.
 *
 * @param id Candidate hackathon id.
 * @returns `true` only when `id` is exactly one UUID-shaped token.
 */
function isSafeHackathonId(id: string): boolean {
  return /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/.test(
    id,
  );
}
| 71 | + |
/** Optional tuning knobs for the top-traffic-source queries. */
export interface TopTrafficSourcesOptions {
  /**
   * Lookback window in days. Default 90.
   * `getTopHackathonTrafficSourcesBatch` floors the value and clamps it to 1–365.
   */
  days?: number;
  /**
   * Number of source buckets to return. Default 3.
   * `getTopHackathonTrafficSourcesBatch` floors the value and clamps it to 1–20.
   */
  limit?: number;
}
| 78 | + |
/**
 * Result row of the batched query: the `RawRow` aggregates plus the hackathon
 * the row belongs to.
 */
interface BatchRow extends RawRow {
  /** Id extracted from the page path by the query; rows where it is null are skipped. */
  hackathon_id: string | null;
}
| 82 | + |
/**
 * Batched variant — top-N traffic sources for a list of hackathons in a single
 * HogQL query. Used by the Builder Insights event-history view where we'd
 * otherwise make one HTTP roundtrip per row.
 *
 * @param hackathonIds Hackathon ids to look up. Duplicates are collapsed, and
 *   ids rejected by `isSafeHackathonId` are never interpolated into the query.
 * @param options `days` — lookback window, floored and clamped to 1–365
 *   (default 90); `limit` — buckets per hackathon, floored and clamped to 1–20
 *   (default 3). A `NaN` value falls back to the default.
 * @returns Map keyed by every distinct requested id. Ids with no matching rows
 *   (or rejected by validation) map to an empty array.
 * @throws Whatever `runHogQL` rejects with; the rejection is not caught here.
 */
export async function getTopHackathonTrafficSourcesBatch(
  hackathonIds: string[],
  { days = 90, limit = 3 }: TopTrafficSourcesOptions = {},
): Promise<Map<string, HackathonTrafficSource[]>> {
  const result = new Map<string, HackathonTrafficSource[]>();
  // Seed every requested id so callers always get an array back, including
  // for ids that fail validation and are therefore never queried.
  for (const id of hackathonIds) result.set(id, []);

  // Map keys are already de-duplicated; only validated ids reach the query.
  const safeIds = Array.from(result.keys()).filter(isSafeHackathonId);
  if (safeIds.length === 0) return result;

  // Math.min/Math.max propagate NaN, which would otherwise be interpolated
  // into the query text, so NaN is replaced by the default before clamping.
  const clampInt = (value: number, min: number, max: number, fallback: number): number =>
    Number.isNaN(value) ? fallback : Math.max(min, Math.min(max, Math.floor(value)));
  const safeDays = clampInt(days, 1, 365, 90);
  const safeLimit = clampInt(limit, 1, 20, 3);
  const idList = safeIds.map((id) => `'${id}'`).join(", ");

  // Hackathon attribution: extract the UUID from /events/<id> or /hackathons/<id>
  // URLs. Both routes serve the same hackathon today.
  const hackathonIdFromPath =
    "extract(properties.$pathname, '^/(?:hackathons|events)/([a-fA-F0-9-]{36})')";

  // `LIMIT N BY column` is ClickHouse syntax: keep the first N rows per group
  // after ORDER BY — gives top-N per hackathon in one query. Pageviews that
  // would bucket as "Direct" (no referrer + no real UTM) are filtered out up
  // front so they never compete for a top-N slot. `source` is a secondary sort
  // key so that ties on `visitors` resolve the same way on every run.
  //
  // NOTE(review): `visitors` counts distinct ids while `reachedRegister`
  // counts matching pageview rows, so the two are not directly comparable —
  // confirm this is intended.
  const query = `
    SELECT
      ${hackathonIdFromPath} AS hackathon_id,
      ${SOURCE_BUCKET_EXPR} AS source,
      count(DISTINCT distinct_id) AS visitors,
      countIf(properties.$pathname LIKE '%/registration-form%') AS reachedRegister
    FROM events
    WHERE event = '$pageview'
      AND ${HOGQL_HOST_FILTER}
      AND timestamp >= now() - INTERVAL ${safeDays} DAY
      AND ${hackathonIdFromPath} IN (${idList})
      AND (
        (notEmpty(properties.$referring_domain) AND properties.$referring_domain != '$direct')
        OR (notEmpty(properties.utm_source) AND properties.utm_source != '$direct')
      )
    GROUP BY hackathon_id, source
    ORDER BY hackathon_id, visitors DESC, source ASC
    LIMIT ${safeLimit} BY hackathon_id
  `.trim();

  const rows = await runHogQL<BatchRow>({
    projectId: POSTHOG_BUILDER_HUB_PROJECT_ID,
    query,
  });

  for (const row of rows) {
    if (!row.hackathon_id) continue;
    // Ignore rows for ids that were not requested.
    const bucket = result.get(row.hackathon_id);
    if (!bucket) continue;
    bucket.push({
      source: row.source ?? "Direct",
      visitors: toNumber(row.visitors),
      reachedRegister: toNumber(row.reachedRegister),
    });
  }

  return result;
}
0 commit comments