-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathextract.js
More file actions
114 lines (97 loc) · 3.49 KB
/
Copy pathextract.js
File metadata and controls
114 lines (97 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import { readFileSync, writeFileSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
// ES modules do not define __dirname, so derive this file's directory from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Root identifiers that getProvider() accepts as the start of a provider chain.
const PROVIDERS = ['olympics', 'worldbank', 'drWho', 'expenditure', 'shared'];
// Gather the right-hand side of every top-level `let name = ...` binding in code.
//
// A binding starts on a line beginning with `let` at column 0. Following lines
// are folded into it (trimmed, joined with single spaces) for as long as they
// are indented or begin with a dot. Folding stops at a blank line, at a line
// whose trimmed text starts with `let `, `chart.`, `youguess.`, `compost.`,
// `table.`, `youdraw.` or `//`, or at any other unindented line.
//
// @param {string} code - Snippet source text, lines separated by '\n'.
// @returns {string[]} One flattened right-hand-side string per binding, in source order.
function extractBindings(code) {
  const rows = code.split('\n');
  const stopper = /^(let\s|chart\.|youguess\.|compost\.|table\.|youdraw\.|\/\/)/;

  // True when `raw` (with trimmed text `body`) continues the current binding.
  const continuesBinding = (raw, body) =>
    body !== '' && !stopper.test(body) && (/^\s/.test(raw) || body.startsWith('.'));

  const found = [];
  let pos = 0;
  while (pos < rows.length) {
    const head = /^let\s+\w+\s*=\s*(.*)/.exec(rows[pos]);
    pos += 1;
    if (head === null) continue;

    const pieces = [head[1].trim()];
    for (; pos < rows.length; pos += 1) {
      const body = rows[pos].trim();
      // The line that ends a binding is not consumed: the outer loop looks at
      // it again, so a `let` directly after a chain still starts a new binding.
      if (!continuesBinding(rows[pos], body)) break;
      pieces.push(body);
    }
    found.push(pieces.join(' ').trim());
  }
  return found;
}
// Identify which known provider a chain is rooted at.
//
// A chain belongs to a provider when it equals the provider name exactly or
// starts with the name followed by a dot or a space.
//
// @param {string} chain - Flattened binding text.
// @returns {string|null} The first matching entry of PROVIDERS, or null if none matches.
function getProvider(chain) {
  const owner = PROVIDERS.find(
    (name) => chain === name || chain.startsWith(`${name}.`) || chain.startsWith(`${name} `)
  );
  return owner ?? null;
}
// Parse a normalised chain string into an array of member name steps.
// e.g. "olympics.'filter data'.'Games is'.then.paging.take(8)" ->
// ["olympics", "filter data", "Games is", "then", "paging", "take(8)"]
//
// Whitespace around the dots that separate members is dropped, but text inside
// '...' quoted names and (...) argument lists is left untouched, so a name such
// as 'Congo, Dem. Rep.' keeps its internal spacing.
//
// @param {string} chain - Flattened chain text (one binding's right-hand side).
// @returns {string[]} The root identifier followed by each member step. Quoted
//   names are returned without their quotes; call arguments stay attached to
//   the step they follow.
function parseSteps(chain) {
  // Quoted names and argument lists are matched first and copied verbatim; only
  // separator dots outside them lose their surrounding whitespace, so
  // "byCountry .'United States'" -> "byCountry.'United States'".
  const flat = chain.replace(/'[^']*'|\([^)]*\)|\s*\.\s*/g, (token) =>
    token.startsWith("'") || token.startsWith('(') ? token : '.'
  );

  const steps = [];
  const firstDot = flat.indexOf('.');
  // Everything before the first dot is the root identifier.
  steps.push(firstDot < 0 ? flat : flat.slice(0, firstDot));

  // Each step is either .'quoted name' with optional (args), or .identifier
  // with optional (args).
  const re = /\.(?:'([^']+)'(\([^)]*\))?|(\w+(?:\([^)]*\))?))/g;
  // Start scanning at the first separator dot; with no dot there is nothing to scan.
  re.lastIndex = firstDot < 0 ? flat.length : firstDot;
  let m;
  while ((m = re.exec(flat)) !== null) {
    steps.push(m[1] !== undefined ? m[1] + (m[2] ?? '') : m[3]);
  }
  return steps;
}
// Script body: load the snippet catalogue and the per-snippet hint overrides,
// extract provider chains from every visible snippet, print a summary, and
// write the result to data/eval-snippets.json.
const dataDir = join(__dirname, 'data');
const readJson = (fileName) => JSON.parse(readFileSync(join(dataDir, fileName), 'utf8'));

const snippets = readJson('snippets-thegamma.json');
const extraHints = readJson('extra-hints.json');

// Second steps of a `shared` chain that trigger the generated hint below.
const sharedSelectors = ['by date', 'by tag'];

const results = [];
let skippedNoProvider = 0;
let skippedHidden = 0;

for (const snippet of snippets) {
  // Snippets flagged as hidden are counted and left out.
  if (snippet.hidden) {
    skippedHidden += 1;
    continue;
  }

  // Keep only bindings whose chain is rooted at a known provider.
  const providerChains = extractBindings(snippet.code).filter((text) =>
    Boolean(getProvider(text))
  );
  if (providerChains.length === 0) {
    skippedNoProvider += 1;
    continue;
  }

  const chains = providerChains.map((text, index) => {
    const entry = { provider: getProvider(text), steps: parseSteps(text) };

    const isSharedLookup =
      entry.provider === 'shared' &&
      entry.steps.length >= 4 &&
      sharedSelectors.includes(entry.steps[1]);
    if (isSharedLookup) {
      const [, , origin, label] = entry.steps;
      entry.hint = `Use data source from ${origin} named '${label}'`;
    }

    // Overrides are keyed by snippet id, then by the chain's position among
    // that snippet's provider chains.
    const override = extraHints[String(snippet.id)]?.[index];
    if (override) {
      entry.chainHint = override;
    }
    return entry;
  });

  results.push({
    id: snippet.id,
    title: snippet.title,
    description: snippet.description,
    chains,
  });
}

console.log(`Extracted: ${results.length} skipped: ${skippedNoProvider} no-provider, ${skippedHidden} hidden\n`);
results.forEach((entry) => {
  console.log(`#${entry.id} ${entry.title}`);
  entry.chains.forEach((chain) => {
    console.log(` [${chain.provider}] ${chain.steps.join(' > ')}`);
  });
  console.log();
});

writeFileSync(join(dataDir, 'eval-snippets.json'), JSON.stringify(results, null, 2));
console.log('Written to data/eval-snippets.json');