Skip to content

Commit 2e53cdf

Browse files
committed
chore(cli): fetch programming languages names from wikidata
1 parent 4b725ad commit 2e53cdf

File tree

6 files changed

+188
-48
lines changed

6 files changed

+188
-48
lines changed

catalog-data.ttl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
@prefix owl: <http://www.w3.org/2002/07/owl#> .
44
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
55
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
6+
@prefix wd: <http://www.wikidata.org/entity/> .
7+
@prefix wdt: <http://www.wikidata.org/prop/direct/> .
68
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
79

810
<http://harth.org/andreas/foaf#ah> a ex:Person ;
@@ -12,6 +14,57 @@
1214
ex:name "Andreas Harth" ;
1315
ex:webid <http://harth.org/andreas/foaf#ah> .
1416

17+
wd:Q161053 ex:name "Ruby" ;
18+
wdt:P31 wd:Q9143 .
19+
20+
wd:Q17147733 ex:name "Swift" ;
21+
wdt:P31 wd:Q9143 .
22+
23+
wd:Q2005 ex:name "JavaScript" ;
24+
wdt:P31 wd:Q9143 .
25+
26+
wd:Q2370 ex:name "C#" ;
27+
wdt:P31 wd:Q9143 .
28+
29+
wd:Q251 ex:name "Java" ;
30+
wdt:P31 wd:Q9143 .
31+
32+
wd:Q28865 ex:name "Python" ;
33+
wdt:P31 wd:Q9143 .
34+
35+
wd:Q34010 ex:name "Haskell" ;
36+
wdt:P31 wd:Q9143 .
37+
38+
wd:Q37227 ex:name "Go" ;
39+
wdt:P31 wd:Q9143 .
40+
41+
wd:Q3816639 ex:name "Kotlin" ;
42+
wdt:P31 wd:Q9143 .
43+
44+
wd:Q406009 ex:name "Dart" ;
45+
wdt:P31 wd:Q9143 .
46+
47+
wd:Q42478 ex:name "Perl" ;
48+
wdt:P31 wd:Q9143 .
49+
50+
wd:Q460584 ex:name "Scala" ;
51+
wdt:P31 wd:Q9143 .
52+
53+
wd:Q51885456 ex:name "Zig" ;
54+
wdt:P31 wd:Q9143 .
55+
56+
wd:Q5362035 ex:name "Elixir" ;
57+
wdt:P31 wd:Q9143 .
58+
59+
wd:Q575650 ex:name "Rust" ;
60+
wdt:P31 wd:Q9143 .
61+
62+
wd:Q59 ex:name "PHP" ;
63+
wdt:P31 wd:Q9143 .
64+
65+
wd:Q978185 ex:name "TypeScript" ;
66+
wdt:P31 wd:Q9143 .
67+
1568
<https://45h.inrupt.net/profile/card#me> a ex:Person ;
1669
ex:name "Luc" ;
1770
ex:webid <https://45h.inrupt.net/profile/card#me> .

cli/aggregations/wikidata.ts

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import { DataFactory, Store } from 'n3'
2+
import { dereferenceToStore } from 'rdf-dereference-store'
3+
import { queryDataset, prefixes, ex, schema } from '../util.ts'
4+
import type { NamedNode } from '@rdfjs/types'
5+
6+
/**
 * Entry point for the Wikidata aggregation.
 *
 * Hands `dataset` to `aggregateProgrammingLanguages` and resolves with the
 * store that call produces.
 */
export async function aggregateWikidata(dataset: Store): Promise<Store> {
  const enriched = await aggregateProgrammingLanguages(dataset)
  return enriched
}
9+
10+
/**
 * Looks up the `schema:name` of `language` in `store` for a single language tag.
 *
 * @param store - Store the query runs against.
 * @param language - Subject whose name is looked up.
 * @param tag - Language tag the literal must carry; compared with `LANG(?name)`.
 * @returns The value of the first binding's `name`, or `undefined` when the
 *   query yields no binding.
 */
async function getName(store: Store, language: NamedNode, tag: string): Promise<string | undefined> {
  // Only ?name is projected: it is the sole variable the pattern binds.
  const query = `
    SELECT ?name
    WHERE {
      <${language.value}> <${schema.name}> ?name
      FILTER ( LANG(?name) = "${tag}" )
    }`

  const bindings = await queryDataset(store, query)
  return bindings[0]?.get('name')?.value
}
21+
22+
/**
 * Adds an `ex:name` to every programming language in `dataset` that lacks one.
 *
 * Programming languages are the subjects stated as `wdt:P31 wd:Q9143`. For each
 * one without an `ex:name`, its IRI is dereferenced and the `schema:name`
 * tagged `en` (falling back to `mul`) is added to `dataset` as a plain literal.
 * Subjects for which neither name is found are reported and skipped.
 *
 * @param dataset - Store to read from; it is mutated in place.
 * @returns The same `dataset` instance that was passed in.
 */
export async function aggregateProgrammingLanguages(dataset: Store): Promise<Store> {
  const instanceOf = `${prefixes.wdt}P31`
  const programmingLanguage = `${prefixes.wd}Q9143`
  const langQuery = `
    SELECT ?s ?name
    WHERE {
      ?s <${instanceOf}> <${programmingLanguage}> .
      OPTIONAL { ?s <${ex.name}> ?name . }
    }`
  const lbindings = await queryDataset(dataset, langQuery)

  // Keep only IRI subjects without a name; anything else cannot be dereferenced.
  const withoutNames: NamedNode[] = []
  for (const binding of lbindings) {
    if (binding.get('name')) continue
    const subject = binding.get('s')
    if (subject?.termType === 'NamedNode') withoutNames.push(subject)
  }
  console.info(`Fetching names for programming languages: ${withoutNames.length}`)

  // Documents are fetched one at a time, in query order.
  for (const language of withoutNames) {
    // Anchor the rewrite to the scheme so an IRI that is already https stays intact.
    const url = language.value.replace(/^http:/, 'https:')
    const { store } = await dereferenceToStore(url)
    let name = await getName(store, language, 'en')
    if (!name) name = await getName(store, language, 'mul')
    if (!name) {
      console.warn(`Couldn't find name for ${language.value}`)
      continue
    }
    dataset.add(DataFactory.quad(language, ex.terms.name, DataFactory.literal(name)))
  }
  return dataset
}
47+

cli/index.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { migrateWebid } from './migrations/webid.ts'
77
import { aggregateW3C } from './aggregations/w3c.ts'
88
import { aggregateGithub } from './aggregations/github.ts'
99
import { migrateTmpId } from './migrations/tmp-id.ts'
10+
import { aggregateWikidata } from './aggregations/wikidata.ts'
1011

1112
const dataPath = getPath(import.meta.url, '../catalog-data.ttl')
1213
const dataset = await loadData(dataPath)
@@ -66,4 +67,12 @@ aggregate.command('github')
6667
await saveData(updated, dataPath)
6768
})
6869

70+
// `aggregate wikidata`: runs the Wikidata aggregation on the loaded dataset
// and writes the result back to dataPath.
aggregate.command('wikidata')
  .description('Adds data from Wikidata')
  .action(async () => {
    console.info('Fetching data from Wikidata')
    const updated = await aggregateWikidata(dataset)
    await saveData(updated, dataPath)
  })
77+
6978
program.parse(process.argv)

cli/util.ts

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,13 @@ import { write } from '@jeswr/pretty-turtle'
1313
/** Namespace IRIs keyed by the prefix label used for them. */
export const prefixes = {
1414
rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
1515
xsd: 'http://www.w3.org/2001/XMLSchema#',
16+
wd: 'http://www.wikidata.org/entity/',
17+
wdt: 'http://www.wikidata.org/prop/direct/',
1618
con: 'https://solidproject.solidcommunity.net/catalog/taxonomy#',
1719
cdata: 'https://solidproject.solidcommunity.net/catalog/data#',
1820
ex: 'http://example.org#',
1921
}
2022

21-
export async function formatData(filePath: string): Promise<void> {
22-
const fromStream = await readQuadStream(filePath)
23-
const fromQuads = await arrayifyStream(fromStream)
24-
const outString = await write(fromQuads, { prefixes, ordered: true })
25-
fs.writeFileSync(filePath, outString)
26-
}
27-
2823
export async function loadData(filePath: string): Promise<Store> {
2924
const fromStream = await readQuadStream(filePath)
3025
return new Store(await arrayifyStream(fromStream))
@@ -35,7 +30,9 @@ export async function saveData(dataset: Store, filePath: string): Promise<void>
3530
fs.writeFileSync(filePath, outString)
3631
}
3732

38-
export const ex = createVocabulary('http://example.org#', 'webid', 'siloId', 'member', 'siloUsername', 'Person', 'Organization')
33+
// Vocabulary for the http://example.org# namespace; the listed local names are its terms.
export const ex = createVocabulary('http://example.org#', 'name', 'webid', 'siloId', 'member', 'siloUsername', 'Person', 'Organization')
34+
// Vocabulary for http://schema.org/ exposing only `name`.
export const schema = createVocabulary('http://schema.org/', 'name')
35+
// Vocabulary for the RDF Schema namespace exposing only `label`.
export const rdfs = createVocabulary('http://www.w3.org/2000/01/rdf-schema#', 'label')
3936

4037
export function getPath(from: string, to: string): string {
4138
const __filename = fileURLToPath(from)

0 commit comments

Comments
 (0)