-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdisamb.py
70 lines (48 loc) · 2.33 KB
/
disamb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#! /usr/bin/env python3
import pathlib
import yaml
import pandas as pd
from rdigraphs.datamanager import callbacks
# Load the data-manager settings. The file location is resolved against the
# current working directory, so the script must be started from the directory
# that contains the `rdigraphs` package.
parameters_file = (pathlib.Path.cwd() / 'rdigraphs' / 'datamanager'
                   / 'parameters.yaml')
with open(parameters_file, encoding='utf-8') as f:
    # yaml.load() without an explicit Loader raises TypeError on PyYAML >= 6
    # (and is flagged as unsafe on 5.x). safe_load() builds plain
    # dicts/lists/scalars only, which is all the lookups further down
    # (parameters['disambiguation'][...]) rely on.
    parameters = yaml.safe_load(f)
# Root of the data-manager package, resolved against the current working
# directory. Every input file below lives in one of its tmp_* sub-folders.
path2dm = pathlib.Path.cwd().joinpath('rdigraphs', 'datamanager')

# Inputs stored under tmp_projects.
projects_file = path2dm.joinpath('tmp_projects', 'projects.csv.gz')
researcher_project_file = path2dm.joinpath(
    'tmp_projects', 'researcher_project.csv.gz')
organizations_file = path2dm.joinpath('tmp_projects', 'organization.csv.gz')

# Inputs stored under tmp_patents.
patstats_person_file = path2dm.joinpath('tmp_patents', 'person906.csv.gz')
patstats_person_application_file = path2dm.joinpath(
    'tmp_patents', 'person_application.csv.gz')

# Inputs stored under tmp_publications.
scopus_authorship_file = path2dm.joinpath(
    'tmp_publications', 'authorship.csv.gz')
# ----------
# Read the input tables into data frames.
# The researcher/project table is currently not loaded; to enable it:
#   researcher_project = pd.read_csv(researcher_project_file)

# Project tables: column types are left to pandas' inference.
projects, organizations = (
    pd.read_csv(csv_path)
    for csv_path in (projects_file, organizations_file))

# Patent tables: every column is read as a string (dtype=str).
patstats_person_application, patstats_person = (
    pd.read_csv(csv_path, dtype=str)
    for csv_path in (patstats_person_application_file, patstats_person_file))

# Publication table: column types are left to pandas' inference.
scopus_authorship = pd.read_csv(scopus_authorship_file)
# ----------
# Simulate the original input files by removing the 'disambiguated_id'
# column from the patent tables before they are processed.
# The equivalent step for the projects table is currently disabled:
#   projects = projects.drop(columns=['disambiguated_id'])
patstats_person = patstats_person.drop(columns=['disambiguated_id'])
patstats_person_application = patstats_person_application.drop(
    columns=['disambiguated_id'])
# Prefix each configured 'disambiguation_map' with the package location
# (presumably the map is a list of path components relative to the
# data-manager package -- confirm against callbacks.initialize).
_package_prefix = ['rdigraphs', 'datamanager']
for _section in ('authors', 'organizations'):
    _section_cfg = parameters['disambiguation'][_section]
    _section_cfg['disambiguation_map'] = (
        _package_prefix + _section_cfg['disambiguation_map'])

# Set up the callbacks module, then take short aliases for the two
# disambiguators it exposes.
callbacks.initialize(parameters['disambiguation'])
auth_disamb = callbacks.authors_disambiguator
org_disamb = callbacks.organizations_disambiguator

# Other runs that can be swapped in for the call below:
#   res = org_disamb.projects_organizations(organizations)
#   res = org_disamb.projects_projects(projects)
#   res = auth_disamb.patstats_person(patstats_person)
#   res2 = org_disamb.patstats_person(patstats_person)

# Current run: organization disambiguation on the authorship table.
res = org_disamb.scopus_authorship(scopus_authorship)