-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextractor.py
More file actions
75 lines (56 loc) · 3.58 KB
/
extractor.py
File metadata and controls
75 lines (56 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import requests
import pandas as pd
import time
def query_search(query: str, n_records=10, subj='', timeout=30):
    """
    Run a search against the Scopus Search API and return the decoded reply.

    Args:
        query (str): Raw (not yet URL-encoded) Scopus search expression.
        n_records (int, optional): Value sent as the ``count`` URL parameter.
            Defaults to 10.
        subj (str, optional): Pre-formatted URL fragment appended verbatim
            right after the query (for example ``'&subj=...'``). It is NOT
            encoded here, so it must already be URL-safe. Defaults to ''.
        timeout (float, optional): Seconds to wait for the server before
            ``requests`` gives up. Defaults to 30.

    Returns:
        The JSON-decoded body of the HTTP response.

    Note:
        Relies on a module-level ``API_KEY`` that is not defined in the
        visible part of this file; it must exist before this is called.
    """
    from urllib.parse import quote

    base_url = 'https://api.elsevier.com/content/search/scopus?query='
    # Percent-encode the whole expression so characters such as '&', '#'
    # or '+' inside the query cannot be mistaken for URL syntax.
    encoded_query = quote(query, safe='')
    final_url = (
        f'{base_url}{encoded_query}{subj}'
        f'&apiKey={API_KEY}&count={n_records}'
        '&httpAccept=application/json&cursor=*'
    )
    # An explicit timeout keeps a stalled connection from hanging forever.
    return requests.get(final_url, timeout=timeout).json()
def get_scopus_id(query: str, n_records: int = 10, subj: str = ''):
    """
    Collect the Scopus ids of every paper matched by a query into ``ids.csv``.

    The first page is fetched through ``query_search``; further pages are
    fetched by following the "next" link of each response. The ids of each
    page are appended to ``ids.csv`` (one ``<id>,`` per line) as soon as the
    page is processed, so progress survives an interruption.

    Args:
        query (str): Scopus search expression, forwarded to ``query_search``.
        n_records (int, optional): How many records are requested per page.
            Defaults to 10.
        subj (str, optional): Subject filter fragment forwarded to
            ``query_search``. Defaults to ''.

    Returns:
        None. The total hit count and a progress line per page are printed.
    """
    page_timeout = 30  # seconds; avoids hanging forever on a stalled page

    results = query_search(query, n_records, subj)["search-results"]
    print(results['opensearch:totalResults'])

    while True:
        # Entries without "dc:identifier" (e.g. the placeholder entry the API
        # returns for an empty result set) are skipped instead of raising.
        page_ids = [
            str(entry["dc:identifier"]).replace('SCOPUS_ID:', '')
            for entry in results.get("entry", [])
            if "dc:identifier" in entry
        ]
        if not page_ids:
            break

        with open('ids.csv', "a") as id_frame:
            id_frame.writelines(f'{scopus_id},\n' for scopus_id in page_ids)

        # Locate the continuation link by its role rather than by position,
        # so a missing or reordered link list ends the loop cleanly.
        next_page = next(
            (
                link.get("@href")
                for link in results.get("link", [])
                if link.get("@ref") == "next"
            ),
            None,
        )
        if not next_page:
            break

        results = requests.get(next_page, timeout=page_timeout).json()["search-results"]
        print('Making progress ----->')
def get_abstract(id: str, timeout=30):
    """
    Fetch the abstract record of a single paper from the Scopus Abstract API.

    Args:
        id (str): Scopus id of the desired paper (without the ``SCOPUS_ID:``
            prefix), placed in the request path.
        timeout (float, optional): Seconds to wait for the server before
            ``requests`` gives up. Defaults to 30.

    Returns:
        The JSON-decoded body of the HTTP response.

    Note:
        Relies on a module-level ``API_KEY`` that is not defined in the
        visible part of this file; it must exist before this is called.
    """
    url = f'https://api.elsevier.com/content/abstract/scopus_id/{id}'
    # Let requests build and encode the query string instead of hand-joining it.
    params = {'httpAccept': 'application/json', 'apiKey': API_KEY}
    return requests.get(url, params=params, timeout=timeout).json()
# Scopus search expression: weather/climate terms AND Brazilian coastal/marine
# terms (or "blue amazon", or petrobras + coastal terms), restricted to a list
# of subject areas through LIMIT-TO clauses.
query = 'TITLE-ABS-KEY ( temperature OR rain* OR precipitat* OR warm* OR summer OR winter OR spring OR autumn OR meteorolog* OR weather OR forecast* OR humidit* OR wave OR wind OR tide* OR "ocean current" OR "maritime current" OR "sea current" OR season* OR climat* OR drought OR "El Nino" OR cooling OR heat* ) AND TITLE-ABS-KEY ( "blue amazon" OR ( brazil* W/3 ( coast* OR beach* OR "continental margin*" OR "exclusive economic zone" OR ocean* OR sea* OR shore OR marine* OR "atlantic ocean*" OR offshore OR island* OR maritime OR bay OR marine OR archipelago OR "continental shelf" OR estuar* ) ) OR ( petrobras AND ( coast* OR beach* OR "continental margin*" OR "exclusive economic zone" OR ocean* OR sea* OR shore OR marine* OR "atlantic ocean*" OR offshore OR island* OR maritime OR bay OR marine OR archipelago OR "continental shelf" OR estuar* ) ) ) AND ( LIMIT-TO ( SUBJAREA , "EART" ) OR LIMIT-TO ( SUBJAREA , "ENVI" ) OR LIMIT-TO ( SUBJAREA , "ENGI" ) OR LIMIT-TO ( SUBJAREA , "ENER" ) OR LIMIT-TO ( SUBJAREA , "MULT" ) OR LIMIT-TO ( SUBJAREA , "PHYS" ) OR LIMIT-TO ( SUBJAREA , "COMP" ) OR LIMIT-TO ( SUBJAREA , "BUSI" ) OR LIMIT-TO ( SUBJAREA , "MATH" ) OR LIMIT-TO ( SUBJAREA , "ECON" ) OR LIMIT-TO ( SUBJAREA , "DECI" ) )'
# Already percent-encoded "&subj=" URL fragment carrying the same subject-area
# codes as the LIMIT-TO clauses above (%2c is an encoded comma).
subj = '&subj="EART"%2ct%2c"ENVI"%2ct%2c"ENGI"%2ct%2c"ENER"%2ct%2c"MULT"%2ct%2c"PHYS"%2ct%2c"COMP"%2ct%2c"BUSI"%2ct%2c"MATH"%2ct%2c"ECON"%2ct%2c"DECI"%2ct'
# NOTE(review): `subj` is passed as the second positional argument, which
# get_scopus_id names `n_records`; query_search then interpolates it into the
# `count=` field of the URL. Confirm whether this is intended.
get_scopus_id(query,subj)
# Fetch one abstract by a hard-coded Scopus id and print the raw response.
obj = get_abstract('20044366781')
print(obj)