Skip to content

Commit ff9113f

Browse files
Merge pull request #11 from eliteportal/abstract-wikis
Update query-pubmed-grants.R
2 parents 705d2e5 + ebf9699 commit ff9113f

File tree

1 file changed

+28
-1
lines changed

1 file changed

+28
-1
lines changed

inst/scripts/query-pubmed-grants.R

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ librarian::shelf(
2121
comprehenr,
2222
httr,
2323
tidyr,
24-
lubridate
24+
lubridate,
25+
XML,
26+
rentrez
2527
)
2628

2729
# library('synapser')
@@ -253,6 +255,21 @@ if (nrow(pmids_df) == 0) {
253255
# clean column names
254256
dat <- janitor::clean_names(dat, "lower_camel")
255257

258+
# ---- get abstract function ----------------------------------------------------------------------------------------
259+
get_abstract <- function(pmid) {
  # Fetch the abstract text for one PubMed record.
  #
  # Adapted from:
  # https://stackoverflow.com/questions/77211966/r-how-to-extract-a-pubmed-abstract-using-rentrez
  #
  # Args:
  #   pmid: PubMed ID, passed as `id` to rentrez::entrez_fetch().
  #
  # Returns:
  #   A length-one character vector. All <AbstractText> sections found directly
  #   under an <Abstract> element are joined with single spaces. When the record
  #   has no abstract text, "" is returned and a message is emitted.
  record <- rentrez::entrez_fetch(
    db = "pubmed",
    id = pmid,
    rettype = "xml",
    parsed = TRUE
  )

  # Restrict to <Abstract>/<AbstractText> so that AbstractText nodes nested
  # under other parents (e.g. <OtherAbstract>) are not mixed in.
  sections <- XML::xpathSApply(
    record,
    "//Abstract/AbstractText",
    XML::xmlValue
  )

  # xpathSApply() returns an empty list when nothing matches; normalise to a
  # plain character vector and drop empty sections.
  sections <- as.character(unlist(sections))
  sections <- sections[!is.na(sections) & nzchar(sections)]

  if (length(sections) == 0) {
    # Previously print() was the last expression here, so the placeholder text
    # itself was returned and could be treated as a real abstract. Report the
    # condition as a message and return an empty string instead.
    message("No abstract found for PMID ", pmid, ".")
    return("")
  }

  # Structured abstracts are split into several <AbstractText> sections; keep
  # all of them rather than only the first.
  paste(sections, collapse = " ")
}
272+
256273
## ----hacky----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
257274
# Included in hacky_cleaning is conversion to ascii and removing html formatting
258275
dat$year <- stringr::str_extract(dat$pubdate, "\\d{4}")
@@ -261,6 +278,7 @@ if (nrow(pmids_df) == 0) {
261278
dat$authors <- hacky_cleaning(dat$authors)
262279
dat$journal <- remove_unacceptable_characters(dat$fulljournalname)
263280
dat$publicationDate <- stringr::str_extract(dat$pubdate, "\\d{4}-\\d{2}-\\d{2}")
281+
# One get_abstract() call per PMID; purrr::map() returns a list, so
# `abstract` is stored as a list-column.
dat$abstract <- purrr::map(dat$pmid, get_abstract)
264282

265283
# dat$abstract <- hacky_cleaning(dat$abstract)
266284

@@ -378,6 +396,15 @@ dat <- dat %>%
378396
)
379397

380398
syn$store(file, forceVersion = FALSE)
399+
# make the wiki with abstract
400+
if (!is.null(x$abstract) && nchar(x$abstract) > 0) {
401+
wiki <- synapseclient$Wiki(
402+
owner = entity$id,
403+
markdown = x$abstract
404+
)
405+
syn$store(wiki)
406+
}
407+
381408
}
382409
)
383410
}

0 commit comments

Comments
 (0)