@@ -21,7 +21,9 @@ librarian::shelf(
2121 comprehenr ,
2222 httr ,
2323 tidyr ,
24- lubridate
24+ lubridate ,
25+ XML ,
26+ rentrez
2527)
2628
2729# library('synapser')
@@ -253,6 +255,21 @@ if (nrow(pmids_df) == 0) {
253255 # clean column names
254256 dat <- janitor :: clean_names(dat , " lower_camel" )
255257
# ---- get abstract function ---------------------------------------------------
# Fetch the abstract text of a single PubMed record.
#
# Adapted from:
# https://stackoverflow.com/questions/77211966/r-how-to-extract-a-pubmed-abstract-using-rentrez
#
# Args:
#   pmid: A single PubMed ID.
#
# Returns:
#   A length-one character vector. When the abstract is split into several
#   <AbstractText> sections (structured abstracts), all sections are joined
#   with a blank line between them so that no section is dropped. When the
#   record has no abstract, "" is returned (and a message is emitted), so
#   callers can test for presence with nchar() and never receive a
#   placeholder sentence as if it were abstract text.
get_abstract <- function(pmid) {
  record <- rentrez::entrez_fetch(
    db = "pubmed",
    id = pmid,
    rettype = "xml",
    parsed = TRUE
  )

  # Only look under <Abstract> so that text from <OtherAbstract> (e.g.
  # translations) is not mixed into the main abstract.
  # unlist() normalises the empty-match case (an empty list) to NULL.
  abstract_nodes <- unlist(
    XML::xpathSApply(record, "//Abstract/AbstractText", XML::xmlValue)
  )

  if (length(abstract_nodes) == 0) {
    # message() goes to stderr and, unlike print(), is not followed by an
    # implicit return of the message text.
    message("No abstract found for PMID ", pmid, ".")
    return("")
  }

  paste(trimws(abstract_nodes), collapse = "\n\n")
}
272+
256273 # # ----hacky----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
257274 # Included in hacky_cleaning is conversion to ascii and removing html formatting
258275 dat $ year <- stringr :: str_extract(dat $ pubdate , " \\ d{4}" )
@@ -261,6 +278,7 @@ if (nrow(pmids_df) == 0) {
261278 dat $ authors <- hacky_cleaning(dat $ authors )
262279 dat $ journal <- remove_unacceptable_characters(dat $ fulljournalname )
263280 dat $ publicationDate <- stringr :: str_extract(dat $ pubdate , " \\ d{4}-\\ d{2}-\\ d{2}" )
# Fetch one abstract per PMID. map_chr() keeps `abstract` a plain character
# column (one string per row) instead of a list column, and fails loudly if
# get_abstract() ever returns something other than a single string.
dat$abstract <- purrr::map_chr(dat$pmid, get_abstract)
264282
265283 # dat$abstract <- hacky_cleaning(dat$abstract)
266284
@@ -378,6 +396,15 @@ dat <- dat %>%
378396 )
379397
380398 syn $ store(file , forceVersion = FALSE )
399+ # Build a Synapse Wiki whose markdown is x$abstract, owned by entity$id, and store it; skipped when x$abstract is NULL or has zero characters. NOTE(review): `entity` is not assigned in the lines visible here -- confirm it refers to the entity stored just above.
400+ if (! is.null(x $ abstract ) && nchar(x $ abstract ) > 0 ) {
401+ wiki <- synapseclient $ Wiki(
402+ owner = entity $ id ,
403+ markdown = x $ abstract
404+ )
405+ syn $ store(wiki )
406+ }
407+
381408 }
382409 )
383410}
0 commit comments