Skip to content

Commit 34957a5

Browse files
authored
Merge pull request #567 from massimoaria/develop
Develop
2 parents 95d7b76 + ed58506 commit 34957a5

File tree

16 files changed

+2343
-81
lines changed

16 files changed

+2343
-81
lines changed

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ S3method(summary,bibliometrix)
66
S3method(summary,bibliometrix_netstat)
77
export(Hindex)
88
export(KeywordGrowth)
9+
export(authorBio)
910
export(authorProdOverTime)
1011
export(biblioAnalysis)
1112
export(biblioNetwork)
@@ -20,6 +21,8 @@ export(couplingMap)
2021
export(dominance)
2122
export(duplicatedMatching)
2223
export(fieldByYear)
24+
export(findAuthorWorks)
25+
export(get_authors_summary)
2326
export(histNetwork)
2427
export(histPlot)
2528
export(idByAuthor)
@@ -37,6 +40,7 @@ export(networkStat)
3740
export(normalizeCitationScore)
3841
export(normalizeSimilarity)
3942
export(plotThematicEvolution)
43+
export(print_author_works_summary)
4044
export(readFiles)
4145
export(retrievalByAuthorID)
4246
export(rpys)

NEWS

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
bibliometrix V.5.1.2 (Release date: )
2+
3+
Features:
4+
* Added Author Bio Card
5+
* Biblioshiny: Added Article AI-Summary
6+
7+
Changes:
8+
* #564 [Solved] - bibliometrix now supports the new Scopus CSV format (both for importing data and for local citation analyses)
9+
10+
111
bibliometrix V.5.1.1 (Release date: 2025-09-02)
212

313
Features:

NEWS.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
# bibliometrix (development version)
22

3-
# bibliometrix 5.1.1
3+
Features:
4+
* Added Author Bio Card
5+
* Biblioshiny: Added Article AI-Summary
6+
7+
Changes:
8+
* #564 [Solved] - bibliometrix now supports the new Scopus CSV format (both for importing data and for local citation analyses)
9+
410

511
# bibliometrix V.5.1.1 (Release date: 2025-09-02)
612

R/authorBio.r

Lines changed: 329 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,329 @@
1+
#' Retrieve Author Biographical Information from OpenAlex
2+
#'
3+
#' This function downloads comprehensive author information from OpenAlex based on a DOI
4+
#' and the numerical position of the author in the co-authors list. It provides detailed
5+
#' biographical data, bibliometric indicators, and affiliation information.
6+
#'
7+
#' @param author_position Integer. The numerical position of the author in the authors list (default: 1)
8+
#' @param doi Character. DOI of the article used to identify the authors
9+
#' @param verbose Logical. Print informative messages during execution (default: FALSE)
10+
#' @param return_all_authors Logical. If TRUE, returns information for all co-authors (default: FALSE)
11+
#'
12+
#' @return If \code{return_all_authors = FALSE}, returns a tibble with comprehensive information
13+
#' about the specified author including:
14+
#' \itemize{
15+
#' \item Basic information (name, ORCID, OpenAlex ID)
16+
#' \item Bibliometric indicators (works count, citations, h-index, i10-index)
17+
#' \item Affiliation details from both the paper and author profile
18+
#' \item Research topics and areas
19+
#' \item Paper-specific metadata (corresponding author status, position type)
20+
#' }
21+
#' If \code{return_all_authors = TRUE}, returns a list of tibbles, one for each co-author.
22+
#'
23+
#' @details
24+
#' The function first retrieves the work information using the provided DOI, then extracts
25+
#' author IDs from the authorships data, and finally fetches detailed author profiles from
26+
#' OpenAlex. It enriches the author data with paper-specific information such as authorship
27+
#' position, corresponding author status, and affiliations as listed in the paper.
28+
#'
29+
#' The function handles various edge cases including missing author IDs, invalid positions,
30+
#' and network errors. It also provides comprehensive error messages to help troubleshoot
31+
#' common issues.
32+
#'
33+
#' @examples
34+
#' \dontrun{
35+
#' # Get information for the first author
36+
#' first_author <- authorBio(doi = "10.1016/j.joi.2017.08.007")
37+
#'
38+
#' # Get information for the second author with verbose output
39+
#' second_author <- authorBio(
40+
#' author_position = 2,
41+
#' doi = "10.1016/j.joi.2017.08.007",
42+
#' verbose = TRUE
43+
#' )
44+
#'
45+
#' # Get information for all co-authors
46+
#' all_authors <- authorBio(
47+
#' doi = "10.1016/j.joi.2017.08.007",
48+
#' return_all_authors = TRUE
49+
#' )
50+
#' }
51+
#'
52+
#' @export
53+
#'
54+
authorBio <- function(author_position = 1,
                      doi = "10.1016/j.joi.2017.08.007",
                      verbose = FALSE,
                      return_all_authors = FALSE) {
  # Purpose: look up a work on OpenAlex by DOI, then fetch the OpenAlex
  # profile of one author (or of every author) and attach paper-specific
  # metadata (position, corresponding flag, affiliation as listed in the
  # paper, source DOI/title, query timestamp).
  #
  # Returns a tibble for a single author, or an unnamed list of tibbles
  # (one per author that could be retrieved) when return_all_authors = TRUE.
  # Stops with an explanatory message on invalid input, a missing package,
  # an unknown DOI, a missing author list or an unusable author ID.

  # ---- Input validation ----------------------------------------------------
  # Scalar checks come first so that NA, NULL and length > 1 inputs produce
  # the documented message instead of an `if`/`||` evaluation error.
  if (!is.character(doi) || length(doi) != 1L || is.na(doi) ||
    !nzchar(trimws(doi))) {
    stop("The 'doi' parameter must be a non-empty character string")
  }

  if (!is.numeric(author_position) || length(author_position) != 1L ||
    !is.finite(author_position) || author_position < 1 ||
    author_position != trunc(author_position)) {
    stop("The 'author_position' parameter must be a positive integer")
  }

  # Check library availability
  if (!requireNamespace("openalexR", quietly = TRUE)) {
    stop("The 'openalexR' library is not available. Install it with: install.packages('openalexR')")
  }

  # ---- Local helpers -------------------------------------------------------
  # TRUE when `id` is a single, non-missing, non-empty identifier.
  has_valid_id <- function(id) {
    length(id) == 1L && !is.na(id) && nzchar(id)
  }

  # Strip the OpenAlex URL prefix, leaving only the bare entity ID.
  strip_prefix <- function(id) {
    gsub("https://openalex.org/", "", id, fixed = TRUE)
  }

  # Fetch one author profile from OpenAlex as a tibble.
  fetch_author <- function(id) {
    openalexR::oa_fetch(
      entity = "authors",
      identifier = id,
      output = "tibble"
    )
  }

  # First element of `x`, or NA when `x` is NULL / empty.
  first_or_na <- function(x) {
    if (length(x) > 0) x[1] else NA
  }

  if (verbose) cat("Retrieving article information for DOI:", doi, "\n")

  # ---- Retrieve the work ---------------------------------------------------
  au_work <- tryCatch(
    openalexR::oa_fetch(
      entity = "works",
      doi = doi,
      output = "tibble"
    ),
    error = function(e) {
      stop("Error retrieving article: ", conditionMessage(e),
        "\nPlease verify that the DOI is correct and OpenAlex is accessible",
        call. = FALSE
      )
    }
  )

  # Verify that the article was found
  if (is.null(au_work) || nrow(au_work) == 0) {
    stop("No article found for the provided DOI: ", doi)
  }

  # Authorships of the first (only) matching work
  authorships <- au_work$authorships[[1]]

  # is.data.frame() also rejects NULL / NA placeholders, for which nrow()
  # would return NULL and break the comparison.
  if (!is.data.frame(authorships) || nrow(authorships) == 0) {
    stop("No author information found for this article")
  }

  n_authors <- nrow(authorships)

  # The requested position only matters when a single author is returned.
  if (!return_all_authors && author_position > n_authors) {
    stop(
      "Author position (", author_position,
      ") is greater than the total number of authors (", n_authors, ")"
    )
  }

  if (verbose) {
    cat("Article found:", au_work$display_name[1], "\n")
    cat("Total number of authors:", n_authors, "\n")
    cat("Authors:\n")
    for (i in seq_len(n_authors)) {
      cat(" ", i, ".", authorships$display_name[i], "\n")
    }
  }

  # One timestamp per call, shared by every returned record.
  query_time <- Sys.time()
  source_title <- au_work$display_name[1]

  # Attach paper-level metadata for the author at position `pos` to the
  # profile tibble `info`. Used by both the single- and all-authors paths so
  # that they return the same set of columns.
  add_paper_metadata <- function(info, pos) {
    info$author_position_in_paper <- pos
    info$original_author_name <- authorships$display_name[pos]
    info$is_corresponding <- authorships$is_corresponding[pos]
    info$author_position_type <- authorships$author_position[pos]

    # Affiliation as listed on the paper (first one only)
    affil <- authorships$affiliations[[pos]]
    if (is.data.frame(affil) && nrow(affil) > 0) {
      info$primary_affiliation <- first_or_na(affil[["display_name"]])
      info$primary_affiliation_country <- first_or_na(affil[["country_code"]])
      info$primary_affiliation_ror <- first_or_na(affil[["ror"]])
    } else {
      info$primary_affiliation <- NA
      info$primary_affiliation_country <- NA
      info$primary_affiliation_ror <- NA
    }

    # Raw affiliation string, when the column is present
    raw_affil <- authorships[["affiliation_raw"]]
    if (!is.null(raw_affil) && length(raw_affil) >= pos) {
      info$affiliation_raw <- raw_affil[pos]
    } else {
      info$affiliation_raw <- NA
    }

    # Query metadata
    info$source_doi <- doi
    info$source_title <- source_title
    info$query_timestamp <- query_time
    info
  }

  # ---- All authors ---------------------------------------------------------
  if (return_all_authors) {
    if (verbose) cat("Retrieving information for all authors...\n")

    all_authors <- lapply(seq_len(n_authors), function(i) {
      if (verbose) {
        cat(
          "Processing author", i, "of", n_authors, ":",
          authorships$display_name[i], "\n"
        )
      }

      author_id <- authorships$id[i]
      if (!has_valid_id(author_id)) {
        if (verbose) cat("Invalid author ID for position", i, "\n")
        return(NULL)
      }

      # Best effort: a failure for one author must not abort the others.
      author_info <- tryCatch(
        fetch_author(strip_prefix(author_id)),
        error = function(e) {
          if (verbose) {
            cat("Error for author", i, ":", conditionMessage(e), "\n")
          }
          NULL
        }
      )

      if (is.null(author_info) || nrow(author_info) == 0) {
        return(NULL)
      }
      add_paper_metadata(author_info, i)
    })

    # Keep only the authors that were actually retrieved. Filter() is safe
    # on an all-NULL list, unlike `!sapply(x, is.null)`.
    valid_authors <- Filter(Negate(is.null), all_authors)
    if (length(valid_authors) == 0) {
      stop("Unable to retrieve information for any author")
    }
    return(valid_authors)
  }

  # ---- Single author -------------------------------------------------------
  author_id <- authorships$id[author_position]

  if (!has_valid_id(author_id)) {
    stop("Invalid author ID at position ", author_position)
  }

  clean_id <- strip_prefix(author_id)

  if (verbose) {
    cat("Retrieving information for author at position", author_position, "\n")
    cat("Author name:", authorships$display_name[author_position], "\n")
    cat("OpenAlex ID:", clean_id, "\n")
    cat("Position type:", authorships$author_position[author_position], "\n")
    cat("Is corresponding author:", authorships$is_corresponding[author_position], "\n")
  }

  # Retrieve author biographical data
  au_info <- tryCatch(
    fetch_author(clean_id),
    error = function(e) {
      stop("Error retrieving author information: ", conditionMessage(e),
        call. = FALSE
      )
    }
  )

  if (is.null(au_info) || nrow(au_info) == 0) {
    stop("No biographical information found for the author at position ", author_position)
  }

  au_info <- add_paper_metadata(au_info, author_position)

  if (verbose) {
    cat("Information successfully retrieved for:", au_info$display_name[1], "\n")
    cat("Number of publications:", au_info$works_count[1], "\n")
    cat("Number of citations:", au_info$cited_by_count[1], "\n")
    cat("H-index:", au_info$h_index[1], "\n")
    cat("Primary affiliation:", au_info$primary_affiliation[1], "\n")
  }

  au_info
}
258+
259+
# Convenience wrapper: fetch the profiles of every co-author of the article
# identified by `doi` by delegating to authorBio() in all-authors mode.
analyze_all_authors <- function(doi, verbose = FALSE) {
  authorBio(
    doi = doi,
    verbose = verbose,
    return_all_authors = TRUE
  )
}
263+
264+
#' Get Authors Summary from OpenAlex
265+
#'
266+
#' Retrieves a quick summary of all authors from a paper without making additional API calls
267+
#' for individual author profiles. Useful for getting an overview of the authorship structure.
268+
#'
269+
#' @param doi Character. DOI of the article
270+
#' @param verbose Logical. Print informative messages during execution (default: FALSE)
271+
#'
272+
#' @return A data frame with summary information for all authors including:
273+
#' \itemize{
274+
#' \item position: Author position in the paper
275+
#' \item display_name: Author name as it appears in the paper
276+
#' \item author_position_type: Type of position (first, last, middle)
277+
#' \item is_corresponding: Whether the author is a corresponding author
278+
#' \item orcid: ORCID identifier if available
279+
#' \item openalex_id: OpenAlex author identifier
280+
#' \item primary_affiliation: Main institutional affiliation
281+
#' }
282+
#'
283+
#' @examples
284+
#' \dontrun{
285+
#' # Get a quick summary of all authors
286+
#' summary <- get_authors_summary(doi = "10.1016/j.joi.2017.08.007")
287+
#' print(summary)
288+
#' }
289+
#'
290+
#' @export
291+
get_authors_summary <- function(doi = "10.1016/j.joi.2017.08.007", verbose = FALSE) {
  # Purpose: build a one-row-per-author overview of a paper from the
  # authorships data of the OpenAlex work record, without fetching the
  # individual author profiles.
  #
  # Returns a data.frame with columns position, display_name,
  # author_position_type, is_corresponding, orcid, openalex_id and
  # primary_affiliation (always character; NA when no affiliation is listed).
  # Stops on an invalid DOI, a missing 'openalexR' package, a failed or empty
  # lookup, or a work without author information.

  # Validate before any network access so bad input fails fast and clearly.
  if (!is.character(doi) || length(doi) != 1L || is.na(doi) ||
    !nzchar(trimws(doi))) {
    stop("The 'doi' parameter must be a non-empty character string")
  }

  if (!requireNamespace("openalexR", quietly = TRUE)) {
    stop("The 'openalexR' library is not available. Install it with: install.packages('openalexR')")
  }

  if (verbose) cat("Retrieving author summary for DOI:", doi, "\n")

  au_work <- tryCatch(
    openalexR::oa_fetch(entity = "works", doi = doi, output = "tibble"),
    error = function(e) {
      stop("Error retrieving article: ", conditionMessage(e), call. = FALSE)
    }
  )

  if (is.null(au_work) || nrow(au_work) == 0) {
    stop("No article found for the provided DOI: ", doi)
  }

  authorships <- au_work$authorships[[1]]

  # Without this guard an empty author list would reach 1:0 style indexing.
  if (!is.data.frame(authorships) || nrow(authorships) == 0) {
    stop("No author information found for this article")
  }

  author_idx <- seq_len(nrow(authorships))

  # Create a summary without additional API calls
  summary_df <- data.frame(
    position = author_idx,
    display_name = authorships$display_name,
    author_position_type = authorships$author_position,
    is_corresponding = authorships$is_corresponding,
    orcid = authorships$orcid,
    openalex_id = authorships$id,
    stringsAsFactors = FALSE
  )

  # First listed affiliation per author; vapply() pins the column type to
  # character even when every author lacks an affiliation.
  summary_df$primary_affiliation <- vapply(author_idx, function(i) {
    affil <- authorships$affiliations[[i]]
    if (!is.data.frame(affil) || nrow(affil) == 0) {
      return(NA_character_)
    }
    affil_name <- affil[["display_name"]]
    if (length(affil_name) == 0) NA_character_ else as.character(affil_name[1])
  }, character(1))

  summary_df
}
329+

0 commit comments

Comments
 (0)