1515# ' @param ... further arguments supplied to \code{mclapply}.
1616# '
1717# ' @return
18- # ' The function returns a tibble data frame with the following variables:
18+ # ' The function returns a \code{tibble} data frame with the following variables:
1919# ' \describe{
20+ # ' \item{record_id}{The id of the record.}
2021# ' \item{speech_no}{The speech number in the record.}
2122# ' \item{speech_id}{The id of the XML node that contains the introduction of the speaker.}
2223# ' \item{who}{The id of the person giving the speech.}
@@ -43,8 +44,10 @@ extract_speeches_from_record <- function(record_path){
4344 x <- xml_ns_strip(x )
4445
4546 # Extract speeches
47+ id <- xml_attr(xml_find_all(x , " TEI" ),attr = " id" )
4648 xs <- xml_find_all(x , " .//note[@type = 'speaker']|.//u|.//seg" )
47- df <- tibble(" type_speaker" = xml_attr(xs , attr = " type" ) == " speaker" ,
49+ df <- tibble(" record_id" = id ,
50+ " type_speaker" = xml_attr(xs , attr = " type" ) == " speaker" ,
4851 " name" = xml_name(xs ),
4952 " who" = xml_attr(xs , attr = " who" ),
5053 " id" = xml_attr(xs , attr = " id" ),
@@ -57,7 +60,7 @@ extract_speeches_from_record <- function(record_path){
5760 df <- df [df $ name == " seg" ,]
5861 df $ type_speaker <- NULL
5962 df $ name <- NULL
60- df [, c(" speech_no" , " speech_id" , " who" , " id" , " text" )]
63+ df [, c(" record_id " , " speech_no" , " speech_id" , " who" , " id" , " text" )]
6164}
6265
6366# ' @rdname extract_speeches_from_record
@@ -80,10 +83,7 @@ extract_speeches_from_records <- function(record_paths, mc.cores = getOption("mc
8083 res <- lapply(record_paths , extract_speeches_from_record )
8184 }
8285
83- for (i in seq_along(res )){
84- res [[i ]]$ record <- basename(record_paths [i ])
85- }
8686 res <- bind_rows(res )
87- res [, c(" record " , " speech_no" , " speech_id" , " who" , " id" , " text" )]
87+ res [, c(" record_id " , " speech_no" , " speech_id" , " who" , " id" , " text" )]
8888}
8989
0 commit comments