-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathqueries.R
More file actions
137 lines (125 loc) · 4.08 KB
/
queries.R
File metadata and controls
137 lines (125 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Root URL of the Spanish Official Gazette website (Boletín Oficial del Estado).
BASE_URL <- "https://boe.es/"
# URL path fragment for each journal, looked up by journal code (element name):
# BORME = companies register gazette, BOE = the main state gazette.
JOURNAL_URL <- c(BORME = "diario_borme", BOE = "diario_boe")
#' Build a query for an XML
#'
#' Construct the URL that returns the XML version of a BOE or BORME document.
#'
#' @param id The id of the xml document you want. It must start with "BOE" or
#'   "BORME" (validated by `check_code()` and `match.arg()`).
#' @return A query for the xml.
#' @seealso [sumario_nbo()]
#' @export
#' @examples
#' id <- sumario_nbo(format(as.Date("2017/10/02", "%Y/%m/%d"), "%Y%m%d"))
#' cve <- sumario_cve("2017", "237") # Same document but by the CVE
#' query_xml(id)
#' query_xml(cve)
#' @importFrom httr modify_url
query_xml <- function(id) {
  check_code(id)
  # The journal is encoded as the id prefix: "BOE-..." or "BORME-...".
  journal <- strsplit(id, split = "-", fixed = TRUE)[[1]][1]
  # match.arg() errors on any prefix other than BOE/BORME, so an invalid
  # journal fails loudly here instead of producing an NA path below.
  journal <- match.arg(journal, c("BOE", "BORME"))
  httr::modify_url(BASE_URL,
                   path = paste0(JOURNAL_URL[journal], "/xml.php"),
                   query = paste0("id=", id))
}
# Build the open-data API URL for a daily summary (sumario).
# Endpoint shape observed on 2024-10-08, e.g.:
#   https://boe.es/datosabiertos/api/boe/sumario/20250125
#
# @param id Date of the summary as a "YYYYMMDD" string.
# @param journal Either "BOE" (default) or "BORME".
# @return A string with the query URL for the summary.
query_xml_sumario <- function(id, journal = "BOE") {
  journal <- match.arg(journal, c("BOE", "BORME"))
  # The API uses the lower-case journal code as a path segment.
  path <- paste0("datosabiertos/api/", tolower(journal), "/sumario/", id)
  httr::modify_url(BASE_URL, path = path)
}
#' Build a query for the webpage
#'
#' @param cve The CVE of the document you want. It must start with "BOE" or
#'   "BORME".
#' @return A query url.
#' @seealso query_consolidada
#' @export
#' @examples
#' cve <- sumario_cve("2017", "117")
#' query_htm(cve)
query_htm <- function(cve) {
  check_code(cve)
  journal <- strsplit(cve, "-", fixed = TRUE)[[1]][1]
  # Validate the journal prefix: without this, an unknown prefix made
  # JOURNAL_URL[journal] return NA and silently built an "NA/text.php" path.
  # This mirrors the validation already done in query_xml().
  journal <- match.arg(journal, c("BOE", "BORME"))
  # NOTE(review): the live BOE site serves document pages from "txt.php";
  # confirm whether "text.php" is (still) a valid endpoint.
  httr::modify_url(BASE_URL,
                   path = paste0(JOURNAL_URL[journal], "/text.php"),
                   query = paste0("id=", cve))
}
#' Build a query for the webpage
#'
#' Look for the consolidated law online
#' @param cve The CVE of the document you want.
#' @return A query url.
#' @export
#' @examples
#' cve <- disposicion_cve("2017", "117")
#' query_consolidada(cve)
query_consolidada <- function(cve) {
  check_code(cve)
  ids <- strsplit(cve, "-", fixed = TRUE)[[1]]
  # Consolidated texts only exist for dispositions in the BOE, i.e. CVEs of
  # the form BOE-A-YYYY-NNNNN. Check the journal first so a BORME CVE gets
  # the journal error rather than the section error.
  if (ids[1] != "BOE") {
    stop("This is only for laws on the BOE.", call. = FALSE)
  }
  if (ids[2] != "A") {
    stop("Only documents with a BOE-A-* CVE have a consolidated version.",
         call. = FALSE)
  }
  httr::modify_url(BASE_URL,
                   path = "buscar/act.php",
                   query = paste0("id=", cve))
}
#' Query a pdf from the BOE
#'
#' To query a pdf you must know the number of the piece you are looking for and
#' the date it was published.
#' @param year Character of the number of the year: YYYY
#' @param month Character of the number of the month: MM.
#' @param day Character of the number of the day: DD.
#' @param code Code of the publication to query. May be a vector; one URL is
#'   returned per code.
#' @return A link to the pdf.
#' @export
#' @examples
#' cve <- sumario_cve("2017", "237")
#' query_pdf("2017", "10", "02", cve)
query_pdf <- function(year, month, day, code) {
  # Validate every code; vapply keeps this working for vectors of codes.
  vapply(code, check_code, logical(1))
  # The journal prefix ("BOE"/"BORME") is the top-level URL directory,
  # lower-cased, e.g. boe/dias/2017/10/02/pdfs/BOE-S-2017-237.pdf
  journal <- tolower(gsub("-.*", "", code))
  p <- paste(journal, "dias", year, month, day, "pdfs",
             paste0(code, ".pdf"), sep = "/")
  # Fixed: the original `paste0(BASE_URL, path = p)` passed a bogus named
  # argument -- paste0() has no `path` parameter, so `path = p` silently fell
  # into `...` and was concatenated anyway. Same result, honest spelling.
  paste0(BASE_URL, p)
}
#' Retrieve the XML content
#'
#' Look up on the internet and get the content
#' @param query A query to BOE.
#' @return The parsed response body (as returned by [httr::content()] with
#'   UTF-8 encoding) when the server answers with `application/xml`.
#' @seealso [query_xml()]
#' @importFrom httr content
#' @importFrom httr GET
#' @importFrom httr user_agent
#' @importFrom httr status_code
#' @importFrom httr http_type
#' @importFrom httr stop_for_status
#' @export
#' @examples
#' id <- sumario_nbo(format(as.Date("2017/10/02", "%Y/%m/%d"), "%Y%m%d"))
#' url <- query_xml(id)
#' \donttest{get_xml(url)}
get_xml <- function(query) {
  # Identify the package to the BOE servers.
  user_agent <- user_agent("https://github.com/llrs/BOE")
  response <- GET(query, httr::accept_xml(), user_agent)
  # Errors on HTTP error codes (4xx/5xx).
  httr::stop_for_status(response)
  # Extra guard for non-200 success codes that stop_for_status lets through.
  if (status_code(response) != 200) {
    stop("Could not retrieve the data.", call. = FALSE)
  }
  # NOTE(review): an HTML response first warns "Missing data." and then also
  # hits the stop() below (html != application/xml) -- the warning adds
  # context before the error; confirm both signals are intended.
  if (http_type(response) == "text/html") {
    warning("Missing data.", call. = FALSE)
  }
  if (http_type(response) != "application/xml") {
    stop("API did not find the requested document.", call. = FALSE)
  }
  content(response, encoding = "UTF-8")
}