|
# Reference request for the endpoint used below:
#
# curl https://api.openai.com/v1/chat/completions \
#   -H "Content-Type: application/json" \
#   -H "Authorization: Bearer $OPENAI_API_KEY" \
#   -d '{
#     "model": "gpt-4o",
#     "messages": [],
#     "temperature": 0,
#     "max_tokens": 1861,
#     "top_p": 1,
#     "frequency_penalty": 0,
#     "presence_penalty": 0
#   }'
| 13 | + |
# Make sure the download directory exists before any file is written to it.
# `fs::dir_create()` is a no-op when the directory is already there.
fs::dir_create("live_llm")

# Download pages 1 to 20 for `assunto` 3608 into `live_llm/`.
tjsp::baixar_cjpg(
  assunto = 3608,
  diretorio = "live_llm",
  paginas = 1:20
)

# Read every file found in `live_llm/` with `tjsp::tjsp_ler_cjpg()`.
dados_processos <- fs::dir_ls("live_llm") |>
  tjsp::tjsp_ler_cjpg()

# The data viewer is only available in interactive sessions; skipping it
# elsewhere keeps the script runnable with Rscript.
if (interactive()) {
  View(dados_processos)
}
| 24 | + |
| 25 | + |
| 26 | + |
#' Analyse one text with the OpenAI chat completions API
#'
#' Sends the contents of `prompt_path` as the system message and `txt` as the
#' user message, requesting a JSON object (`response_format`) at temperature 0.
#' The JSON string found in the first choice of the response is parsed and
#' converted to a tibble.
#'
#' The API key is read from the `OPENAI_API_KEY` environment variable
#' (it can be set with `usethis::edit_r_environ()`).
#'
#' @param txt A single, non-missing character string used as the user message.
#' @param prompt_path Path to the file whose contents become the system
#'   message.
#' @param model Name of the chat model to query.
#' @return A tibble built from the JSON object returned by the model.
#'   An error is raised when `txt` is invalid, when the API key is not set,
#'   when the HTTP request fails, or when the response carries no message
#'   content.
analise_gpt <- function(txt,
                        prompt_path = "prompt_drogas.md",
                        model = "gpt-4o") {

  # Validate cheap preconditions first so no request is sent for bad input.
  if (!is.character(txt) || length(txt) != 1L || is.na(txt)) {
    stop("`txt` must be a single non-missing string.", call. = FALSE)
  }

  # usethis::edit_r_environ()
  api_key <- Sys.getenv("OPENAI_API_KEY")
  if (!nzchar(api_key)) {
    stop("Environment variable OPENAI_API_KEY is not set.", call. = FALSE)
  }

  u_openai <- "https://api.openai.com/v1/chat/completions"

  messages <- list(
    list(
      role = "system",
      content = readr::read_file(prompt_path)
    ),
    list(
      role = "user",
      content = txt
    )
  )

  body <- list(
    model = model,
    messages = messages,
    temperature = 0,
    response_format = list("type" = "json_object")
  )

  headers <- httr::add_headers(
    "Authorization" = paste("Bearer", api_key)
  )

  res <- httr::POST(
    u_openai,
    body = body,
    headers,
    encode = "json"
  )

  # Turn HTTP errors (bad key, rate limit, server error) into an R error that
  # names the status, instead of failing later while parsing a NULL payload.
  httr::stop_for_status(res, task = "query the OpenAI chat completions API")

  conteudo <- res |>
    httr::content() |>
    purrr::pluck("choices", 1, "message", "content")

  if (is.null(conteudo)) {
    stop("The OpenAI response contains no message content.", call. = FALSE)
  }

  conteudo |>
    jsonlite::fromJSON(simplifyDataFrame = TRUE) |>
    tibble::as_tibble()

}
| 70 | + |
# Show the dimensions of the full table before sampling.
dim(dados_processos)

# Fix the random seed so the sample drawn below is reproducible.
set.seed(42)

# Keep one row per `processo`, then draw 30 rows at random.
dados_processos_amostra <- dplyr::slice_sample(
  dplyr::distinct(dados_processos, processo, .keep_all = TRUE),
  n = 30
)
| 78 | + |
# Error-tolerant wrapper around `analise_gpt()`. A failed call yields a
# one-row tibble whose `erro` column holds the error message, so a single bad
# request does not abort the batch and the cause of each failure can still be
# inspected afterwards.
safe_analise_gpt <- function(...) {
  tryCatch(
    analise_gpt(...),
    error = function(e) tibble::tibble(erro = conditionMessage(e))
  )
}

# Analyse the `julgado` text of every sampled row. The input vector is named
# by `processo`, so each element of the resulting list carries its process
# identifier.
resultado <- purrr::map(
  purrr::set_names(
    dados_processos_amostra$julgado,
    dados_processos_amostra$processo
  ),
  safe_analise_gpt,
  .progress = TRUE
)
| 89 | + |
# Stack the per-process results into a single table.
#
# Only results that actually have an `outras_drogas` column are kept; anything
# else (for example the placeholder produced for a failed call) is left out.
# The column is looked up by name because `$` on a tibble warns for every
# result that lacks it.
base_final <- resultado |>
  purrr::keep(\(x) "outras_drogas" %in% names(x)) |>
  purrr::map(\(x) {
    x |>
      tidyr::unnest(outras_drogas) |>
      # Cast every column to character so results whose columns came back
      # with different types can still be row-bound.
      dplyr::mutate(dplyr::across(dplyr::everything(), as.character))
  }) |>
  purrr::list_rbind(names_to = "processo")

# The data viewer is only available in interactive sessions.
if (interactive()) {
  View(base_final)
}
| 101 | + |
# Long-format table: one row per process and `*em_g` column.
# Rows whose value is the literal "não especificado" are removed (missing
# values are kept), `tipo_droga` takes the first run of lowercase letters of
# the original column name, and `value` / `pena` are parsed as numbers.
base_tidy <- base_final |>
  dplyr::select(processo, dplyr::ends_with("em_g"), decisao, pena) |>
  tidyr::pivot_longer(cols = dplyr::ends_with("em_g")) |>
  dplyr::filter(is.na(value) | value != "não especificado") |>
  dplyr::mutate(tipo_droga = stringr::str_extract(name, "[a-z]+")) |>
  dplyr::mutate(
    value = readr::parse_number(value),
    pena = readr::parse_number(pena)
  )
| 113 | + |
# Scatter plot of `pena` against `value`, restricted to rows with
# `value` below 4000.
ggplot2::ggplot(
  data = dplyr::filter(base_tidy, value < 4000),
  mapping = ggplot2::aes(x = value, y = pena)
) +
  ggplot2::geom_point()
0 commit comments