Skip to content

investigate differences between new AR format and old AR format #8

@cjyetman

Description

@cjyetman

Especially with the recent change with AR, we're probably going to eventually need to switch to their new format. We should investigate the differences and see if we can switch to using the new format.

library(tidyverse)
library(pacta.data.preparation)

ar_data_path <- "~/Documents/Data/Asset Resolution/2022-08-15_AR_2021Q4"
ar_advanced_company_indicators_path <- file.path(ar_data_path, "2022-08-24_AR_2021Q4_RMI-Company-Indicators.xlsx")
masterdata_debt_path <- file.path(ar_data_path, "2022-10-05_rmi_masterdata_debt_2021q4.csv")
masterdata_ownership_path <- file.path(ar_data_path, "2022-08-15_rmi_masterdata_ownership_2021q4.csv")

ar_advanced_company_indicators <- import_ar_advanced_company_indicators(ar_advanced_company_indicators_path, fix_names = TRUE)
masterdata_debt <- readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE)
masterdata_ownership <- readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE)


# -------------------------------------------------------------------------

ownership_data <- 
  ar_advanced_company_indicators %>% 
  filter(consolidation_method == "Equity Ownership") %>% 
  filter(value_type == "production") %>%
  filter(
    asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
      asset_sector == "Cement" & activity_unit == "t cement" |
      asset_sector == "Coal" & activity_unit == "t coal" |
      asset_sector == "HDV" & activity_unit == "# vehicles" |
      asset_sector == "LDV" & activity_unit == "# vehicles" |
      asset_sector == "Oil&Gas" & activity_unit == "GJ" |
      asset_sector == "Power" & activity_unit == "MW" |
      asset_sector == "Shipping" & activity_unit == "dwt km" |
      asset_sector == "Steel" & activity_unit == "t steel"
  ) %>% 
  pivot_wider(names_from = "year", values_fill = 0) %>% 
  group_by(asset_sector) %>% 
  summarise(new_2016 = sum(`2016`, na.rm = TRUE), new_n = length(unique(company_id)))

masterdata_ownership %>% 
  filter(company_id %in% ar_advanced_company_indicators$company_id) %>% 
  group_by(sector) %>% 
  summarise(old_2016 = sum(`_2016`, na.rm = TRUE), old_n = length(unique(company_id))) %>% 
  full_join(ownership_data, by = c(sector = "asset_sector")) %>% 
  mutate(
    new_2016 = if_else(is.na(new_2016), as.numeric(0), as.numeric(new_2016)),
    new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
  ) %>% 
  mutate(diff = old_2016 - new_2016) %>% 
  mutate(percent_diff = round(diff / old_2016 * 100, digits = 2))

#> # A tibble: 9 × 7
#>   sector   old_2016 old_n new_2016 new_n     diff percent_diff
#>   <chr>       <dbl> <int>    <dbl> <dbl>    <dbl>        <dbl>
#> 1 Aviation  6.01e12  1820  6.01e12  1820 -3.12e-2         0   
#> 2 Cement    5.01e 9  2070  4.67e 9  2035  3.46e+8         6.91
#> 3 Coal      1.09e10  2051  1.09e10  2061 -1.47e-2         0   
#> 4 HDV       0         575  0         575  0             NaN   
#> 5 LDV       1.39e 8   394  1.39e 8   435 -3.73e-1         0   
#> 6 Oil&Gas   7.11e11  4563  7.11e11  4563 -5.32e-2         0   
#> 7 Power     1.30e 7 36846  1.30e 7 36875 -3.26e-1         0   
#> 8 Shipping  2.56e14 11167  2.56e14 11169  2.95e+9         0   
#> 9 Steel     2.73e 9  1105  2.73e 9  1105 -5.76e-3         0


# -------------------------------------------------------------------------

fin_control_data <- 
  ar_advanced_company_indicators %>% 
  filter(consolidation_method == "Financial Control") %>% 
  filter(value_type == "production") %>%
  filter(
    asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
      asset_sector == "Cement" & activity_unit == "t cement" |
      asset_sector == "Coal" & activity_unit == "t coal" |
      asset_sector == "HDV" & activity_unit == "# vehicles" |
      asset_sector == "LDV" & activity_unit == "# vehicles" |
      asset_sector == "Oil&Gas" & activity_unit == "GJ" |
      asset_sector == "Power" & activity_unit == "MW" |
      asset_sector == "Shipping" & activity_unit == "dwt km" |
      asset_sector == "Steel" & activity_unit == "t steel"
  ) %>% 
  pivot_wider(names_from = "year", values_fill = 0) %>% 
  group_by(asset_sector) %>% 
  summarise(new_2016 = sum(`2016`, na.rm = TRUE), new_n = length(unique(company_id)))

masterdata_debt %>% 
  filter(company_id %in% ar_advanced_company_indicators$company_id) %>% 
  group_by(sector) %>% 
  summarise(old_2016 = sum(`_2016`, na.rm = TRUE), old_n = length(unique(company_id))) %>% 
  full_join(fin_control_data, by = c(sector = "asset_sector")) %>% 
  mutate(
    new_2016 = if_else(is.na(new_2016), as.numeric(0), as.numeric(new_2016)),
    new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
  ) %>% 
  mutate(diff = old_2016 - new_2016) %>% 
  mutate(percent_diff = round(diff / old_2016 * 100, digits = 2))

#> # A tibble: 9 × 7
#>   sector   old_2016 old_n new_2016 new_n     diff percent_diff
#>   <chr>       <dbl> <int>    <dbl> <dbl>    <dbl>        <dbl>
#> 1 Aviation  3.24e12  1302  5.94e12  1609 -2.70e12        -83.3
#> 2 Cement    2.89e 9  1459  4.27e 9  1685 -1.39e 9        -48.0
#> 3 Coal      6.31e 9  1349  1.12e10  1629 -4.87e 9        -77.2
#> 4 HDV       0         283  0         426  0              NaN  
#> 5 LDV       9.36e 7   194  1.16e 8   300 -2.24e 7        -23.9
#> 6 Oil&Gas   3.06e11  2864  6.85e11  3864 -3.79e11       -124. 
#> 7 Power     6.12e 6 29582  1.28e 7 34012 -6.68e 6       -109. 
#> 8 Shipping  1.70e14  9844  2.53e14 10518 -8.30e13        -48.9
#> 9 Steel     1.66e 9   757  2.70e 9   924 -1.03e 9        -62.1

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions