gw_r_assignment_transition/03_compare_current_with_previous.R at main · jacobliedke/gw_r_assignment_transition · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
library(tidyverse)
library(janitor)
library(readxl)
library(writexl)

#to run new scrape(s) from the live Biden site currently:
source("01_scrape_agencyteams.R")
source("02_scrape_nominees.R")


#### WHITE HOUSE SENIOR STAFF ##### --------------------------------------------------------

#Assignment Part 1:

#Write your code here that will compare the current white house staff names on the site to the
#archived rds file stored in the archived_data folder to determine which names are new.

# https://buildbackbetter.gov/the-administration/white-house-senior-staff/

#You can use the filled-in version for the agency teams below to help model yours after if it's helpful,
#since the steps should be much the same.


#Your code here#
# Load the scraped senior staff data and print it for inspection.
current_senior_staff <- readRDS(file = "processed_data/staff_data_scraped.rds")
current_senior_staff

# Load the archived senior staff snapshot to compare against.
previous_senior_staff <- readRDS(
  file = "archived_data/staff_data_archived_2020_11_24t14_00.rds"
)
previous_senior_staff

# Keep only the current rows whose idstring has no match in the archive,
# i.e. the names added since the archived snapshot.
new_senior_staff_names <- current_senior_staff %>%
  anti_join(previous_senior_staff, by = "idstring")
new_senior_staff_names

#### AGENCY TEAMS ##### --------------------------------------------------------
# Load the scraped agency team data and print it for inspection.
current_transition <- readRDS(file = "processed_data/transition_data_scraped.rds")
current_transition

# Load the archived agency team snapshot to compare against.
previous_transition <- readRDS(
  file = "archived_data/transition_data_archived_2020_11_24t09_52.rds"
)
previous_transition

# Rows of the current data whose idstring does not appear in the archive.
new_transition_names <- current_transition %>%
  anti_join(previous_transition, by = "idstring")
new_transition_names

# Number of rows per agency in the current data.
count_current_agencies <- current_transition %>%
  count(agency, name = "current_agency_count")
count_current_agencies

# Number of rows per agency in the archived data.
count_previous_agencies <- previous_transition %>%
  count(agency, name = "previous_agency_count")
count_previous_agencies

# Side-by-side counts. left_join keeps every agency present in the current
# data; an agency with no match in the archive gets NA in
# previous_agency_count.
count_compare_agencies <- left_join(
  count_current_agencies,
  count_previous_agencies,
  by = "agency"
)
count_compare_agencies

# Change in row count per agency. An agency missing from the archive is
# treated as having had 0 rows, so its change equals its current count instead
# of propagating NA. previous_agency_count itself is left as NA so that "no
# archived record" stays distinguishable in the table.
count_difference_agencies <- count_compare_agencies %>%
  mutate(
    change = current_agency_count - coalesce(previous_agency_count, 0L)
  )

# Full current dataset under the name used for the saved file below.
agencies_transition_team <- current_transition

# Write the results to processed_data.
# NOTE(review): count_compare_agencies is saved without a change column; the
# change column exists only in count_difference_agencies, which is not saved
# in this section -- confirm that is intended.
saveRDS(
  object = new_transition_names,
  file = "processed_data/new_transition_names.rds"
)

saveRDS(
  object = count_compare_agencies,
  file = "processed_data/count_compare_agencies.rds"
)

saveRDS(
  object = agencies_transition_team,
  file = "processed_data/agencies_transition_team.rds"
)

### COMPARE agency team members with previous archived version ######

# Load the current scraped data and print it for inspection.
transition_data_current <- readRDS(file = "processed_data/transition_data_scraped.rds")
transition_data_current

# Load the archived data to compare against.
# Alternate archive, kept for reference:
# transition_data_previous <- readRDS("archived_data/transition_data_archived_2020_11_25t09_34.rds")
transition_data_previous <- readRDS(
  file = "archived_data/transition_data_archived_2020_11_24t09_52.rds"
)
transition_data_previous

# Records added since the archive: current rows whose idstring has no match
# in the archived data.
newnames <- transition_data_current %>%
  anti_join(transition_data_previous, by = "idstring")

# Inspect the result.
newnames


# Compare TOTALS by department #######

# Number of rows per agency in the current data.
agencycount_current <- transition_data_current %>%
  count(agency, name = "current_count")

agencycount_current

# Number of rows per agency in the archived data.
agencycount_previous <- transition_data_previous %>%
  count(agency, name = "previous_count")

agencycount_previous

# Join the two sets of counts. left_join keeps every agency present in the
# current data; an agency with no match in the archive gets NA in
# previous_count.
agencycount_compare <- left_join(agencycount_current, agencycount_previous, by = "agency")
agencycount_compare

# Add the change column. An agency missing from the archive is treated as
# having had 0 rows, so its change equals its current count instead of
# propagating NA. previous_count itself is left as NA so that "no archived
# record" stays distinguishable in the table.
agencycount_compare <- agencycount_compare %>%
  mutate(
    change = current_count - coalesce(previous_count, 0L)
  )


# Give the full current dataset a new name to use from here on out.
agencyteams <- transition_data_current


### SAVE results ####

# Names of new agency review team members.
saveRDS(object = newnames, file = "processed_data/newnames.rds")

# Aggregate count of agency totals compared.
saveRDS(
  object = agencycount_compare,
  file = "processed_data/agencycount_compare.rds"
)

# Entire combined agency teams file.
saveRDS(object = agencyteams, file = "processed_data/agencyteams.rds")