forked from brittneyho/gw_r_assignment_transition
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path03_compare_current_with_previous.R
More file actions
120 lines (80 loc) · 3.75 KB
/
03_compare_current_with_previous.R
File metadata and controls
120 lines (80 loc) · 3.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
library(tidyverse)
library(janitor)
library(readxl)
library(writexl)
# Re-run the scrapers against the live Biden site before comparing.
# The two scripts are sourced in this order. Presumably they write the
# processed_data/*_scraped.rds files read further down -- TODO confirm.
# NOTE(review): this hits the live site on every run of this script.
source("01_scrape_agencyteams.R")
source("02_scrape_nominees.R")
#### WHITE HOUSE SENIOR STAFF ##### --------------------------------------------------------
#Assignment Part 1:
#Write your code here that will compare the current white house staff names on the site to the
#archived rds file stored in the archived_data folder to determine which names are new.
# https://buildbackbetter.gov/the-administration/white-house-senior-staff/
#You can use the filled-in version for the agency teams below to help model yours after if it's helpful,
#since the steps should be much the same.
#Your code here#
# Latest scraped senior staff data. Wrapping an assignment in parentheses
# prints the value at top level, just like typing the name on its own line.
(current_senior_staff <- readRDS("processed_data/staff_data_scraped.rds"))

# Archived snapshot used as the baseline for the comparison.
(previous_senior_staff <- readRDS(
  "archived_data/staff_data_archived_2020_11_24t14_00.rds"
))

# Keep only the current rows whose idstring has no match in the archive.
(new_senior_staff_names <- current_senior_staff %>%
  anti_join(previous_senior_staff, by = "idstring"))
#### AGENCY TEAMS ##### --------------------------------------------------------

# Load the current scrape and the archived snapshot to compare against.
current_transition <- readRDS("processed_data/transition_data_scraped.rds")
current_transition

previous_transition <- readRDS("archived_data/transition_data_archived_2020_11_24t09_52.rds")
previous_transition

# Rows of the current scrape whose idstring does not appear in the archive.
new_transition_names <- anti_join(current_transition, previous_transition, by = "idstring")
new_transition_names

# Number of rows per agency, current vs. archived.
count_current_agencies <- current_transition %>%
  count(agency, name = "current_agency_count")
count_current_agencies

count_previous_agencies <- previous_transition %>%
  count(agency, name = "previous_agency_count")
count_previous_agencies

# One row per agency in the current scrape. previous_agency_count is NA for
# an agency that has no rows in the archived snapshot.
count_compare_agencies <- left_join(count_current_agencies, count_previous_agencies, by = "agency")
count_compare_agencies

# Add the change column. A missing previous count is treated as 0 so that a
# newly added agency shows its full count as the change instead of NA.
count_difference_agencies <- count_compare_agencies %>%
  mutate(
    change = current_agency_count - coalesce(previous_agency_count, 0L)
  )
count_difference_agencies

# Full current dataset under the name used for the saved file.
agencies_transition_team <- current_transition

# Save results.
saveRDS(new_transition_names, "processed_data/new_transition_names.rds")
# Save the comparison WITH the change column; previously the computed
# difference was discarded and the file lacked it.
saveRDS(count_difference_agencies, "processed_data/count_compare_agencies.rds")
saveRDS(agencies_transition_team, "processed_data/agencies_transition_team.rds")
### COMPARE agency team members with previous archived version ######

# Load the current scraped data.
transition_data_current <- readRDS("processed_data/transition_data_scraped.rds")
transition_data_current

# Load the archived data to compare against. Swap in the commented-out line
# to compare against the other archived snapshot instead.
transition_data_previous <- readRDS("archived_data/transition_data_archived_2020_11_24t09_52.rds")
# transition_data_previous <- readRDS("archived_data/transition_data_archived_2020_11_25t09_34.rds")
transition_data_previous

# New records: rows in the current data whose idstring is not in the archive.
newnames <- anti_join(transition_data_current, transition_data_previous, by = "idstring")

# See what we have.
newnames


# Compare TOTALS by department #######

# Row counts per agency in each version.
agencycount_current <- transition_data_current %>%
  count(agency, name = "current_count")
agencycount_current

agencycount_previous <- transition_data_previous %>%
  count(agency, name = "previous_count")
agencycount_previous

# Join: one row per agency in the current data. previous_count is NA for an
# agency with no rows in the archived version.
agencycount_compare <- left_join(agencycount_current, agencycount_previous, by = "agency")
agencycount_compare

# Add the change column. A missing previous count is treated as 0 so that a
# newly added agency reports its full count as the change rather than NA.
agencycount_compare <- agencycount_compare %>%
  mutate(
    change = current_count - coalesce(previous_count, 0L)
  )

# We'll create a NEW NAMED OBJECT to use from here on out for the full dataset.
agencyteams <- transition_data_current


### SAVE results ####

# Names of new agency review team members.
saveRDS(newnames, "processed_data/newnames.rds")
# Aggregate count of agency totals compared.
saveRDS(agencycount_compare, "processed_data/agencycount_compare.rds")
# Entire combined agency teams file.
saveRDS(agencyteams, "processed_data/agencyteams.rds")