-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathGSP787-Insights From Data With Bigquery Challenge Lab.txt
216 lines (181 loc) · 6.17 KB
/
GSP787-Insights From Data With Bigquery Challenge Lab.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
Insights From Data With Bigquery Challenge Lab
For any assist & query Connect me :
Linkedin : https://www.linkedin.com/in/sahoo-ashutosh/
Discord : AshuKulu#6975
In the Cloud Console, navigate to BigQuery
Click +ADD DATA > Explore public datasets > Search 'covid19_open_data' and then select 'COVID-19 Open Data'
Locate the table 'covid19_open_data' under the 'covid19_open_data' dataset
click +Compose new query and run following queries
##Task - 1 : Total Confirmed Cases
SELECT sum(cumulative_confirmed) as total_cases_worldwide
FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE date='2020-04-15'
#-#-#-#-#
##Task - 2 : Worst Affected Areas
with deaths_by_states as (
SELECT subregion1_name as state, sum(cumulative_deceased) as death_count
FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
where country_name="United States of America" and date='2020-04-10' and subregion1_name is NOT NULL
group by subregion1_name
)
select count(*) as count_of_states
from deaths_by_states
where death_count > 100
#-#-#-#-#
##Task - 3 : Identifying Hotspots
SELECT * FROM (
SELECT subregion1_name as state, sum(cumulative_confirmed) as total_confirmed_cases
FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE country_code="US" AND date='2020-04-10' AND subregion1_name is NOT NULL
GROUP BY subregion1_name
ORDER BY total_confirmed_cases DESC
)
WHERE total_confirmed_cases > 1000
#-#-#-#-#
##Task - 4 : Fatality Ratio
SELECT sum(cumulative_confirmed) as total_confirmed_cases, sum(cumulative_deceased) as total_deaths, (sum(cumulative_deceased)/sum(cumulative_confirmed))*100 as case_fatality_ratio
FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
where country_name="Italy" AND date BETWEEN '2020-04-01'and '2020-04-30'
#-#-#-#-#
##Task - 5 : Identifying specific day
SELECT date
FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
where country_name="Italy" and cumulative_deceased>10000
order by date asc
limit 1
#-#-#-#-#
##Task - 6 : Finding days with zero net new cases
WITH india_cases_by_date AS (
SELECT
date,
SUM( cumulative_confirmed ) AS cases
FROM
`bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE
country_name ="India"
AND date between '2020-02-21' and '2020-03-15'
GROUP BY
date
ORDER BY
date ASC
)
, india_previous_day_comparison AS
(SELECT
date,
cases,
LAG(cases) OVER(ORDER BY date) AS previous_day,
cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases
FROM india_cases_by_date
)
select count(*)
from india_previous_day_comparison
where net_new_cases=0
#-#-#-#-#
##Task - 7 : Doubling rate
WITH us_cases_by_date AS (
SELECT
date,
SUM(cumulative_confirmed) AS cases
FROM
`bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE
country_name="United States of America"
AND date between '2020-03-22' and '2020-04-20'
GROUP BY
date
ORDER BY
date ASC
)
, us_previous_day_comparison AS
(SELECT
date,
cases,
LAG(cases) OVER(ORDER BY date) AS previous_day,
cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases,
(cases - LAG(cases) OVER(ORDER BY date))*100/LAG(cases) OVER(ORDER BY date) AS percentage_increase
FROM us_cases_by_date
)
select Date, cases as Confirmed_Cases_On_Day, previous_day as Confirmed_Cases_Previous_Day, percentage_increase as Percentage_Increase_In_Cases
from us_previous_day_comparison
where percentage_increase > 10
#-#-#-#-#
##Task - 8 : Recovery rate
WITH cases_by_country AS (
SELECT
country_name AS country,
sum(cumulative_confirmed) AS cases,
sum(cumulative_recovered) AS recovered_cases
FROM
bigquery-public-data.covid19_open_data.covid19_open_data
WHERE
date = '2020-05-10'
GROUP BY
country_name
)
, recovered_rate AS
(SELECT
country, cases, recovered_cases,
(recovered_cases * 100)/cases AS recovery_rate
FROM cases_by_country
)
SELECT country, cases AS confirmed_cases, recovered_cases, recovery_rate
FROM recovered_rate
WHERE cases > 50000
ORDER BY recovery_rate desc
LIMIT 10
#-#-#-#-#
##Task - 9 : CDGR - Cumulative Daily Growth Rate
WITH
france_cases AS (
SELECT
date,
SUM(cumulative_confirmed) AS total_cases
FROM
`bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE
country_name="France"
AND date IN ('2020-01-24',
'2020-05-10')
GROUP BY
date
ORDER BY
date)
, summary as (
SELECT
total_cases AS first_day_cases,
LEAD(total_cases) OVER(ORDER BY date) AS last_day_cases,
DATE_DIFF(LEAD(date) OVER(ORDER BY date),date, day) AS days_diff
FROM
france_cases
LIMIT 1
)
select first_day_cases, last_day_cases, days_diff, POW((last_day_cases/first_day_cases),(1/days_diff))-1 as cdgr
from summary
#-#-#-#-#
##Task - 10 : Create a Datastudio report
SELECT
date, SUM(cumulative_confirmed) AS country_cases,
SUM(cumulative_deceased) AS country_deaths
FROM
`bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE
date BETWEEN '2020-03-15'
AND '2020-04-30'
AND country_name ="United States of America"
GROUP BY date
Click Open 'Google Data Studio' from Qwiklabs page
Hint: Do not use Explore with Data Studio option from BigQuery.
You may fail to create a report for the first-time, login again to Data Studio. Click + Blank Report and accept the Terms of Service.
Select a country and accept to continue
Choose Bigquery and Authorize Data Studio to access BigQuery.
In Public Datasets section choose your 'Qwiklab project ID' > select 'covid19_open_data' in public datasets > again select 'covid19_open_data' in table
Or
In Custom Query section choose your 'Qwiklab project ID' > run above query
Click Add to report
In the new Data Studio report, select Add a chart > Time series Chart.
Add 'country_cases' and 'country_deaths' to the Metric field.
Scroll down to filter field > Add a filter > include > select a field 'date' > select a condition 'Equal to (=)' > choose date 2020-03-15
Click 'AND' > include > select a field 'date' > select a condition 'Equal to (=)' > choose date 2020-04-30
Click Save to commit the change.
Wait for few minutes if your progress is not checked
Congratulations! Done with the challenge lab.