-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathair.py
513 lines (335 loc) · 23.2 KB
/
air.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
#Required Libraries
import pandas as pd
import streamlit as st
from streamlit_option_menu import option_menu
pd.set_option('display.max_columns', None)
import plotly.express as px
import warnings
warnings.filterwarnings("ignore")
import plotly.graph_objects as go
#import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
# Streamlit part
# Page-level configuration, the shared data loader and the sidebar navigation.
st.set_page_config(layout="wide")
st.title("AIRBNB DATA ANALYSIS")
st.write("")


@st.cache_data
def datafr():
    """Load the Airbnb listings from ``Airbnb.csv`` in the working directory.

    Streamlit re-executes this script on every widget interaction, so the
    parsed CSV is cached with ``st.cache_data`` instead of being re-read from
    disk on each rerun.

    NOTE: because of the cache, edits made to ``Airbnb.csv`` while the app is
    running are not picked up until the cache is cleared or the app restarts.

    Returns:
        pandas.DataFrame: the CSV contents as returned by ``pd.read_csv``.
    """
    return pd.read_csv("Airbnb.csv")


df = datafr()

# ``select`` holds the name of the page chosen in the sidebar menu; the
# page sections below compare against it.
with st.sidebar:
    select = option_menu("Main Menu", ["Home", "Data Exploration", "About", "Contact"])
# "Home" page: centred logo followed by static descriptive text.
# NOTE(review): nesting reconstructed from a whitespace-stripped source; the
# image is assumed to be the only content of the centre column - confirm.
if select == "Home":
    # Three equal columns; only the middle one is populated.
    _left, centre, _right = st.columns(3)
    with centre:
        st.image(Image.open("airbnb.png"))

    marketplace_text = '''***Airbnb is an online marketplace that connects people who want to rent out
their property with people who are looking for accommodations,
typically for short stays. Airbnb offers hosts a relatively easy way to
earn some income from their property.Guests often find that Airbnb rentals
are cheaper and homier than hotels.***'''

    company_text = '''***Airbnb Inc (Airbnb) operates an online platform for hospitality services.
The company provides a mobile application (app) that enables users to list,
discover, and book unique accommodations across the world.
The app allows hosts to list their properties for lease,
and enables guests to rent or lease on a short-term basis,
which includes vacation rentals, apartment rentals, homestays, castles,
tree houses and hotel rooms. The company has presence in China, India, Japan,
Australia, Canada, Austria, Germany, Switzerland, Belgium, Denmark, France, Italy,
Norway, Portugal, Russia, Spain, Sweden, the UK, and others.
Airbnb is headquartered in San Francisco, California, the US.***'''

    background_text = '''***Airbnb was born in 2007 when two Hosts welcomed three guests to their
San Francisco home, and has since grown to over 4 million Hosts who have
welcomed over 1.5 billion guest arrivals in almost every country across the globe.***'''

    st.header("About Airbnb")
    st.write("")
    st.write(marketplace_text)
    st.write("")
    st.write(company_text)

    st.header("Background of Airbnb")
    st.write("")
    st.write(background_text)
# "Data Exploration" page: five tabs of interactive charts built from the
# module-level ``df`` loaded from Airbnb.csv.
# NOTE(review): nesting reconstructed from a whitespace-stripped source -
# confirm the column/tab placement against the running app.


def _availability_sunburst(frame, value_column, title, colors):
    """Return a room_type > bed_type > is_location_exact sunburst sized by *value_column*."""
    return px.sunburst(
        frame,
        path=["room_type", "bed_type", "is_location_exact"],
        values=value_column,
        width=600,
        height=500,
        title=title,
        color_discrete_sequence=colors,
    )


if select == "Data Exploration":
    tab1, tab2, tab3, tab4, tab5 = st.tabs(["***PRICE ANALYSIS***","***AVAILABILITY ANALYSIS***","***LOCATION BASED***", "***GEOSPATIAL VISUALIZATION***", "***TOP CHARTS***"])

    # ------------------------------------------------------------------
    # Tab 1: price, drilled down by country -> room type -> property type
    # -> host response time.
    # ------------------------------------------------------------------
    with tab1:
        st.title("**PRICE DIFFERENCE**")
        col1, col2 = st.columns(2)

        with col1:
            country = st.selectbox("Select the Country", df["country"].unique())
            df1 = df[df["country"] == country].reset_index(drop=True)

            room_ty = st.selectbox("Select the Room Type", df1["room_type"].unique())
            df2 = df1[df1["room_type"] == room_ty].reset_index(drop=True)

            # Column sums per property type.
            df_bar = (
                df2.groupby("property_type")[["price", "review_scores", "number_of_reviews"]]
                .sum()
                .reset_index()
            )
            fig_bar = px.bar(
                df_bar,
                x="property_type",
                y="price",
                title="PRICE FOR PROPERTY_TYPES",
                hover_data=["number_of_reviews", "review_scores"],
                color_discrete_sequence=px.colors.sequential.Redor_r,
                width=600,
                height=500,
            )
            st.plotly_chart(fig_bar)

        with col2:
            # Empty writes act as vertical spacers.
            for _ in range(5):
                st.write("")

            proper_ty = st.selectbox("Select the Property_type", df2["property_type"].unique())
            df4 = df2[df2["property_type"] == proper_ty].reset_index(drop=True)

            df_pie = df4.groupby("host_response_time")[["price", "bedrooms"]].sum().reset_index()
            fig_pi = px.pie(
                df_pie,
                values="price",
                names="host_response_time",
                hover_data=["bedrooms"],
                color_discrete_sequence=px.colors.sequential.BuPu_r,
                title="PRICE DIFFERENCE BASED ON HOST RESPONSE TIME",
                width=600,
                height=500,
            )
            st.plotly_chart(fig_pi)

        col1, col2 = st.columns(2)

        with col1:
            hostresponsetime = st.selectbox("Select the host_response_time", df4["host_response_time"].unique())
            df5 = df4[df4["host_response_time"] == hostresponsetime]

            df_do_bar = (
                df5.groupby("bed_type")[["minimum_nights", "maximum_nights", "price"]]
                .sum()
                .reset_index()
            )
            # hover_data is passed as a list: a bare string is only accepted
            # by newer plotly releases.
            fig_do_bar = px.bar(
                df_do_bar,
                x="bed_type",
                y=["minimum_nights", "maximum_nights"],
                title="MINIMUM NIGHTS AND MAXIMUM NIGHTS",
                hover_data=["price"],
                barmode="group",
                color_discrete_sequence=px.colors.sequential.Rainbow,
                width=600,
                height=500,
            )
            st.plotly_chart(fig_do_bar)

        with col2:
            for _ in range(5):
                st.write("")

            df_do_bar_2 = (
                df5.groupby("bed_type")[["bedrooms", "beds", "accommodates", "price"]]
                .sum()
                .reset_index()
            )
            fig_do_bar_2 = px.bar(
                df_do_bar_2,
                x="bed_type",
                y=["bedrooms", "beds", "accommodates"],
                title="BEDROOMS AND BEDS ACCOMMODATES",
                hover_data=["price"],
                barmode="group",
                color_discrete_sequence=px.colors.sequential.Rainbow_r,
                width=600,
                height=500,
            )
            st.plotly_chart(fig_do_bar_2)

    # ------------------------------------------------------------------
    # Tab 2: availability over 30/60/90/365 days.
    # ------------------------------------------------------------------
    with tab2:
        # Reuse the frame loaded at start-up instead of re-reading
        # Airbnb.csv; the filters below never modify it in place.
        df_a = df

        st.title("**AVAILABILITY ANALYSIS**")
        col1, col2 = st.columns(2)

        with col1:
            country_a = st.selectbox("Select the Country_a", df_a["country"].unique())
            df1_a = df_a[df_a["country"] == country_a].reset_index(drop=True)

            property_ty_a = st.selectbox("Select the Property Type", df1_a["property_type"].unique())
            df2_a = df1_a[df1_a["property_type"] == property_ty_a].reset_index(drop=True)

            df_a_sunb_30 = _availability_sunburst(
                df2_a, "availability_30", "Availability_30", px.colors.sequential.Peach_r
            )
            st.plotly_chart(df_a_sunb_30)

        with col2:
            # Spacers so this chart lines up with the one beside it, which
            # sits below two select boxes.
            for _ in range(10):
                st.write("")

            df_a_sunb_60 = _availability_sunburst(
                df2_a, "availability_60", "Availability_60", px.colors.sequential.Blues_r
            )
            st.plotly_chart(df_a_sunb_60)

        col1, col2 = st.columns(2)

        with col1:
            df_a_sunb_90 = _availability_sunburst(
                df2_a, "availability_90", "Availability_90", px.colors.sequential.Aggrnyl_r
            )
            st.plotly_chart(df_a_sunb_90)

        with col2:
            df_a_sunb_365 = _availability_sunburst(
                df2_a, "availability_365", "Availability_365", px.colors.sequential.Greens_r
            )
            st.plotly_chart(df_a_sunb_365)

        roomtype_a = st.selectbox("Select the Room Type_a", df2_a["room_type"].unique())
        df3_a = df2_a[df2_a["room_type"] == roomtype_a]

        df_mul_bar_a = (
            df3_a.groupby("host_response_time")[
                ["availability_30", "availability_60", "availability_90", "availability_365", "price"]
            ]
            .sum()
            .reset_index()
        )
        fig_df_mul_bar_a = px.bar(
            df_mul_bar_a,
            x="host_response_time",
            y=["availability_30", "availability_60", "availability_90", "availability_365"],
            title="AVAILABILITY BASED ON HOST RESPONSE TIME",
            hover_data=["price"],
            barmode="group",
            color_discrete_sequence=px.colors.sequential.Rainbow_r,
            width=1000,
        )
        st.plotly_chart(fig_df_mul_bar_a)

    # ------------------------------------------------------------------
    # Tab 3: location analysis within a selectable price band.
    # ------------------------------------------------------------------
    with tab3:
        st.title("LOCATION ANALYSIS")
        st.write("")

        # Same data as ``df``; no second read of the CSV is needed.
        df_l = df

        country_l = st.selectbox("Select the Country_l", df_l["country"].unique())
        df1_l = df_l[df_l["country"] == country_l].reset_index(drop=True)

        proper_ty_l = st.selectbox("Select the Property_type_l", df1_l["property_type"].unique())
        df2_l = df1_l[df1_l["property_type"] == proper_ty_l].reset_index(drop=True)

        st.write("")

        # Band boundaries at 30% and 60% of the min-to-max price range.
        price_min = df2_l["price"].min()
        price_max = df2_l["price"].max()
        differ_max_min = price_max - price_min
        cut_30 = differ_max_min * 0.30 + price_min
        cut_60 = differ_max_min * 0.60 + price_min

        # Radio labels are built once and shared with select_the_df so the
        # comparison cannot drift from what the widget displays.
        label_low = f"{price_min!s} to {cut_30!s} (30% of the Value)"
        label_mid = f"{cut_30!s} to {cut_60!s} (30% to 60% of the Value)"
        label_high = f"{cut_60!s} to {price_max!s} (60% to 100% of the Value)"

        def select_the_df(sel_val):
            """Return the rows of ``df2_l`` whose price falls in the chosen band.

            Args:
                sel_val: one of the three radio labels built above.

            Returns:
                A re-indexed DataFrame for the matching band (both band
                edges inclusive), or ``None`` when ``sel_val`` matches no
                label.
            """
            prices = df2_l["price"]
            if sel_val == label_low:
                in_band = prices <= cut_30
            elif sel_val == label_mid:
                in_band = (prices >= cut_30) & (prices <= cut_60)
            elif sel_val == label_high:
                in_band = prices >= cut_60
            else:
                return None
            return df2_l[in_band].reset_index(drop=True)

        val_sel = st.radio("Select the Price Range", [label_low, label_mid, label_high])

        df_val_sel = select_the_df(val_sel)
        st.dataframe(df_val_sel)

        # checking the correlation
        dropped_for_corr = [
            "listing_url", "name", "property_type",
            "room_type", "bed_type", "cancellation_policy",
            "images", "host_url", "host_name", "host_location",
            "host_response_time", "host_thumbnail_url",
            "host_response_rate", "host_is_superhost", "host_has_profile_pic",
            "host_picture_url", "host_neighbourhood",
            "host_identity_verified", "host_verifications",
            "street", "suburb", "government_area", "market",
            "country", "country_code", "location_type", "is_location_exact",
            "amenities",
        ]
        # Keep only numeric/bool columns after the explicit drop so that a
        # text column missing from the list above cannot make corr() raise.
        df_val_sel_corr = (
            df_val_sel.drop(columns=dropped_for_corr)
            .select_dtypes(include=["number", "bool"])
            .corr()
        )
        st.dataframe(df_val_sel_corr)

        df_val_sel_gr = (
            df_val_sel.groupby("accommodates")[["cleaning_fee", "bedrooms", "beds", "extra_people"]]
            .sum()
            .reset_index()
        )
        fig_1 = px.bar(
            df_val_sel_gr,
            x="accommodates",
            y=["cleaning_fee", "bedrooms", "beds"],
            title="ACCOMMODATES",
            hover_data=["extra_people"],
            barmode="group",
            color_discrete_sequence=px.colors.sequential.Rainbow_r,
            width=1000,
        )
        st.plotly_chart(fig_1)

        room_ty_l = st.selectbox("Select the Room_Type_l", df_val_sel["room_type"].unique())
        df_val_sel_rt = df_val_sel[df_val_sel["room_type"] == room_ty_l]

        fig_2 = px.bar(
            df_val_sel_rt,
            x=["street", "host_location", "host_neighbourhood"],
            y="market",
            title="MARKET",
            hover_data=["name", "host_name", "market"],
            barmode="group",
            orientation="h",
            color_discrete_sequence=px.colors.sequential.Rainbow_r,
            width=1000,
        )
        st.plotly_chart(fig_2)

        fig_3 = px.bar(
            df_val_sel_rt,
            x="government_area",
            y=["host_is_superhost", "host_neighbourhood", "cancellation_policy"],
            title="GOVERNMENT_AREA",
            hover_data=["guests_included", "location_type"],
            barmode="group",
            color_discrete_sequence=px.colors.sequential.Rainbow_r,
            width=1000,
        )
        st.plotly_chart(fig_3)

    # ------------------------------------------------------------------
    # Tab 4: map of all listings, coloured by price and sized by capacity.
    # ------------------------------------------------------------------
    with tab4:
        st.title("GEOSPATIAL VISUALIZATION")
        st.write("")

        fig_4 = px.scatter_mapbox(
            df,
            lat="latitude",
            lon="longitude",
            color="price",
            size="accommodates",
            color_continuous_scale="rainbow",
            hover_name="name",
            range_color=(0, 49000),
            mapbox_style="carto-positron",
            zoom=1,
        )
        fig_4.update_layout(width=1150, height=800, title="Geospatial Distribution of Listings")
        st.plotly_chart(fig_4)

    # ------------------------------------------------------------------
    # Tab 5: price totals/averages by host area and per-listing price bars.
    # ------------------------------------------------------------------
    with tab5:
        country_t = st.selectbox("Select the Country_t", df["country"].unique())
        df1_t = df[df["country"] == country_t]

        property_ty_t = st.selectbox("Select the Property_type_t", df1_t["property_type"].unique())
        df2_t = df1_t[df1_t["property_type"] == property_ty_t].reset_index(drop=True)

        df2_t_sorted = df2_t.sort_values(by="price").reset_index(drop=True)

        df_price = df2_t_sorted.groupby("host_neighbourhood")["price"].agg(["sum", "mean"]).reset_index()
        df_price.columns = ["host_neighbourhood", "Total_price", "Avarage_price"]

        col1, col2 = st.columns(2)

        with col1:
            fig_price = px.bar(
                df_price,
                x="Total_price",
                y="host_neighbourhood",
                orientation="h",
                title="PRICE BASED ON HOST_NEIGHBOURHOOD",
                width=600,
                height=800,
            )
            st.plotly_chart(fig_price)

        with col2:
            fig_price_2 = px.bar(
                df_price,
                x="Avarage_price",
                y="host_neighbourhood",
                orientation="h",
                title="AVERAGE PRICE BASED ON HOST_NEIGHBOURHOOD",
                width=600,
                height=800,
            )
            st.plotly_chart(fig_price_2)

        col1, col2 = st.columns(2)

        with col1:
            df_price_1 = df2_t_sorted.groupby("host_location")["price"].agg(["sum", "mean"]).reset_index()
            df_price_1.columns = ["host_location", "Total_price", "Avarage_price"]

            fig_price_3 = px.bar(
                df_price_1,
                x="Total_price",
                y="host_location",
                orientation="h",
                width=600,
                height=800,
                color_discrete_sequence=px.colors.sequential.Bluered_r,
                title="PRICE BASED ON HOST_LOCATION",
            )
            st.plotly_chart(fig_price_3)

        with col2:
            fig_price_4 = px.bar(
                df_price_1,
                x="Avarage_price",
                y="host_location",
                orientation="h",
                width=600,
                height=800,
                color_discrete_sequence=px.colors.sequential.Bluered_r,
                title="AVERAGE PRICE BASED ON HOST_LOCATION",
            )
            st.plotly_chart(fig_price_4)

        room_type_t = st.selectbox("Select the Room_Type_t", df2_t_sorted["room_type"].unique())
        df3_t = df2_t_sorted[df2_t_sorted["room_type"] == room_type_t]

        df3_t_sorted_price = df3_t.sort_values(by="price").reset_index(drop=True)

        # NOTE(review): the name says "top 50" but head(100) keeps 100 rows,
        # and the ascending sort means these are the lowest-priced listings -
        # confirm the intended selection.
        df3_top_50_price = df3_t_sorted_price.head(100)
        top_price = df3_top_50_price["price"].max()

        fig_top_50_price_1 = px.bar(
            df3_top_50_price,
            x="name",
            y="price",
            color="price",
            color_continuous_scale="rainbow",
            range_color=(0, top_price),
            title="MINIMUM_NIGHTS MAXIMUM_NIGHTS AND ACCOMMODATES",
            width=1200,
            height=800,
            hover_data=["minimum_nights", "maximum_nights", "accommodates"],
        )
        st.plotly_chart(fig_top_50_price_1)

        fig_top_50_price_2 = px.bar(
            df3_top_50_price,
            x="name",
            y="price",
            color="price",
            color_continuous_scale="greens",
            title="BEDROOMS, BEDS, ACCOMMODATES AND BED_TYPE",
            range_color=(0, top_price),
            width=1200,
            height=800,
            hover_data=["accommodates", "bedrooms", "beds", "bed_type"],
        )
        st.plotly_chart(fig_top_50_price_2)
# "About" page: static description of the project workflow and the
# features analysed.
if select == "About":
    st.header("ABOUT THIS PROJECT")

    st.subheader(":orange[1. Data Collection:]")
    st.write('''***Gather data from MongoDB Atlas database .
Collect information on listings, hosts, reviews, pricing, and location data.***''')
    st.link_button("Data","https://www.mongodb.com/cloud/atlas/efficiency?utm_content=rlsavisitor&utm_source=google&utm_campaign=search_gs_pl_evergreen_atlas_core_retarget-brand_gic-null_apac-all_ps-all_desktop_eng_lead&utm_term=mongodb%20atlas&utm_medium=cpc_paid_search&utm_ad=e&utm_ad_campaign_id=14412646476&adgroup=131761130772&cq_cmp=14412646476&gad=1&gclid=EAIaIQobChMIp8zhuOSEgAMVTw2DAx0aewI4EAAYASABEgIv__D_BwE")

    st.subheader(":orange[2. Data Cleaning and Preprocessing:]")
    st.write('''***Clean and preprocess the data to handle missing values, outliers, and ensure data quality.
Convert data types, handle duplicates, and standardize formats.***''')

    st.subheader(":orange[3. Exploratory Data Analysis (EDA):]")
    st.write('''***Conduct exploratory data analysis to understand the distribution and patterns in the data.
Explore relationships between variables and identify potential insights.***''')

    st.subheader(":orange[4. Visualization:]")
    st.write('''***Create visualizations to represent key metrics and trends.
Use charts, graphs, and maps to convey information effectively.
Consider using tools like Matplotlib, Seaborn, or Plotly for visualizations.***''')

    st.subheader(":orange[5. Geospatial Analysis:]")
    st.write('''***Utilize geospatial analysis to understand the geographical distribution of listings.
Map out popular areas, analyze neighborhood characteristics, and visualize pricing variations.***''')

    st.header("Features Analysis")
    # One markdown line per analysed feature. The last entry says 365 days
    # to match the ``availability_365`` column charted on the availability
    # tab (the text previously said 360).
    feature_notes = (
        ":green[Property Insights:] Analyze the total number of properties based on property type, room type, and bed type.",
        ":green[Stay Duration Analysis:] Investigate the minimum and maximum nights guests typically stay.",
        ":green[Cancellation Policy Impact:] Understand the impact of cancellation policies on booking trends.",
        ":green[Accommodation Metrics:] Explore accommodates, bedrooms, and beds-related statistics.",
        ":green[Review Analysis:] Examine total reviews, average review scores, and the distribution of reviews.",
        ":green[Bathroom and Pricing Analysis:] Investigate bathroom count, pricing, cleaning prices, and extra guest charges.",
        ":green[Guest Inclusion Trends:] Analyze the number of guests included in bookings.",
        ":green[Host Insights:] Explore host-related metrics, including host response time, response rate, and the number of properties hosted.",
        ":green[Geographic Analysis:] Investigate the market and country-level distribution of Airbnb listings.",
        ":green[Availability Trends:] Visualize property availability for the next 30, 60, 90, and 365 days.",
    )
    for note in feature_notes:
        st.markdown(note)
# "Contact" page: key findings plus the author's profile and social links.
# NOTE(review): nesting reconstructed from a whitespace-stripped source; the
# social links and the closing message are assumed to belong to the
# "My Profile" tab - confirm.
if select == "Contact":
    name = "kaleeswari S"
    description = "An Aspiring DATA-SCIENTIST..!"
    social_media = {
        "Youtube": "http://www.youtube.com/@RKaleeswari-08",
        "GITHUB": "https://github.com/Kaleeswari-S",
        "LINKEDIN": "https://www.linkedin.com/in/kaleeswari-s-081a392a6/",
        "KAGGLE": "https://www.kaggle.com/rkaleeswari",
    }

    tab1, tab2 = st.tabs(['Key Insights','My Profile'])

    with tab1:
        st.header("KEY INSIGHTS")
        st.subheader(":rainbow[What I learn from this Project] >>>")
        insights = (
            "⭐The price increases as the extra people increases",
            "⭐The price increases as the no of AMENITIES increases",
            "⭐The price increases as the no of BED increases",
            "⭐In property type APARTMENT has more price",
            "⭐In room type ENTIRE ROOM has more price",
            "⭐MARCH month has more price comparitively to other months",
            "⭐The MINIMUM NIGHT is 1 and the MAXIMUM NIGHT is 1125",
            "⭐As the Ratings increased Price also increases",
        )
        for insight in insights:
            st.markdown(insight)

    with tab2:
        col1, col2 = st.columns(2)

        with col1:
            st.image('airbnb_video.gif')

        with col2:
            st.title('Airbnb Analysis')
            st.write("The goal of this project is to analyze Airbnb data using MongoDB Atlas, perform data cleaning and preparation, develop interactive geospatial visualizations, and create dynamic plots to gain insights into pricing variations, availability patterns, and location-based trends.")
            st.link_button("Tableau Report","https://public.tableau.com/app/profile/kaleeswari.s/vizzes")
            st.write("---")
            # The original passed an undefined name ``mail`` here, which
            # raised NameError as soon as this tab rendered. No e-mail
            # address exists anywhere in the script, so the author details
            # that ARE defined (and were otherwise unused) are shown instead.
            st.subheader(name)
            st.write(description)

        st.write("#")

        # One column per social link, in dictionary order.
        cols = st.columns(len(social_media))
        for link_col, (platform, link) in zip(cols, social_media.items()):
            link_col.write(f"[{platform}]({link})")

        st.success('🙏Thank you for your golden time. Exiting the application')