Skip to content

Commit 87a7459

Browse files
author
WandrilleD
committed
tidying up the notebooks and the solutions
1 parent 8e75e01 commit 87a7459

8 files changed

Lines changed: 5974 additions & 255 deletions

01_data_manipulation_and_representation.ipynb

Lines changed: 77 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@
120120
"source": [
121121
"import pandas as pd\n",
122122
"\n",
123-
"df = pd.read_table(\"data/swiss_census_1880.csv\" , sep=',') \n",
123+
"df = pd.read_table(\"data/swiss_census_1880.csv\" , sep=',') \n",
124124
"#try to see what happens when sep has a different value\n",
125125
"\n",
126126
"df.head() # this returns the 5 first lines of the table"
@@ -198,6 +198,15 @@
198198
"df['Foreigner']"
199199
]
200200
},
201+
{
202+
"cell_type": "code",
203+
"execution_count": null,
204+
"metadata": {},
205+
"outputs": [],
206+
"source": [
207+
"df.Foreigner"
208+
]
209+
},
201210
{
202211
"cell_type": "markdown",
203212
"metadata": {},
@@ -257,6 +266,15 @@
257266
"maskVD.value_counts()"
258267
]
259268
},
269+
{
270+
"cell_type": "code",
271+
"execution_count": null,
272+
"metadata": {},
273+
"outputs": [],
274+
"source": [
275+
"maskVD"
276+
]
277+
},
260278
{
261279
"cell_type": "markdown",
262280
"metadata": {},
@@ -300,15 +318,27 @@
300318
"cell_type": "markdown",
301319
"metadata": {},
302320
"source": [
303-
"**micro-exercise :** Select towns with less than 1000 inhabitants (column `Total`), (*optional*: display only town name and number of inhabitants)"
321+
"**micro-exercise :** Select towns with less than 1000 inhabitants (column `Total`), (*optional*: display only town name and number of inhabitants)\n",
322+
"\n",
323+
"<details style=\"border: 2px solid #B8C3EA; margin: 1em 0.2em;padding: 0.5em; cursor: pointer;\"><summary>👁 View solution </summary>\n",
324+
"\n",
325+
"```python\n",
326+
"df.loc[ df.Total < 1000 , : ]\n",
327+
"```\n",
328+
"\n",
329+
" \n",
330+
"</details>\n",
331+
"\n"
304332
]
305333
},
306334
{
307335
"cell_type": "code",
308336
"execution_count": null,
309337
"metadata": {},
310338
"outputs": [],
311-
"source": []
339+
"source": [
340+
"## write your solution to the micro-exercise here...\n"
341+
]
312342
},
313343
{
314344
"cell_type": "markdown",
@@ -400,7 +430,17 @@
400430
"outputs": [],
401431
"source": [
402432
"# %load -r 9- solutions/solution_01_01.py\n",
403-
"#2. Create a new column is the `DataFrame` representing the fraction of population which is Reformed in each town."
433+
"# 2. Create a new column in the DataFrame representing the fraction of population \n",
434+
"# which is Reformed in each town. "
435+
]
436+
},
437+
{
438+
"cell_type": "code",
439+
"execution_count": null,
440+
"metadata": {},
441+
"outputs": [],
442+
"source": [
443+
"# optional : What is the minimum/maximum value for this fraction?"
404444
]
405445
},
406446
{
@@ -632,6 +672,7 @@
632672
"plotWithMeanMedianMode( dfFractions['0-14 y.o.'] , ax=axes[0])\n",
633673
"plotWithMeanMedianMode( dfFractions['Foreigner'] , ax=axes[1])\n",
634674
"plotWithMeanMedianMode( dfFractions['Reformed'] , ax=axes[2])\n",
675+
"f.tight_layout()\n",
635676
"plt.show()"
636677
]
637678
},
@@ -651,20 +692,13 @@
651692
"3. plot the distribution of the fraction of catholics in the canton of Zurich."
652693
]
653694
},
654-
{
655-
"cell_type": "code",
656-
"execution_count": null,
657-
"metadata": {},
658-
"outputs": [],
659-
"source": []
660-
},
661695
{
662696
"cell_type": "code",
663697
"execution_count": null,
664698
"metadata": {},
665699
"outputs": [],
666700
"source": [
667-
"# %load -r 1-7 solutions/solution_01_02.py\n",
701+
"# %load -r 1-11 solutions/solution_01_02.py\n",
668702
"# 1. plot the distribution of the total number of inhabitants. Try to choose an appropriate mode of representation (histogram, density line? number of bins?)"
669703
]
670704
},
@@ -674,7 +708,7 @@
674708
"metadata": {},
675709
"outputs": [],
676710
"source": [
677-
"# %load -r 10-14 solutions/solution_01_02.py\n",
711+
"# %load -r 12-17 solutions/solution_01_02.py\n",
678712
"# 2. try to call `sns.histplot` twice in a row, once to plot the fraction of Foreigner and the other for the fraction of Swiss. What happens?"
679713
]
680714
},
@@ -684,7 +718,7 @@
684718
"metadata": {},
685719
"outputs": [],
686720
"source": [
687-
"# %load -r 15- solutions/solution_01_02.py\n",
721+
"# %load -r 18- solutions/solution_01_02.py\n",
688722
"# 3. plot the distribution of the fraction of catholics in the canton of Zurich."
689723
]
690724
},
@@ -720,6 +754,13 @@
720754
"We will also create a column that describes the main religion and main language for each town:"
721755
]
722756
},
757+
{
758+
"cell_type": "code",
759+
"execution_count": null,
760+
"metadata": {},
761+
"outputs": [],
762+
"source": []
763+
},
723764
{
724765
"cell_type": "code",
725766
"execution_count": null,
@@ -778,6 +819,15 @@
778819
"dfFractions.groupby('majority language')['Catholic'].mean() ## mean fraction of catholics in towns depending on the majority language"
779820
]
780821
},
822+
{
823+
"cell_type": "code",
824+
"execution_count": null,
825+
"metadata": {},
826+
"outputs": [],
827+
"source": [
828+
"dfFractions['Catholic'].mean()"
829+
]
830+
},
781831
{
782832
"cell_type": "code",
783833
"execution_count": null,
@@ -789,7 +839,7 @@
789839
" indexsmallestTown = data.Total.idxmin()\n",
790840
" return data['town name'][indexsmallestTown] , data.Total[indexsmallestTown]\n",
791841
"\n",
792-
"grouped.apply(getSmallestTown) ## name and population of the town with minimal number of inhabitants for each canton"
842+
"grouped.apply(getSmallestTown, include_groups=False) ## name and population of the town with minimal number of inhabitants for each canton"
793843
]
794844
},
795845
{
@@ -805,7 +855,7 @@
805855
"metadata": {},
806856
"outputs": [],
807857
"source": [
808-
"sns.catplot( x = 'majority language' , y='Catholic' , data=dfFractions)"
858+
"sns.catplot( y = 'majority language' , x='Catholic' , data=dfFractions)"
809859
]
810860
},
811861
{
@@ -911,7 +961,8 @@
911961
"outputs": [],
912962
"source": [
913963
"sns.catplot( x = 'German speakers' , y='majority religion' , \n",
914-
" data=dfFractions , kind = 'violin' ,height=2, aspect=5 )"
964+
" data=dfFractions , kind = 'violin' ,height=2, aspect=5, inner=None )\n",
965+
"#NB: I use inner=None to remove the little boxplot inside the violin"
915966
]
916967
},
917968
{
@@ -932,7 +983,7 @@
932983
"outputs": [],
933984
"source": [
934985
"sns.catplot( x = 'German speakers' , y='majority religion' , \n",
935-
" data=dfFractions , kind = 'violin' ,height=2, aspect=5 , cut=0)"
986+
" data=dfFractions , kind = 'violin' ,height=2, aspect=5 , cut=0, inner=None )"
936987
]
937988
},
938989
{
@@ -965,9 +1016,9 @@
9651016
"outputs": [],
9661017
"source": [
9671018
"sns.catplot( x = 'German speakers' , y='majority religion' , data=dfFractions , height=2, aspect=5,\n",
968-
" kind = 'bar' , ci='sd').set(title='standard deviation')\n",
1019+
" kind = 'bar' , errorbar='sd').set(title='standard deviation')\n",
9691020
"sns.catplot( x = 'German speakers' , y='majority religion' , data=dfFractions , height=2, aspect=5,\n",
970-
" kind = 'bar' , ci=95 ).set(title='95% confidence interval')\n"
1021+
" kind = 'bar' , errorbar=('ci',95) ).set(title='95% confidence interval')\n"
9711022
]
9721023
},
9731024
{
@@ -1076,7 +1127,9 @@
10761127
"execution_count": null,
10771128
"metadata": {},
10781129
"outputs": [],
1079-
"source": []
1130+
"source": [
1131+
"df['canton name'].unique()"
1132+
]
10801133
},
10811134
{
10821135
"cell_type": "code",
@@ -1221,9 +1274,9 @@
12211274
],
12221275
"metadata": {
12231276
"kernelspec": {
1224-
"display_name": "Environment (conda_py38)",
1277+
"display_name": "Environment (conda_py311)",
12251278
"language": "python",
1226-
"name": "conda_py38"
1279+
"name": "conda_py311"
12271280
},
12281281
"language_info": {
12291282
"codemirror_mode": {
@@ -1235,7 +1288,7 @@
12351288
"name": "python",
12361289
"nbconvert_exporter": "python",
12371290
"pygments_lexer": "ipython3",
1238-
"version": "3.8.8"
1291+
"version": "3.11.0"
12391292
}
12401293
},
12411294
"nbformat": 4,

0 commit comments

Comments
 (0)