|
120 | 120 | "source": [ |
121 | 121 | "import pandas as pd\n", |
122 | 122 | "\n", |
123 | | - "df = pd.read_table(\"data/swiss_census_1880.csv\" , sep=',') \n", |
| 123 | + "df = pd.read_table(\"data/swiss_census_1880.csv\" , sep=',') \n", |
124 | 124 | "#try to see what happens when sep has a different value\n", |
125 | 125 | "\n", |
126 | 126 | "df.head() # this returns the first 5 lines of the table"
|
198 | 198 | "df['Foreigner']" |
199 | 199 | ] |
200 | 200 | }, |
| 201 | + { |
| 202 | + "cell_type": "code", |
| 203 | + "execution_count": null, |
| 204 | + "metadata": {}, |
| 205 | + "outputs": [], |
| 206 | + "source": [ |
| 207 | + "df.Foreigner" |
| 208 | + ] |
| 209 | + }, |
201 | 210 | { |
202 | 211 | "cell_type": "markdown", |
203 | 212 | "metadata": {}, |
|
257 | 266 | "maskVD.value_counts()" |
258 | 267 | ] |
259 | 268 | }, |
| 269 | + { |
| 270 | + "cell_type": "code", |
| 271 | + "execution_count": null, |
| 272 | + "metadata": {}, |
| 273 | + "outputs": [], |
| 274 | + "source": [ |
| 275 | + "maskVD" |
| 276 | + ] |
| 277 | + }, |
260 | 278 | { |
261 | 279 | "cell_type": "markdown", |
262 | 280 | "metadata": {}, |
|
300 | 318 | "cell_type": "markdown", |
301 | 319 | "metadata": {}, |
302 | 320 | "source": [ |
303 | | - "**micro-exercise :** Select towns with less than 1000 inhabitants (column `Total`), (*optional*: display only town name and number of inhabitants)" |
| 321 | + "**micro-exercise :** Select towns with fewer than 1000 inhabitants (column `Total`), (*optional*: display only town name and number of inhabitants)\n",
| 322 | + "\n", |
| 323 | + "<details style=\"border: 2px solid #B8C3EA; margin: 1em 0.2em;padding: 0.5em; cursor: pointer;\"><summary>👁 View solution </summary>\n", |
| 324 | + "\n", |
| 325 | + "```python\n", |
| 326 | + "df.loc[ df.Total < 1000 , : ]\n", |
| 327 | + "```\n", |
| 328 | + "\n",
| 329 | + " \n", |
| 330 | + "</details>\n", |
| 331 | + "\n" |
304 | 332 | ] |
305 | 333 | }, |
306 | 334 | { |
307 | 335 | "cell_type": "code", |
308 | 336 | "execution_count": null, |
309 | 337 | "metadata": {}, |
310 | 338 | "outputs": [], |
311 | | - "source": [] |
| 339 | + "source": [ |
| 340 | + "## write your solution to the micro-exercise here...\n" |
| 341 | + ] |
312 | 342 | }, |
313 | 343 | { |
314 | 344 | "cell_type": "markdown", |
|
400 | 430 | "outputs": [], |
401 | 431 | "source": [ |
402 | 432 | "# %load -r 9- solutions/solution_01_01.py\n", |
403 | | - "#2. Create a new column is the `DataFrame` representing the fraction of population which is Reformed in each town." |
| 433 | + "# 2. Create a new column in the DataFrame representing the fraction of population \n",
| 434 | + "# which is Reformed in each town. " |
| 435 | + ] |
| 436 | + }, |
| 437 | + { |
| 438 | + "cell_type": "code", |
| 439 | + "execution_count": null, |
| 440 | + "metadata": {}, |
| 441 | + "outputs": [], |
| 442 | + "source": [ |
| 443 | + "# optional : What is the minimum/maximum value for this fraction?" |
404 | 444 | ] |
405 | 445 | }, |
406 | 446 | { |
|
632 | 672 | "plotWithMeanMedianMode( dfFractions['0-14 y.o.'] , ax=axes[0])\n", |
633 | 673 | "plotWithMeanMedianMode( dfFractions['Foreigner'] , ax=axes[1])\n", |
634 | 674 | "plotWithMeanMedianMode( dfFractions['Reformed'] , ax=axes[2])\n", |
| 675 | + "f.tight_layout()\n", |
635 | 676 | "plt.show()" |
636 | 677 | ] |
637 | 678 | }, |
|
651 | 692 | "3. plot the distribution of the fraction of catholics in the canton of Zurich." |
652 | 693 | ] |
653 | 694 | }, |
654 | | - { |
655 | | - "cell_type": "code", |
656 | | - "execution_count": null, |
657 | | - "metadata": {}, |
658 | | - "outputs": [], |
659 | | - "source": [] |
660 | | - }, |
661 | 695 | { |
662 | 696 | "cell_type": "code", |
663 | 697 | "execution_count": null, |
664 | 698 | "metadata": {}, |
665 | 699 | "outputs": [], |
666 | 700 | "source": [ |
667 | | - "# %load -r 1-7 solutions/solution_01_02.py\n", |
| 701 | + "# %load -r 1-11 solutions/solution_01_02.py\n", |
668 | 702 | "# 1. plot the distribution of the total number of inhabitants. Try to choose an appropriate mode of representation (histogram, density line? number of bins?)"
669 | 703 | ] |
670 | 704 | }, |
|
674 | 708 | "metadata": {}, |
675 | 709 | "outputs": [], |
676 | 710 | "source": [ |
677 | | - "# %load -r 10-14 solutions/solution_01_02.py\n", |
| 711 | + "# %load -r 12-17 solutions/solution_01_02.py\n", |
678 | 712 | "# 2. try to call `sns.histplot` twice in a row, once to plot the fraction of Foreigner and once for the fraction of Swiss. What happens?"
679 | 713 | ] |
680 | 714 | }, |
|
684 | 718 | "metadata": {}, |
685 | 719 | "outputs": [], |
686 | 720 | "source": [ |
687 | | - "# %load -r 15- solutions/solution_01_02.py\n", |
| 721 | + "# %load -r 18- solutions/solution_01_02.py\n", |
688 | 722 | "# 3. plot the distribution of the fraction of catholics in the canton of Zurich." |
689 | 723 | ] |
690 | 724 | }, |
|
720 | 754 | "We will also create a column that describes the main religion and main language for each town:"
721 | 755 | ] |
722 | 756 | }, |
| 757 | + { |
| 758 | + "cell_type": "code", |
| 759 | + "execution_count": null, |
| 760 | + "metadata": {}, |
| 761 | + "outputs": [], |
| 762 | + "source": [] |
| 763 | + }, |
723 | 764 | { |
724 | 765 | "cell_type": "code", |
725 | 766 | "execution_count": null, |
|
778 | 819 | "dfFractions.groupby('majority language')['Catholic'].mean() ## mean fraction of Catholics in towns depending on the majority language"
779 | 820 | ] |
780 | 821 | }, |
| 822 | + { |
| 823 | + "cell_type": "code", |
| 824 | + "execution_count": null, |
| 825 | + "metadata": {}, |
| 826 | + "outputs": [], |
| 827 | + "source": [ |
| 828 | + "dfFractions['Catholic'].mean()" |
| 829 | + ] |
| 830 | + }, |
781 | 831 | { |
782 | 832 | "cell_type": "code", |
783 | 833 | "execution_count": null, |
|
789 | 839 | " indexsmallestTown = data.Total.idxmin()\n", |
790 | 840 | " return data['town name'][indexsmallestTown] , data.Total[indexsmallestTown]\n", |
791 | 841 | "\n", |
792 | | - "grouped.apply(getSmallestTown) ## name and population of the town with minimal number of inhabitants for each canton" |
| 842 | + "grouped.apply(getSmallestTown, include_groups=False) ## name and population of the town with minimal number of inhabitants for each canton" |
793 | 843 | ] |
794 | 844 | }, |
795 | 845 | { |
|
805 | 855 | "metadata": {}, |
806 | 856 | "outputs": [], |
807 | 857 | "source": [ |
808 | | - "sns.catplot( x = 'majority language' , y='Catholic' , data=dfFractions)" |
| 858 | + "sns.catplot( y = 'majority language' , x='Catholic' , data=dfFractions)" |
809 | 859 | ] |
810 | 860 | }, |
811 | 861 | { |
|
911 | 961 | "outputs": [], |
912 | 962 | "source": [ |
913 | 963 | "sns.catplot( x = 'German speakers' , y='majority religion' , \n", |
914 | | - " data=dfFractions , kind = 'violin' ,height=2, aspect=5 )" |
| 964 | + " data=dfFractions , kind = 'violin' ,height=2, aspect=5, inner=None )\n", |
| 965 | + "#NB: I use inner=None to remove the little boxplot inside the violin"
915 | 966 | ] |
916 | 967 | }, |
917 | 968 | { |
|
932 | 983 | "outputs": [], |
933 | 984 | "source": [ |
934 | 985 | "sns.catplot( x = 'German speakers' , y='majority religion' , \n", |
935 | | - " data=dfFractions , kind = 'violin' ,height=2, aspect=5 , cut=0)" |
| 986 | + " data=dfFractions , kind = 'violin' ,height=2, aspect=5 , cut=0, inner=None )" |
936 | 987 | ] |
937 | 988 | }, |
938 | 989 | { |
|
965 | 1016 | "outputs": [], |
966 | 1017 | "source": [ |
967 | 1018 | "sns.catplot( x = 'German speakers' , y='majority religion' , data=dfFractions , height=2, aspect=5,\n", |
968 | | - " kind = 'bar' , ci='sd').set(title='standard deviation')\n", |
| 1019 | + " kind = 'bar' , errorbar='sd').set(title='standard deviation')\n", |
969 | 1020 | "sns.catplot( x = 'German speakers' , y='majority religion' , data=dfFractions , height=2, aspect=5,\n", |
970 | | - " kind = 'bar' , ci=95 ).set(title='95% confidence interval')\n" |
| 1021 | + " kind = 'bar' , errorbar=('ci',95) ).set(title='95% confidence interval')\n" |
971 | 1022 | ] |
972 | 1023 | }, |
973 | 1024 | { |
|
1076 | 1127 | "execution_count": null, |
1077 | 1128 | "metadata": {}, |
1078 | 1129 | "outputs": [], |
1079 | | - "source": [] |
| 1130 | + "source": [ |
| 1131 | + "df['canton name'].unique()" |
| 1132 | + ] |
1080 | 1133 | }, |
1081 | 1134 | { |
1082 | 1135 | "cell_type": "code", |
|
1221 | 1274 | ], |
1222 | 1275 | "metadata": { |
1223 | 1276 | "kernelspec": { |
1224 | | - "display_name": "Environment (conda_py38)", |
| 1277 | + "display_name": "Environment (conda_py311)", |
1225 | 1278 | "language": "python", |
1226 | | - "name": "conda_py38" |
| 1279 | + "name": "conda_py311" |
1227 | 1280 | }, |
1228 | 1281 | "language_info": { |
1229 | 1282 | "codemirror_mode": { |
|
1235 | 1288 | "name": "python", |
1236 | 1289 | "nbconvert_exporter": "python", |
1237 | 1290 | "pygments_lexer": "ipython3", |
1238 | | - "version": "3.8.8" |
| 1291 | + "version": "3.11.0" |
1239 | 1292 | } |
1240 | 1293 | }, |
1241 | 1294 | "nbformat": 4, |
|
0 commit comments