|
648 | 648 | "* Égal, pas égal : `==`, `!=`\n", |
649 | 649 | "* Plus grand que, plus petit que : `>`, `<`\n", |
650 | 650 | "* Plus grand ou égal, plus petit ou égal : `>=`, `<=`\n", |
651 | | - "* Opérateurs par élément ET et OU : `&` et `|`" |
| 651 | + "* Opérateurs par élément ET et OU : `&` et `|`\n", |
| 652 | + "* Opérateur d'inversion : `~`" |
652 | 653 | ] |
653 | 654 | }, |
654 | 655 | { |
|
663 | 664 | "* Equal, not equal: `==`, `!=`\n", |
664 | 665 | "* Greater than, less than: `>`, `<`\n", |
665 | 666 | "* Greater than or equal to, less than or equal to: `>=`, `<=`\n", |
666 | | - "* Element-wise AND and OR operators: `&` and `|`" |
| 667 | + "* Element-wise AND and OR operators: `&` and `|`\n", |
| 668 | + "* Element-wise NOT (inversion) operator: `~`" |
| 669 | + ] |
| 670 | + }, |
| 671 | + { |
| 672 | + "cell_type": "code", |
| 673 | + "execution_count": null, |
| 674 | + "id": "43358a98-ed2e-42ed-bcce-8054ad7bb8a0", |
| 675 | + "metadata": { |
| 676 | + "lang": "fr" |
| 677 | + }, |
| 678 | + "outputs": [], |
| 679 | + "source": [ |
| 680 | + "# Sélection de trois années\n", |
| 681 | + "surveys_df[\n", |
| 682 | + " (surveys_df['year'] == 1991) |\n", |
| 683 | + " (surveys_df['year'] == 1996) |\n", |
| 684 | + " (surveys_df['year'] == 2001)\n", |
| 685 | + "].shape" |
| 686 | + ] |
| 687 | + }, |
| 688 | + { |
| 689 | + "cell_type": "code", |
| 690 | + "execution_count": null, |
| 691 | + "id": "d370b9aa-4c82-4d39-955e-0c2501f18f79", |
| 692 | + "metadata": { |
| 693 | + "lang": "en" |
| 694 | + }, |
| 695 | + "outputs": [], |
| 696 | + "source": [ |
| 697 | + "# Selection of three years\n", |
| 698 | + "surveys_df[\n", |
| 699 | + " (surveys_df['year'] == 1991) |\n", |
| 700 | + " (surveys_df['year'] == 1996) |\n", |
| 701 | + " (surveys_df['year'] == 2001)\n", |
| 702 | + "].shape" |
| 703 | + ] |
| 704 | + }, |
| 705 | + { |
| 706 | + "cell_type": "code", |
| 707 | + "execution_count": null, |
| 708 | + "id": "7615c90f-77ae-425d-b955-6218e9956f89", |
| 709 | + "metadata": { |
| 710 | + "lang": "fr" |
| 711 | + }, |
| 712 | + "outputs": [], |
| 713 | + "source": [ |
| 714 | + "# Sélection de trois années avec isin()\n", |
| 715 | + "surveys_df[\n", |
| 716 | + " surveys_df['year'].isin([1991, 1996, 2001])\n", |
| 717 | + "].shape" |
| 718 | + ] |
| 719 | + }, |
| 720 | + { |
| 721 | + "cell_type": "code", |
| 722 | + "execution_count": null, |
| 723 | + "id": "c380eeb9-00b0-42c0-ae5e-f4f17e0115eb", |
| 724 | + "metadata": { |
| 725 | + "lang": "en" |
| 726 | + }, |
| 727 | + "outputs": [], |
| 728 | + "source": [ |
| 729 | + "# Selection of three years with isin()\n", |
| 730 | + "surveys_df[\n", |
| 731 | + " surveys_df['year'].isin([1991, 1996, 2001])\n", |
| 732 | + "].shape" |
| 733 | + ] |
| 734 | + }, |
| 735 | + { |
| 736 | + "cell_type": "code", |
| 737 | + "execution_count": null, |
| 738 | + "id": "d4c51e4d-b68d-40a8-8cff-088e54fa34cb", |
| 739 | + "metadata": { |
| 740 | + "lang": "fr" |
| 741 | + }, |
| 742 | + "outputs": [], |
| 743 | + "source": [ |
| 744 | + "# Sélection des poids sur trois années avec .loc[]\n", |
| 745 | + "surveys_df.loc[surveys_df['year'].isin([1991, 1996, 2001]), 'weight']" |
| 746 | + ] |
| 747 | + }, |
| 748 | + { |
| 749 | + "cell_type": "code", |
| 750 | + "execution_count": null, |
| 751 | + "id": "d8aadefc-3a8b-42b9-9894-5884a05127e0", |
| 752 | + "metadata": { |
| 753 | + "lang": "en" |
| 754 | + }, |
| 755 | + "outputs": [], |
| 756 | + "source": [ |
| 757 | + "# Selection of the weights for three years with .loc[]\n", |
| 758 | + "surveys_df.loc[surveys_df['year'].isin([1991, 1996, 2001]), 'weight']" |
| 759 | + ] |
| 760 | + }, |
| 761 | + { |
| 762 | + "cell_type": "code", |
| 763 | + "execution_count": null, |
| 764 | + "id": "e62ce6d3-edc1-4fce-9474-4abaa80b4ba9", |
| 765 | + "metadata": { |
| 766 | + "lang": "fr" |
| 767 | + }, |
| 768 | + "outputs": [], |
| 769 | + "source": [ |
| 770 | + "# Chercher des données manquantes ou erronées\n", |
| 771 | + "surveys_df.loc[~surveys_df['sex'].isin(['F', 'M']), 'sex']" |
| 772 | + ] |
| 773 | + }, |
| 774 | + { |
| 775 | + "cell_type": "code", |
| 776 | + "execution_count": null, |
| 777 | + "id": "1ef4b2e5-808d-4f9b-83e2-c47a7be6f8b1", |
| 778 | + "metadata": { |
| 779 | + "lang": "en" |
| 780 | + }, |
| 781 | + "outputs": [], |
| 782 | + "source": [ |
| 783 | + "# Search for missing or incorrect data\n", |
| 784 | + "surveys_df.loc[~surveys_df['sex'].isin(['F', 'M']), 'sex']" |
667 | 785 | ] |
668 | 786 | }, |
669 | 787 | { |
|
674 | 792 | }, |
675 | 793 | "source": [ |
676 | 794 | "### Exercices - Sélections par la présence\n", |
677 | | - "`1`. Vous pouvez utiliser la méthode `isin()` pour aller chercher\n", |
678 | | - "les enregistrements dont les valeurs d'une colonne correspondent\n", |
679 | | - "à l'une des valeurs fournies dans une liste. Par exemple :\n", |
680 | | - "```\n", |
681 | | - "surveys_df[surveys_df['nom_colonne'].isin([valeur1, valeur2, ...])]\n", |
682 | | - "```\n", |
683 | | - "Utilisez la méthode `isin()` pour trouver tous les\n", |
| 795 | + "`1`. Utilisez la méthode `isin()` pour trouver tous les\n", |
684 | 796 | "différents sites (`plot_id`) ayant certaines espèces\n", |
685 | 797 | "(`AS`, `CQ`, `OX` et `UL`) dans le DataFrame. (4 min.)" |
686 | 798 | ] |
|
693 | 805 | }, |
694 | 806 | "source": [ |
695 | 807 | "### Exercises - Selection by presence\n", |
696 | | - "`1`. You can use the `isin()` method in python to query\n", |
697 | | - "a DataFrame based upon a list of values as follows:\n", |
698 | | - "```\n", |
699 | | - "surveys_df[surveys_df['column_name'].isin([value1, value2, ...])]\n", |
700 | | - "```\n", |
701 | | - "Use the `isin()` method to find all different\n", |
| 808 | + "`1`. Use the `isin()` method to find all the different\n", |
702 | 809 | "sites (`plot_id`) that contain particular species\n", |
703 | 810 | "(`AS`, `CQ`, `OX` and `UL`) in the surveys DataFrame. (4 min.)" |
704 | 811 | ] |
|
719 | 826 | "cond_especes = surveys_df['species_id'].isin(['AS', 'CQ', 'OX', 'UL'])\n", |
720 | 827 | "\n", |
721 | 828 | "# Lister les différents sites\n", |
722 | | - "surveys_df[cond_especes]['plot_id'].unique()" |
| 829 | + "surveys_df.loc[cond_especes, 'plot_id'].unique()" |
723 | 830 | ] |
724 | 831 | }, |
725 | 832 | { |
|
738 | 845 | "cond_especes = ###(['AS', 'CQ', 'OX', 'UL'])\n", |
739 | 846 | "\n", |
740 | 847 | "# Lister les différents sites\n", |
741 | | - "surveys_df[###][###].unique()" |
| 848 | + "surveys_df.###[###].unique()" |
742 | 849 | ] |
743 | 850 | }, |
744 | 851 | { |
|
757 | 864 | "species_mask = surveys_df['species_id'].isin(['AS', 'CQ', 'OX', 'UL'])\n", |
758 | 865 | "\n", |
759 | 866 | "# List all different sites\n", |
760 | | - "surveys_df[species_mask]['plot_id'].unique()" |
| 867 | + "surveys_df.loc[species_mask, 'plot_id'].unique()" |
761 | 868 | ] |
762 | 869 | }, |
763 | 870 | { |
|
776 | 883 | "species_mask = ###(['AS', 'CQ', 'OX', 'UL'])\n", |
777 | 884 | "\n", |
778 | 885 | "# List all different sites\n", |
779 | | - "surveys_df[###][###].unique()" |
| 886 | + "surveys_df.###[###].unique()" |
780 | 887 | ] |
781 | 888 | }, |
782 | 889 | { |
|
786 | 893 | "lang": "fr" |
787 | 894 | }, |
788 | 895 | "source": [ |
789 | | - "`2`. Créez un graphique de barres montrant la moyenne\n", |
790 | | - "des poids selon le site (`plot_id`), mais avec les résultats\n", |
791 | | - "des femelles et des mâles côte à côte pour chaque site.\n", |
792 | | - "Pour la préparation initiale des données :\n", |
793 | | - "* Créez une sélection contenant seulement les enregistrements\n", |
794 | | - "ayant une valeur de `sex` (soit `F` ou `M`) et ayant un poids supérieur à 0\n", |
795 | | - "* Pour le graphique final, vous devez limiter les données\n", |
796 | | - "aux colonnes de poids, de site et de sexe\n", |
| 896 | + "`2`. Calculez la moyenne des poids\n", |
| 897 | + "selon le site (`plot_id`) et le sexe :\n", |
| 898 | + "* Créez une sélection contenant seulement :\n", |
| 899 | + " * Les observations ayant une valeur de sexe `F` ou `M`\n", |
| 900 | + " et ayant un poids supérieur à `0`;\n", |
| 901 | + " * Les colonnes de poids, de site et de sexe.\n", |
| 902 | + "* Groupez les données et calculez les moyennes de poids.\n", |
797 | 903 | "\n", |
798 | 904 | "(5 min.)" |
799 | 905 | ] |
|
805 | 911 | "lang": "en" |
806 | 912 | }, |
807 | 913 | "source": [ |
808 | | - "`2`. Create a bar plot of average weight by site (`plot_id`)\n", |
809 | | - "with female and male values side by side for each site.\n", |
810 | | - "* Create a new DataFrame that contains only observations that are\n", |
811 | | - " of sex female or male and where weight values are greater than 0\n", |
812 | | - "* For the final plot, only select the\n", |
813 | | - " weight, the site and the sex columns\n", |
| 914 | + "`2`. Get the average weight by site (`plot_id`) and sex:\n", |
| 915 | + "* Create a selection that contains only:\n", |
| 916 | + " * The observations that are of sex `F` or `M`\n", |
| 917 | + " and where weight values are greater than `0`;\n", |
| 918 | + " * The weight, the site and the sex columns.\n", |
| 919 | + "* Group the data and compute the average weights.\n", |
814 | 920 | "\n", |
815 | 921 | "(5 min.)" |
816 | 922 | ] |
|
832 | 938 | "cond_poids = surveys_df['weight'] > 0\n", |
833 | 939 | "colonnes = ['weight', 'plot_id', 'sex']\n", |
834 | 940 | "\n", |
835 | | - "selection = surveys_df[cond_sexe & cond_poids][colonnes]\n", |
| 941 | + "selection = surveys_df.loc[cond_sexe & cond_poids, colonnes]\n", |
836 | 942 | "selection.tail()" |
837 | 943 | ] |
838 | 944 | }, |
|
849 | 955 | "outputs": [], |
850 | 956 | "source": [ |
851 | 957 | "# Sélection des enregistrements et des colonnes nécessaires\n", |
852 | | - "cond_sexe = surveys_df['sex']###\n", |
| 958 | + "cond_sexe = surveys_df['sex'].isin(['F', 'M'])\n", |
853 | 959 | "cond_poids = surveys_df['weight'] ###\n", |
854 | 960 | "colonnes = ['weight', 'plot_id', 'sex']\n", |
855 | 961 | "\n", |
|
874 | 980 | "weight_mask = surveys_df['weight'] > 0\n", |
875 | 981 | "columns = ['weight', 'plot_id', 'sex']\n", |
876 | 982 | "\n", |
877 | | - "selection = surveys_df[sex_mask & weight_mask][columns]\n", |
| 983 | + "selection = surveys_df.loc[sex_mask & weight_mask, columns]\n", |
878 | 984 | "selection.tail()" |
879 | 985 | ] |
880 | 986 | }, |
|
891 | 997 | "outputs": [], |
892 | 998 | "source": [ |
893 | 999 | "# Selection of the data with isin()\n", |
894 | | - "sex_mask = surveys_df['sex']###\n", |
| 1000 | + "sex_mask = surveys_df['sex'].isin(['F', 'M'])\n", |
895 | 1001 | "weight_mask = surveys_df['weight'] ###\n", |
896 | 1002 | "columns = ['weight', 'plot_id', 'sex']\n", |
897 | 1003 | "\n", |
|
967 | 1073 | "avg_by_site_sex.tail()" |
968 | 1074 | ] |
969 | 1075 | }, |
970 | | - { |
971 | | - "cell_type": "markdown", |
972 | | - "id": "a2584e88-879e-4a8f-bcdc-b6bd0cba34a2", |
973 | | - "metadata": { |
974 | | - "lang": "fr" |
975 | | - }, |
976 | | - "source": [ |
977 | | - "`3`. L'opérateur `~` peut être utilisé pour retourner l'opposé d'une\n", |
978 | | - "sélection. C'est l'équivalent de **n'est pas**. Écrivez une requête\n", |
979 | | - "sélectionnant tous les enregistrements ne contenant ni `F`, ni `M`." |
980 | | - ] |
981 | | - }, |
982 | | - { |
983 | | - "cell_type": "markdown", |
984 | | - "id": "7ffd5c69-247f-46e1-99b2-32a035f1554f", |
985 | | - "metadata": { |
986 | | - "lang": "en" |
987 | | - }, |
988 | | - "source": [ |
989 | | - "`3`. The `~` symbol in Python can be used to return the OPPOSITE\n", |
990 | | - "of the selection that you specify in python. It is equivalent\n", |
991 | | - "to **is not in**. Write a query that selects all rows\n", |
992 | | - "that are NOT equal to `F` or `M` in the surveys data." |
993 | | - ] |
994 | | - }, |
995 | | - { |
996 | | - "cell_type": "code", |
997 | | - "execution_count": null, |
998 | | - "id": "e62ce6d3-edc1-4fce-9474-4abaa80b4ba9", |
999 | | - "metadata": { |
1000 | | - "lang": "fr", |
1001 | | - "tags": [ |
1002 | | - "soln" |
1003 | | - ] |
1004 | | - }, |
1005 | | - "outputs": [], |
1006 | | - "source": [ |
1007 | | - "surveys_df[~cond_sexe]" |
1008 | | - ] |
1009 | | - }, |
1010 | | - { |
1011 | | - "cell_type": "code", |
1012 | | - "execution_count": null, |
1013 | | - "id": "a4247e87-ca2a-47f1-9cb8-7a467fad8498", |
1014 | | - "metadata": { |
1015 | | - "lang": "fr", |
1016 | | - "tags": [ |
1017 | | - "exer" |
1018 | | - ] |
1019 | | - }, |
1020 | | - "outputs": [], |
1021 | | - "source": [ |
1022 | | - "surveys_df[###cond_sexe]" |
1023 | | - ] |
1024 | | - }, |
1025 | | - { |
1026 | | - "cell_type": "code", |
1027 | | - "execution_count": null, |
1028 | | - "id": "1ef4b2e5-808d-4f9b-83e2-c47a7be6f8b1", |
1029 | | - "metadata": { |
1030 | | - "lang": "en", |
1031 | | - "tags": [ |
1032 | | - "soln" |
1033 | | - ] |
1034 | | - }, |
1035 | | - "outputs": [], |
1036 | | - "source": [ |
1037 | | - "surveys_df[~sex_mask]" |
1038 | | - ] |
1039 | | - }, |
1040 | | - { |
1041 | | - "cell_type": "code", |
1042 | | - "execution_count": null, |
1043 | | - "id": "41f2fcb5-26a0-4b51-a143-61b4ed4fba63", |
1044 | | - "metadata": { |
1045 | | - "lang": "en", |
1046 | | - "tags": [ |
1047 | | - "exer" |
1048 | | - ] |
1049 | | - }, |
1050 | | - "outputs": [], |
1051 | | - "source": [ |
1052 | | - "surveys_df[###sex_mask]" |
1053 | | - ] |
1054 | | - }, |
1055 | 1076 | { |
1056 | 1077 | "cell_type": "markdown", |
1057 | 1078 | "id": "9fcb21ff-d26b-48c5-8218-9f06d29eae21", |
|
0 commit comments