calculquebec
diff --git a/en/04-combine.ipynb b/en/04-combine.ipynb
Lines changed: 23 additions & 34 deletions
diff --git a/fr/04-combine.ipynb b/fr/04-combine.ipynb
Lines changed: 23 additions & 34 deletions
diff --git a/solution-en/04-combine.ipynb b/solution-en/04-combine.ipynb
Lines changed: 23 additions & 34 deletions
@@ -252,13 +252,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e8192e6e-ecdd-4cef-bc8c-41dcac175415",
+   "id": "f2b0fb1d-0a90-47d1-b016-caf51a192033",
    "metadata": {
-    "lang": "en"
+    "lang": "en,fr"
    },
    "outputs": [],
    "source": [
-    "head10.columns"
+    "surveys_df.columns"
    ]
   },
   {
@@ -302,6 +302,8 @@
    },
    "outputs": [],
    "source": [
+    "head10 = surveys_df.head(10)\n",
+    "\n",
     "# Computing the inner join of head10 and species_sub\n",
     "key = 'species_id'\n",
     "merged_inner = pd.merge(left=head10, right=species_sub,\n",
@@ -390,7 +392,7 @@
    "source": [
     "## Exercise - Joining all data\n",
     "`1`. Create a new DataFrame by joining the contents of the\n",
-    "`surveys.csv` and `species.csv` tables. Keep all survey records.\n",
+    "`surveys_df` and `species.csv` tables. Keep all survey records.\n",
     "(3 min.)"
    ]
   },
@@ -420,8 +422,8 @@
     "lang": "en"
    },
    "source": [
-    "`2`. Calculate and plot the distribution of surveys (i.e. the\n",
-    "number of `record_id`) by `taxa` for each `plot_id`. (3 min.)"
+    "`2`. Calculate and plot the evolution of the average\n",
+    "hindfoot length for each genus from year to year. (3 min.)"
    ]
   },
   {
@@ -436,9 +438,9 @@
    },
    "outputs": [],
    "source": [
-    "by_site_taxa = merged_left###\n",
-    "taxa_site = by_site_taxa['record_id']###\n",
-    "taxa_site.tail()"
+    "average_lengths = merged_left.###(\n",
+    "    ###)['hindfoot_length']###\n",
+    "average_lengths.tail()"
    ]
   },
   {
@@ -450,7 +452,7 @@
    },
    "outputs": [],
    "source": [
-    "taxa_site.plot(kind='bar', logy=True)"
+    "average_lengths.plot(kind='line')"
    ]
   },
   {
@@ -460,23 +462,10 @@
     "lang": "en"
    },
    "source": [
-    "`3`. Calculate and plot the distribution\n",
-    "of `taxa` by `sex` for each `plot_id`. (2 min.)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f15d004d-3619-46e8-9ed1-df35f2153c5a",
-   "metadata": {
-    "lang": "en"
-   },
-   "outputs": [],
-   "source": [
-    "# Data cleanup\n",
-    "merged_left['sex'] = merged_left['sex'].fillna('F|M')\n",
-    "invalid_mask = ~merged_left['sex'].isin(['F', 'F|M', 'M'])\n",
-    "merged_left.loc[invalid_mask, 'sex'] = \"F|M\""
+    "`3`. Calculate and create a bar plot showing\n",
+    "the average weight per sex for each genus.\n",
+    "For this exercise, we will use a pivot table instead of `unstack()`.\n",
+    "(2 min.)"
    ]
   },
   {
@@ -491,9 +480,9 @@
    },
    "outputs": [],
    "source": [
-    "ntaxa_sex_site = merged_left.groupby(\n",
-    "    ['plot_id', 'sex'])[###].nunique()#.reset_index(level=1)\n",
-    "ntaxa_sex_site.tail()"
+    "weights_by_genus_sex = merged_left.groupby(\n",
+    "    ['genus', 'sex'])['weight'].###()#.reset_index()\n",
+    "weights_by_genus_sex.tail()"
    ]
   },
   {
@@ -506,9 +495,9 @@
    "outputs": [],
    "source": [
     "# Use pivot_table() instead of unstack()\n",
-    "pivot_taxa_sex_site = ntaxa_sex_site.pivot_table(\n",
-    "    values='taxa', columns='sex', index=ntaxa_sex_site.index)\n",
-    "pivot_taxa_sex_site.tail()"
+    "pivot_weight_genus_sex = weights_by_genus_sex.pivot_table(\n",
+    "    values='weight', index='genus', columns='sex')\n",
+    "pivot_weight_genus_sex"
    ]
   },
   {
@@ -520,7 +509,7 @@
    },
    "outputs": [],
    "source": [
-    "pivot_taxa_sex_site.plot(kind=\"bar\")"
+    "pivot_weight_genus_sex.plot(kind=\"bar\")"
    ]
   },
   {
 
@@ -254,11 +254,11 @@
    "execution_count": null,
    "id": "f2b0fb1d-0a90-47d1-b016-caf51a192033",
    "metadata": {
-    "lang": "fr"
+    "lang": "en,fr"
    },
    "outputs": [],
    "source": [
-    "premiers10.columns"
+    "surveys_df.columns"
    ]
   },
   {
@@ -302,6 +302,8 @@
    },
    "outputs": [],
    "source": [
+    "premiers10 = surveys_df.head(10)\n",
+    "\n",
     "# Calculer l'intersection de premiers10 et trois_especes\n",
     "cle = 'species_id'\n",
     "intersection = pd.merge(left=premiers10, right=trois_especes,\n",
@@ -390,7 +392,7 @@
    "source": [
     "## Exercice - Joindre toutes les données\n",
     "`1`. Créez un nouveau DataFrame tel que tous les\n",
-    "enregistrements de `surveys.csv` sont gardés dans une jonction\n",
+    "enregistrements de `surveys_df` sont gardés dans une jonction\n",
     "impliquant les informations correspondantes de `species.csv`.\n",
     "(3 min.)"
    ]
@@ -421,9 +423,9 @@
     "lang": "fr"
    },
    "source": [
-    "`2`. Calculez et créez un graphique (*bar-plot*) montrant\n",
-    "le nombre d'enregistrements (soit le nombre de `record_id`)\n",
-    "par type de `taxa` pour chaque site (`plot_id`). (3 min.)"
+    "`2`. Calculez et créez un graphique montrant l'évolution de la\n",
+    "longueur moyenne des arrière-pieds (`'hindfoot_length'`) pour\n",
+    "chaque genre d'espèce (`'genus'`) d'une année à l'autre. (3 min.)"
    ]
   },
   {
@@ -438,9 +440,9 @@
    },
    "outputs": [],
    "source": [
-    "par_site_taxa = jonc_gauche###\n",
-    "nb_par_site_taxa = par_site_taxa['record_id']###\n",
-    "nb_par_site_taxa.tail()"
+    "longueurs_moyennes = jonc_gauche.###(\n",
+    "    ###)['hindfoot_length']###\n",
+    "longueurs_moyennes.tail()"
    ]
   },
   {
@@ -452,7 +454,7 @@
    },
    "outputs": [],
    "source": [
-    "nb_par_site_taxa.plot(kind='bar', logy=True)"
+    "longueurs_moyennes.plot(kind='line')"
    ]
   },
   {
@@ -462,26 +464,13 @@
     "lang": "fr"
    },
    "source": [
-    "`3`. Calculez et créez un graphique (bar-plot) montrant le nombre\n",
-    "de différents `taxa` par type de sexe pour chaque site (`plot_id`).\n",
+    "`3`. Calculez et créez un graphique (*bar-plot*) montrant\n",
+    "le poids moyen selon le sexe pour chaque genre d'espèce.\n",
+    "Pour cet exercice, nous allons utiliser une\n",
+    "table de pivot à la place de `unstack()`.\n",
     "(2 min.)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fdc14b08-d825-44d1-a87b-480d8b5f2304",
-   "metadata": {
-    "lang": "fr"
-   },
-   "outputs": [],
-   "source": [
-    "# Nettoyage des données\n",
-    "jonc_gauche['sex'] = jonc_gauche['sex'].fillna('F|M')\n",
-    "invalides = ~jonc_gauche['sex'].isin(['F', 'F|M', 'M'])\n",
-    "jonc_gauche.loc[invalides, 'sex'] = 'F|M'"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -494,9 +483,9 @@
    },
    "outputs": [],
    "source": [
-    "ntaxa_sex_site = jonc_gauche.groupby(\n",
-    "    ['plot_id', 'sex'])[###].nunique()#.reset_index(level=1)\n",
-    "ntaxa_sex_site.tail()"
+    "poids_par_genre_sexe = jonc_gauche.groupby(\n",
+    "    ['genus', 'sex'])['weight'].###()#.reset_index()\n",
+    "poids_par_genre_sexe.tail()"
    ]
   },
   {
@@ -509,9 +498,9 @@
    "outputs": [],
    "source": [
     "# Utiliser pivot_table() au lieu de unstack()\n",
-    "pivot_taxa_sex_site = ntaxa_sex_site.pivot_table(\n",
-    "    values='taxa', columns='sex', index=ntaxa_sex_site.index)\n",
-    "pivot_taxa_sex_site.tail()"
+    "pivot_weight_genus_sex = poids_par_genre_sexe.pivot_table(\n",
+    "    values='weight', index='genus', columns='sex')\n",
+    "pivot_weight_genus_sex"
    ]
   },
   {
@@ -523,7 +512,7 @@
    },
    "outputs": [],
    "source": [
-    "pivot_taxa_sex_site.plot(kind=\"bar\")"
+    "pivot_weight_genus_sex.plot(kind=\"bar\")"
    ]
   },
   {
 
@@ -252,13 +252,13 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e8192e6e-ecdd-4cef-bc8c-41dcac175415",
+   "id": "f2b0fb1d-0a90-47d1-b016-caf51a192033",
    "metadata": {
-    "lang": "en"
+    "lang": "en,fr"
    },
    "outputs": [],
    "source": [
-    "head10.columns"
+    "surveys_df.columns"
    ]
   },
   {
@@ -302,6 +302,8 @@
    },
    "outputs": [],
    "source": [
+    "head10 = surveys_df.head(10)\n",
+    "\n",
     "# Computing the inner join of head10 and species_sub\n",
     "key = 'species_id'\n",
     "merged_inner = pd.merge(left=head10, right=species_sub,\n",
@@ -390,7 +392,7 @@
    "source": [
     "## Exercise - Joining all data\n",
     "`1`. Create a new DataFrame by joining the contents of the\n",
-    "`surveys.csv` and `species.csv` tables. Keep all survey records.\n",
+    "`surveys_df` and `species.csv` tables. Keep all survey records.\n",
     "(3 min.)"
    ]
   },
@@ -420,8 +422,8 @@
     "lang": "en"
    },
    "source": [
-    "`2`. Calculate and plot the distribution of surveys (i.e. the\n",
-    "number of `record_id`) by `taxa` for each `plot_id`. (3 min.)"
+    "`2`. Calculate and plot the evolution of the average\n",
+    "hindfoot length for each genus from year to year. (3 min.)"
    ]
   },
   {
@@ -436,9 +438,9 @@
    },
    "outputs": [],
    "source": [
-    "by_site_taxa = merged_left.groupby(['plot_id', 'taxa'])\n",
-    "taxa_site = by_site_taxa['record_id'].count().unstack()\n",
-    "taxa_site.tail()"
+    "average_lengths = merged_left.groupby(\n",
+    "    ['year', 'genus'])['hindfoot_length'].mean().unstack()\n",
+    "average_lengths.tail()"
    ]
   },
   {
@@ -450,7 +452,7 @@
    },
    "outputs": [],
    "source": [
-    "taxa_site.plot(kind='bar', logy=True)"
+    "average_lengths.plot(kind='line')"
    ]
   },
   {
@@ -460,23 +462,10 @@
     "lang": "en"
    },
    "source": [
-    "`3`. Calculate and plot the distribution\n",
-    "of `taxa` by `sex` for each `plot_id`. (2 min.)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f15d004d-3619-46e8-9ed1-df35f2153c5a",
-   "metadata": {
-    "lang": "en"
-   },
-   "outputs": [],
-   "source": [
-    "# Data cleanup\n",
-    "merged_left['sex'] = merged_left['sex'].fillna('F|M')\n",
-    "invalid_mask = ~merged_left['sex'].isin(['F', 'F|M', 'M'])\n",
-    "merged_left.loc[invalid_mask, 'sex'] = \"F|M\""
+    "`3`. Calculate and create a bar plot showing\n",
+    "the average weight per sex for each genus.\n",
+    "For this exercise, we will use a pivot table instead of `unstack()`.\n",
+    "(2 min.)"
    ]
   },
   {
@@ -491,9 +480,9 @@
    },
    "outputs": [],
    "source": [
-    "ntaxa_sex_site = merged_left.groupby(\n",
-    "    ['plot_id', 'sex'])['taxa'].nunique().reset_index(level=1)\n",
-    "ntaxa_sex_site.tail()"
+    "weights_by_genus_sex = merged_left.groupby(\n",
+    "    ['genus', 'sex'])['weight'].mean().reset_index()\n",
+    "weights_by_genus_sex.tail()"
    ]
   },
   {
@@ -506,9 +495,9 @@
    "outputs": [],
    "source": [
     "# Use pivot_table() instead of unstack()\n",
-    "pivot_taxa_sex_site = ntaxa_sex_site.pivot_table(\n",
-    "    values='taxa', columns='sex', index=ntaxa_sex_site.index)\n",
-    "pivot_taxa_sex_site.tail()"
+    "pivot_weight_genus_sex = weights_by_genus_sex.pivot_table(\n",
+    "    values='weight', index='genus', columns='sex')\n",
+    "pivot_weight_genus_sex"
    ]
   },
   {
@@ -520,7 +509,7 @@
    },
    "outputs": [],
    "source": [
-    "pivot_taxa_sex_site.plot(kind=\"bar\")"
+    "pivot_weight_genus_sex.plot(kind=\"bar\")"
    ]
   },
   {