Exercice de concaténation révisé

plstonge · plstonge · commit d8a599e8bc41 · 2025-05-09T13:10:50.000-04:00
diff --git a/en/04-combine.ipynb b/en/04-combine.ipynb
@@ -127,11 +127,11 @@
    },
    "source": [
     "## Exercise - Concatenating DataFrames\n",
-    "* In `surveys_df`, select rows where the year is 2001.\n",
-    "  Do the same for year 2002.\n",
-    "* Concatenate both dataframes.\n",
+    "* Load the data from all CSV files in the directory\n",
+    "  `../data/by_species_id/` and accumulate them in `surveys_sp`.\n",
+    "* Reset the index while dropping the accumulated one.\n",
     "\n",
-    "(3 min.)"
+    "(4 min.)"
    ]
   },
   {
@@ -146,12 +146,14 @@
    },
    "outputs": [],
    "source": [
-    "# Get data for each year\n",
-    "survey2001 = surveys_df[surveys_df['year'] ###]\n",
-    "survey2002 = surveys_df[surveys_df['year'] ###]\n",
+    "surveys_sp = pd.DataFrame()  # Empty DataFrame\n",
+    "\n",
+    "for filename in ###('../data/by_species_id/*.csv'):\n",
+    "    new_df = pd.read_csv(filename)\n",
+    "    surveys_sp = pd.###([###, new_df], ###='index')\n",
     "\n",
-    "# Concatenate vertically\n",
-    "survey_all = ###"
+    "surveys_sp = surveys_sp.###(drop=###)\n",
+    "surveys_sp"
    ]
   },
   {
@@ -161,7 +163,7 @@
     "lang": "en"
    },
    "source": [
-    "* Compute the average weight by sex for each year. (1 min.)"
+    "* Compute the average weight by sex for each species. (1 min.)"
    ]
   },
   {
@@ -176,10 +178,10 @@
    },
    "outputs": [],
    "source": [
-    "# Get the average weight by sex for each year\n",
-    "weight_year = survey_all.groupby(['year', 'sex'])###\n",
-    "weight_year = weight_year.unstack()\n",
-    "weight_year"
+    "# Get the average weight by sex for each species\n",
+    "weight_species = surveys_sp.groupby(\n",
+    "    ['species_id', 'sex'])###.unstack()\n",
+    "weight_species"
    ]
   },
   {
@@ -189,8 +191,8 @@
     "lang": "en"
    },
    "source": [
-    "* Export your results as a CSV and make sure\n",
-    "  it reads back into python properly. (2 min.)"
+    "* Export your results as a CSV file and make sure\n",
+    "  it reads back into Python properly. (3 min.)"
    ]
   },
   {
@@ -206,8 +208,8 @@
    "outputs": [],
    "source": [
     "# Writing to file while keeping the index\n",
-    "csv_file = 'weight_for_year.csv'\n",
-    "weight_year###\n",
+    "csv_file = 'weight_by_species.csv'\n",
+    "weight_species###\n",
     "\n",
     "# Reading it back in with a specified index column\n",
     "pd.read_csv(csv_file, index_col=###)"
diff --git a/fr/04-combine.ipynb b/fr/04-combine.ipynb
@@ -127,10 +127,11 @@
    },
    "source": [
     "## Exercice - Concaténer des DataFrames\n",
-    "* Dans `surveys_df`, sélectionnez individuellement les enregistrements des années 2001 et 2002\n",
-    "* Concaténez les deux dataframes verticalement\n",
+    "* Chargez les données de tous les fichiers CSV du répertoire\n",
+    "  `../data/by_species_id/` et accumulez-les dans `surveys_sp`.\n",
+    "* Réinitialisez l'index sans préserver celui accumulé.\n",
     "\n",
-    "(3 min.)"
+    "(4 min.)"
    ]
   },
   {
@@ -145,12 +146,14 @@
    },
    "outputs": [],
    "source": [
-    "# Obtenir les données pour chaque année\n",
-    "annee2001 = surveys_df[surveys_df['year'] ###]\n",
-    "annee2002 = surveys_df[surveys_df['year'] ###]\n",
+    "surveys_sp = pd.DataFrame()  # DataFrame vide\n",
+    "\n",
+    "for fichier in ###('../data/by_species_id/*.csv'):\n",
+    "    nouveau_df = pd.read_csv(fichier)\n",
+    "    surveys_sp = pd.###([###, nouveau_df], ###='index')\n",
     "\n",
-    "# Concaténer verticalement\n",
-    "deux_annees = ###"
+    "surveys_sp = surveys_sp.###(drop=###)\n",
+    "surveys_sp"
    ]
   },
   {
@@ -160,7 +163,7 @@
     "lang": "fr"
    },
    "source": [
-    "* Calculez le poids moyen selon l'année et le sexe (1 min.)"
+    "* Calculez le poids moyen selon l'espèce et le sexe (1 min.)"
    ]
   },
   {
@@ -175,10 +178,10 @@
    },
    "outputs": [],
    "source": [
-    "# Calculer le poids moyen par année et par sexe\n",
-    "poids_annee = deux_annees.groupby(['year', 'sex'])###\n",
-    "poids_annee = poids_annee.unstack()\n",
-    "poids_annee"
+    "# Calculer le poids moyen par espèce et par sexe\n",
+    "poids_espece = surveys_sp.groupby(\n",
+    "    ['species_id', 'sex'])###.unstack()\n",
+    "poids_espece"
    ]
   },
   {
@@ -189,7 +192,7 @@
    },
    "source": [
     "* Sauvegardez le tableau des moyennes\n",
-    "  dans un fichier CSV et le recharger (2 min.)"
+    "  dans un fichier CSV et rechargez-le (3 min.)"
    ]
   },
   {
@@ -204,9 +207,9 @@
    },
    "outputs": [],
    "source": [
-    "# Écrire dans un fichier - garder l'index 'year' cette fois-ci\n",
-    "fichier_csv = 'poids_par_annee.csv'\n",
-    "poids_annee###\n",
+    "# Écrire dans un fichier - garder l'index 'species_id' cette fois-ci\n",
+    "fichier_csv = 'poids_par_espece.csv'\n",
+    "poids_espece###\n",
     "\n",
     "# Relire les données, fournir le nom de l'index\n",
     "pd.read_csv(fichier_csv, index_col=###)"
diff --git a/solution-en/04-combine.ipynb b/solution-en/04-combine.ipynb
@@ -127,11 +127,11 @@
    },
    "source": [
     "## Exercise - Concatenating DataFrames\n",
-    "* In `surveys_df`, select rows where the year is 2001.\n",
-    "  Do the same for year 2002.\n",
-    "* Concatenate both dataframes.\n",
+    "* Load the data from all CSV files in the directory\n",
+    "  `../data/by_species_id/` and accumulate them in `surveys_sp`.\n",
+    "* Reset the index while dropping the accumulated one.\n",
     "\n",
-    "(3 min.)"
+    "(4 min.)"
    ]
   },
   {
@@ -146,12 +146,14 @@
    },
    "outputs": [],
    "source": [
-    "# Get data for each year\n",
-    "survey2001 = surveys_df[surveys_df['year'] == 2001]\n",
-    "survey2002 = surveys_df[surveys_df['year'] == 2002]\n",
+    "surveys_sp = pd.DataFrame()  # Empty DataFrame\n",
+    "\n",
+    "for filename in glob('../data/by_species_id/*.csv'):\n",
+    "    new_df = pd.read_csv(filename)\n",
+    "    surveys_sp = pd.concat([surveys_sp, new_df], axis='index')\n",
     "\n",
-    "# Concatenate vertically\n",
-    "survey_all = pd.concat([survey2001, survey2002], axis='index')"
+    "surveys_sp = surveys_sp.reset_index(drop=True)\n",
+    "surveys_sp"
    ]
   },
   {
@@ -161,7 +163,7 @@
     "lang": "en"
    },
    "source": [
-    "* Compute the average weight by sex for each year. (1 min.)"
+    "* Compute the average weight by sex for each species. (1 min.)"
    ]
   },
   {
@@ -176,10 +178,10 @@
    },
    "outputs": [],
    "source": [
-    "# Get the average weight by sex for each year\n",
-    "weight_year = survey_all.groupby(['year', 'sex'])['weight'].mean()\n",
-    "weight_year = weight_year.unstack()\n",
-    "weight_year"
+    "# Get the average weight by sex for each species\n",
+    "weight_species = surveys_sp.groupby(\n",
+    "    ['species_id', 'sex'])['weight'].mean().unstack()\n",
+    "weight_species"
    ]
   },
   {
@@ -189,8 +191,8 @@
     "lang": "en"
    },
    "source": [
-    "* Export your results as a CSV and make sure\n",
-    "  it reads back into python properly. (2 min.)"
+    "* Export your results as a CSV file and make sure\n",
+    "  it reads back into Python properly. (3 min.)"
    ]
   },
   {
@@ -206,11 +208,11 @@
    "outputs": [],
    "source": [
     "# Writing to file while keeping the index\n",
-    "csv_file = 'weight_for_year.csv'\n",
-    "weight_year.to_csv(csv_file, index=True)\n",
+    "csv_file = 'weight_by_species.csv'\n",
+    "weight_species.to_csv(csv_file, index=True)\n",
     "\n",
     "# Reading it back in with a specified index column\n",
-    "pd.read_csv(csv_file, index_col='year')"
+    "pd.read_csv(csv_file, index_col='species_id')"
    ]
   },
   {
diff --git a/solution-fr/04-combine.ipynb b/solution-fr/04-combine.ipynb
@@ -127,10 +127,11 @@
    },
    "source": [
     "## Exercice - Concaténer des DataFrames\n",
-    "* Dans `surveys_df`, sélectionnez individuellement les enregistrements des années 2001 et 2002\n",
-    "* Concaténez les deux dataframes verticalement\n",
+    "* Chargez les données de tous les fichiers CSV du répertoire\n",
+    "  `../data/by_species_id/` et accumulez-les dans `surveys_sp`.\n",
+    "* Réinitialisez l'index sans préserver celui accumulé.\n",
     "\n",
-    "(3 min.)"
+    "(4 min.)"
    ]
   },
   {
@@ -145,12 +146,14 @@
    },
    "outputs": [],
    "source": [
-    "# Obtenir les données pour chaque année\n",
-    "annee2001 = surveys_df[surveys_df['year'] == 2001]\n",
-    "annee2002 = surveys_df[surveys_df['year'] == 2002]\n",
+    "surveys_sp = pd.DataFrame()  # DataFrame vide\n",
+    "\n",
+    "for fichier in glob('../data/by_species_id/*.csv'):\n",
+    "    nouveau_df = pd.read_csv(fichier)\n",
+    "    surveys_sp = pd.concat([surveys_sp, nouveau_df], axis='index')\n",
     "\n",
-    "# Concaténer verticalement\n",
-    "deux_annees = pd.concat([annee2001, annee2002], axis='index')"
+    "surveys_sp = surveys_sp.reset_index(drop=True)\n",
+    "surveys_sp"
    ]
   },
   {
@@ -160,7 +163,7 @@
     "lang": "fr"
    },
    "source": [
-    "* Calculez le poids moyen selon l'année et le sexe (1 min.)"
+    "* Calculez le poids moyen selon l'espèce et le sexe (1 min.)"
    ]
   },
   {
@@ -175,10 +178,10 @@
    },
    "outputs": [],
    "source": [
-    "# Calculer le poids moyen par année et par sexe\n",
-    "poids_annee = deux_annees.groupby(['year', 'sex'])['weight'].mean()\n",
-    "poids_annee = poids_annee.unstack()\n",
-    "poids_annee"
+    "# Calculer le poids moyen par espèce et par sexe\n",
+    "poids_espece = surveys_sp.groupby(\n",
+    "    ['species_id', 'sex'])['weight'].mean().unstack()\n",
+    "poids_espece"
    ]
   },
   {
@@ -189,7 +192,7 @@
    },
    "source": [
     "* Sauvegardez le tableau des moyennes\n",
-    "  dans un fichier CSV et le recharger (2 min.)"
+    "  dans un fichier CSV et rechargez-le (3 min.)"
    ]
   },
   {
@@ -204,12 +207,12 @@
    },
    "outputs": [],
    "source": [
-    "# Écrire dans un fichier - garder l'index 'year' cette fois-ci\n",
-    "fichier_csv = 'poids_par_annee.csv'\n",
-    "poids_annee.to_csv(fichier_csv, index=True)\n",
+    "# Écrire dans un fichier - garder l'index 'species_id' cette fois-ci\n",
+    "fichier_csv = 'poids_par_espece.csv'\n",
+    "poids_espece.to_csv(fichier_csv, index=True)\n",
     "\n",
     "# Relire les données, fournir le nom de l'index\n",
-    "pd.read_csv(fichier_csv, index_col='year')"
+    "pd.read_csv(fichier_csv, index_col='species_id')"
    ]
   },
   {
diff --git a/src/04-combine.ipynb b/src/04-combine.ipynb