Skip to content

Commit d8a599e

Browse files
committed
Exercice de concaténation révisé
1 parent 4ea30a8 commit d8a599e

5 files changed

Lines changed: 151 additions & 132 deletions

File tree

en/04-combine.ipynb

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,11 @@
127127
},
128128
"source": [
129129
"## Exercise - Concatenating DataFrames\n",
130-
"* In `surveys_df`, select rows where the year is 2001.\n",
131-
" Do the same for year 2002.\n",
132-
"* Concatenate both dataframes.\n",
130+
"* Load the data from all CSV files in the directory\n",
131+
" `../data/by_species_id/` and accumulate them in `surveys_sp`.\n",
132+
"* Reset the index while dropping the accumulated one.\n",
133133
"\n",
134-
"(3 min.)"
134+
"(4 min.)"
135135
]
136136
},
137137
{
@@ -146,12 +146,14 @@
146146
},
147147
"outputs": [],
148148
"source": [
149-
"# Get data for each year\n",
150-
"survey2001 = surveys_df[surveys_df['year'] ###]\n",
151-
"survey2002 = surveys_df[surveys_df['year'] ###]\n",
149+
"surveys_sp = pd.DataFrame() # Empty DataFrame\n",
150+
"\n",
151+
"for filename in ###('../data/by_species_id/*.csv'):\n",
152+
" new_df = pd.read_csv(filename)\n",
153+
" surveys_sp = pd.###([###, new_df], ###='index')\n",
152154
"\n",
153-
"# Concatenate vertically\n",
154-
"survey_all = ###"
155+
"surveys_sp = surveys_sp.###(drop=###)\n",
156+
"surveys_sp"
155157
]
156158
},
157159
{
@@ -161,7 +163,7 @@
161163
"lang": "en"
162164
},
163165
"source": [
164-
"* Compute the average weight by sex for each year. (1 min.)"
166+
"* Compute the average weight by sex for each species. (1 min.)"
165167
]
166168
},
167169
{
@@ -176,10 +178,10 @@
176178
},
177179
"outputs": [],
178180
"source": [
179-
"# Get the average weight by sex for each year\n",
180-
"weight_year = survey_all.groupby(['year', 'sex'])###\n",
181-
"weight_year = weight_year.unstack()\n",
182-
"weight_year"
181+
"# Get the average weight by sex for each species\n",
182+
"weight_species = surveys_sp.groupby(\n",
183+
" ['species_id', 'sex'])###.unstack()\n",
184+
"weight_species"
183185
]
184186
},
185187
{
@@ -189,8 +191,8 @@
189191
"lang": "en"
190192
},
191193
"source": [
192-
"* Export your results as a CSV and make sure\n",
193-
" it reads back into python properly. (2 min.)"
194+
"* Export your results as a CSV file and make sure\n",
195+
" it reads back into Python properly. (3 min.)"
194196
]
195197
},
196198
{
@@ -206,8 +208,8 @@
206208
"outputs": [],
207209
"source": [
208210
"# Writing to file while keeping the index\n",
209-
"csv_file = 'weight_for_year.csv'\n",
210-
"weight_year###\n",
211+
"csv_file = 'weight_by_species.csv'\n",
212+
"weight_species###\n",
211213
"\n",
212214
"# Reading it back in with a specified index column\n",
213215
"pd.read_csv(csv_file, index_col=###)"

fr/04-combine.ipynb

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,11 @@
127127
},
128128
"source": [
129129
"## Exercice - Concaténer des DataFrames\n",
130-
"* Dans `surveys_df`, sélectionnez individuellement les enregistrements des années 2001 et 2002\n",
131-
"* Concaténez les deux dataframes verticalement\n",
130+
"* Chargez les données de tous les fichiers CSV du répertoire\n",
131+
" `../data/by_species_id/` et accumulez-les dans `surveys_sp`.\n",
132+
"* Réinitialisez l'index sans préserver celui accumulé.\n",
132133
"\n",
133-
"(3 min.)"
134+
"(4 min.)"
134135
]
135136
},
136137
{
@@ -145,12 +146,14 @@
145146
},
146147
"outputs": [],
147148
"source": [
148-
"# Obtenir les données pour chaque année\n",
149-
"annee2001 = surveys_df[surveys_df['year'] ###]\n",
150-
"annee2002 = surveys_df[surveys_df['year'] ###]\n",
149+
"surveys_sp = pd.DataFrame() # DataFrame vide\n",
150+
"\n",
151+
"for fichier in ###('../data/by_species_id/*.csv'):\n",
152+
" nouveau_df = pd.read_csv(fichier)\n",
153+
" surveys_sp = pd.###([###, nouveau_df], ###='index')\n",
151154
"\n",
152-
"# Concaténer verticalement\n",
153-
"deux_annees = ###"
155+
"surveys_sp = surveys_sp.###(drop=###)\n",
156+
"surveys_sp"
154157
]
155158
},
156159
{
@@ -160,7 +163,7 @@
160163
"lang": "fr"
161164
},
162165
"source": [
163-
"* Calculez le poids moyen selon l'année et le sexe (1 min.)"
166+
"* Calculez le poids moyen selon l'espèce et le sexe (1 min.)"
164167
]
165168
},
166169
{
@@ -175,10 +178,10 @@
175178
},
176179
"outputs": [],
177180
"source": [
178-
"# Calculer le poids moyen par année et par sexe\n",
179-
"poids_annee = deux_annees.groupby(['year', 'sex'])###\n",
180-
"poids_annee = poids_annee.unstack()\n",
181-
"poids_annee"
181+
"# Calculer le poids moyen par espèce et par sexe\n",
182+
"poids_espece = surveys_sp.groupby(\n",
183+
" ['species_id', 'sex'])###.unstack()\n",
184+
"poids_espece"
182185
]
183186
},
184187
{
@@ -189,7 +192,7 @@
189192
},
190193
"source": [
191194
"* Sauvegardez le tableau des moyennes\n",
192-
" dans un fichier CSV et le recharger (2 min.)"
195+
" dans un fichier CSV et rechargez-le (3 min.)"
193196
]
194197
},
195198
{
@@ -204,9 +207,9 @@
204207
},
205208
"outputs": [],
206209
"source": [
207-
"# Écrire dans un fichier - garder l'index 'year' cette fois-ci\n",
208-
"fichier_csv = 'poids_par_annee.csv'\n",
209-
"poids_annee###\n",
210+
"# Écrire dans un fichier - garder l'index 'species_id' cette fois-ci\n",
211+
"fichier_csv = 'poids_par_espece.csv'\n",
212+
"poids_espece###\n",
210213
"\n",
211214
"# Relire les données, fournir le nom de l'index\n",
212215
"pd.read_csv(fichier_csv, index_col=###)"

solution-en/04-combine.ipynb

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,11 @@
127127
},
128128
"source": [
129129
"## Exercise - Concatenating DataFrames\n",
130-
"* In `surveys_df`, select rows where the year is 2001.\n",
131-
" Do the same for year 2002.\n",
132-
"* Concatenate both dataframes.\n",
130+
"* Load the data from all CSV files in the directory\n",
131+
" `../data/by_species_id/` and accumulate them in `surveys_sp`.\n",
132+
"* Reset the index while dropping the accumulated one.\n",
133133
"\n",
134-
"(3 min.)"
134+
"(4 min.)"
135135
]
136136
},
137137
{
@@ -146,12 +146,14 @@
146146
},
147147
"outputs": [],
148148
"source": [
149-
"# Get data for each year\n",
150-
"survey2001 = surveys_df[surveys_df['year'] == 2001]\n",
151-
"survey2002 = surveys_df[surveys_df['year'] == 2002]\n",
149+
"surveys_sp = pd.DataFrame() # Empty DataFrame\n",
150+
"\n",
151+
"for filename in glob('../data/by_species_id/*.csv'):\n",
152+
" new_df = pd.read_csv(filename)\n",
153+
" surveys_sp = pd.concat([surveys_sp, new_df], axis='index')\n",
152154
"\n",
153-
"# Concatenate vertically\n",
154-
"survey_all = pd.concat([survey2001, survey2002], axis='index')"
155+
"surveys_sp = surveys_sp.reset_index(drop=True)\n",
156+
"surveys_sp"
155157
]
156158
},
157159
{
@@ -161,7 +163,7 @@
161163
"lang": "en"
162164
},
163165
"source": [
164-
"* Compute the average weight by sex for each year. (1 min.)"
166+
"* Compute the average weight by sex for each species. (1 min.)"
165167
]
166168
},
167169
{
@@ -176,10 +178,10 @@
176178
},
177179
"outputs": [],
178180
"source": [
179-
"# Get the average weight by sex for each year\n",
180-
"weight_year = survey_all.groupby(['year', 'sex'])['weight'].mean()\n",
181-
"weight_year = weight_year.unstack()\n",
182-
"weight_year"
181+
"# Get the average weight by sex for each species\n",
182+
"weight_species = surveys_sp.groupby(\n",
183+
" ['species_id', 'sex'])['weight'].mean().unstack()\n",
184+
"weight_species"
183185
]
184186
},
185187
{
@@ -189,8 +191,8 @@
189191
"lang": "en"
190192
},
191193
"source": [
192-
"* Export your results as a CSV and make sure\n",
193-
" it reads back into python properly. (2 min.)"
194+
"* Export your results as a CSV file and make sure\n",
195+
" it reads back into Python properly. (3 min.)"
194196
]
195197
},
196198
{
@@ -206,11 +208,11 @@
206208
"outputs": [],
207209
"source": [
208210
"# Writing to file while keeping the index\n",
209-
"csv_file = 'weight_for_year.csv'\n",
210-
"weight_year.to_csv(csv_file, index=True)\n",
211+
"csv_file = 'weight_by_species.csv'\n",
212+
"weight_species.to_csv(csv_file, index=True)\n",
211213
"\n",
212214
"# Reading it back in with a specified index column\n",
213-
"pd.read_csv(csv_file, index_col='year')"
215+
"pd.read_csv(csv_file, index_col='species_id')"
214216
]
215217
},
216218
{

solution-fr/04-combine.ipynb

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,11 @@
127127
},
128128
"source": [
129129
"## Exercice - Concaténer des DataFrames\n",
130-
"* Dans `surveys_df`, sélectionnez individuellement les enregistrements des années 2001 et 2002\n",
131-
"* Concaténez les deux dataframes verticalement\n",
130+
"* Chargez les données de tous les fichiers CSV du répertoire\n",
131+
" `../data/by_species_id/` et accumulez-les dans `surveys_sp`.\n",
132+
"* Réinitialisez l'index sans préserver celui accumulé.\n",
132133
"\n",
133-
"(3 min.)"
134+
"(4 min.)"
134135
]
135136
},
136137
{
@@ -145,12 +146,14 @@
145146
},
146147
"outputs": [],
147148
"source": [
148-
"# Obtenir les données pour chaque année\n",
149-
"annee2001 = surveys_df[surveys_df['year'] == 2001]\n",
150-
"annee2002 = surveys_df[surveys_df['year'] == 2002]\n",
149+
"surveys_sp = pd.DataFrame() # DataFrame vide\n",
150+
"\n",
151+
"for fichier in glob('../data/by_species_id/*.csv'):\n",
152+
" nouveau_df = pd.read_csv(fichier)\n",
153+
" surveys_sp = pd.concat([surveys_sp, nouveau_df], axis='index')\n",
151154
"\n",
152-
"# Concaténer verticalement\n",
153-
"deux_annees = pd.concat([annee2001, annee2002], axis='index')"
155+
"surveys_sp = surveys_sp.reset_index(drop=True)\n",
156+
"surveys_sp"
154157
]
155158
},
156159
{
@@ -160,7 +163,7 @@
160163
"lang": "fr"
161164
},
162165
"source": [
163-
"* Calculez le poids moyen selon l'année et le sexe (1 min.)"
166+
"* Calculez le poids moyen selon l'espèce et le sexe (1 min.)"
164167
]
165168
},
166169
{
@@ -175,10 +178,10 @@
175178
},
176179
"outputs": [],
177180
"source": [
178-
"# Calculer le poids moyen par année et par sexe\n",
179-
"poids_annee = deux_annees.groupby(['year', 'sex'])['weight'].mean()\n",
180-
"poids_annee = poids_annee.unstack()\n",
181-
"poids_annee"
181+
"# Calculer le poids moyen par espèce et par sexe\n",
182+
"poids_espece = surveys_sp.groupby(\n",
183+
" ['species_id', 'sex'])['weight'].mean().unstack()\n",
184+
"poids_espece"
182185
]
183186
},
184187
{
@@ -189,7 +192,7 @@
189192
},
190193
"source": [
191194
"* Sauvegardez le tableau des moyennes\n",
192-
" dans un fichier CSV et le recharger (2 min.)"
195+
" dans un fichier CSV et rechargez-le (3 min.)"
193196
]
194197
},
195198
{
@@ -204,12 +207,12 @@
204207
},
205208
"outputs": [],
206209
"source": [
207-
"# Écrire dans un fichier - garder l'index 'year' cette fois-ci\n",
208-
"fichier_csv = 'poids_par_annee.csv'\n",
209-
"poids_annee.to_csv(fichier_csv, index=True)\n",
210+
"# Écrire dans un fichier - garder l'index 'species_id' cette fois-ci\n",
211+
"fichier_csv = 'poids_par_espece.csv'\n",
212+
"poids_espece.to_csv(fichier_csv, index=True)\n",
210213
"\n",
211214
"# Relire les données, fournir le nom de l'index\n",
212-
"pd.read_csv(fichier_csv, index_col='year')"
215+
"pd.read_csv(fichier_csv, index_col='species_id')"
213216
]
214217
},
215218
{

0 commit comments

Comments
 (0)