|
127 | 127 | }, |
128 | 128 | "source": [ |
129 | 129 | "## Exercise - Concatenating DataFrames\n", |
130 | | - "* In `surveys_df`, select rows where the year is 2001.\n", |
131 | | - " Do the same for year 2002.\n", |
132 | | - "* Concatenate both dataframes.\n", |
| 130 | + "* Load the data from all CSV files in the directory\n", |
| 131 | + " `../data/by_species_id/` and accumulate them in `surveys_sp`.\n", |
| 132 | + "* Reset the index while dropping the accumulated one.\n", |
133 | 133 | "\n", |
134 | | - "(3 min.)" |
| 134 | + "(4 min.)" |
135 | 135 | ] |
136 | 136 | }, |
137 | 137 | { |
|
146 | 146 | }, |
147 | 147 | "outputs": [], |
148 | 148 | "source": [ |
149 | | - "# Get data for each year\n", |
150 | | - "survey2001 = surveys_df[surveys_df['year'] == 2001]\n", |
151 | | - "survey2002 = surveys_df[surveys_df['year'] == 2002]\n", |
| 149 | + "surveys_sp = pd.DataFrame() # Empty DataFrame\n", |
| 150 | + "\n", |
| 151 | + "for filename in glob('../data/by_species_id/*.csv'):\n", |
| 152 | + " new_df = pd.read_csv(filename)\n", |
| 153 | + " surveys_sp = pd.concat([surveys_sp, new_df], axis='index')\n", |
152 | 154 | "\n", |
153 | | - "# Concatenate vertically\n", |
154 | | - "survey_all = pd.concat([survey2001, survey2002], axis='index')" |
| 155 | + "surveys_sp = surveys_sp.reset_index(drop=True)\n", |
| 156 | + "surveys_sp" |
155 | 157 | ] |
156 | 158 | }, |
157 | 159 | { |
|
161 | 163 | "lang": "en" |
162 | 164 | }, |
163 | 165 | "source": [ |
164 | | - "* Compute the average weight by sex for each year. (1 min.)" |
| 166 | + "* Compute the average weight by sex for each species. (1 min.)" |
165 | 167 | ] |
166 | 168 | }, |
167 | 169 | { |
|
176 | 178 | }, |
177 | 179 | "outputs": [], |
178 | 180 | "source": [ |
179 | | - "# Get the average weight by sex for each year\n", |
180 | | - "weight_year = survey_all.groupby(['year', 'sex'])['weight'].mean()\n", |
181 | | - "weight_year = weight_year.unstack()\n", |
182 | | - "weight_year" |
| 181 | + "# Get the average weight by sex for each species\n", |
| 182 | + "weight_species = surveys_sp.groupby(\n", |
| 183 | + " ['species_id', 'sex'])['weight'].mean().unstack()\n", |
| 184 | + "weight_species" |
183 | 185 | ] |
184 | 186 | }, |
185 | 187 | { |
|
189 | 191 | "lang": "en" |
190 | 192 | }, |
191 | 193 | "source": [ |
192 | | - "* Export your results as a CSV and make sure\n", |
193 | | - " it reads back into python properly. (2 min.)" |
| 194 | + "* Export your results as a CSV file and make sure\n", |
| 195 | + " it reads back into Python properly. (3 min.)" |
194 | 196 | ] |
195 | 197 | }, |
196 | 198 | { |
|
206 | 208 | "outputs": [], |
207 | 209 | "source": [ |
208 | 210 | "# Writing to file while keeping the index\n", |
209 | | - "csv_file = 'weight_for_year.csv'\n", |
210 | | - "weight_year.to_csv(csv_file, index=True)\n", |
| 211 | + "csv_file = 'weight_by_species.csv'\n", |
| 212 | + "weight_species.to_csv(csv_file, index=True)\n", |
211 | 213 | "\n", |
212 | 214 | "# Reading it back in with a specified index column\n", |
213 | | - "pd.read_csv(csv_file, index_col='year')" |
| 215 | + "pd.read_csv(csv_file, index_col='species_id')" |
214 | 216 | ] |
215 | 217 | }, |
216 | 218 | { |
|
0 commit comments