|
35 | 35 | }, |
36 | 36 | { |
37 | 37 | "cell_type": "code", |
38 | | - "execution_count": 27, |
| 38 | + "execution_count": null, |
39 | 39 | "metadata": { |
40 | 40 | "collapsed": false, |
41 | 41 | "deletable": true, |
|
49 | 49 | }, |
50 | 50 | { |
51 | 51 | "cell_type": "code", |
52 | | - "execution_count": 28, |
| 52 | + "execution_count": null, |
53 | 53 | "metadata": { |
54 | 54 | "collapsed": false, |
55 | 55 | "deletable": true, |
|
82 | 82 | }, |
83 | 83 | { |
84 | 84 | "cell_type": "code", |
85 | | - "execution_count": 29, |
| 85 | + "execution_count": null, |
86 | 86 | "metadata": { |
87 | 87 | "collapsed": false, |
88 | 88 | "deletable": true, |
89 | 89 | "editable": true |
90 | 90 | }, |
91 | | - "outputs": [ |
92 | | - { |
93 | | - "name": "stderr", |
94 | | - "output_type": "stream", |
95 | | - "text": [ |
96 | | - "<string>:2: DtypeWarning: Columns (49) have mixed types. Specify dtype option on import or set low_memory=False.\n" |
97 | | - ] |
98 | | - }, |
99 | | - { |
100 | | - "name": "stdout", |
101 | | - "output_type": "stream", |
102 | | - "text": [ |
103 | | - "CPU times: user 15.5 s, sys: 1.03 s, total: 16.5 s\n", |
104 | | - "Wall time: 16.7 s\n" |
105 | | - ] |
106 | | - } |
107 | | - ], |
| 91 | + "outputs": [], |
108 | 92 | "source": [ |
109 | 93 | "%%time\n", |
110 | 94 | "df = pd.read_csv('pokemon/300k_csv.zip')\n", |
|
117 | 101 | }, |
118 | 102 | { |
119 | 103 | "cell_type": "code", |
120 | | - "execution_count": 30, |
| 104 | + "execution_count": null, |
121 | 105 | "metadata": { |
122 | 106 | "collapsed": false, |
123 | 107 | "deletable": true, |
124 | 108 | "editable": true |
125 | 109 | }, |
126 | | - "outputs": [ |
127 | | - { |
128 | | - "name": "stdout", |
129 | | - "output_type": "stream", |
130 | | - "text": [ |
131 | | - "<class 'pandas.core.frame.DataFrame'>\n", |
132 | | - "RangeIndex: 296021 entries, 0 to 296020\n", |
133 | | - "Columns: 208 entries, pokemonId to target\n", |
134 | | - "dtypes: bool(168), float64(8), int64(17), object(8), uint64(7)\n", |
135 | | - "memory usage: 137.8+ MB\n" |
136 | | - ] |
137 | | - } |
138 | | - ], |
| 110 | + "outputs": [], |
139 | 111 | "source": [ |
140 | 112 | "df.info()" |
141 | 113 | ] |
142 | 114 | }, |
143 | 115 | { |
144 | 116 | "cell_type": "code", |
145 | | - "execution_count": 31, |
| 117 | + "execution_count": null, |
146 | 118 | "metadata": { |
147 | 119 | "collapsed": false, |
148 | 120 | "deletable": true, |
149 | 121 | "editable": true, |
150 | 122 | "scrolled": true |
151 | 123 | }, |
152 | | - "outputs": [ |
153 | | - { |
154 | | - "data": { |
155 | | - "text/html": [ |
156 | | - "<div>\n", |
157 | | - "<style>\n", |
158 | | - " .dataframe thead tr:only-child th {\n", |
159 | | - " text-align: right;\n", |
160 | | - " }\n", |
161 | | - "\n", |
162 | | - " .dataframe thead th {\n", |
163 | | - " text-align: left;\n", |
164 | | - " }\n", |
165 | | - "\n", |
166 | | - " .dataframe tbody tr th {\n", |
167 | | - " vertical-align: top;\n", |
168 | | - " }\n", |
169 | | - "</style>\n", |
170 | | - "<table border=\"1\" class=\"dataframe\">\n", |
171 | | - " <thead>\n", |
172 | | - " <tr style=\"text-align: right;\">\n", |
173 | | - " <th></th>\n", |
174 | | - " <th>pokemonId</th>\n", |
175 | | - " <th>latitude</th>\n", |
176 | | - " <th>longitude</th>\n", |
177 | | - " <th>appearedLocalTime</th>\n", |
178 | | - " <th>_id</th>\n", |
179 | | - " <th>cellId_90m</th>\n", |
180 | | - " <th>cellId_180m</th>\n", |
181 | | - " <th>cellId_370m</th>\n", |
182 | | - " <th>cellId_730m</th>\n", |
183 | | - " <th>cellId_1460m</th>\n", |
184 | | - " <th>...</th>\n", |
185 | | - " <th>cooc_143</th>\n", |
186 | | - " <th>cooc_144</th>\n", |
187 | | - " <th>cooc_145</th>\n", |
188 | | - " <th>cooc_146</th>\n", |
189 | | - " <th>cooc_147</th>\n", |
190 | | - " <th>cooc_148</th>\n", |
191 | | - " <th>cooc_149</th>\n", |
192 | | - " <th>cooc_150</th>\n", |
193 | | - " <th>cooc_151</th>\n", |
194 | | - " <th>target</th>\n", |
195 | | - " </tr>\n", |
196 | | - " </thead>\n", |
197 | | - " <tbody>\n", |
198 | | - " <tr>\n", |
199 | | - " <th>0</th>\n", |
200 | | - " <td>16</td>\n", |
201 | | - " <td>20.525745</td>\n", |
202 | | - " <td>-97.460829</td>\n", |
203 | | - " <td>2016-09-08T03:57:45</td>\n", |
204 | | - " <td>NTgxMDkzOTk4MTM5MjUwMjIzNw==</td>\n", |
205 | | - " <td>9645139108510564000</td>\n", |
206 | | - " <td>9645139108711890000</td>\n", |
207 | | - " <td>9645139108443455000</td>\n", |
208 | | - " <td>9645139109517197000</td>\n", |
209 | | - " <td>9645139113812165000</td>\n", |
210 | | - " <td>...</td>\n", |
211 | | - " <td>False</td>\n", |
212 | | - " <td>False</td>\n", |
213 | | - " <td>False</td>\n", |
214 | | - " <td>False</td>\n", |
215 | | - " <td>False</td>\n", |
216 | | - " <td>False</td>\n", |
217 | | - " <td>False</td>\n", |
218 | | - " <td>False</td>\n", |
219 | | - " <td>False</td>\n", |
220 | | - " <td>16</td>\n", |
221 | | - " </tr>\n", |
222 | | - " <tr>\n", |
223 | | - " <th>1</th>\n", |
224 | | - " <td>133</td>\n", |
225 | | - " <td>20.523695</td>\n", |
226 | | - " <td>-97.461167</td>\n", |
227 | | - " <td>2016-09-08T03:57:37</td>\n", |
228 | | - " <td>OTQ1NDgzODc1MjM3NDEzMTI2MQ==</td>\n", |
229 | | - " <td>9645139109852742000</td>\n", |
230 | | - " <td>9645139109785633000</td>\n", |
231 | | - " <td>9645139110590940000</td>\n", |
232 | | - " <td>9645139109517197000</td>\n", |
233 | | - " <td>9645139113812165000</td>\n", |
234 | | - " <td>...</td>\n", |
235 | | - " <td>False</td>\n", |
236 | | - " <td>False</td>\n", |
237 | | - " <td>False</td>\n", |
238 | | - " <td>False</td>\n", |
239 | | - " <td>False</td>\n", |
240 | | - " <td>False</td>\n", |
241 | | - " <td>False</td>\n", |
242 | | - " <td>False</td>\n", |
243 | | - " <td>False</td>\n", |
244 | | - " <td>133</td>\n", |
245 | | - " </tr>\n", |
246 | | - " <tr>\n", |
247 | | - " <th>2</th>\n", |
248 | | - " <td>16</td>\n", |
249 | | - " <td>38.903590</td>\n", |
250 | | - " <td>-77.199780</td>\n", |
251 | | - " <td>2016-09-08T03:57:25</td>\n", |
252 | | - " <td>NTQ0OTQ0NDA1Nzg2ODg3OTg2OQ==</td>\n", |
253 | | - " <td>9923201472785285000</td>\n", |
254 | | - " <td>9923201472986612000</td>\n", |
255 | | - " <td>9923201473791918000</td>\n", |
256 | | - " <td>9923201477013144000</td>\n", |
257 | | - " <td>9923201481308110000</td>\n", |
258 | | - " <td>...</td>\n", |
259 | | - " <td>False</td>\n", |
260 | | - " <td>False</td>\n", |
261 | | - " <td>False</td>\n", |
262 | | - " <td>False</td>\n", |
263 | | - " <td>False</td>\n", |
264 | | - " <td>False</td>\n", |
265 | | - " <td>False</td>\n", |
266 | | - " <td>False</td>\n", |
267 | | - " <td>False</td>\n", |
268 | | - " <td>16</td>\n", |
269 | | - " </tr>\n", |
270 | | - " <tr>\n", |
271 | | - " <th>3</th>\n", |
272 | | - " <td>13</td>\n", |
273 | | - " <td>47.665903</td>\n", |
274 | | - " <td>-122.312561</td>\n", |
275 | | - " <td>2016-09-08T03:56:22</td>\n", |
276 | | - " <td>NTU2MTU1NDM4NzA2MDk1MDcxNw==</td>\n", |
277 | | - " <td>6093392705025474600</td>\n", |
278 | | - " <td>6093392705092583400</td>\n", |
279 | | - " <td>6093392705897889800</td>\n", |
280 | | - " <td>6093392702676664300</td>\n", |
281 | | - " <td>6093392715561566200</td>\n", |
282 | | - " <td>...</td>\n", |
283 | | - " <td>False</td>\n", |
284 | | - " <td>False</td>\n", |
285 | | - " <td>False</td>\n", |
286 | | - " <td>False</td>\n", |
287 | | - " <td>False</td>\n", |
288 | | - " <td>False</td>\n", |
289 | | - " <td>False</td>\n", |
290 | | - " <td>False</td>\n", |
291 | | - " <td>False</td>\n", |
292 | | - " <td>13</td>\n", |
293 | | - " </tr>\n", |
294 | | - " <tr>\n", |
295 | | - " <th>4</th>\n", |
296 | | - " <td>133</td>\n", |
297 | | - " <td>47.666454</td>\n", |
298 | | - " <td>-122.311628</td>\n", |
299 | | - " <td>2016-09-08T03:56:08</td>\n", |
300 | | - " <td>MTY2ODg4MTAzMTczMDE0MTUwNTM=</td>\n", |
301 | | - " <td>6093392707709829100</td>\n", |
302 | | - " <td>6093392707776938000</td>\n", |
303 | | - " <td>6093392708045373400</td>\n", |
304 | | - " <td>6093392711266598900</td>\n", |
305 | | - " <td>6093392715561566200</td>\n", |
306 | | - " <td>...</td>\n", |
307 | | - " <td>False</td>\n", |
308 | | - " <td>False</td>\n", |
309 | | - " <td>False</td>\n", |
310 | | - " <td>False</td>\n", |
311 | | - " <td>False</td>\n", |
312 | | - " <td>False</td>\n", |
313 | | - " <td>False</td>\n", |
314 | | - " <td>False</td>\n", |
315 | | - " <td>False</td>\n", |
316 | | - " <td>133</td>\n", |
317 | | - " </tr>\n", |
318 | | - " </tbody>\n", |
319 | | - "</table>\n", |
320 | | - "<p>5 rows × 208 columns</p>\n", |
321 | | - "</div>" |
322 | | - ], |
323 | | - "text/plain": [ |
324 | | - " pokemonId latitude longitude appearedLocalTime \\\n", |
325 | | - "0 16 20.525745 -97.460829 2016-09-08T03:57:45 \n", |
326 | | - "1 133 20.523695 -97.461167 2016-09-08T03:57:37 \n", |
327 | | - "2 16 38.903590 -77.199780 2016-09-08T03:57:25 \n", |
328 | | - "3 13 47.665903 -122.312561 2016-09-08T03:56:22 \n", |
329 | | - "4 133 47.666454 -122.311628 2016-09-08T03:56:08 \n", |
330 | | - "\n", |
331 | | - " _id cellId_90m cellId_180m \\\n", |
332 | | - "0 NTgxMDkzOTk4MTM5MjUwMjIzNw== 9645139108510564000 9645139108711890000 \n", |
333 | | - "1 OTQ1NDgzODc1MjM3NDEzMTI2MQ== 9645139109852742000 9645139109785633000 \n", |
334 | | - "2 NTQ0OTQ0NDA1Nzg2ODg3OTg2OQ== 9923201472785285000 9923201472986612000 \n", |
335 | | - "3 NTU2MTU1NDM4NzA2MDk1MDcxNw== 6093392705025474600 6093392705092583400 \n", |
336 | | - "4 MTY2ODg4MTAzMTczMDE0MTUwNTM= 6093392707709829100 6093392707776938000 \n", |
337 | | - "\n", |
338 | | - " cellId_370m cellId_730m cellId_1460m ... \\\n", |
339 | | - "0 9645139108443455000 9645139109517197000 9645139113812165000 ... \n", |
340 | | - "1 9645139110590940000 9645139109517197000 9645139113812165000 ... \n", |
341 | | - "2 9923201473791918000 9923201477013144000 9923201481308110000 ... \n", |
342 | | - "3 6093392705897889800 6093392702676664300 6093392715561566200 ... \n", |
343 | | - "4 6093392708045373400 6093392711266598900 6093392715561566200 ... \n", |
344 | | - "\n", |
345 | | - " cooc_143 cooc_144 cooc_145 cooc_146 cooc_147 cooc_148 cooc_149 \\\n", |
346 | | - "0 False False False False False False False \n", |
347 | | - "1 False False False False False False False \n", |
348 | | - "2 False False False False False False False \n", |
349 | | - "3 False False False False False False False \n", |
350 | | - "4 False False False False False False False \n", |
351 | | - "\n", |
352 | | - " cooc_150 cooc_151 target \n", |
353 | | - "0 False False 16 \n", |
354 | | - "1 False False 133 \n", |
355 | | - "2 False False 16 \n", |
356 | | - "3 False False 13 \n", |
357 | | - "4 False False 133 \n", |
358 | | - "\n", |
359 | | - "[5 rows x 208 columns]" |
360 | | - ] |
361 | | - }, |
362 | | - "execution_count": 31, |
363 | | - "metadata": {}, |
364 | | - "output_type": "execute_result" |
365 | | - } |
366 | | - ], |
| 124 | + "outputs": [], |
367 | 125 | "source": [ |
368 | 126 | "df.head()" |
369 | 127 | ] |
|
390 | 148 | }, |
391 | 149 | { |
392 | 150 | "cell_type": "code", |
393 | | - "execution_count": 32, |
| 151 | + "execution_count": null, |
394 | 152 | "metadata": { |
395 | 153 | "collapsed": true, |
396 | 154 | "deletable": true, |
|
406 | 164 | }, |
407 | 165 | { |
408 | 166 | "cell_type": "code", |
409 | | - "execution_count": 34, |
| 167 | + "execution_count": null, |
410 | 168 | "metadata": { |
411 | 169 | "collapsed": false, |
412 | 170 | "deletable": true, |
413 | 171 | "editable": true, |
414 | 172 | "scrolled": true |
415 | 173 | }, |
416 | | - "outputs": [ |
417 | | - { |
418 | | - "name": "stdout", |
419 | | - "output_type": "stream", |
420 | | - "text": [ |
421 | | - "CPU times: user 9.55 s, sys: 562 ms, total: 10.1 s\n", |
422 | | - "Wall time: 10.3 s\n" |
423 | | - ] |
424 | | - } |
425 | | - ], |
| 174 | + "outputs": [], |
426 | 175 | "source": [ |
427 | 176 | "%%time\n", |
428 | 177 | "complib, codec = 'blosc', 'zstd'\n", |
|
436 | 185 | }, |
437 | 186 | { |
438 | 187 | "cell_type": "code", |
439 | | - "execution_count": 35, |
| 188 | + "execution_count": null, |
440 | 189 | "metadata": { |
441 | 190 | "collapsed": false, |
442 | 191 | "deletable": true, |
443 | 192 | "editable": true |
444 | 193 | }, |
445 | | - "outputs": [ |
446 | | - { |
447 | | - "name": "stdout", |
448 | | - "output_type": "stream", |
449 | | - "text": [ |
450 | | - "hdfstore:\r\n", |
451 | | - "total 70592\r\n", |
452 | | - "-rw-r--r-- 1 faltet staff 34M May 18 13:27 blosc-zstd-6.h5\r\n", |
453 | | - "\r\n", |
454 | | - "pokemon:\r\n", |
455 | | - "total 77016\r\n", |
456 | | - "-rw-r--r-- 1 faltet staff 38M May 17 12:28 300k_csv.zip\r\n", |
457 | | - "drwxr-xr-x 153 faltet staff 5.1K May 17 12:28 \u001b[34msprites\u001b[m\u001b[m/\r\n" |
458 | | - ] |
459 | | - } |
460 | | - ], |
| 194 | + "outputs": [], |
461 | 195 | "source": [ |
462 | 196 | "%ls -lh {data_dir} pokemon" |
463 | 197 | ] |
|
553 | 287 | "cell_type": "code", |
554 | 288 | "execution_count": null, |
555 | 289 | "metadata": { |
556 | | - "collapsed": true |
| 290 | + "collapsed": true, |
| 291 | + "deletable": true, |
| 292 | + "editable": true |
557 | 293 | }, |
558 | 294 | "outputs": [], |
559 | 295 | "source": [] |
|
0 commit comments