Skip to content

Commit 84f55be

Browse files
committed
Fix for data download notebook
1 parent f541412 commit 84f55be

File tree

1 file changed

+28
-41
lines changed

1 file changed

+28
-41
lines changed

lsd/tutorial/notebooks/lsd_data_download.ipynb

Lines changed: 28 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
"metadata": {
55
"colab": {
66
"name": "lsd_data_download.ipynb",
7-
"provenance": [],
8-
"collapsed_sections": []
7+
"provenance": []
98
},
109
"kernelspec": {
1110
"name": "python3",
@@ -30,14 +29,14 @@
3029
" * the s3 bucket contains about 1.7 tb of data, so bear in mind before downloading the whole bucket\n",
3130
" * boto3 is a python api useful for accessing s3 data, you can use it to find sizes of folders before committing to downloading a directory (here is an example https://stackoverflow.com/questions/49759940/how-to-find-size-of-a-folder-inside-an-s3-bucket), and there are examples for downloading some data below (links at bottom of cell)\n",
3231
" * s3 bucket contains a mixture of zarr, n5, nml, json, bson\n",
33-
" * google bucket volume metadata is stored in info files. zarr/n5 metadata is stored in attributes files. To see offsets/shapes of cropped data (i.e hemi roi 1) check these files. \n",
32+
" * google bucket volume metadata is stored in info files. zarr/n5 metadata is stored in attributes files. To see offsets/shapes of cropped data (i.e hemi roi 1) check these files.\n",
3433
" * google bucket volumes are in xyz voxel space.\n",
3534
" * zarr volumes are in zyx world space (i.e nanometers)\n",
3635
" * n5 volumes are in xyz world space\n",
3736
"\n",
3837
"---\n",
3938
"\n",
40-
"###Key: \n",
39+
"###Key:\n",
4140
" * s3 = amazon s3 bucket\n",
4241
" * gb = google bucket\n",
4342
"\n",
@@ -133,17 +132,11 @@
133132
{
134133
"cell_type": "code",
135134
"metadata": {
136-
"id": "n2O8isY0fFAJ"
135+
"id": "n2O8isY0fFAJ",
136+
"collapsed": true
137137
},
138138
"source": [
139-
"!pip install boto3\n",
140-
"!pip install cloud-volume\n",
141-
"!pip install matplotlib\n",
142-
"!pip install pandas\n",
143-
"!pip install plotly\n",
144-
"!pip install requests\n",
145-
"!pip install scikit-image\n",
146-
"!pip install zarr\n",
139+
"!pip install boto3 cloud-volume matplotlib pandas plotly pymongo requests scikit-image zarr\n",
147140
"\n",
148141
"# since colab has its own versions pre-installed but we\n",
149142
"# need some other versions, the runtime needs to be restarted\n",
@@ -170,9 +163,11 @@
170163
"import pandas as pd\n",
171164
"import plotly.express as px\n",
172165
"import zarr\n",
166+
"from botocore import UNSIGNED\n",
167+
"from botocore.client import Config\n",
173168
"from cloudvolume import CloudVolume"
174169
],
175-
"execution_count": null,
170+
"execution_count": 44,
176171
"outputs": []
177172
},
178173
{
@@ -240,7 +235,7 @@
240235
" # ensure there is a voxel offset in the metadata so cloudvolume is happy\n",
241236
" for scale in vol.info['scales']:\n",
242237
" scale['voxel_offset'] = [0, 0, 0]\n",
243-
" \n",
238+
"\n",
244239
" # slice data\n",
245240
" data = vol[x0:x1, y0:y1, z0:z1]\n",
246241
"\n",
@@ -259,7 +254,7 @@
259254
"id": "IztUw3U1-56_"
260255
},
261256
"source": [
262-
"# get a random 1000x1000 voxel patch \n",
257+
"# get a random 1000x1000 voxel patch\n",
263258
"raw_data = cloud_to_np(raw_vol,1000,2000,2000,3000,300,301)\n",
264259
"seg_data = cloud_to_np(seg_vol,1000,2000,2000,3000,300,301)"
265260
],
@@ -327,10 +322,7 @@
327322
"id": "pDL38VnxhI6T"
328323
},
329324
"source": [
330-
"# set bucket credentials\n",
331-
"access_key = ''\n",
332-
"secret_key = ''\n",
333-
"bucket = ''"
325+
"bucket = 'open-neurodata'"
334326
],
335327
"execution_count": null,
336328
"outputs": []
@@ -341,16 +333,16 @@
341333
"id": "a2gTfGEvgLr0"
342334
},
343335
"source": [
344-
"# connect to client\n",
345-
"client = boto3.client('s3', aws_access_key_id=access_key, aws_secret_access_key=secret_key)"
336+
"client = boto3.client('s3', config=Config(signature_version=UNSIGNED))"
346337
],
347338
"execution_count": null,
348339
"outputs": []
349340
},
350341
{
351342
"cell_type": "code",
352343
"metadata": {
353-
"id": "bVGfxKT2i3MX"
344+
"id": "bVGfxKT2i3MX",
345+
"collapsed": true
354346
},
355347
"source": [
356348
"# list data\n",
@@ -415,21 +407,19 @@
415407
"# function to download all files nested in a bucket path\n",
416408
"def downloadDirectory(\n",
417409
" bucket_name,\n",
418-
" path,\n",
419-
" access_key,\n",
420-
" secret_key):\n",
421-
" \n",
410+
" path):\n",
411+
"\n",
422412
" resource = boto3.resource(\n",
423413
" 's3',\n",
424-
" aws_access_key_id=access_key,\n",
425-
" aws_secret_access_key=secret_key)\n",
426-
" \n",
414+
" config=Config(signature_version=UNSIGNED)\n",
415+
" )\n",
416+
"\n",
427417
" bucket = resource.Bucket(bucket_name)\n",
428418
"\n",
429419
" for obj in bucket.objects.filter(Prefix=path):\n",
430420
" if not os.path.exists(os.path.dirname(obj.key)):\n",
431421
" os.makedirs(os.path.dirname(obj.key))\n",
432-
" \n",
422+
"\n",
433423
" key = obj.key\n",
434424
"\n",
435425
" print(f'Downloading {key}')\n",
@@ -447,9 +437,8 @@
447437
"# download example fib25 training data\n",
448438
"downloadDirectory(\n",
449439
" bucket,\n",
450-
" 'funke/fib25/training/trvol-250-1.zarr',\n",
451-
" access_key,\n",
452-
" secret_key)"
440+
" 'funke/fib25/training/trvol-250-1.zarr'\n",
441+
")"
453442
],
454443
"execution_count": null,
455444
"outputs": []
@@ -583,9 +572,8 @@
583572
"# download example hemi region adjacency graph\n",
584573
"downloadDirectory(\n",
585574
" bucket,\n",
586-
" 'funke/hemi/testing/segmentations/data.zarr/rags/ACLSD/hemi_affs_from_lsd_200k_roi_1',\n",
587-
" access_key,\n",
588-
" secret_key)"
575+
" 'funke/hemi/testing/segmentations/data.zarr/rags/ACLSD/hemi_affs_from_lsd_200k_roi_1'\n",
576+
")"
589577
],
590578
"execution_count": null,
591579
"outputs": []
@@ -726,9 +714,8 @@
726714
"# download example zebrafinch validation skeletons\n",
727715
"downloadDirectory(\n",
728716
" bucket,\n",
729-
" 'funke/zebrafinch/testing/ground_truth/validation/consolidated/zebrafinch_gt_skeletons_new_gt_9_9_20_validation',\n",
730-
" access_key,\n",
731-
" secret_key)"
717+
" 'funke/zebrafinch/testing/ground_truth/validation/consolidated/zebrafinch_gt_skeletons_new_gt_9_9_20_validation'\n",
718+
")"
732719
],
733720
"execution_count": null,
734721
"outputs": []
@@ -814,4 +801,4 @@
814801
"outputs": []
815802
}
816803
]
817-
}
804+
}

0 commit comments

Comments (0)