Skip to content

Commit ec820be

Browse files
authored
Merge pull request #6 from aws-deepracer-community/enhance-data-collection
Allow additional data collection
2 parents e8b1822 + f8f9bf3 commit ec820be

File tree

6 files changed

+118
-37
lines changed

6 files changed

+118
-37
lines changed

poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ boto3 = "^1.17.17"
1111
pandas = "^1.2.2"
1212
requests = "^2.25.1"
1313
joblib = "^1.0.1"
14-
deepracer-utils = "^0.19"
14+
deepracer-utils = "^0.20"
1515

1616
[tool.poetry.dev-dependencies]
1717
pytest = "^5.2"

src/deepracer_race_stats/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,13 @@
33
# Track data.
44
TRACK_FOLDER = "tracks"
55
TRACK_FOLDER_ASSETS = os.path.join(TRACK_FOLDER, "assets")
6+
TRACK_FOLDER_ROUTES = os.path.join(TRACK_FOLDER, "npy")
67
TRACK_CSV_FILEPATH = os.path.join(TRACK_FOLDER, "tracks.csv")
78

89
# Leaderboard data
910
LEADERBOARDS_FOLDER = "leaderboards"
1011
LEADERBOARDS_FOLDER_ASSETS = os.path.join(LEADERBOARDS_FOLDER, "assets")
1112
LEADERBOARDS_CSV_FILEPATH = os.path.join(LEADERBOARDS_FOLDER, "leaderboards.csv")
13+
14+
# Simapp
15+
SIMAPP_TAR_GZ = "deepracer-simapp.tar.gz"

src/deepracer_race_stats/main.py

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
import os
22
import click
3+
import boto3
4+
import glob
5+
import shutil
6+
import tarfile
37

48
from datetime import datetime
59
from joblib import Parallel, delayed
610
from deepracer_race_stats.constants import (
711
LEADERBOARDS_CSV_FILEPATH,
812
LEADERBOARDS_FOLDER_ASSETS,
913
LEADERBOARDS_FOLDER,
14+
SIMAPP_TAR_GZ,
1015
TRACK_CSV_FILEPATH,
1116
TRACK_FOLDER_ASSETS,
17+
TRACK_FOLDER_ROUTES,
1218
)
1319

1420
from deepracer_race_stats.util.csv_util import boto_response_to_csv
@@ -17,7 +23,7 @@
1723
list_leaderboards,
1824
list_tracks,
1925
)
20-
from deepracer_race_stats.util.media import fetch_media_assets
26+
from deepracer_race_stats.util.assets import fetch_assets, extract_asset_paths
2127

2228

2329
@click.group()
@@ -43,11 +49,59 @@ def track_update(ctx, output_folder):
4349

4450
boto_response_to_csv(response, output_path)
4551

46-
asset_map = {r["TrackArn"]: r["TrackPicture"] for r in response}
52+
asset_map = {}
53+
54+
for r in response:
55+
asset_map.update(extract_asset_paths(r, arn_key="TrackArn"))
4756

4857
output_assets_folder = os.path.join(output_folder, TRACK_FOLDER_ASSETS)
58+
fetch_assets(asset_map, output_assets_folder)
59+
60+
61+
@cli.command()
@click.option("-o", "--output-folder", required=True)
@click.option("-b", "--simapp-bucket", default="deepracer-managed-resources-us-east-1")
@click.option("-k", "--simapp-key", default=SIMAPP_TAR_GZ)
@click.pass_context
def simapp_update(ctx, output_folder, simapp_bucket, simapp_key):
    """Download the simapp bundle from S3 and collect its track route files.

    The bundle is a ``.tar.gz`` that contains an inner ``bundle.tar``; only
    the routes folder of the inner archive is extracted. Every ``*.npy``
    route file found there is copied to
    ``<output_folder>/<TRACK_FOLDER_ROUTES>``, and the scratch folder is
    removed afterwards.
    """
    tmp_folder = "simapp_tmp"
    route_prefix = "opt/install/deepracer_simulation_environment/share/deepracer_simulation_environment/routes"

    # exist_ok=True replaces the original exists()/makedirs() pair.
    os.makedirs(tmp_folder, exist_ok=True)

    s3 = boto3.resource("s3")
    # Fix: use tmp_folder here instead of a second, hard-coded "simapp_tmp"
    # literal, so renaming the scratch folder cannot silently break the path.
    s3.Bucket(simapp_bucket).download_file(Key=simapp_key, Filename=os.path.join(tmp_folder, SIMAPP_TAR_GZ))

    # Yield only the archive members under the routes folder
    # (the track numpy files) so we never unpack the whole simapp.
    def subfolders(tf):
        for m in tf.getmembers():
            if m.path.startswith(route_prefix):
                yield m

    # NOTE(review): extractall() on a downloaded archive is a path-traversal
    # risk if the bucket contents are ever untrusted — consider tarfile's
    # extraction filters (filter="data", Python 3.12+) when available.
    with tarfile.open(os.path.join(tmp_folder, SIMAPP_TAR_GZ)) as f:
        f.extractall(path=tmp_folder)

    # Extract specific parts of the simapp we want to store.
    with tarfile.open(os.path.join(tmp_folder, "bundle.tar"), mode="r") as f:
        f.extractall(path=tmp_folder, members=subfolders(f))

    # Copy the route files we want into the raw data folder.
    output_track_folder = os.path.join(output_folder, TRACK_FOLDER_ROUTES)
    os.makedirs(output_track_folder, exist_ok=True)

    for route in glob.glob(os.path.join(tmp_folder, route_prefix, "*.npy")):
        shutil.copy(route, os.path.join(output_track_folder, os.path.basename(route)))

    # Remove temporary folder.
    shutil.rmtree(tmp_folder)
51105

52106

53107
@cli.command()
@@ -65,10 +119,12 @@ def leaderboard_update(ctx, output_folder):
65119

66120
boto_response_to_csv(response, output_path)
67121

68-
asset_map = {r["Arn"]: r["ImageUrl"] for r in response if "ImageUrl" in r}
69-
output_assets_folder = os.path.join(output_folder, LEADERBOARDS_FOLDER_ASSETS)
122+
asset_map = {}
123+
for r in response:
124+
asset_map.update(extract_asset_paths(r))
70125

71-
fetch_media_assets(asset_map, output_assets_folder)
126+
output_assets_folder = os.path.join(output_folder, LEADERBOARDS_FOLDER_ASSETS)
127+
fetch_assets(asset_map, output_assets_folder)
72128

73129
# Now do an update for each unique ARN:
74130
# - If OPEN: We collect a snapshot and save it under the current data and time.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import os
2+
import requests
3+
from joblib import Parallel, delayed
4+
from urllib.parse import urlparse
5+
6+
7+
def fetch_assets(key_url_map, output_folder):
    """Download every asset in *key_url_map* into *output_folder*, in parallel.

    Args:
        key_url_map: mapping of relative output path -> source URL.
        output_folder: root folder the relative paths are joined onto.

    Downloads are deliberately best-effort: any failure is printed and the
    asset is skipped, so one broken URL cannot abort the whole batch.
    """

    def download(key, url):
        try:
            output_path = os.path.join(output_folder, key)
            # exist_ok also makes this safe when worker threads race on
            # creating the same (possibly nested) directory.
            os.makedirs(os.path.dirname(output_path), exist_ok=True)

            # Fix: a timeout so one stalled server cannot hang the worker
            # thread forever (requests has no default timeout).
            r = requests.get(url, timeout=30)
            if r.status_code == 200:
                with open(output_path, "wb") as f:
                    f.write(r.content)
        except Exception as e:
            # Best-effort: assume we can't get this asset and move on.
            print(e)

    return Parallel(n_jobs=-1, prefer="threads")(delayed(download)(key, url) for key, url in key_url_map.items())
27+
28+
29+
def get_asset_path(arn, url):
30+
return os.path.join(arn, urlparse(url).path.lstrip("/"))
31+
32+
33+
def extract_asset_paths(r, arn_key="Arn"):
    """Collect the downloadable assets referenced by one API response row.

    Args:
        r: a single response dict from the DeepRacer service.
        arn_key: key under which the row stores its ARN
            (``"Arn"`` for leaderboards, ``"TrackArn"`` for tracks).

    Returns:
        dict mapping local asset path (ARN-prefixed, via ``get_asset_path``)
        -> source URL, for every asset column present on the row.
    """
    arn = r[arn_key]
    response_asset_map = {}

    # Single-value asset columns that may or may not be present on a row.
    for column in ("ImageUrl", "LeaderboardImage", "TrackPicture"):
        if column in r:
            response_asset_map[get_asset_path(arn, r[column])] = r[column]

    # Tracks additionally carry one picture per race type. Use the iterated
    # value directly (the original re-indexed the dict with the same key).
    for url in r.get("TrackRaceTypePictureMap", {}).values():
        response_asset_map[get_asset_path(arn, url)] = url

    return response_asset_map

src/deepracer_race_stats/util/media.py

Lines changed: 0 additions & 26 deletions
This file was deleted.

0 commit comments

Comments
 (0)