Skip to content

Commit a5080a9

Browse files
authored
Switch to JSON API and allow pulling a specific generation (#14)
1 parent 2e681e4 commit a5080a9

File tree

8 files changed

+42
-4
lines changed

8 files changed

+42
-4
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
bazel-*
22
MODULE.bazel.lock
3+
.ijwb/

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,11 @@ Examples:
215215
"trainingdata/model/very_large.bin": {
216216
"remote_path": "weird/nested/path/extra/model/very_large.bin",
217217
"integrity": "sha256-Oibw8PV3cDY84HKv3sAWIEuk+R2s8Hwhvlg6qg4H7uY="
218+
},
219+
"trainingdata/model/very_large.bin.old": {
220+
"remote_path": "weird/nested/path/extra/model/very_large.bin",
221+
"generation": "1721216992694798",
222+
"integrity": "sha256-Oibw8PV3cDY84HKv3sAWIEuk+R2s8Hwhvlg6qg4H7uY="
218223
}
219224
}
220225
```
@@ -224,6 +229,7 @@ Examples:
224229
- `sha256`: the expected sha256 hash of the file. Required unless `integrity` is used.
225230
- `integrity`: the expected SRI value of the file. Required unless `sha256` is used.
226231
- `remote_path`: name of the object within the bucket. If not set, the local path is used.
232+
- `generation`: generation number identifying a specific version of the object in the bucket. If not set, the `live` generation is used.
227233

228234
Targets in the main repository can depend on this target if the
229235
following lines are added to `MODULE.bazel`:

examples/full/gcs_lock.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
},
1414
"custom/local/path/SHA256SUMS": {
1515
"remote_path": "release/v1.30.3/SHA256SUMS",
16+
"generation": "1721216992694798",
1617
"integrity": "sha256-Oibw8PV3cDY84HKv3sAWIEuk+R2s8Hwhvlg6qg4H7uY="
1718
}
1819
}

gcs/private/extensions/gcs_bucket.bzl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def dep_to_blob_repo(bucket_name, local_path, info):
6767
"name": object_repo_name(bucket_name, info["remote_path"]),
6868
"downloaded_file_path": local_path,
6969
"executable": True,
70+
"generation": info["generation"] if "generation" in info else None,
7071
"sha256": info["sha256"] if "sha256" in info else None,
7172
"integrity": info["integrity"] if "integrity" in info else None,
7273
"url": gs_url,
@@ -120,6 +121,7 @@ Examples:
120121
- `sha256`: the expected sha256 hash of the file. Required unless `integrity` is used.
121122
- `integrity`: the expected SRI value of the file. Required unless `sha256` is used.
122123
- `remote_path`: name of the object within the bucket. If not set, the local path is used.
124+
- `generation`: generation number to a specific version of the object in the bucket. If not set the `live` generation is used.
123125
124126
Targets in the main repository can depend on this target if the
125127
following lines are added to `MODULE.bazel`:

gcs/private/repo_rules/eager.bzl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,11 @@ def _eager_impl(repository_ctx):
4545

4646
def info_to_download_args(bucket_name, local_path, info):
4747
args = {
48-
"url": bucket_url(bucket_name, info["remote_path"]),
48+
"url": bucket_url(
49+
bucket_name,
50+
info["remote_path"],
51+
info["generation"] if "generation" in info else None,
52+
),
4953
"output": local_path,
5054
"executable": True,
5155
"block": False,

gcs/private/repo_rules/gcs_archive.bzl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,11 @@ By default, the archive type is determined from the file extension of the
255255
URL. If the file has no extension, you can explicitly specify one of the
256256
following: `"zip"`, `"jar"`, `"war"`, `"aar"`, `"tar"`, `"tar.gz"`, `"tgz"`,
257257
`"tar.xz"`, `"txz"`, `"tar.zst"`, `"tzst"`, `"tar.bz2"`, `"ar"`, or `"deb"`.""",
258+
),
259+
"generation": attr.string(
260+
doc = """A specific generation number to pull from gcs.
261+
This must be an integer, live generation is assumed if empty.
262+
""",
258263
),
259264
"url": attr.string(
260265
mandatory = True,

gcs/private/repo_rules/gcs_file.bzl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ This must match the SHA-256 of the file downloaded. _It is a security risk
109109
to omit the SHA-256 as remote files can change._ At best omitting this
110110
field will make your build non-hermetic. It is optional to make development
111111
easier but should be set before shipping.""",
112+
),
113+
"generation": attr.string(
114+
doc = """A specific generation number to pull from gcs.
115+
This must be an integer, live generation is assumed if empty.
116+
""",
112117
),
113118
"url": attr.string(
114119
mandatory = True,

gcs/private/util.bzl

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,13 @@ def repository_ctx_download_common_args(attr, bucket_name, remote_path):
4646
has_sha256 = len(attr.sha256) > 0
4747
if has_integrity == has_sha256:
4848
fail("expected exactly one of \"integrity\" and \"sha256\"")
49+
has_generation = len(attr.generation) > 0
4950
args = {
50-
"url": bucket_url(bucket_name, remote_path),
51+
"url": bucket_url(
52+
bucket_name,
53+
remote_path,
54+
attr.generation if has_generation else None,
55+
),
5156
"sha256": attr.sha256,
5257
"integrity": attr.integrity,
5358
}
@@ -79,8 +84,14 @@ def download_and_extract_args(attr, bucket_name, remote_path):
7984
args.pop("rename_files")
8085
return args
8186

82-
def bucket_url(bucket, object_path):
83-
return "https://storage.googleapis.com/{}/{}".format(bucket, url_encode(object_path))
87+
# Use the JSON API, which allows for more options https://cloud.google.com/storage/docs/json_api/v1/objects/get
88+
def bucket_url(bucket, object_path, generation):
89+
return "https://storage.googleapis.com/storage/v1/b/{}/o/{}?alt=media".format(
90+
bucket,
91+
url_encode(object_path),
92+
) + ("&generation={}".format(
93+
generation,
94+
) if generation else "")
8495

8596
def deps_from_file(module_ctx, lockfile_label, jsonpath):
8697
lockfile_path = module_ctx.path(lockfile_label)
@@ -99,6 +110,7 @@ def parse_lockfile(lockfile_content, jsonpath):
99110
# we expect the following schema:
100111
# - exactly one of sha256 or integrity
101112
# - optionally a remote_path (if not, we populate it with the local_path instead)
113+
has_generation = "generation" in v
102114
has_remote_path = "remote_path" in v
103115
has_integrity = "integrity" in v
104116
has_sha256 = "sha256" in v
@@ -109,6 +121,8 @@ def parse_lockfile(lockfile_content, jsonpath):
109121
}
110122
if has_integrity:
111123
info["integrity"] = v["integrity"]
124+
if has_generation:
125+
info["generation"] = v["generation"]
112126
if has_sha256:
113127
info["sha256"] = v["sha256"]
114128
processed_lockfile[local_path] = info

0 commit comments

Comments
 (0)