Skip to content

Commit 63e890f

Browse files
committed
added support for fetching cas values from legacy storage
1 parent bdee852 commit 63e890f

File tree

6 files changed

+148
-9
lines changed

6 files changed

+148
-9
lines changed

breadbox/breadbox/api/dependencies.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ def get_db_with_user(request: Request):
1818
db.close()
1919

2020

21+
def get_legacy_cas_bucket():
    """Return the ``LEGACY_CAS_BUCKET`` value from the application settings.

    Intended for use as a FastAPI dependency; the value is read from the
    settings object on every call.
    """
    return get_settings().LEGACY_CAS_BUCKET
24+
25+
2126
def get_cas_db_path():
    """Return the path of ``cas.sqlite3`` inside the configured filestore location."""
    filestore_dir = get_settings().filestore_location
    return os.path.join(filestore_dir, "cas.sqlite3")

breadbox/breadbox/api/temp/cas.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,28 @@
11
from .router import router
22
from breadbox.schemas.cas import CASKey, CASValue
33
from breadbox.io import cas
4-
import os.path
5-
from typing import Annotated
4+
from typing import Annotated, Optional
65
from fastapi import APIRouter, Body, Depends, HTTPException
7-
from breadbox.api.dependencies import get_db_with_user, get_cas_db_path
6+
from breadbox.api.dependencies import (
7+
get_db_with_user,
8+
get_cas_db_path,
9+
get_legacy_cas_bucket,
10+
)
811

912
# Methods for getting/setting values in Content-addressable-storage (CAS)
1013
@router.get(
    "/cas/{key}", operation_id="get_cas_value", response_model=CASValue,
)
def get_cas_value(
    key: str,
    cas_db_path: Annotated[str, Depends(get_cas_db_path)],
    legacy_cas_bucket: Annotated[Optional[str], Depends(get_legacy_cas_bucket)],
):
    """Fetch the value stored under ``key`` in content-addressable storage.

    The local CAS database at ``cas_db_path`` is consulted first. If it has
    no entry and a legacy bucket is configured, the legacy bucket is tried.

    Raises:
        HTTPException: with status 404 when neither store yields a value.
    """
    stored = cas.get_value(cas_db_path, key)

    # Only reach out to the legacy bucket on a local miss, and only when a
    # bucket name has actually been configured.
    if stored is None and legacy_cas_bucket is not None:
        stored = cas.legacy_get_value(legacy_cas_bucket, key)

    if stored is not None:
        return CASValue(value=stored)

    raise HTTPException(status_code=404)
@@ -21,7 +32,14 @@ def get_cas_value(key: str, cas_db_path: Annotated[str, Depends(get_cas_db_path)
2132
"/cas", operation_id="set_cas_value", response_model=CASKey,
2233
)
2334
def set_cas_value(
    value: CASValue,
    cas_db_path: Annotated[str, Depends(get_cas_db_path)],
    legacy_cas_bucket: Annotated[Optional[str], Depends(get_legacy_cas_bucket)],
):
    """Store ``value`` in content-addressable storage and return its key.

    The value is always written to the local CAS database at ``cas_db_path``.
    When a legacy bucket is configured, the value is also written there and
    the two resulting keys are checked for equality.

    Raises:
        RuntimeError: if the legacy store produces a different key than the
            local store for the same value.
    """
    key = cas.set_value(cas_db_path, value.value)

    if legacy_cas_bucket is not None:
        legacy_key = cas.legacy_set_value(legacy_cas_bucket, value.value)
        # Explicit check instead of `assert`: assertions are removed when
        # Python runs with -O, which would silently disable this safeguard.
        if legacy_key != key:
            raise RuntimeError(
                f"CAS key mismatch: local store produced {key!r} "
                f"but legacy store produced {legacy_key!r}"
            )

    return CASKey(key=key)

breadbox/breadbox/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ class Settings(BaseSettings):
2929
"CELERY_RESULT_BACKEND", "redis://127.0.0.1:6379/0"
3030
)
3131

32+
LEGACY_CAS_BUCKET: Optional[str] = os.environ.get("LEGACY_CAS_BUCKET")
33+
3234
model_config = SettingsConfigDict(env_file=".env",)
3335

3436
@field_validator("host_scheme_override")

breadbox/breadbox/io/cas.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from base64 import urlsafe_b64encode
44
from sqlitedict import SqliteDict
55
from typing import Optional
6+
from google.cloud import storage
67

78

89
def get_value(db_path: str, key: str) -> Optional[str]:
@@ -20,3 +21,35 @@ def set_value(db_path: str, value: str) -> str:
2021
db[key] = value_bytes
2122
db.commit()
2223
return key
24+
25+
26+
##########
27+
# Legacy methods for getting/setting values in a Google Cloud Storage bucket
28+
29+
30+
def _get_private_datasets_bucket(bucket_name: str) -> storage.Bucket:
    """Return a bucket handle for ``bucket_name`` from a newly created storage client."""
    return storage.Client().bucket(bucket_name)
35+
36+
37+
def legacy_get_value(bucket_name: str, key: str) -> Optional[str]:
    """Read the value stored under ``key`` in the legacy bucket.

    Args:
        bucket_name: Name of the bucket to read from.
        key: Blob name to look up.

    Returns:
        The blob contents decoded as UTF-8, or ``None`` if no blob named
        ``key`` exists in the bucket.
    """
    blob = _get_private_datasets_bucket(bucket_name).blob(key)
    if not blob.exists():
        return None

    # download_as_bytes() replaces the deprecated download_as_string() alias;
    # both return the raw blob contents as bytes.
    return blob.download_as_bytes().decode("utf8")
45+
46+
47+
def legacy_set_value(bucket_name: str, value: str) -> str:
    """Write ``value`` to the legacy bucket under its content-derived key.

    The key is the URL-safe base64 encoding of the SHA-256 digest of the
    UTF-8 encoded value. The upload is skipped when a blob with that key
    already exists.

    Args:
        bucket_name: Name of the bucket to write to.
        value: Text to store.

    Returns:
        The key under which the value is stored. A key is always returned,
        so the return type is ``str`` rather than ``Optional[str]``.
    """
    bucket = _get_private_datasets_bucket(bucket_name)

    value_bytes = value.encode("utf8")
    key = urlsafe_b64encode(hashlib.sha256(value_bytes).digest()).decode("utf8")

    blob = bucket.blob(key)
    if not blob.exists():
        blob.upload_from_string(value_bytes)
    return key

breadbox/poetry.lock

Lines changed: 84 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

breadbox/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ depmap-compute = {version = "0.1.13", source = "public-python"}
3333
# depmap-compute = {path = "../depmap-compute", develop = true}
3434
fastparquet = "^2024.5.0"
3535
sqlitedict="^2.1.0"
36+
google-cloud-storage = "^3.1.0"
3637

3738
[tool.poetry.group.dev.dependencies]
3839
pyright-ratchet = {git = "https://github.com/pgm/pyright-ratchet.git", rev = "v0.3.1"}

0 commit comments

Comments
 (0)