Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __call__(self, batch):
for i, prompt in zip(batch["idx"], batch["prompt"]):
# Generate 1 image at a time to reduce memory consumption.
for image in self.pipeline(prompt).images:
hash_image = hashlib.sha1(image.tobytes()).hexdigest()
hash_image = hashlib.sha256(image.tobytes()).hexdigest()
image_filename = path.join(self.output_dir, f"{i}-{hash_image}.jpg")
image.save(image_filename)
print(f"Saved {image_filename}")
Expand Down
2 changes: 1 addition & 1 deletion python/ray/_private/function_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def compute_collision_identifier(self, function_or_class):
collision_identifier = function_or_class.__name__ + ":" + string_file.getvalue()

# Return a hash of the identifier in case it is too large.
return hashlib.sha1(collision_identifier.encode("utf-8")).digest()
return hashlib.sha256(collision_identifier.encode("utf-8")).digest()

def load_function_or_class_from_local(self, module_name, function_or_class_name):
"""Try to load a function or class in the module from local."""
Expand Down
2 changes: 1 addition & 1 deletion python/ray/air/_internal/filelock.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(self, path: str, **kwargs):
self.path = path
temp_dir = Path(ray._common.utils.get_default_system_temp_dir()).resolve()
self._lock_dir = temp_dir / RAY_LOCKFILE_DIR
self._path_hash = hashlib.sha1(
self._path_hash = hashlib.sha256(
str(Path(self.path).resolve()).encode("utf-8")
).hexdigest()
self._lock_path = self._lock_dir / f"{self._path_hash}.lock"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ def _update_cloudwatch_config(self, config_type: str, is_head_node: bool) -> Non
cw_config_ssm = self._set_cloudwatch_ssm_config_param(
param_name, config_type
)
cur_cw_config_hash = self._sha1_hash_file(config_type)
ssm_cw_config_hash = self._sha1_hash_json(cw_config_ssm)
cur_cw_config_hash = self._sha256_hash_file(config_type)
ssm_cw_config_hash = self._sha256_hash_json(cw_config_ssm)
# check if user updated cloudwatch related config files.
# if so, perform corresponding actions.
if cur_cw_config_hash != ssm_cw_config_hash:
Expand Down Expand Up @@ -381,7 +381,7 @@ def _set_cloudwatch_ssm_config_param(

def _get_default_empty_config_file_hash(self):
default_cw_config = "{}"
parameter_value = self._sha1_hash_json(default_cw_config)
parameter_value = self._sha256_hash_json(default_cw_config)
return parameter_value

def _get_ssm_param(self, parameter_name: str) -> str:
Expand All @@ -394,31 +394,31 @@ def _get_ssm_param(self, parameter_name: str) -> str:
cwa_parameter = res.get("Value", {})
return cwa_parameter

def _sha1_hash_json(self, value: str) -> str:
    """Calculate the SHA-1 hash of a JSON string.

    Args:
        value: The JSON document, already serialized to a string.

    Returns:
        The hex-encoded SHA-1 digest of ``value``.
    """
    sha1_hash = hashlib.new("sha1")
    # Encode as UTF-8 rather than ASCII: the bytes (and therefore the
    # digest) are identical for pure-ASCII input, but non-ASCII characters
    # no longer raise UnicodeEncodeError.
    sha1_hash.update(value.encode("utf-8"))
    return sha1_hash.hexdigest()
def _sha256_hash_json(self, value: str) -> str:
    """Return the hex-encoded SHA-256 digest of a JSON string.

    Args:
        value: The JSON document, already serialized to a string.

    Returns:
        The SHA-256 digest of the UTF-8 encoded ``value`` as a hex string.
    """
    digest = hashlib.new("sha256", value.encode("utf-8"))
    return digest.hexdigest()

def _sha1_hash_file(self, config_type: str) -> str:
"""calculate the config file sha1 hash"""
def _sha256_hash_file(self, config_type: str) -> str:
"""calculate the config file sha256 hash"""
config = self.CLOUDWATCH_CONFIG_TYPE_TO_CONFIG_VARIABLE_REPLACE_FUNC.get(
config_type
)(config_type)
value = json.dumps(config)
sha1_res = self._sha1_hash_json(value)
return sha1_res
sha256_res = self._sha256_hash_json(value)
return sha256_res

def _upload_config_to_ssm_and_set_hash_tag(self, config_type: str):
data = self.CLOUDWATCH_CONFIG_TYPE_TO_CONFIG_VARIABLE_REPLACE_FUNC.get(
config_type
)(config_type)
sha1_hash_value = self._sha1_hash_file(config_type)
sha256_hash_value = self._sha256_hash_file(config_type)
self._upload_config_to_ssm(data, config_type)
self._update_cloudwatch_hash_tag_value(
self.node_id, sha1_hash_value, config_type
self.node_id, sha256_hash_value, config_type
)

def _add_cwa_installed_tag(self, node_id: str) -> None:
Expand All @@ -432,12 +432,12 @@ def _add_cwa_installed_tag(self, node_id: str) -> None:
)

def _update_cloudwatch_hash_tag_value(
self, node_id: str, sha1_hash_value: str, config_type: str
self, node_id: str, sha256_hash_value: str, config_type: str
):
hash_key_value = "-".join([CLOUDWATCH_CONFIG_HASH_TAG_BASE, config_type])
self.ec2_client.create_tags(
Resources=[node_id],
Tags=[{"Key": hash_key_value, "Value": sha1_hash_value}],
Tags=[{"Key": hash_key_value, "Value": sha256_hash_value}],
)
logger.info(
"Successfully update cloudwatch {} hash tag on {}".format(
Expand Down
4 changes: 2 additions & 2 deletions python/ray/autoscaler/_private/command_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,8 @@ def __init__(
use_internal_ip,
):

ssh_control_hash = hashlib.sha1(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha1(getuser().encode()).hexdigest()
ssh_control_hash = hashlib.sha256(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode()).hexdigest()
Comment on lines +176 to +177
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

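Python 3's str.encode() already defaults to UTF-8 on every platform, so this does not change behavior; still, for consistency with the other call sites and to make the intent explicit, it's better to specify the encoding as utf-8 when calling .encode().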

Suggested change
ssh_control_hash = hashlib.sha256(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode()).hexdigest()
ssh_control_hash = hashlib.sha256(cluster_name.encode("utf-8")).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode("utf-8")).hexdigest()

if sys.platform == "win32":
# Disable SSH control paths on Windows - using them currently causes socket errors
ssh_control_path = None
Expand Down
2 changes: 1 addition & 1 deletion python/ray/autoscaler/_private/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,7 @@ def _bootstrap_config(
config = prepare_config(config)
# NOTE: multi-node-type autoscaler is guaranteed to be in use after this.

hasher = hashlib.sha1()
hasher = hashlib.sha256()
hasher.update(json.dumps([config], sort_keys=True).encode("utf-8"))
cache_key = os.path.join(
tempfile.gettempdir(), "ray-config-{}".format(hasher.hexdigest())
Expand Down
6 changes: 3 additions & 3 deletions python/ray/autoscaler/_private/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def with_head_node_ip(cmds, head_ip=None):


def hash_launch_conf(node_conf, auth):
hasher = hashlib.sha1()
hasher = hashlib.sha256()
# For hashing, we replace the path to the key with the
# key itself. This is to make sure the hashes are the
# same even if keys live at different locations on different
Expand Down Expand Up @@ -467,8 +467,8 @@ def hash_runtime_conf(
cluster_synced_files contents have changed. It is used at monitor time to
determine if additional file syncing is needed.
"""
runtime_hasher = hashlib.sha1()
contents_hasher = hashlib.sha1()
runtime_hasher = hashlib.sha256()
contents_hasher = hashlib.sha256()

def add_content_hashes(path, allow_non_existing_paths: bool = False):
def add_hash_of_file(fpath):
Expand Down
2 changes: 1 addition & 1 deletion python/ray/data/preprocessors/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def simple_split_tokenizer(value: str) -> List[str]:
def simple_hash(value: object, num_features: int) -> int:
"""Deterministically hash a value into the integer space."""
encoded_value = str(value).encode()
hashed_value = hashlib.sha1(encoded_value)
hashed_value = hashlib.sha256(encoded_value)
hashed_value_int = int(hashed_value.hexdigest(), 16)
return hashed_value_int % num_features

Expand Down
8 changes: 4 additions & 4 deletions python/ray/data/tests/preprocessors/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ def test_simple_split_tokenizer():

def test_simple_hash():
# Tests simple_hash determinism.
assert simple_hash(1, 100) == 83
assert simple_hash("a", 100) == 52
assert simple_hash("banana", 100) == 16
assert simple_hash([1, 2, "apple"], 100) == 37
assert simple_hash(1, 100) == 15
assert simple_hash("a", 100) == 99
assert simple_hash("banana", 100) == 10
assert simple_hash([1, 2, "apple"], 100) == 58


if __name__ == "__main__":
Expand Down
4 changes: 2 additions & 2 deletions python/ray/data/tests/preprocessors/test_vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ def test_hashing_vectorizer():
transformed = vectorizer.transform(ds)
out_df = transformed.to_pandas()

processed_col_a = [[2, 1, 3], [0, 4, 1]]
processed_col_b = [[1, 0, 0], [0, 3, 0]]
processed_col_a = [[0, 4, 2], [0, 5, 0]]
processed_col_b = [[0, 0, 1], [3, 0, 0]]

expected_df = pd.DataFrame.from_dict({"A": processed_col_a, "B": processed_col_b})

Expand Down
2 changes: 1 addition & 1 deletion python/ray/serve/_private/deploy_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,4 @@ def get_app_code_version(app_config: ServeApplicationSchema) -> str:
},
sort_keys=True,
).encode("utf-8")
return hashlib.sha1(encoded).hexdigest()
return hashlib.sha256(encoded).hexdigest()
8 changes: 4 additions & 4 deletions python/ray/tests/aws/utils/stubs.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def get_ec2_cwa_installed_tag_true(ec2_client_stub, node_id):

def update_hash_tag_success(ec2_client_stub, node_id, config_type, cloudwatch_helper):
hash_key_value = "-".join([CLOUDWATCH_CONFIG_HASH_TAG_BASE, config_type])
cur_hash_value = get_sha1_hash_of_cloudwatch_config_file(
cur_hash_value = get_sha256_hash_of_cloudwatch_config_file(
config_type, cloudwatch_helper
)
ec2_client_stub.add_response(
Expand All @@ -312,7 +312,7 @@ def add_cwa_installed_tag_response(ec2_client_stub, node_id):

def get_head_node_config_hash_different(ec2_client_stub, config_type, cwh, node_id):
hash_key_value = "-".join([CLOUDWATCH_CONFIG_HASH_TAG_BASE, config_type])
cur_hash_value = get_sha1_hash_of_cloudwatch_config_file(config_type, cwh)
cur_hash_value = get_sha256_hash_of_cloudwatch_config_file(config_type, cwh)
filters = cwh._get_current_cluster_session_nodes(cwh.cluster_name)
filters.append(
{
Expand Down Expand Up @@ -497,8 +497,8 @@ def get_param_ssm_same(ssm_client_stub, ssm_param_name, cloudwatch_helper, confi
return command_id


def get_sha1_hash_of_cloudwatch_config_file(config_type, cloudwatch_helper):
cw_value_file = cloudwatch_helper._sha1_hash_file(config_type)
def get_sha256_hash_of_cloudwatch_config_file(config_type, cloudwatch_helper):
cw_value_file = cloudwatch_helper._sha256_hash_file(config_type)
return cw_value_file


Expand Down
8 changes: 4 additions & 4 deletions python/ray/tests/gcp/test_gcp_tpu_command_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def test_tpu_ssh_command_runner():
instance = MockTpuInstance(num_workers=num_workers)
provider.create_node({}, {}, 1)
cluster_name = "cluster"
ssh_control_hash = hashlib.sha1(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha1(getuser().encode()).hexdigest()
ssh_control_hash = hashlib.sha256(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode()).hexdigest()
Comment on lines +48 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

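Python 3's str.encode() already defaults to UTF-8 on every platform, so this does not change behavior; still, for consistency with the other call sites and to make the intent explicit, it's better to specify the encoding as utf-8 when calling .encode().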

Suggested change
ssh_control_hash = hashlib.sha256(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode()).hexdigest()
ssh_control_hash = hashlib.sha256(cluster_name.encode("utf-8")).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode("utf-8")).hexdigest()

ssh_control_path = "/tmp/ray_ssh_{}/{}".format(
ssh_user_hash[:10], ssh_control_hash[:10]
)
Expand Down Expand Up @@ -119,8 +119,8 @@ def test_tpu_docker_command_runner():
instance = MockTpuInstance(num_workers=num_workers)
provider.create_node({}, {}, 1)
cluster_name = "cluster"
ssh_control_hash = hashlib.sha1(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha1(getuser().encode()).hexdigest()
ssh_control_hash = hashlib.sha256(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode()).hexdigest()
Comment on lines +122 to +123
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

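Python 3's str.encode() already defaults to UTF-8 on every platform, so this does not change behavior; still, for consistency with the other call sites and to make the intent explicit, it's better to specify the encoding as utf-8 when calling .encode().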

Suggested change
ssh_control_hash = hashlib.sha256(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode()).hexdigest()
ssh_control_hash = hashlib.sha256(cluster_name.encode("utf-8")).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode("utf-8")).hexdigest()

ssh_control_path = "/tmp/ray_ssh_{}/{}".format(
ssh_user_hash[:10], ssh_control_hash[:10]
)
Expand Down
8 changes: 4 additions & 4 deletions python/ray/tests/test_command_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ def test_ssh_command_runner():
provider = MockProvider()
provider.create_node({}, {}, 1)
cluster_name = "cluster"
ssh_control_hash = hashlib.sha1(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha1(getuser().encode()).hexdigest()
ssh_control_hash = hashlib.sha256(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode()).hexdigest()
Comment on lines +65 to +66
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

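Python 3's str.encode() already defaults to UTF-8 on every platform, so this does not change behavior; still, for consistency with the other call sites and to make the intent explicit, it's better to specify the encoding as utf-8 when calling .encode().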

Suggested change
ssh_control_hash = hashlib.sha256(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode()).hexdigest()
ssh_control_hash = hashlib.sha256(cluster_name.encode("utf-8")).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode("utf-8")).hexdigest()

ssh_control_path = "/tmp/ray_ssh_{}/{}".format(
ssh_user_hash[:10], ssh_control_hash[:10]
)
Expand Down Expand Up @@ -129,8 +129,8 @@ def test_docker_command_runner():
provider = MockProvider()
provider.create_node({}, {}, 1)
cluster_name = "cluster"
ssh_control_hash = hashlib.sha1(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha1(getuser().encode()).hexdigest()
ssh_control_hash = hashlib.sha256(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode()).hexdigest()
Comment on lines +132 to +133
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

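Python 3's str.encode() already defaults to UTF-8 on every platform, so this does not change behavior; still, for consistency with the other call sites and to make the intent explicit, it's better to specify the encoding as utf-8 when calling .encode().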

Suggested change
ssh_control_hash = hashlib.sha256(cluster_name.encode()).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode()).hexdigest()
ssh_control_hash = hashlib.sha256(cluster_name.encode("utf-8")).hexdigest()
ssh_user_hash = hashlib.sha256(getuser().encode("utf-8")).hexdigest()

ssh_control_path = "/tmp/ray_ssh_{}/{}".format(
ssh_user_hash[:10], ssh_control_hash[:10]
)
Expand Down
2 changes: 1 addition & 1 deletion python/ray/tune/impl/placeholder.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def create_resolvers_map():

def _id_hash(path_tuple):
"""Compute a hash for the specific placeholder based on its path."""
return hashlib.sha1(str(path_tuple).encode("utf-8")).hexdigest()[:ID_HASH_LENGTH]
return hashlib.sha256(str(path_tuple).encode("utf-8")).hexdigest()[:ID_HASH_LENGTH]


class _FunctionResolver:
Expand Down
26 changes: 13 additions & 13 deletions python/ray/tune/tests/test_placeholder.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ def testGridSearch(self):

self.assertEqual(
config["param2"][1]["grid_search"],
["ok", (_RefResolver.TOKEN, "e1eaa08f")],
["ok", (_RefResolver.TOKEN, "1870fa9b")],
)
self.assertEqual(
config["param3"]["param4"]["grid_search"],
[(_RefResolver.TOKEN, "35397f1a"), "not ok"],
[(_RefResolver.TOKEN, "8515e998"), "not ok"],
)

# Pretend we picked a choice from the grid searches.
config["param2"][1] = (_RefResolver.TOKEN, "e1eaa08f")
config["param2"][1] = (_RefResolver.TOKEN, "1870fa9b")
config["param3"]["param4"] = "not ok"

resolve_placeholders(config, replaced)
Expand All @@ -79,15 +79,15 @@ def testCategorical(self):

self.assertEqual(
config["param2"][1].categories,
[(_RefResolver.TOKEN, "e6a5a3d5"), "not ok"],
[(_RefResolver.TOKEN, "ec0e030c"), "not ok"],
)
self.assertEqual(
config["param3"]["param4"].categories,
[(_RefResolver.TOKEN, "35397f1a"), "not ok"],
[(_RefResolver.TOKEN, "8515e998"), "not ok"],
)

# Pretend we picked a choice from the categoricals.
config["param2"][1] = (_RefResolver.TOKEN, "e6a5a3d5")
config["param2"][1] = (_RefResolver.TOKEN, "ec0e030c")
config["param3"]["param4"] = "not ok"

resolve_placeholders(config, replaced)
Expand All @@ -105,9 +105,9 @@ def _testNonSearchSpaceRef(self, value):
self.assertEqual(
config["param"].categories,
[
(_RefResolver.TOKEN, "ab9affa5"),
(_RefResolver.TOKEN, "ec15c422"),
"other",
(_RefResolver.TOKEN, "ceae296d"),
(_RefResolver.TOKEN, "3c7edff5"),
],
)

Expand Down Expand Up @@ -227,10 +227,10 @@ def testPointToEval(self):
# Normal params are not replaced.
self.assertEqual(
config["param2"][1].categories,
[(_RefResolver.TOKEN, "e6a5a3d5"), "not ok"],
[(_RefResolver.TOKEN, "ec0e030c"), "not ok"],
)
self.assertEqual(
config["param3"]["param4"], (_FunctionResolver.TOKEN, "843363f5")
config["param3"]["param4"], (_FunctionResolver.TOKEN, "134aff3a")
)

# Now, say we manually resolved the placeholders based on
Expand Down Expand Up @@ -259,7 +259,7 @@ def testSimpleNestedSearchSpaces(self):
config = inject_placeholders(config, replaced)

# Manually resolve. Select the Dummy value.
config["param2"] = (_RefResolver.TOKEN, "41821403")
config["param2"] = (_RefResolver.TOKEN, "6f33af83")

resolve_placeholders(config, replaced)

Expand All @@ -281,7 +281,7 @@ def testSimpleNestedSearchSpaces2(self):
config = inject_placeholders(config, replaced)

# Manually resolve. Select the Dummy value.
config["param2"] = (None, None, (_RefResolver.TOKEN, "49964529"))
config["param2"] = (None, None, (_RefResolver.TOKEN, "a1433750"))

resolve_placeholders(config, replaced)

Expand All @@ -300,7 +300,7 @@ def testResolveFunctionAfterRef(self):
config = inject_placeholders(config, replaced)

# Manually resolve param2.
config["param2"] = (_RefResolver.TOKEN, "60238385")
config["param2"] = (_RefResolver.TOKEN, "07cb6238")

resolve_placeholders(config, replaced)

Expand Down
4 changes: 2 additions & 2 deletions python/ray/util/collective/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ def get_store_name(group_name):
Args:
group_name: unique user name for the store.
Return:
str: SHA1-hexlified name for the store.
str: SHA256-hexlified name for the store.
"""
if not group_name:
raise ValueError("group_name is None.")
hexlified_name = hashlib.sha1(group_name.encode()).hexdigest()
hexlified_name = hashlib.sha256(group_name.encode()).hexdigest()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

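Python 3's str.encode() already defaults to UTF-8 on every platform, so this does not change behavior; still, for consistency with the other call sites and to make the intent explicit, it's better to specify the encoding as utf-8 when calling .encode().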

Suggested change
hexlified_name = hashlib.sha256(group_name.encode()).hexdigest()
hexlified_name = hashlib.sha256(group_name.encode("utf-8")).hexdigest()

return hexlified_name


Expand Down
4 changes: 2 additions & 2 deletions release/nightly_tests/stress_tests/test_state_api_scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def test_large_log_file(log_file_size_byte: int):
@ray.remote
class LogActor:
def write_log(self, log_file_size_byte: int):
ctx = hashlib.sha1()
ctx = hashlib.sha256()
job_id = ray.get_runtime_context().get_job_id()
prefix = f"{LOG_PREFIX_JOB_ID}{job_id}\n{LOG_PREFIX_ACTOR_NAME}LogActor\n"
ctx.update(prefix.encode())
Expand All @@ -273,7 +273,7 @@ def write_log(self, log_file_size_byte: int):
assert node_id is not None, "Empty node id from the log actor"

# Retrieve the log and compare the checksum
ctx = hashlib.sha1()
ctx = hashlib.sha256()

time_taken = 0
t_start = time.perf_counter()
Expand Down