Skip to content

Commit

Permalink
Merge pull request #50 from spectriclabs/dev
Browse files Browse the repository at this point in the history
Fix long file name bug and resolve FastAPI regex DoS (CVE-2024-24762)
  • Loading branch information
desean1625 authored Feb 7, 2024
2 parents cdb9d34 + 3bf7868 commit cc7d78e
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 14 deletions.
34 changes: 29 additions & 5 deletions elastic_datashader/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from collections import OrderedDict
from datetime import datetime, timedelta, timezone
from os import scandir
from hashlib import sha256
import os
from contextlib import suppress
from pathlib import Path
Expand All @@ -23,14 +24,37 @@ def path_age(now: datetime, path: Path) -> timedelta:

return now - path_dt

# Memo of index string -> truncated SHA-256 hex digest. Filled lazily by
# get_index_hash so the same index string is only hashed once.
index_hash_map = {}


def get_index_hash(idx: str) -> str:
    """
    Return a short, stable hash for an index (or index set) string.

    On some operating systems the cache path becomes too long, and file
    creation fails, when multiple CCS indexes are listed explicitly.
    For example ``*:my-data-*`` may instead be given as
    ``mysite-1:my-data-*,mysite-2:my-data-*,mysite-3:my-data-*,...``.
    Using a fixed-length hash in place of the raw index string keeps the
    path component short.

    :param idx: Index name or comma-separated index pattern
    :return: First 20 hex characters of the SHA-256 of ``str(idx)``
    """
    # Key the memo by the exact text that is hashed, so the cache key and
    # the digest input can never disagree.
    key = str(idx)

    try:
        return index_hash_map[key]
    except KeyError:
        pass

    # 20 hex characters (80 bits) keeps the directory name short.
    digest = sha256(key.encode("utf-8")).hexdigest()[:20]
    index_hash_map[key] = digest
    return digest

def tile_name(idx, x, y, z, parameter_hash) -> str:
return f"{idx}/{parameter_hash}/{z}/{x}/{y}.png"
idx_hash = get_index_hash(idx)
return f"{idx_hash}/{parameter_hash}/{z}/{x}/{y}.png"

def rendering_tile_name(idx, x, y, z, parameter_hash) -> str:
return f"{idx}/{parameter_hash}/{z}/{x}/{y}.rendering"
idx_hash = get_index_hash(idx)

return f"{idx_hash}/{parameter_hash}/{z}/{x}/{y}.rendering"

def tile_id(idx, x, y, z, parameter_hash) -> str:
return f"{idx}_{parameter_hash}_{z}_{x}_{y}"
idx_hash = get_index_hash(idx)
return f"{idx_hash}_{parameter_hash}_{z}_{x}_{y}"

def directory_size(path: Path) -> int:
'''
Expand Down Expand Up @@ -134,14 +158,14 @@ def release_cache_placeholder(cache_path: Path, tile: str) -> None:
if tile_path.exists():
tile_path.unlink(missing_ok=True)

def check_cache_dir(cache_path: Path, layer_name: str) -> None:
def check_cache_dir(cache_path: Path, idx: str) -> None:
"""
Ensure the folder ``cache_path``/``get_index_hash(idx)`` exists
:param cache_path: Top level directory
:param idx: Index name or pattern; its hash names the cache subfolder
"""
tile_cache_path = cache_path / layer_name
tile_cache_path = cache_path / get_index_hash(idx)
tile_cache_path.mkdir(parents=True, exist_ok=True)

def clear_hash_cache(cache_path: Path, idx_name: str, param_hash: Optional[str]) -> None:
Expand Down
2 changes: 1 addition & 1 deletion elastic_datashader/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def get_parameter_hash(params: Dict[str, Any]) -> str:
p = p.isoformat()
parameter_hash.update(str(p).encode("utf-8"))

return parameter_hash.hexdigest()
return parameter_hash.hexdigest()[0:30]

def extract_parameters(headers: Dict[Any, Any], query_params: Dict[Any, Any]) -> Tuple[str, Dict[str, Any]]:
"""
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ numpy = "^1.23"
PyYAML = "*"
humanize = "*"
uvicorn = {extras = ["standard"], version = "0.24.0", optional = true}
fastapi = "^0.96"
fastapi = ">=0.109.1"
georgio = "2023.156.924"
jinja2 = "3.1.2"

Expand Down
8 changes: 4 additions & 4 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ def test_du(tmp_path):


def test_tile_name():
assert cache.tile_name("abc", 1, 2, 3, "somehash") == "abc/somehash/3/1/2.png"
assert cache.tile_name("abc", 1, 2, 3, "somehash") == "ba7816bf8f01cfea4141/somehash/3/1/2.png"


def test_tile_id():
assert cache.tile_id("abc", 1, 2, 3, "somehash") == "abc_somehash_3_1_2"
assert cache.tile_id("abc", 1, 2, 3, "somehash") == "ba7816bf8f01cfea4141_somehash_3_1_2"


def test_get_cache_none():
Expand All @@ -57,8 +57,8 @@ def test_set_cache(tmp_path):


def test_check_cache_dir(tmp_path):
cache.check_cache_dir(tmp_path, "foo")
assert (tmp_path / "foo").exists()
cache.check_cache_dir(tmp_path, "abc")
assert (tmp_path / "ba7816bf8f01cfea4141").exists()


def test_clear_hash_cache(tmp_path):
Expand Down
6 changes: 3 additions & 3 deletions tests/test_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,9 @@ def test_get_category_field():
assert parameters.get_category_field("banana") == "banana"

def test_get_parameter_hash():
assert parameters.get_parameter_hash({"foo": "bar", "baz": 1}) == "a6488297eb1cdaa23e196800b1c399b13d9a5c86cfecaf8f9fd0fbe9af7533db"
assert parameters.get_parameter_hash({"foo": "bar", "baz": 1, "abc": datetime(2022, 2, 17, 11, 0, 0, tzinfo=timezone.utc)}) == "88ade56886a8099e6fd3c25525a0fb9ab3938be64374569480ed3bfd7f3d70e9"
assert parameters.get_parameter_hash({}) == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
assert parameters.get_parameter_hash({"foo": "bar", "baz": 1}) == "a6488297eb1cdaa23e196800b1c399"
assert parameters.get_parameter_hash({"foo": "bar", "baz": 1, "abc": datetime(2022, 2, 17, 11, 0, 0, tzinfo=timezone.utc)}) == "88ade56886a8099e6fd3c25525a0fb"
assert parameters.get_parameter_hash({}) == "e3b0c44298fc1c149afbf4c8996fb9"

def test_get_time_bounds_already_quantized():
now = datetime(2022, 6, 14, 12, 15, 0, tzinfo=timezone.utc)
Expand Down

0 comments on commit cc7d78e

Please sign in to comment.