Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,7 @@ ENV/
/docs/dev/speasy.webservices.csa.rst
/docs/dev/speasy.webservices.rst
/docs/dev/speasy.webservices.ssc.rst

# Local dev
.claude/
.pre-commit-config.yaml
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[build-system]
build-backend = "flit_core.buildapi"
requires = ["flit_core"]
Expand Down Expand Up @@ -42,7 +42,7 @@
'numpy',
'packaging',
'pandas',
'pyistp>=0.7.2',
'pyistp[netcdf]>=0.7.2',
'python-dateutil',
'requests',
'scipy',
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ python_dateutil
PyYAML
requests
urllib3>=1.26.0
pyistp>=0.7.2
pyistp[netcdf]>=0.7.2
scipy
tqdm
certifi
Expand Down
4 changes: 2 additions & 2 deletions speasy/core/cache/_function_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import inspect
from datetime import timedelta
from functools import wraps
from typing import Callable, Optional
from typing import Callable, Optional, Union

from ._instance import _cache
from .cache import CacheItem
Expand All @@ -16,7 +16,7 @@ def make_key_from_args(*args, **kwargs):


class CacheCall(object):
def __init__(self, cache_retention=60 * 15, is_pure=False, cache_instance=_cache, version=1, leak_cache=False):
def __init__(self, cache_retention: Union[int, float, timedelta] = 60 * 15, is_pure=False, cache_instance=_cache, version=1, leak_cache=False):
from ..platform import is_running_on_wasm
if type(cache_retention) is timedelta:
cache_retention = cache_retention.total_seconds()
Expand Down
7 changes: 4 additions & 3 deletions speasy/core/codecs/bundled_codecs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .istp_cdf import IstpCdf
from .hapi.csv import HapiCsv
from .hapi.binary import HapiBinary
from .istp.cdf import IstpCdf # noqa: F401
from .istp.netcdf import IstpNetCDF # noqa: F401
from .hapi.csv import HapiCsv # noqa: F401
from .hapi.binary import HapiBinary # noqa: F401
116 changes: 116 additions & 0 deletions speasy/core/codecs/bundled_codecs/istp/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from typing import List, Optional
import re
import logging

import numpy as np

import pyistp
from pyistp.support_data_variable import SupportDataVariable

from speasy.core.any_files import any_loc_open
from speasy.core.url_utils import urlparse, is_local_file
from speasy.products import SpeasyVariable, VariableAxis, VariableTimeAxis, DataContainer

log = logging.getLogger(__name__)


def _fix_value_type(value):
if type(value) in (str, int, float):
return value
if type(value) is list:
return [_fix_value_type(sub_v) for sub_v in value]
if type(value) is bytes:
return value.decode('utf-8')
return str(value)


def _fix_attributes_types(attributes: dict):
    """Return a copy of *attributes* with every value passed through
    :func:`_fix_value_type` so the metadata only holds plain Python types."""
    return {key: _fix_value_type(value) for key, value in attributes.items()}


def _is_time_dependent(axis, time_axis_name):
if axis.attributes.get('DEPEND_TIME', '') == time_axis_name:
return not axis.is_nrv
if axis.attributes.get('DEPEND_0', '') == time_axis_name:
return not axis.is_nrv
return False


def _display_type(variable: pyistp.loader.DataVariable) -> str:
    """Return the ISTP ``DISPLAY_TYPE`` attribute of *variable*.

    Both the canonical upper-case and the lower-case spelling are
    accepted; an empty string means no display type is declared.
    """
    attributes = variable.attributes
    for key in ('DISPLAY_TYPE', 'display_type'):
        if key in attributes:
            return attributes[key]
    return ''


def _make_axis(axis, time_axis_name):
    """Wrap a pyistp support-data axis into a speasy :class:`VariableAxis`.

    Values are copied so the resulting axis does not alias the loader's
    buffers, and attributes are normalized to plain Python types.
    """
    meta = _fix_attributes_types(axis.attributes)
    time_dependent = _is_time_dependent(axis, time_axis_name)
    return VariableAxis(values=axis.values.copy(),
                        meta=meta,
                        name=axis.name,
                        is_time_dependent=time_dependent)


def _build_labels(variable: pyistp.loader.DataVariable):
    """Build per-component column labels for *variable*.

    For 2-D data: use the declared labels when their count matches the
    component count, expand a single label into ``label[i]`` entries,
    and otherwise fall back to generic ``component_i`` names. Non-2-D
    data just gets its labels normalized.
    """
    shape = variable.values.shape
    if len(shape) != 2:
        return _fix_value_type(variable.labels)
    n_components = shape[1]
    labels = variable.labels
    if type(labels) is list:
        if len(labels) == n_components:
            return _fix_value_type(labels)
        if len(labels) == 1:
            return [f"{labels[0]}[{i}]" for i in range(n_components)]
    return [f"component_{i}" for i in range(n_components)]


def _filter_extra_axes(variable: pyistp.loader.DataVariable) -> List[SupportDataVariable]:
    """Return every axis of *variable* except the time axis (axis 0)."""
    all_axes = variable.axes
    return all_axes[1:]


def _valid_variable_or_none(variable: SpeasyVariable) -> Optional[SpeasyVariable]:
    """Drop single-record variables whose epoch is a fill value.

    Some files store one record with an epoch far in the past as a fill
    marker; such variables are replaced by None, everything else passes
    through unchanged.
    """
    is_single_fill_record = (len(variable) == 1
                             and variable.time[0] < np.datetime64('1900-01-01'))
    return None if is_single_fill_record else variable


def _load_variable(istp_loader: pyistp.loader.ISTPLoader, variable) -> Optional[SpeasyVariable]:
    """Load *variable* from an open ISTP loader as a :class:`SpeasyVariable`.

    The requested name is resolved in three steps: exact match, the
    CSA/ISTP ``-`` → ``_`` mangling, then the CDAWeb special-character
    → ``$`` mangling. Returns ``None`` when the variable cannot be
    found, when its record count does not match its time axis, or when
    its only record carries an epoch fill value.
    """
    if variable in istp_loader.data_variables():
        var = istp_loader.data_variable(variable)
    elif variable.replace('-', '_') in istp_loader.data_variables():  # THX CSA/ISTP
        var = istp_loader.data_variable(variable.replace('-', '_'))
    else:  # CDA https://cdaweb.gsfc.nasa.gov/WebServices/REST/#Get_Data_GET
        alternative = re.sub(r"[\\/.%!@#^&*()\-+=`~|?<> ]", "$", variable)
        if alternative in istp_loader.data_variables():
            var = istp_loader.data_variable(alternative)
        else:
            return None
    # Only accept variables whose leading dimension lines up with the time axis.
    if (var is not None) and (var.values.shape[0] == var.axes[0].values.shape[0]):
        time_axis_name = var.axes[0].name
        return _valid_variable_or_none(SpeasyVariable(
            axes=[VariableTimeAxis(values=var.axes[0].values.copy(),
                                   meta=_fix_attributes_types(var.axes[0].attributes))] + [
                     _make_axis(axis, time_axis_name) for axis in _filter_extra_axes(var)],
            values=DataContainer(values=var.values.copy(), meta=_fix_attributes_types(var.attributes),
                                 name=var.name,
                                 is_time_dependent=True),
            columns=_build_labels(var)))
    return None


def _resolve_url_type(url, prefix="", cache_remote_files=True):
if url is None:
return prefix + "file", None
if type(url) is str:
if is_local_file(url):
return prefix + "file", urlparse(url=url).path
return prefix + "buffer", any_loc_open(url, mode='rb', cache_remote_files=cache_remote_files).read()
if type(url) in (memoryview, bytes):
return prefix + "buffer", url
if hasattr(url, 'read'):
return prefix + "buffer", url.read()
return prefix + "file", None


def _simplify_shape(values: np.ndarray) -> np.ndarray:
if len(values.shape) == 2 and values.shape[1] == 1:
return np.reshape(values, (-1))
return values
Original file line number Diff line number Diff line change
@@ -1,134 +1,31 @@
from typing import List, AnyStr, Optional, Mapping, Union
import io
import re
import logging

from datetime import timedelta
import numpy as np

import pyistp
from pyistp.support_data_variable import SupportDataVariable
import numpy as np
import pycdfpp
import pyistp

from speasy.core.codecs import CodecInterface, register_codec, Buffer
from speasy.core.any_files import any_loc_open
from speasy.core.url_utils import urlparse, is_local_file
from speasy.core.cache import CacheCall
from speasy.products import SpeasyVariable, VariableAxis, VariableTimeAxis, DataContainer

log = logging.getLogger(__name__)
_PTR_rx = re.compile(r".*_PTR(_\d+)?")


def _fix_value_type(value):
if type(value) in (str, int, float):
return value
if type(value) is list:
return [_fix_value_type(sub_v) for sub_v in value]
if type(value) is bytes:
return value.decode('utf-8')
return str(value)


def _fix_attributes_types(attributes: dict):
cleaned = {}
for key, value in attributes.items():
cleaned[key] = _fix_value_type(value)
return cleaned


def _is_time_dependent(axis, time_axis_name):
if axis.attributes.get('DEPEND_TIME', '') == time_axis_name:
return not axis.is_nrv
if axis.attributes.get('DEPEND_0', '') == time_axis_name:
return not axis.is_nrv
return False

from speasy.products import SpeasyVariable, VariableAxis

def _display_type(variable: pyistp.loader.DataVariable) -> str:
if 'DISPLAY_TYPE' in variable.attributes:
return variable.attributes['DISPLAY_TYPE']
if 'display_type' in variable.attributes:
return variable.attributes['display_type']
return ''


def _make_axis(axis, time_axis_name):
return VariableAxis(values=axis.values.copy(), meta=_fix_attributes_types(axis.attributes), name=axis.name,
is_time_dependent=_is_time_dependent(axis, time_axis_name))


def _build_labels(variable: pyistp.loader.DataVariable):
if len(variable.values.shape) != 2:
return _fix_value_type(variable.labels)
if type(variable.labels) is list and len(variable.labels) == variable.values.shape[1]:
return _fix_value_type(variable.labels)
if type(variable.labels) is list and len(variable.labels) == 1:
return [f"{variable.labels[0]}[{i}]" for i in range(variable.values.shape[1])]
return [f"component_{i}" for i in range(variable.values.shape[1])]


def _filter_extra_axes(variable: pyistp.loader.DataVariable) -> List[SupportDataVariable]:
return variable.axes[1:]
import re

from . import _load_variable, _resolve_url_type, _simplify_shape

def _valid_variable_or_none(variable: SpeasyVariable) -> Optional[SpeasyVariable]:
if len(variable) == 1 and variable.time[0] < np.datetime64('1900-01-01'): # handle fill values in epoch
return None
return variable


def _load_variable(istp_loader: pyistp.loader.ISTPLoader, variable) -> SpeasyVariable or None:
if variable in istp_loader.data_variables():
var = istp_loader.data_variable(variable)
elif variable.replace('-', '_') in istp_loader.data_variables(): # THX CSA/ISTP
var = istp_loader.data_variable(variable.replace('-', '_'))
else: # CDA https://cdaweb.gsfc.nasa.gov/WebServices/REST/#Get_Data_GET
alternative = re.sub(r"[\\/.%!@#^&*()\-+=`~|?<> ]", "$", variable)
if alternative in istp_loader.data_variables():
var = istp_loader.data_variable(alternative)
else:
return None
if (var is not None) and (var.values.shape[0] == var.axes[0].values.shape[0]):
time_axis_name = var.axes[0].name
return _valid_variable_or_none(SpeasyVariable(
axes=[VariableTimeAxis(values=var.axes[0].values.copy(),
meta=_fix_attributes_types(var.axes[0].attributes))] + [
_make_axis(axis, time_axis_name) for axis in _filter_extra_axes(var)],
values=DataContainer(values=var.values.copy(), meta=_fix_attributes_types(var.attributes),
name=var.name,
is_time_dependent=True),
columns=_build_labels(var)))
return None
log = logging.getLogger(__name__)
_PTR_rx = re.compile(r".*_PTR(_\d+)?")


def _load_variables(variables, file=None, buffer=None, master_file=None, master_buffer=None) -> SpeasyVariable or None:
def _load_variables(variables, file=None, buffer=None, master_file=None, master_buffer=None):
    """Load each name in *variables* from an ISTP file/buffer pair.

    Returns a dict mapping each requested name to its loaded variable
    (values may be None for individual misses), or None when pyistp
    cannot open the input at all.
    """
    istp_loader = pyistp.load(file=file, buffer=buffer, master_file=master_file,
                              master_buffer=master_buffer)
    if istp_loader is None:
        return None
    return {variable: _load_variable(istp_loader, variable) for variable in variables}


def _resolve_url_type(url, prefix="", cache_remote_files=True):
if url is None:
return prefix + "file", None
if type(url) is str:
if is_local_file(url):
return prefix + "file", urlparse(url=url).path
return prefix + "buffer", any_loc_open(url, mode='rb', cache_remote_files=cache_remote_files).read()
if type(url) in (memoryview, bytes):
return prefix + "buffer", url
if hasattr(url, 'read'):
return prefix + "buffer", url.read()
return prefix + "file", None


def _simplify_shape(values: np.ndarray) -> np.ndarray:
if len(values.shape) == 2 and values.shape[1] == 1:
return np.reshape(values, (-1))
return values


def _convert_attributes_to_variables(variable_name: str, attrs: Mapping, cdf: pycdfpp.CDF):
clean_attrs = {}
for name, attr_v in attrs.items():
Expand Down Expand Up @@ -158,7 +55,7 @@ def _write_axis(ax: VariableAxis, cdf: pycdfpp.CDF, compress_variables=False) ->


def _write_variable(v: SpeasyVariable, cdf: pycdfpp.CDF, already_saved_axes: List[VariableAxis],
compress_variables=False) -> bool:
compress_variables=False) -> None:
def _already_in_cdf(ax: VariableAxis):
for _ax in already_saved_axes:
if _ax == ax:
Expand All @@ -173,7 +70,7 @@ def _already_in_cdf(ax: VariableAxis):
depends[f"DEPEND_{index}"] = ax.name
already_saved_axes.append(ax)
else:
depends[f"DEPEND_{index}"] = a.name
depends[f"DEPEND_{index}"] = a
attributes = v.meta
attributes.update(depends)
cdf.add_variable(
Expand Down Expand Up @@ -228,7 +125,7 @@ def save_variables(self,
if type(file) is str:
pycdfpp.save(cdf, file)
return True
elif hasattr(file, 'write'):
elif isinstance(file, io.IOBase):
file.write(pycdfpp.save(cdf))
return True
elif file is None:
Expand Down
Loading
Loading