Skip to content

Commit 7742f14

Browse files
authored
Merge pull request #787 from crim-ca/cov-params
2 parents e0e195e + c9f9c89 commit 7742f14

File tree

14 files changed

+149
-44
lines changed

14 files changed

+149
-44
lines changed

.github/workflows/tests.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
strategy:
4747
matrix:
4848
os: [ubuntu-latest]
49-
python-version: ["3.9", "3.10", "3.11", "3.12"]
49+
python-version: ["3.10", "3.11", "3.12"]
5050
allow-failure: [false]
5151
test-case: [test-unit-only, test-func-only]
5252
include:

CHANGES.rst

+6-2
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,15 @@ Changes
1212

1313
Changes:
1414
--------
15-
- No change.
15+
- Update ``owslib==0.32.1`` for parameter fixes employed by *Collection Input* with ``format=ogc-coverage-collection``.
16+
- Drop support of Python 3.9 (required for ``owslib==0.32.1`` dependency).
1617

1718
Fixes:
1819
------
19-
- No change.
20+
- Fix parsing of *Collection Input* ``format=ogc-coverage-collection`` and ``format=ogc-map-collection``
21+
to provide additional parameters to the remote collection request.
22+
- Update ``pygeofilter>=0.3.1`` to resolve the ``filter-lang=FES`` parser as per other filters
23+
(relates to `geopython/pygeofilter#102 <https://github.com/geopython/pygeofilter/pull/102>`_).
2024

2125
.. _changes_6.2.0:
2226

README.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ for each process.
5454
* - releases
5555
- | |version| |commits-since| |docker_image|
5656

57-
.. |py_ver| image:: https://img.shields.io/badge/python-3.9%2B-blue.svg?logo=python
58-
:alt: Requires Python 3.9+
57+
.. |py_ver| image:: https://img.shields.io/badge/python-3.10%2B-blue.svg?logo=python
58+
:alt: Requires Python 3.10+
5959
:target: https://www.python.org/getit
6060

6161
.. |commits-since| image:: https://img.shields.io/github/commits-since/crim-ca/weaver/6.2.0.svg?logo=github

requirements.txt

+4-8
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,10 @@ mako
7171
# employed by cwltool -> schema-salad -> mistune
7272
#mistune>=2.0.3,<2.1
7373
mypy_boto3_s3
74-
numpy>=1.22.2,<2; python_version < "3.10"
75-
numpy>=1.22.2; python_version >= "3.10"
74+
numpy>=1.22.2
7675
# esgf-compute-api (cwt) needs oauthlib but doesn't add it in their requirements
7776
oauthlib
78-
owslib==0.29.3
77+
owslib==0.32.1
7978
PasteDeploy>=3.1.0; python_version >= "3.12"
8079
pint
8180
psutil
@@ -86,7 +85,7 @@ psutil
8685
# https://github.com/tschaub/ogcapi-features/tree/json-array-expression/extensions/cql/jfe
8786
# - extra 'backend-native' uses shapely for Python objects
8887
# - support FES, CQL(TEXT,JSON), CQL2(TEXT,JSON), ECQL, JFE
89-
pygeofilter[fes,backend-native]
88+
pygeofilter[fes,backend-native]>=0.3.1
9089
# pymongo>=4 breaks for some kombu combinations corresponding to pinned Celery
9190
# - https://github.com/crim-ca/weaver/issues/386
9291
# - https://github.com/celery/kombu/pull/1536
@@ -119,10 +118,7 @@ schema-salad>=8.3.20221209165047,<9
119118
shapely
120119
simplejson
121120
# urllib3 not directly required, pinned by Snyk to avoid CVE-2024-37891
122-
# Python<3.10 error via pip, avoid endless package install lookup error with botocore
123-
# (https://github.com/pypa/pip/issues/12827#issuecomment-2211291150)
124-
urllib3>=2.2.2 ; python_version >= "3.10"
125-
urllib3==1.26.19 ; python_version < "3.10" # pyup: ignore
121+
urllib3>=2.2.2
126122
urlmatch
127123
xmltodict
128124
webob

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def _parse_requirements(file_path, requirements, links):
120120
package_data={"": ["*.mako"]},
121121
zip_safe=False,
122122
test_suite="tests",
123-
python_requires=">=3.9, <4",
123+
python_requires=">=3.10, <4",
124124
install_requires=REQUIREMENTS,
125125
dependency_links=LINKS,
126126
extras_require={

tests/functional/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
import os
22

33
APP_PKG_ROOT = os.path.join(os.path.dirname(__file__), "application-packages")
4+
TEST_DATA_ROOT = os.path.join(os.path.dirname(__file__), "test-data")

tests/functional/test-data/test.nc

39.1 KB
Binary file not shown.

tests/functional/test_wps_package.py

+84-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from parameterized import parameterized
2929

3030
from tests import resources
31+
from tests.functional import TEST_DATA_ROOT
3132
from tests.functional.utils import ResourcesUtil, WpsConfigBase
3233
from tests.utils import (
3334
MOCK_AWS_REGION,
@@ -2354,7 +2355,6 @@ def test_execute_job_with_collection_input_ogc_features(self, filter_method, fil
23542355
filter_match = responses.matchers.query_param_matcher({
23552356
"filter": filter_value,
23562357
"filter-lang": filter_lang,
2357-
"timeout": "10", # added internally by collection processor
23582358
})
23592359
else:
23602360
filter_match = responses.matchers.json_params_matcher(filter_value)
@@ -2489,6 +2489,89 @@ def test_execute_job_with_collection_input_stac_items(self):
24892489
assert "features" in out_data and isinstance(out_data["features"], list)
24902490
assert len(out_data["features"]) == 1
24912491

2492+
@parameterized.expand([
2493+
(
2494+
{"subset": "Lat(10:20),Lon(30:40)", "datetime": "2025-01-01/2025-01-02"},
2495+
"?subset=Lat(10:20),Lon(30:40)&datetime=2025-01-01/2025-01-02",
2496+
),
2497+
(
2498+
{"subset": {"Lat": [10, 20], "Lon": [30, 40]}, "datetime": ["2025-01-01", "2025-01-02"]},
2499+
"?subset=Lat(10:20),Lon(30:40)&datetime=2025-01-01/2025-01-02",
2500+
),
2501+
])
2502+
def test_execute_job_with_collection_input_coverages_netcdf(self, coverage_parameters, coverage_request):
2503+
# type: (JSON, str) -> None
2504+
proc_name = "DockerNetCDF2Text"
2505+
body = self.retrieve_payload(proc_name, "deploy", local=True)
2506+
cwl = self.retrieve_payload(proc_name, "package", local=True)
2507+
body["executionUnit"] = [{"unit": cwl}]
2508+
proc_id = self.fully_qualified_test_name(self._testMethodName)
2509+
self.deploy_process(body, describe_schema=ProcessSchema.OGC, process_id=proc_id)
2510+
2511+
with contextlib.ExitStack() as stack:
2512+
tmp_host = "https://mocked-file-server.com" # must match collection prefix hostnames
2513+
tmp_svr = stack.enter_context(responses.RequestsMock(assert_all_requests_are_fired=False))
2514+
test_file = "test.nc"
2515+
test_data = stack.enter_context(open(os.path.join(TEST_DATA_ROOT, test_file), mode="rb")).read()
2516+
2517+
# coverage request expected with resolved query parameters matching submitted collection input parameters
2518+
col_url = f"{tmp_host}/collections/climate-data"
2519+
col_cov_url = f"{col_url}/coverage"
2520+
col_cov_req = f"{col_cov_url}{coverage_request}"
2521+
tmp_svr.add("GET", col_cov_req, body=test_data)
2522+
2523+
col_exec_body = {
2524+
"mode": ExecuteMode.ASYNC,
2525+
"response": ExecuteResponse.DOCUMENT,
2526+
"inputs": {
2527+
"input_nc": {
2528+
"collection": col_url,
2529+
"format": ExecuteCollectionFormat.OGC_COVERAGE, # NOTE: this is the test!
2530+
"type": ContentType.APP_NETCDF, # must align with process input media-type
2531+
**coverage_parameters,
2532+
}
2533+
}
2534+
}
2535+
2536+
for mock_exec in mocked_execute_celery():
2537+
stack.enter_context(mock_exec)
2538+
proc_url = f"/processes/{proc_id}/execution"
2539+
resp = mocked_sub_requests(self.app, "post_json", proc_url, timeout=5,
2540+
data=col_exec_body, headers=self.json_headers, only_local=True)
2541+
assert resp.status_code in [200, 201], f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}"
2542+
2543+
status_url = resp.json["location"]
2544+
results = self.monitor_job(status_url)
2545+
assert "output_txt" in results
2546+
2547+
job_id = status_url.rsplit("/", 1)[-1]
2548+
log_url = f"{status_url}/logs"
2549+
log_txt = self.app.get(log_url, headers={"Accept": ContentType.TEXT_PLAIN}).text
2550+
cov_col = "coverage.nc" # file name applied by 'collection_processor' (resolved by 'format' + 'type' extension)
2551+
cov_out = "coverage.txt" # extension modified by invoked process from input file name, literal copy of NetCDF
2552+
assert cov_col in log_txt, "Resolved NetCDF file from collection handler should have been logged."
2553+
assert cov_out in log_txt, "Chained NetCDF copied by the process as text should have been logged."
2554+
2555+
wps_dir = get_wps_output_dir(self.settings)
2556+
job_dir = os.path.join(wps_dir, job_id)
2557+
job_out = os.path.join(job_dir, "output_txt", cov_out)
2558+
assert os.path.isfile(job_out), f"Invalid output file not found: [{job_out}]"
2559+
with open(job_out, mode="rb") as out_fd: # output, although ".txt" is actually a copy of the submitted NetCDF
2560+
out_data = out_fd.read(3)
2561+
assert out_data == b"CDF", "Output file from (collection + process) chain should contain the NetCDF header."
2562+
2563+
for file_path in [
2564+
os.path.join(job_dir, cov_col),
2565+
os.path.join(job_dir, "inputs", cov_col),
2566+
os.path.join(job_dir, "output_txt", cov_col),
2567+
os.path.join(job_out, cov_col),
2568+
os.path.join(job_out, "inputs", cov_col),
2569+
os.path.join(job_out, "output_txt", cov_col),
2570+
]:
2571+
assert not os.path.exists(file_path), (
2572+
f"Intermediate collection coverage file should not exist: [{file_path}]"
2573+
)
2574+
24922575
def test_execute_job_with_context_output_dir(self):
24932576
cwl = {
24942577
"cwlVersion": "v1.0",

weaver/compat.py

-6
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,3 @@ def patch(self) -> int:
3838
@property
3939
def micro(self) -> int:
4040
return self.patch
41-
42-
try:
43-
from functools import cache # pylint: disable=unused-import # definition for other modules to import
44-
except ImportError: # python<3.9 # pragma: no cover
45-
from functools import lru_cache
46-
cache = lru_cache(maxsize=None)

weaver/formats.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import re
77
import socket
8+
from functools import cache
89
from typing import TYPE_CHECKING, cast, overload
910
from urllib.error import HTTPError, URLError
1011
from urllib.request import urlopen
@@ -17,7 +18,6 @@
1718
from requests.exceptions import ConnectionError
1819

1920
from weaver.base import Constants, classproperty
20-
from weaver.compat import cache
2121

2222
if TYPE_CHECKING:
2323
from typing import Any, AnyStr, Dict, List, Optional, Tuple, TypeAlias, TypeVar, Union

weaver/processes/builtin/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import os
33
import sys
4+
from functools import cache
45
from importlib import import_module
56
from string import Template
67
from typing import TYPE_CHECKING
@@ -12,7 +13,6 @@
1213
from cwltool.singularity import SingularityCommandLineJob
1314

1415
from weaver import WEAVER_ROOT_DIR
15-
from weaver.compat import cache
1616
from weaver.database import get_db
1717
from weaver.datatype import Process
1818
from weaver.exceptions import PackageExecutionError, PackageNotFound, ProcessNotAccessible, ProcessNotFound

weaver/processes/builtin/collection_processor.py

+44-14
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from weaver.wps_restapi import swagger_definitions as sd # isort:skip # noqa: E402
4242

4343
if TYPE_CHECKING:
44-
from typing import List
44+
from typing import Dict, List
4545

4646
from pystac import Asset
4747

@@ -127,8 +127,13 @@ def process_collection(collection_input, input_definition, output_dir, logger=LO
127127
api_url, col_id = col_parts if len(col_parts) == 2 else (None, col_parts[0])
128128
col_id_alt = get_any_id(col_input, pop=True)
129129
col_id = col_id or col_id_alt
130+
timeout = col_args.pop("timeout", 10)
130131

131-
col_args.setdefault("timeout", 10)
132+
# convert all query parameters to their corresponding function parameter name
133+
for arg in list(col_args):
134+
if "-" in arg:
135+
col_args[arg.replace("-", "_")] = col_args.pop(arg)
136+
col_args = parse_collection_parameters(col_args)
132137

133138
logger.log( # pylint: disable=E1205 # false positive
134139
logging.INFO,
@@ -144,7 +149,7 @@ def process_collection(collection_input, input_definition, output_dir, logger=LO
144149
col_href,
145150
queries=col_args,
146151
headers={"Accept": f"{ContentType.APP_GEOJSON},{ContentType.APP_JSON}"},
147-
timeout=col_args["timeout"],
152+
timeout=timeout,
148153
retries=3,
149154
only_server_errors=False,
150155
)
@@ -161,16 +166,12 @@ def process_collection(collection_input, input_definition, output_dir, logger=LO
161166
resolved_files.append(file_obj)
162167

163168
elif col_fmt in [ExecuteCollectionFormat.STAC, ExecuteCollectionFormat.STAC_ITEMS]:
164-
# convert all parameters to their corresponding name of the query utility, and ignore unknown ones
165-
for arg in list(col_args):
166-
if "-" in arg:
167-
col_args[arg.replace("-", "_")] = col_args.pop(arg)
169+
# ignore unknown or enforced parameters
168170
known_params = set(inspect.signature(ItemSearch).parameters)
169171
known_params -= {"url", "method", "stac_io", "client", "collection", "ids", "modifier"}
170172
for param in set(col_args) - known_params:
171173
col_args.pop(param)
172174

173-
timeout = col_args.pop("timeout", 10)
174175
search_url = f"{api_url}/search"
175176
search = ItemSearch(
176177
url=search_url,
@@ -193,12 +194,18 @@ def process_collection(collection_input, input_definition, output_dir, logger=LO
193194
resolved_files.append(file_obj)
194195

195196
elif col_fmt == ExecuteCollectionFormat.OGC_FEATURES:
196-
if str(col_args.get("filter-lang")) == "cql2-json":
197+
if str(col_args.get("filter_lang")) == "cql2-json":
197198
col_args["cql"] = col_args.pop("filter")
199+
col_args.pop("filter_lang")
200+
else:
201+
for arg in list(col_args):
202+
if arg.startswith("filter_"):
203+
col_args[arg.replace("_", "-")] = col_args.pop(arg)
198204
search = Features(
199205
url=api_url,
200206
# FIXME: add 'auth' or 'headers' authorization/cookies?
201207
headers={"Accept": f"{ContentType.APP_GEOJSON}, {ContentType.APP_VDN_GEOJSON}, {ContentType.APP_JSON}"},
208+
json_="{}", # avoid unnecessary request on init
202209
)
203210
items = search.collection_items(col_id, **col_args)
204211
if items.get("type") != "FeatureCollection" or "features" not in items:
@@ -228,13 +235,14 @@ def process_collection(collection_input, input_definition, output_dir, logger=LO
228235
url=api_url,
229236
# FIXME: add 'auth' or 'headers' authorization/cookies?
230237
headers={"Accept": ContentType.APP_JSON},
238+
json_="{}", # avoid unnecessary request on init
231239
)
232240
ctype = (col_media_type or [ContentType.IMAGE_COG])[0]
233241
ext = get_extension(ctype, dot=False)
234-
path = os.path.join(output_dir, f"map.{ext}")
242+
path = os.path.join(output_dir, f"coverage.{ext}")
235243
with open(path, mode="wb") as file:
236-
data = cast(io.BytesIO, cov.coverage(col_id)).getbuffer()
237-
file.write(data) # type: ignore
244+
data = cast(io.BytesIO, cov.coverage(col_id, **col_args)).getbuffer()
245+
file.write(data)
238246
_, file_fmt = get_cwl_file_format(ctype)
239247
file_obj = {"class": PACKAGE_FILE_TYPE, "path": f"file://{path}", "format": file_fmt}
240248
resolved_files.append(file_obj)
@@ -244,13 +252,14 @@ def process_collection(collection_input, input_definition, output_dir, logger=LO
244252
url=api_url,
245253
# FIXME: add 'auth' or 'headers' authorization/cookies?
246254
headers={"Accept": ContentType.APP_JSON},
255+
json_="{}", # avoid unnecessary request on init
247256
)
248257
ctype = (col_media_type or [ContentType.IMAGE_COG])[0]
249258
ext = get_extension(ctype[0], dot=False)
250259
path = os.path.join(output_dir, f"map.{ext}")
251260
with open(path, mode="wb") as file:
252-
data = cast(io.BytesIO, maps.map(col_id)).getbuffer()
253-
file.write(data) # type: ignore
261+
data = cast(io.BytesIO, maps.map(col_id, **col_args)).getbuffer()
262+
file.write(data)
254263
_, file_fmt = get_cwl_file_format(ctype)
255264
file_obj = {"class": PACKAGE_FILE_TYPE, "path": f"file://{path}", "format": file_fmt}
256265
resolved_files.append(file_obj)
@@ -271,6 +280,27 @@ def process_collection(collection_input, input_definition, output_dir, logger=LO
271280
return resolved_files
272281

273282

283+
def parse_collection_parameters(parameters):
284+
# type: (Dict[str, JSON]) -> Dict[str, JSON]
285+
"""
286+
Applies any relevant conversions of known parameters between allowed request format and expected utilities.
287+
"""
288+
if not parameters:
289+
return {}
290+
291+
subset = parameters.get("subset")
292+
if subset and isinstance(subset, str):
293+
subset_dims = {}
294+
for item in subset.split(","):
295+
dim, span = item.split("(", 1)
296+
span = span.split(")", 1)[0]
297+
ranges = span.split(":")
298+
subset_dims[dim] = list(ranges)
299+
parameters["subset"] = subset_dims
300+
301+
return parameters
302+
303+
274304
def process_cwl(collection_input, input_definition, output_dir):
275305
# type: (JobValueCollection, ProcessInputOutputItem, Path) -> CWL_IO_ValueMap
276306
files = process_collection(collection_input, input_definition, output_dir)

weaver/processes/wps_package.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import tempfile
2121
import time
2222
import uuid
23+
from functools import cache
2324
from typing import TYPE_CHECKING, cast, overload
2425
from urllib.parse import parse_qsl, urlparse
2526

@@ -48,7 +49,6 @@
4849
from pywps.validator.mode import MODE
4950
from requests.structures import CaseInsensitiveDict
5051

51-
from weaver.compat import cache
5252
from weaver.config import WeaverConfiguration, WeaverFeature, get_weaver_configuration
5353
from weaver.database import get_db
5454
from weaver.datatype import DockerAuthentication

0 commit comments

Comments
 (0)