Skip to content
4 changes: 3 additions & 1 deletion osprey/processes/wps_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@
log_level,
nc_output,
)
from osprey.utils import logger
from osprey.utils import logger, replace_urls

# Library imports
import os
import json
from datetime import datetime
from pkg_resources import resource_filename


class Parameters(Process):
Expand Down Expand Up @@ -97,6 +98,7 @@ def _handler(self, request, response):
logger.info(version.short_version)

(config, np, loglevel) = self.collect_args(request)
replace_urls(config, self.workdir)
log_handler(
self,
response,
Expand Down
34 changes: 34 additions & 0 deletions osprey/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import logging
import os
import json
import requests
from datetime import datetime, timedelta
from tempfile import NamedTemporaryFile

logger = logging.getLogger("PYWPS")
logger.setLevel(logging.NOTSET)
Expand Down Expand Up @@ -34,6 +36,38 @@ def replace_filenames(config, temp_config):
temp_config.writelines(newdata)


def replace_urls(config, outdir):
"""
Copy https URLs to local storage and replace URLs
with local paths in config file.
Parameters:
config (str): Config file
outdir (str): Output directory
"""
read_config = open(config, "r")
filedata = read_config.readlines()
read_config.close()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could use a context manager here.


for i in range(len(filedata)):
if "https" in filedata[i]:
url = filedata[i].split(" ")[-1] # https url is last word in line
url = url.rstrip() # remove \n character at end
r = requests.get(url)
filename = url.split("/")[-1]
prefix, suffix = filename.split(".")
suffix = "." + suffix
local_file = NamedTemporaryFile(
suffix=suffix, prefix=prefix, dir=outdir, delete=False
)
Comment on lines +57 to +59

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great use of tempfile here 👍

local_file.write(r.content)
filedata[i] = filedata[i].replace(url, local_file.name)
Comment on lines +49 to +61

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While this is fine, we generally don't want to use indices if we can help it. It is more understandable to iterate through the iterable object : for data in filedata:....

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used the index because I wasn't sure if the assignment in line 63 would modify the filedata list if I iterated through each element.


write_config = open(config, "w")
for line in filedata:
write_config.write(f"{line}\n")
write_config.close()


def config_hander(workdir, unprocessed, config_template):
"""
This function enables users to provide dictionary-like string for Configuration input.
Expand Down
1 change: 1 addition & 0 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pytest
flake8
pytest-flake8
requests-mock

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't know this package existed. I wonder if we can apply to our notebooks to make truly offline tests.

ipython
pytest-notebook
nbsphinx
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ SEARCH_FOR_CHANNEL: False
#-- Path to Pour Points File (char) --#
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice to use this file in the jupyter lab demo as well

# A comma separated file of outlets to route to [lons, lats] - one coordinate pair per line (order not important)
# May optionally include a column [names] - which will (if not aggregating) be included in param file
FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/dodsC/datasets/RVIC/sample_pour.txt
FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/fileServer/datasets/RVIC/sample_pour.txt

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the purpose of this change from dodsC to fileServer?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is so that https instead of OpenDAP is used to download the file from THREDDS. James told me that it makes downloading entire files easier since I won't have to specify attributes or time ranges.


Copy link
Contributor

@sum1lim sum1lim Aug 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the ideal use of pour input is to get user inputs (lon and lat) and create a pour.txt file in the tmp directory. It is a variable that needs to be dynamically defined by users. @jameshiebert @corviday , do you think this is the right approach? I guess we don't have to worry about that in this PR. I will write an issue if it is needed.

#-- ====================================== --#

Expand All @@ -83,7 +83,7 @@ FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/dodsC/da
# This defines the unit hydrograph to rout flow to the edge of each grid cell.
# A comma separated file of [time in seconds, unit hydrograph ordinate] - one timestep per line
# The timestep should be 1hr (3600 sec) or less.
FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/dodsC/datasets/RVIC/uhbox.csv
FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/fileServer/datasets/RVIC/uhbox.csv

#-- Number of Header lines to ignore in [UH_BOX]FILE_NAME (INT) --#
HEADER_LINES = 1
Expand All @@ -92,7 +92,7 @@ HEADER_LINES = 1
[ROUTING]
#-- ====================================== --#
#-- Path to routing inputs netcdf (char) --#
FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/dodsC/datasets/RVIC/sample_flow_parameters.nc
FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/fileServer/datasets/RVIC/sample_flow_parameters.nc

#-- netCDF Variable Names --#
LONGITUDE_VAR: lon
Expand Down Expand Up @@ -124,7 +124,7 @@ CELL_FLOWDAYS: 4
[DOMAIN]
#-- ====================================== --#
#-- Path to cesm compliant domain file (char) --#
FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/dodsC/datasets/RVIC/sample_routing_domain.nc
FILE_NAME: https://docker-dev03.pcic.uvic.ca/twitcher/ows/proxy/thredds/fileServer/datasets/RVIC/sample_routing_domain.nc

#-- netCDF Variable Names --#
LONGITUDE_VAR: lon
Expand Down
18 changes: 18 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pytest
from pkg_resources import resource_filename


@pytest.fixture
def make_mock_urls(config, requests_mock):
read_config = open(config, "r")
config_data = read_config.readlines()
read_config.close()
for line in config_data:
if "https" in line:
url = line.split(" ")[-1] # https url is last word in line
url = url.rstrip() # remove \n character at end
filename = url.split("/")[-1]
f = open(resource_filename(__name__, f"data/samples/{filename}"), "rb")
filedata = f.read()
f.close()
requests_mock.get(url, content=filedata)
16 changes: 16 additions & 0 deletions tests/test_wps_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,19 @@ def test_parameters_local(config):
temp_config.read()
params = f"config={temp_config.name};"
run_wps_process(Parameters(), params)


@pytest.mark.online
@pytest.mark.parametrize(
("config"), [resource_filename(__name__, "configs/parameter_https.cfg")],
)
def test_parameters_https(config, make_mock_urls):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is failing with the following error message:

Traceback (most recent call last):
  File "/home/sangwonl/Desktop/osprey/venv/lib/python3.6/site-packages/pywps/app/Process.py", line 249, in _run_process
    self.handler(wps_request, wps_response)  # the user must update the wps_response.
  File "/home/sangwonl/Desktop/osprey/osprey/processes/wps_parameters.py", line 119, in _handler
    parameters(config, np)
  File "/home/sangwonl/Desktop/osprey/venv/lib/python3.6/site-packages/rvic/parameters.py", line 51, in parameters
    outlets, config_dict, directories = gen_uh_init(config_file)
  File "/home/sangwonl/Desktop/osprey/venv/lib/python3.6/site-packages/rvic/parameters.py", line 161, in gen_uh_init
    config_dict = copy_inputs(config_file, directories['inputs'])
  File "/home/sangwonl/Desktop/osprey/venv/lib/python3.6/site-packages/rvic/core/utilities.py", line 243, in copy_inputs
    copyfile(section['FILE_NAME'], new_file_name)
  File "/usr/lib/python3.6/shutil.py", line 120, in copyfile
    with open(src, 'rb') as fsrc:
FileNotFoundError: [Errno 2] No such file or directory: '/tmp/pywps_process_36j0za4t/sample_pour2b3izq8y.txt'

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does each FILE_PATH in your parameter_https.cfg file look like? I just realized that running the notebook as is permanently changes it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, the demo doesn't use that .cfg file, so it won't be modified. I'll look into it more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you try replacing self.workdir in this line with a different directory, and after the for loop in replace_urls, print filedata? I just want to see if the contents of the config file are being modified correctly.

Copy link
Contributor

@sum1lim sum1lim Aug 24, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does each FILE_PATH in your parameter_https.cfg file look like? I just realized that running the notebook as is permanently changes it.

I think you are right here. I did git stash and ran pytest again, and it passed.

Actually, the demo doesn't use that .cfg file, so it won't be modified. I'll look into it more.

Last time I ran the demo first by changing the input to the parameter_https.cfg, so that caused the issue you said.

config_name = os.path.splitext(config)[0] # Remove .cfg extension
with NamedTemporaryFile(
suffix=".cfg", prefix=os.path.basename(config_name), mode="w+t"
) as temp_config: # Avoid permanent replacement of https URLs
read_config = open(config, "r")
temp_config.writelines(read_config.read())
temp_config.read()
Comment on lines +35 to +37

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we aren't just directly reading into the temp_config?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what you mean here.

params = f"config={temp_config.name};"
run_wps_process(Parameters(), params)