Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ HGA follows semantic versioning. All notable changes to this project will be
documented in this file. The format is based on [Keep a
Changelog](http://keepachangelog.com/en/1.0.0/).

## [v3.0.5] - 2025-10-01

### Changed:

* `gdalwarp`, `gdal_translate`, `ogr2ogr` and `gdal_edit.py` commands have been
migrated to use the Python based bindings from the `osgeo.gdal` module.

## [v3.0.4] - 2025-09-30

### Changed:
Expand Down
2 changes: 1 addition & 1 deletion docker/service.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ RUN conda create -y --name hga python=3.11 --channel conda-forge --override-chan
-q -y && conda clean -a

# Install GDAL
RUN conda run --name hga conda install gdal=3.6.2
RUN conda run --name hga conda install gdal=3.6.2 --channel conda-forge --override-channels
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a head scratcher that I won't get pulled into. But good catch.


# Copy service requirements file into image.
COPY requirements.txt .
Expand Down
9 changes: 9 additions & 0 deletions gdal_subsetter/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,12 @@ class MissingVariableError(HGANoRetryException):

def __init__(self, variable_name):
super().__init__(f"Missing variable in input file: {variable_name}")


class UnsupportedInterpolationMethodError(HGANoRetryException):
"""Raised when a Harmony request specified an unknown interpolation method."""

def __init__(self, interpolation_method):
super().__init__(
f"Unsupported interpolation method specified: {interpolation_method}"
)
241 changes: 116 additions & 125 deletions gdal_subsetter/transform.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
"""Adapter for the Harmony GDAL Adapter service."""

from shutil import copyfile, rmtree
from subprocess import check_output
from tempfile import mkdtemp
from zipfile import ZipFile
import os

from harmony_service_lib.adapter import BaseHarmonyAdapter
from harmony_service_lib.message import Source as HarmonySource
from harmony_service_lib.message_utility import rgetattr
from harmony_service_lib.util import (
bbox_to_geometry,
download,
generate_output_filename,
stage,
)
from osgeo import gdal, osr, ogr, gdal_array
from osgeo.gdal import (
Translate,
TranslateOptions,
VectorTranslate,
VectorTranslateOptions,
Warp,
WarpOptions,
)
from osgeo.gdalconst import GA_ReadOnly, GA_Update, GMF_PER_DATASET
from pyproj import Proj
from pystac import Asset, Item
Expand Down Expand Up @@ -44,13 +52,13 @@
from gdal_subsetter.utilities import (
get_file_type,
get_geotiff_variables,
get_resample_algorithm,
get_unzipped_geotiffs,
get_version,
has_world_file,
OpenGDAL,
process_flags,
rename_file,
resampling_methods,
)


Expand Down Expand Up @@ -336,26 +344,21 @@ def update_layernames(self, file_name, layer_names):
for index, layer_name in enumerate(layer_names, start=1):
dataset.GetRasterBand(index).SetDescription(layer_name)

def execute_gdal_command(self, *args) -> list[str]:
"""This is used to execute gdal* commands."""
self.logger.info(
args[0] + " " + " ".join(["'{}'".format(arg) for arg in args[1:]])
)
result_str = check_output(args).decode("utf-8")
return result_str.split("\n")
Comment on lines -339 to -345
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎉

def varsubset(self, srcfile: str, dstfile: str, band: int | None = None) -> str:
"""Perform a single-band variable subset of a GeoTIFF.

If no band is supplied, return the path of the input file.

def varsubset(self, srcfile, dstfile, band=None):
if not band:
"""
if band is None:
return srcfile

command = ["gdal_translate"]
command.extend(["-b", str(band)])
command.extend([srcfile, dstfile])
self.execute_gdal_command(*command)
self.logger.info(f"Performing variable subset on {srcfile} for Band {band}")
Translate(dstfile, srcfile, options=TranslateOptions(bandList=[band]))

return dstfile

def subset(self, layerid, srcfile, dstdir, band=None):
def subset(self, layerid, srcfile: str, dstdir: str, band: int | None = None):
"""Subset layer to region defined in message.subset."""
normalized_layerid = layerid.replace("/", "_")
subset = self.message.subset
Expand Down Expand Up @@ -447,109 +450,101 @@ def get_shapefile(self, subsetshape, dstdir):
tmpfile_geojson = convert_to_multipolygon(shapefile, tmpfile_geojson, buf=buf)

# convert into ESRI shapefile
outfile = f"{fileprex}.shp"
command = ["ogr2ogr", "-f", "ESRI Shapefile"]
command.extend([outfile, tmpfile_geojson])
self.execute_gdal_command(*command)
output_shape_file = f"{fileprex}.shp"
VectorTranslate(
output_shape_file,
tmpfile_geojson,
options=VectorTranslateOptions(format="ESRI Shapefile"),
)

return outfile
return output_shape_file

def resize(self, layerid, srcfile, dstdir):
"""Resizes the input layer
def resize(self, layerid: str, srcfile: str, dstdir: str) -> str:
"""Resizes the input layer and returns the name of the output file.

Uses a call to gdal_translate using information in message.format.
Uses the `osgeo.gdal.Translate` Python binding to execute
gdal_translate using information in the Harmony message.

NOTE: Message parameters are retrieved without marking them as
processed. This means that they will be passed on to any subsequent
steps in a workflow chain. To change this behaviour, the
`harmony_service_lib.message.Message.process` method can be used.

"""
interpolation = self.message.format.process("interpolation")
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't process mark this as being "used" or something? I thought process was a way to remove things from a message before downstream services use it. I only have a vague idea about this and I'm sure you told me. But just looking at this change and want to make sure we're not breaking something.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right - process is meant to do that. From what I can tell:

So:

  1. My first concern was that the message wouldn't be able to reuse the same property within the service, and some of the message properties are used multiple times (CRS, for example). This is irrelevant. Message.output_data won't preclude parameter reuse in the same service.
  2. Do we want to strip things out of the message? I'm honestly not sure. I could see times when it's good (preventing repeat processing) and times when it's bad (the same parameter being needed in multiple services).

I guess I don't have an answer for what we should do, but definitely acknowledge this is now doing something different. What are your thoughts?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have strong opinions. This used to remove an "interpolation" value so it wouldn't be available to downstream services, but this is the service that should do interpolation probably. I don't think a lot of services are using the process mechanism. But I also don't know if they should be.

How is that for a non-answer?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😆

I added a disclaimer in the method and function documentation strings that makes it clear the parameters aren't being removed in b37c762.

if interpolation in resampling_methods:
resample_method = interpolation
else:
resample_method = "bilinear"
resample_algorithm = get_resample_algorithm(self.message)

command = ["gdal_translate"]
fmt = self.message.format
normalized_layerid = layerid.replace("/", "_")
dstfile = os.path.join(dstdir, f"{normalized_layerid}__resized.tif")

if self.message.format.mime == "image/png":
# need to unscale values to color PNGs correctly
command.extend(["-unscale", "-ot", "Float64"])

if fmt.width or fmt.height:
width = fmt.process("width") or 0
height = fmt.process("height") or 0
command.extend(["-outsize", str(width), str(height)])
command.extend(["-r", resample_method])
command.extend([srcfile, dstfile])
self.execute_gdal_command(*command)
return dstfile
# Retrieve requested height and width from message. If either are not
# specified set that size to 0, so the other is used while preserving
# the aspect ratio of the original input.
output_width = self.message.format.process("width") or 0
output_height = self.message.format.process("height") or 0

if output_width or output_height:
self.logger.info(f"Resizing {srcfile} to ({output_width}, {output_height})")
translate_options = TranslateOptions(
resampleAlg=resample_algorithm,
height=output_height,
width=output_width,
)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're missing the unscale option from L476? (nevermind, we stopped supporting PNGs)

Translate(dstfile, srcfile, options=translate_options)
output_file_name = dstfile
else:
return srcfile
output_file_name = srcfile

def reproject(self, layerid, srcfile, dstdir):
crs = self.message.format.process("crs")
interpolation = self.message.format.process("interpolation")
if interpolation in resampling_methods:
resample_method = interpolation
else:
resample_method = "bilinear"
if not crs:
return srcfile
return output_file_name

def reproject(self, layerid: str, srcfile: str, dstdir: str) -> str:
"""Project input to requested target projection using gdalwarp.

Uses the `osgeo.gdal.Warp` Python binding to call gdalwarp.

NOTE: Message parameters are retrieved without marking them as
processed. This means that they will be passed on to any subsequent
steps in a workflow chain. To change this behaviour, the
`harmony_service_lib.message.Message.process` method can be used.

"""
normalized_layerid = layerid.replace("/", "_")
dstfile = os.path.join(dstdir, f"{normalized_layerid}__reprojected.tif")
fmt = self.message.format
if fmt.scaleSize:
xres, yres = fmt.process("scaleSize").x, fmt.process("scaleSize").y
else:
xres, yres = None, None
if fmt.scaleExtent:
box = [
fmt.process("scaleExtent").x.min,
fmt.process("scaleExtent").y.min,
fmt.process("scaleExtent").x.max,
fmt.process("scaleExtent").y.max,
]
else:
box = None

def _regrid(
infile,
outfile,
resampling_mode="bilinear",
ref_crs=None,
ref_box=None,
ref_xres=None,
ref_yres=None,
):
command = ["gdalwarp", "-of", "GTiff", "-overwrite", "-r", resampling_mode]

if ref_crs: # proj, box, xres/yres
command.extend(["-t_srs", ref_crs])
# command.extend([ '-t_srs', "'{ref_crs}'".format(ref_crs=ref_crs)])
if ref_box:
box = [str(x) for x in ref_box]
command.extend(
["-te", box[0], box[1], box[2], box[3], "-te_srs", ref_crs]
)
# command.extend(['-te', box[0], box[1], box[2], box[3],
# '-te_srs', "'{ref_crs`}'".format(ref_crs=ref_crs)])
if ref_xres and ref_yres:
command.extend(["-tr", str(ref_xres), str(ref_yres)])

command.extend([infile, outfile])
self.execute_gdal_command(*command)
return outfile

return _regrid(
srcfile,
dstfile,
resampling_mode=resample_method,
ref_crs=crs,
ref_box=box,
ref_xres=xres,
ref_yres=yres,
)

output_crs = rgetattr(self.message, "format.crs", None)

if output_crs is None:
# No reprojection requested, return path to input file
return srcfile

warp_options_dict = {
"dstSRS": output_crs,
"format": "GTiff",
"outputBoundsSRS": output_crs,
"resampleAlg": get_resample_algorithm(self.message),
}

# Extract requested x and y resolutions for output
output_xres = rgetattr(self.message, "format.scaleSize.x", None)
output_yres = rgetattr(self.message, "format.scaleSize.y", None)

if output_xres is not None and output_yres is not None:
warp_options_dict["xRes"] = output_xres
warp_options_dict["yRes"] = output_yres

# Extract requested bounding box
output_x_min = rgetattr(self.message, "format.scaleExtent.x.min", None)
output_x_max = rgetattr(self.message, "format.scaleExtent.x.max", None)
output_y_min = rgetattr(self.message, "format.scaleExtent.y.min", None)
output_y_max = rgetattr(self.message, "format.scaleExtent.y.max", None)
output_bounds = [output_x_min, output_y_min, output_x_max, output_y_max]

if all(bound is not None for bound in output_bounds):
warp_options_dict["outputBounds"] = output_bounds
Comment thread
owenlittlejohns marked this conversation as resolved.

self.logger.info(f"Projecting to CRS: {output_crs}")
Warp(dstfile, srcfile, options=WarpOptions(**warp_options_dict))

return dstfile

def add_to_result(self, filelist, dstdir):
dstfile = os.path.join(dstdir, "result")
Expand Down Expand Up @@ -693,15 +688,9 @@ def reformat(self, transformed_geotiff_file: str, dstdir: str) -> str:
# * No output specified (GeoTIFF default),
# * An alternative format that would be handled in downstream
# services in the chain.
gdal_subsetter_version = f"gdal_subsetter_version={self.service_version}"
# TODO: Add to consolidated gdal_translate command.
command = [
"gdal_edit.py",
"-mo",
gdal_subsetter_version,
transformed_geotiff_file,
]
self.execute_gdal_command(*command)
with OpenGDAL(transformed_geotiff_file, GA_Update) as dataset:
dataset.SetMetadataItem("gdal_subsetter_version", self.service_version)

formatted_file_path = transformed_geotiff_file

return formatted_file_path
Expand Down Expand Up @@ -866,8 +855,8 @@ def subset2(
# covert to absolute path
tiffile = os.path.abspath(tiffile)
outputfile = os.path.abspath(outputfile)
# RasterFormat = 'GTiff'
tmpfile = f"{os.path.splitext(outputfile)[0]}-tmp.tif"

if bbox or shapefile:
if bbox:
shapefile_out = box_to_shapefile(tiffile, bbox)
Expand All @@ -878,19 +867,21 @@ def subset2(
)
boxproj = shapefile_boxproj(shapefile, tiffile, shapefile_out)

ul_x, ul_y, ul_i, ul_j, cols, rows = calc_subset_envelope_window(
tiffile, boxproj
)

command = ["gdal_translate"]
_, _, ul_i, ul_j, cols, rows = calc_subset_envelope_window(tiffile, boxproj)

if band:
command.extend(["-b", str(band)])
translate_options_dict = {
"srcWin": [str(ul_i), str(ul_j), str(cols), str(rows)],
}

command.extend(["-srcwin", str(ul_i), str(ul_j), str(cols), str(rows)])
if band is not None:
translate_options_dict["bandList"] = [str(band)]

command.extend([tiffile, tmpfile])
self.execute_gdal_command(*command)
self.logger.info(
f"Subsetting {tiffile} with subwindow {translate_options_dict['srcWin']}",
)
Translate(
tmpfile, tiffile, options=TranslateOptions(**translate_options_dict)
)
self.mask_via_combined(tmpfile, shapefile_out, outputfile)
else:
copyfile(tiffile, outputfile)
Expand Down
Loading