-
Notifications
You must be signed in to change notification settings - Fork 6
TRT-635: Migrate GDAL commands to Python bindings. #42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9308f57
a1dcef9
62f49ef
56bef34
3d11c28
b37c762
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,20 +1,28 @@ | ||
| """Adapter for the Harmony GDAL Adapter service.""" | ||
|
|
||
| from shutil import copyfile, rmtree | ||
| from subprocess import check_output | ||
| from tempfile import mkdtemp | ||
| from zipfile import ZipFile | ||
| import os | ||
|
|
||
| from harmony_service_lib.adapter import BaseHarmonyAdapter | ||
| from harmony_service_lib.message import Source as HarmonySource | ||
| from harmony_service_lib.message_utility import rgetattr | ||
| from harmony_service_lib.util import ( | ||
| bbox_to_geometry, | ||
| download, | ||
| generate_output_filename, | ||
| stage, | ||
| ) | ||
| from osgeo import gdal, osr, ogr, gdal_array | ||
| from osgeo.gdal import ( | ||
| Translate, | ||
| TranslateOptions, | ||
| VectorTranslate, | ||
| VectorTranslateOptions, | ||
| Warp, | ||
| WarpOptions, | ||
| ) | ||
| from osgeo.gdalconst import GA_ReadOnly, GA_Update, GMF_PER_DATASET | ||
| from pyproj import Proj | ||
| from pystac import Asset, Item | ||
|
|
@@ -44,13 +52,13 @@ | |
| from gdal_subsetter.utilities import ( | ||
| get_file_type, | ||
| get_geotiff_variables, | ||
| get_resample_algorithm, | ||
| get_unzipped_geotiffs, | ||
| get_version, | ||
| has_world_file, | ||
| OpenGDAL, | ||
| process_flags, | ||
| rename_file, | ||
| resampling_methods, | ||
| ) | ||
|
|
||
|
|
||
|
|
@@ -336,26 +344,21 @@ def update_layernames(self, file_name, layer_names): | |
| for index, layer_name in enumerate(layer_names, start=1): | ||
| dataset.GetRasterBand(index).SetDescription(layer_name) | ||
|
|
||
| def execute_gdal_command(self, *args) -> list[str]: | ||
| """This is used to execute gdal* commands.""" | ||
| self.logger.info( | ||
| args[0] + " " + " ".join(["'{}'".format(arg) for arg in args[1:]]) | ||
| ) | ||
| result_str = check_output(args).decode("utf-8") | ||
| return result_str.split("\n") | ||
|
Comment on lines
-339
to
-345
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🎉 |
||
| def varsubset(self, srcfile: str, dstfile: str, band: int | None = None) -> str: | ||
| """Perform a single-band variable subset of a GeoTIFF. | ||
|
|
||
| If no band is supplied, return the path of the input file. | ||
|
|
||
| def varsubset(self, srcfile, dstfile, band=None): | ||
| if not band: | ||
| """ | ||
| if band is None: | ||
| return srcfile | ||
|
|
||
| command = ["gdal_translate"] | ||
| command.extend(["-b", str(band)]) | ||
| command.extend([srcfile, dstfile]) | ||
| self.execute_gdal_command(*command) | ||
| self.logger.info(f"Performing variable subset on {srcfile} for Band {band}") | ||
| Translate(dstfile, srcfile, options=TranslateOptions(bandList=[band])) | ||
|
|
||
| return dstfile | ||
|
|
||
| def subset(self, layerid, srcfile, dstdir, band=None): | ||
| def subset(self, layerid, srcfile: str, dstdir: str, band: int | None = None): | ||
| """Subset layer to region defined in message.subset.""" | ||
| normalized_layerid = layerid.replace("/", "_") | ||
| subset = self.message.subset | ||
|
|
@@ -447,109 +450,101 @@ def get_shapefile(self, subsetshape, dstdir): | |
| tmpfile_geojson = convert_to_multipolygon(shapefile, tmpfile_geojson, buf=buf) | ||
|
|
||
| # convert into ESRI shapefile | ||
| outfile = f"{fileprex}.shp" | ||
| command = ["ogr2ogr", "-f", "ESRI Shapefile"] | ||
| command.extend([outfile, tmpfile_geojson]) | ||
| self.execute_gdal_command(*command) | ||
| output_shape_file = f"{fileprex}.shp" | ||
| VectorTranslate( | ||
| output_shape_file, | ||
| tmpfile_geojson, | ||
| options=VectorTranslateOptions(format="ESRI Shapefile"), | ||
| ) | ||
|
|
||
| return outfile | ||
| return output_shape_file | ||
|
|
||
| def resize(self, layerid, srcfile, dstdir): | ||
| """Resizes the input layer | ||
| def resize(self, layerid: str, srcfile: str, dstdir: str) -> str: | ||
| """Resizes the input layer and returns the name of the output file. | ||
|
|
||
| Uses a call to gdal_translate using information in message.format. | ||
| Uses the `osgeo.gdal.Translate` Python binding to execute | ||
| gdal_translate using information in the Harmony message. | ||
|
|
||
| NOTE: Message parameters are retrieved without marking them as | ||
| processed. This means that they will be passed on to any subsequent | ||
| steps in a workflow chain. To change this behaviour, the | ||
| `harmony_service_lib.message.Message.process` method can be used. | ||
|
|
||
| """ | ||
| interpolation = self.message.format.process("interpolation") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doesn't
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right -
So:
I guess I don't have an answer for what we should do, but definitely acknowledge this is now doing something different. What are your thoughts?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't have strong opinions. This used to remove an "interpolation" value so it wouldn't be available to downstream services, but this is the service that should do interpolation probably. I don't think a lot of services are using the process mechanism. But I also don't know if they should be. How is that for a non-answer?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 😆 I added a disclaimer in the method and function documentation strings that makes it clear the parameters aren't being removed in b37c762. |
||
| if interpolation in resampling_methods: | ||
| resample_method = interpolation | ||
| else: | ||
| resample_method = "bilinear" | ||
| resample_algorithm = get_resample_algorithm(self.message) | ||
|
|
||
| command = ["gdal_translate"] | ||
| fmt = self.message.format | ||
| normalized_layerid = layerid.replace("/", "_") | ||
| dstfile = os.path.join(dstdir, f"{normalized_layerid}__resized.tif") | ||
|
|
||
| if self.message.format.mime == "image/png": | ||
| # need to unscale values to color PNGs correctly | ||
| command.extend(["-unscale", "-ot", "Float64"]) | ||
|
|
||
| if fmt.width or fmt.height: | ||
| width = fmt.process("width") or 0 | ||
| height = fmt.process("height") or 0 | ||
| command.extend(["-outsize", str(width), str(height)]) | ||
| command.extend(["-r", resample_method]) | ||
| command.extend([srcfile, dstfile]) | ||
| self.execute_gdal_command(*command) | ||
| return dstfile | ||
| # Retrieve requested height and width from message. If either are not | ||
| # specified set that size to 0, so the other is used while preserving | ||
| # the aspect ratio of the original input. | ||
| output_width = self.message.format.process("width") or 0 | ||
| output_height = self.message.format.process("height") or 0 | ||
|
|
||
| if output_width or output_height: | ||
| self.logger.info(f"Resizing {srcfile} to ({output_width}, {output_height})") | ||
| translate_options = TranslateOptions( | ||
| resampleAlg=resample_algorithm, | ||
| height=output_height, | ||
| width=output_width, | ||
| ) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| Translate(dstfile, srcfile, options=translate_options) | ||
| output_file_name = dstfile | ||
| else: | ||
| return srcfile | ||
| output_file_name = srcfile | ||
|
|
||
| def reproject(self, layerid, srcfile, dstdir): | ||
| crs = self.message.format.process("crs") | ||
| interpolation = self.message.format.process("interpolation") | ||
| if interpolation in resampling_methods: | ||
| resample_method = interpolation | ||
| else: | ||
| resample_method = "bilinear" | ||
| if not crs: | ||
| return srcfile | ||
| return output_file_name | ||
|
|
||
| def reproject(self, layerid: str, srcfile: str, dstdir: str) -> str: | ||
| """Project input to requested target projection using gdalwarp. | ||
|
|
||
| Uses the `osgeo.gdal.Warp` Python binding to call gdalwarp. | ||
|
|
||
| NOTE: Message parameters are retrieved without marking them as | ||
| processed. This means that they will be passed on to any subsequent | ||
| steps in a workflow chain. To change this behaviour, the | ||
| `harmony_service_lib.message.Message.process` method can be used. | ||
|
|
||
| """ | ||
| normalized_layerid = layerid.replace("/", "_") | ||
| dstfile = os.path.join(dstdir, f"{normalized_layerid}__reprojected.tif") | ||
| fmt = self.message.format | ||
| if fmt.scaleSize: | ||
| xres, yres = fmt.process("scaleSize").x, fmt.process("scaleSize").y | ||
| else: | ||
| xres, yres = None, None | ||
| if fmt.scaleExtent: | ||
| box = [ | ||
| fmt.process("scaleExtent").x.min, | ||
| fmt.process("scaleExtent").y.min, | ||
| fmt.process("scaleExtent").x.max, | ||
| fmt.process("scaleExtent").y.max, | ||
| ] | ||
| else: | ||
| box = None | ||
|
|
||
| def _regrid( | ||
| infile, | ||
| outfile, | ||
| resampling_mode="bilinear", | ||
| ref_crs=None, | ||
| ref_box=None, | ||
| ref_xres=None, | ||
| ref_yres=None, | ||
| ): | ||
| command = ["gdalwarp", "-of", "GTiff", "-overwrite", "-r", resampling_mode] | ||
|
|
||
| if ref_crs: # proj, box, xres/yres | ||
| command.extend(["-t_srs", ref_crs]) | ||
| # command.extend([ '-t_srs', "'{ref_crs}'".format(ref_crs=ref_crs)]) | ||
| if ref_box: | ||
| box = [str(x) for x in ref_box] | ||
| command.extend( | ||
| ["-te", box[0], box[1], box[2], box[3], "-te_srs", ref_crs] | ||
| ) | ||
| # command.extend(['-te', box[0], box[1], box[2], box[3], | ||
| # '-te_srs', "'{ref_crs`}'".format(ref_crs=ref_crs)]) | ||
| if ref_xres and ref_yres: | ||
| command.extend(["-tr", str(ref_xres), str(ref_yres)]) | ||
|
|
||
| command.extend([infile, outfile]) | ||
| self.execute_gdal_command(*command) | ||
| return outfile | ||
|
|
||
| return _regrid( | ||
| srcfile, | ||
| dstfile, | ||
| resampling_mode=resample_method, | ||
| ref_crs=crs, | ||
| ref_box=box, | ||
| ref_xres=xres, | ||
| ref_yres=yres, | ||
| ) | ||
|
|
||
| output_crs = rgetattr(self.message, "format.crs", None) | ||
|
|
||
| if output_crs is None: | ||
| # No reprojection requested, return path to input file | ||
| return srcfile | ||
|
|
||
| warp_options_dict = { | ||
| "dstSRS": output_crs, | ||
| "format": "GTiff", | ||
| "outputBoundsSRS": output_crs, | ||
| "resampleAlg": get_resample_algorithm(self.message), | ||
| } | ||
|
|
||
| # Extract requested x and y resolutions for output | ||
| output_xres = rgetattr(self.message, "format.scaleSize.x", None) | ||
| output_yres = rgetattr(self.message, "format.scaleSize.y", None) | ||
|
|
||
| if output_xres is not None and output_yres is not None: | ||
| warp_options_dict["xRes"] = output_xres | ||
| warp_options_dict["yRes"] = output_yres | ||
|
|
||
| # Extract requested bounding box | ||
| output_x_min = rgetattr(self.message, "format.scaleExtent.x.min", None) | ||
| output_x_max = rgetattr(self.message, "format.scaleExtent.x.max", None) | ||
| output_y_min = rgetattr(self.message, "format.scaleExtent.y.min", None) | ||
| output_y_max = rgetattr(self.message, "format.scaleExtent.y.max", None) | ||
| output_bounds = [output_x_min, output_y_min, output_x_max, output_y_max] | ||
|
|
||
| if all(bound is not None for bound in output_bounds): | ||
| warp_options_dict["outputBounds"] = output_bounds | ||
|
owenlittlejohns marked this conversation as resolved.
|
||
|
|
||
| self.logger.info(f"Projecting to CRS: {output_crs}") | ||
| Warp(dstfile, srcfile, options=WarpOptions(**warp_options_dict)) | ||
|
|
||
| return dstfile | ||
|
|
||
| def add_to_result(self, filelist, dstdir): | ||
| dstfile = os.path.join(dstdir, "result") | ||
|
|
@@ -693,15 +688,9 @@ def reformat(self, transformed_geotiff_file: str, dstdir: str) -> str: | |
| # * No output specified (GeoTIFF default), | ||
| # * An alternative format that would be handled in downstream | ||
| # services in the chain. | ||
| gdal_subsetter_version = f"gdal_subsetter_version={self.service_version}" | ||
| # TODO: Add to consolidated gdal_translate command. | ||
| command = [ | ||
| "gdal_edit.py", | ||
| "-mo", | ||
| gdal_subsetter_version, | ||
| transformed_geotiff_file, | ||
| ] | ||
| self.execute_gdal_command(*command) | ||
| with OpenGDAL(transformed_geotiff_file, GA_Update) as dataset: | ||
| dataset.SetMetadataItem("gdal_subsetter_version", self.service_version) | ||
|
|
||
| formatted_file_path = transformed_geotiff_file | ||
|
|
||
| return formatted_file_path | ||
|
|
@@ -866,8 +855,8 @@ def subset2( | |
| # covert to absolute path | ||
| tiffile = os.path.abspath(tiffile) | ||
| outputfile = os.path.abspath(outputfile) | ||
| # RasterFormat = 'GTiff' | ||
| tmpfile = f"{os.path.splitext(outputfile)[0]}-tmp.tif" | ||
|
|
||
| if bbox or shapefile: | ||
| if bbox: | ||
| shapefile_out = box_to_shapefile(tiffile, bbox) | ||
|
|
@@ -878,19 +867,21 @@ def subset2( | |
| ) | ||
| boxproj = shapefile_boxproj(shapefile, tiffile, shapefile_out) | ||
|
|
||
| ul_x, ul_y, ul_i, ul_j, cols, rows = calc_subset_envelope_window( | ||
| tiffile, boxproj | ||
| ) | ||
|
|
||
| command = ["gdal_translate"] | ||
| _, _, ul_i, ul_j, cols, rows = calc_subset_envelope_window(tiffile, boxproj) | ||
|
|
||
| if band: | ||
| command.extend(["-b", str(band)]) | ||
| translate_options_dict = { | ||
| "srcWin": [str(ul_i), str(ul_j), str(cols), str(rows)], | ||
| } | ||
|
|
||
| command.extend(["-srcwin", str(ul_i), str(ul_j), str(cols), str(rows)]) | ||
| if band is not None: | ||
| translate_options_dict["bandList"] = [str(band)] | ||
|
|
||
| command.extend([tiffile, tmpfile]) | ||
| self.execute_gdal_command(*command) | ||
| self.logger.info( | ||
| f"Subsetting {tiffile} with subwindow {translate_options_dict['srcWin']}", | ||
| ) | ||
| Translate( | ||
| tmpfile, tiffile, options=TranslateOptions(**translate_options_dict) | ||
| ) | ||
| self.mask_via_combined(tmpfile, shapefile_out, outputfile) | ||
| else: | ||
| copyfile(tiffile, outputfile) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's a head scratcher that I won't get pulled into. But good catch.