Skip to content

Commit e0153a0

Browse files
committed
Adjusted bounds of map to be fully global
1 parent c8b6fe9 commit e0153a0

File tree

1 file changed

+87
-150
lines changed

1 file changed

+87
-150
lines changed

ISRIC/convert.py

Lines changed: 87 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818

1919
# variable info
2020
var = 'cSoil'
21-
long_name = 'carbon mass in soil pool'
21+
anc_var = 'coefficient_of_variation'
22+
var_long_name = 'carbon mass in soil pool'
2223
source_units = 't ha-1'
2324
target_units = 'kg m-2'
24-
anc_var = 'coefficient_of_variation'
2525
sdate = datetime.datetime(1905, 3, 31, 0, 0, 0) # Found here: https://data.isric.org/geonetwork/srv/api/records/713396f4-1687-11ea-a7c0-a0481ca9e724
2626
edate = datetime.datetime(2016, 7, 4, 0, 0, 0)
2727

@@ -31,11 +31,11 @@
3131
local_u_data = 'ocs_0-30cm_uncertainty'
3232
github_path = 'https://github.com/rubisco-sfa/ILAMB-Data/blob/master/ISRIC/convert.py'
3333
output_path = 'soilgrids2_cSoil.nc'
34-
cellsize = 1000 # download res (m) in homolosine proj; I chose a higher res (but less than 0.5 in EPSG:4326) for processing speed
34+
cellsize = 1000 # download res (m) in the Homolosine projection; I chose a higher res (but less than 0.5 in EPSG:4326) for processing speed
3535
nodata = -32768
3636

3737
# regridding info
38-
proj = 'EPSG:4326'
38+
target_epsg = '4326'
3939
res = 0.5
4040
interp = 'average'
4141

@@ -46,125 +46,86 @@
4646
gdal.UseExceptions()
4747

4848
# 1. download data
49-
def download_data(local_data, remote_data, cellsize):
49+
def download_data(local_file, remote_file, cellsize, target_epsg):
5050

51-
local_data_wext = f'{local_data}.tif'
52-
if not os.path.isfile(local_data_wext):
51+
local_file_wext = f'{local_file}.tif'
52+
if not os.path.isfile(local_file_wext):
5353

5454
# download data using GDAL in bash
5555
print('Creating VRT ...')
56-
create_vrt = f"""
57-
gdal_translate --config GDAL_HTTP_UNSAFESSL YES \
58-
-of VRT -tr {cellsize} {cellsize} \
59-
{remote_data}/ocs/{local_data}.vrt \
60-
{local_data}.vrt
61-
"""
56+
create_vrt = (
57+
f'gdal_translate --config GDAL_HTTP_UNSAFESSL YES '
58+
f'-of VRT '
59+
f'-tr {cellsize} {cellsize} '
60+
f'{remote_file}/ocs/{local_file}.vrt '
61+
f'{local_file}.vrt'
62+
)
63+
print(create_vrt)
6264
process = subprocess.Popen(create_vrt.split(), stdout=None)
6365
process.wait()
6466

65-
# reproject from Homolosine to EPSG 4326
67+
# reproject from the Homolosine projection to the target EPSG (4326)
6668
print('Reprojecting VRT ...')
67-
reproject = f"""
68-
gdalwarp --config GDAL_HTTP_UNSAFESSL YES \
69-
-overwrite -t_srs EPSG:4326 -of VRT \
70-
{local_data}.vrt \
71-
{local_data}_4326.vrt
72-
"""
69+
reproject = (
70+
f'gdalwarp --config GDAL_HTTP_UNSAFESSL YES '
71+
f'-overwrite '
72+
f'-t_srs EPSG:{target_epsg} '
73+
f'-of VRT '
74+
f'-te -180 -90 180 90 '
75+
f'{local_file}.vrt '
76+
f'{local_file}_{target_epsg}.vrt'
77+
)
78+
print(reproject)
7379
process = subprocess.Popen(reproject.split(), stdout=None)
7480
process.wait()
7581

7682
# create tiff
7783
print('Creating TIFF ...')
78-
create_tif = f"""
79-
gdal_translate --config GDAL_HTTP_UNSAFESSL YES \
80-
-co TILED=YES -co COMPRESS=DEFLATE -co PREDICTOR=2 -co BIGTIFF=YES \
81-
{local_data}_4326.vrt \
82-
{local_data}.tif
83-
"""
84+
create_tif = (
85+
f'gdal_translate --config GDAL_HTTP_UNSAFESSL YES '
86+
f'-co TILED=YES -co COMPRESS=DEFLATE -co PREDICTOR=2 -co BIGTIFF=YES '
87+
f'{local_file}_{target_epsg}.vrt '
88+
f'{local_file}.tif'
89+
)
90+
print(create_tif)
8491
process = subprocess.Popen(create_tif.split(), stdout=None)
8592
process.wait()
8693

87-
download_stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(os.path.getmtime(local_data_wext)))
94+
download_stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(os.path.getmtime(local_file_wext)))
8895
return download_stamp
8996

90-
# 3. open data
91-
def open_data(data_path, res, interp, proj, nodata):
92-
print('Opening and resampling raster ...')
93-
94-
# Map string interp to Resampling enum
95-
resampling_methods = {
96-
"nearest": Resampling.nearest,
97-
"bilinear": Resampling.bilinear,
98-
"cubic": Resampling.cubic,
99-
"cubic_spline": Resampling.cubic_spline,
100-
"lanczos": Resampling.lanczos,
101-
"average": Resampling.average,
102-
"mode": Resampling.mode,
103-
"max": Resampling.max,
104-
"min": Resampling.min,
105-
"med": Resampling.med,
106-
"q1": Resampling.q1,
107-
"q3": Resampling.q3,
108-
"sum": Resampling.sum,
109-
"rms": Resampling.rms,
110-
}
111-
112-
# Validate interp
113-
if interp not in resampling_methods:
114-
raise ValueError(
115-
f"Invalid resampling method '{interp}'. Choose from: {list(resampling_methods.keys())}"
116-
)
117-
resampling = resampling_methods[interp]
118-
119-
# Open the raster
120-
data = rxr.open_rasterio(data_path, band_as_variable=True)
121-
122-
# Always reproject with resolution adjustment
123-
data = data.rio.reproject(
124-
dst_crs=proj,
125-
shape=(int(180/res), int(360/res)),
126-
resampling=resampling
127-
)
128-
129-
# Get current bounds
130-
target_bounds = (-180, -90, 180, 90)
131-
current_bounds = data.rio.bounds()
132-
133-
# Conditional clipping or padding
134-
if (current_bounds[0] < target_bounds[0] or # Left bound smaller
135-
current_bounds[1] < target_bounds[1] or # Bottom bound smaller
136-
current_bounds[2] > target_bounds[2] or # Right bound larger
137-
current_bounds[3] > target_bounds[3]): # Top bound larger
138-
# Clip to match the target bounds
139-
print("Clipping data to target bounds...")
140-
data = data.rio.clip_box(*target_bounds)
141-
elif (current_bounds[0] > target_bounds[0] or # Left bound larger
142-
current_bounds[1] > target_bounds[1] or # Bottom bound larger
143-
current_bounds[2] < target_bounds[2] or # Right bound smaller
144-
current_bounds[3] < target_bounds[3]): # Top bound smaller
145-
# Pad to match the target bounds
146-
print("Padding data to target bounds...")
147-
data = data.rio.pad_box(*target_bounds)
148-
149-
# Adjust the affine transform to match the exact bounds
150-
transform = data.rio.transform()
151-
new_transform = transform * transform.translation(
152-
xoff=(target_bounds[0] - transform.c) / transform.a,
153-
yoff=(target_bounds[1] - transform.f) / -transform.e
154-
)
155-
data = data.rio.write_transform(new_transform)
156-
157-
# Set nodata value to NaN
158-
data = data.where(data != nodata, np.nan)
97+
# 2. create xarray dataset
98+
def create_xarray(local_data, local_u_data, anc_var, target_epsg, nodata):
99+
100+
def open_raster(local_data, target_epsg):
101+
102+
# read a .tif file into an xarray
103+
data = rxr.open_rasterio(local_data, band_as_variable=True)
104+
epsg_code = int(data.rio.crs.to_epsg())
105+
print(epsg_code)
106+
if epsg_code != target_epsg:
107+
data = data.rio.reproject(dst_crs=f'EPSG:{target_epsg}')
108+
return data
109+
110+
# open data and error files
111+
data = open_raster(local_data, target_epsg)
112+
errd = open_raster(local_u_data, target_epsg)
113+
errd_aligned = errd.rio.reproject_match(data)
114+
data[anc_var] = errd_aligned['band_1']
159115

160-
# Debugging: Print the resulting dimensions and resolution
161-
print(f"Output CRS: {data.rio.crs}")
162-
print("Output bounds:", data.rio.bounds())
163-
print(f"Output resolution: {data.rio.resolution()}")
164-
print(f"Output dimensions: {data.sizes}")
116+
# set nodata
117+
data = data.where(data != nodata, np.nan)
165118

166119
return data
167120

121+
# 3. resample to 0.5 degrees
122+
def coarsen(data, target_res):
123+
124+
resampled_data = data.coarsen(x=(int(target_res / abs(data.rio.resolution()[0]))),
125+
y=(int(target_res / abs(data.rio.resolution()[1]))),
126+
boundary='trim').mean()
127+
return resampled_data
128+
168129
# 4(a). sub-function to convert units
169130
def convert_units(data_array, target_units):
170131
print('Converting units ...')
@@ -189,57 +150,32 @@ def convert_units(data_array, target_units):
189150
return new_data_array
190151

191152
# 4. create and format netcdf
192-
def create_netcdf(data, errd, output_path, var, long_name, source_units, target_units, anc_var, sdate, edate, download_stamp):
153+
def create_netcdf(data, var, var_long_name, anc_var,
154+
source_units, target_units,
155+
sdate, edate, download_stamp,
156+
output_path):
193157
print('Creating netcdf ...')
194158

195-
# rename dims
159+
# rename bands and mask invalid data
196160
ds = data.rename({'x': 'lon', 'y': 'lat', 'band_1':var})
197-
er = errd.rename({'x': 'lon', 'y': 'lat', 'band_1':anc_var})
198161

199-
# numpy array of time bounds
162+
# create time dimension
200163
tb_arr = np.asarray([
201164
[cf.DatetimeNoLeap(sdate.year, sdate.month, sdate.day)],
202165
[cf.DatetimeNoLeap(edate.year, edate.month, edate.day)]
203166
]).T
204-
205-
# np array to xr data array (time bounds)
206167
tb_da = xr.DataArray(tb_arr, dims=('time', 'nv'))
207-
208-
# add time dimension and time bounds attribute
209168
ds = ds.expand_dims(time=tb_da.mean(dim='nv'))
210169
ds['time_bounds'] = tb_da
211-
er = er.expand_dims(time=tb_da.mean(dim='nv'))
212-
er['time_bounds'] = tb_da
213-
214-
# add standard error variable
215-
ds[anc_var] = er[anc_var]
216-
217-
# https://cfconventions.org/Data/cf-documents/requirements-recommendations/conformance-1.8.html
218-
# edit time attributes
219-
t_attrs = {
220-
'axis':'T',
221-
'long_name':'time'}
222-
223-
# edit lat/lon attributes
224-
y_attrs = {
225-
'axis':'Y',
226-
'long_name':'latitude',
227-
'units':'degrees_north'}
228-
x_attrs = {
229-
'axis':'X',
230-
'long_name':'longitude',
231-
'units':'degrees_east'}
232-
233-
# edit variable attributes
234-
v_attrs = {
235-
'long_name': long_name,
236-
'units': source_units,
237-
'ancillary_variables': anc_var}
238-
e_attrs = {
239-
'long_name': f'{var} {anc_var}',
240-
'units': 1}
241-
242-
# set attributes
170+
171+
# dictionaries for formatting each dimension and variable
172+
t_attrs = {'axis': 'T', 'long_name': 'time'}
173+
y_attrs = {'axis': 'Y', 'long_name': 'latitude', 'units': 'degrees_north'}
174+
x_attrs = {'axis': 'X', 'long_name': 'longitude', 'units': 'degrees_east'}
175+
v_attrs = {'long_name': var_long_name, 'units': source_units, 'ancillary_variables': anc_var}
176+
e_attrs = {'long_name': f'{var} {anc_var}', 'units': 1}
177+
178+
# apply formatting
243179
ds['time'].attrs = t_attrs
244180
ds['time_bounds'].attrs['long_name'] = 'time_bounds'
245181
ds['lat'].attrs = y_attrs
@@ -295,18 +231,19 @@ def create_netcdf(data, errd, output_path, var, long_name, source_units, target_
295231
# use all steps above to convert the data into a netcdf
296232
def main():
297233

298-
# open data & resample
299-
download_stamp = download_data(local_data, remote_data, cellsize)
300-
data = open_data(f'{local_data}.tif', res, interp, proj, nodata)
234+
# download data
235+
download_stamp = download_data(local_data, remote_data, cellsize, target_epsg)
236+
_ = download_data(local_u_data, remote_data, cellsize, target_epsg)
301237

302-
# open error data & resample
303-
_ = download_data(local_u_data, remote_data, cellsize)
304-
errd = open_data(f'{local_u_data}_resampled.tif', res, interp, proj, nodata)
238+
# create xarray and re-grid
239+
data = create_xarray(f'{local_data}.tif', f'{local_u_data}.tif', anc_var, target_epsg, nodata)
240+
data = coarsen(data, res)
305241

306242
# export and format netcdf
307-
create_netcdf(data, errd, output_path, var, long_name,
308-
source_units, target_units, anc_var,
309-
sdate, edate, download_stamp)
243+
create_netcdf(data, var, var_long_name, anc_var,
244+
source_units, target_units,
245+
sdate, edate, download_stamp,
246+
output_path)
310247

311248
if __name__ == "__main__":
312249
main()

0 commit comments

Comments
 (0)