Skip to content

Commit 78bc721

Browse files
committed
Merge remote-tracking branch 'origin/main' into favyen/20260413-olmoearth-embedding-as-a-modality
2 parents: aebf485 + 6a32ad2 | commit 78bc721

15 files changed

Lines changed: 617 additions & 88 deletions

File tree

data/rslearn_dataset_configs/config_worldcereal.json

Lines changed: 56 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,13 +6,19 @@
66
"bands": [
77
"tc-annual-temporarycrops-classification"
88
],
9-
"dtype": "float32"
9+
"dtype": "float32",
10+
"nodata_vals": [
11+
255.0
12+
]
1013
}
1114
],
1215
"data_source": {
1316
"class_path": "rslearn.data_sources.worldcereal.WorldCereal",
1417
"init_args": {
1518
"worldcereal_dir": "source_data/worldcereal/"
19+
},
20+
"query_config": {
21+
"space_mode": "SINGLE_COMPOSITE"
1622
}
1723
},
1824
"resampling_method": "nearest",
@@ -24,13 +30,19 @@
2430
"bands": [
2531
"tc-maize-main-irrigation-classification"
2632
],
27-
"dtype": "float32"
33+
"dtype": "float32",
34+
"nodata_vals": [
35+
255.0
36+
]
2837
}
2938
],
3039
"data_source": {
3140
"class_path": "rslearn.data_sources.worldcereal.WorldCereal",
3241
"init_args": {
3342
"worldcereal_dir": "source_data/worldcereal/"
43+
},
44+
"query_config": {
45+
"space_mode": "SINGLE_COMPOSITE"
3446
}
3547
},
3648
"resampling_method": "nearest",
@@ -42,13 +54,19 @@
4254
"bands": [
4355
"tc-maize-main-maize-classification"
4456
],
45-
"dtype": "float32"
57+
"dtype": "float32",
58+
"nodata_vals": [
59+
255.0
60+
]
4661
}
4762
],
4863
"data_source": {
4964
"class_path": "rslearn.data_sources.worldcereal.WorldCereal",
5065
"init_args": {
5166
"worldcereal_dir": "source_data/worldcereal/"
67+
},
68+
"query_config": {
69+
"space_mode": "SINGLE_COMPOSITE"
5270
}
5371
},
5472
"resampling_method": "nearest",
@@ -60,13 +78,19 @@
6078
"bands": [
6179
"tc-maize-second-irrigation-classification"
6280
],
63-
"dtype": "float32"
81+
"dtype": "float32",
82+
"nodata_vals": [
83+
255.0
84+
]
6485
}
6586
],
6687
"data_source": {
6788
"class_path": "rslearn.data_sources.worldcereal.WorldCereal",
6889
"init_args": {
6990
"worldcereal_dir": "source_data/worldcereal/"
91+
},
92+
"query_config": {
93+
"space_mode": "SINGLE_COMPOSITE"
7094
}
7195
},
7296
"resampling_method": "nearest",
@@ -78,13 +102,19 @@
78102
"bands": [
79103
"tc-maize-second-maize-classification"
80104
],
81-
"dtype": "float32"
105+
"dtype": "float32",
106+
"nodata_vals": [
107+
255.0
108+
]
82109
}
83110
],
84111
"data_source": {
85112
"class_path": "rslearn.data_sources.worldcereal.WorldCereal",
86113
"init_args": {
87114
"worldcereal_dir": "source_data/worldcereal/"
115+
},
116+
"query_config": {
117+
"space_mode": "SINGLE_COMPOSITE"
88118
}
89119
},
90120
"resampling_method": "nearest",
@@ -96,13 +126,19 @@
96126
"bands": [
97127
"tc-springcereals-springcereals-classification"
98128
],
99-
"dtype": "float32"
129+
"dtype": "float32",
130+
"nodata_vals": [
131+
255.0
132+
]
100133
}
101134
],
102135
"data_source": {
103136
"class_path": "rslearn.data_sources.worldcereal.WorldCereal",
104137
"init_args": {
105138
"worldcereal_dir": "source_data/worldcereal/"
139+
},
140+
"query_config": {
141+
"space_mode": "SINGLE_COMPOSITE"
106142
}
107143
},
108144
"resampling_method": "nearest",
@@ -114,13 +150,19 @@
114150
"bands": [
115151
"tc-wintercereals-irrigation-classification"
116152
],
117-
"dtype": "float32"
153+
"dtype": "float32",
154+
"nodata_vals": [
155+
255.0
156+
]
118157
}
119158
],
120159
"data_source": {
121160
"class_path": "rslearn.data_sources.worldcereal.WorldCereal",
122161
"init_args": {
123162
"worldcereal_dir": "source_data/worldcereal/"
163+
},
164+
"query_config": {
165+
"space_mode": "SINGLE_COMPOSITE"
124166
}
125167
},
126168
"resampling_method": "nearest",
@@ -132,13 +174,19 @@
132174
"bands": [
133175
"tc-wintercereals-wintercereals-classification"
134176
],
135-
"dtype": "float32"
177+
"dtype": "float32",
178+
"nodata_vals": [
179+
255.0
180+
]
136181
}
137182
],
138183
"data_source": {
139184
"class_path": "rslearn.data_sources.worldcereal.WorldCereal",
140185
"init_args": {
141186
"worldcereal_dir": "source_data/worldcereal/"
187+
},
188+
"query_config": {
189+
"space_mode": "SINGLE_COMPOSITE"
142190
}
143191
},
144192
"resampling_method": "nearest",

olmoearth_pretrain/dataset_creation/rslearn_to_olmoearth/era5.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -71,10 +71,9 @@ def convert_era5(window: Window, olmoearth_path: UPath) -> None:
7171
time_range = Item.deserialize(group[0]).geometry.time_range
7272

7373
raster_dir = window.get_raster_dir(LAYER_NAME, band_set.bands, group_idx)
74-
raster = raster_format.decode_raster(
74+
image = raster_format.decode_raster(
7575
raster_dir, window.projection, window.bounds
76-
)
77-
image = raster.get_chw_array()
76+
).get_chw_array()
7877

7978
year_images.append(image)
8079
year_time_ranges.append(time_range)

olmoearth_pretrain/dataset_creation/rslearn_to_olmoearth/era5_10.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -79,10 +79,9 @@ def convert_era5(window: Window, olmoearth_path: UPath) -> None:
7979
)
8080

8181
raster_dir = window.get_raster_dir(LAYER_NAME, band_set.bands, group_idx)
82-
raster = raster_format.decode_raster(
82+
image = raster_format.decode_raster(
8383
raster_dir, adjusted_projection, adjusted_bounds
84-
)
85-
image = raster.get_chw_array()
84+
).get_chw_array()
8685

8786
year_images.append(image)
8887
year_time_ranges.append(time_range)

olmoearth_pretrain/dataset_creation/rslearn_to_olmoearth/multitemporal_raster.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -156,10 +156,9 @@ def convert_freq(
156156
window.bounds,
157157
adjusted_bounds,
158158
)
159-
raster = GEOTIFF_RASTER_FORMAT.decode_raster(
159+
image = GEOTIFF_RASTER_FORMAT.decode_raster(
160160
raster_dir, adjusted_projection, adjusted_bounds
161-
)
162-
image = raster.get_chw_array()
161+
).get_chw_array()
163162
expected_image_size = band_set.get_expected_image_size(
164163
window_metadata.get_resolution_factor()
165164
)
@@ -288,10 +287,9 @@ def convert_monthly(
288287
if not raster_dir.exists():
289288
break
290289

291-
raster = GEOTIFF_RASTER_FORMAT.decode_raster(
290+
image = GEOTIFF_RASTER_FORMAT.decode_raster(
292291
raster_dir, adjusted_projection, adjusted_bounds
293-
)
294-
image = raster.get_chw_array()
292+
).get_chw_array()
295293
expected_image_size = band_set.get_expected_image_size(
296294
modality.tile_resolution_factor
297295
)

olmoearth_pretrain/dataset_creation/rslearn_to_olmoearth/worldcereal.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -57,10 +57,11 @@ def convert_worldcereal(window: Window, olmoearth_path: UPath) -> None:
5757
continue
5858
window_dir = window.get_raster_dir(band, [band])
5959

60-
raster = GEOTIFF_RASTER_FORMAT.decode_raster(
61-
path=window_dir, projection=window.projection, bounds=window.bounds
60+
ndarrays.append(
61+
GEOTIFF_RASTER_FORMAT.decode_raster(
62+
path=window_dir, projection=window.projection, bounds=window.bounds
63+
).get_chw_array()
6264
)
63-
ndarrays.append(raster.get_chw_array())
6465

6566
assert len(ndarrays) == len(band_set.bands), (
6667
f"Expected {len(band_set.bands)} arrays, got {len(ndarrays)}"

olmoearth_pretrain/dataset_creation/rslearn_to_olmoearth/worldpop.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ def convert_worldpop(window: Window, olmoearth_path: UPath) -> None:
4040
raster_dir = window.get_raster_dir(LAYER_NAME, band_set.bands)
4141
raster = GEOTIFF_RASTER_FORMAT.decode_raster(
4242
raster_dir, window.projection, window.bounds
43-
)
43+
).get_chw_array()
4444

4545
# Clip population count to 0. NODATA is -99999 and includes locations that are
4646
# mapped as "unsettled" but really that is 0 population.

olmoearth_pretrain/dataset_creation/rslearn_to_olmoearth/wri_canopy_height_map.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ def convert_chm(window: Window, olmoearth_path: UPath) -> None:
4242
raster_dir = window.get_raster_dir(LAYER_NAME, band_set.bands)
4343
raster = GEOTIFF_RASTER_FORMAT.decode_raster(
4444
raster_dir, window.projection, window.bounds
45-
)
45+
).get_chw_array()
4646

4747
# Skip areas with any nodata (255).
4848
if raster.array.max() == 255:

olmoearth_pretrain/dataset_creation/wri_canopy_height_map/download_wri_canopy_height_map.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -54,20 +54,20 @@ def process_chm_tif(bucket: str, key: str, out_fname: str) -> None:
5454
bounds[3] * y_factor,
5555
)
5656

57-
array = GeotiffRasterFormat().decode_raster(
57+
raster = GeotiffRasterFormat().decode_raster(
5858
local_fname.parent,
5959
wanted_projection,
6060
wanted_bounds,
6161
resampling=Resampling.average,
6262
fname=local_fname.name,
6363
)
6464
out_upath = UPath(out_fname)
65-
print(f"writing {array.shape} to {out_upath}")
65+
print(f"writing {raster.array.shape} to {out_upath}")
6666
GeotiffRasterFormat().encode_raster(
6767
out_upath.parent,
6868
wanted_projection,
6969
wanted_bounds,
70-
array,
70+
raster,
7171
fname=(out_upath.name + ".tmp"),
7272
)
7373
os.rename(out_upath.path + ".tmp", out_upath.path)

0 commit comments

Comments
 (0)