Skip to content

Commit e37e18f

Browse files
export ee image to gcs (#2)
* export ee image to GCS * make sure key prefix ends with slash * update readme * update tests * update readme * update docstring * update docstring
1 parent c477f41 commit e37e18f

5 files changed

Lines changed: 311 additions & 2 deletions

File tree

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,11 @@ earthengine authenticate
2828
Export coverage data to a BigQuery table:
2929

3030
```bash
31-
aef-export coverage BQ_DATASET_NAME BQ_TABLE_NAME
31+
aef-export coverage <BQ_DATASET_NAME> <BQ_TABLE_NAME>
32+
```
33+
34+
Export a single image to GCS. An example image ID is `GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL/xs6bvzj41inm2e1cc`. Exporting embeddings in their quantized form (int8) is recommended to reduce storage costs.
35+
36+
```bash
37+
aef-export image <IMAGE_ID> <GCS_BUCKET_NAME> <GCS_KEY_PREFIX> --quantize
3238
```

aef_export/cli.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import click
22

3+
from aef_export.embeddings import export_image
34
from aef_export.coverage import export_image_collection
45
from aef_export.settings import get_settings
56
from aef_export.utils import initialize_ee
@@ -30,3 +31,26 @@ def coverage(bq_dataset_name: str, bq_table_name: str):
3031
img_collection_name=settings.image_collection_name,
3132
)
3233
click.echo(f"Task id: {task_id}")
34+
35+
36+
@app.command()
@click.argument("image_id")
@click.argument("gcs_bucket_name")
@click.argument("gcs_key_prefix")
@click.option("--quantize", is_flag=True, default=False)
def image(
    image_id: str, gcs_bucket_name: str, gcs_key_prefix: str, quantize: bool = False
):
    """Export a single Earth Engine image to GCS.

    Exports the specified Earth Engine Image asset to Google Cloud Storage as a
    Cloud Optimized GeoTIFF. Optionally applies quantization to reduce file size.
    """
    project = get_settings().google_cloud_project

    # The key prefix handed to the exporter always ends with a slash; append
    # one only when the caller did not supply it.
    key_prefix = (
        gcs_key_prefix if gcs_key_prefix.endswith("/") else f"{gcs_key_prefix}/"
    )

    # Earth Engine is initialized before the export is submitted.
    initialize_ee(project)
    task_id = export_image(image_id, gcs_bucket_name, key_prefix, quantize)
    click.echo(f"Task id: {task_id}")

aef_export/embeddings.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import ee
2+
3+
from aef_export.utils import set_workload_tag
4+
5+
6+
def _quantize_embeddings(image: ee.Image) -> ee.Image:
    """Quantize floating-point embedding values to signed 8-bit integers.

    Every value ``x`` goes through a signed power-law transform,
    ``sign(x) * |x| ** (1 / 2)``, is multiplied by 127.5, rounded, clamped to
    ``[-127, 127]`` and finally cast to int8. This is the scheme the AEF paper
    describes; it shrinks storage while keeping the relative magnitudes of
    each vector.

    Args:
        image: Earth Engine Image holding the embedding values to quantize.

    Returns:
        Earth Engine Image whose values are the quantized int8 embeddings.
    """
    root_order = 2.0
    gain = 127.5
    lower_bound, upper_bound = -127, 127

    # Signed power-law: take the root of the magnitude, then restore the sign.
    magnitude = image.abs()
    exponent = ee.Number(1.0).divide(root_order)
    compressed = magnitude.pow(exponent)
    signed = compressed.multiply(image.signum())

    # Scale, round to the nearest integer, and clamp to the bounds before
    # casting to int8.
    rounded = signed.multiply(gain).round()
    return rounded.clamp(lower_bound, upper_bound).int8()
29+
30+
31+
def export_image(
    image_id: str, gcs_bucket_name: str, gcs_key_prefix: str, quantize: bool = False
) -> str:
    """Start an Earth Engine export of one image to Google Cloud Storage.

    The image is written as a Cloud Optimized GeoTIFF. When ``quantize`` is
    set, the image is first passed through ``_quantize_embeddings`` to reduce
    file size. The export task is created and started inside the
    ``"export-image"`` workload tag context.

    Args:
        image_id: Earth Engine image id to export.
        gcs_bucket_name: Google Cloud Storage bucket that receives the export.
        gcs_key_prefix: GCS object key prefix for the exported file.
        quantize: Whether to quantize the image values first. Defaults to False.

    Returns:
        Earth Engine task ID of the started export.

    Example:
        >>> task_id = export_image(
        ...     "GOOGLE/SATELLITE_EMBEDDING/V1/ANNUAL/xs6bvzj41inm2e1cc",
        ...     "my-bucket",
        ...     "my-key-prefix",
        ...     quantize=True
        ... )
    """
    source = ee.Image(image_id)
    to_export = _quantize_embeddings(source) if quantize else source

    # The last path segment of the asset id labels the task description.
    asset_name = image_id.split("/")[-1]

    with set_workload_tag("export-image"):
        export_task = ee.batch.Export.image.toCloudStorage(
            image=to_export,
            description=f"export-image-{asset_name}",
            bucket=gcs_bucket_name,
            fileNamePrefix=gcs_key_prefix,
            maxPixels=2e10,
            formatOptions={"cloudOptimized": True},
        )
        export_task.start()

    return export_task.id

tests/unit/test_cli.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from unittest.mock import patch, MagicMock
22
from click.testing import CliRunner
33

4-
from aef_export.cli import coverage
4+
from aef_export.cli import coverage, image
55

66

77
@patch("aef_export.cli.export_image_collection")
@@ -47,3 +47,70 @@ def test_coverage_command_missing_arguments():
4747
result = runner.invoke(coverage, ["test_dataset"])
4848
assert result.exit_code == 2
4949
assert "Missing argument" in result.output
50+
51+
52+
@patch("aef_export.cli.export_image")
@patch("aef_export.cli.initialize_ee")
@patch("aef_export.cli.get_settings")
def test_image_command_success(
    mock_get_settings, mock_initialize_ee, mock_export_image
):
    """Without ``--quantize`` the command exports with quantize=False."""
    # Arrange: settings expose the project id, the exporter returns a task id.
    mock_get_settings.return_value = MagicMock(google_cloud_project="test-project")
    mock_export_image.return_value = "image_task_123"
    cli_args = ["PROJECTS/test/assets/test_image", "test-bucket", "test/prefix"]

    # Act
    outcome = CliRunner().invoke(image, cli_args)

    # Assert: collaborators were driven as expected; the prefix gained a
    # trailing slash on its way to the exporter.
    mock_get_settings.assert_called_once()
    mock_initialize_ee.assert_called_once_with("test-project")
    mock_export_image.assert_called_once_with(
        "PROJECTS/test/assets/test_image", "test-bucket", "test/prefix/", False
    )

    # Assert: the command succeeded and echoed the task id.
    assert outcome.exit_code == 0
    assert "Task id: image_task_123" in outcome.output
79+
80+
81+
@patch("aef_export.cli.export_image")
@patch("aef_export.cli.initialize_ee")
@patch("aef_export.cli.get_settings")
def test_image_command_with_quantize_flag(
    mock_get_settings, mock_initialize_ee, mock_export_image
):
    """Passing ``--quantize`` forwards quantize=True to the exporter."""
    # Arrange
    mock_get_settings.return_value = MagicMock(google_cloud_project="test-project")
    mock_export_image.return_value = "quantized_task_456"
    asset_id = "PROJECTS/test/assets/embedding_image"
    bucket = "embedding-bucket"

    # Act
    outcome = CliRunner().invoke(
        image, [asset_id, bucket, "quantized/prefix", "--quantize"]
    )

    # Assert: collaborators were driven as expected, with the flag set and
    # the prefix normalized to end with a slash.
    mock_get_settings.assert_called_once()
    mock_initialize_ee.assert_called_once_with("test-project")
    mock_export_image.assert_called_once_with(
        asset_id, bucket, "quantized/prefix/", True
    )

    # Assert: the command succeeded and echoed the task id.
    assert outcome.exit_code == 0
    assert "Task id: quantized_task_456" in outcome.output

tests/unit/test_embeddings.py

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
from unittest.mock import MagicMock, patch
2+
3+
from aef_export.embeddings import _quantize_embeddings, export_image
4+
5+
6+
@patch("aef_export.embeddings.ee")
def test_quantize_embeddings_applies_correct_transformations(mock_ee):
    """Each step of the quantization chain is called with the expected value."""
    # Each step of the chain is the auto-created ``return_value`` of the
    # previous call, so the whole pipeline can be walked from the input mock.
    mock_image = MagicMock()
    abs_step = mock_image.abs.return_value
    pow_step = abs_step.pow.return_value
    sign_step = mock_image.signum.return_value
    sat_step = pow_step.multiply.return_value
    scaled_step = sat_step.multiply.return_value
    snapped_step = scaled_step.round.return_value
    clamped_step = snapped_step.clamp.return_value
    expected = clamped_step.int8.return_value

    # ee.Number(1.0).divide(power) yields the exponent handed to pow().
    number_mock = mock_ee.Number.return_value
    number_mock.divide.return_value = 0.5

    result = _quantize_embeddings(mock_image)

    # Power-law transform: image.abs().pow(1 / power).multiply(image.signum())
    mock_image.abs.assert_called_once()
    mock_ee.Number.assert_called_once_with(1.0)
    number_mock.divide.assert_called_once_with(2.0)
    abs_step.pow.assert_called_once_with(0.5)
    mock_image.signum.assert_called_once()
    pow_step.multiply.assert_called_once_with(sign_step)

    # Scale and round: sat.multiply(scale).round()
    sat_step.multiply.assert_called_once_with(127.5)
    scaled_step.round.assert_called_once()

    # Clamp and cast: snapped.clamp(min, max).int8()
    snapped_step.clamp.assert_called_once_with(-127, 127)
    clamped_step.int8.assert_called_once()

    assert result == expected
54+
55+
56+
@patch("aef_export.embeddings.set_workload_tag")
@patch("aef_export.embeddings.ee")
@patch("aef_export.embeddings._quantize_embeddings")
def test_export_image_without_quantization(mock_quantize, mock_ee, mock_workload_tag):
    """With the default ``quantize`` value the quantizer is never invoked."""
    # Arrange: Earth Engine hands back a stub image and a stub export task.
    mock_ee.Image.return_value = MagicMock()
    export_task = MagicMock()
    export_task.id = "test_task_id"
    mock_ee.batch.Export.image.toCloudStorage.return_value = export_task

    # Act: ``quantize`` is left at its default.
    call_kwargs = {
        "image_id": "PROJECTS/test/assets/test_image_12345",
        "gcs_bucket_name": "test-bucket",
        "gcs_key_prefix": "test/prefix",
    }
    export_image(**call_kwargs)

    # Assert
    mock_quantize.assert_not_called()
77+
78+
79+
@patch("aef_export.embeddings._quantize_embeddings")
@patch("aef_export.embeddings.set_workload_tag")
@patch("aef_export.embeddings.ee")
def test_export_image_with_quantization(mock_ee, mock_workload_tag, mock_quantize):
    """With quantize=True the quantized image is the one that gets exported."""
    # Arrange: distinct stubs for the raw and quantized images so the test
    # can tell which one reaches the exporter.
    raw_image = MagicMock()
    quantized_image = MagicMock()
    export_task = MagicMock()
    export_task.id = "quantized_task_id"

    mock_ee.Image.return_value = raw_image
    mock_quantize.return_value = quantized_image
    to_cloud_storage = mock_ee.batch.Export.image.toCloudStorage
    to_cloud_storage.return_value = export_task

    # Act
    result = export_image(
        image_id="PROJECTS/test/assets/embeddings_67890",
        gcs_bucket_name="embeddings-bucket",
        gcs_key_prefix="quantized/embeddings",
        quantize=True,
    )

    # Assert: the image is loaded, quantized, and exported under the tag.
    mock_ee.Image.assert_called_once_with("PROJECTS/test/assets/embeddings_67890")
    mock_quantize.assert_called_once_with(raw_image)
    mock_workload_tag.assert_called_once_with("export-image")
    expected_export_kwargs = {
        "image": quantized_image,
        "description": "export-image-embeddings_67890",
        "bucket": "embeddings-bucket",
        "fileNamePrefix": "quantized/embeddings",
        "maxPixels": 2e10,
        "formatOptions": {"cloudOptimized": True},
    }
    to_cloud_storage.assert_called_once_with(**expected_export_kwargs)
    export_task.start.assert_called_once()

    assert result == "quantized_task_id"
116+
117+
118+
@patch("aef_export.embeddings.set_workload_tag")
@patch("aef_export.embeddings.ee")
def test_export_image_uses_workload_tag_context(mock_ee, mock_workload_tag):
    """The workload tag context manager is entered and exited exactly once."""
    # Arrange
    mock_ee.Image.return_value = MagicMock()
    export_task = MagicMock()
    export_task.id = "context_task_id"
    mock_ee.batch.Export.image.toCloudStorage.return_value = export_task

    # Act
    export_image(
        image_id="PROJECTS/test/assets/context_test",
        gcs_bucket_name="context-bucket",
        gcs_key_prefix="context/test",
    )

    # Assert: the tag is requested once and used as a context manager.
    mock_workload_tag.assert_called_once_with("export-image")
    tag_context = mock_workload_tag.return_value
    tag_context.__enter__.assert_called_once()
    tag_context.__exit__.assert_called_once()

0 commit comments

Comments
 (0)