Skip to content

Commit 0c8bc2b

Browse files
committed
gdal vector partition: make --fields optional
Fixes OSGeo#13758
1 parent cf497a1 commit 0c8bc2b

File tree

3 files changed

+77
-5
lines changed

3 files changed

+77
-5
lines changed

apps/gdalalg_vector_partition.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ constexpr int DIRECTORY_CREATION_MODE = 0755;
3030
constexpr const char *NULL_MARKER = "__HIVE_DEFAULT_PARTITION__";
3131

3232
constexpr const char *DEFAULT_PATTERN_HIVE = "part_%010d";
33+
constexpr const char *DEFAULT_PATTERN_FLAT_NO_FIELD = "{LAYER_NAME}_%010d";
3334
constexpr const char *DEFAULT_PATTERN_FLAT = "{LAYER_NAME}_{FIELD_VALUE}_%010d";
3435

3536
constexpr char DIGIT_ZERO = '0';
@@ -84,8 +85,8 @@ GDALVectorPartitionAlgorithm::GDALVectorPartitionAlgorithm(bool standaloneStep)
8485
AddLayerCreationOptionsArg(&m_layerCreationOptions);
8586

8687
AddArg("field", 0,
87-
_("Attribute or geometry field(s) on which to partition"), &m_fields)
88-
.SetRequired();
88+
_("Attribute or geometry field(s) on which to partition"),
89+
&m_fields);
8990
AddArg("scheme", 0, _("Partitioning scheme"), &m_scheme)
9091
.SetChoices(SCHEME_HIVE, SCHEME_FLAT)
9192
.SetDefault(m_scheme);
@@ -205,6 +206,20 @@ GDALVectorPartitionAlgorithm::GDALVectorPartitionAlgorithm(bool standaloneStep)
205206
.SetMinValueIncluded(1)
206207
.SetDefault(m_transactionSize)
207208
.SetHidden();
209+
210+
AddValidationAction(
211+
[this]()
212+
{
213+
if (m_fields.empty() && m_featureLimit == 0 && m_maxFileSize == 0)
214+
{
215+
ReportError(
216+
CE_Failure, CPLE_IllegalArg,
217+
"When 'fields' argument is not specified, "
218+
"'feature-limit' and/or 'max-file-size' must be specified");
219+
return false;
220+
}
221+
return true;
222+
});
208223
}
209224

210225
/************************************************************************/
@@ -384,7 +399,8 @@ static bool GetCurrentOutputLayer(
384399
!osPatternIn.empty() ? osPatternIn
385400
: osScheme == GDALVectorPartitionAlgorithm::SCHEME_HIVE
386401
? DEFAULT_PATTERN_HIVE
387-
: DEFAULT_PATTERN_FLAT;
402+
: osKey.empty() ? DEFAULT_PATTERN_FLAT_NO_FIELD
403+
: DEFAULT_PATTERN_FLAT;
388404

389405
bool bLimitReached = false;
390406
bool bOpenOrCreateNewFile = true;

autotest/utilities/test_gdalalg_vector_partition.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,3 +1210,40 @@ def test_gdalalg_vector_partition_geometry_multi_geom_fields(tmp_vsimem):
12101210
lyr.GetFeature(1).GetGeomFieldRef(1).ExportToIsoWkt()
12111211
== "LINESTRING (5 6,7 8)"
12121212
)
1213+
1214+
1215+
@pytest.mark.require_driver("GPKG")
1216+
def test_gdalalg_vector_partition_no_fields(tmp_vsimem):
1217+
1218+
with pytest.raises(
1219+
Exception,
1220+
match="When 'fields' argument is not specified, 'feature-limit' and/or 'max-file-size' must be specified",
1221+
):
1222+
gdal.alg.vector.partition(
1223+
input=src_ds(), output=tmp_vsimem / "out", output_format="GPKG"
1224+
)
1225+
1226+
gdal.alg.vector.partition(
1227+
input=src_ds(), output=tmp_vsimem / "out", feature_limit=2, output_format="GPKG"
1228+
)
1229+
1230+
assert gdal.ReadDir(tmp_vsimem / "out" / "test") == [
1231+
"part_0000000001.gpkg",
1232+
"part_0000000002.gpkg",
1233+
]
1234+
1235+
gdal.RmdirRecursive(tmp_vsimem / "out")
1236+
1237+
gdal.alg.vector.partition(
1238+
input=src_ds(),
1239+
output=tmp_vsimem / "out",
1240+
feature_limit=2,
1241+
output_format="GPKG",
1242+
scheme="flat",
1243+
)
1244+
1245+
assert gdal.ReadDir(tmp_vsimem / "out") == [
1246+
"non%20spatial_0000000001.gpkg",
1247+
"test_0000000001.gpkg",
1248+
"test_0000000002.gpkg",
1249+
]

doc/source/programs/gdal_vector_partition.rst

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ Description
2222

2323
:program:`gdal vector partition` dispatches features into different
2424
files, depending on the values the features take on a subset of attribute or
25-
geometry fields specified by the user.
25+
geometry fields specified by the user and/or by limiting each output layer
26+
to a maximum number of features and/or a maximum file size.
2627

2728
Two partitioning schemes are available:
2829

@@ -62,7 +63,7 @@ Program-Specific Options
6263

6364
.. option:: --field <FIELD-NAME>
6465

65-
Fields(s) on which to partition. [required]
66+
Field(s) on which to partition.
6667

6768
Only attribute fields of type String, Integer and Integer64 are allowed.
6869
The order in which fields are specified matters to determine the directory
@@ -73,6 +74,10 @@ Program-Specific Options
7374
geometry fields is done on the geometry type. This can be useful for file
7475
formats where a single geometry type per layer is allowed.
7576

77+
Starting with GDAL 3.13, :option:`--field` is no longer required, but when
78+
it is not specified, :option:`--feature-limit` and/or :option:`--max-file-size`
79+
must be specified.
80+
7681
.. option:: --max-file-size <MAX-FILE-SIZE>
7782

7883
Maximum file size (MB or GB suffix can be used). By default, unlimited.
@@ -168,3 +173,17 @@ Examples
168173
.. code-block:: bash
169174
170175
$ gdal vector pipeline ! read input.gpkg ! set-geom-type --multi ! partition out_directory --scheme flat --field OGR_GEOMETRY --format "ESRI Shapefile"
176+
177+
.. example::
178+
:title: Split a file into files with at most 100,000 features.
179+
180+
.. code-block:: bash
181+
182+
$ gdal vector partition world_cities.gpkg out_directory --feature-limit 100000 --scheme flat --format Parquet
183+
184+
.. example::
185+
:title: Sort a file spatially and split it into files with at most 100,000 features.
186+
187+
.. code-block:: bash
188+
189+
$ gdal vector pipeline read world_cities.gpkg ! sort ! partition out_directory --feature-limit 100000 --scheme flat --format Parquet

0 commit comments

Comments
 (0)