Skip to content

Commit aede0eb

Browse files
[SEDONA-711] Add Geography user-defined type (#1828)
* start basic object * compiling udt class * maybe actually register * add one input and one output function * maybe one input and one output function * maybe builds * some possible Python requirements * format * maybe a few more references to geography * remove word * Making ST_GeogFromWKT, ST_AsEWKT, ST_AsEWKB work properly * Add geography serde tests and dataframe api tests for python binding --------- Co-authored-by: Dewey Dunnington <[email protected]>
1 parent ebbbda9 commit aede0eb

File tree

25 files changed

+612
-16
lines changed

25 files changed

+612
-16
lines changed

common/src/main/java/org/apache/sedona/common/Constructors.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import javax.xml.parsers.ParserConfigurationException;
2323
import org.apache.sedona.common.enums.FileDataSplitter;
2424
import org.apache.sedona.common.enums.GeometryType;
25+
import org.apache.sedona.common.geometryObjects.Geography;
2526
import org.apache.sedona.common.utils.FormatUtils;
2627
import org.apache.sedona.common.utils.GeoHashDecoder;
2728
import org.locationtech.jts.geom.*;
@@ -44,6 +45,10 @@ public static Geometry geomFromWKT(String wkt, int srid) throws ParseException {
4445
return new WKTReader(geometryFactory).read(wkt);
4546
}
4647

48+
public static Geography geogFromWKT(String wkt, int srid) throws ParseException {
49+
return new Geography(geomFromWKT(wkt, srid));
50+
}
51+
4752
public static Geometry geomFromEWKT(String ewkt) throws ParseException {
4853
if (ewkt == null) {
4954
return null;

common/src/main/java/org/apache/sedona/common/Functions.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.util.stream.Collectors;
3030
import org.apache.commons.lang3.tuple.Pair;
3131
import org.apache.sedona.common.geometryObjects.Circle;
32+
import org.apache.sedona.common.geometryObjects.Geography;
3233
import org.apache.sedona.common.sphere.Spheroid;
3334
import org.apache.sedona.common.subDivide.GeometrySubDivider;
3435
import org.apache.sedona.common.utils.*;
@@ -776,6 +777,10 @@ public static String asEWKT(Geometry geometry) {
776777
return GeomUtils.getEWKT(geometry);
777778
}
778779

780+
public static String asEWKT(Geography geography) {
781+
return asEWKT(geography.getGeometry());
782+
}
783+
779784
public static String asWKT(Geometry geometry) {
780785
return GeomUtils.getWKT(geometry);
781786
}
@@ -784,6 +789,10 @@ public static byte[] asEWKB(Geometry geometry) {
784789
return GeomUtils.getEWKB(geometry);
785790
}
786791

792+
public static byte[] asEWKB(Geography geography) {
793+
return asEWKB(geography.getGeometry());
794+
}
795+
787796
public static String asHexEWKB(Geometry geom, String endian) {
788797
if (endian.equalsIgnoreCase("NDR")) {
789798
return GeomUtils.getHexEWKB(geom, ByteOrderValues.LITTLE_ENDIAN);
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.sedona.common.geometryObjects;
20+
21+
import org.locationtech.jts.geom.Geometry;
22+
23+
public class Geography {
24+
private final Geometry geometry;
25+
26+
public Geography(Geometry geometry) {
27+
this.geometry = geometry;
28+
}
29+
30+
public Geometry getGeometry() {
31+
return this.geometry;
32+
}
33+
34+
public String toString() {
35+
return this.geometry.toText();
36+
}
37+
}

common/src/main/java/org/apache/sedona/common/geometrySerde/GeometrySerde.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.esotericsoftware.kryo.io.Output;
2626
import java.io.Serializable;
2727
import org.apache.sedona.common.geometryObjects.Circle;
28+
import org.apache.sedona.common.geometryObjects.Geography;
2829
import org.locationtech.jts.geom.Envelope;
2930
import org.locationtech.jts.geom.Geometry;
3031
import org.locationtech.jts.geom.GeometryCollection;
@@ -36,7 +37,7 @@
3637
* Provides methods to efficiently serialize and deserialize geometry types.
3738
*
3839
* <p>Supports Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon,
39-
* GeometryCollection, Circle and Envelope types.
40+
* GeometryCollection, Circle, Envelope, and Geography types.
4041
*
4142
* <p>First byte contains {@link Type#id}. Then go type-specific bytes, followed by user-data
4243
* attached to the geometry.
@@ -63,6 +64,9 @@ public void write(Kryo kryo, Output out, Object object) {
6364
out.writeDouble(envelope.getMaxX());
6465
out.writeDouble(envelope.getMinY());
6566
out.writeDouble(envelope.getMaxY());
67+
} else if (object instanceof Geography) {
68+
writeType(out, Type.GEOGRAPHY);
69+
writeGeometry(kryo, out, ((Geography) object).getGeometry());
6670
} else {
6771
throw new UnsupportedOperationException(
6872
"Cannot serialize object of type " + object.getClass().getName());
@@ -118,6 +122,10 @@ public Object read(Kryo kryo, Input input, Class aClass) {
118122
return new Envelope();
119123
}
120124
}
125+
case GEOGRAPHY:
126+
{
127+
return new Geography(readGeometry(kryo, input));
128+
}
121129
default:
122130
throw new UnsupportedOperationException(
123131
"Cannot deserialize object of type " + geometryType);
@@ -145,7 +153,8 @@ private Geometry readGeometry(Kryo kryo, Input input) {
145153
private enum Type {
146154
SHAPE(0),
147155
CIRCLE(1),
148-
ENVELOPE(2);
156+
ENVELOPE(2),
157+
GEOGRAPHY(3);
149158

150159
private final int id;
151160

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
from shapely.geometry.base import BaseGeometry
19+
20+
21+
class Geography:
    """Wrapper object that keeps a shapely geometry in its ``geometry`` attribute."""

    # The wrapped shapely geometry.
    geometry: BaseGeometry

    def __init__(self, geometry: BaseGeometry):
        """Store ``geometry`` on the instance as-is (no copy, no validation).

        :param geometry: geometry to wrap.
        :type geometry: BaseGeometry
        """
        self.geometry = geometry

python/sedona/register/java_libs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class SedonaJvmLib(Enum):
2626
KNNQuery = "org.apache.sedona.core.spatialOperator.KNNQuery"
2727
RangeQuery = "org.apache.sedona.core.spatialOperator.RangeQuery"
2828
Envelope = "org.locationtech.jts.geom.Envelope"
29+
Geography = "org.apache.sedona.common.geometryObjects.Geography"
2930
GeoSerializerData = (
3031
"org.apache.sedona.python.wrapper.adapters.GeoSparkPythonConverter"
3132
)

python/sedona/spark/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
from sedona.sql.st_constructors import *
4343
from sedona.sql.st_functions import *
4444
from sedona.sql.st_predicates import *
45-
from sedona.sql.types import GeometryType, RasterType
45+
from sedona.sql.types import GeometryType, GeographyType, RasterType
4646
from sedona.utils import KryoSerializer, SedonaKryoRegistrator
4747
from sedona.utils.adapter import Adapter
4848
from sedona.utils.geoarrow import dataframe_to_arrow

python/sedona/sql/st_constructors.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,22 @@ def ST_GeomFromWKT(
176176
return _call_constructor_function("ST_GeomFromWKT", args)
177177

178178

179+
@validate_argument_types
def ST_GeogFromWKT(
    wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = None
) -> Column:
    """Generate a geography column from a Well-Known Text (WKT) string column.

    :param wkt: WKT string column to generate from.
    :type wkt: ColumnOrName
    :param srid: Optional SRID; when given, it is forwarded as the second argument of ST_GeogFromWKT, defaults to None
    :type srid: Optional[ColumnOrNameOrNumber]
    :return: Geography column representing the WKT string.
    :rtype: Column
    """
    if srid is None:
        # Forward the bare column itself, not a 1-tuple.
        args = wkt
    else:
        args = (wkt, srid)

    return _call_constructor_function("ST_GeogFromWKT", args)
193+
194+
179195
@validate_argument_types
180196
def ST_GeomFromEWKT(ewkt: ColumnOrName) -> Column:
181197
"""Generate a geometry column from a OGC Extended Well-Known Text (WKT) string column.

python/sedona/sql/types.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
SedonaRaster = None
3434

3535
from ..utils import geometry_serde
36+
from ..core.geom.geography import Geography
3637

3738

3839
class GeometryType(UserDefinedType):
@@ -60,6 +61,31 @@ def scalaUDT(cls):
6061
return "org.apache.spark.sql.sedona_sql.UDT.GeometryUDT"
6162

6263

64+
class GeographyType(UserDefinedType):
    """User-defined type that stores ``Geography`` objects as binary values."""

    @classmethod
    def sqlType(cls):
        """Return the underlying SQL storage type (binary)."""
        return BinaryType()

    def serialize(self, obj):
        """Serialize the geometry wrapped by ``obj`` with ``geometry_serde``."""
        wrapped = obj.geometry
        return geometry_serde.serialize(wrapped)

    def deserialize(self, datum):
        """Decode ``datum`` with ``geometry_serde`` and wrap the geometry in a ``Geography``."""
        # geometry_serde.deserialize yields a pair; only the first item is used here.
        geom, _unused = geometry_serde.deserialize(datum)
        return Geography(geom)

    @classmethod
    def module(cls):
        """Return the name of the Python module that defines this type."""
        return "sedona.sql.types"

    def needConversion(self):
        """Report that values need conversion between Python and SQL forms."""
        return True

    @classmethod
    def scalaUDT(cls):
        """Return the fully qualified name of the paired JVM UDT class."""
        return "org.apache.spark.sql.sedona_sql.UDT.GeographyUDT"
87+
88+
6389
class RasterType(UserDefinedType):
6490

6591
@classmethod

python/sedona/utils/prep.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
)
2929
from shapely.geometry.base import BaseGeometry
3030

31+
from ..core.geom.geography import Geography
32+
3133

3234
def assign_all() -> bool:
3335
geoms = [
@@ -41,6 +43,7 @@ def assign_all() -> bool:
4143
]
4244
assign_udt_shapely_objects(geoms=geoms)
4345
assign_user_data_to_shapely_objects(geoms=geoms)
46+
assign_udt_geography()
4447
return True
4548

4649

@@ -55,3 +58,9 @@ def assign_udt_shapely_objects(geoms: List[type(BaseGeometry)]) -> bool:
5558
def assign_user_data_to_shapely_objects(geoms: List[type(BaseGeometry)]) -> bool:
    """Attach a ``getUserData`` accessor to every entry of ``geoms``.

    The accessor takes one argument and returns that argument's ``userData`` attribute.

    :param geoms: objects to patch (annotated as a list of geometry classes).
    :return: always True.
    """
    for geom in geoms:
        geom.getUserData = lambda geom_instance: geom_instance.userData
    # Return an explicit bool so the result matches the declared ``-> bool`` annotation
    # instead of implicitly returning None.
    return True
61+
62+
63+
def assign_udt_geography():
    """Set ``Geography.__UDT__`` to a new ``GeographyType`` instance."""
    # GeographyType is imported when the function runs, not at module import time.
    from sedona.sql.types import GeographyType

    udt = GeographyType()
    Geography.__UDT__ = udt

0 commit comments

Comments
 (0)