Skip to content

Commit 4a858a1

Browse files
committed
implement ST_ASEWKT
refactor and add tests clean up
1 parent bb49756 commit 4a858a1

File tree

48 files changed

+447
-14
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+447
-14
lines changed

docs/code-example-notebooks/accessors.scala

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,30 @@ df.select(st_astext(st_point($"lon", $"lat")).alias("wkt")).show()
105105
// MAGIC %r
106106
// MAGIC df <- createDataFrame(data.frame(lon = 30.0, lat = 10.0))
107107
// MAGIC showDF(select(df, alias(st_aswkt(st_point(column("lon"), column("lat"))), "wkt")), truncate=F)
108+
109+
// COMMAND ----------
110+
111+
// MAGIC %md
112+
// MAGIC ### st_asewkt
113+
114+
// COMMAND ----------
115+
116+
// MAGIC %python
117+
// MAGIC df = spark.createDataFrame([{'lon': 30., 'lat': 10.}])
118+
// MAGIC df.select(st_asewkt(st_point('lon', 'lat')).alias('ewkt')).show()
119+
120+
// COMMAND ----------
121+
122+
val df = List((30.0, 10.0)).toDF("lon", "lat")
123+
df.select(st_asewkt(st_point($"lon", $"lat")).alias("ewkt")).show()
124+
125+
// COMMAND ----------
126+
127+
// MAGIC %sql
128+
// MAGIC SELECT st_asewkt(st_point(30D, 10D)) AS ewkt
129+
130+
// COMMAND ----------
131+
132+
// MAGIC %r
133+
// MAGIC df <- createDataFrame(data.frame(lon = 30.0, lat = 10.0))
134+
// MAGIC showDF(select(df, alias(st_asewkt(st_point(column("lon"), column("lat"))), "ewkt")), truncate=F)

docs/code-example-notebooks/kepler.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@
7474

7575
# WKB representation
7676
.withColumn("geom_wkb", mos.st_aswkb(col("geom_internal")))
77+
78+
# EWKT representation
79+
.withColumn("geom_ewkt", mos.st_asewkt(col("geom_internal")))
7780

7881
# Limit to only 1 shape
7982
.limit(1)

docs/source/api/geometry-accessors.rst

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,3 +278,60 @@ st_aswkt
278278

279279

280280
.. note:: Alias for :ref:`st_astext`.
281+
282+
st_asewkt
283+
*********
284+
285+
.. function:: st_asewkt(col)
286+
287+
Translate a geometry into its representation in Extended Well-known Text (EWKT) format.
288+
289+
:param col: Geometry column
290+
:type col: Column: BinaryType, HexType, JSONType or InternalGeometryType
291+
:rtype: Column: StringType
292+
293+
:example:
294+
295+
.. tabs::
296+
.. code-tab:: py
297+
298+
>>> df = spark.createDataFrame([{'lon': 30., 'lat': 10.}])
299+
>>> df.select(st_asewkt(st_point('lon', 'lat')).alias('ewkt')).show()
300+
+-----------------------+
301+
| ewkt|
302+
+-----------------------+
303+
|SRID=4326;POINT (30 10)|
304+
+-----------------------+
305+
306+
.. code-tab:: scala
307+
308+
>>> val df = List((30.0, 10.0)).toDF("lon", "lat")
309+
>>> df.select(st_asewkt(st_point($"lon", $"lat")).alias("ewkt")).show()
310+
+-----------------------+
311+
| ewkt|
312+
+-----------------------+
313+
|SRID=4326;POINT (30 10)|
314+
+-----------------------+
315+
316+
.. code-tab:: sql
317+
318+
>>> SELECT st_asewkt(st_point(30.0D, 10.0D)) AS ewkt
319+
+-----------------------+
320+
| ewkt|
321+
+-----------------------+
322+
|SRID=4326;POINT (30 10)|
323+
+-----------------------+
324+
325+
.. code-tab:: r R
326+
327+
>>> df <- createDataFrame(data.frame(lon = 30.0, lat = 10.0))
328+
>>> showDF(select(df, alias(st_asewkt(st_point(column("lon"), column("lat"))), "ewkt")), truncate=F)
329+
+-----------------------+
330+
| ewkt|
331+
+-----------------------+
332+
|SRID=4326;POINT (30 10)|
333+
+-----------------------+
334+
335+
336+
.. note:: Default SRID value of a geometry created without specifying the explicit SRID value may be specific to a chosen geometry API. Currently,
337+
default SRID on ESRI is 4326 (as shown in the examples), whereas it is 0 on JTS.

python/mosaic/api/accessors.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"as_hex",
1818
"as_json",
1919
"convert_to",
20+
"st_asewkt",
2021
]
2122

2223

@@ -131,3 +132,22 @@ def convert_to(geom: ColumnOrName) -> Column:
131132
return config.mosaic_context.invoke_function(
132133
"convert_to", pyspark_to_java_column(geom)
133134
)
135+
136+
def st_asewkt(geom: ColumnOrName) -> Column:
    """
    Render a geometry column as Extended Well-known Text (EWKT).

    Parameters
    ----------
    geom : Column (BinaryType, HexType, JSONType or InternalGeometryType)
        Geometry column

    Returns
    -------
    Column (StringType)
        The EWKT representation of the geometry

    """
    # Convert the Python-side column first, then hand it to the registered
    # "st_asewkt" function through the mosaic context.
    java_geom = pyspark_to_java_column(geom)
    return config.mosaic_context.invoke_function("st_asewkt", java_geom)

src/main/scala/com/databricks/labs/mosaic/codegen/format/ConvertToCodeGen.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ object ConvertToCodeGen {
6868
case "JSONOBJECT" => geometryCodeGen.toJSON(ctx, eval, geometryAPI)
6969
case "GEOJSON" => geometryCodeGen.toGeoJSON(ctx, eval, geometryAPI)
7070
case "COORDS" => geometryCodeGen.toInternal(ctx, eval, geometryAPI)
71+
case "EWKT" => geometryCodeGen.toEWKT(ctx, eval, geometryAPI)
7172
case _ => throw new Error(s"Data type unsupported: $outputDataFormatName.")
7273
}
7374
}

src/main/scala/com/databricks/labs/mosaic/codegen/format/GeometryIOCodeGen.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ trait GeometryIOCodeGen {
1616

1717
def fromInternal(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
1818

19+
def fromEWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
20+
1921
def toWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
2022

2123
def toWKB(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
@@ -28,4 +30,6 @@ trait GeometryIOCodeGen {
2830

2931
def toInternal(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
3032

33+
def toEWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String)
34+
3135
}

src/main/scala/com/databricks/labs/mosaic/codegen/format/MosaicGeometryIOCodeGenESRI.scala

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@ package com.databricks.labs.mosaic.codegen.format
22

33
import java.nio.ByteBuffer
44

5-
import com.databricks.labs.mosaic.core.geometry.MosaicGeometryESRI
5+
import com.databricks.labs.mosaic.core.geometry.{MosaicGeometry, MosaicGeometryESRI}
66
import com.databricks.labs.mosaic.core.geometry.api.GeometryAPI
77
import com.databricks.labs.mosaic.core.types.InternalGeometryType
88
import com.esri.core.geometry.ogc.OGCGeometry
9+
import com.esri.core.geometry.SpatialReference
910
import org.locationtech.jts.io.{WKBReader, WKBWriter}
1011

1112
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
@@ -15,9 +16,10 @@ import org.apache.spark.sql.types.{BinaryType, StringType}
1516
object MosaicGeometryIOCodeGenESRI extends GeometryIOCodeGen {
1617

1718
override def fromWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String) = {
18-
val inputGeom = ctx.freshName("inputGeom")
19-
val ogcGeom = classOf[OGCGeometry].getName
20-
(s"""$ogcGeom $inputGeom = $ogcGeom.fromText($eval.toString());""", inputGeom)
19+
// WKT is a subset of EWKT (an EWKT string without the "SRID=...;" prefix is plain WKT), so
20+
// fromWKT does not need a parser of its own: it can delegate to fromEWKT so long as that
21+
// implementation works for both.
22+
fromEWKT(ctx, eval, geometryAPI)
2123
}
2224

2325
override def fromWKB(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String) = {
@@ -77,6 +79,30 @@ object MosaicGeometryIOCodeGenESRI extends GeometryIOCodeGen {
7779
)
7880
}
7981

82+
/**
  * Emits Java source that parses an EWKT (or plain WKT) string into an ESRI
  * OGCGeometry.
  *
  * The generated code converts `eval` to a String. When the text starts with
  * "SRID=", it is split on ';'; the value after '=' in the first part is
  * parsed as an integer SRID and applied through SpatialReference.create,
  * and the second part is parsed as WKT. Otherwise the whole text is parsed
  * as WKT.
  *
  * @param ctx
  *   Codegen context used to obtain fresh variable names.
  * @param eval
  *   Name of the generated-code variable holding the input text.
  * @param geometryAPI
  *   Not used by this implementation.
  * @return
  *   A pair of (generated Java code, name of the variable holding the parsed
  *   geometry).
  */
override def fromEWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String) = {
    val inputGeom = ctx.freshName("inputGeom")
    val text = ctx.freshName("geom")
    val chunks = ctx.freshName("parts")
    val sridText = ctx.freshName("srid")
    // Fully qualified class names, since the generated code has no imports of its own.
    val ogcClass = classOf[OGCGeometry].getName
    val srClass = classOf[SpatialReference].getName
    val code =
        s"""
           |$ogcClass $inputGeom;
           |String $text = $eval.toString();
           |if ($text.startsWith("SRID=")) {
           | String[] $chunks = $text.split(";", 0);
           | String $sridText = $chunks[0].split("=", 0)[1];
           | $inputGeom = $ogcClass.fromText($chunks[1]);
           | $inputGeom.setSpatialReference($srClass.create(Integer.parseInt($sridText)));
           |} else {
           | $inputGeom = $ogcClass.fromText($text);
           |}
           |""".stripMargin
    (code, inputGeom)
}
105+
80106
override def toWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String) = {
81107
val outputGeom = ctx.freshName("outputGeom")
82108
val javaStringType = CodeGenerator.javaType(StringType)
@@ -153,4 +179,18 @@ object MosaicGeometryIOCodeGenESRI extends GeometryIOCodeGen {
153179
)
154180
}
155181

182+
/**
  * Emits Java source that renders an ESRI geometry as EWKT, i.e.
  * "SRID=<id>;<WKT>".
  *
  * The generated code uses SRID 0 when the geometry's `esriSR` field is
  * null, and otherwise the ID of its spatial reference.
  *
  * @param ctx
  *   Codegen context used to obtain fresh variable names.
  * @param eval
  *   Name of the generated-code variable holding the geometry.
  * @param geometryAPI
  *   Not used by this implementation.
  * @return
  *   A pair of (generated Java code, name of the variable holding the EWKT
  *   string value).
  */
override def toEWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String) = {
    val outputGeom = ctx.freshName("outputGeom")
    // Was freshName("grid"): a typo that made the generated SRID variable read
    // as "grid_N". Named "srid" to match fromEWKT.
    val srid = ctx.freshName("srid")
    val javaStringType = CodeGenerator.javaType(StringType)
    (
      s"""
         |int $srid = 0;
         |if ($eval.esriSR != null) $srid = $eval.getEsriSpatialReference().getID();
         |$javaStringType $outputGeom = $javaStringType.fromString("SRID=" + Integer.toString($srid) + ";" + $eval.asText());
         |""".stripMargin,
      outputGeom
    )
}
195+
156196
}

src/main/scala/com/databricks/labs/mosaic/codegen/format/MosaicGeometryIOCodeGenJTS.scala

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,7 @@ import org.apache.spark.sql.types.{BinaryType, StringType}
1414
object MosaicGeometryIOCodeGenJTS extends GeometryIOCodeGen {
1515

1616
override def fromWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String) = {
17-
val inputGeom = ctx.freshName("inputGeom")
18-
val jtsGeom = classOf[Geometry].getName
19-
val wktReader = classOf[WKTReader].getName
20-
(s"""$jtsGeom $inputGeom = new $wktReader().read($eval.toString());""", inputGeom)
17+
fromEWKT(ctx, eval, geometryAPI)
2118
}
2219

2320
override def fromWKB(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String) = {
@@ -78,6 +75,30 @@ object MosaicGeometryIOCodeGenJTS extends GeometryIOCodeGen {
7875
)
7976
}
8077

78+
/**
  * Emits Java source that parses an EWKT (or plain WKT) string into a JTS
  * Geometry.
  *
  * The generated code converts `eval` to a String. When the text starts with
  * "SRID=", it is split on ';'; the value after '=' in the first part is
  * parsed as an integer and applied with setSRID, and the second part is
  * read with a WKTReader. Otherwise the whole text is read as WKT.
  *
  * NOTE: the generated code indexes the split results directly, so an
  * "SRID=" prefix with no ';'-separated geometry part is not guarded
  * against here.
  *
  * @param ctx
  *   Codegen context used to obtain fresh variable names.
  * @param eval
  *   Name of the generated-code variable holding the input text.
  * @param geometryAPI
  *   Not used by this implementation.
  * @return
  *   A pair of (generated Java code, name of the variable holding the parsed
  *   geometry).
  */
override def fromEWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String) = {
    val inputGeom = ctx.freshName("inputGeom")
    val geom = ctx.freshName("geom")
    val parts = ctx.freshName("parts")
    val srid = ctx.freshName("srid")
    // Fully qualified class names, since the generated code has no imports of its own.
    val jtsGeom = classOf[Geometry].getName
    val wktReader = classOf[WKTReader].getName
    (
      s"""
         |$jtsGeom $inputGeom;
         |String $geom = $eval.toString();
         |if ($geom.startsWith("SRID=")) {
         | String[] $parts = $geom.split(";", 0);
         | String $srid = $parts[0].split("=", 0)[1];
         | $inputGeom = new $wktReader().read($parts[1]);
         | $inputGeom.setSRID(Integer.parseInt($srid));
         |} else {
         | $inputGeom = new $wktReader().read($geom);
         |}
         |""".stripMargin,
      inputGeom
    )
}
101+
81102
override def toWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String) = {
82103
val outputGeom = ctx.freshName("outputGeom")
83104
val javaStringType = CodeGenerator.javaType(StringType)
@@ -172,4 +193,16 @@ object MosaicGeometryIOCodeGenJTS extends GeometryIOCodeGen {
172193
)
173194
}
174195

196+
/**
  * Emits Java source that renders a JTS geometry as EWKT, i.e.
  * "SRID=<id>;<WKT>", using the geometry's getSRID() value and a WKTWriter.
  *
  * @param ctx
  *   Codegen context used to obtain fresh variable names.
  * @param eval
  *   Name of the generated-code variable holding the geometry.
  * @param geometryAPI
  *   Not used by this implementation.
  * @return
  *   A pair of (generated Java code, name of the variable holding the EWKT
  *   string value).
  */
override def toEWKT(ctx: CodegenContext, eval: String, geometryAPI: GeometryAPI): (String, String) = {
    val outputGeom = ctx.freshName("outputGeom")
    val stringType = CodeGenerator.javaType(StringType)
    // Fully qualified writer class name for use inside the generated code.
    val writerName = classOf[WKTWriter].getName
    val code =
        s"""
           |$stringType $outputGeom = $stringType.fromString("SRID=" + Integer.toString($eval.getSRID()) + ";" + new $writerName().write($eval));
           |""".stripMargin
    (code, outputGeom)
}
207+
175208
}

src/main/scala/com/databricks/labs/mosaic/core/geometry/GeometryReader.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@ trait GeometryReader {
1919

2020
def fromSeq[T <: MosaicGeometry](geomSeq: Seq[T], geomType: GeometryTypeEnum.Value): MosaicGeometry
2121

22+
def fromEWKT(ewkt: String): MosaicGeometry
2223
}

src/main/scala/com/databricks/labs/mosaic/core/geometry/GeometryWriter.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,6 @@ trait GeometryWriter {
1414

1515
def toHEX: String
1616

17+
def toEWKT: String
18+
1719
}

0 commit comments

Comments
 (0)