Skip to content

Commit 07c13b1

Browse files
Combined the geojson and geojson_multi readers into a single geojson reader
1 parent 083d347 commit 07c13b1

File tree

8 files changed

+20
-40
lines changed

8 files changed

+20
-40
lines changed

src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,5 @@ com.databricks.labs.gbx.vectorx.ds.ogr.OGR_DataSource
33
com.databricks.labs.gbx.vectorx.ds.shp.ShapeFile_DataSource
44
com.databricks.labs.gbx.vectorx.ds.gdb.FileGDB_DataSource
55
com.databricks.labs.gbx.vectorx.ds.geojson.GeoJSON_DataSource
6-
com.databricks.labs.gbx.vectorx.ds.geojson.GeoJSONMulti_DataSource
76
com.databricks.labs.gbx.vectorx.ds.gpkg.GPKG_DataSource
87
com.databricks.labs.gbx.ds.register.RegisterDataSource

src/main/scala/com/databricks/labs/gbx/ds/DataSourceExtras.scala

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,17 @@ import scala.jdk.CollectionConverters._
66

77
trait DataSourceExtras {
88

9-
def dsExtraMap(): Map[String, String]
9+
def dsExtraMap(checkMap: Map[String, String] = Map.empty): Map[String, String]
1010

1111
def extraJavaUtilMap(properties: java.util.Map[String, String]): java.util.Map[String, String] = {
12-
val newProperties = properties.asScala.toMap ++ dsExtraMap()
13-
newProperties.asJava
12+
val cMap = properties.asScala.toMap
13+
val newMap = cMap ++ dsExtraMap(checkMap = cMap)
14+
newMap.asJava
1415
}
1516

1617
def extraCaseInsensitiveStringMap(options: CaseInsensitiveStringMap): CaseInsensitiveStringMap = {
17-
val newMap = options.asCaseSensitiveMap().asScala.toMap ++ dsExtraMap()
18+
val cMap = options.asCaseSensitiveMap().asScala.toMap
19+
val newMap = cMap ++ dsExtraMap(checkMap = cMap)
1820
new CaseInsensitiveStringMap(newMap.asJava)
1921
}
2022

src/main/scala/com/databricks/labs/gbx/vectorx/ds/gdb/FileGDB_DataSource.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
1010
//noinspection ScalaUnusedSymbol
1111
class FileGDB_DataSource extends OGR_DataSource with DataSourceExtras {
1212

13-
override def dsExtraMap(): Map[String, String] = Map(
13+
override def dsExtraMap(checkMap: Map[String, String] = Map.empty): Map[String, String] = Map(
1414
"driverName" -> "OpenFileGDB"
1515
)
1616

src/main/scala/com/databricks/labs/gbx/vectorx/ds/geojson/GeoJSONMulti_DataSource.scala

Lines changed: 0 additions & 27 deletions
This file was deleted.

src/main/scala/com/databricks/labs/gbx/vectorx/ds/geojson/GeoJSON_DataSource.scala

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,14 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
1111
//noinspection ScalaUnusedSymbol
1212
class GeoJSON_DataSource extends OGR_DataSource with DataSourceExtras{
1313

14-
override def dsExtraMap(): Map[String, String] = Map(
15-
"driverName" -> "GeoJSON"
16-
)
14+
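// Defaults to multi = true (selects the "GeoJSONSeq" driver) since that is the common use; set option multi = false to select the "GeoJSON" driver.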
15+
override def dsExtraMap(checkMap: Map[String, String] = Map.empty): Map[String, String] = {
16+
if (checkMap.getOrElse("multi", "true").toBoolean) {
17+
Map("driverName" -> "GeoJSONSeq")
18+
} else {
19+
Map("driverName" -> "GeoJSON")
20+
}
21+
}
1722

1823
override def shortName(): String = "geojson"
1924

src/main/scala/com/databricks/labs/gbx/vectorx/ds/gpkg/GPKG_DataSource.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
1111
//noinspection ScalaUnusedSymbol
1212
class GPKG_DataSource extends OGR_DataSource with DataSourceExtras{
1313

14-
override def dsExtraMap(): Map[String, String] = Map(
14+
override def dsExtraMap(checkMap: Map[String, String] = Map.empty): Map[String, String] = Map(
1515
"driverName" -> "GPKG"
1616
)
1717

src/main/scala/com/databricks/labs/gbx/vectorx/ds/shp/ShapeFile_DataSource.scala

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap
1111
//noinspection ScalaUnusedSymbol
1212
class ShapeFile_DataSource extends OGR_DataSource with DataSourceExtras{
1313

14-
override def dsExtraMap(): Map[String, String] = Map(
14+
override def dsExtraMap(checkMap: Map[String, String] = Map.empty): Map[String, String] = Map(
1515
"driverName" -> "ESRI Shapefile"
1616
)
1717

src/test/scala/com/databricks/labs/gbx/vectorx/ds/OGR_DataSourceTest.scala

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -63,15 +63,16 @@ class OGR_DataSourceTest extends PlanTest with SilentSparkSession {
6363

6464
val res_gj = spark.read
6565
.format("geojson")
66+
.option("multi", "false")
6667
.load(gjPath)
6768

6869
res_gj.count() shouldEqual 1L // single geom
6970

7071
val res_gj1 = spark.read
71-
.format("geojson_multi")
72+
.format("geojson")
7273
.load(gjPath)
7374

74-
res_gj1.count() should be > 1L // individual geoms
75+
res_gj1.count() should be > 1L // newline geoms
7576

7677
// gpkg (zip fails)
7778

0 commit comments

Comments
 (0)