Skip to content

Commit 3b5b597

Browse files
author
Milos Colic
authored
Merge pull request #487 from databrickslabs/benchmarking
Fix the GDAL max cache value.
2 parents d97cd07 + 0fa9c49 commit 3b5b597

15 files changed

+57
-71
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
## v0.3.14
22
- Fixes for Warning and Error messages on mosaic_enable call.
3+
- Performance improvements for raster functions.
4+
- Fix support for GDAL configuration via Spark config (use the 'spark.databricks.labs.mosaic.gdal.' prefix).
35

46
## v0.3.13
57
- R bindings generation fixed and improved.

src/main/scala/com/databricks/labs/mosaic/core/raster/api/GDAL.scala

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,6 @@ import java.util.UUID
1919
*/
2020
object GDAL {
2121

22-
def dropDrivers(): Unit = {
23-
val n = gdal.GetDriverCount()
24-
for (i <- 0 until n) {
25-
val driver = gdal.GetDriver(i)
26-
driver.delete()
27-
}
28-
}
29-
3022
/**
3123
* Returns the no data value for the given GDAL data type. For non-numeric
3224
* data types, it returns 0.0. For numeric data types, it returns the

src/main/scala/com/databricks/labs/mosaic/core/raster/gdal/MosaicRasterGDAL.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,6 @@ object MosaicRasterGDAL extends RasterReader {
510510
case Some(driverShortName) =>
511511
val drivers = new JVector[String]()
512512
drivers.add(driverShortName)
513-
gdal.GetDriverByName(driverShortName).Register()
514513
gdal.OpenEx(path, GA_ReadOnly, drivers)
515514
case None => gdal.Open(path, GA_ReadOnly)
516515
}

src/main/scala/com/databricks/labs/mosaic/expressions/raster/base/Raster1ArgExpression.scala

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.databricks.labs.mosaic.expressions.raster.base
22

33
import com.databricks.labs.mosaic.core.raster.api.GDAL
4+
import com.databricks.labs.mosaic.core.raster.io.RasterCleaner
45
import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile
56
import com.databricks.labs.mosaic.expressions.base.GenericExpressionFactory
67
import com.databricks.labs.mosaic.functions.MosaicExpressionConfig
@@ -74,13 +75,13 @@ abstract class Raster1ArgExpression[T <: Expression: ClassTag](
7475
// noinspection DuplicatedCode
7576
override def nullSafeEval(input: Any, arg1: Any): Any = {
7677
GDAL.enable(expressionConfig)
77-
val row = input.asInstanceOf[InternalRow]
78-
serialize(
79-
rasterTransform(MosaicRasterTile.deserialize(row, expressionConfig.getCellIdType), arg1),
80-
returnsRaster,
81-
outputType,
82-
expressionConfig
83-
)
78+
val tile = MosaicRasterTile.deserialize(input.asInstanceOf[InternalRow], expressionConfig.getCellIdType)
79+
val raster = tile.getRaster
80+
val result = rasterTransform(tile, arg1)
81+
val serialized = serialize(result, returnsRaster, outputType, expressionConfig)
82+
RasterCleaner.dispose(raster)
83+
RasterCleaner.dispose(result)
84+
serialized
8485
}
8586

8687
override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 2, expressionConfig)

src/main/scala/com/databricks/labs/mosaic/expressions/raster/base/Raster2ArgExpression.scala

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,12 @@ abstract class Raster2ArgExpression[T <: Expression: ClassTag](
8383
// noinspection DuplicatedCode
8484
override def nullSafeEval(input: Any, arg1: Any, arg2: Any): Any = {
8585
GDAL.enable(expressionConfig)
86-
val row = input.asInstanceOf[InternalRow]
87-
serialize(
88-
rasterTransform(MosaicRasterTile.deserialize(row, expressionConfig.getCellIdType), arg1, arg2),
89-
returnsRaster,
90-
outputType,
91-
expressionConfig
92-
)
86+
val tile = MosaicRasterTile.deserialize(input.asInstanceOf[InternalRow], expressionConfig.getCellIdType)
87+
val result = rasterTransform(tile, arg1, arg2)
88+
val serialized = serialize(result, returnsRaster, outputType, expressionConfig)
89+
// passed by name makes things re-evaluated
90+
RasterCleaner.dispose(tile)
91+
serialized
9392
}
9493

9594
override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 3, expressionConfig)

src/main/scala/com/databricks/labs/mosaic/expressions/raster/base/RasterArray1ArgExpression.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.databricks.labs.mosaic.expressions.raster.base
22

33
import com.databricks.labs.mosaic.core.raster.api.GDAL
4+
import com.databricks.labs.mosaic.core.raster.io.RasterCleaner
45
import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile
56
import com.databricks.labs.mosaic.expressions.base.GenericExpressionFactory
67
import com.databricks.labs.mosaic.functions.MosaicExpressionConfig
@@ -69,12 +70,11 @@ abstract class RasterArray1ArgExpression[T <: Expression: ClassTag](
6970
*/
7071
override def nullSafeEval(input: Any, arg1: Any): Any = {
7172
GDAL.enable(expressionConfig)
72-
serialize(
73-
rasterTransform(RasterArrayUtils.getTiles(input, rastersExpr, expressionConfig), arg1),
74-
returnsRaster,
75-
dataType,
76-
expressionConfig
77-
)
73+
val tiles = RasterArrayUtils.getTiles(input, rastersExpr, expressionConfig)
74+
val result = rasterTransform(tiles, arg1)
75+
val serialized = serialize(result, returnsRaster, dataType, expressionConfig)
76+
tiles.foreach(t => RasterCleaner.dispose(t))
77+
serialized
7878
}
7979

8080
override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 2, expressionConfig)

src/main/scala/com/databricks/labs/mosaic/expressions/raster/base/RasterArray2ArgExpression.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package com.databricks.labs.mosaic.expressions.raster.base
22

33
import com.databricks.labs.mosaic.core.raster.api.GDAL
4+
import com.databricks.labs.mosaic.core.raster.io.RasterCleaner
45
import com.databricks.labs.mosaic.core.types.model.MosaicRasterTile
56
import com.databricks.labs.mosaic.expressions.base.GenericExpressionFactory
67
import com.databricks.labs.mosaic.functions.MosaicExpressionConfig
@@ -74,12 +75,11 @@ abstract class RasterArray2ArgExpression[T <: Expression: ClassTag](
7475
*/
7576
override def nullSafeEval(input: Any, arg1: Any, arg2: Any): Any = {
7677
GDAL.enable(expressionConfig)
77-
serialize(
78-
rasterTransform(RasterArrayUtils.getTiles(input, rastersExpr, expressionConfig), arg1, arg2),
79-
returnsRaster,
80-
dataType,
81-
expressionConfig
82-
)
78+
val tiles = RasterArrayUtils.getTiles(input, rastersExpr, expressionConfig)
79+
val result = rasterTransform(tiles, arg1, arg2)
80+
val serialized = serialize(result, returnsRaster, dataType, expressionConfig)
81+
tiles.foreach(t => RasterCleaner.dispose(t))
82+
serialized
8383
}
8484

8585
override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 3, expressionConfig)

src/main/scala/com/databricks/labs/mosaic/expressions/raster/base/RasterArrayExpression.scala

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,11 @@ abstract class RasterArrayExpression[T <: Expression: ClassTag](
6565
*/
6666
override def nullSafeEval(input: Any): Any = {
6767
GDAL.enable(expressionConfig)
68-
serialize(
69-
rasterTransform(RasterArrayUtils.getTiles(input, rastersExpr, expressionConfig)),
70-
returnsRaster,
71-
dataType,
72-
expressionConfig
73-
)
68+
val tiles = RasterArrayUtils.getTiles(input, rastersExpr, expressionConfig)
69+
val result = rasterTransform(tiles)
70+
val serialized = serialize(result, returnsRaster, dataType, expressionConfig)
71+
tiles.foreach(t => RasterCleaner.dispose(t))
72+
serialized
7473
}
7574

7675
override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 1, expressionConfig)

src/main/scala/com/databricks/labs/mosaic/expressions/raster/base/RasterExpression.scala

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,11 @@ abstract class RasterExpression[T <: Expression: ClassTag](
6969
*/
7070
override def nullSafeEval(input: Any): Any = {
7171
GDAL.enable(expressionConfig)
72-
serialize(
73-
rasterTransform(MosaicRasterTile.deserialize(input.asInstanceOf[InternalRow], cellIdDataType)),
74-
returnsRaster,
75-
dataType,
76-
expressionConfig
77-
)
72+
val tile = MosaicRasterTile.deserialize(input.asInstanceOf[InternalRow], cellIdDataType)
73+
val result = rasterTransform(tile)
74+
val serialized = serialize(result, returnsRaster, dataType, expressionConfig)
75+
RasterCleaner.dispose(tile)
76+
serialized
7877
}
7978

8079
override def makeCopy(newArgs: Array[AnyRef]): Expression = GenericExpressionFactory.makeCopyImpl[T](this, newArgs, 1, expressionConfig)

src/main/scala/com/databricks/labs/mosaic/expressions/raster/base/RasterExpressionSerialization.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,13 @@ trait RasterExpressionSerialization {
3434
expressionConfig: MosaicExpressionConfig
3535
): Any = {
3636
if (returnsRaster) {
37+
val tile = data.asInstanceOf[MosaicRasterTile]
3738
val checkpoint = expressionConfig.getRasterCheckpoint
3839
val rasterType = outputDataType.asInstanceOf[StructType].fields(1).dataType
39-
val result = data
40-
.asInstanceOf[MosaicRasterTile]
40+
val result = tile
4141
.formatCellId(IndexSystemFactory.getIndexSystem(expressionConfig.getIndexSystem))
4242
.serialize(rasterType, checkpoint)
43+
RasterCleaner.dispose(tile)
4344
result
4445
} else {
4546
data

0 commit comments

Comments
 (0)