Skip to content

Accepting user input to override default hive configs #214

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html)

## [0.1.84] - 2025-02-19
Accept user-supplied Hive configs that override the default values
### Changed
- core/src/main/scala/core/Migration.scala
- core/src/main/scala/core/DataFrameFromTo.scala

## [0.1.83] - 2025-01-24
fix java.util.NoSuchElementException: key not found: url
### Changed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -642,4 +642,5 @@ private ListClustersResult retryListClusters(final AmazonElasticMapReduce emr, f

return listClustersResult;
}
}

}
2 changes: 1 addition & 1 deletion core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -375,4 +375,4 @@
</dependency>

</dependencies>
</project>
</project>
33 changes: 32 additions & 1 deletion core/src/main/scala/core/DataFrameFromTo.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,38 @@ class DataFrameFromTo(appConfig: AppConfig, pipeline: String) extends Serializab
df_temp.write.mode(savemode).options(jdbcOptions).jdbc(db_url, table, connectionProperties)
}

def hiveToDataFrame(sparkSession: org.apache.spark.sql.SparkSession, query: String): org.apache.spark.sql.DataFrame = {
def hiveToDataFrame(sparkSession: SparkSession, query: String, properties: Option[JSONObject] = None): DataFrame = {

println("Properties Passed:" + properties)

val defaultConfigs = Map(
"spark.sql.hive.caseSensitiveInferenceMode" -> "INFER_ONLY",
"spark.sql.hive.metastore.version" -> "1.2.1",
"spark.sql.hive.metastore.jars" -> "builtin"
)
val parsedProperties = properties.map { jsonObj =>
import scala.collection.JavaConverters._
val propertyMap = scala.collection.mutable.Map[String, String]()
val iter = jsonObj.keys()

while (iter.hasNext) {
val key = iter.next().toString
propertyMap(key) = jsonObj.getString(key)
}
propertyMap.toMap
}.getOrElse(Map.empty[String, String])

val finalConfigs = defaultConfigs ++ parsedProperties

finalConfigs.foreach { case (key, value) =>
sparkSession.sqlContext.setConf(key, value)
}

println("Configurations applied:")
finalConfigs.foreach { case (key, value) =>
println(s"$key -> ${sparkSession.sqlContext.getConf(key)}")
}

sparkSession.sql(query)
}

Expand Down
8 changes: 7 additions & 1 deletion core/src/main/scala/core/Migration.scala
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,13 @@ class Migration extends SparkListener {
)
}
else if (platform == "hive") {
dataframeFromTo.hiveToDataFrame(sparkSession, propertiesMap("query"))
val properties = if (platformObject.has("properties")) {
Option(platformObject.getJSONObject("properties"))
} else {
None
}

dataframeFromTo.hiveToDataFrame(sparkSession = sparkSession, query = propertiesMap("query"), properties = properties)
} else if (platform == "mongodb") {
dataframeFromTo.mongodbToDataFrame(propertiesMap("awsenv"), propertiesMap("cluster"), propertiesMap.getOrElse("overrideconnector", "false"), propertiesMap("database"), propertiesMap("authenticationdatabase"), propertiesMap("collection"), propertiesMap("login"), propertiesMap("password"), sparkSession, propertiesMap("vaultenv"), platformObject.optJSONObject("sparkoptions"), propertiesMap.getOrElse("secretstore", secretStoreDefaultValue), propertiesMap.getOrElse("authenticationenabled", "true"), propertiesMap.getOrElse("tmpfilelocation", null), propertiesMap.getOrElse("samplesize", null), propertiesMap.getOrElse("sslenabled", "false"))
}
Expand Down