Skip to content

Datapull null subnet #215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html)


## [0.1.85] - 2025-04-10
Fall back to the default subnet pool when the user-supplied subnet ID is null, blank, or does not start with `subnet-`
### Changed
- api/src/main/java/com/homeaway/datapullclient/process/DataPullTask.java

## [0.1.84] - 2025-02-19
Taking hive input config from user to override default values
### Changed
- core/src/main/scala/core/Migration.scala
- core/src/main/scala/core/DataFrameFromTo.scala

## [0.1.83] - 2025-01-24
fix java.util.NoSuchElementException: key not found: url
### Changed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -427,12 +427,31 @@ private JobFlowInstancesConfig getJobFlowInstancesConfig(EMRProperties emrProper
subnets.add(0,clusterProperties.getSubnetId());
}

Set<String> subnets_deduped = new LinkedHashSet<>(subnets);
subnets.clear();
subnets.addAll(subnets_deduped);
// Introducing below logic to address null and invalid subnet issue
String getSubnetId = clusterProperties.getSubnetId();
String finalSubnetId;


if (StringUtils.isNotBlank(getSubnetId) && getSubnetId.startsWith("subnet-")) {
finalSubnetId = getSubnetId;
System.out.println("Subnet '" + finalSubnetId + "' provided by the user will be used for EMR cluster creation.");
} else {
if (StringUtils.isNotBlank(getSubnetId)) {
System.out.println("The user provided an invalid value '" + getSubnetId + "' for subnet. Hence, default subnet pool will be used for EMR creation.");
} else {
System.out.println("The user either provided a NULL value for the subnet or did not specify subnet in the payload. Hence, the default subnet pool will be used for EMR creation.");
}

Set<String> subnetsDeduped = new LinkedHashSet<>(subnets);
subnets.clear();
subnets.addAll(subnetsDeduped);

finalSubnetId = subnets.get(0);
System.out.println("EMR cluster will be created using a subnet from the default subnet pool: " + finalSubnetId);
}

final JobFlowInstancesConfig jobConfig = new JobFlowInstancesConfig()
.withEc2SubnetIds(subnets.get(0))
.withEc2SubnetIds(finalSubnetId)
.withInstanceFleets(masterInstanceFleetConfig)
.withKeepJobFlowAliveWhenNoSteps(!Boolean.valueOf(Objects.toString
(this.clusterProperties.getTerminateClusterAfterExecution(), "true")));
Expand Down Expand Up @@ -642,4 +661,5 @@ private ListClustersResult retryListClusters(final AmazonElasticMapReduce emr, f

return listClustersResult;
}
}

}
2 changes: 1 addition & 1 deletion core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -375,4 +375,4 @@
</dependency>

</dependencies>
</project>
</project>
33 changes: 32 additions & 1 deletion core/src/main/scala/core/DataFrameFromTo.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,38 @@ class DataFrameFromTo(appConfig: AppConfig, pipeline: String) extends Serializab
df_temp.write.mode(savemode).options(jdbcOptions).jdbc(db_url, table, connectionProperties)
}

def hiveToDataFrame(sparkSession: org.apache.spark.sql.SparkSession, query: String): org.apache.spark.sql.DataFrame = {
/**
  * Runs a Hive query on the given Spark session after applying Hive-related SQL configuration.
  *
  * Three default settings are always applied; any entry in `properties` with the same key
  * replaces the default, and any additional entry is applied as-is.
  *
  * @param sparkSession session whose SQL configuration is updated and on which the query runs
  * @param query        SQL text handed to `sparkSession.sql`
  * @param properties   optional JSON object of configuration key/value pairs; `None` means
  *                     only the defaults are applied
  * @return the DataFrame produced by `sparkSession.sql(query)`
  */
def hiveToDataFrame(sparkSession: SparkSession, query: String, properties: Option[JSONObject] = None): DataFrame = {
  import scala.collection.JavaConverters._

  println(s"Properties Passed:$properties")

  val defaultConfigs = Map(
    "spark.sql.hive.caseSensitiveInferenceMode" -> "INFER_ONLY",
    "spark.sql.hive.metastore.version" -> "1.2.1",
    "spark.sql.hive.metastore.jars" -> "builtin"
  )

  // Flatten the optional JSON object into an immutable String -> String map.
  // `get(key).toString` is used instead of `getString(key)` so that non-string JSON values
  // (e.g. `true`, `200`) are accepted as configuration values as well.
  // NOTE(review): assumes `JSONObject.get` returns the raw value for an existing key — confirm
  // against the JSON library this file imports.
  val userConfigs: Map[String, String] = properties.fold(Map.empty[String, String]) { jsonObj =>
    jsonObj.keys().asScala.map { rawKey =>
      val key = rawKey.toString
      key -> jsonObj.get(key).toString
    }.toMap
  }

  // Right-hand side wins on key collisions, so user-supplied values override the defaults.
  val finalConfigs = defaultConfigs ++ userConfigs

  finalConfigs.foreach { case (key, value) =>
    sparkSession.sqlContext.setConf(key, value)
  }

  // Read each value back from the session so the log shows what is actually in effect.
  println("Configurations applied:")
  finalConfigs.keys.foreach { key =>
    println(s"$key -> ${sparkSession.sqlContext.getConf(key)}")
  }

  sparkSession.sql(query)
}

Expand Down
8 changes: 7 additions & 1 deletion core/src/main/scala/core/Migration.scala
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,13 @@ class Migration extends SparkListener {
)
}
else if (platform == "hive") {
dataframeFromTo.hiveToDataFrame(sparkSession, propertiesMap("query"))
val properties = if (platformObject.has("properties")) {
Option(platformObject.getJSONObject("properties"))
} else {
None
}

dataframeFromTo.hiveToDataFrame(sparkSession = sparkSession, query = propertiesMap("query"), properties = properties)
} else if (platform == "mongodb") {
dataframeFromTo.mongodbToDataFrame(propertiesMap("awsenv"), propertiesMap("cluster"), propertiesMap.getOrElse("overrideconnector", "false"), propertiesMap("database"), propertiesMap("authenticationdatabase"), propertiesMap("collection"), propertiesMap("login"), propertiesMap("password"), sparkSession, propertiesMap("vaultenv"), platformObject.optJSONObject("sparkoptions"), propertiesMap.getOrElse("secretstore", secretStoreDefaultValue), propertiesMap.getOrElse("authenticationenabled", "true"), propertiesMap.getOrElse("tmpfilelocation", null), propertiesMap.getOrElse("samplesize", null), propertiesMap.getOrElse("sslenabled", "false"))
}
Expand Down