
Commit ad8ff3a

Merge branch 'master' into SNAP-2158

Author: rmishra
2 parents: 8310890 + 28b43fe

157 files changed (+4090 additions, -646 deletions)


cluster/sbin/snappy-nodes.sh

Lines changed: 10 additions & 9 deletions
@@ -223,14 +223,6 @@ function execute() {
           2>&1 | sed "s/^/$host: /") &
       LAST_PID="$!"
     fi
-    if [ -z "$RUN_IN_BACKGROUND" ]; then
-      wait $LAST_PID
-    else
-      sleep 1
-      if [ -e "/proc/$LAST_PID/status" ]; then
-        sleep 1
-      fi
-    fi
   else
     if [ "$dirfolder" != "" ]; then
       # Create the directory for the snappy component if the folder is a default folder
@@ -239,7 +231,16 @@ function execute() {
       fi
     fi
     launchcommand="${@// /\\ } ${args} < /dev/null 2>&1"
-    eval $launchcommand
+    eval $launchcommand &
+    LAST_PID="$!"
+  fi
+  if [ -z "$RUN_IN_BACKGROUND" ]; then
+    wait $LAST_PID
+  else
+    sleep 1
+    if [ -e "/proc/$LAST_PID/status" ]; then
+      sleep 1
+    fi
   fi

   df=${dirfolder}

cluster/src/dunit/scala/io/snappydata/cluster/ClusterManagerTestBase.scala

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ abstract class ClusterManagerTestBase(s: String)
   sysProps.setProperty("p2p.minJoinTries", "1")

   // spark memory fill to detect any uninitialized memory accesses
-  // sysProps.setProperty("spark.memory.debugFill", "true")
+  sysProps.setProperty("spark.memory.debugFill", "true")

   var host: Host = _
   var vm0: VM = _

cluster/src/dunit/scala/io/snappydata/cluster/SnappySecureJob.scala

Lines changed: 10 additions & 1 deletion
@@ -21,11 +21,14 @@ import java.io.{FileOutputStream, PrintWriter}

 import com.pivotal.gemfirexd.Attribute
 import com.typesafe.config.{Config, ConfigException}
-import io.snappydata.Constant
+import io.snappydata.{Constant, ServiceManager}
+import io.snappydata.impl.LeadImpl
+import org.apache.spark.SparkCallbacks
 import org.apache.spark.sql.types.{DecimalType, IntegerType, StructField, StructType}
 import org.apache.spark.sql._
 import org.apache.spark.sql.streaming.SnappyStreamingJob
 import org.apache.spark.streaming.SnappyStreamingContext
+import org.apache.spark.ui.SnappyBasicAuthenticator

 // scalastyle:off println
 class SnappySecureJob extends SnappySQLJob {
@@ -56,6 +59,12 @@ class SnappySecureJob extends SnappySQLJob {
     } else {
       accessAndModifyTablesOwnedByOthers(snSession, jobConfig)
     }
+    // Confirm that our zeppelin interpreter is not initialized.
+    assert(ServiceManager.getLeadInstance.asInstanceOf[LeadImpl].getInterpreterServerClass() ==
+        null, "Zeppelin interpreter must not be initialized in secure cluster")
+    // Check SnappyData Pulse UI is secured by our custom authenticator.
+    assert(SparkCallbacks.getAuthenticatorForJettyServer().get
+        .isInstanceOf[SnappyBasicAuthenticator], "SnappyData Pulse UI not secured")
     pw.println(msg)
   } finally {
     pw.close()

cluster/src/dunit/scala/io/snappydata/cluster/SplitSnappyClusterDUnitTest.scala

Lines changed: 1 addition & 0 deletions
@@ -266,6 +266,7 @@ class SplitSnappyClusterDUnitTest(s: String)
       ColumnUpdateDeleteTests.testSNAP1925(session)
       ColumnUpdateDeleteTests.testSNAP1926(session)
       ColumnUpdateDeleteTests.testConcurrentOps(session)
+      ColumnUpdateDeleteTests.testSNAP2124(session, checkPruning = true)
     } finally {
       StoreUtils.TEST_RANDOM_BUCKETID_ASSIGNMENT = false
     }

cluster/src/main/scala/io/snappydata/impl/LeadImpl.scala

Lines changed: 16 additions & 9 deletions
@@ -81,6 +81,7 @@ class LeadImpl extends ServerImpl with Lead

     isTestSetup = bootProperties.getProperty("isTest", "false").toBoolean
     bootProperties.remove("isTest")
+    val authSpecified = Misc.checkAuthProvider(bootProperties)

     // prefix all store properties with "snappydata.store" for SparkConf

@@ -162,7 +163,7 @@ class LeadImpl extends ServerImpl with Lead

     val zeppelinEnabled = bootProperties.getProperty(
       Constant.ENABLE_ZEPPELIN_INTERPRETER, "false").equalsIgnoreCase("true")
-    if (zeppelinEnabled) {
+    if (zeppelinEnabled && !authSpecified) {
       try {

         val zeppelinIntpUtilClass = Utils.classForName(
@@ -190,7 +191,7 @@ class LeadImpl extends ServerImpl with Lead

     // The auth service is not yet initialized at this point.
     // So simply check the auth-provider property value.
-    if (Misc.checkAuthProvider(bootProperties)) {
+    if (authSpecified) {
       logInfo("Enabling user authentication for SnappyData Pulse")
       SparkCallbacks.setAuthenticatorForJettyServer()
     }
@@ -234,7 +235,7 @@ class LeadImpl extends ServerImpl with Lead
     }

     // wait for a while until servers get registered
-    val endWait = System.currentTimeMillis() + 10000
+    val endWait = System.currentTimeMillis() + 120000
     while (!SnappyContext.hasServerBlockIds && System.currentTimeMillis() <= endWait) {
       Thread.sleep(100)
     }
@@ -258,8 +259,10 @@ class LeadImpl extends ServerImpl with Lead
     // start other add-on services (job server)
     startAddOnServices(conf, confFile, jobServerConfig)

-    // finally start embedded zeppelin interpreter if configured
-    checkAndStartZeppelinInterpreter(zeppelinEnabled, bootProperties)
+    // finally start embedded zeppelin interpreter if configured and security is not enabled.
+    if (!authSpecified) {
+      checkAndStartZeppelinInterpreter(zeppelinEnabled, bootProperties)
+    }

     if (jobServerWait) {
       // mark RUNNING after job server and zeppelin initialization if so configured
@@ -620,13 +623,13 @@ class LeadImpl extends ServerImpl with Lead

   /**
    * This method is used to start the zeppelin interpreter thread.
-   * As discussed by default zeppelin interpreter will be enabled.User can disable it by
-   * setting "zeppelin.interpreter.enable" to false in leads conf file.User can also specify
-   * the port on which intrepreter should listen using property zeppelin.interpreter.port
+   * By default, zeppelin interpreter will be disabled. User can enable it by
+   * setting "zeppelin.interpreter.enable" to true in leads conf file. User can also specify
+   * the port on which interpreter should listen using property "zeppelin.interpreter.port"
    */
   private def checkAndStartZeppelinInterpreter(enabled: Boolean,
       bootProperties: Properties): Unit = {
-    // As discussed ZeppelinRemoteInterpreter Server will be enabled by default.
+    // As discussed ZeppelinRemoteInterpreter Server will be disabled by default.
     // [sumedh] Our startup times are already very high and we are looking to
     // cut that down and not increase further with these external utilities.
     if (enabled) {
@@ -654,6 +657,10 @@ class LeadImpl extends ServerImpl with Lead

     }
   }
+
+  def getInterpreterServerClass(): Class[_] = {
+    remoteInterpreterServerClass
+  }
 }

 object LeadImpl {
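
Note on the LeadImpl change above: the embedded Zeppelin interpreter is now off by default and is skipped entirely whenever an auth provider is configured, while the Pulse UI gets the custom authenticator instead. A minimal Scala sketch of the lead boot properties involved; the keys "zeppelin.interpreter.enable" and "zeppelin.interpreter.port" come from the doc comment in the diff, while the example port value and the "auth-provider" key are assumptions for illustration only:

    import java.util.Properties

    object LeadConfSketch extends App {
      val bootProperties = new Properties()
      // The interpreter is disabled by default; this opt-in flag enables it.
      bootProperties.setProperty("zeppelin.interpreter.enable", "true")
      // Optional listen port for the remote interpreter (example value only).
      bootProperties.setProperty("zeppelin.interpreter.port", "3768")
      // If an auth provider were also set (key name assumed), Misc.checkAuthProvider
      // would return true and the lead would skip interpreter startup while
      // securing the SnappyData Pulse UI with SnappyBasicAuthenticator.
      // bootProperties.setProperty("auth-provider", "LDAP")
      println(bootProperties)
    }
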

cluster/src/main/scala/org/apache/spark/SparkCallbacks.scala

Lines changed: 8 additions & 2 deletions
@@ -17,12 +17,12 @@
 package org.apache.spark

 import org.apache.spark
-
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.memory.StoreUnifiedManager
 import org.apache.spark.rpc.RpcEnv
 import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{RetrieveSparkAppConfig, SparkAppConfig}
 import org.apache.spark.ui.{JettyUtils, SnappyBasicAuthenticator}
+import org.eclipse.jetty.security.authentication.BasicAuthenticator

 /**
  * Calls that are needed to be sent to snappy-cluster classes because
@@ -39,8 +39,10 @@ object SparkCallbacks {
       ioEncryptionKey: Option[Array[Byte]],
       isLocal: Boolean): SparkEnv = {

-    SparkEnv.createExecutorEnv(driverConf, executorId, hostname,
+    val env = SparkEnv.createExecutorEnv(driverConf, executorId, hostname,
       port, numCores, ioEncryptionKey, isLocal)
+    env.memoryManager.asInstanceOf[StoreUnifiedManager].init()
+    env
   }

   def getRpcEnv(sparkEnv: SparkEnv): RpcEnv = {
@@ -92,6 +94,10 @@ object SparkCallbacks {
     }
   }

+  def getAuthenticatorForJettyServer(): Option[BasicAuthenticator] = {
+    JettyUtils.customAuthenticator
+  }
+
   def setSparkConf(sc: SparkContext, key: String, value: String): Unit = {
     if (value ne null) sc.conf.set(key, value) else sc.conf.remove(key)
   }

cluster/src/main/scala/org/apache/spark/memory/SnappyUnifiedMemoryManager.scala

Lines changed: 4 additions & 0 deletions
@@ -785,6 +785,10 @@ class SnappyUnifiedMemoryManager private[memory](
     wrapperStats.setMemoryManagerStats(stats)
   }

+  /**
+   * Initializes the memoryManager
+   */
+  override def init(): Unit = memoryForObject
 }

 object SnappyUnifiedMemoryManager extends Logging {
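
The init() body above looks like a no-op, but referencing memoryForObject forces that member to be built eagerly when SparkCallbacks.createExecutorEnv calls init() on the memory manager, rather than lazily on the first allocation. A minimal, self-contained sketch of this "touch a lazy member to initialize it" idiom, using hypothetical names rather than the actual SnappyData classes:

    import scala.collection.mutable

    trait StoreManager {
      def init(): Unit
    }

    class CachingManager extends StoreManager {
      // Built on first access; construction may be expensive.
      lazy val memoryForObject: mutable.Map[String, Long] = mutable.Map.empty

      // Referencing the lazy val forces its construction, so callers can pay
      // the cost up front at a well-defined point instead of mid-allocation.
      override def init(): Unit = memoryForObject
    }

    object InitSketch extends App {
      val manager: StoreManager = new CachingManager
      manager.init() // memoryForObject now exists before any allocation path uses it
    }
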

cluster/src/main/scala/org/apache/spark/scheduler/cluster/SnappyEmbeddedModeClusterManager.scala

Lines changed: 0 additions & 1 deletion
@@ -50,7 +50,6 @@ class SnappyEmbeddedModeClusterManager extends ExternalClusterManager {
       (split(0).trim, split(1).trim)
     }
     else if (locator.isEmpty ||
-        locator == "" ||
        locator == "null" ||
        !ServiceUtils.LOCATOR_URL_PATTERN.matcher(locator).matches()
     ) {

cluster/src/test/scala/io/snappydata/QueryTest.scala

Lines changed: 7 additions & 7 deletions
@@ -19,7 +19,6 @@ package io.snappydata

 import scala.collection.JavaConverters._

-import org.apache.spark.sql.QueryTest.checkAnswer
 import org.apache.spark.sql.execution.benchmark.ColumnCacheBenchmark
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.{AnalysisException, Row, SnappyContext, SnappySession, SparkSession}
@@ -271,10 +270,11 @@ class QueryTest extends SnappyFunSuite {
       "select count(*), city from $t group by city",
       "select count(*), city from $t where country like 'country_1%' group by city",
       "select count(*), city, collect_list(airport_id), collect_list(name), " +
-          "collect_list(country) from (select * from $t order by airport_id) as t group by city",
+          "collect_list(country) from (select * from $t order by airport_id, name, country) " +
+          "as t group by city order by city",
       "select count(*), city, collect_list(airport_id), collect_list(name), " +
           "collect_list(country) from (select * from $t where country like 'country_1%' " +
-          " order by airport_id) as t group by city"
+          " order by airport_id, name, country) as t group by city order by city"
     )

     // To validate the results against queries directly on data disabling snappy aggregation.
@@ -287,8 +287,8 @@ class QueryTest extends SnappyFunSuite {
     }

     for (((r1, r2), e) <- results.zip(expectedResults)) {
-      org.apache.spark.sql.QueryTest.checkAnswer(r1, e)
-      org.apache.spark.sql.QueryTest.checkAnswer(r2, e)
+      checkAnswer(r1, e)
+      checkAnswer(r2, e)
     }

     // fire updates and check again
@@ -302,8 +302,8 @@ class QueryTest extends SnappyFunSuite {
     }

     for (((r1, r2), e) <- results.zip(expectedResults)) {
-      org.apache.spark.sql.QueryTest.checkAnswer(r1, e)
-      org.apache.spark.sql.QueryTest.checkAnswer(r2, e)
+      checkAnswer(r1, e)
+      checkAnswer(r2, e)
     }
   }
 }
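
The extra sort keys and the trailing "order by city" in the queries above matter because collect_list gathers values in the order rows arrive: sorting only by airport_id leaves ties in engine-dependent order, so the collected lists could legitimately differ between runs or engines. A small standalone Spark sketch with hypothetical data showing how a total ordering makes the collected lists deterministic:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions.collect_list

    object CollectListSketch extends App {
      val spark = SparkSession.builder()
        .master("local[*]")
        .appName("collect-list-ordering")
        .getOrCreate()
      import spark.implicits._

      // Two rows share airport_id = 1, so ordering by airport_id alone leaves a tie.
      val t = Seq((1, "b", "city1"), (1, "a", "city1"), (2, "c", "city2"))
        .toDF("airport_id", "name", "city")

      // Break the tie with every projected column, then fix the output row order too.
      val deterministic = t.orderBy("airport_id", "name", "city")
        .groupBy("city")
        .agg(collect_list("name"))
        .orderBy("city")

      deterministic.show()
      spark.stop()
    }
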
Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@ (new file)
/*
 * Copyright (c) 2017 SnappyData, Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you
 * may not use this file except in compliance with the License. You
 * may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License. See accompanying
 * LICENSE file.
 */

package io.snappydata.benchmark.snappy

import java.io.{File, FileOutputStream, PrintStream}

import io.snappydata.SnappyFunSuite
import org.apache.spark.sql.execution.benchmark.TPCDSQuerySnappyBenchmark
import org.apache.spark.sql.{SnappySession, SparkSession}
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest.BeforeAndAfterAll


class TPCDSSuite extends SnappyFunSuite
    with BeforeAndAfterAll {

  var tpcdsQueries = Seq[String]()


  val conf =
    new SparkConf()
        .setMaster("local[*]")
        .setAppName("test-sql-context")
        .set("spark.driver.allowMultipleContexts", "true")
        .set("spark.sql.shuffle.partitions", "4")
        .set("spark.driver.memory", "1g")
        .set("spark.executor.memory", "1g")
        .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString)

  override def beforeAll(): Unit = {
    super.beforeAll()
    tpcdsQueries = Seq(
      "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11",
      "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20",
      "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30",
      "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40",
      "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50",
      "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60",
      "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70",
      "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80",
      "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90",
      "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
  }

  // Disabling the test run from precheckin as it takes around an hour.
  // TODO : Add TPCDS tests to be run as a part of smokePerf bt which will run on a dedicated
  // machine.

  ignore("Test with Snappy") {
    val sc = new SparkContext(conf)
    TPCDSQuerySnappyBenchmark.snappy = new SnappySession(sc)
    val dataLocation = "/export/shared/QA_DATA/TPCDS/data"
    val snappyHome = System.getenv("SNAPPY_HOME")
    val snappyRepo = s"$snappyHome/../../.."

    TPCDSQuerySnappyBenchmark.execute(dataLocation,
      queries = tpcdsQueries, true, s"$snappyRepo/spark/sql/core/src/test/resources/tpcds")
  }

  // Disabling the test run from precheckin as it takes around an hour.
  // TODO : Add TPCDS tests to be run as a part of smokePerf bt which will run on a dedicated
  // machine.

  ignore("Test with Spark") {
    TPCDSQuerySnappyBenchmark.spark = SparkSession.builder.config(conf).getOrCreate()
    val dataLocation = "/export/shared/QA_DATA/TPCDS/data"
    val snappyHome = System.getenv("SNAPPY_HOME")
    val snappyRepo = s"$snappyHome/../../..";

    TPCDSQuerySnappyBenchmark.execute(dataLocation,
      queries = tpcdsQueries, false, s"$snappyRepo/spark/sql/core/src/test/resources/tpcds")

  }

  // Disabling the validation for now as this requires the expected result files to be created
  // using stock spark before hand.

  ignore("Validate Results") {

    for (query <- tpcdsQueries) {

      val actualResultsAvailableAt = "path for actual result"
      val expectedResultsAvailableAt = "path for expected result"

      val resultFileStream: FileOutputStream = new FileOutputStream(new File("Comparison.out"))
      val resultOutputStream: PrintStream = new PrintStream(resultFileStream)

      val expectedFile = sc.textFile(s"file://$expectedResultsAvailableAt/Spark_$query.out")
      val actualFile = sc.textFile(s"file://$actualResultsAvailableAt/Snappy_$query.out")

      val expectedLineSet = expectedFile.collect().toList.sorted
      val actualLineSet = actualFile.collect().toList.sorted

      if (!actualLineSet.equals(expectedLineSet)) {
        if (!(expectedLineSet.size == actualLineSet.size)) {
          resultOutputStream.println(s"For $query " +
            s"result count mismatched observed with " +
            s"expected ${expectedLineSet.size} and actual ${actualLineSet.size}")
        } else {
          for ((expectedLine, actualLine) <- expectedLineSet zip actualLineSet) {
            if (!expectedLine.equals(actualLine)) {
              resultOutputStream.println(s"For $query result mismatched observed")
              resultOutputStream.println(s"Excpected : $expectedLine")
              resultOutputStream.println(s"Found : $actualLine")
              resultOutputStream.println(s"-------------------------------------")
            }
          }
        }
      }
    }
  }
}
