Skip to content
This repository was archived by the owner on Oct 23, 2024. It is now read-only.

Commit 6982478

Browse files
External Volumes Integration Tests for Hello-World and Cassandra. (#3322)
SDK: Add improved logging to indicate that the Pod-Replacement policy has been used. Hello-World: Add a scenario with external volumes and a basic integration test. Cassandra: Add a flag to enable/disable the Pod-Replacement failure policy; remove mentions of Portworx from the service spec and switch NetApp to the Generic Driver; add a basic integration test.
1 parent c53d4fe commit 6982478

File tree

18 files changed

+476
-87
lines changed

18 files changed

+476
-87
lines changed

conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def configure_universe(tmpdir_factory):
7676

7777
@pytest.fixture(scope="session", autouse=True)
7878
def configure_external_volumes():
79-
if is_env_var_set("ENABLE_EXTERNAL_VOLUMES", default=""):
79+
if is_env_var_set("ENABLE_EXTERNAL_VOLUMES", default=str(False)):
8080
yield from sdk_external_volumes.external_volumes_session()
8181
else:
8282
yield

frameworks/cassandra/src/main/dist/svc.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ pods:
6363
type: DOCKER
6464
container-path: container-path
6565
driver-name: {{CASSANDRA_EXTERNAL_VOLUME_DRIVER_NAME}}
66-
driver-options: '{{{CASSANDRA_EXTERNAL_VOLUME_PORTWORX_OPTIONS}}}'
66+
driver-options: '{{{CASSANDRA_EXTERNAL_VOLUME_DRIVER_OPTIONS}}}'
6767
{{#CASSANDRA_EXTERNAL_VOLUME_NAME}}
6868
volume-name: {{CASSANDRA_EXTERNAL_VOLUME_NAME}}
6969
{{/CASSANDRA_EXTERNAL_VOLUME_NAME}}

frameworks/cassandra/src/main/java/com/mesosphere/sdk/cassandra/scheduler/Main.java

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import com.mesosphere.sdk.cassandra.api.SeedsResource;
44
import com.mesosphere.sdk.config.validate.TaskEnvCannotChange;
5+
import com.mesosphere.sdk.framework.EnvStore;
56
import com.mesosphere.sdk.scheduler.DefaultScheduler;
67
import com.mesosphere.sdk.scheduler.SchedulerBuilder;
78
import com.mesosphere.sdk.scheduler.SchedulerConfig;
@@ -32,18 +33,19 @@ public final class Main {
3233
private Main() {}
3334

3435
public static void main(String[] args) throws Exception {
36+
final EnvStore envStore = EnvStore.fromEnv();
3537
if (args.length != 1) {
3638
throw new IllegalArgumentException(
3739
"Expected one file argument, got: " + Arrays.toString(args)
3840
);
3941
}
4042
SchedulerRunner
41-
.fromSchedulerBuilder(createSchedulerBuilder(new File(args[0])))
43+
.fromSchedulerBuilder(createSchedulerBuilder(new File(args[0]), envStore))
4244
.run();
4345
}
4446

4547
@SuppressWarnings("checkstyle:MultipleStringLiterals")
46-
private static SchedulerBuilder createSchedulerBuilder(File yamlSpecFile) throws Exception {
48+
private static SchedulerBuilder createSchedulerBuilder(File yamlSpecFile, EnvStore envStore) throws Exception {
4749
SchedulerConfig schedulerConfig = SchedulerConfig.fromEnv();
4850
RawServiceSpec rawServiceSpec = RawServiceSpec.newBuilder(yamlSpecFile).build();
4951
List<String> localSeeds = CassandraSeedUtils
@@ -64,7 +66,7 @@ private static SchedulerBuilder createSchedulerBuilder(File yamlSpecFile) throws
6466
}
6567

6668
DefaultServiceSpec serviceSpec = DefaultServiceSpec.newBuilder(serviceSpecGenerator.build())
67-
.replacementFailurePolicy(getReplacementFailurePolicy())
69+
.replacementFailurePolicy(getReplacementFailurePolicy(envStore))
6870
.build();
6971

7072
return DefaultScheduler.newBuilder(serviceSpec, schedulerConfig)
@@ -82,13 +84,15 @@ private static SchedulerBuilder createSchedulerBuilder(File yamlSpecFile) throws
8284
.withSingleRegionConstraint();
8385
}
8486

85-
private static ReplacementFailurePolicy getReplacementFailurePolicy() throws Exception {
86-
return ReplacementFailurePolicy.newBuilder()
87-
.permanentFailureTimoutSecs(
88-
Integer.valueOf(System.getenv("PERMANENT_FAILURE_TIMEOUT_SECS")))
89-
.minReplaceDelaySecs(
90-
Integer.valueOf(System.getenv("MIN_REPLACE_DELAY_SECS")))
91-
.build();
87+
private static ReplacementFailurePolicy getReplacementFailurePolicy(EnvStore envStore) throws Exception {
88+
if (envStore.getOptionalBoolean("ENABLE_AUTOMATIC_POD_REPLACEMENT", false)) {
89+
return ReplacementFailurePolicy.newBuilder()
90+
.permanentFailureTimoutSecs(Integer.valueOf(System.getenv("PERMANENT_FAILURE_TIMEOUT_SECS")))
91+
.minReplaceDelaySecs(Integer.valueOf(System.getenv("MIN_REPLACE_DELAY_SECS")))
92+
.build();
93+
} else {
94+
return null;
95+
}
9296
}
9397

9498
private static Collection<Object> getResources(List<String> localSeeds) {
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11
from typing import Iterator
22

33
import pytest
4+
import sdk_external_volumes
45
import sdk_security
56
from tests import config
67

78

89
@pytest.fixture(scope="session")
def configure_security(configure_universe: None) -> Iterator[None]:
    """Session-scoped fixture that delegates to ``sdk_security.security_session``.

    Depends on ``configure_universe`` so that fixture is set up first.
    """
    security_session = sdk_security.security_session(config.SERVICE_NAME)
    yield from security_session
12+
13+
14+
@pytest.fixture(scope="session")
def configure_external_volumes() -> Iterator[None]:
    """Session-scoped fixture that delegates to ``sdk_external_volumes.external_volumes_session``."""
    # Handle creation of external volumes.
    yield from sdk_external_volumes.external_volumes_session()
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import logging
2+
import pytest
3+
import re
4+
5+
import sdk_agents
6+
import sdk_install
7+
import sdk_plan
8+
import sdk_tasks
9+
from tests import config
10+
11+
log = logging.getLogger(__name__)
12+
13+
14+
@pytest.fixture(scope="module", autouse=True)
def configure_package(configure_security):
    """Uninstall the service before this module's tests run and again afterwards."""

    def _remove_service():
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    try:
        _remove_service()
        yield  # let the test session execute
    finally:
        # Runs whether or not the tests (or the initial uninstall) succeeded.
        _remove_service()
21+
22+
23+
@pytest.mark.external_volumes
@pytest.mark.sanity
@pytest.mark.dcos_min_version("2.1")
def test_default_deployment():
    """Install the service with external volumes enabled and check that it deploys."""
    expected_task_count = 3
    external_volume_options = {"nodes": {"external_volume": {"enabled": True}}}

    sdk_install.install(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        expected_task_count,
        additional_options=external_volume_options,
        wait_for_deployment=True,
    )

    # Wait for scheduler to restart.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
41+
42+
43+
@pytest.mark.skip(reason="Conflicts with Cassandra Custom Recovery Manager")
@pytest.mark.sanity
def test_auto_replace_on_drain():
    """Drain one agent and check that each node task on it is replaced on a different agent.

    The agent is chosen from the tasks returned by
    ``sdk_tasks.get_tasks_avoiding_scheduler``. Once it has been drained it is
    always reactivated, even when recovery or an assertion fails, so that a
    failing run does not leave the agent drained for later tests and uninstall.
    """
    candidate_tasks = sdk_tasks.get_tasks_avoiding_scheduler(
        config.SERVICE_NAME, re.compile("^node-[0-9]+-server$")
    )

    assert len(candidate_tasks) != 0, "Could not find a node to drain"

    # Pick the host of the first task from the above list
    replace_agent_id = candidate_tasks[0].agent_id
    replace_tasks = [task for task in candidate_tasks if task.agent_id == replace_agent_id]
    log.info(
        "Tasks on agent {} to be replaced after drain: {}".format(replace_agent_id, replace_tasks)
    )
    sdk_agents.drain_agent(replace_agent_id)

    try:
        sdk_plan.wait_for_kicked_off_recovery(config.SERVICE_NAME)
        sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)

        new_tasks = sdk_tasks.get_summary()

        for replaced_task in replace_tasks:
            # A replacement keeps the task name but gets a new task id.
            replacements = [
                task
                for task in new_tasks
                if task.name == replaced_task.name and task.id != replaced_task.id
            ]
            # Fail with a readable message instead of an IndexError on an empty list.
            assert replacements, "No replacement task found for {}".format(replaced_task.name)
            new_task = replacements[0]
            log.info(
                "Checking affected task has moved to a new agent:\n"
                "old={}\nnew={}".format(replaced_task, new_task)
            )
            assert replaced_task.agent_id != new_task.agent_id
    finally:
        # Reactivate the drained agent, otherwise uninstall plans will be halted for portworx
        sdk_agents.reactivate_agent(replace_agent_id)

frameworks/cassandra/universe/config.json

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -47,16 +47,6 @@
4747
"type": "string",
4848
"default": ""
4949
},
50-
"permanent-failure-timeout-secs": {
51-
"type": "integer",
52-
"description": "Time in seconds to wait before declaring a task as permanently failed.",
53-
"default": 120
54-
},
55-
"min-replace-delay-secs": {
56-
"type": "integer",
57-
"description": "Time to wait between destructive task recoveries.",
58-
"default": 240
59-
},
6050
"log_level": {
6151
"description": "The log level for the DC/OS service.",
6252
"type": "string",
@@ -441,6 +431,27 @@
441431
"minimum": 15
442432
}
443433
}
434+
},
435+
"pod-replacement-failure-policy": {
436+
"description": "Options relating to automatic pod-replacement failure policies.",
437+
"type": "object",
438+
"properties": {
439+
"enable-automatic-pod-replacement": {
440+
"description": "Determines whether pods should be replaced automatically on failure.",
441+
"type": "boolean",
442+
"default": false
443+
},
444+
"permanent-failure-timeout-secs": {
445+
"description": "Default time to wait before declaring a pod as permanently failed in seconds.",
446+
"type": "integer",
447+
"default": 120
448+
},
449+
"min-replace-delay-secs": {
450+
"description": "Default time to wait between successive pod-replace operations in seconds.",
451+
"type": "integer",
452+
"default": 240
453+
}
454+
}
444455
}
445456
},
446457
"required": [
@@ -495,18 +506,18 @@
495506
}
496507
},
497508
"external_volume": {
509+
"description": "Cassandra external volume configuration.",
498510
"type": "object",
499-
"description": "The Cassandra external volume configuration.\nOnly Portworx external volumes are supported.",
500511
"properties": {
501512
"enabled": {
502513
"type": "boolean",
503-
"description": "If true, external profile will be used.",
514+
"description": "If true, external volumes will be used.",
504515
"default": false
505516
},
506-
"portworx_volume_options": {
517+
"driver_options": {
507518
"type": "string",
508519
"default": "size=10",
509-
"description": "Volume options."
520+
"description": "External Volume storage provider options."
510521
},
511522
"volume_name": {
512523
"type": "string",
@@ -515,7 +526,7 @@
515526
},
516527
"driver_name": {
517528
"type": "string",
518-
"description": "Docker volume driver name.",
529+
"description": "External Volume storage provider to use.",
519530
"default": "pxd"
520531
}
521532
}

frameworks/cassandra/universe/marathon.json.mustache

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@
4141
"FRAMEWORK_LOG_LEVEL": "{{service.log_level}}",
4242
"CASSANDRA_VERSION": "3.11.6",
4343
"S3CLI_VERSION": "s3cli-0.0.55-linux-amd64",
44-
"PERMANENT_FAILURE_TIMEOUT_SECS": "{{service.permanent-failure-timeout-secs}}",
45-
"MIN_REPLACE_DELAY_SECS": "{{service.min-replace-delay-secs}}",
4644
4745
{{#service.service_account_secret}}
4846
"DCOS_SERVICE_ACCOUNT_CREDENTIAL": "secrets/service-account.json",
@@ -112,10 +110,16 @@
112110
{{#nodes.volume_profile}}
113111
"CASSANDRA_VOLUME_PROFILE": "{{nodes.volume_profile}}",
114112
{{/nodes.volume_profile}}
113+
115114
"CASSANDRA_EXTERNAL_VOLUME_ENABLED" : "{{nodes.external_volume.enabled}}",
116-
"CASSANDRA_EXTERNAL_VOLUME_PORTWORX_OPTIONS" : "{{nodes.external_volume.portworx_volume_options}}",
115+
"CASSANDRA_EXTERNAL_VOLUME_DRIVER_OPTIONS" : "{{nodes.external_volume.driver_options}}",
117116
"CASSANDRA_EXTERNAL_VOLUME_NAME" : "{{nodes.external_volume.volume_name}}",
118117
"CASSANDRA_EXTERNAL_VOLUME_DRIVER_NAME" : "{{nodes.external_volume.driver_name}}",
118+
119+
"ENABLE_AUTOMATIC_POD_REPLACEMENT": "{{service.pod-replacement-failure-policy.enable-automatic-pod-replacement}}",
120+
"PERMANENT_FAILURE_TIMEOUT_SECS": "{{service.pod-replacement-failure-policy.permanent-failure-timeout-secs}}",
121+
"MIN_REPLACE_DELAY_SECS": "{{service.pod-replacement-failure-policy.min-replace-delay-secs}}",
122+
119123
"TASKCFG_ALL_CASSANDRA_HEAP_SIZE_MB": "{{nodes.heap.size}}",
120124
"TASKCFG_ALL_CASSANDRA_HEAP_NEW_MB": "{{nodes.heap.new}}",
121125
"CASSANDRA_JAVA_URI": "{{resource.assets.uris.cassandra-jre-tar-gz}}",
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
name: {{FRAMEWORK_NAME}}
2+
scheduler:
3+
principal: {{FRAMEWORK_PRINCIPAL}}
4+
user: {{FRAMEWORK_USER}}
5+
pods:
6+
hello:
7+
count: {{HELLO_COUNT}}
8+
placement: '{{{HELLO_PLACEMENT}}}'
9+
external-volumes:
10+
hello-volume:
11+
type: DOCKER
12+
volume-mode: RW
13+
container-path: hello-container-path
14+
driver-name: {{EXTERNAL_VOLUME_DRIVER_NAME}}
15+
driver-options: {{EXTERNAL_VOLUME_DRIVER_OPTIONS}}
16+
{{#HELLO_EXTERNAL_VOLUME_NAME}}
17+
volume-name: {{HELLO_EXTERNAL_VOLUME_NAME}}
18+
{{/HELLO_EXTERNAL_VOLUME_NAME}}
19+
tasks:
20+
server:
21+
goal: RUNNING
22+
cmd: env && echo hello >> hello-container-path/output && sleep $SLEEP_DURATION
23+
cpus: {{HELLO_CPUS}}
24+
memory: {{HELLO_MEM}}
25+
env:
26+
SLEEP_DURATION: {{SLEEP_DURATION}}
27+
health-check:
28+
cmd: stat hello-container-path/output
29+
interval: 5
30+
grace-period: 30
31+
delay: 0
32+
timeout: 10
33+
max-consecutive-failures: 3
34+
labels: {{HELLO_LABELS}}
35+
world:
36+
count: {{WORLD_COUNT}}
37+
allow-decommission: true
38+
placement: '{{{WORLD_PLACEMENT}}}'
39+
external-volumes:
40+
world-volume:
41+
type: DOCKER
42+
volume-mode: RW
43+
container-path: world-container-path
44+
driver-name: {{EXTERNAL_VOLUME_DRIVER_NAME}}
45+
driver-options: {{EXTERNAL_VOLUME_DRIVER_OPTIONS}}
46+
{{#WORLD_EXTERNAL_VOLUME_NAME}}
47+
volume-name: {{WORLD_EXTERNAL_VOLUME_NAME}}
48+
{{/WORLD_EXTERNAL_VOLUME_NAME}}
49+
tasks:
50+
server:
51+
goal: RUNNING
52+
cmd: |
53+
# for graceful shutdown
54+
# trap SIGTERM and mock a cleanup timeframe
55+
terminated () {
56+
echo "$(date) received SIGTERM, zzz for 3 ..."
57+
sleep 3
58+
echo "$(date) ... all clean, peace out"
59+
exit 0
60+
}
61+
trap terminated SIGTERM
62+
echo "$(date) trapping SIGTERM, watch here for the signal..."
63+
64+
echo "${TASK_NAME}" >>world-container-path/output &&
65+
# instead of running for a short duration (equal to SLEEP_DURATION), run infinitely
66+
# to allow for testing of SIGTERM..grace..SIGKILL
67+
while true; do
68+
sleep 0.1
69+
done
70+
cpus: {{WORLD_CPUS}}
71+
memory: {{WORLD_MEM}}
72+
env:
73+
SLEEP_DURATION: {{SLEEP_DURATION}}
74+
readiness-check:
75+
# wordcount (wc) will report an error if the file does not exist, which effectively is zero (0) bytes
76+
# so send the error to /dev/null, BUT also zero-left-pad the variable BYTES to ensure that it is zero
77+
when empty, for comparison's sake.
78+
cmd: BYTES="$(wc -c world-container-path/output 2>/dev/null| awk '{print $1;}')" && [ 0$BYTES -gt 0 ]
79+
interval: {{WORLD_READINESS_CHECK_INTERVAL}}
80+
delay: {{WORLD_READINESS_CHECK_DELAY}}
81+
timeout: {{WORLD_READINESS_CHECK_TIMEOUT}}
82+
kill-grace-period: {{WORLD_KILL_GRACE_PERIOD}}

frameworks/helloworld/src/main/dist/multiport.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ pods:
99
- {{BOOTSTRAP_URI}}
1010
resource-sets:
1111
multi-port-resources:
12+
cpus: {{HELLO_CPUS}}
13+
memory: {{HELLO_MEM}}
1214
ports:
1315
port_one:
1416
port: {{HELLO_PORT_ONE}}
@@ -51,7 +53,5 @@ pods:
5153
sum=$(($exit_1+$exit_2))
5254
echo "exit codes : ${exit_1} ${exit_2} and sum : ${sum}"
5355
exit $sum
54-
cpus: {{HELLO_CPUS}}
55-
memory: {{HELLO_MEM}}
5656
env:
5757
HELLO_PORT_ONE: {{HELLO_PORT_ONE}}

0 commit comments

Comments
 (0)