Skip to content

Commit 9546666

Browse files
authored
Merge pull request #115 from marklogic/release/2.1.0
Merging 2.1.0 into master
2 parents 428fbf9 + c86ac17 commit 9546666

File tree

117 files changed

+2714
-721
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

117 files changed

+2714
-721
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,5 @@ gradle-local.properties
1515
logs
1616
.ipynb_checkpoints
1717
venv
18+
.venv
19+
docker

CONTRIBUTING.md

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,22 @@ have cloned this repository to your local workstation.
55

66
In order to develop and/or test the connector, or to try out the PySpark instructions below, you first
77
need to deploy the test application in this project to MarkLogic. You can do so either on your own installation of
8-
MarkLogic, or you can use `docker-compose` to install a 3-node MarkLogic cluster with a load balancer in front of it.
8+
MarkLogic, or you can use `docker-compose` to install MarkLogic, optionally as a 3-node cluster with a load balancer
9+
in front of it.
910

10-
## Installing a 3-node cluster with docker-compose
11+
## Installing MarkLogic with docker-compose
1112

1213
If you wish to use `docker-compose`, perform the following steps before deploying the test application.
1314

1415
1. [Install Docker](https://docs.docker.com/get-docker/).
1516
2. Ensure that you don't have a MarkLogic instance running locally (if you do, you may run into port conflicts in
1617
the next step).
17-
3. Run `./gradlew dockerUp` (Gradle tasks are included as shortcuts for running Docker commands). This will start up
18-
a 3-node cluster with a load balancer in front of it. Additionally, the 8000/8001/8002 ports are available on the
19-
"bootstrap" node of the cluster for accessing the out-of-the-box MarkLogic applications.
18+
3. Run `docker-compose up -d --build`.
19+
20+
The above will result in a new MarkLogic instance with a single node.
21+
22+
Alternatively, if you would like to test against a 3-node MarkLogic cluster with a load balancer in front of it,
23+
run `docker-compose -f docker-compose-3nodes.yaml up -d --build`.
2024

2125
### Accessing MarkLogic logs in Grafana
2226

@@ -85,7 +89,7 @@ This will produce a single jar file for the connector in the `./build/libs` dire
8589

8690
You can then launch PySpark with the connector available via:
8791

88-
pyspark --jars build/libs/marklogic-spark-connector-2.0-SNAPSHOT.jar
92+
pyspark --jars build/libs/marklogic-spark-connector-2.1.0.jar
8993

9094
The below command is an example of loading data from the test application deployed via the instructions at the top of
9195
this page.
@@ -114,3 +118,7 @@ more commands you can try out.
114118

115119
See the section with the same name in the
116120
[MarkLogic Koop contributing guide](https://github.com/koopjs/koop-provider-marklogic/blob/master/CONTRIBUTING.md).
121+
122+
If you are looking to test the examples in the documentation, please be sure to follow the instructions in the
123+
"Getting Started" guide. That involves creating an application in MarkLogic that has an app server listening on port
124+
8003. You will use that app server instead of the test-app server on port 8016.

build.gradle

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
plugins {
22
id 'java-library'
33
id 'net.saliman.properties' version '1.5.2'
4-
id 'com.github.johnrengelman.shadow' version '7.1.2'
5-
id "com.marklogic.ml-gradle" version "4.5.2"
4+
id 'com.github.johnrengelman.shadow' version '8.1.1'
5+
id "com.marklogic.ml-gradle" version "4.6.0"
66
id 'maven-publish'
77
id 'signing'
88
}
99

1010
group 'com.marklogic'
11-
version '2.0.0'
11+
version '2.1.0'
1212

1313
java {
1414
sourceCompatibility = 1.8
@@ -21,7 +21,15 @@ repositories {
2121

2222
dependencies {
2323
compileOnly 'org.apache.spark:spark-sql_2.12:' + sparkVersion
24-
implementation "com.marklogic:marklogic-client-api:6.2.1"
24+
implementation ("com.marklogic:marklogic-client-api:6.4.0") {
25+
// The Java Client uses Jackson 2.15.2; Spark 3.4.x does not yet support that and will throw the following error:
26+
// Scala module 2.14.2 requires Jackson Databind version >= 2.14.0 and < 2.15.0 - Found jackson-databind version 2.15.2
27+
// So the 4 Jackson modules are excluded so that the Jackson versions provided by Spark are used instead.
28+
exclude module: 'jackson-core'
29+
exclude module: 'jackson-databind'
30+
exclude module: 'jackson-annotations'
31+
exclude module: 'jackson-dataformat-csv'
32+
}
2533

2634
// Makes it possible to use lambdas in Java 8 to implement Spark's Function1 and Function2 interfaces
2735
// See https://github.com/scala/scala-java8-compat for more information
@@ -31,8 +39,21 @@ dependencies {
3139
}
3240

3341
testImplementation 'org.apache.spark:spark-sql_2.12:' + sparkVersion
34-
testImplementation 'com.marklogic:ml-app-deployer:4.5.2'
35-
testImplementation 'com.marklogic:marklogic-junit5:1.3.0'
42+
43+
// The exclusions in these two modules ensure that we use the Jackson libraries from spark-sql when running the tests.
44+
testImplementation ('com.marklogic:ml-app-deployer:4.6.0') {
45+
exclude module: 'jackson-core'
46+
exclude module: 'jackson-databind'
47+
exclude module: 'jackson-annotations'
48+
exclude module: 'jackson-dataformat-csv'
49+
}
50+
testImplementation ('com.marklogic:marklogic-junit5:1.4.0') {
51+
exclude module: 'jackson-core'
52+
exclude module: 'jackson-databind'
53+
exclude module: 'jackson-annotations'
54+
exclude module: 'jackson-dataformat-csv'
55+
}
56+
3657
testImplementation "ch.qos.logback:logback-classic:1.3.5"
3758
testImplementation "org.slf4j:jcl-over-slf4j:1.7.36"
3859
testImplementation "org.skyscreamer:jsonassert:1.5.1"
@@ -44,6 +65,14 @@ test {
4465
environment "mlHost", mlHost
4566
}
4667

68+
task reloadTestData(type: com.marklogic.gradle.task.MarkLogicTask) {
69+
description = "Convenience task for clearing the test database and reloading the test data; only intended for a connector developer to use."
70+
doLast {
71+
new com.marklogic.mgmt.resource.databases.DatabaseManager(getManageClient()).clearDatabase("spark-test-test-content")
72+
}
73+
}
74+
reloadTestData.finalizedBy mlLoadData
75+
4776
if (JavaVersion.current().isCompatibleWith(JavaVersion.VERSION_17)) {
4877
test {
4978
// See https://stackoverflow.com/questions/72724816/running-unit-tests-with-spark-3-3-0-on-java-17-fails-with-illegalaccesserror-cl
@@ -67,16 +96,11 @@ shadowJar {
6796
}
6897

6998
task perfTest(type: JavaExec) {
70-
main = "com.marklogic.spark.reader.PerformanceTester"
99+
mainClass = "com.marklogic.spark.reader.PerformanceTester"
71100
classpath = sourceSets.test.runtimeClasspath
72101
args mlHost
73102
}
74103

75-
task dockerUp(type: Exec) {
76-
description = "Creates and starts a 3 node MarkLogic cluster."
77-
commandLine "docker-compose", "up", "-d", "--build"
78-
}
79-
80104
task dockerBuildCache(type: Exec) {
81105
description = "Creates an image named 'marklogic-spark-cache' containing a cache of the Gradle dependencies."
82106
commandLine 'docker', 'build', '--no-cache', '-t', 'marklogic-spark-cache', '.'
@@ -110,12 +134,12 @@ task dockerPerfTest(type: Exec) {
110134
}
111135

112136
task sourcesJar(type: Jar, dependsOn: classes) {
113-
classifier 'sources'
137+
archiveClassifier = "sources"
114138
from sourceSets.main.allSource
115139
}
116140

117141
task javadocJar(type: Jar, dependsOn: javadoc) {
118-
classifier "javadoc"
142+
archiveClassifier = "javadoc"
119143
from javadoc
120144
}
121145
javadoc.failOnError = false
@@ -190,6 +214,6 @@ task gettingStartedZip(type: Zip) {
190214
from "examples/getting-started"
191215
exclude "build", ".gradle", "gradle-*.properties"
192216
into "marklogic-spark-getting-started-${version}"
193-
archiveName "marklogic-spark-getting-started-${version}.zip"
194-
destinationDir(file('build'))
217+
archiveFileName = "marklogic-spark-getting-started-${version}.zip"
218+
destinationDirectory = file("build")
195219
}

docker-compose-3nodes.yaml

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
version: '3.8'
2+
name: marklogic_spark_3nodes
3+
4+
services:
5+
6+
# See https://caddyserver.com/docs/quick-starts/reverse-proxy for more information on using Caddy as a reverse proxy
7+
# and load balancer.
8+
caddy-load-balancer:
9+
image: caddy:2-alpine
10+
volumes:
11+
- ./caddy/data:/data
12+
- ./caddy/config/Caddyfile:/etc/caddy/Caddyfile
13+
depends_on:
14+
- bootstrap_3n
15+
- node2
16+
- node3
17+
ports:
18+
# For running marklogic-spark-connector tests against the 3-node cluster
19+
- 8016:8816
20+
- 8015:8815
21+
# For running performance tests against quick-table data
22+
- 8009:8809
23+
# For the getting-started project
24+
- 8020:8820
25+
networks:
26+
- external_net
27+
- internal_net
28+
29+
# Named "bootstrap" as it's the first MarkLogic host created. Other MarkLogic hosts will then join the cluster created
30+
# by this host. Note that each MarkLogic host has its 8000-8002 ports exposed externally so that the apps on those
31+
# ports can each be accessed if needed.
32+
bootstrap_3n:
33+
image: "marklogicdb/marklogic-db:11.1.0-centos-1.1.0"
34+
platform: linux/amd64
35+
container_name: bootstrap_3n
36+
hostname: bootstrap_3n.local
37+
labels:
38+
logging: "promtail"
39+
logging_jobname: "containerlogs"
40+
environment:
41+
- MARKLOGIC_INIT=true
42+
- MARKLOGIC_ADMIN_USERNAME=admin
43+
- MARKLOGIC_ADMIN_PASSWORD=admin
44+
volumes:
45+
- ./docker/marklogic/logs/bootstrap_3n:/var/opt/MarkLogic/Logs
46+
ports:
47+
- 8000-8002:8000-8002
48+
networks:
49+
- external_net
50+
- internal_net
51+
52+
node2:
53+
image: "marklogicdb/marklogic-db:11.1.0-centos-1.1.0"
54+
platform: linux/amd64
55+
container_name: node2
56+
hostname: node2.local
57+
labels:
58+
logging: "promtail"
59+
logging_jobname: "containerlogs"
60+
environment:
61+
- MARKLOGIC_INIT=true
62+
- MARKLOGIC_ADMIN_USERNAME=admin
63+
- MARKLOGIC_ADMIN_PASSWORD=admin
64+
- MARKLOGIC_JOIN_CLUSTER=true
65+
- MARKLOGIC_BOOTSTRAP_HOST=bootstrap_3n.local
66+
volumes:
67+
- ./docker/marklogic/logs/node2:/var/opt/MarkLogic/Logs
68+
depends_on:
69+
- bootstrap_3n
70+
ports:
71+
- 8100-8102:8000-8002
72+
networks:
73+
- external_net
74+
- internal_net
75+
76+
node3:
77+
image: "marklogicdb/marklogic-db:11.1.0-centos-1.1.0"
78+
platform: linux/amd64
79+
container_name: node3
80+
hostname: node3.local
81+
labels:
82+
logging: "promtail"
83+
logging_jobname: "containerlogs"
84+
environment:
85+
- MARKLOGIC_INIT=true
86+
- MARKLOGIC_ADMIN_USERNAME=admin
87+
- MARKLOGIC_ADMIN_PASSWORD=admin
88+
- MARKLOGIC_JOIN_CLUSTER=true
89+
- MARKLOGIC_BOOTSTRAP_HOST=bootstrap_3n.local
90+
volumes:
91+
- ./docker/marklogic/logs/node3:/var/opt/MarkLogic/Logs
92+
depends_on:
93+
- bootstrap_3n
94+
ports:
95+
- 8200-8202:8000-8002
96+
networks:
97+
- external_net
98+
- internal_net
99+
100+
grafana:
101+
image: grafana/grafana:latest
102+
ports:
103+
- 3000:3000
104+
volumes:
105+
- ./config/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yaml
106+
environment:
107+
- GF_AUTH_ANONYMOUS_ENABLED=true
108+
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
109+
- GF_AUTH_DISABLE_LOGIN_FORM=true
110+
networks:
111+
- external_net
112+
- internal_net
113+
114+
loki:
115+
image: grafana/loki:latest
116+
ports:
117+
- 3100:3100
118+
command: -config.file=/etc/loki/local-config.yaml
119+
networks:
120+
- external_net
121+
- internal_net
122+
123+
promtail:
124+
image: grafana/promtail:latest
125+
container_name: promtail
126+
volumes:
127+
- ./config/promtail.yaml:/etc/promtail/docker-config.yaml
128+
- ./docker/marklogic/logs:/var/log/marklogic
129+
- /var/lib/docker/containers:/var/lib/docker/containers:ro
130+
- /var/run/docker.sock:/var/run/docker.sock
131+
command: -config.file=/etc/promtail/docker-config.yaml
132+
depends_on:
133+
- loki
134+
- bootstrap_3n
135+
- node2
136+
- node3
137+
networks:
138+
- internal_net
139+
140+
networks:
141+
external_net: { }
142+
internal_net:
143+
internal: true
144+
driver_opts:
145+
com.docker.network.bridge.enable_icc: "true"

0 commit comments

Comments
 (0)