
Commit eb85d9b

Merge pull request #182 from marklogic/develop
Merge develop into master for 1.9 release
2 parents: b892912 + d37e553

71 files changed: +1053, -519 lines


CONTRIBUTING.md

+135, -177 (large diff not rendered)

build.gradle

+18, -106
@@ -5,7 +5,7 @@ plugins {
     id "com.github.jk1.dependency-license-report" version "1.19"

     // Only used for testing
-    id 'com.marklogic.ml-gradle' version '4.6.0'
+    id 'com.marklogic.ml-gradle' version '4.8.0'
     id 'jacoco'
     id "org.sonarqube" version "4.4.1.3373"

@@ -31,24 +31,26 @@ configurations {
 }

 ext {
-    kafkaVersion = "3.5.1"
+    // Even though Kafka Connect 3.7.0 is out, we're staying with 3.6.1 in order to continue
+    // using the third-party Kafka JUnit tool. See https://github.com/mguenther/kafka-junit?tab=readme-ov-file
+    kafkaVersion = "3.6.1"
 }

 dependencies {
     compileOnly "org.apache.kafka:connect-api:${kafkaVersion}"
     compileOnly "org.apache.kafka:connect-json:${kafkaVersion}"
     compileOnly "org.apache.kafka:connect-runtime:${kafkaVersion}"
-    compileOnly "org.slf4j:slf4j-api:1.7.36"
+    compileOnly "org.slf4j:slf4j-api:2.0.13"

-    implementation 'com.marklogic:ml-javaclient-util:4.6.0'
+    implementation 'com.marklogic:ml-javaclient-util:4.8.0'
     // Force DHF to use the latest version of ml-app-deployer, which minimizes security vulnerabilities
-    implementation "com.marklogic:ml-app-deployer:4.6.0"
+    implementation "com.marklogic:ml-app-deployer:4.8.0"

-    implementation "com.fasterxml.jackson.dataformat:jackson-dataformat-csv:2.15.2"
+    implementation "com.fasterxml.jackson.dataformat:jackson-dataformat-csv:2.15.3"

     // Note that in general, the version of the DHF jar must match that of the deployed DHF instance. Different versions
     // may work together, but that behavior is not guaranteed.
-    implementation("com.marklogic:marklogic-data-hub:5.8.0") {
+    implementation("com.marklogic:marklogic-data-hub:6.0.0") {
         exclude module: "marklogic-client-api"
         exclude module: "ml-javaclient-util"
         exclude module: "ml-app-deployer"
@@ -63,13 +65,13 @@ dependencies {

     testImplementation "org.apache.kafka:connect-api:${kafkaVersion}"
     testImplementation "org.apache.kafka:connect-json:${kafkaVersion}"
-    testImplementation 'net.mguenther.kafka:kafka-junit:3.5.1'
+    testImplementation 'net.mguenther.kafka:kafka-junit:3.6.0'

-    testImplementation "org.apache.avro:avro-compiler:1.11.1"
+    testImplementation "org.apache.avro:avro-compiler:1.11.3"

     // Forcing logback to be used for test logging
-    testImplementation "ch.qos.logback:logback-classic:1.3.5"
-    testImplementation "org.slf4j:jcl-over-slf4j:1.7.36"
+    testImplementation "ch.qos.logback:logback-classic:1.3.14"
+    testImplementation "org.slf4j:jcl-over-slf4j:2.0.13"

     documentation files('LICENSE.txt')
     documentation files('NOTICE.txt')
@@ -115,29 +117,6 @@ shadowJar {
     exclude "scaffolding/**"
 }

-task copyJarToKafka(type: Copy, dependsOn: shadowJar) {
-    description = "Used for local development and testing; copies the jar to your local Kafka install"
-    from "build/libs"
-    into "${kafkaHome}/libs"
-}
-
-task copyPropertyFilesToKafka(type: Copy) {
-    description = "Used for local development and testing; copies the properties files to your local Kafka install"
-    from "config"
-    into "${kafkaHome}/config"
-    filter { String line ->
-        line.startsWith('ml.connection.username=') ? 'ml.connection.username=' + kafkaMlUsername : line
-    }
-    filter { String line ->
-        line.startsWith('ml.connection.password=') ? 'ml.connection.password=' + kafkaMlPassword : line
-    }
-}
-
-task deploy {
-    description = "Used for local development and testing; builds the jar and copies it and the properties files to your local Kafka install"
-    dependsOn = ["copyJarToKafka", "copyPropertyFilesToKafka"]
-}
-
 ext {
     confluentArchiveGroup = "Confluent Connector Archive"
     confluentTestingGroup = "Confluent Platform Local Testing"
@@ -212,77 +191,10 @@ task connectorArchive(type: Zip, dependsOn: connectorArchive_BuildDirectory, gro
     destinationDirectory = file('build/distro')
 }

-task installConnectorInConfluent(type: Exec, group: confluentTestingGroup, dependsOn: [connectorArchive]) {
-    description = "Uses 'Confluent-hub' to install the connector in your local Confluent Platform"
-    commandLine "confluent-hub", "install", "--no-prompt", "build/distro/${baseArchiveName}.zip"
-    ignoreExitValue = true
-}
-
-// See https://docs.confluent.io/confluent-cli/current/command-reference/local/confluent_local_destroy.html
-task destroyLocalConfluent(type: Exec, group: confluentTestingGroup) {
-    description = "Destroy the local Confluent Platform instance"
-    commandLine "confluent", "local", "destroy"
-    // Main reason this will fail is because Confluent is not running, which shouldn't cause a failure
-    ignoreExitValue = true
-}
-
-// See https://docs.confluent.io/confluent-cli/current/command-reference/local/services/confluent_local_services_start.html
-task startLocalConfluent(type: Exec, group: confluentTestingGroup) {
-    description = "Convenience task for starting a local instance of Confluent Platform"
-    commandLine "confluent", "local", "services", "start"
-}
-
-task loadDatagenPurchasesConnector(type: Exec, group: confluentTestingGroup) {
-    description = "Load an instance of the Datagen connector into Confluent Platform for sending JSON documents to " +
-        "the 'purchases' topic"
-    commandLine "confluent", "local", "services", "connect", "connector", "load", "datagen-purchases-source", "-c",
-        "src/test/resources/confluent/datagen-purchases-source.json"
-}
-
-task loadMarkLogicPurchasesSinkConnector(type: Exec, group: confluentTestingGroup) {
-    description = "Load an instance of the MarkLogic Kafka connector into Confluent Platform for writing data to " +
-        "MarkLogic from the 'purchases' topic"
-    commandLine "confluent", "local", "services", "connect", "connector", "load", "marklogic-purchases-sink", "-c",
-        "src/test/resources/confluent/marklogic-purchases-sink.json"
-}
-
-task loadMarkLogicPurchasesSourceConnector(type: Exec, group: confluentTestingGroup) {
-    description = "Load an instance of the MarkLogic Kafka connector into Confluent Platform for reading rows from " +
-        "the demo/purchases view"
-    commandLine "confluent", "local", "services", "connect", "connector", "load", "marklogic-purchases-source", "-c",
-        "src/test/resources/confluent/marklogic-purchases-source.json"
-}
-
-task loadMarkLogicAuthorsSourceConnector(type: Exec, group: confluentTestingGroup) {
-    description = "Loads a source connector that retrieves authors from the citations.xml file, which is also used for " +
-        "all the automated tests"
-    commandLine "confluent", "local", "services", "connect", "connector", "load", "marklogic-authors-source", "-c",
-        "src/test/resources/confluent/marklogic-authors-source.json"
-}
+// Tasks for using the connector with Confluent Platform on Docker

-task loadMarkLogicEmployeesSourceConnector(type: Exec, group: confluentTestingGroup) {
-    commandLine "confluent", "local", "services", "connect", "connector", "load", "marklogic-employees-source", "-c",
-        "src/test/resources/confluent/marklogic-employees-source.json"
-}
-
-task setupLocalConfluent(group: confluentTestingGroup) {
-    description = "Start a local Confluent Platform instance and load the Datagen and MarkLogic connectors"
-}
-
-// Temporarily only loading the source connector to make manual testing easier, will re-enable all of these before 1.8.0
-//setupLocalConfluent.dependsOn startLocalConfluent, loadDatagenPurchasesConnector, loadMarkLogicPurchasesSinkConnector, loadMarkLogicPurchasesSourceConnector
-setupLocalConfluent.dependsOn startLocalConfluent, loadMarkLogicEmployeesSourceConnector
-
-loadDatagenPurchasesConnector.mustRunAfter startLocalConfluent
-loadMarkLogicPurchasesSinkConnector.mustRunAfter startLocalConfluent
-loadMarkLogicPurchasesSourceConnector.mustRunAfter startLocalConfluent
-loadMarkLogicAuthorsSourceConnector.mustRunAfter startLocalConfluent
-loadMarkLogicEmployeesSourceConnector.mustRunAfter startLocalConfluent
-
-task insertAuthors(type: Test) {
-    useJUnitPlatform()
-    systemProperty "AUTHOR_IDS", authorIds
-    description = "Insert a new author into the kafka-test-content database via a new citations XML document; " +
-        "use e.g. -PauthorIds=7,8,9 to insert 3 new authors with IDs of 7, 8, and 9"
-    include "com/marklogic/kafka/connect/source/debug/InsertAuthorsTest.class"
+task copyConnectorToDockerVolume(type: Copy, dependsOn: connectorArchive, group: confluentTestingGroup) {
+    description = "Copies the connector's archive directory to the Docker volume shared with the Connect server"
+    from "build/connectorArchive"
+    into "test-app/docker/confluent-marklogic-components"
 }
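With the tasks for copying the connector into a local Kafka or Confluent install removed, local testing now goes through Docker. As a minimal sketch, assuming the Docker Compose setup under test-app/docker (not part of this diff) mounts the confluent-marklogic-components directory into the Connect container, the new task would be run as:

```
# Builds the connector archive (via the task's dependsOn) and copies it into the
# directory shared with the Dockerized Connect server; the Compose file itself is
# assumed here and is not shown in this commit.
./gradlew copyConnectorToDockerVolume
```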

docker-compose.yml

-50
This file was deleted.

examples/ScalingConsiderations.md renamed to docs/ScalingConsiderations.md

+11, -2
@@ -1,3 +1,9 @@
+---
+layout: default
+title: Scaling Considerations
+nav_order: 7
+---
+
 # Scaling Considerations
 Each of the three parts of parts (Kafka, MarkLogic, and this connector) of this system maybe easily scaled to handle
 your throughput requirements. To use the connector in a clustered environment you only need to ensure a couple of
@@ -11,8 +17,8 @@ started or shutdown, this information is also relayed to the connectors so that
 ## MarkLogic
 MarkLogic is designed to be used in large clusters of servers. In order to spread the load of data I/O across the
 cluster, a load balancer is typically used. In this case, the connector should be configured to be aware of the use
-of a load balancer. This is accomplished by setting the "ml.connection.host" to point to the load balancer, and by setting "ml.connection.type" to "gateway" in the marklogic-sink.properties
-file.
+of a load balancer. This is accomplished by setting the "ml.connection.host" to point to the load balancer, and by
+setting "ml.connection.type" to "gateway" in the marklogic-sink.properties file.

 <pre><code># A MarkLogic host to connect to. The connector uses the Data Movement SDK, and thus it will connect to each of the
 # hosts in a cluster.
@@ -22,6 +28,9 @@ ml.connection.host=MarkLogic-LoadBalancer-1024238516.us-east-1.elb.amazonaws.com
 # See https://docs.marklogic.com/guide/java/data-movement#id_26583 for more information.
 ml.connection.type=gateway</code></pre>

+For additional information regarding scaling a MarkLogic cluster, please see the MarkLogic
+[Scalability, Availability, and Failover Guide](https://docs.marklogic.com/guide/cluster/scalability).
+
 ## Connector
 When configuring multiple instances of the connector to consume the same topic(s), the Kafka Connect framework
 automatically handles dividing up the connections by assigning specific topic partitions (spread across the Kafka

docs/writing-data.md

+26, -1
@@ -113,6 +113,24 @@ This will result in the following pieces of Kafka record metadata being in each
 - `kafka.partition` = the partition of the Kafka record
 - `kafka.timestamp` = the timestamp of the Kafka record

+### Including Kafka headers
+
+Each Kafka record passed to the MarkLogic connector also has headers that may contain useful information which can be
+included in the metadata written with documents. This includes the headers that are included in Kafka records by
+default as well as any custom headers. Kafka headers can be included in each document by configuring the following
+property:
+
+- `ml.dmsdk.includeKafkaHeaders` = `true` to include Kafka headers
+
+When the headers are added to the document metadata, they are simply given the same name as the key for the header.
+However, you may also specify a prefix that will be prepended to each header key. To set that prefix, use the following
+property:
+
+- `ml.dmsdk.includeKafkaHeaders.prefix` = `<prefix>` to be prepended to header keys in the metadata.
+
+The headers that are on the Kafka records will depend on the Kafka distribution you are using and the message producer
+configuration.
+
 ### Configuring DMSDK performance

 The performance of how data is written to MarkLogic can be configured via the following properties:
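As a side note on the header options introduced above, a minimal marklogic-sink.properties sketch using them might look like the following; the `kafka.header.` prefix value is purely illustrative and not part of this commit:

```
# Copy each Kafka record's headers into the written document's metadata.
ml.dmsdk.includeKafkaHeaders=true
# Optional prefix prepended to each header key in the metadata (illustrative value).
ml.dmsdk.includeKafkaHeaders.prefix=kafka.header.
```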
@@ -211,7 +229,7 @@ endpointConstants = fn.head(xdmp.fromJSON(endpointConstants));

 for (let item of inputSequence) {
   item = fn.head(xdmp.fromJSON(item));
-  // TODO Determine what to do with each item
+  // Determine what to do with each item
 }
 ```

@@ -311,3 +329,10 @@ required to catch any error that occurs, an unexpected error in the sink connect
 and logged by Kafka. However, nothing will be sent to the user-configured DLQ topic in this scenario as the error will
 not be associated with a particular sink record. Kafka and MarkLogic server logs should be examined to determine the
 cause of the error.
+
+## JSON-based Connector Configuration
+
+Some Kafka environments permit REST-based instantiation of connectors. Confluent is one of those environments.
+[Please see the Confluent documentation](https://docs.confluent.io/kafka-connectors/maprdb/current/map_r_d_b_sink_connector_example.html)
+to read about this technique. Examples of JSON files to use with the REST service can be found in
+examples/ConfluentConnectorConfigs.
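The JSON files referenced above use the standard Kafka Connect REST payload shape (a connector name plus a config map), so against a typical Connect server they could be registered with a call along these lines; the host and port (8083 is the Connect REST default) depend on your environment:

```
# Register the example source connector with the Connect REST API.
curl -X POST -H "Content-Type: application/json" \
  --data @examples/ConfluentConnectorConfigs/marklogic-purchases-source.json \
  http://localhost:8083/connectors
```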
New file: marklogic-purchases-sink connector configuration

+23

@@ -0,0 +1,23 @@
+{
+  "name": "marklogic-purchases-sink",
+  "config": {
+    "topics": "purchases",
+    "connector.class": "com.marklogic.kafka.connect.sink.MarkLogicSinkConnector",
+    "key.converter": "org.apache.kafka.connect.storage.StringConverter",
+    "value.converter": "org.apache.kafka.connect.storage.StringConverter",
+    "tasks.max": "1",
+    "ml.connection.host": "marklogic",
+    "ml.connection.port": 8011,
+    "ml.connection.username": "kafka-test-user",
+    "ml.connection.password": "kafkatest",
+    "ml.connection.securityContextType": "DIGEST",
+    "ml.document.format": "JSON",
+    "ml.document.uriPrefix": "/purchase/",
+    "ml.document.uriSuffix": ".json",
+    "ml.document.collections": "purchases,kafka-data",
+    "ml.document.permissions": "kafka-test-minimal-user,read,kafka-test-minimal-user,update",
+    "ml.dmsdk.includeKafkaMetadata": "false",
+    "ml.dmsdk.includeKafkaHeaders": "true",
+    "ml.dmsdk.includeKafkaHeaders.prefix": ""
+  }
+}

src/test/resources/confluent/marklogic-purchases-source.json renamed to examples/ConfluentConnectorConfigs/marklogic-purchases-source.json

+2, -2
@@ -5,8 +5,8 @@
     "key.converter": "org.apache.kafka.connect.storage.StringConverter",
     "value.converter": "org.apache.kafka.connect.storage.StringConverter",
     "tasks.max": "1",
-    "ml.connection.host": "localhost",
-    "ml.connection.port": 8018,
+    "ml.connection.host": "marklogic",
+    "ml.connection.port": 8011,
     "ml.connection.username": "kafka-test-user",
     "ml.connection.password": "kafkatest",
     "ml.connection.securityContextType": "DIGEST",
-1.71 MB (binary file not shown)

examples/dmsdk-transform/installTransform.sh

-1
This file was deleted.
