Skip to content

Commit b1b0261

Browse files
DATAGO-120936: Hotfix - Disable partition key handling for checkpointing (#46)
1 parent 6453f56 commit b1b0261

File tree

4 files changed

+21
-15
lines changed

4 files changed

+21
-15
lines changed

src/docs/asciidoc/User-Guide.adoc

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ NOTE: Above sample code used parquet as example data source. You can configure y
4545

4646
NOTE: In case of a Databricks deployment, it is recommended to store and retrieve sensitive credentials from Databricks secrets. Please refer to <<Using Databricks Secret Management>> on how to configure secrets and use them in a notebook.
4747

48+
NOTE: Solace Partitioned Queues are not supported.
49+
4850
=== Databricks Considerations
4951

5052
If you are using a Shared compute cluster, make sure your cluster has https://docs.databricks.com/en/data-governance/unity-catalog/manage-privileges/allowlist.html[appropriate permissions] to install the connector from Maven Central and access the jars. Please contact your Databricks administrator for the required permissions.
@@ -73,14 +75,15 @@ NOTE: In case of recovery, connector uses offset state from LVQ to identify last
7375

7476
In some cases, there might be checkpoint failures as spark may fail to write to checkpoint during instance crash or unavailability or other reasons. Though the connector will handle duplicates in most cases, we recommend to keep your downstream systems idempotent.
7577

76-
==== Pre-requisites for LVQ creation
77-
The following pre-requisites are applicable for LVQ that are provisioned by Solace Administrator.
78+
==== Prerequisites for LVQ creation
79+
The following prerequisites are applicable for LVQ that are provisioned by Solace Administrator.
80+
7881
1. The Queue should be of type Exclusive
7982
2. Spool Quota should be set to 0
8083
3. Owner of the Queue should be the client username used by the micro integration
8184
4. Non-Owner access should be set to No Access to prevent unauthorized access
8285
5. Add a topic subscription
83-
6. Ensure the ACL applied to the user has publish access to the topic subscribed by LVQ
86+
6. Ensure the ACL applied to the user has publish and subscribe access to the topic subscribed by LVQ
8487

8588
The following prerequisites are applicable if the micro integration needs to create the LVQ when it doesn't exist.
8689

src/main/java/com/solacecoe/connectors/spark/streaming/SolaceMicroBatch.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ public Offset deserializeOffset(String json) {
217217
if(solaceSourceOffset.getCheckpoints() != null && solaceSourceOffset.getCheckpoints().isEmpty()) {
218218
log.info("SolaceSparkConnector - No offset is available in spark checkpoint location. New checkpoint state will be created");
219219
} else {
220-
log.info("SolaceSparkConnector - Deserialized offset {}", new Gson().toJson(solaceSourceOffset));
220+
log.trace("SolaceSparkConnector - Deserialized offset {}", new Gson().toJson(solaceSourceOffset));
221221
}
222222
lastKnownOffsetId = solaceSourceOffset.getOffset();
223223
currentCheckpoint = solaceSourceOffset.getCheckpoints();
@@ -289,7 +289,8 @@ public void commit(Offset end) {
289289

290290
if(!offsetToCommit.isEmpty()) {
291291
currentCheckpoint = offsetToCommit;
292-
log.trace("SolaceSparkConnector - Final checkpoint publishing to LVQ {}", new Gson().toJson(offsetToCommit));
292+
log.info("SolaceSparkConnector - Final checkpoint published to LVQ on topic {}", properties.getOrDefault(SolaceSparkStreamingProperties.SOLACE_SPARK_CONNECTOR_LVQ_TOPIC, SolaceSparkStreamingProperties.SOLACE_SPARK_CONNECTOR_LVQ_DEFAULT_TOPIC));
293+
log.trace("SolaceSparkConnector - Final checkpoint publishing to LVQ {} on topic {}", new Gson().toJson(offsetToCommit), properties.getOrDefault(SolaceSparkStreamingProperties.SOLACE_SPARK_CONNECTOR_LVQ_TOPIC, SolaceSparkStreamingProperties.SOLACE_SPARK_CONNECTOR_LVQ_DEFAULT_TOPIC));
293294
this.solaceBroker.publishMessage(properties.getOrDefault(SolaceSparkStreamingProperties.SOLACE_SPARK_CONNECTOR_LVQ_TOPIC, SolaceSparkStreamingProperties.SOLACE_SPARK_CONNECTOR_LVQ_DEFAULT_TOPIC), new Gson().toJson(offsetToCommit));
294295
checkException();
295296
offsetToCommit.clear();

src/main/java/com/solacecoe/connectors/spark/streaming/partitions/SolaceInputPartitionReader.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ public class SolaceInputPartitionReader implements PartitionReader<InternalRow>,
5959
private int messages = 0;
6060
private Iterator<SolaceMessage> iterator;
6161
private boolean shouldTrackMessage = true;
62-
62+
private boolean isPartitionQueue = false;
6363
public SolaceInputPartitionReader(SolaceInputPartition inputPartition, boolean includeHeaders, Map<String, String> properties,
6464
TaskContext taskContext, CopyOnWriteArrayList<SolaceSparkPartitionCheckpoint> checkpoints, String checkpointLocation) {
6565

@@ -96,7 +96,7 @@ public SolaceInputPartitionReader(SolaceInputPartition inputPartition, boolean i
9696
log.info("SolaceSparkConnector - Acknowledging any processed messages to Solace as commit is successful");
9797
long startTime = System.currentTimeMillis();
9898
SolaceMessageTracker.ackMessages(uniqueId);
99-
log.trace("SolaceSparkConnector - Total time taken to acknowledge messages {} ms", (System.currentTimeMillis() - startTime));
99+
log.info("SolaceSparkConnector - Total time taken to acknowledge messages {} ms", (System.currentTimeMillis() - startTime));
100100
} else {
101101
log.info("SolaceSparkConnector - Spark Batch with id {} is requesting data again. It may be because of multiple operations on same dataframe.", currentBatchId);
102102
isCommitTriggered = false;
@@ -185,7 +185,7 @@ public InternalRow get() {
185185
}
186186
// No need to add message to tracker as the call is from same dataframe operation.
187187
if (shouldTrackMessage) {
188-
if (solaceRecord.getPartitionKey() != null && !solaceRecord.getPartitionKey().isEmpty()) {
188+
if (solaceRecord.getPartitionKey() != null && !solaceRecord.getPartitionKey().isEmpty() && isPartitionQueue) {
189189
SolaceMessageTracker.addMessageID(solaceRecord.getPartitionKey(), solaceRecord.getMessageId());
190190
} else {
191191
SolaceMessageTracker.addMessageID(this.uniqueId, solaceRecord.getMessageId());
@@ -339,15 +339,15 @@ private void registerTaskListener() {
339339
logShutdownMessage(context);
340340
} else if (context.isCompleted()) {
341341
String processedMessageIDs = SolaceMessageTracker.getProcessedMessagesIDs(this.solaceInputPartition.getId());
342+
Path path = Paths.get(this.checkpointLocation + "/" + this.solaceInputPartition.getId() + ".txt");
343+
log.info("SolaceSparkConnector - File path {} to store checkpoint processed in worker node {}", path.toString(), this.solaceInputPartition.getPreferredLocation());
342344
if(processedMessageIDs != null && !processedMessageIDs.isEmpty()) {
343345
try {
344-
Path path = Paths.get(this.checkpointLocation + "/" + this.solaceInputPartition.getId() + ".txt");
345-
log.trace("SolaceSparkConnector - File path {} to store checkpoint processed in worker node {}", path.toString(), this.solaceInputPartition.getPreferredLocation());
346346
Path parentDir = path.getParent();
347347
if (parentDir != null) {
348348
// Create the directory and all nonexistent parent directories
349349
Files.createDirectories(parentDir);
350-
log.trace("SolaceSparkConnector - Created parent directory {} for file path {}", parentDir.toString(), path.toString());
350+
log.info("SolaceSparkConnector - Created parent directory {} for file path {}", parentDir.toString(), path.toString());
351351
}
352352
// overwrite checkpoint to preserve latest value
353353
try (BufferedWriter writer = Files.newBufferedWriter(path, StandardOpenOption.CREATE,
@@ -359,7 +359,7 @@ private void registerTaskListener() {
359359
// Publish state to checkpoint. On commit the state is published to Solace LVQ.
360360
writer.write(new Gson().toJson(solaceSparkPartitionCheckpoints));
361361
writer.newLine();
362-
log.trace("SolaceSparkConnector - Checkpoint {} stored in file path {}", new Gson().toJson(solaceSparkPartitionCheckpoints), path.toString());
362+
log.info("SolaceSparkConnector - Checkpoint {} stored in file path {}", new Gson().toJson(solaceSparkPartitionCheckpoints), path.toString());
363363
SolaceMessageTracker.removeProcessedMessagesIDs(this.solaceInputPartition.getId());
364364
// }
365365
}
@@ -368,9 +368,10 @@ private void registerTaskListener() {
368368
this.solaceBroker.close();
369369
throw new RuntimeException(e);
370370
}
371+
} else {
372+
log.info("SolaceSparkConnector - No processed message id's available for input partition {} and nothing is written to checkpoint {}", this.solaceInputPartition.getId(), this.checkpointLocation);
371373
}
372374

373-
374375
log.info("SolaceSparkConnector - Total time taken by executor is {} ms for Task {}", context.taskMetrics().executorRunTime(), uniqueId);
375376

376377
if (closeReceiversOnPartitionClose) {

src/main/java/com/solacecoe/connectors/spark/streaming/solace/SolaceBroker.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,8 @@ private void setReceiver(EventListener eventListener) {
206206
}
207207

208208
public void createLVQIfNotExist() {
209+
log.info("SolaceSparkConnector - Configured LVQ name {} and topic {}", this.lvqName, this.lvqTopic);
209210
lvq = JCSMPFactory.onlyInstance().createQueue(this.lvqName);
210-
211211
EndpointProperties endpoint_props = new EndpointProperties();
212212
endpoint_props.setAccessType(EndpointProperties.ACCESSTYPE_EXCLUSIVE);
213213
endpoint_props.setQuota(0);
@@ -226,7 +226,7 @@ public void createLVQIfNotExist() {
226226
if(e instanceof JCSMPErrorResponseException) {
227227
JCSMPErrorResponseException jce = (JCSMPErrorResponseException) e;
228228
if(jce.getResponsePhrase().contains("Subscription Already Exists")) {
229-
log.warn("SolaceSparkConnector - Subscription Already Exists on LVQ {}", this.lvqName);
229+
log.warn("SolaceSparkConnector - Subscription {} Already Exists on LVQ {}", this.lvqTopic, this.lvqName);
230230
} else {
231231
close();
232232
this.isException = true;
@@ -377,6 +377,7 @@ public void publishMessage(String topic, Object msg) {
377377
Destination destination = JCSMPFactory.onlyInstance().createTopic(topic);
378378
try {
379379
this.producer.send(xmlMessage, destination);
380+
log.info("SolaceSparkConnector - Published checkpoint to LVQ topic {}", topic);
380381
} catch (JCSMPException e) {
381382
log.error("SolaceSparkConnector - Exception publishing lvq message to Solace", e);
382383
handleException("SolaceSparkConnector - Exception publishing lvq message to Solace ", e);

0 commit comments

Comments
 (0)