Skip to content

Commit 8bcb270

Browse files
Merge branch 'Azure:master' into master
2 parents ab0f42e + d3dc644 commit 8bcb270

File tree

87 files changed

+6790
-2641
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+6790
-2641
lines changed

.github/workflows/build.yml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
name: AzureKustoSpark
2+
on:
3+
push:
4+
branches: [ "**" ]
5+
pull_request:
6+
branches: [ "**" ]
7+
8+
jobs:
9+
build:
10+
name: Build
11+
runs-on: ubuntu-latest
12+
environment: build
13+
permissions:
14+
checks: write
15+
pull-requests: write
16+
id-token: write
17+
contents: read
18+
steps:
19+
- name: Check out code into the Spark module directory
20+
uses: actions/checkout@v4
21+
- name: Setup java
22+
uses: actions/setup-java@v4
23+
with:
24+
distribution: 'adopt'
25+
java-version: 8
26+
cache: 'maven'
27+
cache-dependency-path: |
28+
pom.xml
29+
connector/pom.xml
30+
samples/pom.xml
31+
- name: Azure login
32+
uses: azure/login@v2
33+
with:
34+
client-id: ${{ secrets.APP_ID }}
35+
tenant-id: ${{ secrets.TENANT_ID }}
36+
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
37+
- name: "Run az commands"
38+
run: |
39+
access_token=$(az account get-access-token --resource=${{ secrets.APP_ID }} --scope=${{ secrets.CLUSTER }}/.default --query accessToken -o tsv)
40+
echo "ACCESS_TOKEN=$access_token" >> $GITHUB_ENV
41+
- name: Run the Maven verify phase
42+
env:
43+
kustoAadAuthorityID: ${{ secrets.TENANT_ID }}
44+
kustoDatabase: ${{ secrets.DATABASE }}
45+
kustoCluster: ${{ secrets.CLUSTER }}
46+
kustoAadAppId: ${{secrets.APP_ID}}
47+
accessToken: ${{env.ACCESS_TOKEN}}
48+
storageAccountUrl: '${{secrets.STORAGE_CONTAINER_URL}}'
49+
ingestStorageUrl: ${{ secrets.INGEST_STORAGE_URL }}
50+
ingestStorageContainer: ${{ secrets.INGEST_STORAGE_CONTAINER }}
51+
run: |
52+
mvn clean verify -DkustoAadAppId=${{ secrets.APP_ID }} -DkustoAadAuthorityID=${{ secrets.TENANT_ID }} -DkustoDatabase=${{ secrets.DATABASE }} -DkustoCluster=${{ secrets.CLUSTER }} -DaccessToken=${{env.ACCESS_TOKEN}} -DingestStorageUrl=${{secrets.INGEST_STORAGE_URL}} -DingestStorageContainer=${{secrets.INGEST_STORAGE_CONTAINER}}
53+
54+
- name: Publish Unit Test Results
55+
uses: EnricoMi/publish-unit-test-result-action@v2
56+
if: always()
57+
with:
58+
files: |
59+
connector/target/surefire-reports/*.xml
60+
- name: Upload coverage to Codecov
61+
uses: codecov/codecov-action@v2

.github/workflows/release.yml

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
name: release
2+
permissions:
3+
checks: write
4+
pull-requests: write
5+
packages: write
6+
deployments: write
7+
contents: write
8+
id-token: write
9+
10+
on:
11+
push:
12+
tags:
13+
- "v(2.4|3.0)_[0-9]+.[0-9]+.[0-9]+"
14+
workflow_dispatch:
15+
inputs:
16+
tag:
17+
description: 'Tag'
18+
required: true
19+
default: 'refs/tags/v0.0.0'
20+
upload_to_azure:
21+
description: 'Upload to Azure storage'
22+
required: false
23+
default: false
24+
github_release:
25+
description: 'Create Github release'
26+
required: false
27+
default: false
28+
29+
jobs:
30+
release:
31+
runs-on: ubuntu-latest
32+
environment: build
33+
strategy:
34+
matrix:
35+
java: [ '8' ]
36+
name: Java ${{ matrix.java }}
37+
steps:
38+
- uses: actions/checkout@v4
39+
with:
40+
ref: ${{ github.event.inputs.tag || github.ref }}
41+
- name: Azure login
42+
uses: azure/login@v2
43+
with:
44+
client-id: ${{ secrets.APP_ID }}
45+
tenant-id: ${{ secrets.TENANT_ID }}
46+
subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
47+
- name: Setup java ${{ matrix.java }}
48+
uses: actions/setup-java@v4
49+
with:
50+
distribution: 'adopt'
51+
java-version: ${{ matrix.java }}
52+
cache: 'maven'
53+
- name: "Run az commands"
54+
run: |
55+
access_token=$(az account get-access-token --resource=${{ secrets.APP_ID }} --scope=${{ secrets.CLUSTER }}/.default --query accessToken -o tsv)
56+
echo "ACCESS_TOKEN=$access_token" >> $GITHUB_ENV
57+
- name: Run the Maven verify phase
58+
env:
59+
kustoAadAuthorityID: ${{ secrets.TENANT_ID }}
60+
kustoDatabase: ${{ secrets.DATABASE }}
61+
kustoCluster: ${{ secrets.CLUSTER }}
62+
kustoAadAppId: ${{secrets.APP_ID}}
63+
accessToken: ${{env.ACCESS_TOKEN}}
64+
storageAccountUrl: ${{ secrets.STORAGE_CONTAINER_URL }}
65+
run: |
66+
mvn clean verify -DkustoAadAppId=${{ secrets.APP_ID }} -DkustoAadAuthorityID=${{ secrets.TENANT_ID }} -DkustoDatabase=${{ secrets.DATABASE }} -DkustoCluster=${{ secrets.CLUSTER }} -DaccessToken=${{env.ACCESS_TOKEN}}
67+
- name: Get versions
68+
id: get_version
69+
run: |
70+
echo ::set-output name=VERSION::$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
71+
echo ::set-output name=SCALA_VERSION::$(mvn help:evaluate -Dexpression=scala.version.major -q -DforceStdout)
72+
echo ::set-output name=SPARK_VERSION::$(mvn help:evaluate -Dexpression=spark.version.major -q -DforceStdout)
73+
- name: Move artifacts to staging
74+
run: |
75+
version=${{ steps.get_version.outputs.VERSION }}
76+
scalaversion=${{steps.get_version.outputs.SCALA_VERSION}}
77+
sparkversion=${{steps.get_version.outputs.SPARK_VERSION}}
78+
mkdir staging
79+
echo ${{steps.get_version.outputs.SPARK_VERSION}}_${{steps.get_version.outputs.SCALA_VERSION}}-$version
80+
cp connector/target/*.jar staging
81+
cp connector/.flattened-pom.xml staging/kusto-spark_${{steps.get_version.outputs.SPARK_VERSION}}_${{steps.get_version.outputs.SCALA_VERSION}}-$version.pom
82+
- name: Github Release
83+
uses: docker://antonyurchenko/git-release:v6
84+
env:
85+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
86+
with:
87+
args: |
88+
staging/kusto-spark_${{ steps.get_version.outputs.SPARK_VERSION }}_${{ steps.get_version.outputs.SCALA_VERSION }}-${{ steps.get_version.outputs.VERSION }}-jar-with-dependencies.jar
89+
staging/kusto-spark_${{ steps.get_version.outputs.SPARK_VERSION }}_${{ steps.get_version.outputs.SCALA_VERSION }}-${{ steps.get_version.outputs.VERSION }}-sources.jar
90+
continue-on-error: true
91+
if: ${{ github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.github_release == 'true') }}
92+
- name: Azure login for azuresdkpartnerdrops SA
93+
uses: azure/login@v2
94+
with:
95+
client-id: ${{ secrets.AZURESDKPARTNERDROPS_CLIENT_ID }}
96+
tenant-id: ${{ secrets.TENANT_ID }}
97+
subscription-id:
98+
${{ secrets.AZURESDKPARTNERDROPS_SUBSCRIPTION_ID }}
99+
- name: Upload file to Blob Storage
100+
run: |
101+
az storage blob upload-batch \
102+
--account-name ${{ secrets.AZURE_RELEASE_STORAGE_ACCOUNT }} \
103+
--destination ${{ secrets.AZURE_STORAGE_CONTAINER }}/kusto/spark/${{ steps.get_version.outputs.SPARK_VERSION }}_${{ steps.get_version.outputs.VERSION }} \
104+
--source staging \
105+
--auth-mode login
106+
if: ${{ github.event_name == 'push' || (github.event_name == 'workflow_dispatch' && github.event.inputs.upload_to_azure == 'true') }}

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,7 @@ src_managed/
5151
project/boot/
5252
project/plugins/project/
5353

54+
# VS Code and extensions
55+
.vscode/
56+
.bloop/
57+
.metals/

.scalafix.conf

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
rules = [
2+
NoAutoTupling,
3+
RemoveUnused,
4+
DisableSyntax,
5+
LeakingImplicitClassVal,
6+
NoValInForComprehension,
7+
ProcedureSyntax
8+
]
9+
10+
DisableSyntax.noVars = true
11+
DisableSyntax.noThrows = true
12+
DisableSyntax.noNulls = true
13+
DisableSyntax.noReturns = true
14+
DisableSyntax.noWhileLoops = true
15+
DisableSyntax.noAsInstanceOf = true
16+
DisableSyntax.noIsInstanceOf = true
17+
DisableSyntax.noXml = true
18+
DisableSyntax.noDefaultArgs = true
19+
DisableSyntax.noFinalVal = true
20+
DisableSyntax.noFinalize = true
21+
DisableSyntax.noValPatterns = true

.scalafmt.conf

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one or more
3+
# contributor license agreements. See the NOTICE file distributed with
4+
# this work for additional information regarding copyright ownership.
5+
# The ASF licenses this file to You under the Apache License, Version 2.0
6+
# (the "License"); you may not use this file except in compliance with
7+
# the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
align = none
19+
align.openParenDefnSite = false
20+
align.openParenCallSite = false
21+
align.tokens = []
22+
optIn = {
23+
configStyleArguments = false
24+
}
25+
danglingParentheses.preset = false
26+
docstrings.style = Asterisk
27+
maxColumn = 98
28+
runner.dialect = scala212
29+
fileOverride {
30+
"glob:**/src/**/scala-2.13/**.scala" {
31+
runner.dialect = scala213
32+
}
33+
}
34+
version = 3.5.9

README.md

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,30 @@
33
</p>
44

55
# Azure Data Explorer Connector for Apache Spark
6+
7+
master: [![Build](https://github.com/Azure/azure-kusto-spark/actions/workflows/build.yml/badge.svg?branch=master)](https://github.com/Azure/azure-kusto-spark/actions/workflows/build.yml)
68

7-
master: [![Build status](https://msazure.visualstudio.com/One/_apis/build/status/Custom/Kusto/azure-kusto-spark%20ci?branchName=master)](https://msazure.visualstudio.com/One/_build/latest?definitionId=58677)
8-
99
This library contains the source code for Azure Data Explorer Data Source and Data Sink Connector for Apache Spark.
1010

1111
Azure Data Explorer (A.K.A. [Kusto](https://azure.microsoft.com/services/data-explorer/)) is a lightning-fast indexing and querying service.
1212

1313
[Spark](https://spark.apache.org/) is a unified analytics engine for large-scale data processing.
1414

15-
Making Azure Data Explorer and Spark work together enables building fast and scalable applications, targeting a variety of Machine Learning, Extract-Transform-Load, Log Analytics and other data-driven scenarios.
15+
Making Azure Data Explorer and Spark work together enables building fast and scalable applications, targeting a variety of Machine Learning, Extract-Transform-Load, Log Analytics and other data-driven scenarios.
16+
17+
This connector works with the following spark environments:
18+
[Azure Databricks](https://azure.microsoft.com/products/databricks),
19+
[Azure Synapse Data Explorer](https://docs.microsoft.com/azure/synapse-analytics/data-explorer/data-explorer-overview) and
20+
[Real time analytics in Fabric](https://learn.microsoft.com/fabric/real-time-analytics/overview)
1621

1722
## Changelog
1823

24+
**Breaking changes in versions 5.2.x** - From these versions, the published packages are shaded and packaged as a self-contained jar. This is to avoid issues with common OSS libraries, Spark runtimes and/or application dependencies.
25+
1926
For major changes from previous releases, please refer to [Releases](https://github.com/Azure/azure-kusto-spark/releases).
2027
For known or new issues, please refer to the [issues](https://github.com/Azure/azure-kusto-spark/issues) section.
21-
> Note: Use the 4.x series only if you are using JDK 11 and 3.x in JDK 8
28+
> Note: Use the 4.x series only if you are using JDK 11. Versions 3.x and 5.x work with JDK 8 and all later JDK versions.
29+
From versions 5.2.0 and up, the connector is packaged as an uber jar to avoid conflicts with other jars that are added as part of the spark job definitions.
2230

2331
## Usage
2432

@@ -33,14 +41,14 @@ link your application with the artifact below to use the Azure Data Explorer Con
3341
```
3442
groupId = com.microsoft.azure.kusto
3543
artifactId = kusto-spark_3.0_2.12
36-
version = 4.0.2
44+
version = 5.3.0
3745
```
3846

3947
**In Maven**:
4048

4149
Look for the following coordinates:
4250
```
43-
com.microsoft.azure.kusto:kusto-spark_3.0_2.12:4.0.2
51+
com.microsoft.azure.kusto:kusto-spark_3.0_2.12:5.3.0
4452
```
4553

4654
Or clone this repository and build it locally to add it to your local Maven repository.
@@ -50,15 +58,15 @@ The jar can also be found under the [released package](https://github.com/Azure/
5058
<dependency>
5159
<groupId>com.microsoft.azure.kusto</groupId>
5260
<artifactId>kusto-spark_3.0_2.12</artifactId>
53-
<version>4.0.2</version>
61+
<version>5.2.2</version>
5462
</dependency>
5563
```
5664

5765
**In SBT**:
5866

5967
```scala
6068
libraryDependencies ++= Seq(
61-
"com.microsoft.azure.kusto" %% "kusto-spark_3.0" % "4.0.2"
69+
"com.microsoft.azure.kusto" %% "kusto-spark_3.0" % "5.2.2"
6270
)
6371
```
6472

@@ -67,7 +75,7 @@ libraryDependencies ++= Seq(
6775
Libraries -> Install New -> Maven -> copy the following coordinates:
6876

6977
```
70-
com.microsoft.azure.kusto:kusto-spark_3.0_2.12:4.0.2
78+
com.microsoft.azure.kusto:kusto-spark_3.0_2.12:5.2.2
7179
```
7280

7381
#### Building Samples Module
@@ -95,23 +103,50 @@ To use the connector, you need:
95103
> Note: when working with Spark version 2.3 or lower, build the jar locally from branch "2.4" and
96104
simply change the spark version in the pom file.
97105

106+
## Local Run - Build Setup
107+
108+
The newer connector options have tests pertaining to Blob storage, covering user-impersonation-based data export and the use of a custom blob storage account for ingestion.
109+
110+
These are already set up on the CI. To configure them on a local machine, some one-time setup is required. The following commands use the Azure CLI; the same setup can also be done through the Azure portal.
111+
112+
```
113+
az login
114+
az ad signed-in-user show --query "id" --output json
115+
```
116+
This will usually output a GUID
117+
118+
```
119+
"10ac405f-8d3f-4f95-a012-201801b257d2"
120+
```
121+
This ID can then be used to grant access to storage as follows
122+
123+
```shell
124+
az role assignment create --assignee 10ac405f-8d3f-4f95-a012-201801b257d2 --role "Storage Blob Delegator" --scope /subscriptions/<sub-id>/resourceGroups/<rg-name>/providers/Microsoft.Storage/storageAccounts/<storageacc>
125+
126+
az role assignment create --assignee 10ac405f-8d3f-4f95-a012-201801b257d2 --role "Storage Blob Data Contributor" --scope /subscriptions/<sub-id>/resourceGroups/<rg-name>/providers/Microsoft.Storage/storageAccounts/<storageacc>/containers/<container-name>
127+
```
128+
129+
These commands grant the storage access required by the tests.
130+
131+
Once this is set up, you can use the following commands to build and run the tests
132+
98133
## Build Commands
99134

100135
```shell
101-
// Builds jar and runs all tests
102-
mvn clean package
136+
mvn clean package -DkustoCluster='https://cluster.westus2.kusto.windows.net' -DkustoDatabase='spark' -DkustoAadAuthorityID='72f988bf-86f1-41af-91ab-2d7cd011db47' -DkustoIngestionUri='https://ingest-cluster.westus2.kusto.windows.net' -DingestStorageUrl='https://storageacc.blob.core.windows.net' -DingestStorageContainer='ingestcontainer' -DstorageAccountUrl='https://storageacc.blob.core.windows.net/synapseppe\;impersonate'
137+
138+
139+
# You can pass all the properties as env variables too
140+
export kustoCluster="https://cluster.westus2.kusto.windows.net"
103141

104-
// Builds jar, runs all tests, and installs jar to your local maven repository
105-
mvn clean install
106142
```
107143

108144
## Pre-Compiled Libraries
109145
To facilitate ramp-up from local jar on platforms such as Azure Databricks, pre-compiled libraries
110146
are published under [GitHub Releases](https://github.com/Azure/azure-kusto-spark/releases).
111147
These libraries include:
112148
* Azure Data Explorer connector library
113-
* User may also need to include Kusto Java SDK libraries (kusto-data and kusto-ingest), which are published under
114-
[GitHub Releases](https://github.com/Azure/azure-kusto-java/releases)
149+
* Version 5.2.0 and up of the library publish uber jars to maven. This is because of conflicts between custom jars that are added as part of the job and the exclude/include process that has to be followed to avoid conflicts.
115150

116151
## Dependencies
117152
Spark Azure Data Explorer connector depends on [Azure Data Explorer Data Client Library](https://mvnrepository.com/artifact/com.microsoft.azure.kusto/kusto-data)

0 commit comments

Comments
 (0)