diff --git a/.github/workflows/build-report.yml b/.github/workflows/build-report.yml
deleted file mode 100644
index 9c2f33843..000000000
--- a/.github/workflows/build-report.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright © 2024 Cask Data, Inc.
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not
-# use this file except in compliance with the License. You may obtain a copy of
-# the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations under
-# the License.
-
-# This workflow will build a Java project with Maven
-# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
-# Note: Any changes to this workflow would be used only after merging into develop
-name: Build Unit Tests Report
-
-on:
- workflow_run:
- workflows:
- - Build with unit tests
- types:
- - completed
-
-permissions:
- actions: read # Allows reading workflow run information
- statuses: write # Required if the action updates commit statuses
- checks: write # Required if it updates GitHub Checks API
-
-jobs:
- build:
- runs-on: ubuntu-latest
-
- if: ${{ github.event.workflow_run.conclusion != 'skipped' }}
-
- steps:
- # Pinned 1.0.0 version
- - uses: marocchino/action-workflow_run-status@54b6e87d6cb552fc5f36dbe9a722a6048725917a
-
- - name: Download artifact
- uses: actions/download-artifact@v4
- with:
- github-token: ${{ secrets.GITHUB_TOKEN }}
- run-id: ${{ github.event.workflow_run.id }}
- path: artifacts/
-
- - name: Surefire Report
- # Pinned 3.5.2 version
- uses: mikepenz/action-junit-report@16a9560bd02f11e7e3bf6b3e2ef6bba6c9d07c32
- if: always()
- with:
- report_paths: '**/target/surefire-reports/TEST-*.xml'
- github_token: ${{ secrets.GITHUB_TOKEN }}
- detailed_summary: true
- commit: ${{ github.event.workflow_run.head_sha }}
- check_name: Build Test Report
-
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
deleted file mode 100644
index c4d80553c..000000000
--- a/.github/workflows/build.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright © 2021 Cask Data, Inc.
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not
-# use this file except in compliance with the License. You may obtain a copy of
-# the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations under
-# the License.
-
-# This workflow will build a Java project with Maven
-# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
-# Note: Any changes to this workflow would be used only after merging into develop
-name: Build with unit tests
-
-on:
- push:
- branches: [ develop, release/** ]
- pull_request:
- branches: [ develop, release/** ]
- types: [opened, synchronize, reopened, labeled]
-
-jobs:
- build:
- runs-on: k8s-runner-build
-
- # We allow builds:
- # 1) When it's a merge into a branch
- # 2) For PRs that are labeled as build and
- # - It's a code change
- # - A build label was just added
- # A bit complex, but prevents builds when other labels are manipulated
- if: >
- github.event_name == 'push'
- || (contains(github.event.pull_request.labels.*.name, 'build')
- && (github.event.action != 'labeled' || github.event.label.name == 'build')
- )
- steps:
- - uses: actions/checkout@v4
- with:
- ref: ${{ github.event.workflow_run.head_sha }}
- - name: Cache
- uses: actions/cache@v4
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-${{ github.workflow }}-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ runner.os }}-maven-${{ github.workflow }}
- - name: Build with Maven
- run: mvn clean test -fae -T 2 -B -V -DcloudBuild -Dmaven.wagon.http.retryHandler.count=3 -Dmaven.wagon.httpconnectionManager.ttlSeconds=25
- - name: Archive build artifacts
- uses: actions/upload-artifact@v4
- if: always()
- with:
- name: reports-${{ github.run_id }}
- path: |
- **/target/rat.txt
- **/target/surefire-reports/*
-
diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
deleted file mode 100644
index 199225ef8..000000000
--- a/.github/workflows/e2e.yml
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright © 2023 Cask Data, Inc.
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not
-# use this file except in compliance with the License. You may obtain a copy of
-# the License at
-# http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-# License for the specific language governing permissions and limitations under
-# the License.
-
-# This workflow will build a Java project with Maven
-# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
-# Note: Any changes to this workflow would be used only after merging into develop
-name: Build e2e tests
-
-on:
- push:
- branches: [ develop ]
- pull_request:
- branches: [ develop ]
- types: [ opened, synchronize, reopened ]
- workflow_dispatch:
-
-jobs:
- build:
- runs-on: k8s-runner-e2e
- # We allow builds:
- # 1) When triggered manually
- # 2) When it's a merge into a branch
- # 3) For PRs that are labeled as build and
- # - It's a code change
- # - A build label was just added
- # A bit complex, but prevents builds when other labels are manipulated
- if: >
- github.event_name == 'workflow_dispatch'
- || github.event_name == 'push'
- || github.event_name == 'pull_request'
- strategy:
- matrix:
- module: [wrangler-transform]
- fail-fast: false
-
- steps:
- # Pinned 1.0.0 version
- - uses: actions/checkout@v4
- with:
- path: plugin
- submodules: 'recursive'
- ref: ${{ github.event.workflow_run.head_sha }}
-
- - uses: dorny/paths-filter@b2feaf19c27470162a626bd6fa8438ae5b263721
- if: github.event_name != 'workflow_dispatch' && github.event_name != 'push'
- id: filter
- with:
- working-directory: plugin
- filters: |
- e2e-test:
- - '${{ matrix.module }}/**/e2e-test/**'
-
- - name: Checkout e2e test repo
- uses: actions/checkout@v4
- with:
- repository: cdapio/cdap-e2e-tests
- path: e2e
-
- - name: Cache
- uses: actions/cache@v4
- with:
- path: ~/.m2/repository
- key: ${{ runner.os }}-maven-${{ github.workflow }}-${{ hashFiles('**/pom.xml') }}
- restore-keys: |
- ${{ runner.os }}-maven-${{ github.workflow }}
-
- - name: Run required e2e tests
- if: github.event_name != 'workflow_dispatch' && github.event_name != 'push' && steps.filter.outputs.e2e-test == 'false'
- run: python3 e2e/src/main/scripts/run_e2e_test.py --module ${{ matrix.module }} --testRunner TestRunnerRequired.java
-
- - name: Run all e2e tests
- if: github.event_name == 'workflow_dispatch' || github.event_name == 'push' || steps.filter.outputs.e2e-test == 'true'
- run: python3 e2e/src/main/scripts/run_e2e_test.py --module ${{ matrix.module }}
-
- - name: Upload debug files
- uses: actions/upload-artifact@v4
- if: always()
- with:
- name: Debug files - ${{ matrix.module }}
- path: ./**/target/e2e-debug
-
- - name: Upload files to GCS
- uses: google-github-actions/upload-cloud-storage@v2
- if: always()
- with:
- path: ./plugin
- destination: e2e-tests-cucumber-reports/${{ github.event.repository.name }}/${{ github.ref }}
- glob: '**/target/cucumber-reports/**'
- - name: Cucumber Report URL
- if: always()
- run: echo "https://storage.googleapis.com/e2e-tests-cucumber-reports/${{ github.event.repository.name }}/${{ github.ref }}/plugin/${{ matrix.module }}/target/cucumber-reports/advanced-reports/cucumber-html-reports/overview-features.html"
diff --git a/.gitignore b/.gitignore
index 5a9b182d3..b37ae2766 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,3 +51,5 @@ release.properties
# Remove dev directory.
dev
+
+/node_modules
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index a0c1f5e6a..000000000
--- a/LICENSE
+++ /dev/null
@@ -1,176 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- columns, trademarks, service marks, or product columns of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
diff --git a/README.md b/README.md
index 4aa6eeb3a..6d3ae87e3 100644
--- a/README.md
+++ b/README.md
@@ -1,218 +1,142 @@
-# Data Prep
-
-
-
-[](https://travis-ci.org/cdapio/hydrator-plugins)
-[](https://scan.coverity.com/projects/hydrator-wrangler-transform)
-[](https://maven-badges.herokuapp.com/maven-central/io.cdap.wrangler/wrangler-core)
-[](http://www.javadoc.io/doc/io.cdap.wrangler/wrangler-core)
-[](https://opensource.org/licenses/Apache-2.0)
-[](https://cdap-users.herokuapp.com?t=1)
-
-A collection of libraries, a pipeline plugin, and a CDAP service for performing data
-cleansing, transformation, and filtering using a set of data manipulation instructions
-(directives). These instructions are either generated using an interative visual tool or
-are manually created.
-
- * Data Prep defines few concepts that might be useful if you are just getting started with it. Learn about them [here](wrangler-docs/concepts.md)
- * The Data Prep Transform is [separately documented](wrangler-transform/wrangler-docs/data-prep-transform.md).
- * [Data Prep Cheatsheet](wrangler-docs/cheatsheet.md)
-
-## New Features
-
-More [here](wrangler-docs/upcoming-features.md) on upcoming features.
-
- * **User Defined Directives, also known as UDD**, allow you to create custom functions to transform records within CDAP DataPrep or a.k.a Wrangler. CDAP comes with a comprehensive library of functions. There are however some omissions, and some specific cases for which UDDs are the solution. Additional information on how you can build your custom directives [here](wrangler-docs/custom-directive.md).
- * Migrating directives from version 1.0 to version 2.0 [here](wrangler-docs/directive-migration.md)
- * Information about Grammar [here](wrangler-docs/grammar/grammar-info.md)
- * Various `TokenType` supported by system [here](../api/src/main/java/io/cdap/wrangler/api/parser/TokenType.java)
- * Custom Directive Implementation Internals [here](wrangler-docs/udd-internal.md)
-
- * A new capability that allows CDAP Administrators to **restrict the directives** that are accessible to their users.
-More information on configuring can be found [here](wrangler-docs/exclusion-and-aliasing.md)
-
-## Demo Videos and Recipes
-
-Videos and Screencasts are best way to learn, so we have compiled simple, short screencasts that shows some of the features of Data Prep. Additional videos can be found [here](https://www.youtube.com/playlist?list=PLhmsf-NvXKJn-neqefOrcl4n7zU4TWmIr)
-
-### Videos
-
- * [SCREENCAST] [Creating Lookup Dataset and Joining](https://www.youtube.com/watch?v=Nc1b0rsELHQ)
- * [SCREENCAST] [Restricted Directives](https://www.youtube.com/watch?v=71EcMQU714U)
- * [SCREENCAST] [Parse Excel files in CDAP](https://www.youtube.com/watch?v=su5L1noGlEk)
- * [SCREENCAST] [Parse File As AVRO File](https://www.youtube.com/watch?v=tmwAw4dKUNc)
- * [SCREENCAST] [Parsing Binary Coded AVRO Messages](https://www.youtube.com/watch?v=Ix_lPo-PDJY)
- * [SCREENCAST] [Parsing Binary Coded AVRO Messages & Protobuf messages using schema registry](https://www.youtube.com/watch?v=LVLIdWnUX1k)
- * [SCREENCAST] [Quantize a column - Digitize](https://www.youtube.com/watch?v=VczkYX5SRtY)
- * [SCREENCAST] [Data Cleansing capability with send-to-error directive](https://www.youtube.com/watch?v=aZd5H8hIjDc)
- * [SCREENCAST] [Building Data Prep from the GitHub source](https://youtu.be/pGGjKU04Y38)
- * [VOICE-OVER] [End-to-End Demo Video](https://youtu.be/AnhF0qRmn24)
- * [SCREENCAST] [Ingesting into Kudu](https://www.youtube.com/watch?v=KBW7a38vlUM)
- * [SCREENCAST] [Realtime HL7 CCDA XML from Kafka into Time Parititioned Parquet](https://youtu.be/0fqNmnOnD-0)
- * [SCREENCAST] [Parsing JSON file](https://youtu.be/vwnctcGDflE)
- * [SCREENCAST] [Flattening arrays](https://youtu.be/SemHxgBYIsY)
- * [SCREENCAST] [Data cleansing with send-to-error directive](https://www.youtube.com/watch?v=aZd5H8hIjDc)
- * [SCREENCAST] [Publishing to Kafka](https://www.youtube.com/watch?v=xdc8pvvlI48)
- * [SCREENCAST] [Fixed length to JSON](https://www.youtube.com/watch?v=3AXu4m1swuM)
-
-### Recipes
-
- * [Parsing Apache Log Files](wrangler-demos/parsing-apache-log-files.md)
- * [Parsing CSV Files and Extracting Column Values](wrangler-demos/parsing-csv-extracting-column-values.md)
- * [Parsing HL7 CCDA XML Files](wrangler-demos/parsing-hl7-ccda-xml-files.md)
-
-## Available Directives
-
-These directives are currently available:
-
-| Directive | Description |
-| ---------------------------------------------------------------------- | ---------------------------------------------------------------- |
-| **Parsers** | |
-| [JSON Path](wrangler-docs/directives/json-path.md) | Uses a DSL (a JSON path expression) for parsing JSON records |
-| [Parse as AVRO](wrangler-docs/directives/parse-as-avro.md) | Parsing an AVRO encoded message - either as binary or json |
-| [Parse as AVRO File](wrangler-docs/directives/parse-as-avro-file.md) | Parsing an AVRO data file |
-| [Parse as CSV](wrangler-docs/directives/parse-as-csv.md) | Parsing an input record as comma-separated values |
-| [Parse as Date](wrangler-docs/directives/parse-as-date.md) | Parsing dates using natural language processing |
-| [Parse as Excel](wrangler-docs/directives/parse-as-excel.md) | Parsing excel file. |
-| [Parse as Fixed Length](wrangler-docs/directives/parse-as-fixed-length.md) | Parses as a fixed length record with specified widths |
-| [Parse as HL7](wrangler-docs/directives/parse-as-hl7.md) | Parsing Health Level 7 Version 2 (HL7 V2) messages |
-| [Parse as JSON](wrangler-docs/directives/parse-as-json.md) | Parsing a JSON object |
-| [Parse as Log](wrangler-docs/directives/parse-as-log.md) | Parses access log files as from Apache HTTPD and nginx servers |
-| [Parse as Protobuf](wrangler-docs/directives/parse-as-log.md) | Parses an Protobuf encoded in-memory message using descriptor |
-| [Parse as Simple Date](wrangler-docs/directives/parse-as-simple-date.md) | Parses date strings |
-| [Parse XML To JSON](wrangler-docs/directives/parse-xml-to-json.md) | Parses an XML document into a JSON structure |
-| [Parse as Currency](wrangler-docs/directives/parse-as-currency.md) | Parses a string representation of currency into a number. |
-| [Parse as Datetime](wrangler-docs/directives/parse-as-datetime.md) | Parses strings with datetime values to CDAP datetime type |
-| **Output Formatters** | |
-| [Write as CSV](wrangler-docs/directives/write-as-csv.md) | Converts a record into CSV format |
-| [Write as JSON](wrangler-docs/directives/write-as-json-map.md) | Converts the record into a JSON map |
-| [Write JSON Object](wrangler-docs/directives/write-as-json-object.md) | Composes a JSON object based on the fields specified. |
-| [Format as Currency](wrangler-docs/directives/format-as-currency.md) | Formats a number as currency as specified by locale. |
-| **Transformations** | |
-| [Changing Case](wrangler-docs/directives/changing-case.md) | Changes the case of column values |
-| [Cut Character](wrangler-docs/directives/cut-character.md) | Selects parts of a string value |
-| [Set Column](wrangler-docs/directives/set-column.md) | Sets the column value to the result of an expression execution |
-| [Find and Replace](wrangler-docs/directives/find-and-replace.md) | Transforms string column values using a "sed"-like expression |
-| [Index Split](wrangler-docs/directives/index-split.md) | (_Deprecated_) |
-| [Invoke HTTP](wrangler-docs/directives/invoke-http.md) | Invokes an HTTP Service (_Experimental_, potentially slow) |
-| [Quantization](wrangler-docs/directives/quantize.md) | Quantizes a column based on specified ranges |
-| [Regex Group Extractor](wrangler-docs/directives/extract-regex-groups.md) | Extracts the data from a regex group into its own column |
-| [Setting Character Set](wrangler-docs/directives/set-charset.md) | Sets the encoding and then converts the data to a UTF-8 String |
-| [Setting Record Delimiter](wrangler-docs/directives/set-record-delim.md) | Sets the record delimiter |
-| [Split by Separator](wrangler-docs/directives/split-by-separator.md) | Splits a column based on a separator into two columns |
-| [Split Email Address](wrangler-docs/directives/split-email.md) | Splits an email ID into an account and its domain |
-| [Split URL](wrangler-docs/directives/split-url.md) | Splits a URL into its constituents |
-| [Text Distance (Fuzzy String Match)](wrangler-docs/directives/text-distance.md) | Measures the difference between two sequences of characters |
-| [Text Metric (Fuzzy String Match)](wrangler-docs/directives/text-metric.md) | Measures the difference between two sequences of characters |
-| [URL Decode](wrangler-docs/directives/url-decode.md) | Decodes from the `application/x-www-form-urlencoded` MIME format |
-| [URL Encode](wrangler-docs/directives/url-encode.md) | Encodes to the `application/x-www-form-urlencoded` MIME format |
-| [Trim](wrangler-docs/directives/trim.md) | Functions for trimming white spaces around string data |
-| **Encoders and Decoders** | |
-| [Decode](wrangler-docs/directives/decode.md) | Decodes a column value as one of `base32`, `base64`, or `hex` |
-| [Encode](wrangler-docs/directives/encode.md) | Encodes a column value as one of `base32`, `base64`, or `hex` |
-| **Unique ID** | |
-| [UUID Generation](wrangler-docs/directives/generate-uuid.md) | Generates a universally unique identifier (UUID) .Recommended to use with Wrangler version 4.4.0 and above due to an important bug fix [CDAP-17732](https://cdap.atlassian.net/browse/CDAP-17732) |
-| **Date Transformations** | |
-| [Diff Date](wrangler-docs/directives/diff-date.md) | Calculates the difference between two dates |
-| [Format Date](wrangler-docs/directives/format-date.md) | Custom patterns for date-time formatting |
-| [Format Unix Timestamp](wrangler-docs/directives/format-unix-timestamp.md) | Formats a UNIX timestamp as a date |
-| **DateTime Transformations** | |
-| [Current DateTime](wrangler-docs/directives/current-datetime.md) | Generates the current datetime using the given zone or UTC by default|
-| [Datetime To Timestamp](wrangler-docs/directives/datetime-to-timestamp.md) | Converts a datetime value to timestamp with the given zone |
-| [Format Datetime](wrangler-docs/directives/format-datetime.md) | Formats a datetime value to custom date time pattern strings |
-| [Timestamp To Datetime](wrangler-docs/directives/timestamp-to-datetime.md) | Converts a timestamp value to datetime |
-| **Lookups** | |
-| [Catalog Lookup](wrangler-docs/directives/catalog-lookup.md) | Static catalog lookup of ICD-9, ICD-10-2016, ICD-10-2017 codes |
-| [Table Lookup](wrangler-docs/directives/table-lookup.md) | Performs lookups into Table datasets |
-| **Hashing & Masking** | |
-| [Message Digest or Hash](wrangler-docs/directives/hash.md) | Generates a message digest |
-| [Mask Number](wrangler-docs/directives/mask-number.md) | Applies substitution masking on the column values |
-| [Mask Shuffle](wrangler-docs/directives/mask-shuffle.md) | Applies shuffle masking on the column values |
-| **Row Operations** | |
-| [Filter Row if Matched](wrangler-docs/directives/filter-row-if-matched.md) | Filters rows that match a pattern for a column |
-| [Filter Row if True](wrangler-docs/directives/filter-row-if-true.md) | Filters rows if the condition is true. |
-| [Filter Row Empty of Null](wrangler-docs/directives/filter-empty-or-null.md) | Filters rows that are empty of null. |
-| [Flatten](wrangler-docs/directives/flatten.md) | Separates the elements in a repeated field |
-| [Fail on condition](wrangler-docs/directives/fail.md) | Fails processing when the condition is evaluated to true. |
-| [Send to Error](wrangler-docs/directives/send-to-error.md) | Filtering of records to an error collector |
-| [Send to Error And Continue](wrangler-docs/directives/send-to-error-and-continue.md) | Filtering of records to an error collector and continues processing |
-| [Split to Rows](wrangler-docs/directives/split-to-rows.md) | Splits based on a separator into multiple records |
-| **Column Operations** | |
-| [Change Column Case](wrangler-docs/directives/change-column-case.md) | Changes column names to either lowercase or uppercase |
-| [Changing Case](wrangler-docs/directives/changing-case.md) | Change the case of column values |
-| [Cleanse Column Names](wrangler-docs/directives/cleanse-column-names.md) | Sanatizes column names, following specific rules |
-| [Columns Replace](wrangler-docs/directives/columns-replace.md) | Alters column names in bulk |
-| [Copy](wrangler-docs/directives/copy.md) | Copies values from a source column into a destination column |
-| [Drop Column](wrangler-docs/directives/drop.md) | Drops a column in a record |
-| [Fill Null or Empty Columns](wrangler-docs/directives/fill-null-or-empty.md) | Fills column value with a fixed value if null or empty |
-| [Keep Columns](wrangler-docs/directives/keep.md) | Keeps specified columns from the record |
-| [Merge Columns](wrangler-docs/directives/merge.md) | Merges two columns by inserting a third column |
-| [Rename Column](wrangler-docs/directives/rename.md) | Renames an existing column in the record |
-| [Set Column Header](wrangler-docs/directives/set-headers.md) | Sets the names of columns, in the order they are specified |
-| [Split to Columns](wrangler-docs/directives/split-to-columns.md) | Splits a column based on a separator into multiple columns |
-| [Swap Columns](wrangler-docs/directives/swap.md) | Swaps column names of two columns |
-| [Set Column Data Type](wrangler-docs/directives/set-type.md) | Convert data type of a column |
-| **NLP** | |
-| [Stemming Tokenized Words](wrangler-docs/directives/stemming.md) | Applies the Porter stemmer algorithm for English words |
-| **Transient Aggregators & Setters** | |
-| [Increment Variable](wrangler-docs/directives/increment-variable.md) | Increments a transient variable with a record of processing. |
-| [Set Variable](wrangler-docs/directives/set-variable.md) | Sets a transient variable with a record of processing. |
-| **Functions** | |
-| [Data Quality](wrangler-docs/functions/dq-functions.md) | Data quality check functions. Checks for date, time, etc. |
-| [Date Manipulations](wrangler-docs/functions/date-functions.md) | Functions that can manipulate date |
-| [DDL](wrangler-docs/functions/ddl-functions.md) | Functions that can manipulate definition of data |
-| [JSON](wrangler-docs/functions/json-functions.md) | Functions that can be useful in transforming your data |
-| [Types](wrangler-docs/functions/type-functions.md) | Functions for detecting the type of data |
-
-## Performance
-
-Initial performance tests show that with a set of directives of high complexity for
-transforming data, *DataPrep* is able to process at about ~106K records per second. The
-rates below are specified as *records/second*.
-
-| Directive Complexity | Column Count | Records | Size | Mean Rate |
-| -------------------- | :----------: | ---------: | -------------: | --------: |
-| High (167 Directives) | 426 | 127,946,398 | 82,677,845,324 | 106,367.27 |
-| High (167 Directives) | 426 | 511,785,592 | 330,711,381,296 | 105,768.93 |
-
-
-## Contact
-
-### Mailing Lists
-
-CDAP User Group and Development Discussions:
-
-* [cdap-user@googlegroups.com](https://groups.google.com/d/forum/cdap-user)
-
-The *cdap-user* mailing list is primarily for users using the product to develop
-applications or building plugins for appplications. You can expect questions from
-users, release announcements, and any other discussions that we think will be helpful
-to the users.
-
-### IRC Channel
-
-CDAP IRC Channel: [#cdap on irc.freenode.net](http://webchat.freenode.net?channels=%23cdap)
-
-### Slack Team
-
-CDAP Users on Slack: [cdap-users team](https://cdap-users.herokuapp.com)
-
-
-## License and Trademarks
-
-Copyright © 2016-2019 Cask Data, Inc.
-
-Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
-in compliance with the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software distributed under the
-License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
-either express or implied. See the License for the specific language governing permissions
-and limitations under the License.
-
-Cask is a trademark of Cask Data, Inc. All rights reserved.
-
-Apache, Apache HBase, and HBase are trademarks of The Apache Software Foundation. Used with
-permission. No endorsement by The Apache Software Foundation is implied by the use of these marks.
+# ZeoTap Bidirectional ClickHouse & Flat File Data Ingestion Tool
+
+This web application facilitates data ingestion between a ClickHouse database and flat files. It supports bidirectional data flow, column selection, and multi-table joins.
+
+## Features
+
+- **Bidirectional Data Flow:**
+ - ClickHouse → Flat File
+ - Flat File → ClickHouse
+
+- **ClickHouse Integration:**
+ - Connect using Host, Port, Database, User
+ - JWT Token-based authentication
+ - Support for both HTTP and HTTPS connections
+
+- **Flat File Integration:**
+ - Support for CSV files
+ - Configurable delimiters
+ - Header detection
+
+- **Schema Discovery & Column Selection:**
+ - View available tables and columns
+ - Select specific columns for ingestion
+ - Preserve data types
+
+- **Multi-Table Join (Bonus Feature):**
+ - Select multiple tables
+ - Specify JOIN conditions
+ - Combined data export
+
+- **Additional Features:**
+ - Data preview before ingestion
+ - Record count reporting
+ - Error handling
+
+## Project Structure
+
+The project is divided into two main parts:
+
+1. **Backend (Java + Spring Boot):**
+ - REST API for data ingestion operations
+ - ClickHouse connectivity using JDBC
+ - CSV file handling
+ - Multi-table JOIN support
+
+2. **Frontend (Next.js + React):**
+ - User interface for configuring connections
+ - Table/column selection interface
+ - Data preview functionality
+ - Progress reporting
+
+## Setup and Installation
+
+### Prerequisites
+
+- Java 17+
+- Node.js 16+
+- npm 8+
+- ClickHouse database (local or remote)
+
+### Backend Setup
+
+1. Navigate to the backend directory:
+ ```bash
+ cd backend
+ ```
+
+2. Build the project:
+ ```bash
+ ./mvnw clean package
+ ```
+
+3. Run the application:
+ ```bash
+ java -jar target/data-ingestion-tool-0.0.1-SNAPSHOT.jar
+ ```
+
+### Frontend Setup
+
+1. Navigate to the frontend directory:
+ ```bash
+ cd frontend
+ ```
+
+2. Install dependencies:
+ ```bash
+ npm install
+ ```
+
+3. Run the development server:
+ ```bash
+ npm run dev
+ ```
+
+4. Access the application at http://localhost:3000
+
+## Usage Guide
+
+1. **Select Source and Target:**
+ - Choose between ClickHouse → Flat File or Flat File → ClickHouse
+
+2. **Configure Source:**
+ - For ClickHouse: Provide connection details and JWT token
+ - For Flat File: Upload a CSV file and configure delimiter settings
+
+3. **Configure Target:**
+ - For ClickHouse: Provide connection details
+ - For Flat File: Specify file name and delimiter preferences
+
+4. **Select Columns:**
+ - Choose specific tables and columns to include
+ - For multi-table ingestion, configure JOIN conditions
+
+5. **Preview Data:**
+ - Review a sample of the data before proceeding
+
+6. **Start Ingestion:**
+ - Begin the data transfer process
+ - View progress and completion status
+
+7. **Download Results:**
+ - For Flat File targets, download the generated file
+ - See total record count and processing summary
+
+## Testing
+
+The application can be tested with:
+
+- ClickHouse example datasets (`uk_price_paid`, `ontime`)
+- Any CSV file with proper formatting
+
+## Security Considerations
+
+- JWT tokens are used for secure authentication with ClickHouse
+- File uploads are validated for security
+- No sensitive information is logged
+
+## License
+
+This project is proprietary and confidential.
+
+Copyright © 2025 ZeoTap. All rights reserved.
\ No newline at end of file
diff --git a/SECURITY.md b/SECURITY.md
deleted file mode 100644
index c13abd11a..000000000
--- a/SECURITY.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# Security Policy
-
-## Reporting a Vulnerability
-
-To report a security issue, please use [https://g.co/vulnz](https://g.co/vulnz).
-We use g.co/vulnz for our intake, and do coordination and disclosure here on
-GitHub (including using GitHub Security Advisory). The Google Security Team will
-respond within 5 working days of your report on g.co/vulnz.
diff --git a/backend/pom.xml b/backend/pom.xml
new file mode 100644
index 000000000..96fe07634
--- /dev/null
+++ b/backend/pom.xml
@@ -0,0 +1,102 @@
+
+
+ 4.0.0
+
+ org.springframework.boot
+ spring-boot-starter-parent
+ 3.2.5
+
+
+ com.zeotap
+ data-ingestion-tool
+ 0.0.1-SNAPSHOT
+ data-ingestion-tool
+ Bidirectional ClickHouse & Flat File Data Ingestion Tool
+
+
+ 17
+ 0.4.6
+
+
+
+
+ org.springframework.boot
+ spring-boot-starter-web
+
+
+ org.springframework.boot
+ spring-boot-starter-validation
+
+
+
+
+ com.clickhouse
+ clickhouse-jdbc
+ ${clickhouse.jdbc.version}
+ http
+
+
+
+
+ commons-io
+ commons-io
+ 2.11.0
+
+
+
+
+ com.opencsv
+ opencsv
+ 5.7.1
+
+
+
+
+ io.jsonwebtoken
+ jjwt-api
+ 0.11.5
+
+
+ io.jsonwebtoken
+ jjwt-impl
+ 0.11.5
+ runtime
+
+
+ io.jsonwebtoken
+ jjwt-jackson
+ 0.11.5
+ runtime
+
+
+
+ org.projectlombok
+ lombok
+ true
+
+
+
+ org.springframework.boot
+ spring-boot-starter-test
+ test
+
+
+
+
+
+
+ org.springframework.boot
+ spring-boot-maven-plugin
+
+
+
+ org.projectlombok
+ lombok
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/backend/src/main/java/com/zeotap/dataingestion/DataIngestionApplication.java b/backend/src/main/java/com/zeotap/dataingestion/DataIngestionApplication.java
new file mode 100644
index 000000000..43f39f0ab
--- /dev/null
+++ b/backend/src/main/java/com/zeotap/dataingestion/DataIngestionApplication.java
@@ -0,0 +1,29 @@
+package com.zeotap.dataingestion;
+
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.context.annotation.Bean;
+import org.springframework.web.servlet.config.annotation.CorsRegistry;
+import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
+
+// Spring Boot entry point for the ClickHouse <-> flat file ingestion backend.
+// Besides bootstrapping the application context, it registers the CORS policy
+// that lets the frontend (http://localhost:3000) call the REST API under /api/**.
+@SpringBootApplication
+public class DataIngestionApplication {
+
+ // Bootstraps the Spring application context and starts the embedded server.
+ public static void main(String[] args) {
+ SpringApplication.run(DataIngestionApplication.class, args);
+ }
+
+ // CORS configuration bean: permits the frontend origin to call /api/**
+ // with GET/POST/PUT/DELETE and to send credentials (cookies/auth headers).
+ // NOTE(review): the origin is hard-coded to localhost:3000 — presumably a
+ // development-only setting; confirm before deploying to another host.
+ @Bean
+ public WebMvcConfigurer corsConfigurer() {
+ return new WebMvcConfigurer() {
+ @Override
+ public void addCorsMappings(CorsRegistry registry) {
+ // Allow frontend to access the API
+ registry.addMapping("/api/**")
+ .allowedOrigins("http://localhost:3000")
+ .allowedMethods("GET", "POST", "PUT", "DELETE")
+ .allowCredentials(true);
+ }
+ };
+ }
+}
\ No newline at end of file
diff --git a/backend/src/main/java/com/zeotap/dataingestion/controller/ClickHouseController.java b/backend/src/main/java/com/zeotap/dataingestion/controller/ClickHouseController.java
new file mode 100644
index 000000000..88ae703a9
--- /dev/null
+++ b/backend/src/main/java/com/zeotap/dataingestion/controller/ClickHouseController.java
@@ -0,0 +1,52 @@
+package com.zeotap.dataingestion.controller;
+
+import com.zeotap.dataingestion.model.ClickHouseConfig;
+import com.zeotap.dataingestion.model.TableInfo;
+import com.zeotap.dataingestion.service.ClickHouseService;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestBody;
+import org.springframework.web.bind.annotation.RequestMapping;
+import org.springframework.web.bind.annotation.RestController;
+
+import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+@RestController
+@RequestMapping("/api/clickhouse")
+@RequiredArgsConstructor
+@Slf4j
+public class ClickHouseController {
+
+ private final ClickHouseService clickHouseService;
+
+ @PostMapping("/test-connection")
+ public ResponseEntity