Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
565bc64
feat: adding fusion releasor and kind cluster setup
hash-data Feb 11, 2026
4c014ee
fix: makefile cluster name
hash-data Feb 11, 2026
809e73b
chore: testing new release after change in postgres init
hash-data Feb 14, 2026
c26cc23
Merge branch 'master' of https://github.com/datazip-inc/olake-amoro i…
hash-data Feb 14, 2026
216e3cf
chore: branch tag
hash-data Feb 14, 2026
8b4a6c2
voila: working perfectly
hash-data Feb 14, 2026
c693d2b
fix: upgrading version of java scala and spark
hash-data Mar 5, 2026
40aa4ad
chore: add licence comments
hash-data Mar 5, 2026
731d88f
chore: changes asked by Badal and some licence updates
hash-data Mar 5, 2026
1b2af45
chore: try building from org secrets
hash-data Mar 5, 2026
a3fb3a7
Merge branch 'staging' of https://github.com/datazip-inc/olake-amoro …
hash-data Mar 5, 2026
cafdf1f
chore: updating licence text
hash-data Mar 5, 2026
b4b0ae9
chore: update regex
hash-data Mar 5, 2026
662354a
feat: debug mode and major interval configurations (#13)
hash-data Mar 12, 2026
ae65e72
chore: release dev-v2 and header check
hash-data Mar 12, 2026
49e6041
fix: licence and build image
hash-data Mar 12, 2026
9a04ea3
chore: remove image push
hash-data Mar 12, 2026
b1cef9c
chore: build correct images
hash-data Mar 13, 2026
2980bb8
chore: removing build tag, prev commit fixed all terminal and optimiz…
hash-data Mar 13, 2026
953f295
fix: added fix for parquet decoding error (#17)
shubham19may Mar 21, 2026
e817cf0
chore: fixing header check as well as adding test check back
hash-data Mar 22, 2026
c9a6498
chore: resolving schitiz comment on adding a ip which avoid collision
hash-data Mar 22, 2026
2bec72e
chore: remove cluster role binding
hash-data Mar 23, 2026
d8549fb
chore: resolve badal comment
hash-data Mar 23, 2026
70aa936
chore: move folder to local-test
hash-data Mar 23, 2026
c8aaff3
chore: add todo
hash-data Mar 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 55 additions & 142 deletions .github/workflows/docker-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,39 +14,46 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Modified by Datazip Inc. in 2026


# This workflow will build docker images when commit merged or pushed to master.
# or tags pushed.
# Build and publish Docker images. Triggered by push (dev-v3) or workflow_call with tag (e.g. release).

name: Publish Docker Image

on:
push:
branches:
- "master"
tags:
- "v*"
workflow_call:
inputs:
tag:
description: 'Version tag for the image (e.g. v1.0.0).'
type: string
required: true
# Secrets may be org-level or repo-level; the caller passes them explicitly by name or with "secrets: inherit".
secrets:
DOCKER_USERNAME:
description: 'Docker Hub username (org or repo secret)'
required: true
DOCKER_PASSWORD:
description: 'Docker Hub password or token (org or repo secret)'
required: true


concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
docker-amoro:
name: Push Amoro Docker Image to Docker Hub
docker-fusion:
name: Push Fusion Docker Image
runs-on: ubuntu-latest
if: ${{ startsWith(github.repository, 'apache/') }}
strategy:
matrix:
hadoop: [ "v2", "v3" ]
environment: docker publish
if: ${{ startsWith(github.repository, 'datazip-inc/') }}
steps:
- uses: actions/checkout@v3
- name: Set up JDK 11
- name: Set up JDK 17
uses: actions/setup-java@v3
with:
java-version: '11'
java-version: '17'
distribution: 'temurin'
cache: maven
check-latest: false
Expand All @@ -56,40 +63,30 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

- name: Set up Docker tags
uses: docker/metadata-action@v5
- name: Set Docker tags
id: meta
with:
flavor: |
latest=false
images: |
name=apache/amoro
tags: |
type=ref,event=branch,enable=${{ matrix.hadoop == 'v3' }},suffix=-snapshot
type=ref,event=branch,enable=${{ matrix.hadoop == 'v2' }},suffix=-snapshot-hadoop2
type=raw,enable=${{ matrix.hadoop == 'v3' && startsWith(github.ref, 'refs/tags/v') }},value=latest
type=semver,event=tag,enable=${{ matrix.hadoop == 'v3' }},pattern={{version}}
type=semver,event=tag,enable=${{ matrix.hadoop == 'v3' }},pattern={{version}}, suffix=-hadoop3
type=semver,event=tag,enable=${{ matrix.hadoop == 'v2' }},pattern={{version}}, suffix=-hadoop2
run: |
  # Compute the comma-separated image tag list and publish it as the `tags`
  # output of this step.
  #
  # Inside a called (reusable) workflow the github context belongs to the
  # caller, so the event name is never 'workflow_call'. Select on the input
  # itself instead: it is empty on push-triggered runs, which fall back to
  # 'dev-v3'.
  VERSION_TAG="${{ inputs.tag || 'dev-v3' }}"
  if [ "${{ github.ref }}" = "refs/heads/master" ]; then
    # Runs on master also publish `latest`, so only release-style tags are accepted.
    case "${VERSION_TAG}" in
      v*) ;;
      *)
        echo "::error::On master branch, version tag must start with 'v' (e.g. v1.0.0). Got: ${VERSION_TAG}"
        exit 1
        ;;
    esac
    echo "tags=olakego/fusion:latest,olakego/fusion:${VERSION_TAG}" >> "$GITHUB_OUTPUT"
  else
    echo "tags=olakego/fusion:latest-${VERSION_TAG},olakego/fusion:${VERSION_TAG}" >> "$GITHUB_OUTPUT"
  fi

- name: Print tags
run: echo '${{ steps.meta.outputs.tags }}'

- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Set Maven Build Properties
if: ${{ matrix.hadoop == 'v2' }}
run: |
echo "MVN_HADOOP=-Phadoop2" >> $GITHUB_ENV
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- name: Build dist module with Maven
run: ./mvnw clean package -pl 'dist' -am -e ${MVN_HADOOP} -DskipTests -B -ntp -Psupport-all-formats -Pno-extended-disk-storage -Pno-plugin-bin
run: ./mvnw clean package -pl 'dist' -am -e -DskipTests -B -ntp -Psupport-all-formats -Pno-extended-disk-storage -Pno-plugin-bin

- name: Build and Push Amoro Docker Image
- name: Build and Push Fusion Docker Image to olakego/fusion
uses: docker/build-push-action@v4
with:
context: .
Expand All @@ -98,19 +95,21 @@ jobs:
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}

docker-optimizer-flink:
name: Push Amoro Optimizer-Flink Docker Image to Docker Hub
docker-optimizer-spark:
name: Push Fusion Spark Optimizer Docker Image to olakego/fusion-spark
runs-on: ubuntu-latest
if: ${{ startsWith(github.repository, 'apache/') }}
environment: docker publish
if: ${{ startsWith(github.repository, 'datazip-inc/') }}
strategy:
matrix:
flink: [ "1.14.6", "1.20.0" ]
spark: [ "3.5.8" ] # spark version supported
scala: [ "2.13.18" ] # scala version supported
steps:
- uses: actions/checkout@v3
- name: Set up JDK 11
- name: Set up JDK 17
uses: actions/setup-java@v3
with:
java-version: '11'
java-version: '17'
distribution: 'temurin'
cache: maven
check-latest: false
Expand All @@ -120,112 +119,25 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

- name: Set up Docker tags
uses: docker/metadata-action@v5
- name: Set Docker tags
id: meta
with:
flavor: |
latest=false
images: |
name=apache/amoro-flink-optimizer
tags: |
type=ref,event=branch,enable=${{ matrix.flink == '1.14.6' }},suffix=-snapshot
type=ref,event=branch,enable=${{ matrix.flink == '1.14.6' }},suffix=-snapshot-flink1.14
type=ref,event=branch,enable=${{ matrix.flink == '1.20.0' }},suffix=-snapshot-flink1.20
type=raw,enable=${{ matrix.hadoop == '1.14.6' && startsWith(github.ref, 'refs/tags/v') }},value=latest
type=semver,enable=${{ matrix.flink == '1.14.6' }},pattern={{version}}
type=semver,enable=${{ matrix.flink == '1.14.6' }},pattern={{version}}, suffix=-flink1.14
type=semver,enable=${{ matrix.flink == '1.20.0' }},pattern={{version}}, suffix=-flink1.20

- name: Print tags
run: echo '${{ steps.meta.outputs.tags }}'

- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Set optimizer flink version
run: |
OPTIMIZER_FLINK=${{ matrix.flink }} && \
echo "OPTIMIZER_FLINK=-Dflink-optimizer.flink-version${OPTIMIZER_FLINK}" >> $GITHUB_ENV
if [[ "$OPTIMIZER_FLINK" < "1.15" ]]; then
echo "Adding -Pflink-optimizer-pre-1.15 for Flink version less than 1.15"
echo "OPTIMIZER_FLINK=-Pflink-optimizer-pre-1.15 -Dflink-optimizer.flink-version=${OPTIMIZER_FLINK}" >> $GITHUB_ENV
# Compute the comma-separated image tag list and publish it as the `tags`
# output of this step.
#
# Inside a called (reusable) workflow the github context belongs to the
# caller, so the event name is never 'workflow_call'. Select on the input
# itself instead: it is empty on push-triggered runs, which fall back to
# 'dev-v3'.
VERSION_TAG="${{ inputs.tag || 'dev-v3' }}"
if [ "${{ github.ref }}" = "refs/heads/master" ]; then
  # Runs on master also publish `latest`, so only release-style tags are accepted.
  case "${VERSION_TAG}" in
    v*) ;;
    *)
      echo "::error::On master branch, version tag must start with 'v' (e.g. v1.0.0). Got: ${VERSION_TAG}"
      exit 1
      ;;
  esac
  echo "tags=olakego/fusion-spark:latest,olakego/fusion-spark:${VERSION_TAG}" >> "$GITHUB_OUTPUT"
else
  echo "tags=olakego/fusion-spark:latest-${VERSION_TAG},olakego/fusion-spark:${VERSION_TAG}" >> "$GITHUB_OUTPUT"
fi

- name: Set ENV Amoro version
id: version
run: |
AMORO_VERSION=`cat pom.xml | grep 'amoro-parent' -C 3 | grep -Eo '<version>.*</version>' | awk -F'[><]' '{print $3}'` \
&& echo "$AMORO_VERSION" \
&& echo "AMORO_VERSION=${AMORO_VERSION}" >> $GITHUB_ENV \
&& echo "AMORO_VERSION=${AMORO_VERSION}" >> $GITHUB_OUTPUT

- name: Build optimizer module with Maven
run: ./mvnw clean package -pl 'amoro-optimizer/amoro-optimizer-flink' -am -e ${OPTIMIZER_FLINK} -DskipTests -B -ntp

- name: Build and Push Flink Optimizer Docker Image
uses: docker/build-push-action@v4
env:
AMORO_VERSION: ${{ steps.version.outputs.AMORO_VERSION }}
with:
context: .
push: true
file: docker/optimizer-flink/Dockerfile
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
build-args: |
FLINK_VERSION=${{ matrix.flink }}
OPTIMIZER_JOB=amoro-optimizer/amoro-optimizer-flink/target/amoro-optimizer-flink-${{ env.AMORO_VERSION }}-jar-with-dependencies.jar

docker-optimizer-spark:
name: Push Amoro Optimizer-Spark Docker Image to Docker Hub
runs-on: ubuntu-latest
if: ${{ startsWith(github.repository, 'apache/') }}
strategy:
matrix:
spark: [ "3.5.7" ]
scala: [ "2.12.15" ]
steps:
- uses: actions/checkout@v3
- name: Set up JDK 11
uses: actions/setup-java@v3
with:
java-version: '11'
distribution: 'temurin'
cache: maven
check-latest: false
- name: Set up QEMU
uses: docker/setup-qemu-action@v2

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2

- name: Set up Docker tags
uses: docker/metadata-action@v5
id: meta
with:
flavor: |
latest=false
images: |
name=apache/amoro-spark-optimizer
tags: |
type=ref,event=branch,enable=${{ matrix.spark == '3.5.7' }},suffix=-snapshot
type=ref,event=branch,enable=${{ matrix.spark == '3.5.7' }},suffix=-snapshot-spark3.5
type=raw,enable=${{ matrix.hadoop == '3.5.7' && startsWith(github.ref, 'refs/tags/v') }},value=latest
type=semver,enable=${{ matrix.spark == '3.5.7' }},pattern={{version}}
type=semver,enable=${{ matrix.spark == '3.5.7' }},pattern={{version}}, suffix=-spark3.5

- name: Print tags
run: echo '${{ steps.meta.outputs.tags }}'

- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- name: Set optimizer spark version and extract versions
id: versions
Expand All @@ -245,9 +157,9 @@ jobs:
&& echo "AMORO_VERSION=${AMORO_VERSION}" >> $GITHUB_OUTPUT

- name: Build optimizer module with Maven
run: ./mvnw clean package -pl 'amoro-optimizer/amoro-optimizer-spark' -am -e ${SPARK_VERSION} -DskipTests -B -ntp
run: ./mvnw clean package -pl 'amoro-optimizer/amoro-optimizer-spark' -am -e ${SPARK_VERSION} -Dscala.version=${{ matrix.scala }} -Dscala.binary.version=${{ steps.versions.outputs.SCALA_BINARY_VERSION }} -DskipTests -B -ntp

- name: Build and Push Spark Optimizer Docker Image
- name: Build and Push Spark Optimizer Docker Image to olakego/fusion-spark
uses: docker/build-push-action@v4
env:
AMORO_VERSION: ${{ steps.version.outputs.AMORO_VERSION }}
Expand All @@ -261,6 +173,7 @@ jobs:
tags: ${{ steps.meta.outputs.tags }}
build-args: |
SPARK_VERSION=${{ matrix.spark }}
SPARK_JAVA_TAG=${{ matrix.spark }}-java17
OPTIMIZER_JOB=amoro-optimizer/amoro-optimizer-spark/target/amoro-optimizer-spark-${{ env.SPARK_MAJOR_VERSION}}_${{ env.SCALA_BINARY_VERSION}}-${{ env.AMORO_VERSION }}-jar-with-dependencies.jar


72 changes: 72 additions & 0 deletions .github/workflows/draft-changelog.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Modified by Datazip Inc. in 2026

name: Draft Releaser From Master

# Runs when a pull request targeting master is closed. The job below only
# proceeds when that pull request was actually merged and came from staging.
on:
  pull_request:
    types: [closed]
    branches:
      - master

jobs:
  create_draft_release:
    if: github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'master' && github.event.pull_request.head.ref == 'staging'
    name: Create Draft Release
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Fetch full history so existing tags are available to `git tag -l`.
          fetch-depth: 0

      - name: Get latest release
        id: latest-release
        run: |
          # Only plain vMAJOR.MINOR.PATCH tags take part in version bumping.
          # Any other tag (branch-style names, pre-release suffixes) would be
          # picked up by a bare `sort -V | tail` and break the arithmetic in
          # the next step. `|| true` keeps the step from failing when no tag
          # matches; LATEST_TAG is then exported empty.
          latest_tag=$(git tag -l | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' | sort -V | tail -n 1 || true)
          echo "LATEST_TAG=$latest_tag" >> "$GITHUB_ENV"

      - name: Generate next version
        id: generate-next-version
        run: |
          if [ -z "$LATEST_TAG" ]; then
            # No release tag exists yet: start the series here.
            next_version="v0.0.0"
          else
            # Drop the leading 'v' and split MAJOR.MINOR.PATCH on the dots.
            IFS='.' read -r major minor patch <<< "${LATEST_TAG#v}"

            # Bump the patch component; 10# forces base-10 so a value such as
            # "08" is not rejected as an invalid octal number.
            next_version="v${major}.${minor}.$((10#${patch} + 1))"
          fi
          echo "NEXT_VERSION=$next_version" >> "$GITHUB_ENV"

      - name: Create draft release
        id: create-draft-release
        uses: ncipollo/release-action@v1
        with:
          tag: ${{ env.NEXT_VERSION }}
          generateReleaseNotes: true
          draft: true
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
12 changes: 8 additions & 4 deletions .github/workflows/modification-header-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# limitations under the License.
#

# Modified by Datazip Pvt. Ltd. in 2026
# Modified by Datazip Inc. in 2026
# Original work Copyright The Apache Software Foundation (ASF)

name: Modification-Header-Check
Expand All @@ -42,15 +42,19 @@ jobs:

BASE_BRANCH=${{ github.base_ref }}
CHANGED_FILES=$(git diff --name-only origin/$BASE_BRANCH...HEAD)
DATAZIP_MODIFICATION_YEAR_REGEX="Modified by Datazip Pvt\. Ltd\. in [0-9]{4}"
ASF_ORIGINAL_WORK_NOTICE_REGEX="Original work Copyright The Apache Software Foundation \\(ASF\\)"
DATAZIP_MODIFICATION_YEAR_REGEX="Modified by Datazip Inc\. in [0-9]{4}"
NOT_MODIFIED_FILES=""
LICENSE_REGEX="Licensed to the Apache Software Foundation|Apache License|SPDX-License-Identifier"
for file in $CHANGED_FILES; do
if [[ ! -f "$file" ]]; then
continue
fi

if ! head -40 "$file" | grep -q -E "$DATAZIP_MODIFICATION_YEAR_REGEX" || ! head -40 "$file" | grep -q -E "$ASF_ORIGINAL_WORK_NOTICE_REGEX"; then
if ! head -40 "$file" | grep -q -E "$LICENSE_REGEX"; then
continue
fi

if ! head -40 "$file" | grep -q -E "$DATAZIP_MODIFICATION_YEAR_REGEX"; then
NOT_MODIFIED_FILES+="$file"$'\n'
fi
done
Expand Down
Loading
Loading