Skip to content

Upgrade Airflow version for custom Image #2

Open
@nileshbhadana

Description

@nileshbhadana

Problem Statement
Upgrade the Airflow version in the custom image to the latest release.

Dockerfile

# VERSION 2.4.0
# AUTHOR: Matthieu "Puckel_" Roisil
# DESCRIPTION: Basic Airflow container
# BUILD: docker build --rm -t puckel/docker-airflow .
# SOURCE: https://github.com/puckel/docker-airflow

FROM python:3.9-slim-buster
LABEL maintainer="Puckel_"

# Never prompt the user for choices on installation/configuration of packages
ENV DEBIAN_FRONTEND=noninteractive \
    TERM=linux

# Airflow build-time settings; override with `docker build --build-arg NAME=value`.
# AIRFLOW_DEPS is appended to the Airflow extras list by the install step;
# PYTHON_DEPS is declared here for additional Python dependencies.
ARG AIRFLOW_VERSION=2.4.0
ARG AIRFLOW_USER_HOME=/usr/local/airflow
ARG AIRFLOW_DEPS=""
ARG PYTHON_DEPS=""
# Exported so the value is also available in the running container.
ENV AIRFLOW_HOME=${AIRFLOW_USER_HOME}

# Define en_US.
ENV LANGUAGE=en_US.UTF-8 \
    LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8 \
    LC_CTYPE=en_US.UTF-8 \
    LC_MESSAGES=en_US.UTF-8

# Project-level Python requirements; installed by a later `pip install -r` step.
COPY requirements.txt /requirements.txt

# Disable noisy "Handling signal" log messages:
# ENV GUNICORN_CMD_ARGS --log-level WARNING

# PostgreSQL client library and headers plus a C compiler
# (presumably needed to build Python packages with C extensions — confirm).
RUN apt-get update \
    && apt-get install -y \
        gcc \
        libpq-dev \
        libpq5
# Install OS packages, create the airflow user and install Airflow together
# with its companion Python packages in a single layer, then purge the
# build-only packages ($buildDeps) and the apt/tmp/doc leftovers.
#
# - Airflow is installed against the official constraints file that matches
#   AIRFLOW_VERSION and the interpreter's major.minor version, so its
#   dependencies resolve to the set the release was tested with. Packages
#   pre-installed above at other versions are moved to the constrained
#   version by this step.
# - The extras spec is quoted so the shell never treats [...] as a glob.
# - PYTHON_DEPS (declared as a build ARG) is the variable checked for extra
#   packages; the previously referenced PYTHON_REQS was never defined, so the
#   build argument had no effect.
# - NOTE(review): psycopg2 and psycopg2-binary both provide the `psycopg2`
#   module; confirm that installing both is intentional.
# - NOTE(review): airflow.cfg for this image references the HashiCorp Vault
#   secrets backend; confirm that requirements.txt or AIRFLOW_DEPS pulls in
#   the matching Airflow provider package (only `hvac` is installed here).
RUN set -ex \
    && buildDeps=' \
    freetds-dev \
    libkrb5-dev \
    libsasl2-dev \
    libssl-dev \
    libffi-dev \
    libpq-dev \
    git \
    ' \
    && apt-get update -yqq \
    && apt-get upgrade -yqq \
    && apt-get install -yqq --no-install-recommends \
    $buildDeps \
    freetds-bin \
    build-essential \
    default-libmysqlclient-dev \
    apt-utils \
    curl \
    rsync \
    netcat \
    locales \
    && sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \
    && locale-gen \
    && update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \
    && useradd -ms /bin/bash -d ${AIRFLOW_USER_HOME} airflow \
    && pip install -U pip setuptools wheel \
    && pip install pytz \
    && pip install pyOpenSSL \
    && pip install ndg-httpsclient \
    && pip install pyasn1 \
    && pip install singer-target-postgres \
    && pip install psycopg2==2.8.4 \
    && pip install psycopg2-binary==2.8.4 \
    && pip install flask_bcrypt \
    && pip install SQLAlchemy==1.3.23 \
    && pip install Flask-SQLAlchemy==2.4.4 \
    && pip install hvac \
    && pythonMinor=$(python -c 'import sys; print("%d.%d" % sys.version_info[:2])') \
    && pip install \
    "apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION}" \
    --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-${pythonMinor}.txt" \
    && pip install 'redis==3.2' \
    && if [ -n "${PYTHON_DEPS}" ]; then pip install ${PYTHON_DEPS}; fi \
    && apt-get purge --auto-remove -yqq $buildDeps \
    && apt-get autoremove -yqq --purge \
    && apt-get clean \
    && rm -rf \
    /var/lib/apt/lists/* \
    /tmp/* \
    /var/tmp/* \
    /usr/share/man \
    /usr/share/doc \
    /usr/share/doc-base
# Install the project's Python requirements. The file was copied to
# /requirements.txt earlier; the relative path assumes the build's working
# directory is still / — TODO confirm.
RUN pip install -r requirements.txt

# Ship the container entrypoint and the Airflow configuration.
COPY script/entrypoint.sh /entrypoint.sh
COPY config/airflow.cfg ${AIRFLOW_USER_HOME}/airflow.cfg

# Remove the compiler, then hand the Airflow home directory (including the
# airflow.cfg copied above) over to the airflow user.
RUN apt-get autoremove -y gcc \
    && chown -R airflow: ${AIRFLOW_USER_HOME}

# install Java
USER root
# Java 8 and unzip are installed in one layer, together with the
# `apt-get update` they depend on, and the package lists are removed afterwards
# so they do not end up in the image.
# /usr/share/man/man1 is recreated first because an earlier step deletes
# /usr/share/man (presumably the JDK package's post-install step needs it).
# NOTE(review): this adds the Debian *stretch* security repository to a
# buster-based image, presumably because openjdk-8 is not packaged for buster;
# verify that this repository is still being served.
RUN echo "deb http://security.debian.org/debian-security stretch/updates main" >> /etc/apt/sources.list \
    && mkdir -p /usr/share/man/man1 \
    && apt-get update -y \
    && apt-get install -y openjdk-8-jdk unzip \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

# 8080 and 5555 are the container ports the compose file publishes for the
# webserver and flower services.
EXPOSE 8080 5555 8793

# Drop privileges for runtime and start in the Airflow home directory.
USER airflow
WORKDIR ${AIRFLOW_USER_HOME}
ENTRYPOINT ["/entrypoint.sh"]
CMD ["webserver"]

Docker-compose.yaml

version: "2.2"
services:
  # Password-protected Redis; the password matches the REDIS_PASSWORD value
  # set on the Airflow services.
  redis:
    image: "redis:5.0.5"
    command:
      - redis-server
      - --requirepass
      - redispass
    depends_on:
      - vault-dev

  # PostgreSQL database with credentials matching the POSTGRES_* values set on
  # the Airflow services; data is persisted in ./pgdata on the host.
  # NOTE(review): verify that PostgreSQL 9.6 is still supported by the Airflow
  # version being built before relying on this image tag.
  postgres:
    image: postgres:9.6
    volumes:
      - ./pgdata:/var/lib/postgresql/data/pgdata
    environment:
      POSTGRES_USER: airflow
      POSTGRES_PASSWORD: airflow
      POSTGRES_DB: airflow
      PGDATA: /var/lib/postgresql/data/pgdata

  # Airflow web UI; container port 8080 is published on host port 8181.
  webserver:
    build: .
    image: docker.io/nileshbhadana/airflow
    # "never" is not a valid restart policy; the policy that never restarts is
    # "no" (quoted so YAML does not parse it as boolean false).
    restart: "no"
    depends_on:
      - postgres
      - redis
    environment:
      - LOAD_EX=n
      # NOTE(review): the Fernet key and passwords below are committed in plain
      # text; consider supplying them from the host environment or an env file.
      - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
      - EXECUTOR=Celery
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
      - REDIS_PASSWORD=redispass
      # Taken from the host environment. Compose substitutes an empty string
      # when the variable is unset; presumably that empty value then takes
      # precedence over backend_kwargs in airflow.cfg — confirm.
      - AIRFLOW__SECRETS__BACKEND_KWARGS=${AIRFLOW__SECRETS__BACKEND_KWARGS}
    volumes:
      - ./dags:/usr/local/airflow/dags
      - ./plugins:/usr/local/airflow/plugins
      - ./requirements.txt:/requirements.txt
      - ./config/airflow.cfg:/usr/local/airflow/airflow.cfg
    ports:
      - "8181:8080"
    command: webserver
    # Healthy as long as the webserver PID file exists.
    healthcheck:
      test: ["CMD-SHELL", "[ -f /usr/local/airflow/airflow-webserver.pid ]"]
      interval: 30s
      timeout: 30s
      retries: 3

  # Flower UI, published on host port 5555.
  flower:
    build: .
    image: docker.io/nileshbhadana/airflow
    # "never" is not a valid restart policy; the policy that never restarts is
    # "no" (quoted so YAML does not parse it as boolean false).
    restart: "no"
    depends_on:
      - redis
    environment:
      - EXECUTOR=Celery
      - REDIS_PASSWORD=redispass
    volumes:
      - ./config/airflow.cfg:/usr/local/airflow/airflow.cfg
    ports:
      - "5555:5555"
    # NOTE(review): Airflow 2 exposes this as the `celery flower` subcommand;
    # confirm that entrypoint.sh (not shown here) translates this argument.
    command: flower

  # Airflow scheduler; started after the webserver container.
  scheduler:
    build: .
    image: docker.io/nileshbhadana/airflow
    # "never" is not a valid restart policy; the policy that never restarts is
    # "no" (quoted so YAML does not parse it as boolean false).
    restart: "no"
    depends_on:
      - webserver
    volumes:
      - ./dags:/usr/local/airflow/dags
      - ./plugins:/usr/local/airflow/plugins
      - ./requirements.txt:/requirements.txt
      - ./config/airflow.cfg:/usr/local/airflow/airflow.cfg
    environment:
      - LOAD_EX=n
      # NOTE(review): the Fernet key and passwords below are committed in plain
      # text; consider supplying them from the host environment or an env file.
      - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
      - EXECUTOR=Celery
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
      - REDIS_PASSWORD=redispass
      # Taken from the host environment; Compose substitutes an empty string
      # when the variable is unset.
      - AIRFLOW__SECRETS__BACKEND_KWARGS=${AIRFLOW__SECRETS__BACKEND_KWARGS}
    command: scheduler

  # Celery worker; started after the scheduler container.
  worker:
    build: .
    image: docker.io/nileshbhadana/airflow
    # "never" is not a valid restart policy; the policy that never restarts is
    # "no" (quoted so YAML does not parse it as boolean false).
    restart: "no"
    depends_on:
      - scheduler
    volumes:
      - ./dags:/usr/local/airflow/dags
      # Custom plugins
      - ./plugins:/usr/local/airflow/plugins
      - ./requirements.txt:/requirements.txt
      - ./config/airflow.cfg:/usr/local/airflow/airflow.cfg
    environment:
      # NOTE(review): the Fernet key and passwords below are committed in plain
      # text; consider supplying them from the host environment or an env file.
      - FERNET_KEY=46BKJoQYlPPOexq0OhDZnIlNepKFf87WFwLbfzqDDho=
      - EXECUTOR=Celery
      - POSTGRES_USER=airflow
      - POSTGRES_PASSWORD=airflow
      - POSTGRES_DB=airflow
      - REDIS_PASSWORD=redispass
      # Taken from the host environment; Compose substitutes an empty string
      # when the variable is unset.
      - AIRFLOW__SECRETS__BACKEND_KWARGS=${AIRFLOW__SECRETS__BACKEND_KWARGS}
    # NOTE(review): Airflow 2 exposes this as the `celery worker` subcommand;
    # confirm that entrypoint.sh (not shown here) translates this argument.
    command: worker

  # HashiCorp Vault server with file storage; API/UI published on host port 8200.
  # NOTE(review): the image is not pinned to a tag; consider pinning a version.
  vault-dev:
    image: vault
    container_name: vault-dev
    cap_add:
      - IPC_LOCK
    ports:
      - "8200:8200"
    volumes:
      - ./vault:/vault/file
      - ./unseal:/vault/unseal
      # Replaces the entrypoint script at this path with the local copy.
      - ./docker-entrypoint.sh:/usr/local/bin/docker-entrypoint.sh
    environment:
      # NOTE(review): a dev root token is presumably only honoured when the
      # server runs in dev mode, while the command below starts it from a
      # config directory — confirm that "myroot" is actually usable.
      VAULT_DEV_ROOT_TOKEN_ID: "myroot"
      VAULT_LOCAL_CONFIG: '{"backend": {"file": {"path": "/vault/file"}}, "default_lease_ttl": "168h", "max_lease_ttl": "720h"}'
      VAULT_SUPPLEMENTAL_CONFIG: '{"ui":true, "listener": {"tcp":{"address": "0.0.0.0:8200", "tls_disable": 1}}}'
      VAULT_ADDR: "http://127.0.0.1:8200"
    command:
      - 'vault'
      - 'server'
      - '-config=/vault/config'

Added the following block to airflow.cfg to enable the HashiCorp Vault secrets backend:

[secrets]
# Use HashiCorp Vault as the secrets backend for connections and variables.
backend = airflow.providers.hashicorp.secrets.vault.VaultBackend
# "url" uses the Compose service name of the Vault container: this file is
# mounted into the Airflow containers, where 127.0.0.1 is the container itself,
# not the vault-dev service.
# NOTE(review): the Vault token is stored here in plain text; consider
# supplying backend_kwargs through AIRFLOW__SECRETS__BACKEND_KWARGS instead.
backend_kwargs = {"connections_path": "connections", "variables_path": "variables", "mount_point": "airflow", "url": "http://vault-dev:8200", "auth_type": "token", "token": "myroot"}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions