diff --git a/Labs/Airflow_Labs/Lab_1/Dockerfile b/Labs/Airflow_Labs/Lab_1/Dockerfile
new file mode 100644
index 000000000..1f757723b
--- /dev/null
+++ b/Labs/Airflow_Labs/Lab_1/Dockerfile
@@ -0,0 +1,24 @@
+# Base image is overridable at build time, e.g.
+#   docker build --build-arg AIRFLOW_BASE_IMAGE=apache/airflow:3.1.0 -t my-airflow:3.1.0 .
+# NOTE(review): the README tags the result my-airflow:3.1.0 and swaps it in for
+# apache/airflow:3.1.0, while this default is Airflow 2.5.1 -- confirm which is intended.
+ARG AIRFLOW_BASE_IMAGE=apache/airflow:2.5.1-python3.7
+FROM ${AIRFLOW_BASE_IMAGE}
+
+# Install as the airflow user (not root)
+USER airflow
+
+# Python packages used by the lab; add any additional packages to this list.
+# No --user flag: pip run as the non-root airflow user does not need it (and it is
+# rejected inside a virtualenv). --no-cache-dir keeps pip's cache out of the layer.
+RUN pip install --no-cache-dir \
+    matplotlib \
+    numpy \
+    pandas \
+    scikit-learn \
+    seaborn
+
+# Verify installations: the build fails here if a core package cannot be imported.
+RUN python -c "import pandas; print('pandas:', pandas.__version__)" && \
+    python -c "import sklearn; print('sklearn:', sklearn.__version__)" && \
+    python -c "import numpy; print('numpy:', numpy.__version__)"
diff --git a/Labs/Airflow_Labs/Lab_1/README.md b/Labs/Airflow_Labs/Lab_1/README.md
index 6defefba0..4395b98fd 100755
--- a/Labs/Airflow_Labs/Lab_1/README.md
+++ b/Labs/Airflow_Labs/Lab_1/README.md
@@ -125,7 +125,11 @@ Cloud
     AIRFLOW__CORE__LOAD_EXAMPLES: 'false'
 
     # Additional python package
-    _PIP_ADDITIONAL_REQUIREMENTS: ${_PIP_ADDITIONAL_REQUIREMENTS:- pandas }
+    # 1. Edit the Dockerfile to list the packages you need
+    # 2. Replace image: ${AIRFLOW_IMAGE_NAME:-apache/airflow:3.1.0} with
+    image: my-airflow:3.1.0
+    # 3. Build your image (run from the directory containing the Dockerfile)
+    docker build --no-cache -t my-airflow:3.1.0 .
 
     # Output dir
     - ${AIRFLOW_PROJ_DIR:-.}/working_data:/opt/airflow/working_data