
Commit 7d1ccb2

committed
Create unified script and workflow for llama-fast models validation
1 parent bf2ccad commit 7d1ccb2

File tree

8 files changed: +399 -34 lines changed


.ci/scripts/convert_checkpoint.sh

+34
@@ -0,0 +1,34 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


set -eu

function convert_checkpoint() {
  local MODEL_REPO="$1"
  local CHECKPOINT_NAME="${MODEL_REPO##*/}"

  if [[ $CHECKPOINT_NAME == *"stories15M"* || $CHECKPOINT_NAME == *"stories42M"* || $CHECKPOINT_NAME == *"stories110M"* ]]; then
    # We need this to make the workflow uniform across all models because convert_hf_checkpoint always writes the converted checkpoint to model.pth
    pushd "checkpoints/${MODEL_REPO}"
    if [ ! -f "model.pth" ]; then
      mv "$CHECKPOINT_NAME.pt" "model.pth"
    fi
    popd
    return 0
  fi

  if [ -f "checkpoints/$MODEL_REPO/model.pth" ]; then
    echo "Converted checkpoint already exists. Skipping conversion for $MODEL_REPO."
    return 0
  fi
  echo "Convert Huggingface checkpoint for $MODEL_REPO"
  python scripts/convert_hf_checkpoint.py --checkpoint-dir "checkpoints/$MODEL_REPO"
}


convert_checkpoint "$1"
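
The script takes a single model-repo argument and operates on checkpoints/<repo>. A minimal sketch of invoking it by hand, assuming the stories15M files were already downloaded (for example by wget_checkpoint.sh below):

# Run from the llama-fast checkout root.
# For stories* checkpoints this only renames <name>.pt to model.pth;
# other repos go through scripts/convert_hf_checkpoint.py.
bash .ci/scripts/convert_checkpoint.sh tinyllamas/stories15M
ls checkpoints/tinyllamas/stories15M/model.pth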

.ci/scripts/gather_test_models.py

+67
@@ -0,0 +1,67 @@
#!/usr/bin/env python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import itertools
import json
import os
from typing import Any


MODEL_REPOS = {
    "tinyllamas/stories15M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin",
    # "tinyllamas/stories42M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories42M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin",
    "tinyllamas/stories110M": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin",
}

JOB_RUNNERS = {
    "32-core-ubuntu": "linux x86",
    # "macos-13": "macos x86",  # not working for ExecuTorch yet
    "macos-14": "macos M1",
}


def set_output(name: str, val: Any) -> None:
    """
    Set the GitHub output so that it can be accessed by other jobs
    """
    print(f"Setting {val} to GitHub output")

    if os.getenv("GITHUB_OUTPUT"):
        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
            print(f"{name}={val}", file=env)
    else:
        print(f"::set-output name={name}::{val}")


def export_models_for_ci() -> dict[str, dict]:
    """
    This gathers all the models that we want to test on GitHub OSS CI
    """

    # This is the JSON syntax for the configuration matrix used by GitHub
    # https://docs.github.com/en/actions/using-jobs/using-a-matrix-for-your-jobs
    models = {"include": []}

    for repo_name, runner in itertools.product(
        MODEL_REPOS.keys(),
        JOB_RUNNERS.keys(),
    ):
        record = {
            "repo_name": repo_name,
            "resources": MODEL_REPOS[repo_name],
            "runner": runner,
            "platform": JOB_RUNNERS[runner],
            "timeout": 90,
        }

        models["include"].append(record)

    set_output("models", json.dumps(models))


if __name__ == "__main__":
    export_models_for_ci()
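
This is what the gather step in the workflow below runs. A hedged sketch of invoking it locally and the kind of matrix it emits (entry abbreviated for readability):

# From the repo root; mirrors the "Extract the list of models to test" step in pull.yml.
PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py
# With GITHUB_OUTPUT unset it falls back to the legacy ::set-output form, roughly:
#   ::set-output name=models::{"include": [{"repo_name": "tinyllamas/stories15M",
#     "resources": "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt,...",
#     "runner": "32-core-ubuntu", "platform": "linux x86", "timeout": 90}, ...]}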

.ci/scripts/validate.sh

+60
@@ -0,0 +1,60 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


set -u

function generate_eager_model_output() {
  local CHECKPOINT_PATH="$1"
  local TARGET_DEVICE="${2:-cpu}"
  local MODEL_DIR="${CHECKPOINT_PATH%/*}"
  local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
  echo "Run inference with eager model for $MODEL_NAME"
  python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --prompt "$PROMPT" --device "$TARGET_DEVICE" > "$MODEL_DIR/output_eager"
  cat "$MODEL_DIR/output_eager"
}

function generate_compiled_model_output() {
  local CHECKPOINT_PATH="$1"
  local TARGET_DEVICE="${2:-cpu}"
  local MODEL_DIR="${CHECKPOINT_PATH%/*}"
  local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
  echo "############### Run inference with torch.compile for $MODEL_NAME ###############"
  python -W ignore generate.py --compile --checkpoint-path "$CHECKPOINT_PATH" --prompt "$PROMPT" --device "$TARGET_DEVICE" > "$MODEL_DIR/output_compiled"
  cat "$MODEL_DIR/output_compiled"
}

function generate_aoti_model_output() {
  local CHECKPOINT_PATH="$1"
  local TARGET_DEVICE="${2:-cpu}"
  local MODEL_DIR="${CHECKPOINT_PATH%/*}"
  local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
  echo "############### Run inference with AOTInductor for $MODEL_NAME ###############"
  python -W ignore export.py --checkpoint-path "$CHECKPOINT_PATH" --output-dso-path "${MODEL_DIR}/${MODEL_NAME}.so" --device "$TARGET_DEVICE"
  python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --dso-path "$MODEL_DIR/${MODEL_NAME}.so" --prompt "$PROMPT" > "$MODEL_DIR/output_aoti"
  cat "$MODEL_DIR/output_aoti"
}

function generate_executorch_model_output() {
  local CHECKPOINT_PATH="$1"
  local TARGET_DEVICE="${2:-cpu}"
  local MODEL_DIR="${CHECKPOINT_PATH%/*}"
  local MODEL_NAME=$(basename "$CHECKPOINT_PATH" | sed 's/\.[^.]*$//')
  echo "############### Run inference with ExecuTorch using XNNPACK for $MODEL_NAME ###############"
  python -W ignore export.py --checkpoint-path "$CHECKPOINT_PATH" --output-pte-path "$MODEL_DIR/${MODEL_NAME}.pte" -d "fp32"
  python -W ignore generate.py --checkpoint-path "$CHECKPOINT_PATH" --prompt "$PROMPT" --device "$TARGET_DEVICE" --pte-path "$MODEL_DIR/${MODEL_NAME}.pte" > "$MODEL_DIR/output_et"
  cat "$MODEL_DIR/output_et"
}


CHECKPOINT_PATH="$1"
TARGET_DEVICE="${2:-cpu}"
PROMPT="Hello, my name is"

generate_compiled_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE"
generate_aoti_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE"
generate_executorch_model_output "$CHECKPOINT_PATH" "$TARGET_DEVICE"
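
A hedged example of calling the validation entry point directly, assuming a converted model.pth is already in place (the device argument defaults to cpu):

# Runs torch.compile, AOTInductor, and ExecuTorch/XNNPACK inference in sequence,
# writing output_compiled, output_aoti, and output_et next to the checkpoint.
bash .ci/scripts/validate.sh checkpoints/tinyllamas/stories15M/model.pth cpu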

.ci/scripts/wget_checkpoint.sh

+30
@@ -0,0 +1,30 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

MODEL_REPO="$1"
RESOURCES_STRING="$2"
CHECKPOINT_NAME="${MODEL_REPO##*/}"

pushd "${LLAMA_FAST_ROOT}" || exit

# Create the directory for the checkpoint
mkdir -p "checkpoints/${MODEL_REPO}"
cd "checkpoints/${MODEL_REPO}" || exit

# Download all resources
IFS=','  # Set the field separator to comma
for resource in $RESOURCES_STRING; do
  echo "Downloading: $resource"
  if ! wget "$resource" 2>&1; then
    echo "Error: Failed to download $resource" >&2
    exit 1
  fi
done

popd || exit
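
The second argument is the comma-separated resource list produced by gather_test_models.py. A sketch of a manual run, assuming LLAMA_FAST_ROOT points at the checkout:

export LLAMA_FAST_ROOT=${PWD}
bash .ci/scripts/wget_checkpoint.sh tinyllamas/stories15M \
  "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt,https://github.com/karpathy/llama2.c/raw/master/tokenizer.model,https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin"
# Files land in ${LLAMA_FAST_ROOT}/checkpoints/tinyllamas/stories15M/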

.github/workflows/pull.yml

+59
@@ -0,0 +1,59 @@
name: pull

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

jobs:
  gather-models:
    runs-on: ubuntu-22.04
    outputs:
      models: ${{ steps.gather-models.outputs.models }}
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'false'
      - uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Extract the list of models to test
        id: gather-models
        run: |
          set -eux
          PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py
  test-cpu:
    name: test-cpu (${{ matrix.platform }}, ${{ matrix.repo_name }})
    needs: gather-models
    strategy:
      matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    env:
      LLAMA_FAST_ROOT: ${{ github.workspace }}
      REPO_NAME: ${{ matrix.repo_name }}
      ENABLE_ET_PYBIND: ${{ matrix.runner == 'macos-14' && 'false' || 'true' }}
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Print machine info
        run: |
          echo "$(uname -a)"
      - name: Install dependencies
        run: |
          bash ${LLAMA_FAST_ROOT}/scripts/install_et.sh $ENABLE_ET_PYBIND
      - name: Download checkpoints
        run: |
          bash ${LLAMA_FAST_ROOT}/.ci/scripts/wget_checkpoint.sh ${{ matrix.repo_name }} "${{ matrix.resources }}"
      - name: Run validation
        run: |
          pushd ${LLAMA_FAST_ROOT}
          export CHECKPOINT_PATH=${LLAMA_FAST_ROOT}/checkpoints/${REPO_NAME}/model.pth
          bash ${LLAMA_FAST_ROOT}/.ci/scripts/convert_checkpoint.sh ${REPO_NAME}
          bash ${LLAMA_FAST_ROOT}/.ci/scripts/validate.sh ${CHECKPOINT_PATH}
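
Putting the pieces together, each matrix job reduces to the following sequence, shown here as a hedged local reproduction for one model (pybind enabled, as on the Linux runner; the resource list placeholder comes from gather_test_models.py):

export LLAMA_FAST_ROOT=${PWD}
export REPO_NAME=tinyllamas/stories15M
bash ${LLAMA_FAST_ROOT}/scripts/install_et.sh true
bash ${LLAMA_FAST_ROOT}/.ci/scripts/wget_checkpoint.sh ${REPO_NAME} "<resource URLs from gather_test_models.py>"
bash ${LLAMA_FAST_ROOT}/.ci/scripts/convert_checkpoint.sh ${REPO_NAME}
bash ${LLAMA_FAST_ROOT}/.ci/scripts/validate.sh ${LLAMA_FAST_ROOT}/checkpoints/${REPO_NAME}/model.pth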

.github/workflows/runner_et.yml

+2 -2
@@ -37,8 +37,8 @@ jobs:
           pip install -r requirements.txt

           export LLAMA_FAST_ROOT=${PWD}
-          export ET_NO_PYBIND=1
-          ./scripts/install_et.sh
+          export ENABLE_ET_PYBIND=false
+          ./scripts/install_et.sh $ENABLE_ET_PYBIND
           cmake -S ./runner-et -B build/cmake-out -G Ninja
           cmake --build ./build/cmake-out
      - name: Download checkpoints

scripts/install_et.sh

+63 -32
@@ -1,32 +1,63 @@
-cd ${LLAMA_FAST_ROOT}
-echo "Inside: $LLAMA_FAST_ROOT"
-
-echo "Cloning executorch to ${LLAMA_FAST_ROOT}/build/src"
-rm -rf ${LLAMA_FAST_ROOT}/build
-mkdir -p ${LLAMA_FAST_ROOT}/build/src
-cd ${LLAMA_FAST_ROOT}/build/src
-git clone https://github.com/pytorch/executorch.git
-cd executorch
-echo "Install executorch: submodule update"
-git submodule sync
-git submodule update --init
-
-echo "Applying fixes"
-cp ${LLAMA_FAST_ROOT}/scripts/fixes_et/module.cpp ${LLAMA_FAST_ROOT}/build/src/executorch/extension/module/module.cpp # ET uses non-standard C++ that does not compile in GCC
-cp ${LLAMA_FAST_ROOT}/scripts/fixes_et/managed_tensor.h ${LLAMA_FAST_ROOT}/build/src/executorch/extension/runner_util/managed_tensor.h # ET is missing headers for vector/memory. This causes downstream issues when building runner-et.
-
-echo "Building and installing python libraries"
-if [ -n "${ET_NO_PYBIND}" ]; then
-  echo "Not installing pybind"
-  ./install_requirements.sh
-else
-  echo "Installing pybind"
-  ./install_requirements.sh --pybind xnnpack
-fi
-
-echo "Building and installing C++ libraries"
-echo "Inside: ${PWD}"
-mkdir cmake-out
-cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_OPTIMIZED=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON -DEXECUTORCH_BUILD_XNNPACK=ON -S . -B cmake-out -G Ninja
-cmake --build cmake-out
-cmake --install cmake-out --prefix ${LLAMA_FAST_ROOT}/build/install
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+install_pip_dependencies() {
+  echo "Installing common pip packages"
+
+  pip install wheel
+  pip install cmake
+  pip install ninja
+  pip install zstd
+  pushd ${LLAMA_FAST_ROOT}
+  pip install -r ./requirements.txt
+  popd
+}
+
+install_executorch() {
+  echo "Cloning executorch to ${LLAMA_FAST_ROOT}/build/src"
+  rm -rf ${LLAMA_FAST_ROOT}/build
+  mkdir -p ${LLAMA_FAST_ROOT}/build/src
+  pushd ${LLAMA_FAST_ROOT}/build/src
+  git clone https://github.com/pytorch/executorch.git
+  cd executorch
+  echo "Install executorch: submodule update"
+  git submodule sync
+  git submodule update --init
+
+  echo "Applying fixes"
+  cp ${LLAMA_FAST_ROOT}/scripts/fixes_et/module.cpp ${LLAMA_FAST_ROOT}/build/src/executorch/extension/module/module.cpp # ET uses non-standard C++ that does not compile in GCC
+  cp ${LLAMA_FAST_ROOT}/scripts/fixes_et/managed_tensor.h ${LLAMA_FAST_ROOT}/build/src/executorch/extension/runner_util/managed_tensor.h # ET is missing headers for vector/memory. This causes downstream issues when building runner-et.
+
+  echo "Building and installing python libraries"
+  if [ "${ENABLE_ET_PYBIND}" = false ]; then
+    echo "Not installing pybind"
+    bash ./install_requirements.sh
+  else
+    echo "Installing pybind"
+    bash ./install_requirements.sh --pybind xnnpack
+  fi
+  pip list
+
+  echo "Building and installing C++ libraries"
+  echo "Inside: ${PWD}"
+  mkdir cmake-out
+  cmake -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_BUILD_OPTIMIZED=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON -DEXECUTORCH_BUILD_XNNPACK=ON -S . -B cmake-out -G Ninja
+  cmake --build cmake-out
+  cmake --install cmake-out --prefix ${LLAMA_FAST_ROOT}/build/install
+  popd
+}
+
+
+ENABLE_ET_PYBIND="${1:-true}"
+
+pushd ${LLAMA_FAST_ROOT}
+install_pip_dependencies
+install_executorch $ENABLE_ET_PYBIND
+popd
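
The rewritten script now takes the pybind switch as its first argument, defaulting to true. A brief sketch of both modes, matching how pull.yml and runner_et.yml invoke it:

export LLAMA_FAST_ROOT=${PWD}
bash ./scripts/install_et.sh true    # install ExecuTorch with the xnnpack pybind extension
bash ./scripts/install_et.sh false   # skip pybind, as runner_et.yml and the macos-14 jobs do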
