8 changes: 4 additions & 4 deletions .azure-pipelines/gpu-tests.yml
@@ -92,10 +92,10 @@ jobs:
bash ./tests/special_tests.sh --mark_type=standalone --filter_pattern='test_f'
displayName: 'Testing: standalone multi-gpu'

- bash: |
. /tmp/venvs/fts_dev/bin/activate
bash ./tests/special_tests.sh --mark_type=exp_patch --filter_pattern='test_f' --experiment_patch_mask="1 0 0 1"
displayName: 'Testing: experimental einsum patch'
# - bash: |
# . /tmp/venvs/fts_dev/bin/activate
# bash ./tests/special_tests.sh --mark_type=exp_patch --filter_pattern='test_f' --experiment_patch_mask="1 0 0 1"
# displayName: 'Testing: Experimental Multi-GPU'

- bash: |
. /tmp/venvs/fts_dev/bin/activate
3 changes: 3 additions & 0 deletions .gitignore
@@ -9,6 +9,9 @@ lightning_logs/
# ignore ipynb files themselves since we want to only store the source *.py
*.ipynb

# SQLite database files
*.db

# Test-tube
test_tube_*/

2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -268,7 +268,7 @@ name/pattern-based configuration instead of manually inspecting modules and appl

### Added

- **FSDP Scheduled Fine-Tuning** is now supported! [See the tutorial here.](https://finetuning-scheduler.readthedocs.io/en/stable/advanced/fsdp_scheduled_fine_tuning.html)
- **FSDP Scheduled Fine-Tuning** is now supported! [See the tutorial here.](https://finetuning-scheduler.readthedocs.io/en/stable/distributed/fsdp_scheduled_fine_tuning.html)
- Introduced [``StrategyAdapter``](https://finetuning-scheduler.readthedocs.io/en/stable/api/finetuning_scheduler.strategy_adapters.html#finetuning_scheduler.strategy_adapters.StrategyAdapter)s. If you want to extend Fine-Tuning Scheduler (FTS) to use a custom, currently unsupported strategy or override current FTS behavior in the context of a given training strategy, subclassing ``StrategyAdapter`` is now a way to do so. See [``FSDPStrategyAdapter``](https://finetuning-scheduler.readthedocs.io/en/stable/api/finetuning_scheduler.strategy_adapters.html#finetuning_scheduler.strategy_adapters.FSDPStrategyAdapter) for an example implementation.
- support for `pytorch-lightning` 1.9.0

3 changes: 3 additions & 0 deletions CITATION.cff
@@ -125,6 +125,9 @@ identifiers:
- description: "Fine-Tuning Scheduler (v2.5.0)"
type: doi
value: 10.5281/zenodo.14537830
- description: "Fine-Tuning Scheduler (v2.5.1)"
type: doi
value: 10.5281/zenodo.15099039
license: "Apache-2.0"
url: "https://finetuning-scheduler.readthedocs.io/"
repository-code: "https://github.com/speediedan/finetuning-scheduler"
4 changes: 2 additions & 2 deletions dockers/base-cuda/Dockerfile
@@ -90,9 +90,9 @@ RUN \
# ... pytorch nightly dev version
#pip install --pre torch==2.7.0.dev20250201 torchvision==0.22.0.dev20250201 --index-url https://download.pytorch.org/whl/nightly/cu128; \
# temporarily remove torchvision from the nightly build until it supports cu128 in nightlies
pip install --pre torch==2.7.0.dev20250201 --index-url https://download.pytorch.org/whl/nightly/cu128; \
#pip install --pre torch==2.7.0.dev20250201 --index-url https://download.pytorch.org/whl/nightly/cu128; \
# ... test channel
#pip install --pre torch==2.7.0 torchvision==0.22.0 --index-url https://download.pytorch.org/whl/test/cu128; \
pip install --pre torch==2.7.0 torchvision==0.22.0 --index-url https://download.pytorch.org/whl/test/cu128; \
fi && \
# Install all requirements
pip install -r requirements/devel.txt --no-cache-dir && \
2 changes: 2 additions & 0 deletions docs/source/conf.py
@@ -96,6 +96,8 @@ def _transform_changelog(path_in: str, path_out: str) -> None:
"sphinx_togglebutton",
]

autodoc_typehints = "none"

# Suppress warnings about duplicate labels (needed for PL tutorials)
suppress_warnings = [
"autosectionlabel.*",
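For context, autodoc_typehints is a standard Sphinx autodoc option (documented values are "signature", "description", "none", and "both"); "none" omits type hints from rendered signatures entirely, which keeps long annotated signatures readable in the generated API pages. A minimal, illustrative conf.py sketch, not this project's full configuration:

# docs/source/conf.py -- illustrative sketch only
extensions = [
    "sphinx.ext.autodoc",  # renders API pages from docstrings
]

# Omit type hints from rendered signatures ("signature" is the default,
# which inlines them; "description"/"both" move or duplicate them into
# the parameter descriptions).
autodoc_typehints = "none"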
6 changes: 3 additions & 3 deletions docs/source/index.rst
@@ -100,9 +100,9 @@ thawed/unfrozen parameter groups associated with each fine-tuning phase as desir
and executed in ascending order. In addition to being zero-indexed, fine-tuning phase keys should be contiguous and
either integers or convertible to integers via ``int()``.

1. First, generate the default schedule to ``Trainer.log_dir``. It will be named after your
:external+pl:class:`~lightning.pytorch.core.module.LightningModule` subclass with the suffix
``_ft_schedule.yaml``.
1. First, generate the default schedule (output to :paramref:`~finetuning_scheduler.fts.FinetuningScheduler.log_dir`,
defaults to ``Trainer.log_dir``). It will be named after your
:external+pl:class:`~lightning.pytorch.core.module.LightningModule` subclass with the suffix ``_ft_schedule.yaml``.

.. code-block:: python

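The body of the code-block above is collapsed in this diff. As a rough, self-contained sketch of the step it documents, assuming the gen_ft_sched_only flag from the FTS docs (ToyModule and the random-tensor data below are hypothetical placeholders, not project code):

import torch
from torch.utils.data import DataLoader, TensorDataset
import lightning.pytorch as pl
from finetuning_scheduler import FinetuningScheduler

class ToyModule(pl.LightningModule):  # hypothetical stand-in for your module
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def training_step(self, batch, batch_idx):
        x, y = batch
        return torch.nn.functional.cross_entropy(self.layer(x), y)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=1e-3)

# With gen_ft_sched_only=True, FTS writes the default schedule (here
# ToyModule_ft_schedule.yaml) to log_dir and exits before training begins.
trainer = pl.Trainer(callbacks=[FinetuningScheduler(gen_ft_sched_only=True)])
dl = DataLoader(TensorDataset(torch.randn(64, 32), torch.randint(0, 2, (64,))), batch_size=8)
trainer.fit(ToyModule(), dl)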
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -7,11 +7,11 @@ requires = [
[tool.ruff]
line-length = 120
# Enable Pyflakes `E` and `F` codes by default.
select = [
lint.select = [
"E", "W", # see: https://pypi.org/project/pycodestyle
"F", # see: https://pypi.org/project/pyflakes
]
ignore = [
lint.ignore = [
"E731", # Do not assign a lambda expression, use a def
]
# Exclude a variety of commonly ignored directories.
@@ -23,7 +23,7 @@ exclude = [
"build",
"temp",
]
ignore-init-module-imports = true
lint.ignore-init-module-imports = true
output-format = "pylint"

[tool.ruff.per-file-ignores]
2 changes: 1 addition & 1 deletion requirements/base.txt
@@ -1,4 +1,4 @@
#lightning>=2.7.0,<2.7.1
# the below is uncommented when master is targeting a specific pl dev master commit
git+https://github.com/Lightning-AI/lightning.git@efe311cd46a372aeb5912ea5adfeef573a5d64ca#egg=lightning
git+https://github.com/Lightning-AI/lightning.git@669486afdd524fb66c1afc36bf93955384ac1224#egg=lightning
torch>=2.4.0
4 changes: 2 additions & 2 deletions requirements/docs.txt
@@ -3,8 +3,8 @@ myst-parser==0.18.1
nbsphinx>=0.8.5
pandoc>=1.0
docutils>=0.16
sphinxcontrib-fulltoc>=1.0
sphinxcontrib-mockautodoc
#sphinxcontrib-fulltoc>=1.0
#sphinxcontrib-mockautodoc
sphinx-autodoc-typehints>=1.16
sphinx-paramlinks>=0.5.1
sphinx-togglebutton>=0.2
2 changes: 1 addition & 1 deletion requirements/standalone_base.txt
@@ -1,4 +1,4 @@
#pytorch-lightning>=2.7.0,<2.7.1
# the below is uncommented when master is targeting a specific pl dev master commit
git+https://github.com/Lightning-AI/pytorch-lightning.git@efe311cd46a372aeb5912ea5adfeef573a5d64ca#egg=pytorch-lightning
git+https://github.com/Lightning-AI/pytorch-lightning.git@669486afdd524fb66c1afc36bf93955384ac1224#egg=pytorch-lightning
torch>=2.4.0
2 changes: 2 additions & 0 deletions requirements/test.txt
@@ -5,3 +5,5 @@ twine==3.2
mypy>=0.920
pre-commit>=1.0
protobuf<=3.20.1 # strict
# optional dependencies used in base fts coverage
mlflow>=1.0.0
159 changes: 159 additions & 0 deletions scripts/build_fts_env.sh
@@ -0,0 +1,159 @@
#!/bin/bash
#
# Utility script to build FTS environments
# Usage examples:
# build latest:
# ./build_fts_env.sh --repo_home=~/repos/finetuning-scheduler --target_env_name=fts_latest
# build release:
# ./build_fts_env.sh --repo_home=${HOME}/repos/fts-release --target_env_name=fts_release
# build latest with specific pytorch nightly:
# ./build_fts_env.sh --repo_home=~/repos/finetuning-scheduler --target_env_name=fts_latest --torch_dev_ver=dev20230820 --torchvision_dev_ver=dev20230821
# build latest with torch test channel:
# ./build_fts_env.sh --repo_home=~/repos/finetuning-scheduler --target_env_name=fts_latest --torch_test_channel
set -eo pipefail

unset repo_home
unset target_env_name
unset torch_dev_ver
unset torchvision_dev_ver
unset torch_test_channel
unset pip_install_flags

usage(){
>&2 cat << EOF
Usage: $0
[ --repo_home input ]
[ --target_env_name input ]
[ --torch_dev_ver input ]
[ --torchvision_dev_ver input ]
[ --torch_test_channel ]
[ --pip_install_flags "flags" ]
[ --help ]
Examples:
# build latest:
# ./build_fts_env.sh --repo_home=${HOME}/repos/finetuning-scheduler --target_env_name=fts_latest
# build release:
# ./build_fts_env.sh --repo_home=${HOME}/repos/fts-release --target_env_name=fts_release
# build release from torch test channel:
# ./build_fts_env.sh --repo_home=${HOME}/repos/fts-release --target_env_name=fts_release --torch_test_channel
# build latest with specific pytorch nightly:
# ./build_fts_env.sh --repo_home=${HOME}/repos/finetuning-scheduler --target_env_name=fts_latest --torch_dev_ver=dev20231014 --torchvision_dev_ver=dev20231014
# build latest with torch test channel:
# ./build_fts_env.sh --repo_home=${HOME}/repos/finetuning-scheduler --target_env_name=fts_latest --torch_test_channel
# build latest with no cache directory:
# ./build_fts_env.sh --repo_home=${HOME}/repos/finetuning-scheduler --target_env_name=fts_latest --pip_install_flags="--no-cache-dir"
EOF
exit 1
}

args=$(getopt -o '' --long repo_home:,target_env_name:,torch_dev_ver:,torchvision_dev_ver:,torch_test_channel,pip_install_flags:,help -- "$@")
if [[ $? -gt 0 ]]; then
usage
fi

eval set -- ${args}
while :
do
case $1 in
--repo_home) repo_home=$2 ; shift 2 ;;
--target_env_name) target_env_name=$2 ; shift 2 ;;
--torch_dev_ver) torch_dev_ver=$2 ; shift 2 ;;
--torchvision_dev_ver) torchvision_dev_ver=$2 ; shift 2 ;;
--torch_test_channel) torch_test_channel=1 ; shift ;;
--pip_install_flags) pip_install_flags=$2 ; shift 2 ;;
--help) usage ; shift ;;
# -- means the end of the arguments; drop this, and break out of the while loop
--) shift; break ;;
*) >&2 echo Unsupported option: $1
usage ;;
esac
done

# Default pip_install_flags to an empty string when not provided
pip_install_flags=${pip_install_flags:-""}

maybe_deactivate(){
if [ -n "$VIRTUAL_ENV" ]; then
deactivate
fi
}

clear_activate_env(){
$1 -m venv --clear ~/.venvs/${target_env_name}
source ~/.venvs/${target_env_name}/bin/activate
echo "Current venv prompt is now ${VIRTUAL_ENV_PROMPT}"
pip install ${pip_install_flags} --upgrade pip
}

base_env_build(){
case ${target_env_name} in
fts_latest)
clear_activate_env python3.12
if [[ -n ${torch_dev_ver} ]]; then
if [[ -z ${torchvision_dev_ver} ]]; then
# default the torchvision nightly version to the torch nightly version when not explicitly provided
torchvision_dev_ver=${torch_dev_ver}
fi
# temporarily remove torchvision until it supports cu128 in nightly binary
pip install ${pip_install_flags} --pre torch==2.7.0.${torch_dev_ver} --index-url https://download.pytorch.org/whl/nightly/cu128
#pip install ${pip_install_flags} --pre torch==2.7.0.${torch_dev_ver} torchvision==0.22.0.${torchvision_dev_ver} --index-url https://download.pytorch.org/whl/nightly/cu128
elif [[ $torch_test_channel -eq 1 ]]; then
pip install ${pip_install_flags} --pre torch==2.7.0 torchvision==0.22.0 --index-url https://download.pytorch.org/whl/test/cu128
else
pip install ${pip_install_flags} torch torchvision --index-url https://download.pytorch.org/whl/cu128
fi
;;
fts_release)
clear_activate_env python3.12
if [[ $torch_test_channel -eq 1 ]]; then
pip install ${pip_install_flags} --pre torch torchvision --index-url https://download.pytorch.org/whl/test/cu126
else
pip install ${pip_install_flags} torch torchvision --index-url https://download.pytorch.org/whl/cu126
fi
;;
fts_latest_pt_oldest | fts_latest_pt2_3_x | fts_release_pt2_3_x)
clear_activate_env python3.11
pip install ${pip_install_flags} torch==2.3.1 torchvision --index-url https://download.pytorch.org/whl/cu118
;;
fts_latest_pt2_4_x | fts_release_pt2_4_x)
clear_activate_env python3.11
pip install ${pip_install_flags} torch==2.4.1 torchvision --index-url https://download.pytorch.org/whl/cu118
;;
fts_latest_pt2_5_x | fts_release_pt2_5_x)
clear_activate_env python3.12
pip install ${pip_install_flags} torch==2.5.1 torchvision --index-url https://download.pytorch.org/whl/cu124
;;
fts_latest_pt2_6_x | fts_release_pt2_6_x)
clear_activate_env python3.12
pip install ${pip_install_flags} torch==2.6.0 torchvision --index-url https://download.pytorch.org/whl/cu126
;;
*)
echo "no matching environment found, exiting..."
exit 1
;;
esac

}

fts_install(){
source ~/.venvs/${target_env_name}/bin/activate
unset PACKAGE_NAME
cd ${repo_home}
python -m pip install ${pip_install_flags} -e ".[all]" -r requirements/docs.txt
rm -rf .mypy_cache
mypy --install-types --non-interactive
pre-commit install
git lfs install
python -c "import importlib.metadata; import torch; import lightning.pytorch; import finetuning_scheduler;
for package in ['torch', 'lightning', 'finetuning_scheduler']:
print(f'{package} version: {importlib.metadata.distribution(package).version}');"
}

d=`date +%Y%m%d%H%M%S`
echo "FTS env build executing at ${d} PT"
echo "Beginning env removal/update for ${target_env_name}"
maybe_deactivate
echo "Beginning FTS base env install for ${target_env_name}"
base_env_build
echo "Beginning FTS dev install for ${target_env_name}"
fts_install
echo "FTS env successfully built for ${target_env_name}!"