Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions .github/workflows/H-Coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ jobs:
group: GZ_BD-CPU
env:
TASK: fleet-ci-paddle-build-whl-${{ github.event.pull_request.number }}
docker_image: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda129-coverage-test"
docker_image: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:ubuntu24-cuda129-dev"
steps:
- name: Check docker image and run container
env:
Expand Down Expand Up @@ -458,13 +458,13 @@ jobs:
steps:
- name: Determine the runner
run: |
gpu_id=$(( $(echo $PWD | awk -F'/' '{print $3}' | awk -F'-' '{print $2}') + 3 ))
gpu_id=$(( $(echo $PWD | awk -F'/' '{print $3}' | awk -F'-' '{print $2}') - 1 ))
echo GPU_DEVICES="$gpu_id" >> $GITHUB_ENV

- name: Check docker image and run container
env:
GPU_DEVICES: ${{ env.GPU_DEVICES }}
docker_image: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda129-coverage-test"
docker_image: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:ubuntu24-cuda129-dev"
run: |
container_name=${TASK}-$(date +%Y%m%d-%H%M%S)
echo "container_name=${container_name}" >> ${{ github.env }}
Expand Down Expand Up @@ -500,19 +500,21 @@ jobs:
docker exec -t ${{ env.container_name }} /bin/bash -ce '
rm -rf * .[^.]*
source /root/proxy
pip install uv coverage==7.6.1 bce-python-sdk==0.8.74 wrapt
export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/usr/local/cuda/lib64
pip install uv coverage==7.6.1 bce-python-sdk==0.8.74 wrapt pytest matplotlib parameterized
wget -q --tries=5 --no-proxy --no-check-certificate https://paddle-github-action.cdn.bcebos.com/PR/paddlefleet/${PR_ID}/${COMMIT_ID}/paddldfleet.tar.gz
tar -xf paddldfleet.tar.gz --strip-components=1
git config --global --add safe.directory /paddle
pip install dist/paddlefleet-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url=https://www.paddlepaddle.org.cn/packages/stable/cu129/ --extra-index-url=https://www.paddlepaddle.org.cn/packages/nightly/cu129/
echo "paddlefleet commit:"
python -c "import paddlefleet; print(paddlefleet.version.commit)"
wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
chmod +x /usr/local/bin/yq
'

- name: Download paddle.tar.gz and install paddle whl
run: |
docker exec -t ${{ env.container_name }} /bin/bash -c '
set -e
export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/usr/local/cuda/lib64
mkdir -p /PaddlePaddle
cd /PaddlePaddle
echo "Downloading Paddle.tar.gz from cfs"
Expand All @@ -522,12 +524,15 @@ jobs:
export UV_HTTP_TIMEOUT=300
pip uninstall paddlepaddle-gpu -y
pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --force-reinstall --extra-index-url=https://www.paddlepaddle.org.cn/packages/nightly/cu129/
echo "paddlefleet commit:"
python -c "import paddlefleet; print(paddlefleet.version.commit)"
'

- name: Single card test
run: |
docker exec -t ${{ env.container_name }} /bin/bash -xce '
pwd
export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/usr/local/cuda/lib64
if [ "${BRANCH}" != "develop" ]; then
git checkout $fleet_branch
echo "Checked out fleet branch: $fleet_branch"
Expand All @@ -539,7 +544,7 @@ jobs:
export UV_NO_SYNC=1 # This environment variable prevents uv sync from being executed when running un run.
export UV_HTTP_TIMEOUT=300
python -c "import paddle; print(paddle.version.commit)"
timeout 10m bash ci/single_card_test.sh
timeout 40m bash ci/single_card_test.sh
single_card_exit_code=$?
if [[ "$single_card_exit_code" != "0" ]]; then
echo -e "::error:: \033[31mSingle card test failed.\033[0m"
Expand All @@ -565,7 +570,7 @@ jobs:
env:
PIP_CACHE_DIR: /root/.cache/pip
TASK: paddle-fleet-CI-${{ github.event.pull_request.number }}-multi-card_test
docker_image: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:cuda129-coverage-test"
docker_image: "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddle:ubuntu24-cuda129-dev"
steps:
- name: Check docker image and run container
run: |
Expand Down Expand Up @@ -604,19 +609,21 @@ jobs:
docker exec -t ${{ env.container_name }} /bin/bash -ce '
rm -rf * .[^.]*
source /root/proxy
pip install uv coverage==7.6.1 bce-python-sdk==0.8.74 wrapt
export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/usr/local/cuda/lib64
pip install uv coverage==7.6.1 bce-python-sdk==0.8.74 wrapt pytest matplotlib parameterized
wget -q --tries=5 --no-proxy --no-check-certificate https://paddle-github-action.cdn.bcebos.com/PR/paddlefleet/${PR_ID}/${COMMIT_ID}/paddldfleet.tar.gz
tar -xf paddldfleet.tar.gz --strip-components=1
git config --global --add safe.directory /paddle
pip install dist/paddlefleet-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url=https://www.paddlepaddle.org.cn/packages/stable/cu129/ --extra-index-url=https://www.paddlepaddle.org.cn/packages/nightly/cu129/
echo "paddlefleet commit:"
python -c "import paddlefleet; print(paddlefleet.version.commit)"
wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/local/bin/yq
chmod +x /usr/local/bin/yq
'

- name: Download paddle.tar.gz and install paddle whl
run: |
docker exec -t ${{ env.container_name }} /bin/bash -c '
set -e
export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/usr/local/cuda/lib64
mkdir -p /PaddlePaddle
cd /PaddlePaddle
echo "Downloading Paddle.tar.gz from cfs"
Expand All @@ -626,12 +633,15 @@ jobs:
export UV_HTTP_TIMEOUT=300
pip uninstall paddlepaddle-gpu -y
pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --force-reinstall --extra-index-url=https://www.paddlepaddle.org.cn/packages/nightly/cu129/
echo "paddlefleet commit:"
python -c "import paddlefleet; print(paddlefleet.version.commit)"
'

- name: Multi-card test
run: |
docker exec -t ${{ env.container_name }} /bin/bash -ce '
export PYTHONPATH=$(pwd)
export LD_LIBRARY_PATH=/usr/local/cuda-12.9/targets/x86_64-linux/lib:/usr/local/cuda/lib64
python -c "import paddle; print(paddle.version.commit)"
if [ "${BRANCH}" != "develop" ]; then
git checkout $fleet_branch
Expand Down
3 changes: 2 additions & 1 deletion ci/h-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ concurrency_list="^test_fp8_deep_gemm$|\
^test_scaled_dot_product_attention$|\
^test_compat_scaled_dot_product_attention$|\
^test_flash_attention$|\
^test_batched_gemm$"
^test_batched_gemm$|\
^test_parallel_dygraph_muon$"

cd ${work_dir}/build
tmp_dir=$(mktemp -d)
Expand Down
2 changes: 2 additions & 0 deletions python/paddle/distributed/fleet/base/distributed_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ def __init__(self) -> None:
]
self.sync_param_name: list[str] = ["embedding", "layer_norm", ".b_"]

self.use_muon_sharding: bool = False

self.__lock_attr = True
logger.info("distributed strategy initialized")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
meta_optimizer_names.remove("HybridParallelOptimizer")
meta_optimizer_names.remove("HeterParallelOptimizer")
meta_optimizer_names.remove("DGCMomentumOptimizer")
meta_optimizer_names.remove("MuonShardingOptimizer")


class MetaOptimizerFactory:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
AdaptiveLocalSGDOptimizer,
LocalSGDOptimizer,
)
from .muon_sharding_optimizer import MuonShardingOptimizer # noqa: F401
from .pipeline_optimizer import PipelineOptimizer # noqa: F401
from .ps_optimizer import ParameterServerOptimizer # noqa: F401
from .qat_optimizer import QATOptimizer # noqa: F401
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
DygraphShardingOptimizer,
DygraphShardingOptimizerV2,
)
from paddle.distributed.fleet.meta_optimizers.muon_sharding_optimizer import (
MuonShardingOptimizer,
)
from paddle.distributed.fleet.utils.hybrid_parallel_util import (
obtain_optimizer_parameters_list,
)
Expand Down Expand Up @@ -284,11 +287,13 @@ def __init__(self, optimizer, hcg, strategy):
split_param = strategy.hybrid_configs[
'sharding_configs'
].split_param
ShardingOptimizer = (
DygraphShardingOptimizerV2
if split_param
else DygraphShardingOptimizer
)
use_muon_sharding = getattr(strategy, "use_muon_sharding", False)
if use_muon_sharding:
ShardingOptimizer = MuonShardingOptimizer
elif split_param:
ShardingOptimizer = DygraphShardingOptimizerV2
else:
ShardingOptimizer = DygraphShardingOptimizer
optimizer = ShardingOptimizer(optimizer, hcg)

self._enable_timer = strategy.hybrid_configs["enable_optimizer_timer"]
Expand Down Expand Up @@ -335,6 +340,7 @@ def __init__(self, optimizer, hcg, strategy):
MixPrecisionOptimizer,
DygraphShardingOptimizer,
DygraphShardingOptimizerV2,
MuonShardingOptimizer,
),
)

Expand Down Expand Up @@ -628,7 +634,11 @@ def _hybrid_sync_grad(self, parameter_list):
if self._sharding_enable:
assert isinstance(
self._inner_opt,
(DygraphShardingOptimizer, DygraphShardingOptimizerV2),
(
DygraphShardingOptimizer,
DygraphShardingOptimizerV2,
MuonShardingOptimizer,
),
)
self._inner_opt.reduce_gradients(parameter_list, self._hcg)
dp_parameter_list = self._inner_opt.filter_parameters(
Expand Down
Loading
Loading