Commit a10a30e
committed
Merge remote-tracking branch 'upstream/main' into rocm7.1_internal_testing_IFU_2025-09-23
# Conflicts:
# .ci/aarch64_linux/aarch64_ci_build.sh
# .ci/aarch64_linux/aarch64_wheel_ci_build.py
# .ci/docker/build.sh
# .ci/docker/ci_commit_pins/huggingface-requirements.txt
# .ci/docker/ci_commit_pins/triton.txt
# .ci/docker/common/install_rocm.sh
# .ci/docker/requirements-ci.txt
# .ci/docker/requirements-docs.txt
# .ci/libtorch/build.sh
# .ci/lumen_cli/cli/lib/core/vllm/lib.py
# .ci/lumen_cli/cli/lib/core/vllm/vllm_build.py
# .ci/lumen_cli/cli/lib/core/vllm/vllm_test.py
# .ci/wheel/build_wheel.sh
# .github/ci_commit_pins/audio.txt
# .github/ci_commit_pins/vllm.txt
# .github/ci_commit_pins/xla.txt
# .github/ci_configs/vllm/Dockerfile.tmp_vllm
# .github/scripts/generate_binary_build_matrix.py
# .github/templates/macos_binary_build_workflow.yml.j2
# .github/workflows/build-vllm-wheel.yml
# .github/workflows/docker-builds.yml
# .github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml
# .github/workflows/generated-linux-binary-manywheel-main.yml
# .github/workflows/generated-linux-binary-manywheel-nightly.yml
# .github/workflows/generated-linux-binary-manywheel-rocm-main.yml
# .github/workflows/generated-macos-arm64-binary-libtorch-release-nightly.yml
# .github/workflows/generated-macos-arm64-binary-wheel-nightly.yml
# .github/workflows/inductor-nightly.yml
# .github/workflows/inductor-perf-test-nightly-x86-zen.yml
# .github/workflows/inductor-perf-test-nightly-x86.yml
# .github/workflows/inductor-periodic.yml
# .github/workflows/inductor-unittest.yml
# .github/workflows/inductor.yml
# .github/workflows/operator_benchmark.yml
# .github/workflows/pull.yml
# .github/workflows/trunk.yml
# .github/workflows/vllm.yml
# aten/src/ATen/CMakeLists.txt
# aten/src/ATen/DLConvertor.cpp
# aten/src/ATen/cuda/CUDABlas.cpp
# aten/src/ATen/native/CPUBlas.cpp
# aten/src/ATen/native/LinearAlgebra.cpp
# aten/src/ATen/native/Normalization.cpp
# aten/src/ATen/native/cuda/Blas.cpp
# aten/src/ATen/native/cuda/int8mm.cu
# aten/src/ATen/native/cudnn/MHA.cpp
# aten/src/ATen/native/miopen/BatchNorm_miopen.cpp
# aten/src/ATen/native/miopen/Conv_miopen.cpp
# aten/src/ATen/native/mps/operations/GridSampler.mm
# aten/src/ATen/native/native_functions.yaml
# aten/src/ATen/native/sparse/mps/SparseMPSTensorMath.mm
# aten/src/ATen/native/transformers/hip/flash_attn/flash_api.h
# benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/aot_eager_torchbench_training.csv
# benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_amp_freezing_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_freezing_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/cpu_inductor_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/dynamic_aot_eager_torchbench_training.csv
# benchmarks/dynamo/ci_expected_accuracy/dynamic_cpu_inductor_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/dynamic_inductor_torchbench_training.csv
# benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/dynamo_eager_torchbench_training.csv
# benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_training.csv
# benchmarks/dynamo/ci_expected_accuracy/rocm/aot_eager_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/rocm/dynamic_aot_eager_torchbench_inference.csv
# benchmarks/dynamo/ci_expected_accuracy/rocm/dynamo_eager_torchbench_inference.csv
# benchmarks/dynamo/pr_time_benchmarks/expected_results.csv
# benchmarks/operator_benchmark/benchmark_core.py
# build_variables.bzl
# c10/cuda/CUDAFunctions.cpp
# cmake/Codegen.cmake
# cmake/External/aotriton.cmake
# docs/source/accelerator/index.md
# docs/source/accelerator/operators.md
# functorch/dim/__init__.py
# functorch/dim/wrap_type.py
# requirements-build.txt
# requirements.txt
# test/cpp/nativert/CMakeLists.txt
# test/cpp/nativert/test_triton_kernel_manager_registration.cpp
# test/cpp_extensions/libtorch_agnostic_extension/libtorch_agnostic/csrc/kernel.cpp
# test/cpp_extensions/libtorch_agnostic_extension/libtorch_agnostic/ops.py
# test/cpp_extensions/libtorch_agnostic_extension/test/test_libtorch_agnostic.py
# test/cpp_extensions/open_registration_extension/torch_openreg/README.md
# test/cpp_extensions/open_registration_extension/torch_openreg/setup.py
# test/cpp_extensions/open_registration_extension/torch_openreg/third_party/openreg/README.md
# test/cpp_extensions/open_registration_extension/torch_openreg/third_party/openreg/example/example.cpp
# test/cpp_extensions/open_registration_extension/torch_openreg/torch_openreg/__init__.py
# test/distributed/_composable/fsdp/test_fully_shard_training.py
# test/distributed/_composable/test_composability/test_2d_composability.py
# test/distributed/fsdp/test_fsdp_comm_hooks.py
# test/distributed/tensor/parallel/test_tp_examples.py
# test/distributed/tensor/test_attention.py
# test/distributed/tensor/test_dtensor_compile.py
# test/distributed/tensor/test_dtensor_ops.py
# test/distributed/tensor/test_op_schema.py
# test/distributed/test_inductor_collectives.py
# test/distributed/test_nvshmem.py
# test/distributed/test_nvshmem_triton.py
# test/distributed/test_symmetric_memory.py
# test/dynamo/test_activation_checkpointing.py
# test/dynamo/test_aot_compile.py
# test/dynamo/test_callback.py
# test/dynamo/test_error_messages.py
# test/dynamo/test_guard_serialization.py
# test/dynamo/test_misc.py
# test/dynamo/test_package.py
# test/dynamo/test_structured_trace.py
# test/export/test_export.py
# test/export/test_export_opinfo.py
# test/export/test_passes.py
# test/export/test_serialize.py
# test/functorch/test_control_flow.py
# test/inductor/test_aot_inductor.py
# test/inductor/test_aot_inductor_package.py
# test/inductor/test_flex_attention.py
# test/inductor/test_fxir_backend.py
# test/inductor/test_loop_ordering.py
# test/inductor/test_max_autotune.py
# test/inductor/test_torchinductor.py
# test/nn/test_convolution.py
# test/nn/test_pooling.py
# test/run_test.py
# test/slow_tests.json
# test/test_binary_ufuncs.py
# test/test_dynamic_shapes.py
# test/test_matmul_cuda.py
# test/test_nestedtensor.py
# test/test_nn.py
# test/test_openreg.py
# third_party/xpu.txt
# tools/flight_recorder/components/config_manager.py
# tools/pyi/gen_pyi.py
# torch/_C/_dynamo/guards.pyi
# torch/_dynamo/aot_compile.py
# torch/_dynamo/convert_frame.py
# torch/_dynamo/functional_export.py
# torch/_dynamo/graph_break_registry.json
# torch/_dynamo/guards.py
# torch/_dynamo/output_graph.py
# torch/_dynamo/package.py
# torch/_dynamo/symbolic_convert.py
# torch/_dynamo/variables/higher_order_ops.py
# torch/_dynamo/variables/lists.py
# torch/_dynamo/variables/optimizer.py
# torch/_export/serde/serialize.py
# torch/_export/wrappers.py
# torch/_functorch/_aot_autograd/autograd_cache.py
# torch/_higher_order_ops/__init__.py
# torch/_higher_order_ops/associative_scan.py
# torch/_higher_order_ops/flex_attention.py
# torch/_higher_order_ops/triton_kernel_wrap.py
# torch/_inductor/choices.py
# torch/_inductor/codegen/cpp.py
# torch/_inductor/codegen/cpp_micro_gemm.py
# torch/_inductor/codegen/cpp_wrapper_cpu.py
# torch/_inductor/codegen/triton.py
# torch/_inductor/codegen/wrapper_fxir.py
# torch/_inductor/config.py
# torch/_inductor/cpp_builder.py
# torch/_inductor/decomposition.py
# torch/_inductor/kernel/bmm.py
# torch/_inductor/kernel/flex/flex_attention.py
# torch/_inductor/kernel/flex/templates/flex_attention.py.jinja
# torch/_inductor/kernel/flex/templates/flex_backwards.py.jinja
# torch/_inductor/kernel/flex/templates/flex_decode.py.jinja
# torch/_inductor/kernel/flex/templates/utilities.py.jinja
# torch/_inductor/kernel/mm.py
# torch/_inductor/kernel/mm_plus_mm.py
# torch/_inductor/kernel_template_choice.py
# torch/_inductor/memory.py
# torch/_inductor/runtime/triton_heuristics.py
# torch/_inductor/scheduler.py
# torch/_inductor/select_algorithm.py
# torch/_inductor/template_heuristics/base.py
# torch/_inductor/template_heuristics/triton.py
# torch/_inductor/utils.py
# torch/_meta_registrations.py
# torch/_prims_common/__init__.py
# torch/csrc/Module.cpp
# torch/csrc/autograd/python_variable.cpp
# torch/csrc/autograd/python_variable_indexing.cpp
# torch/csrc/distributed/c10d/FlightRecorder.cpp
# torch/csrc/distributed/c10d/ProcessGroupGloo.hpp
# torch/csrc/distributed/c10d/symm_mem/NVSHMEMSymmetricMemory.cu
# torch/csrc/inductor/aoti_runtime/utils.h
# torch/csrc/stable/accelerator.h
# torch/csrc/stable/ops.h
# torch/csrc/utils/generated_serialization_types.h
# torch/csrc/utils/tensor_numpy.cpp
# torch/distributed/_symmetric_memory/_nvshmem_triton.py
# torch/distributed/device_mesh.py
# torch/distributed/pipelining/_schedule_visualizer.py
# torch/distributed/tensor/_api.py
# torch/distributed/tensor/_dispatch.py
# torch/distributed/tensor/_op_schema.py
# torch/distributed/tensor/_random.py
# torch/distributed/tensor/_sharding_prop.py
# torch/export/_trace.py
# torch/export/_unlift.py
# torch/export/exported_program.py
# torch/fx/experimental/proxy_tensor.py
# torch/nativert/executor/triton/CpuTritonKernelManager.cpp
# torch/nativert/executor/triton/CudaTritonKernelManager.cpp
# torch/nativert/executor/triton/TritonKernelManager.h
# torch/nativert/kernels/KernelHandlerRegistry.cpp
# torch/nativert/kernels/TritonKernel.cpp
# torch/nested/_internal/ops.py
# torch/onnx/__init__.py
# torch/overrides.py
# torch/testing/_internal/common_cuda.py
# torch/testing/_internal/common_distributed.py
# torch/testing/_internal/common_quantization.py
# torch/testing/_internal/common_utils.py
# torch/testing/_internal/distributed/_tensor/common_dtensor.py
# torch/testing/_internal/distributed/fake_pg.py
# torch/testing/_internal/hop_db.py
# torch/utils/_python_dispatch.py
# torch/utils/data/datapipes/iter/combinatorics.py
File tree
984 files changed
+39283
-6622
lines changed
- .ci
- aarch64_linux
- docker
- centos-rocm
- ci_commit_pins
- common
- ubuntu-rocm
- libtorch
- lumen_cli/cli/lib/core/vllm
- pytorch
- smoke_test
- win-test-helpers
- installation-helpers
- wheel
- .github
- actions
- reuse-old-whl
- setup-win
- ci_commit_pins
- ci_configs/vllm
- requirements
- scripts
- templates
- workflows
- aten/src/ATen
- core
- boxing
- dispatch
- cuda
- functorch
- native
- cpu
- cuda
- cudnn
- miopen
- mkldnn/xpu
- detail
- mps
- kernels
- operations
- quantized/cuda
- sparse
- cuda
- mps
- kernels
- transformers
- cuda
- hip/flash_attn
- test
- xpu
- benchmarks
- dynamo
- ci_expected_accuracy
- rocm
- pr_time_benchmarks
- operator_benchmark
- c10
- core
- impl
- cuda
- util
- caffe2
- perfkernels
- serialize
- cmake
- External
- public
- docs
- cpp/source
- source
- _static/img
- dynamic_shapes
- inductor_provenance
- accelerator
- compile
- functorch
- dim
- docs/source
- tutorials
- _src
- einops
- op_analysis
- test
- ao/sparsity
- bottleneck_test
- cpp_extensions
- libtorch_agnostic_extension
- libtorch_agnostic
- csrc
- test
- open_registration_extension/torch_openreg
- csrc/runtime
- tests
- third_party/openreg
- example
- torch_openreg
- python_agnostic_extension
- python_agnostic
- csrc
- test
- cpp
- aoti_inference
- nativert
- distributed
- _composable
- fsdp
- test_composability
- _pycute
- elastic
- agent/server/test
- multiprocessing
- bin
- timer
- fsdp
- launcher
- tensor
- debug
- parallel
- distributions
- dynamo_expected_failures
- dynamo_skips
- dynamo
- export
- functorch
- dim
- fx
- higher_order_ops
- inductor_expected_failures
- inductor
- nn
- onnx
- exporter
- ops
- optim
- profiler
- quantization
- core
- pt2e
- xpu
- third_party
- miniz-3.0.2
- tools
- experimental/dynamic_shapes/torchfuzz
- operators
- flight_recorder/components
- linter
- adapters
- pyi
- setup_helpers
- testing
- torch
- _C
- _dynamo
- _export
- _decomp
- _dynamo
- variables
- _export
- db/examples
- serde
- _functorch
- _activation_checkpointing
- _aot_autograd
- _higher_order_ops
- _inductor
- codegen
- cuda
- cutlass_lib_extensions
- rocm
- fx_passes
- kernel
- flex
- templates
- runtime
- template_heuristics
- _library
- _prims_common
- _prims
- _refs
- _subclasses
- amp
- ao
- pruning/_experimental/data_sparsifier/lightning/callbacks
- quantization
- fx
- pt2e
- compiler
- csrc
- api/include/torch/nn/modules
- autograd
- cuda
- distributed
- autograd/engine
- c10d
- control_plane
- symm_mem
- dynamo
- export
- functorch
- inductor
- aoti_runner
- aoti_runtime
- aoti_torch
- c
- generated
- jit
- codegen
- fuser
- onednn
- mobile
- python
- serialization
- tensorexpr
- lazy/core
- profiler
- python
- stable
- utils
- xpu
- cuda
- distributed
- _pycute
- _symmetric_memory
- checkpoint
- elastic
- agent/server
- multiprocessing
- rendezvous
- fsdp
- _fully_shard
- launcher
- pipelining
- tensor
- _ops
- experimental
- export
- pt2_archive
- fx
- experimental
- passes
- headeronly/macros
- jit
- mtia
- nativert
- backends
- detail
- executor
- triton
- graph
- kernels
- nested/_internal
- nn
- attention
- modules
- parallel
- utils
- onnx
- _internal
- exporter
- _torchlib/ops
- torchscript_exporter
- ops
- optim
- sparse
- testing/_internal
- distributed
- _tensor
- optests
- utils
- _sympy
- benchmark/utils
- bottleneck
- data
- datapipes
- iter
- tensorboard
- xpu
Some content is hidden
Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
984 files changed
+39283
-6622
lines changed
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
5 | 5 | | |
6 | 6 | | |
7 | 7 | | |
| 8 | + | |
8 | 9 | | |
9 | 10 | | |
10 | 11 | | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
11 | 17 | | |
12 | 18 | | |
13 | 19 | | |
| |||
31 | 37 | | |
32 | 38 | | |
33 | 39 | | |
34 | | - | |
35 | | - | |
| 40 | + | |
36 | 41 | | |
37 | 42 | | |
38 | 43 | | |
| |||
42 | 47 | | |
43 | 48 | | |
44 | 49 | | |
| 50 | + | |
45 | 51 | | |
46 | 52 | | |
47 | 53 | | |
| 54 | + | |
| 55 | + | |
48 | 56 | | |
49 | 57 | | |
50 | 58 | | |
51 | 59 | | |
| 60 | + | |
52 | 61 | | |
53 | 62 | | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
54 | 66 | | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
138 | 138 | | |
139 | 139 | | |
140 | 140 | | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
| 144 | + | |
| 145 | + | |
141 | 146 | | |
142 | 147 | | |
143 | 148 | | |
| |||
211 | 216 | | |
212 | 217 | | |
213 | 218 | | |
| 219 | + | |
214 | 220 | | |
| 221 | + | |
| 222 | + | |
| 223 | + | |
| 224 | + | |
215 | 225 | | |
216 | 226 | | |
217 | 227 | | |
| |||
221 | 231 | | |
222 | 232 | | |
223 | 233 | | |
| 234 | + | |
224 | 235 | | |
| 236 | + | |
| 237 | + | |
| 238 | + | |
225 | 239 | | |
226 | 240 | | |
227 | 241 | | |
| |||
237 | 251 | | |
238 | 252 | | |
239 | 253 | | |
| 254 | + | |
| 255 | + | |
| 256 | + | |
| 257 | + | |
| 258 | + | |
240 | 259 | | |
241 | 260 | | |
242 | 261 | | |
| |||
275 | 294 | | |
276 | 295 | | |
277 | 296 | | |
278 | | - | |
279 | | - | |
280 | | - | |
281 | | - | |
282 | | - | |
283 | | - | |
284 | | - | |
285 | | - | |
| 297 | + | |
286 | 298 | | |
287 | 299 | | |
288 | 300 | | |
| |||
319 | 331 | | |
320 | 332 | | |
321 | 333 | | |
322 | | - | |
| 334 | + | |
323 | 335 | | |
324 | 336 | | |
325 | 337 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
214 | 214 | | |
215 | 215 | | |
216 | 216 | | |
| 217 | + | |
217 | 218 | | |
218 | 219 | | |
| 220 | + | |
| 221 | + | |
| 222 | + | |
219 | 223 | | |
220 | 224 | | |
221 | 225 | | |
| |||
263 | 267 | | |
264 | 268 | | |
265 | 269 | | |
266 | | - | |
267 | | - | |
268 | | - | |
269 | | - | |
| 270 | + | |
270 | 271 | | |
271 | | - | |
272 | | - | |
| 272 | + | |
| 273 | + | |
273 | 274 | | |
274 | 275 | | |
275 | 276 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
59 | 59 | | |
60 | 60 | | |
61 | 61 | | |
| 62 | + | |
| 63 | + | |
| 64 | + | |
62 | 65 | | |
63 | 66 | | |
64 | | - | |
| 67 | + | |
| 68 | + | |
65 | 69 | | |
66 | 70 | | |
67 | 71 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1 | | - | |
| 1 | + | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
1 | 2 | | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
2 | 6 | | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
| 1 | + | |
1 | 2 | | |
| 3 | + | |
| 4 | + | |
| 5 | + | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
42 | 42 | | |
43 | 43 | | |
44 | 44 | | |
| 45 | + | |
| 46 | + | |
45 | 47 | | |
46 | 48 | | |
47 | 49 | | |
48 | 50 | | |
49 | 51 | | |
50 | | - | |
51 | | - | |
52 | 52 | | |
53 | | - | |
| 53 | + | |
54 | 54 | | |
55 | 55 | | |
56 | | - | |
57 | 56 | | |
58 | 57 | | |
59 | | - | |
60 | | - | |
61 | | - | |
62 | | - | |
63 | | - | |
| 58 | + | |
| 59 | + | |
| 60 | + | |
| 61 | + | |
| 62 | + | |
| 63 | + | |
| 64 | + | |
| 65 | + | |
| 66 | + | |
| 67 | + | |
| 68 | + | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
2 | 2 | | |
3 | 3 | | |
4 | 4 | | |
| 5 | + | |
| 6 | + | |
| 7 | + | |
| 8 | + | |
| 9 | + | |
5 | 10 | | |
6 | 11 | | |
7 | 12 | | |
| |||
109 | 114 | | |
110 | 115 | | |
111 | 116 | | |
| 117 | + | |
112 | 118 | | |
113 | 119 | | |
| 120 | + | |
| 121 | + | |
| 122 | + | |
114 | 123 | | |
115 | 124 | | |
116 | 125 | | |
| |||
195 | 204 | | |
196 | 205 | | |
197 | 206 | | |
| 207 | + | |
| 208 | + | |
198 | 209 | | |
199 | 210 | | |
200 | 211 | | |
| |||
0 commit comments