Patch (#138)

danielhanchen · Erland366 · bradhilton · web-flow · commit 37dfb23217ca · 2025-05-14T05:41:21.000-07:00
* Update compiler.py * Update patching_utils.py * Update temporary_patches.py * Update compiler.py * Update compiler.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * Update compiler.py * compiler * Update gradient_checkpointing.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * causal mask dtype * Fix checkpoint and save from local file (#74) * Enhance gradient checkpointing and add original model ID retrieval in saving utilities * In case adapter_config.json as well * Update patching_utils.py * Update patching_utils.py * Update temporary_patches.py * Update temporary_patches.py * Update compiler.py * Update loss_utils.py * Update compiler.py * Update vllm_utils.py * Update compiler.py * Update peft_utils.py * Update rl_replacements.py * Update vllm_utils.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update compiler.py * Update vllm_lora_worker_manager.py * Update utils.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update dataset_utils.py * bidirectional attention * Update vllm_utils.py * Update __init__.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_lora_worker_manager.py * Update vllm_lora_worker_manager.py * Update vllm_lora_worker_manager.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update temporary_patches.py * Update loss_utils.py * Update loss_utils.py * Update loss_utils.py * Update loss_utils.py * Update loss_utils.py * Update __init__.py * fix: AsyncLLMEngine bugs (#82) * fixed a typo in L119, removing unnecessary len() (#84) Co-authored-by: Xiaochen Zhu <xz479@cl.cam.ac.uk> * Fix gradient checkpointing warning filter implementation * Input grads fix for gemma3 (#96) * gemma require gradients fix * Update peft_utils.py --------- Co-authored-by: Daniel Han <danielhanchen@gmail.com> * Update vision_utils.py * Vision requires grad * Check SDPA for Mistral / Pixtral * Update compiler.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update __init__.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vision_utils.py * Update vllm_utils.py (#99) Fix bugs in generate_batches.py.Original output = [] will result in duplication of results. * Update vision_utils.py * Fixes to support IterableDataset (#98) * Support Iterable Datasets * Update dataset_utils.py * Update dataset_utils.py * Update dataset_utils.py * Update dataset_utils.py * Preserve batch size from iterable dataset * Preserve batch size from iterable dataset * Support train_on_response_only with IterableDataset * Support train_on_response_only with IterableDataset * Support train_on_response_only with IterableDataset * Support train_on_response_only with IterableDataset * Update vllm_utils.py * Create vllm_rlhf_utils.py * Update vllm_rlhf_utils.py * Update vllm_rlhf_utils.py * Update vllm_rlhf_utils.py * Update vllm_rlhf_utils.py * Update vllm_rlhf_utils.py * Update vllm_rlhf_utils.py * Update vllm_rlhf_utils.py * Update vllm_rlhf_utils.py * Update vllm_rlhf_utils.py * vLLM for Qwen 3 * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update compiler.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update rl_replacements.py * Update rl_replacements.py * Update rl_replacements.py * Update rl_replacements.py * Swap space reduce * Update vllm_utils.py * Update vllm_utils.py * Update rl_replacements.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update __init__.py * Update rl_replacements.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update vllm_utils.py * Update rl_replacements.py * Update vllm_utils.py * Update rl_replacements.py * Revert "Update rl_replacements.py" This reverts commit c0a4022. * Update __init__.py --------- Co-authored-by: Edd <68678137+Erland366@users.noreply.github.com> Co-authored-by: Brad Hilton <brad.hilton.nw@gmail.com> Co-authored-by: SpaceHunter <30568250+SpaceHunterInf@users.noreply.github.com> Co-authored-by: Xiaochen Zhu <xz479@cl.cam.ac.uk> Co-authored-by: Roland Tannous <rolandtannous@gonovel.co> Co-authored-by: DoubleMathew <mmathew23@gmail.com> Co-authored-by: Michael Han <107991372+shimmyshimmer@users.noreply.github.com> Co-authored-by: Qian Wu <121997440+5k5000@users.noreply.github.com> Co-authored-by: marcandrelarochelle <marcandrelarochelle1820@gmail.com>
diff --git a/unsloth_zoo/__init__.py b/unsloth_zoo/__init__.py
@@ -14,7 +14,7 @@
 # You should have received a copy of the GNU Lesser General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-__version__ = "2025.5.4"
+__version__ = "2025.5.5"
 
 from importlib.util import find_spec
 if find_spec("unsloth") is None:
diff --git a/unsloth_zoo/vllm_utils.py b/unsloth_zoo/vllm_utils.py
@@ -414,11 +414,12 @@ def unpatch_bitsandbytes_compute_dtype():
 pass
 
 
-def patch_vllm():
+def patch_vllm(debug = True):
     # Temporary patch to disable multiprocessing for vLLM
     # Allows accessing model_executor
     os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
-    os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
+    if debug:
+        os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"
     # os.environ["VLLM_TRACE_FUNCTION"] = "1"
     patch_vllm_set_inductor_config()
     patch_bitsandbytes_quant_state()
@@ -1524,7 +1525,6 @@ def generate_batches(llm, inputs, n_batches = None, lora_request = None, *args,
 
 def delete_vllm(llm = None):
     # From https://github.com/vllm-project/vllm/issues/1908
-    import ray
     from vllm.distributed.parallel_state import (
         destroy_model_parallel,
         destroy_distributed_environment,
@@ -1540,7 +1540,11 @@ def delete_vllm(llm = None):
         torch.distributed.destroy_process_group()
     gc.collect()
     torch.cuda.empty_cache()
-    ray.shutdown()
+    try:
+        import ray
+        ray.shutdown()
+    except:
+        pass
     return llm
 pass