FastLanguageModel.from_pretrained fails when loading a pre-quantized Jamba2-Mini int4 checkpoint with load_in_4bit = True: loading stops on the first shard with AttributeError: JambaAttention has no attribute `feed_forward`.

Reproduction:
import unsloth
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
from unsloth import FastLanguageModel

model_name = "/media/1luik/46BB55AF65F351D4/5/Jamba2-Mini-int4_hf"  # "/media/1luik/C2F0D801F0D7FA1D/AI21-Jamba2-Mini"  # or qwen/Qwen-7B, internlm/internlm3-8b, etc.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name,
    load_in_4bit = True,
    device_map = "balanced",
)
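
Before the full session, a hedged diagnostic sketch (not part of the original repro): it assumes the sharded checkpoint ships the standard model.safetensors.index.json, which the 8-shard loading bar below suggests. It lists every saved parameter whose name mentions feed_forward, to check whether any of them would resolve through an attention module, which would match the get_submodule failure in the traceback.

# Diagnostic sketch; assumes the default sharded-checkpoint index layout.
import json, os

model_name = "/media/1luik/46BB55AF65F351D4/5/Jamba2-Mini-int4_hf"
index_path = os.path.join(model_name, "model.safetensors.index.json")
with open(index_path) as f:
    weight_map = json.load(f)["weight_map"]
# Print quantized MoE/MLP parameter names to see where the int4 exporter put them.
for name in sorted(weight_map):
    if "feed_forward" in name:
        print(name)

Full REPL session and traceback: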
((venv) ) root@1luik-PC:/media/1luik/46BB55AF65F351D4/5# python
Python 3.12.11 (main, Jun 24 2025, 05:19:07) [GCC 12.3.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import os; os.environ['UNSLOTH_USE_MODELSCOPE'] = '1'
>>> import unsloth
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
INFO 02-12 13:38:12 [__init__.py:216] Automatically detected platform cuda.
ERROR 02-12 13:38:12 [fa_utils.py:57] Cannot use FA version 2 is not supported due to FA2 is only supported on devices with compute capability >= 8
🦥 Unsloth Zoo will now patch everything to make training faster!
>>> from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
>>> import torch
>>> from unsloth import FastLanguageModel
>>> model_name = "/media/1luik/46BB55AF65F351D4/5/Jamba2-Mini-int4_hf"#"/media/1luik/C2F0D801F0D7FA1D/AI21-Jamba2-Mini" # or qwen/Qwen-7B, internlm/internlm3-8b, etc.
>>>
>>> model, tokenizer = FastLanguageModel.from_pretrained(
... model_name,
... load_in_4bit = True,
... device_map = "balanced",
... )
==((====))== Unsloth 2026.2.1: Fast Jamba patching. Transformers: 4.57.6. vLLM: 0.11.0.
\\ /| Tesla V100-SXM2-16GB. Num GPUs = 4. Max memory: 15.766 GB. Platform: Linux.
O^O/ \_/ \ Torch: 2.8.0+cu128. CUDA: 7.0. CUDA Toolkit: 12.8. Triton: 3.4.0
\ / Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
"-____-" Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Loading checkpoint shards: 12%|████████████████▏ | 1/8 [00:01<00:12, 1.76s/it]
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/venv/lib/python3.12/site-packages/unsloth/models/loader.py", line 543, in from_pretrained
return FastModel.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/unsloth/models/loader.py", line 1292, in from_pretrained
model, tokenizer = FastBaseModel.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/unsloth/models/vision.py", line 775, in from_pretrained
model = auto_model.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/transformers/models/auto/auto_factory.py", line 604, in from_pretrained
return model_class.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/transformers/modeling_utils.py", line 277, in _wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/transformers/modeling_utils.py", line 5048, in from_pretrained
) = cls._load_pretrained_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/transformers/modeling_utils.py", line 5468, in _load_pretrained_model
_error_msgs, disk_offload_index = load_shard_file(args)
^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/transformers/modeling_utils.py", line 843, in load_shard_file
disk_offload_index = _load_state_dict_into_meta_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/transformers/modeling_utils.py", line 774, in _load_state_dict_into_meta_model
hf_quantizer.create_quantized_param(model, param, param_name, param_device)
File "/home/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_bnb_4bit.py", line 190, in create_quantized_param
module, tensor_name = get_module_from_name(model, param_name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/transformers/quantizers/quantizers_utils.py", line 20, in get_module_from_name
module = module.get_submodule(module_name)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 723, in get_submodule
raise AttributeError(
AttributeError: JambaAttention has no attribute `feed_forward`
>>>
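
For anyone hitting the same error, a minimal workaround sketch, assuming the -int4_hf checkpoint already embeds its bitsandbytes quantization config (unverified): loading it with plain transformers skips Unsloth's create_quantized_param path that fails above.

# Workaround sketch, not a confirmed fix: let transformers read the
# quantization config stored inside the checkpoint instead of re-quantizing.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "/media/1luik/46BB55AF65F351D4/5/Jamba2-Mini-int4_hf"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype = torch.float16,  # V100 (CUDA capability 7.0) has no bfloat16
    device_map = "balanced",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

If this loads cleanly, the failure is isolated to Unsloth's Jamba patching rather than to the checkpoint itself.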