Reproduction code

import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM


def convert_model(model, to_transformer_engine=True, _convert_linear=True, _convert_ln=True):
    """
    Recursively converts the linear and layernorm layers of a model to their `transformer_engine` counterpart.
    """
    import transformer_engine.pytorch as te

    for name, module in model.named_children():
        if isinstance(module, nn.Linear) and to_transformer_engine and _convert_linear:
            has_bias = module.bias is not None
            # Transformer Engine FP8 kernels expect dimensions divisible by 16; stop converting here otherwise.
            if any(p % 16 != 0 for p in module.weight.shape):
                return
            te_module = te.Linear(
                module.in_features, module.out_features, bias=has_bias, params_dtype=module.weight.dtype
            )
            te_module.weight.copy_(module.weight)
            if has_bias:
                te_module.bias.copy_(module.bias)
            setattr(model, name, te_module)
        # Note: @xrsrke (Phuc) found that te.LayerNorm doesn't have any real memory savings or speedups over nn.LayerNorm
        elif isinstance(module, nn.LayerNorm) and to_transformer_engine and _convert_ln:
            te_module = te.LayerNorm(module.normalized_shape[0], eps=module.eps, params_dtype=module.weight.dtype)
            te_module.weight.copy_(module.weight)
            te_module.bias.copy_(module.bias)
            setattr(model, name, te_module)
        elif isinstance(module, te.Linear) and not to_transformer_engine and _convert_linear:
            has_bias = module.bias is not None
            new_module = nn.Linear(
                module.in_features, module.out_features, bias=has_bias, dtype=module.weight.dtype
            )
            new_module.weight.copy_(module.weight)
            if has_bias:
                new_module.bias.copy_(module.bias)
            setattr(model, name, new_module)
        elif isinstance(module, te.LayerNorm) and not to_transformer_engine and _convert_ln:
            new_module = nn.LayerNorm(module.normalized_shape[0], eps=module.eps, dtype=module.weight.dtype)
            new_module.weight.copy_(module.weight)
            new_module.bias.copy_(module.bias)
            setattr(model, name, new_module)
        else:
            # Recurse into submodules that are neither Linear nor LayerNorm.
            convert_model(
                module,
                to_transformer_engine=to_transformer_engine,
                _convert_linear=_convert_linear,
                _convert_ln=_convert_ln,
            )
model_path="Qwen/Qwen1.5-0.5B"model=AutoModelForCausalLM.from_pretrained(model_path,
torch_dtype=torch.bfloat16)
withtorch.no_grad():
convert_model(model)
model=model.cuda()
model.train()
model.eval()
state_dict=model.state_dict()
model.save_pretrained("tmp", state_dict=state_dict, safe_serialization=False)
Error information
[2025-02-13 14:21:14,729] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Traceback (most recent call last):
File "/home/xinpeng/workspace/spui/test/TE/demo.py", line 71, in <module>
model.save_pretrained("tmp", state_dict=state_dict, safe_serialization=False)
File "/home/xinpeng/miniforge3/envs/torch/lib/python3.11/site-packages/transformers/modeling_utils.py", line 2971, in save_pretrained
state_dict_split = split_torch_state_dict_into_shards(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xinpeng/miniforge3/envs/torch/lib/python3.11/site-packages/huggingface_hub/serialization/_torch.py", line 351, in split_torch_state_dict_into_shards
return split_state_dict_into_shards_factory(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xinpeng/miniforge3/envs/torch/lib/python3.11/site-packages/huggingface_hub/serialization/_base.py", line 108, in split_state_dict_into_shards_factory
storage_id = get_storage_id(tensor)
^^^^^^^^^^^^^^^^^^^^^^
File "/home/xinpeng/miniforge3/envs/torch/lib/python3.11/site-packages/huggingface_hub/serialization/_torch.py", line 403, in get_torch_storage_id
if tensor.device.type == "meta":
^^^^^^^^^^^^^
AttributeError: '_io.BytesIO' object has no attribute 'device'
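The failure happens because split_torch_state_dict_into_shards assumes every state_dict value is a tensor, while the converted te.Linear / te.LayerNorm modules add "_extra_state" entries that, in this Transformer Engine version, are io.BytesIO buffers. A possible workaround (a sketch, not a confirmed fix) is to drop the non-tensor entries before saving, or to convert the model back with convert_model(model, to_transformer_engine=False) before calling save_pretrained:

# Workaround sketch (assumption, not a confirmed fix): filter out non-tensor
# values (the Transformer Engine "_extra_state" buffers) before saving.
filtered_state_dict = {k: v for k, v in state_dict.items() if isinstance(v, torch.Tensor)}
model.save_pretrained("tmp", state_dict=filtered_state_dict, safe_serialization=False)

Note that this discards the FP8 metadata stored in the "_extra_state" entries, so it is only appropriate if that state does not need to be restored later.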