[to #47630265] fix modelscope gpt moe finetune checkpoint path error
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11716118
@@ -38,7 +38,7 @@ def get_checkpoint_names(checkpoints_path,
                                f'mp_rank_{tensor_rank:02d}')
 
     if num_experts[0] > 0:
-        model_name = common_path + '_model_states.pt'
+        model_name = os.path.join(common_path, 'model_rng.pt')
         optim_name = os.path.join(
             checkpoints_path, path_load_tag,
             f'expp_rank_{expp_rank}_mp_rank_{tensor_rank:02d}_optim_states.pt')
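The first hunk changes how the per-rank model checkpoint file name is built when experts are present: string concatenation is replaced by os.path.join, so the file is looked up inside the mp_rank directory rather than as a sibling file. A minimal sketch of the two resulting paths, where checkpoints_path, path_load_tag, and tensor_rank are made-up values and only the two path expressions come from the diff:

import os

# Illustrative inputs; the real values come from get_checkpoint_names'
# arguments and the model-parallel rank.
checkpoints_path = '/tmp/ckpt'
path_load_tag = 'model'
tensor_rank = 0

common_path = os.path.join(checkpoints_path, path_load_tag,
                           f'mp_rank_{tensor_rank:02d}')

# Removed expression: concatenation yields a flat file next to the
# mp_rank_00 directory.
old_model_name = common_path + '_model_states.pt'
print(old_model_name)  # /tmp/ckpt/model/mp_rank_00_model_states.pt

# Added expression: os.path.join points inside the mp_rank_00 directory,
# presumably where the finetune checkpoint is actually written.
new_model_name = os.path.join(common_path, 'model_rng.pt')
print(new_model_name)  # /tmp/ckpt/model/mp_rank_00/model_rng.pt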
@@ -1151,7 +1151,8 @@ class DistributedGPTMoE(TorchModel):
                 attention_mask=None,
                 position_ids=None,
                 labels=None,
-                prompt_length=None):
+                prompt_length=None,
+                is_pair=(False, )):
 
         outputs, *other_losses = self.dist_model(
             tokens,
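The second hunk adds an is_pair keyword (defaulting to a one-element tuple) to the method's signature. Assuming the edited method is the model's forward pass and the instance is callable in the usual PyTorch way, a call site could look like the sketch below; model and tokens are illustrative stand-ins not present in the diff:

def run_forward(model, tokens):
    # `model` is assumed to be a DistributedGPTMoE instance and `tokens`
    # its input token tensor; both are stand-ins for illustration.
    return model(
        tokens,
        attention_mask=None,
        position_ids=None,
        labels=None,
        prompt_length=None,
        is_pair=(False, ),  # new keyword added by this commit
    )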