[to #47630265] fix modelscope gpt moe finetune checkpoint path error
Link: https://code.alibaba-inc.com/Ali-MaaS/MaaS-lib/codereview/11716118
@@ -38,7 +38,7 @@ def get_checkpoint_names(checkpoints_path,
                                f'mp_rank_{tensor_rank:02d}')
 
     if num_experts[0] > 0:
-        model_name = common_path + '_model_states.pt'
+        model_name = os.path.join(common_path, 'model_rng.pt')
         optim_name = os.path.join(
             checkpoints_path, path_load_tag,
             f'expp_rank_{expp_rank}_mp_rank_{tensor_rank:02d}_optim_states.pt')
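The first hunk changes how the per-rank model checkpoint file name is built when experts are present: string concatenation is replaced by os.path.join, so the file is looked up inside the mp_rank directory rather than as a sibling file. A minimal sketch of the two resulting paths, where checkpoints_path, path_load_tag, and tensor_rank are made-up values and only the two path expressions come from the diff:

import os

# Illustrative inputs; the real values come from get_checkpoint_names'
# arguments and the model-parallel rank.
checkpoints_path = '/tmp/ckpt'
path_load_tag = 'model'
tensor_rank = 0

common_path = os.path.join(checkpoints_path, path_load_tag,
                           f'mp_rank_{tensor_rank:02d}')

# Removed expression: concatenation yields a flat file next to the
# mp_rank_00 directory.
old_model_name = common_path + '_model_states.pt'
print(old_model_name)  # /tmp/ckpt/model/mp_rank_00_model_states.pt

# Added expression: os.path.join points inside the mp_rank_00 directory,
# presumably where the finetune checkpoint is actually written.
new_model_name = os.path.join(common_path, 'model_rng.pt')
print(new_model_name)  # /tmp/ckpt/model/mp_rank_00/model_rng.pt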
@@ -1151,7 +1151,8 @@ class DistributedGPTMoE(TorchModel):
                 attention_mask=None,
                 position_ids=None,
                 labels=None,
-                prompt_length=None):
+                prompt_length=None,
+                is_pair=(False, )):
 
         outputs, *other_losses = self.dist_model(
             tokens,
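The second hunk adds an is_pair keyword (defaulting to a one-element tuple) to the method's signature. Assuming the edited method is the model's forward pass and the instance is callable in the usual PyTorch way, a call site could look like the sketch below; model and tokens are illustrative stand-ins not present in the diff:

def run_forward(model, tokens):
    # `model` is assumed to be a DistributedGPTMoE instance and `tokens`
    # its input token tensor; both are stand-ins for illustration.
    return model(
        tokens,
        attention_mask=None,
        position_ids=None,
        labels=None,
        prompt_length=None,
        is_pair=(False, ),  # new keyword added by this commit
    )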