diff --git a/examples/pytorch/chatglm6b/finetune.py b/examples/pytorch/chatglm6b/finetune.py index 0e31ce28..5e7ff6a5 100644 --- a/examples/pytorch/chatglm6b/finetune.py +++ b/examples/pytorch/chatglm6b/finetune.py @@ -192,12 +192,12 @@ if args.dataset_json_file is None: args.train_dataset_name, subset_name=args.train_subset_name, split=args.train_split, - namespace=args.train_dataset_namespace) + namespace=args.train_dataset_namespace).to_hf_dataset() validation_dataset = MsDataset.load( args.val_dataset_name, subset_name=args.val_subset_name, split=args.val_split, - namespace=args.val_dataset_namespace) + namespace=args.val_dataset_namespace).to_hf_dataset() else: train_dataset, validation_dataset = build_dataset_from_file( args.dataset_json_file) @@ -364,14 +364,14 @@ def preprocess_function_train(examples): return model_inputs -train_dataset = train_dataset.to_hf_dataset().map( +train_dataset = train_dataset.map( preprocess_function_train, batched=True, num_proc=args.preprocessing_num_workers, desc='Running tokenizer on train dataset', ) -validation_dataset = validation_dataset.to_hf_dataset().map( +validation_dataset = validation_dataset.map( preprocess_function_eval, batched=True, num_proc=args.preprocessing_num_workers,