diff --git a/dpo_training.py b/dpo_training.py
index 6fb3fe6..1370cfb 100644
--- a/dpo_training.py
+++ b/dpo_training.py
@@ -233,13 +233,18 @@ def main():
     prompt_template = get_conv_template(args.template_name)
     if tokenizer.eos_token_id is None:
         tokenizer.eos_token = prompt_template.stop_str  # eos token is required
-        logger.info("Add eos token: {}".format(tokenizer.eos_token))
+        tokenizer.add_special_tokens({"eos_token": tokenizer.eos_token})
+        logger.info(f"Add eos_token: {tokenizer.eos_token}, eos_token_id: {tokenizer.eos_token_id}")
+    if tokenizer.bos_token_id is None:
+        tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
+        tokenizer.bos_token_id = tokenizer.eos_token_id
+        logger.info(f"Add bos_token: {tokenizer.bos_token}, bos_token_id: {tokenizer.bos_token_id}")
     if tokenizer.pad_token_id is None:
         if tokenizer.unk_token_id is not None:
             tokenizer.pad_token = tokenizer.unk_token
         else:
             tokenizer.pad_token = tokenizer.eos_token
-        logger.info("Add pad token: {}".format(tokenizer.pad_token))
+        logger.info(f"Add pad_token: {tokenizer.pad_token}, pad_token_id: {tokenizer.pad_token_id}")
     logger.debug(f"Tokenizer: {tokenizer}")
 
     # Get datasets
diff --git a/ppo_training.py b/ppo_training.py
index 23bd087..934cc56 100644
--- a/ppo_training.py
+++ b/ppo_training.py
@@ -239,13 +239,18 @@ def main():
     prompt_template = get_conv_template(args.template_name)
     if tokenizer.eos_token_id is None:
         tokenizer.eos_token = prompt_template.stop_str  # eos token is required
-        logger.info("Add eos token: {}".format(tokenizer.eos_token))
+        tokenizer.add_special_tokens({"eos_token": tokenizer.eos_token})
+        logger.info(f"Add eos_token: {tokenizer.eos_token}, eos_token_id: {tokenizer.eos_token_id}")
+    if tokenizer.bos_token_id is None:
+        tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
+        tokenizer.bos_token_id = tokenizer.eos_token_id
+        logger.info(f"Add bos_token: {tokenizer.bos_token}, bos_token_id: {tokenizer.bos_token_id}")
     if tokenizer.pad_token_id is None:
         if tokenizer.unk_token_id is not None:
             tokenizer.pad_token = tokenizer.unk_token
         else:
             tokenizer.pad_token = tokenizer.eos_token
-        logger.info("Add pad token: {}".format(tokenizer.pad_token))
+        logger.info(f"Add pad_token: {tokenizer.pad_token}, pad_token_id: {tokenizer.pad_token_id}")
     logger.debug(f"Tokenizer: {tokenizer}")
 
     # Load model
diff --git a/reward_modeling.py b/reward_modeling.py
index b50b581..fc7a212 100644
--- a/reward_modeling.py
+++ b/reward_modeling.py
@@ -420,13 +420,18 @@ def main():
     prompt_template = get_conv_template(script_args.template_name)
     if tokenizer.eos_token_id is None:
         tokenizer.eos_token = prompt_template.stop_str  # eos token is required
-        logger.info("Add eos token: {}".format(tokenizer.eos_token))
+        tokenizer.add_special_tokens({"eos_token": tokenizer.eos_token})
+        logger.info(f"Add eos_token: {tokenizer.eos_token}, eos_token_id: {tokenizer.eos_token_id}")
+    if tokenizer.bos_token_id is None:
+        tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
+        tokenizer.bos_token_id = tokenizer.eos_token_id
+        logger.info(f"Add bos_token: {tokenizer.bos_token}, bos_token_id: {tokenizer.bos_token_id}")
     if tokenizer.pad_token_id is None:
         if tokenizer.unk_token_id is not None:
             tokenizer.pad_token = tokenizer.unk_token
         else:
             tokenizer.pad_token = tokenizer.eos_token
-        logger.info("Add pad token: {}".format(tokenizer.pad_token))
+        logger.info(f"Add pad_token: {tokenizer.pad_token}, pad_token_id: {tokenizer.pad_token_id}")
     logger.debug(f"Tokenizer: {tokenizer}")
 
     if script_args.use_peft:
diff --git a/run_dpo.sh b/run_dpo.sh
index dbaf147..af8db6a 100644
--- a/run_dpo.sh
+++ b/run_dpo.sh
@@ -14,8 +14,8 @@ CUDA_VISIBLE_DEVICES=0,1 python dpo_training.py \
     --max_steps 100 \
     --eval_steps 20 \
     --save_steps 50 \
-    --max_source_length 1028 \
-    --max_target_length 1028 \
+    --max_source_length 1024 \
+    --max_target_length 512 \
     --output_dir outputs-dpo-qwen-v1 \
     --target_modules all \
     --lora_rank 8 \
diff --git a/run_orpo.sh b/run_orpo.sh
index 5e9cf93..368876e 100644
--- a/run_orpo.sh
+++ b/run_orpo.sh
@@ -3,9 +3,8 @@ CUDA_VISIBLE_DEVICES=0,1 python orpo_training.py \
     --model_name_or_path Qwen/Qwen1.5-0.5B-Chat \
     --template_name qwen \
     --train_file_dir ./data/reward \
-    --validation_file_dir ./data/reward \
-    --per_device_train_batch_size 4 \
-    --per_device_eval_batch_size 1 \
+    --per_device_train_batch_size 2 \
+    --per_device_eval_batch_size 2 \
     --do_train \
     --do_eval \
     --use_peft True \
@@ -14,8 +13,8 @@ CUDA_VISIBLE_DEVICES=0,1 python orpo_training.py \
     --max_steps 100 \
     --eval_steps 20 \
     --save_steps 50 \
-    --max_source_length 2028 \
-    --max_target_length 1028 \
+    --max_source_length 1024 \
+    --max_target_length 512 \
     --output_dir outputs-orpo-qwen-v1 \
     --target_modules all \
     --lora_rank 8 \
diff --git a/run_ppo.sh b/run_ppo.sh
index d3eb3a0..92b794f 100644
--- a/run_ppo.sh
+++ b/run_ppo.sh
@@ -8,7 +8,7 @@ CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node 2 ppo_training.py \
     --train_file_dir ./data/finetune \
     --validation_file_dir ./data/finetune \
     --batch_size 8 \
-    --max_source_length 2056 \
+    --max_source_length 1024 \
     --max_target_length 256 \
     --max_train_samples 1000 \
     --use_peft True \
diff --git a/run_rm.sh b/run_rm.sh
index 1521903..db9760b 100644
--- a/run_rm.sh
+++ b/run_rm.sh
@@ -21,7 +21,7 @@ CUDA_VISIBLE_DEVICES=0,1 python reward_modeling.py \
     --save_steps 500 \
     --save_strategy steps \
     --save_total_limit 3 \
-    --max_source_length 2056 \
+    --max_source_length 1024 \
     --max_target_length 256 \
     --output_dir outputs-rm-qwen-v1 \
     --overwrite_output_dir \
diff --git a/run_sft.sh b/run_sft.sh
index 498483a..4478646 100644
--- a/run_sft.sh
+++ b/run_sft.sh
@@ -11,7 +11,7 @@ CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node 2 supervised_finetuning.py \
    --use_peft True \
    --max_train_samples 1000 \
    --max_eval_samples 10 \
-   --model_max_length 1024 \
+   --model_max_length 4096 \
    --num_train_epochs 1 \
    --learning_rate 2e-5 \
    --warmup_ratio 0.05 \
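
Note: for reference, the tokenizer special-token handling that dpo_training.py, ppo_training.py, and reward_modeling.py share after this patch, pulled out as a minimal standalone sketch. It assumes a Hugging Face `AutoTokenizer`; the model name and the stop string used here are placeholders for illustration only and are not part of the patch.

```python
from transformers import AutoTokenizer

# Placeholder model and stop string; in the scripts these come from
# args.model_name_or_path and prompt_template.stop_str.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat", trust_remote_code=True)
stop_str = "<|im_end|>"

# Register the eos token as a special token (not just assign the attribute),
# so eos_token_id resolves to a valid id.
if tokenizer.eos_token_id is None:
    tokenizer.eos_token = stop_str
    tokenizer.add_special_tokens({"eos_token": tokenizer.eos_token})

# Some checkpoints ship without a bos token; reuse the eos token for it.
if tokenizer.bos_token_id is None:
    tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
    tokenizer.bos_token_id = tokenizer.eos_token_id

# Fall back to unk (if present) or eos for padding.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = (
        tokenizer.unk_token if tokenizer.unk_token_id is not None else tokenizer.eos_token
    )

print(tokenizer.eos_token_id, tokenizer.bos_token_id, tokenizer.pad_token_id)
```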