update bos token.
shibing624 committed Apr 26, 2024
1 parent 0e2bca9 commit 39ba562
Showing 8 changed files with 30 additions and 16 deletions.
dpo_training.py (9 changes: 7 additions & 2 deletions)
@@ -233,13 +233,18 @@ def main():
     prompt_template = get_conv_template(args.template_name)
     if tokenizer.eos_token_id is None:
         tokenizer.eos_token = prompt_template.stop_str  # eos token is required
-        logger.info("Add eos token: {}".format(tokenizer.eos_token))
+        tokenizer.add_special_tokens({"eos_token": tokenizer.eos_token})
+        logger.info(f"Add eos_token: {tokenizer.eos_token}, eos_token_id: {tokenizer.eos_token_id}")
+    if tokenizer.bos_token_id is None:
+        tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
+        tokenizer.bos_token_id = tokenizer.eos_token_id
+        logger.info(f"Add bos_token: {tokenizer.bos_token}, bos_token_id: {tokenizer.bos_token_id}")
     if tokenizer.pad_token_id is None:
         if tokenizer.unk_token_id is not None:
             tokenizer.pad_token = tokenizer.unk_token
         else:
             tokenizer.pad_token = tokenizer.eos_token
-        logger.info("Add pad token: {}".format(tokenizer.pad_token))
+        logger.info(f"Add pad_token: {tokenizer.pad_token}, pad_token_id: {tokenizer.pad_token_id}")
     logger.debug(f"Tokenizer: {tokenizer}")
 
     # Get datasets
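The same fallback chain (eos from the template's stop string, bos reusing eos when missing, pad from unk or eos) is repeated verbatim in ppo_training.py and reward_modeling.py below. As a rough standalone illustration of the new bos branch, here is a minimal sketch assuming a transformers tokenizer that ships without a bos token; the Qwen checkpoint name is taken from the repo's example scripts, and the "<|im_end|>" string is an assumption standing in for prompt_template.stop_str:

```python
# Minimal sketch (not part of the commit): reproduce the special-token fallback
# that dpo_training.py now applies, assuming a tokenizer with no bos token set.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")

if tokenizer.eos_token_id is None:
    # Fall back to the chat template's stop string; "<|im_end|>" is assumed here.
    tokenizer.add_special_tokens({"eos_token": "<|im_end|>"})
if tokenizer.bos_token_id is None:
    # Reuse eos as bos so any code that reads bos_token_id gets a real id, not None.
    tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
    tokenizer.bos_token_id = tokenizer.eos_token_id
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.unk_token if tokenizer.unk_token_id is not None else tokenizer.eos_token

print(tokenizer.bos_token, tokenizer.bos_token_id)
print(tokenizer.pad_token, tokenizer.pad_token_id)
```

Since add_special_tokens already registers the bos token, the explicit bos_token_id assignment in the commit appears to be a belt-and-braces step to guarantee the id is populated.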
ppo_training.py (9 changes: 7 additions & 2 deletions)
@@ -239,13 +239,18 @@ def main():
     prompt_template = get_conv_template(args.template_name)
     if tokenizer.eos_token_id is None:
         tokenizer.eos_token = prompt_template.stop_str  # eos token is required
-        logger.info("Add eos token: {}".format(tokenizer.eos_token))
+        tokenizer.add_special_tokens({"eos_token": tokenizer.eos_token})
+        logger.info(f"Add eos_token: {tokenizer.eos_token}, eos_token_id: {tokenizer.eos_token_id}")
+    if tokenizer.bos_token_id is None:
+        tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
+        tokenizer.bos_token_id = tokenizer.eos_token_id
+        logger.info(f"Add bos_token: {tokenizer.bos_token}, bos_token_id: {tokenizer.bos_token_id}")
     if tokenizer.pad_token_id is None:
         if tokenizer.unk_token_id is not None:
             tokenizer.pad_token = tokenizer.unk_token
         else:
             tokenizer.pad_token = tokenizer.eos_token
-        logger.info("Add pad token: {}".format(tokenizer.pad_token))
+        logger.info(f"Add pad_token: {tokenizer.pad_token}, pad_token_id: {tokenizer.pad_token_id}")
     logger.debug(f"Tokenizer: {tokenizer}")
 
     # Load model
reward_modeling.py (9 changes: 7 additions & 2 deletions)
@@ -420,13 +420,18 @@ def main():
     prompt_template = get_conv_template(script_args.template_name)
     if tokenizer.eos_token_id is None:
         tokenizer.eos_token = prompt_template.stop_str  # eos token is required
-        logger.info("Add eos token: {}".format(tokenizer.eos_token))
+        tokenizer.add_special_tokens({"eos_token": tokenizer.eos_token})
+        logger.info(f"Add eos_token: {tokenizer.eos_token}, eos_token_id: {tokenizer.eos_token_id}")
+    if tokenizer.bos_token_id is None:
+        tokenizer.add_special_tokens({"bos_token": tokenizer.eos_token})
+        tokenizer.bos_token_id = tokenizer.eos_token_id
+        logger.info(f"Add bos_token: {tokenizer.bos_token}, bos_token_id: {tokenizer.bos_token_id}")
     if tokenizer.pad_token_id is None:
         if tokenizer.unk_token_id is not None:
             tokenizer.pad_token = tokenizer.unk_token
         else:
             tokenizer.pad_token = tokenizer.eos_token
-        logger.info("Add pad token: {}".format(tokenizer.pad_token))
+        logger.info(f"Add pad_token: {tokenizer.pad_token}, pad_token_id: {tokenizer.pad_token_id}")
     logger.debug(f"Tokenizer: {tokenizer}")
 
     if script_args.use_peft:
run_dpo.sh (4 changes: 2 additions & 2 deletions)
@@ -14,8 +14,8 @@ CUDA_VISIBLE_DEVICES=0,1 python dpo_training.py \
     --max_steps 100 \
     --eval_steps 20 \
     --save_steps 50 \
-    --max_source_length 1028 \
-    --max_target_length 1028 \
+    --max_source_length 1024 \
+    --max_target_length 512 \
     --output_dir outputs-dpo-qwen-v1 \
     --target_modules all \
     --lora_rank 8 \
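The replacement values here and in the scripts below also clean up the odd 1028/2028/2056 limits (apparently typos for powers of two), settling on a 1024-token prompt budget and a shorter response budget. How the two limits combine depends on each script's preprocessing; the hypothetical helper below sketches the usual pattern of capping prompt and response separately, with names and truncation sides that are illustrative rather than taken from the repo:

```python
from typing import List, Tuple

def truncate_example(prompt_ids: List[int], response_ids: List[int],
                     max_source_length: int = 1024,
                     max_target_length: int = 512) -> Tuple[List[int], List[int]]:
    """Hypothetical illustration: cap prompt and response independently so the
    concatenated example stays within max_source_length + max_target_length tokens."""
    prompt_ids = prompt_ids[-max_source_length:]      # keep the most recent prompt tokens
    response_ids = response_ids[:max_target_length]   # keep the start of the response
    return prompt_ids, response_ids
```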
run_orpo.sh (9 changes: 4 additions & 5 deletions)
@@ -3,9 +3,8 @@ CUDA_VISIBLE_DEVICES=0,1 python orpo_training.py \
     --model_name_or_path Qwen/Qwen1.5-0.5B-Chat \
     --template_name qwen \
     --train_file_dir ./data/reward \
-    --validation_file_dir ./data/reward \
-    --per_device_train_batch_size 4 \
-    --per_device_eval_batch_size 1 \
+    --per_device_train_batch_size 2 \
+    --per_device_eval_batch_size 2 \
     --do_train \
     --do_eval \
     --use_peft True \
@@ -14,8 +13,8 @@ CUDA_VISIBLE_DEVICES=0,1 python orpo_training.py \
     --max_steps 100 \
     --eval_steps 20 \
     --save_steps 50 \
-    --max_source_length 2028 \
-    --max_target_length 1028 \
+    --max_source_length 1024 \
+    --max_target_length 512 \
     --output_dir outputs-orpo-qwen-v1 \
     --target_modules all \
     --lora_rank 8 \
run_ppo.sh (2 changes: 1 addition & 1 deletion)
@@ -8,7 +8,7 @@ CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node 2 ppo_training.py \
     --train_file_dir ./data/finetune \
     --validation_file_dir ./data/finetune \
     --batch_size 8 \
-    --max_source_length 2056 \
+    --max_source_length 1024 \
     --max_target_length 256 \
     --max_train_samples 1000 \
     --use_peft True \
run_rm.sh (2 changes: 1 addition & 1 deletion)
@@ -21,7 +21,7 @@ CUDA_VISIBLE_DEVICES=0,1 python reward_modeling.py \
     --save_steps 500 \
     --save_strategy steps \
     --save_total_limit 3 \
-    --max_source_length 2056 \
+    --max_source_length 1024 \
     --max_target_length 256 \
     --output_dir outputs-rm-qwen-v1 \
     --overwrite_output_dir \
run_sft.sh (2 changes: 1 addition & 1 deletion)
@@ -11,7 +11,7 @@ CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node 2 supervised_finetuning.py \
     --use_peft True \
     --max_train_samples 1000 \
     --max_eval_samples 10 \
-    --model_max_length 1024 \
+    --model_max_length 4096 \
     --num_train_epochs 1 \
     --learning_rate 2e-5 \
     --warmup_ratio 0.05 \
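The SFT script now permits 4096-token sequences instead of 1024. Before launching, it can be worth confirming that the base model's context window covers the new --model_max_length; a quick hedged check, assuming the Qwen checkpoint used in the repo's other example scripts (other architectures may expose the limit under a different config field):

```python
from transformers import AutoConfig

# Hypothetical check, not part of the commit: make sure --model_max_length 4096
# does not exceed the model's positional limit before running run_sft.sh.
config = AutoConfig.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")
max_ctx = getattr(config, "max_position_embeddings", None)
print(max_ctx, max_ctx is not None and max_ctx >= 4096)  # expect True for Qwen1.5
```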
