-
-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3e1c37c
commit 19c13bd
Showing
1 changed file
with
219 additions
and
0 deletions.
There are no files selected for viewing
219 changes: 219 additions & 0 deletions
219
notebook/experiment/rwkv-x-exp/multi-size-train/v5-L12-D2048-baseline.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# RWKV v5 multi-size training experiment\n", | ||
"\n", | ||
"**Note:** This project assumes you have the rwkv-infctx conda env set up" | |
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Basic Setup" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# First, let's set up the various directories, and init the model\n", | |
"!mkdir -p ../../../../model/\n", | ||
"!mkdir -p ../../../../datapath/\n", | ||
"!mkdir -p ../../../../checkpoint/" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"DEEPSPEED_STRAT=\"deepspeed_stage_2_offload\"\n", | ||
"GPU_DEVICES=\"auto\"\n", | ||
"ENABLE_WANDB=True\n", | ||
"\n", | ||
"EMBED_SCALE=0.01\n", | ||
"EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", | ||
"\n", | ||
"LAYER_COUNT=12\n", | ||
"EMBED_SIZE=2048\n", | ||
"\n", | ||
"WANDB_PREFIX=f\"[Multi-size] v5-L{LAYER_COUNT}-D{EMBED_SIZE}-E{EMBED_SCALE}\"\n", | ||
"FILENAME_PREFIX=f\"v5-L{LAYER_COUNT}-D{EMBED_SIZE}-E{EMBED_SCALE_LABEL}\"\n", | ||
"\n", | ||
"print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", | ||
"print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", | ||
"print(\"GPU_DEVICES:\", GPU_DEVICES)\n", | ||
"\n", | ||
"if ENABLE_WANDB:\n", | ||
" WANDB_MODE=\"online\"\n", | ||
"else:\n", | ||
" WANDB_MODE=\"disabled\"\n", | ||
"\n", | ||
"# Compute the notebook directory and the various project paths\n", | |
"import os\n", | ||
"NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", | ||
"PROJECT_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../../../../\"))\n", | ||
"TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", | ||
"INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", | ||
"\n", | ||
"print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", | ||
"print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", | ||
"print(\"TRAINER_DIR:\", TRAINER_DIR)\n", | ||
"print(\"PROJECT_DIR:\", PROJECT_DIR)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Init the model: download the part-1 weights (skipped if the file already exists)\n", | |
"!cd \"{PROJECT_DIR}/model/\" && wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/multi-size-train/{FILENAME_PREFIX}-enwiki-4k-p1.pth\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Let's preload the required datasets\n", | |
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" python3 preload_datapath.py \"{NOTEBOOK_DIR}/enwiki-4k-part2.yaml\"\n", | ||
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" python3 preload_datapath.py \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Enwiki Stage 2 : Baseline training" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Start the foundation model training\n", | ||
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", | ||
" python3 lightning_trainer.py fit \\\n", | ||
" -c \"{NOTEBOOK_DIR}/enwiki-4k-part2.yaml\" \\\n", | ||
" --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Baseline Part 2 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", | ||
" --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", | ||
" --trainer.devices=\"{GPU_DEVICES}\" \\\n", | ||
" --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-baseline-p2/\" \\\n", | ||
" --model.load_model=\"../model/{FILENAME_PREFIX}-enwiki-4k-p1.pth\" \\\n", | ||
" --model.ctx_len=4096 \\\n", | ||
" --model.bptt_learning_range=1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Let's export the model from the checkpoint\n", | |
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-baseline-p2/last.ckpt\" \"../model/{FILENAME_PREFIX}-baseline-p2.pth\" \"bf16\"\n", | ||
"!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-baseline-p2.pth\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Let's do a quick dragon prompt validation\n", | |
"!cd \"{INFERENCE_DIR}\" && \\\n", | ||
" python3 dragon_test.py \"../model/{FILENAME_PREFIX}-baseline-p2.pth\" \"cuda fp32\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Enwiki Stage 3 : Baseline training" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Start the foundation model training\n", | ||
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", | ||
" python3 lightning_trainer.py fit \\\n", | ||
" -c \"{NOTEBOOK_DIR}/enwiki-4k-part3.yaml\" \\\n", | ||
" --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Baseline Part 3 (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", | ||
" --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", | ||
" --trainer.devices=\"{GPU_DEVICES}\" \\\n", | ||
" --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-baseline-p3/\" \\\n", | ||
" --model.load_model=\"../model/{FILENAME_PREFIX}-baseline-p2.pth\" \\\n", | ||
" --model.ctx_len=4096 \\\n", | ||
" --model.bptt_learning_range=1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Let's export the model from the checkpoint\n", | |
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-baseline-p3/last.ckpt\" \"../model/{FILENAME_PREFIX}-baseline-p3.pth\" \"bf16\"\n", | ||
"!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-baseline-p3.pth\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Let's do a quick dragon prompt validation\n", | |
"!cd \"{INFERENCE_DIR}\" && \\\n", | ||
" python3 dragon_test.py \"../model/{FILENAME_PREFIX}-baseline-p3.pth\" \"cuda fp32\"" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |