-
-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added stage 4 for the L6 memory test
- Loading branch information
1 parent
cd0620b
commit 7d1b247
Showing
2 changed files
with
468 additions
and
0 deletions.
There are no files selected for viewing
234 changes: 234 additions & 0 deletions
234
notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# RWKV v5\n", | ||
"\n", | ||
"Simple memory training for a small model\n", | ||
"\n", | ||
"**Note:** This project assumes you have the rwkv-infctx conda env setup" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Basic Setup" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# First lets setup the various directories, and init the model\n", | ||
"!ls ../../../../../\n", | ||
"!mkdir -p ../../../../../model/\n", | ||
"!mkdir -p ../../../../../datapath/\n", | ||
"!mkdir -p ../../../../../checkpoint/" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Additional dependencies for eval stuff\n", | ||
"!pip3 install -q aiocsv aiofiles" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", | ||
"GPU_DEVICES=\"auto\"\n", | ||
"ENABLE_WANDB=True\n", | ||
"\n", | ||
"# Layer count and embed dim to start with\n", | ||
"LAYER_COUNT=6\n", | ||
"EMBED_DIM=2048\n", | ||
"\n", | ||
"EMBED_SCALE=0.1\n", | ||
"EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", | ||
"\n", | ||
"WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", | ||
"FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", | ||
"\n", | ||
"print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", | ||
"print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", | ||
"print(\"GPU_DEVICES:\", GPU_DEVICES)\n", | ||
"\n", | ||
"if ENABLE_WANDB:\n", | ||
" WANDB_MODE=\"online\"\n", | ||
"else:\n", | ||
" WANDB_MODE=\"disabled\"\n", | ||
"\n", | ||
"# Computing the notebook, and various paths\n", | ||
"import os\n", | ||
"NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", | ||
"CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", | ||
"PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", | ||
"TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", | ||
"INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", | ||
"\n", | ||
"# Get the notebook dir name\n", | ||
"DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", | ||
"\n", | ||
"# Log names and dir\n", | ||
"print(\"DIR_NAME:\", DIR_NAME)\n", | ||
"print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", | ||
"print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", | ||
"print(\"TRAINER_DIR:\", TRAINER_DIR)\n", | ||
"print(\"PROJECT_DIR:\", PROJECT_DIR)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Download the model directly (stop gap till HF sync issues is resolved)\n", | ||
"!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", | ||
" wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-2k.pth\"\n", | ||
"\n", | ||
"!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n", | ||
" ls -alh ." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Tune 5 : Ramping up the ctx size (4096), memory training\n", | ||
"\n", | ||
"- Tune 5: Mid ctx size (4096), Scaling up!" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%%script bash\n", | ||
"\n", | ||
"########################################\n", | ||
"# Generate the required jsonl dataset\n", | ||
"########################################\n", | ||
"\n", | ||
"# Go to config dir\n", | ||
"cd \"../\"\n", | ||
"\n", | ||
"# Reset the dataset dir\n", | ||
"mkdir -p ../dataset\n", | ||
"rm -rf ../dataset/*.jsonl\n", | ||
"\n", | ||
"# Generate the various datasets\n", | ||
"echo \"## Generating word reptition dataset ##\"\n", | ||
"\n", | ||
"#\n", | ||
"# We reduce the training set for < 50 words - and shift the focus upwards\n", | ||
"# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n", | ||
"#\n", | ||
"python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 100 &\n", | ||
"for i in {5..500..5} \n", | ||
"do\n", | ||
" python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n", | ||
" python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 1 & \n", | ||
"done\n", | ||
"\n", | ||
"#\n", | ||
"# Ramping up the 50+ - 2100 words dataset\n", | ||
"# \n", | ||
"for i in {505..4000..5} \n", | ||
"do\n", | ||
" python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 200 & \n", | ||
" python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n", | ||
"done\n", | ||
"\n", | ||
"wait\n", | ||
"echo \"## Done ##\"\n", | ||
"\n", | ||
"ls -alh ../dataset/" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Start the finetune model training\n", | ||
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", | ||
" python3 lightning_trainer.py fit \\\n", | ||
" -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n", | ||
" --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-4k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", | ||
" --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", | ||
" --trainer.devices=\"{GPU_DEVICES}\" \\\n", | ||
" --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-4k/\" \\\n", | ||
" --model.lr_init=3e-4 \\\n", | ||
" --model.lr_final=1e-4 \\\n", | ||
" --data.max_token_size=4096 \\\n", | ||
" --model.ctx_len=4096 \\\n", | ||
" --model.bptt_learning_range=1 \\\n", | ||
" --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-2k.pth\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Lets export the model from the checkpoint\n", | ||
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" python3 export_checkpoint.py \\\n", | ||
" \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-4k/last.ckpt\" \\\n", | ||
" \"../model/{FILENAME_PREFIX}-mem-ctx-4k.pth\" \"bf16\"\n", | ||
"!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-4k.pth\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Lets do a quick memory test\n", | ||
"!python3 ../../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-4k.pth\"" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "python3 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
Oops, something went wrong.