-
-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0f6aad4
commit 45f656a
Showing
1 changed file
with
202 additions
and
0 deletions.
There are no files selected for viewing
202 changes: 202 additions & 0 deletions
202
notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# RWKV v5\n", | ||
"\n", | ||
"Simple memory training for a small model\n", | ||
"\n", | ||
"**Note:** This project assumes you have the rwkv-infctx conda env setup" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Basic Setup" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# First lets setup the various directories, and init the model\n", | ||
"!ls ../../../../../\n", | ||
"!mkdir -p ../../../../../model/\n", | ||
"!mkdir -p ../../../../../datapath/\n", | ||
"!mkdir -p ../../../../../checkpoint/" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Additional dependencies for eval stuff\n", | ||
"!pip3 install -q aiocsv aiofiles" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", | ||
"GPU_DEVICES=\"auto\"\n", | ||
"ENABLE_WANDB=True\n", | ||
"\n", | ||
"# Layer count and embed dim to start with\n", | ||
"LAYER_COUNT=6\n", | ||
"EMBED_DIM=2560\n", | ||
"\n", | ||
"EMBED_SCALE=0.1\n", | ||
"EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", | ||
"\n", | ||
"WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", | ||
"FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", | ||
"\n", | ||
"print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", | ||
"print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", | ||
"print(\"GPU_DEVICES:\", GPU_DEVICES)\n", | ||
"\n", | ||
"if ENABLE_WANDB:\n", | ||
" WANDB_MODE=\"online\"\n", | ||
"else:\n", | ||
" WANDB_MODE=\"disabled\"\n", | ||
"\n", | ||
"# Computing the notebook, and various paths\n", | ||
"import os\n", | ||
"NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", | ||
"CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", | ||
"PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", | ||
"TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", | ||
"INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", | ||
"\n", | ||
"# Get the notebook dir name\n", | ||
"DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", | ||
"\n", | ||
"# Log names and dir\n", | ||
"print(\"DIR_NAME:\", DIR_NAME)\n", | ||
"print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", | ||
"print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", | ||
"print(\"TRAINER_DIR:\", TRAINER_DIR)\n", | ||
"print(\"PROJECT_DIR:\", PROJECT_DIR)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Init the model\n", | ||
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" python3 ./init_model.py \\\n", | ||
" --n_layer \"{LAYER_COUNT}\" --n_embd \"{EMBED_DIM}\" \\\n", | ||
" --emb-scale \"{EMBED_SCALE}\" \\\n", | ||
" --vocab_size neox --skip-if-exists \\\n", | ||
" \"../model/{FILENAME_PREFIX}-neox-init.pth\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Enwiki Stage 1 : Foundation 4k model training" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Lets preload the requried dataset \n", | ||
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" python3 preload_datapath.py \"{CONFIG_DIR}/config-enwiki-4k.yaml\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Start the foundation model training\n", | ||
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", | ||
" python3 lightning_trainer.py fit \\\n", | ||
" -c \"{CONFIG_DIR}/config-enwiki-4k.yaml\" \\\n", | ||
" --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Enwiki-4k Foundation (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", | ||
" --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", | ||
" --trainer.devices=\"{GPU_DEVICES}\" \\\n", | ||
" --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-enwiki-4k/\" \\\n", | ||
" --model.load_model=\"../model/{FILENAME_PREFIX}-neox-init.pth\" \\\n", | ||
" --model.ctx_len=4096 \\\n", | ||
" --model.bptt_learning_range=1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Lets export the model from the checkpoint\n", | ||
"!cd \"{TRAINER_DIR}\" && \\\n", | ||
" python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-enwiki-4k/last.ckpt\" \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\" \"bf16\"\n", | ||
"!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# # Lets do a quick dragon prompt validation\n", | ||
"!cd \"{INFERENCE_DIR}\" && \\\n", | ||
" python3 dragon_test.py \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\" \"cuda fp32\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Lets do a quick memory test\n", | ||
"!export python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-enwiki-4k.pth\"" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |