Skip to content

Commit

Permalink
enwiki train
Browse files Browse the repository at this point in the history
  • Loading branch information
PicoCreator committed Sep 13, 2023
1 parent 0f6aad4 commit 45f656a
Showing 1 changed file with 202 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# RWKV v5\n",
"\n",
"Simple memory training for a small model\n",
"\n",
"**Note:** This project assumes you have the rwkv-infctx conda env set up"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Basic Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# First, let's set up the working directories, five levels above the current working directory\n",
"# (the same location the config cell later resolves as PROJECT_DIR):\n",
"#   model/      - holds the initial and exported model weights\n",
"#   datapath/   - presumably where the preloaded dataset is cached (TODO confirm against the yaml config)\n",
"#   checkpoint/ - holds the trainer checkpoints\n",
"!ls ../../../../../\n",
"!mkdir -p ../../../../../model/\n",
"!mkdir -p ../../../../../datapath/\n",
"!mkdir -p ../../../../../checkpoint/"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Additional dependencies for the eval scripts.\n",
"# Installed via `python3 -m pip` so the packages land in the same interpreter that the\n",
"# `!python3 ...` cells below invoke (a bare `pip3` on PATH may belong to a different Python).\n",
"!python3 -m pip install -q aiocsv aiofiles"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Standard library import used for the path handling further down\n",
"import os\n",
"\n",
"# Training strategy, device selection, and the wandb toggle (with the mode derived from it)\n",
"DEEPSPEED_STRAT = \"deepspeed_stage_1\"\n",
"GPU_DEVICES = \"auto\"\n",
"ENABLE_WANDB = True\n",
"WANDB_MODE = \"online\" if ENABLE_WANDB else \"disabled\"\n",
"\n",
"# Starting model shape: layer count and embedding dimension\n",
"LAYER_COUNT = 6\n",
"EMBED_DIM = 2560\n",
"\n",
"# Embedding scale (passed to init_model.py as --emb-scale), plus a label variant\n",
"# with \".\" swapped for \"_\" for use inside file names\n",
"EMBED_SCALE = 0.1\n",
"EMBED_SCALE_LABEL = str(EMBED_SCALE).replace(\".\", \"_\")\n",
"\n",
"# Prefixes: one for the wandb run name, one for model / checkpoint file names\n",
"WANDB_PREFIX = f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n",
"FILENAME_PREFIX = f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n",
"\n",
"# Report the run settings\n",
"print(\n",
"    f\"DEEPSPEED_STRAT: {DEEPSPEED_STRAT}\",\n",
"    f\"ENABLE_WANDB: {ENABLE_WANDB}\",\n",
"    f\"GPU_DEVICES: {GPU_DEVICES}\",\n",
"    sep=\"\\n\",\n",
")\n",
"\n",
"# Resolve the directory layout relative to the notebook.\n",
"# abspath(\"__file__\") joins the literal name \"__file__\" onto the kernel's current working\n",
"# directory, so taking its dirname yields that working directory.\n",
"NOTEBOOK_DIR = os.path.dirname(os.path.abspath(\"__file__\"))\n",
"CONFIG_DIR = os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n",
"PROJECT_DIR = os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n",
"TRAINER_DIR = os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
"INFERENCE_DIR = os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
"\n",
"# Name of the notebook's own directory\n",
"DIR_NAME = os.path.basename(NOTEBOOK_DIR)\n",
"\n",
"# Report the resolved names and directories\n",
"print(\n",
"    f\"DIR_NAME: {DIR_NAME}\",\n",
"    f\"NOTEBOOK_DIR: {NOTEBOOK_DIR}\",\n",
"    f\"INFERENCE_DIR: {INFERENCE_DIR}\",\n",
"    f\"TRAINER_DIR: {TRAINER_DIR}\",\n",
"    f\"PROJECT_DIR: {PROJECT_DIR}\",\n",
"    sep=\"\\n\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Init the model weights using the layer count, embedding dim and embedding scale configured above.\n",
"# The output path is relative to TRAINER_DIR, i.e. it lands in the project-level model/ directory.\n",
"# NOTE(review): --skip-if-exists presumably leaves an already-created init file untouched - confirm in init_model.py\n",
"!cd \"{TRAINER_DIR}\" && \\\n",
" python3 ./init_model.py \\\n",
" --n_layer \"{LAYER_COUNT}\" --n_embd \"{EMBED_DIM}\" \\\n",
" --emb-scale \"{EMBED_SCALE}\" \\\n",
" --vocab_size neox --skip-if-exists \\\n",
" \"../model/{FILENAME_PREFIX}-neox-init.pth\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Enwiki Stage 1 : Foundation 4k model training"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Let's preload the required dataset, as described by the enwiki-4k config\n",
"!cd \"{TRAINER_DIR}\" && \\\n",
" python3 preload_datapath.py \"{CONFIG_DIR}/config-enwiki-4k.yaml\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Start the foundation model training.\n",
"# WANDB_MODE is exported from the ENABLE_WANDB toggle in the config cell; the remaining flags pass this\n",
"# notebook's run name, strategy, devices, checkpoint dir, init weights and 4k context length to the\n",
"# trainer alongside the enwiki-4k YAML config.\n",
"!cd \"{TRAINER_DIR}\" && \\\n",
" export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
" python3 lightning_trainer.py fit \\\n",
" -c \"{CONFIG_DIR}/config-enwiki-4k.yaml\" \\\n",
" --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Enwiki-4k Foundation (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
" --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
" --trainer.devices=\"{GPU_DEVICES}\" \\\n",
" --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-enwiki-4k/\" \\\n",
" --model.load_model=\"../model/{FILENAME_PREFIX}-neox-init.pth\" \\\n",
" --model.ctx_len=4096 \\\n",
" --model.bptt_learning_range=1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Let's export the model from the last checkpoint, then list the exported file to confirm it was written\n",
"# NOTE(review): \"bf16\" is passed as the third argument, presumably the export precision - confirm in export_checkpoint.py\n",
"!cd \"{TRAINER_DIR}\" && \\\n",
" python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-enwiki-4k/last.ckpt\" \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\" \"bf16\"\n",
"!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Let's do a quick dragon prompt validation of the exported model\n",
"!cd \"{INFERENCE_DIR}\" && \\\n",
" python3 dragon_test.py \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\" \"cuda fp32\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Let's do a quick memory test on the exported model.\n",
"# This runs from the notebook's working directory (no cd), hence the relative script path\n",
"# and the PROJECT_DIR-based model path.\n",
"!python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-enwiki-4k.pth\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

0 comments on commit 45f656a

Please sign in to comment.