diff --git a/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb b/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb new file mode 100644 index 00000000..2e3eb40a --- /dev/null +++ b/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2560-E1e-1-ctx4k/part1.ipynb @@ -0,0 +1,202 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RWKV v5\n", + "\n", + "Simple memory training for a small model\n", + "\n", + "**Note:** This project assumes you have the rwkv-infctx conda env setup" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Basic Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# First lets setup the various directories, and init the model\n", + "!ls ../../../../../\n", + "!mkdir -p ../../../../../model/\n", + "!mkdir -p ../../../../../datapath/\n", + "!mkdir -p ../../../../../checkpoint/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Additional dependencies for eval stuff\n", + "!pip3 install -q aiocsv aiofiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n", + "GPU_DEVICES=\"auto\"\n", + "ENABLE_WANDB=True\n", + "\n", + "# Layer count and embed dim to start with\n", + "LAYER_COUNT=6\n", + "EMBED_DIM=2560\n", + "\n", + "EMBED_SCALE=0.1\n", + "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n", + "\n", + "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n", + "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n", + "\n", + "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n", + "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n", + "print(\"GPU_DEVICES:\", GPU_DEVICES)\n", + "\n", + "if ENABLE_WANDB:\n", + " WANDB_MODE=\"online\"\n", + "else:\n", + " WANDB_MODE=\"disabled\"\n", + "\n", + "# Computing the notebook, and various paths\n", + "import os\n", + "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n", + "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n", + "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n", + "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n", + "\n", + "# Get the notebook dir name\n", + "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n", + "\n", + "# Log names and dir\n", + "print(\"DIR_NAME:\", DIR_NAME)\n", + "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n", + "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n", + "print(\"TRAINER_DIR:\", TRAINER_DIR)\n", + "print(\"PROJECT_DIR:\", PROJECT_DIR)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Init the model\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 ./init_model.py \\\n", + " --n_layer \"{LAYER_COUNT}\" --n_embd \"{EMBED_DIM}\" \\\n", + " --emb-scale \"{EMBED_SCALE}\" \\\n", + " --vocab_size neox --skip-if-exists \\\n", + " \"../model/{FILENAME_PREFIX}-neox-init.pth\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Enwiki Stage 1 : Foundation 4k model training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets preload the requried dataset \n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 preload_datapath.py \"{CONFIG_DIR}/config-enwiki-4k.yaml\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Start the foundation model training\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " export WANDB_MODE=\"{WANDB_MODE}\" && \\\n", + " python3 lightning_trainer.py fit \\\n", + " -c \"{CONFIG_DIR}/config-enwiki-4k.yaml\" \\\n", + " --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Enwiki-4k Foundation (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n", + " --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n", + " --trainer.devices=\"{GPU_DEVICES}\" \\\n", + " --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-enwiki-4k/\" \\\n", + " --model.load_model=\"../model/{FILENAME_PREFIX}-neox-init.pth\" \\\n", + " --model.ctx_len=4096 \\\n", + " --model.bptt_learning_range=1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets export the model from the checkpoint\n", + "!cd \"{TRAINER_DIR}\" && \\\n", + " python3 export_checkpoint.py \"../checkpoint/{FILENAME_PREFIX}-enwiki-4k/last.ckpt\" \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\" \"bf16\"\n", + "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# # Lets do a quick dragon prompt validation\n", + "!cd \"{INFERENCE_DIR}\" && \\\n", + " python3 dragon_test.py \"../model/{FILENAME_PREFIX}-enwiki-4k.pth\" \"cuda fp32\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Lets do a quick memory test\n", + "!export python3 ../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-enwiki-4k.pth\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}