Added stage 4 for the L6 memory test

RWKV · Sep 16, 2023 · 7d1b247 · 7d1b247
1 parent cd0620b
commit 7d1b247
Show file tree

Hide file tree

Showing 2 changed files with 468 additions and 0 deletions.
diff --git a/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb b/notebook/experiment/rwkv-x-exp/v5-r3-memory/L6-D2048-E1e-1-ctx4k/stage4.ipynb
@@ -0,0 +1,234 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# RWKV v5\n",
+    "\n",
+    "Simple memory training for a small model\n",
+    "\n",
+    "**Note:** This project assumes you have the rwkv-infctx conda env setup"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Basic Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# First lets setup the various directories, and init the model\n",
+    "!ls ../../../../../\n",
+    "!mkdir -p ../../../../../model/\n",
+    "!mkdir -p ../../../../../datapath/\n",
+    "!mkdir -p ../../../../../checkpoint/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Additional dependencies for eval stuff\n",
+    "!pip3 install -q aiocsv aiofiles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DEEPSPEED_STRAT=\"deepspeed_stage_1\"\n",
+    "GPU_DEVICES=\"auto\"\n",
+    "ENABLE_WANDB=True\n",
+    "\n",
+    "# Layer count and embed dim to start with\n",
+    "LAYER_COUNT=6\n",
+    "EMBED_DIM=2048\n",
+    "\n",
+    "EMBED_SCALE=0.1\n",
+    "EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(\".\", \"_\")\n",
+    "\n",
+    "WANDB_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}\"\n",
+    "FILENAME_PREFIX=f\"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}\"\n",
+    "\n",
+    "print(\"DEEPSPEED_STRAT:\", DEEPSPEED_STRAT)\n",
+    "print(\"ENABLE_WANDB:\", ENABLE_WANDB)\n",
+    "print(\"GPU_DEVICES:\", GPU_DEVICES)\n",
+    "\n",
+    "if ENABLE_WANDB:\n",
+    "    WANDB_MODE=\"online\"\n",
+    "else:\n",
+    "    WANDB_MODE=\"disabled\"\n",
+    "\n",
+    "# Computing the notebook, and various paths\n",
+    "import os\n",
+    "NOTEBOOK_DIR=os.path.dirname(os.path.abspath(\"__file__\"))\n",
+    "CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, \"../\"))\n",
+    "PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, \"../../../../\"))\n",
+    "TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
+    "INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, \"./RWKV-v5/\"))\n",
+    "\n",
+    "# Get the notebook dir name\n",
+    "DIR_NAME=os.path.basename(NOTEBOOK_DIR)\n",
+    "\n",
+    "# Log names and dir\n",
+    "print(\"DIR_NAME:\", DIR_NAME)\n",
+    "print(\"NOTEBOOK_DIR:\", NOTEBOOK_DIR)\n",
+    "print(\"INFERENCE_DIR:\", INFERENCE_DIR)\n",
+    "print(\"TRAINER_DIR:\", TRAINER_DIR)\n",
+    "print(\"PROJECT_DIR:\", PROJECT_DIR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download the model directly (stop gap till HF sync issues is resolved)\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    wget -nc \"https://huggingface.co/rwkv-x-dev/rwkv-x-playground/resolve/main/experiment/rwkv-x-exp/v5-r3-memory/{DIR_NAME}/{FILENAME_PREFIX}-mem-ctx-2k.pth\"\n",
+    "\n",
+    "!cd \"{TRAINER_DIR}\" && cd \"../model/\" && \\\n",
+    "    ls -alh ."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Tune 5 : Ramping up the ctx size (4096), memory training\n",
+    "\n",
+    "- Tune 5: Mid ctx size (4096), Scaling up!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%script bash\n",
+    "\n",
+    "########################################\n",
+    "# Generate the required jsonl dataset\n",
+    "########################################\n",
+    "\n",
+    "# Go to config dir\n",
+    "cd \"../\"\n",
+    "\n",
+    "# Reset the dataset dir\n",
+    "mkdir -p ../dataset\n",
+    "rm -rf ../dataset/*.jsonl\n",
+    "\n",
+    "# Generate the various datasets\n",
+    "echo \"## Generating word reptition dataset ##\"\n",
+    "\n",
+    "#\n",
+    "# We reduce the training set for < 50 words - and shift the focus upwards\n",
+    "# (aka 50-100 token * 2 : ~100 - 250 token ctx len)\n",
+    "#\n",
+    "python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/word-2-count.jsonl 2 100 &\n",
+    "for i in {5..500..5} \n",
+    "do\n",
+    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 100 & \n",
+    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 1 & \n",
+    "done\n",
+    "\n",
+    "#\n",
+    "# Ramping up the 50+ - 2100 words dataset\n",
+    "# \n",
+    "for i in {505..4000..5} \n",
+    "do\n",
+    "    python3 ../memory_script/gen_limited_prompt_completion_jsonl.py ../dataset/gen-word-$i-count.jsonl $i 200 & \n",
+    "    python3 ../memory_script/shuffle_limited_prompt_completion_jsonl.py ../dataset/shuffle-word-$i-count.jsonl $i 20 & \n",
+    "done\n",
+    "\n",
+    "wait\n",
+    "echo \"## Done ##\"\n",
+    "\n",
+    "ls -alh ../dataset/"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Start the finetune model training\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    export WANDB_MODE=\"{WANDB_MODE}\" && \\\n",
+    "    python3 lightning_trainer.py fit \\\n",
+    "        -c \"{CONFIG_DIR}/config-mem-template.yaml\" \\\n",
+    "        --trainer.logger.init_args.name=\"{WANDB_PREFIX} - Mem-Tune ctx-4k (train-ctx=4k, {DEEPSPEED_STRAT})\" \\\n",
+    "        --trainer.strategy=\"{DEEPSPEED_STRAT}\" \\\n",
+    "        --trainer.devices=\"{GPU_DEVICES}\"  \\\n",
+    "        --trainer.callbacks.init_args.dirpath=\"../checkpoint/{FILENAME_PREFIX}-mem-ctx-4k/\" \\\n",
+    "        --model.lr_init=3e-4 \\\n",
+    "        --model.lr_final=1e-4 \\\n",
+    "        --data.max_token_size=4096 \\\n",
+    "        --model.ctx_len=4096 \\\n",
+    "        --model.bptt_learning_range=1 \\\n",
+    "        --model.load_model=\"../model/{FILENAME_PREFIX}-mem-ctx-2k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Lets export the model from the checkpoint\n",
+    "!cd \"{TRAINER_DIR}\" && \\\n",
+    "    python3 export_checkpoint.py \\\n",
+    "        \"../checkpoint/{FILENAME_PREFIX}-mem-ctx-4k/last.ckpt\" \\\n",
+    "        \"../model/{FILENAME_PREFIX}-mem-ctx-4k.pth\" \"bf16\"\n",
+    "!cd \"{TRAINER_DIR}\" && ls -alh \"../model/{FILENAME_PREFIX}-mem-ctx-4k.pth\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Lets do a quick memory test\n",
+    "!python3 ../../memory_script/eval_v5_memory_guided.py \"{PROJECT_DIR}/model/{FILENAME_PREFIX}-mem-ctx-4k.pth\""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}