re-org
fwang2 committed Aug 27, 2023
1 parent cbe878d commit c1b985a
Showing 11 changed files with 387 additions and 0 deletions.
File renamed without changes.
261 changes: 261 additions & 0 deletions dl-notebooks/Llama.ipynb
@@ -0,0 +1,261 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "bc091be0-2260-4eea-98ba-46daf4b10899",
"metadata": {},
"outputs": [],
"source": [
"##\n",
"## conda update --all\n",
"## conda install -c conda-forge ipywidgets\n",
"##\n",
"## source: https://github.com/bkitano/llama-from-scratch\n",
"##\n",
"\n",
"import torch\n",
"from torch import nn\n",
"from torch.nn import functional as F\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"import time\n",
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1f51d6f7-17ff-4765-b30e-cbedb3dc24fc",
"metadata": {},
"outputs": [],
"source": [
"# read in text\n",
"lines = open('./input.txt', 'r').read()\n",
"vocab = sorted(list(set(lines)))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "87acf3dc-7610-42f5-9e6a-f262ba058e7a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['\\n', ' ', '!', '$', '&', \"'\", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']\n"
]
}
],
"source": [
"print(vocab)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "3ea85c0a-6732-4a6e-899b-4aedf93bd714",
"metadata": {},
"outputs": [],
"source": [
"itos = {i:ch for i, ch in enumerate(vocab)}\n",
"stoi = {ch:i for i, ch in enumerate(vocab)}"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "f559fdcd-3049-4406-bfc3-0ebc23a1d204",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{0: '\\n', 1: ' ', 2: '!', 3: '$', 4: '&', 5: \"'\", 6: ',', 7: '-', 8: '.', 9: '3', 10: ':', 11: ';', 12: '?', 13: 'A', 14: 'B', 15: 'C', 16: 'D', 17: 'E', 18: 'F', 19: 'G', 20: 'H', 21: 'I', 22: 'J', 23: 'K', 24: 'L', 25: 'M', 26: 'N', 27: 'O', 28: 'P', 29: 'Q', 30: 'R', 31: 'S', 32: 'T', 33: 'U', 34: 'V', 35: 'W', 36: 'X', 37: 'Y', 38: 'Z', 39: 'a', 40: 'b', 41: 'c', 42: 'd', 43: 'e', 44: 'f', 45: 'g', 46: 'h', 47: 'i', 48: 'j', 49: 'k', 50: 'l', 51: 'm', 52: 'n', 53: 'o', 54: 'p', 55: 'q', 56: 'r', 57: 's', 58: 't', 59: 'u', 60: 'v', 61: 'w', 62: 'x', 63: 'y', 64: 'z'}\n"
]
}
],
"source": [
"print(itos)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "fefead73-d6d4-4371-b6f1-921308c06391",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'\\n': 0, ' ': 1, '!': 2, '$': 3, '&': 4, \"'\": 5, ',': 6, '-': 7, '.': 8, '3': 9, ':': 10, ';': 11, '?': 12, 'A': 13, 'B': 14, 'C': 15, 'D': 16, 'E': 17, 'F': 18, 'G': 19, 'H': 20, 'I': 21, 'J': 22, 'K': 23, 'L': 24, 'M': 25, 'N': 26, 'O': 27, 'P': 28, 'Q': 29, 'R': 30, 'S': 31, 'T': 32, 'U': 33, 'V': 34, 'W': 35, 'X': 36, 'Y': 37, 'Z': 38, 'a': 39, 'b': 40, 'c': 41, 'd': 42, 'e': 43, 'f': 44, 'g': 45, 'h': 46, 'i': 47, 'j': 48, 'k': 49, 'l': 50, 'm': 51, 'n': 52, 'o': 53, 'p': 54, 'q': 55, 'r': 56, 's': 57, 't': 58, 'u': 59, 'v': 60, 'w': 61, 'x': 62, 'y': 63, 'z': 64}\n"
]
}
],
"source": [
"print(stoi)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "b6d823b5-7b01-43af-8a16-703ecc73bdb1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[46, 43, 50, 50, 53]\n"
]
}
],
"source": [
"# simple tokenization by characters\n",
"def encode(s):\n",
" return [stoi[ch] for ch in s]\n",
"\n",
"def decode(l):\n",
" return ''.join([itos[i] for i in l])\n",
"\n",
"print(encode(\"hello\"))"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "083fb2e5-fe3c-4149-a541-f0bfb53eb4dc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hello\n"
]
}
],
"source": [
"print(decode(encode(\"hello\")))"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "55e5d77f-aed6-4db0-b99b-2b2eb8e56c91",
"metadata": {},
"outputs": [],
"source": [
"MASTER_CONFIG = {\n",
" \"vocab_size\": len(vocab),\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "fad0ede8-0152-4939-83f8-9f7807c6826a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([1115394])"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dataset = torch.tensor(encode(lines), dtype=torch.int8)\n",
"dataset.shape"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "fb3e6512-78cc-4601-b40d-213f0d24138f",
"metadata": {},
"outputs": [],
"source": [
"def get_batches(data, split, batch_size, context_window, config=MASTER_CONFIG):\n",
" train = data[:int(.8 * len(data))]\n",
" val = data[int(.8 * len(data)): int(.9 * len(data))]\n",
" test = data[int(.9 * len(data)):]\n",
"\n",
" batch_data = train\n",
" if split == 'val':\n",
" batch_data = val\n",
"\n",
" if split == 'test':\n",
" batch_data = test\n",
"\n",
" # pick random starting points\n",
" ix = torch.randint(0, batch_data.size(0) - context_window - 1, (batch_size,))\n",
" x = torch.stack([batch_data[i:i+context_window] for i in ix]).long()\n",
" y = torch.stack([batch_data[i+1:i+context_window+1] for i in ix]).long()\n",
" return x, y"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "2b5a6586-2fa0-4019-a255-3fd0fd179d47",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'vocab_size': 65, 'batch_size': 32, 'context_window': 16}"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"MASTER_CONFIG.update({\n",
" 'batch_size': 32,\n",
" 'context_window': 16\n",
"})\n",
"MASTER_CONFIG"
]
},
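{
"cell_type": "code",
"execution_count": null,
"id": "0e1f2a3b-4c5d-4e7f-8a9b-0c1d2e3f4a5b",
"metadata": {},
"outputs": [],
"source": [
"# sanity check (illustrative cell, not part of the original run):\n",
"# draw one batch and decode the first pair; y should be x shifted\n",
"# left by one character, i.e. the next-token prediction target\n",
"xs, ys = get_batches(dataset, 'train', MASTER_CONFIG['batch_size'], MASTER_CONFIG['context_window'])\n",
"print(decode(xs[0].tolist()))\n",
"print(decode(ys[0].tolist()))"
]
},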
{
"cell_type": "code",
"execution_count": null,
"id": "7a419246-6cc7-4cec-ab41-f42ab021f3cc",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
115 changes: 115 additions & 0 deletions frontier/frontier_pytorch.md
@@ -0,0 +1,115 @@

# pytorch on Frontier

- [pytorch on Frontier](#pytorch-on-frontier)
- [prep Frontier modules](#prep-frontier-modules)
- [module output on Frontier](#module-output-on-frontier)
- [setup miniconda3](#setup-miniconda3)
- [build pytorch](#build-pytorch)
- [Build options: see `setup.py`](#build-options-see-setuppy)
- [regenerate CMAKE build files](#regenerate-cmake-build-files)
- [Kineto and roctracer.h problem](#kineto-and-roctracerh-problem)
- [DeepSpeed](#deepspeed)
- [GPTNeoX](#gptneox)
- [Verification](#verification)



## prep Frontier modules

```
module load PrgEnv-gnu
module load gcc/10.3.0
module load rocm/5.1.0
module load craype-x86-trento
export HCC_AMDGPU_TARGET=gfx90a
export PYTORCH_ROCM_ARCH=gfx90a
export ROCM_SOURCE_DIR=/opt/rocm-5.1.0
export CRAY_CPU_TARGET=x86_64 # just to remove warning noise
```
## module output on Frontier

```
Currently Loaded Modules:
1) libfabric/1.15.2.0 4) cray-dsmml/0.2.2 7) gcc/10.3.0 10) DefApps/default 13) craype-accel-amd-gfx90a
2) craype-network-ofi 5) cray-libsci/22.12.1.1 8) darshan-runtime/3.4.0 11) cray-mpich/8.1.23 14) craype-x86-trento
3) craype/2.7.19 6) PrgEnv-gnu/8.3.3 9) hsi/default 12) rocm/5.1.0
```

Note: one of the two modules, `craype-x86-trento` or `craype-accel-amd-gfx90a`, fixed a linking problem; my guess is the former.

## setup miniconda3

```
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash ./Miniconda3-latest-Linux-x86_64.sh -b -p miniconda
source miniconda/bin/activate   # put conda on PATH in the current shell
conda create -n pytorch python=3.8
conda activate pytorch
pip install -r requirements.txt
```

## build pytorch

```
git clone --recursive -b IFU-master-2022-11-22 https://github.com/ROCmSoftwarePlatform/pytorch
python tools/amd_build/build_amd.py
USE_KINETO=0 USE_ROCM=1 MAX_JOBS=4 python setup.py bdist_wheel 2>&1 | tee output
```

### Build options: see `setup.py`

```
USE_KINETO=0 # disable the Kineto profiler, which would otherwise require roctracer.h
```
### regenerate CMAKE build files

Removing the cache forces CMake to regenerate, which triggers a rebuild with the changed configuration.

```
cd pytorch/build
rm CMakeCache.txt
```
To remove the previous build artifacts as well:

```
python setup.py clean
```
### Kineto and roctracer.h problem

Kineto requires roctracer, and that dependency fails to resolve under rocm 5.1.0. The relevant CMake logic:

```
if (NOT ROCM_SOURCE_DIR)
set(ROCM_SOURCE_DIR "$ENV{ROCM_SOURCE_DIR}")
message(INFO " ROCM_SOURCE_DIR = ${ROCM_SOURCE_DIR}")
endif()
```

For reasons unknown at this point, `ROCM_SOURCE_DIR` is still set to `/opt/rocm` instead of `/opt/rocm-5.1.0`, even though the environment variable is set.

So the easy workaround is to hardcode it:

```
set(ROCM_SOURCE_DIR /opt/rocm-5.1.0)
```

## DeepSpeed

```
git clone https://github.com/microsoft/DeepSpeed
DS_BUILD_FUSED_LAMB=1 DS_BUILD_FUSED_ADAM=1 DS_BUILD_TRANSFORMER=1 DS_BUILD_STOCHASTIC_TRANSFORMER=1 DS_BUILD_UTILS=1 python setup.py bdist_wheel
python setup.py install
```
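
As a quick sanity check after installing, DeepSpeed ships a `ds_report` command that summarizes which ops were built and the detected torch setup:

```
ds_report
```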

## GPTNeoX

```
pip install shortuuid # missing from the requirements files below
git clone https://github.com/EleutherAI/gpt-neox.git
cd gpt-neox
pip install -r requirements/requirements.txt
pip install -r requirements/requirements-wandb.txt
pip install -r requirements/requirements-tensorboard.txt
```
## Verification
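
A minimal smoke test one might run on a GPU node (a suggested check, not from the original notes; these are standard PyTorch calls, and on ROCm builds the `cuda` namespace maps to HIP):

```
python -c "import torch; print(torch.__version__)"
python -c "import torch; print(torch.cuda.is_available())"      # expect True on a GPU node
python -c "import torch; print(torch.cuda.get_device_name(0))"
```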


11 changes: 11 additions & 0 deletions pytorch-sbs/README.md
@@ -0,0 +1,11 @@
# Study Notes


## Standardization

* Based on the loss function, each feature has its own loss curve
* Each feature, judged by its loss curve, has a **very different** optimal learning rate
* However, the learning rate is global (it applies to all features)
* Therefore, it makes sense to _try_ to make all features' loss curves similar - though this is not always possible
* Standardization (`StandardScaler` in sklearn) is one such technique that can make the loss curves more uniform, so training converges faster and more reliably toward the optimum; see the sketch below
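
A minimal sketch of the effect, using sklearn's `StandardScaler` (the toy data is illustrative):

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

# two features on wildly different scales -> very different loss curvature
X = np.array([[1000.0, 0.001],
              [2000.0, 0.002],
              [3000.0, 0.003]])

scaler = StandardScaler()        # rescale each feature to zero mean, unit variance
X_std = scaler.fit_transform(X)

print(X_std.mean(axis=0))        # ~[0. 0.]
print(X_std.std(axis=0))         # ~[1. 1.]
```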

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
