
Commit

gpt backprop passing
xl0 committed Dec 14, 2023
1 parent d1291dc commit 5eaa0cf
Showing 22 changed files with 1,975 additions and 755 deletions.
2 changes: 1 addition & 1 deletion .style.yapf
@@ -17,4 +17,4 @@ split_before_bitwise_operator=True
 split_before_closing_bracket=False
 split_before_dot=True
 split_complex_comprehension=True
-blank_lines_around_top_level_definition=1
+blank_lines_around_top_level_definition=1
13 changes: 7 additions & 6 deletions nbs/01_tensor.ipynb
@@ -74,9 +74,9 @@
 " def __init__(self, data, name=None, op=None, eps=1e-8, requires_grad=False):\n",
 " global _num_tensors\n",
 " _num_tensors += 1\n",
-" self.data = np.asarray(data)\n",
+" self.data = np.asarray(data, dtype=np.float64) # , dtype=np.float32\n",
 "\n",
-" self.grad = (np.zeros_like(self.data, dtype=np.float32) if requires_grad else None)\n",
+" self.grad = (np.zeros_like(self.data, dtype=np.float64) if requires_grad else None)\n",
 " self.eps = eps\n",
 " self.op = op or ops.Load(name=name)\n",
 " self.name = name or self.op.name\n",
@@ -86,8 +86,8 @@
 " value_str = f\"v={lovely(self.data)}\"\n",
 " grad_str = f\"∇={lovely(self.grad)}\" if self.grad is not None else \"\"\n",
 " parents = (f\" parents=[\" + \",\".join([p.name for p in self.op.parents]) + \"]\" if self.op.parents else \"\")\n",
-" # name=\"{self.name}\n",
-" return f'Tensor{list(self.data.shape)}(\" op={type(self.op).__name__}{parents}):\\n {value_str}\\n {grad_str}'\n",
+" \n",
+" return f'Tensor{list(self.data.shape)}(name=\"{self.name}\" op={type(self.op).__name__}{parents}):\\n {value_str}\\n {grad_str}'\n",
 "\n",
 " def accum_grad(self, grad):\n",
 " if not self.requires_grad:\n",
@@ -128,8 +128,9 @@
 " def mmul(self, other, name=None):\n",
 " return ops.Matmul(self, other, name=name).out\n",
 "\n",
-" def sum(self, name=None, axis=None, keepdims=False):\n",
-" return ops.Sum(self, name=name, axis=axis, keepdims=keepdims).out\n",
+" # XXX move name to the end of arg list\n",
+" def sum(self, name=None, axis=None, keepdims=False, ):\n",
+" return ops.Sum(self, axis=axis, keepdims=keepdims, name=name,).out\n",
 "\n",
 " def transpose(\n",
 " self,\n",
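
The float32 to float64 switch above is presumably in service of numerical gradient checks: finite-difference gradients lose most of their accuracy in single precision, which can make backprop tests fail on tolerance alone. A minimal numpy-only sketch of the effect (illustrative names, independent of this repo's Tensor class):

import numpy as np

def numeric_grad(f, x, eps=1e-6):
    # Central-difference estimate of df/dx, one element at a time.
    g = np.zeros_like(x)
    for i in np.ndindex(x.shape):
        old = x[i]
        x[i] = old + eps; fp = f(x)
        x[i] = old - eps; fm = f(x)
        x[i] = old
        g[i] = (fp - fm) / (2 * eps)
    return g

f = lambda x: (x ** 3).sum()               # analytic gradient: 3 * x**2
x64 = np.random.randn(4, 4)                # float64 by default
x32 = x64.astype(np.float32)
print(np.abs(numeric_grad(f, x64) - 3 * x64 ** 2).max())   # small
print(np.abs(numeric_grad(f, x32) - 3 * x32 ** 2).max())   # typically orders of magnitude larger
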
2 changes: 1 addition & 1 deletion nbs/01_tensor_helpers.ipynb
@@ -54,7 +54,7 @@
 "def std(input: Tensor, name=None, axis=None, keepdims=False, correction=1) -> Tensor:\n",
 " if isinstance(axis, int): axis = (axis, )\n",
 " v1 = input - input.mean(axis=axis, keepdims=True)\n",
-" var = (v1)**2\n",
+" var = v1 ** 2\n",
 "\n",
 " if axis is None: numel = np.prod(input.data.shape)\n",
 " else: numel = np.prod([input.data.shape[i] for i in axis])\n",
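
The std change itself is cosmetic, but for reference this helper computes the standard deviation by hand rather than calling np.std. The tail of the function is not visible in this hunk, so the final divide-and-sqrt below is the conventional formula, assumed rather than copied; a plain-numpy sketch:

import numpy as np

def std_ref(x, axis=None, keepdims=False, correction=1):
    # Subtract the mean, square, average over (numel - correction), take the root.
    if isinstance(axis, int):
        axis = (axis,)
    v1 = x - x.mean(axis=axis, keepdims=True)
    var = v1 ** 2
    numel = np.prod(x.shape) if axis is None else np.prod([x.shape[i] for i in axis])
    return np.sqrt(var.sum(axis=axis, keepdims=keepdims) / (numel - correction))

x = np.random.randn(10, 768)
print(np.allclose(std_ref(x, axis=-1, correction=0), x.std(axis=-1)))  # True
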
11 changes: 9 additions & 2 deletions nbs/02_func.ipynb
@@ -160,6 +160,13 @@
 "def layer_norm(x: Tensor, w: Tensor, b: Tensor, eps=1e-5) -> Tensor:\n",
 " mu = x.mean(axis=-1, keepdims=True)\n",
 " sigma = x.std(axis=-1, keepdims=True, correction=0)\n",
+" if sigma.data.any() == 0:\n",
+" print(\"x\", x)\n",
+" print(\"w\", w)\n",
+" print(\"b\", b)\n",
+" print(\"mu\", mu)\n",
+" print(\"sigma\", sigma)\n",
+" raise ValueError(\"sigma is zero\")\n",
 "\n",
 " return ((x-mu) / (sigma+eps)) * w + b # tensor[10, 768] n=7680 (30Kb) x∈[-0.788, 0.579] μ=-0.005 σ=0.106"
 ]
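
The new guard in layer_norm looks like a debugging aid for the backprop run: eps keeps the forward pass finite even when a row of x is constant (sigma == 0), but the backward pass through std divides by sigma, so catching the degenerate input early with its full context is easier than tracing NaN gradients later. Note that sigma.data.any() == 0 is only True when every element of sigma is zero; a per-element guard would be (sigma.data == 0).any(). A small numpy sketch of the degenerate case (illustrative names, not this repo's API):

import numpy as np

def layer_norm_ref(x, w, b, eps=1e-5):
    # Same arithmetic as the Tensor version above, on plain arrays (correction=0 std).
    mu = x.mean(axis=-1, keepdims=True)
    sigma = x.std(axis=-1, keepdims=True)
    return (x - mu) / (sigma + eps) * w + b

x = np.random.randn(4, 8)
x[2] = 1.0                         # a constant row: sigma == 0 for that row
w, b = np.ones(8), np.zeros(8)
print(layer_norm_ref(x, w, b)[2])  # all zeros; eps keeps the forward pass finite
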
@@ -209,9 +216,9 @@
 " sm = softmax(logits)\n",
 " loss = -target * sm.log()\n",
 " if reduction == \"mean\":\n",
-" return loss.mean()\n",
+" return loss.mean(axis=-1, keepdims=True)\n",
 " if reduction == \"sum\":\n",
-" return loss.sum()\n",
+" return loss.sum(axis=-1, keepdims=True)\n",
 " assert 0, \"Invalid reduction\""
 ]
 }
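
The cross_entropy change reduces over the last (class) axis only, so a batch of logits now yields one loss value per example instead of a single scalar over the whole batch; with reduction="mean" the divisor correspondingly becomes the class count rather than batch times classes. A small numpy illustration of the shape difference (this softmax is a stand-in, not the repo's implementation):

import numpy as np

def softmax(z):
    z = z - z.max(axis=-1, keepdims=True)   # shift for numerical stability
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

logits = np.random.randn(10, 5)                        # batch of 10, 5 classes
target = np.eye(5)[np.random.randint(0, 5, size=10)]   # one-hot targets
loss = -target * np.log(softmax(logits))

print(loss.sum().shape)                        # ()      -> old behaviour: one scalar for the batch
print(loss.sum(axis=-1, keepdims=True).shape)  # (10, 1) -> new behaviour: one value per example
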