Rework

xl0 · Dec 11, 2023 · e37b39f · e37b39f
1 parent 7c2936a
commit e37b39f
Show file tree

Hide file tree

Showing 28 changed files with 2,329 additions and 2,045 deletions.
diff --git a/nbs/01_tensor.ipynb b/nbs/01_tensor.ipynb
diff --git a/nbs/01_tensor_helpers.ipynb b/nbs/01_tensor_helpers.ipynb
@@ -0,0 +1,93 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | hide\n",
+    "# | default_exp tensor_helpers\n",
+    "import nbdev\n",
+    "from nbdev.showdoc import *\n",
+    "\n",
+    "nbdev.nbdev_export()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tensor class\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | export\n",
+    "import numpy as np\n",
+    "import tidygrad.ops as ops"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | export\n",
+    "\n",
+    "class Tensor:\n",
+    "    pass\n",
+    "\n",
+    "def mean(input: Tensor, name=None, axis=None, keepdims=False) -> Tensor:\n",
+    "    reduced = np.prod(input.data.shape)\n",
+    "    if isinstance(axis, int):\n",
+    "        axis = (axis, )\n",
+    "    if axis:\n",
+    "        reduced = np.prod([input.data.shape[i] for i in axis])\n",
+    "    return ops.Sum(input, name=name, axis=axis, keepdims=keepdims).out / reduced\n",
+    "\n",
+    "def std(input: Tensor, name=None, axis=None, keepdims=False, correction=1) -> Tensor:\n",
+    "    if isinstance(axis, int): axis = (axis, )\n",
+    "    v1 = input - input.mean(axis=axis, keepdims=True)\n",
+    "    var = (v1)**2\n",
+    "\n",
+    "    if axis is None: numel = np.prod(input.data.shape)\n",
+    "    else: numel = np.prod([input.data.shape[i] for i in axis])\n",
+    "    assert numel > correction, \"Cannot compute std of a single value\"\n",
+    "\n",
+    "    res = (var.sum(axis=axis, keepdims=keepdims) / (numel-correction)).pow(0.5)\n",
+    "    if name is not None: res.name = name\n",
+    "    return res\n",
+    "\n",
+    "def split(t: Tensor, n: int, axis: int):\n",
+    "    step = t.shape[axis] // n\n",
+    "    assert step * n == t.shape[axis], \"Can't split tensor evenly\"\n",
+    "\n",
+    "    chunks = []\n",
+    "\n",
+    "    key = [slice(None)] * len(t.shape)\n",
+    "    for i in range(n):\n",
+    "        start = i * step\n",
+    "        end = (i + 1) * step\n",
+    "        key[axis] = slice(start, end)\n",
+    "        chunks.append(t[tuple(key)])\n",
+    "\n",
+    "    return chunks"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/nbs/02_func.ipynb b/nbs/02_func.ipynb
@@ -0,0 +1,228 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | hide\n",
+    "# | default_exp func\n",
+    "import nbdev\n",
+    "from nbdev.showdoc import *\n",
+    "\n",
+    "nbdev.nbdev_export()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Functions that operate on tensors\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | exporti\n",
+    "\n",
+    "import math\n",
+    "import numpy as np\n",
+    "\n",
+    "import tidygrad.ops as ops\n",
+    "from tidygrad import Tensor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "def add(a: Tensor, b: Tensor, name=None) -> Tensor:\n",
+    "    \"\"\"Add two tensors\"\"\"\n",
+    "    return ops.Add(a, b, name=name).out\n",
+    "\n",
+    "def sub(a: Tensor, b: Tensor, name=None) -> Tensor:\n",
+    "    \"\"\"Subtract two tensors\"\"\"\n",
+    "    return ops.Sub(a, b, name=name).out\n",
+    "\n",
+    "def mul(a: Tensor, b: Tensor, name=None) -> Tensor:\n",
+    "    \"\"\"Multiply two tensors\"\"\"\n",
+    "    return ops.Mul(a, b, name=name).out\n",
+    "\n",
+    "def div(a: Tensor, b: Tensor, name=None) -> Tensor:\n",
+    "    \"\"\"Divide two tensors (a/b)\"\"\"\n",
+    "    return ops.Div(a, b, name=name).out\n",
+    "\n",
+    "def neg(a: Tensor, name=None) -> Tensor:\n",
+    "    \"\"\"Negate a tensor (-a)\"\"\"\n",
+    "    return ops.Neg(a, name=name).out\n",
+    "\n",
+    "def pow(a: Tensor, power: Tensor, name=None) -> Tensor:\n",
+    "    \"\"\"Raise a tensor to a power (a**power)\"\"\"\n",
+    "    return ops.Pow(a, power, name=name).out\n",
+    "\n",
+    "def log(a: Tensor, name=None) -> Tensor:\n",
+    "    \"\"\"Take the natural logarithm of a tensor\"\"\"\n",
+    "    return ops.Log(a, name=name).out\n",
+    "\n",
+    "def exp(a: Tensor, name=None) -> Tensor:\n",
+    "    \"\"\"Exponentiate a tensor\"\"\"\n",
+    "    return ops.Exp(a, name=name).out\n",
+    "\n",
+    "def logexp(a: Tensor, name=None) -> Tensor:\n",
+    "    \"\"\"Exponentiate a tensor\"\"\"\n",
+    "    return ops.ExpLog(a, name=name).out\n",
+    "\n",
+    "def matmul(a: Tensor, b: Tensor, name=None) -> Tensor:\n",
+    "    \"\"\"Matrix multiplication of two tensors\"\"\"\n",
+    "    return ops.Matmul(a, b, name=name).out\n",
+    "\n",
+    "def sum(a: Tensor, name=None, axis=None, keepdims=False) -> Tensor:\n",
+    "    \"\"\"Sum-reduce a tensor along the given axis (int or tuple of ints)\"\"\"\n",
+    "    return ops.Sum(a, name=name, axis=axis, keepdims=keepdims).out\n",
+    "\n",
+    "def broadcast(a: Tensor, target_shape, name=None) -> Tensor:\n",
+    "    \"\"\"Broadcast a tensor to the given shape\"\"\"\n",
+    "    return ops.Broadcast(a, target_shape, name=name).out\n",
+    "\n",
+    "def slice(a: Tensor, key, name=None) -> Tensor:\n",
+    "    return ops.Slice(a, key, name=name).out\n",
+    "\n",
+    "def transpose(a: Tensor, dim0, dim1, name=None) -> Tensor:\n",
+    "    \"\"\"Transpose a tensor\"\"\"\n",
+    "    return ops.Transpose(a, dim0, dim1, name=name).out\n",
+    "\n",
+    "def dropout(x: Tensor, p=0.5, training=True) -> Tensor:\n",
+    "    \"\"\"Apply Dropout to a tensor\"\"\"\n",
+    "    return ops.Dropout(x, p_drop=p, training=training).out\n",
+    "\n",
+    "def embedding(input: Tensor, indices, name=None) -> Tensor:\n",
+    "    return ops.Embedding(input, indices, name=name).out"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | export\n",
+    "def sigmoid(input: Tensor, name=None) -> Tensor:\n",
+    "    return ops.Sigmoid(input, name=name).out\n",
+    "\n",
+    "def tanh(input: Tensor, name=None) -> Tensor:\n",
+    "    return 2 * sigmoid(2 * input, name=name) - 1\n",
+    "\n",
+    "def relu(input: Tensor, name=None) -> Tensor:\n",
+    "    return ops.Relu(input, name=name).out"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "def sigmoid_gelu(x: Tensor):\n",
+    "    return x * sigmoid(1.702 * x)\n",
+    "\n",
+    "def gelu(input: Tensor):\n",
+    "    return (0.5 * input * (1.0 + tanh(math.sqrt(2.0 / math.pi) * (input + 0.044715 * input.pow(3)))))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "def softmax(input: Tensor, name=None) -> Tensor:\n",
+    "    exp = input.exp()\n",
+    "    return exp.div(exp.sum(axis=-1, keepdims=True), name=name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "\n",
+    "def layer_norm(x: Tensor, w: Tensor, b: Tensor, eps=1e-5) -> Tensor:\n",
+    "    mu = x.mean(axis=-1, keepdims=True)\n",
+    "    sigma = x.std(axis=-1, keepdims=True, correction=0)\n",
+    "\n",
+    "    return ((x-mu) / (sigma+eps)) * w + b  #  tensor[10, 768] n=7680 (30Kb) x∈[-0.788, 0.579] μ=-0.005 σ=0.106"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "\n",
+    "def stack(tensors: list[Tensor], axis=0, name=None):\n",
+    "    return ops.Stack(tensors, axis=axis, name=name).out\n",
+    "\n",
+    "def concat(tensors: list[Tensor], axis=0, name=None):\n",
+    "    return ops.Concat(tensors, axis=axis, name=name).out\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | export\n",
+    "def BCE_loss(logits: Tensor, target: Tensor, reduction=\"mean\"):\n",
+    "    loss = logits - logits*target + ops.ExpLog(-logits).out\n",
+    "    if reduction == \"mean\":\n",
+    "        return loss.mean()\n",
+    "    if reduction == \"sum\":\n",
+    "        return loss.sum()\n",
+    "    assert 0, \"Invalid reduction\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | export\n",
+    "\n",
+    "def CrossEntropy_loss(logits: Tensor, target: Tensor, reduction=\"mean\"):\n",
+    "    if not isinstance(target, Tensor):\n",
+    "        target = Tensor(target)\n",
+    "    sm = softmax(logits)\n",
+    "    loss = -target * sm.log()\n",
+    "    if reduction == \"mean\":\n",
+    "        return loss.mean()\n",
+    "    if reduction == \"sum\":\n",
+    "        return loss.sum()\n",
+    "    assert 0, \"Invalid reduction\""
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}