diff --git a/tutorials/W3D4_ReinforcementLearning/W3D4_Tutorial3.ipynb b/tutorials/W3D4_ReinforcementLearning/W3D4_Tutorial3.ipynb
index ed042cc67a..231b8a8425 100644
--- a/tutorials/W3D4_ReinforcementLearning/W3D4_Tutorial3.ipynb
+++ b/tutorials/W3D4_ReinforcementLearning/W3D4_Tutorial3.ipynb
@@ -61,12 +61,12 @@
 "metadata": {
 "cellView": "form",
 "execution": {},
- "id": "WBY85G-tPoUy",
- "outputId": "a82a8b42-9062-42d6-ae64-9e4ee35c0ee6",
 "colab": {
 "base_uri": "https://localhost:8080/",
 "height": 519
- }
+ },
+ "id": "WBY85G-tPoUy",
+ "outputId": "a82a8b42-9062-42d6-ae64-9e4ee35c0ee6"
 },
 "outputs": [
 {
@@ -125,11 +125,11 @@
 "metadata": {
 "cellView": "form",
 "execution": {},
- "id": "BapijHbdPoUz",
- "outputId": "0948c73b-e899-46d8-c796-e94b99f2ed50",
 "colab": {
 "base_uri": "https://localhost:8080/"
- }
+ },
+ "id": "BapijHbdPoUz",
+ "outputId": "0948c73b-e899-46d8-c796-e94b99f2ed50"
 },
 "outputs": [
 {
@@ -333,8 +333,6 @@
 "metadata": {
 "cellView": "form",
 "execution": {},
- "id": "zNzx_ovmPoU1",
- "outputId": "ae7374d6-cdbe-44de-e79e-cf1475fb3230",
 "colab": {
 "base_uri": "https://localhost:8080/",
 "height": 581,
@@ -346,7 +344,9 @@
 "2bd6c36e111d4e4ab5da9472adcef709",
 "ad2363c8f35b4ae793d0e8abe5bcbd94"
 ]
- }
+ },
+ "id": "zNzx_ovmPoU1",
+ "outputId": "ae7374d6-cdbe-44de-e79e-cf1475fb3230"
 },
 "outputs": [
 {
@@ -418,8 +418,6 @@
 "metadata": {
 "cellView": "form",
 "execution": {},
- "id": "KroZAJ7KPoU2",
- "outputId": "191a5def-3ecb-421a-b7ff-1ceb284410d4",
 "colab": {
 "base_uri": "https://localhost:8080/",
 "height": 81,
@@ -452,7 +450,9 @@
 "8a8a299ba9ab4d579a75706854abae04",
 "a63344b795f24695a8712f6634695ee0"
 ]
- }
+ },
+ "id": "KroZAJ7KPoU2",
+ "outputId": "191a5def-3ecb-421a-b7ff-1ceb284410d4"
 },
 "outputs": [
 {
@@ -490,7 +490,7 @@
 "\n",
 "For our discussion we will be looking at the classic Cliff World, or Cliff Walker, environment. This is a 4x10 grid with a starting position in the lower-left and the goal position in the lower-right. Every tile between these two is the \"cliff\", and should the agent enter the cliff, they will receive a -100 reward and be sent back to the starting position. Every tile other than the cliff produces a -1 reward when entered. The goal tile ends the episode after taking any action from it.\n",
 "\n",
- "\"CliffWorld\"\n",
+ "\"CliffWorld\"\n",
 "\n",
 "Given these conditions, the maximum achievable reward is -11 (1 up, 9 right, 1 down). Using negative rewards is a common technique to encourage the agent to move and seek out the goal state as fast as possible."
 ]
@@ -756,12 +756,12 @@
 "execution_count": 11,
 "metadata": {
 "execution": {},
- "id": "f6j2YOPJPoU3",
- "outputId": "990d76ac-ae37-4d68-a740-eae4595343d4",
 "colab": {
 "base_uri": "https://localhost:8080/",
 "height": 1000
- }
+ },
+ "id": "f6j2YOPJPoU3",
+ "outputId": "990d76ac-ae37-4d68-a740-eae4595343d4"
 },
 "outputs": [
 {
@@ -844,8 +844,6 @@
 "metadata": {
 "cellView": "form",
 "execution": {},
- "id": "kCanClIhPoU3",
- "outputId": "522f884b-4994-455a-d7cc-a09048c6f5a0",
 "colab": {
 "base_uri": "https://localhost:8080/",
 "height": 81,
@@ -878,7 +876,9 @@
 "0d5b103794d24ab7a391c2125d147f85",
 "f1c38826e56f4ecebec2b5731f49a906"
 ]
- }
+ },
+ "id": "kCanClIhPoU3",
+ "outputId": "522f884b-4994-455a-d7cc-a09048c6f5a0"
 },
 "outputs": [
 {
@@ -1049,12 +1049,12 @@
 "execution_count": 14,
 "metadata": {
 "execution": {},
- "id": "OioHjCPCPoU4",
- "outputId": "c4f62d4e-d3bc-4108-fec0-8ea92b41a921",
 "colab": {
 "base_uri": "https://localhost:8080/",
 "height": 1000
- }
+ },
+ "id": "OioHjCPCPoU4",
+ "outputId": "c4f62d4e-d3bc-4108-fec0-8ea92b41a921"
 },
 "outputs": [
 {
@@ -1142,8 +1142,6 @@
 "metadata": {
 "cellView": "form",
 "execution": {},
- "id": "jmBTJQaDPoU4",
- "outputId": "f048479a-4cf6-44cb-d6c1-c3c2879ad7d1",
 "colab": {
 "base_uri": "https://localhost:8080/",
 "height": 81,
@@ -1176,7 +1174,9 @@
 "4796bc1c63134c1681a30369839265db",
 "3529206a31b54dcaba8eb84189ec5f6b"
 ]
- }
+ },
+ "id": "jmBTJQaDPoU4",
+ "outputId": "f048479a-4cf6-44cb-d6c1-c3c2879ad7d1"
 },
 "outputs": [
 {
@@ -1230,12 +1230,12 @@
 "metadata": {
 "cellView": "form",
 "execution": {},
- "id": "hquQlEAnPoU5",
- "outputId": "4d6bb3a7-94b7-4e7e-ad72-c1f487d3ef08",
 "colab": {
 "base_uri": "https://localhost:8080/",
 "height": 591
- }
+ },
+ "id": "hquQlEAnPoU5",
+ "outputId": "4d6bb3a7-94b7-4e7e-ad72-c1f487d3ef08"
 },
 "outputs": [
 {
@@ -1303,12 +1303,12 @@
 "metadata": {
 "cellView": "form",
 "execution": {},
- "id": "jcXbRqzkPoU5",
- "outputId": "9734efec-1d39-4f32-e858-b099f54a4bc6",
 "colab": {
 "base_uri": "https://localhost:8080/",
 "height": 592
- }
+ },
+ "id": "jcXbRqzkPoU5",
+ "outputId": "9734efec-1d39-4f32-e858-b099f54a4bc6"
 },
 "outputs": [
 {
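
For reference, the reward structure described in the Cliff World markdown cell touched by this diff can be sanity-checked with a short Python sketch. This is not part of the patch or of the tutorial code; the grid constants and the `step_reward` helper are illustrative names chosen here to reproduce the -11 optimal-return arithmetic (1 up, 9 right, 1 down on a 4x10 grid).

```python
# Sketch of the 4x10 Cliff World rewards described in the markdown cell:
# entering a cliff tile costs -100, entering any other tile costs -1,
# so the best possible episode return is -11 (11 moves, none into the cliff).
N_ROWS, N_COLS = 4, 10
START = (3, 0)                                   # lower-left corner
GOAL = (3, N_COLS - 1)                           # lower-right corner
CLIFF = {(3, c) for c in range(1, N_COLS - 1)}   # tiles between start and goal

def step_reward(state):
    """Reward for entering `state` (hypothetical helper, illustration only)."""
    return -100 if state in CLIFF else -1

# Optimal path: up once, right nine times, down once -> 11 transitions.
optimal_path = [(2, 0)] + [(2, c) for c in range(1, N_COLS)] + [GOAL]
assert START not in CLIFF and GOAL not in CLIFF
assert not any(s in CLIFF for s in optimal_path)
print(sum(step_reward(s) for s in optimal_path))  # -11
```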