From 3ee56a1b608bcc528ea5062690e238711c5c1f58 Mon Sep 17 00:00:00 2001 From: mmamedli Date: Mon, 11 Jan 2021 20:40:49 +0300 Subject: [PATCH] Delete gym_interface.ipynb --- week1_intro/gym_interface.ipynb | 218 -------------------------------- 1 file changed, 218 deletions(-) delete mode 100644 week1_intro/gym_interface.ipynb diff --git a/week1_intro/gym_interface.ipynb b/week1_intro/gym_interface.ipynb deleted file mode 100644 index 4ef90609f..000000000 --- a/week1_intro/gym_interface.ipynb +++ /dev/null @@ -1,218 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys, os\n", - "if 'google.colab' in sys.modules and not os.path.exists('.setup_complete'):\n", - " !wget -q https://raw.githubusercontent.com/yandexdataschool/Practical_RL/master/setup_colab.sh -O- | bash\n", - "\n", - " !wget -q https://raw.githubusercontent.com/yandexdataschool/Practical_RL/coursera/grading.py -O ../grading.py\n", - " !wget -q https://raw.githubusercontent.com/yandexdataschool/Practical_RL/coursera/week1_intro/submit.py\n", - "\n", - " !touch .setup_complete\n", - "\n", - "# This code creates a virtual display to draw game images on.\n", - "# It will have no effect if your machine has a monitor.\n", - "if type(os.environ.get(\"DISPLAY\")) is not str or len(os.environ.get(\"DISPLAY\")) == 0:\n", - " !bash ../xvfb start\n", - " os.environ['DISPLAY'] = ':1'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### OpenAI Gym\n", - "\n", - "We're gonna spend several next weeks learning algorithms that solve decision processes. We are then in need of some interesting decision problems to test our algorithms.\n", - "\n", - "That's where OpenAI Gym comes into play. It's a Python library that wraps many classical decision problems including robot control, videogames and board games.\n", - "\n", - "So here's how it works:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import gym\n", - "\n", - "env = gym.make(\"MountainCar-v0\")\n", - "env.reset()\n", - "\n", - "plt.imshow(env.render('rgb_array'))\n", - "print(\"Observation space:\", env.observation_space)\n", - "print(\"Action space:\", env.action_space)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note: if you're running this on your local machine, you'll see a window pop up with the image above. Don't close it, just alt-tab away." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Gym interface\n", - "\n", - "The three main methods of an environment are\n", - "* `reset()`: reset environment to the initial state, _return first observation_\n", - "* `render()`: show current environment state (a more colorful version :) )\n", - "* `step(a)`: commit action `a` and return `(new_observation, reward, is_done, info)`\n", - " * `new_observation`: an observation right after committing the action `a`\n", - " * `reward`: a number representing your reward for committing action `a`\n", - " * `is_done`: True if the MDP has just finished, False if still in progress\n", - " * `info`: some auxiliary stuff about what just happened. For now, ignore it." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "obs0 = env.reset()\n", - "print(\"initial observation code:\", obs0)\n", - "\n", - "# Note: in MountainCar, observation is just two numbers: car position and velocity" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"taking action 2 (right)\")\n", - "new_obs, reward, is_done, _ = env.step(2)\n", - "\n", - "print(\"new observation code:\", new_obs)\n", - "print(\"reward:\", reward)\n", - "print(\"is game over?:\", is_done)\n", - "\n", - "# Note: as you can see, the car has moved to the right slightly (around 0.0005)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Play with it\n", - "\n", - "Below is the code that drives the car to the right. However, if you simply use the default policy, the car will not reach the flag at the far right due to gravity.\n", - "\n", - "__Your task__ is to fix it. Find a strategy that reaches the flag. \n", - "\n", - "You are not required to build any sophisticated algorithms for now, and you definitely don't need to know any reinforcement learning for this. Feel free to hard-code :)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from IPython import display\n", - "\n", - "# Create env manually to set time limit. Please don't change this.\n", - "TIME_LIMIT = 250\n", - "env = gym.wrappers.TimeLimit(\n", - " gym.envs.classic_control.MountainCarEnv(),\n", - " max_episode_steps=TIME_LIMIT + 1,\n", - ")\n", - "actions = {'left': 0, 'stop': 1, 'right': 2}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def policy(obs, t):\n", - " # Write the code for your policy here. You can use the observation\n", - " # (a tuple of position and velocity), the current time step, or both,\n", - " # if you want.\n", - " position, velocity = obs\n", - " \n", - " # This is an example policy. You can try running it, but it will not work.\n", - " # Your goal is to fix that. You don't need anything sophisticated here,\n", - " # and you can hard-code any policy that seems to work.\n", - " # Hint: think how you would make a swing go farther and faster.\n", - " return actions['right']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(4, 3))\n", - "display.clear_output(wait=True)\n", - "\n", - "obs = env.reset()\n", - "for t in range(TIME_LIMIT):\n", - " plt.gca().clear()\n", - " \n", - " action = policy(obs, t) # Call your policy\n", - " obs, reward, done, _ = env.step(action) # Pass the action chosen by the policy to the environment\n", - " \n", - " # We don't do anything with reward here because MountainCar is a very simple environment,\n", - " # and reward is a constant -1. Therefore, your goal is to end the episode as quickly as possible.\n", - "\n", - " # Draw game image on display.\n", - " plt.imshow(env.render('rgb_array'))\n", - " \n", - " display.display(plt.gcf())\n", - " display.clear_output(wait=True)\n", - "\n", - " if done:\n", - " print(\"Well done!\")\n", - " break\n", - "else:\n", - " print(\"Time limit exceeded. Try again.\")\n", - "\n", - "display.clear_output(wait=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from submit import submit_interface\n", - "submit_interface(policy, 'your.email@example.com', 'YourAssignmentToken')" - ] - } - ], - "metadata": { - "language_info": { - "name": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 1 -}