
Commit c7af08c: DDPG updated
CUN-bjy committed Nov 21, 2020 (parent: 110544d)
Showing 4 changed files with 21 additions and 17 deletions.
agent/actor.py: 7 changes (4 additions & 3 deletions)
@@ -82,9 +82,10 @@ def train(self, obs, critic, q_grads):
"""
with tf.GradientTape() as tape:
actions = self.network(obs)
actor_loss = -tf.reduce_mean(critic([obs,actions]))
# actor_grad = tape.gradient(self.network(obs), self.network.trainable_variables,-q_grads)
actor_grad = tape.gradient(actor_loss,self.network.trainable_variables)
# actor_loss = -tf.reduce_mean(critic([obs,actions]))
actor_grad = tape.gradient(self.network(obs), self.network.trainable_variables,-q_grads)
# tf.print("actor loss :",actor_loss)
# actor_grad = tape.gradient(actor_loss,self.network.trainable_variables)
self.optimizer.apply_gradients(zip(actor_grad,self.network.trainable_variables))

def target_update(self):
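Note on the actor.py change: the active update switches from an explicit loss, actor_loss = -tf.reduce_mean(critic([obs, actions])), to the deterministic policy gradient form, where the critic's action-gradient q_grads is chained through the policy network as output_gradients. Both ascend Q(s, mu(s)); the loss form averages over the batch while the output-gradients form sums, so they differ only by a constant factor absorbed into the learning rate. A minimal sketch of the output-gradients form, with illustrative names (actor_step, actor_net, optimizer) rather than the repository's actual attributes:

import tensorflow as tf

def actor_step(actor_net, optimizer, obs, q_grads):
    # Illustrative sketch, not the repository's code.
    # q_grads is dQ(s, a)/da from the critic, evaluated at a = actor_net(obs).
    with tf.GradientTape() as tape:
        actions = actor_net(obs)              # a = mu(s), recorded on the tape
    # Vector-Jacobian product: -dQ/da * da/dtheta = -dQ/dtheta,
    # so descending these gradients performs gradient ascent on Q.
    grads = tape.gradient(actions, actor_net.trainable_variables,
                          output_gradients=-q_grads)
    optimizer.apply_gradients(zip(grads, actor_net.trainable_variables))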
agent/ddpg.py: 4 changes (2 additions & 2 deletions)
@@ -68,7 +68,7 @@ def update_networks(self, obs, acts, critic_target):

# get next action and Q-value Gradient
n_actions = self.actor.network.predict(obs)
q_grads = None#self.critic.Qgradient(obs, n_actions)
q_grads = self.critic.Qgradient(obs, n_actions)

# update actor
self.actor.train(obs,self.critic.network,q_grads)
@@ -78,7 +78,7 @@ def update_networks(self, obs, acts, critic_target):
self.critic.target_update()

def replay(self, replay_num_):
if self.buffer.size() <= self.batch_size: return
if self.with_per and (self.buffer.size() <= self.batch_size): return

for _ in range(replay_num_):
# sample from buffer
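The ddpg.py change re-enables q_grads = self.critic.Qgradient(obs, n_actions), which supplies the action-gradient used by the actor step above. Qgradient's implementation is not part of this diff; a typical TF2 computation of dQ/da looks like the sketch below (function and argument names are illustrative, not the repository's method):

import tensorflow as tf

def q_gradient(critic_net, obs, actions):
    # Illustrative sketch of a dQ/da computation; returns one gradient row per sample.
    obs = tf.convert_to_tensor(obs)
    actions = tf.convert_to_tensor(actions)
    with tf.GradientTape() as tape:
        tape.watch(actions)                   # actions are inputs, so watch them explicitly
        q_values = critic_net([obs, actions])
    return tape.gradient(q_values, actions)   # shape: (batch_size, action_dim)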
requirements.txt: 5 changes (3 additions & 2 deletions)
@@ -1,7 +1,8 @@
gym==0.15.4
roboschool==1.0.48
tensorflow>=2.4.0
tensorflow==2.2.1
keras==2.4.3
scipy==1.4.1
tqdm
numpy==1.16.0
numpy==1.16.0
matplotlib
train.py: 22 changes (12 additions & 10 deletions)
@@ -29,24 +29,21 @@
import roboschool, gym
import numpy as np, time, os
from tqdm import tqdm
import matplotlib.pyplot as plt

import argparse

from agent.ddpg import ddpgAgent

NUM_EPISODES_ = 1000
NUM_EPISODES_ = 3000

def model_train(pretrained_):
# Create Environments
models = { 'cheetah':"RoboschoolHalfCheetah-v1",
'ant':'RoboschoolAnt-v1',
'pong':"RoboschoolPong-v1",
'walker':"RoboschoolWalker2d-v1",
'hopper':"RoboschoolHopper-v1",
'humanoid':"RoboschoolHumanoid-v1",
'humanoidflag':"RoboschoolHumanoidFlagrun-v1"}
'hopper':"RoboschoolHopper-v1"}

env = gym.make(models['cheetah'])
env = gym.make(models['hopper'])

# Create Agent model
agent = ddpgAgent(env, batch_size=500, w_per=False)
@@ -65,15 +62,19 @@ def model_train(pretrained_):
print("======================================")


logger = dict()
plt.ion()

try:
act_range = env.action_space.high
rewards = []
for epi in range(NUM_EPISODES_):
print("=========EPISODE # %d =========="%epi)
obs = env.reset()
actions, states, rewards, dones, new_states = [],[],[],[],[]

epi_reward = 0
for t in tqdm(range(steps)):
plt.pause(0.01)
# environment rendering on Graphics
env.render()

@@ -92,14 +93,15 @@ def model_train(pretrained_):
epi_reward = epi_reward + reward


if t%100 == 0: agent.replay(1)
if t%50 == 0: agent.replay(1)

# check if the episode is finished
if done or (t == steps-1):
# Replay
agent.replay(1)
print("Episode#%d, steps:%d, rewards:%f"%(epi,t,epi_reward))
if epi%30 == 0:
rewards.append(epi_reward)
if epi%50 == 0:
dir_path = "%s/weights"%os.getcwd()
if not os.path.isdir(dir_path):
os.mkdir(dir_path)
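The train.py hunks switch the default environment to hopper, raise NUM_EPISODES_ to 3000, call agent.replay(1) every 50 steps, record per-episode rewards, and checkpoint weights every 50 episodes; they also bring in plt.ion() and plt.pause(0.01), in line with the matplotlib dependency added to requirements.txt. For reference, a minimal standalone example of that interactive-plotting pattern (the plotted values are placeholders, not the training rewards):

import matplotlib.pyplot as plt

plt.ion()                                  # interactive mode: draw without blocking the loop
reward_history = []
for episode in range(20):
    reward_history.append(episode * 1.5)   # placeholder for an episode's total reward
    plt.clf()
    plt.plot(reward_history)
    plt.xlabel("episode")
    plt.ylabel("reward")
    plt.pause(0.01)                        # flush GUI events and redraw
plt.ioff()
plt.show()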
