October 2018
Intermediate to advanced
252 pages
6h 49m
English
In this section, we look at how the agent is trained over a fixed number of episodes (the EPISODES constant), accumulating reward in each episode and decaying the exploration rate epsilon as training progresses:
if __name__ == "__main__": env = gym.make('CartPole-v1') output_file = open("cartpole_v1_output.csv","w+") state_size = env.observation_space.shape[0] action_size = env.action_space.n agent = DqnAgent(state_size, action_size) # agent.load("./save/cartpole-dqn.h5") done = False batch_size = 32 count = 0 for e in range(EPISODES): state = env.reset() state = np.reshape(state, [1, state_size]) for time in range(500): # env.render() action = agent.act(state) next_state, reward, done, _ = env.step(action) reward = reward if not done else -10 next_state = np.reshape(next_state, [1, state_size]) agent.remember(state, ...