
278 | 第 11 章
def validate(self, e, episodes):
''' 验证DQL智能体
'''
state = self.valid_env.reset()
state = np.reshape(state, [1, self.valid_env.lags,
self.valid_env.n_features])
for _ in range(10000):
action = np.argmax(self.model.predict(state)[0, 0])
next_state, reward, done, info = self.valid_env.step(action)
state = np.reshape(next_state, [1, self.valid_env.lags,
self.valid_env.n_features])
if done:
treward = _ + 1
perf = self.valid_env.performance
self.vperformances.append(perf)
if e % int(episodes / 6) == 0:
templ = 71 * '='
templ += '\nepisode: {:2d}/{} | VALIDATION | '
templ += 'treward: {:4d} | perf: {:5.3f} | eps: {:.2f}\n'
templ += 71 * ...