Try to solve smallClassic

This commit is contained in:
2024-07-18 19:18:55 +08:00
parent 1bf4cc1efe
commit ceae34ea86
6 changed files with 58 additions and 13 deletions

View File

@@ -249,6 +249,13 @@ class ReinforcementAgent(ValueEstimationAgent):
print('\tAverage Rewards over testing: %.2f' % testAvg)
print('\tAverage Rewards for last %d episodes: %.2f' % (
NUM_EPS_UPDATE,windowAvg))
if windowAvg>-220:
if not hasattr(self,'best_window_avg_score'):
self.best_window_avg_score=-300
if windowAvg>self.best_window_avg_score:
print("find an excellent policy, ready to save model")
self.save_model("para.best.bin")
self.best_window_avg_score=windowAvg
print('\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime))
self.lastWindowAccumRewards = 0.0
self.episodeStartTime = time.time()
@@ -256,3 +263,5 @@ class ReinforcementAgent(ValueEstimationAgent):
if self.episodesSoFar == self.numTraining:
msg = 'Training Done (turning off epsilon and alpha)'
print('%s\n%s' % (msg,'-' * len(msg)))
import traceback
traceback.print_stack()