try to solve smallClassic

2024-07-18 19:18:55 +08:00
parent 1bf4cc1efe
commit ceae34ea86
6 changed files with 58 additions and 13 deletions
--- a/reinforcement/learningAgents.py
+++ b/reinforcement/learningAgents.py
@@ -249,6 +249,13 @@ class ReinforcementAgent(ValueEstimationAgent):
                print('\tAverage Rewards over testing: %.2f' % testAvg)
            print('\tAverage Rewards for last %d episodes: %.2f'  % (
                    NUM_EPS_UPDATE,windowAvg))
+            if windowAvg>-220:
+                if not hasattr(self,'best_window_avg_score'):
+                    self.best_window_avg_score=-300
+                if windowAvg>self.best_window_avg_score:
+                    print("find an excellent policy, ready to save model")
+                    self.save_model("para.best.bin")
+                    self.best_window_avg_score=windowAvg
            print('\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime))
            self.lastWindowAccumRewards = 0.0
            self.episodeStartTime = time.time()
@@ -256,3 +263,5 @@ class ReinforcementAgent(ValueEstimationAgent):
        if self.episodesSoFar == self.numTraining:
            msg = 'Training Done (turning off epsilon and alpha)'
            print('%s\n%s' % (msg,'-' * len(msg)))
+            import traceback
+            traceback.print_stack()