rein q4

2024-07-08 18:50:23 +08:00
parent 88ed6ea135
commit 675529e9dc
5 changed files with 28 additions and 341 deletions
--- a/reinforcement/qlearningAgents.py
+++ b/reinforcement/qlearningAgents.py
@ -58,7 +58,10 @@ class QLearningAgent(ReinforcementAgent):
          or the Q node value otherwise
        """
        "*** YOUR CODE HERE ***"
-
+        if (state, action) in self.qVals:
+            return self.qVals[(state, action)]
+        else:
+            return 0.0


    def computeValueFromQValues(self, state):
@ -69,6 +72,12 @@ class QLearningAgent(ReinforcementAgent):
          terminal state, you should return a value of 0.0.
        """
        "*** YOUR CODE HERE ***"
+        legalActions = self.getLegalActions(state)
+        if not legalActions:  # no legal actions (the terminal-state case in the docstring)
+            return 0.0
+        
+        maxQValue = max(self.getQValue(state, action) for action in legalActions)  # largest Q-value over the legal actions
+        return maxQValue
        

    def computeActionFromQValues(self, state):
@ -78,6 +87,14 @@ class QLearningAgent(ReinforcementAgent):
          you should return None.
        """
        "*** YOUR CODE HERE ***"
+        legalActions = self.getLegalActions(state)
+        if not legalActions:  # nothing to choose from: return None as the docstring requires
+            return None
+        
+        maxQValue = self.computeValueFromQValues(state)  # max Q-value over this state's legal actions
+        bestActions = [action for action in legalActions if self.getQValue(state, action) == maxQValue]  # keep every action tied for the max
+        
+        return random.choice(bestActions)  # break ties by picking one of the best actions at random
        

    def getAction(self, state):
@ -94,6 +111,13 @@ class QLearningAgent(ReinforcementAgent):
        legalActions = self.getLegalActions(state)
        action = None
        "*** YOUR CODE HERE ***"
+        if not legalActions:  # no legal actions: there is nothing to pick
+            return None
+        
+        if util.flipCoin(self.epsilon):  # presumably True with probability epsilon -- confirm against util.flipCoin
+            return random.choice(legalActions)  # explore: any legal action, chosen at random
+        else:
+            return self.computeActionFromQValues(state)  # exploit: action favored by the current Q-values


    def update(self, state, action, nextState, reward: float):
@ -105,6 +129,9 @@ class QLearningAgent(ReinforcementAgent):
          it will be called on your behalf
        """
        "*** YOUR CODE HERE ***"
+        sample = reward + self.discount * self.computeValueFromQValues(nextState)  # reward plus discounted value of nextState
+        currentQValue = self.getQValue(state, action)  # current estimate for (state, action)
+        self.qVals[(state, action)] = (1 - self.alpha) * currentQValue + self.alpha * sample  # blend old estimate and sample, weighted by alpha
 

    def getPolicy(self, state):