This commit is contained in:
2024-07-08 18:50:23 +08:00
parent 88ed6ea135
commit 675529e9dc
5 changed files with 28 additions and 341 deletions

View File

@ -58,7 +58,10 @@ class QLearningAgent(ReinforcementAgent):
or the Q node value otherwise
"""
"*** YOUR CODE HERE ***"
if (state, action) in self.qVals:
return self.qVals[(state, action)]
else:
return 0.0
def computeValueFromQValues(self, state):
@ -69,6 +72,12 @@ class QLearningAgent(ReinforcementAgent):
terminal state, you should return a value of 0.0.
"""
"*** YOUR CODE HERE ***"
legalActions = self.getLegalActions(state)
if not legalActions:
return 0.0
maxQValue = max(self.getQValue(state, action) for action in legalActions)
return maxQValue
def computeActionFromQValues(self, state):
@ -78,6 +87,14 @@ class QLearningAgent(ReinforcementAgent):
you should return None.
"""
"*** YOUR CODE HERE ***"
legalActions = self.getLegalActions(state)
if not legalActions:
return None
maxQValue = self.computeValueFromQValues(state)
bestActions = [action for action in legalActions if self.getQValue(state, action) == maxQValue]
return random.choice(bestActions)
def getAction(self, state):
@ -94,6 +111,13 @@ class QLearningAgent(ReinforcementAgent):
legalActions = self.getLegalActions(state)
action = None
"*** YOUR CODE HERE ***"
if not legalActions:
return None
if util.flipCoin(self.epsilon):
return random.choice(legalActions)
else:
return self.computeActionFromQValues(state)
def update(self, state, action, nextState, reward: float):
@ -105,6 +129,9 @@ class QLearningAgent(ReinforcementAgent):
it will be called on your behalf
"""
"*** YOUR CODE HERE ***"
# New estimate from this transition: the reward plus the discounted
# value of the successor state.
target = reward + self.discount * self.computeValueFromQValues(nextState)
previous = self.getQValue(state, action)
# Blend the old estimate and the new one, weighted by self.alpha.
retained = 1 - self.alpha
self.qVals[(state, action)] = retained * previous + self.alpha * target
def getPolicy(self, state):