rein q4
This commit is contained in:
@ -58,7 +58,10 @@ class QLearningAgent(ReinforcementAgent):
|
||||
or the Q node value otherwise
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
|
||||
if (state, action) in self.qVals:
|
||||
return self.qVals[(state, action)]
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
|
||||
def computeValueFromQValues(self, state):
|
||||
@ -69,6 +72,12 @@ class QLearningAgent(ReinforcementAgent):
|
||||
terminal state, you should return a value of 0.0.
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
legalActions = self.getLegalActions(state)
|
||||
if not legalActions:
|
||||
return 0.0
|
||||
|
||||
maxQValue = max(self.getQValue(state, action) for action in legalActions)
|
||||
return maxQValue
|
||||
|
||||
|
||||
def computeActionFromQValues(self, state):
|
||||
@ -78,6 +87,14 @@ class QLearningAgent(ReinforcementAgent):
|
||||
you should return None.
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
legalActions = self.getLegalActions(state)
|
||||
if not legalActions:
|
||||
return None
|
||||
|
||||
maxQValue = self.computeValueFromQValues(state)
|
||||
bestActions = [action for action in legalActions if self.getQValue(state, action) == maxQValue]
|
||||
|
||||
return random.choice(bestActions)
|
||||
|
||||
|
||||
def getAction(self, state):
|
||||
@ -94,6 +111,13 @@ class QLearningAgent(ReinforcementAgent):
|
||||
legalActions = self.getLegalActions(state)
|
||||
action = None
|
||||
"*** YOUR CODE HERE ***"
|
||||
if not legalActions:
|
||||
return None
|
||||
|
||||
if util.flipCoin(self.epsilon):
|
||||
return random.choice(legalActions)
|
||||
else:
|
||||
return self.computeActionFromQValues(state)
|
||||
|
||||
|
||||
def update(self, state, action, nextState, reward: float):
|
||||
@ -105,6 +129,9 @@ class QLearningAgent(ReinforcementAgent):
|
||||
it will be called on your behalf
|
||||
"""
|
||||
"*** YOUR CODE HERE ***"
|
||||
sample = reward + self.discount * self.computeValueFromQValues(nextState)
|
||||
currentQValue = self.getQValue(state, action)
|
||||
self.qVals[(state, action)] = (1 - self.alpha) * currentQValue + self.alpha * sample
|
||||
|
||||
|
||||
def getPolicy(self, state):
|
||||
|
Reference in New Issue
Block a user