rein q4
This commit is contained in:
@ -58,7 +58,10 @@ class QLearningAgent(ReinforcementAgent):
|
|||||||
or the Q node value otherwise
|
or the Q node value otherwise
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
|
if (state, action) in self.qVals:
|
||||||
|
return self.qVals[(state, action)]
|
||||||
|
else:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
|
||||||
def computeValueFromQValues(self, state):
|
def computeValueFromQValues(self, state):
|
||||||
@ -69,6 +72,12 @@ class QLearningAgent(ReinforcementAgent):
|
|||||||
terminal state, you should return a value of 0.0.
|
terminal state, you should return a value of 0.0.
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
|
legalActions = self.getLegalActions(state)
|
||||||
|
if not legalActions:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
maxQValue = max(self.getQValue(state, action) for action in legalActions)
|
||||||
|
return maxQValue
|
||||||
|
|
||||||
|
|
||||||
def computeActionFromQValues(self, state):
|
def computeActionFromQValues(self, state):
|
||||||
@ -78,6 +87,14 @@ class QLearningAgent(ReinforcementAgent):
|
|||||||
you should return None.
|
you should return None.
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
|
legalActions = self.getLegalActions(state)
|
||||||
|
if not legalActions:
|
||||||
|
return None
|
||||||
|
|
||||||
|
maxQValue = self.computeValueFromQValues(state)
|
||||||
|
bestActions = [action for action in legalActions if self.getQValue(state, action) == maxQValue]
|
||||||
|
|
||||||
|
return random.choice(bestActions)
|
||||||
|
|
||||||
|
|
||||||
def getAction(self, state):
|
def getAction(self, state):
|
||||||
@ -94,6 +111,13 @@ class QLearningAgent(ReinforcementAgent):
|
|||||||
legalActions = self.getLegalActions(state)
|
legalActions = self.getLegalActions(state)
|
||||||
action = None
|
action = None
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
|
if not legalActions:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if util.flipCoin(self.epsilon):
|
||||||
|
return random.choice(legalActions)
|
||||||
|
else:
|
||||||
|
return self.computeActionFromQValues(state)
|
||||||
|
|
||||||
|
|
||||||
def update(self, state, action, nextState, reward: float):
|
def update(self, state, action, nextState, reward: float):
|
||||||
@ -105,6 +129,9 @@ class QLearningAgent(ReinforcementAgent):
|
|||||||
it will be called on your behalf
|
it will be called on your behalf
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
|
sample = reward + self.discount * self.computeValueFromQValues(nextState)
|
||||||
|
currentQValue = self.getQValue(state, action)
|
||||||
|
self.qVals[(state, action)] = (1 - self.alpha) * currentQValue + self.alpha * sample
|
||||||
|
|
||||||
|
|
||||||
def getPolicy(self, state):
|
def getPolicy(self, state):
|
||||||
|
@ -1,70 +0,0 @@
|
|||||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
@ -1,70 +0,0 @@
|
|||||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
@ -1,110 +0,0 @@
|
|||||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
__________ 0.0000 __________
|
|
||||||
0.0000 illegal 0.0000
|
|
||||||
0.0000 illegal 0.0000
|
|
||||||
0.0000 illegal 0.0000
|
|
||||||
0.0000 illegal 0.0000
|
|
||||||
0.0000 illegal 0.0000
|
|
||||||
__________ 0.0000 __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
@ -1,90 +0,0 @@
|
|||||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
0.0000 illegal 0.0000 illegal illegal
|
|
||||||
0.0000 illegal __________ illegal illegal
|
|
||||||
0.0000 illegal 0.0000 illegal illegal
|
|
||||||
0.0000 illegal __________ __________ illegal
|
|
||||||
0.0000 illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
Reference in New Issue
Block a user