rein q6
This commit is contained in:
@ -192,12 +192,21 @@ class ApproximateQAgent(PacmanQAgent):
|
|||||||
where * is the dotProduct operator
|
where * is the dotProduct operator
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
|
featureVector = self.featExtractor.getFeatures(state, action)
|
||||||
|
qValue = 0
|
||||||
|
for feature in featureVector:
|
||||||
|
qValue += self.weights[feature] * featureVector[feature]
|
||||||
|
return qValue
|
||||||
|
|
||||||
def update(self, state, action, nextState, reward: float):
|
def update(self, state, action, nextState, reward: float):
|
||||||
"""
|
"""
|
||||||
Should update your weights based on transition
|
Should update your weights based on transition
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
|
featureVector = self.featExtractor.getFeatures(state, action)
|
||||||
|
difference = reward + self.discount * self.computeValueFromQValues(nextState) - self.getQValue(state, action)
|
||||||
|
for feature in featureVector:
|
||||||
|
self.weights[feature] += self.alpha * difference * featureVector[feature]
|
||||||
|
|
||||||
|
|
||||||
def final(self, state):
|
def final(self, state):
|
||||||
|
@ -1,71 +0,0 @@
|
|||||||
==================== Iteration 0 ====================
|
|
||||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
@ -1,71 +0,0 @@
|
|||||||
==================== Iteration 0 ====================
|
|
||||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal
|
|
||||||
0.0000
|
|
||||||
illegal
|
|
||||||
"""
|
|
||||||
|
|
@ -1,111 +0,0 @@
|
|||||||
==================== Iteration 0 ====================
|
|
||||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
__________ 0.0000 __________
|
|
||||||
0.0000 illegal 0.0000
|
|
||||||
0.0000 illegal 0.0000
|
|
||||||
0.0000 illegal 0.0000
|
|
||||||
0.0000 illegal 0.0000
|
|
||||||
0.0000 illegal 0.0000
|
|
||||||
__________ 0.0000 __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
illegal illegal illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
__________ illegal __________
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
illegal 0.0000 illegal
|
|
||||||
__________ illegal __________
|
|
||||||
"""
|
|
||||||
|
|
@ -1,91 +0,0 @@
|
|||||||
==================== Iteration 0 ====================
|
|
||||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
0.0000 illegal 0.0000 illegal illegal
|
|
||||||
0.0000 illegal __________ illegal illegal
|
|
||||||
0.0000 illegal 0.0000 illegal illegal
|
|
||||||
0.0000 illegal __________ __________ illegal
|
|
||||||
0.0000 illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
@ -1,91 +0,0 @@
|
|||||||
==================== Iteration 0 ====================
|
|
||||||
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_south: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_west: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_exit: """
|
|
||||||
0.0000 illegal 0.0000 illegal illegal
|
|
||||||
0.0000 illegal __________ illegal illegal
|
|
||||||
0.0000 illegal 0.0000 illegal illegal
|
|
||||||
0.0000 illegal __________ __________ illegal
|
|
||||||
0.0000 illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_east: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
||||||
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ illegal illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
illegal illegal __________ __________ illegal
|
|
||||||
illegal illegal illegal illegal illegal
|
|
||||||
"""
|
|
||||||
|
|
||||||
Correct solution:
|
|
||||||
q_values_k_0_action_north: """
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ 0.0000 0.0000
|
|
||||||
illegal 0.0000 illegal 0.0000 0.0000
|
|
||||||
illegal 0.0000 __________ __________ 0.0000
|
|
||||||
illegal 0.0000 0.0000 0.0000 0.0000
|
|
||||||
"""
|
|
||||||
|
|
Reference in New Issue
Block a user