diff --git a/reinforcement/qlearningAgents.py b/reinforcement/qlearningAgents.py index 73c1ae5..b72b86f 100644 --- a/reinforcement/qlearningAgents.py +++ b/reinforcement/qlearningAgents.py @@ -192,12 +192,21 @@ class ApproximateQAgent(PacmanQAgent): where * is the dotProduct operator """ "*** YOUR CODE HERE ***" + featureVector = self.featExtractor.getFeatures(state, action) + qValue = 0 + for feature in featureVector: + qValue += self.weights[feature] * featureVector[feature] + return qValue def update(self, state, action, nextState, reward: float): """ Should update your weights based on transition """ "*** YOUR CODE HERE ***" + featureVector = self.featExtractor.getFeatures(state, action) + difference = reward + self.discount * self.computeValueFromQValues(nextState) - self.getQValue(state, action) + for feature in featureVector: + self.weights[feature] += self.alpha * difference * featureVector[feature] def final(self, state): diff --git a/reinforcement/test_cases/q6/1-tinygrid.test_output b/reinforcement/test_cases/q6/1-tinygrid.test_output deleted file mode 100644 index 3a86929..0000000 --- a/reinforcement/test_cases/q6/1-tinygrid.test_output +++ /dev/null @@ -1,71 +0,0 @@ -==================== Iteration 0 ==================== -Q-Values at iteration 0 for action 'south' are NOT correct. Student solution: - q_values_k_0_action_south: """ - illegal - illegal - illegal -""" - - Correct solution: - q_values_k_0_action_south: """ - illegal - 0.0000 - illegal -""" - -Q-Values at iteration 0 for action 'west' are NOT correct. Student solution: - q_values_k_0_action_west: """ - illegal - illegal - illegal -""" - - Correct solution: - q_values_k_0_action_west: """ - illegal - 0.0000 - illegal -""" - -Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution: - q_values_k_0_action_exit: """ - illegal - illegal - illegal -""" - - Correct solution: - q_values_k_0_action_exit: """ - 0.0000 - illegal - 0.0000 -""" - -Q-Values at iteration 0 for action 'east' are NOT correct. Student solution: - q_values_k_0_action_east: """ - illegal - illegal - illegal -""" - - Correct solution: - q_values_k_0_action_east: """ - illegal - 0.0000 - illegal -""" - -Q-Values at iteration 0 for action 'north' are NOT correct. Student solution: - q_values_k_0_action_north: """ - illegal - illegal - illegal -""" - - Correct solution: - q_values_k_0_action_north: """ - illegal - 0.0000 - illegal -""" - diff --git a/reinforcement/test_cases/q6/2-tinygrid-noisy.test_output b/reinforcement/test_cases/q6/2-tinygrid-noisy.test_output deleted file mode 100644 index 3a86929..0000000 --- a/reinforcement/test_cases/q6/2-tinygrid-noisy.test_output +++ /dev/null @@ -1,71 +0,0 @@ -==================== Iteration 0 ==================== -Q-Values at iteration 0 for action 'south' are NOT correct. Student solution: - q_values_k_0_action_south: """ - illegal - illegal - illegal -""" - - Correct solution: - q_values_k_0_action_south: """ - illegal - 0.0000 - illegal -""" - -Q-Values at iteration 0 for action 'west' are NOT correct. Student solution: - q_values_k_0_action_west: """ - illegal - illegal - illegal -""" - - Correct solution: - q_values_k_0_action_west: """ - illegal - 0.0000 - illegal -""" - -Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution: - q_values_k_0_action_exit: """ - illegal - illegal - illegal -""" - - Correct solution: - q_values_k_0_action_exit: """ - 0.0000 - illegal - 0.0000 -""" - -Q-Values at iteration 0 for action 'east' are NOT correct. Student solution: - q_values_k_0_action_east: """ - illegal - illegal - illegal -""" - - Correct solution: - q_values_k_0_action_east: """ - illegal - 0.0000 - illegal -""" - -Q-Values at iteration 0 for action 'north' are NOT correct. Student solution: - q_values_k_0_action_north: """ - illegal - illegal - illegal -""" - - Correct solution: - q_values_k_0_action_north: """ - illegal - 0.0000 - illegal -""" - diff --git a/reinforcement/test_cases/q6/3-bridge.test_output b/reinforcement/test_cases/q6/3-bridge.test_output deleted file mode 100644 index 9b52b5a..0000000 --- a/reinforcement/test_cases/q6/3-bridge.test_output +++ /dev/null @@ -1,111 +0,0 @@ -==================== Iteration 0 ==================== -Q-Values at iteration 0 for action 'south' are NOT correct. Student solution: - q_values_k_0_action_south: """ - __________ illegal __________ - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - __________ illegal __________ -""" - - Correct solution: - q_values_k_0_action_south: """ - __________ illegal __________ - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - __________ illegal __________ -""" - -Q-Values at iteration 0 for action 'west' are NOT correct. Student solution: - q_values_k_0_action_west: """ - __________ illegal __________ - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - __________ illegal __________ -""" - - Correct solution: - q_values_k_0_action_west: """ - __________ illegal __________ - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - __________ illegal __________ -""" - -Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution: - q_values_k_0_action_exit: """ - __________ illegal __________ - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - __________ illegal __________ -""" - - Correct solution: - q_values_k_0_action_exit: """ - __________ 0.0000 __________ - 0.0000 illegal 0.0000 - 0.0000 illegal 0.0000 - 0.0000 illegal 0.0000 - 0.0000 illegal 0.0000 - 0.0000 illegal 0.0000 - __________ 0.0000 __________ -""" - -Q-Values at iteration 0 for action 'east' are NOT correct. Student solution: - q_values_k_0_action_east: """ - __________ illegal __________ - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - __________ illegal __________ -""" - - Correct solution: - q_values_k_0_action_east: """ - __________ illegal __________ - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - __________ illegal __________ -""" - -Q-Values at iteration 0 for action 'north' are NOT correct. Student solution: - q_values_k_0_action_north: """ - __________ illegal __________ - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - illegal illegal illegal - __________ illegal __________ -""" - - Correct solution: - q_values_k_0_action_north: """ - __________ illegal __________ - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - illegal 0.0000 illegal - __________ illegal __________ -""" - diff --git a/reinforcement/test_cases/q6/4-discountgrid.test_output b/reinforcement/test_cases/q6/4-discountgrid.test_output deleted file mode 100644 index 7ab24e6..0000000 --- a/reinforcement/test_cases/q6/4-discountgrid.test_output +++ /dev/null @@ -1,91 +0,0 @@ -==================== Iteration 0 ==================== -Q-Values at iteration 0 for action 'south' are NOT correct. Student solution: - q_values_k_0_action_south: """ - illegal illegal illegal illegal illegal - illegal illegal __________ illegal illegal - illegal illegal illegal illegal illegal - illegal illegal __________ __________ illegal - illegal illegal illegal illegal illegal -""" - - Correct solution: - q_values_k_0_action_south: """ - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ 0.0000 0.0000 - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ __________ 0.0000 - illegal 0.0000 0.0000 0.0000 0.0000 -""" - -Q-Values at iteration 0 for action 'west' are NOT correct. Student solution: - q_values_k_0_action_west: """ - illegal illegal illegal illegal illegal - illegal illegal __________ illegal illegal - illegal illegal illegal illegal illegal - illegal illegal __________ __________ illegal - illegal illegal illegal illegal illegal -""" - - Correct solution: - q_values_k_0_action_west: """ - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ 0.0000 0.0000 - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ __________ 0.0000 - illegal 0.0000 0.0000 0.0000 0.0000 -""" - -Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution: - q_values_k_0_action_exit: """ - illegal illegal illegal illegal illegal - illegal illegal __________ illegal illegal - illegal illegal illegal illegal illegal - illegal illegal __________ __________ illegal - illegal illegal illegal illegal illegal -""" - - Correct solution: - q_values_k_0_action_exit: """ - 0.0000 illegal 0.0000 illegal illegal - 0.0000 illegal __________ illegal illegal - 0.0000 illegal 0.0000 illegal illegal - 0.0000 illegal __________ __________ illegal - 0.0000 illegal illegal illegal illegal -""" - -Q-Values at iteration 0 for action 'east' are NOT correct. Student solution: - q_values_k_0_action_east: """ - illegal illegal illegal illegal illegal - illegal illegal __________ illegal illegal - illegal illegal illegal illegal illegal - illegal illegal __________ __________ illegal - illegal illegal illegal illegal illegal -""" - - Correct solution: - q_values_k_0_action_east: """ - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ 0.0000 0.0000 - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ __________ 0.0000 - illegal 0.0000 0.0000 0.0000 0.0000 -""" - -Q-Values at iteration 0 for action 'north' are NOT correct. Student solution: - q_values_k_0_action_north: """ - illegal illegal illegal illegal illegal - illegal illegal __________ illegal illegal - illegal illegal illegal illegal illegal - illegal illegal __________ __________ illegal - illegal illegal illegal illegal illegal -""" - - Correct solution: - q_values_k_0_action_north: """ - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ 0.0000 0.0000 - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ __________ 0.0000 - illegal 0.0000 0.0000 0.0000 0.0000 -""" - diff --git a/reinforcement/test_cases/q6/5-coord-extractor.test_output b/reinforcement/test_cases/q6/5-coord-extractor.test_output deleted file mode 100644 index 7ab24e6..0000000 --- a/reinforcement/test_cases/q6/5-coord-extractor.test_output +++ /dev/null @@ -1,91 +0,0 @@ -==================== Iteration 0 ==================== -Q-Values at iteration 0 for action 'south' are NOT correct. Student solution: - q_values_k_0_action_south: """ - illegal illegal illegal illegal illegal - illegal illegal __________ illegal illegal - illegal illegal illegal illegal illegal - illegal illegal __________ __________ illegal - illegal illegal illegal illegal illegal -""" - - Correct solution: - q_values_k_0_action_south: """ - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ 0.0000 0.0000 - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ __________ 0.0000 - illegal 0.0000 0.0000 0.0000 0.0000 -""" - -Q-Values at iteration 0 for action 'west' are NOT correct. Student solution: - q_values_k_0_action_west: """ - illegal illegal illegal illegal illegal - illegal illegal __________ illegal illegal - illegal illegal illegal illegal illegal - illegal illegal __________ __________ illegal - illegal illegal illegal illegal illegal -""" - - Correct solution: - q_values_k_0_action_west: """ - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ 0.0000 0.0000 - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ __________ 0.0000 - illegal 0.0000 0.0000 0.0000 0.0000 -""" - -Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution: - q_values_k_0_action_exit: """ - illegal illegal illegal illegal illegal - illegal illegal __________ illegal illegal - illegal illegal illegal illegal illegal - illegal illegal __________ __________ illegal - illegal illegal illegal illegal illegal -""" - - Correct solution: - q_values_k_0_action_exit: """ - 0.0000 illegal 0.0000 illegal illegal - 0.0000 illegal __________ illegal illegal - 0.0000 illegal 0.0000 illegal illegal - 0.0000 illegal __________ __________ illegal - 0.0000 illegal illegal illegal illegal -""" - -Q-Values at iteration 0 for action 'east' are NOT correct. Student solution: - q_values_k_0_action_east: """ - illegal illegal illegal illegal illegal - illegal illegal __________ illegal illegal - illegal illegal illegal illegal illegal - illegal illegal __________ __________ illegal - illegal illegal illegal illegal illegal -""" - - Correct solution: - q_values_k_0_action_east: """ - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ 0.0000 0.0000 - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ __________ 0.0000 - illegal 0.0000 0.0000 0.0000 0.0000 -""" - -Q-Values at iteration 0 for action 'north' are NOT correct. Student solution: - q_values_k_0_action_north: """ - illegal illegal illegal illegal illegal - illegal illegal __________ illegal illegal - illegal illegal illegal illegal illegal - illegal illegal __________ __________ illegal - illegal illegal illegal illegal illegal -""" - - Correct solution: - q_values_k_0_action_north: """ - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ 0.0000 0.0000 - illegal 0.0000 illegal 0.0000 0.0000 - illegal 0.0000 __________ __________ 0.0000 - illegal 0.0000 0.0000 0.0000 0.0000 -""" -