This commit is contained in:
2024-07-08 18:50:23 +08:00
parent 88ed6ea135
commit 675529e9dc
5 changed files with 28 additions and 341 deletions

View File

@ -58,7 +58,10 @@ class QLearningAgent(ReinforcementAgent):
or the Q node value otherwise or the Q node value otherwise
""" """
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
if (state, action) in self.qVals:
return self.qVals[(state, action)]
else:
return 0.0
def computeValueFromQValues(self, state): def computeValueFromQValues(self, state):
@ -69,6 +72,12 @@ class QLearningAgent(ReinforcementAgent):
terminal state, you should return a value of 0.0. terminal state, you should return a value of 0.0.
""" """
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
legalActions = self.getLegalActions(state)
if not legalActions:
return 0.0
maxQValue = max(self.getQValue(state, action) for action in legalActions)
return maxQValue
def computeActionFromQValues(self, state): def computeActionFromQValues(self, state):
@ -78,6 +87,14 @@ class QLearningAgent(ReinforcementAgent):
you should return None. you should return None.
""" """
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
legalActions = self.getLegalActions(state)
if not legalActions:
return None
maxQValue = self.computeValueFromQValues(state)
bestActions = [action for action in legalActions if self.getQValue(state, action) == maxQValue]
return random.choice(bestActions)
def getAction(self, state): def getAction(self, state):
@ -94,6 +111,13 @@ class QLearningAgent(ReinforcementAgent):
legalActions = self.getLegalActions(state) legalActions = self.getLegalActions(state)
action = None action = None
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
if not legalActions:
return None
if util.flipCoin(self.epsilon):
return random.choice(legalActions)
else:
return self.computeActionFromQValues(state)
def update(self, state, action, nextState, reward: float): def update(self, state, action, nextState, reward: float):
@ -105,6 +129,9 @@ class QLearningAgent(ReinforcementAgent):
it will be called on your behalf it will be called on your behalf
""" """
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
sample = reward + self.discount * self.computeValueFromQValues(nextState)
currentQValue = self.getQValue(state, action)
self.qVals[(state, action)] = (1 - self.alpha) * currentQValue + self.alpha * sample
def getPolicy(self, state): def getPolicy(self, state):

View File

@ -1,70 +0,0 @@
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
q_values_k_0_action_south: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
q_values_k_0_action_west: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
q_values_k_0_action_exit: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
q_values_k_0_action_east: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
q_values_k_0_action_north: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""

View File

@ -1,70 +0,0 @@
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
q_values_k_0_action_south: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_south: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
q_values_k_0_action_west: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_west: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
q_values_k_0_action_exit: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_exit: """
0.0000
illegal
0.0000
"""
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
q_values_k_0_action_east: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_east: """
illegal
0.0000
illegal
"""
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
q_values_k_0_action_north: """
illegal
illegal
illegal
"""
Correct solution:
q_values_k_0_action_north: """
illegal
0.0000
illegal
"""

View File

@ -1,110 +0,0 @@
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
q_values_k_0_action_south: """
__________ illegal __________
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
__________ illegal __________
"""
Correct solution:
q_values_k_0_action_south: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
q_values_k_0_action_west: """
__________ illegal __________
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
__________ illegal __________
"""
Correct solution:
q_values_k_0_action_west: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
q_values_k_0_action_exit: """
__________ illegal __________
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
__________ illegal __________
"""
Correct solution:
q_values_k_0_action_exit: """
__________ 0.0000 __________
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
0.0000 illegal 0.0000
__________ 0.0000 __________
"""
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
q_values_k_0_action_east: """
__________ illegal __________
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
__________ illegal __________
"""
Correct solution:
q_values_k_0_action_east: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
q_values_k_0_action_north: """
__________ illegal __________
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
illegal illegal illegal
__________ illegal __________
"""
Correct solution:
q_values_k_0_action_north: """
__________ illegal __________
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
illegal 0.0000 illegal
__________ illegal __________
"""

View File

@ -1,90 +0,0 @@
Q-Values at iteration 0 for action 'south' are NOT correct. Student solution:
q_values_k_0_action_south: """
illegal illegal illegal illegal illegal
illegal illegal __________ illegal illegal
illegal illegal illegal illegal illegal
illegal illegal __________ __________ illegal
illegal illegal illegal illegal illegal
"""
Correct solution:
q_values_k_0_action_south: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
Q-Values at iteration 0 for action 'west' are NOT correct. Student solution:
q_values_k_0_action_west: """
illegal illegal illegal illegal illegal
illegal illegal __________ illegal illegal
illegal illegal illegal illegal illegal
illegal illegal __________ __________ illegal
illegal illegal illegal illegal illegal
"""
Correct solution:
q_values_k_0_action_west: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
Q-Values at iteration 0 for action 'exit' are NOT correct. Student solution:
q_values_k_0_action_exit: """
illegal illegal illegal illegal illegal
illegal illegal __________ illegal illegal
illegal illegal illegal illegal illegal
illegal illegal __________ __________ illegal
illegal illegal illegal illegal illegal
"""
Correct solution:
q_values_k_0_action_exit: """
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ illegal illegal
0.0000 illegal 0.0000 illegal illegal
0.0000 illegal __________ __________ illegal
0.0000 illegal illegal illegal illegal
"""
Q-Values at iteration 0 for action 'east' are NOT correct. Student solution:
q_values_k_0_action_east: """
illegal illegal illegal illegal illegal
illegal illegal __________ illegal illegal
illegal illegal illegal illegal illegal
illegal illegal __________ __________ illegal
illegal illegal illegal illegal illegal
"""
Correct solution:
q_values_k_0_action_east: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""
Q-Values at iteration 0 for action 'north' are NOT correct. Student solution:
q_values_k_0_action_north: """
illegal illegal illegal illegal illegal
illegal illegal __________ illegal illegal
illegal illegal illegal illegal illegal
illegal illegal __________ __________ illegal
illegal illegal illegal illegal illegal
"""
Correct solution:
q_values_k_0_action_north: """
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ 0.0000 0.0000
illegal 0.0000 illegal 0.0000 0.0000
illegal 0.0000 __________ __________ 0.0000
illegal 0.0000 0.0000 0.0000 0.0000
"""