This commit is contained in:
2024-07-08 18:16:30 +08:00
parent 138a07eef6
commit 88ed6ea135
2 changed files with 15 additions and 32 deletions

View File

@@ -65,23 +65,6 @@ class ValueIterationAgent(ValueEstimationAgent):
value iteration, V_k+1(...) depends on V_k(...)'s.
"""
"*** YOUR CODE HERE ***"
# Write value iteration code here
# Hints:
# - After each iteration, store the new values in self.values
# - When updating a value, use self.values[state] = <new value>
# - You will need to copy the state values into a separate dictionary
# to avoid changing values before computing the update.
# - The difference between the new value and the old value (|V_k+1(s) - V_k(s)|)
# should be less than self.epsilon for all states s
# - Make sure to use the discount factor self.discount
# - Make sure to use the bellman equations to update the state values
# - The number of iterations is given by self.iterations
# - You may use the util.Counter() class
# - You may also use the self.mdp.getTransitionStatesAndProbs(state, action) method
# - You may also use the self.mdp.getReward(state, action, nextState) method
# - You may also use the self.mdp.getPossibleActions(state) method
# - You may also use the self.mdp.isTerminal(state) method
for i in range(self.iterations):
newValues = util.Counter()
for state in self.mdp.getStates():