rein q2
This commit is contained in:
@ -23,9 +23,9 @@ def question2a():
|
|||||||
"""
|
"""
|
||||||
Prefer the close exit (+1), risking the cliff (-10).
|
Prefer the close exit (+1), risking the cliff (-10).
|
||||||
"""
|
"""
|
||||||
answerDiscount = None
|
answerDiscount = 0.5
|
||||||
answerNoise = None
|
answerNoise = 0.01
|
||||||
answerLivingReward = None
|
answerLivingReward = -1
|
||||||
return answerDiscount, answerNoise, answerLivingReward
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
# If not possible, return 'NOT POSSIBLE'
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
@ -33,9 +33,9 @@ def question2b():
|
|||||||
"""
|
"""
|
||||||
Prefer the close exit (+1), but avoiding the cliff (-10).
|
Prefer the close exit (+1), but avoiding the cliff (-10).
|
||||||
"""
|
"""
|
||||||
answerDiscount = None
|
answerDiscount = 0.5
|
||||||
answerNoise = None
|
answerNoise = 0.1
|
||||||
answerLivingReward = None
|
answerLivingReward = -1
|
||||||
return answerDiscount, answerNoise, answerLivingReward
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
# If not possible, return 'NOT POSSIBLE'
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
@ -43,9 +43,9 @@ def question2c():
|
|||||||
"""
|
"""
|
||||||
Prefer the distant exit (+10), risking the cliff (-10).
|
Prefer the distant exit (+10), risking the cliff (-10).
|
||||||
"""
|
"""
|
||||||
answerDiscount = None
|
answerDiscount = 0.8
|
||||||
answerNoise = None
|
answerNoise = 0.01
|
||||||
answerLivingReward = None
|
answerLivingReward = 0
|
||||||
return answerDiscount, answerNoise, answerLivingReward
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
# If not possible, return 'NOT POSSIBLE'
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
@ -53,9 +53,9 @@ def question2d():
|
|||||||
"""
|
"""
|
||||||
Prefer the distant exit (+10), avoiding the cliff (-10).
|
Prefer the distant exit (+10), avoiding the cliff (-10).
|
||||||
"""
|
"""
|
||||||
answerDiscount = None
|
answerDiscount = 0.8
|
||||||
answerNoise = None
|
answerNoise = 0.2
|
||||||
answerLivingReward = None
|
answerLivingReward = -0.1
|
||||||
return answerDiscount, answerNoise, answerLivingReward
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
# If not possible, return 'NOT POSSIBLE'
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
@ -63,9 +63,9 @@ def question2e():
|
|||||||
"""
|
"""
|
||||||
Avoid both exits and the cliff (so an episode should never terminate).
|
Avoid both exits and the cliff (so an episode should never terminate).
|
||||||
"""
|
"""
|
||||||
answerDiscount = None
|
answerDiscount = 0.8
|
||||||
answerNoise = None
|
answerNoise = 0.5
|
||||||
answerLivingReward = None
|
answerLivingReward = 100
|
||||||
return answerDiscount, answerNoise, answerLivingReward
|
return answerDiscount, answerNoise, answerLivingReward
|
||||||
# If not possible, return 'NOT POSSIBLE'
|
# If not possible, return 'NOT POSSIBLE'
|
||||||
|
|
||||||
|
@ -65,23 +65,6 @@ class ValueIterationAgent(ValueEstimationAgent):
|
|||||||
value iteration, V_k+1(...) depends on V_k(...)'s.
|
value iteration, V_k+1(...) depends on V_k(...)'s.
|
||||||
"""
|
"""
|
||||||
"*** YOUR CODE HERE ***"
|
"*** YOUR CODE HERE ***"
|
||||||
# Write value iteration code here
|
|
||||||
# Hints:
|
|
||||||
# - After each iteration, store the new values in self.values
|
|
||||||
# - When updating a value, use self.values[state] = <new value>
|
|
||||||
# - You will need to copy the state values into a separate dictionary
|
|
||||||
# to avoid changing values before computing the update.
|
|
||||||
# - The difference between the new value and the old value (|V_k+1(s) - V_k(s)|)
|
|
||||||
# should be less than self.epsilon for all states s
|
|
||||||
# - Make sure to use the discount factor self.discount
|
|
||||||
# - Make sure to use the Bellman equations to update the state values
|
|
||||||
# - The number of iterations is given by self.iterations
|
|
||||||
# - You may use the util.Counter() class
|
|
||||||
# - You may also use the self.mdp.getTransitionStatesAndProbs(state, action) method
|
|
||||||
# - You may also use the self.mdp.getReward(state, action, nextState) method
|
|
||||||
# - You may also use the self.mdp.getPossibleActions(state) method
|
|
||||||
# - You may also use the self.mdp.isTerminal(state) method
|
|
||||||
|
|
||||||
for i in range(self.iterations):
|
for i in range(self.iterations):
|
||||||
newValues = util.Counter()
|
newValues = util.Counter()
|
||||||
for state in self.mdp.getStates():
|
for state in self.mdp.getStates():
|
||||||
|
Reference in New Issue
Block a user