reinforcement: question 2 (q2) answers

2024-07-08 18:16:30 +08:00
parent 138a07eef6
commit 88ed6ea135
2 changed files with 15 additions and 32 deletions
--- a/reinforcement/analysis.py
+++ b/reinforcement/analysis.py
@ -23,9 +23,9 @@ def question2a():
    """
      Prefer the close exit (+1), risking the cliff (-10).
    """
-    answerDiscount = None
+    answerDiscount = 0.5
-    answerNoise = None
+    answerNoise = 0.01
-    answerLivingReward = None
+    answerLivingReward = -1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'
@ -33,9 +33,9 @@ def question2b():
    """
      Prefer the close exit (+1), but avoiding the cliff (-10).
    """
-    answerDiscount = None
+    answerDiscount = 0.5
-    answerNoise = None
+    answerNoise = 0.1
-    answerLivingReward = None
+    answerLivingReward = -1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'
@ -43,9 +43,9 @@ def question2c():
    """
      Prefer the distant exit (+10), risking the cliff (-10).
    """
-    answerDiscount = None
+    answerDiscount = 0.8
-    answerNoise = None
+    answerNoise = 0.01
-    answerLivingReward = None
+    answerLivingReward = 0
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'
@ -53,9 +53,9 @@ def question2d():
    """
      Prefer the distant exit (+10), avoiding the cliff (-10).
    """
-    answerDiscount = None
+    answerDiscount = 0.8
-    answerNoise = None
+    answerNoise = 0.2
-    answerLivingReward = None
+    answerLivingReward = -0.1
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'
@ -63,9 +63,9 @@ def question2e():
    """
      Avoid both exits and the cliff (so an episode should never terminate).
    """
-    answerDiscount = None
+    answerDiscount = 0.8
-    answerNoise = None
+    answerNoise = 0.5
-    answerLivingReward = None
+    answerLivingReward = 100
    return answerDiscount, answerNoise, answerLivingReward
    # If not possible, return 'NOT POSSIBLE'
--- a/reinforcement/valueIterationAgents.py
+++ b/reinforcement/valueIterationAgents.py
@ -65,23 +65,6 @@ class ValueIterationAgent(ValueEstimationAgent):
          value iteration, V_k+1(...) depends on V_k(...)'s.
        """
        "*** YOUR CODE HERE ***"
        # Write value iteration code here
        # Hints:
        # - After each iteration, store the new values in self.values
        # - When updating a value, use self.values[state] = <new value>
        # - You will need to copy the state values into a separate dictionary
        #   to avoid changing values before computing the update.
        # - The difference between the new value and the old value (|V_k+1(s) - V_k(s)|)
        #   should be less than self.epsilon for all states s
        # - Make sure to use the discount factor self.discount
        # - Make sure to use the bellman equations to update the state values
        # - The number of iterations is given by self.iterations
        # - You may use the util.Counter() class
        # - You may also use the self.mdp.getTransitionStatesAndProbs(state, action) method
        # - You may also use the self.mdp.getReward(state, action, nextState) method
        # - You may also use the self.mdp.getPossibleActions(state) method
        # - You may also use the self.mdp.isTerminal(state) method
        for i in range(self.iterations):
            newValues = util.Counter()
            for state in self.mdp.getStates():