From 88ed6ea1356e7f529770dbd3bdf4da1bc95bf972 Mon Sep 17 00:00:00 2001
From: ZhuangYumin
Date: Mon, 8 Jul 2024 18:16:30 +0800
Subject: [PATCH] rein q2

---
 reinforcement/analysis.py             | 30 +++++++++++++--------------
 reinforcement/valueIterationAgents.py | 17 ---------------
 2 files changed, 15 insertions(+), 32 deletions(-)

diff --git a/reinforcement/analysis.py b/reinforcement/analysis.py
index 0bd2c9f..f979534 100644
--- a/reinforcement/analysis.py
+++ b/reinforcement/analysis.py
@@ -23,9 +23,9 @@ def question2a():
     """
     Prefer the close exit (+1), risking the cliff (-10).
     """
-    answerDiscount = None
-    answerNoise = None
-    answerLivingReward = None
+    answerDiscount = 0.5
+    answerNoise = 0.01
+    answerLivingReward = -1
     return answerDiscount, answerNoise, answerLivingReward
     # If not possible, return 'NOT POSSIBLE'
 
@@ -33,9 +33,9 @@ def question2b():
     """
     Prefer the close exit (+1), but avoiding the cliff (-10).
     """
-    answerDiscount = None
-    answerNoise = None
-    answerLivingReward = None
+    answerDiscount = 0.5
+    answerNoise = 0.1
+    answerLivingReward = -1
     return answerDiscount, answerNoise, answerLivingReward
     # If not possible, return 'NOT POSSIBLE'
 
@@ -43,9 +43,9 @@ def question2c():
     """
     Prefer the distant exit (+10), risking the cliff (-10).
     """
-    answerDiscount = None
-    answerNoise = None
-    answerLivingReward = None
+    answerDiscount = 0.8
+    answerNoise = 0.01
+    answerLivingReward = 0
     return answerDiscount, answerNoise, answerLivingReward
     # If not possible, return 'NOT POSSIBLE'
 
@@ -53,9 +53,9 @@ def question2d():
     """
     Prefer the distant exit (+10), avoiding the cliff (-10).
     """
-    answerDiscount = None
-    answerNoise = None
-    answerLivingReward = None
+    answerDiscount = 0.8
+    answerNoise = 0.2
+    answerLivingReward = -0.1
     return answerDiscount, answerNoise, answerLivingReward
     # If not possible, return 'NOT POSSIBLE'
 
@@ -63,9 +63,9 @@ def question2e():
     """
     Avoid both exits and the cliff (so an episode should never terminate).
     """
-    answerDiscount = None
-    answerNoise = None
-    answerLivingReward = None
+    answerDiscount = 0.8
+    answerNoise = 0.5
+    answerLivingReward = 100
     return answerDiscount, answerNoise, answerLivingReward
     # If not possible, return 'NOT POSSIBLE'
 
diff --git a/reinforcement/valueIterationAgents.py b/reinforcement/valueIterationAgents.py
index c987461..338340c 100644
--- a/reinforcement/valueIterationAgents.py
+++ b/reinforcement/valueIterationAgents.py
@@ -65,23 +65,6 @@ class ValueIterationAgent(ValueEstimationAgent):
           value iteration, V_k+1(...) depends on V_k(...)'s.
         """
         "*** YOUR CODE HERE ***"
-        # Write value iteration code here
-        # Hints:
-        # - After each iteration, store the new values in self.values
-        # - When updating a value, use self.values[state] =
-        # - You will need to copy the state values into a separate dictionary
-        #   to avoid changing values before computing the update.
-        # - The difference between the new value and the old value (|V_k+1(s) - V_k(s)|)
-        #   should be less than self.epsilon for all states s
-        # - Make sure to use the discount factor self.discount
-        # - Make sure to use the bellman equations to update the state values
-        # - The number of iterations is given by self.iterations
-        # - You may use the util.Counter() class
-        # - You may also use the self.mdp.getTransitionStatesAndProbs(state, action) method
-        # - You may also use the self.mdp.getReward(state, action, nextState) method
-        # - You may also use the self.mdp.getPossibleActions(state) method
-        # - You may also use the self.mdp.isTerminal(state) method
-
         for i in range(self.iterations):
             newValues = util.Counter()
             for state in self.mdp.getStates():
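
Note: the patch is truncated after the hunk's trailing context line (for state in self.mdp.getStates():). For reference only, here is a minimal sketch of how a batch value-iteration sweep along the lines of the deleted hints could continue. This is an illustration, not the author's code from this patch, and it assumes only the names the deleted comments themselves mention (self.values, self.discount, self.iterations, util.Counter(), and the self.mdp methods):

    for i in range(self.iterations):
        # Batch update: compute V_{k+1} into a fresh Counter so that V_k
        # stays fixed during the sweep (see the deleted hint about copying
        # values into a separate dictionary before updating).
        newValues = util.Counter()
        for state in self.mdp.getStates():
            if self.mdp.isTerminal(state):
                continue  # terminal states keep the Counter's default value of 0
            qValues = []
            for action in self.mdp.getPossibleActions(state):
                # Bellman backup: Q(s,a) = sum_s' T(s,a,s') * (R(s,a,s') + gamma * V_k(s'))
                q = 0.0
                for nextState, prob in self.mdp.getTransitionStatesAndProbs(state, action):
                    q += prob * (self.mdp.getReward(state, action, nextState)
                                 + self.discount * self.values[nextState])
                qValues.append(q)
            if qValues:
                newValues[state] = max(qValues)  # V_{k+1}(s) = max_a Q(s,a)
        self.values = newValues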
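
The analysis.py answers can be sanity-checked interactively. Assuming the standard gridworld.py driver and DiscountGrid layout from the same Berkeley Pacman reinforcement project (the +1/+10 exits and -10 cliff in the docstrings match that grid), each (discount, noise, livingReward) triple can be tried with, for example:

    python gridworld.py -a value -i 100 -g DiscountGrid --discount 0.5 --noise 0.01 --livingReward -1

Intuitively, near-zero noise (0.01) makes hugging the cliff safe enough to risk, a negative living reward pushes the agent to exit quickly at the close +1 exit, and raising the noise to 0.1 or 0.2 (question2b, question2d) makes the cliff row too dangerous, so the longer route away from the cliff becomes optimal.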