From 88ed6ea1356e7f529770dbd3bdf4da1bc95bf972 Mon Sep 17 00:00:00 2001
From: ZhuangYumin
Date: Mon, 8 Jul 2024 18:16:30 +0800
Subject: [PATCH] rein q2

---
 reinforcement/analysis.py             | 30 +++++++++++++--------------
 reinforcement/valueIterationAgents.py | 17 ---------------
 2 files changed, 15 insertions(+), 32 deletions(-)

diff --git a/reinforcement/analysis.py b/reinforcement/analysis.py
index 0bd2c9f..f979534 100644
--- a/reinforcement/analysis.py
+++ b/reinforcement/analysis.py
@@ -23,9 +23,9 @@ def question2a():
     """
     Prefer the close exit (+1), risking the cliff (-10).
     """
-    answerDiscount = None
-    answerNoise = None
-    answerLivingReward = None
+    answerDiscount = 0.5
+    answerNoise = 0.01
+    answerLivingReward = -1
     return answerDiscount, answerNoise, answerLivingReward
     # If not possible, return 'NOT POSSIBLE'
 
@@ -33,9 +33,9 @@ def question2b():
     """
     Prefer the close exit (+1), but avoiding the cliff (-10).
     """
-    answerDiscount = None
-    answerNoise = None
-    answerLivingReward = None
+    answerDiscount = 0.5
+    answerNoise = 0.1
+    answerLivingReward = -1
     return answerDiscount, answerNoise, answerLivingReward
     # If not possible, return 'NOT POSSIBLE'
 
@@ -43,9 +43,9 @@ def question2c():
     """
     Prefer the distant exit (+10), risking the cliff (-10).
     """
-    answerDiscount = None
-    answerNoise = None
-    answerLivingReward = None
+    answerDiscount = 0.8
+    answerNoise = 0.01
+    answerLivingReward = 0
     return answerDiscount, answerNoise, answerLivingReward
     # If not possible, return 'NOT POSSIBLE'
 
@@ -53,9 +53,9 @@ def question2d():
     """
     Prefer the distant exit (+10), avoiding the cliff (-10).
     """
-    answerDiscount = None
-    answerNoise = None
-    answerLivingReward = None
+    answerDiscount = 0.8
+    answerNoise = 0.2
+    answerLivingReward = -0.1
     return answerDiscount, answerNoise, answerLivingReward
     # If not possible, return 'NOT POSSIBLE'
 
@@ -63,9 +63,9 @@ def question2e():
     """
     Avoid both exits and the cliff (so an episode should never terminate).
     """
-    answerDiscount = None
-    answerNoise = None
-    answerLivingReward = None
+    answerDiscount = 0.8
+    answerNoise = 0.5
+    answerLivingReward = 100
     return answerDiscount, answerNoise, answerLivingReward
     # If not possible, return 'NOT POSSIBLE'
 
diff --git a/reinforcement/valueIterationAgents.py b/reinforcement/valueIterationAgents.py
index c987461..338340c 100644
--- a/reinforcement/valueIterationAgents.py
+++ b/reinforcement/valueIterationAgents.py
@@ -65,23 +65,6 @@ class ValueIterationAgent(ValueEstimationAgent):
           value iteration, V_k+1(...) depends on V_k(...)'s.
         """
         "*** YOUR CODE HERE ***"
-        # Write value iteration code here
-        # Hints:
-        # - After each iteration, store the new values in self.values
-        # - When updating a value, use self.values[state] =
-        # - You will need to copy the state values into a separate dictionary
-        #   to avoid changing values before computing the update.
-        # - The difference between the new value and the old value (|V_k+1(s) - V_k(s)|)
-        #   should be less than self.epsilon for all states s
-        # - Make sure to use the discount factor self.discount
-        # - Make sure to use the bellman equations to update the state values
-        # - The number of iterations is given by self.iterations
-        # - You may use the util.Counter() class
-        # - You may also use the self.mdp.getTransitionStatesAndProbs(state, action) method
-        # - You may also use the self.mdp.getReward(state, action, nextState) method
-        # - You may also use the self.mdp.getPossibleActions(state) method
-        # - You may also use the self.mdp.isTerminal(state) method
-
         for i in range(self.iterations):
             newValues = util.Counter()
             for state in self.mdp.getStates():
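
Note: the patch is truncated after the hunk's trailing context line (for state in self.mdp.getStates():). For reference only, here is a minimal sketch of how a batch value-iteration sweep along the lines of the deleted hints could continue. This is an illustration, not the author's code from this patch, and it assumes only the names the deleted comments themselves mention (self.values, self.discount, self.iterations, util.Counter(), and the self.mdp methods):

    for i in range(self.iterations):
        # Batch update: compute V_{k+1} into a fresh Counter so that V_k
        # stays fixed during the sweep (see the deleted hint about copying
        # values into a separate dictionary before updating).
        newValues = util.Counter()
        for state in self.mdp.getStates():
            if self.mdp.isTerminal(state):
                continue  # terminal states keep the Counter's default value of 0
            qValues = []
            for action in self.mdp.getPossibleActions(state):
                # Bellman backup: Q(s,a) = sum_s' T(s,a,s') * (R(s,a,s') + gamma * V_k(s'))
                q = 0.0
                for nextState, prob in self.mdp.getTransitionStatesAndProbs(state, action):
                    q += prob * (self.mdp.getReward(state, action, nextState)
                                 + self.discount * self.values[nextState])
                qValues.append(q)
            if qValues:
                newValues[state] = max(qValues)  # V_{k+1}(s) = max_a Q(s,a)
        self.values = newValues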
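
The analysis.py answers can be sanity-checked interactively. Assuming the standard gridworld.py driver and DiscountGrid layout from the same Berkeley Pacman reinforcement project (the +1/+10 exits and -10 cliff in the docstrings match that grid), each (discount, noise, livingReward) triple can be tried with, for example:

    python gridworld.py -a value -i 100 -g DiscountGrid --discount 0.5 --noise 0.01 --livingReward -1

Intuitively, near-zero noise (0.01) makes hugging the cliff safe enough to risk, a negative living reward pushes the agent to exit quickly at the close +1 exit, and raising the noise to 0.1 or 0.2 (question2b, question2d) makes the cliff row too dangerous, so the longer route away from the cliff becomes optimal.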