enter reinforcement

2024-07-06 01:30:00 +08:00
parent f105ba0150
commit e3f8181056
116 changed files with 19698 additions and 0 deletions
--- a/reinforcement/analysis.py
+++ b/reinforcement/analysis.py
@ -0,0 +1,77 @@
+# analysis.py
+# -----------
+# Licensing Information:  You are free to use or extend these projects for
+# educational purposes provided that (1) you do not distribute or publish
+# solutions, (2) you retain this notice, and (3) you provide clear
+# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
+#
+# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
+# The core projects and autograders were primarily created by John DeNero
+# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
+# Student side autograding was added by Brad Miller, Nick Hay, and
+# Pieter Abbeel (pabbeel@cs.berkeley.edu).
+
+
+######################
+# ANALYSIS QUESTIONS #
+######################
+
+# Set the given parameters to obtain the specified policies through
+# value iteration.
+
+def question2a():
+    """
+      Prefer the close exit (+1), risking the cliff (-10).
+    """
+    answerDiscount = None
+    answerNoise = None
+    answerLivingReward = None
+    return answerDiscount, answerNoise, answerLivingReward
+    # If not possible, return 'NOT POSSIBLE'
+
+def question2b():
+    """
+      Prefer the close exit (+1), but avoiding the cliff (-10).
+    """
+    answerDiscount = None
+    answerNoise = None
+    answerLivingReward = None
+    return answerDiscount, answerNoise, answerLivingReward
+    # If not possible, return 'NOT POSSIBLE'
+
+def question2c():
+    """
+      Prefer the distant exit (+10), risking the cliff (-10).
+    """
+    answerDiscount = None
+    answerNoise = None
+    answerLivingReward = None
+    return answerDiscount, answerNoise, answerLivingReward
+    # If not possible, return 'NOT POSSIBLE'
+
+def question2d():
+    """
+      Prefer the distant exit (+10), avoiding the cliff (-10).
+    """
+    answerDiscount = None
+    answerNoise = None
+    answerLivingReward = None
+    return answerDiscount, answerNoise, answerLivingReward
+    # If not possible, return 'NOT POSSIBLE'
+
+def question2e():
+    """
+      Avoid both exits and the cliff (so an episode should never terminate).
+    """
+    answerDiscount = None
+    answerNoise = None
+    answerLivingReward = None
+    return answerDiscount, answerNoise, answerLivingReward
+    # If not possible, return 'NOT POSSIBLE'
+
+if __name__ == '__main__':
+    print('Answers to analysis questions:')
+    import analysis
+    for q in [q for q in dir(analysis) if q.startswith('question')]:
+        response = getattr(analysis, q)()
+        print('  Question %s:\t%s' % (q, str(response)))