enter reinforcement

2024-07-06 01:30:00 +08:00
parent f105ba0150
commit e3f8181056
116 changed files with 19698 additions and 0 deletions
--- a/reinforcement/mdp.py
+++ b/reinforcement/mdp.py
@ -0,0 +1,67 @@
+# mdp.py
+# ------
+# Licensing Information:  You are free to use or extend these projects for
+# educational purposes provided that (1) you do not distribute or publish
+# solutions, (2) you retain this notice, and (3) you provide clear
+# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
+# 
+# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
+# The core projects and autograders were primarily created by John DeNero
+# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
+# Student side autograding was added by Brad Miller, Nick Hay, and
+# Pieter Abbeel (pabbeel@cs.berkeley.edu).
+
+
+import random
+
+class MarkovDecisionProcess:
+
+    def getStates(self):
+        """
+        Return a list of all states in the MDP.
+        Not generally possible for large MDPs.
+        """
+        abstract
+
+    def getStartState(self):
+        """
+        Return the start state of the MDP.
+        """
+        abstract
+
+    def getPossibleActions(self, state):
+        """
+        Return list of possible actions from 'state'.
+        """
+        abstract
+
+    def getTransitionStatesAndProbs(self, state, action):
+        """
+        Returns list of (nextState, prob) pairs
+        representing the states reachable
+        from 'state' by taking 'action' along
+        with their transition probabilities.
+
+        Note that in Q-Learning and reinforcment
+        learning in general, we do not know these
+        probabilities nor do we directly model them.
+        """
+        abstract
+
+    def getReward(self, state, action, nextState):
+        """
+        Get the reward for the state, action, nextState transition.
+
+        Not available in reinforcement learning.
+        """
+        abstract
+
+    def isTerminal(self, state):
+        """
+        Returns true if the current state is a terminal state.  By convention,
+        a terminal state has zero future rewards.  Sometimes the terminal state(s)
+        may have no possible actions.  It is also common to think of the terminal
+        state as having a self-loop action 'pass' with zero reward; the formulations
+        are equivalent.
+        """
+        abstract