enter reinforcement
This commit is contained in:
67
reinforcement/mdp.py
Normal file
67
reinforcement/mdp.py
Normal file
@ -0,0 +1,67 @@
|
||||
# mdp.py
|
||||
# ------
|
||||
# Licensing Information: You are free to use or extend these projects for
|
||||
# educational purposes provided that (1) you do not distribute or publish
|
||||
# solutions, (2) you retain this notice, and (3) you provide clear
|
||||
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
|
||||
#
|
||||
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
|
||||
# The core projects and autograders were primarily created by John DeNero
|
||||
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
|
||||
# Student side autograding was added by Brad Miller, Nick Hay, and
|
||||
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
|
||||
|
||||
|
||||
import random
|
||||
|
||||
class MarkovDecisionProcess:
    """
    Abstract interface describing a Markov decision process (MDP).

    None of the methods below has an implementation here: each one is a
    placeholder that raises NotImplementedError until a subclass
    overrides it.
    """

    # The placeholders raise NotImplementedError explicitly so that calling
    # an un-overridden method reports which method is missing, instead of
    # failing on an undefined name.

    def getStates(self):
        """
        Return a list of all states in the MDP.
        Not generally possible for large MDPs.
        """
        raise NotImplementedError(
            "getStates must be overridden by a subclass")

    def getStartState(self):
        """
        Return the start state of the MDP.
        """
        raise NotImplementedError(
            "getStartState must be overridden by a subclass")

    def getPossibleActions(self, state):
        """
        Return list of possible actions from 'state'.
        """
        raise NotImplementedError(
            "getPossibleActions must be overridden by a subclass")

    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.

        Note that in Q-Learning and reinforcement
        learning in general, we do not know these
        probabilities nor do we directly model them.
        """
        raise NotImplementedError(
            "getTransitionStatesAndProbs must be overridden by a subclass")

    def getReward(self, state, action, nextState):
        """
        Get the reward for the state, action, nextState transition.

        Not available in reinforcement learning.
        """
        raise NotImplementedError(
            "getReward must be overridden by a subclass")

    def isTerminal(self, state):
        """
        Returns true if the current state is a terminal state.  By convention,
        a terminal state has zero future rewards.  Sometimes the terminal
        state(s) may have no possible actions.  It is also common to think of
        the terminal state as having a self-loop action 'pass' with zero
        reward; the formulations are equivalent.
        """
        raise NotImplementedError(
            "isTerminal must be overridden by a subclass")
|
Reference in New Issue
Block a user