enter reinforcement
reinforcement/model.py (new file, 80 lines)

@@ -0,0 +1,80 @@
"""
Functions you should use.
Please avoid importing any other torch functions or modules.
Your code will not pass if the Gradescope autograder detects any changed imports.
"""

from torch.nn import Module
from torch.nn import Linear
from torch import tensor, double, optim
from torch.nn.functional import relu, mse_loss


class DeepQNetwork(Module):
    """
    A model that uses a Deep Q-value Network (DQN) to approximate Q(s,a) as part
    of reinforcement learning.
    """
    def __init__(self, state_dim, action_dim):
        super(DeepQNetwork, self).__init__()
        self.num_actions = action_dim
        self.state_size = state_dim
        # Remember to set self.learning_rate, self.numTrainingGames,
        # and self.batch_size!
        "*** YOUR CODE HERE ***"
        self.learning_rate = 0
        self.numTrainingGames = 0
        self.batch_size = 0
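        # One plausible fill-in (a sketch, not the graded solution; the
        # hyperparameter values, hidden width, and choice of Adam are
        # assumptions to tune against the project spec). Overrides the
        # placeholder zeros above.
        self.learning_rate = 0.1
        self.numTrainingGames = 2500
        self.batch_size = 64
        hidden = 128  # assumed hidden-layer width
        self.fc1 = Linear(state_dim, hidden)    # state -> hidden features
        self.fc2 = Linear(hidden, action_dim)   # hidden -> one Q-value per action
        self.optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)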

        "*** END CODE ***"
        self.double()

    def get_loss(self, states, Q_target):
        """
        Returns the squared loss between the Q-values currently predicted
        by the network and Q_target.
        Inputs:
            states: a (batch_size x state_dim) numpy array
            Q_target: a (batch_size x num_actions) numpy array, or None
        Output:
            loss node between Q predictions and Q_target
        """
        "*** YOUR CODE HERE ***"
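        # A minimal sketch (one possible implementation; assumes Q_target is
        # not None here and that forward() handles the numpy conversion):
        prediction = self.forward(states)         # (batch_size x num_actions)
        target = tensor(Q_target, dtype=double)   # match the network's dtype
        return mse_loss(prediction, target)
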
    def forward(self, states):
        """
        Runs the DQN for a batch of states.
        The DQN takes the state and returns the Q-values for all possible actions
        that can be taken. That is, if there are two actions, the network takes
        as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)].
        Inputs:
            states: a (batch_size x state_dim) numpy array
        Output:
            result: a (batch_size x num_actions) tensor of Q-value
                scores, one for each action
        """
        "*** YOUR CODE HERE ***"
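        # A minimal sketch (assumes the fc1/fc2 layers sketched in __init__,
        # and that states arrives as a numpy array per the docstring):
        x = tensor(states, dtype=double)  # convert the batch to a double tensor
        x = relu(self.fc1(x))             # hidden layer with ReLU activation
        return self.fc2(x)                # one Q-value per action
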
    def run(self, states):
        return self.forward(states)

    def gradient_update(self, states, Q_target):
        """
        Updates your parameters by a single gradient step of your optimizer.
        You can look at the ML project for an idea of how to do this, but note
        that rather than iterating through a dataset, you should apply only one
        gradient step to the given datapoints.

        Inputs:
            states: a (batch_size x state_dim) numpy array
            Q_target: a (batch_size x num_actions) numpy array, or None
        Output:
            None
        """
        "*** YOUR CODE HERE ***"
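        # A minimal sketch (assumes self.optimizer was created in __init__ as
        # in the sketch above):
        self.optimizer.zero_grad()               # clear gradients from the last step
        loss = self.get_loss(states, Q_target)   # squared loss on this batch
        loss.backward()                          # backpropagate
        self.optimizer.step()                    # apply a single gradient step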