enter reinforcement
reinforcement/model.py (new file, 80 lines)

@@ -0,0 +1,80 @@
"""
Functions you should use.
Please avoid importing any other torch functions or modules.
Your code will not pass if the Gradescope autograder detects any changed imports.
"""

from torch.nn import Module
from torch.nn import Linear
from torch import tensor, double, optim
from torch.nn.functional import relu, mse_loss


class DeepQNetwork(Module):
    """
    A model that uses a Deep Q-value Network (DQN) to approximate Q(s,a) as part
    of reinforcement learning.
    """
    def __init__(self, state_dim, action_dim):
        super(DeepQNetwork, self).__init__()
        self.num_actions = action_dim
        self.state_size = state_dim
        # Remember to set self.learning_rate, self.numTrainingGames,
        # and self.batch_size!
        "*** YOUR CODE HERE ***"
        self.learning_rate = 0
        self.numTrainingGames = 0
        self.batch_size = 0
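        # One plausible fill-in (a sketch, not the graded solution; the
        # hyperparameter values, hidden width, and choice of Adam are
        # assumptions to tune against the project spec). Overrides the
        # placeholder zeros above.
        self.learning_rate = 0.1
        self.numTrainingGames = 2500
        self.batch_size = 64
        hidden = 128  # assumed hidden-layer width
        self.fc1 = Linear(state_dim, hidden)    # state -> hidden features
        self.fc2 = Linear(hidden, action_dim)   # hidden -> one Q-value per action
        self.optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)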

        "*** END CODE ***"
        self.double()

    def get_loss(self, states, Q_target):
        """
        Returns the squared loss between the Q-values currently predicted
        by the network and Q_target.
        Inputs:
            states: a (batch_size x state_dim) numpy array
            Q_target: a (batch_size x num_actions) numpy array, or None
        Output:
            loss node between Q predictions and Q_target
        """
        "*** YOUR CODE HERE ***"
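        # A minimal sketch (one possible implementation; assumes Q_target is
        # not None here and that forward() handles the numpy conversion):
        prediction = self.forward(states)         # (batch_size x num_actions)
        target = tensor(Q_target, dtype=double)   # match the network's dtype
        return mse_loss(prediction, target)
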
    def forward(self, states):
        """
        Runs the DQN for a batch of states.
        The DQN takes the state and returns the Q-values for all possible actions
        that can be taken. That is, if there are two actions, the network takes
        as input the state s and computes the vector [Q(s, a_1), Q(s, a_2)].
        Inputs:
            states: a (batch_size x state_dim) numpy array
        Output:
            result: a (batch_size x num_actions) tensor of Q-value
                scores, one for each action
        """
        "*** YOUR CODE HERE ***"
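        # A minimal sketch (assumes the fc1/fc2 layers sketched in __init__,
        # and that states arrives as a numpy array per the docstring):
        x = tensor(states, dtype=double)  # convert the batch to a double tensor
        x = relu(self.fc1(x))             # hidden layer with ReLU activation
        return self.fc2(x)                # one Q-value per action
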
    def run(self, states):
        return self.forward(states)

    def gradient_update(self, states, Q_target):
        """
        Updates your parameters by a single gradient step of your optimizer.
        You can look at the ML project for an idea of how to do this, but note
        that rather than iterating through a dataset, you should apply only one
        gradient step to the given datapoints.

        Inputs:
            states: a (batch_size x state_dim) numpy array
            Q_target: a (batch_size x num_actions) numpy array, or None
        Output:
            None
        """
        "*** YOUR CODE HERE ***"
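        # A minimal sketch (assumes self.optimizer was created in __init__ as
        # in the sketch above):
        self.optimizer.zero_grad()               # clear gradients from the last step
        loss = self.get_loss(states, Q_target)   # squared loss on this batch
        loss.backward()                          # backpropagate
        self.optimizer.step()                    # apply a single gradient step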