This commit is contained in:
2024-07-08 23:42:48 +08:00
parent a825670fda
commit 1bf4cc1efe
2 changed files with 37 additions and 8 deletions

View File

@ -9,7 +9,7 @@ from torch.nn import Module
from torch.nn import Linear
from torch import tensor, double, optim
from torch.nn.functional import relu, mse_loss
import torch
class DeepQNetwork(Module):
@ -24,9 +24,23 @@ class DeepQNetwork(Module):
# Remember to set self.learning_rate, self.numTrainingGames,
# and self.batch_size!
"*** YOUR CODE HERE ***"
# NOTE(review): these three zero placeholders are reassigned below
# (0.1 / 3800 / 128) before anything visible here reads them. This view looks
# like a diff rendering that lost its +/- markers, so they are presumably the
# pre-change lines -- TODO confirm against the real file.
self.learning_rate = 0
self.numTrainingGames = 0
self.batch_size = 0
# Initialize layers
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Widths of the two hidden layers.
layer1_size=300
layer2_size=300
# layer3_size=500
# Fully connected stack: state_dim -> layer1_size -> layer2_size -> action_dim.
# Each layer is moved to self.device right after it is constructed.
# (state_dim and action_dim are defined outside the lines visible here.)
self.fc1 = Linear(state_dim, layer1_size).to(self.device)
self.fc2 = Linear(layer1_size, layer2_size).to(self.device)
# self.fc3 = Linear(layer2_size, layer3_size).to(self.device)
self.fc_out= Linear(layer2_size, action_dim).to(self.device)
# Set learning parameters
self.learning_rate = 0.1
self.numTrainingGames = 3800
self.batch_size = 128
# Optimizer
# Plain SGD over the parameters returned by self.parameters(), using the
# learning rate assigned just above.
self.optimizer = optim.SGD(self.parameters(), lr=self.learning_rate)
"**END CODE"""
self.double()
@ -43,6 +57,9 @@ class DeepQNetwork(Module):
loss node between Q predictions and Q_target
"""
"*** YOUR CODE HERE ***"
# Build a double-precision tensor from Q_target on the model's device so it
# can be compared against the network output.
# NOTE(review): if callers already pass a torch tensor here, torch.tensor()
# makes a copy and emits a UserWarning -- TODO confirm what callers pass.
Q_target_tensor = tensor(Q_target, dtype=double, device=self.device)
# Mean-squared error between the forward pass on `states` and the targets.
loss = mse_loss(self.forward(states), Q_target_tensor)
return loss
def forward(self, states):
@ -59,6 +76,14 @@ class DeepQNetwork(Module):
scores, for each of the actions
"""
"*** YOUR CODE HERE ***"
# Move the input to the model's device when the device *types* differ
# (e.g. "cpu" vs "cuda"); the device index is not compared.
if states.device.type != self.device.type:
states = states.to(self.device)
# Two hidden layers, each followed by a ReLU.
x = relu(self.fc1(states))
x = relu(self.fc2(x))
# x = relu(self.fc3(x))
# Output layer is applied without an activation; its result is returned as-is.
Q_values = self.fc_out(x)
return Q_values
def run(self, states):
@ -77,4 +102,8 @@ class DeepQNetwork(Module):
Output:
None
"""
"*** YOUR CODE HERE ***"
"*** YOUR CODE HERE ***"
# One optimisation step for (states, Q_target):
# clear previously accumulated gradients, ...
self.optimizer.zero_grad()
# ... compute the loss via get_loss, ...
loss = self.get_loss(states, Q_target)
# ... back-propagate it, ...
loss.backward()
# ... and let the optimizer apply the parameter update.
self.optimizer.step()