try to solve smallClassic
@@ -26,21 +26,25 @@ class DeepQNetwork(Module):
         "*** YOUR CODE HERE ***"
         # Initialize layers
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        layer1_size=300
-        layer2_size=300
-        # layer3_size=500
+        layer1_size=512
+        layer2_size=128
+        layer3_size=64
         self.fc1 = Linear(state_dim, layer1_size).to(self.device)
         self.fc2 = Linear(layer1_size, layer2_size).to(self.device)
-        # self.fc3 = Linear(layer2_size, layer3_size).to(self.device)
-        self.fc_out= Linear(layer2_size, action_dim).to(self.device)
+        self.fc3 = Linear(layer2_size, layer3_size).to(self.device)
+        self.fc_out= Linear(layer3_size, action_dim).to(self.device)

         # Set learning parameters
-        self.learning_rate = 0.1
-        self.numTrainingGames = 3800
+        self.learning_rate = 0.01
+        self.numTrainingGames = 5000
         self.batch_size = 128

         # Optimizer
         self.optimizer = optim.SGD(self.parameters(), lr=self.learning_rate)
+        # self.scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=27500, eta_min=self.learning_rate*0.25) # Replace with CosineAnnealingLR
+        # self.scheduler2 = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.9999)
+        self.output_step=500
+        self.output_cnt=0

         "**END CODE"""
         self.double()
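Note: the net effect of this hunk is a wider, deeper Q-network (512 -> 128 -> 64 hidden units instead of 300 -> 300), a learning rate lowered from 0.1 to 0.01, and more training games; the LR schedulers are sketched but left commented out. Below is a minimal standalone sketch of the resulting architecture, assuming Linear is torch.nn.Linear and optim is torch.optim (as optim.SGD and torch.optim.lr_scheduler suggest); the state_dim/action_dim values are hypothetical stand-ins for the task's real dimensions:

    # Sketch only: the architecture this hunk produces, rebuilt in plain PyTorch.
    import torch
    import torch.nn as nn

    state_dim, action_dim = 10, 5  # hypothetical dimensions, for illustration only
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = nn.Sequential(
        nn.Linear(state_dim, 512), nn.ReLU(),  # fc1
        nn.Linear(512, 128), nn.ReLU(),        # fc2
        nn.Linear(128, 64), nn.ReLU(),         # fc3 (newly enabled)
        nn.Linear(64, action_dim),             # fc_out: raw Q-values, no activation
    ).double().to(device)                      # .double() mirrors self.double()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)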
@@ -80,7 +84,7 @@ class DeepQNetwork(Module):
         states = states.to(self.device)
         x = relu(self.fc1(states))
         x = relu(self.fc2(x))
-        # x = relu(self.fc3(x))
+        x = relu(self.fc3(x))
         Q_values = self.fc_out(x)
         return Q_values

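Note: forward() now routes through the third hidden layer before the output head. A minimal shape-check sketch of the updated data flow, with hypothetical dimensions and freshly built layers standing in for fc1/fc2/fc3/fc_out:

    # Sketch only: the data flow of forward() after this change.
    import torch
    from torch.nn import Linear
    from torch.nn.functional import relu

    state_dim, action_dim = 10, 5  # hypothetical
    fc1, fc2 = Linear(state_dim, 512), Linear(512, 128)
    fc3, fc_out = Linear(128, 64), Linear(64, action_dim)

    states = torch.zeros(32, state_dim)  # a dummy batch of 32 states
    x = relu(fc1(states))                # (32, 512)
    x = relu(fc2(x))                     # (32, 128)
    x = relu(fc3(x))                     # (32, 64), the newly enabled layer
    Q_values = fc_out(x)                 # (32, action_dim); no activation on Q-values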
@@ -106,4 +110,9 @@ class DeepQNetwork(Module):
         self.optimizer.zero_grad()
         loss = self.get_loss(states, Q_target)
         loss.backward()
-        self.optimizer.step()
+        self.optimizer.step()
+        # self.scheduler1.step()
+        # self.scheduler2.step()
+        self.output_cnt+=1
+        if self.output_cnt%self.output_step==0:
+            print("now lr is: ", self.optimizer.param_groups[0]['lr'],"update count", self.output_cnt)
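Note: the gradient step keeps PyTorch's usual order (zero_grad, backward, step), and if the commented-out schedulers were re-enabled, their step() calls would follow optimizer.step(), which is the order PyTorch expects. A minimal self-contained sketch of one such update; the tiny model and loss below are hypothetical stand-ins for the network and self.get_loss:

    # Sketch only: one training update in the order this hunk implements.
    import torch

    model = torch.nn.Linear(4, 2)  # hypothetical stand-in network
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9999)

    loss = model(torch.zeros(1, 4)).sum()  # stand-in for self.get_loss(...)
    optimizer.zero_grad()                  # clear stale gradients
    loss.backward()                        # backprop
    optimizer.step()                       # apply the SGD update
    scheduler.step()                       # then decay the lr (if enabled)
    print("now lr is:", optimizer.param_groups[0]["lr"])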