try to solve smallClassic

This commit is contained in:
2024-07-18 19:18:55 +08:00
parent 1bf4cc1efe
commit ceae34ea86
6 changed files with 58 additions and 13 deletions

View File

@@ -26,21 +26,25 @@ class DeepQNetwork(Module):
"*** YOUR CODE HERE ***"
# Initialize layers
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
layer1_size=300
layer2_size=300
# layer3_size=500
layer1_size=512
layer2_size=128
layer3_size=64
self.fc1 = Linear(state_dim, layer1_size).to(self.device)
self.fc2 = Linear(layer1_size, layer2_size).to(self.device)
# self.fc3 = Linear(layer2_size, layer3_size).to(self.device)
self.fc_out= Linear(layer2_size, action_dim).to(self.device)
self.fc3 = Linear(layer2_size, layer3_size).to(self.device)
self.fc_out= Linear(layer3_size, action_dim).to(self.device)
# Set learning parameters
self.learning_rate = 0.1
self.numTrainingGames = 3800
self.learning_rate = 0.01
self.numTrainingGames = 5000
self.batch_size = 128
# Optimizer
self.optimizer = optim.SGD(self.parameters(), lr=self.learning_rate)
# self.scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=27500, eta_min=self.learning_rate*0.25) # Replace with CosineAnnealingLR
# self.scheduler2 = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.9999)
self.output_step=500
self.output_cnt=0
"**END CODE"""
self.double()
@@ -80,7 +84,7 @@ class DeepQNetwork(Module):
states = states.to(self.device)
x = relu(self.fc1(states))
x = relu(self.fc2(x))
# x = relu(self.fc3(x))
x = relu(self.fc3(x))
Q_values = self.fc_out(x)
return Q_values
@@ -106,4 +110,9 @@ class DeepQNetwork(Module):
self.optimizer.zero_grad()
loss = self.get_loss(states, Q_target)
loss.backward()
self.optimizer.step()
self.optimizer.step()
# self.scheduler1.step()
# self.scheduler2.step()
self.output_cnt+=1
if self.output_cnt%self.output_step==0:
print("now lr is: ", self.optimizer.param_groups[0]['lr'],"update count", self.output_cnt)