try to solve smallClassic
.gitignore (vendored)
@@ -6,4 +6,5 @@ __pycache__/
 /.github
 **/.env/
 **/.ipynb_checkpoints
 **/playground.ipynb
+**/*.bin
@@ -5,6 +5,7 @@ import layout
 import copy
 import torch
 import numpy as np
+import os

 class PacmanDeepQAgent(PacmanQAgent):
     def __init__(self, layout_input="smallGrid", target_update_rate=300, doubleQ=True, **args):
@@ -15,8 +16,9 @@ class PacmanDeepQAgent(PacmanQAgent):
         self.update_amount = 0
         self.epsilon_explore = 1.0
         self.epsilon0 = 0.4
+        self.minimal_epsilon = 0.01
         self.epsilon = self.epsilon0
-        self.discount = 0.9
+        self.discount = 0.95
         self.update_frequency = 3
         self.counts = None
         self.replay_memory = ReplayMemory(50000)
@@ -54,6 +56,27 @@ class PacmanDeepQAgent(PacmanQAgent):
         import model
         self.model = model.DeepQNetwork(state_dim, action_dim)
         self.target_model = model.DeepQNetwork(state_dim, action_dim)
+        if os.path.exists('para.bin'):
+            print("Loading model parameters from para.bin")
+            checkpoint = torch.load('para.bin')
+            self.model.load_state_dict(checkpoint['model_state_dict'])
+            self.target_model.load_state_dict(checkpoint['target_model_state_dict'])
+            self.model.optimizer.load_state_dict(checkpoint['model_optimizer_state_dict'])
+            self.target_model.optimizer.load_state_dict(checkpoint['target_model_optimizer_state_dict'])
+            self.replay_memory = checkpoint['memory']
+            print(self.model.state_dict())
+        else:
+            print("Initializing new model parameters")
+
+    def save_model(self, filename="para.bin"):
+        print(f"Saving model parameters to {filename}")
+        torch.save({
+            'model_state_dict': self.model.state_dict(),
+            'target_model_state_dict': self.target_model.state_dict(),
+            'model_optimizer_state_dict': self.model.optimizer.state_dict(),
+            "target_model_optimizer_state_dict": self.target_model.optimizer.state_dict(),
+            "memory": self.replay_memory
+        }, filename)
+        print(self.model.state_dict())

     def getQValue(self, state, action):
         """
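Note that torch.save pickles everything in the checkpoint dict, so storing the ReplayMemory object alongside the state dicts means a resumed run also gets its transition buffer back. The ReplayMemory class itself is not part of this diff; a minimal, hypothetical sketch of the interface the checkpoint code appears to rely on (names and fields assumed, not taken from the repo) might look like:

    import random
    from collections import deque

    class ReplayMemorySketch:
        """Fixed-size transition buffer; picklable, so it can ride along in torch.save."""
        def __init__(self, capacity):
            self.buffer = deque(maxlen=capacity)

        def push(self, transition):
            # transition could be (state, action, next_state, reward, done)
            self.buffer.append(transition)

        def sample(self, batch_size):
            return random.sample(self.buffer, batch_size)

        def __len__(self):
            return len(self.buffer)

    memory = ReplayMemorySketch(50000)  # mirrors ReplayMemory(50000) above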
@@ -136,7 +159,7 @@ class PacmanDeepQAgent(PacmanQAgent):
         if len(self.replay_memory) < self.min_transitions_before_training:
             self.epsilon = self.epsilon_explore
         else:
-            self.epsilon = max(self.epsilon0 * (1 - self.update_amount / 20000), 0)
+            self.epsilon = max(self.epsilon0 * (1 - self.update_amount / 20000), self.minimal_epsilon)


         if len(self.replay_memory) > self.min_transitions_before_training and self.update_amount % self.update_frequency == 0:
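With the new minimal_epsilon floor, the exploration rate decays linearly from epsilon0 over the first 20000 updates and then stays at 0.01 instead of dropping to zero, so the agent keeps a little exploration late in training. A small stand-alone illustration of the schedule, using the constants from this diff:

    def epsilon_at(update_amount, epsilon0=0.4, minimal_epsilon=0.01, horizon=20000):
        # Linear decay from epsilon0 down to the floor over `horizon` updates.
        return max(epsilon0 * (1 - update_amount / horizon), minimal_epsilon)

    print(epsilon_at(0))        # 0.4
    print(epsilon_at(10000))    # 0.2
    print(epsilon_at(30000))    # 0.01 -- previously this clamped at 0.0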
@@ -249,6 +249,13 @@ class ReinforcementAgent(ValueEstimationAgent):
                 print('\tAverage Rewards over testing: %.2f' % testAvg)
             print('\tAverage Rewards for last %d episodes: %.2f' % (
                     NUM_EPS_UPDATE,windowAvg))
+            if windowAvg>-220:
+                if not hasattr(self,'best_window_avg_score'):
+                    self.best_window_avg_score=-300
+                if windowAvg>self.best_window_avg_score:
+                    print("find an excellent policy, ready to save model")
+                    self.save_model("para.best.bin")
+                    self.best_window_avg_score=windowAvg
             print('\tEpisode took %.2f seconds' % (time.time() - self.episodeStartTime))
             self.lastWindowAccumRewards = 0.0
             self.episodeStartTime = time.time()
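The new block checkpoints a separate para.best.bin whenever the window average both clears -220 and beats the best window average seen so far, so a later unlucky stretch cannot overwrite a good policy. The same guard, reduced to a stand-alone sketch (illustrative names; threshold and default follow the diff):

    def maybe_save_best(agent, window_avg, threshold=-220):
        # Only consider saving once the window average clears the threshold.
        if window_avg <= threshold:
            return
        best = getattr(agent, 'best_window_avg_score', -300)
        if window_avg > best:
            agent.save_model("para.best.bin")
            agent.best_window_avg_score = window_avg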
@@ -256,3 +263,5 @@ class ReinforcementAgent(ValueEstimationAgent):
         if self.episodesSoFar == self.numTraining:
             msg = 'Training Done (turning off epsilon and alpha)'
             print('%s\n%s' % (msg,'-' * len(msg)))
+            import traceback
+            traceback.print_stack()
@@ -26,21 +26,25 @@ class DeepQNetwork(Module):
         "*** YOUR CODE HERE ***"
         # Initialize layers
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        layer1_size=300
-        layer2_size=300
-        # layer3_size=500
+        layer1_size=512
+        layer2_size=128
+        layer3_size=64
         self.fc1 = Linear(state_dim, layer1_size).to(self.device)
         self.fc2 = Linear(layer1_size, layer2_size).to(self.device)
-        # self.fc3 = Linear(layer2_size, layer3_size).to(self.device)
-        self.fc_out= Linear(layer2_size, action_dim).to(self.device)
+        self.fc3 = Linear(layer2_size, layer3_size).to(self.device)
+        self.fc_out= Linear(layer3_size, action_dim).to(self.device)

         # Set learning parameters
-        self.learning_rate = 0.1
-        self.numTrainingGames = 3800
+        self.learning_rate = 0.01
+        self.numTrainingGames = 5000
         self.batch_size = 128

         # Optimizer
         self.optimizer = optim.SGD(self.parameters(), lr=self.learning_rate)
+        # self.scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=27500, eta_min=self.learning_rate*0.25) # Replace with CosineAnnealingLR
+        # self.scheduler2 = torch.optim.lr_scheduler.ExponentialLR(self.optimizer, gamma=0.9999)
+        self.output_step=500
+        self.output_cnt=0

         "**END CODE"""
         self.double()
@@ -80,7 +84,7 @@ class DeepQNetwork(Module):
         states = states.to(self.device)
         x = relu(self.fc1(states))
         x = relu(self.fc2(x))
-        # x = relu(self.fc3(x))
+        x = relu(self.fc3(x))
         Q_values = self.fc_out(x)
         return Q_values

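After this change the third hidden layer is active, so the network is a 512-128-64 ReLU MLP from the state encoding to one Q-value per action. A self-contained sketch of the same shape in plain torch.nn (assuming the project's Linear and relu behave like their torch counterparts; the DeepQNetwork wrapper itself is not reproduced):

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class QNetSketch(nn.Module):
        def __init__(self, state_dim, action_dim):
            super().__init__()
            self.fc1 = nn.Linear(state_dim, 512)
            self.fc2 = nn.Linear(512, 128)
            self.fc3 = nn.Linear(128, 64)
            self.fc_out = nn.Linear(64, action_dim)

        def forward(self, states):
            x = F.relu(self.fc1(states))
            x = F.relu(self.fc2(x))
            x = F.relu(self.fc3(x))
            return self.fc_out(x)  # one Q-value per action slot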
@@ -106,4 +110,9 @@ class DeepQNetwork(Module):
         self.optimizer.zero_grad()
         loss = self.get_loss(states, Q_target)
         loss.backward()
         self.optimizer.step()
+        # self.scheduler1.step()
+        # self.scheduler2.step()
+        self.output_cnt+=1
+        if self.output_cnt%self.output_step==0:
+            print("now lr is: ", self.optimizer.param_groups[0]['lr'],"update count", self.output_cnt)
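This hunk only shows the gradient step and the periodic learning-rate print; how Q_target is built (and where the agent's doubleQ=True and target_update_rate=300 come in) is outside this diff. As a hedged sketch of the usual double-Q target with a separate target network (names here are illustrative, not the repo's):

    import torch

    def double_q_targets(model, target_model, rewards, next_states, dones, discount=0.95):
        # Online net picks the greedy next action; target net evaluates it.
        with torch.no_grad():
            best_actions = model(next_states).argmax(dim=1, keepdim=True)         # (B, 1)
            next_q = target_model(next_states).gather(1, best_actions).squeeze(1)  # (B,)
            return rewards + discount * next_q * (1.0 - dones)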
@@ -745,6 +745,7 @@ if __name__ == '__main__':
    """
    args = readCommand(sys.argv[1:]) # Get game components based on input
    runGames(**args)
+    args["pacman"].save_model()

    # import cProfile
    # cProfile.run("runGames( **args )")
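As written, save_model() only runs if runGames returns normally. One possible hardening, not part of this commit, is to wrap the call in try/finally so parameters are still written out when a long run is interrupted:

    args = readCommand(sys.argv[1:])
    try:
        runGames(**args)
    finally:
        # Persist whatever was learned, even on Ctrl-C or a crash mid-run.
        if hasattr(args["pacman"], "save_model"):
            args["pacman"].save_model()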
@@ -564,7 +564,9 @@ class DeepQLearningTest(testClasses.TestCase):
         pacman = pacmanType(self.layout)

         # Load Ghost Agent
-        ghostType = loadAgent("RandomGhost", nographics)
+        ghost_agent_name="DirectionalGhost"
+        ghostType = loadAgent(ghost_agent_name, nographics)
+        print("using ghost agent", ghost_agent_name)
         numghosts = 1
         ghosts = [ghostType(i+1) for i in range(numghosts)]
