use guiding agent
@@ -6,6 +6,165 @@ import copy
import torch
import numpy as np
import os
from util import *
import util
from pacman import GameState
from game import Directions, Actions

class Agent:
    """
    An agent must define a getAction method, but may also define the
    following methods which will be called if they exist:

    def registerInitialState(self, state): # inspects the starting state
    """

    def __init__(self, index=0):
        self.index = index

    def getAction(self, state):
        """
        The Agent will receive a GameState (from either {pacman, capture, sonar}.py) and
        must return an action from Directions.{North, South, East, West, Stop}
        """
        raiseNotDefined()

def betterEvaluationFunction(currentGameState: GameState):
    """
    Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
    evaluation function (question 5).

    DESCRIPTION: scores a state by combining reciprocal distances to food,
    capsules, and ghosts (rewarding capsules, food, and catchable scared
    ghosts, penalizing nearby un-scared ghosts), plus the score gained since
    the root of the current search.
    """
    # Useful information you can extract from a GameState (pacman.py)
    if currentGameState.isLose():
        return -500
    if currentGameState.isWin():
        return 500
    kInf = 1e100
    capsules_position = currentGameState.getCapsules()
    current_pos = currentGameState.getPacmanPosition()
    food_positions = currentGameState.getFood().asList()
    # print(f"action:{action}, action_vec:{action_vec}")
    current_ghost_positions = currentGameState.getGhostPositions()
    current_ghost_scared_times = [ghostState.scaredTimer for ghostState in currentGameState.getGhostStates()]
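    # Partition the ghosts by threat: a ghost counts as edible only when its
    # scared timer comfortably exceeds the estimated time to reach it
    # (1.2 * Manhattan distance + 2); scared ghosts that cannot be reached in
    # time are tracked separately, and un-scared ghosts are treated as threats.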
    not_scared_ghosts_positions = [current_ghost_positions[i] for i in range(len(current_ghost_positions)) if current_ghost_scared_times[i] == 0]
    scared_ghosts_positions = [current_ghost_positions[i] for i in range(len(current_ghost_positions)) if 0 < current_ghost_scared_times[i] <= 1.2 * util.manhattanDistance(current_ghost_positions[i], current_pos) + 2]
    edible_ghosts_positions = [current_ghost_positions[i] for i in range(len(current_ghost_positions)) if current_ghost_scared_times[i] > 1.2 * util.manhattanDistance(current_ghost_positions[i], current_pos) + 2]
    current_self_position = currentGameState.getPacmanPosition()

    def DotProduct(a, b):
        return a[0] * b[0] + a[1] * b[1]

    def CrossProduct(a, b):
        return a[0] * b[1] - a[1] * b[0]

    def EuclideanDistance(a, b):
        return ((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) ** 0.5

    def DistanceAnalysis(current_self_position, object_position_list, flag="None"):
        # Reciprocal distance to the closest relevant object: 0 when the list is
        # empty, kInf when we are standing on one of the objects.
        if len(object_position_list) == 0:
            return 0
        if current_self_position in object_position_list:
            return kInf
        res = 0
        for obj_pos in object_position_list:
            if flag == "Ghost" and util.manhattanDistance(current_self_position, obj_pos) >= 6:
                continue  # ghosts further than 6 steps away are ignored
            distance_to_obj = util.manhattanDistance(current_self_position, obj_pos)
            res = max(res, 1 / distance_to_obj)
        return res

    da_for_foods = DistanceAnalysis(current_self_position, food_positions)
    da_for_unscared_ghosts = DistanceAnalysis(current_self_position, not_scared_ghosts_positions, "Ghost")
    da_for_scared_ghosts = DistanceAnalysis(current_self_position, scared_ghosts_positions)
    da_for_capsules = DistanceAnalysis(current_self_position, capsules_position)
    da_for_edible_ghosts = DistanceAnalysis(current_self_position, edible_ghosts_positions)
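    # Hand-tuned linear combination: capsules attract, un-scared ghosts repel,
    # unreachable scared ghosts mildly repel, food and edible ghosts attract.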
    res = da_for_capsules * 2 - da_for_unscared_ghosts * 2 - da_for_scared_ghosts * 0.2 + da_for_foods * 0.2 + da_for_edible_ghosts * 1
    if da_for_unscared_ghosts < 1 / 6:
        # No dangerous ghost nearby: chase food and edible ghosts more aggressively.
        res += (da_for_foods * 0.2 + da_for_edible_ghosts * 1) * 5
    # res *= random.uniform(0.9, 1.1)
    res *= 100
    global last_score
    res += (currentGameState.getScore() - last_score) * 10
    # print(f"res:{res}")
    return res

class MultiAgentSearchAgent(Agent):
    """
    This class provides some common elements to all of your
    multi-agent searchers. Any methods defined here will be available
    to the MinimaxPacmanAgent, AlphaBetaPacmanAgent & ExpectimaxPacmanAgent.

    You *do not* need to make any changes here, but you can if you want to
    add functionality to all your adversarial search agents. Please do not
    remove anything, however.

    Note: this is an abstract class: one that should not be instantiated. It's
    only partially specified, and designed to be extended. Agent (game.py)
    is another abstract class.
    """

    def __init__(self, evalFn='scoreEvaluationFunction', depth='2'):
        self.index = 0  # Pacman is always agent index 0
        # The evaluation function is hard-wired to betterEvaluationFunction,
        # so the evalFn argument is ignored here.
        self.evaluationFunction = betterEvaluationFunction
        self.depth = int(depth)


# Score of the game state at the root of the current search; set by
# ExpectimaxAgent.getAction and read by betterEvaluationFunction.
last_score = 0

class ExpectimaxAgent(MultiAgentSearchAgent):
    """
    Your expectimax agent (question 4)
    """

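    # ExpectMaxSearch returns a (value, action_sequence) pair: the expectimax
    # value of the state together with the sequence of actions that realizes it.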
    def ExpectMaxSearch(self, gameState: GameState, depth_remain: int, agentIndex: int) -> tuple[float, list[Actions]]:
        if depth_remain == 0:
            # print(f"depth_remain:{depth_remain}")
            # print(f"returning leaf {self.evaluationFunction(gameState)}, {[]}")
            return self.evaluationFunction(gameState), []
        legal_actions = gameState.getLegalActions(agentIndex)
        if len(legal_actions) == 0:
            # print(f"depth_remain:{depth_remain}")
            # print(f"returning leaf {self.evaluationFunction(gameState)}, {[]}")
            return self.evaluationFunction(gameState), []
        kInf = 1e100
        res_action = []
        res_val = 0
        if agentIndex == 0:
            # Max node: Pacman picks the action with the highest expected value.
            res_val = -kInf
            for action in legal_actions:
                successorGameState = gameState.generateSuccessor(agentIndex, action)
                nxt_depth = depth_remain - 1 if agentIndex == gameState.getNumAgents() - 1 else depth_remain
                val, action_list = self.ExpectMaxSearch(successorGameState, nxt_depth, (agentIndex + 1) % gameState.getNumAgents())
                if action == "Stop":
                    val -= 100  # discourage standing still
                if val > res_val:
                    res_val = val
                    # print(f"action:{action}, action_list:{action_list}")
                    res_action = [action] + action_list
        else:
            # Chance node: ghosts are modeled as choosing uniformly at random,
            # so the node's value is the average over all legal ghost moves.
            res_val = kInf
            val_list = []
            for action in legal_actions:
                successorGameState = gameState.generateSuccessor(agentIndex, action)
                nxt_depth = depth_remain - 1 if agentIndex == gameState.getNumAgents() - 1 else depth_remain
                val, action_list = self.ExpectMaxSearch(successorGameState, nxt_depth, (agentIndex + 1) % gameState.getNumAgents())
                val_list.append(val)
                if val < res_val:
                    # Track the worst-case move only to report a concrete action
                    # sequence; the node's value itself is averaged below.
                    res_val = val
                    res_action = [action] + action_list
            res_val = sum(val_list) / len(val_list)
        # print(f"depth_remain:{depth_remain}")
        # print(f"returning {res_val}, {res_action}")
        return res_val, res_action

    def getAction(self, gameState: GameState):
        """
        Returns the expectimax action using self.depth and self.evaluationFunction

        All ghosts should be modeled as choosing uniformly at random from their
        legal moves.
        """
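        # Remember the score at the root so betterEvaluationFunction can reward
        # score gained along a search path rather than the absolute game score.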
        global last_score
        last_score = gameState.getScore()
        stat = self.ExpectMaxSearch(gameState, self.depth, 0)
        # print(f"stat:{stat}")
        return stat[1][0]


class PacmanDeepQAgent(PacmanQAgent):
    def __init__(self, layout_input="smallGrid", target_update_rate=300, doubleQ=True, **args):
@@ -41,6 +200,7 @@ class PacmanDeepQAgent(PacmanQAgent):
        self.doubleQ = doubleQ
        if self.doubleQ:
            self.target_update_rate = -1
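        # Attach an ExpectimaxAgent to guide exploration; QLearningAgent.getAction
        # (see the last hunk below) queries it instead of picking a random move.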
        self.guiding_agent = ExpectimaxAgent()

    def get_state_dim(self, layout):
        pac_ft_size = 2

@@ -11,7 +11,7 @@ from torch import tensor, double, optim
from torch.nn.functional import relu, mse_loss
import torch

kProductionMode=False
kProductionMode=True

class DeepQNetwork(Module):
    """
    A model that uses a Deep Q-value Network (DQN) to approximate Q(s,a) as part
@@ -115,7 +115,9 @@ class QLearningAgent(ReinforcementAgent):
            return None

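        # Guided exploration (the point of this commit): with probability epsilon,
        # ask the guiding ExpectimaxAgent for a move instead of acting uniformly
        # at random, so exploratory actions still follow a reasonable policy.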
        if util.flipCoin(self.epsilon):
            return random.choice(legalActions)
            # return random.choice(legalActions)
            # now using guiding agent
            return self.guiding_agent.getAction(state)
        else:
            return self.computeActionFromQValues(state)