diff --git a/reinforcement/deepQLearningAgents.py b/reinforcement/deepQLearningAgents.py
index 5172364..7aa4b54 100644
--- a/reinforcement/deepQLearningAgents.py
+++ b/reinforcement/deepQLearningAgents.py
@@ -6,6 +6,165 @@
 import copy
 import torch
 import numpy as np
 import os
+from util import *
+import util
+import random
+from pacman import GameState
+from game import Directions, Actions
+
+
+class Agent:
+    """
+    An agent must define a getAction method, but may also define the
+    following methods which will be called if they exist:
+
+    def registerInitialState(self, state): # inspects the starting state
+    """
+
+    def __init__(self, index=0):
+        self.index = index
+
+    def getAction(self, state):
+        """
+        The Agent will receive a GameState (from either {pacman, capture, sonar}.py) and
+        must return an action from Directions.{North, South, East, West, Stop}
+        """
+        raiseNotDefined()
+
+
+def betterEvaluationFunction(currentGameState: GameState):
+    """
+    Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
+    evaluation function (question 5).
+
+    DESCRIPTION: combines reciprocal-distance features for food, capsules,
+    and ghosts (split into dangerous, soon-to-recover, and safely edible)
+    into a hand-tuned linear score.
+    """
+    # Useful information you can extract from a GameState (pacman.py)
+    # Terminal states short-circuit the feature computation.
+    if currentGameState.isLose():
+        return -500
+    if currentGameState.isWin():
+        return 500
+    kInf = 1e100
+    capsules_position = currentGameState.getCapsules()
+    current_pos = currentGameState.getPacmanPosition()
+    food_positions = currentGameState.getFood().asList()
+    current_ghost_positions = currentGameState.getGhostPositions()
+    current_ghost_scared_times = [ghostState.scaredTimer for ghostState in currentGameState.getGhostStates()]
+    # Partition ghosts by threat level: not scared at all, scared but likely
+    # to recover before Pacman reaches them, and scared long enough to eat.
+    not_scared_ghosts_positions = [current_ghost_positions[i] for i in range(len(current_ghost_positions)) if current_ghost_scared_times[i] == 0]
+    scared_ghosts_positions = [current_ghost_positions[i] for i in range(len(current_ghost_positions)) if 0 < current_ghost_scared_times[i] <= 1.2*util.manhattanDistance(current_ghost_positions[i], current_pos)+2]
+    edible_ghosts_positions = [current_ghost_positions[i] for i in range(len(current_ghost_positions)) if current_ghost_scared_times[i] > 1.2*util.manhattanDistance(current_ghost_positions[i], current_pos)+2]
+
+    def DotProduct(a, b):
+        return a[0]*b[0] + a[1]*b[1]
+
+    def CrossProduct(a, b):
+        return a[0]*b[1] - a[1]*b[0]
+
+    def EuclideanDistance(a, b):
+        return ((a[0]-b[0])**2 + (a[1]-b[1])**2)**0.5
+
+    def DistanceAnalysis(current_self_position, object_position_list, flag="None"):
+        # Reciprocal of the distance to the closest object in the list;
+        # kInf when standing on one. Ghosts 6 or more squares away are ignored.
+        if len(object_position_list) == 0:
+            return 0
+        if current_self_position in object_position_list:
+            return kInf
+        res = 0
+        for obj_pos in object_position_list:
+            distance_to_obj = util.manhattanDistance(current_self_position, obj_pos)
+            if flag == "Ghost" and distance_to_obj >= 6:
+                continue
+            res = max(res, 1/distance_to_obj)
+        return res
+
+    da_for_foods = DistanceAnalysis(current_pos, food_positions)
+    da_for_unscared_ghosts = DistanceAnalysis(current_pos, not_scared_ghosts_positions, "Ghost")
+    da_for_scared_ghosts = DistanceAnalysis(current_pos, scared_ghosts_positions)
+    da_for_capsules = DistanceAnalysis(current_pos, capsules_position)
+    da_for_edible_ghosts = DistanceAnalysis(current_pos, edible_ghosts_positions)
+    res = da_for_capsules*2 - da_for_unscared_ghosts*2 - da_for_scared_ghosts*0.2 + da_for_foods*0.2 + da_for_edible_ghosts*1
+    if da_for_unscared_ghosts < 1/6:
+        # No dangerous ghost within 6 squares: chase food and edible ghosts harder.
+        res += (da_for_foods*0.2 + da_for_edible_ghosts*1)*5
+    # Small multiplicative noise breaks ties between equally scored states.
+    res *= random.uniform(0.9, 1.1)
+    res *= 100
+    global last_score
+    res += (currentGameState.getScore() - last_score)*10
+    return res
+
+
+class MultiAgentSearchAgent(Agent):
+    """
+    This class provides some common elements to all of your
+    multi-agent searchers. Any methods defined here will be available
+    to the MinimaxPacmanAgent, AlphaBetaPacmanAgent & ExpectimaxPacmanAgent.
+
+    You *do not* need to make any changes here, but you can if you want to
+    add functionality to all your adversarial search agents. Please do not
+    remove anything, however.
+
+    Note: this is an abstract class: one that should not be instantiated. It's
+    only partially specified, and designed to be extended. Agent (game.py)
+    is another abstract class.
+    """
+
+    def __init__(self, evalFn='scoreEvaluationFunction', depth='2'):
+        self.index = 0  # Pacman is always agent index 0
+        # betterEvaluationFunction is used regardless of the evalFn argument.
+        self.evaluationFunction = betterEvaluationFunction
+        self.depth = int(depth)
+
+
+last_score = 0  # score of the previous state, referenced by betterEvaluationFunction
+
+
+class ExpectimaxAgent(MultiAgentSearchAgent):
+    """
+    Your expectimax agent (question 4)
+    """
+
+    def ExpectMaxSearch(self, gameState: GameState, depth_remain: int, agentIndex: int) -> tuple[float, list[str]]:
+        if depth_remain == 0:
+            return self.evaluationFunction(gameState), []
+        legal_actions = gameState.getLegalActions(agentIndex)
+        if len(legal_actions) == 0:
+            return self.evaluationFunction(gameState), []
+        kInf = 1e100
+        res_action = []
+        res_val = 0
+        if agentIndex == 0:
+            # Max node: Pacman takes the best-valued successor.
+            res_val = -kInf
+            for action in legal_actions:
+                successorGameState = gameState.generateSuccessor(agentIndex, action)
+                nxt_depth = depth_remain-1 if agentIndex == gameState.getNumAgents()-1 else depth_remain
+                val, action_list = self.ExpectMaxSearch(successorGameState, nxt_depth, (agentIndex+1) % gameState.getNumAgents())
+                if action == Directions.STOP:
+                    val -= 100  # discourage standing still
+                if val > res_val:
+                    res_val = val
+                    res_action = [action] + action_list
+        else:
+            # Chance nodes: ghost moves.
+            res_val = kInf
+            val_list = []
+            for action in legal_actions:
+                successorGameState = gameState.generateSuccessor(agentIndex, action)
+                nxt_depth = depth_remain-1 if agentIndex == gameState.getNumAgents()-1 else depth_remain
+                val, action_list = self.ExpectMaxSearch(successorGameState, nxt_depth, (agentIndex+1) % gameState.getNumAgents())
+                val_list.append(val)
+                if val
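The hunk is cut off mid-statement above (`if val` ...). For orientation only, here is a minimal, self-contained sketch of how an expectimax chance node is conventionally finished: each ghost is modeled as choosing uniformly among its legal moves, so the node's value is the average of its children's values, not the minimum used in minimax. Every name below (`expectimax`, `evaluate`, `legal_actions`, `successor`) is a hypothetical stand-in, not the PR's actual code or the Pacman framework's API; if the truncated branch follows this formulation, the `val_list` it accumulates would simply be averaged after the loop.

```python
# Hypothetical sketch, not the PR's code: generic expectimax over a game
# tree, showing how the chance branch is conventionally completed.
def expectimax(state, depth, agent, num_agents, evaluate, legal_actions, successor):
    actions = legal_actions(state, agent)
    if depth == 0 or not actions:
        return evaluate(state), []           # leaf: static evaluation, empty plan
    next_agent = (agent + 1) % num_agents
    next_depth = depth - 1 if agent == num_agents - 1 else depth
    children = []
    for a in actions:
        val, plan = expectimax(successor(state, agent, a), next_depth,
                               next_agent, num_agents, evaluate,
                               legal_actions, successor)
        children.append((val, a, plan))
    if agent == 0:
        # Max node: the maximizer takes the highest-valued child.
        best_val, best_act, best_plan = max(children, key=lambda c: c[0])
        return best_val, [best_act] + best_plan
    # Chance node: each opponent move is assumed equally likely, so the
    # node's value is the plain average of the children (not the minimum).
    return sum(c[0] for c in children) / len(children), []
```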
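A side note on the evaluation function earlier in the hunk: because `DistanceAnalysis` keeps the maximum of `1/d` over all objects, each feature reduces to the reciprocal of the distance to the *closest* object, with dangerous ghosts at distance 6 or more filtered out. A framework-free restatement with a toy check; `reciprocal_closest` is a hypothetical name used only for illustration:

```python
def reciprocal_closest(pos, objects, cutoff=None):
    # Equivalent form of the hunk's DistanceAnalysis feature: 1 / (Manhattan
    # distance to the closest object), optionally ignoring objects at or
    # beyond `cutoff` (the hunk uses an effective cutoff of 6 for ghosts).
    dists = [abs(pos[0] - o[0]) + abs(pos[1] - o[1]) for o in objects]
    if cutoff is not None:
        dists = [d for d in dists if d < cutoff]
    if not dists:
        return 0.0
    return float("inf") if 0 in dists else 1.0 / min(dists)

# Food at (3, 0) and (5, 4) as seen from the origin: the feature is 1/3.
assert reciprocal_closest((0, 0), [(3, 0), (5, 4)]) == 1/3
# A ghost 7 squares away is ignored entirely under the cutoff.
assert reciprocal_closest((0, 0), [(7, 0)], cutoff=6) == 0.0
```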