use guiding agent

2024-07-22 19:10:40 +08:00
parent 1f217770b7
commit 2d55c655c3
3 changed files with 164 additions and 2 deletions

View File

@@ -6,6 +6,165 @@ import copy
import torch
import numpy as np
import os
from util import *
import util
from pacman import GameState
from game import Directions, Actions
class Agent:
    """
    An agent must define a getAction method, but may also define the
    following methods which will be called if they exist:

    def registerInitialState(self, state): # inspects the starting state
    """
    def __init__(self, index=0):
        self.index = index

    def getAction(self, state):
        """
        The Agent will receive a GameState (from either {pacman, capture, sonar}.py) and
        must return an action from Directions.{North, South, East, West, Stop}
        """
        raiseNotDefined()
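# For illustration only (hypothetical, not part of this commit): a minimal
# concrete agent under the interface above could look like this.
class GoWestAgent(Agent):
    """Toy example: go west whenever it is legal, otherwise stop."""
    def getAction(self, state):
        legal = state.getLegalPacmanActions()
        return Directions.WEST if Directions.WEST in legal else Directions.STOP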
def betterEvaluationFunction(currentGameState: GameState):
    """
    Your extreme ghost-hunting, pellet-nabbing, food-gobbling, unstoppable
    evaluation function (question 5).

    DESCRIPTION: combines reciprocal-distance scores for food, capsules and
    ghosts. Ghosts are split into three groups: not scared (avoid), scared but
    likely to recover before Pacman can reach them (also avoid), and edible
    (chase). The score gained since the search root is added as a bonus.
    """
    # Terminal states are scored directly.
    if currentGameState.isLose():
        return -500
    if currentGameState.isWin():
        return 500
    kInf = 1e100
    capsules_position = currentGameState.getCapsules()
    current_pos = currentGameState.getPacmanPosition()
    food_positions = currentGameState.getFood().asList()
    current_ghost_positions = currentGameState.getGhostPositions()
    current_ghost_scared_times = [ghostState.scaredTimer for ghostState in currentGameState.getGhostStates()]
    # A scared ghost only counts as edible if its timer comfortably exceeds the
    # Manhattan travel time to reach it; 1.2*distance + 2 leaves a safety margin.
    not_scared_ghosts_positions = [
        pos for pos, t in zip(current_ghost_positions, current_ghost_scared_times) if t == 0]
    scared_ghosts_positions = [
        pos for pos, t in zip(current_ghost_positions, current_ghost_scared_times)
        if 0 < t <= 1.2 * util.manhattanDistance(pos, current_pos) + 2]
    edible_ghosts_positions = [
        pos for pos, t in zip(current_ghost_positions, current_ghost_scared_times)
        if t > 1.2 * util.manhattanDistance(pos, current_pos) + 2]
    # Small geometry helpers (DotProduct, CrossProduct and EuclideanDistance
    # are kept for experimentation; only manhattanDistance is used below).
    def DotProduct(a, b):
        return a[0] * b[0] + a[1] * b[1]

    def CrossProduct(a, b):
        return a[0] * b[1] - a[1] * b[0]

    def EuclideanDistance(a, b):
        return ((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) ** 0.5
    def DistanceAnalysis(current_self_position, object_position_list, flag="None"):
        """Proximity score: 1/distance to the nearest relevant object.

        Returns 0 for an empty list, kInf when standing on an object, and
        ignores non-scared ghosts that are 6 or more steps away.
        """
        if len(object_position_list) == 0:
            return 0
        if current_self_position in object_position_list:
            return kInf
        res = 0
        for obj_pos in object_position_list:
            distance_to_obj = util.manhattanDistance(current_self_position, obj_pos)
            if flag == "Ghost" and distance_to_obj >= 6:
                continue
            res = max(res, 1 / distance_to_obj)
        return res
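    # Worked example (hypothetical numbers): Pacman at (1, 1) with food at
    # (3, 3) and (5, 5) gives Manhattan distances 4 and 8, so
    # DistanceAnalysis returns max(1/4, 1/8) = 0.25 -- closer objects dominate.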
    da_for_foods = DistanceAnalysis(current_pos, food_positions)
    da_for_unscared_ghosts = DistanceAnalysis(current_pos, not_scared_ghosts_positions, "Ghost")
    da_for_scared_ghosts = DistanceAnalysis(current_pos, scared_ghosts_positions)
    da_for_capsules = DistanceAnalysis(current_pos, capsules_position)
    da_for_edible_ghosts = DistanceAnalysis(current_pos, edible_ghosts_positions)
    # Weighted combination: chase capsules and edible ghosts, avoid dangerous
    # ghosts, and mildly prefer food over scared ghosts that may recover.
    res = (da_for_capsules * 2 - da_for_unscared_ghosts * 2
           - da_for_scared_ghosts * 0.2 + da_for_foods * 0.2 + da_for_edible_ghosts * 1)
    # No dangerous ghost within 6 steps: boost the incentive to eat.
    if da_for_unscared_ghosts < 1 / 6:
        res += (da_for_foods * 0.2 + da_for_edible_ghosts * 1) * 5
    res *= 100
    # Reward the score gained since the root of the current search
    # (last_score is set in ExpectimaxAgent.getAction).
    global last_score
    res += (currentGameState.getScore() - last_score) * 10
    return res
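# Example of the weighting (illustrative numbers): with the nearest capsule 2
# steps away, the nearest food 4 steps away and no ghost within 6 steps, the
# base combination is 2*(1/2) + 0.2*(1/4) = 1.05, the low-danger bonus adds
# 5*0.2*(1/4) = 0.25, and scaling by 100 gives 130 before the score-delta term.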
class MultiAgentSearchAgent(Agent):
    """
    This class provides some common elements to all of your
    multi-agent searchers. Any methods defined here will be available
    to the MinimaxPacmanAgent, AlphaBetaPacmanAgent & ExpectimaxPacmanAgent.

    You *do not* need to make any changes here, but you can if you want to
    add functionality to all your adversarial search agents. Please do not
    remove anything, however.

    Note: this is an abstract class: one that should not be instantiated. It's
    only partially specified, and designed to be extended. Agent (game.py)
    is another abstract class.
    """
    def __init__(self, evalFn='scoreEvaluationFunction', depth='2'):
        self.index = 0  # Pacman is always agent index 0
        # The evalFn argument is deliberately ignored here: the guiding agent
        # always uses betterEvaluationFunction.
        self.evaluationFunction = betterEvaluationFunction
        self.depth = int(depth)

# Module-level global: the game score at the root of the current search,
# set in ExpectimaxAgent.getAction and read by betterEvaluationFunction.
last_score = 0
class ExpectimaxAgent(MultiAgentSearchAgent):
    """
    Your expectimax agent (question 4)
    """
    def ExpectMaxSearch(self, gameState: GameState, depth_remain: int, agentIndex: int) -> tuple[float, list[Actions]]:
        # Leaf: depth exhausted or no legal moves; score the state directly.
        if depth_remain == 0:
            return self.evaluationFunction(gameState), []
        legal_actions = gameState.getLegalActions(agentIndex)
        if len(legal_actions) == 0:
            return self.evaluationFunction(gameState), []
        kInf = 1e100
        res_action = []
        res_val = 0
        if agentIndex == 0:
            # Max node (Pacman): take the best successor value.
            res_val = -kInf
            for action in legal_actions:
                successorGameState = gameState.generateSuccessor(agentIndex, action)
                # Depth decreases only after the last agent has moved.
                nxt_depth = depth_remain - 1 if agentIndex == gameState.getNumAgents() - 1 else depth_remain
                val, action_list = self.ExpectMaxSearch(successorGameState, nxt_depth, (agentIndex + 1) % gameState.getNumAgents())
                # Discourage standing still.
                if action == Directions.STOP:
                    val -= 100
                if val > res_val:
                    res_val = val
                    res_action = [action] + action_list
        else:
            # Chance node (ghost): ghosts move uniformly at random, so the
            # node's value is the mean over successors. The recorded action
            # list follows the worst case, purely for tracing.
            res_val = kInf
            val_list = []
            for action in legal_actions:
                successorGameState = gameState.generateSuccessor(agentIndex, action)
                nxt_depth = depth_remain - 1 if agentIndex == gameState.getNumAgents() - 1 else depth_remain
                val, action_list = self.ExpectMaxSearch(successorGameState, nxt_depth, (agentIndex + 1) % gameState.getNumAgents())
                val_list.append(val)
                if val < res_val:
                    res_val = val
                    res_action = [action] + action_list
            res_val = sum(val_list) / len(val_list)
        return res_val, res_action
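    # Tiny numeric check (hypothetical values): a ghost (chance) node with
    # successor values [10, -2, 4] evaluates to (10 - 2 + 4) / 3 = 4, while a
    # Pacman (max) node over the same values evaluates to max(10, -2, 4) = 10.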
    def getAction(self, gameState: GameState):
        """
        Returns the expectimax action using self.depth and self.evaluationFunction

        All ghosts should be modeled as choosing uniformly at random from their
        legal moves.
        """
        # Remember the score at the search root so the evaluation function can
        # reward score gained during the lookahead.
        global last_score
        last_score = gameState.getScore()
        val, action_list = self.ExpectMaxSearch(gameState, self.depth, 0)
        return action_list[0]
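# Illustrative standalone use (hypothetical, not part of this commit): the
# guide can be sanity-checked before wiring it into the DQN agent, e.g.
#     guide = ExpectimaxAgent(depth='2')
#     action = guide.getAction(gameState)   # one expectimax-chosen move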
class PacmanDeepQAgent(PacmanQAgent):
    def __init__(self, layout_input="smallGrid", target_update_rate=300, doubleQ=True, **args):
@@ -41,6 +200,7 @@ class PacmanDeepQAgent(PacmanQAgent):
        self.doubleQ = doubleQ
        if self.doubleQ:
            self.target_update_rate = -1
+        self.guiding_agent = ExpectimaxAgent()  # expectimax guide used on exploration steps

    def get_state_dim(self, layout):
        pac_ft_size = 2

View File

@@ -11,7 +11,7 @@ from torch import tensor, double, optim
from torch.nn.functional import relu, mse_loss
import torch
-kProductionMode=False
+kProductionMode=True
class DeepQNetwork(Module):
"""
A model that uses a Deep Q-value Network (DQN) to approximate Q(s,a) as part

View File

@@ -115,7 +115,9 @@ class QLearningAgent(ReinforcementAgent):
            return None
        if util.flipCoin(self.epsilon):
-            return random.choice(legalActions)
+            # return random.choice(legalActions)
+            # now using the guiding agent (set up in PacmanDeepQAgent.__init__) for exploration
+            return self.guiding_agent.getAction(state)
        else:
            return self.computeActionFromQValues(state)
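Taken together, the commit swaps uniform-random exploration for expectimax-guided exploration: on an epsilon step the DQN agent now defers to the ExpectimaxAgent constructed in PacmanDeepQAgent.__init__. A minimal self-contained sketch of the resulting policy, with hypothetical names (select_action; agent.epsilon, agent.guiding_agent standing in for the real attributes):

import random

def select_action(agent, state):
    # Epsilon-greedy, but exploration follows a guiding policy rather than
    # a uniformly random move.
    legal = agent.getLegalActions(state)
    if not legal:
        return None
    if random.random() < agent.epsilon:  # util.flipCoin(self.epsilon) in the repo
        return agent.guiding_agent.getAction(state)
    return agent.computeActionFromQValues(state)

One trade-off worth noting: guiding exploration with a strong handcrafted policy biases the replay data toward high-reward trajectories, which can speed up early learning but reduces coverage of off-policy states compared to purely random exploration.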