I am trying to implement an AI for the 2048 game using minimax with alpha-beta pruning, based on the snake strategy (see this paper), which appears to be the best of the single heuristics.

Unfortunately, the AI only reaches the 256 tile in most games, which is not much better than the empty-cells heuristic alone. I have read the related discussion here, but could not find a solution on my own.
Here is the code:
```python
import math

from BaseAI_3 import BaseAI

INF_P = math.inf


class PlayerAI(BaseAI):
    move_str = {
        0: "UP",
        1: "DOWN",
        2: "LEFT",
        3: "RIGHT",
    }

    def __init__(self):
        super().__init__()
        self.depth_max = 4

    def getMove(self, grid):
        move_direction, _, _ = self.decision(grid)
        return move_direction

    def get_children(self, grid):
        # One child per legal move direction of the player.
        grid.children = []
        for move_direction in grid.getAvailableMoves():
            gridCopy = grid.clone()
            gridCopy.path = grid.path[:]
            gridCopy.path.append(PlayerAI.move_str[move_direction])
            gridCopy.move(move_direction)
            gridCopy.depth_current = grid.depth_current + 1
            grid.children.append((move_direction, gridCopy))
        return grid.children

    def utility(self, state):
        def snake():
            # Two base snake weight matrices (row-wise and column-wise
            # serpentine); the reflections below bring the total to eight.
            poses = [
                [
                    [2 ** 15, 2 ** 14, 2 ** 13, 2 ** 12],
                    [2 ** 8, 2 ** 9, 2 ** 10, 2 ** 11],
                    [2 ** 7, 2 ** 6, 2 ** 5, 2 ** 4],
                    [2 ** 0, 2 ** 1, 2 ** 2, 2 ** 3],
                ],
                [
                    [2 ** 15, 2 ** 8, 2 ** 7, 2 ** 0],
                    [2 ** 14, 2 ** 9, 2 ** 6, 2 ** 1],
                    [2 ** 13, 2 ** 10, 2 ** 5, 2 ** 2],
                    [2 ** 12, 2 ** 11, 2 ** 4, 2 ** 3],
                ],
            ]
            poses.append([item for item in reversed(poses[0])])
            poses.append([list(reversed(item)) for item in reversed(poses[0])])
            poses.append([list(reversed(item)) for item in poses[0]])
            poses.append([item for item in reversed(poses[1])])
            poses.append([list(reversed(item)) for item in reversed(poses[1])])
            poses.append([list(reversed(item)) for item in poses[1]])

            # Score the board against every orientation, keep the best.
            max_value = -INF_P
            for pos in poses:
                value = 0
                for i in range(state.size):
                    for j in range(state.size):
                        value += state.map[i][j] * pos[i][j]
                if value > max_value:
                    max_value = value
            return max_value

        weight_snake = 1 / (2 ** 13)
        value = (
            weight_snake * snake(),
        )
        return value

    def decision(self, state):
        state.depth_current = 1
        state.path = []
        return self.maximize(state, -INF_P, INF_P)

    def terminal_state(self, state):
        return state.depth_current >= self.depth_max

    def maximize(self, state, alpha, beta):
        # terminal-state check
        if self.terminal_state(state):
            return (None, state, self.utility(state))
        max_move_direction, max_child, max_utility = None, None, (-INF_P,)
        for move_direction, child in self.get_children(state):
            _, state2, utility = self.minimize(child, alpha, beta)
            child.utility = utility
            if sum(utility) > sum(max_utility):
                max_move_direction, max_child, max_utility = move_direction, child, utility
            if sum(max_utility) >= beta:
                break  # beta cutoff
            if sum(max_utility) > alpha:
                alpha = sum(max_utility)
        state.utility = max_utility
        state.alpha = alpha
        state.beta = beta
        return max_move_direction, max_child, max_utility

    def minimize(self, state, alpha, beta):
        # terminal-state check
        if self.terminal_state(state):
            return (None, state, self.utility(state))
        min_move_direction, min_child, min_utility = None, None, (INF_P,)
        for move_direction, child in self.get_children(state):
            _, state2, utility = self.maximize(child, alpha, beta)
            child.utility = utility
            if sum(utility) < sum(min_utility):
                min_move_direction, min_child, min_utility = move_direction, child, utility
            if sum(min_utility) <= alpha:
                break  # alpha cutoff
            if sum(min_utility) < beta:
                beta = sum(min_utility)
        state.utility = min_utility
        state.alpha = alpha
        state.beta = beta
        return min_move_direction, min_child, min_utility
```
`grid` is an object and `grid.map` is a two-dimensional array (a list of lists).
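For intuition about what the snake heuristic rewards, here is a minimal standalone sketch (hypothetical names, plain lists instead of the `Grid` class, and only one of the eight orientations that `utility()` above maximizes over):

```python
# Minimal sketch of the snake heuristic, assuming a plain 4x4
# list-of-lists board; SNAKE_WEIGHTS and snake_score are hypothetical
# names, not part of the code above.
SNAKE_WEIGHTS = [
    [2 ** 15, 2 ** 14, 2 ** 13, 2 ** 12],
    [2 ** 8,  2 ** 9,  2 ** 10, 2 ** 11],
    [2 ** 7,  2 ** 6,  2 ** 5,  2 ** 4],
    [2 ** 0,  2 ** 1,  2 ** 2,  2 ** 3],
]

def snake_score(board):
    # Dot product of the board with the weight matrix: large tiles
    # sitting along the serpentine path score highest.
    return sum(board[i][j] * SNAKE_WEIGHTS[i][j]
               for i in range(4) for j in range(4))

# A board whose tiles follow the snake path scores far higher than
# a scattered board holding the same tiles.
ordered = [[512, 256, 128, 64],
           [4,   8,   16,  32],
           [2,   0,   0,   0],
           [0,   0,   0,   0]]
scattered = [[2,   0,   512, 0],
             [64,  8,   0,   256],
             [0,   128, 16,  0],
             [0,   4,   32,  2]]
print(snake_score(ordered) > snake_score(scattered))  # True
```

Taking the maximum over all eight rotations and reflections, as `utility()` does, keeps the score independent of which corner the snake is anchored in.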
Is there a mistake in my code? How can I improve it?
Game log added: https://pastebin.com/eyzgU2dN
Answer:
Over the weekend I realized the algorithm was not implemented correctly. The mistake was in the `minimize()` function: I was generating the children the wrong way. The opponent's children should be generated like this:
```python
def get_opponent_children(self, grid):
    grid.children = []
    for x in range(grid.size):
        for y in range(grid.size):
            if grid.map[x][y] == 0:
                # The opponent's "move" is placing a 2 or a 4
                # in an empty cell.
                for c in (2, 4):
                    gridCopy = grid.clone()
                    gridCopy.path = grid.path[:]
                    gridCopy.depth_current = grid.depth_current + 1
                    gridCopy.map[x][y] = c
                    grid.children.append((None, gridCopy))
    return grid.children
```
with the corresponding change in `minimize()`:
```python
for move_direction, child in self.get_opponent_children(state):
```
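To see what the corrected expansion enumerates, here is a hedged standalone sketch (hypothetical names, plain lists instead of the `Grid` class): the opponent node gets one child per (empty cell, tile value) pair, not one per player move:

```python
import copy

def opponent_placements(board):
    # Hypothetical demo helper: every board reachable by the computer
    # placing a 2 or a 4 in an empty cell.
    children = []
    for x in range(len(board)):
        for y in range(len(board[x])):
            if board[x][y] == 0:
                for tile in (2, 4):
                    child = copy.deepcopy(board)
                    child[x][y] = tile
                    children.append(child)
    return children

board = [[2, 4],
         [0, 0]]  # two empty cells
print(len(opponent_placements(board)))  # 4 children: 2 cells x 2 tiles
```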
Now it reaches the 1024 and 2048 tiles most of the time.