我在进行CS50课程的井字游戏项目时
发现使用极小化极大算法时,在某些情况下无法找到最优解
这是我的代码:
"""Tic Tac Toe Player"""import copyimport mathX = "X"O = "O"EMPTY = Nonedef initial_state(): """ Returns starting state of the board. """ return [[EMPTY, EMPTY, EMPTY], [EMPTY, EMPTY, EMPTY], [EMPTY, EMPTY, EMPTY]]board = initial_state()def player(board): """ Returns player who has the next turn on a board. """ numO = 0 numX = 0 FirstPlayer = None for i in range(len(board)): for j in range(len(board[i])): if board[i][j] == O: numO += 1 elif board[i][j] == X: numX += 1 return X if numO == numX else Odef actions(board): """ Returns set of all possible actions (i, j) available on the board. """ possact = set() for i in range(len(board)): for j in range(len(board[i])): if board [i][j] == EMPTY: possact.add((i, j)) return possactdef result(board, action): """ Returns the board that results from making move (i, j) on the board. """ boardcopy = copy.deepcopy(board) boardcopy[action[0]][action[1]] = player(board) return boardcopy def winner(board): """ Returns the winner of the game, if there is one. """ for i in range(3): wonO = True wonX = True for j in range(3): if board[i][j] == O or board[i][j] == EMPTY: wonX = False if board[i][j] == X or board[i][j] == EMPTY: wonO = False if wonX: return X if wonO: return O for j in range(3): wonO = True wonX = True for i in range(3): if board[i][j] == X or board[i][j] == EMPTY: wonO = False if board[i][j] == O or board[i][j] == EMPTY: wonX = False if wonX: return X if wonO: return O diag1 = '' diag2 = '' j = 2 for i in range(3): diag1 += str(board[i][i]) diag2 += str(board[i][j]) j -= 1 if diag1 == 'XXX' or diag2 == 'XXX': return X elif diag1 == 'OOO' or diag2 == 'OOO': return Odef terminal(board): """ Returns True if game is over, False otherwise. """ if winner(board) == X: return True elif winner(board) == O: return True for i in range(len(board)): for j in range(len(board[i])): if board[i][j] == EMPTY: return False return True def utility(board): """ Returns 1 if X has won the game, -1 if O has won, 0 otherwise. """ resB = winner(board) if resB == X: return 1 elif resB == O: return -1 else: return 0def minimax(board): """ Returns the optimal action for the current player on the board. """ if terminal(board): return None Max = float("-inf") Min = float("inf") if player(board) == X: return Max_Value(board, Max, Min)[1] else: return Min_Value(board, Max, Min)[1]def Max_Value(board, Max, Min): move = None if terminal(board): return [utility(board), None] v = float('-inf') for action in actions(board): test = Min_Value(result(board, action), Max, Min)[0] Max = max(Max, test) if test > v: v = test move = action if Max >= Min: break return [v, move]def Min_Value(board, Max, Min): move = None if terminal(board): return [utility(board), None] v = float('inf') for action in actions(board): test = Max_Value(result(board, action), Max, Min)[0] Min = min(Min, test) if test < v: v = test move = action if Max >= Min: break return [v, move]
这是具体情况(电脑扮演O):第五步的图片
最优解应该是中间底部的格子
但它选择了这个:第六步的图片
电脑最终赢了,但不是以最优方式
为什么极小化极大算法不选择最优解?
我该如何修复这个问题?
回答:
我没有检查你的代码是否正确实现了极小化极大算法,但我可以解释为什么会出现这样的结果。
游戏树中可能有多条路径通向具有相同效用值的节点。极小化极大算法不会区分快速胜利和缓慢胜利;它会选择任何一条能保证胜利的路径。
解决这个问题的一个常见方法是为较慢的胜利分配较低的效用值。例如,将胜利的效用值设为1000 - depth
。相反,失败的效用值应设为-1000 + depth
,以便算法尽可能延长不可避免的失败。(如果想使用负极大算法,保持评估函数的对称性也是有益的。)