Alpha-Beta Pruning Minimax算法在井字游戏AI中未提供正确的optimal move

我正在使用Alpha-Beta Pruning Minimax算法实现一个井字游戏AI。目标是找到给定棋盘上AI玩家(X)的最佳移动。然而,我遇到了一个问题,算法没有返回正确的optimal move索引。

AI玩家(X)的最佳移动应该是索引4,但我的minimax函数返回的是bestAIMove.index = 8。

这是我的代码:

let humanPlayer = "O";let aiPlayer = "X";let origBoard = ["X", "O", 2, "X", 4, 5, "O", 7, 8];let MAX = {index: 99, score: 1000};let MIN = {index: 99, score: -1000}let fc = 0;function checkAvailableMoves(board) {    return board.filter(s => s !== "O" && s !== "X");  }function winning(board, player) {    const winningCombinations = [      [0, 1, 2],      [3, 4, 5],      [6, 7, 8],      [0, 3, 6],      [1, 4, 7],      [2, 5, 8],      [0, 4, 8],      [2, 4, 6]    ];    return winningCombinations.some(combination =>      combination.every(cell => board[cell] === player)    );  }function max(a,b) {return a.score > b.score ? a : b;}function min(a,b) {return a.score < b.score ? a : b;}function minimax(newBoard, depth, player, alpha, beta) {    const availableMoves = checkAvailableMoves(newBoard);    let theBestMove = {};    fc++    if (winning(newBoard, humanPlayer)) {return { score: -10 + depth }}    else if (winning(newBoard, aiPlayer)) {return { score: 10 - depth }}    else if (availableMoves.length === 0) {return { score: 0 }};    if (player === aiPlayer) {      for (let i = 0; i < availableMoves.length; i++) {        const index = availableMoves[i];        newBoard[index] = player;        let result = minimax(newBoard, depth + 1, humanPlayer, alpha, beta);        result.index = index;        alpha = max(alpha,result)        newBoard[index] = index;        if (alpha.score >= beta.score) {break}      }      theBestMove = alpha;    } else if (player === humanPlayer) {      for (let i = 0; i < availableMoves.length; i++) {        const index = availableMoves[i];        newBoard[index] = player;        let result = minimax(newBoard, depth + 1, aiPlayer, alpha, beta);        result.index = index;        beta = min(beta, result);        newBoard[index] = index;        if (alpha.score >= beta.score){break}      }      theBestMove = beta;    }    return theBestMove;  }  bestAIMove = minimax(origBoard,0,aiPlayer,MIN,MAX)  console.log(bestAIMove)  console.log(fc)

可能是什么原因导致这个问题?


回答:

你的代码中存在两个相关的问题:

  1. minmax函数在分数相等时会选择b。但由于你调用minmax时将result作为第二个参数,这总是优先考虑较新的分数。由于Alpha-Beta剪枝,你可能会得到相同的分数,你应该优先考虑“设定标准”的移动,即alphabeta。因此,你可以交换传递给minmax的参数,或者修改这些函数,使它们在分数相等时选择a

  2. result.index = index修改可能为alphabeta的对象。你不希望发生这种情况。应将这些对象视为不可变的。因此,应该使用result = {...result, index}来处理。

通过这两个修复,代码将正常工作。

演示

这是你修正后的代码,人类玩家先行,在一个交互式片段中:

const humanPlayer = "X";const aiPlayer = "O";const MAX = {  index: 99,  score: 1000};const MIN = {  index: 99,  score: -1000}function checkAvailableMoves(board) {  return board.filter(s => s !== "O" && s !== "X");}function winning(board, player) {  const winningCombinations = [    [0, 1, 2],    [3, 4, 5],    [6, 7, 8],    [0, 3, 6],    [1, 4, 7],    [2, 5, 8],    [0, 4, 8],    [2, 4, 6]  ];  return winningCombinations.some(combination =>    combination.every(cell => board[cell] === player)  );}function max(a, b) {  return a.score > b.score ? a : b;}function min(a, b) {  return a.score < b.score ? a : b;}function minimax(newBoard, depth, player, alpha, beta) {  const tab = "  ".repeat(depth);  const availableMoves = checkAvailableMoves(newBoard);  let theBestMove = {};  if (winning(newBoard, humanPlayer)) {    return {      score: -10 + depth    }  } else if (winning(newBoard, aiPlayer)) {    return {      score: 10 - depth    }  } else if (availableMoves.length === 0) {    return {      score: 0    }  };  if (player === aiPlayer) {    for (let i = 0; i < availableMoves.length; i++) {      const index = availableMoves[i];      newBoard[index] = player;      let result = minimax(newBoard, depth + 1, humanPlayer, alpha, beta);      result = { ...result,        index      }; // 不要修改      alpha = max(result, alpha) // 参数交换      newBoard[index] = index;      if (alpha.score >= beta.score) {        break      }    }    theBestMove = alpha;  } else if (player === humanPlayer) {    for (let i = 0; i < availableMoves.length; i++) {      const index = availableMoves[i];      newBoard[index] = player;      let result = minimax(newBoard, depth + 1, aiPlayer, alpha, beta);      result = { ...result,        index      }; // 不要修改      beta = min(result, beta); // 参数交换      newBoard[index] = index;      if (alpha.score >= beta.score) {        break      }    }    theBestMove = beta;  }  return theBestMove;}// 添加辅助函数const gameOver = (board) => winning(board, "X") ? "X" :  winning(board, "O") ? "O" :  checkAvailableMoves(board).length ? "" :  "-";// I/O处理function display(board) {  document.querySelectorAll("td").forEach((td, i) => {    td.textContent = isNaN(board[i]) ? board[i] : "";  });  document.querySelector("div").textContent = {    [humanPlayer]: "你赢了!!!",    [aiPlayer]: "CPU赢了!!!",    "-": "平局"  }[gameOver(board)] || "";}function play(board, index) {  if (gameOver(board)) return true;  board[index] = "OX" [checkAvailableMoves(board).length % 2];  display(board);  return gameOver(board);}function playAiMove(board) {  const {    index  } = minimax(board, 0, aiPlayer, MIN, MAX);  if (!play(board, index)) listen(board);}function listen(board) {  document.querySelector("table").addEventListener("click", e => {    const td = e.target;    if (td.tagName != 'TD') return listen(board);    const index = td.cellIndex + 3 * td.parentNode.rowIndex;    if (isNaN(board[index])) return listen(board); // 用户应重试...    if (play(board, index)) return; // 游戏结束    setTimeout(() => playAiMove(board), 500);  }, {    once: true  });}function game() {  const board = [...Array(9).keys()];  if (aiPlayer === "X") playAiMove(board);  else listen(board);}game();
table {  border-collapse: collapse}td {  border: 1px solid;  width: 25px;  height: 25px;  text-align: center}
<table>  <tr>    <td></td>    <td></td>    <td></td>  </tr>  <tr>    <td></td>    <td></td>    <td></td>  </tr>  <tr>    <td></td>    <td></td>    <td></td>  </tr></table><div></div>

Related Posts

L1-L2正则化的不同系数

我想对网络的权重同时应用L1和L2正则化。然而,我找不…

使用scikit-learn的无监督方法将列表分类成不同组别,有没有办法?

我有一系列实例,每个实例都有一份列表,代表它所遵循的不…

f1_score metric in lightgbm

我想使用自定义指标f1_score来训练一个lgb模型…

通过相关系数矩阵进行特征选择

我在测试不同的算法时,如逻辑回归、高斯朴素贝叶斯、随机…

可以将机器学习库用于流式输入和输出吗?

已关闭。此问题需要更加聚焦。目前不接受回答。 想要改进…

在TensorFlow中,queue.dequeue_up_to()方法的用途是什么?

我对这个方法感到非常困惑,特别是当我发现这个令人费解的…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注