我已经研究人工智能问题有一段时间了,这周我尝试用Python编写一个连接4的AI。起初我遇到了复制棋盘的问题,后来我发现Python需要使用deepcopy来避免复制错误。
最终我成功创建了一个alpha-beta剪枝算法,运行效果还可以,但当我用深度8的算法与在线深度6的alpha-beta剪枝算法对战时,令人惊讶的是我的算法输了。我与哈佛的讲师们一起创建了评估函数,并从msavenski的代码中修改了alpha-beta算法(链接在代码中)
希望那些研究这些问题更久的人能检查一下我的算法和评估函数是否如预期那样工作,因为我相当确定其中有一些错误。我知道我可以使用转置表、深度迭代等方法来使代码更快更有效,但我的另一个目标是保持简单。
这是我的代码:
# -*- coding: utf-8 -*-import copyclass ConnectFour: def __init__(self): self.moves = 0 #The count of moves, 42 moves is equal than board is full self.turn = 0 #Use this variable to recognize which one player turn is it def PrintGameBoard(self, board): print(' 0 1 2 3 4 5 6') # This function just raws a board for i in range(5, -1, -1): print('|---|---|---|---|---|---|---|') print("| ",end="") for j in range(7): print(board[i][j],end="") if j != 6: print(" | ",end="") else: print(" |") print('`---------------------------´') def LegalRow(self, col, board): stacks = [[x[i] for x in board] for i in range(len(board[0]))] # This function checks stack of given column and return the row where you can draw mark. If the stack is full return -1 countofitems = stacks[col].count("x") + stacks[col].count("o") # count of items in stack if (countofitems) < 6: return (countofitems) else: return -1 def LegalMoves(self, board): legalmoves = [] stacks = [[x[i] for x in board] for i in range(len(board[0]))] order = [3,2,4,1,5,0,6] for i in order: if self.LegalRow(i, board)!=-1: legalmoves.append(i) return legalmoves def MakeMove(self, board, col, player, row): board[row][col] = player # This function make a move and increases count of moves self.moves += 1 return board def UnmakeMove(self, board, col, row): board[row][col] = " " # This function make a move and increases count of moves self.moves -= 1 return board def IsWinning(self, currentplayer, board): for i in range(6): # This function returns True or False depending on if current player have four "tila" in a row (win) for j in range(4): if board[i][j] == currentplayer and board[i][j+1] == currentplayer and board[i][j+2] == currentplayer and board[i][j+3] == currentplayer: return True for i in range(3): for j in range(7): if board[i][j] == currentplayer and board[i+1][j] == currentplayer and board[i+2][j] == currentplayer and board[i+3][j] == currentplayer: return True for i in range(3): for j in range(4): if board[i][j] == currentplayer and board[i+1][j+1] == currentplayer and board[i+2][j+2] == currentplayer and board[i+3][j+3] == currentplayer: return True for i in range(3,6): for j in range(4): if board[i][j] == currentplayer and board[i-1][j+1] == currentplayer and board[i-2][j+2] == currentplayer and board[i-3][j+3] == currentplayer: return True return False def PlayerMove(self, board, player): allowedmove = False # This function ask players move when its his turn and returns board after making move. while not allowedmove: try: print("Choose a column where you want to make your move (0-6): ",end="") col =input() col=int(col) row = self.LegalRow(col, board) except (NameError, ValueError, IndexError, TypeError, SyntaxError) as e: print("Give a number as an integer between 0-6!") else: if row != -1 and (col<=6 and col>=0): board[row][int(col)] = player self.moves += 1 allowedmove = True elif col>6 or col<0: print("The range was 0-6!!!") else: print("This column is full") return board def SwitchPlayer(self, player): # This function gives opponent player's mark if player=="x": nextplayer="o" else: nextplayer="x" return nextplayer def evaluation(self, board): # This function evaluate gameboard (heuristic). The rules of evaluation are in site: http://isites.harvard.edu/fs/docs/icb.topic788088.files/Assignment%203/asst3c.pdf list = [] player = "x" opponent = "o" if self.IsWinning(player, board): score = -512 elif self.IsWinning(opponent, board): score = +512 elif self.moves==42: score=0 else: score = 0 for i in range(6): #append to list horizontal segments for j in range(4): list.append([str(board[i][j]),str(board[i][j+1]),str(board[i][j+2]),str(board[i][j+3])]) for i in range(3): #append to list vertical segments for j in range(7): list.append([str(board[i][j]),str(board[i+1][j]),str(board[i+2][j]),str(board[i+3][j])]) for i in range(3): #append to list diagonal segments for j in range(4): list.append([str(board[i][j]),str(board[i+1][j+2]),str(board[i+2][j+2]),str(board[i+3][j+3])]) for i in range(3, 6): #append to list diagonal segments for j in range(4): list.append([str(board[i][j]),str(board[i-1][j+2]),str(board[i-2][j+2]),str(board[i-3][j+3])]) for k in range(len(list)): #add to score with rules of site above if ((list[k].count(str("x"))>0) and (list[k].count(str("o"))>0)) or list[k].count(" ")==4: score += 0 if list[k].count(player)==1 and list[k].count(opponent)==0: score -= 1 if list[k].count(player)==2 and list[k].count(opponent)==0: score -= 10 if list[k].count(player)==3 and list[k].count(opponent)==0: score -= 50 if list[k].count(opponent)==1 and list[k].count(player)==0: score += 1 if list[k].count(opponent)==2 and list[k].count(player)==0: score += 10 if list[k].count(opponent)==3 and list[k].count(player)==0: score += 50 if self.turn==player: score -= 16 else: score += 16 return score def maxfunction(self, board, depth, player, alpha, beta): opponent = self.SwitchPlayer(player) self.turn = opponent legalmoves = self.LegalMoves(board) if (depth==0) or self.moves==42: return self.evaluation(board) value=-1000000000 for col in legalmoves: row = self.LegalRow(col, board) newboard = self.MakeMove(board, col, opponent, row) value = max(value, self.minfunction(board, depth-1, opponent,alpha, beta)) newboard = self.UnmakeMove(board, col, row) if value >= beta: return value alpha = max(alpha, value) return value def minfunction(self, board, depth, opponent, alpha, beta): player = self.SwitchPlayer(opponent) self.turn = player legalmoves = self.LegalMoves(board) if (depth==0) or self.moves==42: return evaluation(board) value=1000000000 for col in legalmoves: row = self.LegalRow(col, board) newboard = self.MakeMove(board, col, player, row) value = min(value, self.maxfunction(board, depth-1, player ,alpha, beta)) newboard = self.UnmakeMove(board, col, row) if value <= alpha: return value beta = min(beta, value) return value def alphabetapruning(self, board, depth, opponent, alpha, beta): #This is the alphabeta-function modified from: https://github.com/msaveski/connect-four values = [] cols = [] value = -1000000000 for col in self.LegalMoves(board): row = self.LegalRow(col, board) board = self.MakeMove(board, col, opponent, row) value = max(value, self.minfunction(board, depth-1, opponent, alpha, beta)) values.append(value) cols.append(col) board = self.UnmakeMove(board, col, row) largestvalue= max(values) print(cols) print(values) for i in range(len(values)): if largestvalue==values[i]: position = cols[i] return largestvalue, position def searchingfunction(self, board, depth, opponent): #This function update turn to opponent and calls alphabeta (main algorithm) and after that update new board (add alphabeta position to old board) and returns new board. newboard = copy.deepcopy(board) value, position=self.alphabetapruning(newboard, depth, opponent, -10000000000, 10000000000) board = self.MakeMove(board, position, opponent, self.LegalRow(position, board)) return boarddef PlayerGoesFirst(): print("Player is X and AI is O") #This function just ask who goes first player = 'x' opponent = 'o' print('Do you want to play first? (y/n) : ',end="") return input().lower().startswith('y')def PlayAgain(): print('Do you want to play again? (y/n) :',end="") #This function ask if player want to play new game return input().lower().startswith('y')def main(): print("Connect4") #The main function. This ask player mark, initialize gameboard (table), print board after each turn, ask players move, make AI's move and checks after each move is game is tie/win or lose. print("-"*34) while True: connectfour = ConnectFour() gameisgoing = True table = [[],[],[],[],[],[]] for i in range(7): for j in range(6): table[j].append(" ") player = "x" opponent = "o" if PlayerGoesFirst(): turn = "x" else: turn = "o" while gameisgoing: connectfour.PrintGameBoard(table) if turn=="x": table = connectfour.PlayerMove(table, player) if connectfour.IsWinning(player, table): connectfour.PrintGameBoard(table) print('You won the game!') gameisgoing = False else: if connectfour.moves==42: connectfour.PrintGameBoard(table) print('Game is tie') gameisgoing=False else: turn = "o" else: table = connectfour.searchingfunction(table, 6, opponent) #Here is AI's move. Takes as input current table (board), depth and opponents mark. Output should be new gameboard with AI's move. if connectfour.IsWinning(opponent, table): connectfour.PrintGameBoard(table) print('Computer won the game') gameisgoing = False else: if connectfour.moves==42: connectfour.PrintGameBoard(table) print('Game is tie') gameisgoing=False else: turn = "x" if not PlayAgain(): print("Game ended") print("-"*34) breakif __name__ == '__main__': main()
回答:
实现看起来不错,AI的走法也合理。
仅凭一场比赛很难对游戏实力做出假设。你需要进行更多场比赛来获得可靠的统计数据。例如,通过改变起始位置,让两个引擎分别以’X’和’0’的身份各玩一次。
还有一个运气因素在起作用。通常情况下,增加搜索深度会使alpha-beta剪枝的效果更好。然而,在某些位置,它可能会做出在较浅搜索中不会做出的糟糕走法。
不同的评估函数也会有这种效果。即使一个评估函数较差,也会有一些位置使得较差的评估函数导致更好的走法。
运气因素只有在你玩足够多的游戏,或者你的搜索深度足够深以至于能看到所有变化到最后时才会消失。连接4是一个已解决的游戏。如果算法是完美的,先走的玩家总是会赢。这是指出搜索中bug的唯一客观方法。像你的评估函数这样的启发式方法,总是会有弱点。