UCT(信心上限樹演算法)解四子棋問題——蒙特卡羅法模擬人機博弈
阿新 • • 發佈:2018-12-30
這段程式碼是有一定的冗餘性的,我在編寫的過程中下意識地將部分本應在演算法類UCT當中實現的功能在本應只是作為結構體的Node類中實現了,造成了一定的條理性的缺失。#ifndef __UCT_H__ #define __UCT_H__ #include #include #include #include #include "Judge.h" //包含判斷勝負的函式userWin,machineWin和isTie #define EMPTY 0 //未落子 #define PLAYER_CHANCE 1 //玩家棋權 #define COMPUTER_CHANCE 2 //電腦(AI)棋權 #define TIME_LIMITATION 3000 //計算時長限制 #define PLAYER_WIN_PROFIT -1 //玩家獲勝時的收益 #define COMPUTER_WIN_PROFIT 1 //我方AI獲勝時的收益 #define TIE_PROFIT 0 //平局收益 #define UNTERMINAL_STATE 2 //非終止狀態 #define VITALITY_COEFFICIENT 0.8 //比例係數c using namespace std; class UCT; class Node { private: int **boardState; //棋局狀態 int *topState; //頂端狀態 int row, column; //棋盤大小(M, N) int _noX, _noY; //不可落子點位置 int _chessman; //我方持子屬性 int _x, _y; //前一上落子位置 int visitedNum; //被訪問次數 double profit; //當前狀態我方收益 int _depth; //節點深度 Node *father; //父節點 Node **children; //子節點 int expandableNum; //可擴充套件節點數量 int *expandableNode; //可擴充套件節點編號 friend class UCT; int *TopState() const { //複製棋盤頂端狀態陣列topState int *presentTop = new int[column]; for (int i = 0; i != column; i ++) presentTop[i] = topState[i]; return presentTop; } int **BoardState() const { //複製棋盤狀態陣列boardState int **presentBoardState = new int*[row]; for (int i = 0; i < row; i ++) { presentBoardState[i] = new int[column]; for(int j = 0; j < column; j ++) presentBoardState[i][j] = boardState[i][j]; } return presentBoardState; } void clear() { //空間釋放 for (int i = 0; i != row; i ++) delete [] boardState[i]; delete [] boardState; delete [] topState; delete [] expandableNode; for (int i = 0; i != column; i ++) if (children[i]) { children[i] -> clear(); delete children[i]; } delete [] children; } public: //建構函式 Node(int **board, int *top, int r, int c, int noX, int noY, int depth = 0, int x = -1, int y = -1, int playingRight = COMPUTER_CHANCE, Node* _father = NULL): boardState(board), topState(top), row(r), column(c), _noX(noX), _noY(noY), _depth(depth), _x(x), _y(y), _chessman(playingRight), visitedNum(0), profit(0), father(_father) { expandableNum = 0; children = new Node*[column]; //大小等於行數的子節點陣列 expandableNode = new int[column]; //可到達子節點編號的陣列 for (int i = 0; i != column; i ++) { if (topState[i] != 0) //若第i列可落子 expandableNode[expandableNum ++] = i; children[i] = NULL; } } int x() const { return _x; } int y() const { return _y; } int chessman() const { return _chessman; } bool isExpandable() const { return expandableNum > 0; }//是否可擴充套件 //是否為終止節點 bool isTerminal() { if (_x == -1 && _y == -1) //若為根節點 return false; if ((_chessman == PLAYER_CHANCE && machineWin(_x, _y, row, column, boardState)) || //計算機勝利 (_chessman == COMPUTER_CHANCE && userWin(_x, _y, row, column, boardState)) || //玩家勝利 (isTie(column, topState))) //平局 return true; return false; } //擴充套件節點 Node *expand(int playingRight) { int index = rand() % expandableNum; //隨機確定一個索引值 int **newBoardState = BoardState(); //複製棋盤狀態陣列 int *newTopState = TopState(); //複製棋盤頂端狀態陣列 int newY = expandableNode[index], newX = -- newTopState[newY]; //確定落子座標 newBoardState[newX][newY] = chessman(); //落子 if (newX - 1 == _noX && newY == _noY) //若落子位置的正上方位置是不可落子點 newTopState[newY] --; //更新棋盤頂端狀態陣列 //為當前節點建立擴充套件子節點 children[newY] = new Node(newBoardState, newTopState, row, column, _noX, _noY, _depth + 1, newX, newY, playingRight, this); swap(expandableNode[index], expandableNode[-- expandableNum]); //將被選中子節點編號置換到目錄末尾 return children[newY]; } //最優子節點 Node *bestChild() { Node* best; double maxProfitRatio = -RAND_MAX; for (int i = 0; i != column; i ++) { if (children[i] == NULL) continue; double modifiedProfit = (_chessman == PLAYER_CHANCE ? -1 : 1) * children[i] -> profit; //修正收益值 int childVisitedNum = children[i] -> visitedNum; //子節點訪問數 double tempProfitRatio = modifiedProfit / childVisitedNum + sqrtl(2 * logl(visitedNum) / childVisitedNum) * VITALITY_COEFFICIENT; //計算綜合收益率 if (tempProfitRatio > maxProfitRatio || (tempProfitRatio == maxProfitRatio && rand() % 2 == 0)) { //選擇綜合收益率最大的子節點 maxProfitRatio = tempProfitRatio; best = children[i]; } } return best; } //回溯更新 void backup(double deltaProfit) { Node *temp = this; while (temp) { temp -> visitedNum ++; //訪問次數+1 temp -> profit += deltaProfit; //收益增加delta temp = temp -> father; } } }; class UCT { private: Node *_root; //根節點 int _row, _column; //行數、列數 int _noX, _noY; //不可落子點的位置 int startTime; //計算開始時間 //計算當前狀態收益 int Profit(int **board, int *top, int chessman, int x, int y) const { if (chessman == PLAYER_CHANCE && userWin(x, y, _row, _column, board)) return PLAYER_WIN_PROFIT; if (chessman == COMPUTER_CHANCE && machineWin(x, y, _row, _column, board)) return COMPUTER_WIN_PROFIT; if (isTie(_column, top)) return TIE_PROFIT; return UNTERMINAL_STATE; //未進入終止狀態 } //隨機落子 void placeChessman(int **board, int *top, int chessman, int &x, int &y) { y = rand() % _column; //隨機選擇一列 while (top[y] == 0) //若此列已下滿 y = rand() % _column; //再隨機選擇一列 x = -- top[y]; //確定落子高度 board[x][y] = chessman; //落子 if (x - 1 == _noX && y == _noY) //若落子位置正上方緊鄰不可落子點 top[y] --; } //棋權變換 int rightChange(int chessman) const { if (chessman == PLAYER_CHANCE) return COMPUTER_CHANCE; else if (chessman == COMPUTER_CHANCE) return PLAYER_CHANCE; else return -1; } //搜尋樹策略 Node *TreePolicy(Node *presentNode) { while (!presentNode -> isTerminal()) { //節點不是終止節點 if (presentNode -> isExpandable()) //且擁有未被訪問的子狀態 return Expand(presentNode); //擴充套件該節點 else presentNode = BestChild(presentNode); //選擇最優子節點 } return presentNode; } //對節點進行擴充套件 Node *Expand(Node *presentNode) { return presentNode -> expand(rightChange(presentNode -> chessman())); } //最優子節點 Node *BestChild(Node *father) { return father -> bestChild(); } //模擬策略 double DefaultPolicy(Node *selectedNode) { int **boardState = selectedNode -> BoardState(), *top = selectedNode -> TopState(); int chessman = selectedNode -> chessman(), depth = selectedNode -> _depth; int x = selectedNode -> x(), y = selectedNode -> y(); int profit = Profit(boardState, top, rightChange(chessman), x, y); //計算收益 while (profit == UNTERMINAL_STATE) { //若當前狀態未達終止狀態 depth ++; placeChessman(boardState, top, chessman, x, y); //隨機落子 profit = Profit(boardState, top, chessman, x, y); //計算收益 chessman = rightChange(chessman); //棋權變換 } for (int i = 0; i != _row; i ++) delete [] boardState[i]; delete [] boardState; delete [] top; return double(profit);// / logl(depth + 1); //非線性加速 } //回溯更新收益(深度越深收益越小) void Backup(Node *selectedNode, double deltaProfit) { selectedNode -> backup(deltaProfit); } public: //建構函式 UCT(int row, int column, int noX, int noY): _row(row), _column(column), _noX(noX), _noY(noY), startTime(clock()) {} //信心上限樹搜尋 Node *UCTSearch(int **boardState, int *topState) { _root = new Node (boardState, topState, _row, _column, _noX, _noY); //以當前狀態建立根節點 while (clock() - startTime <= TIME_LIMITATION) { //尚未耗盡計算時長 Node *selectedNode = TreePolicy(_root); //運用搜索樹策略節點 double deltaProfit = DefaultPolicy(selectedNode); //運用模擬策略對選中節點進行一次隨機模擬 Backup(selectedNode, deltaProfit); //將模擬結果回溯反饋給各祖先 } return BestChild(_root); } //解構函式 ~UCT() { _root -> clear(); delete _root; } }; #endif //__UCT_H__