蒙特卡罗树搜索
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了蒙特卡罗树搜索相关的知识,希望对你有一定的参考价值。
我目前正致力于一个简单的棋盘游戏的MCTS实现。我想我几乎把它弄好了,但每当我想迭代超过1次(时间或数量(i)作为迭代因子)时,游戏就会冻结。如果有人对这个问题有什么了解,我会很高兴的。
// method to find best turn
Node findBest( )
{
Node startingPoint = new Node();
startingPoint.CurrentField_Boxes = GameBoard.boxs;
startingPoint.CurrentField_H = GameBoard.horizontal_line;
startingPoint.CurrentField_V = GameBoard.vertical_line;
Node rootNode = startingPoint;
for( int i = 0; i < 2; i++)
{
//Selection
Node promisingNode = selectPromisingNode(rootNode);
Debug.Log("POST SELECTION CHECK VON X/Y " + promisingNode.x + "/" + promisingNode.y);
//Expansion
if (!checkField())
{
expandNode(promisingNode);
}
//Simulation
Node nodeToExplore = promisingNode;
Debug.Log("POST 2 SELECTION CHECK VON X/Y " + nodeToExplore.x + "/" + nodeToExplore.y);
if (promisingNode.getChildren().Count > 0)
{
//get random child node
nodeToExplore = promisingNode.getRandomChild();
Debug.Log("POST 3 SELECTION CHECK VON X/Y " + nodeToExplore.x + "/" + nodeToExplore.y);
}
int playoutResult = simulation(nodeToExplore);
Debug.Log("SIM RESULT :" + playoutResult);
//backpropagation
backPropogation(nodeToExplore);
}
Debug.Log("*********************************");
Node bestNode = new Node();
Debug.Log("POST 4 SELECTION CHECK VON X/Y " + rootNode.getBestChild().x + "/" + rootNode.getBestChild().y);
bestNode = rootNode.getBestChild();
return bestNode;
}
private Node selectPromisingNode(Node rootNode)
{
Debug.Log("SELECTING");
Node node = new Node();
node = rootNode;
while (node.getChildren().Count != 0) // checking if terminal
{
findBestNodeWithUCT(node);
}
return node;
}
Node findBestNodeWithUCT( Node root)
{
double j = 0;
int bestIndex = 0; ;
for (int i = 0; i < root.getChildren().Count; i++)
{
if (root.getChildren()[i].getVisitTimes() / root.getChildren()[i].getResult() > j)
{
j = root.getChildren()[i].getVisitTimes() / root.getChildren()[i].getResult();
bestIndex = i;
Debug.Log("BEST INDEX : " + bestIndex);
}
}
Node bestOption = root.getChildren()[bestIndex];
Debug.Log("BEST OPTION RETURN X/Y : " + bestOption.x + "/" + bestOption.y);
return bestOption;
}
private void expandNode(Node node)
{
Debug.Log("Start EXPANSION");
bool vert = false;
bool hori = false;
//Phase I : creating new Node
//create a new node wit certain action A
Node newNode = new Node();
//init and setting parent
newNode.parent[0] = node;
//init gamefield into node
newNode.CurrentField_V = node.CurrentField_V;
newNode.CurrentField_H = node.CurrentField_H;
//init and setting child parent relationship
newNode.visitTimes++;
Debug.Log("ERSTELLE NEUEN KNOTEN");
//Phase II : Validating Action
//create a action a which leads to the expansion
int x = UnityEngine.Random.Range(0,4);
int y = UnityEngine.Random.Range(0, 4);
//checking variables for valid coordinates
Debug.Log("AKTION A IN EXPANSION :" + x + y);
//Prechecking of generated coordinates to set corresponding obj
if (y < 3)
{
vert = true;
}
if (x < 3 )
{
hori = true;
}
else if (x == 3 && y == 3)
{
Debug.Log(" OUT OF BOUNDS :NEW RANDOM");
expandNode(node);
}
//Phase III : Doing Action
if(vert && newNode.CurrentField_V[x, y].tag == "is play" && hori && newNode.CurrentField_H[x, y].tag != "is play")
{
expandNode(node);
}
//setting choice into sim gamefield with green color
if (vert && newNode.CurrentField_V[x, y].tag != "is play")// && GameBoard.sim_vertical_line[x, y].tag != "is play")
{
Debug.Log("SETZEN DER VERTIKALEN AKTION IN KNOTENSTATE");
GameBoard.sim_vertical_line[x, y].tag = "is play";
GameBoard.sim_vertical_line[x, y].GetComponent<SpriteRenderer>().sprite = greenV;
// newNode.CurrentField_V[x, y].tag = "is play";
newNode.x = x;
newNode.y = y;
node.nodeChildren.Add(newNode);
}
if (hori && newNode.CurrentField_H[x, y].tag != "is play" )//&& GameBoard.sim_horizontal_line[x, y].tag != "is play")
{
Debug.Log("SETZEN DER HORIZONTALEN AKTION IN KNOTENSTATE");
GameBoard.sim_horizontal_line[x, y].tag = "is play";
GameBoard.sim_horizontal_line[x, y].GetComponent<SpriteRenderer>().sprite = greenH;
// newNode.CurrentField_H[x, y].tag = "is play";
newNode.x = x;
newNode.y = y;
node.nodeChildren.Add(newNode);
}
else { expandNode(node); }
}
private void backPropogation(Node nodeToExplore)
{
Node tempNode = nodeToExplore;
Debug.Log("BACKPROPAGATION");
while (tempNode != null)
{
int i = 0;
if (i % 2 == 0)
{
tempNode.visitTimes++;
int j = tempNode.result;
tempNode.setResult(j);
tempNode = tempNode.getParent();
}
else if (i % 2 != 0)
{
tempNode.visitTimes++;
int j = tempNode.result;
tempNode.setResult(j * -1);
tempNode = tempNode.getParent();
}
i++;
}
}
一次迭代工作正常,我已经获得了比随机机器人更好的行为,但是在一次迭代中只能创建最多5-6个节点。
非常感谢。
答案
在selectPromisingNode()
函数中,从第二次迭代开始,您将拥有一个无限循环。您开始将根节点插入该功能。从第二次迭代开始,根节点具有多于0个子节点,因此满足while循环的条件。在循环内部,您不会更改node
变量的值,因此您将无限地经常以根节点作为参数调用findBestNodeWithUCT()
。
您可能希望将循环内的代码行更改为:node = findBestNodeWithUCT(node)
,这样您实际上可以开始遍历树而不是停留在根目录。
我没有检查剩下的代码,所以我不知道是否有更多错误,但这至少似乎可以准确地解释你遇到的问题
以上是关于蒙特卡罗树搜索的主要内容,如果未能解决你的问题,请参考以下文章