蒙特卡罗树搜索

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了蒙特卡罗树搜索相关的知识,希望对你有一定的参考价值。

我目前正致力于一个简单的棋盘游戏的MCTS实现。我想我几乎把它弄好了,但每当我想迭代超过1次(时间或数量(i)作为迭代因子)时,游戏就会冻结。如果有人对这个问题有什么了解,我会很高兴的。

 // method to find  best turn
    Node findBest( )
    {
        Node startingPoint = new Node();
        startingPoint.CurrentField_Boxes = GameBoard.boxs;
        startingPoint.CurrentField_H = GameBoard.horizontal_line;
        startingPoint.CurrentField_V = GameBoard.vertical_line;

        Node rootNode = startingPoint;

       for( int i = 0; i < 2; i++)
        {
            //Selection
            Node promisingNode = selectPromisingNode(rootNode);
            Debug.Log("POST SELECTION CHECK VON X/Y " + promisingNode.x + "/" + promisingNode.y);

            //Expansion
            if (!checkField())
            {
                expandNode(promisingNode);
            }

            //Simulation
            Node nodeToExplore = promisingNode;
            Debug.Log("POST 2 SELECTION CHECK VON X/Y " + nodeToExplore.x + "/" + nodeToExplore.y);

            if (promisingNode.getChildren().Count > 0)
            {
                //get random child node
                nodeToExplore = promisingNode.getRandomChild();
                Debug.Log("POST 3 SELECTION CHECK VON X/Y " + nodeToExplore.x + "/" + nodeToExplore.y);
            }

            int playoutResult = simulation(nodeToExplore);
            Debug.Log("SIM RESULT :" + playoutResult);
            //backpropagation
            backPropogation(nodeToExplore);
        }

        Debug.Log("*********************************");
        Node  bestNode = new Node();
        Debug.Log("POST 4 SELECTION CHECK VON X/Y " + rootNode.getBestChild().x + "/" + rootNode.getBestChild().y);
        bestNode = rootNode.getBestChild();
        return bestNode;
    }

    private Node selectPromisingNode(Node rootNode)
    {
        Debug.Log("SELECTING");
        Node node = new Node();
        node = rootNode;

        while (node.getChildren().Count != 0) // checking if terminal
        {
            findBestNodeWithUCT(node);
        }

        return node;
    }

    Node findBestNodeWithUCT( Node root)
    {
        double j = 0;
        int bestIndex = 0; ;

        for (int i = 0; i < root.getChildren().Count; i++)
        {
            if (root.getChildren()[i].getVisitTimes() / root.getChildren()[i].getResult() > j)
            {
                j = root.getChildren()[i].getVisitTimes() / root.getChildren()[i].getResult();
                bestIndex = i;
                Debug.Log("BEST INDEX : " + bestIndex);
            }
        }

        Node bestOption = root.getChildren()[bestIndex];
        Debug.Log("BEST OPTION RETURN X/Y : " + bestOption.x + "/" + bestOption.y);

        return bestOption;
    }

    private void expandNode(Node node)
    {
        Debug.Log("Start EXPANSION");
        bool vert = false;
        bool hori = false;

    //Phase I : creating new Node
        //create a new node wit certain action A
        Node newNode = new Node();
        //init and setting parent 
        newNode.parent[0] = node;
        //init gamefield into node
        newNode.CurrentField_V = node.CurrentField_V;
        newNode.CurrentField_H = node.CurrentField_H;
        //init and setting child parent relationship
        newNode.visitTimes++;
        Debug.Log("ERSTELLE NEUEN KNOTEN");

     //Phase II : Validating Action
        //create a action a which leads to the expansion
        int x = UnityEngine.Random.Range(0,4);
        int y = UnityEngine.Random.Range(0, 4);
        //checking variables for valid coordinates
        Debug.Log("AKTION A IN EXPANSION :" + x + y);
        //Prechecking of generated coordinates to set corresponding obj
        if (y < 3)
        {
            vert = true;
        }
        if (x < 3 )
        {
            hori = true;
        }
        else if (x == 3 && y == 3)
        {
            Debug.Log(" OUT OF BOUNDS :NEW RANDOM");
            expandNode(node);

        }

     //Phase III : Doing Action
        if(vert && newNode.CurrentField_V[x, y].tag == "is play" && hori && newNode.CurrentField_H[x, y].tag != "is play")
        {
            expandNode(node);
        }
        //setting choice into sim gamefield with green color
        if (vert && newNode.CurrentField_V[x, y].tag != "is play")// && GameBoard.sim_vertical_line[x, y].tag != "is play")
        {
            Debug.Log("SETZEN DER VERTIKALEN AKTION IN KNOTENSTATE");
            GameBoard.sim_vertical_line[x, y].tag = "is play";
            GameBoard.sim_vertical_line[x, y].GetComponent<SpriteRenderer>().sprite = greenV;
          //  newNode.CurrentField_V[x, y].tag = "is play";
            newNode.x = x;
            newNode.y = y;
            node.nodeChildren.Add(newNode);

        }
        if (hori && newNode.CurrentField_H[x, y].tag != "is play" )//&& GameBoard.sim_horizontal_line[x, y].tag != "is play")
        {
            Debug.Log("SETZEN DER HORIZONTALEN AKTION IN KNOTENSTATE");
            GameBoard.sim_horizontal_line[x, y].tag = "is play";
            GameBoard.sim_horizontal_line[x, y].GetComponent<SpriteRenderer>().sprite = greenH;
          //  newNode.CurrentField_H[x, y].tag = "is play";
            newNode.x = x;
            newNode.y = y;
            node.nodeChildren.Add(newNode);

        }
        else { expandNode(node); }
    }

    private void backPropogation(Node nodeToExplore)
    {
        Node tempNode = nodeToExplore;
        Debug.Log("BACKPROPAGATION");
        while (tempNode != null)
        {
            int i = 0;

            if (i % 2 == 0)
            {
                tempNode.visitTimes++;
                int j = tempNode.result;
                tempNode.setResult(j);
                tempNode = tempNode.getParent();
            }
            else if (i % 2 != 0)
            {
                tempNode.visitTimes++;
                int j = tempNode.result;
                tempNode.setResult(j * -1);
                tempNode = tempNode.getParent();
            }
            i++;
        }
    }

一次迭代工作正常,我已经获得了比随机机器人更好的行为,但是在一次迭代中只能创建最多5-6个节点。

非常感谢。

答案

selectPromisingNode()函数中,从第二次迭代开始,您将拥有一个无限循环。您开始将根节点插入该功能。从第二次迭代开始,根节点具有多于0个子节点,因此满足while循环的条件。在循环内部,您不会更改node变量的值,因此您将无限地经常以根节点作为参数调用findBestNodeWithUCT()

您可能希望将循环内的代码行更改为:node = findBestNodeWithUCT(node),这样您实际上可以开始遍历树而不是停留在根目录。

我没有检查剩下的代码,所以我不知道是否有更多错误,但这至少似乎可以准确地解释你遇到的问题

以上是关于蒙特卡罗树搜索的主要内容,如果未能解决你的问题,请参考以下文章

深度解析黑白棋AI代码原理(蒙特卡洛搜索树MCTS+Roxanne策略)

蒙特卡洛树搜索:井字游戏的实现

蒙特卡罗树搜索

分数在 0 和 n 之间时蒙特卡罗树搜索的 UCB 公式

为啥蒙特卡洛树搜索会重置树

蒙特卡洛树搜索介绍