多个 std::vectors 来解决缓存问题？

Posted 2023-02-16

技术标签:

【中文标题】多个 std::vectors 来解决缓存问题？【英文标题】：Multiple std::vectors to fix cache problem? 【发布时间】：2010-07-12 00:55:40 【问题描述】：

我有一个 std::vector<DOUBLEPOINT>。我正在制作一个使用贝塞尔曲线的应用程序，结果会实时显示。我把贝塞尔曲线转换成许多短线段，并把这些短线段的坐标存储在上面的向量中。我的问题是：当向量的大小超过一个缓存行时，程序会迅速变得非常慢。我想知道，改用多个 std::vector<DOUBLEPOINT>（基本上每 100 个点就切换到另一个向量）是否会更好？这能解决我的缓存问题吗？如果不能，我还能怎样让用户随意创建任意多的点而不会变得非常慢？我的其他算法（例如多边形填充）都快如闪电，所以问题不在它们身上。真正拖慢速度的是 std::vector。

谢谢

    // One contour of a shape: the control points placed by the user, the
    // line-segment points generated from them, and the outline state.
    struct SHAPECONTOUR
    {
        // Control points; each element is read through its UserPoint,
        // LeftHandle and RightHandle members by SetCubicBezier().
        std::vector<USERFPOINT> UserPoints;
        // Flattened curve points, regenerated by UpdateShape().
        std::vector<DOUBLEPOINT> DrawingPoints;

        // Outline data (OutlineWidth, OutlineSize, OutlineVBO are used by
        // UpdateShape()).
        SHAPEOUTLINE Outline;
    };

I call UpdateShape() every time a point is added but I assure you my other algorithms are fast...


void OGLSHAPE::UpdateShape()

    if(Contour.size() == 0)
    
        return;
    
    for(int i = 0; i < Contour.size(); ++i)
    
        Contour[i].DrawingPoints.clear();

     if(Contour[i].UserPoints.size() < 2)
     
         break;
     


    Contour[i].DrawingPoints.clear();
Contour[i].DrawingPoints.reserve(1000);
     for(unsigned int x = 0; x < Contour[i].UserPoints.size() - 1; ++x)
         SetCubicBezier(
             Contour[i].UserPoints[x],
             Contour[i].UserPoints[x + 1],
             i);




     //Remove Duplicates
     for(int j = 0; j < 2; ++j)
     
         if(Contour[i].DrawingPoints.size() > 2)
             for(unsigned int x = 0; x < Contour[i].DrawingPoints.size() - 1; ++x)
             
                 if(Contour[i].DrawingPoints[x].point[0] ==
                     Contour[i].DrawingPoints[x + 1].point[0] &&
                     Contour[i].DrawingPoints[x].point[1] ==
                     Contour[i].DrawingPoints[x + 1].point[1] 

                 )
                     Contour[i].DrawingPoints.erase(Contour[i].DrawingPoints.begin() + x);
             
     


     GenerateLinePoly(Contour[i].DrawingPoints,Contour[i].Outline.OutlineWidth);
     Contour[i].Outline.OutlineSize = OutlineVec.size()  / 2;
     glBindBufferARB(GL_ARRAY_BUFFER_ARB,Contour[i].Outline.OutlineVBO);
     glBufferDataARB(GL_ARRAY_BUFFER_ARB,sizeof(GLfloat) * OutlineVec.size(),&OutlineVec[0],GL_STATIC_COPY);


    

    gluTessNormal(PolygonTesselator.tobj, 0, 0, 1);
    PolygonTesselator.Set_Winding_Rule(WindingRule); 

    //PolygonTesselator.SetDimensions(layer[currentlayer].Shapes[i].Dimensions,layer[currentlayer].Shapes[i].minima);

    PolygonTesselator.Begin_Polygon(); 
    for(unsigned int c = 0; c < Contour.size(); ++c)
    
            PolygonTesselator.Begin_Contour();

            for(unsigned int j = 0; j < Contour[c].DrawingPoints.size(); ++j)
            
                gluTessVertex(PolygonTesselator.tobj,&Contour[c].DrawingPoints[j].point[0],
                    &Contour[c].DrawingPoints[j].point[0]);
            

            PolygonTesselator.End_Contour();

    
    PolygonTesselator.End_Polygon();



    PolygonTesselator.TransferVerticies(
        ObjectVBOInt,
        TextureCoordsVBOInt,
        ObjectVBOCount,
        TextureCoordsVBOCount);


// Flattens the cubic Bezier segment between two user points and appends the
// resulting points to Contour[currentcontour].DrawingPoints.
//
// Control points of the segment, in order:
//   a.UserPoint, a.RightHandle, b.LeftHandle, b.UserPoint
//
// The number of intermediate points is int(len * 0.049), capped at 55, where
// len is the length of the control polygon. The intermediate points are
// followed by b.UserPoint itself. a.UserPoint is NOT appended: it only seeds
// the running position.
//
// `temp` is not declared here; it is presumably a DOUBLEPOINT member used as
// scratch storage - confirm against the class declaration.
void OGLSHAPE::SetCubicBezier(USERFPOINT &a,USERFPOINT &b, int &currentcontour )
{
    // Edge vectors of the control polygon, used for the length estimate.
    double dx1 = a.RightHandle.x - a.UserPoint.x;
    double dy1 = a.RightHandle.y - a.UserPoint.y;
    double dx2 = b.LeftHandle.x - a.RightHandle.x;
    double dy2 = b.LeftHandle.y - a.RightHandle.y;
    double dx3 = b.UserPoint.x - b.LeftHandle.x;
    double dy3 = b.UserPoint.y - b.LeftHandle.y;

    // Kept in double: sqrt() already yields double and the value is only
    // used in double arithmetic below.
    const double len = sqrt(dx1 * dx1 + dy1 * dy1) +
                       sqrt(dx2 * dx2 + dy2 * dy2) +
                       sqrt(dx3 * dx3 + dy3 * dy3);

    int NUM_STEPS = int(len * 0.049);
    if(NUM_STEPS > 55)
    {
        NUM_STEPS = 55;
    }

    // Parameter increment h and its powers.
    double subdiv_step  = 1.0 / (NUM_STEPS + 1);
    double subdiv_step2 = subdiv_step*subdiv_step;
    double subdiv_step3 = subdiv_step*subdiv_step*subdiv_step;

    double pre1 = 3.0 * subdiv_step;     // 3h
    double pre2 = 3.0 * subdiv_step2;    // 3h^2
    double pre4 = 6.0 * subdiv_step2;    // 6h^2
    double pre5 = 6.0 * subdiv_step3;    // 6h^3

    // Polynomial coefficients: tmp1 = P0 - 2*P1 + P2, tmp2 = 3*(P1 - P2) - P0 + P3.
    double tmp1x = a.UserPoint.x - a.RightHandle.x * 2.0 + b.LeftHandle.x;
    double tmp1y = a.UserPoint.y - a.RightHandle.y * 2.0 + b.LeftHandle.y;

    double tmp2x = (a.RightHandle.x - b.LeftHandle.x)*3.0 - a.UserPoint.x + b.UserPoint.x;
    double tmp2y = (a.RightHandle.y - b.LeftHandle.y)*3.0 - a.UserPoint.y + b.UserPoint.y;

    // Running position starts at the first control point.
    temp.point[0] = a.UserPoint.x;
    temp.point[1] = a.UserPoint.y;

    //a user
    //a right
    //b left
    //b user

    // First, second and third forward differences of the curve at t = 0.
    double dfx = (a.RightHandle.x - a.UserPoint.x)*pre1 + tmp1x*pre2 + tmp2x*subdiv_step3;
    double dfy = (a.RightHandle.y - a.UserPoint.y)*pre1 + tmp1y*pre2 + tmp2y*subdiv_step3;

    double ddfx = tmp1x*pre4 + tmp2x*pre5;
    double ddfy = tmp1y*pre4 + tmp2y*pre5;

    double dddfx = tmp2x*pre5;
    double dddfy = tmp2y*pre5;

    int step = NUM_STEPS;

    // Suppose, we have some abstract object Polygon which
    // has method AddVertex(x, y), similar to LineTo in
    // many graphical APIs.
    // Note, that the loop has only operation add!

    while(step--)
    {
        temp.point[0]  += dfx;
        temp.point[1]  += dfy;
        dfx  += ddfx;
        dfy  += ddfy;
        ddfx += dddfx;
        ddfy += dddfy;

        Contour[currentcontour].DrawingPoints.push_back(temp);
    }

    // Finish exactly on the end point instead of relying on accumulated sums.
    temp.point[0] = (GLdouble)b.UserPoint.x;
    temp.point[1] = (GLdouble)b.UserPoint.y;
    Contour[currentcontour].DrawingPoints.push_back(temp);
}




void OGLSHAPE::GenerateLinePoly(const std::vector<DOUBLEPOINT> &input, int width)

    OutlineVec.clear();
    if(input.size() < 2)
    
        return;
    



    float w = width / 2.0f;

    //glBegin(GL_TRIANGLES);
    for( size_t i = 0; i < input.size()-1; ++i )
    
        POINTFLOAT cur;
        cur.x = input[i].point[0];
        cur.y = input[i].point[1];


        POINTFLOAT nxt;
        nxt.x = input[i+1].point[0];
        nxt.y = input[i+1].point[1];

        POINTFLOAT b;
        b.x = nxt.x - cur.x;
        b.y = nxt.y - cur.y;

        b = normalize(b);



        POINTFLOAT b_perp;
        b_perp.x = -b.y;
        b_perp.y = b.x;


        POINTFLOAT p0;
        POINTFLOAT p1;
        POINTFLOAT p2;
        POINTFLOAT p3;

        p0.x = cur.x + b_perp.x * w;
        p0.y = cur.y + b_perp.y * w;

        p1.x = cur.x - b_perp.x * w;
        p1.y = cur.y - b_perp.y * w;

        p2.x = nxt.x + b_perp.x * w;
        p2.y = nxt.y + b_perp.y * w;

        p3.x = nxt.x - b_perp.x * w;
        p3.y = nxt.y - b_perp.y * w;

        OutlineVec.push_back(p0.x);
        OutlineVec.push_back(p0.y);
        OutlineVec.push_back(p1.x);
        OutlineVec.push_back(p1.y);
        OutlineVec.push_back(p2.x);
        OutlineVec.push_back(p2.y);

        OutlineVec.push_back(p2.x);
        OutlineVec.push_back(p2.y);
        OutlineVec.push_back(p1.x);
        OutlineVec.push_back(p1.y);
        OutlineVec.push_back(p3.x);
        OutlineVec.push_back(p3.y);



        // only do joins when we have a prv
        if( i == 0 ) continue;

        POINTFLOAT prv;
        prv.x = input[i-1].point[0];
        prv.y = input[i-1].point[1];

        POINTFLOAT a;
        a.x = prv.x - cur.x;
        a.y = prv.y - cur.y;

        a = normalize(a);

        POINTFLOAT a_perp;
        a_perp.x = a.y;
        a_perp.y = -a.x;

        float det = a.x * b.y  - b.x * a.y;
        if( det > 0 )
        
            a_perp.x = -a_perp.x;
            a_perp.y = -a_perp.y;

            b_perp.x = -b_perp.x;
            b_perp.y = -b_perp.y;
        

        // TODO: do inner miter calculation

        // flip around normals and calculate round join points
        a_perp.x = -a_perp.x;
        a_perp.y = -a_perp.y;

        b_perp.x = -b_perp.x;
        b_perp.y = -b_perp.y;

        size_t num_pts = 4;

        std::vector< POINTFLOAT> round( 1 + num_pts + 1 );
        POINTFLOAT nc;
        nc.x = cur.x + (a_perp.x * w);
        nc.y = cur.y + (a_perp.y * w);

        round.front() = nc;

        nc.x = cur.x + (b_perp.x * w);
        nc.y = cur.y + (b_perp.y * w);

        round.back() = nc;

        for( size_t j = 1; j < num_pts+1; ++j )
        
            float t = (float)j/(float)(num_pts+1);
            if( det > 0 )
         
             POINTFLOAT nin;
             nin = slerp2d( b_perp, a_perp, 1.0f-t );
             nin.x *= w;
             nin.y *= w;

             nin.x += cur.x;
             nin.y += cur.y;

             round[j] = nin;
         
            else
         
             POINTFLOAT nin;
             nin = slerp2d( a_perp, b_perp, t );
             nin.x *= w;
             nin.y *= w;

             nin.x += cur.x;
             nin.y += cur.y;

             round[j] = nin;
         
        

        for( size_t j = 0; j < round.size()-1; ++j )
        

            OutlineVec.push_back(cur.x);
            OutlineVec.push_back(cur.y);


            if( det > 0 )
         
             OutlineVec.push_back(round[j + 1].x);
             OutlineVec.push_back(round[j + 1].y);
             OutlineVec.push_back(round[j].x);
             OutlineVec.push_back(round[j].y);
         
            else
         

             OutlineVec.push_back(round[j].x);
             OutlineVec.push_back(round[j].y);

             OutlineVec.push_back(round[j + 1].x);
             OutlineVec.push_back(round[j + 1].y);

【问题讨论】：

性能是在渲染时下降，还是仅在添加新点时下降？仅在添加新点时：257 个点时一切正常，但到 258 个点时就从 60fps 降到 0.5fps。在 Debug 模式下，59 个点之后就变慢了。我们能看一些代码吗？我认为这不是向量的错，258 个元素并不算多（除非你的元素是一直被复制的巨型类，但听起来不像）。但是你并没有一直在添加新点，对吗？那么减速就发生在遍历已经分配好的向量的过程中。这让人怀疑问题根本不在 std::vector。 【参考方案1】：

这可能不是您问题的根源，但您可以尝试使用迭代器，而不是到处用下标索引，这有助于编译器做出更好的优化决策。std::for_each 看起来是一个合适的选择：您可以把每个元素的处理委托给另一个函数，例如：

void OGLSHAPE::real_update_shap(SHAPECONTOUR &contour) 
    if(contour.UserPoints.size() < 2) 
        return;
    
    // do your thing!


void OGLSHAPE::UpdateShape() 
    // no need to explicitly test if empty, for_each won't do anything if the vector
    // has no elements
    std::for_each(Contour.begin(), Contour.end(), std::bind1st(std::mem_fun(real_update_shape), this));

或者至少，使用一些参考来帮助编译器。例如，转换这个：

 //Remove Duplicates
 // (fragment quoted from the question; `i` is the index of the contour
 // being processed by the enclosing loop)
 // The scan is repeated twice (j = 0, 1).
 for(int j = 0; j < 2; ++j)
 
     if(Contour[i].DrawingPoints.size() > 2)
         for(unsigned int x = 0; x < Contour[i].DrawingPoints.size() - 1; ++x)
         
             // compare point x with point x + 1 on both coordinates
             if(Contour[i].DrawingPoints[x].point[0] ==
                 Contour[i].DrawingPoints[x + 1].point[0] &&
                 Contour[i].DrawingPoints[x].point[1] ==
                 Contour[i].DrawingPoints[x + 1].point[1] 

             )
                 // erases the earlier point of the pair; x is still
                 // incremented afterwards, so the element that moves into
                 // slot x is not compared within this pass
                 Contour[i].DrawingPoints.erase(Contour[i].DrawingPoints.begin() + x);

这样的：

 //Remove Duplicates
 // Same logic as the version indexed through Contour[i] everywhere, but the
 // contour is looked up once and accessed through a reference.
 // The scan is repeated twice (j = 0, 1).
 for(int j = 0; j < 2; ++j)
 
     // reference to the one we care about, this may be allowed to be const
     // but I am not sure since it depends on your specific use cases, if it can be
     // it is better to shoot for const correctness as much as possible.
     // PS: types in all caps is very ugly, people will think it's a macro!
     SHAPECONTOUR &current_contour = Contour[i];
     if(current_contour.DrawingPoints.size() > 2)
         for(unsigned int x = 0; x < current_contour.DrawingPoints.size() - 1; ++x)
         
             // compare point x with point x + 1 on both coordinates
             if( current_contour.DrawingPoints[x    ].point[0] ==
                 current_contour.DrawingPoints[x + 1].point[0] &&
                 current_contour.DrawingPoints[x    ].point[1] ==
                 current_contour.DrawingPoints[x + 1].point[1])

             // erases the earlier point of the pair; x is still incremented
             // afterwards, so the element that moves into slot x is not
             // compared within this pass
             current_contour.DrawingPoints.erase(current_contour.DrawingPoints.begin() + x);

这属于可能有帮助、而且肯定不会有坏处的改动之一，所以值得一试。

【讨论】：

不幸的是，我分析了我的代码并意识到是我的多边形细分影响了我的速度。我不知道如何加快速度...【参考方案2】：

我对缓存一无所知，但手动扩充向量的容量可能会有帮助，而不是让它在每次添加元素时一个一个地增长。

根据这份参考资料，您可以调用 reserve 来增加向量的容量（capacity）。我建议一开始就为向量预留 100 个元素的空间，一旦第 100 个位置被填满，就再预留 100 个。

此答案可能无法解决问题，因为我不知道缓存与 std::vectors 有何关系，但您可能面临我描述的问题。

【讨论】：

以上是关于多个 std::vectors 来解决缓存问题？的主要内容，如果未能解决你的问题，请参考以下文章