AMD 上的慢模板纹理

Posted

技术标签:

【中文标题】AMD 上的慢模板纹理【英文标题】:Slow stencil texture on AMD 【发布时间】:2017-09-19 15:46:46 【问题描述】:

我正在尝试为一个修改版的 Doom3 引擎添加软阴影:做法是使用 FBO 加模板纹理附件,并在光照交互(light interaction)片段着色器中绑定并读取该模板纹理。功能上一切正常,但在 Radeon 460 上存在严重的性能问题(我手头没有其他 AMD GPU,但由于这块显卡相对较新,我怀疑其他 AMD 显卡的表现相同甚至更差)。

我正在使用最新的驱动程序。

帧率下降非常严重,以至于用 qglCopyTexImage2D 把模板数据复制到另一张纹理(而且是每个光源都复制一次!),实际上都比直接绑定 FBO 所使用的模板纹理更快。

另一个问题是,当我尝试改用 qglCopyTexSubImage2D 来优化 qglCopyTexImage2D 的复制时,画面开始闪烁。

其他程序员对模板纹理有什么实际使用建议吗?

nVidia 和 Intel 在速度方面的表现似乎都不错。

        globalImages->currentRenderImage->Bind();
        globalImages->currentRenderImage->uploadWidth = curWidth; // used as a shader param
        globalImages->currentRenderImage->uploadHeight = curHeight;
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
        qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined

        globalImages->currentRenderFbo->Bind();
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
        qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined

        if ( glConfig.vendor != glvAny )  
            globalImages->currentStencilFbo->Bind();
            globalImages->currentStencilFbo->uploadWidth = curWidth;
            globalImages->currentStencilFbo->uploadHeight = curHeight;
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
            qglTexImage2D( GL_TEXTURE_2D, 0, GL_STENCIL_INDEX8, curWidth, curHeight, 0, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0 );
        

        globalImages->currentDepthImage->Bind();
        globalImages->currentDepthImage->uploadWidth = curWidth; // used as a shader param
        globalImages->currentDepthImage->uploadHeight = curHeight;
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
        if ( glConfig.vendor == glvIntel )  // FIXME allow 24-bit depth for low-res monitors
            qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, curWidth, curHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0 );
         else 
            qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, curWidth, curHeight, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0 );
        
    

    // (re-)attach textures to FBO
    if ( !fboId || r_fboSharedColor.IsModified() || r_fboSharedDepth.IsModified() ) 
        // create a framebuffer object, you need to delete them when program exits.
        if ( !fboId )
            qglGenFramebuffers( 1, &fboId );
        qglBindFramebuffer( GL_FRAMEBUFFER_EXT, fboId );
        // attach a texture to FBO color attachement point
        qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, globalImages->currentRenderImage->texnum, 0 );
        // attach a renderbuffer to depth attachment point
        GLuint depthTex = r_fboSharedDepth.GetBool() ? globalImages->currentDepthImage->texnum : globalImages->currentDepthFbo->texnum;
        qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
        if ( glConfig.vendor == glvIntel ) // separate stencil, thank God
            qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, globalImages->currentStencilFbo->texnum, 0 );
        else
            qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
        int status = qglCheckFramebufferStatus( GL_FRAMEBUFFER );
        if ( GL_FRAMEBUFFER_COMPLETE != status )  // something went wrong, fall back to default
            common->Printf( "glCheckFramebufferStatus %d\n", status );
            qglDeleteFramebuffers( 1, &fboId );
            fboId = 0; // try from scratch next time
            r_useFbo.SetBool( false );
        
        qglBindFramebuffer( GL_FRAMEBUFFER, 0 ); // not obvious, but let it be 
    
    qglBindFramebuffer( GL_FRAMEBUFFER, fboId );
    qglClear( GL_COLOR_BUFFER_BIT ); // otherwise transparent skybox blends with previous frame
    fboUsed = true;
    GL_CheckErrors();


/*
 Soft shadows vendor specific implementation
 Intel: separate stencil, direct access, fastest
 nVidia: combined stencil & depth, direct access, fast
 AMD: combined stencil & depth, direct access very slow, resorting to stencil copy
 */

void FB_CopyStencil()  // duzenko: why, AMD? WHY?? 
    if ( glConfig.vendor != glvAMD || !r_softShadows.GetBool() )
        return;
    globalImages->currentStencilFbo->Bind();
    qglCopyTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, 0, 0, glConfig.vidWidth, glConfig.vidHeight, 0 );
    /*globalImages->currentDepthFbo->Bind();
    idScreenRect& r = backEnd.currentScissor;
    //qglCopyTexSubImage2D( GL_TEXTURE_2D, 0, r.x1, r.y1, r.x1, r.y1, r.x2 - r.x1 + 1, r.y2 - r.y1 + 1 );*/
    GL_CheckErrors();


void FB_BindStencilTexture() 
    const GLenum GL_DEPTH_STENCIL_TEXTURE_MODE = 0x90EA;
    idImage* stencil = glConfig.vendor != glvAny ? globalImages->currentStencilFbo : globalImages->currentDepthImage;
    stencil->Bind();
    if ( glConfig.vendor != glvIntel )
        glTexParameteri( GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX );

【问题讨论】:

【参考方案1】:

我最终使用了两个帧缓冲区:一个仅用于阴影,另一个用于其他所有内容。阴影纹理在前者中作为 FBO 附件,在后者的渲染中则作为 texture2D 绑定。

【讨论】:

以上是关于AMD 上的慢模板纹理的主要内容,如果未能解决你的问题,请参考以下文章

“纹理图集不支持模板渲染”SpriteKit 和 Xcode

模板缓冲区在不同显卡上的作用不同

如何在模板中使用深度纹理,OpenGL ES 3.0

WebGL:在带有深度模板纹理附件的帧缓冲区上未清除颜色

为啥模板测试不丢弃片段?

哪些 OpenGL ES 2.0 纹理格式可进行颜色、深度或模板渲染?