在 OpenGL 中实例化数百万个对象:提高每秒帧数
Posted
技术标签:
【中文标题】在 OpenGL 中实例化数百万个对象:提高每秒帧数【英文标题】:Instancing millions of objects in OpenGL: improving frames-per-second 【发布时间】:2017-02-06 17:45:01 【问题描述】:我的最终目标是以 60 fps 的速度渲染 100 万个不同大小和颜色的球体。我也希望能够在屏幕上移动相机。
我已修改this page of the tutorial I am studying 上的代码以尝试实例化许多球体。但是,我发现在 64 球时,我的 fps 低于 60,而在 900 球时,我的 fps 仅为区区 4。我对实例化的理解是幼稚的,但我相信我应该获得比每秒更多的帧数这。只有 64 个球体应该可以达到 60 fps。我相信我在某种程度上导致 CPU 和 GPU 的通信频率超出了它们应有的频率。所以我的问题是:我如何实例化这么多对象(理想情况下是数百万)而不导致 fps 下降(理想情况下为 60 fps)?
我通过每 10 帧计算一次 (10 / time_elapsed)
来计算 fps,其中 time_elapsed
是自上次 fps 调用以来经过的时间。我在代码的第 118 行使用printf
将其打印出来。
我一直在通过 this tutorial 学习 OpenGL,因此我在 Visual Studio 2013 中使用了 32-bit GLEW 和 32-bit GLFW。我在 64 位操作系统 (Windows 7) 上有 8 GB 的 RAM 和 2.30 GHz CPU .
我已经尝试根据上面的教程编写我自己的示例。源代码:
(将第 2 行设置为要实例化的球体的数量。确保第 2 行具有整数平方根。将第 4 行设置为球体的细节,它可以走的最低值为 0。数字越大 = 越详细。)
// Make sure NUM_INS is a square number
#define NUM_INS 1
// Detail up to 4 is probably good enough
#define SPHERE_DETAIL 4
#include <vector>
// GLEW
#define GLEW_STATIC
#include <GL/glew.h>
// GLFW
#include <GLFW/glfw3.h>
// GL includes
#include "Shader.h"
// GLM Mathemtics
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
// Properties
GLuint screenWidth = 800, screenHeight = 600;
// Function prototypes
void key_callback(GLFWwindow* window, int key, int scancode, int action, int mode);
std::vector<GLfloat> create_sphere(int recursion);
// The MAIN function, from here we start our application and run the Game loop
int main()
// Init GLFW
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
GLFWwindow* window = glfwCreateWindow(screenWidth, screenHeight, "LearnOpenGL", nullptr, nullptr); // Windowed
glfwMakeContextCurrent(window);
// Set the required callback functions
glfwSetKeyCallback(window, key_callback);
// Initialize GLEW to setup the OpenGL Function pointers
glewExperimental = GL_TRUE;
glewInit();
// Define the viewport dimensions
glViewport(0, 0, screenWidth, screenHeight);
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); // Comment to remove wireframe mode
// Setup OpenGL options
glEnable(GL_DEPTH_TEST);
// Setup and compile our shader(s)
Shader shader("core.vs", "core.frag");
// Generate a list of 100 quad locations/translation-vectors
std::vector<glm::vec2> translations(NUM_INS);
//glm::vec2 translations[NUM_INS];
int index = 0;
GLfloat offset = 1.0f / (float)sqrt(NUM_INS);
for (GLint y = -(float)sqrt(NUM_INS); y < (float)sqrt(NUM_INS); y += 2)
for (GLint x = -(float)sqrt(NUM_INS); x < (float)sqrt(NUM_INS); x += 2)
glm::vec2 translation;
translation.x = (GLfloat)x / (float)sqrt(NUM_INS) + offset;
translation.y = (GLfloat)y / (float)sqrt(NUM_INS) + offset;
translations[index++] = translation;
// Store instance data in an array buffer
GLuint instanceVBO;
glGenBuffers(1, &instanceVBO);
glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(glm::vec2) * NUM_INS, &translations[0], GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
// create 12 vertices of a icosahedron
std::vector<GLfloat> vv = create_sphere(SPHERE_DETAIL);
GLuint quadVAO, quadVBO;
glGenVertexArrays(1, &quadVAO);
glGenBuffers(1, &quadVBO);
glBindVertexArray(quadVAO);
glBindBuffer(GL_ARRAY_BUFFER, quadVBO);
glBufferData(GL_ARRAY_BUFFER, vv.size() * sizeof(GLfloat), &vv[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 6 * sizeof(GLfloat), (GLvoid*)0);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 6 * sizeof(GLfloat), (GLvoid*)(2 * sizeof(GLfloat)));
// Also set instance data
glEnableVertexAttribArray(2);
glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(GLfloat), (GLvoid*)0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glVertexAttribDivisor(2, 1); // Tell OpenGL this is an instanced vertex attribute.
glBindVertexArray(0);
// For printing frames-per-second
float counter = 0;
double get_time = 0;
double new_time;
// Game loop
while (!glfwWindowShouldClose(window))
// Print fps by printing (number_of_frames / time_elapsed)
counter += 1;
if (counter > 10)
counter -= 10;
new_time = glfwGetTime();
printf("fps: %.2f ", (10/(new_time - get_time)));
get_time = new_time;
// Check and call events
glfwPollEvents();
// Clear buffers
//glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Draw 100 instanced quads
shader.Use();
glm::mat4 model;
model = glm::rotate(model, 0.0f, glm::vec3(1.0f, 0.0f, 0.0f));
// Camera/View transformation
glm::mat4 view;
GLfloat radius = 10.0f;
GLfloat camX = sin(glfwGetTime()) * radius;
GLfloat camZ = cos(glfwGetTime()) * radius;
view = glm::lookAt(glm::vec3(camX, 0.0f, camZ), glm::vec3(0.0f, 0.0f, 0.0f), glm::vec3(0.0f, 1.0f, 0.0f));
// Projection
glm::mat4 projection;
projection = glm::perspective(45.0f, (GLfloat)screenWidth / (GLfloat)screenHeight, 0.1f, 100.0f);
// Get the uniform locations
GLint modelLoc = glGetUniformLocation(shader.Program, "model");
GLint viewLoc = glGetUniformLocation(shader.Program, "view");
GLint projLoc = glGetUniformLocation(shader.Program, "projection");
// Pass the matrices to the shader
glUniformMatrix4fv(modelLoc, 1, GL_FALSE, glm::value_ptr(model));
glUniformMatrix4fv(viewLoc, 1, GL_FALSE, glm::value_ptr(view));
glUniformMatrix4fv(projLoc, 1, GL_FALSE, glm::value_ptr(projection));
glBindVertexArray(quadVAO);
glDrawArraysInstanced(GL_TRIANGLES, 0, vv.size() / 3, NUM_INS); // 100 triangles of 6 vertices each
glBindVertexArray(0);
// Swap the buffers
glfwSwapBuffers(window);
glfwTerminate();
return 0;
// Is called whenever a key is pressed/released via GLFW
void key_callback(GLFWwindow* window, int key, int scancode, int action, int mode)
if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
glfwSetWindowShouldClose(window, GL_TRUE);
std::vector<GLfloat> add_color(std::vector<GLfloat> sphere)
// Add color
std::vector<GLfloat> colored_sphere;
for (GLint i = 0; i < sphere.size(); i+=9)
colored_sphere.push_back(sphere[i]);
colored_sphere.push_back(sphere[i+1]);
colored_sphere.push_back(sphere[i+2]);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(sphere[i+3]);
colored_sphere.push_back(sphere[i+4]);
colored_sphere.push_back(sphere[i+5]);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(sphere[i+6]);
colored_sphere.push_back(sphere[i+7]);
colored_sphere.push_back(sphere[i+8]);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
return colored_sphere;
std::vector<GLfloat> tesselate(std::vector<GLfloat> shape, int recursion)
if (recursion > 0)
std::vector<GLfloat> new_sphere = ;
for (GLint i = 0; i < shape.size(); i += 9)
// 1.902113 approximately
GLfloat radius = sqrt(1.0f + pow((1.0f + sqrt(5.0f)) / 2.0f, 2));
// Every 9 points is a triangle. Take 1 triangle and turn it into 4 triangles.
GLfloat p_one[] = shape[i], shape[i + 1], shape[i + 2];
GLfloat p_two[] = shape[i + 3], shape[i + 4], shape[i + 5];
GLfloat p_thr[] = shape[i + 6], shape[i + 7], shape[i + 8];
GLfloat p_one_two[] = (p_one[0] + p_two[0]) / 2.0f, (p_one[1] + p_two[1]) / 2.0f, (p_one[2] + p_two[2]) / 2.0f ;
GLfloat p_one_thr[] = (p_one[0] + p_thr[0]) / 2.0f, (p_one[1] + p_thr[1]) / 2.0f, (p_one[2] + p_thr[2]) / 2.0f ;
GLfloat p_two_thr[] = (p_two[0] + p_thr[0]) / 2.0f, (p_two[1] + p_thr[1]) / 2.0f, (p_two[2] + p_thr[2]) / 2.0f ;
GLfloat r_one_two = sqrt((p_one_two[0]*p_one_two[0]) + (p_one_two[1]*p_one_two[1]) + (p_one_two[2]*p_one_two[2]));
GLfloat r_one_thr = sqrt((p_one_thr[0]*p_one_thr[0]) + (p_one_thr[1]*p_one_thr[1]) + (p_one_thr[2]*p_one_thr[2]));
GLfloat r_two_thr = sqrt((p_two_thr[0]*p_two_thr[0]) + (p_two_thr[1]*p_two_thr[1]) + (p_two_thr[2]*p_two_thr[2]));
GLfloat t_one_two[] = radius * p_one_two[0] / r_one_two, radius * p_one_two[1] / r_one_two, radius * p_one_two[2] / r_one_two ;
GLfloat t_one_thr[] = radius * p_one_thr[0] / r_one_thr, radius * p_one_thr[1] / r_one_thr, radius * p_one_thr[2] / r_one_thr ;
GLfloat t_two_thr[] = radius * p_two_thr[0] / r_two_thr, radius * p_two_thr[1] / r_two_thr, radius * p_two_thr[2] / r_two_thr ;
// Triangle 1:
new_sphere.push_back(p_one[0]);
new_sphere.push_back(p_one[1]);
new_sphere.push_back(p_one[2]);
new_sphere.push_back(t_one_two[0]);
new_sphere.push_back(t_one_two[1]);
new_sphere.push_back(t_one_two[2]);
new_sphere.push_back(t_one_thr[0]);
new_sphere.push_back(t_one_thr[1]);
new_sphere.push_back(t_one_thr[2]);
// Triangle 2:
new_sphere.push_back(p_two[0]);
new_sphere.push_back(p_two[1]);
new_sphere.push_back(p_two[2]);
new_sphere.push_back(t_one_two[0]);
new_sphere.push_back(t_one_two[1]);
new_sphere.push_back(t_one_two[2]);
new_sphere.push_back(t_two_thr[0]);
new_sphere.push_back(t_two_thr[1]);
new_sphere.push_back(t_two_thr[2]);
// Triangle 3:
new_sphere.push_back(p_thr[0]);
new_sphere.push_back(p_thr[1]);
new_sphere.push_back(p_thr[2]);
new_sphere.push_back(t_one_thr[0]);
new_sphere.push_back(t_one_thr[1]);
new_sphere.push_back(t_one_thr[2]);
new_sphere.push_back(t_two_thr[0]);
new_sphere.push_back(t_two_thr[1]);
new_sphere.push_back(t_two_thr[2]);
// Center Triangle:
new_sphere.push_back(t_one_two[0]);
new_sphere.push_back(t_one_two[1]);
new_sphere.push_back(t_one_two[2]);
new_sphere.push_back(t_one_thr[0]);
new_sphere.push_back(t_one_thr[1]);
new_sphere.push_back(t_one_thr[2]);
new_sphere.push_back(t_two_thr[0]);
new_sphere.push_back(t_two_thr[1]);
new_sphere.push_back(t_two_thr[2]);
return tesselate(new_sphere, recursion - 1);
printf("number of vertices to be rendered: %d || ", shape.size());
return shape;
std::vector<GLfloat> create_sphere(int recursion)
// Define the starting icosahedron
GLfloat t_ = (1.0f + sqrt(5.0f)) / 2.0f;
std::vector<GLfloat> icosahedron =
-1.0f, t_, 0.0f, -t_, 0.0f, 1.0f, 0.0f, 1.0f, t_,
-1.0f, t_, 0.0f, 0.0f, 1.0f, t_, 1.0f, t_, 0.0f,
-1.0f, t_, 0.0f, 1.0f, t_, 0.0f, 0.0f, 1.0f, -t_,
-1.0f, t_, 0.0f, 0.0f, 1.0f, -t_, -t_, 0.0f, -1.0f,
-1.0f, t_, 0.0f, -t_, 0.0f, -1.0f, -t_, 0.0f, 1.0f,
1.0f, t_, 0.0f, 0.0f, 1.0f, t_, t_, 0.0f, 1.0f,
0.0f, 1.0f, t_, -t_, 0.0f, 1.0f, 0.0f, -1.0f, t_,
-t_, 0.0f, 1.0f, -t_, 0.0f, -1.0f, -1.0f, -t_, 0.0f,
-t_, 0.0f, -1.0f, 0.0f, 1.0f, -t_, 0.0f, -1.0f, -t_,
0.0f, 1.0f, -t_, 1.0f, t_, 0.0f, t_, 0.0f, -1.0f,
1.0f, -t_, 0.0f, t_, 0.0f, 1.0f, 0.0f, -1.0f, t_,
1.0f, -t_, 0.0f, 0.0f, -1.0f, t_,-1.0f, -t_, 0.0f,
1.0f, -t_, 0.0f,-1.0f, -t_, 0.0f, 0.0f, -1.0f, -t_,
1.0f, -t_, 0.0f, 0.0f, -1.0f, -t_, t_, 0.0f, -1.0f,
1.0f, -t_, 0.0f, t_, 0.0f, -1.0f, t_, 0.0f, 1.0f,
0.0f, -1.0f, t_, t_, 0.0f, 1.0f, 0.0f, 1.0f, t_,
-1.0f, -t_, 0.0f, 0.0f, -1.0f, t_,-t_, 0.0f, 1.0f,
0.0f, -1.0f, -t_,-1.0f, -t_, 0.0f,-t_, 0.0f, -1.0f,
t_, 0.0f, -1.0f, 0.0f, -1.0f, -t_, 0.0f, 1.0f, -t_,
t_, 0.0f, 1.0f, t_, 0.0f, -1.0f, 1.0f, t_, 0.0f,
;
// Tesselate the icososphere the number of times recursion
std::vector<GLfloat> colorless_sphere = tesselate(icosahedron, recursion);
// Add color and return
return add_color(colorless_sphere);
顶点着色器:(命名为core.vs)
#version 330 core
layout (location = 0) in vec3 position;
layout (location = 1) in vec3 color;
layout (location = 2) in vec2 offset;
out vec3 fColor;
uniform mat4 model;
uniform mat4 view;
uniform mat4 projection;
void main()
gl_Position = projection * view * model * vec4(position.x + offset.x, position.y + offset.y, position.z, 1.0f);
fColor = color;
片段着色器:(命名为 core.frag)
#version 330 core
in vec3 fColor;
out vec4 color;
void main()
color = vec4(fColor, 1.0f);
着色器类:(命名为 Shader.h)
#ifndef SHADER_H
#define SHADER_H
#include <string>
#include <fstream>
#include <sstream>
#include <iostream>
#include <GL/glew.h>
class Shader
public:
GLuint Program;
// Constructor generates the shader on the fly
Shader(const GLchar* vertexPath, const GLchar* fragmentPath)
// 1. Retrieve the vertex/fragment source code from filePath
std::string vertexCode;
std::string fragmentCode;
std::ifstream vShaderFile;
std::ifstream fShaderFile;
// ensures ifstream objects can throw exceptions:
vShaderFile.exceptions(std::ifstream::badbit);
fShaderFile.exceptions(std::ifstream::badbit);
try
// Open files
vShaderFile.open(vertexPath);
fShaderFile.open(fragmentPath);
std::stringstream vShaderStream, fShaderStream;
// Read file's buffer contents into streams
vShaderStream << vShaderFile.rdbuf();
fShaderStream << fShaderFile.rdbuf();
// close file handlers
vShaderFile.close();
fShaderFile.close();
// Convert stream into string
vertexCode = vShaderStream.str();
fragmentCode = fShaderStream.str();
catch (std::ifstream::failure e)
std::cout << "ERROR::SHADER::FILE_NOT_SUCCESFULLY_READ" << std::endl;
const GLchar* vShaderCode = vertexCode.c_str();
const GLchar * fShaderCode = fragmentCode.c_str();
// 2. Compile shaders
GLuint vertex, fragment;
GLint success;
GLchar infoLog[512];
// Vertex Shader
vertex = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex, 1, &vShaderCode, NULL);
glCompileShader(vertex);
// Print compile errors if any
glGetShaderiv(vertex, GL_COMPILE_STATUS, &success);
if (!success)
glGetShaderInfoLog(vertex, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::VERTEX::COMPILATION_FAILED\n" << infoLog << std::endl;
// Fragment Shader
fragment = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment, 1, &fShaderCode, NULL);
glCompileShader(fragment);
// Print compile errors if any
glGetShaderiv(fragment, GL_COMPILE_STATUS, &success);
if (!success)
glGetShaderInfoLog(fragment, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::FRAGMENT::COMPILATION_FAILED\n" << infoLog << std::endl;
// Shader Program
this->Program = glCreateProgram();
glAttachShader(this->Program, vertex);
glAttachShader(this->Program, fragment);
glLinkProgram(this->Program);
// Print linking errors if any
glGetProgramiv(this->Program, GL_LINK_STATUS, &success);
if (!success)
glGetProgramInfoLog(this->Program, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::PROGRAM::LINKING_FAILED\n" << infoLog << std::endl;
// Delete the shaders as they're linked into our program now and no longer necessery
glDeleteShader(vertex);
glDeleteShader(fragment);
// Uses the current shader
void Use()
glUseProgram(this->Program);
;
#endif
【问题讨论】:
可能微不足道,但您可以一劳永逸地计算(float)sqrt(NUM_INS)
(CPU 会说谢谢)。
这有助于 med 进行实例渲染:ogldev.atspace.co.uk/www/tutorial33/tutorial33.html 它只是一个不同的绘图调用,它告诉 gpu 回收已发送的信息
出于好奇,您的目标是哪类硬件?您总是可以借此机会了解计算着色器和间接绘制 :) 完全在 GPU 端生成所有这些实例,然后一切顺利。
@Charlie 也许我没有在我的帖子中提到,但我已经在我的代码中使用了实例化。这就是我的结果让我感到困惑的原因。
@AndonM.Coleman 我的目标是具有相同硬件的计算机,或者可能稍差一些。
【参考方案1】:
我的最终目标是以 60 fps 的速度渲染 100 万个不同大小和颜色的球体。
这是一个不合理的期望。
假设每个球体由 50 个三角形组成。对于一个好的球体形状来说有点小,但我们假设它们那么小。
100 万个球体,每个球体有 50 个三角形,每帧有 5000 万个三角形。在 60 FPS 时,每秒 3 十亿 个三角形。
没有商业可用的 GPU 足以做到这一点。那只是一个 50 个三角形的球体;您的 4x 镶嵌二十面体将超过 5,000 个三角形。
现在是的,绘制 60 个这样的球体每帧只需约 300,000 个三角形。但即便如此,在 60 FPS 下也是每秒约 1800 万个三角形。确实存在可以处理这么多三角形的硬件,但它显然是很多。而且你肯定不会得到 100 万个。
这与 GPU/CPU 通信或开销无关。你只是在你的 GPU 上投入了比它所能处理的更多的工作。你也许可以在这里和那里改进一些东西,但没有什么能让你达到你想要的十分之一。
至少,不采用这种整体方法。
对于您想要绘制数百万个球体的特殊情况,我会使用光线追踪冒名顶替者而不是球体的实际几何形状。也就是说,您绘制四边形,其位置由顶点(或几何)着色器生成。您为每个球体生成一个四边形,以便四边形包围球体。然后片段着色器执行一个简单的光线-球体相交测试,以查看有问题的片段(从相机视图的方向)是否撞击球体。如果光线没有击中球体,则丢弃片段。
您还需要修改gl_FragDepth
为冒名顶替者提供正确的深度值,以便相交球体可以工作。
【讨论】:
也许你是对的,数以百万计的球体太雄心勃勃了。但是,这里有一个视频,其中有 50 万个球体在 OpenGL 中渲染,它们也会移动和填充:youtube.com/watch?v=Tnc_IoXwH24 也许我必须修改我的项目以满足您的标准,或者像这个视频中那样降低 fps。跨度> @PaulTerwilliger:你看过那个视频的细节了吗?具体来说,它说“用于每个球体的三角形数量取决于它与观察者的距离”。这规则将实例化为工具。 我明白了,那么您是否知道将实例化用于球体的不同细节?我不能将不同细节的球体发送到 GPU,然后根据距离渲染一些或其他吗? @PaulTerwilliger:你为什么这么热衷于使用实例化?你能有几个不同的球体分辨率,然后根据到相机的距离对你的实例进行排序吗?但是有问题的视频没有这样做;它在 GPU 上生成顶点数据。 @PaulTerwilliger:“我只是认为实例化是绘制许多相同对象的方法”它是。但如果它们有不同的三角形计数,它们就不是“同一个对象”。以上是关于在 OpenGL 中实例化数百万个对象:提高每秒帧数的主要内容,如果未能解决你的问题,请参考以下文章