在 OpenGL 中实例化数百万个对象
Posted
技术标签:
【中文标题】在 OpenGL 中实例化数百万个对象【英文标题】:Instancing millions of objects in OpenGL 【发布时间】:2017-02-04 02:14:54 【问题描述】:我的最终目标是以 60 fps 的速度渲染 100 万个不同大小和颜色的球体。我也希望能够在屏幕上移动相机。
我已经修改了this page of the tutorial I am studying 上的代码以尝试实例化 100 万个立方体。我已经能够实例化多达 90,000 个立方体,但是如果我尝试实例化 160,000 个立方体,那么程序就会中断。我收到程序“停止工作”并意外退出的错误消息。我不知道这是什么错误,但我相信它可能与内存有关。
我对实例化的理解很幼稚,所以我不知道问题是什么。我相信实例化 100 万个立方体是我实现 100 万个球体实例化目标的下一步。所以,我的问题是:如何在 OpenGL 中实例化 100 万个立方体/对象?
我一直在通过 this tutorial 学习 OpenGL,因此我在 Visual Studio 2013 中使用 32-bit GLEW 和 32-bit GLFW。我在 64 位操作系统 (Windows 7) 上有 8 GB 的 RAM 和 2.30 GHz CPU .
我的代码如下:
(将代码第 2 行的 NUM_INS 设置为要实例化的立方体数量，并确保该数值是完全平方数，即其平方根为整数)
// Make sure NUM_INS is a square number
#define NUM_INS 9
// GLEW
#define GLEW_STATIC
#include <GL/glew.h>
// GLFW
#include <GLFW/glfw3.h>
// GL includes
#include "Shader.h"
// GLM Mathematics
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
// Properties
GLuint screenWidth = 800, screenHeight = 600;
// Function prototypes
void key_callback(GLFWwindow* window, int key, int scancode, int action, int mode);
// The MAIN function, from here we start our application and run the Game loop
int main()
// Init GLFW
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
GLFWwindow* window = glfwCreateWindow(screenWidth, screenHeight, "LearnOpenGL", nullptr, nullptr); // Windowed
glfwMakeContextCurrent(window);
// Set the required callback functions
glfwSetKeyCallback(window, key_callback);
// Initialize GLEW to setup the OpenGL Function pointers
glewExperimental = GL_TRUE;
glewInit();
// Define the viewport dimensions
glViewport(0, 0, screenWidth, screenHeight);
// Setup OpenGL options
//glEnable(GL_DEPTH_TEST);
// Setup and compile our shader(s)
Shader shader("core.vs", "core.frag");
// Generate a list of 100 quad locations/translation-vectors
glm::vec2 translations[NUM_INS];
int index = 0;
GLfloat offset = 1.0f/sqrt(NUM_INS);
for (GLint y = -sqrt(NUM_INS); y < sqrt(NUM_INS); y += 2)
for (GLint x = -sqrt(NUM_INS); x < sqrt(NUM_INS); x += 2)
glm::vec2 translation;
translation.x = (GLfloat)x / sqrt(NUM_INS) + offset;
translation.y = (GLfloat)y / sqrt(NUM_INS) + offset;
translations[index++] = translation;
// Store instance data in an array buffer
GLuint instanceVBO;
glGenBuffers(1, &instanceVBO);
glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(glm::vec2) * NUM_INS, &translations[0], GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
// Generate quad VAO
GLfloat quadVertices[] =
// Positions // Colors
-0.05f, 0.05f, 1.0f, 0.0f, 0.0f,
0.05f, -0.05f, 0.0f, 1.0f, 0.0f,
-0.05f, -0.05f, 0.0f, 0.0f, 1.0f,
-0.05f, 0.05f, 1.0f, 0.0f, 0.0f,
0.05f, -0.05f, 0.0f, 1.0f, 0.0f,
0.05f, 0.05f, 0.0f, 0.0f, 1.0f
;
GLfloat vertices[] =
-0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 1.0f, 1.0f, 0.0f,
0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 1.0f, 1.0f, 0.0f,
-0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.0f, 1.0f, 0.0f,
-0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.0f, 0.0f, 1.0f,
-0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.0f, 0.0f, 1.0f,
0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 1.0f, 0.0f,
0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 1.0f, 0.0f,
-0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.0f, 1.0f, 0.0f,
-0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.0f, 0.0f, 1.0f,
-0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
-0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 1.0f, 1.0f, 0.0f,
-0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.0f, 1.0f, 0.0f,
-0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.0f, 1.0f, 0.0f,
-0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.0f, 0.0f, 0.0f,
-0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 1.0f, 1.0f, 0.0f,
0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.0f, 1.0f, 0.0f,
0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.0f, 1.0f, 0.0f,
0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.0f, 0.0f, 0.0f,
0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
-0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.0f, 1.0f, 0.0f,
0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 1.0f, 1.0f, 0.0f,
0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
-0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.0f, 0.0f, 0.0f,
-0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.0f, 1.0f, 0.0f,
-0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.0f, 1.0f, 0.0f,
0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 1.0f, 1.0f, 0.0f,
0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 1.0f, 0.0f, 0.0f,
-0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), 0.0f, 0.0f, 0.0f,
-0.5f/sqrt(NUM_INS), 0.5f/sqrt(NUM_INS), -0.5f/sqrt(NUM_INS), 0.0f, 1.0f, 0.0f
;
GLuint quadVAO, quadVBO;
glGenVertexArrays(1, &quadVAO);
glGenBuffers(1, &quadVBO);
glBindVertexArray(quadVAO);
glBindBuffer(GL_ARRAY_BUFFER, quadVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 6 * sizeof(GLfloat), (GLvoid*)0);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 6 * sizeof(GLfloat), (GLvoid*)(2 * sizeof(GLfloat)));
// Also set instance data
glEnableVertexAttribArray(2);
glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(GLfloat), (GLvoid*)0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glVertexAttribDivisor(2, 1); // Tell OpenGL this is an instanced vertex attribute.
glBindVertexArray(0);
// Game loop
while (!glfwWindowShouldClose(window))
// Check and call events
glfwPollEvents();
// Clear buffers
glClearColor(0.03f, 0.03f, 0.03f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT);
// Draw 100 instanced quads
shader.Use();
glBindVertexArray(quadVAO);
glDrawArraysInstanced(GL_TRIANGLES, 0, 36, NUM_INS); // 100 triangles of 6 vertices each
glBindVertexArray(0);
// Swap the buffers
glfwSwapBuffers(window);
glfwTerminate();
return 0;
// Is called whenever a key is pressed/released via GLFW
void key_callback(GLFWwindow* window, int key, int scancode, int action, int mode)
{
    // Pressing Escape flags the window for closing; the render loop in main()
    // polls that flag through glfwWindowShouldClose() and exits.
    if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
    {
        glfwSetWindowShouldClose(window, GL_TRUE);
    }
}
顶点着色器:(命名为core.vs)
#version 330 core
// Per-vertex inputs.
layout (location = 0) in vec3 position;
layout (location = 1) in vec3 color;
// Translation added to the vertex position in x and y.
layout (location = 2) in vec2 offset;

// Colour handed on to the fragment stage.
out vec3 fColor;

void main()
{
    // Shift the vertex in x/y by the offset; z is passed through unchanged.
    gl_Position = vec4(position.x + offset.x, position.y + offset.y, position.z, 1.0f);
    fColor = color;
}
片段着色器:(命名为 core.frag)
#version 330 core
// Colour received from the vertex stage.
in vec3 fColor;
out vec4 color;

void main()
{
    // Output the incoming colour fully opaque.
    color = vec4(fColor, 1.0f);
}
着色器类:(命名为 Shader.h)
#pragma once
// Std. Includes
#include <vector>
// GL Includes
#include <GL/glew.h>
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
// Defines several possible options for camera movement. Used as abstraction to stay away from window-system specific input methods
// Direction tokens accepted by Camera::ProcessKeyboard.
enum Camera_Movement
{
    FORWARD,
    BACKWARD,
    LEFT,
    RIGHT
};
// Default camera values
// Default yaw for Camera's vector constructor; Camera converts it with glm::radians(), so it is in degrees.
const GLfloat YAW = -90.0f;
// Default pitch for Camera's vector constructor; also converted with glm::radians(), so it is in degrees.
const GLfloat PITCH = 0.0f;
// Initial Camera::MovementSpeed; ProcessKeyboard multiplies it by deltaTime to get the step length.
const GLfloat SPEED = 3.0f;
// Initial Camera::MouseSensitivity; ProcessMouseMovement scales the x/y mouse offsets by it.
const GLfloat SENSITIVTY = 0.25f;
// Initial Camera::Zoom; ProcessMouseScroll keeps Zoom within [1.0, 45.0].
// NOTE(review): presumably a field-of-view angle in degrees - confirm against the code that consumes Zoom.
const GLfloat ZOOM = 45.0f;
// An abstract camera class that processes input and calculates the corresponding Euler Angles, Vectors and Matrices for use in OpenGL
class Camera
public:
// Camera Attributes
glm::vec3 Position;
glm::vec3 Front;
glm::vec3 Up;
glm::vec3 Right;
glm::vec3 WorldUp;
// Eular Angles
GLfloat Yaw;
GLfloat Pitch;
// Camera options
GLfloat MovementSpeed;
GLfloat MouseSensitivity;
GLfloat Zoom;
// Constructor with vectors
Camera(glm::vec3 position = glm::vec3(0.0f, 0.0f, 0.0f), glm::vec3 up = glm::vec3(0.0f, 1.0f, 0.0f), GLfloat yaw = YAW, GLfloat pitch = PITCH) : Front(glm::vec3(0.0f, 0.0f, -1.0f)), MovementSpeed(SPEED), MouseSensitivity(SENSITIVTY), Zoom(ZOOM)
this->Position = position;
this->WorldUp = up;
this->Yaw = yaw;
this->Pitch = pitch;
this->updateCameraVectors();
// Constructor with scalar values
Camera(GLfloat posX, GLfloat posY, GLfloat posZ, GLfloat upX, GLfloat upY, GLfloat upZ, GLfloat yaw, GLfloat pitch) : Front(glm::vec3(0.0f, 0.0f, -1.0f)), MovementSpeed(SPEED), MouseSensitivity(SENSITIVTY), Zoom(ZOOM)
this->Position = glm::vec3(posX, posY, posZ);
this->WorldUp = glm::vec3(upX, upY, upZ);
this->Yaw = yaw;
this->Pitch = pitch;
this->updateCameraVectors();
// Returns the view matrix calculated using Eular Angles and the LookAt Matrix
glm::mat4 GetViewMatrix()
return glm::lookAt(this->Position, this->Position + this->Front, this->Up);
// Processes input received from any keyboard-like input system. Accepts input parameter in the form of camera defined ENUM (to abstract it from windowing systems)
void ProcessKeyboard(Camera_Movement direction, GLfloat deltaTime)
GLfloat velocity = this->MovementSpeed * deltaTime;
if (direction == FORWARD)
this->Position += this->Front * velocity;
if (direction == BACKWARD)
this->Position -= this->Front * velocity;
if (direction == LEFT)
this->Position -= this->Right * velocity;
if (direction == RIGHT)
this->Position += this->Right * velocity;
// Processes input received from a mouse input system. Expects the offset value in both the x and y direction.
void ProcessMouseMovement(GLfloat xoffset, GLfloat yoffset, GLboolean constrainPitch = true)
xoffset *= this->MouseSensitivity;
yoffset *= this->MouseSensitivity;
this->Yaw += xoffset;
this->Pitch += yoffset;
// Make sure that when pitch is out of bounds, screen doesn't get flipped
if (constrainPitch)
if (this->Pitch > 89.0f)
this->Pitch = 89.0f;
if (this->Pitch < -89.0f)
this->Pitch = -89.0f;
// Update Front, Right and Up Vectors using the updated Eular angles
this->updateCameraVectors();
// Processes input received from a mouse scroll-wheel event. Only requires input on the vertical wheel-axis
void ProcessMouseScroll(GLfloat yoffset)
if (this->Zoom >= 1.0f && this->Zoom <= 45.0f)
this->Zoom -= yoffset;
if (this->Zoom <= 1.0f)
this->Zoom = 1.0f;
if (this->Zoom >= 45.0f)
this->Zoom = 45.0f;
private:
// Calculates the front vector from the Camera's (updated) Eular Angles
void updateCameraVectors()
// Calculate the new Front vector
glm::vec3 front;
front.x = cos(glm::radians(this->Yaw)) * cos(glm::radians(this->Pitch));
front.y = sin(glm::radians(this->Pitch));
front.z = sin(glm::radians(this->Yaw)) * cos(glm::radians(this->Pitch));
this->Front = glm::normalize(front);
// Also re-calculate the Right and Up vector
this->Right = glm::normalize(glm::cross(this->Front, this->WorldUp)); // Normalize the vectors, because their length gets closer to 0 the more you look up or down which results in slower movement.
this->Up = glm::normalize(glm::cross(this->Right, this->Front));
;
【问题讨论】:
嗯，听起来这可能是个重复的问题。the debugger 怎么说？ 请在问题本身中包含源代码。否则，当 pastebin 被删除时，这个问题就没有用了。 您把对象数组分配成了局部变量，也就是分配在堆栈上，而堆栈容纳不了这么多对象。请在堆上分配您的对象数组（您可以使用 std::vector）。
@MatteoItalia 我该怎么做?
【参考方案1】:
首先,我必须说你的Shader class是相机代码,但我也是从那个教程中学习的,所以我自己改了。
您要解决的问题与您的系统堆栈大小有关。在 Visual Studio 中，局部变量默认总共只能占用约 1 MB 的堆栈空间，因此当 NUM_INS 设置为 160000 时，您的程序会发生堆栈溢出。
真正的解决方案(已编辑)
就像 @Matteo Italia 所说，改用 std::vector，或者只是将您的数组初始化部分 glm::vec2 translations[NUM_INS]; 更改为 glm::vec2* translations = new glm::vec2[NUM_INS];，并且在不再使用它时不要忘记用 delete[] 释放它。我测试了第二种方法，它可以工作。抱歉我之前的回答不好，我应该了解更多关于堆和堆栈的知识！
不了解背景的,我找了ref1,ref2学习。
最坏的解决方案(以前,不应使用)
要解决问题,您可以通过以下步骤更改 Visual Studio 设置:
-
右键单击您的项目 -> 设置
转到链接器 -> 系统
将堆栈保留大小（Stack Reserve Size）设置为 2097152 (2M)
请注意，我的编辑器（IDE）是中文版，所以我不确定这些选项的确切英文名称。完成该设置后，您可以将 NUM_INS 设置为 160,000 或更多，并看到如下结果：
【讨论】:
这是 stack 的大小,增加它几乎不是解决方案。 OP 应该简单地将他的东西分配在堆上而不是堆栈上。 @Tokenyet 谢谢你的评论帮助我解决了我的问题。我知道这是一个不同的问题,我应该将其作为新问题发布吗?我的多维数据集代码运行速度约为 30 fps,但我预计它会更高。有什么简单的方法可以加快速度吗? @Paul Terwilliger 你应该写一篇关于如何实现 fps 计数器代码和其他细节的新帖子。在这里,我只是想提醒您一些事情,请检查您的vsync 和其他限制设置。如果您这样做了,请继续发新帖:) 在这种情况下,这确实不是正确的答案;它可能有效,但它是一个脆弱的解决方案。 @Tokenyet 好的,我发布了一篇关于每秒帧数的新帖子。您可以在这里找到它:***.com/questions/39752685/… 感谢您的所有帮助!【参考方案2】:这里
glm::vec2 translations[NUM_INS];
你正在堆栈上分配你的位置数组；只要 NUM_INS 相对较小，这并不是什么大问题，但是当您开始使用“大”数字（例如 100000）时，堆栈就无法承受。
假设每个 glm::vec2 元素由一对 32 位浮点数组成（因此，每个 vec2 是 8 字节），160000 个元素占用 1.28 MB，这会溢出堆栈（在 Windows 上使用默认链接器设置时，堆栈大小为 1 MB）。
这个问题的解决方案并不是增加堆栈大小：堆栈的大小是有意限制的，并且没有针对大对象进行优化。相反，您应该在堆上分配您的元素，这样您就可以利用所有可用于您的进程的虚拟内存。
为此，请使用 new/delete，或者——更简单地——学习使用 std::vector 类：
std::vector<glm::vec2> translations(NUM_INS);
您的代码的其余部分应该按原样工作。
【讨论】:
以上是关于在 OpenGL 中实例化数百万个对象的主要内容,如果未能解决你的问题,请参考以下文章
如何在 C++ 中使用 Slot Map / Object Pool 模式管理数百万个游戏对象?