为啥在这段代码中向量比指针使用更少的内存?
Posted
技术标签:
【中文标题】为啥在这段代码中向量比指针使用更少的内存?【英文标题】:why vector use less memory than pointers in this code?为什么在这段代码中向量比指针使用更少的内存? 【发布时间】:2015-05-28 16:37:48 【问题描述】:我使用指针编写了基于 Strassen 乘法算法的并行程序。 该程序返回两个大小相同的矩阵相乘的结果。 当大小为 256 时,程序占用大约 1 GB 的内存;当大小为 512 时,内存被完全占满,我的 Windows 无法继续工作,只能重新启动。
我把所有指针都换成了向量,结果内存使用量令人难以置信地减少了!对于大小 1024,只使用了 80 MB 的内存。
我对向量只了解一点:它最初静态分配空间,之后如果运行时需要更多空间,再动态扩展。
为什么指针比向量需要更多空间?
这是我的第一个代码:
#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>
#include <Windows.h>
#include <Psapi.h>
#include<vector>
using namespace std;
int ** matrix_1;
int ** matrix_2;
// Presumably the intended Cilk worker count (main() passes the same value to
// the runtime as a string literal); not referenced elsewhere in the visible
// code. An object-like macro takes no ':' or ';' -- the replacement text must
// be the bare value, otherwise the macro expands to the tokens ":4;".
#define number_thread 4
/// Prints an n x n matrix to standard output.
///
/// Writes the header line " matrix <name> :" followed by one line per row,
/// with every element followed by a single space.
///
/// @param name  label printed in the header line (taken by const reference so
///              no string copy is made per call)
/// @param n     number of rows and columns to print
/// @param show  row-pointer matrix; rows 0..n-1 must each hold at least n ints
void show(const std::string& name, int n, int **show)
{
    std::cout << " matrix " << name << " :" << std::endl;
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
            std::cout << show[i][j] << " ";
        std::cout << std::endl;  // terminate the row
    }
}
int ** strassen(int n, int **matrix_a, int ** matrix_b)
int ** A11;
int ** A12;
int ** A21;
int ** A22;
int ** B11;
int ** B12;
int ** B21;
int ** B22;
int ** result;
int **m1, **m2, **m3, ** m4, ** m5, ** m6, ** m7, ** m8;
A11 = new int*[n / 2];
A12 = new int*[n / 2];
A21 = new int*[n / 2];
A22 = new int*[n / 2];
B11 = new int*[n / 2];
B12 = new int*[n / 2];
B21 = new int*[n / 2];
B22 = new int*[n / 2];
result = new int *[n];
m1 = new int*[n / 2];
m2 = new int*[n / 2];
m3 = new int*[n / 2];
m4 = new int*[n / 2];
m5 = new int*[n / 2];
m6 = new int*[n / 2];
m7 = new int*[n / 2];
m8 = new int*[n / 2];
cilk_for(int i = 0; i < n / 2; i++)
//cout << " value i : " << i << endl;
A11[i] = new int[n / 2];
A12[i] = new int[n / 2];
A21[i] = new int[n / 2];
A22[i] = new int[n / 2];
B11[i] = new int[n / 2];
B12[i] = new int[n / 2];
B21[i] = new int[n / 2];
B22[i] = new int[n / 2];
m1[i] = new int[n / 2];
m2[i] = new int[n / 2];
m3[i] = new int[n / 2];
m4[i] = new int[n / 2];
m5[i] = new int[n / 2];
m6[i] = new int[n / 2];
m7[i] = new int[n / 2];
m8[i] = new int[n / 2];
cilk_for(int i = 0; i < n; i++) // matrix result
result[i] = new int[n];
if (n == 2)
result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];
return result;
// for (int i = 0; i < n;i++)
cilk_for(int i = 0; i < (n / 2); i++)
for (int j = 0; j < (n / 2); j++)
A11[i][j] = matrix_a[i][j];
B11[i][j] = matrix_b[i][j];
A12[i][j] = matrix_a[i][j + n / 2];
B12[i][j] = matrix_b[i][j + n / 2];
A21[i][j] = matrix_a[i + n / 2][j];
B21[i][j] = matrix_b[i + n / 2][j];
A22[i][j] = matrix_a[i + n / 2][j + n / 2];
B22[i][j] = matrix_b[i + n / 2][j + n / 2];
/*
show("A11", n / 2, A11);
show("A12", n / 2, A12);
show("A21", n / 2, A21);
show("A22", n / 2, A22);
show("B11", n / 2, B11);
show("B12", n / 2, B12);
show("B21", n / 2, B21);
show("B22", n / 2, B22);*/
// Run By eight_thread
m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22
cilk_sync;
/*
cout << "****************************\n";
cout << "*********** before add :\n";
show("m1", n / 2, m1);
show("m2", n / 2, m2);
show("m3", n / 2, m3);
show("m4", n / 2, m4);
show("m5", n / 2, m5);
show("m6", n / 2, m6);
show("m7", n / 2, m7);
show("m8", n / 2, m8);*/
cilk_for(int i = 0; i < n / 2; i++)
for (int j = 0; j < n / 2; j++)
m1[i][j] = m1[i][j] + m2[i][j];
m3[i][j] = m3[i][j] + m4[i][j];
m5[i][j] = m5[i][j] + m6[i][j];
m7[i][j] = m7[i][j] + m8[i][j];
/*cout << "after adding hello \n";
show("m1", n / 2, m1);
show("m3", n / 2, m3);
show("m5", n / 2, m5);
show("m7", n / 2, m7);*/
cilk_for(int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
if (i < n / 2 && j < n / 2)
result[i][j] = m1[i][j];
else if (i < n / 2 && j >= n / 2)
result[i][j] = m3[i][j - n / 2];
else if (i >= n / 2 && j < n / 2)
result[i][j] = m5[i - n / 2][j];
else if (i >= n / 2 && j >= n / 2)
result[i][j] = m7[i - n / 2][j - n / 2];
/*
cilk_for(int i = 0; i < n / 2; i++)
for (int j = 0; j < n / 2; j++)
delete A11[i][j];
delete A12[i][j];
delete A21[i][j];
delete A22[i][j];
delete B11[i][j];
delete B12[i][j];
delete B21[i][j];
delete B22[i][j];
delete m1[i][j];
delete m2[i][j];
delete m3[i][j];
delete m4[i][j];
delete m5[i][j];
delete m6[i][j];
delete m7[i][j];
delete m8[i][j];*/
/*
delete[] A11[i];
delete[] A12[i];
delete[] A21[i];
delete[] A22[i];
delete[] B11[i];
delete[] B12[i];
delete[] B21[i];
delete[] B22[i];
delete[] m1[i];
delete[] m2[i];
delete[] m3[i];
delete[] m4[i];
delete[] m5[i];
delete[] m6[i];
delete[] m7[i];
delete[] m8[i];
*/
delete[] A11;
delete[] A12;
delete[] A21;
delete[] A22;
delete[] B11;
delete[] B12;
delete[] B21;
delete[] B22;
delete[] m1;
delete[] m2;
delete[] m3;
delete[] m4;
delete[] m5;
delete[] m6;
delete[] m7;
delete[] m8;
return result;
int main()
int size;
freopen("in.txt", "r", stdin);
freopen("out.txt", "w", stdout);
__cilkrts_set_param("nworkers", "4");
//cout << " please Enter the size OF ur matrix /n";
cin >> size;
matrix_1 = new int*[size];
matrix_2 = new int*[size];
if (size % 2 == 0)
//instialize matrix1
//cout << "matrix_1 :" << endl;
for (int i = 0; i < size; i++)
matrix_1[i] = new int[size];
for (int j = 0; j < size; j++)
matrix_1[i][j] = rand() % 3;
//cin >> matrix_1[i][j];
//cout << matrix_1[i][j] << " ";
//cout << endl;
//instialize matrix2
//cout << "matrix2_is :\n";
for (int i = 0; i < size; i++)
matrix_2[i] = new int[size];
for (int j = 0; j < size; j++)
matrix_2[i][j] = rand() % 3;
//cout << matrix_2[i][j]<<" ";
//cin >> matrix_2[i][j];
// cout << endl;
clock_t begin = clock();
matrix_2 = strassen(size, matrix_1, matrix_2);
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
cout << "*******\ntime is : " << elapsed_secs << endl;
//answer:
/* for (int i = 0; i < size; i++)
for (int j = 0; j < size; j++)
cout<< matrix_2[i][j]<<" ";
cout << endl;
*/
else
cout << " we couldnt use strasen ";
cout << "\nTotal Virtual Memory:" << endl;
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
printf("%u", totalVirtualMem);
cout << "\nVirtual Memory currently used:" << endl;
// MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
printf("%u", virtualMemUsed);
cout << "\nVirtual Memory currently used by current process:" << endl;
PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
printf("%u", virtualMemUsedByMe);
cout << "\nPhysical Memory currently used: " << endl;
//MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;
printf("%u", physMemUsed);
cout << endl;
cout << "\nPhysical Memory currently used by current process : " << endl;
// PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
printf("%u", physMemUsedByMe);
//cout << "memory usage :"<<double(totalVirtualMem) << endl;
//_getch();
return 0;
我用向量替换整个指针数组:
#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>
#include <Windows.h>
#include <Psapi.h>
#include<vector>
using namespace std;
vector<vector<int> > matrix_1, matrix_2;
//int matrix_1;
//int ** matrix_2;
// Presumably an intended worker count; not referenced in the visible code.
// An object-like macro takes no ':' or ';' -- the replacement text must be the
// bare value, otherwise the macro expands to the tokens ":4;".
#define number_thread 4
/// Prints an n x n matrix stored as an array of row pointers.
///
/// Writes the header line " matrix <name> :" followed by one line per row,
/// with every element followed by a single space.
///
/// @param name  label printed in the header line
/// @param n     number of rows and columns to print
/// @param show  row-pointer matrix; rows 0..n-1 must each hold at least n ints
void show(const std::string& name, int n, int **show)
{
    std::cout << " matrix " << name << " :" << std::endl;
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
            std::cout << show[i][j] << " ";
        std::cout << std::endl;  // terminate the row
    }
}

/// Overload for the vector-of-vectors matrices this program works with; the
/// output format is identical to the pointer overload.
///
/// @param name  label printed in the header line
/// @param n     number of rows and columns to print
/// @param rows  matrix with at least n rows of at least n elements each
void show(const std::string& name, int n, const std::vector<std::vector<int>>& rows)
{
    std::cout << " matrix " << name << " :" << std::endl;
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < n; j++)
            std::cout << rows[i][j] << " ";
        std::cout << std::endl;
    }
}
vector<vector<int>> strassen(int n, vector<vector<int>> matrix_a, vector<vector<int>> matrix_b)
vector<vector<int>> A11;
vector<vector<int>> A12;
vector<vector<int>> A21;
vector<vector<int>> A22;
vector<vector<int>> B11;
vector<vector<int>> B12;
vector<vector<int>> B21;
vector<vector<int>> B22;
vector<vector<int>> result;
vector<int> help;
vector<vector<int>> m1, m2, m3, m4, m5, m6, m7, m8;
help.clear();
for (int j = 0; j < n / 2; j++)
help.push_back(2);
for(int i = 0; i < n / 2; i++)
A11.push_back(help);
A12.push_back(help);
A21.push_back(help);
A22.push_back(help);
B11.push_back(help);
B12.push_back(help);
B21.push_back(help);
B22.push_back(help);
m1.push_back(help);
m2.push_back(help);
m3.push_back(help);
m4.push_back(help);
m5.push_back(help);
m6.push_back(help);
m7.push_back(help);
m8.push_back(help);
for (int j = 0; j < n / 2; j++)
help.push_back(2);
for(int i = 0; i < n; i++)
result.push_back(help);
if (n == 2)
result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];
return result;
// for (int i = 0; i < n;i++)
for(int i = 0; i < (n / 2); i++)
for(int j = 0; j <( n / 2); j++)
A11[i][j] = matrix_a[i][j];
B11[i][j] = matrix_b[i][j];
A12[i][j] = matrix_a[i][j + n / 2];
B12[i][j] = matrix_b[i][j + n / 2];
A21[i][j] = matrix_a[i + n / 2][j];
B21[i][j] = matrix_b[i + n / 2][j];
A22[i][j] = matrix_a[i + n / 2][j + n / 2];
B22[i][j] = matrix_b[i + n / 2][j + n / 2];
/*
show("A11", n / 2, A11);
show("A12", n / 2, A12);
show("A21", n / 2, A21);
show("A22", n / 2, A22);
show("B11", n / 2, B11);
show("B12", n / 2, B12);
show("B21", n / 2, B21);
show("B22", n / 2, B22);*/
// Run By eight_thread
m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22
cilk_sync;
/*
cout << "****************************\n";
cout << "*********** before add :\n";
show("m1", n / 2, m1);
show("m2", n / 2, m2);
show
("m3", n / 2, m3);
show("m4", n / 2, m4);
show("m5", n / 2, m5);
show("m6", n / 2, m6);
show("m7", n / 2, m7);
show("m8", n / 2, m8);*/
for(int i = 0; i < n / 2; i++)
for (int j = 0; j < n / 2; j++)
m1[i][j] = m1[i][j] + m2[i][j];
m3[i][j] = m3[i][j] + m4[i][j];
m5[i][j] = m5[i][j] + m6[i][j];
m7[i][j] = m7[i][j] + m8[i][j];
/*cout << "after adding hello \n";
show("m1", n / 2, m1);
show("m3", n / 2, m3);
show("m5", n / 2, m5);
show("m7", n / 2, m7);*/
for(int i = 0; i < n ; i++)
for(int j = 0; j < n ; j++)
if (i < n / 2 && j < n / 2)
result[i][j] = m1[i][j];
else if (i < n / 2 && j >= n / 2)
result[i][j] = m3[i][j - n / 2];
else if (i >= n / 2 && j < n / 2)
result[i][j] = m5[i - n / 2][j];
else if (i >= n / 2 && j >= n / 2)
result[i][j] = m7[i - n / 2][j - n / 2];
/*
cilk_for(int i = 0; i < n / 2; i++)
for (int j = 0; j < n / 2; j++)
delete A11[i][j];
delete A12[i][j];
delete A21[i][j];
delete A22[i][j];
delete B11[i][j];
delete B12[i][j];
delete B21[i][j];
delete B22[i][j];
delete m1[i][j];
delete m2[i][j];
delete m3[i][j];
delete m4[i][j];
delete m5[i][j];
delete m6[i][j];
delete m7[i][j];
delete m8[i][j];*/
/*
delete[] A11[i];
delete[] A12[i];
delete[] A21[i];
delete[] A22[i];
delete[] B11[i];
delete[] B12[i];
delete[] B21[i];
delete[] B22[i];
delete[] m1[i];
delete[] m2[i];
delete[] m3[i];
delete[] m4[i];
delete[] m5[i];
delete[] m6[i];
delete[] m7[i];
delete[] m8[i];
*/
/* for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
cout << result[i][j] << " ";
cout << endl;
*/
return result;
int main()
int size;
freopen("in.txt","r",stdin);
freopen("out.txt", "w", stdout);
__cilkrts_set_param("nworkers", "1");
//cout << " please Enter the size OF ur matrix /n";
cin >> size;
vector<int> inner;
if (size % 2 == 0)
//instialize matrix1
cout << "matrix_1 :" << endl;
for (int i = 0; i < size; i++)
inner.clear();
for (int j = 0; j < size; j++)
inner.push_back(rand()%3);
//cin >> matrix_1[i][j];
cout << inner[j]<<" ";
cout << endl;
matrix_1.push_back(inner);
//instialize matrix2
cout << "matrix2_is :\n";
inner.clear();
for (int i = 0; i < size; i++)
inner.clear();
//matrix_2[i] = new int[size];
for (int j = 0; j < size; j++)
inner.push_back(rand()%3);
cout << inner[j]<<" ";
//cin >> matrix_2[i][j];
cout << endl;
matrix_2.push_back(inner);
clock_t begin = clock();
matrix_2 = strassen(size, matrix_1, matrix_2);
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
cout << "*******\ntime is : " << elapsed_secs << endl;
//answer:
cout << "answerrr :" << endl;
for (int i = 0; i < size; i++)
for (int j = 0; j < size; j++)
cout<< matrix_2[i][j]<<" ";
cout << endl;
else
cout << " we couldnt use strasen ";
cout << "\nTotal Virtual Memory:" << endl;
MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
printf("%u", totalVirtualMem);
cout << "\nVirtual Memory currently used:" << endl;
// MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
printf("%u", virtualMemUsed);
cout << "\nVirtual Memory currently used by current process:" << endl;
PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
printf("%u", virtualMemUsedByMe);
cout << "\nPhysical Memory currently used: " << endl;
//MEMORYSTATUSEX memInfo;
memInfo.dwLength = sizeof(MEMORYSTATUSEX);
GlobalMemoryStatusEx(&memInfo);
DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;
printf("%u", physMemUsed);
cout << endl;
cout << "\nPhysical Memory currently used by current process : " << endl;
// PROCESS_MEMORY_COUNTERS_EX pmc;
GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
printf("%u", physMemUsedByMe);
//cout << "memory usage :"<<double(totalVirtualMem) << endl;
//_getch();
return 0;
【问题讨论】:
大小为 N 的 T 类型数组应始终小于大小为 N 的 T 类型向量。请提供实际示例说明您所做的更改。 你是如何声明/分配你的指针和你的向量的? 指针不需要比向量更多的空间。这取决于您如何分配指针指向的内存,我们无法从模糊的描述中猜测出来。最有可能的是,指针杂耍导致了大量内存泄漏,这已由 RAII 修复。 我编辑了我的帖子@MikeSeymour 【参考方案1】:想到两个可能的原因:
如果您手动分配内存并且没有正确释放它,则会造成内存泄漏。使用原始指针比使用向量更容易发生这种情况。 如果您在 1000 个单独的分配中分配 1000 个整数,它将比分配一个由 1000 个整数组成的块(向量所做的)占用更多的空间。每次分配都需要一些额外的内存来记账。【讨论】:
我添加了我的代码,具体是我代码中的什么地方造成的? @N_93:主要是内存泄漏:`strassen()` 中有一个循环为 `A11[i]` 等分配内存,但相应的删除语句被注释掉了。一般来说,每个 `new`/`new[]` 都应该有一个对应的 `delete`/`delete[]`。
如何分配和删除以节省空间?
@N_93:您使用 `new`/`new[]` 分配的所有内容,都应该在不再使用时用对应的 `delete`/`delete[]` 释放。【参考方案2】:
我猜这是一个分配问题。据我观察,向操作系统(OS)申请内存似乎相当耗时。
这只是一个猜测,但也许 `std::vector` 的默认分配器会先从操作系统获取较大的连续内存块,再从中划分出空间来满足较小的向量分配?
这个答案可能会提供一些见解:
https://***.com/a/29659791/3807729
我曾在运行计时操作之前先分配、然后释放一个很大的 `std::vector`,从而减少了测试程序的运行时间。
我推测 C++ 运行时系统(在某些实现中)可能会保留从操作系统得到的内存,即使这些内存已被释放,因为每次都向操作系统申请小块内存的代价要高得多。
【讨论】:
以上是关于为啥在这段代码中向量比指针使用更少的内存?的主要内容,如果未能解决你的问题,请参考以下文章
为啥尽管我在变量中使用 malloc 分配更多内存,但当我打印变量的大小时,它仍然显示更少的内存/字节? [复制]