为啥在这段代码中向量比指针使用更少的内存?

Posted

技术标签:

【中文标题】为啥在这段代码中向量比指针使用更少的内存?【英文标题】:why vector use less memory than pointers in this code?为什么在这段代码中向量比指针使用更少的内存? 【发布时间】:2015-05-28 16:37:48 【问题描述】:

我使用指针编写了基于 Strassen 乘法算法的并行程序。该程序返回两个大小相同的矩阵相乘的结果。当大小为 256 时,程序占用大约 1 GB 的内存;当大小为 512 时,内存被完全占满,我的 Windows 无法工作,只能重新启动。

我把所有指针都换成了向量,结果内存使用量令人难以置信地减少了!对于 1024 的大小,只使用了 80 MB 的内存。

我对向量略知一二:它最初静态分配空间,如果运行时需要更多空间,再动态分配。

为什么指针比向量需要更多空间?

这是我的第一个代码:

#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>

#include <Windows.h>
#include <Psapi.h>
#include<vector>


using namespace std;

// Input matrices, each stored as an array of row pointers; main() allocates them.
int ** matrix_1;
int ** matrix_2;

// NOTE(review): malformed macro — there is no whitespace between the name and
// the replacement text, and the definition carries a trailing ';'. It is not
// referenced anywhere in the code shown here.
#define number_thread:4;

/// Prints an n-by-n matrix to standard output.
///
/// Writes the header line " matrix <name> :" and then one line per row, with
/// every element followed by a single space.
///
/// @param name  Label printed in the header line.
/// @param n     Number of rows and columns to print.
/// @param show  Array of n row pointers, each pointing at n ints.
void show(std::string name, int n, int **show)
{
    std::cout << " matrix " << name << " :" << std::endl;
    for (int i = 0; i < n; i++)
    {
        // One output line per matrix row.
        for (int j = 0; j < n; j++)
            std::cout << show[i][j] << " ";
        std::cout << std::endl;
    }
}



// Multiplies two n-by-n matrices by splitting each operand into four
// (n/2)-by-(n/2) quadrants, recursively multiplying quadrant pairs as spawned
// Cilk tasks, and summing the partial products pairwise into the quadrants of
// the result.
//
// Parameters:
//   n         - dimension of both input matrices
//   matrix_a  - left operand, an array of n row pointers
//   matrix_b  - right operand, an array of n row pointers
// Returns a matrix allocated here with new[] (n row pointers, n ints each);
// this function never frees it.
//
// NOTE(review): eight recursive products are formed per level, not the seven
// of Strassen's scheme, despite the function name.
// NOTE(review): n == 2 is the only case that stops the recursion, so n is
// presumably expected to be a power of two >= 2 — confirm against callers.
int ** strassen(int n, int **matrix_a, int ** matrix_b)


    // Quadrants of matrix_a.
    int ** A11;
    int ** A12;
    int ** A21;
    int ** A22;

    // Quadrants of matrix_b.
    int ** B11;
    int ** B12;
    int ** B21;
    int ** B22;

    int ** result;


    // Partial products of quadrant pairs.
    int **m1, **m2, **m3, ** m4, ** m5, ** m6, ** m7, ** m8;
    // Row-pointer arrays for the eight quadrants.
    A11 = new int*[n / 2];
    A12 = new int*[n / 2];
    A21 = new int*[n / 2];
    A22 = new int*[n / 2];

    B11 = new int*[n / 2];
    B12 = new int*[n / 2];
    B21 = new int*[n / 2];
    B22 = new int*[n / 2];


    result = new int *[n];

    // NOTE(review): m1..m8 are reassigned from the recursive calls further
    // down, so these arrays (and the rows allocated for them below) become
    // unreachable at that point without being freed.
    m1 = new int*[n / 2];
    m2 = new int*[n / 2];
    m3 = new int*[n / 2];
    m4 = new int*[n / 2];
    m5 = new int*[n / 2];
    m6 = new int*[n / 2];
    m7 = new int*[n / 2];
    m8 = new int*[n / 2];

    // Allocate one row of n/2 ints per quadrant and per partial product.
    cilk_for(int i = 0; i < n / 2; i++)
    
        //cout << " value i : " << i << endl;
        A11[i] = new int[n / 2];
        A12[i] = new int[n / 2];
        A21[i] = new int[n / 2];
        A22[i] = new int[n / 2];

        B11[i] = new int[n / 2];
        B12[i] = new int[n / 2];
        B21[i] = new int[n / 2];
        B22[i] = new int[n / 2];

        m1[i] = new int[n / 2];
        m2[i] = new int[n / 2];
        m3[i] = new int[n / 2];
        m4[i] = new int[n / 2];
        m5[i] = new int[n / 2];
        m6[i] = new int[n / 2];
        m7[i] = new int[n / 2];
        m8[i] = new int[n / 2];

    

    // Allocate the n rows of the result matrix.
    cilk_for(int i = 0; i < n; i++) // matrix result
        result[i] = new int[n];


    // Base case: direct 2x2 product.
    // NOTE(review): returns without releasing any of the quadrant and m1..m8
    // buffers allocated above.
    if (n == 2)
    
        result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
        result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
        result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
        result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];

        return result;

    
    //  for (int i = 0; i < n;i++)

    // Copy the four quadrants of each operand into their own buffers:
    // 11 = top-left, 12 = top-right, 21 = bottom-left, 22 = bottom-right.
    cilk_for(int i = 0; i < (n / 2); i++)
    
        for (int j = 0; j < (n / 2); j++)
        
            A11[i][j] = matrix_a[i][j];
            B11[i][j] = matrix_b[i][j];

            A12[i][j] = matrix_a[i][j + n / 2];
            B12[i][j] = matrix_b[i][j + n / 2];

            A21[i][j] = matrix_a[i + n / 2][j];
            B21[i][j] = matrix_b[i + n / 2][j];

            A22[i][j] = matrix_a[i + n / 2][j + n / 2];
            B22[i][j] = matrix_b[i + n / 2][j + n / 2];


        
    
    /*
    show("A11", n / 2, A11);
    show("A12", n / 2, A12);
    show("A21", n / 2, A21);
    show("A22", n / 2, A22);
    show("B11", n / 2, B11);
    show("B12", n / 2, B12);
    show("B21", n / 2, B21);
    show("B22", n / 2, B22);*/

    // Spawn the eight quadrant products; each call returns a freshly allocated
    // (n/2)-by-(n/2) matrix that replaces the pointer previously held in m1..m8.
    m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
    m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
    m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
    m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
    m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
    m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
    m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
    m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22



    // Wait for all spawned products before combining them.
    cilk_sync;

    /*
    cout << "****************************\n";
    cout << "*********** before add :\n";
    show("m1", n / 2, m1);
    show("m2", n / 2, m2);
    show("m3", n / 2, m3);
    show("m4", n / 2, m4);
    show("m5", n / 2, m5);
    show("m6", n / 2, m6);
    show("m7", n / 2, m7);
    show("m8", n / 2, m8);*/


    // Sum the product pairs in place: m1, m3, m5 and m7 end up holding the
    // top-left, top-right, bottom-left and bottom-right result quadrants.
    cilk_for(int i = 0; i < n / 2; i++)
    for (int j = 0; j < n / 2; j++)
    
        m1[i][j] = m1[i][j] + m2[i][j];
        m3[i][j] = m3[i][j] + m4[i][j];
        m5[i][j] = m5[i][j] + m6[i][j];
        m7[i][j] = m7[i][j] + m8[i][j];

    

    /*cout << "after adding hello \n";
    show("m1", n / 2, m1);
    show("m3", n / 2, m3);
    show("m5", n / 2, m5);
    show("m7", n / 2, m7);*/



    // Copy the four summed quadrants into their positions in the result.
    cilk_for(int i = 0; i < n; i++)
    
        for (int j = 0; j < n; j++)
        
            if (i < n / 2 && j < n / 2)
            
                result[i][j] = m1[i][j];
            
            else if (i < n / 2 && j >= n / 2)
            
                result[i][j] = m3[i][j - n / 2];
            
            else if (i >= n / 2 && j < n / 2)
            
                result[i][j] = m5[i - n / 2][j];
            
            else if (i >= n / 2 && j >= n / 2)
            
                result[i][j] = m7[i - n / 2][j - n / 2];

            
        
    

    /*
    cilk_for(int i = 0; i < n / 2; i++)
    
    for (int j = 0; j < n / 2; j++)
    
    delete A11[i][j];
    delete A12[i][j];
    delete A21[i][j];
    delete A22[i][j];
    delete B11[i][j];
    delete B12[i][j];
    delete B21[i][j];
    delete B22[i][j];


    delete m1[i][j];
    delete m2[i][j];
    delete m3[i][j];
    delete m4[i][j];
    delete m5[i][j];
    delete m6[i][j];
    delete m7[i][j];
    delete m8[i][j];*/






    // NOTE(review): the per-row delete[] calls below are commented out, so every
    // row buffer allocated with new int[n / 2] in this call is never released.
    /*  
        delete[] A11[i];
        delete[] A12[i];
        delete[] A21[i];
        delete[] A22[i];
        delete[] B11[i];
        delete[] B12[i];
        delete[] B21[i];
        delete[] B22[i];


        delete[] m1[i];
        delete[] m2[i];
        delete[] m3[i];
        delete[] m4[i];
        delete[] m5[i];
        delete[] m6[i];
        delete[] m7[i];
        delete[] m8[i];
        */


    // Only the row-pointer arrays are freed here; the rows they point to are not.
    delete[] A11;
    delete[] A12;
    delete[] A21;
    delete[] A22;
    delete[] B11;
    delete[] B12;
    delete[] B21;
    delete[] B22;


    // m1..m8 now refer to the matrices returned by the recursive calls.
    delete[] m1;
    delete[] m2;
    delete[] m3;
    delete[] m4;
    delete[] m5;
    delete[] m6;
    delete[] m7;
    delete[] m8;

    return result;




// Entry point of the pointer-based version: reads the matrix size, fills two
// size-by-size matrices with random values in 0..2, times a single strassen()
// call, and prints system-wide and per-process memory statistics.
// Standard input is redirected to in.txt and standard output to out.txt.
int main()


    int size;

    // NOTE(review): the freopen() results are not checked.
    freopen("in.txt", "r", stdin);
    freopen("out.txt", "w", stdout);


    // Set the Cilk runtime "nworkers" parameter to 4.
    __cilkrts_set_param("nworkers", "4");
    //cout << " please Enter the size OF ur matrix /n";
    cin >> size;

    matrix_1 = new int*[size];
    matrix_2 = new int*[size];

    // Only evenness is checked here; strassen() itself keeps halving n until
    // it reaches 2.
    if (size % 2 == 0)
    

        // initialize matrix_1
        //cout << "matrix_1 :" << endl;
        for (int i = 0; i < size; i++)
        
            matrix_1[i] = new int[size];
            for (int j = 0; j < size; j++)

            
                matrix_1[i][j] = rand() % 3;
                //cin >> matrix_1[i][j];
                //cout << matrix_1[i][j] << " ";

            
            //cout << endl;

        
        // initialize matrix_2
        //cout << "matrix2_is :\n";
        for (int i = 0; i < size; i++)
        
            matrix_2[i] = new int[size];
            for (int j = 0; j < size; j++)

            

                matrix_2[i][j] = rand() % 3;
                //cout << matrix_2[i][j]<<" ";
                //cin >> matrix_2[i][j];

            
            //  cout << endl;

        
        clock_t begin = clock();


        // NOTE(review): overwriting matrix_2 with the result drops the only
        // pointer to the input buffers allocated above; they are never freed.
        matrix_2 = strassen(size, matrix_1, matrix_2);

        clock_t end = clock();
        double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;

        cout << "*******\ntime is : " << elapsed_secs << endl;

        //answer:
        /*  for (int i = 0; i < size; i++)
            
            for (int j = 0; j < size; j++)

            
            cout<< matrix_2[i][j]<<" ";

            
            cout << endl;

            */


    
    else
        cout << " we couldnt use strasen ";

    cout << "\nTotal Virtual Memory:" << endl;

    // System-wide figure taken from the ullTotalPageFile field.
    // NOTE(review): the DWORDLONG / SIZE_T values in this function are printed
    // with "%u", which expects unsigned int — confirm the format specifiers
    // against the actual widths of these types.
    MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
    printf("%u", totalVirtualMem);

    cout << "\nVirtual Memory currently used:" << endl;
    //  MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
    printf("%u", virtualMemUsed);


    cout << "\nVirtual Memory currently used by current process:" << endl;

    // Per-process figure taken from the PrivateUsage field.
    PROCESS_MEMORY_COUNTERS_EX pmc;
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
    printf("%u", virtualMemUsedByMe);

    cout << "\nPhysical Memory currently used: " << endl;
    //MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;

    printf("%u", physMemUsed);

    cout << endl;
    cout << "\nPhysical Memory currently used by current process : " << endl;
    //  PROCESS_MEMORY_COUNTERS_EX pmc;
    // Per-process figure taken from the WorkingSetSize field.
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
    printf("%u", physMemUsedByMe);
    //cout << "memory usage :"<<double(totalVirtualMem) << endl;


    //_getch();

    return 0;


我用向量替换整个指针数组:

#include <iostream>
#include<cilk\cilk.h>
#include <cilk/cilk_api.h>
#include<conio.h>
#include<ctime>
#include<string>
#include<random>

#include <Windows.h>
#include <Psapi.h>
#include<vector>


using namespace std;
// Input matrices as vectors of rows; main() fills them with push_back.
vector<vector<int> > matrix_1, matrix_2;

// Declarations left over from the pointer-based version.
//int matrix_1;
//int ** matrix_2;

// NOTE(review): malformed macro — there is no whitespace between the name and
// the replacement text, and the definition carries a trailing ';'. It is not
// referenced anywhere in the code shown here.
#define number_thread:4;

// Shared printing routine: works for any matrix type that supports m[i][j].
template <typename Matrix>
static void show_square_matrix(const std::string &name, int n, const Matrix &m)
{
    std::cout << " matrix " << name << " :" << std::endl;
    for (int i = 0; i < n; i++)
    {
        // One output line per matrix row.
        for (int j = 0; j < n; j++)
            std::cout << m[i][j] << " ";
        std::cout << std::endl;
    }
}

/// Prints an n-by-n matrix to standard output.
///
/// Writes the header line " matrix <name> :" and then one line per row, with
/// every element followed by a single space.
///
/// @param name  Label printed in the header line.
/// @param n     Number of rows and columns to print.
/// @param show  Array of n row pointers, each pointing at n ints.
void show(std::string name, int n, int **show)
{
    show_square_matrix(name, n, show);
}

/// Overload for the vector-of-rows representation used by the vector-based
/// strassen(); produces the same output format as the pointer overload.
///
/// @param name  Label printed in the header line.
/// @param n     Number of rows and columns to print; must not exceed the
///              dimensions of @p show.
/// @param show  Matrix stored as a vector of row vectors.
void show(std::string name, int n, const std::vector<std::vector<int>> &show)
{
    show_square_matrix(name, n, show);
}


// Vector-based version of the recursive block multiplication: splits each
// n-by-n operand into four (n/2)-by-(n/2) quadrants, recursively multiplies
// quadrant pairs as spawned Cilk tasks, and sums the partial products pairwise
// into the quadrants of the result.
//
// Parameters:
//   n         - dimension of both input matrices
//   matrix_a  - left operand (taken by value, so the argument is copied)
//   matrix_b  - right operand (taken by value, so the argument is copied)
// Returns the product as a vector of row vectors.
//
// All storage is held in std::vector objects, which release their memory
// automatically when reassigned or destroyed; no manual delete is needed.
//
// NOTE(review): eight recursive products are formed per level, not the seven
// of Strassen's scheme, despite the function name.
// NOTE(review): n == 2 is the only case that stops the recursion, so n is
// presumably expected to be a power of two >= 2 — confirm against callers.
vector<vector<int>> strassen(int n, vector<vector<int>> matrix_a, vector<vector<int>> matrix_b)


    // Quadrants of matrix_a.
    vector<vector<int>> A11;
    vector<vector<int>> A12;
    vector<vector<int>> A21;
    vector<vector<int>> A22;

    // Quadrants of matrix_b.
    vector<vector<int>> B11;
    vector<vector<int>> B12;
    vector<vector<int>> B21;
    vector<vector<int>> B22;

    vector<vector<int>> result;


    // Template row used to size the matrices below.
    vector<int> help;


    // Partial products of quadrant pairs.
    vector<vector<int>> m1, m2, m3,  m4, m5,  m6,  m7,  m8;




    // Build a row of n/2 placeholder elements (the value 2 is overwritten later).
    help.clear();
    for (int j = 0; j < n / 2; j++)
    
        help.push_back(2);
    


    // Give every quadrant and partial product n/2 rows of n/2 elements.
    for(int i = 0; i < n / 2; i++)
    
        A11.push_back(help);
        A12.push_back(help);
        A21.push_back(help);
        A22.push_back(help);

        B11.push_back(help);
        B12.push_back(help);
        B21.push_back(help);
        B22.push_back(help);


        m1.push_back(help);
        m2.push_back(help);
        m3.push_back(help);
        m4.push_back(help);

        m5.push_back(help);
        m6.push_back(help);
        m7.push_back(help);
        m8.push_back(help);
    


    // Append another n/2 elements so the template row holds 2 * (n/2) elements,
    // then use it for the n rows of the result.
    for (int j = 0; j < n / 2; j++)
        help.push_back(2);
    for(int i = 0; i < n; i++)
    
        result.push_back(help);

    
    // Base case: direct 2x2 product.
    if (n == 2)
    
        result[0][0] = matrix_a[0][0] * matrix_b[0][0] + matrix_a[0][1] * matrix_b[1][0];
        result[0][1] = matrix_a[0][0] * matrix_b[0][1] + matrix_a[0][1] * matrix_b[1][1];
        result[1][0] = matrix_a[1][0] * matrix_b[0][0] + matrix_a[1][1] * matrix_b[1][0];
        result[1][1] = matrix_a[1][0] * matrix_b[0][1] + matrix_a[1][1] * matrix_b[1][1];

        return result;

    
    //  for (int i = 0; i < n;i++)

    // Copy the four quadrants of each operand into their own matrices:
    // 11 = top-left, 12 = top-right, 21 = bottom-left, 22 = bottom-right.
    for(int i = 0; i < (n / 2); i++)
    
        for(int j = 0; j <( n / 2); j++)
        
            A11[i][j] = matrix_a[i][j];
            B11[i][j] = matrix_b[i][j];

            A12[i][j] = matrix_a[i][j + n / 2];
            B12[i][j] = matrix_b[i][j + n / 2];

            A21[i][j] = matrix_a[i + n / 2][j];
            B21[i][j] = matrix_b[i + n / 2][j];

            A22[i][j] = matrix_a[i + n / 2][j + n / 2];
            B22[i][j] = matrix_b[i + n / 2][j + n / 2];


        
    
    /*
    show("A11", n / 2, A11);
    show("A12", n / 2, A12);
    show("A21", n / 2, A21);
    show("A22", n / 2, A22);
    show("B11", n / 2, B11);
    show("B12", n / 2, B12);
    show("B21", n / 2, B21);
    show("B22", n / 2, B22);*/

    // Spawn the eight quadrant products; each returned matrix replaces the
    // placeholder contents of m1..m8.
    m1 = cilk_spawn(strassen(n / 2, A11, B11));// A11B11
    m2 = cilk_spawn(strassen(n / 2, A12, B21));// A12B21
    m3 = cilk_spawn(strassen(n / 2, A11, B12));// A11B12
    m4 = cilk_spawn(strassen(n / 2, A12, B22));// A12B22
    m5 = cilk_spawn(strassen(n / 2, A21, B11));// A21B11
    m6 = cilk_spawn(strassen(n / 2, A22, B21));// A22B21
    m7 = cilk_spawn(strassen(n / 2, A21, B12));// A21B12
    m8 = cilk_spawn(strassen(n / 2, A22, B22));// A22B22



    // Wait for all spawned products before combining them.
    cilk_sync;

    /*
    cout << "****************************\n";
    cout << "*********** before add :\n";
    show("m1", n / 2, m1);
    show("m2", n / 2, m2);
    show
("m3", n / 2, m3);
    show("m4", n / 2, m4);
    show("m5", n / 2, m5);
    show("m6", n / 2, m6);
    show("m7", n / 2, m7);
    show("m8", n / 2, m8);*/


    // Sum the product pairs in place: m1, m3, m5 and m7 end up holding the
    // top-left, top-right, bottom-left and bottom-right result quadrants.
    for(int i = 0; i < n / 2; i++)
    for (int j = 0; j < n / 2; j++)
    
        m1[i][j] = m1[i][j] + m2[i][j];
        m3[i][j] = m3[i][j] + m4[i][j];
        m5[i][j] = m5[i][j] + m6[i][j];
        m7[i][j] = m7[i][j] + m8[i][j];

    

        /*cout << "after adding hello \n";
        show("m1", n / 2, m1);
        show("m3", n / 2, m3);
        show("m5", n / 2, m5);
        show("m7", n / 2, m7);*/



    // Copy the four summed quadrants into their positions in the result.
    for(int i = 0; i < n ; i++)
    
        for(int j = 0; j < n ; j++)
        
            if (i < n / 2 && j < n / 2)
            
                result[i][j] = m1[i][j];
            
            else if (i < n / 2 && j >= n / 2)
            
                result[i][j] = m3[i][j - n / 2];
            
            else if (i >= n / 2 && j < n / 2)
            
                result[i][j] = m5[i - n / 2][j];
            
            else if (i >= n / 2 && j >= n / 2)
            
                result[i][j] = m7[i - n / 2][j - n / 2];

            
        
    



    /*
    cilk_for(int i = 0; i < n / 2; i++)
    
        for (int j = 0; j < n / 2; j++)
        
            delete A11[i][j];
            delete A12[i][j];
            delete A21[i][j];
            delete A22[i][j];
            delete B11[i][j];
            delete B12[i][j];
            delete B21[i][j];
            delete B22[i][j];


            delete m1[i][j];
            delete m2[i][j];
            delete m3[i][j];
            delete m4[i][j];
            delete m5[i][j];
            delete m6[i][j];
            delete m7[i][j];
            delete m8[i][j];*/






    /*  
        delete[] A11[i];
        delete[] A12[i];
        delete[] A21[i];
        delete[] A22[i];
        delete[] B11[i];
        delete[] B12[i];
        delete[] B21[i];
        delete[] B22[i];


        delete[] m1[i];
        delete[] m2[i];
        delete[] m3[i];
        delete[] m4[i];
        delete[] m5[i];
        delete[] m6[i];
        delete[] m7[i];
        delete[] m8[i];
    */


/*  for (int i = 0; i < n; i++)
    
        for (int j = 0; j < n; j++)

        
            cout << result[i][j] << " ";

        
        cout << endl;

    */

    return result;




// Entry point of the vector-based version: reads the matrix size, fills two
// size-by-size matrices with random values in 0..2 (printing them as they are
// built), times a single strassen() call, prints the product, and then prints
// system-wide and per-process memory statistics.
// Standard input is redirected to in.txt and standard output to out.txt.
int main()


    int size;

    // NOTE(review): the freopen() results are not checked.
    freopen("in.txt","r",stdin);
    freopen("out.txt", "w", stdout);


    // Set the Cilk runtime "nworkers" parameter to 1 (the pointer-based
    // listing uses 4).
    __cilkrts_set_param("nworkers", "1");
    //cout << " please Enter the size OF ur matrix /n";
    cin >> size;

    // Scratch row that is refilled and appended once per matrix row.
    vector<int> inner;
    // Only evenness is checked here; strassen() itself keeps halving n until
    // it reaches 2.
    if (size % 2 == 0)
    

        // initialize matrix_1
        cout << "matrix_1 :" << endl;
        for (int i = 0; i < size; i++)
        
            inner.clear();

            for (int j = 0; j < size; j++)

            
                inner.push_back(rand()%3);
                //cin >> matrix_1[i][j];
                cout << inner[j]<<" ";

            
            cout << endl;

            matrix_1.push_back(inner);
        
        // initialize matrix_2
        cout << "matrix2_is :\n";
        inner.clear();
        for (int i = 0; i < size; i++)
        
            inner.clear();
            //matrix_2[i] = new int[size];
            for (int j = 0; j < size; j++)

            

            inner.push_back(rand()%3);
            cout << inner[j]<<" ";
                //cin >> matrix_2[i][j];

            
            cout << endl;
            matrix_2.push_back(inner);
        
        clock_t begin = clock();


        // The product replaces the contents of matrix_2.
        matrix_2 = strassen(size, matrix_1, matrix_2);

        clock_t end = clock();
        double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;

        cout << "*******\ntime is : " << elapsed_secs << endl;

        // Print the product matrix.
        cout << "answerrr :" << endl;
        for (int i = 0; i < size; i++)
        
            for (int j = 0; j < size; j++)

            
                cout<< matrix_2[i][j]<<" ";

            
            cout << endl;

        


    

    else
    cout << " we couldnt use strasen ";

    cout << "\nTotal Virtual Memory:" << endl;

    // System-wide figure taken from the ullTotalPageFile field.
    // NOTE(review): the DWORDLONG / SIZE_T values in this function are printed
    // with "%u", which expects unsigned int — confirm the format specifiers
    // against the actual widths of these types.
    MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG totalVirtualMem = memInfo.ullTotalPageFile;
    printf("%u", totalVirtualMem);

    cout << "\nVirtual Memory currently used:" << endl;
//  MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG virtualMemUsed = memInfo.ullTotalPageFile - memInfo.ullAvailPageFile;
    printf("%u", virtualMemUsed);


    cout << "\nVirtual Memory currently used by current process:" << endl;

    // Per-process figure taken from the PrivateUsage field.
    PROCESS_MEMORY_COUNTERS_EX pmc;
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T virtualMemUsedByMe = pmc.PrivateUsage;
    printf("%u", virtualMemUsedByMe);

    cout << "\nPhysical Memory currently used: " << endl;
    //MEMORYSTATUSEX memInfo;
    memInfo.dwLength = sizeof(MEMORYSTATUSEX);
    GlobalMemoryStatusEx(&memInfo);
    DWORDLONG physMemUsed = memInfo.ullTotalPhys - memInfo.ullAvailPhys;

    printf("%u", physMemUsed);

    cout << endl;
    cout << "\nPhysical Memory currently used by current process : " << endl;
//  PROCESS_MEMORY_COUNTERS_EX pmc;
    // Per-process figure taken from the WorkingSetSize field.
    GetProcessMemoryInfo(GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS*)&pmc, sizeof(pmc));
    SIZE_T physMemUsedByMe = pmc.WorkingSetSize;
    printf("%u", physMemUsedByMe);
    //cout << "memory usage :"<<double(totalVirtualMem) << endl;


    //_getch();

    return 0;


【问题讨论】:

大小为 N 的 T 类型数组应始终小于大小为 N 的 T 类型向量。请提供实际示例说明您所做的更改。 你是如何声明/分配你的指针和你的向量的? 指针不需要比向量更多的空间。这取决于您如何分配指针指向的内存,我们无法从模糊的描述中猜测出来。最有可能的是,指针杂耍导致了大量内存泄漏,这已由 RAII 修复。 我编辑了我的帖子@MikeSeymour 【参考方案1】:

想到两个可能的原因:

如果您手动分配内存并且没有正确释放它,则会造成内存泄漏。使用原始指针比使用向量更容易发生这种情况。 如果您在 1000 个单独的分配中分配 1000 个整数,它将比分配一个由 1000 个整数组成的块(向量所做的)占用更多的空间。每次分配都需要一些额外的内存来记账。

【讨论】:

我添加了我的代码,是什么原因与我的代码有关? @N_93:主要是内存泄漏:strassen() 有一个循环为A11[i] 等分配内存,但相应的删除被注释掉。一般每个new/new[]都应该有一个对应的delete/delete[] 如何分配和删除以节省空间? @N_93:您使用new/new[] 分配的所有内容都应该在不再使用时再次使用delete/delete[] 释放。【参考方案2】:

我猜这是一个分配问题。从我所见,OS 的分配似乎相当耗时。

只是一个猜测,但也许std::vector 默认分配器正在从OS 获取更大的连续内存块,并从中提取以满足更小的向量分配?

这个答案可能会提供一些见解:

https://stackoverflow.com/a/29659791/3807729

我设法减少了运行测试程序所花费的时间,方法是在运行计时操作之前分配、然后释放一个大的std::vector

我推测 C++ 运行时系统(在某些实现中)可能会保留从 OS 获得的内存,即使这些内存已被释放,因为每次都从 OS 获取小块内存的开销要大得多。

【讨论】:

以上是关于为啥在这段代码中向量比指针使用更少的内存?的主要内容,如果未能解决你的问题,请参考以下文章

为啥实习全局字符串值会导致每个多处理进程使用更少的内存?

为啥尽管我在变量中使用 malloc 分配更多内存,但当我打印变量的大小时,它仍然显示更少的内存/字节? [复制]

原来PHP对象比数组用更少的内存

为啥在这段代码中 CPU 运行速度比 GPU 快?

为啥移动 HTML 地理定位应用程序比本地应用程序获得更少的位置更新?

按多个值对向量进行排序