为啥 MPI 程序以退出代码 134(信号 6)终止?

Posted

技术标签:

【中文标题】为啥 MPI 程序以退出代码 134(信号 6)终止?【英文标题】:Why does the MPI program terminate with exit code 134 (signal 6)?为什么 MPI 程序以退出代码 134(信号 6)终止? 【发布时间】:2019-10-03 18:59:14 【问题描述】:

我有以下用 MPI 编写的霍夫曼压缩代码。

#include "mpi.h"
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<time.h>
#include "../include/serialHeader.h"

struct huffmanDictionary huffmanDictionary[256];
struct huffmanTree *head_huffmanTreeNode = NULL;
struct huffmanTree huffmanTreeNode[512];

int main(int argc, char* argv[])
    clock_t start, end;
    unsigned int cpu_time_used;
    unsigned int i, j, rank, numProcesses, blockLength;
    unsigned int *compBlockLengthArray;
    unsigned int distinctCharacterCount, combinedHuffmanNodes, frequency[256], inputFileLength, compBlockLength;
    unsigned char *inputFileData, *compressedData, writeBit = 0, bitsFilled = 0, bitSequence[255], bitSequenceLength = 0;
    FILE *inputFile;

    MPI_Init( &argc, &argv);
    MPI_File mpi_inputFile, mpi_compressedFile;
    MPI_Status status;

    // get rank and number of processes value
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numProcesses);

    // get file size
    if(rank == 0)
        inputFile = fopen(argv[1], "rb");
        fseek(inputFile, 0, SEEK_END);
        inputFileLength = ftell(inputFile);
        fseek(inputFile, 0, SEEK_SET);
        fclose(inputFile);
    

    //broadcast size of file to all the processes 
    MPI_Bcast(&inputFileLength, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);

    // get file chunk size

    blockLength = inputFileLength / numProcesses;
    printf ("%u\n", numProcesses);

    if(rank == (numProcesses-1))
        blockLength = inputFileLength - ((numProcesses-1) * blockLength);   
    

    // open file in each process and read data and allocate memory for compressed data
    MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &mpi_inputFile);
    MPI_File_seek(mpi_inputFile, rank * blockLength, MPI_SEEK_SET);

    inputFileData = (unsigned char *)malloc(blockLength * sizeof(unsigned char));   
    MPI_File_read(mpi_inputFile, inputFileData, blockLength, MPI_UNSIGNED_CHAR, &status);

    for (i=0; i< blockLength; ++i)
        printf ("%c\n", inputFileData[i]);
    // start clock
    if(rank == 0)
        start = clock();
    

    // find the frequency of each symbols
    for (i = 0; i < 256; i++)
        frequency[i] = 0;
    
    for (i = 0; i < blockLength; i++)
        frequency[inputFileData[i]]++;
    

    compressedData = (unsigned char *)malloc(blockLength * sizeof(unsigned char));  
    compBlockLengthArray = (unsigned int *)malloc(numProcesses * sizeof(unsigned int));

    // initialize nodes of huffman tree
    distinctCharacterCount = 0;
    for (i = 0; i < 256; i++)
        if (frequency[i] > 0)
            huffmanTreeNode[distinctCharacterCount].count = frequency[i];
            huffmanTreeNode[distinctCharacterCount].letter = i;
            huffmanTreeNode[distinctCharacterCount].left = NULL;
            huffmanTreeNode[distinctCharacterCount].right = NULL;
            distinctCharacterCount++;
        
    

    // build tree 
    for (i = 0; i < distinctCharacterCount - 1; i++)
        combinedHuffmanNodes = 2 * i;
        sortHuffmanTree(i, distinctCharacterCount, combinedHuffmanNodes);
        buildHuffmanTree(i, distinctCharacterCount, combinedHuffmanNodes);
    

    if(distinctCharacterCount == 1)
      head_huffmanTreeNode = &huffmanTreeNode[0];
    

    // build table having the bitSequence sequence and its length
    buildHuffmanDictionary(head_huffmanTreeNode, bitSequence, bitSequenceLength);

    // compress
    compBlockLength = 0;
    for (i = 0; i < blockLength; i++)
        for (j = 0; j < huffmanDictionary[inputFileData[i]].bitSequenceLength; j++)
            if (huffmanDictionary[inputFileData[i]].bitSequence[j] == 0)
                writeBit = writeBit << 1;
                bitsFilled++;
            
            else
                writeBit = (writeBit << 1) | 01;
                bitsFilled++;
            
            if (bitsFilled == 8)
                compressedData[compBlockLength] = writeBit;
                bitsFilled = 0;
                writeBit = 0;
                compBlockLength++;
            
        
    

    if (bitsFilled != 0)
        for (i = 0; (unsigned char)i < 8 - bitsFilled; i++)
            writeBit = writeBit << 1;
        
        compressedData[compBlockLength] = writeBit;
        compBlockLength++;
    

    // calculate length of compressed data
    compBlockLength = compBlockLength + 1024;
    compBlockLengthArray[rank] = compBlockLength;

    // send the length of each process to process 0
    MPI_Gather(&compBlockLength, 1, MPI_UNSIGNED, compBlockLengthArray, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);

    // update the data to reflect the offset
    if(rank == 0)
        compBlockLengthArray[0] = (numProcesses + 2) * 4 + compBlockLengthArray[0];
        for(i = 1; i < numProcesses; i++)
            compBlockLengthArray[i] = compBlockLengthArray[i] + compBlockLengthArray[i - 1];
        
        for(i = (numProcesses - 1); i > 0; i--)
            compBlockLengthArray[i] = compBlockLengthArray[i - 1];
        
        compBlockLengthArray[0] = (numProcesses + 2) * 4;
    

    // broadcast size of each compressed data block to all the processes 
    MPI_Bcast(compBlockLengthArray, numProcesses, MPI_UNSIGNED, 0, MPI_COMM_WORLD);

    // get time
    if(rank == 0)
        end = clock();
        cpu_time_used = ((end - start)) * 1000 / CLOCKS_PER_SEC;
        printf("Time taken: %d:%d s\n", cpu_time_used / 1000, cpu_time_used % 1000);
    

    // write data to file
    MPI_File_open(MPI_COMM_WORLD, argv[2], MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_compressedFile);

    if(rank == 0)
        MPI_File_write(mpi_compressedFile, &inputFileLength, 1, MPI_UNSIGNED, MPI_STATUS_IGNORE);
        MPI_File_write(mpi_compressedFile, &numProcesses, 1, MPI_UNSIGNED, MPI_STATUS_IGNORE);
        MPI_File_write(mpi_compressedFile, compBlockLengthArray, numProcesses, MPI_UNSIGNED, MPI_STATUS_IGNORE);
    
    MPI_File_seek(mpi_compressedFile, compBlockLengthArray[rank], MPI_SEEK_SET);
    MPI_File_write(mpi_compressedFile, frequency, 256, MPI_UNSIGNED, MPI_STATUS_IGNORE);
    MPI_File_write(mpi_compressedFile, compressedData, (compBlockLength - 1024), MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE);

    // close open files
    MPI_File_close(&mpi_compressedFile);    
    MPI_File_close(&mpi_inputFile);
    MPI_Barrier(MPI_COMM_WORLD);

    if (rank == 0)
    free(head_huffmanTreeNode);
    free(compBlockLengthArray);
    free(inputFileData);
    free(compressedData);
    MPI_Finalize();
    return 0;

我用文件编译代码:

mpicc MPICompress.c ../include/serialFunctions.c -o ../bin/MPI_compress

文件(serialFunctions.c):

#include<stdlib.h>
#include<string.h>
#include "serialHeader.h"

// sort nodes based on frequency
void sortHuffmanTree(int i, int distinctCharacterCount, int mergedHuffmanNodes)
    int a, b;
    for (a = mergedHuffmanNodes; a < distinctCharacterCount - 1 + i; a++)
        for (b = mergedHuffmanNodes; b < distinctCharacterCount - 1 + i; b++)
            if (huffmanTreeNode[b].count > huffmanTreeNode[b + 1].count)
                struct huffmanTree temp_huffmanTreeNode = huffmanTreeNode[b];
                huffmanTreeNode[b] = huffmanTreeNode[b + 1];
                huffmanTreeNode[b + 1] = temp_huffmanTreeNode;
            
        
    


// build tree based on sort result
void buildHuffmanTree(int i, int distinctCharacterCount, int mergedHuffmanNodes)
    huffmanTreeNode[distinctCharacterCount + i].count = huffmanTreeNode[mergedHuffmanNodes].count + huffmanTreeNode[mergedHuffmanNodes + 1].count;
    huffmanTreeNode[distinctCharacterCount + i].left = &huffmanTreeNode[mergedHuffmanNodes];
    huffmanTreeNode[distinctCharacterCount + i].right = &huffmanTreeNode[mergedHuffmanNodes + 1];
    head_huffmanTreeNode = &(huffmanTreeNode[distinctCharacterCount + i]);


// get bitSequence sequence for each char value
void buildHuffmanDictionary(struct huffmanTree *root, unsigned char *bitSequence, unsigned char bitSequenceLength)
    if (root->left)
        bitSequence[bitSequenceLength] = 0;
        buildHuffmanDictionary(root->left, bitSequence, bitSequenceLength + 1);
    

    if (root->right)
        bitSequence[bitSequenceLength] = 1;
        buildHuffmanDictionary(root->right, bitSequence, bitSequenceLength + 1);
    

    if (root->left == NULL && root->right == NULL)
        huffmanDictionary[root->letter].bitSequenceLength = bitSequenceLength;
        memcpy(huffmanDictionary[root->letter].bitSequence, bitSequence, bitSequenceLength * sizeof(unsigned char));
    

和文件(serialHeader.h):


    unsigned char bitSequence[255];
    unsigned char bitSequenceLength;
;

struct huffmanTree

    unsigned char letter;
    unsigned int count;
    struct huffmanTree *left, *right;
;

extern struct huffmanDictionary huffmanDictionary[256];
extern struct huffmanTree *head_huffmanTreeNode;
extern struct huffmanTree huffmanTreeNode[512];

void sortHuffmanTree(int i, int distinctCharacterCount, int combinedHuffmanNodes);
void buildHuffmanTree(int i, int distinctCharacterCount, int combinedHuffmanNodes);
void buildHuffmanDictionary(struct huffmanTree *root, unsigned char *bitSequence, unsigned char bitSequenceLength);
int wrapperGPU(char **file, unsigned char *inputFileData, int inputFileLength);

我使用输入文本文件和输出文件(空或不存在)运行程序

mpirun -np 2 ./MPI_compress input output

我在运行结束时收到以下消息:

free(): invalid size

===================================================================================
=   BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
=   PID 8804 RUNNING AT Inspiron
=   EXIT CODE: 134
=   CLEANING UP REMAINING PROCESSES
=   YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Aborted (signal 6)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions

是什么导致代码中出现这个错误?

【问题讨论】:

错误消息显示free(): invalid size。您可能正在尝试释放未分配的资源,很可能是head_huffmanTreeNode 【参考方案1】:

在我看来你有一个错误

for (b = mergedHuffmanNodes; b < distinctCharacterCount - 1 + i; b++)

sortHuffmanTree()

因为你在下一行

if (huffmanTreeNode[b].count > huffmanTreeNode[b + 1].count)

b + 1 可能超出范围。

【讨论】:

以上是关于为啥 MPI 程序以退出代码 134(信号 6)终止?的主要内容,如果未能解决你的问题,请参考以下文章

mpi 进程在信号 11 上退出

MPI 分段故障(信号 11)

创建核心转储时设置退出状态

Django 1.6 + RabbitMQ 3.2.3 + Celery 3.1.9 - 为啥我的芹菜工人死于:WorkerLostError:工人过早退出:信号11(SIGSEGV)

为啥我所有的开放 MPI 进程的排名都是 0?

为啥我无法附加到此 MPI 代码的引导队列?