为啥 MPI 程序以退出代码 134(信号 6)终止?
Posted
技术标签:
【中文标题】为什么 MPI 程序以退出代码 134(信号 6)终止?【英文标题】Why does the MPI program terminate with exit code 134 (signal 6)?【发布时间】2019-10-03 18:59:14【问题描述】我有以下用 MPI 编写的霍夫曼压缩代码。
#include "mpi.h"
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<time.h>
#include "../include/serialHeader.h"
// Code table filled in by buildHuffmanDictionary(); indexed by byte value.
struct huffmanDictionary huffmanDictionary[256];
// Root of the current Huffman tree. It is set by buildHuffmanTree() (and by
// main() for the single-symbol case) and always points INTO huffmanTreeNode[],
// so it is not heap memory.
struct huffmanTree *head_huffmanTreeNode = NULL;
// Node pool for the tree: the leaves are stored first, and buildHuffmanTree()
// writes each merged node at index distinctCharacterCount + i.
struct huffmanTree huffmanTreeNode[512];
int main(int argc, char* argv[])
clock_t start, end;
unsigned int cpu_time_used;
unsigned int i, j, rank, numProcesses, blockLength;
unsigned int *compBlockLengthArray;
unsigned int distinctCharacterCount, combinedHuffmanNodes, frequency[256], inputFileLength, compBlockLength;
unsigned char *inputFileData, *compressedData, writeBit = 0, bitsFilled = 0, bitSequence[255], bitSequenceLength = 0;
FILE *inputFile;
MPI_Init( &argc, &argv);
MPI_File mpi_inputFile, mpi_compressedFile;
MPI_Status status;
// get rank and number of processes value
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &numProcesses);
// get file size
if(rank == 0)
inputFile = fopen(argv[1], "rb");
fseek(inputFile, 0, SEEK_END);
inputFileLength = ftell(inputFile);
fseek(inputFile, 0, SEEK_SET);
fclose(inputFile);
//broadcast size of file to all the processes
MPI_Bcast(&inputFileLength, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
// get file chunk size
blockLength = inputFileLength / numProcesses;
printf ("%u\n", numProcesses);
if(rank == (numProcesses-1))
blockLength = inputFileLength - ((numProcesses-1) * blockLength);
// open file in each process and read data and allocate memory for compressed data
MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &mpi_inputFile);
MPI_File_seek(mpi_inputFile, rank * blockLength, MPI_SEEK_SET);
inputFileData = (unsigned char *)malloc(blockLength * sizeof(unsigned char));
MPI_File_read(mpi_inputFile, inputFileData, blockLength, MPI_UNSIGNED_CHAR, &status);
for (i=0; i< blockLength; ++i)
printf ("%c\n", inputFileData[i]);
// start clock
if(rank == 0)
start = clock();
// find the frequency of each symbols
for (i = 0; i < 256; i++)
frequency[i] = 0;
for (i = 0; i < blockLength; i++)
frequency[inputFileData[i]]++;
compressedData = (unsigned char *)malloc(blockLength * sizeof(unsigned char));
compBlockLengthArray = (unsigned int *)malloc(numProcesses * sizeof(unsigned int));
// initialize nodes of huffman tree
distinctCharacterCount = 0;
for (i = 0; i < 256; i++)
if (frequency[i] > 0)
huffmanTreeNode[distinctCharacterCount].count = frequency[i];
huffmanTreeNode[distinctCharacterCount].letter = i;
huffmanTreeNode[distinctCharacterCount].left = NULL;
huffmanTreeNode[distinctCharacterCount].right = NULL;
distinctCharacterCount++;
// build tree
for (i = 0; i < distinctCharacterCount - 1; i++)
combinedHuffmanNodes = 2 * i;
sortHuffmanTree(i, distinctCharacterCount, combinedHuffmanNodes);
buildHuffmanTree(i, distinctCharacterCount, combinedHuffmanNodes);
if(distinctCharacterCount == 1)
head_huffmanTreeNode = &huffmanTreeNode[0];
// build table having the bitSequence sequence and its length
buildHuffmanDictionary(head_huffmanTreeNode, bitSequence, bitSequenceLength);
// compress
compBlockLength = 0;
for (i = 0; i < blockLength; i++)
for (j = 0; j < huffmanDictionary[inputFileData[i]].bitSequenceLength; j++)
if (huffmanDictionary[inputFileData[i]].bitSequence[j] == 0)
writeBit = writeBit << 1;
bitsFilled++;
else
writeBit = (writeBit << 1) | 01;
bitsFilled++;
if (bitsFilled == 8)
compressedData[compBlockLength] = writeBit;
bitsFilled = 0;
writeBit = 0;
compBlockLength++;
if (bitsFilled != 0)
for (i = 0; (unsigned char)i < 8 - bitsFilled; i++)
writeBit = writeBit << 1;
compressedData[compBlockLength] = writeBit;
compBlockLength++;
// calculate length of compressed data
compBlockLength = compBlockLength + 1024;
compBlockLengthArray[rank] = compBlockLength;
// send the length of each process to process 0
MPI_Gather(&compBlockLength, 1, MPI_UNSIGNED, compBlockLengthArray, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
// update the data to reflect the offset
if(rank == 0)
compBlockLengthArray[0] = (numProcesses + 2) * 4 + compBlockLengthArray[0];
for(i = 1; i < numProcesses; i++)
compBlockLengthArray[i] = compBlockLengthArray[i] + compBlockLengthArray[i - 1];
for(i = (numProcesses - 1); i > 0; i--)
compBlockLengthArray[i] = compBlockLengthArray[i - 1];
compBlockLengthArray[0] = (numProcesses + 2) * 4;
// broadcast size of each compressed data block to all the processes
MPI_Bcast(compBlockLengthArray, numProcesses, MPI_UNSIGNED, 0, MPI_COMM_WORLD);
// get time
if(rank == 0)
end = clock();
cpu_time_used = ((end - start)) * 1000 / CLOCKS_PER_SEC;
printf("Time taken: %d:%d s\n", cpu_time_used / 1000, cpu_time_used % 1000);
// write data to file
MPI_File_open(MPI_COMM_WORLD, argv[2], MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_compressedFile);
if(rank == 0)
MPI_File_write(mpi_compressedFile, &inputFileLength, 1, MPI_UNSIGNED, MPI_STATUS_IGNORE);
MPI_File_write(mpi_compressedFile, &numProcesses, 1, MPI_UNSIGNED, MPI_STATUS_IGNORE);
MPI_File_write(mpi_compressedFile, compBlockLengthArray, numProcesses, MPI_UNSIGNED, MPI_STATUS_IGNORE);
MPI_File_seek(mpi_compressedFile, compBlockLengthArray[rank], MPI_SEEK_SET);
MPI_File_write(mpi_compressedFile, frequency, 256, MPI_UNSIGNED, MPI_STATUS_IGNORE);
MPI_File_write(mpi_compressedFile, compressedData, (compBlockLength - 1024), MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE);
// close open files
MPI_File_close(&mpi_compressedFile);
MPI_File_close(&mpi_inputFile);
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 0)
free(head_huffmanTreeNode);
free(compBlockLengthArray);
free(inputFileData);
free(compressedData);
MPI_Finalize();
return 0;
我用以下命令编译代码:
mpicc MPICompress.c ../include/serialFunctions.c -o ../bin/MPI_compress
文件(serialFunctions.c):
#include<stdlib.h>
#include<string.h>
#include "serialHeader.h"
// sort nodes based on frequency
void sortHuffmanTree(int i, int distinctCharacterCount, int mergedHuffmanNodes)
int a, b;
for (a = mergedHuffmanNodes; a < distinctCharacterCount - 1 + i; a++)
for (b = mergedHuffmanNodes; b < distinctCharacterCount - 1 + i; b++)
if (huffmanTreeNode[b].count > huffmanTreeNode[b + 1].count)
struct huffmanTree temp_huffmanTreeNode = huffmanTreeNode[b];
huffmanTreeNode[b] = huffmanTreeNode[b + 1];
huffmanTreeNode[b + 1] = temp_huffmanTreeNode;
// build tree based on sort result
void buildHuffmanTree(int i, int distinctCharacterCount, int mergedHuffmanNodes)
huffmanTreeNode[distinctCharacterCount + i].count = huffmanTreeNode[mergedHuffmanNodes].count + huffmanTreeNode[mergedHuffmanNodes + 1].count;
huffmanTreeNode[distinctCharacterCount + i].left = &huffmanTreeNode[mergedHuffmanNodes];
huffmanTreeNode[distinctCharacterCount + i].right = &huffmanTreeNode[mergedHuffmanNodes + 1];
head_huffmanTreeNode = &(huffmanTreeNode[distinctCharacterCount + i]);
// get bitSequence sequence for each char value
void buildHuffmanDictionary(struct huffmanTree *root, unsigned char *bitSequence, unsigned char bitSequenceLength)
if (root->left)
bitSequence[bitSequenceLength] = 0;
buildHuffmanDictionary(root->left, bitSequence, bitSequenceLength + 1);
if (root->right)
bitSequence[bitSequenceLength] = 1;
buildHuffmanDictionary(root->right, bitSequence, bitSequenceLength + 1);
if (root->left == NULL && root->right == NULL)
huffmanDictionary[root->letter].bitSequenceLength = bitSequenceLength;
memcpy(huffmanDictionary[root->letter].bitSequence, bitSequence, bitSequenceLength * sizeof(unsigned char));
和文件(serialHeader.h):
#ifndef SERIAL_HEADER_H
#define SERIAL_HEADER_H

// Huffman code of one byte value: one array entry per code bit (0 or 1).
struct huffmanDictionary
{
    unsigned char bitSequence[255];
    unsigned char bitSequenceLength;   // number of valid entries in bitSequence
};

// Node of the Huffman tree; a leaf has both child pointers set to NULL.
struct huffmanTree
{
    unsigned char letter;              // byte value (meaningful for leaves)
    unsigned int count;                // frequency of the letter / subtree
    struct huffmanTree *left, *right;
};

// Shared state; declared here and defined once in the main translation unit.
extern struct huffmanDictionary huffmanDictionary[256];
extern struct huffmanTree *head_huffmanTreeNode;
extern struct huffmanTree huffmanTreeNode[512];

void sortHuffmanTree(int i, int distinctCharacterCount, int combinedHuffmanNodes);
void buildHuffmanTree(int i, int distinctCharacterCount, int combinedHuffmanNodes);
void buildHuffmanDictionary(struct huffmanTree *root, unsigned char *bitSequence, unsigned char bitSequenceLength);
int wrapperGPU(char **file, unsigned char *inputFileData, int inputFileLength);

#endif /* SERIAL_HEADER_H */
我使用输入文本文件和输出文件(空或不存在)运行程序
mpirun -np 2 ./MPI_compress input output
我在运行结束时收到以下消息:
free(): invalid size
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= PID 8804 RUNNING AT Inspiron
= EXIT CODE: 134
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
YOUR APPLICATION TERMINATED WITH THE EXIT STRING: Aborted (signal 6)
This typically refers to a problem with your application.
Please see the FAQ page for debugging suggestions
是什么导致代码中出现这个错误?
【问题讨论】:
错误消息显示 free(): invalid size。您可能正在尝试释放未分配的资源,很可能是 head_huffmanTreeNode。
【参考方案1】:
在我看来,sortHuffmanTree() 中的这一行有一个错误:
for (b = mergedHuffmanNodes; b < distinctCharacterCount - 1 + i; b++)
因为在它的下一行
if (huffmanTreeNode[b].count > huffmanTreeNode[b + 1].count)
中,b + 1 可能超出范围。
【讨论】:
以上是关于为啥 MPI 程序以退出代码 134(信号 6)终止?的主要内容,如果未能解决你的问题,请参考以下文章
Django 1.6 + RabbitMQ 3.2.3 + Celery 3.1.9 - 为什么我的 Celery worker 会异常终止:WorkerLostError: Worker exited prematurely: signal 11 (SIGSEGV)