使用 ctypes 时出现分段错误

Posted

技术标签:

【中文标题】使用 ctypes 时出现分段错误【英文标题】:segmentation fault when using ctypes 【发布时间】:2018-10-23 09:01:31 【问题描述】:

当我使用 ctypes 从 python 调用 c++ 时,我不断收到分段错误。我已将 gdb 附加到 c++ 代码并确保 C++ 代码运行良好。在 c++ 代码具有正确的返回值之后并在返回到 python 代码之前引发了分段错误。我检查了所有其他我猜可能是错误的东西,尤其是输入/输出参数,但仍然无法获得任何错误的线索。任何帮助,将不胜感激。 这是c++代码:

string_utils.cpp

#include <iostream>
#include <algorithm>
#include <tuple>
#include <vector>
#include <string>
#include <exception>
#include <unistd.h>
#include <stdbool.h>

using namespace std;

/**
 * One node of a character trie stored in first-child / next-sibling form.
 *
 * A node owns its whole child list: destroying a node deletes every child
 * (and, through their destructors, every descendant). It does NOT own its
 * siblings; those are deleted by the common parent.
 *
 * Copying is disabled because a member-wise copy would leave two nodes
 * owning the same child list and delete it twice.
 */
class SimpleTrieNode {
public:
    char key = '\0';                            // character stored in this node
    bool end = false;                           // set by SimpleTrie::add on the last node of an added string
    unsigned int num_children = 0;              // number of nodes in the child list
    SimpleTrieNode* p_next_sibling = nullptr;   // next node in the parent's child list
    SimpleTrieNode* p_first_child = nullptr;    // head of this node's child list

public:
    SimpleTrieNode() = default;
    SimpleTrieNode(const SimpleTrieNode&) = delete;
    SimpleTrieNode& operator=(const SimpleTrieNode&) = delete;

    /**
     * Returns the child whose key equals `key`, appending a new child at the
     * end of the child list when none exists yet.
     *
     * @param key character to look up / insert
     * @return pointer to the existing or newly created child (never null)
     */
    SimpleTrieNode* add_child(char key) {
        // Walk the links themselves so the empty-list case and the
        // append-at-tail case are handled by the same code path.
        SimpleTrieNode** link = &p_first_child;
        while (*link != nullptr) {
            if ((*link)->key == key) {
                return *link;
            }
            link = &(*link)->p_next_sibling;
        }
        *link = new SimpleTrieNode();
        (*link)->key = key;
        num_children++;
        return *link;
    }

    /**
     * Looks up the child whose key equals `key`.
     *
     * @param key character to look up
     * @return the matching child, or nullptr when there is none
     */
    SimpleTrieNode* get_child(char key) const {
        for (SimpleTrieNode* p = p_first_child; p != nullptr;
                p = p->p_next_sibling) {
            if (p->key == key) {
                return p;
            }
        }
        return nullptr;
    }

    /** Deletes every child; each child's destructor deletes its own children. */
    ~SimpleTrieNode() {
        SimpleTrieNode* p = p_first_child;
        while (p != nullptr) {
            // Save the successor before the node is destroyed.
            SimpleTrieNode* p_next = p->p_next_sibling;
            delete p;
            p = p_next;
        }
    }
};

class SimpleTrie 
public:
    SimpleTrieNode root;
    unsigned int max_len = 0;

public:
    SimpleTrieNode* add(char* d) 
        string data(d);
        if (max_len < data.length()) 
            max_len = data.length();
        
        SimpleTrieNode* p = &root;
        for (char c : data) 
            p = p->add_child(c);
        
        p->end = true;
        return p;
    

    bool exist(string q) 
        SimpleTrieNode* p = &root;
        for (char c : q) 
            if (p != nullptr) 
                p = p->get_child(c);
             else 
                return false;
            
        
        return true;

    
;

/**
 * Maps the cell (i, j) of an x-by-y matrix to its offset in a flat,
 * row-major array of x * y elements.
 *
 * @param i row index, expected in [0, x)
 * @param j column index, expected in [0, y)
 * @param x number of rows (not needed for the offset; kept for the callers)
 * @param y number of columns, i.e. the length of one row
 * @return i * y + j
 */
unsigned int convert_2d_index_to_1d(unsigned int i, unsigned int j,
        unsigned int x, unsigned int y) {
    (void)x;
    // The stride between consecutive rows is the number of COLUMNS. Using
    // the row count here makes rows overlap and lets the last rows run past
    // the end of an x * y buffer whenever x > y.
    return i * y + j;
}

_Bool compute_edit_distance_by_diagonal(SimpleTrieNode const * const p_node,
        string q, unsigned int len_x, unsigned int len_y, unsigned int i,
        unsigned int j, unsigned int threshold, unsigned int matrix[],
        char matched_seq[]) 
    unsigned int k = j;
    unsigned int min_row_value = threshold + 1;
    matched_seq[i - 1] = p_node->key;
    matched_seq[i]='\0';
    while ((j - i <= threshold || j < i) && j <= q.length()) 
        unsigned int min_dist = min(
                min(
                        matrix[convert_2d_index_to_1d(i - 1, j, len_x + 1,
                                len_y + 1)] + 1,
                        matrix[convert_2d_index_to_1d(i, j - 1, len_x + 1,
                                len_y + 1)] + 1),
                p_node->key == q[j - 1] ?
                        matrix[convert_2d_index_to_1d(i - 1, j - 1, len_x + 1,
                                len_y + 1)] :
                        matrix[convert_2d_index_to_1d(i - 1, j - 1, len_x + 1,
                                len_y + 1)] + 1);
        min_row_value = min(min_dist, min_row_value);
        matrix[convert_2d_index_to_1d(i, j, len_x + 1, len_y + 1)] = min_dist;
        if (p_node->end && q.length() == j && min_dist <= threshold) 
            return true;
        
        j++;
    

    if (min_row_value > threshold || i >= threshold + len_y) 
        return false;
    
    SimpleTrieNode *child = p_node->p_first_child;
    k = i < threshold + 1 ? k : k + 1;
    while (child != nullptr) 
        _Bool matched = compute_edit_distance_by_diagonal(child, q, len_x, len_y,
                i + 1, k, threshold, matrix, matched_seq);
        if (matched) 
            return matched;
        
        child = child->p_next_sibling;
    
    return false;


_Bool compute_edit_distance_by_diagonal(SimpleTrieNode const * const p_node,
        string q, unsigned int len_x, unsigned int len_y, unsigned int i,
        unsigned int j, unsigned int threshold, char matched_seq[]) 
    unsigned int size = (len_x + 1) * (len_y + 1);
    unsigned int matrix[size];
    fill_n(matrix, size, threshold+1);
    for (unsigned int ii = 0; ii <= threshold && ii < len_x + 1; ii++) 
        matrix[convert_2d_index_to_1d(ii, 0, len_x + 1, len_y + 1)] = ii;
    
    for (unsigned int jj = 0; jj <= threshold && jj < len_y + 1; jj++) 
        matrix[convert_2d_index_to_1d(0, jj, len_x + 1, len_y + 1)] = jj;
    
    return compute_edit_distance_by_diagonal(p_node, q, len_x, len_y, i, j,
            threshold, matrix, matched_seq);


_Bool approximate_string_match(const SimpleTrie* p_trie, char* q,
        unsigned int threshold, char matched_seq[]) 
    string query(q);
    unsigned int qlen = query.length();

    SimpleTrieNode* child = p_trie->root.p_first_child;
    while (child != nullptr) 
        _Bool m = compute_edit_distance_by_diagonal(child, query,
                p_trie->max_len, qlen, 1, 1, threshold, matched_seq);
        if (m) 
            return m;
        
        child = child->p_next_sibling;
    
    return false;


extern "C" 
    SimpleTrie* SimpleTrie_Initialization() 
        SimpleTrie *p = new SimpleTrie();
        return p;
    
    void SimpleTrie_Destruction(SimpleTrie *p) 
        delete p;
    
    void SimpleTrie_Add(SimpleTrie *p,char *q) 
        p->add(q);
    
    _Bool SimpleTrie_Approximate_string_match(SimpleTrie* p_trie, char *q,
            unsigned int threshold, char matched_seq[]) 
        return approximate_string_match(p_trie, q, threshold, matched_seq);
    

这是生成文件:

# Builds the shared library that string_utils.py loads.
all: string_utils.1.0.so

# Alias kept so `make string_utils.so` keeps working; the file actually
# produced is string_utils.1.0.so.
string_utils.so: string_utils.1.0.so

.PHONY: all string_utils.so

# The target is the real output file, so make can skip the build when the
# library is newer than the source. -Wall belongs on the compile step; it has
# no effect when only linking.
string_utils.1.0.so: string_utils.cpp
	g++ -Wall -c -fPIC string_utils.cpp -o string_utils.o
	g++ -shared -o string_utils.1.0.so string_utils.o
	rm string_utils.o

这是python代码:

string_utils.py

​​>
from ctypes import *
import os
import codecs


# The shared library is expected in the same directory as this module.
lib_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                        'string_utils.1.0.so')
lib = CDLL(lib_path)

# Declare the C signatures so ctypes converts arguments and results
# correctly; the trie handle is passed around as an opaque pointer.

# SimpleTrie* SimpleTrie_Initialization()
lib.SimpleTrie_Initialization.restype = c_void_p
lib.SimpleTrie_Initialization.argtypes = []

# void SimpleTrie_Destruction(SimpleTrie*)
lib.SimpleTrie_Destruction.restype = None
lib.SimpleTrie_Destruction.argtypes = [c_void_p]

# void SimpleTrie_Add(SimpleTrie*, char*)
lib.SimpleTrie_Add.restype = None
lib.SimpleTrie_Add.argtypes = [c_void_p, c_char_p]

# _Bool SimpleTrie_Approximate_string_match(SimpleTrie*, char*, unsigned, char[])
lib.SimpleTrie_Approximate_string_match.restype = c_bool
lib.SimpleTrie_Approximate_string_match.argtypes = [
    c_void_p, c_char_p, c_uint, c_char_p]


class SimpleTrie(object):
    """Python handle for the native trie exposed by the shared library.

    The native object is created in ``__init__`` and released in ``__del__``.
    Strings are passed to the library as UTF-8 bytes.
    """
    initialized = False

    def __init__(self):
        # Set first so __del__ is safe even if the call below raises.
        self.obj = None
        handle = lib.SimpleTrie_Initialization()
        if not handle:
            # A c_void_p result is None when the library returns NULL.
            raise MemoryError('SimpleTrie_Initialization returned NULL')
        self.obj = handle

    def __del__(self):
        handle = getattr(self, 'obj', None)
        if handle:
            # Clear the attribute first so the native object is freed once.
            self.obj = None
            lib.SimpleTrie_Destruction(handle)

    def add(self, q):
        """Add the string ``q`` to the trie."""
        lib.SimpleTrie_Add(self.obj, q.encode(encoding='utf-8'))

    def approximate_string_match(self, q, threshold):
        """Search for a stored string within ``threshold`` edits of ``q``.

        :param q: query string
        :param threshold: maximum edit distance, a non-negative integer
        :return: ``(matched, sequence)`` where ``matched`` is the boolean
            result of the native call and ``sequence`` is the decoded content
            of the output buffer
        :raises ValueError: if ``threshold`` is negative
        """
        if threshold < 0:
            # The native parameter is unsigned; a negative value would wrap.
            raise ValueError('threshold must be non-negative')
        bs = q.encode(encoding='utf-8')
        # The native search writes a byte at index depth-1 and a NUL at index
        # depth, starting at depth 1 and descending at most to
        # len(bs) + threshold, so at least two bytes are always required.
        matched_seq = create_string_buffer(max(len(bs) + threshold, 1) + 1)
        m = lib.SimpleTrie_Approximate_string_match(self.obj, bs,
                                                    threshold, matched_seq)
        # The buffer holds a byte path through the trie; without a match it
        # may stop in the middle of a multi-byte character, so decode
        # leniently instead of raising.
        return m, matched_seq.value.decode('utf-8', errors='replace')


if __name__ == '__main__':
    # Small demo: fill a trie with sample strings, then run one approximate
    # lookup with threshold 6 and print whether anything matched.
    ptrie = SimpleTrie()
    lines = [
        'abderqwerwqerqweefg',
        'dfaewqrwqerwqerqwerqwerdfa',
        'afdfertewtertetrewqrwqrffg',
        'fgfdhgadsfsadfsadfadsffdhdf',
        'fgfdhgadsfsadjhgfdfadsffdhdf',
        'antihsadsfasdfaddafsadfsadsfasdaive',
        'dsgffdshgdsgffdadsfsadfsadfsfdasdfasdfasdfasdfsg',
    ]
    for line in lines:
        ptrie.add(line)

    result = ptrie.approximate_string_match(u"antihsadsfasdfadsfasdaive", 6)
    x, y = result
    print(x)

提前致谢!

【问题讨论】:

了解如何将 gdb(调试器)附加到您的进程/程序以查看问题所在。 @TheQuantumPhysicist,我将 gdb 附加到 cpp 代码并确认它按预期工作。在 cpp 函数返回正确值后引发分段错误。我已经更新了问题。 分段错误在哪里?什么线?提供最少的完整代码来重现问题。此代码无法编译。请参阅minimal reproducible example 指南。 当你 add(line) 时,实现会对该行进行临时编码 q.encode(encoding='utf-8')。您是否在 C++ 代码中进行了复制?如果只是存储指针,那么在add 返回后,那个字节串就不再存在了。 @MarkTolonen 感谢您的回复和指出。我已经更新了问题以包含完整的代码。至于 add(line),我通过调用 new SimpleTrieNode() 来创建自己的空间 【参考方案1】:

该问题与 ctypes 无关。 cpp里面乱七八糟的。这个

unsigned int matrix[size];

会导致栈溢出(stack overflow)……欢迎。您正在尝试把大量数据放到栈上。您应该使用 std::vector<unsigned>、std::unique_ptr<unsigned[]>,或者其他堆上或静态的存储。顺便说一句,用非编译时常量作为数组大小是不可移植的。

在您修正内存分配之后,它仍会在递归的某处失败,因为您是按值(复制)传递字符串的。您可能应该使用 const string& q 而不是 string q。

【讨论】:

如果 Python 绑定不理解 GCC 的扩展,VLA 很可能会导致问题。 仅符合标准的代码 Lightness Races in Orbit ,我实际上编译了 c++ 测试。没有python它会失败。 Python 可能看不懂,但在 c++ 错误没有修复之前没关系。 好吧,那么公平。虽然我仍然会避免扩展。 @AskoldIlvento。感谢您的评论。但实际上矩阵[大小] 不是问题,因为大小并没有那么大。问题是“unsigned int size = (len_x + 1) * (len_y + 1);”,应该是“unsigned int size = (len_x + 1) * (len_y + 1+ threshold);”

以上是关于使用 ctypes 时出现分段错误的主要内容,如果未能解决你的问题,请参考以下文章

在 C++ 中使用向量时出现分段错误

分段错误:在 C++ 中弹出向量时出现 11

删除时出现分段错误

分配时出现分段错误[重复]

在 Swift 中使用 Set 时出现编译器分段错误

呈现窗口时出现 Gtkmm 分段错误