使用 C API 访问 NumPy 数组的视图
Posted
技术标签:
【中文标题】使用 C API 访问 NumPy 数组的视图【英文标题】:Accessing view of a NumPy array using the C API 【发布时间】:2016-01-25 19:19:15 【问题描述】:在我用 C++ 编写的 Python 扩展模块中,我使用以下 sn-p 代码将 NumPy 数组转换为 Armadillo 数组,以用于代码的 C++ 部分:
static arma::mat convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
// Check if the dimensions are what I expect.
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
const std::vector<int> dims = getPyArrayDimensions(pyarr); // Gets the dimensions using the API
PyArray_Descr* reqDescr = PyArray_DescrFromType(NPY_DOUBLE);
if (reqDescr == NULL) throw std::bad_alloc();
// Convert the array to Fortran-ordering as required by Armadillo
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr,
NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
double* dataPtr = static_cast<double*>(PyArray_DATA(cleanArr));
arma::mat result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
问题在于,当我传递一个 NumPy 数组(即my_array[:, 3]
)的 view 时,它似乎无法正确处理底层 C 数组的步幅。根据输出,函数接收的数组pyarr
似乎实际上是完整的基本数组,而不是视图(或者至少当我使用PyArray_DATA
访问数据时,我似乎得到了一个指向完整的基本数组)。如果我将视图的副本传递给此函数(即my_array[:, 3].copy()
),它会按预期工作,但我不想每次都记住这样做。
那么,有没有办法让PyArray_FromArray
只复制我想要的矩阵切片?我尝试使用标志NPY_ARRAY_ENSURECOPY
,但没有帮助。
编辑 1
按照 cmets 中的建议,这是一个完整的工作示例:
在文件example.cpp
:
#define NPY_NO_DEPRECATED_API NPY_1_9_API_VERSION
extern "C"
#include <Python.h>
#include <numpy/arrayobject.h>
#include <exception>
#include <cassert>
#include <string>
#include <type_traits>
#include <map>
#include <vector>
#include <armadillo>
class WrongDimensions : public std::exception
public:
WrongDimensions()
const char* what() const noexcept return msg.c_str();
private:
std::string msg = "The dimensions were incorrect";
;
class NotImplemented : public std::exception
public:
NotImplemented()
const char* what() const noexcept return msg.c_str();
private:
std::string msg = "Not implemented";
;
class BadArrayLayout : public std::exception
public:
BadArrayLayout()
const char* what() const noexcept return msg.c_str();
private:
std::string msg = "The matrix was not contiguous";
;
static const std::vector<npy_intp> getPyArrayDimensions(PyArrayObject* pyarr)
npy_intp ndims = PyArray_NDIM(pyarr);
npy_intp* dims = PyArray_SHAPE(pyarr);
std::vector<npy_intp> result;
for (int i = 0; i < ndims; i++)
result.push_back(dims[i]);
return result;
/* Checks the dimensions of the given array. Pass -1 for either dimension to say you don't
* care what the size is in that dimension. Pass dimensions (X, 1) for a vector.
*/
static bool checkPyArrayDimensions(PyArrayObject* pyarr, const npy_intp dim0, const npy_intp dim1)
const auto dims = getPyArrayDimensions(pyarr);
assert(dims.size() <= 2 && dims.size() > 0);
if (dims.size() == 1)
return (dims[0] == dim0 || dim0 == -1) && (dim1 == 1 || dim1 == -1);
else
return (dims[0] == dim0 || dim0 == -1) && (dims[1] == dim1 || dim1 == -1);
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value)
arrTypeCode = NPY_UINT16;
else if (std::is_same<outT, double>::value)
arrTypeCode = NPY_DOUBLE;
else
throw NotImplemented();
const auto dims = getPyArrayDimensions(pyarr);
if (dims.size() == 1)
outT* dataPtr = static_cast<outT*>(PyArray_DATA(pyarr));
return arma::Col<outT>(dataPtr, dims[0], true);
else
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
arma::Mat<outT> result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
static PyObject* convertArmaToPyArray(const arma::mat& matrix)
npy_intp ndim = matrix.is_colvec() ? 1 : 2;
npy_intp nRows = static_cast<npy_intp>(matrix.n_rows); // NOTE: This narrows the integer
npy_intp nCols = static_cast<npy_intp>(matrix.n_cols);
npy_intp dims[2] = nRows, nCols;
PyObject* result = PyArray_SimpleNew(ndim, dims, NPY_DOUBLE);
if (result == NULL) throw std::bad_alloc();
double* resultDataPtr = static_cast<double*>(PyArray_DATA((PyArrayObject*)result));
for (int i = 0; i < nRows; i++)
for (int j = 0; j < nCols; j++)
resultDataPtr[i * nCols + j] = matrix(i, j);
return result;
extern "C"
// An example function that takes a NumPy array and converts it to
// an arma::mat and back. This should return the array unchanged.
static PyObject* example_testFunction(PyObject* self, PyObject* args)
PyArrayObject* myArray = NULL;
if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &myArray))
return NULL;
PyObject* output = NULL;
try
arma::mat myMat = convertPyArrayToArma<double>(myArray, -1, -1);
output = convertArmaToPyArray(myMat);
catch (const std::bad_alloc&)
PyErr_NoMemory();
Py_XDECREF(output);
return NULL;
catch (const std::exception& err)
PyErr_SetString(PyExc_RuntimeError, err.what());
Py_XDECREF(output);
return NULL;
return output;
static PyMethodDef example_methods[] =
"test_function", example_testFunction, METH_VARARGS, "A test function",
NULL, NULL, 0, NULL
;
static struct PyModuleDef example_module =
PyModuleDef_HEAD_INIT,
"example", /* name of module */
NULL, /* module documentation, may be NULL */
-1, /* size of per-interpreter state of the module,
or -1 if the module keeps state in global variables. */
example_methods
;
PyMODINIT_FUNC
PyInit_example(void)
import_array();
PyObject* m = PyModule_Create(&example_module);
if (m == NULL) return NULL;
return m;
还有setup.py
编译:
from setuptools import setup, Extension
import numpy as np
example_module = Extension(
'example',
include_dirs=[np.get_include(), '/usr/local/include'],
libraries=['armadillo'],
library_dirs=['/usr/local/lib'],
sources=['example.cpp'],
language='c++',
extra_compile_args=['-std=c++11', '-mmacosx-version-min=10.10'],
)
setup(name='example',
ext_modules=[example_module],
)
现在假设我们有示例数组
a = np.array([[ 1, 2, 3, 4, 5, 6],
[ 7, 8, 9,10,11,12],
[13,14,15,16,17,18]], dtype='float64')
该函数似乎适用于像a[:, :3]
这样的多维切片,并且它返回的矩阵未改变,正如我所期望的那样。但如果我给它一个单维切片,除非我复制一份,否则我会得到错误的组件:
>>> example.test_function(a[:, 3])
array([ 4., 5., 6.])
>>> example.test_function(a[:, 3].copy())
array([ 4., 10., 16.])
【问题讨论】:
NPY_ARRAY_CARRAY 怎么样?我认为这会强制从非连续切片构造一个连续数组,但我只是在这里猜测 :-) 我已经在使用 NPY_ARRAY_FARRAY。那不应该做同样的事情吗(除了以列为主的顺序)?为了与使用 BLAS 函数的犰狳兼容,我需要它以列为主(类似 Fortran)的顺序。 哦,对了。错过了。 . . 你能发布一些例子和getPyArrayDimensions()
吗?
好电话。我刚刚编辑了这个问题以提供一个完整的最小工作示例。
【参考方案1】:
数组的视图只是同一数据数组的另一个信息包装器。 Numpy
不会在此处复制任何数据。仅调整用于解释数据的信息,并在有用时移动指向数据的指针。
在您的代码中,您假设向量a[:, 3]
的数据表示为内存中的一个向量,对于NPY_ARRAY_CARRAY
和NPY_ARRAY_FARRAY
而言不会有所不同。但是这种表示只有在创建数组本身的(fortran 有序)副本后才能得到。
为了让它工作,我稍微修改了你的 convertPyArrayToArma()
函数来创建一个副本,即使它是一个向量:
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value)
arrTypeCode = NPY_UINT16;
else if (std::is_same<outT, double>::value)
arrTypeCode = NPY_DOUBLE;
else
throw NotImplemented();
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
const auto dims = getPyArrayDimensions(pyarr);
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
// this copies the data from cleanArr
arma::Mat<outT> result;
if (dims.size() == 1)
result = arma::Col<outT>(dataPtr, dims[0], true);
else
result = arma::Mat<outT>(dataPtr, dims[0], dims[1], true);
Py_DECREF(cleanArr);
return result;
【讨论】:
啊,我忽略了我为 1D 编写的单独代码路径......但这解释了我看到的结果。谢谢!以上是关于使用 C API 访问 NumPy 数组的视图的主要内容,如果未能解决你的问题,请参考以下文章
Numpy C API - 使用 PyArray_Descr 创建数组会导致段错误
从使用 numpy.save(...) 保存的文件中将 numpy 数组加载到 C 中
在tensorflow中使用`dataset.map()`访问张量numpy数组