Hexagon DSP系列记录——写Hexagon DSP程序流程
Posted lligen-dai
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Hexagon DSP系列记录——写Hexagon DSP程序流程相关的知识,希望对你有一定的参考价值。
写Hexagon DSP程序流程
In general the developer follows these steps:
- Define an API in IDL that will perform their computation
- Compile IDL into a header and android library
- Call the API directly from their application
- Results are returned just like a normal function call and it appears to the application as if the work was performed locally.
以Hexagon_SDK\\3.5.2\\examples\\fastcv\\cornerApp为例子讲解:
cornerApp 示例对输入图像先做高斯滤波,再检测 FAST 角点;具体的图像算法由高通提供的 FastCV SDK 实现。
首先定义idl接口文件inc/cornerApp.idl
#ifndef CORNER_APP_IDL
#define CORNER_APP_IDL

#include "AEEStdDef.idl"

// Remote interface of the cornerApp example.
// `in` parameters are sent from the caller to the DSP; `rout` parameters are
// filled in by the DSP and returned to the caller.
interface cornerApp {
   // Gaussian-blur `src` into `dst`, run corner detection on the blurred
   // image, and return the corner coordinates in `xy` and their count in
   // `nCorners`. `renderBuffer` receives the colour-converted source image.
   AEEResult filterGaussianAndCornerFastQ(in sequence<uint8> src,
                                          in uint32 srcWidth,
                                          in uint32 srcHeight,
                                          rout sequence <uint8> dst,
                                          in int32 blurBorder,
                                          in uint32 srcStride,
                                          in int32 barrier,
                                          in uint32 border,
                                          rout sequence <uint32> xy,
                                          in uint32 nCornersMax,
                                          rout uint32 nCorners,
                                          rout sequence <uint32> renderBuffer);
};

#endif // CORNER_APP_IDL
IDL 文件经 QAIC IDL Compiler 编译后会生成 cornerApp.h、cornerApp_stub.c 和 cornerApp_skel.c。我们只需要关注 cornerApp.h,因为它就是 IDL 接口对应的 C 接口声明。下面截取 cornerApp.h 的关键部分:
#ifdef __cplusplus
extern "C" {
#endif
/*
 * C declaration generated from the IDL method filterGaussianAndCornerFastQ.
 *
 * Mapping visible in this declaration:
 *   - every IDL `sequence<T>` becomes a `T*` pointer followed by an `int`
 *     element count (srcLen, dstLen, xyLen, renderBufferLen);
 *   - an `in` sequence is passed as `const T*`, a `rout` sequence as a
 *     writable `T*`;
 *   - a scalar `rout` parameter (nCorners) becomes a pointer through which
 *     the value is returned.
 */
__QAIC_HEADER_EXPORT AEEResult __QAIC_HEADER(cornerApp_filterGaussianAndCornerFastQ)(
    const uint8* src, int srcLen,   /* in sequence<uint8>: read-only data + element count */
    uint32 srcWidth,
    uint32 srcHeight,
    uint8* dst, int dstLen,         /* rout sequence<uint8>: writable buffer + element count */
    int32 blurBorder,
    uint32 srcStride,
    int32 barrier,
    uint32 border,
    uint32* xy, int xyLen,
    uint32 nCornersMax,
    uint32* nCorners,               /* rout uint32: output value returned through a pointer */
    uint32* renderBuffer, int renderBufferLen
) __QAIC_HEADER_ATTRIBUTE;
#ifdef __cplusplus
}
#endif
接下来开发者需要做的就是写cornerApp.h文件接口对应的实现src/cornerApp_imp.c:
#include "fastcv.h" // 高通fastcv SDK头文件
#include "cornerApp.h"
#include "AEEStdErr.h"
/*===========================================================================
REMOTED FUNCTION
===========================================================================*/
/*-------------------------------begin corner detection app functions---------------------------------------------------------------------*/
/**
 * DSP-side implementation of the remoted cornerApp method.
 *
 * Steps, in order:
 *   1. convert the YUV420 source image to RGB565 into renderBuffer;
 *   2. apply a 3x3 Gaussian blur from src into dst;
 *   3. run Fast9 corner detection on dst, writing coordinates to xy and the
 *      number of corners found to *nCorners.
 *
 * The element-count parameters (srcLen, dstLen, xylen, renderBufferLen) are
 * not inspected here; the caller is responsible for supplying buffers that are
 * large enough for the given srcWidth/srcHeight/nCornersMax.
 *
 * @return AEE_SUCCESS unconditionally; the FastCV calls used here report no
 *         status back to this function.
 */
AEEResult cornerApp_filterGaussianAndCornerFastQ(const uint8* src,
    int srcLen, uint32 srcWidth, uint32 srcHeight, uint8* dst, int dstLen, int32 blurBorder,
    uint32 srcStride, int32 barrier, uint32 border, uint32* xy, int xylen, uint32 nCornersMax,
    uint32* nCorners, uint32* renderBuffer, int renderBufferLen)
{
    // Perform color conversion from YUV to RGB. Output renderBuffer is the original
    // buffer after conversion to RGB, upon which the caller of this function must draw
    // the corners detected in the desired manner.
    fcvColorYUV420toRGB565u8(src, srcWidth, srcHeight, renderBuffer); // FastCV SDK function

    // Gaussian blur the image and then detect corners using Fast9 detection algorithm.
    // Return the list of corners for application to draw upon the rendered image.
    fcvFilterGaussian3x3u8(src, srcWidth, srcHeight, dst, blurBorder); // FastCV SDK function
    fcvCornerFast9u8(dst, srcWidth, srcHeight, srcStride, barrier, border,
                     xy, nCornersMax, nCorners);                       // FastCV SDK function

    return AEE_SUCCESS;
}
FastCV 提供的全部接口请参考已安装的 Hexagon SDK 自带文档(以下为作者本机路径,请按实际安装位置调整): file:///E:/Qualcomm/Hexagon_SDK/3.5.2/docs/FastCV/Applications_Computer%20Vision.html#FastCV%20Library
最后,开发者写应用(如main函数)调用上面接口,参考src/cornerApp.c
下面代码截取关键部分并注释
int test_main_start(int argc, char* argv[]);

/* Program entry point: forwards the command line to test_main_start() and
 * returns its result as the process exit status. */
int main(int argc, char* argv[])
{
    return test_main_start(argc, argv);
}
// Buffers shared with the DSP. They are allocated with rpcmem_alloc() in
// initQ6() and released in deinitQ6(); NULL means "not allocated".
// Input image handed to the remote call as `src`.
static uint8_t *dataBuf = NULL;
// Receives the blurred image (`dst` of the remote call).
static uint8_t *blurredImgBuf = NULL;
// Receives the detected corner coordinates (`xy` of the remote call).
static uint32_t *corners = NULL;
// Receives the colour-converted image (`renderBuffer` of the remote call).
static uint32_t *renderBuf = NULL;
void deinitQ6(void)
int nErr = 0;
if (dataBuf) rpcmem_free(dataBuf);
if (corners) rpcmem_free(corners);
if (blurredImgBuf) rpcmem_free(blurredImgBuf);
if (renderBuf) rpcmem_free(renderBuf);
VERIFY(0 == dspCV_deinitQ6());
return;
bail:
if(nErr)
printf("error in deinitQ6: %d\\n", nErr);
int initQ6(uint32_t srcHeight, uint32_t srcWidth)
int nErr = 0;
dspCV_Attribute attrib[] =
// Slightly more MCPS than are available on current targets
DSP_TOTAL_MCPS, 1000,
// drive the clock to MAX on known targets
DSP_MCPS_PER_THREAD, 500,
// 12 GB/sec is slightly higher than the max realistic max BW on existing targets.
PEAK_BUS_BANDWIDTH_MBPS, 12000,
// This app is non-real time, and constantly reading/writing memory
BUS_USAGE_PERCENT, 100,
;
nErr = dspCV_initQ6_with_attributes(attrib, sizeof(attrib)/sizeof(attrib[0]));
if (nErr)
printf("error in dspCV_initQ6: %d\\n", nErr);
return(1);
// allocate ion buffers
VERIFY(0 != (dataBuf = (uint8_t*)rpcmem_alloc(ION_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS,
srcWidth*srcHeight*3/2)));
VERIFY(0 != (blurredImgBuf = (uint8_t*)rpcmem_alloc(ION_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS,
srcWidth*srcHeight)));
VERIFY(0 != (renderBuf = (uint32_t*)rpcmem_alloc(ION_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS,
srcWidth*srcHeight * 3 / 2 *sizeof(uint32_t))));
VERIFY(0 != (corners = (uint32_t*)rpcmem_alloc(ION_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS,
XY_SIZE*sizeof(uint32_t))));
return 0;
bail:
printf("out of memory in initQ6: %d\\n", nErr);
deinitQ6();
return 2;
/**
 * Print one tab-separated comparison line.
 *
 * @param arg1        value actually obtained (printed after "Got:")
 * @param arg2        value that was expected (printed after "Expected:")
 * @param compareStr  label printed at the start of the line
 */
void compareResults(int arg1, int arg2, const char* compareStr)
{
    printf("%s\t Expected: %d\tGot: %d\n", compareStr, arg2, arg1);
}
int test_main_start(int argc, char* argv[])
uint32_t srcWidth;
uint32_t srcHeight;
int retVal, i;
uint32_t numCornersDetected = 0; // DSP handle
uint32_t cornerThreshold = 20;
//FastRPC机制(fast remote procedure calls, docs/Technologies_FastRPC.html)
//将计算任务从CPU发送到DSP。数据传输涉及ION memory。
//FastRPC主要接口:rpcmem_init/rpcmem_alloc/rpcmem_free/rpcmem_deinit
//FastRPC申请的内存为ION共享内存
rpcmem_init();
srcWidth = 800;
srcHeight = 480;
// call dspCV_initQ6_with_attributes() to bump up Q6 clock frequency
// Since this app is not real-time, and can fully load the DSP clock & bus resources
// throughout its lifetime, vote for the maximum available MIPS & BW.
// lligen add NOTE 1: The dspCV library in $HEXAGON_SDK_ROOT/lib/fastcv/dspCV aims to
// abstract as much of the DSP runtime environment as possible, to reduce effort in
// offloading compute processing to the DSP. It offers API's to perform such functions as
// clock/power voting, multi-threaded callbacks, concurrency checking, and HVX resource
// management.
// lligen add NOTE 2: using dspCV remote API's is no longer recommended from SDM660 forward.
// They are still supported for backward compatibility.
retVal = initQ6(srcHeight, srcWidth);
if (retVal)
printf("init failed!! status %d\\n",retVal);
rpcmem_deinit();
return retVal;
printf("initq6 done...\\n");
if (retVal)
printf("init failed!! status %d\\n",retVal);
rpcmem_deinit();
return retVal;
// 图像数据复制到ION内存,DSP运行时会直接使用ION内存数据
memcpy(dataBuf, tmpcorner_test2, srcWidth * srcHeight);
unsigned long long t1 = GetTime();
int anyFailures = 0;
for (i = 0; i < LOOPS; i++)
// 调用之前写好的接口
retVal = cornerApp_filterGaussianAndCornerFastQ(
dataBuf, srcWidth*srcHeight * 3/2, //输入图片格式为YUV420
srcWidth,
srcHeight,
blurredImgBuf, srcWidth * srcHeight,
0, // blurBorder (fcvFilterGaussian)
srcWidth, // srcStride (fcvCornerFast)
cornerThreshold,
7,
(uint32*)corners, XY_SIZE,
MAX_NUM_CORNERS,
(uint32*)&numCornersDetected,
(uint32*)renderBuf, srcWidth * srcHeight / 2);
anyFailures |= retVal;
unsigned long long t2 = GetTime();
if (AEE_SUCCESS != retVal)
printf("corner detection returned error code %d\\n", retVal);
else
#if 0
uint32_t *cornerPtr = corners;
for (i = 0; i < numCornersDetected; i++)
uint32_t x = *(cornerPtr++);
uint32_t y = *(cornerPtr++);
printf("corner #:%d \\t (x,y): %d, %d\\n", i, (int)x, (int)y);
#endif
printf("Num corners detected: %d. Expected: 60 \\n", (int)numCornersDetected);
anyFailures |= (numCornersDetected != 60);
#ifdef __hexagon__
printf("run time of corner detection: %llu PCycles (from %llu-%llu) for %d iterations\\n",
t2-t1, t1, t2, LOOPS);
#else
printf("run time of corner detection: %llu microseconds for %d iterations\\n", t2-t1, LOOPS);
#endif
// free ion buffers
deinitQ6();
rpcmem_deinit();
printf("deinit done...\\n");
if (anyFailures != 0) printf ("cornerApp FAILED!!!\\n");
else printf ("cornerApp SUCCESS\\n");
return anyFailures;
编译运行可以选择使用Hexagon_SDK提供的脚本cornerApp_walkthrough.py
cd E:\\Qualcomm\\Hexagon_SDK\\3.5.2\\examples\\fastcv\\cornerApp
python .\\cornerApp_walkthrough.py -T sm8150
尝试自己一步步编译运行。
cd E:\\Qualcomm\\Hexagon_SDK\\3.5.2\\examples\\fastcv\\cornerApp
$make V=android_Debug_aarch64 tree CDSP_FLAG=1
$make V=hexagon_Debug_dynamic_toolv83_v66 tree
adb push android_Debug_aarch64/ship/cornerApp /vendor/bin
adb shell chmod 777 /vendor/bin/cornerApp
adb shell mkdir /vendor/lib/rfsa/dsp/sdk
adb push hexagon_Debug_dynamic_toolv83_v66/ship/libcornerApp_skel.so /vendor/lib/rfsa/dsp/sdk/
adb push android_Debug_aarch64/ship/libcornerApp.so /vendor/lib64/
// 依赖的 libfastcvadsp.so 和 libdspCV_skel.so 一般手机自带,位于 /vendor/lib/rfsa/adsp;如果手机上没有,也需要用 adb push 推送到设备
// 执行前,每台手机需要先Generate testsig,参考上一篇文章的**编译与运行calculator实例**部分
adb shell export LD_LIBRARY_PATH=/vendor/lib64/:$LD_LIBRARY_PATH ADSP_LIBRARY_PATH="/vendor/lib/rfsa/dsp/sdk\\;/vendor/lib/rfsa/dsp/testsig;" /vendor/bin/cornerApp
initq6 done...
Num corners detected: 60. Expected: 60
run time of corner detection: 227263 microseconds for 100 iterations
deinit done...
cornerApp SUCCESS
以上是关于Hexagon DSP系列记录——写Hexagon DSP程序流程的主要内容,如果未能解决你的问题,请参考以下文章