Hexagon DSP系列记录——写Hexagon DSP程序流程

Posted lligen-dai

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Hexagon DSP系列记录——写Hexagon DSP程序流程相关的知识,希望对你有一定的参考价值。

写Hexagon DSP程序流程

In general the developer follows these steps:

  1. Define an API in IDL that will perform their computation
  2. Compile IDL into a header and android library
  3. Call the API directly from their application
  4. Results are returned just like a normal function call and it appears to the application as if the work was performed locally.

以Hexagon_SDK\3.5.2\examples\fastcv\cornerApp为例子讲解:
cornerApp例子主要是对输入图像做高斯滤波并检测fast角点,具体图像算法部分使用的是高通自带的fastcv SDK。
首先定义idl接口文件inc/cornerApp.idl

#ifndef CORNER_APP_IDL
#define CORNER_APP_IDL

#include "AEEStdDef.idl"

// Remote interface of the corner-detection example. The QAIC IDL compiler
// turns this into cornerApp.h plus the stub/skel sources.
interface cornerApp {
   // `in` parameters travel from the caller to the remote side, `rout`
   // parameters carry results back to the caller.
   AEEResult filterGaussianAndCornerFastQ(in sequence<uint8> src,
                              in uint32 srcWidth,
                              in uint32 srcHeight,
                              rout sequence <uint8> dst,
                              in int32 blurBorder,
                              in uint32 srcStride,
                              in int32 barrier,
                              in uint32 border,
                              rout sequence <uint32> xy,
                              in uint32 nCornersMax,
                              rout uint32 nCorners,
                              rout sequence <uint32> renderBuffer);
};

#endif // CORNER_APP_IDL

经过QAIC IDL Compiler后会生成cornerApp.h/cornerApp_stub.c/cornerApp_skel.c,我们只关注cornerApp.h,因为它对应idl文件的c接口,截取cornerApp.h关键部分如下

#ifdef __cplusplus
extern "C" {
#endif
/*
 * C prototype generated from cornerApp.idl.
 * Each IDL sequence<T> becomes a (pointer, int length) pair, where the length
 * counts T elements; scalar `rout` values become pointers.
 */
__QAIC_HEADER_EXPORT AEEResult __QAIC_HEADER(cornerApp_filterGaussianAndCornerFastQ)(
	const uint8* src, int srcLen, // `in sequence<uint8>`: const pointer + element count
	uint32 srcWidth, 
	uint32 srcHeight, 
	uint8* dst, int dstLen, // `rout sequence<uint8>`: writable pointer + element count
	int32 blurBorder, 
	uint32 srcStride, 
	int32 barrier, 
	uint32 border, 
	uint32* xy, int xyLen, 
	uint32 nCornersMax, 
	uint32* nCorners, // `rout uint32 nCorners`: passed as a pointer so the value can be returned
	uint32* renderBuffer, int renderBufferLen
	) __QAIC_HEADER_ATTRIBUTE;
#ifdef __cplusplus
}
#endif

接下来开发者需要做的就是写cornerApp.h文件接口对应的实现src/cornerApp_imp.c:

#include "fastcv.h"  // 高通fastcv SDK头文件
#include "cornerApp.h"
#include "AEEStdErr.h"

/*===========================================================================
    REMOTED FUNCTION
===========================================================================*/
/*-------------------------------begin corner detection app functions---------------------------------------------------------------------*/

/*
 * Implementation of cornerApp::filterGaussianAndCornerFastQ.
 *
 * Converts the YUV420 input to RGB565 for rendering, blurs the image with a
 * 3x3 Gaussian and runs FAST9 corner detection on the blurred result.
 *
 * src / srcLen              input YUV420 image, srcLen in bytes
 * srcWidth, srcHeight       image dimensions in pixels
 * dst / dstLen              receives the blurred image (srcWidth * srcHeight bytes)
 * blurBorder                border argument forwarded to fcvFilterGaussian3x3u8
 * srcStride                 stride forwarded to fcvCornerFast9u8
 * barrier, border           threshold / border forwarded to fcvCornerFast9u8
 * xy / xyLen                receives the detected corner coordinates; presumably
 *                           must hold 2 * nCornersMax entries (interleaved x,y) --
 *                           not enforced here, TODO confirm against FastCV docs
 * nCornersMax               maximum number of corners to report
 * nCorners                  receives the number of corners found
 * renderBuffer / renderBufferLen
 *                           receives the RGB565 image, length in uint32 elements
 *
 * Returns AEE_SUCCESS, or AEE_EBADPARM when a pointer is null or a buffer is
 * too small for the given image dimensions.
 */
AEEResult cornerApp_filterGaussianAndCornerFastQ(const uint8* src,
		int srcLen, uint32 srcWidth, uint32 srcHeight, uint8* dst, int dstLen, int32 blurBorder,
		uint32 srcStride, int32 barrier, uint32 border, uint32* xy, int xyLen, uint32 nCornersMax,
		uint32* nCorners, uint32* renderBuffer, int renderBufferLen)
{
	// 64-bit arithmetic so that width * height cannot wrap around.
	const unsigned long long pixels = (unsigned long long)srcWidth * srcHeight;

	if (!src || !dst || !xy || !nCorners || !renderBuffer) {
		return AEE_EBADPARM;
	}
	if (srcLen < 0 || dstLen < 0 || xyLen < 0 || renderBufferLen < 0) {
		return AEE_EBADPARM;
	}
	// YUV420 input: full-resolution luma plus half as many chroma bytes.
	if ((unsigned long long)srcLen < pixels + pixels / 2) {
		return AEE_EBADPARM;
	}
	// Blurred output: one byte per pixel.
	if ((unsigned long long)dstLen < pixels) {
		return AEE_EBADPARM;
	}
	// RGB565 output: two bytes per pixel, stored in 4-byte elements.
	if ((unsigned long long)renderBufferLen * sizeof(*renderBuffer) < pixels * 2) {
		return AEE_EBADPARM;
	}

	// Color conversion from YUV to RGB. renderBuffer holds the original image
	// after conversion; the caller draws the detected corners on top of it.
	fcvColorYUV420toRGB565u8(src, srcWidth, srcHeight, renderBuffer); // FastCV SDK

	// Blur the image, then detect corners on the blurred copy with FAST9.
	// The corner list is returned for the application to draw.
	fcvFilterGaussian3x3u8(src, srcWidth, srcHeight, dst, blurBorder); // FastCV SDK

	fcvCornerFast9u8(dst, srcWidth, srcHeight, srcStride, barrier, border, xy, nCornersMax, nCorners); // FastCV SDK

	return AEE_SUCCESS;
}

fastcv提供的所有接口请参考下载的Hexagon_SDK路径: file:///E:/Qualcomm/Hexagon_SDK/3.5.2/docs/FastCV/Applications_Computer%20Vision.html#FastCV%20Library
最后,开发者写应用(如main函数)调用上面接口,参考src/cornerApp.c
下面代码截取关键部分并注释

int test_main_start(int argc, char* argv[]);

/* Program entry point: forwards the command line to test_main_start(). */
int main(int argc, char* argv[])
{
    return test_main_start(argc, argv);
}

static uint8_t *dataBuf = NULL;
static uint8_t *blurredImgBuf = NULL;
static uint32_t *corners = NULL;
static uint32_t *renderBuf = NULL;

void deinitQ6(void)

	int nErr = 0;
	if (dataBuf) rpcmem_free(dataBuf);
	if (corners) rpcmem_free(corners);
	if (blurredImgBuf) rpcmem_free(blurredImgBuf);
	if (renderBuf) rpcmem_free(renderBuf);

	VERIFY(0 == dspCV_deinitQ6());
	return;

	bail:
	if(nErr) 
		printf("error in deinitQ6: %d\\n", nErr);
	

int initQ6(uint32_t srcHeight, uint32_t srcWidth)

	int nErr = 0;

    dspCV_Attribute attrib[] =
    
    	// Slightly more MCPS than are available on current targets
        DSP_TOTAL_MCPS, 1000, 
        // drive the clock to MAX on known targets
        DSP_MCPS_PER_THREAD, 500, 
        // 12 GB/sec is slightly higher than the max realistic max BW on existing targets.
        PEAK_BUS_BANDWIDTH_MBPS, 12000,
        // This app is non-real time, and constantly reading/writing memory
        BUS_USAGE_PERCENT, 100,               
    ;

	nErr = dspCV_initQ6_with_attributes(attrib, sizeof(attrib)/sizeof(attrib[0]));
    if (nErr)
		printf("error in dspCV_initQ6: %d\\n", nErr);
        return(1);
	

	// allocate ion buffers
	VERIFY(0 != (dataBuf = (uint8_t*)rpcmem_alloc(ION_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 
												srcWidth*srcHeight*3/2)));
	VERIFY(0 != (blurredImgBuf = (uint8_t*)rpcmem_alloc(ION_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 
												srcWidth*srcHeight)));
	VERIFY(0 != (renderBuf = (uint32_t*)rpcmem_alloc(ION_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 
												srcWidth*srcHeight * 3 / 2 *sizeof(uint32_t))));
	VERIFY(0 != (corners = (uint32_t*)rpcmem_alloc(ION_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, 
												XY_SIZE*sizeof(uint32_t))));

	return 0;

	bail:
	printf("out of memory in initQ6: %d\\n", nErr);
    deinitQ6();
	return 2;


/*
 * Prints one comparison line: the label, then the expected value (arg2)
 * followed by the value actually obtained (arg1).
 */
void compareResults(int arg1, int arg2, const char* compareStr)
{
	printf("%s\t Expected: %d\tGot: %d\n", compareStr, arg2, arg1);
}


int test_main_start(int argc, char* argv[]) 
	uint32_t srcWidth;
	uint32_t srcHeight;
	int retVal, i;

	uint32_t numCornersDetected = 0;        // DSP handle
	uint32_t cornerThreshold = 20;

	//FastRPC机制(fast remote procedure calls, docs/Technologies_FastRPC.html)
	//将计算任务从CPU发送到DSP。数据传输涉及ION memory。
	//FastRPC主要接口:rpcmem_init/rpcmem_alloc/rpcmem_free/rpcmem_deinit
	//FastRPC申请的内存为ION共享内存
	rpcmem_init();

	srcWidth = 800;
	srcHeight = 480;

    // call dspCV_initQ6_with_attributes() to bump up Q6 clock frequency
    // Since this app is not real-time, and can fully load the DSP clock & bus resources 
    // throughout its lifetime, vote for the maximum available MIPS & BW.
    // lligen add NOTE 1: The dspCV library in $HEXAGON_SDK_ROOT/lib/fastcv/dspCV aims to 
    // abstract as much of the DSP runtime environment as possible, to reduce effort in 
    // offloading compute processing to the DSP. It offers API's to perform such functions as 
    // clock/power voting, multi-threaded callbacks, concurrency checking, and HVX resource 
    // management.
    // lligen add NOTE 2: using dspCV remote API's is no longer recommended from SDM660 forward. 
    // They are still supported for backward compatibility.
	retVal = initQ6(srcHeight, srcWidth);
    if (retVal)
    
        printf("init failed!! status %d\\n",retVal);
        rpcmem_deinit();
        return retVal;
    
    printf("initq6 done...\\n");
    if (retVal)
    
        printf("init failed!! status %d\\n",retVal);
        rpcmem_deinit();
        return retVal;
    
	// 图像数据复制到ION内存,DSP运行时会直接使用ION内存数据
	memcpy(dataBuf, tmpcorner_test2, srcWidth * srcHeight);

	unsigned long long t1 = GetTime();
    int anyFailures = 0;
    for (i = 0; i < LOOPS; i++)
    
    	// 调用之前写好的接口
	    retVal = cornerApp_filterGaussianAndCornerFastQ(
			dataBuf, srcWidth*srcHeight * 3/2, //输入图片格式为YUV420
			srcWidth,
			srcHeight,
			blurredImgBuf, srcWidth * srcHeight,
			0,	// blurBorder (fcvFilterGaussian)
			srcWidth,  // srcStride (fcvCornerFast)
			cornerThreshold,
			7,
			(uint32*)corners, XY_SIZE,
			MAX_NUM_CORNERS,
			(uint32*)&numCornersDetected,
			(uint32*)renderBuf, srcWidth * srcHeight / 2);

            anyFailures |= retVal;
    
    unsigned long long t2 = GetTime();

    if (AEE_SUCCESS != retVal)
    
        printf("corner detection returned error code %d\\n", retVal);
    
    else
    
#if 0
        uint32_t *cornerPtr = corners;
        for (i = 0; i < numCornersDetected; i++)
        
            uint32_t x = *(cornerPtr++);
            uint32_t y = *(cornerPtr++);
            printf("corner #:%d \\t (x,y): %d, %d\\n", i, (int)x, (int)y);
        
#endif
        printf("Num corners detected: %d. Expected: 60 \\n", (int)numCornersDetected);
        anyFailures |= (numCornersDetected != 60);
    
    
#ifdef __hexagon__
    printf("run time of corner detection: %llu PCycles (from %llu-%llu) for %d iterations\\n", 
    		t2-t1, t1, t2, LOOPS);
#else
	printf("run time of corner detection: %llu microseconds for %d iterations\\n", t2-t1, LOOPS);
#endif

	// free ion buffers
	deinitQ6();
	rpcmem_deinit();
    printf("deinit done...\\n");

    if (anyFailures != 0) printf ("cornerApp FAILED!!!\\n");
    else printf ("cornerApp SUCCESS\\n");
    return anyFailures;

编译运行可以选择使用Hexagon_SDK提供的脚本cornerApp_walkthrough.py

cd E:\\Qualcomm\\Hexagon_SDK\\3.5.2\\examples\\fastcv\\cornerApp
python .\\cornerApp_walkthrough.py -T sm8150

尝试自己一步步编译运行。

cd E:\\Qualcomm\\Hexagon_SDK\\3.5.2\\examples\\fastcv\\cornerApp
$make V=android_Debug_aarch64 tree CDSP_FLAG=1
$make V=hexagon_Debug_dynamic_toolv83_v66 tree
adb push android_Debug_aarch64/ship/cornerApp /vendor/bin
adb shell chmod 777 /vendor/bin/cornerApp
adb shell mkdir /vendor/lib/rfsa/dsp/sdk
adb push hexagon_Debug_dynamic_toolv83_v66/ship/libcornerApp_skel.so /vendor/lib/rfsa/dsp/sdk/
adb push android_Debug_aarch64/ship/libcornerApp.so /vendor/lib64/
// 依赖的libfastcvadsp.so/libdspCV_skel.so 手机自带, 在/vendor/lib/rfsa/adsp,如果没有,那也需要adb push
// 执行前,每台手机需要先Generate testsig,参考上一篇文章的**编译与运行calculator实例**部分
adb shell export LD_LIBRARY_PATH=/vendor/lib64/:$LD_LIBRARY_PATH ADSP_LIBRARY_PATH="/vendor/lib/rfsa/dsp/sdk\;/vendor/lib/rfsa/dsp/testsig;" /vendor/bin/cornerApp
initq6 done...
Num corners detected: 60. Expected: 60
run time of corner detection: 227263 microseconds for 100 iterations
deinit done...
cornerApp SUCCESS

以上是关于Hexagon DSP系列记录——写Hexagon DSP程序流程的主要内容,如果未能解决你的问题,请参考以下文章

Hexagon DSP系列记录

Hexagon DSP系列记录

Hexagon DSP系列记录——DSP工作模式设置

Hexagon DSP系列记录——DSP工作模式设置

Hexagon DSP系列记录——DSP LOG输出

Hexagon DSP系列记录——DSP LOG输出