3KV260开发Vitis AI library APIs

Posted 2022-11-24 苍山有雪，剑有霜

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了3KV260开发Vitis AI library APIs相关的知识，希望对你有一定的参考价值。

Vitis AI 提供了 C++ 和 Python 两种接口，两种接口的函数名称类似，之后的内容主要以 C++ 为例进行讲解。

四种API

Vitis AI Library提供了以下四种API：

Vitis AI Library API_0 based on VART
Vitis AI Library API_1 based on AI Library
Vitis AI Library API_2 based on DpuTask
Vitis AI Library API_3 based on Graph_runner

下面分别给出这四种API的demo。

VART

如果使用VART（Vitis AI Runtime）进行代码的编写，流程如下：

以resnet50进行图片分类为例，主体代码如下：

// origin: Vitis-AI/demo/VART/resnet50/src/main.cc

/**
 * @brief Classify every image listed under baseImagePath with ResNet50.
 *
 * Each image is pre-processed on the CPU (resize, mean subtraction, scaling
 * to int8), submitted to the runner in batches, and the raw output of every
 * image is passed to CPUCalcSoftmax and TopK to report the TOP-5 classes.
 *
 * Reads the file-level globals baseImagePath, wordsPath and shapes; the
 * tensor lists inside shapes are indexed here, so they must be populated
 * before this function is called.
 *
 * @param runner - pointer to the VART runner that executes the ResNet50 model
 *
 * @return none
 */
void runResnet50(vart::Runner* runner) {
  vector<string> kinds, images;

  /* Load all image names.*/
  ListImages(baseImagePath, images);
  if (images.size() == 0) {
    cerr << "\nError: No images existing under " << baseImagePath << endl;
    return;
  }

  /* Load all kinds words.*/
  LoadWords(wordsPath + "words.txt", kinds);
  if (kinds.size() == 0) {
    cerr << "\nError: No words exist in file words.txt." << endl;
    return;
  }

  /* Mean value for ResNet50 specified in Caffe prototxt */
  const float mean[3] = {104, 107, 123};

  /* get in/out tensors and dims*/
  auto outputTensors = runner->get_output_tensors();
  auto inputTensors = runner->get_input_tensors();
  auto out_dims = outputTensors[0]->get_shape();
  auto in_dims = inputTensors[0]->get_shape();

  auto input_scale = get_input_scale(inputTensors[0]);
  auto output_scale = get_output_scale(outputTensors[0]);

  /*get shape info*/
  int outSize = shapes.outTensorList[0].size;
  int inSize = shapes.inTensorList[0].size;
  int inHeight = shapes.inTensorList[0].height;
  int inWidth = shapes.inTensorList[0].width;

  int batchSize = in_dims[0];

  std::vector<std::unique_ptr<vart::TensorBuffer>> inputs, outputs;

  vector<Mat> imageList;
  /* Host buffers are owned by vectors so they are released on every exit
   * path, including the early return below and any exception. */
  std::vector<int8_t> imageInputs(static_cast<size_t>(inSize) * batchSize);
  std::vector<float> softmax(outSize);
  std::vector<int8_t> FCResult(static_cast<size_t>(batchSize) * outSize);
  std::vector<vart::TensorBuffer*> inputsPtr, outputsPtr;
  std::vector<std::shared_ptr<xir::Tensor>> batchTensors;

  /* The environment does not change while we run, so look it up once. */
  const bool quiet = (getenv("QUIET_RUN") != nullptr);

  /*run with batch*/
  for (unsigned int n = 0; n < images.size(); n += batchSize) {
    /* The last batch may hold fewer images than batchSize. */
    unsigned int runSize =
        (images.size() < (n + batchSize)) ? (images.size() - n) : batchSize;
    in_dims[0] = runSize;
    out_dims[0] = batchSize;
    for (unsigned int i = 0; i < runSize; i++) {
      Mat image = imread(baseImagePath + images[n + i]);
      if (image.empty()) {
        /* resize() would otherwise fail with an OpenCV assertion. */
        cerr << "\nError: Cannot read image " << baseImagePath + images[n + i]
             << endl;
        return;
      }

      /*image pre-process*/
      Mat image2;
      /* cv::Size is (width, height); the loops below index image2 by
       * (row < inHeight, col < inWidth). */
      resize(image, image2, Size(inWidth, inHeight), 0, 0);
      for (int h = 0; h < inHeight; h++) {
        for (int w = 0; w < inWidth; w++) {
          for (int c = 0; c < 3; c++) {
            imageInputs[i * inSize + h * inWidth * 3 + w * 3 + c] =
                static_cast<int8_t>(
                    (image2.at<Vec3b>(h, w)[c] - mean[c]) * input_scale);
          }
        }
      }
      imageList.push_back(image);
    }

    /* in/out tensor refactory for batch inout/output */
    batchTensors.push_back(std::shared_ptr<xir::Tensor>(
        xir::Tensor::create(inputTensors[0]->get_name(), in_dims,
                            xir::DataType{xir::DataType::XINT, 8u})));
    inputs.push_back(std::make_unique<CpuFlatTensorBuffer>(
        imageInputs.data(), batchTensors.back().get()));
    batchTensors.push_back(std::shared_ptr<xir::Tensor>(
        xir::Tensor::create(outputTensors[0]->get_name(), out_dims,
                            xir::DataType{xir::DataType::XINT, 8u})));
    outputs.push_back(std::make_unique<CpuFlatTensorBuffer>(
        FCResult.data(), batchTensors.back().get()));

    /*tensor buffer input/output */
    inputsPtr.clear();
    outputsPtr.clear();
    inputsPtr.push_back(inputs[0].get());
    outputsPtr.push_back(outputs[0].get());

    /*run*/
    auto job_id = runner->execute_async(inputsPtr, outputsPtr);
    runner->wait(job_id.first, -1);
    for (unsigned int i = 0; i < runSize; i++) {
      cout << "\nImage : " << images[n + i] << endl;
      /* Calculate softmax on CPU and display TOP-5 classification results */
      CPUCalcSoftmax(&FCResult[i * outSize], outSize, softmax.data(),
                     output_scale);
      TopK(softmax.data(), outSize, 5, kinds);
      /* Display the image */
      if (!quiet) {
        cv::imshow("Classification of ResNet50", imageList[i]);
        cv::waitKey(10000);
      }
    }
    /* Per-batch state is rebuilt from scratch on the next iteration. */
    imageList.clear();
    inputs.clear();
    outputs.clear();
  }
}

/**
 * @brief Entry for runing ResNet50 neural network
 *
 * @note Runner APIs prefixed with "dpu" are used to easily program &
 *       deploy ResNet50 on DPU platform.
 *
 */
int main(int argc, char* argv[]) 
  // Check args
  if (argc != 2) 
    cout << "Usage of resnet50 demo: ./resnet50 [model_file]" << endl;
    return -1;
  
  auto graph = xir::Graph::deserialize(argv[1]);
  auto subgraph = get_dpu_subgraph(graph.get());
  CHECK_EQ(subgraph.size(), 1u)
      << "resnet50 should have one and only one dpu subgraph.";
  LOG(INFO) << "create running for subgraph: " << subgraph[0]->get_name();
  /*create runner*/
  auto runner = vart::Runner::create_runner(subgraph[0], "run");
  // ai::XdpuRunner* runner = new ai::XdpuRunner("./");
  /*get in/out tensor*/
  auto inputTensors = runner->get_input_tensors();
  auto outputTensors = runner->get_output_tensors();

  /*get in/out tensor shape*/
  int inputCnt = inputTensors.size();
  int outputCnt = outputTensors.size();
  TensorShape inshapes[inputCnt];
  TensorShape outshapes[outputCnt];
  shapes.inTensorList = inshapes;
  shapes.outTensorList = outshapes;
  getTensorShape(runner.get(), &shapes, inputCnt, outputCnt);

  /*run with batch*/
  runResnet50(runner.get());
  return 0;

AI Library

当使用的模型在Vitis AI Model Zoo中时，可以直接复用相应的模型demo，以yolov3为例：

int main(int argc, char *argv[]) 
  if (argc < 2) 
    cerr << "usage: " << argv[0] << " image_file_url " << endl;
    abort();
  
  Mat img = cv::imread(argv[2]);
  if (img.empty()) 
    cerr << "cannot load " << argv[2] << endl;
    abort();
  

  auto yolo = vitis::ai::YOLOv3::create(argv[1], true);

  //  auto yolo =
  //    vitis::ai::YOLOv3::create(xilinx::ai::YOLOV3_VOC_416x416_TF, true);

  auto results = yolo->run(img);

  for (auto &box : results.bboxes) 
    int label = box.label;
    float xmin = box.x * img.cols + 1;
    float ymin = box.y * img.rows + 1;
    float xmax = xmin + box.width * img.cols;
    float ymax = ymin + box.height * img.rows;
    if (xmin < 0.) xmin = 1.;
    if (ymin < 0.) ymin = 1.;
    if (xmax > img.cols) xmax = img.cols;
    if (ymax > img.rows) ymax = img.rows;
    float confidence = box.score;

    cout << "RESULT: " << label << "\\t" << xmin << "\\t" << ymin << "\\t" << xmax
         << "\\t" << ymax << "\\t" << confidence << "\\n";
    rectangle(img, Point(xmin, ymin), Point(xmax, ymax), Scalar(0, 255, 0), 1,
              1, 0);
  
  //    imshow("", img);
  //    waitKey(0);
  imwrite("result.jpg", img);

  return 0;

官方提供的模型列表直接参考：https://github.com/Xilinx/Vitis-AI/tree/master/models/AI-Model-Zoo

DPU Task

如果是使用DPU Task，那么可以直接参考yolov3这个例子：

// origin: Vitis-AI/demo/Vitis-AI-Library/samples/dpu_task/yolov3/demo_yolov3.cpp

// The parameters of yolov3_voc, each value could be set as actual needs.
// They could also be kept in a text file to avoid hard-coding them here.
// The string is protobuf text format; it is parsed with
// google::protobuf::TextFormat::ParseFromString into a DpuModelParam.
const std::string yolov3_config =
    "   name: \"yolov3_voc_416\" \n"
    "   model_type : YOLOv3 \n"
    "   yolo_v3_param { \n"
    "     num_classes: 20 \n"
    "     anchorCnt: 3 \n"
    "     conf_threshold: 0.3 \n"
    "     nms_threshold: 0.45 \n"
    "     layer_name: \"81\" \n"
    "     layer_name: \"93\" \n"
    "     layer_name: \"105\" \n"
    "     biases: 10 \n"
    "     biases: 13 \n"
    "     biases: 16 \n"
    "     biases: 30 \n"
    "     biases: 33 \n"
    "     biases: 23 \n"
    "     biases: 30 \n"
    "     biases: 61 \n"
    "     biases: 62 \n"
    "     biases: 45 \n"
    "     biases: 59 \n"
    "     biases: 119 \n"
    "     biases: 116 \n"
    "     biases: 90 \n"
    "     biases: 156 \n"
    "     biases: 198 \n"
    "     biases: 373 \n"
    "     biases: 326 \n"
    "     test_mAP: false \n"
    "   } \n";

int main(int argc, char* argv[]) 
  // argv[1]是xmodel的位置
  auto kernel_name = argv[1];

  // Read image from a path.
  vector<Mat> imgs;
  vector<string> imgs_names;
  for (int i = 2; i < argc; i++) 
    // image file names.
    auto img = cv::imread(argv[i]);
    if (img.empty()) 
      std::cout << "Cannot load " << argv[i] << std::endl;
      continue;
    
    imgs.push_back(img);
    imgs_names.push_back(argv[i]);
  
  if (imgs.empty()) 
    std::cerr << "No image load success!" << std::endl;
    abort();
  
  // Create a dpu task object.
  auto task = vitis::ai::DpuTask::create(kernel_name);
  auto batch = task->get_input_batch(0, 0);
  // Set the mean values and scale values.
  task->setMeanScaleBGR(0.0f, 0.0f, 0.0f,
                        0.00390625f, 0.00390625f, 0.00390625f);
  auto input_tensor = task->getInputTensor(0u);
  CHECK_EQ((int)input_tensor.size(), 1)
      << " the dpu model must have only one input";
  auto width = input_tensor[0].width;
  auto height = input_tensor[0].height;
  auto size = cv::Size(width, height);
  // Create a config and set the correlating data to control post-process.
  vitis::ai::proto::DpuModelParam config;
  // Fill all the parameters.
  auto ok =
      google::protobuf::TextFormat::ParseFromString(yolov3_config, &config);
  if (!ok) 
    cerr << "Set parameters failed!" << endl;
    abort();
  

  vector<Mat> inputs;
  vector<int> input_cols, input_rows;
  for (long unsigned int i = 0, j = -1; i < imgs.size(); i++) 
    /* Pre-process Part */
    // Resize it if its size is not match.
    cv::Mat image;
    input_cols.push_back(imgs[i].cols);
    input_rows.push_back(imgs[i].rows);
    if (size != imgs[i].size()) 
      cv::resize(imgs[i], image, size);
     else 
      image = imgs[i];
    
    inputs.push_back(image);
    j++;
    if (j < batch - 1 && i < imgs.size() - 1) 
      continue;
    

    // Set the input images into dpu.
    task->setImageRGB(inputs);

    /* DPU Runtime */
    // Run the dpu.
    task->run(0u);

    /* Post-process part */
    // Get output.
    auto output_tensor = task->getOutputTensor(0u);
    // Execute the yolov3 post-processing.
   以上是关于3KV260开发Vitis AI library APIs的主要内容，如果未能解决你的问题，请参考以下文章