本文使用YOLOV3进行目标检测,同时使用DVPP进行JPEG解码和缩放,使用AIPP进行色域转换和归一化。

操作过程

下载模型并同步文件

首先根据README中的链接下载模型文件和权重文件,放置在model文件夹下,同时配置远程服务器ssh链接,同步工程文件。

down

模型转换

利用ATC转为om模型。
atc

编译文件

在编译之前,需要先配置环境,可参考《昇腾AI设备安装开发环境》一文修改.bashrc并安装opencv等依赖。

添加构建配置,进行构建:
build
build

执行

配置并执行
conf
run

实验结果

输入 输出
in out

代码分析

文件目录

.
├── CMakeLists.txt
├── data
│   └── dog1_1024_683.jpg # 测试图片
├── inc # 头文件
│   ├── dvpp_process.h
│   ├── model_process.h
│   ├── sample_process.h
│   └── utils.h
├── model # 模型
│   ├── aipp_nv12.cfg
│   ├── fusion_result.json
│   ├── yolov3.caffemodel
│   ├── yolov3.om
│   └── yolov3.prototxt
├── README_CN.md
├── scripts
└── src # 源文件
    ├── acl.json # ACL配置
    ├── CMakeLists.txt
    ├── dvpp_process.cpp # DVPP
    ├── main.cpp # 主入口
    ├── model_process.cpp
    ├── sample_process.cpp
    └── utils.cpp

main

1
2
3
4
5
6
7
8
/**
 * Program entry point: initializes the sample's resources, runs the
 * detection pipeline once, and reports the outcome as the exit code.
 *
 * @return SUCCESS when both steps succeed; otherwise the Result returned by
 *         the step that failed.
 */
int main()
{
    SampleProcess sampleProcess;

    // Do not run the pipeline on resources that failed to initialize.
    Result ret = sampleProcess.InitResource();
    if (ret != SUCCESS) {
        return ret;
    }

    ret = sampleProcess.Process();
    if (ret != SUCCESS) {
        return ret;
    }

    // Only reached when every step above reported SUCCESS.
    INFO_LOG("execute sample success");
    return SUCCESS;
}

sampleProcess

sampleProcess.InitResource()

准备运行环境

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// 创建ACL并初始化
const char *aclConfigPath = "../src/acl.json";
aclError ret = aclInit(aclConfigPath);

// 配置 device
ret = aclrtSetDevice(g_deviceId_);

// 创建 context -> stream
ret = aclrtCreateContext(&g_context_, g_deviceId_);
ret = aclrtCreateStream(&g_stream_);

// 配置ACL_DEVICE模型
ret = aclrtGetRunMode(&g_runMode_);

bool isDivece = (g_runMode_ == ACL_DEVICE);
RunStatus::SetDeviceStatus(isDivece);
INFO_LOG("get run mode success");
return SUCCESS;

sampleProcess.Process()

整个过程的大概处理流程如下:

初始化DVPP -> 加载模型 -> 输入图片 -> 预处理 -> 推理 -> 获取输出 -> 后处理

modelProcess主要负责与模型相关的操作,如加载、创建输入输出、推理等

dvppProcess主要负责与DVPP处理相关的操作,如图像解码、缩放等。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
// Excerpt of the main processing body: set up DVPP and the model once, then
// for each test picture run decode/resize, inference and post-processing.
// NOTE(review): `ret` is overwritten by every call and never inspected in
// this excerpt, so a failing step does not stop the pipeline.

// Create the DVPP helper on the sample's stream and initialize it
DvppProcess dvppProcess(g_stream_);
Result ret = dvppProcess.InitResource();

ModelProcess modelProcess;
const char* omModelPath = "../model/yolov3.om";
ret = modelProcess.LoadModel(omModelPath); // load the offline model
ret = modelProcess.CreateDesc(); // fetch the model description
ret = modelProcess.CreateOutput(); // create the output dataset

int modelInputWidth;
int modelInputHeight;
ret = modelProcess.GetModelInputWH(modelInputWidth, modelInputHeight); // query the model input width and height

// Four floats (width, height, width, height) that are passed to the model as
// its second input buffer via CreateInput() below.
const float imageInfo[4] = {(float)modelInputWidth, (float)modelInputHeight,
(float)modelInputWidth, (float)modelInputHeight};
size_t imageInfoSize_ = sizeof(imageInfo);
void *imageInfoBuf_;

// Copy imageInfo into a device buffer; the copy helper depends on the run mode
if (g_runMode_ == ACL_HOST)
imageInfoBuf_ = Utils::CopyDataHostToDevice((void *)imageInfo, imageInfoSize_);
else
imageInfoBuf_ = Utils::CopyDataDeviceToDevice((void *)imageInfo, imageInfoSize_);

// Pictures to run inference on (path followed by two zero-initialized fields;
// presumably width and height filled in while loading — TODO confirm)
PicDesc testPic[] = {
{"../data/dog1_1024_683.jpg", 0, 0},
};

// Run the pipeline once per picture
for (size_t index = 0; index < sizeof(testPic) / sizeof(testPic[0]); ++index) {
uint32_t devPicBufferSize;
void *picDevBuffer = nullptr;
// Read the picture into a device buffer
ret = Utils::GetDeviceBufferOfPicture(testPic[index], picDevBuffer, devPicBufferSize);

// Hand the picture buffer to DVPP
dvppProcess.SetInput(picDevBuffer, devPicBufferSize, testPic[index]);

// Configure the DVPP output for the model input size
ret = dvppProcess.InitDvppOutputPara(modelInputWidth, modelInputHeight);

// Run DVPP
ret = dvppProcess.Process();

// The input picture buffer is no longer needed once DVPP has run
(void)acldvppFree(picDevBuffer);
picDevBuffer = nullptr;

void *dvppOutputBuffer = nullptr;
int dvppOutputSize;

// Take the DVPP output buffer; it is released below with acldvppFree
dvppProcess.GetDvppOutput(&dvppOutputBuffer, dvppOutputSize);

// Build the model input from the DVPP output and the image-info buffer
ret = modelProcess.CreateInput(dvppOutputBuffer, dvppOutputSize, imageInfoBuf_, imageInfoSize_);

// Run inference
ret = modelProcess.Execute();

// Release the DVPP output and the input dataset after inference
(void)acldvppFree(dvppOutputBuffer);
modelProcess.DestroyInput();

// Fetch the model output dataset
const aclmdlDataset *modelOutput = modelProcess.GetModelOutputData();

// Post-process the output for this picture
ret = Postprocess(modelOutput, testPic[index], modelInputWidth, modelInputHeight);
}
// Release the device copy of imageInfo after all pictures are processed
aclrtFree(imageInfoBuf_);

return SUCCESS;

sampleProcess.Postprocess()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
uint32_t dataSize = 0;
float* detectData = (float *)GetInferenceOutputItem(dataSize, modelOutput, g_bBoxDataBufId); // 检测框数据

uint32_t* boxNum = (uint32_t *)GetInferenceOutputItem(dataSize, modelOutput, g_boxNumDataBufId); // 目标类别

uint32_t totalBox = boxNum[0];
vector<BBox> detectResults;
// 缩放比例
float widthScale = (float)(picDesc.width) / modelWidth;
float heightScale = (float)(picDesc.height) / modelHeight;

// 原图上画框
for (uint32_t i = 0; i < totalBox; i++) {
BBox boundBox;

uint32_t score = uint32_t(detectData[totalBox * SCORE + i] * 100);
boundBox.rect.ltX = detectData[totalBox * TOPLEFTX + i] * widthScale;
boundBox.rect.ltY = detectData[totalBox * TOPLEFTY + i] * heightScale;
boundBox.rect.rbX = detectData[totalBox * BOTTOMRIGHTX + i] * widthScale;
boundBox.rect.rbY = detectData[totalBox * BOTTOMRIGHTY + i] * heightScale;

uint32_t objIndex = (uint32_t)detectData[totalBox * LABEL + i];
boundBox.text = yolov3Label[objIndex] + std::to_string(score) + "\%";
printf("%d %d %d %d %s\n", boundBox.rect.ltX, boundBox.rect.ltY,
boundBox.rect.rbX, boundBox.rect.rbY, boundBox.text.c_str());

detectResults.emplace_back(boundBox);
}

// 转为图片
DrawBoundBoxToImage(detectResults, picDesc.picName);

return SUCCESS;

ModelProcess

modelProcess.LoadModel()

1
2
3
4
5
6
7
8
9
10
11
12
// 获取模型尺寸
aclError ret = aclmdlQuerySize(modelPath, &g_modelWorkSize_, &g_modelWeightSize_);

// 分配内存,大页内存优先
ret = aclrtMalloc(&g_modelWorkPtr_, g_modelWorkSize_, ACL_MEM_MALLOC_HUGE_FIRST);
ret = aclrtMalloc(&g_modelWeightPtr_, g_modelWeightSize_, ACL_MEM_MALLOC_HUGE_FIRST);

// 从文件中读取模型到内存
ret = aclmdlLoadFromFileWithMem(modelPath, &g_modelId_, g_modelWorkPtr_, g_modelWorkSize_, g_modelWeightPtr_, g_modelWeightSize_);

g_loadFlag_ = true;
return SUCCESS;

modelProcess.Execute()

在执行之前还有一系列准备工作

获取模型描述 -> 创建输出并分配内存 -> 获取模型输入宽高 -> 将图片信息移动到Device上 -> 读取图片并经过DVPP处理 -> 创建模型输入 -> 执行

  1. modelProcess.CreateDesc()

    1
    2
    3
    4
    5
    6
    /**
     * Creates the model description object and fills it for the loaded model
     * identified by g_modelId_.
     *
     * @return Always SUCCESS in this excerpt.
     */
    Result ModelProcess::CreateDesc()
    {
        g_modelDesc_ = aclmdlCreateDesc();

        // NOTE(review): the status of aclmdlGetDesc is discarded here.
        (void)aclmdlGetDesc(g_modelDesc_, g_modelId_);

        return SUCCESS;
    }
  2. modelProcess.CreateOutput()

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    /**
     * Creates the output dataset g_output_ with one buffer per model output,
     * each sized as reported by the model description.
     *
     * @return Always SUCCESS in this excerpt.
     */
    Result ModelProcess::CreateOutput()
    {
        g_output_ = aclmdlCreateDataset();

        const size_t outputCount = aclmdlGetNumOutputs(g_modelDesc_);
        size_t outputIdx = 0;
        while (outputIdx < outputCount) {
            const size_t bufferBytes = aclmdlGetOutputSizeByIndex(g_modelDesc_, outputIdx);

            // Allocate memory for this output and wrap it in a data buffer.
            // NOTE(review): allocation and add-to-dataset statuses are discarded.
            void *outputMem = nullptr;
            (void)aclrtMalloc(&outputMem, bufferBytes, ACL_MEM_MALLOC_NORMAL_ONLY);
            aclDataBuffer *wrappedBuffer = aclCreateDataBuffer(outputMem, bufferBytes);
            (void)aclmdlAddDatasetBuffer(g_output_, wrappedBuffer);

            ++outputIdx;
        }

        return SUCCESS;
    }
  3. modelProcess.GetModelInputWH()

    1
    2
    3
    4
    5
    6
    7
    8
    9
    /**
     * Reads the dimensions of model input 0 and reports its width and height.
     *
     * @param width  Receives dims[2] of input 0.
     * @param height Receives dims[1] of input 0.
     * @return Always SUCCESS in this excerpt.
     */
    Result ModelProcess::GetModelInputWH(int &width, int &height)
    {
        // The original excerpt annotates the layout as NHWC.
        aclmdlIODims inputDims;

        // NOTE(review): the query status is discarded here.
        (void)aclmdlGetInputDims(g_modelDesc_, 0, &inputDims);

        width = inputDims.dims[2];
        height = inputDims.dims[1];

        INFO_LOG("model input width %d, input height %d", width, height);
        return SUCCESS;
    }
  4. CopyDataHostToDevice

    1
    2
    3
    4
    /**
     * Copies a buffer to the device using the ACL_MEMCPY_HOST_TO_DEVICE policy.
     *
     * @param deviceData Source buffer passed on to CopyDataToDevice (despite
     *                   its name, the copy policy treats it as host data).
     * @param dataSize   Number of bytes to copy.
     * @return Whatever CopyDataToDevice returns for this request.
     */
    void* Utils::CopyDataHostToDevice(void* deviceData, uint32_t dataSize)
    {
        void *copied = CopyDataToDevice(deviceData, dataSize, ACL_MEMCPY_HOST_TO_DEVICE);
        return copied;
    }
  5. dvppProcess

  6. modelProcess.CreateInput()

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    /**
     * Builds the model input dataset g_input_ from two caller-provided buffers.
     *
     * @param input1     First input buffer (the caller passes the DVPP output).
     * @param input1Size Size of input1 in bytes.
     * @param input2     Second input buffer (the caller passes the image info).
     * @param input2Size Size of input2 in bytes.
     * @return SUCCESS when both buffers were wrapped and added; FAILED when the
     *         model reports no inputs, expects a different number of inputs,
     *         or the dataset / a data buffer could not be created.
     */
    Result ModelProcess::CreateInput(void *input1, size_t input1Size, void *input2, size_t input2Size)
    {
        const vector<DataInfo> inputData = {{input1, input1Size}, {input2, input2Size}};

        const uint32_t dataNum = aclmdlGetNumInputs(g_modelDesc_);
        if (dataNum == 0) {
            ERROR_LOG("Create input failed for no input data");
            return FAILED;
        }
        // Exactly two buffers are supplied; reject any other input count up
        // front instead of building a dataset that cannot match the model.
        if (dataNum != inputData.size()) {
            ERROR_LOG("Create input failed for input count mismatch");
            return FAILED;
        }

        g_input_ = aclmdlCreateDataset();
        if (g_input_ == nullptr) {
            ERROR_LOG("Create input failed for dataset creation");
            return FAILED;
        }

        for (const DataInfo &item : inputData) {
            aclDataBuffer *dataBuf = aclCreateDataBuffer(item.data, item.size);
            if (dataBuf == nullptr) {
                // NOTE(review): g_input_ is left as-is here; presumably
                // DestroyInput() releases it — confirm against its definition.
                ERROR_LOG("Create input failed for data buffer creation");
                return FAILED;
            }
            // NOTE(review): the add-to-dataset status is still discarded.
            (void)aclmdlAddDatasetBuffer(g_input_, dataBuf);
        }

        INFO_LOG("create model input success");
        return SUCCESS;
    }
  7. modelProcess.Execute()

    1
    2
    3
    4
    5
    /**
     * Runs the loaded model on g_input_, writing results into g_output_.
     *
     * @return Always SUCCESS in this excerpt.
     */
    Result ModelProcess::Execute()
    {
        // NOTE(review): the execution status is discarded here, so the caller
        // cannot tell whether inference actually succeeded.
        (void)aclmdlExecute(g_modelId_, g_input_, g_output_);
        return SUCCESS;
    }

DVPP

DVPP(数字视觉预处理)是在昇腾芯片的DVPP硬件模块上执行的图像处理,主要流程为:初始化 -> 设置输入 -> 初始化输出参数 -> 执行 -> 获取输出

dvppProcess.InitResource()

创建通道描述信息 -> 创建通道 -> 缩放配置

1
2
3
4
5
6
7
8
9
10
11
/**
 * Creates the DVPP channel description, the channel itself, and the resize
 * configuration used later by the resize step.
 *
 * @return Always SUCCESS in this excerpt.
 */
Result DvppProcess::InitResource()
{
    // Channel description first, then the channel built from it.
    g_dvppChannelDesc_ = acldvppCreateChannelDesc();
    // NOTE(review): the channel-creation status is discarded here.
    (void)acldvppCreateChannel(g_dvppChannelDesc_);

    // Resize configuration.
    g_resizeConfig_ = acldvppCreateResizeConfig();

    INFO_LOG("dvpp init resource success");
    return SUCCESS;
}

dvppProcess.SetInput()

1
2
3
4
5
6
/**
 * Records the input picture for the next DVPP run.
 *
 * @param inDevBuffer     Buffer holding the input picture data.
 * @param inDevBufferSize Size of inDevBuffer in bytes.
 * @param picDesc         Picture description; only jpegDecodeSize is read.
 */
void DvppProcess::SetInput(void *inDevBuffer, uint32_t inDevBufferSize, const PicDesc &picDesc)
{
    // Size of the decode output, taken from the picture description.
    g_jpegDecodeOutputSize_ = picDesc.jpegDecodeSize;

    // Input buffer and its size.
    g_inDevBufferSize_ = inDevBufferSize;
    g_inDevBuffer_ = inDevBuffer;
}

dvppProcess.InitDvppOutputPara()

1
2
3
4
5
6
7
8
9
10
11
12
13
/**
 * Stores the model input size, derives the aligned strides and buffer size of
 * the resize output, and allocates the resize output buffer.
 *
 * @param modelInputWidth  Width the picture is resized to.
 * @param modelInputHeight Height the picture is resized to.
 * @return Always SUCCESS in this excerpt.
 */
Result DvppProcess::InitDvppOutputPara(int modelInputWidth, int modelInputHeight)
{
    g_modelInputWidth_ = modelInputWidth;
    g_modelInputHeight_ = modelInputHeight;

    // Alignment constraints: width stride to 16, height stride to 2.
    g_resizeOutWidthStride_ = AlignSize(modelInputWidth, 16);
    g_resizeOutHeightStride_ = AlignSize(modelInputHeight, 2);

    // Output buffer size: stride area times 3/2.
    g_resizeOutBufferSize_ = g_resizeOutWidthStride_ * g_resizeOutHeightStride_ * 3 / 2;

    // NOTE(review): the allocation status is discarded here.
    (void)acldvppMalloc(&g_resizeOutBufferDev_, g_resizeOutBufferSize_);
    return SUCCESS;
}

dvppProcess.Process()

DVPP执行流程:初始化图片解码描述信息 -> JPEG图片解码 -> 初始化缩放输入与输出描述信息 -> 图片缩放 -> 销毁资源

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * Runs the DVPP pipeline: JPEG decode followed by resize.
 *
 * A failing step skips the remaining steps of its stage. The stage's destroy
 * function still runs, exactly as it did unconditionally before, and the
 * failing Result is returned instead of reporting success.
 *
 * @return SUCCESS when every step succeeded; otherwise the first failing
 *         step's Result.
 */
Result DvppProcess::Process()
{
    // Stage 1: decode.
    Result ret = InitDecodeOutputDesc();
    if (ret == SUCCESS) {
        ret = ProcessDecode();
    }
    DestroyDecodeResource();
    if (ret != SUCCESS) {
        return ret;
    }

    // Stage 2: resize. The Init*Desc steps can return FAILED when a picture
    // description cannot be created, so resizing must not run after that.
    ret = InitResizeInputDesc();
    if (ret == SUCCESS) {
        ret = InitResizeOutputDesc();
    }
    if (ret == SUCCESS) {
        ret = ProcessResize();
    }
    DestroyResizeResource();
    if (ret != SUCCESS) {
        return ret;
    }

    INFO_LOG("Process dvpp success");
    return SUCCESS;
}
  1. InitDecodeOutputDesc()

    1
    2
    3
    4
    5
    6
    7
    8
    9
    /**
     * Prepares the decode output: creates the picture description, allocates
     * the output buffer and attaches buffer, format and size to the
     * description.
     *
     * @return SUCCESS on completion; FAILED when the picture description
     *         cannot be created (same check as the resize descriptions).
     */
    Result DvppProcess::InitDecodeOutputDesc()
    {
        // Create the description before allocating, so a creation failure
        // returns without having allocated the output buffer.
        g_decodeOutputDesc_ = acldvppCreatePicDesc();
        if (g_decodeOutputDesc_ == nullptr) {
            ERROR_LOG("acldvppCreatePicDesc failed");
            return FAILED;
        }

        // NOTE(review): the allocation status is still discarded here.
        (void)acldvppMalloc(&g_decodeOutDevBuffer_, g_jpegDecodeOutputSize_);

        (void)acldvppSetPicDescData(g_decodeOutputDesc_, g_decodeOutDevBuffer_);
        (void)acldvppSetPicDescFormat(g_decodeOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420);
        (void)acldvppSetPicDescSize(g_decodeOutputDesc_, g_jpegDecodeOutputSize_);
        return SUCCESS;
    }
  2. ProcessDecode()
    JPEG异步解码

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    /**
     * Submits the asynchronous JPEG decode on g_stream_, waits for the stream,
     * then caches the decoded picture's width, height and width stride.
     *
     * @return Always SUCCESS in this excerpt.
     */
    Result DvppProcess::ProcessDecode()
    {
        // NOTE(review): the submit and synchronize statuses are discarded.
        (void)acldvppJpegDecodeAsync(g_dvppChannelDesc_,
                                     g_inDevBuffer_,
                                     g_inDevBufferSize_,
                                     g_decodeOutputDesc_,
                                     g_stream_);
        (void)aclrtSynchronizeStream(g_stream_);

        // Read the decoded geometry back from the output description.
        g_decodeOutputWidth_ = acldvppGetPicDescWidth(g_decodeOutputDesc_);
        g_decodeOutputHeight_ = acldvppGetPicDescHeight(g_decodeOutputDesc_);
        g_decodeOutputWidthStride_ = acldvppGetPicDescWidthStride(g_decodeOutputDesc_);

        return SUCCESS;
    }
  3. InitResizeInputDesc()
    创建输入图片描述并设置相应的值

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    /**
     * Creates the resize input picture description and points it at the
     * decode output buffer with the decoded geometry.
     *
     * @return SUCCESS on completion; FAILED when the description cannot be
     *         created.
     */
    Result DvppProcess::InitResizeInputDesc()
    {
        // Width stride comes from the decoder; height stride is the decoded
        // height aligned to 16; buffer size is stride area times 3/2.
        const uint32_t inWidthStride = g_decodeOutputWidthStride_;
        const uint32_t inHeightStride = AlignSize(g_decodeOutputHeight_, 16);
        const uint32_t inBufferSize = inWidthStride * inHeightStride * 3 / 2;

        g_resizeInputDesc_ = acldvppCreatePicDesc();
        if (g_resizeInputDesc_ == nullptr) {
            ERROR_LOG("InitResizeInputDesc failed");
            return FAILED;
        }

        // Data buffer and pixel format.
        (void)acldvppSetPicDescData(g_resizeInputDesc_, g_decodeOutDevBuffer_);
        (void)acldvppSetPicDescFormat(g_resizeInputDesc_, PIXEL_FORMAT_YVU_SEMIPLANAR_420);

        // Picture geometry.
        (void)acldvppSetPicDescWidth(g_resizeInputDesc_, g_decodeOutputWidth_);
        (void)acldvppSetPicDescHeight(g_resizeInputDesc_, g_decodeOutputHeight_);
        (void)acldvppSetPicDescWidthStride(g_resizeInputDesc_, inWidthStride);
        (void)acldvppSetPicDescHeightStride(g_resizeInputDesc_, inHeightStride);
        (void)acldvppSetPicDescSize(g_resizeInputDesc_, inBufferSize);

        return SUCCESS;
    }
  4. InitResizeOutputDesc()
    创建输出图片描述并设置相应的值

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    /**
     * Creates the resize output picture description and fills it with the
     * output buffer, format, model input size and the aligned strides.
     *
     * @return SUCCESS on completion; FAILED when the description cannot be
     *         created.
     */
    Result DvppProcess::InitResizeOutputDesc()
    {
        g_resizeOutputDesc_ = acldvppCreatePicDesc();
        if (g_resizeOutputDesc_ != nullptr) {
            // Data buffer and pixel format.
            (void)acldvppSetPicDescData(g_resizeOutputDesc_, g_resizeOutBufferDev_);
            (void)acldvppSetPicDescFormat(g_resizeOutputDesc_, PIXEL_FORMAT_YUV_SEMIPLANAR_420);

            // Picture geometry.
            (void)acldvppSetPicDescWidth(g_resizeOutputDesc_, g_modelInputWidth_);
            (void)acldvppSetPicDescHeight(g_resizeOutputDesc_, g_modelInputHeight_);
            (void)acldvppSetPicDescWidthStride(g_resizeOutputDesc_, g_resizeOutWidthStride_);
            (void)acldvppSetPicDescHeightStride(g_resizeOutputDesc_, g_resizeOutHeightStride_);
            (void)acldvppSetPicDescSize(g_resizeOutputDesc_, g_resizeOutBufferSize_);
            return SUCCESS;
        }

        ERROR_LOG("acldvppCreatePicDesc failed");
        return FAILED;
    }
  5. ProcessResize()
    异步缩放

    1
    2
    3
    4
    5
    6
    7
    8
    /**
     * Sets interpolation value 0 on the resize configuration, submits the
     * asynchronous resize on g_stream_ and waits for the stream.
     *
     * @return Always SUCCESS in this excerpt.
     */
    Result DvppProcess::ProcessResize()
    {
        // NOTE(review): all three statuses below are discarded.
        (void)acldvppSetResizeConfigInterpolation(g_resizeConfig_, 0);

        (void)acldvppVpcResizeAsync(g_dvppChannelDesc_,
                                    g_resizeInputDesc_,
                                    g_resizeOutputDesc_,
                                    g_resizeConfig_,
                                    g_stream_);

        (void)aclrtSynchronizeStream(g_stream_);
        return SUCCESS;
    }

dvppProcess.GetDvppOutput()

1
2
3
4
5
6
7
/**
 * Hands the resize output buffer and its size to the caller and clears the
 * members, so this object no longer refers to the buffer afterwards.
 *
 * @param outputBuffer Receives the resize output buffer pointer.
 * @param outputSize   Receives the resize output buffer size.
 */
void DvppProcess::GetDvppOutput(void **outputBuffer, int &outputSize)
{
    // Pass the pointer on and forget it.
    *outputBuffer = g_resizeOutBufferDev_;
    g_resizeOutBufferDev_ = nullptr;

    // Pass the size on and reset it.
    outputSize = g_resizeOutBufferSize_;
    g_resizeOutBufferSize_ = 0;
}

AIPP

运行在AICore上的图像处理,主要有色域转换(YUV->BGR)和归一化

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# AIPP configuration used when converting the model (model/aipp_nv12.cfg in
# the file tree). Per the article it performs color space conversion
# (YUV -> BGR) and normalization.
aipp_op{
# Static AIPP mode
aipp_mode:static
# No cropping; the load start position below is (0, 0)
crop:false
# R/B and U/V swap switch enabled
rbuv_swap_switch:true
# Input is 8-bit YUV420 semi-planar data
input_format : YUV420SP_U8
load_start_pos_h : 0
load_start_pos_w : 0
# Source image size; presumably equal to the model input size — TODO confirm
src_image_size_w : 416
src_image_size_h : 416

# Enable color space conversion with the 3x3 matrix and input biases below
csc_switch : true

matrix_r0c0 : 298
matrix_r0c1 : 516
matrix_r0c2 : 0
matrix_r1c0 : 298
matrix_r1c1 : -100
matrix_r1c2 : -208
matrix_r2c0 : 298
matrix_r2c1 : 0
matrix_r2c2 : 409
input_bias_0 : 16
input_bias_1 : 128
input_bias_2 : 128
# Per-channel mean and minimum are all zero
mean_chn_0 : 0
mean_chn_1 : 0
mean_chn_2 : 0
min_chn_0 : 0.0
min_chn_1 : 0.0
min_chn_2 : 0.0

# Per-channel multiplier; 0.003921568627451 is approximately 1/255
var_reci_chn_0 :0.003921568627451
var_reci_chn_1 :0.003921568627451
var_reci_chn_2 :0.003921568627451
}

总结

本次复现了使用DVPP+AIPP的YOLOV3目标检测网络,对代码执行流程和AscendCL调用流程进行了分析,了解了使用MindStudio进行AscendCL开发的过程,相较于直接使用VSCode连接ECS开发ACL程序而言,MindStudio提供了可视化的模型转换界面,以及deployment、ssh等功能。