使用 MXNet C Predict API 实现基于 SSD 的目标检测

本文记录使用 MXNet 的 C Predict API 实现目标检测的过程. 主要是参考 classification 的例子实现了 detection.

官网提供的 model 是 train phase 的 model, 因此, 输入有 data 和 label 两个 Input Node, 在预测的时候, 不需要 label Input Node, 在下载到官网的 model 之后需要使用官网的 deploy.py 把 model 转换一下, 方法如下:

```bash
# 首先下载训练好的 model 文件, 例如 https://github.com/zhreshold/mxnet-ssd/releases/download/v0.6/resnet50_ssd_512_voc0712_trainval.zip 放到 model 中.
# in /path/to/mxnet/example/ssd
python deploy.py --network=resnet50 --data-shape 512
```


转换完成之后, 使用下面的 C++ 代码加载 model 并进行检测:
#include <stdio.h>
#include <cassert>
#include <fstream>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
// Path for c_predict_api
#include <mxnet/c_predict_api.h>
// Default mean value. Not referenced by any code visible in this listing.
const mx_float DEFAULT_MEAN = 117.0;
// Reads a whole file into a heap buffer owned by this object.
//
// On success, GetBuffer() points at GetLength() bytes of file content followed
// by one extra '\0' byte, so the buffer can also be handed to APIs that expect
// a NUL-terminated C string. On any failure (file cannot be opened, its size
// cannot be determined or does not fit in an int, or the read is short)
// GetLength() is 0 and GetBuffer() is null.
//
// The object is non-copyable because it owns a raw allocation; copying would
// make two objects delete the same buffer.
class BufferFile {
 public:
  std::string file_path_;
  int length_;
  char* buffer_;

  explicit BufferFile(std::string file_path)
      : file_path_(file_path), length_(0), buffer_(nullptr) {
    std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary);
    if (!ifs) {
      std::cerr << "Can't open the file. Please check " << file_path << ". \n";
      return;
    }

    ifs.seekg(0, std::ios::end);
    const std::streamoff file_size = ifs.tellg();
    ifs.seekg(0, std::ios::beg);
    // tellg() returns -1 on failure; sizes above INT_MAX cannot be stored in length_.
    if (file_size < 0 ||
        file_size > static_cast<std::streamoff>(std::numeric_limits<int>::max())) {
      std::cerr << "Can't determine a usable size for " << file_path << ". \n";
      return;
    }

    length_ = static_cast<int>(file_size);
    std::cout << file_path.c_str() << " ... " << length_ << " bytes\n";

    // One extra byte for the terminating '\0'.
    buffer_ = new char[static_cast<size_t>(length_) + 1];
    ifs.read(buffer_, length_);
    if (ifs.gcount() != static_cast<std::streamsize>(length_)) {
      std::cerr << "Can't read the file. Please check " << file_path << ". \n";
      delete[] buffer_;
      buffer_ = nullptr;
      length_ = 0;
      return;
    }
    buffer_[length_] = '\0';
  }

  BufferFile(const BufferFile&) = delete;
  BufferFile& operator=(const BufferFile&) = delete;

  // Number of content bytes read (excludes the trailing '\0'); 0 on failure.
  int GetLength() const {
    return length_;
  }

  // Pointer to the content, or null on failure. Owned by this object.
  char* GetBuffer() const {
    return buffer_;
  }

  ~BufferFile() {
    delete[] buffer_;  // delete[] on a null pointer is a no-op
    buffer_ = nullptr;
  }
};
// Loads an image, resizes it to `resize_size`, subtracts a fixed per-channel
// mean and writes the result into `image_data` as a planar (channel-major)
// float tensor: all R values, then all G values, then all B values, each plane
// stored row by row.
//
// image_file  : path of the image to load (decoded by OpenCV as 3-channel BGR).
// image_data  : destination; must have room for
//               3 * resize_size.height * resize_size.width floats.
// channels    : must be 3; any other value is rejected.
// resize_size : target width/height of the network input.
// mean_data   : accepted for signature compatibility but currently ignored;
//               the fixed means below are always used.
//
// Throws std::invalid_argument when channels != 3 and std::runtime_error when
// the image cannot be read.
//
// NOTE(review): the R,G,B plane order follows the +0/+1/+2 offsets the previous
// implementation used for the r/g/b values — confirm it matches the
// preprocessing the model was trained with.
void GetImageFile(const std::string image_file,
                  mx_float* image_data, const int channels,
                  const cv::Size resize_size, const mx_float* mean_data = nullptr) {
  (void)mean_data;  // intentionally unused, see above

  if (channels != 3) {
    throw std::invalid_argument("GetImageFile: only 3-channel input is supported");
  }

  // Read all kinds of file into a BGR color 3 channels image
  cv::Mat im_ori = cv::imread(image_file, cv::IMREAD_COLOR);
  if (im_ori.empty()) {
    std::cerr << "Can't open the image. Please check " << image_file << ". \n";
    throw std::runtime_error("GetImageFile: can't open image " + image_file);
  }

  cv::Mat im;
  cv::resize(im_ori, im, resize_size);

  const mx_float mean_b = 104.0f;
  const mx_float mean_g = 117.0f;
  const mx_float mean_r = 123.0f;

  // One output plane per channel, each rows*cols floats.
  const size_t plane_size = static_cast<size_t>(im.rows) * static_cast<size_t>(im.cols);
  mx_float* plane_r = image_data;
  mx_float* plane_g = image_data + plane_size;
  mx_float* plane_b = image_data + 2 * plane_size;

  for (int row = 0; row < im.rows; ++row) {
    // ptr(row) handles any row padding in the Mat; pixels are interleaved B,G,R.
    const uchar* pixel = im.ptr<uchar>(row);
    for (int col = 0; col < im.cols; ++col) {
      *plane_b++ = static_cast<mx_float>(*pixel++) - mean_b;
      *plane_g++ = static_cast<mx_float>(*pixel++) - mean_g;
      *plane_r++ = static_cast<mx_float>(*pixel++) - mean_r;
    }
  }
}
int main(int argc, char* argv[]) {
if (argc < 4) {
std::cout << "Usage: ./detect symbol_path params_path image_path" << std::endl;
return 0;
}
std::string test_file;
test_file = std::string(argv[3]);
// Models path for your model, you have to modify it
std::string json_file = std::string(argv[1]);
std::string param_file = std::string(argv[2]);
BufferFile json_data(json_file);
BufferFile param_data(param_file);
// Parameters
int dev_type = 1; // 1: cpu, 2: gpu
int dev_id = 0; // arbitrary.
mx_uint num_input_nodes = 1; // 1 for feedforward
const char* input_key[1] = {"data"};
const char** input_keys = input_key;
// Image size and channels
int width = 512;
int height = 512;
int channels = 3;
const mx_uint input_shape_indptr[2] = { 0, 4 };
const mx_uint input_shape_data[4] = { 1, static_cast<mx_uint>(channels), static_cast<mx_uint>(height), static_cast<mx_uint>(width)};
PredictorHandle pred_hnd = 0;
if (json_data.GetLength() == 0 || param_data.GetLength() == 0) {
return -1;
}
// Create Predictor
MXPredCreate((const char*)json_data.GetBuffer(),
(const char*)param_data.GetBuffer(),
static_cast<size_t>(param_data.GetLength()),
dev_type, dev_id, num_input_nodes, input_keys, input_shape_indptr, input_shape_data, &pred_hnd);
assert(pred_hnd);
int image_size = width * height * channels;
// // Read Image Data
std::vector<mx_float> image_data = std::vector<mx_float>(image_size);
GetImageFile(test_file, image_data.data(), channels, cv::Size(width, height));
// // Set Input Image
int64 start = cv::getTickCount();
MXPredSetInput(pred_hnd, "data", image_data.data(), image_size);
// // Do Predict Forward
for(int i=0; i<20; ++i){
MXPredForward(pred_hnd);
}
mx_uint output_index = 0;
mx_uint *shape = 0;
mx_uint shape_len;
// Get Output Result
MXPredGetOutputShape(pred_hnd, output_index, &shape, &shape_len);
size_t size = 1;
for (mx_uint i = 0; i < shape_len; ++i) size *= shape[i];
std::vector<float> data(size);
MXPredGetOutput(pred_hnd, 0, data.data(), size);
int64 end = cv::getTickCount();
double secs = (end-start)/cv::getTickFrequency();
std::cout<<"time: " <<secs<<std::endl;
assert(data.size() % 6 == 0);
cv::Mat mat = cv::imread(test_file, 1);
int orig_cols = mat.cols;
int orig_rows = mat.rows;
cv::resize(mat, mat, cv::Size(width, height));
for(int i=0; i<data.size(); i+=6){
if(data[i]<0) continue;
int id = static_cast<int>(data[i]);
float score = data[i+1];
if(score < 0.5) continue;
std::cout<<score<<std::endl;
int xmin = static_cast<int>((data[i+2])*width);
int ymin = static_cast<int>((data[i+3])*height);
int xmax = static_cast<int>((data[i+4])*width);
int ymax = static_cast<int>((data[i+5])*height);
cv::rectangle(mat, cv::Point(xmin, ymin), cv::Point(xmax, ymax), cv::Scalar(255, 0, 0), 2);
}
cv::resize(mat, mat, cv::Size(orig_cols, orig_rows));
cv::imshow(test_file, mat);
cv::waitKey(0);
return 0;
}
}