
MXNet's Bind and SimpleBind

Two earlier posts already analyzed the source code of MXNet's Bind and SimpleBind. This one mainly records how the CPP package calls Bind and SimpleBind. Understanding these two parts pays off in two ways: it gives you a reference for writing your own code that calls Bind, and it shows how Symbol/Graph and the concrete NDArrays work together in MXNet.

Bind

The mlp.cpp example is used as illustration. The following questions come up when binding:

  • After the symbol/graph has been built, how do the concrete input NDArrays get fed to the corresponding nodes?
  • After the graph is created from the symbol, there are two ordering questions (the expected order can be inspected with the short sketch below):
    • at the coarse level, the order of the layers relative to each other
    • within each layer, the order of the nodes
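
A quick way to check this ordering is to ask the symbol itself: Symbol::ListArguments() returns the argument names in exactly the order that the in_args vector passed to Bind must follow. Below is a minimal sketch (the variable and layer names are made up for illustration, not taken from mlp.cpp):

#include <iostream>
#include "mxnet-cpp/MxNetCpp.h"

using namespace mxnet::cpp;

int main() {
  auto x     = Symbol::Variable("X");
  auto w     = Symbol::Variable("w0");
  auto b     = Symbol::Variable("b0");
  auto label = Symbol::Variable("label");
  auto fc    = FullyConnected("fc0", x, w, b, 10);
  auto out   = SoftmaxOutput("softmax", fc, label);

  // Prints X, w0, b0, label: the order in which Bind's in_args must be pushed.
  for (const auto &name : out.ListArguments())
    std::cout << name << std::endl;
  return 0;
}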
/*!
 * Copyright (c) 2015 by Contributors
 */

#include <iostream>
#include <vector>
#include <string>
#include "mxnet-cpp/MxNetCpp.h"
// Allow IDE to parse the types
#include "../include/mxnet-cpp/op.h"

using namespace std;
using namespace mxnet::cpp;

/*
 * In this example,
 * we make by hand some data in 10 classes with some pattern
 * and try to use MLP to recognize the pattern.
 */

void OutputAccuracy(mx_float* pred, mx_float* target) {
  int right = 0;
  for (int i = 0; i < 128; ++i) {
    float mx_p = pred[i * 10 + 0];
    float p_y = 0;
    for (int j = 0; j < 10; ++j) {
      if (pred[i * 10 + j] > mx_p) {
        mx_p = pred[i * 10 + j];
        p_y = j;
      }
    }
    if (p_y == target[i]) right++;
  }
  cout << "Accuracy: " << right / 128.0 << endl;
}

void MLP() {
  auto sym_x = Symbol::Variable("X");
  auto sym_label = Symbol::Variable("label");

  const int nLayers = 2;
  vector<int> layerSizes({512, 10});
  vector<Symbol> weights(nLayers);
  vector<Symbol> biases(nLayers);
  vector<Symbol> outputs(nLayers);

  for (int i = 0; i < nLayers; i++) {
    string istr = to_string(i);
    weights[i] = Symbol::Variable(string("w") + istr);
    biases[i] = Symbol::Variable(string("b") + istr);
    Symbol fc = FullyConnected(string("fc") + istr,
                               i == 0 ? sym_x : outputs[i-1],
                               weights[i], biases[i], layerSizes[i]);
    outputs[i] = LeakyReLU(string("act") + istr, fc, LeakyReLUActType::kLeaky);
  }
  auto sym_out = SoftmaxOutput("softmax", outputs[nLayers - 1], sym_label);

  Context ctx_dev(DeviceType::kCPU, 0);

  NDArray array_x(Shape(128, 28), ctx_dev, false);
  NDArray array_y(Shape(128), ctx_dev, false);

  mx_float* aptr_x = new mx_float[128 * 28];
  mx_float* aptr_y = new mx_float[128];

  // we make the data by hand, in 10 classes, with some pattern
  for (int i = 0; i < 128; i++) {
    for (int j = 0; j < 28; j++) {
      aptr_x[i * 28 + j] = i % 10 * 1.0f;
    }
    aptr_y[i] = i % 10;
  }
  array_x.SyncCopyFromCPU(aptr_x, 128 * 28);
  array_x.WaitToRead();
  array_y.SyncCopyFromCPU(aptr_y, 128);
  array_y.WaitToRead();

  // init the parameters
  NDArray array_w_1(Shape(512, 28), ctx_dev, false);
  NDArray array_b_1(Shape(512), ctx_dev, false);
  NDArray array_w_2(Shape(10, 512), ctx_dev, false);
  NDArray array_b_2(Shape(10), ctx_dev, false);

  // the parameters should be initialized in some kind of distribution,
  // so it learns fast
  // but here just give a const value by hand
  array_w_1 = 0.5f;
  array_b_1 = 0.0f;
  array_w_2 = 0.5f;
  array_b_2 = 0.0f;

  // the grads
  NDArray array_w_1_g(Shape(512, 28), ctx_dev, false);
  NDArray array_b_1_g(Shape(512), ctx_dev, false);
  NDArray array_w_2_g(Shape(10, 512), ctx_dev, false);
  NDArray array_b_2_g(Shape(10), ctx_dev, false);

  // Bind the symbolic network with the ndarray
  // all the input args
  // Inputs, including the sample data and the parameters. The push_back order
  // here matters a lot: it must match the order of the indexed graph, so that
  // each concrete NDArray is fed to the right node.
  std::vector<NDArray> in_args;
  in_args.push_back(array_x);
  in_args.push_back(array_w_1);
  in_args.push_back(array_b_1);
  in_args.push_back(array_w_2);
  in_args.push_back(array_b_2);
  in_args.push_back(array_y);
  // all the grads
  // arg_grad_store must follow the same order as in_args
  std::vector<NDArray> arg_grad_store;
  arg_grad_store.push_back(NDArray());  // we don't need the grad of the input
  arg_grad_store.push_back(array_w_1_g);
  arg_grad_store.push_back(array_b_1_g);
  arg_grad_store.push_back(array_w_2_g);
  arg_grad_store.push_back(array_b_2_g);
  arg_grad_store.push_back(NDArray());  // neither do we need the grad of the loss
  // how to handle the grad
  // The third part: how each grad is stored, in the same order as arg_grad_store
  std::vector<OpReqType> grad_req_type;
  grad_req_type.push_back(kNullOp);
  grad_req_type.push_back(kWriteTo);
  grad_req_type.push_back(kWriteTo);
  grad_req_type.push_back(kWriteTo);
  grad_req_type.push_back(kWriteTo);
  grad_req_type.push_back(kNullOp);
  std::vector<NDArray> aux_states;
  // The three inputs above implicitly assume that the order of the indexed
  // graph is already known: the order used here is the post-DFS order of the
  // graph. With SimpleBind this ordering is done inside SimpleBind itself, so
  // we don't have to sort by hand.
  cout << "make the Executor" << endl;
  Executor* exe = new Executor(sym_out, ctx_dev, in_args, arg_grad_store,
                               grad_req_type, aux_states);

  cout << "Training" << endl;
  int max_iters = 20000;
  mx_float learning_rate = 0.0001;
  for (int iter = 0; iter < max_iters; ++iter) {
    exe->Forward(true);

    if (iter % 100 == 0) {
      cout << "epoch " << iter << endl;
      std::vector<NDArray>& out = exe->outputs;
      float* cptr = new float[128 * 10];
      out[0].SyncCopyToCPU(cptr, 128 * 10);
      NDArray::WaitAll();
      OutputAccuracy(cptr, aptr_y);
      delete[] cptr;
    }

    // update the parameters
    exe->Backward();
    for (int i = 1; i < 5; ++i) {
      in_args[i] -= arg_grad_store[i] * learning_rate;
    }
    NDArray::WaitAll();
  }

  delete exe;
  delete[] aptr_x;
  delete[] aptr_y;
}

int main(int argc, char** argv) {
  MLP();
  MXNotifyShutdown();
  return 0;
}

SimpleBind

SimpleBind is not natively supported by MXNet; MXNet itself only supports Bind. SimpleBind is implemented by each language's wrapper.

int main(int argc, char** argv) {
  const int image_size = 28;
  const vector<int> layers{128, 64, 10};
  const int batch_size = 100;
  const int max_epoch = 10;
  const float learning_rate = 0.1;
  const float weight_decay = 1e-2;

  auto train_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./mnist_data/train-images-idx3-ubyte")
      .SetParam("label", "./mnist_data/train-labels-idx1-ubyte")
      .SetParam("batch_size", batch_size)
      .SetParam("flat", 1)
      .CreateDataIter();
  auto val_iter = MXDataIter("MNISTIter")
      .SetParam("image", "./mnist_data/t10k-images-idx3-ubyte")
      .SetParam("label", "./mnist_data/t10k-labels-idx1-ubyte")
      .SetParam("batch_size", batch_size)
      .SetParam("flat", 1)
      .CreateDataIter();

  auto net = mlp(layers);

  Context ctx = Context::cpu();  // Use CPU for training

  std::map<string, NDArray> args;
  args["X"] = NDArray(Shape(batch_size, image_size*image_size), ctx);
  args["label"] = NDArray(Shape(batch_size), ctx);
  // Let MXNet infer the shapes of the other parameters, such as the weights:
  // from the given X and label it infers the shapes of the remaining args and
  // allocates NDArrays for them.
  net.InferArgsMap(ctx, &args, args);

  // Initialize all parameters with uniform distribution U(-0.01, 0.01)
  auto initializer = Uniform(0.01);
  for (auto& arg : args) {
    // arg.first is parameter name, and arg.second is the value
    initializer(arg.first, &arg.second);
  }

  // Create sgd optimizer
  Optimizer* opt = OptimizerRegistry::Find("sgd");
  opt->SetParam("rescale_grad", 1.0/batch_size);

  // Start training
  for (int iter = 0; iter < max_epoch; ++iter) {
    int samples = 0;
    train_iter.Reset();

    auto tic = chrono::system_clock::now();
    while (train_iter.Next()) {
      samples += batch_size;
      auto data_batch = train_iter.GetDataBatch();
      // Set data and label
      args["X"] = data_batch.data;
      args["label"] = data_batch.label;

      // Create executor by binding parameters to the model
      auto *exec = net.SimpleBind(ctx, args);
      // ... (the rest of the training loop is omitted in this excerpt)
inline void Symbol::InferArgsMap(
    const Context &context, std::map<std::string, NDArray> *args_map,
    const std::map<std::string, NDArray> &known_args) const {
  // arg_name_list holds the names of all args in the graph. As long as the
  // Symbol names match the keys of the args map, the two can be matched up.
  const auto arg_name_list = ListArguments();
  std::vector<std::vector<mx_uint> > in_shapes, aux_shapes, out_shapes;
  std::map<std::string, std::vector<mx_uint> > arg_shapes;

  for (const auto &arg_name : arg_name_list) {
    auto iter = known_args.find(arg_name);
    if (iter != known_args.end()) {
      arg_shapes[arg_name] = iter->second.GetShape();
    }
  }
  // This is the key call: infer the shapes of the remaining args from the
  // already known X and label shapes.
  InferShape(arg_shapes, &in_shapes, &aux_shapes, &out_shapes);

  for (size_t i = 0; i < in_shapes.size(); ++i) {
    const auto &shape = in_shapes[i];
    const auto &arg_name = arg_name_list[i];
    auto iter_arg = known_args.find(arg_name);
    if (iter_arg != known_args.end()) {
      // Args that already have an NDArray (here X and label) are copied through.
      (*args_map)[arg_name] = iter_arg->second;
    } else {
      // The rest get a fresh NDArray of the inferred shape, Gaussian-initialized.
      (*args_map)[arg_name] = NDArray(shape, context, false);
      NDArray::SampleGaussian(0, 1, &(*args_map)[arg_name]);
    }
  }
}
// This is the SimpleBind implemented by cpp-package
inline Executor *Symbol::SimpleBind(
    const Context &context, const std::map<std::string, NDArray> &args_map,
    const std::map<std::string, NDArray> &arg_grad_store,
    const std::map<std::string, OpReqType> &grad_req_type,
    const std::map<std::string, NDArray> &aux_map) {
  std::vector<NDArray> arg_arrays;
  std::vector<NDArray> grad_arrays;
  std::vector<OpReqType> grad_reqs;
  std::vector<NDArray> aux_arrays;
  // InferExecutorArrays is one of the most important steps: it fixes the order
  // of all the input NDArrays. InferExecutorArrays is also defined in the cpp
  // package.
  InferExecutorArrays(context, &arg_arrays, &grad_arrays, &grad_reqs,
                      &aux_arrays, args_map, arg_grad_store, grad_req_type,
                      aux_map);

  return new Executor(*this, context, arg_arrays, grad_arrays, grad_reqs,
                      aux_arrays);
}
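
To make the ordering work concrete, here is a hedged sketch, not part of the cpp-package source, of what a caller would otherwise have to do by hand: walk ListArguments() to order the NDArrays and construct the Executor directly, just like the manual Bind example above. It assumes net, ctx, and an args map that already holds an NDArray for every argument name (for instance after InferArgsMap):

// Hedged sketch: the manual equivalent of SimpleBind, assuming net, ctx and a
// complete name -> NDArray map called args.
std::vector<NDArray> in_args, arg_grads;
std::vector<OpReqType> grad_reqs;
for (const auto &name : net.ListArguments()) {
  in_args.push_back(args.at(name));                                // post-DFS order
  arg_grads.push_back(NDArray(args.at(name).GetShape(), ctx, false));
  grad_reqs.push_back((name == "X" || name == "label")
                          ? OpReqType::kNullOp : OpReqType::kWriteTo);
}
Executor *exec = new Executor(net, ctx, in_args, arg_grads, grad_reqs,
                              std::vector<NDArray>());              // no aux states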
inline void Symbol::InferExecutorArrays(
    const Context &context, std::vector<NDArray> *arg_arrays,
    std::vector<NDArray> *grad_arrays, std::vector<OpReqType> *grad_reqs,
    std::vector<NDArray> *aux_arrays,
    // In the example above, args_map only has the two keys "X" and "label",
    // which means the NDArrays for all other keys (i.e. Symbol names) have to
    // be created by this function.
    const std::map<std::string, NDArray> &args_map,
    const std::map<std::string, NDArray> &arg_grad_store,
    const std::map<std::string, OpReqType> &grad_req_type,
    const std::map<std::string, NDArray> &aux_map) const {
  // arg_name_list is already sorted, so its order is fixed
  const auto arg_name_list = ListArguments();
  std::vector<std::vector<mx_uint> > in_shapes, aux_shapes, out_shapes;
  std::map<std::string, std::vector<mx_uint> > arg_shapes;

  for (const auto &arg_name : arg_name_list) {
    auto iter = args_map.find(arg_name);
    if (iter != args_map.end()) {
      arg_shapes[arg_name] = iter->second.GetShape();
    }
  }
  // In this SimpleBind example the else branch of the first if-else in the loop
  // below is never taken, because Symbol::InferArgsMap already did that work.
  InferShape(arg_shapes, &in_shapes, &aux_shapes, &out_shapes);

  for (size_t i = 0; i < in_shapes.size(); ++i) {
    const auto &shape = in_shapes[i];
    const auto &arg_name = arg_name_list[i];
    auto iter_arg = args_map.find(arg_name);
    if (iter_arg != args_map.end()) {
      arg_arrays->push_back(iter_arg->second);
    } else {
      arg_arrays->push_back(NDArray(shape, context, false));
      NDArray::SampleGaussian(0, 1, &arg_arrays->back());
    }
    auto iter_grad = arg_grad_store.find(arg_name);
    if (iter_grad != arg_grad_store.end()) {
      grad_arrays->push_back(iter_grad->second);
    } else {
      grad_arrays->push_back(NDArray(shape, context, false));
    }
    auto iter_req = grad_req_type.find(arg_name);
    if (iter_req != grad_req_type.end()) {
      grad_reqs->push_back(iter_req->second);
    } else if (arg_name.rfind("data") == arg_name.length() - 4
               || arg_name.rfind("label") == arg_name.length() - 5) {
      grad_reqs->push_back(OpReqType::kNullOp);
    } else {
      grad_reqs->push_back(OpReqType::kWriteTo);
    }
  }

  const auto aux_name_list = ListAuxiliaryStates();
  for (size_t i = 0; i < aux_shapes.size(); ++i) {
    const auto &shape = aux_shapes[i];
    const auto &aux_name = aux_name_list[i];
    auto iter_aux = aux_map.find(aux_name);
    if (iter_aux != aux_map.end()) {
      aux_arrays->push_back(iter_aux->second);
    } else {
      aux_arrays->push_back(NDArray(shape, context, false));
      NDArray::SampleGaussian(0, 1, &aux_arrays->back());
    }
  }
}
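
One practical consequence of the defaults above: if no grad_req_type map is passed, arguments whose names end in "data" or "label" get kNullOp and every other argument gets kWriteTo. To override this, pass the optional maps to SimpleBind explicitly. A hedged sketch ("w0" is a hypothetical argument name of net; the empty maps simply fall back to the defaults shown above):

// Freeze one parameter by requesting no gradient for it (hypothetical name "w0").
std::map<std::string, OpReqType> grad_req_type;
grad_req_type["w0"] = OpReqType::kNullOp;
auto *exec = net.SimpleBind(ctx, args,
                            std::map<std::string, NDArray>(),   // arg_grad_store
                            grad_req_type,
                            std::map<std::string, NDArray>());  // aux_map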