Skip to content
This repository was archived by the owner on Jan 24, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
91 commits
Select commit Hold shift + click to select a range
5e3e623
add inception parse and resize op in framework
cyj1986 Jul 18, 2018
e422b3b
Merge remote-tracking branch 'upstream/developing'
cyj1986 Jul 18, 2018
98c81f0
Merge remote-tracking branch 'upstream/developing'
cyj1986 Jul 23, 2018
506ce3d
fix power test bug
cyj1986 Jul 23, 2018
b24e604
Merge branch 'developing' into master
cyj1986 Aug 2, 2018
eefd0b1
Merge branch 'developing' into master
cyj1986 Aug 3, 2018
3a0da9b
Merge branch 'developing' into master
cyj1986 Aug 16, 2018
c115be7
Merge branch 'developing' into master
LittleMaer Aug 20, 2018
8421735
Merge branch 'developing' into master
LittleMaer Aug 22, 2018
50b9f44
Merge pull request #32 from PaddlePaddle/developing
Shixiaowei02 Aug 29, 2018
ce8b4d2
update
Shixiaowei02 Aug 29, 2018
2fd979b
Merge branch 'developing' into master
cyj1986 Aug 30, 2018
6279926
go back: config.yaml
Shixiaowei02 Aug 30, 2018
b866f7b
Merge branch 'developing' into master
cyj1986 Sep 3, 2018
ed86bb6
modify pbs init
Shixiaowei02 Sep 3, 2018
b6f6033
Merge branch 'developing' into parser
Shixiaowei02 Sep 3, 2018
a5c90d0
add resize in ops
cyj1986 Sep 3, 2018
e34584d
fix conflict
cyj1986 Sep 3, 2018
c23655b
fix conflict
cyj1986 Sep 3, 2018
6debad1
fix init py
Shixiaowei02 Sep 3, 2018
7188a73
Merge branch 'parser' of https://github.com/Shixiaowei02/Anakin into …
Shixiaowei02 Sep 3, 2018
42f440d
add fix
Shixiaowei02 Sep 3, 2018
b0bec79
Merge pull request #215 from cyj1986/master
cyj1986 Sep 3, 2018
faf237f
update caffe parser
Shixiaowei02 Sep 3, 2018
46c2da2
update caffe parser
Shixiaowei02 Sep 3, 2018
2a78b2d
Merge branch 'developing' into parser
Shixiaowei02 Sep 3, 2018
9993779
add load graph from buffer
cyj1986 Sep 3, 2018
13c2854
Merge remote-tracking branch 'upstream/developing'
cyj1986 Sep 3, 2018
bc96e45
fix caffe parser
Shixiaowei02 Sep 3, 2018
fbb0af0
fix style
Shixiaowei02 Sep 3, 2018
d503eaa
Merge branch 'developing' of https://github.com/PaddlePaddle/Anakin i…
qq332982511 Sep 4, 2018
9654a5f
add norm op
Shixiaowei02 Sep 4, 2018
47eff25
Merge pull request #411 from cyj1986/master
LittleMaer Sep 5, 2018
b0fa09a
Merge pull request #409 from Shixiaowei02/parser
LittleMaer Sep 5, 2018
be97324
add split_inception
Shixiaowei02 Sep 5, 2018
01eab8b
add parse_inceptrion
Shixiaowei02 Sep 5, 2018
4ac0b70
Merge branch 'developing' into parser
Shixiaowei02 Sep 5, 2018
d5f400c
update parser
Shixiaowei02 Sep 5, 2018
cff87b4
update parser
Shixiaowei02 Sep 5, 2018
a9d626e
update parser
Shixiaowei02 Sep 5, 2018
46261a2
update caffe parser
Shixiaowei02 Sep 5, 2018
4a13b9d
remove a lot of warning
luotao1 Sep 6, 2018
6470a5e
add converter design doc
qq332982511 Sep 6, 2018
3a29009
Merge branch 'developing' of https://github.com/PaddlePaddle/Anakin i…
qq332982511 Sep 6, 2018
3b0fda4
add init
qq332982511 Sep 6, 2018
c4ac0bf
rm abs path
qq332982511 Sep 6, 2018
53712b6
Merge pull request #418 from Shixiaowei02/parser
Sep 6, 2018
4f9e60f
Merge branch 'developing' into warning
luotao1 Sep 6, 2018
3c8554f
Merge pull request #429 from luotao1/warning
pangge Sep 7, 2018
cf1665a
fix resize bug
cyj1986 Sep 7, 2018
0b83eb1
Merge remote-tracking branch 'upstream/developing'
cyj1986 Sep 7, 2018
1ae3bd5
fix resize bug
cyj1986 Sep 7, 2018
36b7fb6
Update README_CPU.md
yma-admin Sep 10, 2018
425fc62
Merge branch 'developing' into developing_tf_parser
qq332982511 Sep 10, 2018
2cf66e4
Merge branch 'developing' into master
cyj1986 Sep 10, 2018
e7d7258
update names
Shixiaowei02 Sep 10, 2018
b5f3324
Merge branch 'developing' into parser
Shixiaowei02 Sep 10, 2018
8180eb7
update name
qq332982511 Sep 10, 2018
d30da5e
update name
qq332982511 Sep 10, 2018
037b6c8
Merge branch 'developing_tf_parser' of https://github.com/qq332982511…
qq332982511 Sep 10, 2018
56ac52b
Merge pull request #430 from qq332982511/developing_tf_parser
LittleMaer Sep 10, 2018
4a9053a
Merge pull request #443 from Shixiaowei02/parser
LittleMaer Sep 10, 2018
89f1cec
Merge pull request #40 from PaddlePaddle/developing
Shixiaowei02 Sep 10, 2018
decb72c
support diepsie
Shixiaowei02 Sep 10, 2018
3e057ce
add fluid tool
Shixiaowei02 Sep 11, 2018
bb6a066
Merge branch 'developing' into master
cyj1986 Sep 12, 2018
cf6b426
update conv + prelu fusion
Sep 12, 2018
36ddd93
Merge pull request #447 from Shixiaowei02/parser
Sep 13, 2018
67f08f1
fix conv act bug
Sep 13, 2018
cac5803
Merge branch 'developing' into master
cyj1986 Sep 13, 2018
46356c4
Merge pull request #432 from cyj1986/master
xyoungli Sep 14, 2018
689b210
Merge pull request #453 from throneclay/developing_face
Sep 14, 2018
db7411a
fix prior_box bug
Shixiaowei02 Sep 15, 2018
0426146
add slice eltwise
Shixiaowei02 Sep 17, 2018
6e4e6ad
Merge branch 'parser' of https://github.com/Shixiaowei02/Anakin into …
Shixiaowei02 Sep 17, 2018
04e3a79
update
Shixiaowei02 Sep 17, 2018
a379ab7
judge proto repeated field
Shixiaowei02 Sep 17, 2018
216bf87
fix style
Shixiaowei02 Sep 17, 2018
4df42ab
update normalize
Shixiaowei02 Sep 19, 2018
78dcce1
Merge branch 'developing' into parser
Shixiaowei02 Sep 19, 2018
1189a2e
update interp
Shixiaowei02 Sep 20, 2018
c57274f
update interp
Shixiaowei02 Sep 20, 2018
578cab6
fix readme
Shixiaowei02 Sep 21, 2018
bc6a39e
update readme
Shixiaowei02 Sep 21, 2018
fed5e69
support new fluid mobilenetssd
Shixiaowei02 Sep 27, 2018
8bc8646
support new fluid mobilenetssd
Shixiaowei02 Sep 27, 2018
41fb528
add the version check
Shixiaowei02 Sep 27, 2018
e6fcf0a
add the version check
Shixiaowei02 Sep 27, 2018
20e2a7d
add the version check
Shixiaowei02 Sep 27, 2018
ed08914
support new version fluid
Shixiaowei02 Sep 28, 2018
65178d4
Merge pull request #458 from Shixiaowei02/parser
Sep 29, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 0 additions & 68 deletions benchmark/README_CPU.md
Original file line number Diff line number Diff line change
Expand Up @@ -205,74 +205,6 @@ We tested them on single-CPU with different thread numbers.
4 | 18074 | 118696
6 | 26607 | 102044

2. **`Anakin`** VS **`PaddlePaddle/Fluid`**
We use private dataset and different QPS index in this benchmark.
### <span id = '1'>language model in E5-2650 v4 </span>

- Latency (`ms`) of one batch

ThreadNum | Fluid | Anakin
:---: | :---: | :---: |
1 | 42.7418 | 1.93589
2 | 42.7418 | 2.49537
6 | 42.7734 | 3.14332
10 | 43.0721 | 4.55329
12 | 42.8501 | 5.09893

- Throughput (`sentence/s`)

ThreadNum | Fluid | Anakin
:---: | :---: | :---: |
1 | 23 | 504
2 | 46 | 762
6 | 134 | 1393
10 | 218 | 1556
12 | 260 | 1541

### <span id = '2'>Chinese_ner model in E5-2650 v4 </span>

- Latency (`ms`) of one batch

ThreadNum | Fluid | Anakin
:---: | :---: | :---: |
1 | 0.380475 | 0.17034
4 | 0.380475 | 0.171143
6 | 0.380475 | 0.172688
10 | 0.380475 | 0.173269
12 | 0.380475 | 0.17668

- Throughput (`sentence/s`)

ThreadNum | Fluid | Anakin
:---: | :---: | :---: |
1 | 7844 | 5822
4 | 7844 | 11377
6 | 7844 | 29725
10 | 7844 | 41238
12 | 7844 | 42790

### <span id = '3'>text_classfication model in E5-2650 v4 </span>

- Latency (`ms`) of one batch

ThreadNum | Fluid | Anakin
:---: | :---: | :---: |
1 | 1.48578 | 1.10088
4 | 1.54025 | 1.11258
6 | 1.68529 | 1.1257
10 | 1.9817 | 1.13267
12 | 2.21864 | 1.1429

- Throughput (`sentence/s`)

ThreadNum | Fluid | Anakin
:---: | :---: | :---: |
1 | 673 | 901
4 | 1289 | 1665
6 | 3458 | 4449
10 | 4875 | 6183
12 | 5265 | 6188

## How to run those Benchmark models?

> 1. You can just run `sh benchmark_tensorflow.sh` and `sh benchmark_anakin.sh`
Expand Down
2 changes: 2 additions & 0 deletions cmake/compiler_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ anakin_add_compile_option(-Wshadow)
anakin_add_compile_option(-fpermissive)
anakin_add_compile_option(-Wsign-promo)
anakin_add_compile_option(-fdiagnostics-show-option)
anakin_add_compile_option(-Wno-missing-field-initializers)
anakin_add_compile_option(-Wno-extra)

if(ENABLE_NOISY_WARNINGS)
anakin_add_compile_option(-Wcast-align)
Expand Down
4 changes: 2 additions & 2 deletions docs/Manual/Converter_ch.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ Anakin 模型转换器输入支持 Caffe 和 Fluid 两种格式的预测模型

## 系统要求

- Python 2.7+
- Protobuf 3.1+(务必注意 Python 与系统环境 Protobuf 版本一致)
- Python 2.7
- Protobuf 3.1+(务必注意 Pip Protobuf 与系统环境 Protobuf 版本一致)
- PaddlePaddle 0.12.0+ (Fluid 模式下)
- flask, bson, matplotlib, scikit-image
- tkinter
Expand Down
56 changes: 56 additions & 0 deletions docs/Manual/DesignConverter_ch.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Parser的编写指南
下文称Anakin为AK,运算操作为OP,本文参考Tensorflow的Parser编写,参考代码目录为tools/external_converter_v2/parser/tensorflow
## Parser的功能和执行流程
功能是将其他深度学习框架(如CAFFE,FLUID,TENSORFLOW,ONNX)的模型转换为AK的模型.对AK的作用是屏蔽不同框架间的差异,这种差异包括模型存储、OP的定义、图差异
因此Parser的执行流程是:
1. 将源框架的模型载入Parser
2. 将原框架的图解析为AK中的OP节点和OP节点的连接关系
3. 进行OP定义的转换和图优化
4. 将符合AK标准的图写入protobuf
## Parser的目录结构
Parser工具在tools/external_converter_v2/parser目录下
Parser的目录主要包含3部分:
1. Parser的运行配置文件包括 config.py, config.yaml, converter.py, 用户只用执行converter.py,Parser就会按照config.yaml中的声明去解析模型
2. Parser的公共定义,包括operations,pbs,proto三个目录. Parser的公共工具函数 graph*.py logger.py utils.py
3. 各个框架对应的Parser,其目录的命名方式为框架名,如caffe, tensorflow
## Parser的编写流程
### 1、声明你的Parser
1. 在config.yaml中填写你的Parser运行的必要信息,包括ProtoPath和SavePath等.OPTIONS/Framework改为你的Parser的类型,TARGET下填写对应的参数列表
2. 添加你的Parser目录,如tensorflow,导出你的Parser符号.注意,Parser的框架默认调用你的Parser类中的__call__方法来执行解析,这个方法需要返回填写完毕的GraphProtoIO对象
3. 在config.py中Configuration下__init__函数中增加对你的Parser的调用,将yaml中读取的配置信息传给你的Parser,此处调用你的Parser中的__init__方法
### 2、添加你的Parser主体
可以参考parser_tf.py
1. 你需要在Parser主体构造时获取模型路径,input,output名字等解析必须的信息,
2. 在__call__中返回填写好的GraphProtoIO对象,该对象为填写protobuf的辅助工具
3. 建议Parser的解析过程分成三部分,先将原框架的模型载入并转换为一种便于修改的中间的图形式;对中间图修改使得图满足AK的要求;将满足要求的中间图利用NodeProtoIO和GraphProtoIO这两个辅助类填入protobuf.具体细节可以参考parser_tf
### 3、读取原始模型,并将模型转换为中间类型
可以参考parse_tf_2_med.py
1. 这一步与原始框架结合紧密,你可能需要import原始框架的工具函数来完成模型的裁剪、固定、加载等操作
2. 大部分的框架都是使用tensor来连接OP的,但AK中是OP直接相连,这点需要注意
3. AK的shape默认是4维的,有的参数的shape不足4维,需要Parser补全
### 4、对中间类型的图进行优化
可以参考med_graph.py
1. 由于AK不支持普通OP多输出的情况,需要在多输出的OP后面补上Splite类型的OP节点
2. 对于Convlution后接Batchnorm这种可以合并又不会导致OP定义改变的情况,需要Parser在这一步做掉
3. AK规定所有的输入类型OP的名字必须是input_x这种命名方式,其中x为从0开始的数字
### 5、将中间类型的图以GraphProtoIO的方式保存
可以参考parse_med_2_ak.py 和 parser_tf.py
1. 你首先需要构造Node节点,Node节点的名字是OP的名字(如conv2d_1_a_0),Node节点中OP成员变量的名字是Node节点的类型(如Convlution)
2. Node节点需要按照输入的顺序用Node的add_in方法填写输入Node的名字,add_out方法按顺序填写输出Node的名字
3. 通过调用GraphProtoIO的add_node方法将构造好的Node的__call__方法的返回值作为参数,将Node节点加入AK的graph中
4. 调用GraphProtoIO的add_in_edge和add_out_edge完成AK图中OP间关系的构建. 如果Node中的in和out填写正确,你也可以通过调用GraphProtoIO的format_edge_from_nodes方法完成这个工作
5. AK的模型需要Parser给出输出Node的名字,使用GraphProtoIO的add_out方法填写输出Node的名字
### 6、检查模型解析的正确性
1. 默认的config.yaml配置会在解析结束后启动一个web服务器展示解析后的AK模型图,你需要对比原框架的模型图进行验证.这里最容易出现的错误是边关系的错误,表现为图非常乱,你需要逐条边地检查错误.第二个容易出错的地方是参数漏填,需要你检查OP中的属性
2. 将解析后的模型放入AK中执行,使用相同的输入,原框架与AK有相同的输出.如果输出不一致可以开启AK的DEBUG模式,在net.cpp中将每层的输出打印.如果AK在解析阶段陷入死循环,大概率是边的关系出错.
## 如何添加新OP
1. 需要在AK代码中加入该OP的实现,包括对应设备Saber的OP,Saber单测和Framework中的OP
2. 根据Framework的OP在ops.py中添加Parser公共的OP定义
3. 从原框架的模型中解析出该OP的节点,并在AK的graph中填入该OP节点
## AK模型与其他框架模型的不同之处
+ AK模型与CAFFE的模型相似,因此与其他模型有很多不同的地方,需要Parser在解析过程中处理掉.
+ 最大的不同是与FLUID或TENSORFLOW这种OP粒度很细的框架,AK的模型中OP的粒度很粗,这是为了节省访存开销.这会导致解析这些框架的模型时存在大量的合并操作.
+ 其次是OP的行为不同,如TENSORFLOW中Pooling默认都是exclusive的,而AK中是inclusive的.TENSORFLOW的Padding如果是奇数pad则在右方和下方多pad,AK是在左方和上方多Pad
+ AK默认的布局是NCHW,如果其他框架的OP是其他形式的,需要在Parser中做weights的布局转换,并处理reshape的问题.
+ AK中有的weights是需要预先做布局转换的(如GRU,LSTM).AK中也支持同一OP的不同算法,如(GRU,Pooling).

2 changes: 1 addition & 1 deletion docs/Manual/INSTALL_ch.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@

#### Anakin - CPU ###

在编译 CPU 版本前,我们建议您升级 GCC-G++ 至 5.4.0 以上,链接的 libm.so 库版本高于 2.17,以发挥更佳性能。
在编译 CPU 版本前,我们建议您升级 GCC-G++ 至 5.4.0,链接的 libm.so 库版本为 2.17 ~ 2.23,以发挥更佳性能。

#### Anakin - AMDGPU ###

Expand Down
4 changes: 3 additions & 1 deletion framework/core/net/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,9 +393,11 @@ void Net<Ttype, Dtype, Ptype, RunType>::prediction() {
#define RECORD_INNER
#if defined(RECORD_INNER) && defined(USE_X86_PLACE)
record_tensor_to_file(*out,("record_"+executer.name).c_str());
if(executer.name=="")
#endif
LOG(INFO) <<executer.name <<" d_tensor_out_p :" <<out->data();
#ifdef USE_CUDA
record_tensor_to_file(*out,("record_"+executer.name).c_str());
#endif
#ifdef USE_X86_PLACE
// for (int i = 0; i < 10; ++i) {
// std::cout << out->data()[i]<<" ";
Expand Down
15 changes: 15 additions & 0 deletions framework/graph/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,21 @@ Status Graph<Ttype, Dtype, Ptype>::load(std::string model_path) EXCLUSIVE_LOCKS_
return ret;
}

template<typename Ttype, DataType Dtype, Precision Ptype>
Status Graph<Ttype, Dtype, Ptype>::load(const char* buffer, size_t len) EXCLUSIVE_LOCKS_REQUIRED(_mut) {
    // Load (or reload) the graph from an in-memory model buffer.
    // The (buffer, len) pair acts as a cache key: a repeated call with the
    // same pointer and length is a no-op returning OK.
    // NOTE(review): the buffer is NOT copied — the caller must keep it alive
    // as long as the graph may be reloaded from it; confirm callers do so.
    std::unique_lock<std::mutex> lock(this->_mut);
    Status ret = Status::OK();
    if (len != _len || buffer != _buffer) {
        this->Clean();
        ret = parser::load<Ttype, Dtype>(this, buffer, len);
        // Assumes Status converts to bool (true on success) — TODO confirm
        // against framework/core/status.h.
        if (ret) {
            _buffer = buffer;
            _len = len;
        } else {
            // Do not cache a failed parse: otherwise the next call with the
            // same (buffer, len) would skip reloading and report spurious
            // success for a graph that was never parsed.
            _buffer = nullptr;
            _len = 0;
        }
    }

    return ret;
}


template<typename Ttype, DataType Dtype, Precision Ptype>
Status Graph<Ttype, Dtype, Ptype>::load(const char* model_path) {
return parser::load<Ttype, Dtype>(this, model_path);
Expand Down
5 changes: 5 additions & 0 deletions framework/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ class Graph : public GraphBase<std::string,
Status load(std::istream* instream);
Status load(std::string model_path);
Status load(const char* model_path);
Status load(const char* buffer, size_t len);

Status save(std::string model_path);
Status save(const char* model_path);
/// Get nodes in execution oroder.
Expand Down Expand Up @@ -152,6 +154,9 @@ class Graph : public GraphBase<std::string,
std::string _model_path{"None"} GUARDED_BY(this->_mut);
/// this make the graph optimized.
bool _has_graph_optimized{false}; GUARDED_BY(this->_mut);

const char* _buffer{NULL} GUARDED_BY(this->_mut);
size_t _len{0} GUARDED_BY(this->_mut);
std::mutex _mut;
};

Expand Down
Loading