Skip to content

Commit dbdf054

Browse files
Your Name (dcslin) authored and committed
tc comprehension integration Ref. SINGA-482
1 parent 806dbe7 commit dbdf054

File tree

12 files changed

+707
-30
lines changed

12 files changed

+707
-30
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
# Build feature toggles; each OPTION is consumed elsewhere in the build.
OPTION(ENABLE_DIST "Enable distributed training" OFF)
OPTION(DISABLE_WARNINGS "Disable warnings under windows" ON)
OPTION(USE_MODULES "Compile dependent libs as submodules together with singa" OFF)
OPTION(USE_MKLDNN "Use mkl-dnn libs" OFF)
# New toggle for the Tensor Comprehensions integration (default off).
OPTION(USE_TC "Use tensor comprehension" OFF)

# TODO: remove all USE_CBLAS in codes

cmake/Dependencies.cmake

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,11 @@ ENDIF()
125125

126126
IF(USE_PYTHON)
127127
IF(USE_PYTHON3)
128-
set(Python_ADDITIONAL_VERSIONS 3.6 3.5 3.4)
128+
set(Python_ADDITIONAL_VERSIONS 3.6 3.5 3.4)
129129
FIND_PACKAGE(PythonInterp 3 REQUIRED)
130130
FIND_PACKAGE(PythonLibs 3 REQUIRED)
131131
FIND_PACKAGE(SWIG 3.0.10 REQUIRED)
132-
ELSE()
132+
ELSE()
133133
FIND_PACKAGE(PythonInterp 2.7 REQUIRED)
134134
FIND_PACKAGE(PythonLibs 2.7 REQUIRED)
135135
FIND_PACKAGE(SWIG 3.0.8 REQUIRED)
@@ -142,10 +142,53 @@ IF(USE_JAVA)
142142
FIND_PACKAGE(SWIG 3.0 REQUIRED)
143143
ENDIF()
144144

145+
145146
IF(USE_MKLDNN)
    # Locate mkl-dnn headers and library through the user's environment paths,
    # then expose them to the rest of the build.
    FIND_PATH(MKLDNN_INCLUDE_DIR NAME "mkldnn.hpp" PATHS "$ENV{CMAKE_INCLUDE_PATH}")
    FIND_LIBRARY(MKLDNN_LIBRARIES NAME "mkldnn" PATHS "$ENV{CMAKE_LIBRARY_PATH}")
    MESSAGE(STATUS "Found MKLDNN at ${MKLDNN_INCLUDE_DIR}")
    INCLUDE_DIRECTORIES(${MKLDNN_INCLUDE_DIR})
    LIST(APPEND SINGA_LINKER_LIBS ${MKLDNN_LIBRARIES})
ENDIF()
153+
154+
155+
IF(USE_TC)
    ### Tensor Comprehensions integration.
    # The TC source/build trees and the conda env providing Halide/LLVM/ATen
    # were hard-coded to /root/... paths, which only works on the original
    # build machine. Expose them as cache variables whose defaults preserve
    # the old behavior; override with -DTC_SOURCE_DIR=... etc. elsewhere.
    SET(TC_SOURCE_DIR "/root/TensorComprehensions" CACHE PATH
        "Tensor Comprehensions source tree")
    SET(TC_BUILD_DIR "${TC_SOURCE_DIR}/build" CACHE PATH
        "Tensor Comprehensions build tree")
    SET(TC_CONDA_PREFIX "/root/conda/envs/tc_build" CACHE PATH
        "conda environment providing Halide, LLVM and torch/ATen")

    INCLUDE_DIRECTORIES(${TC_SOURCE_DIR})
    INCLUDE_DIRECTORIES(${TC_SOURCE_DIR}/tc/version)
    INCLUDE_DIRECTORIES(${TC_BUILD_DIR})
    # polyhedral model required
    INCLUDE_DIRECTORIES(${TC_SOURCE_DIR}/isl_interface/include)
    # dlpack
    INCLUDE_DIRECTORIES(${TC_SOURCE_DIR}/third-party/dlpack/include)
    # islpp
    INCLUDE_DIRECTORIES(${TC_SOURCE_DIR}/third-party/islpp/include)
    # gflags
    INCLUDE_DIRECTORIES(${TC_BUILD_DIR}/third-party/googlelibraries/gflags/include)
    # glog
    INCLUDE_DIRECTORIES(${TC_BUILD_DIR}/third-party/googlelibraries/glog)
    # Halide
    INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/include/Halide)
    # llvm
    INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/include)
    # torch ATen headers
    INCLUDE_DIRECTORIES(${TC_CONDA_PREFIX}/lib/python3.6/site-packages/torch/lib/include)

    # find Halide lib
    set(HALIDE_PREFIX "${TC_CONDA_PREFIX}")
    find_library(HALIDE_LIBRARIES REQUIRED NAMES Halide PATHS ${HALIDE_PREFIX} PATH_SUFFIXES lib lib64 NO_DEFAULT_PATH)
    message(STATUS "Found Halide.so file: ${HALIDE_LIBRARIES}")

    # find tc libs
    link_directories(${TC_BUILD_DIR}/tc/aten)
    link_directories(${TC_BUILD_DIR}/tc/lang)
    link_directories(${TC_BUILD_DIR}/tc/core)
    link_directories(${TC_BUILD_DIR}/tc/autotuner)
    link_directories(${TC_BUILD_DIR}/tc/proto)

    # torch (ATen) runtime libs
    link_directories(${TC_CONDA_PREFIX}/lib/python3.6/site-packages/torch/lib)

    LIST(APPEND SINGA_LINKER_LIBS ${HALIDE_LIBRARIES} tc_aten tc_lang tc_core_cpu tc_cuda tc_core_cuda_no_sdk tc_core tc_autotuner tc_proto ATen)
    ### end Tensor Comprehensions
ENDIF()

cmake/Templates/singa_config.h.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
// #cmakedefine CUDNN_VERSION @CUDNN_VERSION@

#cmakedefine USE_MKLDNN
// Defined when singa is configured with -DUSE_TC=ON (Tensor Comprehensions).
#cmakedefine USE_TC

include/singa/core/tensor.h

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,20 @@
2323
#include <tuple>
2424
#include <memory>
2525

26+
#ifdef USE_TC
// DLPack interchange format used to hand singa tensors to TC.
#include <dlpack/dlpack.h>
// Tensor Comprehensions core: tensors, compiler, options, timing.
#include <tc/core/tensor.h>
#include <tc/utils/compiler_options.h>
#include <tc/core/compiler.h>
#include <tc/core/utils/time.h>
// CUDA and CPU backends with their executors.
#include <tc/core/cuda/cuda_backend.h>
#include <tc/core/cuda/cuda_tc_executor.h>
#include <tc/core/cpu/cpu_backend.h>
#include <tc/core/cpu/cpu_tc_executor.h>
// TC_CHECK_* macros and the executor base.
#include <tc/core/check.h>
#include <tc/core/tc_executor.h>
#endif  // USE_TC
2640
#include "singa/core/common.h"
2741
#include "singa/core/device.h"
2842
#include "singa/proto/core.pb.h"
@@ -603,6 +617,85 @@ Tensor ConcatRows(const vector<Tensor> &in);
603617
Tensor ConcatenateColumns(const vector<Tensor> &in);
604618
/// Alias name for function ConcatenateColumns
605619
Tensor ConcatColumns(const vector<Tensor> &in);
620+
621+
622+
623+
624+
#ifdef USE_TC
/// tc integration start

/// Wraps `src` in a DLPack managed tensor without copying data. The returned
/// object holds a copy of the Tensor handle so the underlying block stays
/// alive; the consumer must invoke its `deleter` callback when done.
DLManagedTensor *toDLPack(const Tensor &src);

inline std::vector<tc::DLTensorUPtr>
makeDLTensors(const std::vector<Tensor> &tensors);

/// Compiles the TC function `entryPoint` defined in the TC string `tc` for
/// the shapes/dtypes of `inputs`, using the given mapping options.
/// Returns an executor to be passed to runTC().
template <typename Backend>
std::unique_ptr<typename Backend::ExecutorType>
compileTC(const std::string &tc, const std::string &entryPoint,
          const std::vector<Tensor> &inputs,
          const typename Backend::MappingOptionsType &options,
          const tc::CompilerOptions &compilerOptions = tc::CompilerOptions());

/// Runs TC shape inference and returns metadata describing the outputs
/// `entryPoint` would produce for `inputs`.
std::vector<tc::DLTensorUPtr>
inferOutputTensorInfo(const std::string &tc, const std::string &entryPoint,
                      const std::vector<Tensor> &inputs);

/// Allocates singa Tensors matching the inferred output shapes of
/// `entryPoint` (placed on the device/dtype of inputs[0]).
std::vector<Tensor> prepareOutputs(const std::string &tc,
                                   const std::string &entryPoint,
                                   const std::vector<Tensor> &inputs);

/// Executes a compiled TC executor on `inputs`, writing the results into
/// the pre-allocated `outputs`.
template <typename Executor>
void runTC(const Executor &executor, const std::vector<Tensor> &inputs,
           std::vector<Tensor> &outputs);

// makeDLConstTensors implementation
inline std::vector<tc::DLConstTensorUPtr>
makeDLConstTensors(const std::vector<Tensor> &tensors) {
  std::vector<tc::DLConstTensorUPtr> dlTensors;
  // Iterate by const reference: the old `for (auto tensor : ...)` copied a
  // Tensor per iteration; toDLPack already copies the handle it must keep.
  for (const auto &tensor : tensors) {
    auto dlMTensor = toDLPack(tensor);
    dlTensors.push_back(tc::makeDLConstTensor(&(dlMTensor->dl_tensor)));
    dlMTensor->deleter(dlMTensor);
  }
  return dlTensors;
}

// makeDLTensors implementation
inline std::vector<tc::DLTensorUPtr>
makeDLTensors(const std::vector<Tensor> &tensors) {
  std::vector<tc::DLTensorUPtr> dlTensors;
  for (const auto &tensor : tensors) {
    auto dlMTensor = toDLPack(tensor);
    dlTensors.push_back(tc::makeDLTensor(&(dlMTensor->dl_tensor)));
    dlMTensor->deleter(dlMTensor);
  }
  return dlTensors;
}

// compile implementation
template <typename Backend>
std::unique_ptr<typename Backend::ExecutorType>
compileTC(const std::string &tc, const std::string &entryPoint,
          const std::vector<Tensor> &inputs,
          const typename Backend::MappingOptionsType &options,
          const tc::CompilerOptions &compilerOptions) {
  auto inputDLTensors = makeDLConstTensors(inputs);
  return tc::compile<Backend>(tc, entryPoint, extractRawPtrs(inputDLTensors),
                              options, compilerOptions);
}

// run implementation
template <typename Executor>
void runTC(const Executor &executor, const std::vector<Tensor> &inputs,
           std::vector<Tensor> &outputs) {
  auto inputDLTensors = makeDLConstTensors(inputs);
  auto outputDLTensors = makeDLTensors(outputs);
  // runTC returns void; do not `return` the (void) result of run().
  executor.run(extractRawPtrs(inputDLTensors),
               extractRawPtrs(outputDLTensors));
}

/// tc integration end
#endif  // USE_TC
698+
606699
} // namespace singa
607700

608701
#endif // SINGA_CORE_TENSOR_H_

src/core/tensor/tensor.cc

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@
2727

2828
#define Noaxis 9999
2929

30+
#ifdef USE_TC
// A namespace named `lang` already exists within singa; refer to TC's
// `lang` namespace through the alias `tclang` to avoid the duplicate name.
namespace tclang = lang;
#endif  // USE_TC
35+
3036
namespace singa {
3137

3238
Tensor::~Tensor() {
@@ -1334,4 +1340,111 @@ Tensor Reshape(const Tensor &in, const Shape &s) {
13341340
return out.Reshape(s);
13351341
}
13361342

1343+
1344+
#ifdef USE_TC
/// tc integration start

/// Everything a DLManagedTensor handed out by toDLPack() must keep alive:
/// the singa Tensor (so its memory block is not released) plus the
/// shape/stride arrays that dl_tensor.shape/.strides point into.
/// All of it is freed as one unit by deleter().
struct SingaDLManagedTensor {
  Tensor handle;
  std::vector<int64_t> shape;
  std::vector<int64_t> strides;
  DLManagedTensor tensor;
};

/// DLPack deleter callback. Previously the shape/stride vectors were
/// separate `new`-ed allocations that this function never freed (a leak per
/// conversion); storing them inside SingaDLManagedTensor fixes that.
void deleter(DLManagedTensor *arg) {
  delete static_cast<SingaDLManagedTensor *>(arg->manager_ctx);
}

/// Maps a singa data type onto a DLPack DLDataType.
/// Only kFloat32 is supported for now; anything else throws.
static DLDataType getDLDataType(const Tensor &t) {
  DLDataType dtype;
  dtype.lanes = 1;
  dtype.bits = SizeOf(t.data_type()) * 8;
  switch (t.data_type()) {
  case kFloat32:
    dtype.code = DLDataTypeCode::kDLFloat;
    break;
  default:
    throw std::logic_error("only kFloat32 is supported for dlpack conversion");
    break;
  }
  return dtype;
}

/// Maps the tensor's device onto a DLPack DLContext: GPU for kCuda devices,
/// CPU otherwise.
static DLContext getDLContext(const Tensor &tensor, const int64_t &device_id) {
  DLContext ctx;
  ctx.device_id = device_id;
  if (tensor.device()->lang() == kCuda) {
    ctx.device_type = DLDeviceType::kDLGPU;
  } else {
    ctx.device_type = DLDeviceType::kDLCPU;
  }
  return ctx;
}

// Returns a memory-managed DLPack tensor constructed out of a singa Tensor
// (no data copy; the consumer releases it through the deleter callback).
DLManagedTensor *toDLPack(const Tensor &src) {
  auto *managed = new SingaDLManagedTensor;
  managed->handle = src;  // keeps the underlying block alive
  managed->tensor.manager_ctx = managed;
  managed->tensor.deleter = &deleter;
  managed->tensor.dl_tensor.data = src.block()->mutable_data();
  int64_t device_id = src.device()->id();
  managed->tensor.dl_tensor.ctx = getDLContext(src, device_id);
  managed->tensor.dl_tensor.ndim = src.nDim();
  managed->tensor.dl_tensor.dtype = getDLDataType(src);

  // Shape/stride storage lives inside the managed struct so it stays valid
  // exactly as long as the DLManagedTensor and is reclaimed by deleter().
  managed->shape.assign(src.shape().begin(), src.shape().end());
  managed->tensor.dl_tensor.shape = managed->shape.data();

  managed->strides.assign(src.stride().begin(), src.stride().end());
  managed->tensor.dl_tensor.strides = managed->strides.data();

  managed->tensor.dl_tensor.byte_offset = 0;
  return &(managed->tensor);
}

// prepare output
/// Parses `tc`, verifies `entryPoint` is defined, then runs TC's output
/// shape inference against the DLPack views of `inputs`.
std::vector<tc::DLTensorUPtr>
inferOutputTensorInfo(const std::string &tc, const std::string &entryPoint,
                      const std::vector<Tensor> &inputs) {
  auto parsedTcs = tc::detail::parse(tc);
  if (parsedTcs.count(entryPoint) != 1u) {
    TC_CHECK_GE(parsedTcs.size(), 1u)
        << "No TC was parsed, should have thrown earlier";
    throw tclang::ErrorReport(parsedTcs.begin()->second)
        << "\nattempting to access undefined entryPoint: " << entryPoint;
  }
  auto inputDLTensors = makeDLConstTensors(inputs);
  return makeDLTensorVector(tc::detail::inferOutputTensorInfo(
      parsedTcs.at(entryPoint), extractRawPtrs(inputDLTensors)));
}

/// Allocates one singa Tensor per inferred output of `entryPoint`, on the
/// device and with the dtype of inputs[0]. Returns an empty vector when the
/// entry point produces no outputs.
std::vector<Tensor> prepareOutputs(const std::string &tc,
                                   const std::string &entryPoint,
                                   const std::vector<Tensor> &inputs) {
  std::vector<Tensor> outputs;
  auto outTensorInfo = inferOutputTensorInfo(tc, entryPoint, inputs);
  if (outTensorInfo.size() == 0) {
    return outputs;
  }
  TC_CHECK_GE(inputs.size(), 1u)
      << "NYI: Need >= 1 input tensors to determine "
      << "backend and prepare ATen outputs. Add an overload with just an ATen "
      << "backend";

  // All outputs inherit device and dtype from the first input.
  auto dev = inputs[0].device();
  auto dtype = inputs[0].data_type();
  for (size_t i = 0; i < outTensorInfo.size(); ++i) {
    tc::TensorInfo info(outTensorInfo[i]);
    Shape shape(info.shape.begin(), info.shape.end());
    outputs.emplace_back(shape, dev, dtype);
  }
  return outputs;
}
/// tc integration end
#endif  // USE_TC
1448+
1449+
13371450
} // namespace singa

src/model/operation/tc_fn.cc

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
/*********************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 ************************************************************/
#ifdef USE_TC
#include "./tc_fn.h"

namespace singa {

/// Compiles the TC entry point `entryFn` from `tcDefinition` for the shapes
/// of `inputs` with naive CUDA mapping options, storing the executor for
/// later tcExecute() calls.
TcFnHandle::TcFnHandle(std::string tcDefinition, std::string entryFn,
                       const std::vector<Tensor> &inputs) {
  tc_string = tcDefinition;
  tc_name = entryFn;
  // Naive mapping options as a baseline; autotuned options could be
  // substituted here later.
  auto naiveOptions =
      tc::CudaBackend::MappingOptionsType::makeNaiveMappingOptions();
  pExecutor = singa::compileTC<tc::CudaBackend>(tcDefinition, entryFn, inputs,
                                                {naiveOptions});
}

/// Runs the compiled kernel held by `tcFnhandle` on `inputs` and returns
/// the first output tensor.
Tensor tcExecute(const TcFnHandle &tcFnhandle,
                 const std::vector<Tensor> &inputs) {
  auto outputs =
      singa::prepareOutputs(tcFnhandle.tc_string, tcFnhandle.tc_name, inputs);
  // prepareOutputs can return an empty vector; the old code indexed
  // outputs[0] unconditionally, which is undefined behavior in that case.
  TC_CHECK_GE(outputs.size(), 1u)
      << "TC entry point '" << tcFnhandle.tc_name << "' produced no outputs";
  singa::runTC(*(tcFnhandle.pExecutor), inputs, outputs);
  return outputs[0];
}

}  // namespace singa
#endif  // USE_TC

src/model/operation/tc_fn.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*********************************************************
2+
*
3+
* Licensed to the Apache Software Foundation (ASF) under one
4+
* or more contributor license agreements. See the NOTICE file
5+
* distributed with this work for additional information
6+
* regarding copyright ownership. The ASF licenses this file
7+
* to you under the Apache License, Version 2.0 (the
8+
* "License"); you may not use this file except in compliance
9+
* with the License. You may obtain a copy of the License at
10+
*
11+
* http://www.apache.org/licenses/LICENSE-2.0
12+
*
13+
* Unless required by applicable law or agreed to in writing,
14+
* software distributed under the License is distributed on an
15+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
* KIND, either express or implied. See the License for the
17+
* specific language governing permissions and limitations
18+
* under the License.
19+
*
20+
************************************************************/
21+
//#ifndef SINGA_MODEL_OPERATION_TC_FN_H_
22+
//#define SINGA_MODEL_OPERATION_TC_FN_H_
23+
24+
#ifdef USE_TC
25+
26+
#include "singa/core/tensor.h"
27+
28+
namespace singa {
29+
30+
class TcFnHandle {
31+
public:
32+
TcFnHandle(std::string tcDefinition, std::string entryFn, const std::vector<Tensor> &inputs);
33+
std::string tc_string;
34+
std::string tc_name;
35+
std::unique_ptr<typename tc::CudaBackend::ExecutorType> pExecutor;
36+
};
37+
38+
Tensor tcExecute(const TcFnHandle &smh, const std::vector<Tensor> &inputs);
39+
40+
} // namespace singa
41+
42+
#endif // USE_TC

0 commit comments

Comments
 (0)