Skip to content

Commit

Permalink
Add Tracelogging for profiling (#1639)
Browse files Browse the repository at this point in the history
Enabled only if onnxruntime_ENABLE_INSTRUMENT is ON
  • Loading branch information
snnn authored Nov 12, 2019
1 parent 0c6e9f9 commit fc6773a
Show file tree
Hide file tree
Showing 18 changed files with 1,228 additions and 72 deletions.
10 changes: 10 additions & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ option(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS "Enable operator implemented in l
option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS "Dump node input shapes and output data to standard output when executing the model." OFF)
option(onnxruntime_USE_DML "Build with DirectML support" OFF)
option(onnxruntime_USE_ACL "Build with ACL support" OFF)
option(onnxruntime_ENABLE_INSTRUMENT "Enable Instrument with Event Tracing for Windows (ETW)" OFF)

set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests" FORCE)
#nsync tests failed on Mac Build
Expand All @@ -91,6 +92,15 @@ set(ONNX_ML 1)
if(NOT onnxruntime_ENABLE_PYTHON)
set(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS OFF)
endif()

if(NOT WIN32)
#TODO: On Linux we may try https://github.com/microsoft/TraceLogging
if(onnxruntime_ENABLE_INSTRUMENT)
message(WARNING "Instrument is only supported on Windows now")
set(onnxruntime_ENABLE_INSTRUMENT OFF)
endif()
endif()

if(onnxruntime_USE_OPENMP)
find_package(OpenMP)
if (OPENMP_FOUND)
Expand Down
4 changes: 3 additions & 1 deletion cmake/onnxruntime_framework.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ file(GLOB_RECURSE onnxruntime_framework_srcs CONFIGURE_DEPENDS
source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_framework_srcs})

add_library(onnxruntime_framework ${onnxruntime_framework_srcs})

if(onnxruntime_ENABLE_INSTRUMENT)
target_compile_definitions(onnxruntime_framework PRIVATE ONNXRUNTIME_ENABLE_INSTRUMENT)
endif()
target_include_directories(onnxruntime_framework PRIVATE ${ONNXRUNTIME_ROOT} PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
onnxruntime_add_include_to_target(onnxruntime_framework onnxruntime_common onnx onnx_proto protobuf::libprotobuf)
set_target_properties(onnxruntime_framework PROPERTIES FOLDER "ONNXRuntime")
Expand Down
3 changes: 3 additions & 0 deletions cmake/onnxruntime_session.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_session_srcs})
add_library(onnxruntime_session ${onnxruntime_session_srcs})
install(DIRECTORY ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/session DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core)
onnxruntime_add_include_to_target(onnxruntime_session onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf)
if(onnxruntime_ENABLE_INSTRUMENT)
target_compile_definitions(onnxruntime_session PUBLIC ONNXRUNTIME_ENABLE_INSTRUMENT)
endif()
target_include_directories(onnxruntime_session PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS})
add_dependencies(onnxruntime_session ${onnxruntime_EXTERNAL_DEPENDENCIES})
set_target_properties(onnxruntime_session PROPERTIES FOLDER "ONNXRuntime")
Expand Down
11 changes: 11 additions & 0 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -776,6 +776,17 @@ if (onnxruntime_BUILD_SERVER)

endif()

#some ETW tools
if(WIN32 AND onnxruntime_ENABLE_INSTRUMENT)
add_executable(generate_perf_report_from_etl ${ONNXRUNTIME_ROOT}/tool/etw/main.cc ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc)
target_compile_definitions(generate_perf_report_from_etl PRIVATE "_CONSOLE" "_UNICODE" "UNICODE")
target_link_libraries(generate_perf_report_from_etl PRIVATE tdh Advapi32)

add_executable(compare_two_sessions ${ONNXRUNTIME_ROOT}/tool/etw/compare_two_sessions.cc ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc)
target_compile_definitions(compare_two_sessions PRIVATE "_CONSOLE" "_UNICODE" "UNICODE")
target_link_libraries(compare_two_sessions PRIVATE ${GETOPT_LIB_WIDE} tdh Advapi32)
endif()

add_executable(onnxruntime_mlas_test ${TEST_SRC_DIR}/mlas/unittest.cpp)
target_include_directories(onnxruntime_mlas_test PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${ONNXRUNTIME_ROOT})
set(onnxruntime_mlas_test_libs onnxruntime_mlas onnxruntime_common)
Expand Down
9 changes: 9 additions & 0 deletions include/onnxruntime/core/platform/tracing.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include <windows.h>
#include <TraceLoggingProvider.h>

TRACELOGGING_DECLARE_PROVIDER(telemetry_provider_handle);
36 changes: 33 additions & 3 deletions onnxruntime/core/framework/sequential_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,22 @@
using namespace Concurrency;
#endif

#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
#include <Windows.h>
#include "core/platform/tracing.h"
namespace {
LARGE_INTEGER OrtGetPerformanceFrequency() {
LARGE_INTEGER v;
// On systems that run Windows XP or later, the QueryPerformanceFrequency function will always succeed
// and will thus never return zero.
(void)QueryPerformanceFrequency(&v);
return v;
}

LARGE_INTEGER perf_freq = OrtGetPerformanceFrequency();
} // namespace
#endif

namespace onnxruntime {

static Status ReleaseNodeMLValues(ExecutionFrame& frame,
Expand Down Expand Up @@ -87,7 +103,10 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
if (p_op_kernel == nullptr)
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Got nullptr from GetKernel for node: ",
node.Name());

#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
LARGE_INTEGER kernel_start;
QueryPerformanceCounter(&kernel_start);
#endif
// construct OpKernelContext
// TODO: log kernel inputs?
OpKernelContextInternal op_kernel_context(session_state, frame, *p_op_kernel, logger, terminate_flag_);
Expand Down Expand Up @@ -128,7 +147,6 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
}
}
}

#if defined DEBUG_NODE_INPUTS_OUTPUTS
utils::DumpNodeInputs(op_kernel_context, p_op_kernel->Node());
#endif
Expand Down Expand Up @@ -202,7 +220,19 @@ Status SequentialExecutor::Execute(const SessionState& session_state, const std:
}
}
}

#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
LARGE_INTEGER kernel_stop;
QueryPerformanceCounter(&kernel_stop);
LARGE_INTEGER elapsed;
elapsed.QuadPart = kernel_stop.QuadPart - kernel_start.QuadPart;
elapsed.QuadPart *= 1000000;
elapsed.QuadPart /= perf_freq.QuadPart;
// Log an event
TraceLoggingWrite(telemetry_provider_handle, // handle to my provider
"OpEnd", // Event Name that should uniquely identify your event.
TraceLoggingValue(p_op_kernel->KernelDef().OpName().c_str(), "op_name"),
TraceLoggingValue(elapsed.QuadPart, "time"));
#endif
if (is_profiler_enabled) {
session_state.Profiler().EndTimeAndRecordEvent(profiling::NODE_EVENT,
p_op_kernel->Node().Name() + "_fence_after",
Expand Down
5 changes: 4 additions & 1 deletion onnxruntime/core/framework/session_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,10 @@ void SessionState::AddSubgraphSessionState(onnxruntime::NodeIndex index, const s
ORT_ENFORCE(existing_entries.find(attribute_name) == existing_entries.cend(), "Entry exists in node ", index,
" for attribute ", attribute_name);
}

#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
session_state->parent_ = this;
GenerateGraphId();
#endif
subgraph_session_states_[index].insert(std::make_pair(attribute_name, std::move(session_state)));
}

Expand Down
13 changes: 13 additions & 0 deletions onnxruntime/core/framework/session_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,19 @@ class SessionState {

std::unique_ptr<NodeIndexInfo> node_index_info_;
std::multimap<int, std::unique_ptr<FeedsFetchesManager>> cached_feeds_fetches_managers_;
#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
SessionState* parent_ = nullptr;
//Assign each graph in each session an unique id.
int graph_id_ = 0;
int next_graph_id_ = 1;

void GenerateGraphId() {
SessionState* p = this;
while (p->parent_ != nullptr) p = p->parent_;
graph_id_ = p->next_graph_id_ ++;
}

#endif
};

} // namespace onnxruntime
4 changes: 4 additions & 0 deletions onnxruntime/core/session/environment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@

#include "core/platform/env.h"

#ifdef ONNXRUNTIME_ENABLE_INSTRUMENT
#include "core/platform/tracing.h"
#endif

namespace onnxruntime {
using namespace ::onnxruntime::common;
using namespace ONNX_NAMESPACE;
Expand Down
Loading

0 comments on commit fc6773a

Please sign in to comment.