Skip to content

Add automotive benchmarking setting #2139

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions loadgen/benchmark/repro.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ class QSL : public mlperf::QuerySampleLibrary {
const std::string& Name() override { return mName; }
size_t TotalSampleCount() override { return 1000000; }
size_t PerformanceSampleCount() override { return TotalSampleCount(); }
// Grouping interface for an ungrouped QSL: every sample is its own group.
// Size of group `i`; always 1 here.
size_t GroupSize(size_t i) override { return 1; }
// Group that sample `i` belongs to; the identity mapping here.
size_t GroupOf(size_t i) override { return i; }
// Number of groups; one per sample, so it equals TotalSampleCount().
size_t NumberOfGroups() override { return TotalSampleCount(); }
void LoadSamplesToRam(const std::vector<mlperf::QuerySampleIndex>&) override {
}
void UnloadSamplesFromRam(
Expand Down
93 changes: 93 additions & 0 deletions loadgen/bindings/c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ limitations under the License.
#include "c_api.h"

#include <string>
#include <cassert>

#include "../loadgen.h"
#include "../query_sample.h"
Expand Down Expand Up @@ -88,6 +89,9 @@ class QuerySampleLibraryTrampoline : public QuerySampleLibrary {
const std::string& Name() override { return name_; }
size_t TotalSampleCount() override { return total_sample_count_; }
size_t PerformanceSampleCount() override { return performance_sample_count_; }
// Grouping interface for an ungrouped QSL: every sample is its own group.
// Size of group `i`; always 1 here.
size_t GroupSize(size_t i) override { return 1; }
// Group that sample `i` belongs to; the identity mapping here.
size_t GroupOf(size_t i) override { return i; }
// Number of groups; one per sample, so it equals total_sample_count_.
size_t NumberOfGroups() override { return total_sample_count_; }

void LoadSamplesToRam(const std::vector<QuerySampleIndex>& samples) override {
(*load_samples_to_ram_cb_)(client_data_, samples.data(), samples.size());
Expand Down Expand Up @@ -126,6 +130,83 @@ void DestroyQSL(void* qsl) {
delete qsl_cast;
}

namespace {

// Forwards QuerySampleLibrary calls to the client's C callbacks and exposes a
// sample -> group mapping built from a list of group sizes.
//
// Samples are numbered consecutively group by group: group 0 owns the first
// group_sizes[0] sample indices, group 1 the next group_sizes[1], and so on.
// The total sample count is the sum of all group sizes.
class GroupedQuerySampleLibraryTrampoline : public QuerySampleLibrary {
 public:
  // client_data                 Opaque value handed back to both callbacks.
  // name                        Value returned by Name().
  // performance_sample_count    Value returned by PerformanceSampleCount().
  // load_samples_to_ram_cb      Invoked by LoadSamplesToRam().
  // unload_samples_from_ram_cb  Invoked by UnloadSamplesFromRam().
  // group_sizes                 Number of samples in each group, in order.
  GroupedQuerySampleLibraryTrampoline(
      ClientData client_data, std::string name,
      size_t performance_sample_count,
      LoadSamplesToRamCallback load_samples_to_ram_cb,
      UnloadSamplesFromRamCallback unload_samples_from_ram_cb,
      const std::vector<size_t>& group_sizes)
      : name_(std::move(name)),
        // Must be stored: it is passed to every callback invocation below.
        client_data_(client_data),
        group_sizes_(group_sizes),
        total_sample_count_(0),
        performance_sample_count_(performance_sample_count),
        load_samples_to_ram_cb_(load_samples_to_ram_cb),
        unload_samples_from_ram_cb_(unload_samples_from_ram_cb) {
    for (const size_t group_size : group_sizes_) {
      total_sample_count_ += group_size;
    }

    // One entry per sample, holding the index of the group that owns it.
    group_idx_.reserve(total_sample_count_);
    for (size_t group = 0; group < group_sizes_.size(); ++group) {
      group_idx_.insert(group_idx_.end(), group_sizes_[group], group);
    }
  }
  ~GroupedQuerySampleLibraryTrampoline() override = default;

  const std::string& Name() override { return name_; }
  size_t TotalSampleCount() override { return total_sample_count_; }
  size_t PerformanceSampleCount() override { return performance_sample_count_; }
  // Size of group `i`. `i` is not range-checked.
  size_t GroupSize(size_t i) override { return group_sizes_[i]; }
  // Index of the group that sample `i` belongs to. `i` is not range-checked.
  size_t GroupOf(size_t i) override { return group_idx_[i]; }
  size_t NumberOfGroups() override { return group_sizes_.size(); }

  void LoadSamplesToRam(const std::vector<QuerySampleIndex>& samples) override {
    (*load_samples_to_ram_cb_)(client_data_, samples.data(), samples.size());
  }
  void UnloadSamplesFromRam(
      const std::vector<QuerySampleIndex>& samples) override {
    (*unload_samples_from_ram_cb_)(client_data_, samples.data(),
                                   samples.size());
  }

 private:
  std::string name_;
  ClientData client_data_;
  std::vector<size_t> group_sizes_;  // group_sizes_[g]: samples in group g.
  std::vector<size_t> group_idx_;    // group_idx_[s]: group owning sample s.
  size_t total_sample_count_;
  size_t performance_sample_count_;
  LoadSamplesToRamCallback load_samples_to_ram_cb_;
  UnloadSamplesFromRamCallback unload_samples_from_ram_cb_;
};

} // namespace

// Allocates a GroupedQuerySampleLibraryTrampoline on the heap and returns it
// as an opaque handle; release it with DestroyGroupedQSL().
//
// `name` is read as `name_length` bytes (it need not be NUL-terminated).
// NOTE(review): `total_sample_count` is accepted but not forwarded; the
// trampoline derives its total from `group_sizes`.
void* ConstructGroupedQSL(ClientData client_data, const char* name, size_t name_length,
                          size_t total_sample_count, size_t performance_sample_count,
                          LoadSamplesToRamCallback load_samples_to_ram_cb,
                          UnloadSamplesFromRamCallback unload_samples_from_ram_cb,
                          std::vector<size_t>& group_sizes) {
  std::string qsl_name(name, name_length);
  return reinterpret_cast<void*>(new GroupedQuerySampleLibraryTrampoline(
      client_data, std::move(qsl_name), performance_sample_count,
      load_samples_to_ram_cb, unload_samples_from_ram_cb, group_sizes));
}

// Deletes a grouped QSL. `qsl` is interpreted as a
// GroupedQuerySampleLibraryTrampoline*, i.e. a handle obtained from
// ConstructGroupedQSL().
void DestroyGroupedQSL(void* qsl) {
  delete reinterpret_cast<GroupedQuerySampleLibraryTrampoline*>(qsl);
}


// mlperf::c::StartTest just forwards to mlperf::StartTest after doing the
// proper cast.
void StartTest(void* sut, void* qsl, const TestSettings& settings,
Expand All @@ -139,6 +220,18 @@ void StartTest(void* sut, void* qsl, const TestSettings& settings,
audit_config_filename);
}

void StartTestWithGroupedQSL(void* sut, void* qsl, const TestSettings& settings,
const std::string& audit_config_filename = "audit.config") {
SystemUnderTestTrampoline* sut_cast =
reinterpret_cast<SystemUnderTestTrampoline*>(sut);
GroupedQuerySampleLibraryTrampoline* qsl_cast =
reinterpret_cast<GroupedQuerySampleLibraryTrampoline*>(qsl);
assert(settings.use_grouped_qsl);
LogSettings default_log_settings;
mlperf::StartTest(sut_cast, qsl_cast, settings, default_log_settings,
audit_config_filename);
}

void QuerySamplesComplete(QuerySampleResponse* responses,
size_t response_count) {
mlperf::QuerySamplesComplete(responses, response_count);
Expand Down
109 changes: 107 additions & 2 deletions loadgen/bindings/python_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ limitations under the License.
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#include "pybind11/stl_bind.h"
#include "pybind11/numpy.h"

namespace mlperf {

Expand Down Expand Up @@ -109,8 +110,11 @@ class QuerySampleLibraryTrampoline : public QuerySampleLibrary {
~QuerySampleLibraryTrampoline() override = default;

const std::string& Name() override { return name_; }
size_t TotalSampleCount() { return total_sample_count_; }
size_t PerformanceSampleCount() { return performance_sample_count_; }
size_t TotalSampleCount() override { return total_sample_count_; }
size_t PerformanceSampleCount() override { return performance_sample_count_; }
// Grouping interface for an ungrouped QSL: every sample is its own group.
// Size of group `i`; always 1 here.
size_t GroupSize(size_t i) override { return 1; }
// Group that sample `i` belongs to; the identity mapping here.
size_t GroupOf(size_t i) override { return i; }
// Number of groups; one per sample, so it equals total_sample_count_.
size_t NumberOfGroups() override { return total_sample_count_; }

void LoadSamplesToRam(const std::vector<QuerySampleIndex>& samples) override {
pybind11::gil_scoped_acquire gil_acquirer;
Expand All @@ -130,6 +134,65 @@ class QuerySampleLibraryTrampoline : public QuerySampleLibrary {
UnloadSamplesFromRamCallback unload_samples_from_ram_cb_;
};

// Forwards QuerySampleLibrary calls to relevant callbacks.
class GroupedQuerySampleLibraryTrampoline : public QuerySampleLibrary {
public:
GroupedQuerySampleLibraryTrampoline(
std::string name,
size_t performance_sample_count,
LoadSamplesToRamCallback load_samples_to_ram_cb,
UnloadSamplesFromRamCallback unload_samples_from_ram_cb,
pybind11::array_t<size_t> group_sizes)
: name_(std::move(name)),
performance_sample_count_(performance_sample_count),
load_samples_to_ram_cb_(load_samples_to_ram_cb),
unload_samples_from_ram_cb_(unload_samples_from_ram_cb) {

total_sample_count_ = 0;
if(group_sizes.ndim() != 1){
throw std::runtime_error("Group sizes should be a 1D Numpy array");
}
auto buffer = group_sizes.request();
size_t* ptr = (size_t*)buffer.ptr;

for(ssize_t i = 0; i < group_sizes.shape()[0]; i++){
group_sizes_.push_back(ptr[i]);
total_sample_count_ += ptr[i];
for(size_t j = 0; j < ptr[i]; j++){
group_idx_.push_back(i);
}
}
}
~GroupedQuerySampleLibraryTrampoline() override = default;

const std::string& Name() override { return name_; }
size_t TotalSampleCount() override { return total_sample_count_; }
size_t PerformanceSampleCount() override { return performance_sample_count_; }
size_t GroupSize(size_t i) override { return group_sizes_[i]; }
size_t GroupOf(size_t i) override { return group_idx_[i]; }
size_t NumberOfGroups() override { return group_sizes_.size(); }

void LoadSamplesToRam(const std::vector<QuerySampleIndex>& samples) override {
pybind11::gil_scoped_acquire gil_acquirer;
load_samples_to_ram_cb_(samples);
}
void UnloadSamplesFromRam(
const std::vector<QuerySampleIndex>& samples) override {
pybind11::gil_scoped_acquire gil_acquirer;
unload_samples_from_ram_cb_(samples);
}

private:
std::string name_;
std::vector<size_t> group_sizes_;
std::vector<size_t> group_idx_;
size_t total_sample_count_;
size_t performance_sample_count_;
LoadSamplesToRamCallback load_samples_to_ram_cb_;
UnloadSamplesFromRamCallback unload_samples_from_ram_cb_;
};


// A QDL that allows defining callbacks for
// IssueQuery, FlushQueries, and Name methods.
class QueryDispatchLibraryTrampoline : public QueryDispatchLibrary {
Expand Down Expand Up @@ -226,6 +289,23 @@ void DestroyQDL(uintptr_t qdl) {
delete qdl_cast;
}

// Allocates a GroupedQuerySampleLibraryTrampoline named "PyQSL" on the heap
// and returns its address as an integer handle; release it with
// DestroyGroupedQSL().
uintptr_t ConstructGroupedQSL(
    pybind11::array_t<size_t> group_sizes, size_t performance_sample_count,
    LoadSamplesToRamCallback load_samples_to_ram_cb,
    UnloadSamplesFromRamCallback unload_samples_from_ram_cb) {
  return reinterpret_cast<uintptr_t>(new GroupedQuerySampleLibraryTrampoline(
      "PyQSL", performance_sample_count, load_samples_to_ram_cb,
      unload_samples_from_ram_cb, group_sizes));
}

// Deletes a grouped QSL. `qsl` is interpreted as the address of a
// GroupedQuerySampleLibraryTrampoline, i.e. a handle obtained from
// ConstructGroupedQSL().
void DestroyGroupedQSL(uintptr_t qsl) {
  delete reinterpret_cast<GroupedQuerySampleLibraryTrampoline*>(qsl);
}

void StartTest(uintptr_t sut, uintptr_t qsl, mlperf::TestSettings test_settings,
const std::string& audit_config_filename) {
pybind11::gil_scoped_release gil_releaser;
Expand All @@ -251,6 +331,20 @@ void StartTestWithLogSettings(uintptr_t sut, uintptr_t qsl,
audit_config_filename);
}

void StartTestWithGroupedQSL(
uintptr_t sut, uintptr_t qsl, mlperf::TestSettings test_settings,
const std::string& audit_config_filename){
pybind11::gil_scoped_release gil_releaser;
SystemUnderTestTrampoline* sut_cast =
reinterpret_cast<SystemUnderTestTrampoline*>(sut);
GroupedQuerySampleLibraryTrampoline* qsl_cast =
reinterpret_cast<GroupedQuerySampleLibraryTrampoline*>(qsl);
LogSettings default_log_settings;
assert(test_settings.use_grouped_qsl);
mlperf::StartTest(sut_cast, qsl_cast, test_settings, default_log_settings,
audit_config_filename);
}

using ResponseCallback = std::function<void(QuerySampleResponse*)>;

/// TODO: Get rid of copies.
Expand Down Expand Up @@ -310,6 +404,8 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
&TestSettings::server_max_async_queries)
.def_readwrite("server_num_issue_query_threads",
&TestSettings::server_num_issue_query_threads)
.def_readwrite("server_constant_gen",
&TestSettings::server_constant_gen)
.def_readwrite("offline_expected_qps",
&TestSettings::offline_expected_qps)
.def_readwrite("min_duration_ms", &TestSettings::min_duration_ms)
Expand Down Expand Up @@ -340,6 +436,7 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
.def_readwrite("test05_schedule_rng_seed",
&TestSettings::test05_schedule_rng_seed)
.def_readwrite("use_token_latencies", &TestSettings::use_token_latencies)
.def_readwrite("use_grouped_qsl", &TestSettings::use_grouped_qsl)
.def_readwrite("ttft_latency", &TestSettings::server_ttft_latency)
.def_readwrite("tpot_latency", &TestSettings::server_tpot_latency)
.def_readwrite("infer_token_latencies",
Expand Down Expand Up @@ -454,6 +551,11 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
m.def("DestroyQDL", &py::DestroyQDL,
"Destroy the object created by ConstructQDL.");

m.def("ConstructGroupedQSL", &py::ConstructGroupedQSL,
"Construct grouped query sample library.");
m.def("DestroyGroupedQSL", &py::DestroyGroupedQSL,
"Destroy the object created by ConstructGroupedQSL.");

m.def("StartTest", &py::StartTest,
"Run tests on a SUT created by ConstructSUT() with the provided QSL. "
"Uses default log settings.",
Expand All @@ -476,6 +578,9 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
"IssueQuery calls have finished.",
pybind11::arg("responses"),
pybind11::arg("response_cb") = ResponseCallback{});
// Adjacent string literals are concatenated verbatim, so the first piece must
// end with a space to keep "by" and "ConstructGroupedQSL" apart.
m.def("StartTestWithGroupedQSL", &py::StartTestWithGroupedQSL,
      "Run tests on a SUT created by ConstructSUT() and a QSL created by "
      "ConstructGroupedQSL().");
}

} // namespace py
Expand Down
75 changes: 75 additions & 0 deletions loadgen/demos/py_demo_constant_gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright 2019 The MLPerf Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

"""Python demo showing how to use the MLPerf Inference load generator bindings.
"""

from __future__ import print_function

import threading
import time

from absl import app
import mlperf_loadgen


def load_samples_to_ram(query_samples):
    """Load callback for the demo QSL; intentionally does nothing."""
    del query_samples  # Unused: the demo keeps no sample data.


def unload_samples_from_ram(query_samples):
    """Unload callback for the demo QSL; intentionally does nothing."""
    del query_samples  # Unused: the demo keeps no sample data.


def process_query_async(query_samples):
    """Sleep for 1 ms, then complete every sample in the query in one call.

    Each response carries the sample's id with the remaining two
    QuerySampleResponse arguments set to 0.
    """
    time.sleep(0.001)
    completed = [
        mlperf_loadgen.QuerySampleResponse(sample.id, 0, 0)
        for sample in query_samples
    ]
    mlperf_loadgen.QuerySamplesComplete(completed)


def issue_query(query_samples):
    """Hand the query to a new thread running process_query_async."""
    worker = threading.Thread(target=process_query_async, args=[query_samples])
    worker.start()


def flush_queries():
    """Flush callback for the demo SUT; there is nothing to flush."""
    return None


def main(argv):
    """Run a PerformanceOnly Server-scenario test with server_constant_gen on.

    The SUT and QSL handles are destroyed (QSL first, then SUT) even if
    constructing the QSL or running the test raises.
    """
    del argv  # Unused.

    settings = mlperf_loadgen.TestSettings()
    settings.scenario = mlperf_loadgen.TestScenario.Server
    settings.mode = mlperf_loadgen.TestMode.PerformanceOnly
    settings.server_target_qps = 100
    settings.server_target_latency_ns = 100000000  # 100 ms
    settings.min_query_count = 100
    settings.min_duration_ms = 10000  # 10 s
    settings.server_constant_gen = True

    sut = mlperf_loadgen.ConstructSUT(issue_query, flush_queries)
    try:
        # Presumably (total_sample_count, performance_sample_count, ...);
        # confirm against the ConstructQSL binding.
        qsl = mlperf_loadgen.ConstructQSL(
            1024, 128, load_samples_to_ram, unload_samples_from_ram
        )
        try:
            mlperf_loadgen.StartTest(sut, qsl, settings)
        finally:
            mlperf_loadgen.DestroyQSL(qsl)
    finally:
        mlperf_loadgen.DestroySUT(sut)


if __name__ == "__main__":
app.run(main)
Loading
Loading