# coding=utf-8
# Copyright 2023 AMD
#
"""BERT system-under-test that runs an ONNX model through MIGraphX.

The class below registers its callbacks with ``mlperf_loadgen`` and answers
each query batch by running the compiled MIGraphX program.
"""

import array
import json
import os
import sys

# Make modules in the current working directory importable (e.g. squad_QSL).
sys.path.insert(0, os.getcwd())

import mlperf_loadgen as lg
import numpy as np
import migraphx
from squad_QSL import get_squad_QSL

# Names used both as keys of the model feed dict and as the attribute names
# read from every feature object returned by the QSL.
_INPUT_NAMES = ("input_ids", "input_mask", "segment_ids")


class BERT_migraphx_SUT:
    """Loadgen SUT wrapping a MIGraphX program parsed from an ONNX file."""

    def __init__(self, args):
        """Load, quantize and compile the model, then build the SUT and QSL.

        Args:
            args: object providing ``quantized``, ``batch_size`` and ``model``
                attributes; it is also forwarded unchanged to
                ``get_squad_QSL``.
        """
        print("Loading ONNX model...")
        self.quantized = args.quantized  # stored only; not consulted below
        self.batch_size = args.batch_size
        self.model_path = args.model
        program = migraphx.parse_onnx(
            self.model_path, default_dim_value=self.batch_size
        )
        self.model = program
        print("Quantize to fp16...")
        migraphx.quantize_fp16(program)
        print("Compile for gpu...")
        program.compile(migraphx.get_target("gpu"))

        print("Constructing SUT...")
        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries)
        print("Finished constructing SUT.")

        print("Warmup...")
        # One reusable int64 buffer per model input; 384 is presumably the
        # sequence length the model expects -- TODO confirm.
        buffer_shape = (self.batch_size, 384)
        self.fd = {
            name: np.zeros(buffer_shape, dtype=np.int64)
            for name in _INPUT_NAMES
        }
        self.model.run(self.fd)

        self.qsl = get_squad_QSL(args)

    def issue_queries(self, query_samples):
        """Run the given samples in chunks of ``batch_size`` and report them.

        Args:
            query_samples: sequence of objects exposing ``index`` (passed to
                the QSL) and ``id`` (echoed back in the response).
        """
        sample_indices = [sample.index for sample in query_samples]
        sample_ids = [sample.id for sample in query_samples]
        step = self.batch_size

        for start in range(0, len(sample_indices), step):
            stop = start + step
            features = self.qsl.get_features(sample_indices[start:stop])
            filled = len(features)

            # Only the first `filled` rows are overwritten; on a short final
            # chunk the remaining rows keep whatever they held before.
            for name in _INPUT_NAMES:
                self.fd[name][:filled] = np.array(
                    [getattr(feature, name) for feature in features]
                ).astype(np.int64)

            raw_outputs = self.model.run(self.fd)
            # Stack the model outputs along a new trailing axis.
            stacked = np.stack(
                [np.array(out) for out in raw_outputs], axis=-1
            )

            # The responses carry raw buffer addresses, so the arrays are
            # kept referenced here until QuerySamplesComplete has been called.
            keepalive = []
            responses = []
            for row, qid in enumerate(sample_ids[start:stop]):
                payload = array.array(
                    "B", np.array(stacked[row], np.float32).tobytes()
                )
                keepalive.append(payload)
                address, length = payload.buffer_info()
                responses.append(
                    lg.QuerySampleResponse(qid, address, length)
                )
            lg.QuerySamplesComplete(responses)

    def flush_queries(self):
        """Loadgen flush callback; nothing is buffered, so it does nothing."""

    def __del__(self):
        """Print a message when the object is finalized."""
        print("Finished destroying SUT.")


def get_migraphx_sut(args):
    """Build and return a ``BERT_migraphx_SUT`` configured from ``args``."""
    return BERT_migraphx_SUT(args)