
Commit a418dd4

Add fastdeploy server and client component (#1169)
* add backend support for fastdeploy server
* add fastdeploy server component
* add fastdeploy server and client
* add exception descriptions
* add model repository judgement
* add component tab for fastdeploy client
* update more tasks in fastdeploy client
* sort filenames
* back up config
* noqa for autogenerated file
* add data validation
* add __init__ for package
* add layout calculation for frontend
* add alive-server detection and optimize client
* add metrics to gradio client
* update presentation
* change return value to None for frontend performance data when the server is not ready
* add get_server_config and download_pretrain_model APIs
* add units to metric table
* add judgement for pretrained model download
* add version info for frontend
* rename downloaded model
* add fastdeploy model list
* optimize configuration file selection
* modify according to frontend needs
* fix name in config to model name
* optimize server list and alive judgement
* keep server name as string type
* optimize process-judgement logic
* optimize deletion of resource files
* add renaming of resource files
* remove Chinese tips and remove fastdeploy-python from requirements
* assorted bug fixes and code structure cleanup
1 parent b90619b commit a418dd4

14 files changed (+5145 −3 lines)

requirements.txt

Lines changed: 5 additions & 1 deletion

@@ -12,4 +12,8 @@ multiprocess
 packaging
 x2paddle
 rarfile
-onnx >= 1.6.0
+gradio
+tritonclient[all]
+attrdict
+psutil
+onnx >= 1.6.0
Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================

visualdl/component/inference/fastdeploy_client/client_app.py

Lines changed: 409 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 304 additions & 0 deletions
@@ -0,0 +1,304 @@
# Copyright (c) 2022 VisualDL Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =======================================================================
import json
import re

import numpy as np
import requests
import tritonclient.http as httpclient
from attrdict import AttrDict
from tritonclient.utils import InferenceServerException


def convert_http_metadata_config(metadata):
    # Wrap the plain dict returned by the HTTP client in an AttrDict so
    # callers can use attribute access (metadata.inputs, metadata.outputs).
    metadata = AttrDict(metadata)

    return metadata
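A quick sketch of what the AttrDict wrapping buys downstream code; the metadata dict below is a made-up stand-in for a real server response, not part of the commit:

meta = convert_http_metadata_config({
    'inputs': [{'name': 'x', 'datatype': 'FP32', 'shape': [1, 3, 224, 224]}],
    'outputs': [{'name': 'y', 'datatype': 'FP32', 'shape': [1, 1000]}]
})
print(meta.inputs[0]['name'])   # attribute access instead of meta['inputs']
print(meta.outputs[0].name)     # nested dicts are wrapped recursively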
def prepare_request(inputs_meta, inputs_data, outputs_meta):
    '''
    inputs_meta: input metadata from the model, mapping name -> info
    inputs_data: user input data, mapping name -> data
    '''
    # Set the input data
    inputs = []
    for input_dict in inputs_meta:
        input_name = input_dict['name']
        if input_name not in inputs_data:
            raise RuntimeError(
                'Error: input name {} required by the model does not '
                'exist.'.format(input_name))
        if input_dict['datatype'] == 'FP32':
            inputs_data[input_name] = inputs_data[input_name].astype(
                np.float32
            ) / 255  # image data returned by gradio is uint8, convert to fp32
            if len(input_dict['shape']
                   ) == 3 and input_dict['shape'][0] == 3:  # CHW
                inputs_data[input_name] = inputs_data[input_name][0].transpose(
                    2, 0, 1)
            elif len(input_dict['shape']
                     ) == 4 and input_dict['shape'][1] == 3:  # NCHW
                inputs_data[input_name] = inputs_data[input_name].transpose(
                    0, 3, 1, 2)
        infer_input = httpclient.InferInput(
            input_name, inputs_data[input_name].shape, input_dict['datatype'])
        infer_input.set_data_from_numpy(inputs_data[input_name])
        inputs.append(infer_input)
    outputs = []
    for output_dict in outputs_meta:
        infer_output = httpclient.InferRequestedOutput(output_dict.name)
        outputs.append(infer_output)
    return inputs, outputs
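As a usage sketch, here is how prepare_request might be fed; the tensor names and shapes are hypothetical, and the metadata mirrors what the server's metadata endpoint reports for an NCHW image model:

# Hypothetical metadata for a model with one NCHW image input and one output.
inputs_meta = [{'name': 'x', 'datatype': 'FP32', 'shape': [1, 3, 224, 224]}]
outputs_meta = [AttrDict({'name': 'y'})]

# gradio delivers images as uint8 HWC arrays; here with a leading batch dim.
image = np.random.randint(0, 255, (1, 224, 224, 3), dtype=np.uint8)

inputs, outputs = prepare_request(inputs_meta, {'x': image}, outputs_meta)
# inputs[0] now holds a (1, 3, 224, 224) float32 tensor scaled to [0, 1].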
metrics_table_head = """
<style>
table, th {{
border:0.1px solid black;
}}
</style>

<div>
  <table style="width:100%">
    <tr>
      <th rowspan="2">Model name</th>
      <th colspan="4">Execution statistics</th>
      <th colspan="5">Latency statistics</th>
    </tr>
    <tr>
      <th>Successful requests</th>
      <th>Failed requests</th>
      <th>Inference batches</th>
      <th>Inference samples</th>
      <th>Request time (ms)</th>
      <th>Queue wait time (ms)</th>
      <th>Input processing time (ms)</th>
      <th>Model inference time (ms)</th>
      <th>Output processing time (ms)</th>
    </tr>
    {}
  </table>
</div>
<br>
<br>
<br>
<br>
<br>
<div>
  <table style="width:100%">
    <tr>
      <th rowspan="2">GPU</th>
      <th colspan="4">Performance metrics</th>
      <th colspan="2">GPU memory</th>
    </tr>
    <tr>
      <th>Utilization (%)</th>
      <th>Power (W)</th>
      <th>Power limit (W)</th>
      <th>Energy consumption (J)</th>
      <th>Total (GB)</th>
      <th>Used (GB)</th>
    </tr>
    {}
  </table>
</div>
"""
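The doubled {{ }} in the style block escape literal braces for str.format; the two remaining {} placeholders take pre-rendered <tr> rows for the model table and the GPU table respectively. A minimal sketch with made-up row contents:

html = metrics_table_head.format(
    '<tr><td>model_a</td><td>42</td><td>0</td></tr>',   # model rows
    '<tr><td>GPU-0</td><td>35</td><td>120</td></tr>')   # gpu rows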
def get_metric_data(server_addr, metric_port):  # noqa: C901
    '''
    Get metrics data from the fastdeploy server and transform it into an
    html table.
    Args:
        server_addr(str): fastdeployserver ip address
        metric_port(int): fastdeployserver metrics port
    Returns:
        htmltable(str): html table to show metrics data
    '''
    model_table = {}
    gpu_table = {}
    metric_column_name = {
        "Model": {
            "nv_inference_request_success", "nv_inference_request_failure",
            "nv_inference_count", "nv_inference_exec_count",
            "nv_inference_request_duration_us",
            "nv_inference_queue_duration_us",
            "nv_inference_compute_input_duration_us",
            "nv_inference_compute_infer_duration_us",
            "nv_inference_compute_output_duration_us"
        },
        "GPU": {
            "nv_gpu_power_usage", "nv_gpu_power_limit",
            "nv_energy_consumption", "nv_gpu_utilization",
            "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
        },
        "CPU": {
            "nv_cpu_utilization", "nv_cpu_memory_total_bytes",
            "nv_cpu_memory_used_bytes"
        }
    }
    try:
        res = requests.get("http://{}:{}/metrics".format(
            server_addr, metric_port))
    except Exception:
        # Metrics endpoint unreachable: render empty tables.
        return metrics_table_head.format('', '')
    metric_content = res.text
    for content in metric_content.split('\n'):
        if content.startswith('#'):  # skip HELP/TYPE comment lines
            continue
        # Match Prometheus-style lines emitted by the server metrics
        # interface; the value may be a float or use exponent notation,
        # so match any non-whitespace run rather than \w+.
        res = re.match(r'(\w+){(.*)} (\S+)', content)
        if not res:
            continue
        metric_name = res.group(1)
        model = res.group(2)
        value = res.group(3)
        infos = {}
        for info in model.split(','):  # parse labels: model="x",version="1"
            k, v = info.split('=')
            v = v.strip('"')
            infos[k] = v
        if metric_name in [
                "nv_inference_request_duration_us",
                "nv_inference_queue_duration_us",
                "nv_inference_compute_input_duration_us",
                "nv_inference_compute_infer_duration_us",
                "nv_inference_compute_output_duration_us"
        ]:
            value = str(float(value) / 1000)  # us -> ms
        elif metric_name in [
                "nv_gpu_memory_total_bytes", "nv_gpu_memory_used_bytes"
        ]:
            value = str(float(value) / 1024 / 1024 / 1024)  # bytes -> GB
        for key, metric_names in metric_column_name.items():
            if metric_name in metric_names:
                if key == 'Model':
                    model_name = infos['model']
                    if model_name not in model_table:
                        model_table[model_name] = {}
                    model_table[model_name][metric_name] = value
                elif key == 'GPU':
                    gpu_name = infos['gpu_uuid']
                    if gpu_name not in gpu_table:
                        gpu_table[gpu_name] = {}
                    gpu_table[gpu_name][metric_name] = value
                elif key == 'CPU':
                    pass  # CPU metrics are collected but not rendered yet
    model_data_list = []
    gpu_data_list = []
    model_data_metric_names = [
        "nv_inference_request_success", "nv_inference_request_failure",
        "nv_inference_exec_count", "nv_inference_count",
        "nv_inference_request_duration_us", "nv_inference_queue_duration_us",
        "nv_inference_compute_input_duration_us",
        "nv_inference_compute_infer_duration_us",
        "nv_inference_compute_output_duration_us"
    ]
    gpu_data_metric_names = [
        "nv_gpu_utilization", "nv_gpu_power_usage", "nv_gpu_power_limit",
        "nv_energy_consumption", "nv_gpu_memory_total_bytes",
        "nv_gpu_memory_used_bytes"
    ]
    for k, v in model_table.items():
        data = [k]
        for data_metric in model_data_metric_names:
            data.append(v[data_metric])
        model_data_list.append(data)
    for k, v in gpu_table.items():
        data = [k]
        for data_metric in gpu_data_metric_names:
            data.append(v[data_metric])
        gpu_data_list.append(data)
    model_data = '\n'.join([
        "<tr>" + '\n'.join(["<td>" + item + "</td>"
                            for item in data]) + "</tr>"
        for data in model_data_list
    ])
    gpu_data = '\n'.join([
        "<tr>" + '\n'.join(["<td>" + item + "</td>"
                            for item in data]) + "</tr>"
        for data in gpu_data_list
    ])
    return metrics_table_head.format(model_data, gpu_data)
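For reference, one raw metrics line and how the match above decomposes it; the model name and value are made up:

line = 'nv_inference_request_success{model="yolov5",version="1"} 42.0'
m = re.match(r'(\w+){(.*)} (\S+)', line)
# m.group(1) -> 'nv_inference_request_success'   (metric name)
# m.group(2) -> 'model="yolov5",version="1"'     (labels)
# m.group(3) -> '42.0'                           (value)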
class HttpClientManager:
    def __init__(self):
        self.clients = {}  # server url: httpclient

    def _create_client(self, server_url):
        if server_url in self.clients:
            return self.clients[server_url]
        try:
            fastdeploy_client = httpclient.InferenceServerClient(server_url)
            self.clients[server_url] = fastdeploy_client
            return fastdeploy_client
        except Exception:
            raise RuntimeError(
                'Cannot connect to server {}, please check your '
                'server address.'.format(server_url))

    def infer(self, server_url, model_name, model_version, inputs):
        fastdeploy_client = self._create_client(server_url)
        input_metadata, output_metadata = self.get_model_meta(
            server_url, model_name, model_version)
        inputs, outputs = prepare_request(input_metadata, inputs,
                                          output_metadata)
        response = fastdeploy_client.infer(
            model_name, inputs, model_version=model_version, outputs=outputs)

        results = {}
        for output in output_metadata:
            result = response.as_numpy(output.name)  # datatype: numpy
            if output.datatype == 'BYTES':  # datatype: bytes
                try:
                    # Unwrap the (possibly nested) single-element array and
                    # decode the payload as json if possible.
                    value = result
                    if len(result.shape) == 1:
                        value = result[0]
                    elif len(result.shape) == 2:
                        value = result[0][0]
                    elif len(result.shape) == 3:
                        value = result[0][0][0]
                    result = json.loads(value)  # datatype: json
                except Exception:
                    pass
            else:
                result = result[0]  # strip the batch dimension
            results[output.name] = result
        return results

    def raw_infer(self, server_url, model_name, model_version, raw_input):
        url = 'http://{}/v2/models/{}/versions/{}/infer'.format(
            server_url, model_name, model_version)
        res = requests.post(url, data=json.dumps(json.loads(raw_input)))
        return json.dumps(res.json())

    def get_model_meta(self, server_url, model_name, model_version):
        fastdeploy_client = self._create_client(server_url)
        try:
            model_metadata = fastdeploy_client.get_model_metadata(
                model_name=model_name, model_version=model_version)
        except InferenceServerException as e:
            raise RuntimeError("Failed to retrieve the metadata: " + str(e))

        model_metadata = convert_http_metadata_config(model_metadata)

        input_metadata = model_metadata.inputs
        output_metadata = model_metadata.outputs
        return input_metadata, output_metadata
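A hedged end-to-end sketch of how the manager might be driven; the server address, model name, version, and tensor names are assumptions for illustration, not values fixed by this commit:

manager = HttpClientManager()

# Structured path: metadata is fetched, inputs prepared, outputs decoded.
image = np.random.randint(0, 255, (1, 224, 224, 3), dtype=np.uint8)
results = manager.infer('localhost:8000', 'my_model', '1', {'x': image})

# Raw path: a KServe-v2-style json payload posted straight to the server.
payload = json.dumps({
    'inputs': [{
        'name': 'x',
        'shape': [1, 3, 224, 224],
        'datatype': 'FP32',
        'data': (image.astype('float32') / 255).transpose(
            0, 3, 1, 2).flatten().tolist()
    }]
})
print(manager.raw_infer('localhost:8000', 'my_model', '1', payload))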
