Refer to the Triton installation.
Prepare the model
$ mkdir mnist_infer && cd mnist_infer
$ vi train.py
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
def normalize_img(image, label):
    """Normalizes images: `uint8` -> `float32`."""
    return tf.cast(image, tf.float32) / 255., label
(ds_train, ds_test), ds_info = tfds.load(
    'mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
    try_gcs=True
)
ds_train = ds_train.map(normalize_img,
                        num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
ds_train = ds_train.batch(128)
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)
ds_test = ds_test.map(normalize_img,
                      num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_test = ds_test.batch(128)
ds_test = ds_test.cache()
ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE)
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10)
])
model.compile(
    optimizer=tf.keras.optimizers.Adam(0.001),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)
model.summary()
model.fit(ds_train, epochs=10, validation_data=ds_test)
scores = model.evaluate(ds_test, verbose=0)
for name, value in zip(model.metrics_names, scores):
    print("%s: %.2f" % (name, value))
model.save('model.savedmodel')
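$ python train.py
The input and output tensor names used in config.pbtxt below ("flatten_input", "dense_1") are taken from the SavedModel's serving signature. A quick way to confirm them after training, sketched here with a hypothetical check_signature.py:
$ vi check_signature.py
import tensorflow as tf
# Load the exported SavedModel and print its default serving signature,
# so the names match what config.pbtxt declares.
loaded = tf.saved_model.load('model.savedmodel')
sig = loaded.signatures['serving_default']
print(sig.structured_input_signature)  # input tensor name, e.g. flatten_input
print(sig.structured_outputs)          # output tensor name, e.g. dense_1
$ python check_signature.py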
$ cd server.22.04/docs/examples/model_repository
$ mkdir -p mnist/1
$ cp -r your_path_to/mnist_infer/model.savedmodel mnist/1/
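After the copy, the directory layout Triton expects for this model looks like this (config.pbtxt is written in the next step):
model_repository/
└── mnist/
    ├── config.pbtxt
    └── 1/
        └── model.savedmodel/
            ├── saved_model.pb
            └── variables/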
$ vi mnist/config.pbtxt
name: "mnist"
platform: "tensorflow_savedmodel"
max_batch_size: 32
input [
  {
    name: "flatten_input"
    data_type: TYPE_FP32
    format: FORMAT_NHWC
    dims: [ 28, 28, 1 ]
  }
]
output [
  {
    name: "dense_1"
    data_type: TYPE_FP32
    dims: [ 10 ]
  }
]
instance_group [
  {
    kind: KIND_GPU
    count: 2
  }
]
optimization {
  execution_accelerators {
    gpu_execution_accelerator : [
      {
        name : "tensorrt"
        parameters { key: "precision_mode" value: "FP16" }
      }
    ]
  }
}
version_policy { latest { num_versions: 1 } }
dynamic_batching {
  preferred_batch_size: [ 4, 8 ]
  max_queue_delay_microseconds: 100
}
Run the Triton Inference Server
$ docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 \
    -v/home/mark/Data/DeepStream/tao/triton-inference-server/server.22.04/docs/examples/model_repository:/models \
    nvcr.io/nvidia/tritonserver:22.04-py3 \
    tritonserver --model-repository=/models
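Once the server log shows the mnist model as READY, you can verify it from the host with the Triton Python client (a minimal sketch; check_ready.py is just an illustrative name, and it assumes tritonclient[grpc] is installed, e.g. inside the SDK container used below):
$ vi check_ready.py
import tritonclient.grpc as grpcclient
# Connect to the gRPC endpoint published on port 8001 above.
triton_client = grpcclient.InferenceServerClient(url='localhost:8001')
print(triton_client.is_server_live())        # True once the server is up
print(triton_client.is_server_ready())       # True once all models are loaded
print(triton_client.is_model_ready('mnist'))
# Show the configuration Triton actually loaded for the model.
print(triton_client.get_model_config('mnist'))
$ python check_ready.py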
Test with a client program; prepare a grayscale digit image input.jpg
$ cd mnist_infer
$ vi client.py
from PIL import Image
import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import triton_to_np_dtype
## Preprocessing
img = Image.open('input.jpg').convert('L')
img = img.resize((28, 28))
imgArr = np.asarray(img)/255
imgArr = np.expand_dims(imgArr[:, :, np.newaxis], 0)
imgArr = imgArr.astype(triton_to_np_dtype('FP32'))
## Client-server communication
triton_client = grpcclient.InferenceServerClient(url='localhost:8001', verbose=0)
inputs = []
inputs.append(grpcclient.InferInput('flatten_input', imgArr.shape, 'FP32'))
inputs[0].set_data_from_numpy(imgArr)
outputs = []
outputs.append(grpcclient.InferRequestedOutput('dense_1', class_count=0))
responses = []
responses.append(triton_client.infer('mnist', inputs,
                                     request_id=str(1),
                                     model_version='1',
                                     outputs=outputs))
## Postprocessing
print(np.argmax(responses[0].as_numpy('dense_1')[0]))
$ docker run -it --rm --net=host --name Triton_mnist \
    -v/your_path_to/mnist_infer:/data -w'/data' \
    nvcr.io/nvidia/tritonserver:22.04-py3-sdk \
    bash -c 'python /data/client.py'
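The last layer of the model returns raw logits (Dense(10), trained with from_logits=True), so client.py prints the argmax of the logits directly. If you also want class probabilities, a softmax over the returned vector is enough; a small sketch that could be appended to client.py, reusing its responses object:
## Optional: convert logits to probabilities (numerically stable softmax)
logits = responses[0].as_numpy('dense_1')[0]
probs = np.exp(logits - np.max(logits))
probs /= probs.sum()
print('predicted digit:', int(np.argmax(probs)), 'confidence:', float(probs.max()))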
Test with the HTTP endpoint; prepare the data file input.json
$ sudo apt-get install jq
$ curl http://localhost:8000/v2/models/mnist | jq .
$ vi mnist_image.py
import numpy as np
from tensorflow.keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train / 255.
idx = 1
X = X_train[idx].reshape(784)
values = np.expand_dims(X, axis=0).reshape((1,28,28,1)).flatten().tolist()
print(y_train[idx])
#print(values)
cmd = '{"inputs":[{"name":"flatten_input","data":'+str(values)+',"datatype":"FP32","shape":[1,28,28,1]}]}'
with open("input.json", "w") as f:
    f.write(cmd)
$ python mnist_image.py
$ curl -s -d @./input.json -X POST http://localhost:8000/v2/models/mnist/infer -H "Content-Type: application/json" | jq .
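The same request can also be sent and parsed from Python instead of curl. A minimal sketch using the requests package (http_infer.py is just an illustrative name; the KServe v2 response carries the logits in outputs[0]['data']):
$ vi http_infer.py
import json
import numpy as np
import requests
# Load the payload written by mnist_image.py and POST it to the HTTP endpoint.
with open('input.json') as f:
    payload = json.load(f)
r = requests.post('http://localhost:8000/v2/models/mnist/infer', json=payload)
result = r.json()
# outputs[0]['data'] is the flat list of 10 logits for the single image.
logits = np.array(result['outputs'][0]['data'])
print('predicted digit:', int(np.argmax(logits)))
$ python http_infer.py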