2020年12月30日 星期三

[Docker]Tensorflow-serving

# Download the TensorFlow Serving Docker images.
# CPU
docker pull tensorflow/serving
# GPU
docker pull tensorflow/serving:latest-gpu

# Create the tf directory under the current working directory; TESTDATA
# holds its absolute path. Quoted so paths containing spaces work.
TESTDATA="$(pwd)/tf"
mkdir -p -- "$TESTDATA"

### CPU ###
# Start the CPU serving container in the background:
#   - host port 8501 is published to container port 8501
#   - $TESTDATA is bind-mounted at /models/fashion_model
#   - MODEL_NAME tells the server which model to serve
#   - the container is removed automatically when it exits
docker run --tty --rm \
    --name tf_serving \
    --publish 8501:8501 \
    --volume "$TESTDATA:/models/fashion_model" \
    --env MODEL_NAME=fashion_model \
    tensorflow/serving &

### GPU ###
# Start the GPU serving container in the background. It publishes the same
# host port (8501) as the CPU container, so run only one of the two at a time.
# --rm removes the container on exit so the fixed name "tf_serving_gpu" can be
# reused on the next run (matches the CPU command).
# NOTE(review): --runtime=nvidia needs the NVIDIA runtime registered with the
# Docker daemon; newer Docker setups may use `--gpus all` instead -- confirm
# for the target host.
docker run -t --rm --runtime=nvidia -p 8501:8501 \
    --name tf_serving_gpu \
    -v "$TESTDATA:/models/fashion_model" \
    -e MODEL_NAME=fashion_model \
    tensorflow/serving:latest-gpu &
# Check that the served model is loaded via the model status API.
# The containers are started with MODEL_NAME=fashion_model, so the
# "half_plus_two" demo endpoint does not exist on this server.
curl -X GET http://localhost:8501/v1/models/fashion_model
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import json

import requests

# Load Fashion-MNIST; only the test split is kept, the training split is discarded.
fashion_mnist = keras.datasets.fashion_mnist
(_, _), (test_images, test_labels) = fashion_mnist.load_data()

# Reshape to (N, 28, 28, 1), i.e. add a trailing single-channel axis.
# NOTE(review): pixel values are sent unscaled -- confirm the served model
# expects the same input range it is given here.
test_images = test_images.reshape(test_images.shape[0], 28, 28, 1)

# Request body for the REST predict API: the first three test images.
data = json.dumps({"signature_name": "serving_default", "instances": test_images[0:3].tolist()})

headers = {"content-type": "application/json"}
# A timeout keeps the script from hanging forever if the server is unreachable.
json_response = requests.post(
    'http://10.15.11.75:8501/v1/models/fashion_model:predict',
    data=data,
    headers=headers,
    timeout=30,
)
print(json_response)

# Surface HTTP errors (e.g. model not found) directly instead of failing
# later with a KeyError on the missing 'predictions' field.
json_response.raise_for_status()

predictions = json_response.json()['predictions']
# Index of the highest-scoring output for the first image.
print(np.argmax(predictions[0]))