# Download the TensorFlow Serving Docker images.
# CPU
docker pull tensorflow/serving
# GPU
docker pull tensorflow/serving:latest-gpu
# Create the tf folder in the current directory; it is bind-mounted into the
# serving containers as the model directory.
# The path is quoted so a working directory containing spaces still works.
TESTDATA="$(pwd)/tf"
mkdir -p "$TESTDATA"
### CPU ###
# Start the CPU TensorFlow Serving image in the background (trailing &).
# Host port 8501 is published to container port 8501, the host directory
# $TESTDATA is mounted at /models/fashion_model, and MODEL_NAME tells the
# server which model to serve. --rm removes the container once it exits.
docker run --tty --rm \
  --publish 8501:8501 \
  --name tf_serving \
  --volume "${TESTDATA}:/models/fashion_model" \
  --env MODEL_NAME=fashion_model \
  tensorflow/serving &
### GPU ###
# Start the GPU TensorFlow Serving image in the background (trailing &).
# This is an alternative to the CPU container: both publish host port 8501,
# so only one of them can run at a time.
# --rm removes the container when it exits; without it the stopped container
# keeps the name tf_serving_gpu and the next run fails with a name conflict.
# NOTE(review): --runtime=nvidia needs the NVIDIA container runtime to be
# installed; newer Docker releases also offer `--gpus all` — confirm which
# one the target host supports.
docker run -t --rm --runtime=nvidia -p 8501:8501 \
  --name tf_serving_gpu \
  -v "$TESTDATA:/models/fashion_model" \
  -e MODEL_NAME=fashion_model \
  tensorflow/serving:latest-gpu &
# Check that the served model is loaded, using the model status API.
# The containers in this file are started with MODEL_NAME=fashion_model, so a
# request addressed to "half_plus_two" has no servable to answer it.
# A real predict request needs 28x28x1 image instances, which is impractical
# to type into curl; the Python client in this file sends one.
curl http://localhost:8501/v1/models/fashion_model
import tensorflow as tf
from tensorflow import keras
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
import json
import requests
# Load the Fashion-MNIST dataset and keep only the test split.
fashion_mnist = keras.datasets.fashion_mnist
(_, _), (test_images, test_labels) = fashion_mnist.load_data()
# Add a trailing channel axis so every image has shape 28x28x1.
# NOTE(review): pixel values are sent exactly as loaded, without rescaling —
# confirm this matches the preprocessing the served model was trained with.
test_images = test_images.reshape(test_images.shape[0], 28, 28, 1)

# Build the JSON body of the predict request from the first three test images.
data = json.dumps({"signature_name": "serving_default", "instances": test_images[0:3].tolist()})
headers = {"content-type": "application/json"}
# NOTE(review): the server address is hard-coded; the curl example in this
# file targets localhost instead — confirm which host runs the container.
json_response = requests.post('http://10.15.11.75:8501/v1/models/fashion_model:predict', data=data, headers=headers)
print(json_response)
# Stop with a clear HTTP error on a non-2xx reply instead of failing later
# with an opaque KeyError on the missing 'predictions' field.
json_response.raise_for_status()
predictions = json_response.json()['predictions']
# Print the index of the highest-scoring output for the first image.
print(np.argmax(predictions[0]))