- # Download the TensorFlow Serving Docker images.
- # CPU
- docker pull tensorflow/serving
- # GPU
- docker pull tensorflow/serving:latest-gpu
-
- # Create the tf folder that is mounted into the container as the model directory.
- # The path is quoted so a working directory containing spaces is not word-split.
- TESTDATA="$(pwd)/tf"
- mkdir -p "$TESTDATA"
-
- ### CPU ###
- # Start the CPU serving container in the background: publish container port 8501
- # on host port 8501 and mount $TESTDATA as the fashion_model directory.
- # --rm removes the container once it exits.
- docker run --tty --rm \
- --name tf_serving \
- --publish 8501:8501 \
- --env MODEL_NAME=fashion_model \
- --volume "$TESTDATA:/models/fashion_model" \
- tensorflow/serving &
-
- ### GPU ###
- # GPU variant of the CPU command; start only one of the two, because both
- # publish host port 8501.
- # --rm (as in the CPU command) removes the container on exit, so the fixed
- # name tf_serving_gpu does not collide the next time this is run.
- docker run -t --rm --runtime=nvidia -p 8501:8501 \
- --name tf_serving_gpu \
- -v "$TESTDATA:/models/fashion_model" \
- -e MODEL_NAME=fashion_model \
- tensorflow/serving:latest-gpu &
- # Query the model using the predict API
- curl -d '{"instances": [1.0, 2.0, 5.0]}' \
- -X POST http://localhost:8501/v1/models/half_plus_two:predict
- import tensorflow as tf
- from tensorflow import keras
-
- # Helper libraries
- import numpy as np
- import matplotlib.pyplot as plt
- import json
-
- import requests
-
- fashion_mnist = keras.datasets.fashion_mnist
- (_, _), (test_images, test_labels) = fashion_mnist.load_data()
-
- test_images = test_images.reshape(test_images.shape[0], 28, 28, 1)
-
- data = json.dumps({"signature_name": "serving_default", "instances": test_images[0:3].tolist()})
-
- headers = {"content-type": "application/json"}
- json_response = requests.post('http://10.15.11.75:8501/v1/models/fashion_model:predict', data=data, headers=headers)
- print(json_response)
-
- predictions = json.loads(json_response.text)['predictions']
- print(np.argmax(predictions[0]))