# REST endpoint of the TensorFlow Serving instance hosting the model
# named "resnet", addressed through the v1 predict API.
SERVER_URL = 'http://localhost:8501/v1/models/resnet:predict'

...
# Send a few real requests and report the average round-trip latency.
# Assumes `predict_request` (the serialized request body) was built in the
# elided section above.
total_time = 0
num_requests = 10
# NOTE: was `xrange`, which is Python 2 only; `range` works on Python 3.
for _ in range(num_requests):
    response = requests.post(SERVER_URL, data=predict_request)
    response.raise_for_status()  # fail fast on any non-2xx HTTP status
    # `elapsed` is the server round-trip time measured by requests itself.
    total_time += response.elapsed.total_seconds()

# Parse the JSON body only once, from the last response — every iteration
# returns the same prediction, and parsing inside the loop wastes work.
prediction = response.json()['predictions'][0]

print('Prediction class: {}, avg latency: {} ms'.format(
    prediction['classes'], (total_time * 1000) / num_requests))