oneapi-src
diff --git a/‎AI-and-Analytics/Features-and-Functionality/IntelPython_GPU_numba-dpex_Genetic_Algorithm/IntelPython_GPU_numba-dpex_Genetic_Algorithm.ipynb
+12-9 b/‎AI-and-Analytics/Features-and-Functionality/IntelPython_GPU_numba-dpex_Genetic_Algorithm/IntelPython_GPU_numba-dpex_Genetic_Algorithm.ipynb
+12-9
diff --git a/‎AI-and-Analytics/Features-and-Functionality/IntelPython_GPU_numba-dpex_Genetic_Algorithm/IntelPython_GPU_numba-dpex_Genetic_Algorithm.py
+11-8 b/‎AI-and-Analytics/Features-and-Functionality/IntelPython_GPU_numba-dpex_Genetic_Algorithm/IntelPython_GPU_numba-dpex_Genetic_Algorithm.py
+11-8
diff --git a/‎AI-and-Analytics/Features-and-Functionality/IntelTensorFlow_Enabling_Auto_Mixed_Precision_for_TransferLearning/README.md
+65-5 b/‎AI-and-Analytics/Features-and-Functionality/IntelTensorFlow_Enabling_Auto_Mixed_Precision_for_TransferLearning/README.md
+65-5
@@ -348,7 +348,7 @@
     "\n",
     "The only par that differs form the standard implementation is the evaluation function.\n",
     "\n",
-    "The most important part is to specify the global index of the computation. This is the current index of the computed chromosomes. This serves as a loop function across all chromosomes."
+    "The most important part is to obtain the index of the current work item. This index identifies the chromosome being evaluated, so the kernel body takes the place of a loop over all chromosomes."
    ]
   },
   {
@@ -365,10 +365,11 @@
    "outputs": [],
    "source": [
     "import numba_dpex\n",
+    "from numba_dpex import kernel_api\n",
     "\n",
     "@numba_dpex.kernel\n",
-    "def eval_genomes_sycl_kernel(chromosomes, fitnesses, chrom_length):\n",
-    "  pos = numba_dpex.get_global_id(0)\n",
+    "def eval_genomes_sycl_kernel(item: kernel_api.Item, chromosomes, fitnesses, chrom_length):\n",
+    "  pos = item.get_id(0)\n",
     "  num_loops = 3000\n",
     "  for i in range(num_loops):\n",
     "    fitnesses[pos] += chromosomes[pos*chrom_length + 1]\n",
@@ -409,8 +410,9 @@
     "  chromosomes_flat = chromosomes.flatten()\n",
     "  chromosomes_flat_dpctl = dpnp.asarray(chromosomes_flat, device=\"gpu\")\n",
     "  fitnesses_dpctl = dpnp.asarray(fitnesses, device=\"gpu\")\n",
-    "\n",
-    "  eval_genomes_sycl_kernel[numba_dpex.Range(pop_size)](chromosomes_flat_dpctl, fitnesses_dpctl, chrom_size)\n",
+    "  \n",
+    "  exec_range = kernel_api.Range(pop_size)\n",
+    "  numba_dpex.call_kernel(eval_genomes_sycl_kernel, exec_range, chromosomes_flat_dpctl, fitnesses_dpctl, chrom_size)\n",
     "  fitnesses = dpnp.asnumpy(fitnesses_dpctl)\n",
     "  chromosomes = next_generation(chromosomes, fitnesses)\n",
     "  fitnesses = np.zeros(pop_size, dtype=np.float32)\n",
@@ -544,7 +546,7 @@
     "\n",
     "The evaluate created generation we are calculating the full distance of the given path (chromosome). In this example, the lower the fitness value is, the better the chromosome. That's different from the general GA that we implemented.\n",
     "\n",
-    "As in this example we are also using numba-dpex, we are using a global index like before."
+    "Since this example also uses numba-dpex, we use the work-item index in the same way as before."
    ]
   },
   {
@@ -554,8 +556,8 @@
    "outputs": [],
    "source": [
     "@numba_dpex.kernel\n",
-    "def eval_genomes_plain_TSP_SYCL(chromosomes, fitnesses, distances, pop_length):\n",
-    "  pos = numba_dpex.get_global_id(0)\n",
+    "def eval_genomes_plain_TSP_SYCL(item: kernel_api.Item, chromosomes, fitnesses, distances, pop_length):\n",
+    "  pos = item.get_id(0)  # launched over a 1-D Range(pop_size): dimension 0 is the only valid index\n",
     "  for j in range(pop_length-1):\n",
     "    fitnesses[pos] += distances[int(chromosomes[pos, j]), int(chromosomes[pos, j+1])]\n"
    ]
@@ -708,7 +710,8 @@
     "  chromosomes_flat_dpctl = dpnp.asarray(chromosomes, device=\"gpu\")\n",
     "  fitnesses_dpctl = dpnp.asarray(fitnesses.copy(), device=\"gpu\")\n",
     "\n",
-    "  eval_genomes_plain_TSP_SYCL[numba_dpex.Range(pop_size)](chromosomes_flat_dpctl, fitnesses_dpctl, distances_dpctl, pop_size)\n",
+    "  exec_range = kernel_api.Range(pop_size)\n",
+    "  numba_dpex.call_kernel(eval_genomes_plain_TSP_SYCL, exec_range, chromosomes_flat_dpctl, fitnesses_dpctl, distances_dpctl, pop_size)\n",
     "  fitnesses = dpnp.asnumpy(fitnesses_dpctl)\n",
     "  chromosomes = next_generation_TSP(chromosomes, fitnesses)\n",
     "  fitnesses = np.zeros(pop_size, dtype=np.float32)\n",
 
@@ -260,16 +260,17 @@ def next_generation(chromosomes, fitnesses):
 # 
 # The only par that differs form the standard implementation is the evaluation function.
 # 
-# The most important part is to specify the global index of the computation. This is the current index of the computed chromosomes. This serves as a loop function across all chromosomes.
+# The most important part is to obtain the index of the current work item. This index identifies the chromosome being evaluated, so the kernel body takes the place of a loop over all chromosomes.
 
 # In[ ]:
 
 
 import numba_dpex
+from numba_dpex import kernel_api
 
 @numba_dpex.kernel
-def eval_genomes_sycl_kernel(chromosomes, fitnesses, chrom_length):
-  pos = numba_dpex.get_global_id(0)
+def eval_genomes_sycl_kernel(item: kernel_api.Item, chromosomes, fitnesses, chrom_length):
+  pos = item.get_id(0)
   num_loops = 3000
   for i in range(num_loops):
     fitnesses[pos] += chromosomes[pos*chrom_length + 1]
@@ -300,7 +301,8 @@ def eval_genomes_sycl_kernel(chromosomes, fitnesses, chrom_length):
   chromosomes_flat_dpctl = dpnp.asarray(chromosomes_flat, device="gpu")
   fitnesses_dpctl = dpnp.asarray(fitnesses, device="gpu")
 
-  eval_genomes_sycl_kernel[numba_dpex.Range(pop_size)](chromosomes_flat_dpctl, fitnesses_dpctl, chrom_size)
+  exec_range = kernel_api.Range(pop_size)
+  numba_dpex.call_kernel(eval_genomes_sycl_kernel, exec_range, chromosomes_flat_dpctl, fitnesses_dpctl, chrom_size)
   fitnesses = dpnp.asnumpy(fitnesses_dpctl)
   chromosomes = next_generation(chromosomes, fitnesses)
   fitnesses = np.zeros(pop_size, dtype=np.float32)
@@ -398,14 +400,14 @@ def eval_genomes_sycl_kernel(chromosomes, fitnesses, chrom_length):
 # 
 # The evaluate created generation we are calculating the full distance of the given path (chromosome). In this example, the lower the fitness value is, the better the chromosome. That's different from the general GA that we implemented.
 # 
-# As in this example we are also using numba-dpex, we are using a global index like before.
+# Since this example also uses numba-dpex, we use the work-item index in the same way as before.
 
 # In[ ]:
 
 
 @numba_dpex.kernel
-def eval_genomes_plain_TSP_SYCL(chromosomes, fitnesses, distances, pop_length):
-  pos = numba_dpex.get_global_id(0)
+def eval_genomes_plain_TSP_SYCL(item: kernel_api.Item, chromosomes, fitnesses, distances, pop_length):
+  pos = item.get_id(0)  # launched over a 1-D Range(pop_size): dimension 0 is the only valid index
   for j in range(pop_length-1):
     fitnesses[pos] += distances[int(chromosomes[pos, j]), int(chromosomes[pos, j+1])]
 
@@ -526,7 +528,8 @@ def next_generation_TSP(chromosomes, fitnesses):
   chromosomes_flat_dpctl = dpnp.asarray(chromosomes, device="gpu")
   fitnesses_dpctl = dpnp.asarray(fitnesses.copy(), device="gpu")
 
-  eval_genomes_plain_TSP_SYCL[numba_dpex.Range(pop_size)](chromosomes_flat_dpctl, fitnesses_dpctl, distances_dpctl, pop_size)
+  exec_range = kernel_api.Range(pop_size)
+  numba_dpex.call_kernel(eval_genomes_plain_TSP_SYCL, exec_range, chromosomes_flat_dpctl, fitnesses_dpctl, distances_dpctl, pop_size)
   fitnesses = dpnp.asnumpy(fitnesses_dpctl)
   chromosomes = next_generation_TSP(chromosomes, fitnesses)
   fitnesses = np.zeros(pop_size, dtype=np.float32)
 
@@ -2,7 +2,7 @@
 
 The `Enable Auto-Mixed Precision for Transfer Learning with TensorFlow*` sample guides you through the process of enabling auto-mixed precision to use low-precision datatypes, like bfloat16, for transfer learning with TensorFlow* (TF).
 
-The sample demonstrates the end-to-end pipeline tasks typically performed in a deep learning use-case: training (and retraining), inference optimization, and serving the model with TensorFlow Serving.
+The sample demonstrates the tasks typically performed in a deep learning use-case: training (and retraining), and inference optimization. The sample also includes tips and boilerplate code for serving the model with TensorFlow Serving.
 
 | Area                    | Description
 |:---                     |:---
@@ -37,10 +37,6 @@ You will need to download and install the following toolkits, tools, and compone
 
   Install using PIP: `$pip install notebook`. <br> Alternatively, see [*Installing Jupyter*](https://jupyter.org/install) for detailed installation instructions.
 
-- **TensorFlow Serving**
-
-  See *TensorFlow Serving* [*Installation*](https://www.tensorflow.org/tfx/serving/setup) for detailed installation options.
-
 - **Other dependencies**
 
   Install using PIP and the `requirements.txt` file supplied with the sample: `$pip install -r requirements.txt --no-deps`. <br> The `requirements.txt` file contains the necessary dependencies to run the Notebook.
@@ -112,6 +108,70 @@ You will see diagrams comparing performance and analysis. This includes performa
 
 For performance analysis, you will see histograms showing different Tensorflow* operations in the analyzed pre-trained model pb file.
 
+## Serve the model with TensorFlow Serving
+
+### Installation
+See *TensorFlow Serving* [*Installation*](https://www.tensorflow.org/tfx/serving/setup) for detailed installation options.
+
+### Example Code
+
+Create a copy of the optimized model in a well-defined directory hierarchy with a version number "1".
+
+```
+!mkdir serving
+!cp -r models/my_optimized_model serving/1
+```
+
+```
+os.environ["MODEL_DIR"] = os.getcwd() + "/serving"
+```
+
+This is where we start TensorFlow Serving and load our model. After the model loads, we can start making inference requests using REST. There are some important parameters:
+- **rest_api_port**: The port that you'll use for REST requests.
+- **model_name**: You'll use this in the URL of REST requests. It can be anything.
+- **model_base_path**: This is the path to the directory where you've saved your model.
+
+```
+%%bash --bg
+nohup tensorflow_model_server --rest_api_port=8501 --model_name=rn50 --model_base_path=${MODEL_DIR} > server.log 2>&1
+```
+
+#### Prepare the testing data for prediction
+
+```
+for image_batch, labels_batch in val_ds:
+    print(image_batch.shape)
+    print(labels_batch.shape)
+    break
+test_data, test_labels = image_batch.numpy(), labels_batch.numpy()
+```
+
+#### Make REST requests
+
+Now let's create the JSON object for a batch of three inference requests, then send it as a POST predict request to our server's REST endpoint.
+
+```
+import json
+import matplotlib.pyplot as plt
+
+def show(idx, title):
+    plt.figure()
+    plt.imshow(test_data[idx])
+    plt.axis('off')
+    plt.title('\n\n{}'.format(title), fontdict={'size': 16})
+
+data = json.dumps({"signature_name": "serving_default", "instances": test_data[0:3].tolist()})
+print('Data: {} ... {}'.format(data[:50], data[len(data)-52:]))
+
+headers = {"content-type": "application/json"}
+json_response = requests.post('http://localhost:8501/v1/models/rn50:predict', data=data, headers=headers)
+predictions = json.loads(json_response.text)['predictions']
+
+for i in range(0,3):
+    show(i, 'The model thought this was a {} (class {}), and it was actually a {} (class {})'.format(
+        class_names[np.argmax(predictions[i])], np.argmax(predictions[i]), class_names[test_labels[i]], test_labels[i]))
+```
+
 ## License
 
 Code samples are licensed under the MIT license. See