Commit b8f791a

committed
update serving to 2.5.1 and tensorflow to 2.5.0
- update main.cc with upstream changes
- add support for libomp runtime
- add mkl and mkl_dnn patches and an additional tensorflow (xla) patch
- update curl patch to remove dep on project bazel config setting
- tensorflow repo now uses patches generated from git
- update docker base images to latest
- update tensorflow bazelrc build config
1 parent 4a14142 commit b8f791a

File tree

13 files changed (+234 −87 lines)


Diff for: .bazelrc (+5 −5)

@@ -1,5 +1,5 @@
 # upstream tensorflow/serving version
-build --embed_label=2.5.0
+build --embed_label=2.5.1

 common --experimental_repo_remote_exec

@@ -30,7 +30,7 @@ import %workspace%/third_party/tensorflow/.bazelrc

 # linux_amd64 base config group. use this config when cross-building
 # for custom amd64 targets (i.e. ones not defined here)
-build:linux_amd64 --platforms=@com_github_emacski_bazeltools//platform:linux_amd64
+build:linux_amd64 --platforms=@com_github_emacski_bazeltools//toolchain:linux_amd64

 build:linux_amd64_avx_sse4.2 --config=linux_amd64
 build:linux_amd64_avx_sse4.2 --copt=-mavx
@@ -51,8 +51,8 @@ build:common_arm --copt=-fomit-frame-pointer
 # linux_arm64 base config group. use this config when cross-building
 # for custom 64 bit arm targets (i.e. ones not defined here)
 build:linux_arm64 --config=common_arm
+build:linux_arm64 --platforms=@com_github_emacski_bazeltools//toolchain:linux_arm64
 build:linux_arm64 --cpu=aarch64
-build:linux_arm64 --platforms=@com_github_emacski_bazeltools//platform:linux_arm64
 build:linux_arm64 --copt=-march=armv8-a

 build:linux_arm64_armv8-a --config=linux_arm64
@@ -67,8 +67,8 @@ build:linux_arm64_armv8.2-a --define=tag_suffix=linux_arm64_armv8.2-a
 # linux_arm base config group. use this config when cross-building for
 # custom 32 bit arm targets (i.e. ones not defined here)
 build:linux_arm --config=common_arm
-build:linux_arm --cpu=arm
-build:linux_arm --platforms=@com_github_emacski_bazeltools//platform:linux_arm
+build:linux_arm --platforms=@com_github_emacski_bazeltools//toolchain:linux_arm
+build:linux_arm --cpu=armhf
 build:linux_arm --copt=-march=armv7-a

 build:linux_arm_armv7-a_neon_vfpv4 --config=linux_arm
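With the platform labels moved from `//platform` to `//toolchain`, a cross-build using one of these config groups might look like the following (a sketch; the `//tensorflow_model_server` target path follows this repo's layout):

```sh
# cross-build for a predefined 64-bit arm variant
bazel build --config=linux_arm64_armv8-a //tensorflow_model_server

# or use the base config group for a custom arm64 target,
# layering on extra copts as needed
bazel build --config=linux_arm64 --copt=-march=armv8.2-a //tensorflow_model_server
```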

Diff for: BUILD.local_crosslib (+6)

@@ -0,0 +1,6 @@
+package(default_visibility = ["//visibility:public"])
+
+filegroup(
+    name = "libomp",
+    srcs = ["libomp.so"],
+)

Diff for: README.md (+8 −8)

@@ -37,7 +37,7 @@ On many consumer / developer 64-bit and 32-bit arm platforms you can simply:
 ```sh
 docker pull emacski/tensorflow-serving:latest
 # or
-docker pull emacski/tensorflow-serving:2.4.1
+docker pull emacski/tensorflow-serving:2.5.1
 ```

 Refer to [TensorFlow Serving with Docker](https://www.tensorflow.org/tfx/serving/docker)
@@ -58,7 +58,7 @@ for configuration and setting up a model for serving.
 Example
 ```bash
 # on beaglebone black
-docker pull emacski/tensorflow-serving:2.4.1-linux_arm_armv7-a_neon_vfpv3
+docker pull emacski/tensorflow-serving:2.5.1-linux_arm_armv7-a_neon_vfpv3
 ```

 ### Aliases
@@ -77,7 +77,7 @@ docker pull emacski/tensorflow-serving:2.4.1-linux_arm_armv7-a_neon_vfpv3
 Examples
 ```bash
 # on Raspberry PI 3 B+
-docker pull emacski/tensorflow-serving:2.4.1-linux_arm64
+docker pull emacski/tensorflow-serving:2.5.1-linux_arm64
 # or
 docker pull emacski/tensorflow-serving:latest-linux_arm64
 ```
@@ -113,9 +113,9 @@ docker pull emacski/tensorflow-serving:latest
 Example
 ```sh
 # on Raspberry PI 3 B+
-docker pull emacski/tensorflow-serving:2.4.1
-# the actual image used is emacski/tensorflow-serving:2.4.1-linux_arm64
-# itself actually being emacski/tensorflow-serving:2.4.1-linux_arm64_armv8-a
+docker pull emacski/tensorflow-serving:2.5.1
+# the actual image used is emacski/tensorflow-serving:2.5.1-linux_arm64
+# itself actually being emacski/tensorflow-serving:2.5.1-linux_arm64_armv8-a
 ```

 ### Debug Images
@@ -130,9 +130,9 @@ suffix (if one is required) in the image tag.

 ```sh
 # multi-arch
-docker pull emacski/tensorflow-serving:2.4.1-debug
+docker pull emacski/tensorflow-serving:2.5.1-debug
 # specific image
-docker pull emacski/tensorflow-serving:2.4.1-debug-linux_arm64_armv8-a
+docker pull emacski/tensorflow-serving:2.5.1-debug-linux_arm64_armv8-a
 # specific alias
 docker pull emacski/tensorflow-serving:latest-debug-linux_arm64
 ```

Diff for: WORKSPACE (+19 −18)

@@ -18,9 +18,9 @@ load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_file"

 http_archive(
     name = "com_github_emacski_bazeltools",
-    sha256 = "36c3fb806547b202c98c137a41d2bb2aebf3a52dfc8dedc7d972c1731368e7c7",
-    strip_prefix = "bazel-tools-7d90c92c3b361b0345451425d85d58a25de80ad9",
-    urls = ["https://github.com/emacski/bazel-tools/archive/7d90c92c3b361b0345451425d85d58a25de80ad9.tar.gz"],
+    sha256 = "dba9e8f0613401ed3c052d6fe79b3517197a7747046659845309fb17e9b3038d",
+    strip_prefix = "bazel-tools-17a0d8b9ae66bc542853a72365ef1aeb85086827",
+    urls = ["https://github.com/emacski/bazel-tools/archive/17a0d8b9ae66bc542853a72365ef1aeb85086827.tar.gz"],
 )

 load(
@@ -71,17 +71,18 @@ http_archive(
         # arm (32-bit) datatype sizes
         "//third_party/tensorflow:curl.patch",
         "//third_party/tensorflow:hwloc.patch",
-        # might be using host's cpu
+        # set platform cpu constraint on aarch64 config_setting
         "//third_party/tensorflow:mkl.patch",
-        # remove explicit dep on libgomp
+        # remove explicit linker dep on libgomp
        "//third_party/tensorflow:mkl_dnn.patch",
         # allow android cpu helper to be used for linux_arm and linux_arm64
         "//third_party/tensorflow:tensorflow.patch",
     ],
-    sha256 = "68437339e0d5854d28157ba77a4ae30954f35d08899675e3bb6da9824fbb904a",
-    strip_prefix = "tensorflow-0d1805aede03d25aa9d49adcef6903535fa5ad14",
+    patch_args = ["-p1"],
+    sha256 = "cb99f136dc5c89143669888a44bfdd134c086e1e2d9e36278c1eb0f03fe62d76",
+    strip_prefix = "tensorflow-a4dfb8d1a71385bd6d122e4f27f86dcebb96712d",
     urls = [
-        "https://github.com/tensorflow/tensorflow/archive/0d1805aede03d25aa9d49adcef6903535fa5ad14.tar.gz",
+        "https://github.com/tensorflow/tensorflow/archive/a4dfb8d1a71385bd6d122e4f27f86dcebb96712d.tar.gz",
     ],
 )

@@ -113,13 +114,13 @@ http_archive(
     ],
 )

-# tensorflow serving 2.5.0
+# tensorflow serving 2.5.1
 http_archive(
     name = "tf_serving",
-    sha256 = "eec408b6950c4d4d06d148ceb1567eaac0c28b9c38fbc2328fe96d07fec3e3d8",
-    strip_prefix = "serving-bba3972185e47376a63d801ffcd2831684db114a",
+    sha256 = "e94726632a0637a460b07e5e1c6c5a30c9f776dda22b5e9132b9ef526fbefa7e",
+    strip_prefix = "serving-da76fc74b48f6afe190125f9ed572e690cef8570",
     urls = [
-        "https://github.com/tensorflow/serving/archive/bba3972185e47376a63d801ffcd2831684db114a.tar.gz",
+        "https://github.com/tensorflow/serving/archive/da76fc74b48f6afe190125f9ed572e690cef8570.tar.gz",
     ],
 )

@@ -243,15 +244,15 @@ load("@io_bazel_rules_docker//container:container.bzl", "container_pull")

 container_pull(
     name = "discolix_cc_linux_amd64",
-    digest = "sha256:f888804401ae34244ae3dfe2286a004fea446848fcc9dbd9c19632910e1385f9",
+    digest = "sha256:67268fbe9ca7e6ee25774dd3c5359a81eec285362ef0932f1eeed0e0bf32e0fd",
     registry = "index.docker.io",
     repository = "discolix/cc",
     tag = "latest-linux_amd64",
 )

 container_pull(
     name = "discolix_cc_linux_amd64_debug",
-    digest = "sha256:0fe8c9f1e0131dbcbc139263e92e4772c46e7161cd2392cd95bb7fd85e3ead97",
+    digest = "sha256:b02ee67449f57d7b5eeefa611986151bbcf8bb0ff72b05f7b426848455e27ba6",
     registry = "index.docker.io",
     repository = "discolix/cc",
     tag = "debug-linux_amd64",
@@ -261,15 +262,15 @@ container_pull(

 container_pull(
     name = "discolix_cc_linux_arm64",
-    digest = "sha256:7eabd63fa0da92ea3cb04ed0fda7fdca8ea07fec8797fd692ae6f8d1419ca835",
+    digest = "sha256:dec982052e6f6c1d28f1c1304d2a93b79a2169d8a3273e6029f1033c95be6f31",
     registry = "index.docker.io",
     repository = "discolix/cc",
     tag = "latest-linux_arm64",
 )

 container_pull(
     name = "discolix_cc_linux_arm64_debug",
-    digest = "sha256:2b0332cad7d88fd5d2a2fb1069914557673aef9a86caaa04930760a287a2ca81",
+    digest = "sha256:c0a4668fc09e174d321f6d0d81e00cf1ee0515d7c220efdcf3b030a92ec98e1b",
     registry = "index.docker.io",
     repository = "discolix/cc",
     tag = "debug-linux_arm64",
@@ -279,15 +280,15 @@ container_pull(

 container_pull(
     name = "discolix_cc_linux_arm",
-    digest = "sha256:dbee6bebc8a85afbe0d3ce923e0296bf0fa5edd45a43bcc487a36d6868acf131",
+    digest = "sha256:9834a18225f8d9f9651a2144867be78da77c517e1334f2c3c00619d3c62299cc",
     registry = "index.docker.io",
     repository = "discolix/cc",
     tag = "latest-linux_arm",
 )

 container_pull(
     name = "discolix_cc_linux_arm_debug",
-    digest = "sha256:f0dd6de42d29eb6f0436dc8345cb753099fc425387c9e361c954eb9365a4c89a",
+    digest = "sha256:f1698d4d4d68b4c0c0a7d88a8c2b3abf55c4512ddc7212804d5c5760edb7918f",
     registry = "index.docker.io",
     repository = "discolix/cc",
     tag = "debug-linux_arm",

Diff for: tensorflow_model_server/BUILD (+20 −1)

@@ -27,6 +27,8 @@ load("@debian_buster_armhf//debs:deb_packages.bzl", "debian_buster_armhf")
 load("@debian_buster_arm64//debs:deb_packages.bzl", "debian_buster_arm64")
 load("@debian_buster_amd64//debs:deb_packages.bzl", "debian_buster_amd64")

+package(default_visibility = ["//visibility:public"])
+
 cc_library(
     name = "main_lib",
     srcs = [
@@ -35,6 +37,7 @@ cc_library(
     linkstamp = "stamp.cc",
     deps = [
         "@org_tensorflow//tensorflow/c:c_api",
+        "@org_tensorflow//tensorflow/compiler/jit:xla_cpu_jit",
         "@org_tensorflow//tensorflow/core:lib",
         "@org_tensorflow//tensorflow/core/platform/cloud:gcs_file_system",
         "@org_tensorflow//tensorflow/core/platform/hadoop:hadoop_file_system",
@@ -90,6 +93,18 @@ container_layer(
     }),
 )

+container_layer(
+    name = "libomp_runtime",
+    directory = select({
+        ":linux_arm64": "/usr/lib/aarch64-linux-gnu",
+        ":linux_amd64": "/usr/lib/x86_64-linux-gnu",
+    }),
+    files = select({
+        ":linux_arm64": ["@local_crosslib_arm64//:libomp"],
+        ":linux_amd64": ["@local_crosslib_amd64//:libomp"],
+    }),
+)
+
 [container_image(
     name = "custom_image" + suffix,
     architecture = select({
@@ -112,7 +127,11 @@ container_layer(
         ":tensorflow_model_server",
         ":tf_serving_entrypoint.sh",
     ],
-    layers = ["dash"],
+    layers = ["dash"] + select({
+        ":linux_arm64": ["libomp_runtime"],
+        ":linux_amd64": ["libomp_runtime"],
+        "//conditions:default": [],
+    }),
     ports = [
         "8500",
         "8501",
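Since the new `libomp_runtime` layer is only select()-ed in for `linux_amd64` and `linux_arm64` (32-bit arm falls through to `//conditions:default` and gets no libomp layer), an image build might look like the following (a sketch; the exact `custom_image` target suffixes come from the list comprehension above and are assumed here):

```sh
# amd64 image: the select() adds the libomp_runtime layer
bazel build --config=linux_amd64_avx_sse4.2 //tensorflow_model_server:custom_image

# 32-bit arm image: //conditions:default applies, no libomp layer
bazel build --config=linux_arm_armv7-a_neon_vfpv4 //tensorflow_model_server:custom_image
```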

Diff for: tensorflow_model_server/main.cc (+47 −2)

@@ -49,6 +49,7 @@ limitations under the License.
 #include <vector>

 #include "tensorflow/c/c_api.h"
+#include "tensorflow/compiler/jit/flags.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/platform/init_main.h"
 #include "tensorflow/core/util/command_line_flags.h"
@@ -60,9 +61,11 @@ extern const char cBUILD_SCM_REV_STAMP[];
 int main(int argc, char** argv) {
   tensorflow::serving::main::Server::Options options;
   bool display_version = false;
+  bool xla_cpu_compilation_enabled = false;
   std::vector<tensorflow::Flag> flag_list = {
       tensorflow::Flag("port", &options.grpc_port,
-                       "Port to listen on for gRPC API"),
+                       "TCP port to listen on for gRPC/HTTP API. Disabled if "
+                       "port set to zero."),
       tensorflow::Flag("grpc_socket_path", &options.grpc_socket_path,
                        "If non-empty, listen to a UNIX socket for gRPC API "
                        "on the given path. Can be either relative or absolute "
@@ -76,6 +79,9 @@ int main(int argc, char** argv) {
                        "set, will be auto set based on number of CPUs."),
       tensorflow::Flag("rest_api_timeout_in_ms", &options.http_timeout_in_ms,
                        "Timeout for HTTP/REST API calls."),
+      tensorflow::Flag("rest_api_enable_cors_support",
+                       &options.enable_cors_support,
+                       "Enable CORS headers in response"),
       tensorflow::Flag("enable_batching", &options.enable_batching,
                        "enable batching"),
       tensorflow::Flag(
@@ -108,6 +114,18 @@ int main(int argc, char** argv) {
       tensorflow::Flag("model_base_path", &options.model_base_path,
                        "path to export (ignored if --model_config_file flag "
                        "is set, otherwise required)"),
+      tensorflow::Flag("num_load_threads", &options.num_load_threads,
+                       "The number of threads in the thread-pool used to load "
+                       "servables. If set as 0, we don't use a thread-pool, "
+                       "and servable loads are performed serially in the "
+                       "manager's main work loop, which may cause serving "
+                       "requests to be delayed. Default: 0"),
+      tensorflow::Flag("num_unload_threads", &options.num_unload_threads,
+                       "The number of threads in the thread-pool used to "
+                       "unload servables. If set as 0, we don't use a "
+                       "thread-pool, and servable unloads are performed "
+                       "serially in the manager's main work loop, which may "
+                       "cause serving requests to be delayed. Default: 0"),
       tensorflow::Flag("max_num_load_retries", &options.max_num_load_retries,
                        "maximum number of times it retries loading a model "
                        "after the first failure, before giving up. "
@@ -178,6 +196,8 @@ int main(int argc, char** argv) {
                        "A comma separated list of arguments to be passed to "
                        "the grpc server. (e.g. "
                        "grpc.max_connection_age_ms=2000)"),
+      tensorflow::Flag("grpc_max_threads", &options.grpc_max_threads,
+                       "Max grpc server threads to handle grpc messages."),
       tensorflow::Flag("enable_model_warmup", &options.enable_model_warmup,
                        "Enables model warmup, which triggers lazy "
                        "initializations (such as TF optimizations) at load "
@@ -199,11 +219,32 @@ int main(int argc, char** argv) {
                        "model from `saved_model.pb` file. "
                        "If no TensorFlow Lite model found, fallback to "
                        "TensorFlow model."),
+      tensorflow::Flag(
+          "num_tflite_pools", &options.num_tflite_pools,
+          "EXPERIMENTAL; CAN BE REMOVED ANYTIME! Number of TFLite interpreters "
+          "in an interpreter pool of TfLiteSession. Typically there is one "
+          "TfLiteSession for each TF Lite model that is loaded. If not "
+          "set, will be auto set based on number of CPUs."),
+      tensorflow::Flag(
+          "num_tflite_interpreters_per_pool",
+          &options.num_tflite_interpreters_per_pool,
+          "EXPERIMENTAL; CAN BE REMOVED ANYTIME! Number of TFLite interpreters "
+          "in an interpreter pool of TfLiteSession. Typically there is one "
+          "TfLiteSession for each TF Lite model that is loaded. If not "
+          "set, will be 1."),
       tensorflow::Flag(
           "enable_signature_method_name_check",
           &options.enable_signature_method_name_check,
           "Enable method_name check for SignatureDef. Disable this if serving "
-          "native TF2 regression/classification models.")};
+          "native TF2 regression/classification models."),
+      tensorflow::Flag(
+          "xla_cpu_compilation_enabled", &xla_cpu_compilation_enabled,
+          "EXPERIMENTAL; CAN BE REMOVED ANYTIME! "
+          "Enable XLA:CPU JIT (default is disabled). With XLA:CPU JIT "
+          "disabled, models utilizing this feature will return bad Status "
+          "on first compilation request."),
+      tensorflow::Flag("enable_profiler", &options.enable_profiler,
+                       "Enable profiler service.")};

   const auto& usage = tensorflow::Flags::Usage(argv[0], flag_list);
   if (!tensorflow::Flags::Parse(&argc, argv, flag_list)) {
@@ -223,6 +264,10 @@ int main(int argc, char** argv) {
     std::cout << "unknown argument: " << argv[1] << "\n" << usage;
   }

+  if (!xla_cpu_compilation_enabled) {
+    tensorflow::DisableXlaCompilation();
+  }
+
   tensorflow::serving::main::Server server;
   const auto& status = server.BuildAndStart(options);
   if (!status.ok()) {
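The new server flags added in main.cc can be exercised at run time; for example (a sketch; the model name and host path are placeholders):

```sh
docker run -p 8500:8500 -p 8501:8501 \
  -v /path/to/models/mymodel:/models/mymodel \
  emacski/tensorflow-serving:2.5.1 \
  --model_name=mymodel --model_base_path=/models/mymodel \
  --xla_cpu_compilation_enabled=true \
  --grpc_max_threads=16 \
  --enable_profiler=true
```

Note that XLA:CPU JIT stays disabled unless `--xla_cpu_compilation_enabled=true` is passed, since main() calls `tensorflow::DisableXlaCompilation()` otherwise.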