diff --git a/README.md b/README.md
index aa8760d5..2a560381 100644
--- a/README.md
+++ b/README.md
@@ -222,6 +222,18 @@ python tools/train.py \
     --cfg experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml \
 ```
 
+### Inference on Windows
+
+The code has not been fully evaluated on Windows, but inference is supported. Please refer to `${POSE_ROOT}/demo` for example scripts.
+
+```
+
+REM Build libs
+cd ${POSE_ROOT}/lib
+build_windows.bat
+
+```
+
 ### Visualization
 
 #### Visualizing predictions on COCO val
diff --git a/lib/build_windows.bat b/lib/build_windows.bat
new file mode 100644
index 00000000..81f9e205
--- /dev/null
+++ b/lib/build_windows.bat
@@ -0,0 +1,3 @@
+cd nms
+python setup_windows.py clean build_ext --inplace
+cd ../../
diff --git a/lib/nms/gpu_nms.hpp b/lib/nms/gpu_nms.hpp
index 68b6d42c..9cebb5e3 100644
--- a/lib/nms/gpu_nms.hpp
+++ b/lib/nms/gpu_nms.hpp
@@ -1,2 +1,9 @@
-void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
+// 64-bit Windows uses `long`; every other platform falls back to `int` so NMS_TYPE is always defined.
+#if defined(_WIN64)
+  #define NMS_TYPE long
+#else
+  #define NMS_TYPE int
+#endif
+
+void _nms(NMS_TYPE* keep_out, int* num_out, const float* boxes_host, int boxes_num,
           int boxes_dim, float nms_overlap_thresh, int device_id);
diff --git a/lib/nms/nms_kernel.cu b/lib/nms/nms_kernel.cu
index f6176c6d..0b503c7b 100644
--- a/lib/nms/nms_kernel.cu
+++ b/lib/nms/nms_kernel.cu
@@ -8,6 +8,13 @@
 #include <vector>
 #include <iostream>
 
+// 64-bit Windows uses `long`; every other platform falls back to `int` so NMS_TYPE is always defined.
+#if defined(_WIN64)
+  #define NMS_TYPE long
+#else
+  #define NMS_TYPE int
+#endif
+
 #define CUDA_CHECK(condition) \
   /* Code block avoids redefinition of cudaError_t error */ \
   do { \
@@ -87,7 +94,7 @@ void _set_device(int device_id) {
   CUDA_CHECK(cudaSetDevice(device_id));
 }
 
-void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
+void _nms(NMS_TYPE* keep_out, int* num_out, const float* boxes_host, int boxes_num,
           int boxes_dim, float nms_overlap_thresh, int device_id) {
   _set_device(device_id);
 
diff --git a/lib/nms/setup_windows.py b/lib/nms/setup_windows.py
new file mode 100644
index 00000000..31df4b5f
--- /dev/null
+++ b/lib/nms/setup_windows.py
@@ -0,0 +1,116 @@
+# ------------------------------------------------------------------
+# Copyright (c) Nvidia
+# Licensed under BSD 3-Clause "New" or "Revised" License
+# Modified from Apex (https://github.com/NVIDIA/apex/)
+# ------------------------------------------------------------------
+
+import torch
+from setuptools import setup, find_packages
+import subprocess
+from distutils.extension import Extension
+
+import sys
+import warnings
+import os
+import numpy as np
+
+# ninja build does not work unless include_dirs are abs path
+this_dir = os.path.dirname(os.path.abspath(__file__))
+
+# Obtain the numpy include directory.  This logic works across numpy versions.
+try:
+    numpy_include = np.get_include()
+except AttributeError:
+    numpy_include = np.get_numpy_include()
+
+if not torch.cuda.is_available():
+    # Extension builds after https://github.com/pytorch/pytorch/pull/23408 query torch.cuda.get_device_capability(), which
+    # fails when no GPU is visible (e.g. during an nvidia-docker build command); see https://github.com/NVIDIA/apex/issues/486.
+    print('\nWarning: Torch did not find available GPUs on this system.\n'
+          'If your intention is to cross-compile, this is not an error.\n'
+          'By default, this build targets Pascal (compute capabilities 6.0, 6.1, 6.2),\n'
+          'Volta (compute capability 7.0), and Turing (compute capability 7.5).\n'
+          'If you wish to cross-compile for a single specific architecture,\n'
+          'set TORCH_CUDA_ARCH_LIST="compute capability" before running setup_windows.py.\n')
+    if os.environ.get("TORCH_CUDA_ARCH_LIST", None) is None:
+        os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5"
+
+print("torch.__version__  = ", torch.__version__)
+_version_fields = torch.__version__.split('.')
+TORCH_MAJOR = int(_version_fields[0])
+TORCH_MINOR = int(_version_fields[1])
+
+from torch.utils.cpp_extension import BuildExtension
+from torch.utils.cpp_extension import CUDAExtension
+
+cmdclass = {'build_ext': BuildExtension}
+ext_modules = []
+extras = {}
+
+def check_cuda_torch_binary_vs_bare_metal(cuda_dir):
+    """Raise RuntimeError if nvcc under `cuda_dir` and torch.version.cuda differ in major or minor version."""
+    raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True)
+    output = raw_output.split()
+    # The token after "release" ends with a comma (e.g. "10.1,"); cut at the comma so every minor digit is kept.
+    release = output[output.index("release") + 1].split(",")[0].split(".")
+    bare_metal_major, bare_metal_minor = release[0], release[1]
+    torch_binary_major = torch.version.cuda.split(".")[0]
+    torch_binary_minor = torch.version.cuda.split(".")[1]
+
+    print("\nCompiling cuda extensions with")
+    print(raw_output + "from " + cuda_dir + "/bin\n")
+
+    if (bare_metal_major != torch_binary_major) or (bare_metal_minor != torch_binary_minor):
+        raise RuntimeError("Cuda extensions are being compiled with a version of Cuda that does " +
+                           "not match the version used to compile Pytorch binaries.  " +
+                           "Pytorch binaries were compiled with Cuda {}.\n".format(torch.version.cuda) +
+                           "In some cases, a minor-version mismatch will not cause later errors")
+
+# Set up macros for forward/backward compatibility hack around
+# https://github.com/pytorch/pytorch/commit/4404762d7dd955383acee92e6f06b48144a0742e
+# and
+# https://github.com/NVIDIA/apex/issues/456
+# https://github.com/pytorch/pytorch/commit/eb7b39e02f7d75c26d8a795ea8c7fd911334da7e#diff-4632522f237f1e4e728cb824300403ac
+# Each flag list stays empty unless the installed torch is at least the named version.
+_torch_version = (TORCH_MAJOR, TORCH_MINOR)
+
+version_ge_1_1 = ['-DVERSION_GE_1_1'] if _torch_version >= (1, 1) else []
+version_ge_1_3 = ['-DVERSION_GE_1_3'] if _torch_version >= (1, 3) else []
+version_ge_1_5 = ['-DVERSION_GE_1_5'] if _torch_version >= (1, 5) else []
+
+version_dependent_macros = []
+for _flag_group in (version_ge_1_1, version_ge_1_3, version_ge_1_5):
+    version_dependent_macros += _flag_group
+
+check_cuda_torch_binary_vs_bare_metal(torch.utils.cpp_extension.CUDA_HOME)
+
+# CPU NMS: Cython extension compiled with the '/MD' flag.
+cpu_nms_ext = Extension(
+    "cpu_nms",
+    ["cpu_nms.pyx"],
+    extra_compile_args={'cxx': ['/MD']},
+    include_dirs=[numpy_include],
+)
+
+# GPU NMS: CUDA kernel plus its Cython wrapper; the version macros go to both compilers.
+nvcc_flags = ['-O3', '-gencode', 'arch=compute_70,code=sm_70',
+              '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__',
+              '--expt-relaxed-constexpr', '--expt-extended-lambda', '--use_fast_math']
+gpu_nms_ext = CUDAExtension(
+    name='gpu_nms',
+    sources=['nms_kernel.cu', 'gpu_nms.pyx'],
+    include_dirs=[numpy_include],
+    extra_compile_args={'cxx': ['-O3'] + version_dependent_macros,
+                        'nvcc': nvcc_flags + version_dependent_macros},
+)
+
+ext_modules.extend([cpu_nms_ext, gpu_nms_ext])
+
+setup(
+    name='nms',
+    version='0.1',
+    description='',
+    ext_modules=ext_modules,
+    cmdclass=cmdclass,
+    extras_require=extras,
+)