2020import tvm
2121from tvm .te import SpecializedCondition
2222from tvm .contrib import nvcc
23+ from tvm .autotvm .env import AutotvmGlobalScope
2324from .generic import *
2425from .. import op as _op
2526from .... import get_global_func
2627
def get_cross_compile_compute_ver():
    """Return the CUDA compute capability as a "major.minor" string.

    Temporary workaround to enable cross compiling for GPU in Neo:
    ``tvm.gpu(0).compute_version`` raises an error when no GPU is present,
    so prefer the arch recorded via ``set_cuda_target_arch`` (stored on
    ``AutotvmGlobalScope.current.cuda_target_arch``) when one is set, and
    only fall back to querying the local device.

    Returns
    -------
    str
        Compute capability such as "7.5" (from "sm_75") or "10.0"
        (from "sm_100").
    """
    target_arch = AutotvmGlobalScope.current.cuda_target_arch
    if target_arch:
        # "sm_75" -> "75"; split("sm_")[-1] also tolerates a bare "75".
        digits = target_arch.split("sm_")[-1]
        # The minor version is the last digit and the major version is
        # everything before it, so three-digit arches like "sm_100" map to
        # "10.0" instead of the wrong "1.0" that indexing digits[0]/digits[1]
        # would produce. Identical to the old behavior for two-digit arches.
        # NOTE(review): suffixed arches (e.g. a hypothetical "sm_90a") are not
        # handled — confirm whether such values can reach this point.
        return digits[:-1] + "." + digits[-1]
    return tvm.gpu(0).compute_version
36+
2737@schedule_injective .register (["cuda" , "gpu" ])
2838def schedule_injective_cuda (attrs , outs , target ):
2939 """schedule injective ops for cuda"""
@@ -146,7 +156,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
146156 pre_flag = False )
147157 if judge_winograd_shape :
148158 if target .target_name == "cuda" and \
149- nvcc .have_tensorcore (tvm . gpu ( 0 ). compute_version ) and \
159+ nvcc .have_tensorcore (get_cross_compile_compute_ver () ) and \
150160 judge_winograd_tensorcore :
151161 strategy .add_implementation (
152162 wrap_compute_conv2d (topi .cuda .conv2d_nhwc_winograd_tensorcore ),
@@ -163,7 +173,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
163173 name = "conv2d_nhwc_winograd_direct.cuda" ,
164174 plevel = 5 )
165175 if target .target_name == "cuda" :
166- if nvcc .have_tensorcore (tvm . gpu ( 0 ). compute_version ):
176+ if nvcc .have_tensorcore (get_cross_compile_compute_ver () ):
167177 if (N % 16 == 0 and CI % 16 == 0 and CO % 16 == 0 ) or \
168178 (N % 8 == 0 and CI % 16 == 0 and CO % 32 == 0 ) or \
169179 (N % 32 == 0 and CI % 16 == 0 and CO % 8 == 0 ):
@@ -265,7 +275,7 @@ def conv2d_winograd_without_weight_transfrom_strategy_cuda(attrs, inputs, out_ty
265275 dilation_h , dilation_w ,
266276 pre_flag = True )
267277 if target .target_name == "cuda" and \
268- nvcc .have_tensorcore (tvm . gpu ( 0 ). compute_version ) and \
278+ nvcc .have_tensorcore (get_cross_compile_compute_ver () ) and \
269279 judge_winograd_tensorcore :
270280 strategy .add_implementation (
271281 wrap_compute_conv2d (
@@ -363,7 +373,7 @@ def conv3d_strategy_cuda(attrs, inputs, out_type, target):
363373 N , _ , _ , _ , _ = get_const_tuple (data .shape )
364374 _ , _ , _ , CI , CO = get_const_tuple (kernel .shape )
365375 if target .target_name == "cuda" :
366- if nvcc .have_tensorcore (tvm . gpu ( 0 ). compute_version ):
376+ if nvcc .have_tensorcore (get_cross_compile_compute_ver () ):
367377 if (N % 16 == 0 and CI % 16 == 0 and CO % 16 == 0 ) or \
368378 (N % 8 == 0 and CI % 16 == 0 and CO % 32 == 0 ) or \
369379 (N % 32 == 0 and CI % 16 == 0 and CO % 8 == 0 ):
@@ -459,7 +469,7 @@ def dense_strategy_cuda(attrs, inputs, out_type, target):
459469 name = "dense_large_batch.cuda" ,
460470 plevel = 5 )
461471 if target .target_name == "cuda" :
462- if nvcc .have_tensorcore (tvm . gpu ( 0 ). compute_version ):
472+ if nvcc .have_tensorcore (get_cross_compile_compute_ver () ):
463473 if (i % 16 == 0 and b % 16 == 0 and o % 16 == 0 ) \
464474 or (i % 16 == 0 and b % 8 == 0 and o % 32 == 0 ) \
465475 or (i % 16 == 0 and b % 32 == 0 and o % 8 == 0 ):
0 commit comments