Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Slab Hash version of GPU Coordinates Map Powered MinkowskiEngine #220

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 41 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Q ?= @
# CPU_ONLY := 1

CXX ?= g++
PYTHON ?= python
PYTHON ?= python3.6

EXTENSION_NAME := minkowski

Expand Down Expand Up @@ -66,17 +66,27 @@ ifneq ($(CPU_ONLY), 1)
endif

SRC_DIR := ./src
SRC_GPU_COORDS_MAP_DIR := ./src/3rdparty/gpu_coords_map/include
SRC_SLAB_HASH_DIR := ./src/3rdparty/gpu_coords_map/include/slab_hash
OBJ_DIR := ./objs
CPP_SRCS := $(wildcard $(SRC_DIR)/*.cpp)
CPP_SRCS_GPU_COORDS_MAP := $(wildcard $(SRC_GPU_COORDS_MAP_DIR)/*.cpp)
CPP_SRCS_SLAB_HASH:= $(wildcard $(SRC_SLAB_HASH_DIR)/*.cpp)
CU_SRCS := $(wildcard $(SRC_DIR)/*.cu)
CU_SRCS_GPU_COORDS_MAP := $(wildcard $(SRC_GPU_COORDS_MAP_DIR)/*.cu)
CU_SRCS_SLAB_HASH:= $(wildcard $(SRC_SLAB_HASH_DIR)/*.cu)
OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(CPP_SRCS))
OBJS_GPU_COORDS_MAP := $(patsubst $(SRC_GPU_COORDS_MAP_DIR)/%.cpp,$(OBJ_DIR)/3rdparty/gpu_coords_map/include/%.o,$(CPP_SRCS_GPU_COORDS_MAP))
OBJS_SLAB_HASH := $(patsubst $(SRC_SLAB_HASH_DIR)/%.cpp,$(OBJ_DIR)/3rdparty/gpu_coords_map/include/slab_hash/%.o,$(CPP_SRCS_SLAB_HASH))
CU_OBJS := $(patsubst $(SRC_DIR)/%.cu,$(OBJ_DIR)/cuda/%.o,$(CU_SRCS))
CU_OBJS_GPU_COORDS_MAP := $(patsubst $(SRC_GPU_COORDS_MAP_DIR)/%.cu,$(OBJ_DIR)/3rdparty/gpu_coords_map/include/cuda/%.o,$(CU_SRCS_GPU_COORDS_MAP))
CU_OBJS_SLAB_HASH := $(patsubst $(SRC_SLAB_HASH_DIR)/%.cu,$(OBJ_DIR)/3rdparty/gpu_coords_map/include/slab_hash/cuda/%.o,$(CU_SRCS_SLAB_HASH))
STATIC_LIB := $(OBJ_DIR)/lib$(EXTENSION_NAME).a

# We will also explicitly add stdc++ to the link target.
LIBRARIES := stdc++ c10 caffe2 torch torch_python _C
ifneq ($(CPU_ONLY), 1)
LIBRARIES += cudart cublas cusparse caffe2_gpu c10_cuda
LIBRARIES += cudadevrt cudart cudadevrt cublas cudadevrt cusparse cudadevrt caffe2_gpu cudadevrt c10_cuda cudadevrt
CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
-gencode arch=compute_35,code=sm_35 \
-gencode=arch=compute_50,code=sm_50 \
Expand Down Expand Up @@ -118,6 +128,7 @@ ifeq ($(DEBUG), 1)
COMMON_FLAGS += -DDEBUG -g -O0
# https://gcoe-dresden.de/reaching-the-shore-with-a-fog-warning-my-eurohack-day-4-morning-session/
NVCCFLAGS := -g -G # -rdc true
# NVCCFLAGS := -g -G -rdc true
else
COMMON_FLAGS += -DNDEBUG -O3
endif
Expand All @@ -140,6 +151,7 @@ COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir)) \

CXXFLAGS += -fopenmp -fPIC -fwrapv -std=c++14 $(COMMON_FLAGS) $(WARNINGS)
NVCCFLAGS += -std=c++14 -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
NVCCFLAGS += -rdc true
LINKFLAGS += -pthread -fPIC $(WARNINGS) -Wl,-rpath=$(PYTHON_LIB_DIR) -Wl,--no-as-needed -Wl,--sysroot=/
LDFLAGS += $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) \
$(foreach library,$(LIBRARIES),-l$(library))
Expand All @@ -148,7 +160,7 @@ ifeq ($(CPU_ONLY), 1)
ALL_OBJS := $(OBJS)
CXXFLAGS += -DCPU_ONLY
else
ALL_OBJS := $(OBJS) $(CU_OBJS)
ALL_OBJS := $(OBJS) $(OBJS_GPU_COORDS_MAP) $(OBJS_SLAB_HASH) $(CU_OBJS) $(CU_OBJS_GPU_COORDS_MAP) $(CU_OBJS_SLAB_HASH)
endif

all: $(STATIC_LIB)
Expand All @@ -157,8 +169,19 @@ all: $(STATIC_LIB)
$(OBJ_DIR):
@ mkdir -p $@
@ mkdir -p $@/cuda
@ mkdir -p $@/3rdparty/gpu_coords_map/include/cuda
@ mkdir -p $@/3rdparty/gpu_coords_map/include/slab_hash/cuda

$(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp | $(OBJ_DIR)
@ echo CXX $<
@ echo $(CXXFLAGS)
$(Q)$(CXX) $< $(CXXFLAGS) -c -o $@

$(OBJ_DIR)/3rdparty/gpu_coords_map/include/%.o: $(SRC_GPU_COORDS_MAP_DIR)/%.cpp | $(OBJ_DIR)
@ echo CXX $<
$(Q)$(CXX) $< $(CXXFLAGS) -c -o $@

$(OBJ_DIR)/3rdparty/gpu_coords_map/include/slab_hash/%.o: $(SRC_SLAB_HASH_DIR)/%.cpp | $(OBJ_DIR)
@ echo CXX $<
$(Q)$(CXX) $< $(CXXFLAGS) -c -o $@

Expand All @@ -168,8 +191,23 @@ $(OBJ_DIR)/cuda/%.o: $(SRC_DIR)/%.cu | $(OBJ_DIR)
-odir $(@D)
$(Q)$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@

$(OBJ_DIR)/3rdparty/gpu_coords_map/include/cuda/%.o: $(SRC_GPU_COORDS_MAP_DIR)/%.cu | $(OBJ_DIR)
@ echo NVCC $<
$(Q)$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} \
-odir $(@D)
$(Q)$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@

$(OBJ_DIR)/3rdparty/gpu_coords_map/include/slab_hash/cuda/%.o: $(SRC_SLAB_HASH_DIR)/%.cu| $(OBJ_DIR)
@ echo NVCC $<
$(Q)$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} \
-odir $(@D)
$(Q)$(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@

$(STATIC_LIB): $(ALL_OBJS) | $(OBJ_DIR)
$(RM) -f $(STATIC_LIB)
@ echo $(LINKFLAGS)
@ echo $(LDFLAGS)
@ echo $(CXXFLAGS)
@ echo LD -o $@
ar rc $(STATIC_LIB) $(ALL_OBJS)

Expand Down
25 changes: 18 additions & 7 deletions MinkowskiEngine/MinkowskiCoords.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
if 'OMP_NUM_THREADS' in os.environ:
CPU_COUNT = int(os.environ['OMP_NUM_THREADS'])

_memory_manager_backend = MemoryManagerBackend.PYTORCH
#_memory_manager_backend = MemoryManagerBackend.PYTORCH
_memory_manager_backend = MemoryManagerBackend.CUDA


def set_memory_manager_backend(backend: MemoryManagerBackend):
Expand Down Expand Up @@ -102,7 +103,8 @@ class CoordsManager():
def __init__(self,
num_threads: int = -1,
memory_manager_backend: MemoryManagerBackend = None,
D: int = -1):
D: int = -1,
device: str = 'cuda'):
if D < 1:
raise ValueError(f"Invalid dimension {D}")
self.D = D
Expand All @@ -111,7 +113,9 @@ def __init__(self,
if memory_manager_backend is None:
global _memory_manager_backend
memory_manager_backend = _memory_manager_backend
coords_man = MEB.CoordsManager(num_threads, memory_manager_backend)
coords_man = MEB.CoordsManager(num_threads, memory_manager_backend) \
if device == 'cpu' else \
MEB.GPUCoordsManager(D, 0, memory_manager_backend)
self.CPPCoordsManager = coords_man

def initialize(self,
Expand All @@ -120,14 +124,15 @@ def initialize(self,
force_creation: bool = False,
force_remap: bool = False,
allow_duplicate_coords: bool = False,
return_inverse: bool = False) -> torch.LongTensor:
return_inverse: bool = False) -> torch.IntTensor:
assert isinstance(coords_key, CoordsKey)
unique_index = torch.LongTensor()
inverse_mapping = torch.LongTensor()
# TODO(ljm): Adjust cpu interface from long to int accordingly
unique_index = torch.IntTensor()
inverse_mapping = torch.IntTensor()
self.CPPCoordsManager.initializeCoords(
coords, unique_index, inverse_mapping, coords_key.CPPCoordsKey,
force_creation, force_remap, allow_duplicate_coords, return_inverse)
return unique_index, inverse_mapping
return unique_index.long(), inverse_mapping.long()

def create_coords_key(self,
coords: torch.IntTensor,
Expand Down Expand Up @@ -171,6 +176,9 @@ def stride(self,
def reduce(self):
origin_key = CoordsKey(self.D)
origin_key.setTensorStride(convert_to_int_list(0, self.D))
# TODO(ljm): Get batch_size by createOriginCoords
# TODO(ljm): Find a better way to get batch_size
# Notice(ljm): It can be concluded from GetCoordsAt that the batch indices are contiguous
origin_key.setKey(self.CPPCoordsManager.createOriginCoords(self.D))
return origin_key

Expand Down Expand Up @@ -322,6 +330,9 @@ def get_kernel_map(self,
is_transpose,
is_pool)

kernel_map[0] = kernel_map[0].long()
kernel_map[1] = kernel_map[1].long()

return kernel_map

def get_coords_map(self, in_key_or_tensor_strides,
Expand Down
9 changes: 7 additions & 2 deletions MinkowskiEngine/SparseTensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ def __init__(
of the current sparse tensor. By default, it is 1.

"""
print(coords)
assert isinstance(feats,
torch.Tensor), "Features must be a torch.Tensor"
assert feats.ndim == 2, f"The feature should be a matrix, The input feature is an order-{feats.ndim} tensor."
Expand All @@ -254,12 +255,14 @@ def __init__(
assert isinstance(coords, torch.Tensor), \
"Coordinate must be of type torch.Tensor"

print(isinstance(coords, torch.IntTensor))
if not isinstance(coords, torch.IntTensor):
warnings.warn(
'Coords implicitly converted to torch.IntTensor. ' +
'To remove this warning, use `.int()` to convert the ' +
'coords into an torch.IntTensor')
coords = torch.floor(coords).int()
print(isinstance(coords, torch.IntTensor))
# coords = torch.floor(coords).int()

if coords.device.type != 'cpu':
warnings.warn(
Expand All @@ -283,7 +286,9 @@ def __init__(
if _global_coords_man is None:
_global_coords_man = CoordsManager(
memory_manager_backend=memory_manager_backend,
D=coords.size(1) - 1)
D=coords.size(1) - 1,
device=coords.device.type if coords is not None else 'cuda')
# TODO(ljm): handle device when coords is None
coords_manager = _global_coords_man
else:
assert coords is not None, "Initial coordinates must be given"
Expand Down
Loading