Skip to content

Commit ff463b7

Browse files
authored
Merge pull request #168 from pierotofy/linalg
Refactor linalg calls
2 parents 5609b2b + ea5227c commit ff463b7

File tree

9 files changed

+39
-85
lines changed

9 files changed

+39
-85
lines changed
Lines changed: 9 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -1,43 +1,12 @@
11
#!/bin/bash
22

3-
# Took from https://github.com/pyg-team/pyg-lib/
3+
# $1 = cu124 --> 12.4
4+
VER="${1:2:2}.${1:4:1}"
45

5-
case ${1} in
6-
cu121)
7-
export CUDA_HOME=/usr/local/cuda-12.1
8-
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
9-
export PATH=${CUDA_HOME}/bin:${PATH}
10-
;;
11-
cu118)
12-
export CUDA_HOME=/usr/local/cuda-11.8
13-
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
14-
export PATH=${CUDA_HOME}/bin:${PATH}
15-
;;
16-
cu117)
17-
export CUDA_HOME=/usr/local/cuda-11.7
18-
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
19-
export PATH=${CUDA_HOME}/bin:${PATH}
20-
;;
21-
cu116)
22-
export CUDA_HOME=/usr/local/cuda-11.6
23-
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
24-
export PATH=${CUDA_HOME}/bin:${PATH}
25-
;;
26-
cu115)
27-
export CUDA_HOME=/usr/local/cuda-11.5
28-
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
29-
export PATH=${CUDA_HOME}/bin:${PATH}
30-
;;
31-
cu113)
32-
export CUDA_HOME=/usr/local/cuda-11.3
33-
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
34-
export PATH=${CUDA_HOME}/bin:${PATH}
35-
;;
36-
cu102)
37-
export CUDA_HOME=/usr/local/cuda-10.2
38-
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
39-
export PATH=${CUDA_HOME}/bin:${PATH}
40-
;;
41-
*)
42-
;;
43-
esac
6+
export CUDA_HOME=/usr/local/cuda-${VER}
7+
export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
8+
export PATH=${CUDA_HOME}/bin:${PATH}
9+
10+
echo "CUDA_HOME: ${CUDA_HOME}"
11+
echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}"
12+
echo "PATH: ${PATH}"

.github/workflows/cuda/Linux.sh

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,12 @@ CUDA_VER_ID="${CUDA_VER_ARR[0]}_${CUDA_VER_ARR[1]}"
1212
CUDA_VER_SHORT="cu${CUDA_VER_ARR[0]}${CUDA_VER_ARR[1]}"
1313

1414
case ${CUDA_VER_SHORT} in
15+
cu124)
16+
CUDA=12.4
17+
APT_KEY=${OS}-${CUDA/./-}-local
18+
FILENAME=cuda-repo-${APT_KEY}_${CUDA}.1-550.54.15-1_amd64.deb
19+
URL=https://developer.download.nvidia.com/compute/cuda/${CUDA}.1/local_installers
20+
;;
1521
cu121)
1622
CUDA=12.1
1723
APT_KEY=${OS}-${CUDA/./-}-local
@@ -65,7 +71,7 @@ sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600
6571
wget -nv ${URL}/${FILENAME}
6672
sudo dpkg -i ${FILENAME}
6773

68-
if [ "${CUDA_VER_SHORT}" = "cu117" ] || [ "${CUDA_VER_SHORT}" = "cu118" ] || [ "${CUDA_VER_SHORT}" = "cu121" ]; then
74+
if [ "${CUDA_VER_SHORT}" = "cu124" ] || [ "${CUDA_VER_SHORT}" = "cu117" ] || [ "${CUDA_VER_SHORT}" = "cu118" ] || [ "${CUDA_VER_SHORT}" = "cu121" ]; then
6975
sudo cp /var/cuda-repo-${APT_KEY}/cuda-*-keyring.gpg /usr/share/keyrings/
7076
else
7177
sudo apt-key add /var/cuda-repo-${APT_KEY}/7fa2af80.pub

.github/workflows/rocm6.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ on:
1111

1212
jobs:
1313
build:
14-
name: ${{ matrix.os }}-cuda-${{ matrix.cuda-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}
14+
name: ${{ matrix.os }}-rocm-${{ matrix.rocm-version }}-torch-${{ matrix.torch-version }}-${{ matrix.cmake-build-type }}
1515
runs-on: ${{ matrix.os }}
1616
strategy:
1717
fail-fast: false

.github/workflows/ubuntu.yml

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,15 @@ jobs:
1919
os: [ubuntu-22.04, ubuntu-20.04] # [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04]
2020
arch: [x64] # [x64, x86]
2121
torch-version: [2.6.0, 2.3.1, 2.2.1, 2.1.2] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1]
22-
cuda-version: [11.8.0, 12.1.1] # [12.3.1, 12.1.1, 11.8.0, 11.7.1, 11.6.2, 11.5.2,11.4.4, 11.3.1, 11.2.2, 11.1.1, 11.0.3, cpu]
22+
cuda-version: [11.8.0, 12.4.1] # [12.3.1, 12.1.1, 11.8.0, 11.7.1, 11.6.2, 11.5.2,11.4.4, 11.3.1, 11.2.2, 11.1.1, 11.0.3, cpu]
2323
cmake-build-type: [Release] # [Debug, ClangTidy]
24+
exclude:
25+
- cuda-version: 12.4.1
26+
torch-version: 2.3.1
27+
- cuda-version: 12.4.1
28+
torch-version: 2.2.1
29+
- cuda-version: 12.4.1
30+
torch-version: 2.1.2
2431
env:
2532
CCACHE_DIR: ${{ github.workspace }}/ccache
2633
CCACHE_BASEDIR: ${{ github.workspace }}

.github/workflows/windows.yml

Lines changed: 3 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -16,26 +16,15 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
os: [windows-2019, windows-2022] # [windows-2019, windows-2022]
19+
os: [windows-2022] # [windows-2019, windows-2022]
2020
arch: [x64] # [x64, x86]
2121
torch-version: [2.6.0, 2.3.1, 2.2.1, 2.1.2] # [1.12.0, 1.13.0, 2.0.0, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1]
22-
cuda-version: [12.4.0, 12.1.1] # [12.3.1, 12.1.1, 11.8.0, 11.7.1, 11.6.2, 11.5.2,11.4.4, 11.3.1, 11.2.2, 11.1.1, 11.0.3, cpu]
23-
opencv-version: [4.10.0, 4.9.0] # [4.7.0, 4.8.1, 4.9.0]
22+
cuda-version: [12.4.0] # [12.3.1, 12.1.1, 11.8.0, 11.7.1, 11.6.2, 11.5.2,11.4.4, 11.3.1, 11.2.2, 11.1.1, 11.0.3, cpu]
23+
opencv-version: [4.10.0] # [4.7.0, 4.8.1, 4.9.0]
2424
cmake-build-type: [ Release ] # [Debug, ClangTidy]
2525
include:
26-
- os: windows-2019
27-
generator: 'Visual Studio 16 2019'
2826
- os: windows-2022
2927
generator: 'Visual Studio 17 2022'
30-
exclude:
31-
- os: windows-2019
32-
cuda-version: 12.4.0
33-
- os: windows-2022
34-
cuda-version: 12.1.1
35-
- os: windows-2019
36-
opencv-version: 4.10.0
37-
- os: windows-2022
38-
opencv-version: 4.9.0
3928
env:
4029
CCACHE_DIR: ${{ github.workspace }}\ccache
4130
CCACHE_BASEDIR: ${{ github.workspace }}

VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
1.1.4
1+
1.1.5

model.cpp

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -310,18 +310,15 @@ void Model::afterTrain(int step){
310310
if (step < stopSplitAt){
311311
torch::Tensor visibleMask = (radii > 0).flatten();
312312

313-
torch::Tensor grads = xys.grad().detach();
314-
torch::Tensor gradsNorm = torch::sqrt(torch::sum(grads.pow(2), -1, false)); // Calculate L2 norm manually
315-
313+
torch::Tensor grads = torch::linalg_vector_norm(xys.grad().detach(), 2, { -1 }, false, torch::kFloat32);
316314
if (!xysGradNorm.numel()){
317-
xysGradNorm = gradsNorm;
315+
xysGradNorm = grads;
318316
visCounts = torch::ones_like(xysGradNorm);
319317
}else{
320318
visCounts.index_put_({visibleMask}, visCounts.index({visibleMask}) + 1);
321-
xysGradNorm.index_put_({visibleMask}, gradsNorm.index({visibleMask}) + xysGradNorm.index({visibleMask}));
319+
xysGradNorm.index_put_({visibleMask}, grads.index({visibleMask}) + xysGradNorm.index({visibleMask}));
322320
}
323321

324-
325322
if (!max2DSize.numel()){
326323
max2DSize = torch::zeros_like(radii, torch::kFloat32);
327324
}
@@ -355,14 +352,11 @@ void Model::afterTrain(int step){
355352

356353
torch::Tensor centeredSamples = torch::randn({nSplitSamples * nSplits, 3}, device); // Nx3 of axis-aligned scales
357354
torch::Tensor scaledSamples = torch::exp(scales.index({splits}).repeat({nSplitSamples, 1})) * centeredSamples;
358-
torch::Tensor qs = quats.index({splits});
359-
torch::Tensor norms = torch::sqrt(torch::sum(qs.pow(2), -1, true)); // Calculate norm
360-
qs = qs / norms; // Normalize the quaternions
361-
355+
torch::Tensor qs = quats.index({splits}) / torch::linalg_vector_norm(quats.index({splits}), 2, { -1 }, true, torch::kFloat32);
362356
torch::Tensor rots = quatToRotMat(qs.repeat({nSplitSamples, 1}));
363357
torch::Tensor rotatedSamples = torch::bmm(rots, scaledSamples.index({"...", None})).squeeze();
364358
torch::Tensor splitMeans = rotatedSamples + means.index({splits}).repeat({nSplitSamples, 1});
365-
359+
366360
torch::Tensor splitFeaturesDc = featuresDc.index({splits}).repeat({nSplitSamples, 1});
367361
torch::Tensor splitFeaturesRest = featuresRest.index({splits}).repeat({nSplitSamples, 1, 1});
368362

opensfm.cpp

Lines changed: 4 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -84,32 +84,22 @@ InputData inputDataFromOpenSfM(const std::string &projectRoot){
8484

8585
torch::Tensor unorientedPoses = torch::zeros({static_cast<long int>(shots.size()), 4, 4}, torch::kFloat32);
8686
size_t i = 0;
87-
for (const auto &s : shots) {
87+
for (const auto &s : shots){
8888
Shot shot = s.second;
8989

9090
torch::Tensor rotation = rodriguesToRotation(torch::from_blob(shot.rotation.data(), {static_cast<long>(shot.rotation.size())}, torch::kFloat32));
9191
torch::Tensor translation = torch::from_blob(shot.translation.data(), {static_cast<long>(shot.translation.size())}, torch::kFloat32);
92-
9392
torch::Tensor w2c = torch::eye(4, torch::kFloat32);
9493
w2c.index_put_({Slice(None, 3), Slice(None, 3)}, rotation);
95-
w2c.index_put_({Slice(None, 3), Slice(3, 4)}, translation.reshape({3, 1}));
96-
97-
// Manually compute the inverse of w2c
98-
torch::Tensor rotationT = rotation.transpose(0, 1); // Transpose rotation (3x3)
99-
torch::Tensor translationInv = -(rotationT.matmul(translation.reshape({3, 1}))); // -R^T * t
94+
w2c.index_put_({Slice(None, 3), Slice(3,4)}, translation.reshape({3, 1}));
10095

101-
torch::Tensor invW2C = torch::eye(4, torch::kFloat32);
102-
invW2C.index_put_({Slice(None, 3), Slice(None, 3)}, rotationT); // Set rotation part
103-
invW2C.index_put_({Slice(None, 3), Slice(3, 4)}, translationInv); // Set translation part
104-
105-
unorientedPoses[i] = invW2C;
96+
unorientedPoses[i] = torch::linalg_inv(w2c);
10697

10798
// Convert OpenSfM's camera CRS (OpenCV) to OpenGL
108-
unorientedPoses[i].index_put_({Slice(0, 3), Slice(1, 3)}, unorientedPoses[i].index({Slice(0, 3), Slice(1, 3)}) * -1.0f);
99+
unorientedPoses[i].index_put_({Slice(0, 3), Slice(1,3)}, unorientedPoses[i].index({Slice(0, 3), Slice(1,3)}) * -1.0f);
109100
i++;
110101
}
111102

112-
113103
auto r = autoScaleAndCenterPoses(unorientedPoses);
114104
torch::Tensor poses = std::get<0>(r);
115105
ret.translation = std::get<1>(r);

tensor_math.cpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,8 +68,7 @@ torch::Tensor rotationMatrix(const torch::Tensor &a, const torch::Tensor &b){
6868
}
6969

7070
torch::Tensor rodriguesToRotation(const torch::Tensor &rodrigues){
71-
torch::Tensor rodriguesNorm = torch::sqrt(torch::sum(rodrigues.pow(2), -1, true)); // Calculate L2 norm manually
72-
float theta = rodriguesNorm.item<float>(); // Get the scalar value from the tensor
71+
float theta = torch::linalg_vector_norm(rodrigues, 2, { -1 }, true, torch::kFloat32).item<float>();
7372
if (theta < FLOAT_EPS){
7473
return torch::eye(3, torch::kFloat32);
7574
}

0 commit comments

Comments
 (0)