Skip to content

Commit 4bb44bb

Browse files
Merge pull request #135 from DrTimothyAldenDavis/master
Master
2 parents 9c3c0bf + fa53d23 commit 4bb44bb

17 files changed

+208
-188
lines changed

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ endif ( )
2626
set ( CMAKE_MACOSX_RPATH TRUE )
2727

2828
# version of SuiteSparse:GraphBLAS
29-
set ( GraphBLAS_DATE "Apr 6, 2022" )
29+
set ( GraphBLAS_DATE "Apr 8, 2022" )
3030
set ( GraphBLAS_VERSION_MAJOR 7 )
3131
set ( GraphBLAS_VERSION_MINOR 0 )
32-
set ( GraphBLAS_VERSION_SUB 2 )
32+
set ( GraphBLAS_VERSION_SUB 3 )
3333

3434
message ( STATUS "Building SuiteSparse:GraphBLAS version: v" ${GraphBLAS_VERSION_MAJOR}.${GraphBLAS_VERSION_MINOR}.${GraphBLAS_VERSION_SUB} " date: " ${GraphBLAS_DATE} )
3535

CUDA/GB_jit_cache.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ namespace jit {
4646
}
4747
}
4848

49+
// Return the GraphBLAS source path taken from the GRAPHBLAS_SOURCE_PATH
// environment variable, or an empty string when that variable is not set.
50+
std::string get_user_graphblas_source_path() {
51+
auto gb_home = std::getenv("GRAPHBLAS_SOURCE_PATH"); // nullptr when the variable is unset
52+
if (gb_home != nullptr) return std::string(gb_home);
53+
else return std::string(); // unset: report "no path" as an empty string
54+
}
4955

5056

5157
// Default `GRAPHBLAS_CACHE_PATH` to `$HOME/.GraphBLAS`.

CUDA/GB_jit_cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
namespace jit {
3232

3333
std::string get_user_home_cache_dir();
34+
std::string get_user_graphblas_source_path();
3435
std::string getCacheDir(void);
3536

3637
template <typename Tv>

CUDA/jitFactory.hpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,12 @@ static const std::vector<std::string> compiler_flags{
8989
"-I../../Source",
9090
"-I../../Source/Template",
9191
"-I../templates",
92-
"-I/share/workspace/nvidia_projects/GraphBLAS/CUDA/templates"
93-
"-I/share/workspace/nvidia_projects/GraphBLAS/CUDA/"
94-
// "-L../../build/CUDA",
92+
93+
// Add includes relative to GRAPHBLAS_SOURCE_PATH variable
94+
"-I" + jit::get_user_graphblas_source_path() + "/CUDA",
95+
"-I" + jit::get_user_graphblas_source_path() + "/CUDA/templates",
96+
"-I" + jit::get_user_graphblas_source_path() + "/Source",
97+
"-I" + jit::get_user_graphblas_source_path() + "/Source/Template",
9598
"-I/usr/local/cuda/include",
9699
};
97100

@@ -151,7 +154,7 @@ class phase1launchFactory
151154
std::string hashable_name = base_name + "_" + kernel_name;
152155
string_to_be_jitted << hashable_name << std::endl <<
153156
R"(#include ")" << jit::get_user_home_cache_dir() << "/" << semiring_factory_.filename << R"(")" << std::endl <<
154-
R"(#include ")" << hashable_name << R"(.cuh")" << std::endl;
157+
R"(#include "templates/)" << hashable_name << R"(.cuh")" << std::endl;
155158
std::cout << string_to_be_jitted.str();
156159

157160
bool result = false;

CUDA/templates/GB_jit_AxB_phase1.cuh

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -313,10 +313,10 @@ __global__ void AxB_phase1
313313
__syncthreads();
314314
if (threadIdx.x==0 && blockIdx.x == 0)
315315
{
316-
printf ("Here in phase1, what I see is this:\n") ;
317-
printf ("MX(pM) is: %s\n", GB_XSTR (MX (pM))) ;
318-
printf ("GB_MULT(x,y) is: %s\n", GB_XSTR (GB_MULT (x,y))) ;
319-
printf ("GB_ADD(x,y) is: %s\n", GB_XSTR (GB_ADD (x,y))) ;
316+
// printf ("Here in phase1, what I see is this:\n") ;
317+
// printf ("MX(pM) is: %s\n", GB_XSTR (MX (pM))) ;
318+
// printf ("GB_MULT(x,y) is: %s\n", GB_XSTR (GB_MULT (x,y))) ;
319+
// printf ("GB_ADD(x,y) is: %s\n", GB_XSTR (GB_ADD (x,y))) ;
320320
// #define GB_GETA(blob)
321321
// #define GB_GETB(blob)
322322
// #define GB_MULT(x,y) (1)
@@ -382,11 +382,11 @@ __global__ void AxB_phase1
382382

383383
int64_t k_end = GB_IMIN( pointerchunk , klast - kfirst +2 ) ;
384384

385-
if( threadIdx.x ==0)
386-
{
387-
printf("chunk%ld pfirst,plast,ch_end =%ld,%ld,%ld kfirst,klast,kend = %ld,%ld,%ld\n",
388-
chunk, pfirst, plast, chunk_end, kfirst, klast, k_end ) ;
389-
}
385+
// if( threadIdx.x ==0)
386+
// {
387+
// printf("chunk%ld pfirst,plast,ch_end =%ld,%ld,%ld kfirst,klast,kend = %ld,%ld,%ld\n",
388+
// chunk, pfirst, plast, chunk_end, kfirst, klast, k_end ) ;
389+
// }
390390
__syncthreads();
391391

392392

@@ -399,10 +399,10 @@ __global__ void AxB_phase1
399399
__syncthreads();
400400
if (threadIdx.x == 0)
401401
{
402-
for (int64_t i = 0 ; i < k_end ; i++)
403-
{
404-
printf ("Mps [%d] = %ld\n", i, Mps [i]) ;
405-
}
402+
// for (int64_t i = 0 ; i < k_end ; i++)
403+
// {
404+
// printf ("Mps [%d] = %ld\n", i, Mps [i]) ;
405+
// }
406406
}
407407
__syncthreads();
408408

@@ -420,10 +420,10 @@ __global__ void AxB_phase1
420420
__syncthreads();
421421
if (threadIdx.x == 0)
422422
{
423-
for (int64_t i = 0 ; i < chunksize ; i++)
424-
{
425-
printf ("ks [%d] = %ld\n", i, ks [i]) ;
426-
}
423+
// for (int64_t i = 0 ; i < chunksize ; i++)
424+
// {
425+
// printf ("ks [%d] = %ld\n", i, ks [i]) ;
426+
// }
427427
}
428428
__syncthreads();
429429

@@ -461,7 +461,7 @@ __global__ void AxB_phase1
461461
GB_bucket_code bucket = GB_BUCKET_ZOMBIE ;
462462
int64_t k = ks[ pM - pfirst ] ;
463463
//k += ( pM == Mp[k+1] ) ;
464-
printf ("tid%d k %ld pM %ld MX(pM): %d\n", threadIdx.x, k, pM, MX (pM));
464+
// printf ("tid%d k %ld pM %ld MX(pM): %d\n", threadIdx.x, k, pM, MX (pM));
465465
int64_t i = Mi [ pM ] ;
466466
int64_t j = k ; // HACK, does not need to be initialized here
467467

@@ -515,16 +515,16 @@ pA_end = Ap [i+1] ;
515515

516516
//bucket = GB_BUCKET_MERGEPATH ;
517517
bucket= GB_bucket_assignment ( ainz, bjnz, bvlen) ;
518-
printf ("tid%d i %ld j %ld ainz %ld bjnz %ld: bucket %d\n",
519-
threadIdx.x, i, j, ainz, bjnz, (int) bucket) ;
518+
// printf ("tid%d i %ld j %ld ainz %ld bjnz %ld: bucket %d\n",
519+
// threadIdx.x, i, j, ainz, bjnz, (int) bucket) ;
520520
}
521521
}
522522
}
523523

524524
if (bucket == GB_BUCKET_ZOMBIE)
525525
{
526526
// mark C(i,j) is a zombie
527-
printf ("tid%d pM=%d %d,%d prezombie\n",threadIdx.x,pM,i,j) ;
527+
// printf ("tid%d pM=%d %d,%d prezombie\n",threadIdx.x,pM,i,j) ;
528528
Ci [pM] = GB_FLIP (i) << 4 ;
529529
// GB_BUCKET_COUNT (GB_BUCKET_ZOMBIE) ;
530530
my_bucket_0++ ; //0 is the zombie bucket
@@ -534,7 +534,7 @@ pA_end = Ap [i+1] ;
534534
// place C(i,j) in its bucket
535535
Ci [pM] = (k << 4) + bucket ;
536536
GB_BUCKET_COUNT (bucket) ;
537-
printf ("tid%d pM=%d %d,%d b=%d\n",threadIdx.x, pM, i,j, (int)bucket) ;
537+
// printf ("tid%d pM=%d %d,%d b=%d\n",threadIdx.x, pM, i,j, (int)bucket) ;
538538
}
539539
}
540540

CUDA/templates/GB_jit_reduceNonZombiesWarp.cuh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,6 @@ __global__ void reduceNonZombiesWarp
9999
for(int i = blockIdx.x * blockDim.x + threadIdx.x;
100100
i < N;
101101
i += blockDim.x * gridDim.x) {
102-
printf("tid=%d, N: %ud\n", tid, N);
103-
104102
if (is_sparse && index[i] < 0) continue; // skip zombies
105103
T fold = g_idata[i];
106104
sum = GB_ADD( sum, fold );

Doc/ChangeLog

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
Version 7.0.3, Apr 8, 2022
2+
3+
* faster transpose when using 2 threads
4+
15
Version 7.0.2, Apr 6, 2022
26

37
* (45) bug fix: vector iterator was broken for iterating across a

Doc/GraphBLAS_UserGuide.pdf

786 Bytes
Binary file not shown.

Doc/GraphBLAS_UserGuide.tex

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,12 @@ \subsection{Release Notes}
148148

149149
\begin{itemize}
150150

151+
\item Version 7.0.3 (Apr 8, 2022)
152+
153+
\begin{packed_itemize}
154+
\item faster transpose when using 2 threads
155+
\end{packed_itemize}
156+
151157
\item Version 7.0.2 (Apr 6, 2022)
152158

153159
\begin{packed_itemize}
@@ -5934,8 +5940,8 @@ \subsection{Serialize/deserialize methods}
59345940
\url{https://cwe.mitre.org/data/definitions/502.html}. The deserialization
59355941
methods do a few basic checks so that no out-of-bounds access occurs during
59365942
deserialization, but the output matrix or vector itself may still be corrupted.
5937-
If the data is untrusted, use check the matrix or vector after
5938-
deserializing it:
5943+
If the data is untrusted, use \verb'GxB_*_fprint' to
5944+
check the matrix or vector after deserializing it:
59395945

59405946
{\footnotesize
59415947
\begin{verbatim}

Doc/GraphBLAS_version.tex

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
% version of SuiteSparse:GraphBLAS
22
\date{VERSION
3-
7.0.2,
4-
Apr 6, 2022}
3+
7.0.3,
4+
Apr 8, 2022}
55

0 commit comments

Comments
 (0)