-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathMakefile
More file actions
28 lines (21 loc) · 1.19 KB
/
Makefile
File metadata and controls
28 lines (21 loc) · 1.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
all: cugemm.bin cugemm-debug.bin cugemm-profile.bin
SOURCE_FILE=cugemm.cu
#SOURCE_FILE=cugemm-hw4.cu
# optimized binary
cugemm.bin: $(SOURCE_FILE)
nvcc -std=c++17 --generate-code=arch=compute_75,code=[compute_75,sm_75] $^ -lcublas -o $@
# debug binary without optimizations
cugemm-debug.bin: $(SOURCE_FILE)
nvcc -g -G -src-in-ptx -std=c++17 --generate-code=arch=compute_75,code=[compute_75,sm_75] $^ -lcublas -o $@
# optimized binary with line number information for profiling
cugemm-profile.bin: $(SOURCE_FILE)
nvcc -g --generate-line-info -src-in-ptx -std=c++17 --generate-code=arch=compute_75,code=[compute_75,sm_75] $^ -lcublas -o $@
# NB: make sure you change the --algo flag here to profile the one you care about.
# You can change the --export flag to set the filename of the profiling report that is produced.
profile-ncu: cugemm-profile.bin
sudo ncu --export my-profile --set full ./cugemm-profile.bin --size=1024 --reps=1 --algo=1 --validate=false
# profile with Nsight Systems to see a nice timeline of memory copies and kernel execution
profile-nsys: cugemm.bin
nsys profile -t cuda,osrt,nvtx -o myprofile ./cugemm.bin --validate=false --algo 6 --size 8192 --streams 4
clean:
rm -f cugemm*.bin