A CUDA/C++ implementation of the code used in our paper. The program uses vanilla CUDA/C++ (no libraries beyond the minimum required to communicate with CUDA from C++). It supports full GPU utilization via specialized CUDA kernels that perform DNN inference and backpropagation.
int device = 0;
CudaWrapper::setDevice(device);
CudaWrapper::profileDevices();
MatrixUtils::Matrix inputTrainData("TrainingData/CIFAR10_TrainInputs.csv");
MatrixUtils::Matrix trainLabels("TrainingData/CIFAR10_TrainOutputs.csv");
NeuralNetwork network;
network.addLayer(new InputLayer(inputSize));
network.addLayer(new FullyConnectedLayer(10000, "test"));
network.addLayer(new FullyConnectedLayer(outputSize, "test"));
network.setTrainingData(inputTrainData.getArray(), trainLabels.getArray(), 50000);
network.setLearningRate(0.0005);
network.printTopology();
network.loadGPU();
for(int i=0; i<50; i++){
network.train();
}