|
| 1 | +/* |
| 2 | + * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. |
| 3 | + * |
| 4 | + * Please refer to the NVIDIA end user license agreement (EULA) associated |
| 5 | + * with this source code for terms and conditions that govern your use of |
| 6 | + * this software. Any use, reproduction, disclosure, or distribution of |
| 7 | + * this software and related documentation outside the terms of the EULA |
| 8 | + * is strictly prohibited. |
| 9 | + * |
| 10 | + */ |
| 11 | + |
| 12 | +/* |
| 13 | + * This sample evaluates fair call and put prices for a |
| 14 | + * given set of European options by Black-Scholes formula. |
| 15 | + * See supplied whitepaper for more explanations. |
| 16 | + */ |
| 17 | + |
| 18 | + |
| 19 | +#include <helper_functions.h> // helper functions for string parsing |
| 20 | +#include <helper_cuda.h> // helper functions CUDA error checking and initialization |
| 21 | + |
| 22 | +//////////////////////////////////////////////////////////////////////////////// |
| 23 | +// Process an array of optN options on CPU |
| 24 | +//////////////////////////////////////////////////////////////////////////////// |
| 25 | +extern "C" void BlackScholesCPU( |
| 26 | + float *h_CallResult, |
| 27 | + float *h_PutResult, |
| 28 | + float *h_StockPrice, |
| 29 | + float *h_OptionStrike, |
| 30 | + float *h_OptionYears, |
| 31 | + float Riskfree, |
| 32 | + float Volatility, |
| 33 | + int optN |
| 34 | +); |
| 35 | + |
| 36 | +//////////////////////////////////////////////////////////////////////////////// |
| 37 | +// Process an array of OptN options on GPU |
| 38 | +//////////////////////////////////////////////////////////////////////////////// |
| 39 | +#include "BlackScholes_kernel.cuh" |
| 40 | + |
| 41 | +//////////////////////////////////////////////////////////////////////////////// |
| 42 | +// Helper function, returning uniformly distributed |
| 43 | +// random float in [low, high] range |
| 44 | +//////////////////////////////////////////////////////////////////////////////// |
| 45 | +float RandFloat(float low, float high) |
| 46 | +{ |
| 47 | + float t = (float)rand() / (float)RAND_MAX; |
| 48 | + return (1.0f - t) * low + t * high; |
| 49 | +} |
| 50 | + |
| 51 | +//////////////////////////////////////////////////////////////////////////////// |
| 52 | +// Data configuration |
| 53 | +//////////////////////////////////////////////////////////////////////////////// |
| 54 | +const int OPT_N = 4000000; |
| 55 | +const int NUM_ITERATIONS = 5; |
| 56 | + |
| 57 | + |
| 58 | +const int OPT_SZ = OPT_N * sizeof(float); |
| 59 | +const float RISKFREE = 0.02f; |
| 60 | +const float VOLATILITY = 0.30f; |
| 61 | + |
| 62 | +#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) |
| 63 | + |
| 64 | +//////////////////////////////////////////////////////////////////////////////// |
| 65 | +// Main program |
| 66 | +//////////////////////////////////////////////////////////////////////////////// |
| 67 | +int main(int argc, char **argv) |
| 68 | +{ |
| 69 | + // Start logs |
| 70 | + printf("[%s] - Starting...\n", argv[0]); |
| 71 | + |
| 72 | + //'h_' prefix - CPU (host) memory space |
| 73 | + float |
| 74 | + //Results calculated by CPU for reference |
| 75 | + *h_CallResultCPU, |
| 76 | + *h_PutResultCPU, |
| 77 | + //CPU copy of GPU results |
| 78 | + *h_CallResultGPU, |
| 79 | + *h_PutResultGPU, |
| 80 | + //CPU instance of input data |
| 81 | + *h_StockPrice, |
| 82 | + *h_OptionStrike, |
| 83 | + *h_OptionYears; |
| 84 | + |
| 85 | + //'d_' prefix - GPU (device) memory space |
| 86 | + float |
| 87 | + //Results calculated by GPU |
| 88 | + *d_CallResult, |
| 89 | + *d_PutResult, |
| 90 | + //GPU instance of input data |
| 91 | + *d_StockPrice, |
| 92 | + *d_OptionStrike, |
| 93 | + *d_OptionYears; |
| 94 | + |
| 95 | + double |
| 96 | + delta, ref, sum_delta, sum_ref, max_delta, L1norm, gpuTime; |
| 97 | + |
| 98 | + StopWatchInterface *hTimer = NULL; |
| 99 | + int i; |
| 100 | + |
| 101 | + findCudaDevice(argc, (const char **)argv); |
| 102 | + |
| 103 | + sdkCreateTimer(&hTimer); |
| 104 | + |
| 105 | + printf("Initializing data...\n"); |
| 106 | + printf("...allocating CPU memory for options.\n"); |
| 107 | + h_CallResultCPU = (float *)malloc(OPT_SZ); |
| 108 | + h_PutResultCPU = (float *)malloc(OPT_SZ); |
| 109 | + h_CallResultGPU = (float *)malloc(OPT_SZ); |
| 110 | + h_PutResultGPU = (float *)malloc(OPT_SZ); |
| 111 | + h_StockPrice = (float *)malloc(OPT_SZ); |
| 112 | + h_OptionStrike = (float *)malloc(OPT_SZ); |
| 113 | + h_OptionYears = (float *)malloc(OPT_SZ); |
| 114 | + |
| 115 | + printf("...allocating GPU memory for options.\n"); |
| 116 | + checkCudaErrors(cudaMalloc((void **)&d_CallResult, OPT_SZ)); |
| 117 | + checkCudaErrors(cudaMalloc((void **)&d_PutResult, OPT_SZ)); |
| 118 | + checkCudaErrors(cudaMalloc((void **)&d_StockPrice, OPT_SZ)); |
| 119 | + checkCudaErrors(cudaMalloc((void **)&d_OptionStrike, OPT_SZ)); |
| 120 | + checkCudaErrors(cudaMalloc((void **)&d_OptionYears, OPT_SZ)); |
| 121 | + |
| 122 | + printf("...generating input data in CPU mem.\n"); |
| 123 | + srand(5347); |
| 124 | + |
| 125 | + //Generate options set |
| 126 | + for (i = 0; i < OPT_N; i++) |
| 127 | + { |
| 128 | + h_CallResultCPU[i] = 0.0f; |
| 129 | + h_PutResultCPU[i] = -1.0f; |
| 130 | + h_StockPrice[i] = RandFloat(5.0f, 30.0f); |
| 131 | + h_OptionStrike[i] = RandFloat(1.0f, 100.0f); |
| 132 | + h_OptionYears[i] = RandFloat(0.25f, 10.0f); |
| 133 | + } |
| 134 | + |
| 135 | + printf("...copying input data to GPU mem.\n"); |
| 136 | + //Copy options data to GPU memory for further processing |
| 137 | + checkCudaErrors(cudaMemcpy(d_StockPrice, h_StockPrice, OPT_SZ, cudaMemcpyHostToDevice)); |
| 138 | + checkCudaErrors(cudaMemcpy(d_OptionStrike, h_OptionStrike, OPT_SZ, cudaMemcpyHostToDevice)); |
| 139 | + checkCudaErrors(cudaMemcpy(d_OptionYears, h_OptionYears, OPT_SZ, cudaMemcpyHostToDevice)); |
| 140 | + printf("Data init done.\n\n"); |
| 141 | + |
| 142 | + |
| 143 | + printf("Executing Black-Scholes GPU kernel (%i iterations)...\n", NUM_ITERATIONS); |
| 144 | + checkCudaErrors(cudaDeviceSynchronize()); |
| 145 | + sdkResetTimer(&hTimer); |
| 146 | + sdkStartTimer(&hTimer); |
| 147 | + |
| 148 | + for (i = 0; i < NUM_ITERATIONS; i++) |
| 149 | + { |
| 150 | + BlackScholesGPU<<<DIV_UP((OPT_N/2), 128), 128/*480, 128*/>>>( |
| 151 | + (float2 *)d_CallResult, |
| 152 | + (float2 *)d_PutResult, |
| 153 | + (float2 *)d_StockPrice, |
| 154 | + (float2 *)d_OptionStrike, |
| 155 | + (float2 *)d_OptionYears, |
| 156 | + RISKFREE, |
| 157 | + VOLATILITY, |
| 158 | + OPT_N |
| 159 | + ); |
| 160 | + getLastCudaError("BlackScholesGPU() execution failed\n"); |
| 161 | + } |
| 162 | + |
| 163 | + checkCudaErrors(cudaDeviceSynchronize()); |
| 164 | + sdkStopTimer(&hTimer); |
| 165 | + gpuTime = sdkGetTimerValue(&hTimer) / NUM_ITERATIONS; |
| 166 | + |
| 167 | + //Both call and put is calculated |
| 168 | + printf("Options count : %i \n", 2 * OPT_N); |
| 169 | + printf("BlackScholesGPU() time : %f msec\n", gpuTime); |
| 170 | + printf("Effective memory bandwidth: %f GB/s\n", ((double)(5 * OPT_N * sizeof(float)) * 1E-9) / (gpuTime * 1E-3)); |
| 171 | + printf("Gigaoptions per second : %f \n\n", ((double)(2 * OPT_N) * 1E-9) / (gpuTime * 1E-3)); |
| 172 | + |
| 173 | + printf("BlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u options, NumDevsUsed = %u, Workgroup = %u\n", |
| 174 | + (((double)(2.0 * OPT_N) * 1.0E-9) / (gpuTime * 1.0E-3)), gpuTime*1e-3, (2 * OPT_N), 1, 128); |
| 175 | + |
| 176 | + printf("\nReading back GPU results...\n"); |
| 177 | + //Read back GPU results to compare them to CPU results |
| 178 | + checkCudaErrors(cudaMemcpy(h_CallResultGPU, d_CallResult, OPT_SZ, cudaMemcpyDeviceToHost)); |
| 179 | + checkCudaErrors(cudaMemcpy(h_PutResultGPU, d_PutResult, OPT_SZ, cudaMemcpyDeviceToHost)); |
| 180 | + |
| 181 | + |
| 182 | + printf("Checking the results...\n"); |
| 183 | + printf("...running CPU calculations.\n\n"); |
| 184 | + //Calculate options values on CPU |
| 185 | + BlackScholesCPU( |
| 186 | + h_CallResultCPU, |
| 187 | + h_PutResultCPU, |
| 188 | + h_StockPrice, |
| 189 | + h_OptionStrike, |
| 190 | + h_OptionYears, |
| 191 | + RISKFREE, |
| 192 | + VOLATILITY, |
| 193 | + OPT_N |
| 194 | + ); |
| 195 | + |
| 196 | + printf("Comparing the results...\n"); |
| 197 | + //Calculate max absolute difference and L1 distance |
| 198 | + //between CPU and GPU results |
| 199 | + sum_delta = 0; |
| 200 | + sum_ref = 0; |
| 201 | + max_delta = 0; |
| 202 | + |
| 203 | + for (i = 0; i < OPT_N; i++) |
| 204 | + { |
| 205 | + ref = h_CallResultCPU[i]; |
| 206 | + delta = fabs(h_CallResultCPU[i] - h_CallResultGPU[i]); |
| 207 | + |
| 208 | + if (delta > max_delta) |
| 209 | + { |
| 210 | + max_delta = delta; |
| 211 | + } |
| 212 | + |
| 213 | + sum_delta += delta; |
| 214 | + sum_ref += fabs(ref); |
| 215 | + } |
| 216 | + |
| 217 | + L1norm = sum_delta / sum_ref; |
| 218 | + printf("L1 norm: %E\n", L1norm); |
| 219 | + printf("Max absolute error: %E\n\n", max_delta); |
| 220 | + |
| 221 | + printf("Shutting down...\n"); |
| 222 | + printf("...releasing GPU memory.\n"); |
| 223 | + checkCudaErrors(cudaFree(d_OptionYears)); |
| 224 | + checkCudaErrors(cudaFree(d_OptionStrike)); |
| 225 | + checkCudaErrors(cudaFree(d_StockPrice)); |
| 226 | + checkCudaErrors(cudaFree(d_PutResult)); |
| 227 | + checkCudaErrors(cudaFree(d_CallResult)); |
| 228 | + |
| 229 | + printf("...releasing CPU memory.\n"); |
| 230 | + free(h_OptionYears); |
| 231 | + free(h_OptionStrike); |
| 232 | + free(h_StockPrice); |
| 233 | + free(h_PutResultGPU); |
| 234 | + free(h_CallResultGPU); |
| 235 | + free(h_PutResultCPU); |
| 236 | + free(h_CallResultCPU); |
| 237 | + sdkDeleteTimer(&hTimer); |
| 238 | + printf("Shutdown done.\n"); |
| 239 | + |
| 240 | + printf("\n[BlackScholes] - Test Summary\n"); |
| 241 | + |
| 242 | + if (L1norm > 1e-6) |
| 243 | + { |
| 244 | + printf("Test failed!\n"); |
| 245 | + exit(EXIT_FAILURE); |
| 246 | + } |
| 247 | + |
| 248 | + printf("\nNOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n"); |
| 249 | + printf("Test passed\n"); |
| 250 | + exit(EXIT_SUCCESS); |
| 251 | +} |
0 commit comments