Skip to content

Commit 27fca56

Browse files
author
FindHao
committed
init
0 parents  commit 27fca56

File tree

286 files changed

+1638119
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

286 files changed

+1638119
-0
lines changed

.gitignore

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
*.o
2+
BlackScholes/BlackScholes
3+
dct8x8/dct8x8
4+
histogram/histogram
5+
imageDenoising/imageDenoising
6+
imageDenoising/output.ppm
7+
MonteCarloMultiGPU/MonteCarloMultiGPU
8+
recursiveGaussian/recursiveGaussian
9+
srad_v1/srad
10+
srad_v1/image_out.pgm
11+
srad_v2/srad
12+
srad_v1_opt/srad
13+
backprop/backprop
14+
cfd/euler3d
15+
cfd/euler3d_double
16+
cfd/pre_euler3d
17+
cfd/pre_euler3d_double
18+
hotspot/hotspot
19+
pathfinder/pathfinder
20+
pathfinder/result.txt
21+
gaussian/gaussian
22+
vectorAdd*/vectorAdd
23+
kmeans/kmeans
24+
bfs/bfs
25+
dwt2d/dwt2d
26+
dwt2d/*.bmp.dwt.*
27+
28+
.vscode/
29+
30+
NsightEclipse.xml
31+
32+
33+
core
34+
*/hpctoolkit-*-measurements*
35+
*/hpctoolkit-*-database*
36+
*/prof
37+
*/*log*
38+
*/workspace/
39+
*/prof/
40+
*csv
41+
*hpcstruct
42+
*/*hpcstruct
43+
44+
data/

BlackScholes/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
BlackScholes

BlackScholes/BlackScholes.cu

+251
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
/*
2+
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
3+
*
4+
* Please refer to the NVIDIA end user license agreement (EULA) associated
5+
* with this source code for terms and conditions that govern your use of
6+
* this software. Any use, reproduction, disclosure, or distribution of
7+
* this software and related documentation outside the terms of the EULA
8+
* is strictly prohibited.
9+
*
10+
*/
11+
12+
/*
13+
* This sample evaluates fair call and put prices for a
14+
* given set of European options by Black-Scholes formula.
15+
* See supplied whitepaper for more explanations.
16+
*/
17+
18+
19+
#include <helper_functions.h> // helper functions for string parsing
20+
#include <helper_cuda.h> // helper functions for CUDA error checking and initialization
21+
22+
////////////////////////////////////////////////////////////////////////////////
23+
// Process an array of optN options on CPU
24+
////////////////////////////////////////////////////////////////////////////////
25+
extern "C" void BlackScholesCPU(
26+
float *h_CallResult,
27+
float *h_PutResult,
28+
float *h_StockPrice,
29+
float *h_OptionStrike,
30+
float *h_OptionYears,
31+
float Riskfree,
32+
float Volatility,
33+
int optN
34+
);
35+
36+
////////////////////////////////////////////////////////////////////////////////
37+
// Process an array of OptN options on GPU
38+
////////////////////////////////////////////////////////////////////////////////
39+
#include "BlackScholes_kernel.cuh"
40+
41+
////////////////////////////////////////////////////////////////////////////////
42+
// Helper function, returning uniformly distributed
43+
// random float in [low, high] range
44+
////////////////////////////////////////////////////////////////////////////////
45+
// Takes the next value from rand(), scales it by RAND_MAX to get a blend
// factor, and uses that factor to interpolate linearly between low and high.
// The sequence depends on the seed previously given to srand().
float RandFloat(float low, float high)
{
    const float fraction = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);

    // Kept as a single expression so the rounding matches the classic form.
    return (1.0f - fraction) * low + fraction * high;
}
50+
51+
////////////////////////////////////////////////////////////////////////////////
52+
// Data configuration
53+
////////////////////////////////////////////////////////////////////////////////
54+
// Number of options priced per run.
const int OPT_N = 4000000;
// Number of back-to-back kernel launches that are timed together.
const int NUM_ITERATIONS = 5;

// Size in bytes of one float array holding OPT_N entries.
const int OPT_SZ = OPT_N * sizeof(float);
// Riskless rate and volatility shared by every option.
const float RISKFREE = 0.02f;
const float VOLATILITY = 0.30f;

// Integer division of a by b, rounded up (for non-negative a and positive b).
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
63+
64+
////////////////////////////////////////////////////////////////////////////////
65+
// Main program
66+
////////////////////////////////////////////////////////////////////////////////
67+
// Program entry point for the Black-Scholes sample.
//
// Allocates host and device buffers for OPT_N options, fills the inputs with
// pseudo-random values from a fixed seed, launches BlackScholesGPU
// NUM_ITERATIONS times under a timer, copies the results back, recomputes
// them with BlackScholesCPU and compares the call prices of both paths.
//
// Parameters:
//   argc, argv - command line; forwarded to findCudaDevice(), and argv[0] is
//                printed in the start-up log line.
//
// Exit status: the function always leaves through exit(). EXIT_SUCCESS when
// the relative L1 error of the call prices is <= 1e-6; EXIT_FAILURE when it
// is larger or NaN, or when a host allocation fails.
//
// NOTE(review): the put prices are copied back from the GPU and computed on
// the CPU, but only the call prices take part in the comparison.
int main(int argc, char **argv)
{
    // Block size used for the launch below and echoed in the summary line.
    const int THREADS_PER_BLOCK = 128;

    // Start logs
    printf("[%s] - Starting...\n", argv[0]);

    //'h_' prefix - CPU (host) memory space
    float
    //Results calculated by CPU for reference
    *h_CallResultCPU,
    *h_PutResultCPU,
    //CPU copy of GPU results
    *h_CallResultGPU,
    *h_PutResultGPU,
    //CPU instance of input data
    *h_StockPrice,
    *h_OptionStrike,
    *h_OptionYears;

    //'d_' prefix - GPU (device) memory space
    float
    //Results calculated by GPU
    *d_CallResult,
    *d_PutResult,
    //GPU instance of input data
    *d_StockPrice,
    *d_OptionStrike,
    *d_OptionYears;

    double
    delta, ref, sum_delta, sum_ref, max_delta, L1norm, gpuTime;

    StopWatchInterface *hTimer = NULL;
    int i;

    findCudaDevice(argc, (const char **)argv);

    sdkCreateTimer(&hTimer);

    printf("Initializing data...\n");
    printf("...allocating CPU memory for options.\n");
    h_CallResultCPU = (float *)malloc(OPT_SZ);
    h_PutResultCPU  = (float *)malloc(OPT_SZ);
    h_CallResultGPU = (float *)malloc(OPT_SZ);
    h_PutResultGPU  = (float *)malloc(OPT_SZ);
    h_StockPrice    = (float *)malloc(OPT_SZ);
    h_OptionStrike  = (float *)malloc(OPT_SZ);
    h_OptionYears   = (float *)malloc(OPT_SZ);

    // malloc() returns NULL on failure; stop here instead of writing through
    // a null pointer in the initialisation loop below.
    if (h_CallResultCPU == NULL || h_PutResultCPU == NULL ||
        h_CallResultGPU == NULL || h_PutResultGPU == NULL ||
        h_StockPrice == NULL || h_OptionStrike == NULL || h_OptionYears == NULL)
    {
        fprintf(stderr, "Failed to allocate CPU memory for options.\n");
        exit(EXIT_FAILURE);
    }

    printf("...allocating GPU memory for options.\n");
    checkCudaErrors(cudaMalloc((void **)&d_CallResult,   OPT_SZ));
    checkCudaErrors(cudaMalloc((void **)&d_PutResult,    OPT_SZ));
    checkCudaErrors(cudaMalloc((void **)&d_StockPrice,   OPT_SZ));
    checkCudaErrors(cudaMalloc((void **)&d_OptionStrike, OPT_SZ));
    checkCudaErrors(cudaMalloc((void **)&d_OptionYears,  OPT_SZ));

    printf("...generating input data in CPU mem.\n");
    // Fixed seed so every run generates the same option set.
    srand(5347);

    //Generate options set
    for (i = 0; i < OPT_N; i++)
    {
        h_CallResultCPU[i] = 0.0f;
        h_PutResultCPU[i]  = -1.0f;
        h_StockPrice[i]    = RandFloat(5.0f, 30.0f);
        h_OptionStrike[i]  = RandFloat(1.0f, 100.0f);
        h_OptionYears[i]   = RandFloat(0.25f, 10.0f);
    }

    printf("...copying input data to GPU mem.\n");
    //Copy options data to GPU memory for further processing
    checkCudaErrors(cudaMemcpy(d_StockPrice,   h_StockPrice,   OPT_SZ, cudaMemcpyHostToDevice));
    checkCudaErrors(cudaMemcpy(d_OptionStrike, h_OptionStrike, OPT_SZ, cudaMemcpyHostToDevice));
    checkCudaErrors(cudaMemcpy(d_OptionYears,  h_OptionYears,  OPT_SZ, cudaMemcpyHostToDevice));
    printf("Data init done.\n\n");

    printf("Executing Black-Scholes GPU kernel (%i iterations)...\n", NUM_ITERATIONS);
    // Synchronize before starting the timer so earlier work is not measured.
    checkCudaErrors(cudaDeviceSynchronize());
    sdkResetTimer(&hTimer);
    sdkStartTimer(&hTimer);

    for (i = 0; i < NUM_ITERATIONS; i++)
    {
        // The buffers are passed as float2, so the grid is sized for
        // OPT_N / 2 elements.
        BlackScholesGPU<<<DIV_UP((OPT_N/2), THREADS_PER_BLOCK), THREADS_PER_BLOCK>>>(
            (float2 *)d_CallResult,
            (float2 *)d_PutResult,
            (float2 *)d_StockPrice,
            (float2 *)d_OptionStrike,
            (float2 *)d_OptionYears,
            RISKFREE,
            VOLATILITY,
            OPT_N
        );
        getLastCudaError("BlackScholesGPU() execution failed\n");
    }

    // Wait for all launches to finish before stopping the timer.
    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&hTimer);
    gpuTime = sdkGetTimerValue(&hTimer) / NUM_ITERATIONS;

    //Both call and put are calculated
    printf("Options count             : %i     \n", 2 * OPT_N);
    printf("BlackScholesGPU() time    : %f msec\n", gpuTime);
    printf("Effective memory bandwidth: %f GB/s\n", ((double)(5 * OPT_N * sizeof(float)) * 1E-9) / (gpuTime * 1E-3));
    printf("Gigaoptions per second    : %f     \n\n", ((double)(2 * OPT_N) * 1E-9) / (gpuTime * 1E-3));

    // The %u conversions expect unsigned arguments, hence the casts.
    printf("BlackScholes, Throughput = %.4f GOptions/s, Time = %.5f s, Size = %u options, NumDevsUsed = %u, Workgroup = %u\n",
           (((double)(2.0 * OPT_N) * 1.0E-9) / (gpuTime * 1.0E-3)), gpuTime*1e-3,
           (unsigned int)(2 * OPT_N), 1u, (unsigned int)THREADS_PER_BLOCK);

    printf("\nReading back GPU results...\n");
    //Read back GPU results to compare them to CPU results
    checkCudaErrors(cudaMemcpy(h_CallResultGPU, d_CallResult, OPT_SZ, cudaMemcpyDeviceToHost));
    checkCudaErrors(cudaMemcpy(h_PutResultGPU,  d_PutResult,  OPT_SZ, cudaMemcpyDeviceToHost));

    printf("Checking the results...\n");
    printf("...running CPU calculations.\n\n");
    //Calculate options values on CPU
    BlackScholesCPU(
        h_CallResultCPU,
        h_PutResultCPU,
        h_StockPrice,
        h_OptionStrike,
        h_OptionYears,
        RISKFREE,
        VOLATILITY,
        OPT_N
    );

    printf("Comparing the results...\n");
    //Calculate max absolute difference and L1 distance
    //between CPU and GPU results
    sum_delta = 0;
    sum_ref   = 0;
    max_delta = 0;

    for (i = 0; i < OPT_N; i++)
    {
        ref   = h_CallResultCPU[i];
        delta = fabs(h_CallResultCPU[i] - h_CallResultGPU[i]);

        if (delta > max_delta)
        {
            max_delta = delta;
        }

        sum_delta += delta;
        sum_ref   += fabs(ref);
    }

    L1norm = sum_delta / sum_ref;
    printf("L1 norm: %E\n", L1norm);
    printf("Max absolute error: %E\n\n", max_delta);

    printf("Shutting down...\n");
    printf("...releasing GPU memory.\n");
    checkCudaErrors(cudaFree(d_OptionYears));
    checkCudaErrors(cudaFree(d_OptionStrike));
    checkCudaErrors(cudaFree(d_StockPrice));
    checkCudaErrors(cudaFree(d_PutResult));
    checkCudaErrors(cudaFree(d_CallResult));

    printf("...releasing CPU memory.\n");
    free(h_OptionYears);
    free(h_OptionStrike);
    free(h_StockPrice);
    free(h_PutResultGPU);
    free(h_CallResultGPU);
    free(h_PutResultCPU);
    free(h_CallResultCPU);
    sdkDeleteTimer(&hTimer);
    printf("Shutdown done.\n");

    printf("\n[BlackScholes] - Test Summary\n");

    // Written as a negated "<=" so that a NaN norm (for example from NaN GPU
    // results) fails the test; "L1norm > 1e-6" is false for NaN.
    if (!(L1norm <= 1e-6))
    {
        printf("Test failed!\n");
        exit(EXIT_FAILURE);
    }

    printf("\nNOTE: The CUDA Samples are not meant for performance measurements. Results may vary when GPU Boost is enabled.\n\n");
    printf("Test passed\n");
    exit(EXIT_SUCCESS);
}

BlackScholes/BlackScholes_gold.cpp

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
3+
*
4+
* Please refer to the NVIDIA end user license agreement (EULA) associated
5+
* with this source code for terms and conditions that govern your use of
6+
* this software. Any use, reproduction, disclosure, or distribution of
7+
* this software and related documentation outside the terms of the EULA
8+
* is strictly prohibited.
9+
*
10+
*/
11+
12+
13+
14+
#include <math.h>
15+
16+
17+
18+
///////////////////////////////////////////////////////////////////////////////
19+
// Polynomial approximation of cumulative normal distribution function
20+
///////////////////////////////////////////////////////////////////////////////
21+
// Approximates the cumulative normal distribution function at d.
//
// The value for |d| is obtained from a fifth-degree polynomial in
// K = 1 / (1 + 0.2316419 * |d|), scaled by the normal density at d; for
// positive d the result is mirrored as 1 - value.
static double CND(double d)
{
    // Polynomial coefficients.
    const double A1 = 0.31938153;
    const double A2 = -0.356563782;
    const double A3 = 1.781477937;
    const double A4 = -1.821255978;
    const double A5 = 1.330274429;
    // 1 / sqrt(2 * pi)
    const double RSQRT2PI = 0.39894228040143267793994605993438;

    const double K = 1.0 / (1.0 + 0.2316419 * fabs(d));

    // Horner evaluation of the polynomial, including the leading factor K.
    const double polynomial = K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5))));

    const double tail = RSQRT2PI * exp(- 0.5 * d * d) * polynomial;

    return (d > 0) ? 1.0 - tail : tail;
}
42+
43+
44+
///////////////////////////////////////////////////////////////////////////////
45+
// Black-Scholes formula for both call and put
46+
///////////////////////////////////////////////////////////////////////////////
47+
static void BlackScholesBodyCPU(
48+
float &callResult,
49+
float &putResult,
50+
float Sf, //Stock price
51+
float Xf, //Option strike
52+
float Tf, //Option years
53+
float Rf, //Riskless rate
54+
float Vf //Volatility rate
55+
)
56+
{
57+
double S = Sf, X = Xf, T = Tf, R = Rf, V = Vf;
58+
59+
double sqrtT = sqrt(T);
60+
double d1 = (log(S / X) + (R + 0.5 * V * V) * T) / (V * sqrtT);
61+
double d2 = d1 - V * sqrtT;
62+
double CNDD1 = CND(d1);
63+
double CNDD2 = CND(d2);
64+
65+
//Calculate Call and Put simultaneously
66+
double expRT = exp(- R * T);
67+
callResult = (float)(S * CNDD1 - X * expRT * CNDD2);
68+
putResult = (float)(X * expRT * (1.0 - CNDD2) - S * (1.0 - CNDD1));
69+
}
70+
71+
72+
////////////////////////////////////////////////////////////////////////////////
73+
// Process an array of optN options
74+
////////////////////////////////////////////////////////////////////////////////
75+
extern "C" void BlackScholesCPU(
76+
float *h_CallResult,
77+
float *h_PutResult,
78+
float *h_StockPrice,
79+
float *h_OptionStrike,
80+
float *h_OptionYears,
81+
float Riskfree,
82+
float Volatility,
83+
int optN
84+
)
85+
{
86+
for (int opt = 0; opt < optN; opt++)
87+
BlackScholesBodyCPU(
88+
h_CallResult[opt],
89+
h_PutResult[opt],
90+
h_StockPrice[opt],
91+
h_OptionStrike[opt],
92+
h_OptionYears[opt],
93+
Riskfree,
94+
Volatility
95+
);
96+
}

0 commit comments

Comments
 (0)