-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathterrain_generator.cu
More file actions
350 lines (293 loc) · 14 KB
/
terrain_generator.cu
File metadata and controls
350 lines (293 loc) · 14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
#include <algorithm> // For std::min/max
#include <chrono> // For std::chrono::high_resolution_clock
#include <cmath>
#include <cstdio> // For fprintf
#include <cstdlib> // For exit and EXIT_FAILURE
#include <cstring>
#include <fstream>
#include <iostream>
#include <random> // For std::mt19937 and std::uniform_int_distribution
#include <string> // For std::string
#include <vector>
// Include CUDA runtime API
#include <cuda_runtime.h>
// --- CUDA Error Checking Macro ---
// Evaluates a CUDA runtime call exactly once. If the returned status is not
// cudaSuccess, prints the error string together with the file and line of the
// call site to stderr and terminates the process with EXIT_FAILURE.
// The argument is parenthesized, and the status is kept in a suffixed local
// (cuda_check_err_) so that a checked expression mentioning a caller variable
// named `err` is not shadowed by the macro's own temporary.
#define CUDA_CHECK(call) \
    do { \
        cudaError_t cuda_check_err_ = (call); \
        if (cuda_check_err_ != cudaSuccess) { \
            fprintf(stderr, "CUDA Error: %s at %s:%d\n", cudaGetErrorString(cuda_check_err_), __FILE__, __LINE__); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
// --- Configuration Parameters ---
// Terrain grid resolution: one height sample per (x, y) cell.
// Larger grids cost proportionally more memory and kernel time.
constexpr int GRID_WIDTH = 512;
constexpr int GRID_HEIGHT = 512;
// Number of noise layers (octaves) summed for every sample.
// More octaves add finer detail to the terrain.
constexpr int PERLIN_OCTAVES = 6;
// Amplitude multiplier applied after each octave.
// Higher values give the later (higher-frequency) octaves more influence.
constexpr float PERLIN_PERSISTENCE = 0.5f;
// Frequency multiplier applied after each octave.
// Higher values make the later octaves "busier".
constexpr float PERLIN_LACUNARITY = 2.0f;
// Factor that maps grid coordinates into noise space (the overall "zoom").
// Smaller values produce larger terrain features.
constexpr float PERLIN_SCALE = 0.01f;
// Seed for shuffling the permutation table.
// Keeping it fixed makes the generated terrain reproducible.
constexpr unsigned int RANDOM_SEED = 12345u;
// --- Perlin Noise Constants (Device Constant Memory) ---
// Permutation table that perlin_noise() uses to hash lattice coordinates.
// Declared __constant__, so it resides in device constant memory; main() fills
// it from the host with cudaMemcpyToSymbol.
// It has 512 entries because the host initializer stores a 256-entry
// permutation twice: chained lookups of the form p[p[X] + Y] (+ 1), with X and
// Y each at most 255, therefore stay in bounds without an extra "& 255" wrap.
__constant__ int p_device[512];
// --- Perlin Noise Helper Functions (Device Code) ---
// Quintic smoothing curve 6t^5 - 15t^4 + 10t^3, evaluated in Horner form.
// Maps 0 -> 0, 0.5 -> 0.5 and 1 -> 1; used as the interpolation weight.
__device__ float fade(float t) {
    const float poly = t * (t * 6.0f - 15.0f) + 10.0f;
    const float cube = t * t * t;
    return cube * poly;
}
// Linear blend between a and b: returns a when t == 0 and b when t == 1.
// Note the argument order: the weight t comes first.
__device__ float lerp(float t, float a, float b) {
    const float delta = b - a;
    return a + t * delta;
}
// Gradient function: returns the dot product of the distance vector (x, y, z)
// with one of 16 pseudorandom gradient directions selected by the low 4 bits
// of `hash`.
//
// Follows Ken Perlin's "Improved Noise" reference selection:
//   u = x for h < 8, otherwise y
//   v = y for h < 4, x for h == 12 or h == 14, otherwise z
// Bit 0 of h negates u and bit 1 negates v.
//
// The previous version also chose u = x for h == 12 and h == 13. That made
// h == 12 evaluate to x + x (a degenerate, double-length axis gradient) and
// h == 13 duplicate the direction of h == 14. With the reference selection
// every result is the sum of two distinct signed components.
__device__ float grad(int hash, float x, float y, float z) {
    const int h = hash & 15; // Only the low 4 bits pick the direction
    const float u = (h < 8) ? x : y;
    const float v = (h < 4) ? y : ((h == 12 || h == 14) ? x : z);
    return ((h & 1) == 0 ? u : -u) + ((h & 2) == 0 ? v : -v);
}
// --- Perlin Noise Function (Device Code) ---
// Evaluates 3D Perlin noise at (x, y, z).
// The point is located inside a unit lattice cell, the eight cell corners are
// hashed through p_device to pick gradients, and the eight gradient
// contributions are blended with fade()-smoothed weights along x, then y,
// then z.
__device__ float perlin_noise(float x, float y, float z) {
    // Lattice cell containing the point, wrapped to the 0..255 table range
    const float fx = floorf(x);
    const float fy = floorf(y);
    const float fz = floorf(z);
    const int cx = static_cast<int>(fx) & 255;
    const int cy = static_cast<int>(fy) & 255;
    const int cz = static_cast<int>(fz) & 255;

    // Position of the point relative to the cell origin
    const float rx = x - fx;
    const float ry = y - fy;
    const float rz = z - fz;

    // Smoothed interpolation weights per axis
    const float u = fade(rx);
    const float v = fade(ry);
    const float w = fade(rz);

    // Hash the x, then y, then z lattice coordinates of the cell corners
    const int hx0 = p_device[cx] + cy;
    const int hx1 = p_device[cx + 1] + cy;
    const int h00 = p_device[hx0] + cz;
    const int h01 = p_device[hx0 + 1] + cz;
    const int h10 = p_device[hx1] + cz;
    const int h11 = p_device[hx1 + 1] + cz;

    // Gradient contributions of the four corners on the near z layer ...
    const float g000 = grad(p_device[h00], rx, ry, rz);
    const float g100 = grad(p_device[h10], rx - 1.0f, ry, rz);
    const float g010 = grad(p_device[h01], rx, ry - 1.0f, rz);
    const float g110 = grad(p_device[h11], rx - 1.0f, ry - 1.0f, rz);
    // ... and of the four corners one step further along z
    const float g001 = grad(p_device[h00 + 1], rx, ry, rz - 1.0f);
    const float g101 = grad(p_device[h10 + 1], rx - 1.0f, ry, rz - 1.0f);
    const float g011 = grad(p_device[h01 + 1], rx, ry - 1.0f, rz - 1.0f);
    const float g111 = grad(p_device[h11 + 1], rx - 1.0f, ry - 1.0f, rz - 1.0f);

    // Blend along x, then y, then z
    const float near_layer = lerp(v, lerp(u, g000, g100), lerp(u, g010, g110));
    const float far_layer = lerp(v, lerp(u, g001, g101), lerp(u, g011, g111));
    return lerp(w, near_layer, far_layer);
}
// --- Terrain Generation Kernel (Device Code) ---
// Computes one terrain height per grid cell by summing several octaves of
// Perlin noise (fractal noise).
//
// Launch layout: 2D grid of 2D blocks, one thread per (x, y) cell. The grid
// must cover at least width x height threads; surplus threads return without
// writing. No shared memory is used.
//
// Parameters:
//   height_map  - device buffer of at least width * height floats, row-major
//                 (index = y * width + x); written, never read.
//   width/height - grid size in cells.
//   scale       - factor applied to cell coordinates before sampling noise.
//   octaves     - number of noise layers to sum.
//   persistence - amplitude multiplier applied after each octave.
//   lacunarity  - frequency multiplier applied after each octave.
//
// Output: (sum / total amplitude + 1) * 0.5, i.e. the amplitude-weighted noise
// average shifted from a nominal [-1, 1] into [0, 1].
__global__ void generate_terrain_kernel(float* height_map,
                                        int width, int height,
                                        float scale, int octaves,
                                        float persistence, float lacunarity) {
    // Global cell coordinates handled by this thread
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    // Threads in the padded part of the launch grid have nothing to do
    if (x >= width || y >= height) {
        return;
    }

    float weighted_sum = 0.0f;   // Sum of noise * amplitude over all octaves
    float amplitude_sum = 0.0f;  // Sum of amplitudes, used for normalization
    float frequency = 1.0f;
    float amplitude = 1.0f;
    for (int i = 0; i < octaves; ++i) {
        // Each octave samples a different z slice (i * 100) of the 3D noise
        // so the layers are not copies of one another.
        const float noise_val = perlin_noise(x * scale * frequency,
                                             y * scale * frequency,
                                             i * 100.0f);
        weighted_sum += noise_val * amplitude;
        amplitude_sum += amplitude;
        // Prepare the next octave
        amplitude *= persistence;
        frequency *= lacunarity;
    }

    // With octaves <= 0 the amplitude sum is zero; fall back to the neutral
    // value instead of writing 0/0 = NaN into the height map.
    const float signed_height =
        (amplitude_sum != 0.0f) ? weighted_sum / amplitude_sum : 0.0f;
    const float normalized_height = (signed_height + 1.0f) * 0.5f;

    // Index in size_t so y * width cannot overflow int on very large grids
    height_map[static_cast<size_t>(y) * width + x] = normalized_height;
}
// --- Host-side Utility Functions ---
// Fills p_host (which must have room for 512 ints) with the Perlin noise
// permutation table: a pseudorandom permutation of 0..255 in the first 256
// entries, repeated verbatim in entries 256..511.
//
// The shuffle is seeded with RANDOM_SEED, so the table is the same on every
// run of the same build.
//
// The earlier hand-written Fisher-Yates loop drew from a fixed 0..255
// distribution and reduced it with "% (i + 1)", which favours small indices
// (modulo bias). std::shuffle draws each index from the correct range. Note
// that this changes the table, and therefore the terrain, produced for a
// given seed compared with the previous implementation.
void init_permutation_table(int* p_host) {
    // Start from the identity permutation 0..255
    for (int i = 0; i < 256; ++i) {
        p_host[i] = i;
    }
    // Unbiased shuffle driven by a Mersenne Twister engine
    std::mt19937 rng(RANDOM_SEED);
    std::shuffle(p_host, p_host + 256, rng);
    // Duplicate the table so lookups with an index up to 511 need no wrapping
    std::copy(p_host, p_host + 256, p_host + 256);
}
// Writes the height map to `filename` as a binary PPM (P6) image.
//
// Parameters:
//   height_map - row-major heights, expected in [0, 1]; must hold at least
//                width * height values.
//   width/height - image size in pixels.
//   filename   - output path.
//
// Colour bands by height h:
//   h < 0.2  water          (blue)
//   h < 0.5  grass / land   (green)
//   h < 0.8  mountain rock  (brown / grey)
//   else     snow caps      (grey to white)
//
// Errors (negative size, too few samples, file cannot be opened or written)
// are reported on std::cerr and the function returns without throwing.
//
// Every channel is computed in int and clamped to 0..255 before narrowing.
// The previous code narrowed "value + offset" to unsigned char *before*
// applying std::min, so the clamp never fired and bright rock pixels wrapped
// around to dark values. Heights outside [0, 1] (including NaN) are clamped
// as well, because converting an out-of-range float to an integer type is
// undefined behaviour.
void save_ppm(const std::vector<float>& height_map, int width, int height, const std::string& filename) {
    if (width < 0 || height < 0) {
        std::cerr << "Error: Invalid image size " << width << "x" << height << ".\n";
        return;
    }
    const std::size_t pixel_count =
        static_cast<std::size_t>(width) * static_cast<std::size_t>(height);
    if (height_map.size() < pixel_count) {
        std::cerr << "Error: Height map has fewer samples than width * height.\n";
        return;
    }

    std::ofstream ofs(filename, std::ios_base::out | std::ios_base::binary);
    if (!ofs.is_open()) {
        std::cerr << "Error: Could not open file " << filename << " for writing.\n";
        return;
    }
    // PPM header: P6 (binary RGB), width, height, max color value (255)
    ofs << "P6\n" << width << " " << height << "\n255\n";

    // Scales a height into a colour channel, adds an offset, clamps to a byte.
    auto channel = [](float h, float gain, int offset) -> unsigned char {
        const int raw = static_cast<int>(h * 255.0f * gain) + offset;
        return static_cast<unsigned char>(std::min(255, std::max(0, raw)));
    };

    for (std::size_t i = 0; i < pixel_count; ++i) {
        float h = height_map[i];
        // Clamp to [0, 1]; the first test also catches NaN.
        if (!(h >= 0.0f)) {
            h = 0.0f;
        } else if (h > 1.0f) {
            h = 1.0f;
        }

        unsigned char rgb[3];
        if (h < 0.2f) { // Water: always some blue, fading to lighter blue
            rgb[0] = 0;
            rgb[1] = channel(h, 2.5f, 0);
            rgb[2] = channel(h, 2.5f, 100);
        } else if (h < 0.5f) { // Grass/Land
            rgb[0] = channel(h, 0.8f, 50);
            rgb[1] = channel(h, 1.5f, 50);
            rgb[2] = 0;
        } else if (h < 0.8f) { // Mountains/Rock
            rgb[0] = channel(h, 1.2f, 100);
            rgb[1] = channel(h, 0.8f, 100);
            rgb[2] = channel(h, 0.4f, 100);
        } else { // Snow caps
            rgb[0] = rgb[1] = rgb[2] = channel(h, 1.0f, 0);
        }
        ofs.write(reinterpret_cast<const char*>(rgb), sizeof(rgb));
    }

    ofs.close();
    if (ofs.fail()) {
        std::cerr << "Error: Failed while writing " << filename << ".\n";
        return;
    }
    std::cout << "Saved terrain to " << filename << std::endl;
}
// Prints a low-resolution ASCII rendering of the terrain to std::cout.
//
// Each output character stands for an ASCII_SCALE x ASCII_SCALE block of
// cells (partial blocks at the right/bottom edge are averaged over the cells
// that exist). The block's average height, expected in [0, 1], selects a
// character from a ramp ordered from low to high.
//
// Parameters:
//   height_map - row-major heights; must hold at least width * height values.
//   width/height - grid size in cells.
//
// If the grid is smaller than one block, or the map holds too few samples, a
// message is printed and no terrain is drawn.
//
// The average is clamped to [0, 1] before it is turned into an index, so a
// height outside that range (or NaN) can no longer index past the ramp.
void print_ascii_terrain(const std::vector<float>& height_map, int width, int height) {
    std::cout << "\n--- ASCII Terrain (Scaled) ---\n";
    // Adjust ASCII_SCALE to control the resolution of the ASCII output.
    const int ASCII_SCALE = 8; // Each ASCII character represents an 8x8 block of terrain
    if (width < ASCII_SCALE || height < ASCII_SCALE) {
        std::cout << "Terrain too small for ASCII scaling. Skipping ASCII output.\n";
        return;
    }
    if (height_map.size() < static_cast<std::size_t>(width) * static_cast<std::size_t>(height)) {
        std::cout << "Height map has fewer samples than width * height. Skipping ASCII output.\n";
        return;
    }

    // Character ramp from low to high density/height
    static const char ascii_chars[] = " .:-=+*#%@";
    // Index of the last ramp character (sizeof counts the trailing NUL)
    const int last_idx = static_cast<int>(sizeof(ascii_chars)) - 2;

    for (int y = 0; y < height; y += ASCII_SCALE) {
        const int y_end = std::min(y + ASCII_SCALE, height);
        for (int x = 0; x < width; x += ASCII_SCALE) {
            const int x_end = std::min(x + ASCII_SCALE, width);
            // Average the block for a smoother picture
            float sum = 0.0f;
            int count = 0;
            for (int yy = y; yy < y_end; ++yy) {
                const std::size_t row = static_cast<std::size_t>(yy) * width;
                for (int xx = x; xx < x_end; ++xx) {
                    sum += height_map[row + xx];
                    ++count;
                }
            }
            // count is at least 1 here because x < width and y < height
            float avg_height = sum / count;
            // Clamp to [0, 1]; the argument order maps NaN to 0
            avg_height = std::min(1.0f, std::max(0.0f, avg_height));
            const int char_idx = static_cast<int>(avg_height * last_idx);
            std::cout << ascii_chars[char_idx];
        }
        std::cout << "\n";
    }
    std::cout << "------------------------------\n";
}
// --- Main Function (Host Code) ---
// Program entry point: builds the permutation table, generates the height map
// on the GPU, copies it back, writes "terrain.ppm" and prints an ASCII preview.
// Any failing CUDA call terminates the process through CUDA_CHECK.
int main() {
    std::cout << "Starting GPU Terrain Generator...\n";

    // --- 1. Initialize Perlin Noise Permutation Table ---
    int p_host[512];
    init_permutation_table(p_host);
    CUDA_CHECK(cudaMemcpyToSymbol(p_device, p_host, sizeof(p_host)));

    // --- 2. Allocate Host and Device Memory ---
    // Sizes are computed once in size_t so the byte count cannot overflow int.
    const size_t cell_count = static_cast<size_t>(GRID_WIDTH) * GRID_HEIGHT;
    const size_t map_bytes = cell_count * sizeof(float);
    std::vector<float> h_height_map(cell_count); // Host height map
    float* d_height_map = nullptr;               // Device height map
    CUDA_CHECK(cudaMalloc(&d_height_map, map_bytes));

    // --- 3. Configure Kernel Launch Parameters ---
    // One thread per cell; the grid is rounded up so it covers the whole map.
    const int TILE_SIZE = 16;
    dim3 block_dim(TILE_SIZE, TILE_SIZE);
    dim3 grid_dim((GRID_WIDTH + block_dim.x - 1) / block_dim.x,
                  (GRID_HEIGHT + block_dim.y - 1) / block_dim.y);
    std::cout << "Grid Dimensions: (" << grid_dim.x << ", " << grid_dim.y << ")\n";
    std::cout << "Block Dimensions: (" << block_dim.x << ", " << block_dim.y << ")\n";
    std::cout << "Total Threads: " << grid_dim.x * grid_dim.y * block_dim.x * block_dim.y << "\n";

    // --- 4. Launch Kernel ---
    std::cout << "Launching terrain generation kernel...\n";
    auto start_time = std::chrono::high_resolution_clock::now();
    generate_terrain_kernel<<<grid_dim, block_dim>>>(
        d_height_map, GRID_WIDTH, GRID_HEIGHT,
        PERLIN_SCALE, PERLIN_OCTAVES, PERLIN_PERSISTENCE, PERLIN_LACUNARITY
    );
    // A launch does not return a status itself: invalid launch configurations
    // are reported through cudaGetLastError() ...
    CUDA_CHECK(cudaGetLastError());
    // ... and faults during execution surface at the next synchronizing call.
    CUDA_CHECK(cudaDeviceSynchronize());
    auto end_time = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed_time = end_time - start_time;
    std::cout << "Kernel execution time: " << elapsed_time.count() * 1000.0 << " ms\n";

    // --- 5. Copy Results Back to Host ---
    std::cout << "Copying results back to host...\n";
    CUDA_CHECK(cudaMemcpy(h_height_map.data(), d_height_map, map_bytes,
                          cudaMemcpyDeviceToHost));

    // --- 6. Output Results ---
    // Save to PPM image file
    save_ppm(h_height_map, GRID_WIDTH, GRID_HEIGHT, "terrain.ppm");
    // Print ASCII representation
    print_ascii_terrain(h_height_map, GRID_WIDTH, GRID_HEIGHT);

    // --- 7. Clean Up ---
    std::cout << "Cleaning up memory...\n";
    CUDA_CHECK(cudaFree(d_height_map));
    std::cout << "GPU Terrain Generator finished successfully.\n";
    return 0;
}