-
Notifications
You must be signed in to change notification settings - Fork 86
Expand file tree
/
Copy pathgpu_stream.hpp
More file actions
63 lines (48 loc) · 2.22 KB
/
gpu_stream.hpp
File metadata and controls
63 lines (48 loc) · 2.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <getopt.h>
#include <iostream>
#include <memory>
#include <variant>
#include <vector>
#include <cuda.h>
#include <cuda_runtime.h>
#include <numa.h>
#include "gpu_stream_kernels.hpp"
#include "gpu_stream_utils.hpp"
// NON_HIP is 1 when none of the HIP compiler/platform macros are defined, 0 otherwise.
// The check is evaluated here, in a real #if, and the macro expands to a plain integer literal.
// Keeping `defined(...)` inside the replacement list would make every `#if NON_HIP` rely on
// `defined` being produced by macro expansion, which the preprocessor rules leave undefined
// (GCC reports it under -Wexpansion-to-defined).
#if !defined(__HIP_PLATFORM_HCC__) && !defined(__HCC__) && !defined(__HIPCC__)
#define NON_HIP 1
#else
#define NON_HIP 0
#endif
// NOTE(review): this using-directive sits at header scope, so every file that includes this
// header also gets the names of stream_config visible unqualified. The unqualified types used
// by the class below (Opts, BenchArgs, Kernel) presumably come from that namespace — confirm
// before narrowing or removing this directive.
using namespace stream_config;
class GpuStream {
public:
GpuStream() = delete; // Delete default constructor
GpuStream(Opts &) noexcept; // Constructor
~GpuStream() noexcept = default; // Destructor
GpuStream(const GpuStream &) = delete;
GpuStream &operator=(const GpuStream &) = delete;
GpuStream(GpuStream &&) noexcept = default;
GpuStream &operator=(GpuStream &&) noexcept = default;
int Run();
private:
using BenchArgsVariant = std::variant<std::unique_ptr<BenchArgs<float>>, std::unique_ptr<BenchArgs<double>>>;
std::vector<BenchArgsVariant> bench_args_;
Opts opts_;
// Memory management functions
template <typename T> cudaError_t GpuMallocDataBuf(T **, uint64_t);
template <typename T> int PrepareValidationBuf(std::unique_ptr<BenchArgs<T>> &);
template <typename T> int CheckBuf(std::unique_ptr<BenchArgs<T>> &, int);
template <typename T> int PrepareEvent(std::unique_ptr<BenchArgs<T>> &);
template <typename T> int PrepareBufAndStream(std::unique_ptr<BenchArgs<T>> &);
template <typename T> int DestroyEvent(std::unique_ptr<BenchArgs<T>> &);
template <typename T> int DestroyBufAndStream(std::unique_ptr<BenchArgs<T>> &);
template <typename T> int Destroy(std::unique_ptr<BenchArgs<T>> &);
// Benchmark functions
template <typename T> int RunStreamKernel(std::unique_ptr<BenchArgs<T>> &, Kernel, int);
float GetActualMemoryClockRate(int gpu_id);
template <typename T> int RunStream(std::unique_ptr<BenchArgs<T>> &, const std::string &data_type, float peak_bw);
// Helper functions
int GetGpuCount(int *);
int SetGpu(int gpu_id);
float GetMemoryClockRate(int device_id, const cudaDeviceProp &prop);
void PrintCudaDeviceInfo(int device_id, const cudaDeviceProp &prop, float memory_clock_mhz, float peak_bw);
};