-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathgrayscale.py
More file actions
50 lines (39 loc) · 1.75 KB
/
grayscale.py
File metadata and controls
50 lines (39 loc) · 1.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import torch
import pygpubench
def reference_kernel(data):
    """Write the weighted channel sum of an RGB tensor into a given buffer.

    Args:
        data: A pair ``(output, rgb)``. ``rgb`` is a tensor whose last
            dimension holds the three colour channels; ``output`` is a
            preallocated tensor that receives the result in place.

    Returns:
        None. The result is stored in ``output``.
    """
    out_buffer, pixels = data
    # Fixed per-channel weights, created on the same device and with the
    # same dtype as the input so the product needs no implicit conversion.
    channel_weights = torch.tensor(
        [0.2989, 0.5870, 0.1140],
        dtype=pixels.dtype,
        device=pixels.device,
    )
    weighted = pixels * channel_weights
    # Collapse the channel (last) dimension; assign through the ellipsis
    # index so the caller's buffer is filled rather than rebound.
    out_buffer[...] = torch.sum(weighted, dim=-1)
def generate_input(size: int, seed: int, *, device: str = "cuda"):
    """Generate a random RGB image tensor and a matching output buffer.

    Args:
        size: Height and width of the square image.
        seed: Seed for the random generator, so the same seed reproduces
            the same image.
        device: Device on which the generator and both tensors are
            created. Defaults to ``"cuda"``, the original behaviour.

    Returns:
        A tuple ``(x, y)`` where ``x`` is a float32 tensor of shape
        ``(size, size, 3)`` with values drawn uniformly from ``[0, 1)``,
        and ``y`` is an uninitialised float32 tensor of shape
        ``(size, size)`` intended to receive the result.
    """
    # A dedicated generator keeps the draw independent of the global RNG.
    gen = torch.Generator(device=device)
    gen.manual_seed(seed)
    # Freshly allocated tensors are already contiguous, so no extra
    # ``.contiguous()`` call is needed.
    x = torch.rand(
        size, size, 3, device=device, dtype=torch.float32, generator=gen
    )
    y = torch.empty(size, size, device=device, dtype=torch.float32)
    return x, y
def generate_test_case(**kwargs):
    """Build one test case: kernel arguments plus the expected result.

    Args:
        **kwargs: Forwarded unchanged to ``generate_input``.

    Returns:
        A pair ``(kernel_args, expectation)``. ``kernel_args`` is
        ``(output_buffer, image)``, the same ordering ``reference_kernel``
        unpacks. ``expectation`` is ``(expected, 1e-6, 1e-6)``, where
        ``expected`` holds the reference result.
    """
    image, out_buffer = generate_input(**kwargs)
    # Compute the reference into its own buffer so the buffer handed to the
    # kernel under test stays untouched.
    reference = torch.empty_like(out_buffer)
    reference_kernel((reference, image))
    kernel_args = (out_buffer, image)
    # NOTE(review): the two 1e-6 values are presumably the relative and
    # absolute tolerances used by the checker - confirm against pygpubench.
    expectation = (reference, 1e-6, 1e-6)
    return kernel_args, expectation
# note: can't enable landlock when running on modal :(
if __name__ == "__main__":
    # Entry points in the ``submission`` module, grouped by their names:
    # the "valid_*" kernels first, then the "wrong_*" ones.
    kernels = [
        "valid_custom_kernel_eager",
        "valid_custom_kernel_compiled",
        "valid_custom_kernel_stream",
    ]
    broken = [
        "wrong_custom_kernel_backward_race",
        "wrong_custom_kernel_forward_race",
    ]
    for group in (kernels, broken):
        for kernel in group:
            print(kernel)
            # NOTE(review): the meaning of the positional 100 and 5 is
            # defined by pygpubench - confirm against its documentation.
            res = pygpubench.do_bench_isolated(
                f"submission.{kernel}",
                generate_test_case,
                {"size": 1024},
                100,
                5,
                discard=True,
                landlock=False,
            )
            status = "✅" if res.success else "❌"
            print(status, pygpubench.basic_stats(res.time_us))
    print("done")