It seems the nvcc toolchain behavior keeps changing with version upgrades. I tried to compile test_gpt2cu with this CUDA version:
$ nvcc --version
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2025 NVIDIA Corporation
Built on Tue_May_27_02:21:03_PDT_2025
Cuda compilation tools, release 12.9, V12.9.86
Build cuda_12.9.r12.9/compiler.36037853_0
and encountered the following errors:
make test_gpt2cu [backprop]
NICE Compiling with OpenMP support
nvcc -O3 --use_fast_math test_gpt2.cu -lcublas -lcublasLt -o test_gpt2cu
nvcc warning : Support for offline compilation for architectures prior to '<compute/sm/lto>_75' will be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
train_gpt2.cu(795): error: __host__ or __device__ annotation on lambda requires --extended-lambda nvcc flag
(cudaCheck(cub::DeviceFor::Bulk(B * T * C, [=] __attribute__((device))(int idx) { auto [b, t, c] = i2n(idx, C, T); out_md(b, t, c) = wte_md(inp_md(b, t), c) + wpe_md(t, c); }), "train_gpt2.cu", 795))
^
train_gpt2.cu(795): error: calling a __device__ function("_Z3i2n1?1?1?") from a __host__ function("operator()") is not allowed
(cudaCheck(cub::DeviceFor::Bulk(B * T * C, [=] __attribute__((device))(int idx) { auto [b, t, c] = i2n(idx, C, T); out_md(b, t, c) = wte_md(inp_md(b, t), c) + wpe_md(t, c); }), "train_gpt2.cu", 795))
^
train_gpt2.cu(945): error: __host__ or __device__ annotation on lambda requires --extended-lambda nvcc flag
[=] __attribute__((device))(int idx) {
^
train_gpt2.cu(971): error: __host__ or __device__ annotation on lambda requires --extended-lambda nvcc flag
thrust::make_counting_iterator(0), [=] __attribute__((host)) __attribute__((device))(int idx) {
^
train_gpt2.cu(986): error: __host__ or __device__ annotation on lambda requires --extended-lambda nvcc flag
thrust::transform(thrust::cuda::par_nosync, inp, inp + N, out, [] __attribute__((device))(float xi) {
^
test_gpt2.cu(51): error: initial value of reference to non-const must be an lvalue
gpt2_build_from_checkpoint(&model, "gpt2_124M.bin");
^
test_gpt2.cu(73): error: too many arguments in function call
float* expected_grads_memory = malloc_and_point_parameters(&expected_grads, model.param_sizes, 0);
^
test_gpt2.cu(73): error: no suitable conversion function from "thrust::THRUST_200802_SM_520_NS::device_vector<float, thrust::THRUST_200802_SM_520_NS::device_allocator<float>>" to "float *" exists
float* expected_grads_memory = malloc_and_point_parameters(&expected_grads, model.param_sizes, 0);
^
test_gpt2.cu(74): error: too many arguments in function call
float* calculated_grads_memory = malloc_and_point_parameters(&calculated_grads, model.param_sizes, 0);
^
test_gpt2.cu(74): error: no suitable conversion function from "thrust::THRUST_200802_SM_520_NS::device_vector<float, thrust::THRUST_200802_SM_520_NS::device_allocator<float>>" to "float *" exists
float* calculated_grads_memory = malloc_and_point_parameters(&calculated_grads, model.param_sizes, 0);
^
test_gpt2.cu(94): error: initial value of reference to non-const must be an lvalue
gpt2_forward(&model, x,
^
test_gpt2.cu(119): error: initial value of reference to non-const must be an lvalue
gpt2_forward(&model, x, y, B, T);
^
test_gpt2.cu(120): error: initial value of reference to non-const must be an lvalue
gpt2_zero_grad(&model);
^
test_gpt2.cu(121): error: initial value of reference to non-const must be an lvalue
gpt2_backward(&model);
^
test_gpt2.cu(175): error: no suitable conversion function from "thrust::THRUST_200802_SM_520_NS::device_vector<float, thrust::THRUST_200802_SM_520_NS::device_allocator<float>>" to "const void *" exists
cudaMemcpy(calculated_grads_memory, model.grads_memory, model.num_parameters * sizeof(float), cudaMemcpyDeviceToHost);
^
test_gpt2.cu(220): error: identifier "gpt2_free" is undefined
gpt2_free(&model);
^
16 errors detected in the compilation of "test_gpt2.cu".
make: *** [Makefile:59: test_gpt2cu] Error 2
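For the "__host__ or __device__ annotation on lambda requires --extended-lambda nvcc flag" errors, the compiler message itself names the missing flag. As a minimal sketch, assuming the nvcc invocation shown above is the relevant Makefile rule and the flag is simply not being passed, the compile command would become:

nvcc -O3 --use_fast_math --extended-lambda test_gpt2.cu -lcublas -lcublasLt -o test_gpt2cu

(--extended-lambda is the current spelling of the flag; older nvcc releases accepted --expt-extended-lambda.) The remaining test_gpt2.cu errors (the non-const reference/lvalue complaints, the argument-count mismatches on malloc_and_point_parameters, the device_vector-to-float* conversions, and the undefined gpt2_free) appear to be test_gpt2.cu being out of sync with the current train_gpt2.cu API, rather than something introduced by the CUDA 12.9 upgrade alone.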