Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ string(REPLACE " " ";" INSTALLED_GPU_CCS_3 "${INSTALLED_GPU_CCS_2}")
# Remove every "." from the compute-capability list so the entries can be
# handed to CMAKE_CUDA_ARCHITECTURES (presumably turning e.g. "8.6" into "86").
string(REPLACE "." "" CUDA_ARCH_LIST "${INSTALLED_GPU_CCS_3}")
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST})

# AddressSanitizer is a debugging aid: it slows the binary down noticeably and
# needs a toolchain that understands -fsanitize=address. Keep it opt-in rather
# than baking it into the global CMAKE_CXX_FLAGS for every build.
# Enable with: cmake -DLLAMA2_Q4_ENABLE_ASAN=ON ..
option(LLAMA2_Q4_ENABLE_ASAN "Build weight_packer with AddressSanitizer" OFF)

add_executable(weight_packer weight_packer.cpp)
add_executable(llama2_q4 llama2_q4.cu)

if(LLAMA2_Q4_ENABLE_ASAN)
  # The flag must be given at both compile and link time so the ASan runtime
  # is linked in. It is scoped to weight_packer, the only C++ target here.
  target_compile_options(weight_packer PRIVATE -fsanitize=address)
  target_link_options(weight_packer PRIVATE -fsanitize=address)
endif()
9 changes: 7 additions & 2 deletions weight_packer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ int divUp(int a, int b) {
// 1. Convert from row-major to col-major
// 2. Get rid of the order_map (simply pack as little endian)
void repack_q_data(uint32_t* q_weight_out, const uint32_t* q_weight_in, int height, int width) {
uint32_t* temp = (uint32_t*)malloc(width * height * sizeof(uint32_t));
size_t temp_size = width * height;
uint32_t* temp = (uint32_t*)malloc(temp_size * sizeof(uint32_t));
int order_map[] = { 0, 2, 4, 6, 1, 3, 5, 7 }; // used by AWQ's original implementation

// 1. convert to uint32 col-major array first (only 4 LSBs of each element are non-zero)
Expand All @@ -117,7 +118,11 @@ void repack_q_data(uint32_t* q_weight_out, const uint32_t* q_weight_in, int heig
for (int y = 0; y < height; y += 8) {
uint32_t packed_val = 0;
for (int i = 0; i < 8; i++) {
packed_val = (packed_val) | (temp[x * height + y + i] << (4 * i));
size_t index = x * height + y + i;
if (index >= temp_size) {
fprintf(stderr, "Error: Heap overflow detected at index %zu\n", index);
}
packed_val = (packed_val) | (temp[index] << (4 * i));
}
int packed_wt_y = y / 8;
q_weight_out[x * packed_wt_height + packed_wt_y] = packed_val;
Expand Down