Skip to content

Support for integers exceeding uint16_t when the number of keys is lower than 65535 #19

@mlhktp

Description

@mlhktp

When the number of keys is lower than 65535, the application assumes that the key fits into a 16-bit integer (uint16_t). This assumption leads to collisions. Manually changing the key type to uint32_t (assuming it fits into 32 bits) resolves the problem.

Steps to Reproduce:

  1. Create a 32bit.nbperf file with the following content:

    1112091295
    2389685386
    2687098545
    2818849856
    1566765846
    3743921995
    3614969005
    3023180322
    396579954
    3603558518
    
  2. Create a keys.h file with the following content:

    #include <stdint.h>
    const uint32_t keys[] = {
        1112091295,
        2389685386,
        2687098545,
        2818849856,
        1566765846,
        3743921995,
        3614969005,
        3023180322,
        396579954,
        3603558518
    };
  3. Create a check_collision.cpp file with the following content:

    #include <iostream>
    #include <unordered_set>
    #include "output.h"
    #include "keys.h"
    
    int main() {
        std::unordered_set<uint16_t> hash_set;
        bool collision = false;
    
        for (const auto& key : keys) {
            uint16_t hash_value = inthash(key);
            if (hash_set.find(hash_value) != hash_set.end()) {
                std::cout << "Collision detected for key: " << key << " with hash value: " << hash_value << std::endl;
                collision = true;
            } else {
                hash_set.insert(hash_value);
            }
        }
    
        if (!collision) {
            std::cout << "No collisions detected." << std::endl;
        }
    
        return 0;
    }
  4. Use the following output.h file generated by nbperf (nbperf -I 32bit.nbperf > output.h):

    /* generated with rurban/nbperf v3.1-0-g4379d4d -I 32bit.nbperf */
    /* seed[0]: 1804289383, seed[1]: 846930886 */
    #include <stdint.h>
    
    static inline void _inthash2(const int32_t key, uint32_t *h)
    {
        *h = (key * (UINT32_C(0xEB382D69) + UINT32_C(1804289383)))
             + UINT32_C(846930886);
    }
    
    uint16_t inthash(const uint16_t key)
    {
        static const uint8_t g[21] = {
             0,  0,  0,  1,  0,  0,  3,  6,  8,  1,
             4,  2,  0,  0,  0,  0,  2,  0,  7,  0,
             0,
        };
        uint16_t h[2];
    
        _inthash2(key, (uint32_t*)h);
    
        h[0] = h[0] % 21;
        h[1] = h[1] % 21;
        return (g[h[0]] + g[h[1]]) % 10;
    }

Manual Fix:

Change the key type in the output.h file from uint16_t to uint32_t:

- static inline void _inthash2(const int32_t key, uint32_t *h)
+ static inline void _inthash2(const int64_t key, uint32_t *h)
{
    *h = (key * (UINT32_C(0xEB382D69) + UINT32_C(1804289383)))
         + UINT32_C(846930886);
}

uint16_t
- inthash(const uint16_t key)
+ inthash(const uint32_t key)
{

Metadata

Metadata

Assignees

Labels

No labels
No labels

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions