-
Notifications
You must be signed in to change notification settings - Fork 80
BloomFilter v2 support for Spark's bloom-filter based joins #4360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
ac2540a
7af89f7
bface5f
aa9b576
8c2c101
2e63ec8
d83b2c9
095dd88
be2a120
bd90bf4
2037b0e
10ba6f0
3e74747
edc63a9
6292048
a773edd
110fc0e
d48c048
5eb90ea
dc224c9
5d7d5e1
27e3388
317e800
81893fb
4995300
090714c
92db59f
5f8d711
cd79867
8480b6f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| /* | ||
| * Copyright (c) 2023-2025, NVIDIA CORPORATION. | ||
| * Copyright (c) 2023-2026, NVIDIA CORPORATION. | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
|
|
@@ -20,17 +20,32 @@ | |
| #include "jni_utils.hpp" | ||
| #include "utilities.hpp" | ||
|
|
||
| #include <limits> | ||
|
|
||
| extern "C" { | ||
|
|
||
| JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_BloomFilter_creategpu( | ||
| JNIEnv* env, jclass, jint numHashes, jlong bloomFilterBits) | ||
| JNIEnv* env, jclass, jint version, jint numHashes, jlong bloomFilterBits, jint seed) | ||
| { | ||
| JNI_TRY | ||
| { | ||
| cudf::jni::auto_set_device(env); | ||
|
|
||
| int bloom_filter_longs = static_cast<int>((bloomFilterBits + 63) / 64); | ||
| auto bloom_filter = spark_rapids_jni::bloom_filter_create(numHashes, bloom_filter_longs); | ||
| // TODO (future): There is an impedance mismatch between the C++ and Java APIs. | ||
| // This seems to have been introduced in https://github.com/NVIDIA/spark-rapids-jni/pull/1303. | ||
| // The Java API accepts a long for the bloom filter bit count, but the C++ API accepts an int. | ||
| // This means that the Java API can represent a bloom filter bit count that is too large to | ||
| // be represented as an int in the C++ API. | ||
| // We should fix this by changing the C++ API to accept a long for the bloom filter bit count. | ||
| // We will address this in a future PR. For now, we add error checking to avoid overflow. | ||
|
||
| JNI_ARG_CHECK(env, | ||
| bloomFilterBits >= 0 && bloomFilterBits <= std::numeric_limits<int>::max() - 63, | ||
| "bloom filter bit count overflows int when converted to longs", | ||
| 0); | ||
| auto const bloom_filter_longs_long = (bloomFilterBits + 63) / 64; | ||
| auto const bloom_filter_longs = static_cast<int>(bloom_filter_longs_long); | ||
| auto bloom_filter = | ||
| spark_rapids_jni::bloom_filter_create(version, numHashes, bloom_filter_longs, seed); | ||
| return reinterpret_cast<jlong>(bloom_filter.release()); | ||
| } | ||
| JNI_CATCH(env, 0); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.