Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
112 commits
Select commit Hold shift + click to select a range
4aa7dbb
ai draft
thirtiseven Dec 19, 2025
a319d10
style
thirtiseven Dec 23, 2025
458b583
address comments
thirtiseven Dec 25, 2025
5c1bbf4
address comments
thirtiseven Dec 25, 2025
0445daa
address comments
thirtiseven Dec 25, 2025
8ddbf96
address comments
thirtiseven Dec 25, 2025
c9eea59
copyrights
thirtiseven Jan 4, 2026
f27fd6e
Merge branch 'main' into protocol_buffer_jni_dev
thirtiseven Jan 8, 2026
617471d
update, added more tests
thirtiseven Jan 9, 2026
bdfac06
Merge branch 'protocol_buffer_jni_dev' of https://github.com/thirtise…
thirtiseven Jan 9, 2026
3701cdf
style
thirtiseven Jan 9, 2026
1bf7660
address comments
thirtiseven Jan 13, 2026
b012421
multi column processing
thirtiseven Jan 20, 2026
ecd3a38
fix merge
thirtiseven Jan 20, 2026
c86f78c
Support enum, required and default values
thirtiseven Jan 27, 2026
3b37d28
style
thirtiseven Jan 27, 2026
46d2130
Support nested types
thirtiseven Jan 30, 2026
5f89e60
performance optimization
thirtiseven Feb 5, 2026
e311b4b
single-pass-kernel, with debug log, met unbreakable wall
thirtiseven Feb 6, 2026
c52782c
delete debug log
thirtiseven Feb 9, 2026
47973a7
check point before schema projection option A, PROTOBUF_SINGLE_PASS=1…
thirtiseven Feb 10, 2026
d2595b1
Merge flat and nested api for from_protobuf
thirtiseven Feb 11, 2026
aa3c852
Fix nested type support
thirtiseven Feb 11, 2026
7e0e77d
add a tests
thirtiseven Feb 12, 2026
ac39a4e
Kernal code clean up
thirtiseven Feb 25, 2026
fd7ec66
style change
thirtiseven Feb 25, 2026
828e3c0
clean up code
thirtiseven Feb 25, 2026
ca5a921
address comments
thirtiseven Feb 25, 2026
edbfd98
ai self review and comment addressed
thirtiseven Feb 25, 2026
34bcf0b
ai self review and comment addressed
thirtiseven Feb 26, 2026
632448b
address comments
thirtiseven Feb 27, 2026
46ef3b0
bug fixs
thirtiseven Feb 28, 2026
5438a17
clean up code
thirtiseven Feb 28, 2026
c009f62
style
thirtiseven Feb 28, 2026
6152733
address comments
thirtiseven Feb 28, 2026
48204d5
clean up and split big files
thirtiseven Mar 2, 2026
89e6e8c
style
thirtiseven Mar 2, 2026
f37d3c9
style
thirtiseven Mar 3, 2026
78b1c60
address cc comments
thirtiseven Mar 3, 2026
661f085
codex review and address
thirtiseven Mar 3, 2026
8e09a47
gemini review and address
thirtiseven Mar 3, 2026
aa2586f
Merge branch 'main' into protocol_buffer_jni_dev
thirtiseven Mar 3, 2026
3789cfc
adapt new api
thirtiseven Mar 4, 2026
33ddacd
add micro benchmark
thirtiseven Mar 4, 2026
c56ed5b
merge reduce+scan, defer error check, combined occ scan
thirtiseven Mar 4, 2026
2b113c7
O1 field number lookup in nested
thirtiseven Mar 4, 2026
ce408d6
Remove perf-results
thirtiseven Mar 4, 2026
38348cc
Batched scalar extraction (2D grid kernel)
thirtiseven Mar 4, 2026
3647299
style
thirtiseven Mar 4, 2026
6af1577
address greptile comments
thirtiseven Mar 5, 2026
0d9d105
address comments
thirtiseven Mar 5, 2026
e861f40
address greptile comments
thirtiseven Mar 5, 2026
2ff75a9
address greptile comments
thirtiseven Mar 5, 2026
f6ebffe
address greptile comments
thirtiseven Mar 5, 2026
7a312d4
address greptile comments
thirtiseven Mar 5, 2026
4fa362c
address greptile comments
thirtiseven Mar 5, 2026
54fda94
style
thirtiseven Mar 5, 2026
aeaf50c
address greptile comments
thirtiseven Mar 5, 2026
d50f089
address greptile comments
thirtiseven Mar 5, 2026
63bc13a
address greptile comments
thirtiseven Mar 5, 2026
8316f75
address greptile comments
thirtiseven Mar 5, 2026
8ae9547
address greptile comments
thirtiseven Mar 5, 2026
756010d
address greptile comments
thirtiseven Mar 5, 2026
64de422
address greptile comments
thirtiseven Mar 5, 2026
aae6bca
address comments
thirtiseven Mar 6, 2026
6e1c4d2
address greptile comments
thirtiseven Mar 6, 2026
9c6dd70
address greptile comments
thirtiseven Mar 6, 2026
6b2f494
address greptile comments
thirtiseven Mar 6, 2026
e40d5a7
Lookup table for repeated message child scan
thirtiseven Mar 6, 2026
4b4f6f9
reflection refactor
thirtiseven Mar 12, 2026
66daed1
reflection refactor
thirtiseven Mar 12, 2026
cd1763b
bug fixes
thirtiseven Mar 12, 2026
0213721
comment address
thirtiseven Mar 12, 2026
5357378
comment address
thirtiseven Mar 12, 2026
c4b1507
address comments
thirtiseven Mar 12, 2026
3852f53
address comments
thirtiseven Mar 12, 2026
ce29c11
address comments
thirtiseven Mar 13, 2026
96b20e0
address comments
thirtiseven Mar 13, 2026
87c5e99
address comments
thirtiseven Mar 13, 2026
fca9ea7
address comments
thirtiseven Mar 13, 2026
8e5473c
address comments
thirtiseven Mar 13, 2026
95e2586
address comments
thirtiseven Mar 13, 2026
67f2db8
address greptile comments
thirtiseven Mar 13, 2026
06c15ee
address greptile comments
thirtiseven Mar 13, 2026
14c6644
bugfix
thirtiseven Mar 13, 2026
7a06053
address comments
thirtiseven Mar 14, 2026
809daa1
nits
thirtiseven Mar 14, 2026
3067f0b
address comments
thirtiseven Mar 14, 2026
8aa0bdf
style
thirtiseven Mar 15, 2026
a13433f
style
thirtiseven Mar 15, 2026
e0cd7f5
fix bug
thirtiseven Mar 16, 2026
5ef84e9
address comments
thirtiseven Mar 16, 2026
4817d06
address comments
thirtiseven Mar 16, 2026
bdd2e40
address comments
thirtiseven Mar 16, 2026
6101a4b
address human comments
thirtiseven Mar 17, 2026
0213433
port back refactor from pr 0
thirtiseven Mar 19, 2026
6bcac2f
style
thirtiseven Mar 19, 2026
ff83290
address comments from part0
thirtiseven Mar 19, 2026
35b1cb8
nghia style self-check
thirtiseven Mar 19, 2026
82bf02a
backport suggestions
thirtiseven Mar 20, 2026
fe73ada
address comments
thirtiseven Mar 23, 2026
d01a25b
copyright
thirtiseven Mar 23, 2026
54223ca
fix
thirtiseven Mar 24, 2026
70a214b
apply comments suggestions
thirtiseven Mar 30, 2026
6e00831
apply refactor on headers
thirtiseven Mar 30, 2026
1754032
Merge remote-tracking branch 'origin/main' into protocol_buffer_jni_dev
thirtiseven Mar 31, 2026
02b7303
Merge origin/main into protocol_buffer_jni_dev
thirtiseven Apr 7, 2026
8ea16b5
Remove stale mr parameter from call sites
thirtiseven Apr 7, 2026
2e91499
Apply part0 brace style to dev-only code
thirtiseven Apr 7, 2026
c1f13c2
Replace copy_varlen_data_kernel with cub::DeviceMemcpy::Batched
thirtiseven Apr 7, 2026
33e30f7
Remove unused threads/blocks variables
thirtiseven Apr 7, 2026
7508a80
style
thirtiseven Apr 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/main/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2022-2025, NVIDIA CORPORATION.
# Copyright (c) 2022-2026, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -207,6 +207,7 @@ add_library(
src/NativeParquetJni.cpp
src/NumberConverterJni.cpp
src/ParseURIJni.cpp
src/ProtobufJni.cpp
src/RegexRewriteUtilsJni.cpp
src/RowConversionJni.cpp
src/SparkResourceAdaptorJni.cpp
Expand Down Expand Up @@ -254,6 +255,7 @@ add_library(
src/multiply.cu
src/number_converter.cu
src/parse_uri.cu
src/protobuf.cu
src/regex_rewrite_utils.cu
src/row_conversion.cu
src/round_float.cu
Expand Down
80 changes: 80 additions & 0 deletions src/main/cpp/src/ProtobufJni.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright (c) 2026, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "cudf_jni_apis.hpp"
#include "dtype_utils.hpp"
#include "protobuf.hpp"

#include <cudf/column/column_view.hpp>
#include <cudf/utilities/traits.hpp>

extern "C" {

/**
 * JNI entry point backing `com.nvidia.spark.rapids.jni.Protobuf.decodeToStruct`.
 *
 * Unpacks the per-field schema arrays coming from Java and forwards them, together with the
 * input column, to `spark_rapids_jni::decode_protobuf_to_struct`.
 *
 * @param env               JNI environment.
 * @param binary_input_view Native address of the `cudf::column_view` to decode.
 * @param field_numbers     One field number per output field.
 * @param type_ids          One cudf type id per output field.
 * @param type_scales       One value per output field. It carries the wire encoding
 *                          (0=default, 1=fixed, 2=zigzag) and is additionally used as the
 *                          decimal scale when the matching type id is a fixed-point type.
 * @param fail_on_errors    Forwarded unchanged to the decoder.
 * @return Handle produced by `cudf::jni::release_as_jlong` for the decoded result, or 0 when a
 *         Java exception has been raised (null argument, mismatched array lengths, or an error
 *         caught by JNI_CATCH).
 */
JNIEXPORT jlong JNICALL
Java_com_nvidia_spark_rapids_jni_Protobuf_decodeToStruct(JNIEnv* env,
                                                         jclass,
                                                         jlong binary_input_view,
                                                         jintArray field_numbers,
                                                         jintArray type_ids,
                                                         jintArray type_scales,
                                                         jboolean fail_on_errors)
{
  // Reject missing arguments up front, before any native state is touched.
  JNI_NULL_CHECK(env, binary_input_view, "binary_input_view is null", 0);
  JNI_NULL_CHECK(env, field_numbers, "field_numbers is null", 0);
  JNI_NULL_CHECK(env, type_ids, "type_ids is null", 0);
  JNI_NULL_CHECK(env, type_scales, "type_scales is null", 0);

  JNI_TRY
  {
    cudf::jni::auto_set_device(env);
    auto const* input = reinterpret_cast<cudf::column_view const*>(binary_input_view);
    cudf::jni::native_jintArray n_field_numbers(env, field_numbers);
    cudf::jni::native_jintArray n_type_ids(env, type_ids);
    cudf::jni::native_jintArray n_type_scales(env, type_scales);

    // The three arrays describe the same fields position by position, so they must line up.
    auto const num_fields = n_field_numbers.size();
    if (!(num_fields == n_type_ids.size() && num_fields == n_type_scales.size())) {
      JNI_THROW_NEW(env,
                    cudf::jni::ILLEGAL_ARG_EXCEPTION_CLASS,
                    "fieldNumbers/typeIds/typeScales must be the same length",
                    0);
    }

    // Build the output cudf types. For the protobuf decoder, typeScales holds encoding info
    // (0=default, 1=fixed, 2=zigzag) rather than decimal scales, so every non-decimal type is
    // created with scale 0. Decimal types are not currently supported by the decoder; for them
    // the typeScales entry is still passed through as the scale.
    std::vector<cudf::data_type> out_types;
    out_types.reserve(n_type_ids.size());
    for (int idx = 0; idx < n_type_ids.size(); ++idx) {
      auto const id         = static_cast<cudf::type_id>(n_type_ids[idx]);
      bool const is_decimal = cudf::is_fixed_point(cudf::data_type{id});
      jint const scale      = is_decimal ? n_type_scales[idx] : 0;
      out_types.emplace_back(cudf::jni::make_data_type(n_type_ids[idx], scale));
    }

    // Host-side copies of the field numbers and of the raw per-field encoding values.
    std::vector<int> field_nums(n_field_numbers.begin(), n_field_numbers.end());
    std::vector<int> encodings(n_type_scales.begin(), n_type_scales.end());

    auto decoded = spark_rapids_jni::decode_protobuf_to_struct(
      *input, field_nums, out_types, encodings, fail_on_errors);
    return cudf::jni::release_as_jlong(decoded);
  }
  JNI_CATCH(env, 0);
}

} // extern "C"
Loading