diff --git a/Cargo.toml b/Cargo.toml index bf127ce..a2a44d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ path = "src/lib.rs" default = ["test_util"] test_util = [] use_existing_hdfs = [] +no_jvm_invocation = [] [build-dependencies] cc = "1" diff --git a/README.md b/README.md index ed37244..46c2132 100644 --- a/README.md +++ b/README.md @@ -84,4 +84,60 @@ match fs.mkdir("/data") { Ok(_) => { println!("/data has been created") }, Err(_) => { panic!("/data creation has failed") } }; -``` \ No newline at end of file +``` + +## JNI Context Support + +fs-hdfs3 supports a special `no_jvm_invocation` feature for use cases where the library runs within JNI native functions and JVM invocation APIs are not needed. This is useful when implementing Java libraries with native JNI modules that use fs-hdfs3 to call Hadoop FileSystem APIs. + +### Using the no_jvm_invocation Feature + +To use this feature, add it to your Cargo.toml: + +```toml +[dependencies.fs-hdfs3] +version = "0.1.12" +features = ["no_jvm_invocation"] +``` + +When this feature is enabled: +- The library does not link to `libjvm.so` +- JVM invocation APIs are disabled +- You must provide the JavaVM using the `jni_context` module functions + +### JNI Native Function Example + +```rust +use fs_hdfs3::jni_context::set_java_vm; +use fs_hdfs3::hdfs::get_hdfs; + +// Call this once at application startup, typically in JNI_OnLoad +#[no_mangle] +pub extern "C" fn JNI_OnLoad(vm: *mut std::ffi::c_void, _reserved: *mut std::ffi::c_void) -> i32 { + // Set the JavaVM for fs-hdfs3 to use + if unsafe { set_java_vm(vm) }.is_err() { + return -1; // JNI_ERR + } + 0x00010008 // JNI_VERSION_1_8 +} + +// Later, in your JNI native functions, just use fs-hdfs3 normally +#[no_mangle] +pub extern "C" fn Java_com_example_MyClass_myNativeMethod( + _env: *mut std::ffi::c_void, + _class: *mut std::ffi::c_void, +) -> i32 { + // No need to manage JNIEnv - fs-hdfs3 handles it automatically + match get_hdfs() { + Ok(fs) => { + // Use 
the filesystem... + println!("Successfully connected to HDFS"); + } + Err(e) => { + eprintln!("Failed to connect to HDFS: {:?}", e); + return -1; + } + } + 0 +} +``` diff --git a/build.rs b/build.rs index fd3b701..8a76ff6 100644 --- a/build.rs +++ b/build.rs @@ -39,17 +39,22 @@ fn build_ffi(flags: &[String]) { ); // To avoid the order issue of dependent dynamic libraries + #[cfg(not(feature = "no_jvm_invocation"))] println!("cargo:rustc-link-lib=jvm"); - let bindings = bindgen::Builder::default() + let builder = bindgen::Builder::default() .header(header) .allowlist_function("nmd.*") .allowlist_function("hdfs.*") .allowlist_function("hadoop.*") .clang_args(flags) - .rustified_enum("tObjectKind") - .generate() - .expect("Unable to generate bindings"); + .rustified_enum("tObjectKind"); + + // Add the conditional functions for no_jvm_invocation + #[cfg(feature = "no_jvm_invocation")] + let builder = builder.allowlist_function("hdfsSetJavaVM"); + + let bindings = builder.generate().expect("Unable to generate bindings"); let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); bindings @@ -114,6 +119,9 @@ fn get_build_flags() -> Vec { result.extend(get_java_dependency()); result.push(String::from("-Wno-incompatible-pointer-types")); + #[cfg(feature = "no_jvm_invocation")] + result.push(String::from("-DLIBHDFS_NO_JVM_INVOCATION")); + result } @@ -129,14 +137,17 @@ fn get_java_dependency() -> Vec { #[cfg(target_os = "macos")] result.push(format!("-I{java_home}/include/darwin")); - // libjvm link - let jvm_lib_location = java_locator::locate_jvm_dyn_library().unwrap(); - println!("cargo:rustc-link-search=native={}", jvm_lib_location); - println!("cargo:rustc-link-lib=jvm"); + #[cfg(not(feature = "no_jvm_invocation"))] + { + // libjvm link + let jvm_lib_location = java_locator::locate_jvm_dyn_library().unwrap(); + println!("cargo:rustc-link-search=native={}", jvm_lib_location); + println!("cargo:rustc-link-lib=jvm"); - // For tests, add libjvm path to rpath, this does 
not propagate upwards, - // unless building an .so, as per Cargo specs, so is only used when testing - println!("cargo:rustc-link-arg=-Wl,-rpath,{jvm_lib_location}"); + // For tests, add libjvm path to rpath, this does not propagate upwards, + // unless building an .so, as per Cargo specs, so is only used when testing + println!("cargo:rustc-link-arg=-Wl,-rpath,{jvm_lib_location}"); + } result } diff --git a/c_src/libhdfs/hdfs.c b/c_src/libhdfs/hdfs.c index 34ae654..467ea51 100644 --- a/c_src/libhdfs/hdfs.c +++ b/c_src/libhdfs/hdfs.c @@ -3600,6 +3600,13 @@ char* hdfsGetLastExceptionStackTrace() return getLastTLSExceptionStackTrace(); } +#ifdef LIBHDFS_NO_JVM_INVOCATION +int hdfsSetJavaVM(void *vm) +{ + return setJavaVM(vm); +} +#endif /* LIBHDFS_NO_JVM_INVOCATION */ + /** * vim: ts=4: sw=4: et: */ diff --git a/c_src/libhdfs/hdfs.h b/c_src/libhdfs/hdfs.h index 88f99bf..acb0b6d 100644 --- a/c_src/libhdfs/hdfs.h +++ b/c_src/libhdfs/hdfs.h @@ -1079,6 +1079,11 @@ extern "C" { LIBHDFS_EXTERNAL char* hdfsGetLastExceptionStackTrace(); + #ifdef LIBHDFS_NO_JVM_INVOCATION + LIBHDFS_EXTERNAL + int hdfsSetJavaVM(void *vm); + #endif /* LIBHDFS_NO_JVM_INVOCATION */ + #ifdef __cplusplus } #endif diff --git a/c_src/libhdfs/jni_helper.c b/c_src/libhdfs/jni_helper.c index 982df42..02c6172 100644 --- a/c_src/libhdfs/jni_helper.c +++ b/c_src/libhdfs/jni_helper.c @@ -28,6 +28,7 @@ #include #include #include +#include static struct htable *gClassRefHTable = NULL; @@ -629,6 +630,7 @@ static char* getClassPath() return expandedClasspath; } +#ifndef LIBHDFS_NO_JVM_INVOCATION /** * Get the global JNI environemnt. 
@@ -701,7 +703,7 @@ static JNIEnv* getGlobalJNIEnv(void)
         }
         options[0].optionString = optHadoopClassPath;
         hadoopJvmArgs = getenv("LIBHDFS_OPTS");
-        if (hadoopJvmArgs != NULL)  {
+        if (hadoopJvmArgs != NULL) {
           hadoopJvmArgs = strdup(hadoopJvmArgs);
           for (noArgs = 1, str = hadoopJvmArgs; ; noArgs++, str = NULL) {
             token = strtok_r(str, jvmArgDelims, &savePtr);
@@ -820,6 +822,143 @@ JNIEnv* getJNIEnv(void)
     return NULL;
 }
 
+#else
+
+/* Global JavaVM for no_jvm_invocation mode - using atomic for efficiency
+ *
+ * Memory ordering explanation:
+ * - setJavaVM (atomic_compare_exchange_strong) and getJNIEnv (atomic_load) use
+ *   the default memory_order_seq_cst, which implies release/acquire ordering
+ * - so all writes made before setJavaVM are visible to any thread that
+ *   observes the non-NULL JavaVM pointer
+ * - This provides safe publication of the JavaVM without expensive mutex locking
+ */
+static _Atomic(JavaVM*) g_cachedJavaVM = NULL;
+
+int setJavaVM(void *vm)
+{
+    JavaVM *javaVM = (JavaVM *)vm;
+    JavaVM *expected = NULL;
+
+    if (!vm) {
+        fprintf(stderr, "setJavaVM: vm parameter cannot be NULL\n");
+        return -1;
+    }
+
+    /* Atomically set the JavaVM if it's not already set (compare-and-swap) */
+    if (!atomic_compare_exchange_strong(&g_cachedJavaVM, &expected, javaVM)) {
+        if (expected == javaVM) {
+            /* A failed CAS stores the current value in expected: same JavaVM, ignore. */
+            return 0;
+        } else {
+            fprintf(stderr, "setJavaVM: JavaVM already set. Cannot call setJavaVM() with a different JavaVM.\n");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/**
+ * getJNIEnv: A helper function to get the JNIEnv* for the given thread.
+ * Uses a JavaVM that was previously set via setJavaVM(). If the current thread
+ * is already attached by the caller, returns JNIEnv directly. If not attached,
+ * attaches the thread and stores JNIEnv in TLS for proper cleanup.
+ *
+ * Implementation note: every thread gets a ThreadLocalState, but state->env is
+ * set ONLY for threads that WE attach (not caller-attached threads). The TLS
+ * destructor then detaches exactly those threads from the Java VM when they
+ * terminate. If we fail to do this, it will cause a memory leak.
+ * The contract is: if state->env is not NULL, then WE attached this thread.
+ *
+ * However, POSIX TLS is not the most efficient way to do things. It requires a
+ * key to be initialized before it can be used. Since we don't know if this key
+ * is initialized at the start of this function, we have to lock a mutex first
+ * and check. Luckily, most operating systems support the more efficient
+ * __thread construct, which is initialized by the linker.
+ *
+ * @param: None.
+ * @return The JNIEnv* corresponding to the thread.
+ */
+JNIEnv* getJNIEnv(void)
+{
+    struct ThreadLocalState *state = NULL;
+    JNIEnv *env = NULL;
+    jint rv;
+
+    JavaVM *vm = atomic_load(&g_cachedJavaVM);
+    if (vm == NULL) {
+        fprintf(stderr, "getJNIEnv: JavaVM not set. Call setJavaVM() first.\n");
+        return NULL;
+    }
+
+    /* Check thread local storage first - if we have TLS with an env, we must have
+     * attached this thread */
+    THREAD_LOCAL_STORAGE_GET_QUICK(&state);
+    if (state && state->env) return state->env;
+
+    mutexLock(&jvmMutex);
+    if (threadLocalStorageGet(&state)) {
+        mutexUnlock(&jvmMutex);
+        return NULL;
+    }
+    if (state) {
+        /* Unlock on every path: env is NULL here for caller-attached threads. */
+        mutexUnlock(&jvmMutex);
+        if (state->env) {
+            /* We have attached this thread before, so we can return the JNIEnv directly. */
+            /* Free any stale exception strings */
+            free(state->lastExceptionRootCause);
+            free(state->lastExceptionStackTrace);
+            state->lastExceptionRootCause = NULL;
+            state->lastExceptionStackTrace = NULL;
+
+            return state->env;
+        }
+    } else {
+        /* Create a ThreadLocalState for this thread */
+        state = threadLocalStorageCreate();
+        if (!state) {
+            mutexUnlock(&jvmMutex);
+            fprintf(stderr, "getJNIEnv: Unable to create ThreadLocalState\n");
+            return NULL;
+        }
+        if (threadLocalStorageSet(state)) {
+            mutexUnlock(&jvmMutex);
+            fprintf(stderr, "getJNIEnv: Unable to set ThreadLocalState\n");
+            return NULL;
+        }
+        THREAD_LOCAL_STORAGE_SET_QUICK(state);
+        mutexUnlock(&jvmMutex);
+    }
+
+    /* Try to get JNIEnv from the JavaVM - this will succeed if the thread is already attached */
+    rv = (*vm)->GetEnv(vm, (void**)&env, JNI_VERSION_1_2);
+
+    if (rv == JNI_OK && env != NULL) {
+        /* Thread is already attached by caller - return JNIEnv directly without storing in TLS */
+        return env;
+    } else if (rv == JNI_EDETACHED) {
+        /* Thread is not attached - we need to attach it ourselves and store in TLS */
+        rv = (*vm)->AttachCurrentThread(vm, (void**)&env, NULL);
+        if (rv != JNI_OK || env == NULL) {
+            fprintf(stderr, "getJNIEnv: AttachCurrentThread failed with error: %d\n", rv);
+            return NULL;
+        }
+
+        /* Store the JNIEnv we attached in the thread local state. The contract is that if
+         * state->env is not NULL, then it MUST BE US who attached this thread.
+         */
+        state->env = env;
+        return env;
+    } else {
+        /* Some other error occurred */
+        fprintf(stderr, "getJNIEnv: JavaVM->GetEnv failed with error: %d\n", rv);
+        return NULL;
+    }
+}
+
+#endif /* LIBHDFS_NO_JVM_INVOCATION */
+
 char* getLastTLSExceptionRootCause()
 {
     struct ThreadLocalState *state = NULL;
@@ -938,4 +1077,3 @@ jthrowable fetchEnumInstance(JNIEnv *env, const char *className,
     *out = jEnum;
     return NULL;
 }
-
diff --git a/c_src/libhdfs/jni_helper.h b/c_src/libhdfs/jni_helper.h
index 049f556..604ba01 100644
--- a/c_src/libhdfs/jni_helper.h
+++ b/c_src/libhdfs/jni_helper.h
@@ -138,10 +138,14 @@ LIBHDFS_EXTERNAL
 jthrowable classNameOfObject(jobject jobj, JNIEnv *env, char **name);
 
 /** getJNIEnv: A helper function to get the JNIEnv* for the given thread.
- * It gets this from the ThreadLocalState if it exists. If a ThreadLocalState
- * does not exist, one will be created.
- * If no JVM exists, then one will be created. JVM command line arguments
- * are obtained from the LIBHDFS_OPTS environment variable.
+ *
+ * In regular mode: Gets JNIEnv from ThreadLocalState if it exists, otherwise
+ * creates one. If no JVM exists, creates one using LIBHDFS_OPTS environment variable.
+ *
+ * In no_jvm_invocation mode: Uses JavaVM set via setJavaVM(). If the current thread
+ * is already attached by the caller, returns JNIEnv directly. If not attached,
+ * attaches the thread and stores JNIEnv in ThreadLocalState for proper cleanup.
+ *
  * @param: None.
  * @return The JNIEnv* corresponding to the thread.
  * */
@@ -184,6 +188,19 @@ char* getLastTLSExceptionStackTrace();
 LIBHDFS_EXTERNAL
 void setTLSExceptionStrings(const char *rootCause, const char *stackTrace);
 
+#ifdef LIBHDFS_NO_JVM_INVOCATION
+/** setJavaVM: Set the JavaVM for use in JNI context.
+ * This function should be called once at the beginning of the application
+ * to provide the JavaVM that was passed to the JNI library.
+ * The JavaVM will be cached and used to obtain JNIEnv for each thread.
+ *
+ * @param vm The JavaVM pointer from JNI context.
+ * @return 0 on success (or if the same JavaVM is already set), -1 on error.
+ */
+LIBHDFS_EXTERNAL
+int setJavaVM(void *vm);
+#endif /* LIBHDFS_NO_JVM_INVOCATION */
+
 /**
  * Figure out if a Java object is an instance of a particular class.
  *
diff --git a/c_src/libhdfs/os/posix/thread_local_storage.c b/c_src/libhdfs/os/posix/thread_local_storage.c
index e6b59d6..b6be5e7 100644
--- a/c_src/libhdfs/os/posix/thread_local_storage.c
+++ b/c_src/libhdfs/os/posix/thread_local_storage.c
@@ -42,7 +42,7 @@ void hdfsThreadDestructor(void *v)
   JNIEnv *env = state->env;;
   jint ret;
 
-  /* Detach the current thread from the JVM */
+  /* Detach the current thread from the JVM if env is present (means WE attached it) */
   if (env) {
     ret = (*env)->GetJavaVM(env, &vm);
     if (ret) {
@@ -71,6 +71,7 @@ struct ThreadLocalState* threadLocalStorageCreate()
       "threadLocalStorageSet: OOM - Unable to allocate thread local state\n");
     return NULL;
   }
+  state->env = NULL;
   state->lastExceptionStackTrace = NULL;
   state->lastExceptionRootCause = NULL;
   return state;
diff --git a/src/lib.rs b/src/lib.rs
index b47ca1f..94cceff 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -112,3 +112,61 @@ pub mod minidfs;
 pub mod util;
 /// For list files in a directory recursively
 pub mod walkdir;
+
+#[cfg(feature = "no_jvm_invocation")]
+/// JNI context support for running inside JVM without invocation APIs
+pub mod jni_context {
+    //! This module provides support for using fs-hdfs3 within JNI native functions
+    //! where the JavaVM is provided by the calling JVM, eliminating the need for
+    //! JVM invocation APIs and linking to libjvm.so
+    //!
+    //! ## Usage in JNI Applications
+    //!
+    //! When implementing JNI native functions that use fs-hdfs3, call `set_java_vm()`
+    //! once at application startup with the JavaVM from your JNI context:
+    //!
+    //! ```c
+    //! // In JNI_OnLoad; set_java_vm_from_rust is your own C-callable shim around set_java_vm()
+    //! JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) {
+    //!     if (set_java_vm_from_rust(vm) != 0) {
+    //!         return JNI_ERR;
+    //!     }
+    //!     return JNI_VERSION_1_8;
+    //! }
+    //!
+    //! // Later, in your JNI native functions, just use fs-hdfs3 normally
+    //! JNIEXPORT jint JNICALL
+    //! Java_com_example_MyClass_myNativeMethod(JNIEnv *env, jobject this) {
+    //!     // No need to set JNIEnv - fs-hdfs3 will get it from the JavaVM
+    //!     // Use fs-hdfs3 functions normally
+    //!     return 0;
+    //! }
+    //! ```
+
+    use crate::native::hdfsSetJavaVM;
+    use std::ffi::c_void;
+
+    /// Set the JavaVM for use in JNI context.
+    /// This function should be called once at application startup, typically
+    /// in JNI_OnLoad, to provide the JavaVM that fs-hdfs3 will use to obtain
+    /// JNIEnv for each thread.
+    ///
+    /// # Arguments
+    /// * `vm` - The JavaVM pointer from JNI context
+    ///
+    /// # Returns
+    /// * `Ok(())` on success
+    /// * `Err(())` on error
+    ///
+    /// # Safety
+    /// This function is unsafe because it accepts a raw pointer that must be a valid JavaVM.
+    /// The JavaVM must remain valid for the lifetime of the application.
+    pub unsafe fn set_java_vm(vm: *mut c_void) -> Result<(), ()> {
+        let result = hdfsSetJavaVM(vm);
+        if result == 0 {
+            Ok(())
+        } else {
+            Err(())
+        }
+    }
+}