11#!/usr/bin/env python
2+ """Build the native XGBoost4J JNI library."""
3+
24import argparse
3- import errno
4- import glob
55import os
66import platform
7+ import shlex
78import shutil
89import subprocess
910import sys
10- from contextlib import contextmanager
11-
12- # Monkey-patch the API inconsistency between Python2.X and 3.X.
13- if sys .platform .startswith ("linux" ):
14- sys .platform = "linux"
11+ from pathlib import Path
12+ from typing import Sequence
1513
# Directory containing this script; the JVM package sources are addressed
# relative to it.
JVM_PACKAGES = Path(__file__).resolve().parent
# Parent of the script directory; used as the CMake source directory and as
# the base for the ``lib`` and ``demo`` paths.
ROOT = JVM_PACKAGES.parent
1616
17- CONFIG = {
17+ DEFAULT_CONFIG = {
1818 "USE_OPENMP" : "ON" ,
1919 "USE_CUDA" : "OFF" ,
2020 "USE_NCCL" : "OFF" ,
2424}
2525
2626
27- @contextmanager
28- def cd (path ):
29- path = normpath (path )
30- cwd = os .getcwd ()
31- os .chdir (path )
32- print ("cd " + path , flush = True )
33- try :
34- yield path
35- finally :
36- os .chdir (cwd )
27+ def run (command : Sequence [str ], * , cwd : Path | None = None ) -> None :
28+ """Run a shell command."""
29+ print (shlex .join (command ), flush = True )
30+ subprocess .run (command , cwd = cwd , check = True , env = os .environ )
3731
3832
39- def maybe_makedirs (path ):
40- path = normpath (path )
41- print ("mkdir -p " + path , flush = True )
42- try :
43- os .makedirs (path )
44- except OSError as e :
45- if e .errno != errno .EEXIST :
46- raise
def mkdir(path: Path) -> None:
    """Create a directory, including missing parents, if it does not exist.

    Args:
        path: Directory to create. An already existing directory is left
            untouched.
    """
    print(f"mkdir -p {path}", flush=True)
    path.mkdir(parents=True, exist_ok=True)
4737
4838
49- def run (command , ** kwargs ):
50- print (command , flush = True )
51- subprocess .run (command , shell = True , check = True , env = os .environ , ** kwargs )
def copy_file(source: Path, target: Path) -> None:
    """Copy a file to a target path or into a target directory.

    Args:
        source: File to copy.
        target: Destination file path, or an existing directory to copy into
            (the file then keeps its own name).
    """
    print(f"cp {source} {target}", flush=True)
    shutil.copy(source, target)
5243
5344
54- def cp (source , target ):
55- source = normpath (source )
56- target = normpath (target )
57- print ("cp {0} {1}" .format (source , target ), flush = True )
58- shutil .copy (source , target )
def copy_glob(pattern: str, target: Path) -> None:
    """Copy every file matching a glob pattern into a target directory.

    Args:
        pattern: Glob pattern, interpreted relative to ``ROOT``.
        target: Destination handed to ``copy_file`` for each match.

    A pattern that matches nothing copies nothing and raises no error.
    """
    # Path.glob yields matches in arbitrary order; sorting keeps the echoed
    # copy commands reproducible from run to run.
    for source in sorted(ROOT.glob(pattern)):
        copy_file(source, target)
5949
6050
61- def normpath (path ):
62- """Normalize UNIX path to a native path."""
63- normalized = os .path .join (* path .split ("/" ))
64- if os .path .isabs (path ):
65- return os .path .abspath ("/" ) + normalized
66- else :
67- return normalized
def cmake_config(options: argparse.Namespace) -> dict[str, str]:
    """Create the CMake configuration from parsed CLI options.

    Args:
        options: Parsed arguments. Reads ``use_openmp``, ``use_nvtx``,
            ``plugin_rmm``, ``log_capi_invocation``, ``use_debug`` and
            ``use_cuda``; each is compared against or stored as "ON"/"OFF".

    Returns:
        A fresh mapping of CMake variable names to values, starting from a
        copy of ``DEFAULT_CONFIG`` so the module-level defaults stay intact.
    """
    config = DEFAULT_CONFIG.copy()

    # These switches are forwarded verbatim.
    config["USE_OPENMP"] = options.use_openmp
    config["USE_NVTX"] = options.use_nvtx
    config["PLUGIN_RMM"] = options.plugin_rmm

    # These only override the defaults when explicitly turned on.
    if options.log_capi_invocation == "ON":
        config["LOG_CAPI_INVOCATION"] = "ON"
    if options.use_debug == "ON":
        config["CMAKE_BUILD_TYPE"] = "Debug"
    if options.use_cuda == "ON":
        # A CUDA build also enables NCCL and turns off USE_DLOPEN_NCCL.
        config["USE_CUDA"] = "ON"
        config["USE_NCCL"] = "ON"
        config["USE_DLOPEN_NCCL"] = "OFF"

    return config
8068
81- print ("building Java wrapper" , flush = True )
82- with cd (".." ):
83- build_dir = "build-gpu" if cli_args .use_cuda == "ON" else "build"
84- maybe_makedirs (build_dir )
85-
86- if sys .platform == "linux" :
87- maybe_parallel_build = " -- -j $(nproc)"
88- elif sys .platform == "win32" :
89- maybe_parallel_build = ' -- /m /nodeReuse:false "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal"'
90- else :
91- maybe_parallel_build = ""
92-
93- if cli_args .log_capi_invocation == "ON" :
94- CONFIG ["LOG_CAPI_INVOCATION" ] = "ON"
95-
96- if cli_args .use_cuda == "ON" :
97- CONFIG ["USE_CUDA" ] = "ON"
98- CONFIG ["USE_NCCL" ] = "ON"
99- CONFIG ["USE_DLOPEN_NCCL" ] = "OFF"
100-
101- args = ["-D{0}:BOOL={1}" .format (k , v ) for k , v in CONFIG .items ()]
102- if sys .platform != "win32" :
69+
def cmake_args(config: dict[str, str]) -> list[str]:
    """Create the CMake command line arguments for a configuration.

    Args:
        config: Mapping of CMake variable names to values.

    Returns:
        One ``-D`` definition per entry, in mapping order, optionally followed
        by ``-GNinja`` and a ``CMAKE_CUDA_ARCHITECTURES`` definition.
    """
    # "ON"/"OFF" values are typed as BOOL cache entries, anything else
    # (for example CMAKE_BUILD_TYPE=Debug) as STRING.
    args = [
        f"-D{k}:BOOL={v}" if v in ("ON", "OFF") else f"-D{k}:STRING={v}"
        for k, v in config.items()
    ]

    # Use Ninja when it is on PATH, except on Windows.
    if sys.platform != "win32" and shutil.which("ninja"):
        args.append("-GNinja")

    # Set GPU_ARCH_FLAG to override the CUDA architectures. An empty value is
    # treated the same as an unset variable.
    if gpu_arch_flag := os.getenv("GPU_ARCH_FLAG"):
        args.append(f"-DCMAKE_CUDA_ARCHITECTURES={gpu_arch_flag}")

    return args
85+
86+
def windows_generators() -> tuple[list[str], ...]:
    """Return the CMake generator argument lists to try on Windows.

    Returns:
        Candidate argument lists in the order they should be attempted: first
        an empty list (no generator argument), then explicit Visual Studio
        generators from newest to oldest, each targeting x64.
    """
    return (
        [],  # Let CMake decide.
        ["-G", "Visual Studio 18 2026", "-A", "x64"],
        ["-G", "Visual Studio 17 2022", "-A", "x64"],
        ["-G", "Visual Studio 16 2019", "-A", "x64"],
        ["-G", "Visual Studio 15 2017", "-A", "x64"],
    )
96+
97+
def configure(config_args: list[str], build_dir: Path) -> None:
    """Run the CMake configure step for the source tree at ``ROOT``.

    Args:
        config_args: Extra CMake arguments (definitions, generator flags).
        build_dir: Existing directory to configure in.

    Raises:
        subprocess.CalledProcessError: On non-Windows platforms, if CMake
            fails.
        RuntimeError: On Windows, if every candidate generator fails; the
            last CMake failure is attached as the cause.
    """
    if sys.platform != "win32":
        run(["cmake", str(ROOT), *config_args], cwd=build_dir)
        return

    # On Windows, try each candidate generator until one configures cleanly.
    last_error: subprocess.CalledProcessError | None = None
    for generator in windows_generators():
        try:
            run(["cmake", str(ROOT), *config_args, *generator], cwd=build_dir)
        except subprocess.CalledProcessError as err:
            last_error = err
            print(
                f"Failed to build with generator: {shlex.join(generator)}",
                err,
                flush=True,
            )
            # Recreate the build directory so the next attempt starts from an
            # empty tree rather than the failed attempt's leftovers.
            shutil.rmtree(build_dir)
            mkdir(build_dir)
        else:
            return

    raise RuntimeError(
        "None of the supported CMake generators worked."
    ) from last_error
116+
117+
def build(config: dict[str, str], build_dir: Path) -> None:
    """Configure and build the native library.

    Args:
        config: CMake variable names mapped to their values.
        build_dir: Existing directory in which to configure and build.

    Raises:
        subprocess.CalledProcessError: If the build command fails. Errors from
            the configure step propagate unchanged.
    """
    # Remove any previous ROOT/lib directory before building.
    if (lib_dir := ROOT / "lib").exists():
        shutil.rmtree(lib_dir)

    configure(cmake_args(config), build_dir)

    build_args = ["cmake", "--build", ".", "--config", "Release"]
    # Everything after "--" is forwarded to the underlying native build tool.
    if sys.platform == "linux":
        build_args.extend(["--", "-j", str(os.cpu_count() or 1)])
    elif sys.platform == "win32":
        # Presumably MSBuild switches, given the Visual Studio generators
        # used at configure time.
        build_args.extend(
            [
                "--",
                "/m",
                "/nodeReuse:false",
                "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal",
            ]
        )
    run(build_args, cwd=build_dir)
139138
140139
141- print ("copying native library" , flush = True )
140+ def copy_native_library () -> None :
141+ """Copy the native library into the JVM package resources."""
142142 library_name , os_folder = {
143143 "Windows" : ("xgboost4j.dll" , "windows" ),
144144 "Darwin" : ("libxgboost4j.dylib" , "macos" ),
@@ -154,35 +154,59 @@ def native_build(cli_args: argparse.Namespace) -> None:
154154 "arm64" : "aarch64" , # on macOS & Windows ARM 64-bit
155155 "aarch64" : "aarch64" ,
156156 }[platform .machine ().lower ()]
157- output_folder = "xgboost4j/src/main/resources/lib/{}/{}" .format (
158- os_folder , arch_folder
157+
158+ output_folder = (
159+ JVM_PACKAGES / "xgboost4j/src/main/resources/lib" / os_folder / arch_folder
159160 )
160- maybe_makedirs (output_folder )
161- cp ("../lib/" + library_name , output_folder )
161+ mkdir (output_folder )
162+ copy_file (ROOT / "lib" / library_name , output_folder )
163+
164+
def copy_test_resources(*, use_cuda: bool) -> None:
    """Copy the training data used by the JVM package tests.

    Args:
        use_cuda: When true, also populate the ``xgboost4j-spark-gpu`` test
            resources.

    Raises:
        subprocess.CalledProcessError: If one of the data preparation scripts
            in ``demo/data/regression`` fails.
    """
    # xgboost4j
    xgboost4j_resources = JVM_PACKAGES / "xgboost4j/src/test/resources"
    mkdir(xgboost4j_resources)
    copy_glob("demo/data/agaricus.*", xgboost4j_resources)

    # xgboost4j-spark
    xgboost4j_spark_resources = JVM_PACKAGES / "xgboost4j-spark/src/test/resources"
    mkdir(xgboost4j_spark_resources)

    # Run the preparation scripts first; the machine.txt.t* files copied
    # below are presumably their output.
    regression_dir = ROOT / "demo/data/regression"
    run([sys.executable, "mapfeat.py"], cwd=regression_dir)
    run([sys.executable, "mknfold.py", "machine.txt", "1"], cwd=regression_dir)

    copy_glob("demo/data/regression/machine.txt.t*", xgboost4j_spark_resources)
    copy_glob("demo/data/agaricus.*", xgboost4j_spark_resources)

    # xgboost4j-spark-gpu
    if use_cuda:
        xgboost4j_spark_gpu_resources = (
            JVM_PACKAGES / "xgboost4j-spark-gpu/src/test/resources"
        )
        mkdir(xgboost4j_spark_gpu_resources)
        copy_glob("demo/data/veterans_lung_cancer.csv", xgboost4j_spark_gpu_resources)
        # rank.train.csv must already be present in the Spark test resources;
        # nothing in this function creates it.
        copy_file(
            xgboost4j_spark_resources / "rank.train.csv",
            xgboost4j_spark_gpu_resources,
        )
162191
163- print ("copying train/test files" , flush = True )
164192
165- # for xgboost4j
166- maybe_makedirs ("xgboost4j/src/test/resources" )
167- for file in glob .glob ("../demo/data/agaricus.*" ):
168- cp (file , "xgboost4j/src/test/resources" )
169-
170- # for xgboost4j-spark
171- maybe_makedirs ("xgboost4j-spark/src/test/resources" )
172- with cd ("../demo/data/regression" ):
173- run (f'"{ sys .executable } " mapfeat.py' )
174- run (f'"{ sys .executable } " mknfold.py machine.txt 1' )
175- for file in glob .glob ("../demo/data/regression/machine.txt.t*" ):
176- cp (file , "xgboost4j-spark/src/test/resources" )
177- for file in glob .glob ("../demo/data/agaricus.*" ):
178- cp (file , "xgboost4j-spark/src/test/resources" )
179-
180- # for xgboost4j-spark-gpu
181- if cli_args .use_cuda == "ON" :
182- maybe_makedirs ("xgboost4j-spark-gpu/src/test/resources" )
183- for file in glob .glob ("../demo/data/veterans_lung_cancer.csv" ):
184- cp (file , "xgboost4j-spark-gpu/src/test/resources" )
185- cp ("xgboost4j-spark/src/test/resources/rank.train.csv" , "xgboost4j-spark-gpu/src/test/resources" )
def native_build(options: argparse.Namespace) -> None:
    """Build the native JNI library, then copy it and the test resources.

    Args:
        options: Parsed CLI arguments; ``use_cuda`` selects the build
            directory and the GPU test resources, and the whole namespace is
            passed on to ``cmake_config``.
    """
    if sys.platform == "darwin":
        # On macOS, JAVA_HOME is always replaced with whatever
        # /usr/libexec/java_home reports, even if it was already set.
        os.environ["JAVA_HOME"] = (
            subprocess.check_output(["/usr/libexec/java_home"]).strip().decode()
        )

    use_cuda = options.use_cuda == "ON"

    print("building Java wrapper", flush=True)
    # CUDA builds use their own build directory under ROOT.
    build_dir = ROOT / ("build-gpu" if use_cuda else "build")
    mkdir(build_dir)
    build(cmake_config(options), build_dir)

    print("copying native library", flush=True)
    copy_native_library()

    print("copying train/test files", flush=True)
    copy_test_resources(use_cuda=use_cuda)
186210
187211
188212if __name__ == "__main__" :
@@ -195,5 +219,5 @@ def native_build(cli_args: argparse.Namespace) -> None:
195219 parser .add_argument ("--use-debug" , type = str , choices = ["ON" , "OFF" ], default = "OFF" )
196220 parser .add_argument ("--use-nvtx" , type = str , choices = ["ON" , "OFF" ], default = "OFF" )
197221 parser .add_argument ("--plugin-rmm" , type = str , choices = ["ON" , "OFF" ], default = "OFF" )
198- cli_args = parser .parse_args ()
199- native_build (cli_args )
222+ parsed_args = parser .parse_args ()
223+ native_build (parsed_args )
0 commit comments