Skip to content

Commit 502ad99

Browse files
committed
Merge remote-tracking branch 'origin/master' into skbuild
2 parents 62f6bc1 + f872422 commit 502ad99

8 files changed

Lines changed: 197 additions & 155 deletions

File tree

doc/conf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,9 @@ def is_readthedocs_build():
378378

379379
# -- Options for HTML output ----------------------------------------------
380380

381+
# See the Read the Docs documentation for the effect of canonical URLs on SEO.
382+
html_baseurl: str = os.environ.get("READTHEDOCS_CANONICAL_URL", "")
383+
381384
# The theme to use for HTML and HTML Help pages. See the documentation for
382385
# a list of builtin themes.
383386
html_theme = "sphinx_rtd_theme"

doc/parameter.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,10 @@ Parameter for using Expectile Loss (``reg:expectileerror``)
535535

536536
.. versionadded:: 3.3.0
537537

538-
.. note:: Multiple alphas must be sorted in ascending order. Unlike the quantile objective, expectile does not suffer from curve crossing.
538+
.. note::
539+
540+
Multiple alphas must be sorted in ascending order. Unlike the quantile objective, expectile does not suffer from curve crossing.
541+
When predicting with ``output_margin=True`` and multiple alphas, the first margin corresponds to the smallest alpha; subsequent margins are reparameterized gaps between consecutive expectile predictions. Use normal prediction to obtain the actual expectile values.
539542

540543
Parameter for using AFT Survival Loss (``survival:aft``) and Negative Log Likelihood of AFT metric (``aft-nloglik``)
541544
====================================================================================================================

jvm-packages/create_jni.py

Lines changed: 160 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
#!/usr/bin/env python
2+
"""Build the native XGBoost4J JNI library."""
3+
24
import argparse
3-
import errno
4-
import glob
55
import os
66
import platform
7+
import shlex
78
import shutil
89
import subprocess
910
import sys
10-
from contextlib import contextmanager
11-
12-
# Monkey-patch the API inconsistency between Python2.X and 3.X.
13-
if sys.platform.startswith("linux"):
14-
sys.platform = "linux"
11+
from pathlib import Path
12+
from typing import Sequence
1513

14+
ROOT = Path(__file__).resolve().parents[1]
15+
JVM_PACKAGES = Path(__file__).resolve().parent
1616

17-
CONFIG = {
17+
DEFAULT_CONFIG = {
1818
"USE_OPENMP": "ON",
1919
"USE_CUDA": "OFF",
2020
"USE_NCCL": "OFF",
@@ -24,121 +24,121 @@
2424
}
2525

2626

27-
@contextmanager
28-
def cd(path):
29-
path = normpath(path)
30-
cwd = os.getcwd()
31-
os.chdir(path)
32-
print("cd " + path, flush=True)
33-
try:
34-
yield path
35-
finally:
36-
os.chdir(cwd)
27+
def run(command: Sequence[str], *, cwd: Path | None = None) -> None:
28+
"""Run a shell command."""
29+
print(shlex.join(command), flush=True)
30+
subprocess.run(command, cwd=cwd, check=True, env=os.environ)
3731

3832

39-
def maybe_makedirs(path):
40-
path = normpath(path)
41-
print("mkdir -p " + path, flush=True)
42-
try:
43-
os.makedirs(path)
44-
except OSError as e:
45-
if e.errno != errno.EEXIST:
46-
raise
33+
def mkdir(path: Path) -> None:
    """Echo an ``mkdir -p`` line, then make sure ``path`` is a directory.

    Missing parent directories are created as well, and an already existing
    directory is not an error.
    """
    print("mkdir -p", path, flush=True)
    path.mkdir(exist_ok=True, parents=True)
4737

4838

49-
def run(command, **kwargs):
50-
print(command, flush=True)
51-
subprocess.run(command, shell=True, check=True, env=os.environ, **kwargs)
39+
def copy_file(source: Path, target: Path) -> None:
    """Echo a ``cp`` line, then copy ``source`` to ``target``.

    ``target`` may be a destination file path or an existing directory; in the
    latter case the file keeps its name inside that directory.
    """
    print("cp", source, target, flush=True)
    shutil.copy(source, target)
5243

5344

54-
def cp(source, target):
55-
source = normpath(source)
56-
target = normpath(target)
57-
print("cp {0} {1}".format(source, target), flush=True)
58-
shutil.copy(source, target)
45+
def copy_glob(pattern: str, target: Path) -> None:
    """Copy every file under ``ROOT`` that matches ``pattern`` into ``target``.

    ``pattern`` is a glob expression relative to ``ROOT``. A pattern with no
    matches copies nothing and is not reported as an error.
    """
    matches = ROOT.glob(pattern)
    for matched in matches:
        copy_file(matched, target)
5949

6050

61-
def normpath(path):
62-
"""Normalize UNIX path to a native path."""
63-
normalized = os.path.join(*path.split("/"))
64-
if os.path.isabs(path):
65-
return os.path.abspath("/") + normalized
66-
else:
67-
return normalized
51+
def cmake_config(options: argparse.Namespace) -> dict[str, str]:
52+
"""Create CMake configuration from CLI options."""
53+
config = DEFAULT_CONFIG.copy()
54+
config["USE_OPENMP"] = options.use_openmp
55+
config["USE_NVTX"] = options.use_nvtx
56+
config["PLUGIN_RMM"] = options.plugin_rmm
6857

58+
if options.log_capi_invocation == "ON":
59+
config["LOG_CAPI_INVOCATION"] = "ON"
60+
if options.use_debug == "ON":
61+
config["CMAKE_BUILD_TYPE"] = "Debug"
62+
if options.use_cuda == "ON":
63+
config["USE_CUDA"] = "ON"
64+
config["USE_NCCL"] = "ON"
65+
config["USE_DLOPEN_NCCL"] = "OFF"
6966

70-
def native_build(cli_args: argparse.Namespace) -> None:
71-
CONFIG["USE_OPENMP"] = cli_args.use_openmp
72-
if sys.platform == "darwin":
73-
os.environ["JAVA_HOME"] = (
74-
subprocess.check_output("/usr/libexec/java_home").strip().decode()
75-
)
76-
if cli_args.use_debug == "ON":
77-
CONFIG["CMAKE_BUILD_TYPE"] = "Debug"
78-
CONFIG["USE_NVTX"] = cli_args.use_nvtx
79-
CONFIG["PLUGIN_RMM"] = cli_args.plugin_rmm
67+
return config
8068

81-
print("building Java wrapper", flush=True)
82-
with cd(".."):
83-
build_dir = "build-gpu" if cli_args.use_cuda == "ON" else "build"
84-
maybe_makedirs(build_dir)
85-
86-
if sys.platform == "linux":
87-
maybe_parallel_build = " -- -j $(nproc)"
88-
elif sys.platform == "win32":
89-
maybe_parallel_build = ' -- /m /nodeReuse:false "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal"'
90-
else:
91-
maybe_parallel_build = ""
92-
93-
if cli_args.log_capi_invocation == "ON":
94-
CONFIG["LOG_CAPI_INVOCATION"] = "ON"
95-
96-
if cli_args.use_cuda == "ON":
97-
CONFIG["USE_CUDA"] = "ON"
98-
CONFIG["USE_NCCL"] = "ON"
99-
CONFIG["USE_DLOPEN_NCCL"] = "OFF"
100-
101-
args = ["-D{0}:BOOL={1}".format(k, v) for k, v in CONFIG.items()]
102-
if sys.platform != "win32":
69+
70+
def cmake_args(config: dict[str, str]) -> list[str]:
    """Translate a configuration mapping into CMake command line arguments.

    Each entry becomes a ``-D<key>:<type>=<value>`` definition, typed ``BOOL``
    when the value is ``ON``/``OFF`` and ``STRING`` otherwise. ``-GNinja`` is
    appended on non-Windows platforms when a ``ninja`` executable is found on
    the PATH, and a non-empty ``GPU_ARCH_FLAG`` environment variable is
    forwarded as ``CMAKE_CUDA_ARCHITECTURES``.
    """
    definitions: list[str] = []
    for name, value in config.items():
        kind = "BOOL" if value in ("ON", "OFF") else "STRING"
        definitions.append(f"-D{name}:{kind}={value}")

    # Only probe for ninja when we are not on Windows.
    ninja_usable = sys.platform != "win32" and shutil.which("ninja")
    if ninja_usable:
        definitions.append("-GNinja")

    # Set GPU_ARCH_FLAG to override the CUDA architectures.
    cuda_archs = os.getenv("GPU_ARCH_FLAG")
    if cuda_archs:
        definitions.append(f"-DCMAKE_CUDA_ARCHITECTURES={cuda_archs}")

    return definitions
85+
86+
87+
def windows_generators() -> tuple[list[str], ...]:
    """Return the CMake generator argument lists to try on Windows, in order.

    The first entry is empty so that CMake picks its own default generator;
    the remaining entries request specific Visual Studio releases, newest
    first, each targeting the x64 architecture.
    """
    visual_studio_releases = (
        "Visual Studio 18 2026",
        "Visual Studio 17 2022",
        "Visual Studio 16 2019",
        "Visual Studio 15 2017",
    )
    explicit = (["-G", release, "-A", "x64"] for release in visual_studio_releases)
    # Leading empty list: let CMake decide.
    return ([], *explicit)
96+
97+
98+
def configure(config_args: list[str], build_dir: Path) -> None:
99+
"""Configure the CMake build."""
100+
if sys.platform == "win32":
101+
for generator in windows_generators():
103102
try:
104-
subprocess.check_call(["ninja", "--version"])
105-
args.append("-GNinja")
106-
except FileNotFoundError:
107-
pass
108-
109-
# if enviorment set GPU_ARCH_FLAG
110-
gpu_arch_flag = os.getenv("GPU_ARCH_FLAG", None)
111-
if gpu_arch_flag is not None:
112-
args.append("-DCMAKE_CUDA_ARCHITECTURES=%s" % gpu_arch_flag)
113-
114-
with cd(build_dir):
115-
lib_dir = os.path.join(os.pardir, "lib")
116-
if os.path.exists(lib_dir):
117-
shutil.rmtree(lib_dir)
118-
119-
# Same trick as Python build, just test all possible generators.
120-
if sys.platform == "win32":
121-
supported_generators = (
122-
"", # empty, decided by cmake
123-
'-G"Visual Studio 17 2022" -A x64',
124-
'-G"Visual Studio 16 2019" -A x64',
125-
'-G"Visual Studio 15 2017" -A x64',
103+
run(["cmake", str(ROOT), *config_args, *generator], cwd=build_dir)
104+
return
105+
except subprocess.CalledProcessError as err:
106+
print(
107+
f"Failed to build with generator: {shlex.join(generator)}",
108+
err,
109+
flush=True,
126110
)
127-
for generator in supported_generators:
128-
try:
129-
run("cmake .. " + " ".join(args + [generator]))
130-
break
131-
except subprocess.CalledProcessError as e:
132-
print(f"Failed to build with generator: {generator}", e, flush=True)
133-
with cd(os.path.pardir):
134-
shutil.rmtree(build_dir)
135-
maybe_makedirs(build_dir)
136-
else:
137-
run("cmake .. " + " ".join(args))
138-
run("cmake --build . --config Release" + maybe_parallel_build)
111+
shutil.rmtree(build_dir)
112+
mkdir(build_dir)
113+
raise RuntimeError("None of the supported CMake generators worked.")
114+
115+
run(["cmake", str(ROOT), *config_args], cwd=build_dir)
116+
117+
118+
def _available_cpus() -> int:
    """Return the number of CPUs the current process may run on."""
    try:
        # Honours CPU affinity (e.g. cpuset-limited containers), like `nproc`.
        return len(os.sched_getaffinity(0))
    except (AttributeError, OSError):
        # sched_getaffinity is not available everywhere; fall back to the
        # machine-wide CPU count.
        return os.cpu_count() or 1


def build(config: dict[str, str], build_dir: Path) -> None:
    """Configure and build the native library with CMake.

    Parameters
    ----------
    config :
        CMake options, converted to command line arguments by ``cmake_args``.
    build_dir :
        Existing directory in which CMake is configured and the build runs.

    Any existing ``ROOT / "lib"`` directory is removed before configuring.
    The build is always run with ``--config Release``; extra native build-tool
    options are appended on Linux (parallel job count) and Windows (MSBuild
    switches).
    """
    lib_dir = ROOT / "lib"
    if lib_dir.exists():
        shutil.rmtree(lib_dir)

    configure(cmake_args(config), build_dir)

    build_args = ["cmake", "--build", ".", "--config", "Release"]
    if sys.platform == "linux":
        # Use the CPUs actually available to this process rather than the
        # machine-wide count, so restricted environments are not oversubscribed.
        build_args.extend(["--", "-j", str(_available_cpus())])
    elif sys.platform == "win32":
        build_args.extend(
            [
                "--",
                "/m",
                "/nodeReuse:false",
                "/consoleloggerparameters:ShowCommandLine;Verbosity=minimal",
            ]
        )
    run(build_args, cwd=build_dir)
139138

140139

141-
print("copying native library", flush=True)
140+
def copy_native_library() -> None:
141+
"""Copy the native library into the JVM package resources."""
142142
library_name, os_folder = {
143143
"Windows": ("xgboost4j.dll", "windows"),
144144
"Darwin": ("libxgboost4j.dylib", "macos"),
@@ -154,35 +154,59 @@ def native_build(cli_args: argparse.Namespace) -> None:
154154
"arm64": "aarch64", # on macOS & Windows ARM 64-bit
155155
"aarch64": "aarch64",
156156
}[platform.machine().lower()]
157-
output_folder = "xgboost4j/src/main/resources/lib/{}/{}".format(
158-
os_folder, arch_folder
157+
158+
output_folder = (
159+
JVM_PACKAGES / "xgboost4j/src/main/resources/lib" / os_folder / arch_folder
159160
)
160-
maybe_makedirs(output_folder)
161-
cp("../lib/" + library_name, output_folder)
161+
mkdir(output_folder)
162+
copy_file(ROOT / "lib" / library_name, output_folder)
163+
164+
165+
def copy_test_resources(*, use_cuda: bool) -> None:
    """Populate the test resource directories of the JVM packages.

    The agaricus demo data is copied into the ``xgboost4j`` and
    ``xgboost4j-spark`` test resources. For ``xgboost4j-spark`` the
    ``mapfeat.py`` and ``mknfold.py`` scripts are run inside
    ``demo/data/regression`` first, and the ``machine.txt.t*`` files found
    there afterwards are copied too (presumably produced by those scripts).
    When ``use_cuda`` is true, the ``xgboost4j-spark-gpu`` test resources are
    filled as well.
    """
    agaricus_pattern = "demo/data/agaricus.*"

    # xgboost4j
    core_dir = JVM_PACKAGES / "xgboost4j/src/test/resources"
    mkdir(core_dir)
    copy_glob(agaricus_pattern, core_dir)

    # xgboost4j-spark
    spark_dir = JVM_PACKAGES / "xgboost4j-spark/src/test/resources"
    mkdir(spark_dir)

    data_dir = ROOT / "demo/data/regression"
    for script_args in (["mapfeat.py"], ["mknfold.py", "machine.txt", "1"]):
        run([sys.executable, *script_args], cwd=data_dir)

    for pattern in ("demo/data/regression/machine.txt.t*", agaricus_pattern):
        copy_glob(pattern, spark_dir)

    if not use_cuda:
        return

    # xgboost4j-spark-gpu
    gpu_dir = JVM_PACKAGES / "xgboost4j-spark-gpu/src/test/resources"
    mkdir(gpu_dir)
    copy_glob("demo/data/veterans_lung_cancer.csv", gpu_dir)
    # rank.train.csv is taken from the spark test resources, not from demo/.
    copy_file(spark_dir / "rank.train.csv", gpu_dir)
162191

163-
print("copying train/test files", flush=True)
164192

165-
# for xgboost4j
166-
maybe_makedirs("xgboost4j/src/test/resources")
167-
for file in glob.glob("../demo/data/agaricus.*"):
168-
cp(file, "xgboost4j/src/test/resources")
169-
170-
# for xgboost4j-spark
171-
maybe_makedirs("xgboost4j-spark/src/test/resources")
172-
with cd("../demo/data/regression"):
173-
run(f'"{sys.executable}" mapfeat.py')
174-
run(f'"{sys.executable}" mknfold.py machine.txt 1')
175-
for file in glob.glob("../demo/data/regression/machine.txt.t*"):
176-
cp(file, "xgboost4j-spark/src/test/resources")
177-
for file in glob.glob("../demo/data/agaricus.*"):
178-
cp(file, "xgboost4j-spark/src/test/resources")
179-
180-
# for xgboost4j-spark-gpu
181-
if cli_args.use_cuda == "ON":
182-
maybe_makedirs("xgboost4j-spark-gpu/src/test/resources")
183-
for file in glob.glob("../demo/data/veterans_lung_cancer.csv"):
184-
cp(file, "xgboost4j-spark-gpu/src/test/resources")
185-
cp("xgboost4j-spark/src/test/resources/rank.train.csv", "xgboost4j-spark-gpu/src/test/resources")
193+
def native_build(options: argparse.Namespace) -> None:
    """Build the JNI library, then copy it and the test data into place.

    ``options`` is the parsed command line. Its ``use_cuda`` value selects the
    build directory (``build-gpu`` or ``build`` under ``ROOT``) and decides
    whether the GPU test resources are copied.
    """
    if sys.platform == "darwin":
        # On macOS, JAVA_HOME is set from the output of /usr/libexec/java_home.
        java_home = subprocess.check_output(["/usr/libexec/java_home"])
        os.environ["JAVA_HOME"] = java_home.strip().decode()

    print("building Java wrapper", flush=True)
    with_cuda = options.use_cuda == "ON"
    build_dir = ROOT / "build-gpu" if with_cuda else ROOT / "build"
    mkdir(build_dir)
    cmake_options = cmake_config(options)
    build(cmake_options, build_dir)

    print("copying native library", flush=True)
    copy_native_library()

    print("copying train/test files", flush=True)
    copy_test_resources(use_cuda=options.use_cuda == "ON")
186210

187211

188212
if __name__ == "__main__":
@@ -195,5 +219,5 @@ def native_build(cli_args: argparse.Namespace) -> None:
195219
parser.add_argument("--use-debug", type=str, choices=["ON", "OFF"], default="OFF")
196220
parser.add_argument("--use-nvtx", type=str, choices=["ON", "OFF"], default="OFF")
197221
parser.add_argument("--plugin-rmm", type=str, choices=["ON", "OFF"], default="OFF")
198-
cli_args = parser.parse_args()
199-
native_build(cli_args)
222+
parsed_args = parser.parse_args()
223+
native_build(parsed_args)

python-package/xgboost/dask/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
def get_n_threads(local_param: Dict[str, Any], worker: "distributed.Worker") -> int:
1919
"""Get the number of threads from a worker and the user-supplied parameters."""
20-
# dask worker nthreads, "state" is available in 2022.6.1
21-
dwnt = worker.state.nthreads if hasattr(worker, "state") else worker.nthreads
20+
# dask worker nthreads
21+
dwnt = worker.state.nthreads
2222
n_threads = None
2323
for p in ["nthread", "n_jobs"]:
2424
if local_param.get(p, None) is not None and local_param.get(p, dwnt) != dwnt:

0 commit comments

Comments
 (0)