Skip to content

Commit 15f5a48

Browse files
xuhancn and pytorchmergebot
authored and committed
[inductor] enable Intel Compiler(icx-cl) for inductor windows (pytorch#134772)
This PR enables the Intel Compiler (`icx-cl`) for Windows inductor, like the previous PR pytorch#134444, which enabled clang. Changes: 1. Fix an icx-cl crash caused by wrong decode args; the correct decoding is "utf-8". 2. Add an Intel compiler check, and an Intel compiler Windows driver check (icx-cl). 3. Add Intel compiler OpenMP args config. 4. Add Intel compiler OpenMP binary preload. For the Intel compiler OpenMP binary path: <img width="788" alt="image" src="https://github.com/user-attachments/assets/54c76356-018d-4bef-a9b7-0ea150fd7aba"> For performance, the Intel compiler (`icx-cl`) performs much better than MSVC (`cl`): <img width="875" alt="image" src="https://github.com/user-attachments/assets/67865faf-b1de-4535-917a-486b72527204"> Appended `clang-cl` performance data: <img width="821" alt="image" src="https://github.com/user-attachments/assets/476f4568-bf58-457f-b73d-4e57f49be384"> Pull Request resolved: pytorch#134772 Approved by: https://github.com/jgong5, https://github.com/jansel
1 parent 9e0ddc0 commit 15f5a48

File tree

1 file changed

+65
-1
lines changed

1 file changed

+65
-1
lines changed

torch/_inductor/cpp_builder.py

+65-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def use_global_cache() -> bool:
5858
_IS_MACOS = sys.platform.startswith("darwin")
5959
_IS_WINDOWS = sys.platform == "win32"
6060

61-
SUBPROCESS_DECODE_ARGS = ("oem",) if _IS_WINDOWS else ()
61+
SUBPROCESS_DECODE_ARGS = ("utf-8",) if _IS_WINDOWS else ()
6262

6363
log = logging.getLogger(__name__)
6464

@@ -198,6 +198,33 @@ def _is_msvc_cl(cpp_compiler: str) -> bool:
198198
return False
199199

200200

201+
@functools.lru_cache(None)
202+
def _is_intel_compiler(cpp_compiler: str) -> bool:
203+
try:
204+
output_msg = (
205+
subprocess.check_output(
206+
[cpp_compiler, "--version"], stderr=subprocess.DEVNULL
207+
)
208+
.strip()
209+
.decode(*SUBPROCESS_DECODE_ARGS)
210+
)
211+
is_intel_compiler = "Intel" in output_msg.splitlines()[0]
212+
if is_intel_compiler:
213+
if _IS_WINDOWS:
214+
if re.search(r"((icx$)|(icx-cc$))", cpp_compiler):
215+
raise RuntimeError(
216+
"Please use icx-cl, due to torch.compile only support MSVC-like CLI (compiler flags syntax)."
217+
)
218+
return is_intel_compiler
219+
except FileNotFoundError as exc:
220+
return False
221+
except subprocess.SubprocessError:
222+
# --version args not support.
223+
return False
224+
225+
return False
226+
227+
201228
@functools.lru_cache(None)
def is_gcc() -> bool:
    """Report whether the compiler returned by ``get_cpp_compiler()`` is GCC.

    The answer is memoized after the first call.
    """
    active_compiler = get_cpp_compiler()
    return _is_gcc(active_compiler)
@@ -208,6 +235,11 @@ def is_clang() -> bool:
208235
return _is_clang(get_cpp_compiler())
209236

210237

238+
@functools.lru_cache(None)
def is_intel_compiler() -> bool:
    """Report whether the compiler returned by ``get_cpp_compiler()`` is Intel's.

    The answer is memoized after the first call.
    """
    active_compiler = get_cpp_compiler()
    return _is_intel_compiler(active_compiler)
241+
242+
211243
@functools.lru_cache(None)
def is_apple_clang() -> bool:
    """Report whether the compiler returned by ``get_cpp_compiler()`` is Apple clang.

    The answer is memoized after the first call.
    """
    active_compiler = get_cpp_compiler()
    return _is_apple_clang(active_compiler)
@@ -798,6 +830,20 @@ def perload_clang_libomp_win(cpp_compiler: str, omp_name: str) -> None:
798830
pass
799831

800832

833+
@functools.lru_cache(None)
def perload_icx_libomp_win(cpp_compiler: str) -> None:
    """Best-effort preload of the Intel OpenMP runtime (``libiomp5md.dll``).

    Asks the compiler where its OpenMP DLL lives via
    ``-print-file-name=libiomp5md.dll`` and, if that path is an existing
    file, loads it into the current process. Memoized per ``cpp_compiler``
    string, so the lookup runs at most once per compiler.

    Failures are deliberately swallowed: if the compiler cannot be run or the
    DLL cannot be loaded, this function returns without raising.

    Args:
        cpp_compiler: Name or path of the Intel compiler executable.
    """
    try:
        raw_output = subprocess.check_output(
            [cpp_compiler, "-print-file-name=libiomp5md.dll"], stderr=subprocess.DEVNULL
        )
    except (OSError, subprocess.SubprocessError):
        # Compiler missing, not executable, or the query is unsupported:
        # there is nothing to preload.
        return

    omp_path = raw_output.decode(*SUBPROCESS_DECODE_ARGS).rstrip()
    if not os.path.isfile(omp_path):
        return

    # Set before loading so the Intel runtime sees it when it initializes.
    os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
    try:
        cdll.LoadLibrary(omp_path)
    except OSError:
        # Preloading is only an optimization of the DLL search; a failed load
        # must not abort the caller.
        pass
845+
846+
801847
def _get_openmp_args(
802848
cpp_compiler: str,
803849
) -> Tuple[List[str], List[str], List[str], List[str], List[str], List[str]]:
@@ -854,10 +900,28 @@ def _get_openmp_args(
854900
# if openmp is still not available, we let the compiler to have a try,
855901
# and raise error together with instructions at compilation error later
856902
elif _IS_WINDOWS:
903+
"""
904+
On Windows, `clang` and `icx` each have their own OpenMP implementation.
905+
The OpenMP library lives in a sub-directory of the compiler installation.
906+
For dynamic library(DLL) load, the Windows native APIs are `LoadLibraryA` and `LoadLibraryExA`, and their search
907+
dependencies have some rules:
908+
https://learn.microsoft.com/en-us/windows/win32/api/libloaderapi/nf-libloaderapi-loadlibraryexa#searching-for-dlls-and-dependencies
909+
In some cases, these rules do not include the compiler's sub-directories.
910+
So, it can't search and load compiler's openmp library correctly.
911+
And then, the whole application would be broken.
912+
913+
To avoid OpenMP load failures, we automatically locate the OpenMP binary and preload it.
914+
1. For clang, the function is `perload_clang_libomp_win`.
915+
2. For icx, the function is `perload_icx_libomp_win`.
916+
"""
857917
if _is_clang(cpp_compiler):
858918
cflags.append("openmp")
859919
libs.append("libomp")
860920
perload_clang_libomp_win(cpp_compiler, "libomp.dll")
921+
elif _is_intel_compiler(cpp_compiler):
922+
cflags.append("Qiopenmp")
923+
libs.append("libiomp5md")
924+
perload_icx_libomp_win(cpp_compiler)
861925
else:
862926
# /openmp, /openmp:llvm
863927
# llvm on Windows, new openmp: https://devblogs.microsoft.com/cppblog/msvc-openmp-update/

0 commit comments

Comments
 (0)