Skip to content

Commit dfc9bfc

Browse files
xuhancn authored and
pytorchmergebot committed
[reland][inductor] switch AotCodeCompiler to new cpp_builder (pytorch#130127)
Changes: 1. Switch `AotCodeCompiler` to the new cpp_builder. 2. Keep using `deprecated_cpp_compile_command` only for `fb_code`, because I have no access to the Meta internal environment and cannot debug that path any further. 3. Add `TODO` comments asking a Meta employee to help continue this work. 4. Because of item 3, `deprecated_cpp_compile_command` remains only for the `fb_code` path that still needs to be fixed, so let's remove `validate_new_cpp_commands`. Pull Request resolved: pytorch#130127 Approved by: https://github.com/jgong5, https://github.com/jansel
1 parent f3df7de commit dfc9bfc

File tree

2 files changed

+115
-202
lines changed

2 files changed

+115
-202
lines changed

test/inductor/test_torchinductor.py

-5
Original file line numberDiff line numberDiff line change
@@ -7121,11 +7121,6 @@ def fn(x):
71217121

71227122
self.common(fn, [torch.randn(64, 64)])
71237123

7124-
def test_new_cpp_build_logical(self):
7125-
from torch._inductor.codecache import validate_new_cpp_commands
7126-
7127-
validate_new_cpp_commands()
7128-
71297124
def test_as_strided(self):
71307125
def fn(x):
71317126
return (

torch/_inductor/codecache.py

+115-197
Original file line numberDiff line numberDiff line change
@@ -1508,7 +1508,7 @@ def get_include_and_linking_paths(
15081508
return ipaths, lpaths_str, libs_str, macros, build_arch_flags
15091509

15101510

1511-
def cpp_compile_command(
1511+
def deprecated_cpp_compile_command(
15121512
input: Union[str, List[str]],
15131513
output: str,
15141514
warning_all: bool = True,
@@ -1522,6 +1522,13 @@ def cpp_compile_command(
15221522
use_mmap_weights: bool = False,
15231523
extra_flags: Sequence[str] = (),
15241524
) -> str:
1525+
"""
1526+
Please don't use this function in new development code.
1527+
It was planed to remove after we switched to new cpp_builder, but I can't access to Meta
1528+
internal environment to fix AotCodeCompiler fb_code.
1529+
TODO: need some Meta employee help on fix AotCodeCompiler fb_code, and then delete this
1530+
deprecated function.
1531+
"""
15251532
ipaths, lpaths, libs, macros, build_arch_flags = get_include_and_linking_paths(
15261533
include_pytorch, vec_isa, cuda, aot_mode
15271534
)
@@ -1576,6 +1583,20 @@ def cpp_compile_command(
15761583
).strip()
15771584

15781585

1586+
def _temp_validate_new_and_old_command(new_cmd: List[str], old_cmd: List[str]) -> None:
1587+
"""
1588+
TODO: Will remove the temp code after switch to new cpp_builder
1589+
"""
1590+
new_diff: List[str] = [x for x in new_cmd if x not in old_cmd]
1591+
old_diff: List[str] = [y for y in old_cmd if y not in new_cmd]
1592+
if new_diff or old_diff:
1593+
print("!!! new_cmd: ", new_cmd)
1594+
print("!!! old_cmd: ", old_cmd)
1595+
print("!!! new_diff: ", new_diff)
1596+
print("!!! old_diff: ", old_diff)
1597+
raise RuntimeError("Error in new and old command different.")
1598+
1599+
15791600
def run_command_and_check(cmd: str) -> None:
15801601
cmd = shlex.split(cmd)
15811602
try:
@@ -1823,7 +1844,6 @@ def _compile_consts_darwin(consts: bytes) -> str:
18231844
else os.path.splitext(input_path)[0] + ".so"
18241845
)
18251846

1826-
output_o = os.path.splitext(input_path)[0] + ".o"
18271847
consts_size = sum(
18281848
torch.ops.mkldnn._nbytes(tensor)
18291849
if tensor.is_mkldnn
@@ -1836,43 +1856,50 @@ def _compile_consts_darwin(consts: bytes) -> str:
18361856
if config.aot_inductor.force_mmap_weights:
18371857
use_mmap_weights = True
18381858

1839-
if config.aot_inductor.package:
1840-
(
1841-
object_output_name,
1842-
object_output_dir,
1843-
) = get_name_and_dir_from_output_file_path(input_path)
1844-
object_build_options = CppTorchCudaOptions(
1845-
vec_isa=picked_vec_isa,
1846-
cuda=cuda,
1847-
aot_mode=graph.aot_mode,
1848-
compile_only=True,
1849-
use_absolute_path=use_absolute_path,
1850-
use_mmap_weights=use_mmap_weights,
1851-
)
1852-
object_builder = CppBuilder(
1853-
name=object_output_name,
1854-
sources=input_path,
1855-
output_dir=object_output_dir,
1856-
BuildOption=object_build_options,
1857-
)
1858-
compile_cmd = object_builder.get_command_line()
1859-
output_o = object_builder.get_target_file_path()
1859+
(
1860+
object_output_name,
1861+
object_output_dir,
1862+
) = get_name_and_dir_from_output_file_path(input_path)
1863+
object_build_options = CppTorchCudaOptions(
1864+
vec_isa=picked_vec_isa,
1865+
cuda=cuda,
1866+
aot_mode=graph.aot_mode,
1867+
compile_only=True,
1868+
use_absolute_path=use_absolute_path,
1869+
use_mmap_weights=use_mmap_weights,
1870+
)
1871+
object_builder = CppBuilder(
1872+
name=object_output_name,
1873+
sources=input_path,
1874+
output_dir=object_output_dir,
1875+
BuildOption=object_build_options,
1876+
)
1877+
compile_cmd = object_builder.get_command_line()
1878+
output_o = object_builder.get_target_file_path()
18601879

1880+
if config.aot_inductor.package:
18611881
compile_flags = os.path.splitext(input_path)[0] + "_compile_flags.json"
18621882
object_build_options.save_flags_to_file(compile_flags)
1863-
18641883
else:
1865-
# TODO: replace this with using the CppBuilder above
1866-
compile_cmd = cpp_compile_command(
1867-
input=input_path,
1868-
output=output_o,
1869-
vec_isa=picked_vec_isa,
1870-
cuda=cuda,
1871-
aot_mode=graph.aot_mode,
1872-
compile_only=True,
1873-
use_absolute_path=use_absolute_path,
1874-
use_mmap_weights=use_mmap_weights,
1875-
)
1884+
if config.is_fbcode():
1885+
# TODO: enable AotCodeCompiler fb_code, and remove deprecated_cpp_compile_command.
1886+
compile_cmd_old = deprecated_cpp_compile_command(
1887+
input=input_path,
1888+
output=output_o,
1889+
vec_isa=picked_vec_isa,
1890+
cuda=cuda,
1891+
aot_mode=graph.aot_mode,
1892+
compile_only=True,
1893+
use_absolute_path=use_absolute_path,
1894+
use_mmap_weights=use_mmap_weights,
1895+
)
1896+
# TODO: Enable below code to debug in fb_code.
1897+
"""
1898+
_temp_validate_new_and_old_command(
1899+
compile_cmd.split(" "), compile_cmd_old.split(" ")
1900+
)
1901+
"""
1902+
compile_cmd = compile_cmd_old
18761903

18771904
log.debug("aot compilation command: %s", compile_cmd)
18781905
if fbcode_aot_cpu_re:
@@ -1935,28 +1962,25 @@ def _pad_to_alignment(raw_bytes: bytes) -> bytes:
19351962
"darwin": _compile_consts_darwin,
19361963
}[sys.platform](aot_constants)
19371964

1938-
if config.aot_inductor.package:
1939-
output_name, output_dir = get_name_and_dir_from_output_file_path(
1940-
output_so
1941-
)
1942-
so_build_options = CppTorchCudaOptions(
1943-
vec_isa=picked_vec_isa,
1944-
cuda=cuda,
1945-
aot_mode=graph.aot_mode,
1946-
use_absolute_path=use_absolute_path,
1947-
)
1948-
so_builder = CppBuilder(
1949-
name=output_name,
1950-
sources=[output_o, consts_o],
1951-
output_dir=output_dir,
1952-
BuildOption=so_build_options,
1953-
)
1954-
link_cmd = so_builder.get_command_line()
1955-
output_so = so_builder.get_target_file_path()
1965+
output_name, output_dir = get_name_and_dir_from_output_file_path(output_so)
1966+
so_build_options = CppTorchCudaOptions(
1967+
vec_isa=picked_vec_isa,
1968+
cuda=cuda,
1969+
aot_mode=graph.aot_mode,
1970+
use_absolute_path=use_absolute_path,
1971+
)
1972+
so_builder = CppBuilder(
1973+
name=output_name,
1974+
sources=[output_o, consts_o],
1975+
output_dir=output_dir,
1976+
BuildOption=so_build_options,
1977+
)
1978+
link_cmd = so_builder.get_command_line()
1979+
output_so = so_builder.get_target_file_path()
19561980

1981+
if config.aot_inductor.package:
19571982
linker_flags = os.path.splitext(input_path)[0] + "_linker_flags.json"
19581983
so_build_options.save_flags_to_file(linker_flags)
1959-
19601984
from torch._inductor.package import package_aoti
19611985

19621986
if use_mmap_weights:
@@ -1969,37 +1993,45 @@ def _pad_to_alignment(raw_bytes: bytes) -> bytes:
19691993

19701994
archive_path = package_aoti(os.path.split(input_path)[0])
19711995
return archive_path
1996+
else:
1997+
if config.is_fbcode():
1998+
# TODO: enable AotCodeCompiler fb_code, and remove deprecated_cpp_compile_command.
1999+
link_cmd_old = deprecated_cpp_compile_command(
2000+
input=[output_o, consts_o],
2001+
output=output_so,
2002+
vec_isa=picked_vec_isa,
2003+
cuda=cuda,
2004+
aot_mode=graph.aot_mode,
2005+
use_absolute_path=use_absolute_path,
2006+
)
2007+
# TODO: Enable below code to debug in fb_code.
2008+
"""
2009+
_temp_validate_new_and_old_command(
2010+
link_cmd.split(" "), link_cmd_old.split(" ")
2011+
)
2012+
"""
2013+
link_cmd = link_cmd_old
19722014

1973-
# TODO: replace this with using the CppBuilder above
1974-
link_cmd = cpp_compile_command(
1975-
input=[output_o, consts_o],
1976-
output=output_so,
1977-
vec_isa=picked_vec_isa,
1978-
cuda=cuda,
1979-
aot_mode=graph.aot_mode,
1980-
use_absolute_path=use_absolute_path,
1981-
)
2015+
log.debug("aot linkage command: %s", link_cmd)
2016+
if fbcode_aot_cpu_re:
2017+
compile_file([output_o, consts_o], output_so, link_cmd.split())
2018+
os.chmod(output_so, 0o755)
2019+
else:
2020+
run_command_and_check(link_cmd)
19822021

1983-
log.debug("aot linkage command: %s", link_cmd)
1984-
if fbcode_aot_cpu_re:
1985-
compile_file([output_o, consts_o], output_so, link_cmd.split())
1986-
os.chmod(output_so, 0o755)
1987-
else:
1988-
run_command_and_check(link_cmd)
1989-
1990-
if use_mmap_weights:
1991-
with open(output_so, "a+b") as f_so:
1992-
so_size = f_so.tell()
1993-
# Page align the weights
1994-
f_so.write(b" " * (16384 - so_size % 16384))
1995-
f_so.write(serialized_weights)
1996-
f_so.write(struct.pack("q", magic_number))
1997-
1998-
# Append cmds to the end of codegen-ed wrapper file
1999-
with open(input_path, "a") as f:
2000-
f.write("\n")
2001-
f.write(f"// Compile cmd\n// {compile_cmd}\n")
2002-
f.write(f"// Link cmd\n// {link_cmd}\n")
2022+
if use_mmap_weights:
2023+
with open(output_so, "a+b") as f_so:
2024+
so_size = f_so.tell()
2025+
# Page align the weights
2026+
f_so.write(b" " * (16384 - so_size % 16384))
2027+
f_so.write(serialized_weights)
2028+
f_so.write(struct.pack("q", magic_number))
2029+
2030+
# Append cmds to the end of codegen-ed wrapper file
2031+
with open(input_path, "a") as f:
2032+
f.write("\n")
2033+
f.write(f"// Compile cmd\n// {compile_cmd}\n")
2034+
f.write(f"// Link cmd\n// {link_cmd}\n")
20032035

20042036
return output_so
20052037

@@ -2499,120 +2531,6 @@ class CppWrapperCodeCache(CppPythonBindingsCodeCache):
24992531
)
25002532

25012533

2502-
# TODO: Will remove the temp code after switch to new cpp_builder
2503-
def _temp_validate_new_and_old_command(new_cmd: List[str], old_cmd: List[str]) -> None:
2504-
new_diff: List[str] = [x for x in new_cmd if x not in old_cmd]
2505-
old_diff: List[str] = [y for y in old_cmd if y not in new_cmd]
2506-
2507-
if new_diff or old_diff:
2508-
print("!!! new_cmd: ", new_cmd)
2509-
print("!!! old_cmd: ", old_cmd)
2510-
print("!!! new_diff: ", new_diff)
2511-
print("!!! old_diff: ", old_diff)
2512-
raise RuntimeError("Error in new and old command different.")
2513-
2514-
2515-
def _do_validate_cpp_commands(
2516-
include_pytorch: bool,
2517-
cuda: bool,
2518-
compile_only: bool,
2519-
mmap_weights: bool,
2520-
use_absolute_path: bool,
2521-
aot_mode: bool,
2522-
) -> None:
2523-
# PreCI will failed if test machine can't run cuda.
2524-
temp_dir = tempfile.TemporaryDirectory()
2525-
test_dir_path = temp_dir.name
2526-
test_cuda = torch.cuda.is_available() and cuda
2527-
input_path = os.path.join(test_dir_path, "dummy_file.cpp")
2528-
output_path = os.path.join(test_dir_path, "dummy_file.so")
2529-
extra_flags = ["-D TEST_EXTRA_FLAGS"]
2530-
if compile_only:
2531-
output_path = os.path.join(test_dir_path, "dummy_file.o")
2532-
picked_isa = pick_vec_isa()
2533-
2534-
# Simulate fb_code env:
2535-
if not (aot_mode and not use_absolute_path):
2536-
input_path = os.path.basename(input_path)
2537-
output_path = os.path.basename(output_path)
2538-
2539-
# Fix test_new_cpp_build_logical failed on MacOS
2540-
if sys.platform != "linux":
2541-
aot_mode = False
2542-
2543-
old_cmd = cpp_compile_command(
2544-
input=input_path,
2545-
output=output_path,
2546-
include_pytorch=include_pytorch,
2547-
vec_isa=picked_isa,
2548-
cuda=test_cuda,
2549-
aot_mode=aot_mode,
2550-
compile_only=compile_only,
2551-
use_absolute_path=use_absolute_path,
2552-
use_mmap_weights=mmap_weights,
2553-
extra_flags=extra_flags,
2554-
).split(" ")
2555-
2556-
name, dir = get_name_and_dir_from_output_file_path(input_path)
2557-
2558-
dummy_build_option = CppTorchCudaOptions(
2559-
vec_isa=picked_isa,
2560-
include_pytorch=include_pytorch,
2561-
cuda=test_cuda,
2562-
aot_mode=aot_mode,
2563-
compile_only=compile_only,
2564-
use_absolute_path=use_absolute_path,
2565-
use_mmap_weights=mmap_weights,
2566-
extra_flags=extra_flags,
2567-
)
2568-
2569-
dummy_builder = CppBuilder(
2570-
name=name,
2571-
sources=input_path,
2572-
output_dir=dir,
2573-
BuildOption=dummy_build_option,
2574-
)
2575-
new_cmd = dummy_builder.get_command_line().split(" ")
2576-
2577-
_temp_validate_new_and_old_command(new_cmd, old_cmd)
2578-
2579-
temp_dir.cleanup()
2580-
2581-
2582-
# TODO: Will remove the temp code after switch to new cpp_builder
2583-
# It could help on sync new cpp_builder generate same command line as the old one.
2584-
def validate_new_cpp_commands() -> None:
2585-
cuda = [True, False]
2586-
use_mmap_weights = [True, False]
2587-
compile_only = [True, False]
2588-
include_pytorch = [True, False]
2589-
use_absolute_path = [True, False]
2590-
aot_mode = [False, True]
2591-
2592-
# Try to pass it in fb_code.
2593-
if config.is_fbcode():
2594-
return
2595-
2596-
for x in cuda:
2597-
for y in use_mmap_weights:
2598-
for z in compile_only:
2599-
for m in include_pytorch:
2600-
for n in use_absolute_path:
2601-
for o in aot_mode:
2602-
print(
2603-
f"!!! cuda:{x}, use_mmap_weights:{y}, compile_only:{z}, include_pytorch:{m},"
2604-
f" use_absolute_path:{n}, aot_mode:{o}"
2605-
)
2606-
_do_validate_cpp_commands(
2607-
include_pytorch=m,
2608-
cuda=x,
2609-
mmap_weights=y,
2610-
compile_only=z,
2611-
use_absolute_path=n,
2612-
aot_mode=o,
2613-
)
2614-
2615-
26162534
@clear_on_fresh_inductor_cache
26172535
class HalideCodeCache(CppPythonBindingsCodeCache):
26182536
cache: Dict[str, Callable[[], Union[ModuleType, CDLL]]] = {}

0 commit comments

Comments
 (0)