Skip to content

Commit 990a084

Browse files
committed
upgrade autort support for pytorch2
1 parent 0a6df04 commit 990a084

38 files changed

+26
-1807
lines changed

README.md

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,6 @@ BACKEND=c-cuda STEP=2000 COMPUTE_V1='- S = 512; einstein_v2(input_dict={"input0"
6161
# Cleanup history caches:
6262
antares clean
6363

64-
# Boot HTTP daemon for accepting searching tasks:
65-
antares rest-server
66-
67-
# Setup Plugin for Pytorch && Examples:
68-
BACKEND=c-cuda antares torch-setup
69-
BACKEND=c-mcpu antares torch-setup
70-
python3 -m antares_core.frameworks.pytorch.examples.1_hello_world
7164
```
7265

7366
## Contributing

antares/antares_compiler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -676,14 +676,14 @@ def measure_batch(inputs):
676676
eval_client.init(backend_root=backend_root)
677677
dev_id = int(os.environ.get('DEV_ID', '0'))
678678

679-
if (save_path and dump_path) is None and os.environ.get("FUNC_NAME", None) and '// [metadata] ' in device_source:
679+
if (save_path and dump_path) is None and os.environ.get("TORCH_FN", None) and '// [metadata] ' in device_source:
680680
AntaresGlobal.device_source = device_source
681681
metadata = device_source.index('// [metadata] ')
682682
metadata = device_source[metadata:device_source.index('\n', metadata)].split()[-1].encode('utf-8')
683683
hex_code = device_source.encode('utf-8')
684684
if int(os.environ.get('CODE_DEBUG', 0)) == 0:
685685
hex_code = binascii.unhexlify(eval_client.eval(kernel_path=kernel_path, dev_id=(fix_device_id if fix_device_id >= 0 else dev_id), backend_root=backend_root, compile=1)['HEX'][1:-1])
686-
with open(get_real_path(os.environ["FUNC_NAME"] + ".mod"), 'wb') as fp:
686+
with open(get_real_path(os.environ["TORCH_FN"] + ".mod"), 'wb') as fp:
687687
fp.write(metadata)
688688
fp.write(hex_code)
689689

antares/default_codegen.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,5 +354,5 @@ def translate_code(code, config):
354354
kernel_slices = translate_code(func.imported_modules[0].get_source(), best_config)
355355
return kernel_slices
356356

357-
if int(os.environ.get('TVM', 1)) == 0:
358-
from next_codegen import codegen
357+
if len(os.environ.get('TORCH_FN', '')) > 0:
358+
from torch_codegen.torch_codegen import codegen

antares/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ if [[ "${TVM}" != "0" ]] && ( [[ "$(cat ${TVM_HOME}/VERSION_TAG 2>/dev/null)" !=
5151
fi
5252

5353
if [[ "$COMPUTE_V1" == "" ]]; then
54-
export COMPUTE_V1='- einstein_v2("output0[N, M] = input0[N, M] + input1[N, M]", input_dict={"input0": {"dtype": "float32", "shape": [1024, 512]}, "input1": {"dtype": "float32", "shape": [1024, 512]}})'
54+
export COMPUTE_V1='- N = 1024 * 1024 * 64; einstein_v2("output0[N] = input0[N].call(`max`, const(0, dtype=input0.dtype()))", {"input0": {"dtype": "float32", "shape": [N]}})' antares
5555
fi
5656

5757
mkdir -p ${ANTARES_DRIVER_PATH}

backends/c-mcpu/schedule/standard/default.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def mcpu_auto_schedule(s, output, prefix):
1515
for i in range(len(output.op.reduce_axis)):
1616
slice_reduce.append(cfg.define_split(f"{prefix}:R{i}", attrs.get_extent(output.op.reduce_axis[i]), num_outputs=2, init_vals=[[-1, 4],]))
1717

18-
unroll = cfg.define_knob(f"{prefix}:UN", [1, 4, 8, 16, 32, 64], init_vals=[1,] if attrs.backend == 'c-mcpu_avx512' else [0,])
18+
unroll = cfg.define_knob(f"{prefix}:UN", [1, 4, 8, 16, 32, 64], init_vals=[1,] if attrs.backend != 'c-mcpu' else [0,])
1919

2020
output_local, = s.cache_write([output], "local")
2121

backends/c-ocl_amdgpu/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def get_execution_parallism():
2424

2525
def do_native_translation_v2(codeset, **kwargs):
2626
kernel_name, in_args, out_args, body = codeset
27-
expand_args = ', '.join([f'__global {x[0]}* {x[1]}' for x in in_args + out_args])
27+
expand_args = ', '.join([f'__global {x[0]}* __restrict {x[1]}' for x in in_args + out_args])
2828
if 'VAMAP' in os.environ:
2929
expand_args += ', ' + ', '.join([f'int {x.split(":")[0]}' if '/_' not in x.split(":")[0] else x.split(":")[0].replace('/', ' ') for x in os.environ['VAMAP'].split(',')])
3030

backends/c-rocm/schedule/standard/default.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,11 @@ def schedule(attrs):
3939
attrs.advanced_sched = config or step > 0
4040
tail_op, explicit_ops = None, [x for x in attrs.explicit_ops]
4141

42-
if (len(explicit_ops) > 1 and
43-
not explicit_ops[-1].output(0).op.reduce_axis):
42+
red = int(os.environ.get('RED', -1))
43+
if red == -1:
44+
red = (len(explicit_ops) > 1 and not explicit_ops[-1].output(0).op.reduce_axis)
45+
46+
if red:
4447
fuse_tail = attrs.auto_config.define_knob(f"FU", [False, True])
4548
tail_op = explicit_ops[-1]
4649
if fuse_tail:

backends/c-sycl_intel/include/backend.hpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace ab {
1919

2020
void init(int dev) {
2121
try {
22-
if (__BACKEND__ == "c-sycl_intel")
22+
if (__BACKEND__ != "c-sycl_cuda")
2323
_sycl_queue = std::move(sycl::queue(sycl::default_selector{}));
2424
else {
2525
// for SYCL CUDA, select the i-th GPU device
@@ -41,6 +41,10 @@ namespace ab {
4141
} catch (sycl::exception const &e) {
4242
std::terminate();
4343
}
44+
45+
int steps = getenv("STEP") ? std::atoi(getenv("STEP")) : 0;
46+
if (steps > 0)
47+
return;
4448
size_t max_compute_units = _sycl_queue.get_device().get_info<cl::sycl::info::device::max_compute_units>();
4549
size_t max_work_group_size = _sycl_queue.get_device().get_info<cl::sycl::info::device::max_work_group_size>();
4650
size_t max_mem_alloc_size = _sycl_queue.get_device().get_info<cl::sycl::info::device::max_mem_alloc_size>();
@@ -61,7 +65,7 @@ namespace ab {
6165
it.pop_back();
6266
return dptr;
6367
}
64-
// if (__BACKEND__ == "c-sycl_intel")
68+
// if (__BACKEND__ != "c-sycl_cuda")
6569
// return memalign(sysconf(_SC_PAGESIZE), byteSize);
6670
return sycl::malloc_device(byteSize, _sycl_queue);
6771
}
@@ -75,8 +79,8 @@ namespace ab {
7579
ab_utils::TempFile tempfile("cpp", source);
7680
auto path = tempfile.get_path();
7781

78-
if (__BACKEND__ == "c-sycl_intel")
79-
ab_utils::Process({"dpcpp", path, "-std=c++17", "-lpthread", "-fPIC", "-shared", "-Wno-pass-failed", "-O3", "-ffast-math", "-march=native", "-o", path + ".out"}, 10);
82+
if (__BACKEND__ != "c-sycl_cuda")
83+
ab_utils::Process({"dpcpp", path, "-std=c++17", "-lpthread", "-fPIC", "-shared", "-Wno-pass-failed", "-O3", "-ffast-math", "-Wno-deprecated", "-march=native", "-o", path + ".out"}, 10);
8084
else {
8185
std::string gpu_arch = "50"; // Corresponds to the back-end default.
8286
#ifdef SYCL_CUDA
@@ -142,7 +146,7 @@ namespace ab {
142146
}
143147

144148
((void(*)(void*, long, void* const*))hFunc[0])(&_sycl_queue, attrs, krnl_args.data());
145-
if (__BACKEND__ == "c-sycl_intel") // have to sync unlike CUDA
149+
if (__BACKEND__ != "c-sycl_cuda") // have to sync except CUDA
146150
_sycl_queue.wait();
147151
}
148152

docker/Dockerfile.c-base

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,11 @@ RUN apt-get update && apt install -y --no-install-recommends git ca-certificates
1616
RUN /bin/echo -e "set backspace=indent,eol,start\nset nocompatible\nset ts=4" > /etc/vim/vimrc.tiny
1717

1818
ADD ./engine /antares/engine
19-
RUN NO_PYTHON=1 /antares/engine/install_antares_host.sh && rm -rf /var/lib/apt/lists/* ~/.cache
20-
RUN bash -c 'rm -rf ~/.local/antares/3rdparty/tvm/build/{CMake*,Makefile,cmake_install.cmake}'
21-
RUN bash -c 'rm -rf ~/.local/antares/3rdparty/tvm/{src,include,golang,tests,3rdparty,device-stub,apps,conda,docker,docs,gallery,jvm,nnvm,rust,vta,web,cmake,.??*}'
22-
RUN echo '' > ~/.local/antares/3rdparty/tvm/python/tvm/relay/__init__.py
2319

24-
ENV ANTARES_VERSION 0.3.23.2
20+
ENV ANTARES_VERSION 0.9.0
2521

26-
RUN cd ~ && git clone https://github.com/microsoft/antares --branch latest --single-branch --depth 1 antares_core && mv ~/.local/antares/3rdparty antares_core
22+
RUN cd ~ && git clone https://github.com/microsoft/antares --branch latest --single-branch --depth 1 antares_core
23+
RUN cd ~ && curl -LO https://github.com/microsoft/antares/releases/download/v0.3.20/3rdparty.tar.gz && mkdir antares_core/3rdparty && tar xzvf 3rdparty.tar.gz -C antares_core/3rdparty >/dev/null 2>&1
2724
RUN cd ~ && sed -i "s/@VERSION@/${ANTARES_VERSION}/g" /antares/engine/dist-info/METADATA && cp -r /antares/engine/dist-info ~/antares-${ANTARES_VERSION}.dist-info
28-
RUN cd ~ && touch antares_core/__init__.py
25+
RUN cd ~ && touch antares_core/__init__.py && mv /antares/engine/torch_codegen antares_core/antares 2>/dev/null 2>&1 || true
2926
RUN cd ~ && rm -rf antares_core/.??* && zip -r /antares-${ANTARES_VERSION}-py3-none-manylinux1_x86_64.whl antares* >/dev/null

frameworks/pytorch/custom_op.py

Lines changed: 0 additions & 148 deletions
This file was deleted.

0 commit comments

Comments
 (0)