Description
Firstly, I'd like to express my admiration for the remarkable work done on this project. The advancements and capabilities it offers are truly impressive.
I've been diligently following the provided "walkthrough.ipynb" to familiarize myself with the pipeline. Unfortunately, I encountered an issue with the trace part, which seems to malfunction, possibly due to updates in the diffusers library. To circumvent this, I opted for a simplified network module as demonstrated below:
import torch
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x + 1
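For completeness, here is the setup the snippets below assume; the (1, 3) float32 CPU input matches the exported graph further down:

net = Net().eval()
dtype = torch.float32
device = "cpu"
input = torch.ones(1, 3)  # shape/dtype taken from the exported graph below
print(net(input))         # quick eager check: tensor([[2., 2., 2.]])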
Following this, I proceeded to convert the network to ONNX format and subsequently to IR:
trace = torch.jit.trace(net, input.to(dtype).to(device))
torch.onnx.export(
    trace, input.to(dtype).to(device), "test/net.onnx",
    verbose=True, input_names=["input"], output_names=["input"],
)
# Exported graph: graph(%input.1 : Float(1, 3, strides=[3, 1], requires_grad=0, device=cpu)):
# %/Constant_output_0 : Float(requires_grad=0, device=cpu) = onnx::Constant[value={1}, onnx_name="/Constant"](), scope: Net:: # /tmp/ipykernel_3004661/4051825751.py:6:0
# %input : Float(1, 3, strides=[3, 1], requires_grad=0, device=cpu) = onnx::Add[onnx_name="/Add"](%input.1, %/Constant_output_0), scope: Net:: # /tmp/ipykernel_3004661/4051825751.py:6:0
# return (%input)
# ============= Diagnostic Run torch.onnx.export version 2.0.0+cu117 =============
# verbose: False, log level: Level.ERROR
# ======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================
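As a quick sanity check of the exported model itself (using onnxruntime, which is an extra dependency and not part of the walkthrough), the following prints [[2. 2. 2.]] as expected:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("test/net.onnx")
x = np.ones((1, 3), dtype=np.float32)
(y,) = sess.run(None, {"input": x})  # single output (also named "input" above)
print(y)                             # [[2. 2. 2.]]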
import onnx
from tvm.relax.frontend.onnx import from_onnx

onnx_model_path = "test/net.onnx"
model = onnx.load(onnx_model_path)
tvm_model = from_onnx(model, keep_params_in_input=True)
tvm_model
# # from tvm.script import ir as I
# # from tvm.script import relax as R
# @I.ir_module
# class Module:
#     @R.function
#     def main(input_1: R.Tensor((1, 3), dtype="float32")) -> R.Tensor((1, 3), dtype="float32"):
#         R.func_attr({"num_input": 1})
#         with R.dataflow():
#             gv: R.Tensor((1, 3), dtype="float32") = R.add(input_1, R.const(1, "float32"))
#             R.output(gv)
#         return gv
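For what it's worth, the converted module can also be checked natively before the wasm build (a minimal sketch, assuming a local LLVM-enabled TVM build and that relax.build's default pipeline legalizes R.add, as it appears to for the wasm build below):

import numpy as np
import tvm
from tvm import relax

ex_local = relax.build(tvm_model, target="llvm")
vm_local = relax.VirtualMachine(ex_local, tvm.cpu())
out = vm_local["main"](tvm.nd.array(np.ones((1, 3), dtype="float32")))
print(out.numpy())  # expected: [[2. 2. 2.]]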
After that, I compiled it to wasm:
import tvm
from tvm import relax

tvm_model, model_params = relax.frontend.detach_params(tvm_model)  # no params actually
target = tvm.target.Target(
    "webgpu", host="llvm -mtriple=wasm32-unknown-unknown-wasm"
)
ex = relax.build(mod=tvm_model, target=target)
ex.export_library("test/net.wasm")
Finally, I used the following JS to run it:
const tvmjs = require("./public/dist/tvmjs.bundle.js");
const EmccWASI = require("./public/dist/tvmjs_runtime.wasi.js");
window.tvmjs = tvmjs;

async function asyncInitTVM() {
  const wasmSource = await (await fetch("./public/net.wasm")).arrayBuffer();

  const logger = function (message) {
    console.log(message);
  };

  const tvm = await tvmjs.instantiate(
    new Uint8Array(wasmSource),
    new EmccWASI(),
    logger
  );

  const output = await tvmjs.detectGPUDevice();
  if (output !== undefined) {
    let label = "WebGPU";
    if (output.adapterInfo.description.length != 0) {
      label += " - " + output.adapterInfo.description;
    } else {
      label += " - " + output.adapterInfo.vendor;
    }
    console.log("Initialize GPU device: " + label);
    tvm.initWebGPU(output.device);
  } else {
    console.log("This browser env does not support WebGPU");
  }

  let device, vm, net;
  tvm.withNewScope(() => {
    device = tvm.webgpu();
    // device = tvm.cpu();
    vm = tvm.detachFromCurrentScope(tvm.createVirtualMachine(device));
    net = tvm.detachFromCurrentScope(vm.getFunction("main"));
  });

  // This is the line that throws (see the error below).
  await tvm.asyncLoadWebGPUPipelines(vm.getInternalModule());

  const input_cpu = tvm.withNewScope(() => {
    return tvm.detachFromCurrentScope(
      tvm.empty([1, 3], "float32", tvm.cpu()).copyFrom([1, 1, 1])
    );
  });
  const input_gpu = tvm.withNewScope(() => {
    return tvm.detachFromCurrentScope(tvm.empty([1, 3], "float32", device));
  });
  input_gpu.copyFrom(input_cpu);
  await tvm.webgpu().sync();
  console.log("input_cpu", input_cpu.toArray());

  let output_gpu;
  tvm.withNewScope(() => {
    output_gpu = tvm.detachFromCurrentScope(net(input_gpu));
  });

  // Pre-filled with dummy values [2, 3, 4] so I can tell whether the copy happened.
  const output_cpu = tvm.withNewScope(() => {
    return tvm.detachFromCurrentScope(
      tvm.empty([1, 3], "float32", tvm.cpu()).copyFrom([2, 3, 4])
    );
  });
  output_cpu.copyFrom(output_gpu);
  await tvm.webgpu().sync();
  console.log("output_cpu", output_cpu.toArray());
}

asyncInitTVM();
However, I've hit a roadblock during execution, specifically at the await tvm.asyncLoadWebGPUPipelines(vm.getInternalModule()) call, where the console shows the following error:
tvmjs.bundle.js:1863 Uncaught (in promise) Error: Cannot find function webgpu.get_fmap
at Module.getFunction (tvmjs.bundle.js:1863:23)
at Instance.eval (tvmjs.bundle.js:2791:38)
at Generator.next (<anonymous>)
at eval (tvmjs.bundle.js:28:75)
at new Promise (<anonymous>)
at __awaiter (tvmjs.bundle.js:24:16)
at Instance.asyncLoadWebGPUPipelines (tvmjs.bundle.js:2786:20)
at asyncInitTVM (main.js:48:15)
In addition, I found that when I use llvm as the build target instead of webgpu, use tvm.cpu() as the device, and skip the asyncLoadWebGPUPipelines line, the example works.
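For reference, this is the CPU-only build variant I mean (a sketch; the exact target string is my assumption, mirroring the host triple used above):

# CPU-only wasm build that works for me (no webgpu device target):
target_cpu = tvm.target.Target("llvm -mtriple=wasm32-unknown-unknown-wasm")
ex_cpu = relax.build(mod=tvm_model, target=target_cpu)
ex_cpu.export_library("test/net_cpu.wasm")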
Given the scarcity of detailed documentation and tutorials on integrating custom networks with tvmjs, especially regarding WebGPU support, I find myself in need of your expertise and guidance.
Could you please help me identify any potential missteps in my approach? I am particularly interested in ensuring that my network can be successfully operated using tvmjs and would greatly appreciate any insights or suggestions you might have.
Thank you very much for your time and assistance.