Description
Describe the issue
I'm wondering whether the onnxruntime-web package has any Node.js-specific optimisations in it when using the wasm execution provider. I know there's an onnxruntime-node package, but I'm asking specifically about the onnxruntime-web package here.
I assumed that since Node.js, Deno, and Chrome all use V8 they'd have similar performance, but Node.js is roughly 1.6x faster than Deno and about 2x faster than Chrome in the benchmarks below.
I'm hoping to be able to achieve the observed Node.js-level performance in Deno and in the browser.
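In case a configuration difference explains part of the gap, these are the wasm-related flags I'm aware of in onnxruntime-web that I could try toggling. Treat this as an untested sketch; the simd and proxy flags are my assumptions about what's relevant here, not something I've verified against 1.15.1:
// Untested sketch: wasm flags that might differ between the three runtimes.
// simd and proxy are assumptions on my part; numThreads is already pinned
// to 1 in every benchmark below.
ort.env.wasm.numThreads = 1; // single-threaded in all benchmarks below
ort.env.wasm.simd = true;    // request the SIMD build of the wasm binary
ort.env.wasm.proxy = false;  // don't offload inference to a proxy worker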
To reproduce
Deno - ~510ms per inference
// Deno v1.35.2
// deno run --allow-net main.js
await import("https://cdn.jsdelivr.net/npm/[email protected]/dist/ort.js");
ort.env.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/[email protected]/dist/";
ort.env.wasm.numThreads = 1;
console.log("Downloading model...");
let executionProviders = ["wasm"];
let onnxImageSession = await ort.InferenceSession.create("https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx", { executionProviders });
for(let i = 0; i < 5; i++) {
  let data = new Float32Array(3*224*224).map(n => Math.random()-0.5);
  const feeds = {'input': new ort.Tensor('float32', data, [1,3,224,224])};
  let t = performance.now();
  console.log("Starting inference...");
  const results = await onnxImageSession.run(feeds);
  console.log(`Finished inference in ${performance.now()-t}ms`);
}
Chrome Browser - ~640ms per inference
Same code as the Deno example; just paste it into your DevTools console.
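In case it helps narrow this down, the following should list which .wasm binary the page actually fetched (e.g. ort-wasm.wasm vs ort-wasm-simd.wasm). This is just a sketch using the standard Resource Timing API, so it's browser-only, and the exact file names are my assumption:
// Sketch: list the .wasm files the page has fetched so far (browser-only).
// Run this after the session has been created.
console.log(
  performance.getEntriesByType("resource")
    .map(entry => entry.name)
    .filter(name => name.endsWith(".wasm"))
);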
Node.js - ~320ms per inference
// Node.js v18.15.0
// npm init -y
// npm install [email protected]
const ort = require('onnxruntime-web');
ort.env.wasm.numThreads = 1;
(async function() {
  console.log("Downloading model...");
  let executionProviders = ["wasm"];
  let onnxImageSession = await ort.InferenceSession.create("https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx", { executionProviders });
  for(let i = 0; i < 5; i++) {
    let data = new Float32Array(3*224*224).map(n => Math.random()-0.5);
    const feeds = {'input': new ort.Tensor('float32', data, [1,3,224,224])};
    let t = performance.now();
    console.log("Starting inference...");
    const results = await onnxImageSession.run(feeds);
    console.log(`Finished inference in ${performance.now()-t}ms`);
  }
})();
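For comparison, here's a sketch of roughly the same benchmark against the native onnxruntime-node package. This is untested; I'm assuming [email protected] (matching the web package version) and the "cpu" execution provider, and the model is saved to a local file first since I don't believe the node package accepts URLs:
// npm install [email protected]  (assumed version, matching the web package)
const fs = require('fs');
const ortNode = require('onnxruntime-node');
(async function() {
  const modelPath = 'clip-image-vit-32-float32.onnx'; // local copy of the model (assumed filename)
  if (!fs.existsSync(modelPath)) {
    console.log("Downloading model...");
    const resp = await fetch("https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx");
    fs.writeFileSync(modelPath, Buffer.from(await resp.arrayBuffer()));
  }
  const session = await ortNode.InferenceSession.create(modelPath, { executionProviders: ["cpu"] });
  for(let i = 0; i < 5; i++) {
    let data = new Float32Array(3*224*224).map(n => Math.random()-0.5);
    const feeds = {'input': new ortNode.Tensor('float32', data, [1,3,224,224])};
    let t = performance.now();
    const results = await session.run(feeds);
    console.log(`Finished inference in ${performance.now()-t}ms`);
  }
})();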
Urgency
Not urgent.
Platform
Linux
OS Version
22.04
ONNX Runtime Installation
Released Package
ONNX Runtime Version or Commit ID
1.15.1
ONNX Runtime API
JavaScript
Architecture
X64
Execution Provider
Other / Unknown
Execution Provider Library Version
wasm 1.15.1
Model File
Model used in the above code examples: https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx
Is this a quantized model?
No