View
682
Download
2
Category
Preview:
Citation preview
Application Areas
• Image/video/sound processing
• Games (AI, rendering)
• General main-thread offloading
• asm.js (C++ threads, locks, atomics)
• Code generation (Clojure?)
• Communication without the event loop
• Streaming compressed assets without main thread jank
• Augmented Reality asynchronous image recognition
Parallelism: Workers may be enough
• Workers are heavier than system threads but work
• Core saturation appears to be a coordination issue
Parallel JavaScript
• Casual API but not good for casual programmers
• Warm-up costs
• Memory overhead of functional style
arr.mapPar((x) => x+1)
• Implementation complexity
Why postMessage() is not enough
• Performance just isn’t there
• Limited to no-shared-state computation
Why buffer transfer is not enough
• Shared Memory but effectively one-dimensional striping of arrays
• No solution for higher dimension issues without a copy (and takes
same hit as PJS memory overuse)
• No communication through memory (back to postMessage
overhead and event loop)
• No efficient translation from C++ pthread model
• Forces a priori segregation of data which may not always be
possible or efficient
Design Criteria
• Native-like performance
• Not dependent on main thread event loop
• Implementation versatility
• Support applications and algorithms based on threads/pthreads
• Support Extensible Web philosophy
Main Worker
new Worker()
postMessage(Ready)
runTest()
postMessage( {item:0})
postMessage(item++)
output result
count = iterations
postMessage(item)
// Main-thread side of the postMessage ping-pong benchmark: a single message
// object is bounced between this page and the worker until `iterations`
// replies have been received, then the message rate is reported.
var iterations = 100000;
var w = new Worker("test-postmsg-worker.js");

// The worker's first reply is routed to workerReady; after that, replies go
// to processMsg.
w.onmessage = workerReady;
w.postMessage([iterations]);

// Reply counter and start timestamp of the run in progress (set by runTest).
var i;
var start;

/**
 * Handles the worker's first message: switches the message handler to
 * processMsg and enables the element with id "button".
 */
function workerReady(ev) {
  w.onmessage = processMsg;
  var button = document.getElementById("button");
  button.disabled = false;
}

/**
 * Starts one benchmark run: disables the button, resets the counter and the
 * clock, and sends the initial {item: 0} message to the worker.
 * `msg` is not defined in this snippet; it is expected to exist elsewhere.
 */
function runTest() {
  var button = document.getElementById("button");
  button.disabled = true;
  msg("Master waiting");
  i = 0;
  start = Date.now();
  w.postMessage({ item: 0 });
}

/**
 * Handles one reply from the worker. Until `iterations` replies have been
 * counted, the received object is sent straight back; on the final reply the
 * result and the throughput are reported and the button is re-enabled.
 */
function processMsg(ev) {
  var payload = ev.data;
  i += 1;
  if (i !== iterations) {
    w.postMessage(payload);
    return;
  }
  msg("Should be " + iterations + ": " + payload.item);
  // Each counted reply corresponds to two messages (one out, one back).
  var elapsedMs = Date.now() - start;
  msg(Math.round((1000 * (2 * iterations)) / elapsedMs) + " messages/s");
  document.getElementById("button").disabled = false;
}
// Main-thread side of the postMessage ping-pong benchmark: a single message
// object is bounced between this page and the worker until `iterations`
// replies have been received, then the message rate is reported.
var iterations = 100000;
var w = new Worker("test-postmsg-worker.js");

// The worker's first reply is routed to workerReady; after that, replies go
// to processMsg.
w.onmessage = workerReady;
w.postMessage([iterations]);

// Reply counter and start timestamp of the run in progress (set by runTest).
var i;
var start;

/**
 * Handles the worker's first message: switches the message handler to
 * processMsg and enables the element with id "button".
 */
function workerReady(ev) {
  w.onmessage = processMsg;
  var button = document.getElementById("button");
  button.disabled = false;
}

/**
 * Starts one benchmark run: disables the button, resets the counter and the
 * clock, and sends the initial {item: 0} message to the worker.
 * `msg` is not defined in this snippet; it is expected to exist elsewhere.
 */
function runTest() {
  var button = document.getElementById("button");
  button.disabled = true;
  msg("Master waiting");
  i = 0;
  start = Date.now();
  w.postMessage({ item: 0 });
}

/**
 * Handles one reply from the worker. Until `iterations` replies have been
 * counted, the received object is sent straight back; on the final reply the
 * result and the throughput are reported and the button is re-enabled.
 */
function processMsg(ev) {
  var payload = ev.data;
  i += 1;
  if (i !== iterations) {
    w.postMessage(payload);
    return;
  }
  msg("Should be " + iterations + ": " + payload.item);
  // Each counted reply corresponds to two messages (one out, one back).
  var elapsedMs = Date.now() - start;
  msg(Math.round((1000 * (2 * iterations)) / elapsedMs) + " messages/s");
  document.getElementById("button").disabled = false;
}
// Worker side of the postMessage ping-pong benchmark. The first incoming
// message is handled by stage1; every later one by stage2.
onmessage = stage1;

/**
 * Handles the first message: reads the iteration count from the payload,
 * installs stage2 as the message handler and replies "ready".
 */
function stage1(ev) {
  // Read from the payload but not used anywhere else in this worker.
  var iterationCount = ev.data[0];
  onmessage = stage2;
  postMessage("ready");
}

/**
 * Handles every later message: increments the `item` field of the received
 * object and posts the object back.
 */
function stage2(ev) {
  var message = ev.data;
  message.item++;
  postMessage(message);
}
// Worker side of the postMessage ping-pong benchmark. The first incoming
// message is handled by stage1; every later one by stage2.
onmessage = stage1;

/**
 * Handles the first message: reads the iteration count from the payload,
 * installs stage2 as the message handler and replies "ready".
 */
function stage1(ev) {
  // Read from the payload but not used anywhere else in this worker.
  var iterationCount = ev.data[0];
  onmessage = stage2;
  postMessage("ready");
}

/**
 * Handles every later message: increments the `item` field of the received
 * object and posts the object back.
 */
function stage2(ev) {
  var message = ev.data;
  message.item++;
  postMessage(message);
}
// Main-thread side of the shared-integer ping-pong benchmark: master and
// worker take turns incrementing a shared counter, handing over the turn
// through a SynchronicInt32 that lives in the same shared buffer.
var iterations = 500000;
var bufSize = 1024; // Should be divisible by 2 and "large enough"

var w = new Worker("test-sendint-worker.js");
var sab = new SharedArrayBuffer(bufSize);

// Setup our state first.
var s = new SynchronicInt32(sab, 0);
var locIdx = 512;

// Kick off the worker and wait for a message that it is ready.
w.onmessage = workerReady;
w.postMessage([sab, 0, 512, iterations], [sab]);

console.log("Master waiting");

/**
 * Runs the benchmark once the worker has reported in. Each round increments
 * the shared counter, adds 1 to the synchronic, and then calls expectUpdate
 * with the value this side just produced.
 * NOTE(review): expectUpdate presumably blocks until the synchronic no
 * longer holds that value (i.e. the worker has taken its turn) — confirm
 * against synchronic.js.
 */
function workerReady(ev) {
  const iab = new SharedInt32Array(sab, locIdx, 1);
  const start = Date.now();

  let round = 0;
  while (round < iterations) {
    iab[0]++;
    // add() result + 1 is used as the value this side has just written.
    const before = s.add(1);
    s.expectUpdate(before + 1, Number.POSITIVE_INFINITY);
    round++;
  }

  const end = Date.now();

  // Both sides increment the counter once per round, hence iterations*2.
  console.log("Should be " + iterations * 2 + ": " + iab[0]);
  console.log(Math.round((1000 * (2 * iterations)) / (end - start)) + " messages/s");
}
// Main-thread side of the shared-integer ping-pong benchmark: master and
// worker take turns incrementing a shared counter, handing over the turn
// through a SynchronicInt32 that lives in the same shared buffer.
var iterations = 500000;
var bufSize = 1024; // Should be divisible by 2 and "large enough"

var w = new Worker("test-sendint-worker.js");
var sab = new SharedArrayBuffer(bufSize);

// Setup our state first.
var s = new SynchronicInt32(sab, 0);
var locIdx = 512;

// Kick off the worker and wait for a message that it is ready.
w.onmessage = workerReady;
w.postMessage([sab, 0, 512, iterations], [sab]);

console.log("Master waiting");

/**
 * Runs the benchmark once the worker has reported in. Each round increments
 * the shared counter, adds 1 to the synchronic, and then calls expectUpdate
 * with the value this side just produced.
 * NOTE(review): expectUpdate presumably blocks until the synchronic no
 * longer holds that value (i.e. the worker has taken its turn) — confirm
 * against synchronic.js.
 */
function workerReady(ev) {
  const iab = new SharedInt32Array(sab, locIdx, 1);
  const start = Date.now();

  let round = 0;
  while (round < iterations) {
    iab[0]++;
    // add() result + 1 is used as the value this side has just written.
    const before = s.add(1);
    s.expectUpdate(before + 1, Number.POSITIVE_INFINITY);
    round++;
  }

  const end = Date.now();

  // Both sides increment the counter once per round, hence iterations*2.
  console.log("Should be " + iterations * 2 + ": " + iab[0]);
  console.log(Math.round((1000 * (2 * iterations)) / (end - start)) + " messages/s");
}
importScripts("../src/synchronic.js");
// Worker side of the shared-integer ping-pong benchmark. The single incoming
// message carries [shared buffer, synchronic index, counter offset,
// iteration count].
onmessage = function (ev) {
  const [sab, sabIdx, locIdx, iterations] = ev.data;

  // Initialize our state
  // NOTE(review): sabIdx is received but the synchronic is created at a
  // hard-coded 0 — confirm whether sabIdx was meant to be used here.
  const s = new SynchronicInt32(sab, 0);
  const iab = new SharedInt32Array(sab, locIdx, 1);

  // Let the master know we're ready to go
  postMessage("ready");

  // `expected` is the value passed to expectUpdate before taking a turn: 0
  // initially, afterwards the value this worker itself just wrote.
  // NOTE(review): expectUpdate presumably blocks until the synchronic holds
  // a different value — confirm against synchronic.js.
  let expected = 0;
  for (let round = 0; round < iterations; round++) {
    s.expectUpdate(expected, Number.POSITIVE_INFINITY);
    iab[0]++;
    expected = s.add(1) + 1;
  }

  console.log("Worker exiting");
};
importScripts("../src/synchronic.js");
// Worker side of the shared-integer ping-pong benchmark. The single incoming
// message carries [shared buffer, synchronic index, counter offset,
// iteration count].
onmessage = function (ev) {
  const [sab, sabIdx, locIdx, iterations] = ev.data;

  // Initialize our state
  // NOTE(review): sabIdx is received but the synchronic is created at a
  // hard-coded 0 — confirm whether sabIdx was meant to be used here.
  const s = new SynchronicInt32(sab, 0);
  const iab = new SharedInt32Array(sab, locIdx, 1);

  // Let the master know we're ready to go
  postMessage("ready");

  // `expected` is the value passed to expectUpdate before taking a turn: 0
  // initially, afterwards the value this worker itself just wrote.
  // NOTE(review): expectUpdate presumably blocks until the synchronic holds
  // a different value — confirm against synchronic.js.
  let expected = 0;
  for (let round = 0; round < iterations; round++) {
    s.expectUpdate(expected, Number.POSITIVE_INFINITY);
    iab[0]++;
    expected = s.add(1) + 1;
  }

  console.log("Worker exiting");
};
// Main-thread side of the channel-based ping-pong benchmark: a message object
// is sent to the worker and received back `iterations` times through a
// MyPostMessage built on a sender/receiver pair sharing one buffer.
// ChannelSender, ChannelReceiver and MyPostMessage are not defined in this
// snippet; they are expected to be loaded separately.
var iterations = 100000;
var bufSize = 8192; // Should be divisible by 2 and "large enough"
                    // (8K is much more than needed for this test)

var w = new Worker("test-sendmsg-worker.js");
var sab = new SharedArrayBuffer(bufSize);

// Setup our state first.
// The master sends in the first half of the buffer and receives in the
// second half; the indices posted to the worker below mirror that split.
var s = new ChannelSender(sab, 0, bufSize/2);
var r = new ChannelReceiver(sab, bufSize/2, bufSize/2);
var m = new MyPostMessage(s, r);

// Kick off the worker and wait for a message that it is ready.
w.onmessage = workerReady;
w.postMessage([sab, iterations, 0, bufSize/2, bufSize/2, bufSize/2], [sab]);

console.log("Master waiting");

/**
 * Runs the benchmark once the worker has posted its first message: sends the
 * current object, replaces it with whatever comes back, and finally logs the
 * resulting `item` value and the message rate.
 */
function workerReady(ev) {
  var start = Date.now();

  var c = {item:0};
  for ( var i=0 ; i < iterations ; i++ ) {
    m.send(c);
    c = m.receive();
  }

  var end = Date.now();

  console.log("Should be " + iterations + ": " + c.item);
  // Each round is two messages: one sent and one received.
  console.log(Math.round(1000 * (2*iterations) / (end - start)) + " messages/s");
}
// Main-thread side of the channel-based ping-pong benchmark: a message object
// is sent to the worker and received back `iterations` times through a
// MyPostMessage built on a sender/receiver pair sharing one buffer.
// ChannelSender, ChannelReceiver and MyPostMessage are not defined in this
// snippet; they are expected to be loaded separately.
var iterations = 100000;
var bufSize = 8192; // Should be divisible by 2 and "large enough"
                    // (8K is much more than needed for this test)

var w = new Worker("test-sendmsg-worker.js");
var sab = new SharedArrayBuffer(bufSize);

// Setup our state first.
// The master sends in the first half of the buffer and receives in the
// second half; the indices posted to the worker below mirror that split.
var s = new ChannelSender(sab, 0, bufSize/2);
var r = new ChannelReceiver(sab, bufSize/2, bufSize/2);
var m = new MyPostMessage(s, r);

// Kick off the worker and wait for a message that it is ready.
w.onmessage = workerReady;
w.postMessage([sab, iterations, 0, bufSize/2, bufSize/2, bufSize/2], [sab]);

console.log("Master waiting");

/**
 * Runs the benchmark once the worker has posted its first message: sends the
 * current object, replaces it with whatever comes back, and finally logs the
 * resulting `item` value and the message rate.
 */
function workerReady(ev) {
  var start = Date.now();

  var c = {item:0};
  for ( var i=0 ; i < iterations ; i++ ) {
    m.send(c);
    c = m.receive();
  }

  var end = Date.now();

  console.log("Should be " + iterations + ": " + c.item);
  // Each round is two messages: one sent and one received.
  console.log(Math.round(1000 * (2*iterations) / (end - start)) + " messages/s");
}
importScripts("../src/arena.js", "../src/synchronic.js", "../src/marshaler.js", "../src/intqueue.js", "../src/channel.js");
// Worker side of the channel-based ping-pong benchmark. The single incoming
// message carries [shared buffer, iteration count, receive index, receive
// length, send index, send length].
onmessage = function (ev) {
  var [sab, iterations, recvIdx, recvLength, sendIdx, sendLength] = ev.data;

  // Initialize our state
  var r = new ChannelReceiver(sab, recvIdx, recvLength);
  var s = new ChannelSender(sab, sendIdx, sendLength);
  // NOTE(review): arguments are passed as (receiver, sender) here — verify
  // the expected order against the MyPostMessage constructor.
  var m = new MyPostMessage(r, s);

  // Let the master know we're ready to go
  postMessage("ready");

  // Each round: receive an object, increment its `item`, send it back.
  var c = {item:-1};
  for ( var i=0 ; i < iterations ; i++ ) {
    c = m.receive();
    c.item++;
    m.send(c);
  }

  console.log("Worker exiting");
};
importScripts("../src/arena.js", "../src/synchronic.js", "../src/marshaler.js", "../src/intqueue.js", "../src/channel.js");
// Worker side of the channel-based ping-pong benchmark. The single incoming
// message carries [shared buffer, iteration count, receive index, receive
// length, send index, send length].
onmessage = function (ev) {
  var [sab, iterations, recvIdx, recvLength, sendIdx, sendLength] = ev.data;

  // Initialize our state
  var r = new ChannelReceiver(sab, recvIdx, recvLength);
  var s = new ChannelSender(sab, sendIdx, sendLength);
  // NOTE(review): arguments are passed as (receiver, sender) here — verify
  // the expected order against the MyPostMessage constructor.
  var m = new MyPostMessage(r, s);

  // Let the master know we're ready to go
  postMessage("ready");

  // Each round: receive an object, increment its `item`, send it back.
  var c = {item:-1};
  for ( var i=0 ; i < iterations ; i++ ) {
    c = m.receive();
    c.item++;
    m.send(c);
  }

  console.log("Worker exiting");
};
Incrementing Worker
Communication Messages per second
postMessage 54,245
SharedInt32Array 6,329,114
MyPostMessage 275,482
Design Criteria: How did we do?
• Native-like performance
• Not dependent on main thread event loop
• Implementation versatility
• Support Extensible Web philosophy
• Support applications and algorithms based on threads/pthreads
JavaScript Mandelbrot
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 -
5.00
10.00
15.00
20.00
25.00
30.00
35.00 Scalar SSE
Unity WebGL Benchmark
Asteroid Field
2D Physics Boxes
Mandelbrot GPU
CryptoHash Script
Instantiate & Destroy
Particles
Physics Spheres
Physics Meshes
Physics Cubes
Animation & Skinning
AI Agents
2D Physics Spheres
0.0000 0.5000 1.0000 1.5000 2.0000 2.5000 3.0000
native eight four one
Design Criteria: How did we do part 2?
• Native-like performance
• Not dependent on main thread event loop
• Implementation versatility
• Support Extensible Web philosophy
• Support applications and algorithms based on threads/pthreads
✓✓✓✓✓
Unity WebGL Benchmark: Significance
• Exercises the whole Unity Engine
• pthread Unity games for the web may not be far behind
• Industrial strength example of SAB supporting automatic porting of large multi-threaded C++ codebases
• The benchmark isn’t just functional, it is fast!
Unity WebGL Benchmark: Significance
“With shared memory, the web lifts an important limitation with
parallel execution that it had compared to native, and Unity is now
able to reuse the same codebase for the web as well. Shared memory
is not comparable to a library call that could be emulated or polyfilled,
but it is a fundamental concept of parallel execution architectures. I'm
not sure if it can get any bigger than this!”
- Jukka Jylanki
What’s Next
• Shared memory is in Firefox Nightly now
• asm.js supports shared memory (and pthreads)
• It will not ride the release trains until standardized
• We are talking to TC39 about standardization
• The API might change a little
• Google has announced that they will start implementing
Further Reading
• Mozilla Blog: https://blog.mozilla.org/javascript/2015/02/26/the-path-to-parallel-javascript/
• Demo: http://clb.demon.fi/emscripten/mandelbrot_sse1_pthreads_20150429/mandelbrot_pthreads.html
• V8 Intent to Implement: https://groups.google.com/a/chromium.org/forum/#!topic/blink-dev/d-0ibJwCS24
• EWM - https://extensiblewebmanifesto.org/
• Specification, examples, demos, experiments, etc: http://axis-of-eval.org/shmem
Recommended