diff --git a/.eslintrc.json b/.eslintrc.json
index dadaa4e7..ea995b62 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -50,6 +50,7 @@
     "promise/no-nesting": "off",
     "import/no-absolute-path": "off",
     "import/no-extraneous-dependencies": "off",
+    "node/no-unpublished-import": "off",
     "node/no-unpublished-require": "off",
     "no-regex-spaces": "off",
     "radix": "off"
diff --git a/README.md b/README.md
index 2e6ecbd3..fda016b3 100644
--- a/README.md
+++ b/README.md
@@ -160,10 +160,8 @@ If your application resides in a different folder, modify `modelPath` property i
 Demos are included in `/demo`:

 Browser:
-- `demo-esm`: Demo using Browser with ESM module
-- `demo-iife`: Demo using Browser with IIFE module
-- `demo-webworker`: Demo using Browser with ESM module and Web Workers
-*All three following demos are identical, they just illustrate different ways to load and work with `Human` library:*
+- `demo-esm`: Full demo using Browser with ESM module, includes selectable backends and webworkers
+- `demo-iife`: Older demo using Browser with IIFE module

 NodeJS:
 - `demo-node`: Demo using NodeJS with CJS module
diff --git a/demo/demo-webworker-worker.js b/demo/demo-esm-webworker.js
similarity index 100%
rename from demo/demo-webworker-worker.js
rename to demo/demo-esm-webworker.js
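Note: per the README change above, `demo-esm` is now the primary browser demo. A minimal sketch of loading the library the same way the ESM demo does (the relative path to `dist/human.esm.js` and the config values are illustrative, not part of this diff):

import human from './dist/human.esm.js';

async function detectLoop(video) {
  // run detection with a partial config; the library merges it with its defaults
  const result = await human.detect(video, { face: { enabled: true }, body: { enabled: true }, hand: { enabled: true } });
  // results come back as result.face, result.body, result.hand plus result.performance
  console.log(result.performance);
  requestAnimationFrame(() => detectLoop(video));
}
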
diff --git a/demo/demo-esm.js b/demo/demo-esm.js
index 7d0521b2..2fa08520 100644
--- a/demo/demo-esm.js
+++ b/demo/demo-esm.js
@@ -3,7 +3,7 @@
 import human from '../dist/human.esm.js';

 const ui = {
-  backend: 'wasm',
+  backend: 'webgl',
   baseColor: 'rgba(255, 200, 255, 0.3)',
   baseLabel: 'rgba(255, 200, 255, 0.8)',
   baseFont: 'small-caps 1.2rem "Segoe UI"',
@@ -24,6 +24,8 @@ const config = {
   hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
 };
 let settings;
+let worker;
+let timeStamp;

 function str(...msg) {
   if (!Array.isArray(msg)) return msg;
@@ -35,13 +37,30 @@ function str(...msg) {
   return line;
 }

-async function setupTF() {
+async function setupTF(input) {
+  // pause video if running before changing backend
+  const live = input.srcObject ? ((input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState > 2) && (!input.paused)) : false;
+  if (live) await input.pause();
+
+  // if user explicitly loaded tfjs, override one used in human library
+  if (window.tf) human.tf = window.tf;
+
+  // checks for wasm backend
   if (ui.backend === 'wasm') {
-    tf.env().set('WASM_HAS_SIMD_SUPPORT', false);
-    tf.env().set('WASM_HAS_MULTITHREAD_SUPPORT', true);
+    if (!window.tf) {
+      document.getElementById('log').innerText = 'Error: WASM Backend is not loaded, enable it in HTML file';
+      ui.backend = 'webgl';
+    } else {
+      human.tf = window.tf;
+      tf.env().set('WASM_HAS_SIMD_SUPPORT', false);
+      tf.env().set('WASM_HAS_MULTITHREAD_SUPPORT', true);
+    }
   }
   await human.tf.setBackend(ui.backend);
   await human.tf.ready();
+
+  // continue video if it was previously running
+  if (live) await input.play();
 }

 async function drawFace(result, canvas) {
@@ -201,45 +220,64 @@ async function drawHand(result, canvas) {
   }
 }

-async function runHumanDetect(input, canvas) {
+async function drawResults(input, result, canvas) {
+  // update fps
+  settings.setValue('FPS', Math.round(1000 / (performance.now() - timeStamp)));
+  // draw image from video
+  const ctx = canvas.getContext('2d');
+  ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
+  // draw all results
+  drawFace(result.face, canvas);
+  drawBody(result.body, canvas);
+  drawHand(result.hand, canvas);
+  // update log
+  const engine = await human.tf.engine();
+  const memory = `${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors`;
+  const gpu = engine.backendInstance.numBytesInGPU ? `GPU: ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes` : '';
   const log = document.getElementById('log');
+  log.innerText = `
+    TFJS Version: ${human.tf.version_core} | Backend: ${human.tf.getBackend()} | Memory: ${memory} ${gpu}
+    Performance: ${str(result.performance)} | Object size: ${(str(result)).length.toLocaleString()} bytes
+  `;
+}
+
+async function webWorker(input, image, canvas) {
+  if (!worker) {
+    // create new webworker
+    worker = new Worker('demo-esm-webworker.js', { type: 'module' });
+    // after receiving message from webworker, parse & draw results and send new frame for processing
+    worker.addEventListener('message', async (msg) => {
+      await drawResults(input, msg.data, canvas);
+      // eslint-disable-next-line no-use-before-define
+      requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop
+    });
+  }
+  // const offscreen = image.transferControlToOffscreen();
+  worker.postMessage({ image, config });
+}
+
+async function runHumanDetect(input, canvas) {
   const live = input.srcObject ? ((input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState > 2) && (!input.paused)) : false;
+  timeStamp = performance.now();
   // perform detect if live video or not video at all
   if (live || !(input instanceof HTMLVideoElement)) {
-    // perform detection
-    const t0 = performance.now();
-    let result;
-    try {
-      result = await human.detect(input, config);
-    } catch (err) {
-      log.innerText = err.message;
+    if (settings.getValue('Use Web Worker')) {
+      // get image data from video as we cannot send html objects to webworker
+      const offscreen = new OffscreenCanvas(canvas.width, canvas.height);
+      const ctx = offscreen.getContext('2d');
+      ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
+      const data = ctx.getImageData(0, 0, canvas.width, canvas.height);
+      // perform detection
+      await webWorker(input, data, canvas);
+    } else {
+      const result = await human.detect(input, config);
+      await drawResults(input, result, canvas);
+      if (input.readyState) requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop
     }
-    if (!result) return;
-    const t1 = performance.now();
-    // update fps
-    settings.setValue('FPS', Math.round(1000 / (t1 - t0)));
-    // draw image from video
-    const ctx = canvas.getContext('2d');
-    ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
-    // draw all results
-    drawFace(result.face, canvas);
-    drawBody(result.body, canvas);
-    drawHand(result.hand, canvas);
-    // update log
-    const engine = await human.tf.engine();
-    const memory = `${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors`;
-    const gpu = engine.backendInstance.numBytesInGPU ? `GPU: ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes` : '';
-    log.innerText = `
-      TFJS Version: ${human.tf.version_core} | Backend: ${human.tf.getBackend()} | Memory: ${memory} ${gpu}
-      Performance: ${str(result.performance)} | Object size: ${(str(result)).length.toLocaleString()} bytes
-    `;
-    // rinse & repeate
-    // if (input.readyState) setTimeout(() => runHumanDetect(), 1000); // slow loop for debugging purposes
-    if (input.readyState) requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop
   }
 }

-function setupGUI() {
+function setupUI(input) {
   // add all variables to ui control panel
   settings = QuickSettings.create(10, 10, 'Settings', document.getElementById('main'));
   const style = document.createElement('style');
@@ -266,9 +304,9 @@ function setupGUI() {
     }
     runHumanDetect(video, canvas);
   });
-  settings.addDropDown('Backend', ['webgl', 'wasm', 'cpu'], (val) => {
+  settings.addDropDown('Backend', ['webgl', 'wasm', 'cpu'], async (val) => {
     ui.backend = val.value;
-    setupTF();
+    await setupTF(input);
   });
   settings.addHTML('title', 'Enabled Models'); settings.hideTitle('title');
   settings.addBoolean('Face Detect', config.face.enabled, (val) => config.face.enabled = val);
@@ -305,6 +343,7 @@ function setupGUI() {
     config.hand.iouThreshold = parseFloat(val);
   });
   settings.addHTML('title', 'UI Options'); settings.hideTitle('title');
+  settings.addBoolean('Use Web Worker', false);
   settings.addBoolean('Draw Boxes', true);
   settings.addBoolean('Draw Points', true);
   settings.addBoolean('Draw Polygons', true);
@@ -357,17 +396,17 @@ async function setupImage() {
 }

 async function main() {
-  // initialize tensorflow
-  await setupTF();
-  // setup ui control panel
-  await setupGUI();
   // setup webcam
-  const video = await setupCamera();
+  const input = await setupCamera();
   // or setup image
-  // const image = await setupImage();
-  // setup output canvas from input object, select video or image
-  await setupCanvas(video);
+  // const input = await setupImage();
+  // setup output canvas from input object
+  await setupCanvas(input);
   // run actual detection. if input is video, it will run in a loop else it will run only once
+  // setup ui control panel
+  await setupUI(input);
+  // initialize tensorflow
+  await setupTF(input);
   // runHumanDetect(video, canvas);
 }
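Note: the renamed `demo/demo-esm-webworker.js` is a 100% similarity rename, so its contents are not shown in this diff. The main-thread code above posts `{ image, config }` and expects a detection result back as `msg.data`, so a worker counterpart consistent with that protocol could look roughly like the following sketch (hypothetical, assuming the library accepts ImageData input and that the result object is structured-cloneable):

import human from '../dist/human.esm.js';

self.addEventListener('message', async (msg) => {
  // msg.data.image is the ImageData captured from the OffscreenCanvas on the main thread,
  // msg.data.config is the same detection config used for the direct (non-worker) path
  const result = await human.detect(msg.data.image, msg.data.config);
  // post the plain result object back; drawResults() on the main thread renders it
  self.postMessage(result);
});
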
diff --git a/package.json b/package.json
index 50a60451..892f9267 100644
--- a/package.json
+++ b/package.json
@@ -37,7 +37,7 @@
     "start": "node --trace-warnings --trace-uncaught --no-deprecation demo/demo-node.js",
     "lint": "eslint src/*.js demo/*.js",
     "build-iife": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=iife --minify --external:fs --global-name=human --outfile=dist/human.js src/index.js",
-    "build-esm-bundle": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --external:fs --outfile=dist/human.esm.js src/index.js",
+    "build-esm-bundle": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --minify --external:fs --outfile=dist/human.esm.js src/index.js",
     "build-esm-nobundle": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --minify --external:@tensorflow --external:fs --outfile=dist/human.esm-nobundle.js src/index.js",
     "build-node-bundle": "esbuild --bundle --platform=node --sourcemap --target=esnext --format=cjs --minify --outfile=dist/human.cjs src/index.js",
     "build-node-nobundle": "esbuild --bundle --platform=node --sourcemap --target=esnext --format=cjs --external:@tensorflow --outfile=dist/human-nobundle.cjs src/index.js",
diff --git a/src/emotion/emotion.js b/src/emotion/emotion.js
index ff607728..bb5ec2a7 100644
--- a/src/emotion/emotion.js
+++ b/src/emotion/emotion.js
@@ -18,6 +18,7 @@ function getImage(image, size) {

 async function load(config) {
   if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
+  return models.emotion;
 }

 async function predict(image, config) {
@@ -31,7 +32,7 @@ async function predict(image, config) {
   const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
   const [r, g, b] = tf.split(resize, 3, 3);
   if (config.face.emotion.useGrayscale) {
-    // 0.2989 * R + 0.5870 * G + 0.1140 * B // https://www.mathworks.com/help/matlab/ref/rgb2gray.html
+    // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
     const r1 = tf.mul(r, [0.2989]);
     const g1 = tf.mul(g, [0.5870]);
     const b1 = tf.mul(b, [0.1140]);
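For reference, the weighted rgb2gray conversion the updated comment points to is completed by summing the three channel products computed above; the next line is not shown in this hunk, but a sketch of the assumed remaining step would be:

// r1, g1, b1 are the weighted channels from the hunk above
const grayscale = tf.addN([r1, g1, b1]);
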
diff --git a/src/index.js b/src/index.js
index 545e11a9..51ec5086 100644
--- a/src/index.js
+++ b/src/index.js
@@ -6,13 +6,18 @@
 const posenet = require('./posenet/posenet.js');
 const handpose = require('./handpose/handpose.js');
 const defaults = require('./config.js').default;
+// object that contains all initialized models
 const models = {
   facemesh: null,
-  blazeface: null,
-  ssrnet: null,
+  posenet: null,
+  handpose: null,
   iris: null,
+  age: null,
+  gender: null,
+  emotion: null,
 };

+// helper function that performs deep merge of multiple objects so it allows full inheritance with overrides
 function mergeDeep(...objects) {
   const isObject = (obj) => obj && typeof obj === 'object';
   return objects.reduce((prev, obj) => {
@@ -37,15 +42,14 @@ async function detect(input, userConfig) {
     const config = mergeDeep(defaults, userConfig);

     // load models if enabled
-    if (config.face.age.enabled) await ssrnet.loadAge(config);
-    if (config.face.gender.enabled) await ssrnet.loadGender(config);
-    if (config.face.emotion.enabled) await emotion.load(config);
+    if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
     if (config.body.enabled && !models.posenet) models.posenet = await posenet.load(config.body);
     if (config.hand.enabled && !models.handpose) models.handpose = await handpose.load(config.hand);
-    if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
-
-    tf.engine().startScope();
+    if (config.face.enabled && config.face.age.enabled && !models.age) models.age = await ssrnet.loadAge(config);
+    if (config.face.enabled && config.face.gender.enabled && !models.gender) models.gender = await ssrnet.loadGender(config);
+    if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);

+    // explicitly enable depthwiseconv since it's disabled by default due to issues with large shaders
     let savedWebglPackDepthwiseConvFlag;
     if (tf.getBackend() === 'webgl') {
       savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
@@ -58,29 +62,34 @@ async function detect(input, userConfig) {
     // run posenet
     timeStamp = performance.now();
     let poseRes = [];
+    tf.engine().startScope();
     if (config.body.enabled) poseRes = await models.posenet.estimatePoses(input, config.body);
+    tf.engine().endScope();
     perf.body = Math.trunc(performance.now() - timeStamp);

     // run handpose
     timeStamp = performance.now();
     let handRes = [];
+    tf.engine().startScope();
     if (config.hand.enabled) handRes = await models.handpose.estimateHands(input, config.hand);
+    tf.engine().endScope();
     perf.hand = Math.trunc(performance.now() - timeStamp);

     // run facemesh, includes blazeface and iris
     const faceRes = [];
     if (config.face.enabled) {
       timeStamp = performance.now();
+      tf.engine().startScope();
       const faces = await models.facemesh.estimateFaces(input, config.face);
       perf.face = Math.trunc(performance.now() - timeStamp);
       for (const face of faces) {
         // run ssr-net age & gender, inherits face from blazeface
         timeStamp = performance.now();
-        const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
+        const ssrData = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
         perf.agegender = Math.trunc(performance.now() - timeStamp);
         // run emotion, inherits face from blazeface
         timeStamp = performance.now();
-        const emotiondata = config.face.emotion.enabled ? await emotion.predict(face.image, config) : {};
+        const emotionData = config.face.emotion.enabled ? await emotion.predict(face.image, config) : {};
         perf.emotion = Math.trunc(performance.now() - timeStamp);
         face.image.dispose();
         // calculate iris distance
@@ -93,18 +102,19 @@ async function detect(input, userConfig) {
           box: face.box,
           mesh: face.mesh,
           annotations: face.annotations,
-          age: ssrdata.age,
-          gender: ssrdata.gender,
-          emotion: emotiondata,
+          age: ssrData.age,
+          gender: ssrData.gender,
+          emotion: emotionData,
           iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
         });
       }
+      tf.engine().endScope();
     }

+    // set depthwiseconv to original value
     tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
-    tf.engine().endScope();

-    // combine results
+    // combine and return results
     perf.total = Object.values(perf).reduce((a, b) => a + b);
     resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf });
   });
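With the changes above, enabled models are loaded lazily on the first detect() call and cached in the module-level `models` object, and each detector runs inside its own tf.engine() scope. A minimal usage sketch (the require path and config values are illustrative; the public API is assumed to expose `detect` as the demos do):

const human = require('./dist/human.cjs'); // in the browser, import from dist/human.esm.js instead

async function run(input) {
  // first call loads facemesh, posenet, handpose plus the age, gender and emotion models as configured
  const result = await human.detect(input, {
    face: { enabled: true, age: { enabled: true }, gender: { enabled: true }, emotion: { enabled: true } },
    body: { enabled: true },
    hand: { enabled: false },
  });
  // subsequent calls reuse the cached models
  console.log(result.performance, result.face);
}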