From f3bf35533eca1bb71fbb8aa8432a867beb339647 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 17 Oct 2020 07:15:23 -0400
Subject: [PATCH] added state handling

---
 README.md       |  55 ++++++++-------
 config.js       |   4 +-
 demo/browser.js | 179 ++++++++++++++++++++++++------------------------
 src/index.js    |  43 ++++++++----
 4 files changed, 151 insertions(+), 130 deletions(-)

diff --git a/README.md b/README.md
index c84b8c0c..1f92f9e4 100644
--- a/README.md
+++ b/README.md
@@ -133,15 +133,6 @@ const config = {
 }
 ```
 
-Note that when using `Human` in NodeJS, you must load and parse the image *before* you pass it for detection
-For example:
-```js
-  const buffer = fs.readFileSync(input);
-  const image = tf.node.decodeImage(buffer);
-  const result = human.detect(image, config);
-  image.dispose();
-```
-
 ### Weights
 
 Pretrained model weights are included in `./models`
@@ -167,34 +158,48 @@ NodeJS:
 
 ## Usage
 
 `Human` library does not require special initialization.
-All configuration is done in a single JSON object and all model weights will be dynamically loaded upon their first usage(and only then, `Human` will not load weights that it doesn't need according to configuration).
+All configuration is done in a single JSON object and all model weights are dynamically loaded upon their first usage
+(and only then; `Human` will not load weights that it doesn't need according to configuration).
 
 There is only *ONE* method you need:
 ```js
-  import * as tf from '@tensorflow/tfjs';
-  import human from '@vladmandic/human';
-
-  // 'image': can be of any type of an image object: HTMLImage, HTMLVideo, HTMLMedia, Canvas, Tensor4D
-  // 'options': optional parameter used to override any options present in default configuration
-  const result = await human.detect(image, options?)
+  // 'image': can be any image-type object: HTMLImage, HTMLVideo, HTMLMedia, Canvas, Tensor4D
+  // 'config': optional parameter used to override any options present in the default configuration
+  // configuration is fully dynamic and can change between different calls to 'detect()'
+  const result = await human.detect(image, config?)
 ```
 
 or if you want to use promises
 ```js
-  human.detect(image, options?).then((result) => {
+  human.detect(image, config?).then((result) => {
     // your code
   })
 ```
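+
+Since configuration is fully dynamic, each call can override individual values and the override object is merged over the defaults. A minimal sketch (the specific override shown is just an illustration; any value from the default configuration can be changed the same way):
+```js
+  // full detection using the default configuration
+  const full = await human.detect(image);
+  // lighter pass that skips hand detection by overriding a single flag
+  const partial = await human.detect(image, { hand: { enabled: false } });
+```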
 
-Additionally, `Human` library exposes several classes:
+Additionally, `Human` library exposes several objects and methods:
 
 ```js
-  human.config   // access to configuration object, normally set as parameter to detect()
-  human.defaults // read-only view of default configuration object
-  human.models   // dynamically maintained list of object of any loaded models
-  human.tf       // instance of tfjs used by human
+  human.config       // access to configuration object, normally set as parameter to detect()
+  human.defaults     // read-only view of default configuration object
+  human.models       // dynamically maintained list of all loaded models
+  human.tf           // instance of tfjs used by human
+  human.state        // describes the current operation in progress
+                     // progresses through: 'config', 'check', 'backend', 'load', 'run:<model>', 'idle'
+  human.load(config) // explicitly loads all models enabled in configuration
+                     // useful to pre-load models instead of loading on-demand during 'human.detect()'
 ```
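+
+For example, a minimal pre-loading sketch (assumes the same 'config' object is later passed to 'detect()'):
+```js
+  // pre-load all models enabled in 'config' so the first detect() call is fast
+  await human.load(config);
+  // subsequent detect() calls skip the on-demand model loading step
+  const result = await human.detect(image, config);
+  // human.state can be read at any time to see which phase the library is in
+  console.log(human.state);
+```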
+
+Note that when using `Human` library in `NodeJS`, you must load and parse the image *before* you pass it for detection, and dispose of it afterwards
+
+For example:
+```js
+  const tf = require('@tensorflow/tfjs-node');
+  const fs = require('fs');
+
+  const imageFile = '../assets/sample1.jpg';
+  const buffer = fs.readFileSync(imageFile);
+  const image = tf.node.decodeImage(buffer);
+  const result = await human.detect(image, config);
+  image.dispose();
+```
@@ -213,7 +218,7 @@ Configuration object is large, but typically you only need to modify a few values:
 
 ```js
-export default {
+config = {
   backend: 'webgl', // select tfjs backend to use
   console: true,    // enable debugging output to console
   face: {
     enabled: true,  // controls & loads all face-specific models
     // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
     // note: module is not loaded until it is required
     detector: {
-      modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
+      modelPath: '../models/blazeface/back/model.json', // can be 'front' or 'back'.
       // 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distant faces.
-      inputSize: 256, // fixed value: 128 for front and 'tfhub' and 'front' and 256 for 'back'
+      inputSize: 256, // fixed value: 128 for 'front' and 256 for 'back'
       maxFaces: 10,   // maximum number of faces detected in the input, should be set to the minimum number for performance
       skipFrames: 10, // how many frames to go without re-running the face bounding box detector
       // if the model is running at 25 FPS, we can re-use the existing bounding box for updated face mesh analysis
diff --git a/config.js b/config.js
index 39448db0..0c985298 100644
--- a/config.js
+++ b/config.js
@@ -9,9 +9,9 @@ export default {
     // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
     // (note: module is not loaded until it is required)
     detector: {
-      modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
+      modelPath: '../models/blazeface/back/model.json', // can be 'front' or 'back'.
       // 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distant faces.
-      inputSize: 256, // fixed value: 128 for front and 'tfhub' and 'front' and 256 for 'back'
+      inputSize: 256, // fixed value: 128 for 'front' and 256 for 'back'
       maxFaces: 10,   // maximum number of faces detected in the input, should be set to the minimum number for performance
       skipFrames: 10, // how many frames to go without re-running the face bounding box detector
       // if the model is running at 25 FPS, we can re-use the existing bounding box for updated face mesh analysis
diff --git a/demo/browser.js b/demo/browser.js
index a178d1e3..432fdec5 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -5,13 +5,14 @@ import human from '../dist/human.esm.js';
 const ui = {
   baseColor: 'rgba(255, 200, 255, 0.3)',
   baseLabel: 'rgba(255, 200, 255, 0.9)',
-  baseFont: 'small-caps 1.2rem "Segoe UI"',
+  baseFontProto: 'small-caps {size} "Segoe UI"',
   baseLineWidth: 16,
-  baseLineHeight: 2,
+  baseLineHeightProto: 2,
   columns: 3,
   busy: false,
   facing: 'user',
   worker: 'worker.js',
+  samples: ['../assets/sample1.jpg', '../assets/sample2.jpg', '../assets/sample3.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample6.jpg'],
 };
 
 const config = {
@@ -285,82 +286,6 @@ async function runHumanDetect(input, canvas) {
   }
 }
 
-function setupUI() {
-  // add all variables to ui control panel
-  settings = QuickSettings.create(10, 10, 'Settings', document.getElementById('main'));
-  const style = document.createElement('style');
-  // style.type = 'text/css';
-  style.innerHTML = `
-    .qs_main { font: 1rem "Segoe UI"; }
-    .qs_label { font: 0.8rem "Segoe UI"; }
-    .qs_content { background: darkslategray; }
-    .qs_container { background: transparent; color: white; margin: 6px; padding: 6px; }
-    .qs_checkbox_label { top: 2px; }
-    .qs_button { width: -webkit-fill-available; font: 1rem "Segoe UI"; cursor: pointer; }
-  `;
-  document.getElementsByTagName('head')[0].appendChild(style);
-  settings.addButton('Play/Pause', () => {
-    const video = document.getElementById('video');
-    const canvas = document.getElementById('canvas');
-    if (!video.paused) {
-      document.getElementById('log').innerText += '\nPaused ...';
-      video.pause();
-    } else {
-      document.getElementById('log').innerText += '\nStarting Human Library ...';
-      video.play();
-    }
-    runHumanDetect(video, canvas);
-  });
-  settings.addDropDown('Backend', ['webgl', 'wasm', 'cpu'], async (val) => config.backend = val.value);
-  settings.addHTML('title', 'Enabled Models'); settings.hideTitle('title');
-  settings.addBoolean('Face Detect', config.face.enabled, (val) => config.face.enabled = val);
-  settings.addBoolean('Face Mesh', config.face.mesh.enabled, (val) => config.face.mesh.enabled = val);
-  settings.addBoolean('Face Iris', config.face.iris.enabled, (val) => config.face.iris.enabled = val);
-  settings.addBoolean('Face Age', config.face.age.enabled, (val) => config.face.age.enabled = val);
-  settings.addBoolean('Face Gender', config.face.gender.enabled, (val) => config.face.gender.enabled = val);
-  settings.addBoolean('Face Emotion', config.face.emotion.enabled, (val) => config.face.emotion.enabled = val);
-  settings.addBoolean('Body Pose', config.body.enabled, (val) => config.body.enabled = val);
-  settings.addBoolean('Hand Pose', config.hand.enabled, (val) => config.hand.enabled = val);
-  settings.addHTML('title', 'Model Parameters'); settings.hideTitle('title');
-  settings.addRange('Max Objects', 1, 20, 5, 1, (val) => {
-    config.face.detector.maxFaces = parseInt(val);
-    config.body.maxDetections = parseInt(val);
-  });
-  settings.addRange('Skip Frames', 1, 20, config.face.detector.skipFrames, 1, (val) => {
-    config.face.detector.skipFrames = parseInt(val);
-    config.face.emotion.skipFrames = parseInt(val);
-    config.face.age.skipFrames = parseInt(val);
-    config.hand.skipFrames = parseInt(val);
-  });
-  settings.addRange('Min Confidence', 0.1, 1.0, config.face.detector.minConfidence, 0.05, (val) => {
-    config.face.detector.minConfidence = parseFloat(val);
-    config.face.emotion.minConfidence = parseFloat(val);
-    config.hand.minConfidence = parseFloat(val);
-  });
-  settings.addRange('Score Threshold', 0.1, 1.0, config.face.detector.scoreThreshold, 0.05, (val) => {
-    config.face.detector.scoreThreshold = parseFloat(val);
-    config.hand.scoreThreshold = parseFloat(val);
-    config.body.scoreThreshold = parseFloat(val);
-  });
-  settings.addRange('IOU Threshold', 0.1, 1.0, config.face.detector.iouThreshold, 0.05, (val) => {
-    config.face.detector.iouThreshold = parseFloat(val);
-    config.hand.iouThreshold = parseFloat(val);
-  });
-  settings.addHTML('title', 'UI Options'); settings.hideTitle('title');
-  settings.addBoolean('Use Web Worker', false);
-  settings.addBoolean('Camera Front/Back', true, (val) => {
-    ui.facing = val ? 'user' : 'environment';
-    // eslint-disable-next-line no-use-before-define
-    setupCamera();
-  });
-  settings.addBoolean('Draw Boxes', true);
-  settings.addBoolean('Draw Points', true);
-  settings.addBoolean('Draw Polygons', true);
-  settings.addBoolean('Fill Polygons', true);
-  settings.addHTML('line1', '<hr>'); settings.hideTitle('line1');
-  settings.addRange('FPS', 0, 100, 0, 1);
-}
-
 // eslint-disable-next-line no-unused-vars
 async function setupCamera() {
   if (ui.busy) return null;
@@ -409,7 +334,6 @@ async function processImage(input) {
       ui.baseLabel = 'rgba(200, 255, 255, 0.8)';
       ui.baseFont = 'small-caps 3.5rem "Segoe UI"';
       ui.baseLineWidth = 16;
-      ui.baseLineHeight = 5;
       ui.columns = 3;
       const cfg = {
         backend: 'webgl',
@@ -450,28 +374,105 @@ async function processImage(input) {
   });
 }
 
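+// start or pause webcam detection: initializes the camera when starting, toggles video playback, then re-enters the detection loop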
+async function detectVideo() {
+  document.getElementById('samples').style.display = 'none';
+  document.getElementById('canvas').style.display = 'block';
+  const video = document.getElementById('video');
+  const canvas = document.getElementById('canvas');
+  ui.baseFont = ui.baseFontProto.replace(/{size}/, '1.2rem');
+  ui.baseLineHeight = ui.baseLineHeightProto;
+  if (!video.paused) {
+    document.getElementById('log').innerText += '\nPaused ...';
+    video.pause();
+  } else {
+    await setupCamera();
+    document.getElementById('log').innerText += '\nStarting Human Library ...';
+    video.play();
+  }
+  runHumanDetect(video, canvas);
+}
+
 // eslint-disable-next-line no-unused-vars
 async function detectSampleImages() {
-  ui.baseFont = 'small-caps 3rem "Segoe UI"';
+  ui.baseFont = ui.baseFontProto.replace(/{size}/, `${ui.columns}rem`);
+  ui.baseLineHeight = ui.baseLineHeightProto * ui.columns;
   document.getElementById('canvas').style.display = 'none';
+  document.getElementById('samples').style.display = 'block';
   log('Running detection of sample images');
-  const samples = ['../assets/sample1.jpg', '../assets/sample2.jpg', '../assets/sample3.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample6.jpg'];
-  for (const sample of samples) await processImage(sample);
+  for (const sample of ui.samples) await processImage(sample);
+}
+
+function setupUI() {
+  // add all variables to ui control panel
+  settings = QuickSettings.create(10, 10, 'Settings', document.getElementById('main'));
+  const style = document.createElement('style');
+  style.innerHTML = `
+    .qs_main { font: 1rem "Segoe UI"; }
+    .qs_label { font: 0.8rem "Segoe UI"; }
+    .qs_content { background: darkslategray; }
+    .qs_container { background: transparent; color: white; margin: 6px; padding: 6px; }
+    .qs_checkbox_label { top: 2px; }
+    .qs_button { width: -webkit-fill-available; font: 1rem "Segoe UI"; cursor: pointer; }
+  `;
+  document.getElementsByTagName('head')[0].appendChild(style);
+  settings.addButton('Play/Pause WebCam', () => detectVideo());
+  settings.addButton('Process Images', () => detectSampleImages());
+  settings.addDropDown('Backend', ['webgl', 'wasm', 'cpu'], async (val) => config.backend = val.value);
+  settings.addHTML('title', 'Enabled Models'); settings.hideTitle('title');
+  settings.addBoolean('Face Detect', config.face.enabled, (val) => config.face.enabled = val);
+  settings.addBoolean('Face Mesh', config.face.mesh.enabled, (val) => config.face.mesh.enabled = val);
+  settings.addBoolean('Face Iris', config.face.iris.enabled, (val) => config.face.iris.enabled = val);
+  settings.addBoolean('Face Age', config.face.age.enabled, (val) => config.face.age.enabled = val);
+  settings.addBoolean('Face Gender', config.face.gender.enabled, (val) => config.face.gender.enabled = val);
+  settings.addBoolean('Face Emotion', config.face.emotion.enabled, (val) => config.face.emotion.enabled = val);
+  settings.addBoolean('Body Pose', config.body.enabled, (val) => config.body.enabled = val);
+  settings.addBoolean('Hand Pose', config.hand.enabled, (val) => config.hand.enabled = val);
+  settings.addHTML('title', 'Model Parameters'); settings.hideTitle('title');
+  settings.addRange('Max Objects', 1, 20, 5, 1, (val) => {
+    config.face.detector.maxFaces = parseInt(val);
+    config.body.maxDetections = parseInt(val);
+  });
+  settings.addRange('Skip Frames', 1, 20, config.face.detector.skipFrames, 1, (val) => {
+    config.face.detector.skipFrames = parseInt(val);
+    config.face.emotion.skipFrames = parseInt(val);
+    config.face.age.skipFrames = parseInt(val);
+    config.hand.skipFrames = parseInt(val);
+  });
+  settings.addRange('Min Confidence', 0.1, 1.0, config.face.detector.minConfidence, 0.05, (val) => {
+    config.face.detector.minConfidence = parseFloat(val);
+    config.face.emotion.minConfidence = parseFloat(val);
+    config.hand.minConfidence = parseFloat(val);
+  });
+  settings.addRange('Score Threshold', 0.1, 1.0, config.face.detector.scoreThreshold, 0.05, (val) => {
+    config.face.detector.scoreThreshold = parseFloat(val);
+    config.hand.scoreThreshold = parseFloat(val);
+    config.body.scoreThreshold = parseFloat(val);
+  });
+  settings.addRange('IOU Threshold', 0.1, 1.0, config.face.detector.iouThreshold, 0.05, (val) => {
+    config.face.detector.iouThreshold = parseFloat(val);
+    config.hand.iouThreshold = parseFloat(val);
+  });
+  settings.addHTML('title', 'UI Options'); settings.hideTitle('title');
+  settings.addBoolean('Use Web Worker', false);
+  settings.addBoolean('Camera Front/Back', true, (val) => {
+    ui.facing = val ? 'user' : 'environment';
+    setupCamera();
+  });
+  settings.addBoolean('Draw Boxes', true);
+  settings.addBoolean('Draw Points', true);
+  settings.addBoolean('Draw Polygons', true);
+  settings.addBoolean('Fill Polygons', true);
+  settings.addHTML('line1', '<hr>'); settings.hideTitle('line1');
+  settings.addRange('FPS', 0, 100, 0, 1);
 }
 
 async function main() {
   log('Human demo starting ...');
-
-  // setup ui control panel
-  await setupUI();
+  setupUI();
 
   const msg = `Human ready: version: ${human.version} TensorFlow/JS version: ${human.tf.version_core}`;
   document.getElementById('log').innerText += '\n' + msg;
   log(msg);
-
-  // use one of the two:
-  await setupCamera();
-  // await detectSampleImages();
 }
 
 window.onload = main;
diff --git a/src/index.js b/src/index.js
index a6b10a79..2cd2403f 100644
--- a/src/index.js
+++ b/src/index.js
@@ -8,6 +8,7 @@ const defaults = require('../config.js').default;
 const app = require('../package.json');
 
 let config;
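+// current phase of the detection pipeline, exposed to callers as human.state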
+let state = 'idle';
 
 // object that contains all initialized models
 const models = {
@@ -61,10 +62,22 @@ function sanity(input) {
   return null;
 }
 
-async function detect(input, userConfig) {
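+// explicitly load and cache all models enabled in the configuration;
+// detect() also calls this on every run, so any model is still loaded on first use if not pre-loaded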
+async function load(userConfig) {
+  if (userConfig) config = mergeDeep(defaults, userConfig);
+  if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
+  if (config.body.enabled && !models.posenet) models.posenet = await posenet.load(config.body);
+  if (config.hand.enabled && !models.handpose) models.handpose = await handpose.load(config.hand);
+  if (config.face.enabled && config.face.age.enabled && !models.age) models.age = await ssrnet.loadAge(config);
+  if (config.face.enabled && config.face.gender.enabled && !models.gender) models.gender = await ssrnet.loadGender(config);
+  if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);
+}
+
+async function detect(input, userConfig = {}) {
+  state = 'config';
   config = mergeDeep(defaults, userConfig);
 
   // sanity checks
+  state = 'check';
   const error = sanity(input);
   if (error) {
     log(error, input);
@@ -79,6 +92,7 @@
 
     // configure backend
     if (tf.getBackend() !== config.backend) {
+      state = 'backend';
       log('Human library setting backend:', config.backend);
       await tf.setBackend(config.backend);
       await tf.ready();
@@ -91,35 +105,31 @@
     // }
 
     // load models if enabled
-    if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
-    if (config.body.enabled && !models.posenet) models.posenet = await posenet.load(config.body);
-    if (config.hand.enabled && !models.handpose) models.handpose = await handpose.load(config.hand);
-    if (config.face.enabled && config.face.age.enabled && !models.age) models.age = await ssrnet.loadAge(config);
-    if (config.face.enabled && config.face.gender.enabled && !models.gender) models.gender = await ssrnet.loadGender(config);
-    if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);
+    state = 'load';
+    await load();
 
     const perf = {};
     let timeStamp;
 
-    // run posenet
-    timeStamp = now();
     tf.engine().startScope();
+
+    // run posenet
+    state = 'run:body';
+    timeStamp = now();
     const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
-    tf.engine().endScope();
     perf.body = Math.trunc(now() - timeStamp);
 
     // run handpose
+    state = 'run:hand';
     timeStamp = now();
-    tf.engine().startScope();
     const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
-    tf.engine().endScope();
     perf.hand = Math.trunc(now() - timeStamp);
 
     // run facemesh, includes blazeface and iris
     const faceRes = [];
     if (config.face.enabled) {
+      state = 'run:face';
      timeStamp = now();
-      tf.engine().startScope();
      const faces = await models.facemesh.estimateFaces(input, config.face);
      perf.face = Math.trunc(now() - timeStamp);
      for (const face of faces) {
@@ -129,10 +139,12 @@ async function detect(input, userConfig) {
          continue;
        }
        // run ssr-net age & gender, inherits face from blazeface
+        state = 'run:agegender';
        timeStamp = now();
        const ssrData = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
        perf.agegender = Math.trunc(now() - timeStamp);
        // run emotion, inherits face from blazeface
+        state = 'run:emotion';
        timeStamp = now();
        const emotionData = config.face.emotion.enabled ? await emotion.predict(face.image, config) : {};
        perf.emotion = Math.trunc(now() - timeStamp);
@@ -154,12 +166,14 @@ async function detect(input, userConfig) {
          iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
        });
      }
-      tf.engine().endScope();
    }
 
    // set depthwiseconv to original value
    // tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
 
+    // reset state and close the tensor scope regardless of which models ran
+    state = 'idle';
+    tf.engine().endScope();
+
    // combine and return results
    perf.total = Object.values(perf).reduce((a, b) => a + b);
    resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf });
@@ -176,3 +190,4 @@ exports.posenet = posenet;
 exports.handpose = handpose;
 exports.tf = tf;
 exports.version = app.version;
+// expose state via a getter so callers always read the live value
+// (a plain 'exports.state = state' would export a one-time copy of the string)
+Object.defineProperty(exports, 'state', { get: () => state });