From f3bf35533eca1bb71fbb8aa8432a867beb339647 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 17 Oct 2020 07:15:23 -0400
Subject: [PATCH] added state handling

---
 README.md       |  55 ++++++++-------
 config.js       |   4 +-
 demo/browser.js | 179 ++++++++++++++++++++++++------------------------
 src/index.js    |  43 ++++++++----
 4 files changed, 151 insertions(+), 130 deletions(-)

diff --git a/README.md b/README.md
index c84b8c0c..1f92f9e4 100644
--- a/README.md
+++ b/README.md
@@ -133,15 +133,6 @@ const config = {
 }
 ```
 
-Note that when using `Human` in NodeJS, you must load and parse the image *before* you pass it for detection
-For example:
-```js
-  const buffer = fs.readFileSync(input);
-  const image = tf.node.decodeImage(buffer);
-  const result = human.detect(image, config);
-  image.dispose();
-```
-
 ### Weights
 
 Pretrained model weights are included in `./models`
@@ -167,34 +158,48 @@ NodeJS:
 
 ## Usage
 
 `Human` library does not require special initialization.
-All configuration is done in a single JSON object and all model weights will be dynamically loaded upon their first usage(and only then, `Human` will not load weights that it doesn't need according to configuration).
+All configuration is done in a single JSON object and all model weights are dynamically loaded upon their first usage
+(and only then; `Human` will not load weights that it doesn't need according to configuration).
 
 There is only *ONE* method you need:
 ```js
-  import * as tf from '@tensorflow/tfjs';
-  import human from '@vladmandic/human';
-
-  // 'image': can be of any type of an image object: HTMLImage, HTMLVideo, HTMLMedia, Canvas, Tensor4D
-  // 'options': optional parameter used to override any options present in default configuration
-  const result = await human.detect(image, options?)
+  // 'image': can be any image-type object: HTMLImage, HTMLVideo, HTMLMedia, Canvas, Tensor4D
+  // 'config': optional parameter used to override any options present in the default configuration
+  // configuration is fully dynamic and can change between different calls to 'detect()'
+  const result = await human.detect(image, config?)
 ```
 
 or if you want to use promises
 ```js
-  human.detect(image, options?).then((result) => {
+  human.detect(image, config?).then((result) => {
     // your code
   })
 ```
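+
+Since configuration is fully dynamic, each call can override individual values and the override object is merged over the defaults. A minimal sketch (the specific override shown is just an illustration; any value from the default configuration can be changed the same way):
+```js
+  // full detection using the default configuration
+  const full = await human.detect(image);
+  // lighter pass that skips hand detection by overriding a single flag
+  const partial = await human.detect(image, { hand: { enabled: false } });
+```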
 
-Additionally, `Human` library exposes several classes:
+Additionally, `Human` library exposes several objects and methods:
 
 ```js
-  human.config   // access to configuration object, normally set as parameter to detect()
-  human.defaults // read-only view of default configuration object
-  human.models   // dynamically maintained list of object of any loaded models
-  human.tf       // instance of tfjs used by human
+  human.config       // access to configuration object, normally set as parameter to detect()
+  human.defaults     // read-only view of default configuration object
+  human.models       // dynamically maintained list of all loaded models
+  human.tf           // instance of tfjs used by human
+  human.state        // describes the current operation in progress
+                     // progresses through: 'config', 'check', 'backend', 'load', 'run:<model>', 'idle'
+  human.load(config) // explicitly loads all models enabled in configuration
+                     // useful to pre-load models instead of loading on-demand during 'human.detect()'
 ```
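+
+For example, a minimal pre-loading sketch (assumes the same 'config' object is later passed to 'detect()'):
+```js
+  // pre-load all models enabled in 'config' so the first detect() call is fast
+  await human.load(config);
+  // subsequent detect() calls skip the on-demand model loading step
+  const result = await human.detect(image, config);
+  // human.state can be read at any time to see which phase the library is in
+  console.log(human.state);
+```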
+
+Note that when using `Human` library in `NodeJS`, you must load and parse the image *before* you pass it for detection, and dispose of it afterwards
+
+For example:
+```js
+  const tf = require('@tensorflow/tfjs-node');
+  const fs = require('fs');
+
+  const imageFile = '../assets/sample1.jpg';
+  const buffer = fs.readFileSync(imageFile);
+  const image = tf.node.decodeImage(buffer);
+  const result = await human.detect(image, config);
+  image.dispose();
+```
@@ -213,7 +218,7 @@ Configuration object is large, but typically you only need to modify a few values:
 
 ```js
-export default {
+config = {
   backend: 'webgl', // select tfjs backend to use
   console: true,    // enable debugging output to console
   face: {
     enabled: true,  // controls & loads all face-specific models
     // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
     // note: module is not loaded until it is required
     detector: {
-      modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
+      modelPath: '../models/blazeface/back/model.json', // can be 'front' or 'back'.
       // 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distant faces.
-      inputSize: 256, // fixed value: 128 for front and 'tfhub' and 'front' and 256 for 'back'
+      inputSize: 256, // fixed value: 128 for 'front' and 256 for 'back'
       maxFaces: 10,   // maximum number of faces detected in the input, should be set to the minimum number for performance
       skipFrames: 10, // how many frames to go without re-running the face bounding box detector
       // if the model is running at 25 FPS, we can re-use the existing bounding box for updated face mesh analysis
diff --git a/config.js b/config.js
index 39448db0..0c985298 100644
--- a/config.js
+++ b/config.js
@@ -9,9 +9,9 @@ export default {
     // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
     // (note: module is not loaded until it is required)
     detector: {
-      modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
+      modelPath: '../models/blazeface/back/model.json', // can be 'front' or 'back'.
       // 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distant faces.
-      inputSize: 256, // fixed value: 128 for front and 'tfhub' and 'front' and 256 for 'back'
+      inputSize: 256, // fixed value: 128 for 'front' and 256 for 'back'
       maxFaces: 10,   // maximum number of faces detected in the input, should be set to the minimum number for performance
       skipFrames: 10, // how many frames to go without re-running the face bounding box detector
       // if the model is running at 25 FPS, we can re-use the existing bounding box for updated face mesh analysis
diff --git a/demo/browser.js b/demo/browser.js
index a178d1e3..432fdec5 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -5,13 +5,14 @@ import human from '../dist/human.esm.js';
 const ui = {
   baseColor: 'rgba(255, 200, 255, 0.3)',
   baseLabel: 'rgba(255, 200, 255, 0.9)',
-  baseFont: 'small-caps 1.2rem "Segoe UI"',
+  baseFontProto: 'small-caps {size} "Segoe UI"',
   baseLineWidth: 16,
-  baseLineHeight: 2,
+  baseLineHeightProto: 2,
   columns: 3,
   busy: false,
   facing: 'user',
   worker: 'worker.js',
+  samples: ['../assets/sample1.jpg', '../assets/sample2.jpg', '../assets/sample3.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample6.jpg'],
 };
 
 const config = {
@@ -285,82 +286,6 @@ async function runHumanDetect(input, canvas) {
   }
 }
 
-function setupUI() {
-  // add all variables to ui control panel
-  settings = QuickSettings.create(10, 10, 'Settings', document.getElementById('main'));
-  const style = document.createElement('style');
-  // style.type = 'text/css';
-  style.innerHTML = `
-    .qs_main { font: 1rem "Segoe UI"; }
-    .qs_label { font: 0.8rem "Segoe UI"; }
-    .qs_content { background: darkslategray; }
-    .qs_container { background: transparent; color: white; margin: 6px; padding: 6px; }
-    .qs_checkbox_label { top: 2px; }
-    .qs_button { width: -webkit-fill-available; font: 1rem "Segoe UI"; cursor: pointer; }
-  `;
-  document.getElementsByTagName('head')[0].appendChild(style);
-  settings.addButton('Play/Pause', () => {
-    const video = document.getElementById('video');
-    const canvas = document.getElementById('canvas');
-    if (!video.paused) {
-      document.getElementById('log').innerText += '\nPaused ...';
-      video.pause();
-    } else {
-      document.getElementById('log').innerText += '\nStarting Human Library ...';
-      video.play();
-    }
-    runHumanDetect(video, canvas);
-  });
-  settings.addDropDown('Backend', ['webgl', 'wasm', 'cpu'], async (val) => config.backend = val.value);
-  settings.addHTML('title', 'Enabled Models'); settings.hideTitle('title');
-  settings.addBoolean('Face Detect', config.face.enabled, (val) => config.face.enabled = val);
-  settings.addBoolean('Face Mesh', config.face.mesh.enabled, (val) => config.face.mesh.enabled = val);
-  settings.addBoolean('Face Iris', config.face.iris.enabled, (val) => config.face.iris.enabled = val);
-  settings.addBoolean('Face Age', config.face.age.enabled, (val) => config.face.age.enabled = val);
-  settings.addBoolean('Face Gender', config.face.gender.enabled, (val) => config.face.gender.enabled = val);
-  settings.addBoolean('Face Emotion', config.face.emotion.enabled, (val) => config.face.emotion.enabled = val);
-  settings.addBoolean('Body Pose', config.body.enabled, (val) => config.body.enabled = val);
-  settings.addBoolean('Hand Pose', config.hand.enabled, (val) => config.hand.enabled = val);
-  settings.addHTML('title', 'Model Parameters'); settings.hideTitle('title');
-  settings.addRange('Max Objects', 1, 20, 5, 1, (val) => {
-    config.face.detector.maxFaces = parseInt(val);
-    config.body.maxDetections = parseInt(val);
-  });
-  settings.addRange('Skip Frames', 1, 20, config.face.detector.skipFrames, 1, (val) => {
-    config.face.detector.skipFrames = parseInt(val);
-    config.face.emotion.skipFrames = parseInt(val);
-    config.face.age.skipFrames = parseInt(val);
-    config.hand.skipFrames = parseInt(val);
-  });
-  settings.addRange('Min Confidence', 0.1, 1.0, config.face.detector.minConfidence, 0.05, (val) => {
-    config.face.detector.minConfidence = parseFloat(val);
-    config.face.emotion.minConfidence = parseFloat(val);
-    config.hand.minConfidence = parseFloat(val);
-  });
-  settings.addRange('Score Threshold', 0.1, 1.0, config.face.detector.scoreThreshold, 0.05, (val) => {
-    config.face.detector.scoreThreshold = parseFloat(val);
-    config.hand.scoreThreshold = parseFloat(val);
-    config.body.scoreThreshold = parseFloat(val);
-  });
-  settings.addRange('IOU Threshold', 0.1, 1.0, config.face.detector.iouThreshold, 0.05, (val) => {
-    config.face.detector.iouThreshold = parseFloat(val);
-    config.hand.iouThreshold = parseFloat(val);
-  });
-  settings.addHTML('title', 'UI Options'); settings.hideTitle('title');
-  settings.addBoolean('Use Web Worker', false);
-  settings.addBoolean('Camera Front/Back', true, (val) => {
-    ui.facing = val ? 'user' : 'environment';
-    // eslint-disable-next-line no-use-before-define
-    setupCamera();
-  });
-  settings.addBoolean('Draw Boxes', true);
-  settings.addBoolean('Draw Points', true);
-  settings.addBoolean('Draw Polygons', true);
-  settings.addBoolean('Fill Polygons', true);
-  settings.addHTML('line1', '<hr>'); settings.hideTitle('line1');
-  settings.addRange('FPS', 0, 100, 0, 1);
-}
-
 // eslint-disable-next-line no-unused-vars
 async function setupCamera() {
   if (ui.busy) return null;
@@ -409,7 +334,6 @@ async function processImage(input) {
       ui.baseLabel = 'rgba(200, 255, 255, 0.8)';
       ui.baseFont = 'small-caps 3.5rem "Segoe UI"';
       ui.baseLineWidth = 16;
-      ui.baseLineHeight = 5;
       ui.columns = 3;
       const cfg = {
         backend: 'webgl',
@@ -450,28 +374,105 @@ async function processImage(input) {
   });
 }
 
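+// start or pause webcam detection: initializes the camera when starting, toggles video playback, then re-enters the detection loop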
+async function detectVideo() {
+  document.getElementById('samples').style.display = 'none';
+  document.getElementById('canvas').style.display = 'block';
+  const video = document.getElementById('video');
+  const canvas = document.getElementById('canvas');
+  ui.baseFont = ui.baseFontProto.replace(/{size}/, '1.2rem');
+  ui.baseLineHeight = ui.baseLineHeightProto;
+  if (!video.paused) {
+    document.getElementById('log').innerText += '\nPaused ...';
+    video.pause();
+  } else {
+    await setupCamera();
+    document.getElementById('log').innerText += '\nStarting Human Library ...';
+    video.play();
+  }
+  runHumanDetect(video, canvas);
+}
+
 // eslint-disable-next-line no-unused-vars
 async function detectSampleImages() {
-  ui.baseFont = 'small-caps 3rem "Segoe UI"';
+  ui.baseFont = ui.baseFontProto.replace(/{size}/, `${ui.columns}rem`);
+  ui.baseLineHeight = ui.baseLineHeightProto * ui.columns;
   document.getElementById('canvas').style.display = 'none';
+  document.getElementById('samples').style.display = 'block';
   log('Running detection of sample images');
-  const samples = ['../assets/sample1.jpg', '../assets/sample2.jpg', '../assets/sample3.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample6.jpg'];
-  for (const sample of samples) await processImage(sample);
+  for (const sample of ui.samples) await processImage(sample);
+}
+
+function setupUI() {
+  // add all variables to ui control panel
+  settings = QuickSettings.create(10, 10, 'Settings', document.getElementById('main'));
+  const style = document.createElement('style');
+  style.innerHTML = `
+    .qs_main { font: 1rem "Segoe UI"; }
+    .qs_label { font: 0.8rem "Segoe UI"; }
+    .qs_content { background: darkslategray; }
+    .qs_container { background: transparent; color: white; margin: 6px; padding: 6px; }
+    .qs_checkbox_label { top: 2px; }
+    .qs_button { width: -webkit-fill-available; font: 1rem "Segoe UI"; cursor: pointer; }
+  `;
+  document.getElementsByTagName('head')[0].appendChild(style);
+  settings.addButton('Play/Pause WebCam', () => detectVideo());
+  settings.addButton('Process Images', () => detectSampleImages());
+  settings.addDropDown('Backend', ['webgl', 'wasm', 'cpu'], async (val) => config.backend = val.value);
+  settings.addHTML('title', 'Enabled Models'); settings.hideTitle('title');
+  settings.addBoolean('Face Detect', config.face.enabled, (val) => config.face.enabled = val);
+  settings.addBoolean('Face Mesh', config.face.mesh.enabled, (val) => config.face.mesh.enabled = val);
+  settings.addBoolean('Face Iris', config.face.iris.enabled, (val) => config.face.iris.enabled = val);
+  settings.addBoolean('Face Age', config.face.age.enabled, (val) => config.face.age.enabled = val);
+  settings.addBoolean('Face Gender', config.face.gender.enabled, (val) => config.face.gender.enabled = val);
+  settings.addBoolean('Face Emotion', config.face.emotion.enabled, (val) => config.face.emotion.enabled = val);
+  settings.addBoolean('Body Pose', config.body.enabled, (val) => config.body.enabled = val);
+  settings.addBoolean('Hand Pose', config.hand.enabled, (val) => config.hand.enabled = val);
+  settings.addHTML('title', 'Model Parameters'); settings.hideTitle('title');
+  settings.addRange('Max Objects', 1, 20, 5, 1, (val) => {
+    config.face.detector.maxFaces = parseInt(val);
+    config.body.maxDetections = parseInt(val);
+  });
+  settings.addRange('Skip Frames', 1, 20, config.face.detector.skipFrames, 1, (val) => {
+    config.face.detector.skipFrames = parseInt(val);
+    config.face.emotion.skipFrames = parseInt(val);
+    config.face.age.skipFrames = parseInt(val);
+    config.hand.skipFrames = parseInt(val);
+  });
+  settings.addRange('Min Confidence', 0.1, 1.0, config.face.detector.minConfidence, 0.05, (val) => {
+    config.face.detector.minConfidence = parseFloat(val);
+    config.face.emotion.minConfidence = parseFloat(val);
+    config.hand.minConfidence = parseFloat(val);
+  });
+  settings.addRange('Score Threshold', 0.1, 1.0, config.face.detector.scoreThreshold, 0.05, (val) => {
+    config.face.detector.scoreThreshold = parseFloat(val);
+    config.hand.scoreThreshold = parseFloat(val);
+    config.body.scoreThreshold = parseFloat(val);
+  });
+  settings.addRange('IOU Threshold', 0.1, 1.0, config.face.detector.iouThreshold, 0.05, (val) => {
+    config.face.detector.iouThreshold = parseFloat(val);
+    config.hand.iouThreshold = parseFloat(val);
+  });
+  settings.addHTML('title', 'UI Options'); settings.hideTitle('title');
+  settings.addBoolean('Use Web Worker', false);
+  settings.addBoolean('Camera Front/Back', true, (val) => {
+    ui.facing = val ? 'user' : 'environment';
+    setupCamera();
+  });
+  settings.addBoolean('Draw Boxes', true);
+  settings.addBoolean('Draw Points', true);
+  settings.addBoolean('Draw Polygons', true);
+  settings.addBoolean('Fill Polygons', true);
+  settings.addHTML('line1', '<hr>'); settings.hideTitle('line1');
+  settings.addRange('FPS', 0, 100, 0, 1);
 }
 
 async function main() {
   log('Human demo starting ...');
-
-  // setup ui control panel
-  await setupUI();
+  setupUI();
 
   const msg = `Human ready: version: ${human.version} TensorFlow/JS version: ${human.tf.version_core}`;
   document.getElementById('log').innerText += '\n' + msg;
   log(msg);
-
-  // use one of the two:
-  await setupCamera();
-  // await detectSampleImages();
 }
 
 window.onload = main;
diff --git a/src/index.js b/src/index.js
index a6b10a79..2cd2403f 100644
--- a/src/index.js
+++ b/src/index.js
@@ -8,6 +8,7 @@ const defaults = require('../config.js').default;
 const app = require('../package.json');
 
 let config;
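+// current phase of the detection pipeline, exposed to callers as human.state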
+let state = 'idle';
 
 // object that contains all initialized models
 const models = {
@@ -61,10 +62,22 @@ function sanity(input) {
   return null;
 }
 
-async function detect(input, userConfig) {
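+// explicitly load and cache all models enabled in the configuration;
+// detect() also calls this on every run, so any model is still loaded on first use if not pre-loaded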
+async function load(userConfig) {
+  if (userConfig) config = mergeDeep(defaults, userConfig);
+  if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
+  if (config.body.enabled && !models.posenet) models.posenet = await posenet.load(config.body);
+  if (config.hand.enabled && !models.handpose) models.handpose = await handpose.load(config.hand);
+  if (config.face.enabled && config.face.age.enabled && !models.age) models.age = await ssrnet.loadAge(config);
+  if (config.face.enabled && config.face.gender.enabled && !models.gender) models.gender = await ssrnet.loadGender(config);
+  if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);
+}
+
+async function detect(input, userConfig = {}) {
+  state = 'config';
   config = mergeDeep(defaults, userConfig);
 
   // sanity checks
+  state = 'check';
   const error = sanity(input);
   if (error) {
     log(error, input);
@@ -79,6 +92,7 @@
 
     // configure backend
     if (tf.getBackend() !== config.backend) {
+      state = 'backend';
       log('Human library setting backend:', config.backend);
       await tf.setBackend(config.backend);
       await tf.ready();
@@ -91,35 +105,31 @@
     // }
 
     // load models if enabled
-    if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
-    if (config.body.enabled && !models.posenet) models.posenet = await posenet.load(config.body);
-    if (config.hand.enabled && !models.handpose) models.handpose = await handpose.load(config.hand);
-    if (config.face.enabled && config.face.age.enabled && !models.age) models.age = await ssrnet.loadAge(config);
-    if (config.face.enabled && config.face.gender.enabled && !models.gender) models.gender = await ssrnet.loadGender(config);
-    if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);
+    state = 'load';
+    await load();
 
     const perf = {};
     let timeStamp;
 
-    // run posenet
-    timeStamp = now();
     tf.engine().startScope();
+
+    // run posenet
+    state = 'run:body';
+    timeStamp = now();
     const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
-    tf.engine().endScope();
     perf.body = Math.trunc(now() - timeStamp);
 
     // run handpose
+    state = 'run:hand';
     timeStamp = now();
-    tf.engine().startScope();
     const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
-    tf.engine().endScope();
     perf.hand = Math.trunc(now() - timeStamp);
 
     // run facemesh, includes blazeface and iris
     const faceRes = [];
     if (config.face.enabled) {
+      state = 'run:face';
      timeStamp = now();
-      tf.engine().startScope();
      const faces = await models.facemesh.estimateFaces(input, config.face);
      perf.face = Math.trunc(now() - timeStamp);
      for (const face of faces) {
@@ -129,10 +139,12 @@ async function detect(input, userConfig) {
          continue;
        }
        // run ssr-net age & gender, inherits face from blazeface
+        state = 'run:agegender';
        timeStamp = now();
        const ssrData = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
        perf.agegender = Math.trunc(now() - timeStamp);
        // run emotion, inherits face from blazeface
+        state = 'run:emotion';
        timeStamp = now();
        const emotionData = config.face.emotion.enabled ? await emotion.predict(face.image, config) : {};
        perf.emotion = Math.trunc(now() - timeStamp);
@@ -154,12 +166,14 @@ async function detect(input, userConfig) {
          iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
        });
      }
-      tf.engine().endScope();
    }
 
    // set depthwiseconv to original value
    // tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
 
+    // reset state and close the tensor scope regardless of which models ran
+    state = 'idle';
+    tf.engine().endScope();
+
    // combine and return results
    perf.total = Object.values(perf).reduce((a, b) => a + b);
    resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf });
@@ -176,3 +190,4 @@ exports.posenet = posenet;
 exports.handpose = handpose;
 exports.tf = tf;
 exports.version = app.version;
+// expose state via a getter so callers always read the live value
+// (a plain 'exports.state = state' would export a one-time copy of the string)
+Object.defineProperty(exports, 'state', { get: () => state });