From c82b1698d533eeaf8813f554b464ca64901f2ae7 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 16 Oct 2020 15:04:51 -0400
Subject: [PATCH] updated examples plus bugfixes

---
 README.md        |  82 ++++++++++++++++++++++++------------
 demo/demo-esm.js | 105 +++++++++++++++++++++++++++++++++--------------
 src/config.js    |  76 +++++++++++++++++++---------------
 src/index.js     |  13 +++---
 4 files changed, 181 insertions(+), 95 deletions(-)

diff --git a/README.md b/README.md
index 050e6b8f..9be06d20 100644
--- a/README.md
+++ b/README.md
@@ -16,11 +16,13 @@ Compatible with Browser, WebWorker and NodeJS execution!
-**Example using static image:**
-![Example Using Image](demo/sample-image.jpg)
+## Examples
-**Example using webcam:**
-![Example Using WebCam](demo/sample-video.jpg)
+**Using static images:**
+![Example Using Image](assets/screenshot1.jpg)
+
+**Using webcam:**
+![Example Using WebCam](assets/screenshot2.jpg)
@@ -211,59 +213,85 @@ Below is output of `human.defaults` object
 Any property can be overriden by passing user object during `human.detect()`
 Note that user object and default configuration are merged using deep-merge, so you do not need to redefine entire configuration
+Configuration object is large, but typically you only need to modify a few values:
+
+- `enabled`: Choose which models to use
+- `skipFrames`: Must be set to 0 for static images
+- `modelPath`: Update as needed to reflect your application's relative path
+
+
 ```js
-human.defaults = {
-  console: true, // enable debugging output to console
-  backend: 'webgl', // select tfjs backend to use
+export default {
+  backend: 'webgl', // select tfjs backend to use
+  console: true, // enable debugging output to console
   face: {
-    enabled: true, // controls if specified modul is enabled (note: module is not loaded until it is required)
+    enabled: true, // controls if specified module is enabled
+    // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
+    // note: module is not loaded until it is required
     detector: {
-      modelPath: '../models/blazeface/tfhub/model.json', // can be 'tfhub', 'front' or 'back'
-      inputSize: 128, // 128 for tfhub and front models, 256 for back
-      maxFaces: 10, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
-      skipFrames: 10, // how many frames to skip before re-running bounding box detection
-      minConfidence: 0.5, // threshold for discarding a prediction
-      iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-      scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+      modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
+      // 'front' is optimized for large faces such as a front-facing camera and 'back' is optimized for distant faces.
+      inputSize: 256, // fixed value: 128 for 'front' and 'tfhub', 256 for 'back'
+      maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
+      skipFrames: 10, // how many frames to go without re-running the face bounding box detector
+      // if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
+      // as the face probably hasn't moved much in such a short time (10 * 1/25 = 0.25 sec)
+      minConfidence: 0.5, // threshold for discarding a prediction
+      iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+      scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     },
     mesh: {
       enabled: true,
       modelPath: '../models/facemesh/model.json',
+      inputSize: 192, // fixed value
     },
     iris: {
       enabled: true,
       modelPath: '../models/iris/model.json',
+      enlargeFactor: 2.3, // empiric tuning
+      inputSize: 64, // fixed value
     },
     age: {
       enabled: true,
       modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
-      skipFrames: 10, // how many frames to skip before re-running bounding box detection
+      // which determines training set for model
+      inputSize: 64, // fixed value
+      skipFrames: 10, // how many frames to go without re-running the detector
     },
     gender: {
       enabled: true,
-      modelPath: '../models/ssrnet-gender/imdb/model.json', // can be 'imdb' or 'wiki'
+      minConfidence: 0.8, // threshold for discarding a prediction
+      modelPath: '../models/ssrnet-gender/imdb/model.json',
     },
     emotion: {
       enabled: true,
-      minConfidence: 0.5, // threshold for discarding a prediction
-      skipFrames: 10, // how many frames to skip before re-running bounding box detection
-      useGrayscale: true, // convert color input to grayscale before processing or use single channels when color input is not supported
+      inputSize: 64, // fixed value
+      minConfidence: 0.5, // threshold for discarding a prediction
+      skipFrames: 10, // how many frames to go without re-running the detector
+      useGrayscale: true, // convert image to grayscale before prediction or use highest channel
       modelPath: '../models/emotion/model.json',
     },
   },
   body: {
     enabled: true,
     modelPath: '../models/posenet/model.json',
-    maxDetections: 5, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
-    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
-    nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
+    inputResolution: 257, // fixed value
+    outputStride: 16, // fixed value
+    maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
   },
   hand: {
     enabled: true,
-    skipFrames: 10, // how many frames to skip before re-running bounding box detection
-    minConfidence: 0.5, // threshold for discarding a prediction
-    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    inputSize: 256, // fixed value
+    skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
+    // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
+    // as the hand probably hasn't moved much in such a short time (10 * 1/25 = 0.25 sec)
+    minConfidence: 0.5, // threshold for discarding a prediction
+    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
+    maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
     detector: {
       anchors: '../models/handdetect/anchors.json',
       modelPath: '../models/handdetect/model.json',
diff --git a/demo/demo-esm.js b/demo/demo-esm.js
index 73b64bca..7a505050 100644
--- a/demo/demo-esm.js
+++ b/demo/demo-esm.js
@@ -4,9 +4,11 @@ import human from '../dist/human.esm.js';
 const ui = {
   baseColor: 'rgba(255, 200, 255, 0.3)',
-  baseLabel: 'rgba(255, 200, 255, 0.8)',
+  baseLabel: 'rgba(255, 200, 255, 0.9)',
   baseFont: 'small-caps 1.2rem "Segoe UI"',
   baseLineWidth: 16,
+  baseLineHeight: 2,
+  columns: 3,
   busy: false,
   facing: 'user',
 };
@@ -23,8 +25,8 @@ const config = {
     gender: { enabled: true },
     emotion: { enabled: true, minConfidence: 0.5, useGrayscale: true },
   },
-  body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
-  hand: { enabled: false, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
+  body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
+  hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
 };
 let settings;
 let worker;
@@ -49,20 +51,23 @@ const log = (...msg) => {
 async function drawFace(result, canvas) {
   if (!result) return;
   const ctx = canvas.getContext('2d');
-  ctx.strokeStyle = ui.baseColor;
-  ctx.font = ui.baseFont;
   for (const face of result) {
+    ctx.font = ui.baseFont;
+    ctx.strokeStyle = ui.baseColor;
     ctx.fillStyle = ui.baseColor;
     ctx.lineWidth = ui.baseLineWidth;
     ctx.beginPath();
     if (settings.getValue('Draw Boxes')) {
       ctx.rect(face.box[0], face.box[1], face.box[2], face.box[3]);
     }
-    const labelAgeGender = `${face.gender || ''} ${face.age || ''}`;
-    const labelIris = face.iris ? `iris: ${face.iris}` : '';
-    const labelEmotion = face.emotion && face.emotion[0] ? `emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : '';
+    // silly hack since fillText does not support newlines
+    const labels = [];
+    if (face.agConfidence) labels.push(`${Math.trunc(100 * face.agConfidence)}% ${face.gender || ''}`);
+    if (face.age) labels.push(`Age:${face.age || ''}`);
+    if (face.iris) labels.push(`iris: ${face.iris}`);
+    if (face.emotion && face.emotion[0]) labels.push(`${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}`);
     ctx.fillStyle = ui.baseLabel;
-    ctx.fillText(`${Math.trunc(100 * face.confidence)}% face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22);
+    for (const i in labels) ctx.fillText(labels[i], face.box[0] + 6, face.box[1] + 24 + ((i + 1) * ui.baseLineHeight));
     ctx.stroke();
     ctx.lineWidth = 1;
     if (face.mesh) {
@@ -102,11 +107,11 @@ async function drawFace(result, canvas) {
 async function drawBody(result, canvas) {
   if (!result) return;
   const ctx = canvas.getContext('2d');
-  ctx.fillStyle = ui.baseColor;
-  ctx.strokeStyle = ui.baseColor;
-  ctx.font = ui.baseFont;
-  ctx.lineWidth = ui.baseLineWidth;
   for (const pose of result) {
+    ctx.fillStyle = ui.baseColor;
+    ctx.strokeStyle = ui.baseColor;
+    ctx.font = ui.baseFont;
+    ctx.lineWidth = ui.baseLineWidth;
     if (settings.getValue('Draw Points')) {
       for (const point of pose.keypoints) {
         ctx.beginPath();
@@ -164,13 +169,13 @@ async function drawBody(result, canvas) {
 async function drawHand(result, canvas) {
   if (!result) return;
   const ctx = canvas.getContext('2d');
-  ctx.font = ui.baseFont;
-  ctx.lineWidth = ui.baseLineWidth;
-  window.result = result;
   for (const hand of result) {
+    ctx.font = ui.baseFont;
+    ctx.lineWidth = ui.baseLineWidth;
     if (settings.getValue('Draw Boxes')) {
       ctx.lineWidth = ui.baseLineWidth;
       ctx.beginPath();
+      ctx.strokeStyle = ui.baseColor;
       ctx.fillStyle = ui.baseColor;
       ctx.rect(hand.box[0], hand.box[1], hand.box[2], hand.box[3]);
       ctx.fillStyle = ui.baseLabel;
@@ -398,34 +403,74 @@ async function setupCamera() {
   });
 }
-// eslint-disable-next-line no-unused-vars
-async function setupImage() {
-  const image = document.getElementById('image');
-  image.width = window.innerWidth;
-  image.height = window.innerHeight;
+async function processImage(input) {
+  ui.baseColor = 'rgba(200, 255, 255, 0.5)';
+  ui.baseLabel = 'rgba(200, 255, 255, 0.8)';
+  ui.baseFont = 'small-caps 3.5rem "Segoe UI"';
+  ui.baseLineWidth = 16;
+  ui.baseLineHeight = 5;
+  ui.columns = 3;
+  const cfg = {
+    backend: 'webgl',
+    console: true,
+    face: {
+      enabled: true,
+      detector: { maxFaces: 10, skipFrames: 0, minConfidence: 0.1, iouThreshold: 0.3, scoreThreshold: 0.3 },
+      mesh: { enabled: true },
+      iris: { enabled: true },
+      age: { enabled: true, skipFrames: 0 },
+      gender: { enabled: true },
+      emotion: { enabled: true, minConfidence: 0.1, useGrayscale: true },
+    },
+    body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
+    hand: { enabled: true, skipFrames: 0, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.5 },
+  };
   return new Promise((resolve) => {
-    image.onload = () => resolve(image);
-    image.src = 'sample.jpg';
+    const image = document.getElementById('image');
+    image.onload = async () => {
+      log('Processing image:', image.src);
+      const canvas = document.getElementById('canvas');
+      image.width = image.naturalWidth;
+      image.height = image.naturalHeight;
+      canvas.width = image.naturalWidth;
+      canvas.height = image.naturalHeight;
+      const result = await human.detect(image, cfg);
+      await drawResults(image, result, canvas);
+      const thumb = document.createElement('canvas');
+      thumb.width = window.innerWidth / (ui.columns + 0.02);
+      thumb.height = canvas.height / (window.innerWidth / thumb.width);
+      const ctx = thumb.getContext('2d');
+      ctx.drawImage(canvas, 0, 0, canvas.width, canvas.height, 0, 0, thumb.width, thumb.height);
+      document.getElementById('samples').appendChild(thumb);
+      image.src = '';
+      resolve(true);
+    };
+    image.src = input;
   });
 }
+// eslint-disable-next-line no-unused-vars
+async function detectSampleImages() {
+  ui.baseFont = 'small-caps 3rem "Segoe UI"';
+  document.getElementById('canvas').style.display = 'none';
+  log('Running detection of sample images');
+  const samples = ['../assets/sample1.jpg', '../assets/sample2.jpg', '../assets/sample3.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample6.jpg'];
+  for (const sample of samples) await processImage(sample);
+}
+
 async function main() {
   log('Human demo starting ...');
   // setup ui control panel
   await setupUI();
-  // setup webcam
-  await setupCamera();
-
-  // or setup image
-  // const input = await setupImage();
   const msg = `Human ready: version: ${human.version} TensorFlow/JS version: ${human.tf.version_core}`;
   document.getElementById('log').innerText += '\n' + msg;
   log(msg);
-  // run actual detection. if input is video, it will run in a loop else it will run only once
-  // runHumanDetect(video, canvas);
+  // use one of the two:
+  await setupCamera();
+  // await detectSampleImages();
 }
 window.onload = main;
diff --git a/src/config.js b/src/config.js
index 1b4b00db..39448db0 100644
--- a/src/config.js
+++ b/src/config.js
@@ -1,65 +1,77 @@
+/* eslint-disable indent */
+/* eslint-disable no-multi-spaces */
+
 export default {
-  backend: 'webgl',
-  console: true,
+  backend: 'webgl', // select tfjs backend to use
+  console: true, // enable debugging output to console
   face: {
-    enabled: true, // refers to detector, but since all other face modules rely on detector, it should be a global
+    enabled: true, // controls if specified module is enabled
+    // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
+    // (note: module is not loaded until it is required)
     detector: {
-      modelPath: '../models/blazeface/back/model.json', // can be blazeface-front or blazeface-back
-      inputSize: 256, // fixed value: 128 for front and tfhub and 256 for back
-      maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
-      skipFrames: 10, // how many frames to go without running the bounding box detector
-      minConfidence: 0.5, // threshold for discarding a prediction
-      iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-      scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+      modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
+      // 'front' is optimized for large faces such as a front-facing camera and 'back' is optimized for distant faces.
+      inputSize: 256, // fixed value: 128 for 'front' and 'tfhub', 256 for 'back'
+      maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
+      skipFrames: 10, // how many frames to go without re-running the face bounding box detector
+      // if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
+      // as the face probably hasn't moved much in such a short time (10 * 1/25 = 0.25 sec)
+      minConfidence: 0.5, // threshold for discarding a prediction
+      iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+      scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     },
     mesh: {
       enabled: true,
       modelPath: '../models/facemesh/model.json',
-      inputSize: 192, // fixed value
+      inputSize: 192, // fixed value
     },
     iris: {
       enabled: true,
       modelPath: '../models/iris/model.json',
-      enlargeFactor: 2.3, // empiric tuning
-      inputSize: 64, // fixed value
+      enlargeFactor: 2.3, // empiric tuning
+      inputSize: 64, // fixed value
     },
     age: {
       enabled: true,
-      modelPath: '../models/ssrnet-age/imdb/model.json',
-      inputSize: 64, // fixed value
-      skipFrames: 10,
+      modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
+      // which determines training set for model
+      inputSize: 64, // fixed value
+      skipFrames: 10, // how many frames to go without re-running the detector
     },
     gender: {
       enabled: true,
+      minConfidence: 0.8, // threshold for discarding a prediction
       modelPath: '../models/ssrnet-gender/imdb/model.json',
     },
     emotion: {
       enabled: true,
-      inputSize: 64, // fixed value
-      minConfidence: 0.5,
-      skipFrames: 10,
-      useGrayscale: true,
+      inputSize: 64, // fixed value
+      minConfidence: 0.5, // threshold for discarding a prediction
+      skipFrames: 10, // how many frames to go without re-running the detector
+      useGrayscale: true, // convert image to grayscale before prediction or use highest channel
       modelPath: '../models/emotion/model.json',
     },
   },
   body: {
     enabled: true,
     modelPath: '../models/posenet/model.json',
-    inputResolution: 257, // fixed value
-    outputStride: 16, // fixed value
-    maxDetections: 5,
-    scoreThreshold: 0.7,
-    nmsRadius: 20,
+    inputResolution: 257, // fixed value
+    outputStride: 16, // fixed value
+    maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
   },
   hand: {
     enabled: true,
-    inputSize: 256, // fixed value
-    skipFrames: 10,
-    minConfidence: 0.5,
-    iouThreshold: 0.3,
-    scoreThreshold: 0.7,
-    enlargeFactor: 1.65, // empiric tuning
-    maxHands: 2,
+    inputSize: 256, // fixed value
+    skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
+    // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
+    // as the hand probably hasn't moved much in such a short time (10 * 1/25 = 0.25 sec)
+    minConfidence: 0.5, // threshold for discarding a prediction
+    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
+    maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
     detector: {
       anchors: '../models/handdetect/anchors.json',
       modelPath: '../models/handdetect/model.json',
diff --git a/src/index.js b/src/index.js
index 1ea656b2..2456104d 100644
--- a/src/index.js
+++ b/src/index.js
@@ -84,11 +84,11 @@ async function detect(input, userConfig) {
     await tf.ready();
   }
   // explictly enable depthwiseconv since it's diasabled by default due to issues with large shaders
-  let savedWebglPackDepthwiseConvFlag;
-  if (tf.getBackend() === 'webgl') {
-    savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
-    tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
-  }
+  // let savedWebglPackDepthwiseConvFlag;
+  // if (tf.getBackend() === 'webgl') {
+  //   savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
+  //   tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
+  // }
   // load models if enabled
   if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
@@ -149,6 +149,7 @@ async function detect(input, userConfig) {
         annotations: face.annotations,
         age: ssrData.age,
         gender: ssrData.gender,
+        agConfidence: ssrData.confidence,
         emotion: emotionData,
         iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
       });
@@ -157,7 +158,7 @@
   }
   // set depthwiseconv to original value
-  tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
+  // tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
   // combine and return results
   perf.total = Object.values(perf).reduce((a, b) => a + b);
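
The README and demo changes above both rely on passing a partial configuration object as the second argument to `human.detect()`, which is deep-merged with the defaults in `src/config.js`. Below is a minimal sketch of that pattern for a single static image; the `analyzeStillImage` helper name and the `image` element id are illustrative (borrowed from the demo page), and it assumes the resolved result exposes a `face` array, as the demo's `drawFace()` usage suggests.

```js
import human from '../dist/human.esm.js';

// Sketch only: override just the values that matter for still images and let
// deep-merge fill in everything else from the defaults.
async function analyzeStillImage() {
  const image = document.getElementById('image'); // assumed element id, as in the demo page
  const result = await human.detect(image, {
    face: {
      detector: { skipFrames: 0 }, // static input: do not re-use a cached bounding box
      age: { skipFrames: 0 },
      emotion: { skipFrames: 0 },
    },
    hand: { skipFrames: 0 },
  });
  for (const face of result.face || []) {
    // agConfidence is the age/gender confidence added to results in src/index.js above
    console.log(face.gender, face.age, face.agConfidence, face.iris);
  }
  return result;
}
```

Setting every `skipFrames` to 0 matters here because, per the config comments, non-zero values re-use cached detections across frames, which only makes sense for video input.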