From 5ecc072f0f87c80131ba582ac44d21570099dfcb Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 6 Nov 2020 15:35:58 -0500
Subject: [PATCH] model tuning

---
 config.js                           | 22 ++++++++--------
 demo/browser.js                     |  5 ++--
 demo/menu.js                        |  2 +-
 src/age/{ssrnet.js => age.js}       |  0
 src/gender/{ssrnet.js => gender.js} | 39 +++++++++++++++++++++++------
 src/human.js                        | 12 ++++-----
 6 files changed, 53 insertions(+), 27 deletions(-)
 rename src/age/{ssrnet.js => age.js} (100%)
 rename src/gender/{ssrnet.js => gender.js} (53%)

diff --git a/config.js b/config.js
index 68381f3b..0f218540 100644
--- a/config.js
+++ b/config.js
@@ -56,9 +56,9 @@ export default {
     skipFrames: 15, // how many frames to go without re-running the face bounding box detector, only used for video inputs
                     // if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
                     // as face probably hasn't moved much in short time (15 * 1/25 = 0.6 sec)
-    minConfidence: 0.5, // threshold for discarding a prediction
-    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-    scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    minConfidence: 0.1, // threshold for discarding a prediction
+    iouThreshold: 0.1, // threshold for deciding whether boxes overlap too much in non-maximum suppression (0.1 means drop if overlap 10%)
+    scoreThreshold: 0.1, // threshold for deciding when to remove boxes based on score in non-maximum suppression, this is applied on detection objects only and before minConfidence
   },
   mesh: {
     enabled: true,
@@ -73,20 +73,22 @@ export default {
   },
   age: {
     enabled: true,
-    modelPath: '../models/ssrnet-age-imdb.json', // can be 'imdb' or 'wiki'
+    modelPath: '../models/age-ssrnet-imdb.json', // can be 'age-ssrnet-imdb' or 'age-ssrnet-wiki' which determines training set for model
     inputSize: 64, // fixed value
     skipFrames: 15, // how many frames to go without re-running the detector, only used for video inputs
   },
   gender: {
     enabled: true,
-    minConfidence: 0.5, // threshold for discarding a prediction
-    modelPath: '../models/ssrnet-gender-imdb.json',
+    minConfidence: 0.1, // threshold for discarding a prediction
+    modelPath: '../models/gender-ssrnet-imdb.json', // can be 'gender', 'gender-ssrnet-imdb' or 'gender-ssrnet-wiki'
+    inputSize: 64, // fixed value
+    skipFrames: 15, // how many frames to go without re-running the detector, only used for video inputs
   },
   emotion: {
     enabled: true,
     inputSize: 64, // fixed value
-    minConfidence: 0.5, // threshold for discarding a prediction
+    minConfidence: 0.2, // threshold for discarding a prediction
     skipFrames: 15, // how many frames to go without re-running the detector
     modelPath: '../models/emotion-large.json', // can be 'mini', 'large'
   },
@@ -106,9 +108,9 @@ export default {
     skipFrames: 15, // how many frames to go without re-running the hand bounding box detector, only used for video inputs
                     // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
                     // as the hand probably hasn't moved much in short time (15 * 1/25 = 0.6 sec)
-    minConfidence: 0.5, // threshold for discarding a prediction
-    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-    scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    minConfidence: 0.1, // threshold for discarding a prediction
+    iouThreshold: 0.2, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+    scoreThreshold: 0.1, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
     maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
     detector: {
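A note on the retuned thresholds: `iouThreshold` and `scoreThreshold` are consumed by TensorFlow.js non-maximum suppression, while `minConfidence` is applied to the predictions that survive it, which is why the config comment stresses that `scoreThreshold` runs before `minConfidence`. A minimal sketch of that two-stage flow; the `filterDetections` helper, the `boxes`/`scores` inputs, and the cap of 10 boxes are illustrative, not taken from this patch:

```js
const tf = require('@tensorflow/tfjs');

// illustrative two-stage filter: boxes is a [n, 4] tensor, scores a [n] tensor
async function filterDetections(boxes, scores, config) {
  // stage 1: NMS drops boxes overlapping more than iouThreshold (0.1 = 10%)
  // and boxes scoring below scoreThreshold
  const nms = await tf.image.nonMaxSuppressionAsync(
    boxes, scores, 10, // 10 is an illustrative cap on kept boxes
    config.face.detector.iouThreshold,
    config.face.detector.scoreThreshold);
  const indices = await nms.data();
  nms.dispose();
  // stage 2: minConfidence then discards surviving predictions individually
  const all = await scores.data();
  return Array.from(indices).filter((i) => all[i] >= config.face.detector.minConfidence);
}
```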
diff --git a/demo/browser.js b/demo/browser.js
index f2015ba8..90277c1e 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -69,7 +69,7 @@ function drawResults(input, result, canvas) {
   // console.log(result.performance);

   // eslint-disable-next-line no-use-before-define
-  requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop before we even draw results
+  if (input.srcObject) requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop before we even draw results

   // draw fps chart
   menu.updateChart('FPS', fps);
@@ -187,7 +187,7 @@ function runHumanDetect(input, canvas) {
   timeStamp = performance.now();
   // if live video
   const live = input.srcObject && (input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState > 2) && (!input.paused);
-  if (!live) {
+  if (!live && input.srcObject) {
     // if we want to continue and camera not ready, retry in 0.5sec, else just give up
     if ((input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState <= 2)) setTimeout(() => runHumanDetect(input, canvas), 500);
     else log(`camera not ready: track state: ${input.srcObject?.getVideoTracks()[0].readyState} stream state: ${input.readyState}`);
@@ -317,6 +317,7 @@ function setupMenu() {
   });
   menu.addRange('Min Confidence', human.config.face.detector, 'minConfidence', 0.0, 1.0, 0.05, (val) => {
     human.config.face.detector.minConfidence = parseFloat(val);
+    human.config.face.gender.minConfidence = parseFloat(val);
     human.config.face.emotion.minConfidence = parseFloat(val);
     human.config.hand.minConfidence = parseFloat(val);
   });
diff --git a/demo/menu.js b/demo/menu.js
index e3758785..9375859f 100644
--- a/demo/menu.js
+++ b/demo/menu.js
@@ -213,7 +213,7 @@ class Menu {
     el.innerHTML = `${title}`;
     this.container.appendChild(el);
     el.addEventListener('change', (evt) => {
-      object[variable] = evt.target.value;
+      object[variable] = parseInt(evt.target.value) === parseFloat(evt.target.value) ? parseInt(evt.target.value) : parseFloat(evt.target.value);
       evt.target.setAttribute('value', evt.target.value);
       if (callback) callback(evt.target.value);
     });
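The demo/menu.js change deserves a word: a range input always reports its value as a string, so the old assignment silently turned numeric config fields such as `skipFrames` into strings once the user touched a slider. The new ternary keeps integers as integers and fractions as floats. A standalone illustration of the coercion (the helper name is hypothetical, not part of the patch):

```js
// range inputs report strings; coerce back to the numeric type the config expects
function coerceRangeValue(value) {
  // '15'   -> parseInt (15) === parseFloat (15)   -> integer 15 (e.g. skipFrames)
  // '0.05' -> parseInt (0)  !== parseFloat (0.05) -> float 0.05 (e.g. minConfidence)
  return parseInt(value) === parseFloat(value) ? parseInt(value) : parseFloat(value);
}

console.log(coerceRangeValue('15'));   // 15
console.log(coerceRangeValue('0.05')); // 0.05
```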
diff --git a/src/age/ssrnet.js b/src/age/age.js
similarity index 100%
rename from src/age/ssrnet.js
rename to src/age/age.js
diff --git a/src/gender/ssrnet.js b/src/gender/gender.js
similarity index 53%
rename from src/gender/ssrnet.js
rename to src/gender/gender.js
index fcc5ec52..5524d18a 100644
--- a/src/gender/ssrnet.js
+++ b/src/gender/gender.js
@@ -4,17 +4,20 @@ const profile = require('../profile.js');
 const models = {};
 let last = { gender: '' };
 let frame = Number.MAX_SAFE_INTEGER;
+let alternative = false;

 // tuning values
 const zoom = [0, 0]; // 0..1 meaning 0%..100%
+const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale

 async function load(config) {
   if (!models.gender) models.gender = await tf.loadGraphModel(config.face.gender.modelPath);
+  alternative = models.gender.inputs[0].shape[3] === 1;
   return models.gender;
 }

 async function predict(image, config) {
-  if ((frame < config.face.age.skipFrames) && last.gender !== '') {
+  if ((frame < config.face.gender.skipFrames) && last.gender !== '') {
     frame += 1;
     return last;
   }
@@ -26,9 +29,21 @@ async function predict(image, config) {
     (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
     (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
   ]];
-  const resize = tf.image.cropAndResize(image, box, [0], [config.face.age.inputSize, config.face.age.inputSize]);
+  const resize = tf.image.cropAndResize(image, box, [0], [config.face.gender.inputSize, config.face.gender.inputSize]);
+  let enhance;
+  if (alternative) {
+    enhance = tf.tidy(() => {
+      const [red, green, blue] = tf.split(resize, 3, 3);
+      const redNorm = tf.mul(red, rgb[0]);
+      const greenNorm = tf.mul(green, rgb[1]);
+      const blueNorm = tf.mul(blue, rgb[2]);
+      const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
+      return grayscale.sub(0.5).mul(2);
+    });
+  } else {
+    enhance = tf.mul(resize, [255.0]);
+  }
   // const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
-  const enhance = tf.mul(resize, [255.0]);
   tf.dispose(resize);

   let genderT;
@@ -46,10 +61,20 @@ async function predict(image, config) {

   if (genderT) {
     const data = genderT.dataSync();
-    const confidence = Math.trunc(Math.abs(1.9 * 100 * (data[0] - 0.5))) / 100;
-    if (confidence > config.face.gender.minConfidence) {
-      obj.gender = data[0] <= 0.5 ? 'female' : 'male';
-      obj.confidence = confidence;
+    if (alternative) {
+      // returns two values 0..1, bigger one is prediction
+      const confidence = Math.trunc(100 * Math.abs(data[0] - data[1])) / 100;
+      if (confidence > config.face.gender.minConfidence) {
+        obj.gender = data[0] > data[1] ? 'female' : 'male';
+        obj.confidence = confidence;
+      }
+    } else {
+      // returns one value 0..1, .5 is prediction threshold
+      const confidence = Math.trunc(200 * Math.abs((data[0] - 0.5))) / 100;
+      if (confidence > config.face.gender.minConfidence) {
+        obj.gender = data[0] <= 0.5 ? 'female' : 'male';
+        obj.confidence = confidence;
+      }
     }
   }
   genderT.dispose();
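To make the two confidence formulas in src/gender/gender.js concrete: the grayscale ('alternative') model emits two values whose gap is taken as confidence, while the original model emits a single value where distance from the 0.5 decision boundary is rescaled; the scaling factor also moves from 1.9 to 2.0, so a maximally certain output now maps to 1.0 instead of 0.95. A worked example under that reading (the helper name is hypothetical):

```js
// data comes from genderT.dataSync(); its length tells the model variant apart
function genderConfidence(data) {
  return data.length === 2
    ? Math.trunc(100 * Math.abs(data[0] - data[1])) / 100 // e.g. [0.92, 0.08] -> 0.84
    : Math.trunc(200 * Math.abs(data[0] - 0.5)) / 100;    // e.g. [0.10]       -> 0.80
}

console.log(genderConfidence([0.92, 0.08])); // 0.84, 'female' since data[0] > data[1]
console.log(genderConfidence([0.10]));       // 0.8,  'female' since data[0] <= 0.5
```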
diff --git a/src/human.js b/src/human.js
index 13b6ecab..69406544 100644
--- a/src/human.js
+++ b/src/human.js
@@ -1,7 +1,7 @@
 const tf = require('@tensorflow/tfjs');
 const facemesh = require('./face/facemesh.js');
-const age = require('./age/ssrnet.js');
-const gender = require('./gender/ssrnet.js');
+const age = require('./age/age.js');
+const gender = require('./gender/gender.js');
 const emotion = require('./emotion/emotion.js');
 const posenet = require('./body/posenet.js');
 const handpose = require('./hand/handpose.js');
@@ -13,8 +13,7 @@ const app = require('../package.json');

 // static config override for non-video detection
 const override = {
-  face: { detector: { skipFrames: 0 }, age: { skipFrames: 0 }, emotion: { skipFrames: 0 } },
-  hand: { skipFrames: 0 },
+  face: { detector: { skipFrames: 0 }, age: { skipFrames: 0 }, gender: { skipFrames: 0 }, emotion: { skipFrames: 0 } }, hand: { skipFrames: 0 },
 };

 // helper function: gets elapsed time on both browser and nodejs
@@ -46,7 +45,6 @@ class Human {
   constructor() {
     this.tf = tf;
     this.version = app.version;
-    this.defaults = defaults;
     this.config = defaults;
     this.fx = null;
     this.state = 'idle';
@@ -114,7 +112,7 @@ class Human {
   async load(userConfig) {
     this.state = 'load';
     const timeStamp = now();
-    if (userConfig) this.config = mergeDeep(defaults, userConfig);
+    if (userConfig) this.config = mergeDeep(this.config, userConfig);

     if (this.firstRun) {
       this.checkBackend(true);
@@ -300,7 +298,7 @@ class Human {
     let timeStamp;

     // update configuration
-    this.config = mergeDeep(defaults, userConfig);
+    this.config = mergeDeep(this.config, userConfig);
     if (!this.config.videoOptimized) this.config = mergeDeep(this.config, override);

     // sanity checks
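Finally, the src/human.js switch from `mergeDeep(defaults, userConfig)` to `mergeDeep(this.config, userConfig)` changes semantics: partial overrides now accumulate across `load()` and `detect()` calls instead of being rebased onto defaults every time (the patch also removes the `this.defaults` alias). A minimal sketch of that behavior, with an illustrative recursive merge standing in for the library's own `mergeDeep`, whose implementation is not shown in this patch:

```js
// illustrative deep merge; the library's actual mergeDeep may differ in details
function mergeDeep(target, source) {
  const out = { ...target };
  for (const key of Object.keys(source || {})) {
    const val = source[key];
    out[key] = (val && typeof val === 'object' && !Array.isArray(val))
      ? mergeDeep(target[key] || {}, val)
      : val;
  }
  return out;
}

let config = { face: { detector: { minConfidence: 0.1 }, enabled: true } };
config = mergeDeep(config, { face: { detector: { minConfidence: 0.5 } } });
config = mergeDeep(config, { hand: { skipFrames: 0 } }); // unrelated partial override
console.log(config.face.detector.minConfidence); // still 0.5: earlier override is sticky
```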