mirror of https://github.com/vladmandic/human
model tuning
parent f705ce9dce
commit 5ecc072f0f
config.js (22 changed lines)
@@ -56,9 +56,9 @@ export default {
      skipFrames: 15, // how many frames to go without re-running the face bounding box detector, only used for video inputs
      // if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
      // as face probably hasn't moved much in short time (15 * 1/25 = 0.6 sec)
-      minConfidence: 0.5, // threshold for discarding a prediction
-      iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-      scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+      minConfidence: 0.1, // threshold for discarding a prediction
+      iouThreshold: 0.1, // threshold for deciding whether boxes overlap too much in non-maximum suppression (0.1 means drop if overlap 10%)
+      scoreThreshold: 0.1, // threshold for deciding when to remove boxes based on score in non-maximum suppression, this is applied on detection objects only and before minConfidence
    },
    mesh: {
      enabled: true,
@@ -73,20 +73,22 @@ export default {
    },
    age: {
      enabled: true,
-      modelPath: '../models/ssrnet-age-imdb.json', // can be 'imdb' or 'wiki'
+      modelPath: '../models/age-ssrnet-imdb.json', // can be 'age-ssrnet-imdb' or 'age-ssrnet-wiki'
+      // which determines training set for model
      inputSize: 64, // fixed value
      skipFrames: 15, // how many frames to go without re-running the detector, only used for video inputs
    },
    gender: {
      enabled: true,
-      minConfidence: 0.5, // threshold for discarding a prediction
-      modelPath: '../models/ssrnet-gender-imdb.json',
+      minConfidence: 0.1, // threshold for discarding a prediction
+      modelPath: '../models/gender-ssrnet-imdb.json', // can be 'gender', 'gender-ssrnet-imdb' or 'gender-ssrnet-wiki'
      inputSize: 64, // fixed value
      skipFrames: 15, // how many frames to go without re-running the detector, only used for video inputs
    },
    emotion: {
      enabled: true,
      inputSize: 64, // fixed value
-      minConfidence: 0.5, // threshold for discarding a prediction
+      minConfidence: 0.2, // threshold for discarding a prediction
      skipFrames: 15, // how many frames to go without re-running the detector
      modelPath: '../models/emotion-large.json', // can be 'mini', 'large'
    },
@@ -106,9 +108,9 @@ export default {
    skipFrames: 15, // how many frames to go without re-running the hand bounding box detector, only used for video inputs
    // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
    // as the hand probably hasn't moved much in short time (15 * 1/25 = 0.6 sec)
-    minConfidence: 0.5, // threshold for discarding a prediction
-    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-    scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    minConfidence: 0.1, // threshold for discarding a prediction
+    iouThreshold: 0.2, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+    scoreThreshold: 0.1, // threshold for deciding when to remove boxes based on score in non-maximum suppression
    enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
    maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
    detector: {
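Note: the tuned thresholds above do not have to be edited in config.js; since the library merges a per-call user config (see the src/human.js changes below), they can also be overridden at runtime. A minimal sketch, assuming the browser bundle exposes the Human class used by the demo:

// sketch: override the tuned NMS thresholds per call instead of editing config.js
const human = new Human();
async function detectWithTunedThresholds(video) {
  return human.detect(video, {
    face: { detector: { minConfidence: 0.1, iouThreshold: 0.1, scoreThreshold: 0.1 } },
    hand: { minConfidence: 0.1, iouThreshold: 0.2, scoreThreshold: 0.1 },
  });
}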
@@ -69,7 +69,7 @@ function drawResults(input, result, canvas) {
  // console.log(result.performance);

  // eslint-disable-next-line no-use-before-define
-  requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop before we even draw results
+  if (input.srcObject) requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop before we even draw results

  // draw fps chart
  menu.updateChart('FPS', fps);
@@ -187,7 +187,7 @@ function runHumanDetect(input, canvas) {
  timeStamp = performance.now();
  // if live video
  const live = input.srcObject && (input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState > 2) && (!input.paused);
-  if (!live) {
+  if (!live && input.srcObject) {
    // if we want to continue and camera not ready, retry in 0.5sec, else just give up
    if ((input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState <= 2)) setTimeout(() => runHumanDetect(input, canvas), 500);
    else log(`camera not ready: track state: ${input.srcObject?.getVideoTracks()[0].readyState} stream state: ${input.readyState}`);
@@ -317,6 +317,7 @@ function setupMenu() {
  });
  menu.addRange('Min Confidence', human.config.face.detector, 'minConfidence', 0.0, 1.0, 0.05, (val) => {
    human.config.face.detector.minConfidence = parseFloat(val);
+    human.config.face.gender.minConfidence = parseFloat(val);
    human.config.face.emotion.minConfidence = parseFloat(val);
    human.config.hand.minConfidence = parseFloat(val);
  });
@@ -213,7 +213,7 @@ class Menu {
    el.innerHTML = `<input class="menu-range" type="range" id="${this.newID}" min="${min}" max="${max}" step="${step}" value="${object[variable]}">${title}`;
    this.container.appendChild(el);
    el.addEventListener('change', (evt) => {
-      object[variable] = evt.target.value;
+      object[variable] = parseInt(evt.target.value) === parseFloat(evt.target.value) ? parseInt(evt.target.value) : parseFloat(evt.target.value);
      evt.target.setAttribute('value', evt.target.value);
      if (callback) callback(evt.target.value);
    });
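Note on the replacement line: range inputs report their value as a string, so the new ternary coerces it back to a number, keeping integer steps as integers and fractional steps as floats before writing them into the config object. A standalone sketch of the same conversion (coerceNumber is a hypothetical helper, not part of the menu code):

// hypothetical helper mirroring the inline ternary above
function coerceNumber(str) {
  return parseInt(str, 10) === parseFloat(str) ? parseInt(str, 10) : parseFloat(str);
}
coerceNumber('15');   // 15 (integer step)
coerceNumber('0.05'); // 0.05 (fractional step)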
@@ -4,17 +4,20 @@ const profile = require('../profile.js');
const models = {};
let last = { gender: '' };
let frame = Number.MAX_SAFE_INTEGER;
+let alternative = false;

// tuning values
const zoom = [0, 0]; // 0..1 meaning 0%..100%
+const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale

async function load(config) {
  if (!models.gender) models.gender = await tf.loadGraphModel(config.face.gender.modelPath);
+  alternative = models.gender.inputs[0].shape[3] === 1;
  return models.gender;
}

async function predict(image, config) {
-  if ((frame < config.face.age.skipFrames) && last.gender !== '') {
+  if ((frame < config.face.gender.skipFrames) && last.gender !== '') {
    frame += 1;
    return last;
  }
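The guard at the top of predict() is a frame-skip cache: the model is re-run only every skipFrames video frames and the cached result is returned in between. A self-contained sketch of the pattern, where runModel is a hypothetical stand-in for the actual inference call:

let frame = Number.MAX_SAFE_INTEGER; // force a model run on the first frame
let last = { gender: '' };           // cached result
async function cachedPredict(image, config) {
  if ((frame < config.face.gender.skipFrames) && last.gender !== '') {
    frame += 1;                 // still inside the skip window, reuse the cached result
    return last;
  }
  last = await runModel(image); // hypothetical helper standing in for the real model run
  frame = 0;                    // restart the skip window
  return last;
}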
@@ -26,9 +29,21 @@ async function predict(image, config) {
    (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
    (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
  ]];
-  const resize = tf.image.cropAndResize(image, box, [0], [config.face.age.inputSize, config.face.age.inputSize]);
+  const resize = tf.image.cropAndResize(image, box, [0], [config.face.gender.inputSize, config.face.gender.inputSize]);
+  let enhance;
+  if (alternative) {
+    enhance = tf.tidy(() => {
+      const [red, green, blue] = tf.split(resize, 3, 3);
+      const redNorm = tf.mul(red, rgb[0]);
+      const greenNorm = tf.mul(green, rgb[1]);
+      const blueNorm = tf.mul(blue, rgb[2]);
+      const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
+      return grayscale.sub(0.5).mul(2);
+    });
+  } else {
+    enhance = tf.mul(resize, [255.0]);
+  }
-  // const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
-  const enhance = tf.mul(resize, [255.0]);
  tf.dispose(resize);

  let genderT;
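The new alternative branch converts the crop to grayscale with the standard BT.601 luma weights stored in rgb and rescales it from the 0..1 range to -1..1 for the single-channel model. A plain-JS sketch of the per-pixel math the tensor ops implement, assuming channel values are already normalized to 0..1:

// per-pixel equivalent of the tf.split / tf.mul / tf.addN pipeline above
function toNormalizedGray(r, g, b) {
  const gray = 0.2989 * r + 0.5870 * g + 0.1140 * b; // BT.601 weights from the rgb[] constant
  return (gray - 0.5) * 2;                           // matches grayscale.sub(0.5).mul(2)
}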
@@ -46,10 +61,20 @@ async function predict(image, config) {

  if (genderT) {
    const data = genderT.dataSync();
-    const confidence = Math.trunc(Math.abs(1.9 * 100 * (data[0] - 0.5))) / 100;
-    if (confidence > config.face.gender.minConfidence) {
-      obj.gender = data[0] <= 0.5 ? 'female' : 'male';
-      obj.confidence = confidence;
-    }
+    if (alternative) {
+      // returns two values 0..1, bigger one is prediction
+      const confidence = Math.trunc(100 * Math.abs(data[0] - data[1])) / 100;
+      if (confidence > config.face.gender.minConfidence) {
+        obj.gender = data[0] > data[1] ? 'female' : 'male';
+        obj.confidence = confidence;
+      }
+    } else {
+      // returns one value 0..1, .5 is prediction threshold
+      const confidence = Math.trunc(200 * Math.abs((data[0] - 0.5))) / 100;
+      if (confidence > config.face.gender.minConfidence) {
+        obj.gender = data[0] <= 0.5 ? 'female' : 'male';
+        obj.confidence = confidence;
+      }
+    }
  }
  genderT.dispose();
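As a quick worked example of the two scoring paths (the output values below are made up for illustration): a two-output result of [0.9, 0.1] yields confidence |0.9 - 0.1| = 0.8 and predicts 'female', while a single-output result of 0.3 yields confidence 2 * |0.3 - 0.5| = 0.4 and also predicts 'female'; both comfortably clear the lowered minConfidence of 0.1.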
src/human.js (12 changed lines)
@@ -1,7 +1,7 @@
const tf = require('@tensorflow/tfjs');
const facemesh = require('./face/facemesh.js');
-const age = require('./age/ssrnet.js');
-const gender = require('./gender/ssrnet.js');
+const age = require('./age/age.js');
+const gender = require('./gender/gender.js');
const emotion = require('./emotion/emotion.js');
const posenet = require('./body/posenet.js');
const handpose = require('./hand/handpose.js');
@@ -13,8 +13,7 @@ const app = require('../package.json');

// static config override for non-video detection
const override = {
-  face: { detector: { skipFrames: 0 }, age: { skipFrames: 0 }, emotion: { skipFrames: 0 } },
-  hand: { skipFrames: 0 },
+  face: { detector: { skipFrames: 0 }, age: { skipFrames: 0 }, gender: { skipFrames: 0 }, emotion: { skipFrames: 0 } }, hand: { skipFrames: 0 },
};

// helper function: gets elapsed time on both browser and nodejs
@@ -46,7 +45,6 @@ class Human {
  constructor() {
    this.tf = tf;
    this.version = app.version;
-    this.defaults = defaults;
    this.config = defaults;
    this.fx = null;
    this.state = 'idle';
@@ -114,7 +112,7 @@ class Human {
  async load(userConfig) {
    this.state = 'load';
    const timeStamp = now();
-    if (userConfig) this.config = mergeDeep(defaults, userConfig);
+    if (userConfig) this.config = mergeDeep(this.config, userConfig);

    if (this.firstRun) {
      this.checkBackend(true);
@@ -300,7 +298,7 @@ class Human {
    let timeStamp;

    // update configuration
-    this.config = mergeDeep(defaults, userConfig);
+    this.config = mergeDeep(this.config, userConfig);
    if (!this.config.videoOptimized) this.config = mergeDeep(this.config, override);

    // sanity checks
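One consequence of merging userConfig into this.config rather than into defaults (in both load() and the detection path above) is that per-call overrides now persist across subsequent calls instead of being reset each time. A small illustration, assuming detect() accepts a user config as its second argument:

// inside an async context
await human.detect(image, { face: { age: { enabled: false } } }); // disables age estimation
await human.detect(image);                                        // age stays disabled on later calls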