updated examples plus bugfixes

pull/293/head
Vladimir Mandic 2020-10-16 15:04:51 -04:00
parent 924eb3eb25
commit c82b1698d5
4 changed files with 181 additions and 95 deletions

View File

@ -16,11 +16,13 @@ Compatible with Browser, WebWorker and NodeJS execution!
<hr>
**Example using static image:**
![Example Using Image](demo/sample-image.jpg)
## Examples
**Example using webcam:**
![Example Using WebCam](demo/sample-video.jpg)
**Using static images:**
![Example Using Image](assets/screenshot1.jpg)
**Using webcam:**
![Example Using WebCam](assets/screenshot2.jpg)
<hr>
@ -211,17 +213,29 @@ Below is output of `human.defaults` object
Any property can be overridden by passing a user object during `human.detect()`
Note that user object and default configuration are merged using deep-merge, so you do not need to redefine the entire configuration (see the short override sketch after the list below)
Configuration object is large, but typically you only need to modify a few values:
- `enabled`: Choose which models to use
- `skipFrames`: Must be set to 0 for static images
- `modelPath`: Update as needed to reflect your application's relative path
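For example, a minimal override only needs the values that differ from the defaults. The snippet below is a sketch; the `image` element id is an illustrative assumption:

```js
// pass only the values you want to change; they are deep-merged with the defaults shown below
const image = document.getElementById('image'); // hypothetical input element
const result = await human.detect(image, {
  face: { detector: { skipFrames: 0 } }, // static image, so never skip frames
  hand: { enabled: false },              // disable hand detection entirely
});
```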
```js
human.defaults = {
console: true, // enable debugging output to console
export default {
backend: 'webgl', // select tfjs backend to use
console: true, // enable debugging output to console
face: {
enabled: true, // controls if specified module is enabled (note: module is not loaded until it is required)
enabled: true, // controls if specified module is enabled
// face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
// note: module is not loaded until it is required
detector: {
modelPath: '../models/blazeface/tfhub/model.json', // can be 'tfhub', 'front' or 'back'
inputSize: 128, // 128 for tfhub and front models, 256 for back
maxFaces: 10, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
skipFrames: 10, // how many frames to skip before re-running bounding box detection
modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
// 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distant faces.
inputSize: 256, // fixed value: 128 for 'front' and 'tfhub' models, 256 for 'back'
maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
skipFrames: 10, // how many frames to go without re-running the face bounding box detector
// if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
// as face probably hasn't moved much in short time (10 * 1/25 = 0.25 sec)
minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
@ -229,41 +243,55 @@ human.defaults = {
mesh: {
enabled: true,
modelPath: '../models/facemesh/model.json',
inputSize: 192, // fixed value
},
iris: {
enabled: true,
modelPath: '../models/iris/model.json',
enlargeFactor: 2.3, // empiric tuning
inputSize: 64, // fixed value
},
age: {
enabled: true,
modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
skipFrames: 10, // how many frames to skip before re-running bounding box detection
// which determines training set for model
inputSize: 64, // fixed value
skipFrames: 10, // how many frames to go without re-running the detector
},
gender: {
enabled: true,
modelPath: '../models/ssrnet-gender/imdb/model.json', // can be 'imdb' or 'wiki'
minConfidence: 0.8, // threshold for discarding a prediction
modelPath: '../models/ssrnet-gender/imdb/model.json',
},
emotion: {
enabled: true,
inputSize: 64, // fixed value
minConfidence: 0.5, // threshold for discarding a prediction
skipFrames: 10, // how many frames to skip before re-running bounding box detection
useGrayscale: true, // convert color input to grayscale before processing or use single channels when color input is not supported
skipFrames: 10, // how many frames to go without re-running the detector
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
modelPath: '../models/emotion/model.json',
},
},
body: {
enabled: true,
modelPath: '../models/posenet/model.json',
maxDetections: 5, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
inputResolution: 257, // fixed value
outputStride: 16, // fixed value
maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
},
hand: {
enabled: true,
skipFrames: 10, // how many frames to skip before re-running bounding box detection
inputSize: 256, // fixed value
skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
// if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
// as hand probably hasn't moved much in short time (10 * 1/25 = 0.25 sec)
minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
detector: {
anchors: '../models/handdetect/anchors.json',
modelPath: '../models/handdetect/model.json',

View File

@ -4,9 +4,11 @@ import human from '../dist/human.esm.js';
const ui = {
baseColor: 'rgba(255, 200, 255, 0.3)',
baseLabel: 'rgba(255, 200, 255, 0.8)',
baseLabel: 'rgba(255, 200, 255, 0.9)',
baseFont: 'small-caps 1.2rem "Segoe UI"',
baseLineWidth: 16,
baseLineHeight: 2,
columns: 3,
busy: false,
facing: 'user',
};
@ -23,8 +25,8 @@ const config = {
gender: { enabled: true },
emotion: { enabled: true, minConfidence: 0.5, useGrayscale: true },
},
body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
hand: { enabled: false, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
};
let settings;
let worker;
@ -49,20 +51,23 @@ const log = (...msg) => {
async function drawFace(result, canvas) {
if (!result) return;
const ctx = canvas.getContext('2d');
ctx.strokeStyle = ui.baseColor;
ctx.font = ui.baseFont;
for (const face of result) {
ctx.font = ui.baseFont;
ctx.strokeStyle = ui.baseColor;
ctx.fillStyle = ui.baseColor;
ctx.lineWidth = ui.baseLineWidth;
ctx.beginPath();
if (settings.getValue('Draw Boxes')) {
ctx.rect(face.box[0], face.box[1], face.box[2], face.box[3]);
}
const labelAgeGender = `${face.gender || ''} ${face.age || ''}`;
const labelIris = face.iris ? `iris: ${face.iris}` : '';
const labelEmotion = face.emotion && face.emotion[0] ? `emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : '';
// silly hack since fillText does not support new line
const labels = [];
if (face.agConfidence) labels.push(`${Math.trunc(100 * face.agConfidence)}% ${face.gender || ''}`);
if (face.age) labels.push(`Age:${face.age || ''}`);
if (face.iris) labels.push(`iris: ${face.iris}`);
if (face.emotion && face.emotion[0]) labels.push(`${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}`);
ctx.fillStyle = ui.baseLabel;
ctx.fillText(`${Math.trunc(100 * face.confidence)}% face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22);
for (let i = 0; i < labels.length; i++) ctx.fillText(labels[i], face.box[0] + 6, face.box[1] + 24 + ((i + 1) * ui.baseLineHeight));
ctx.stroke();
ctx.lineWidth = 1;
if (face.mesh) {
@ -102,11 +107,11 @@ async function drawFace(result, canvas) {
async function drawBody(result, canvas) {
if (!result) return;
const ctx = canvas.getContext('2d');
for (const pose of result) {
ctx.fillStyle = ui.baseColor;
ctx.strokeStyle = ui.baseColor;
ctx.font = ui.baseFont;
ctx.lineWidth = ui.baseLineWidth;
for (const pose of result) {
if (settings.getValue('Draw Points')) {
for (const point of pose.keypoints) {
ctx.beginPath();
@ -164,13 +169,13 @@ async function drawBody(result, canvas) {
async function drawHand(result, canvas) {
if (!result) return;
const ctx = canvas.getContext('2d');
for (const hand of result) {
ctx.font = ui.baseFont;
ctx.lineWidth = ui.baseLineWidth;
window.result = result;
for (const hand of result) {
if (settings.getValue('Draw Boxes')) {
ctx.lineWidth = ui.baseLineWidth;
ctx.beginPath();
ctx.strokeStyle = ui.baseColor;
ctx.fillStyle = ui.baseColor;
ctx.rect(hand.box[0], hand.box[1], hand.box[2], hand.box[3]);
ctx.fillStyle = ui.baseLabel;
@ -398,34 +403,74 @@ async function setupCamera() {
});
}
// eslint-disable-next-line no-unused-vars
async function setupImage() {
const image = document.getElementById('image');
image.width = window.innerWidth;
image.height = window.innerHeight;
async function processImage(input) {
ui.baseColor = 'rgba(200, 255, 255, 0.5)';
ui.baseLabel = 'rgba(200, 255, 255, 0.8)';
ui.baseFont = 'small-caps 3.5rem "Segoe UI"';
ui.baseLineWidth = 16;
ui.baseLineHeight = 5;
ui.columns = 3;
const cfg = {
backend: 'webgl',
console: true,
face: {
enabled: true,
detector: { maxFaces: 10, skipFrames: 0, minConfidence: 0.1, iouThreshold: 0.3, scoreThreshold: 0.3 },
mesh: { enabled: true },
iris: { enabled: true },
age: { enabled: true, skipFrames: 0 },
gender: { enabled: true },
emotion: { enabled: true, minConfidence: 0.1, useGrayscale: true },
},
body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 0, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.5 },
};
return new Promise((resolve) => {
image.onload = () => resolve(image);
image.src = 'sample.jpg';
const image = document.getElementById('image');
image.onload = async () => {
log('Processing image:', image.src);
const canvas = document.getElementById('canvas');
image.width = image.naturalWidth;
image.height = image.naturalHeight;
canvas.width = image.naturalWidth;
canvas.height = image.naturalHeight;
const result = await human.detect(image, cfg);
await drawResults(image, result, canvas);
const thumb = document.createElement('canvas');
thumb.width = window.innerWidth / (ui.columns + 0.02);
thumb.height = canvas.height / (window.innerWidth / thumb.width);
const ctx = thumb.getContext('2d');
ctx.drawImage(canvas, 0, 0, canvas.width, canvas.height, 0, 0, thumb.width, thumb.height);
document.getElementById('samples').appendChild(thumb);
image.src = '';
resolve(true);
};
image.src = input;
});
}
// eslint-disable-next-line no-unused-vars
async function detectSampleImages() {
ui.baseFont = 'small-caps 3rem "Segoe UI"';
document.getElementById('canvas').style.display = 'none';
log('Running detection of sample images');
const samples = ['../assets/sample1.jpg', '../assets/sample2.jpg', '../assets/sample3.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample6.jpg'];
for (const sample of samples) await processImage(sample);
}
async function main() {
log('Human demo starting ...');
// setup ui control panel
await setupUI();
// setup webcam
await setupCamera();
// or setup image
// const input = await setupImage();
const msg = `Human ready: version: ${human.version} TensorFlow/JS version: ${human.tf.version_core}`;
document.getElementById('log').innerText += '\n' + msg;
log(msg);
// run actual detection. if input is video, it will run in a loop else it will run only once
// runHumanDetect(video, canvas);
// use one of the two:
await setupCamera();
// await detectSampleImages();
}
window.onload = main;

View File

@ -1,13 +1,21 @@
/* eslint-disable indent */
/* eslint-disable no-multi-spaces */
export default {
backend: 'webgl',
console: true,
backend: 'webgl', // select tfjs backend to use
console: true, // enable debugging output to console
face: {
enabled: true, // refers to detector, but since all other face modules rely on detector, it should be a global
enabled: true, // controls if specified module is enabled
// face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
// (note: module is not loaded until it is required)
detector: {
modelPath: '../models/blazeface/back/model.json', // can be blazeface-front or blazeface-back
inputSize: 256, // fixed value: 128 for front and tfhub and 256 for back
modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
// 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distant faces.
inputSize: 256, // fixed value: 128 for 'front' and 'tfhub' models, 256 for 'back'
maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
skipFrames: 10, // how many frames to go without running the bounding box detector
skipFrames: 10, // how many frames to go without re-running the face bounding box detector
// if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
// as face probably hasn't moved much in short time (10 * 1/25 = 0.25 sec)
minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
@ -25,20 +33,22 @@ export default {
},
age: {
enabled: true,
modelPath: '../models/ssrnet-age/imdb/model.json',
modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
// which determines training set for model
inputSize: 64, // fixed value
skipFrames: 10,
skipFrames: 10, // how many frames to go without re-running the detector
},
gender: {
enabled: true,
minConfidence: 0.8, // threshold for discarding a prediction
modelPath: '../models/ssrnet-gender/imdb/model.json',
},
emotion: {
enabled: true,
inputSize: 64, // fixed value
minConfidence: 0.5,
skipFrames: 10,
useGrayscale: true,
minConfidence: 0.5, // threshold for discarding a prediction
skipFrames: 10, // how many frames to go without re-running the detector
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
modelPath: '../models/emotion/model.json',
},
},
@ -47,19 +57,21 @@ export default {
modelPath: '../models/posenet/model.json',
inputResolution: 257, // fixed value
outputStride: 16, // fixed value
maxDetections: 5,
scoreThreshold: 0.7,
nmsRadius: 20,
maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
},
hand: {
enabled: true,
inputSize: 256, // fixed value
skipFrames: 10,
minConfidence: 0.5,
iouThreshold: 0.3,
scoreThreshold: 0.7,
enlargeFactor: 1.65, // empiric tuning
maxHands: 2,
skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
// if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
// as hand probably hasn't moved much in short time (10 * 1/25 = 0.25 sec)
minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
detector: {
anchors: '../models/handdetect/anchors.json',
modelPath: '../models/handdetect/model.json',

View File

@ -84,11 +84,11 @@ async function detect(input, userConfig) {
await tf.ready();
}
// explicitly enable depthwiseconv since it's disabled by default due to issues with large shaders
let savedWebglPackDepthwiseConvFlag;
if (tf.getBackend() === 'webgl') {
savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
}
// let savedWebglPackDepthwiseConvFlag;
// if (tf.getBackend() === 'webgl') {
// savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
// tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
// }
// load models if enabled
if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
@ -149,6 +149,7 @@ async function detect(input, userConfig) {
annotations: face.annotations,
age: ssrData.age,
gender: ssrData.gender,
agConfidence: ssrData.confidence,
emotion: emotionData,
iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
});
@ -157,7 +158,7 @@ async function detect(input, userConfig) {
}
// set depthwiseconv to original value
tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
// tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
// combine and return results
perf.total = Object.values(perf).reduce((a, b) => a + b);
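As a quick illustration of the aggregation above, the reduce simply sums every recorded per-stage timing into `perf.total`; the stage names and numbers below are made up:

```js
// hypothetical per-stage timings in milliseconds
const perf = { face: 41, body: 27, hand: 12 };
perf.total = Object.values(perf).reduce((a, b) => a + b); // 41 + 27 + 12 = 80
```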