mirror of https://github.com/vladmandic/human
updated examples plus bugfixes
parent 924eb3eb25
commit c82b1698d5
README.md | 82
@@ -16,11 +16,13 @@ Compatible with Browser, WebWorker and NodeJS execution!

 <hr>

-**Example using static image:**
+## Examples

-**Example using webcam:**
+**Using static images:**

+**Using webcam:**

 <hr>

@@ -211,59 +213,85 @@ Below is output of `human.defaults` object

 Any property can be overridden by passing a user object during `human.detect()`
 Note that the user object and default configuration are merged using deep-merge, so you do not need to redefine the entire configuration

+Configuration object is large, but typically you only need to modify a few values:
+
+- `enabled`: Choose which models to use
+- `skipFrames`: Must be set to 0 for static images
+- `modelPath`: Update as needed to reflect your application's relative path
+
 ```js
-human.defaults = {
-  console: true, // enable debugging output to console
-  backend: 'webgl', // select tfjs backend to use
+export default {
+  backend: 'webgl', // select tfjs backend to use
+  console: true, // enable debugging output to console
   face: {
-    enabled: true, // controls if specified module is enabled (note: module is not loaded until it is required)
+    enabled: true, // controls if specified module is enabled
+    // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
+    // note: module is not loaded until it is required
     detector: {
-      modelPath: '../models/blazeface/tfhub/model.json', // can be 'tfhub', 'front' or 'back'
-      inputSize: 128, // 128 for tfhub and front models, 256 for back
-      maxFaces: 10, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
-      skipFrames: 10, // how many frames to skip before re-running bounding box detection
+      modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'
+      // 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distant faces
+      inputSize: 256, // fixed value: 128 for 'tfhub' and 'front', 256 for 'back'
+      maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
+      skipFrames: 10, // how many frames to go without re-running the face bounding box detector
+      // if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
+      // as face probably hasn't moved much in short time (10 * 1/25 = 0.4 sec)
       minConfidence: 0.5, // threshold for discarding a prediction
       iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
       scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     },
     mesh: {
       enabled: true,
       modelPath: '../models/facemesh/model.json',
+      inputSize: 192, // fixed value
     },
     iris: {
       enabled: true,
       modelPath: '../models/iris/model.json',
+      enlargeFactor: 2.3, // empiric tuning
+      inputSize: 64, // fixed value
     },
     age: {
       enabled: true,
       modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
-      skipFrames: 10, // how many frames to skip before re-running bounding box detection
+      // which determines training set for model
+      inputSize: 64, // fixed value
+      skipFrames: 10, // how many frames to go without re-running the detector
     },
     gender: {
       enabled: true,
-      modelPath: '../models/ssrnet-gender/imdb/model.json', // can be 'imdb' or 'wiki'
+      minConfidence: 0.8, // threshold for discarding a prediction
+      modelPath: '../models/ssrnet-gender/imdb/model.json',
     },
     emotion: {
       enabled: true,
-      minConfidence: 0.5, // threshold for discarding a prediction
-      skipFrames: 10, // how many frames to skip before re-running bounding box detection
-      useGrayscale: true, // convert color input to grayscale before processing or use single channels when color input is not supported
+      inputSize: 64, // fixed value
+      minConfidence: 0.5, // threshold for discarding a prediction
+      skipFrames: 10, // how many frames to go without re-running the detector
+      useGrayscale: true, // convert image to grayscale before prediction or use highest channel
       modelPath: '../models/emotion/model.json',
     },
   },
   body: {
     enabled: true,
     modelPath: '../models/posenet/model.json',
-    maxDetections: 5, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
-    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
-    nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
+    inputResolution: 257, // fixed value
+    outputStride: 16, // fixed value
+    maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
   },
   hand: {
     enabled: true,
-    skipFrames: 10, // how many frames to skip before re-running bounding box detection
-    minConfidence: 0.5, // threshold for discarding a prediction
-    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    inputSize: 256, // fixed value
+    skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
+    // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
+    // as hand probably hasn't moved much in short time (10 * 1/25 = 0.4 sec)
+    minConfidence: 0.5, // threshold for discarding a prediction
+    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
+    maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
     detector: {
       anchors: '../models/handdetect/anchors.json',
       modelPath: '../models/handdetect/model.json',
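As the README text in this hunk explains, only the properties you want to change need to be passed to `human.detect()`; everything else comes from the defaults via deep-merge. A minimal sketch of such a per-call override for a static image (the chosen overrides and the element id are illustrative, not taken from the diff):

```js
import human from '../dist/human.esm.js'; // same import path the demo uses

// Hypothetical override object: anything omitted here falls back to
// human.defaults through the library's deep-merge of the user config.
const userConfig = {
  face: {
    detector: { skipFrames: 0 }, // static image, so never reuse a cached bounding box
    age: { skipFrames: 0 },
  },
  hand: { enabled: false },      // skip hand detection entirely for this call
};

async function detectStillImage() {
  const image = document.getElementById('image'); // illustrative element id
  return human.detect(image, userConfig);         // userConfig is merged over the defaults
}
```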
demo/demo-esm.js | 105
@@ -4,9 +4,11 @@ import human from '../dist/human.esm.js';

 const ui = {
   baseColor: 'rgba(255, 200, 255, 0.3)',
-  baseLabel: 'rgba(255, 200, 255, 0.8)',
+  baseLabel: 'rgba(255, 200, 255, 0.9)',
   baseFont: 'small-caps 1.2rem "Segoe UI"',
   baseLineWidth: 16,
+  baseLineHeight: 2,
+  columns: 3,
   busy: false,
   facing: 'user',
 };
@@ -23,8 +25,8 @@ const config = {
     gender: { enabled: true },
     emotion: { enabled: true, minConfidence: 0.5, useGrayscale: true },
   },
-  body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
-  hand: { enabled: false, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
+  body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
+  hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
 };
 let settings;
 let worker;
@@ -49,20 +51,23 @@ const log = (...msg) => {

 async function drawFace(result, canvas) {
   if (!result) return;
   const ctx = canvas.getContext('2d');
-  ctx.strokeStyle = ui.baseColor;
-  ctx.font = ui.baseFont;
   for (const face of result) {
+    ctx.font = ui.baseFont;
+    ctx.strokeStyle = ui.baseColor;
     ctx.fillStyle = ui.baseColor;
     ctx.lineWidth = ui.baseLineWidth;
     ctx.beginPath();
     if (settings.getValue('Draw Boxes')) {
       ctx.rect(face.box[0], face.box[1], face.box[2], face.box[3]);
     }
-    const labelAgeGender = `${face.gender || ''} ${face.age || ''}`;
-    const labelIris = face.iris ? `iris: ${face.iris}` : '';
-    const labelEmotion = face.emotion && face.emotion[0] ? `emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : '';
+    // silly hack since fillText does not support new line
+    const labels = [];
+    if (face.agConfidence) labels.push(`${Math.trunc(100 * face.agConfidence)}% ${face.gender || ''}`);
+    if (face.age) labels.push(`Age:${face.age || ''}`);
+    if (face.iris) labels.push(`iris: ${face.iris}`);
+    if (face.emotion && face.emotion[0]) labels.push(`${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}`);
     ctx.fillStyle = ui.baseLabel;
-    ctx.fillText(`${Math.trunc(100 * face.confidence)}% face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22);
+    for (const i in labels) ctx.fillText(labels[i], face.box[0] + 6, face.box[1] + 24 + ((i + 1) * ui.baseLineHeight));
     ctx.stroke();
     ctx.lineWidth = 1;
     if (face.mesh) {
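The rewritten label code above works around the fact that canvas `fillText()` has no concept of line breaks: each label gets its own call at an increasing vertical offset. A standalone sketch of the same pattern (the helper name and default line height are illustrative, not from the demo):

```js
// Draw an array of strings as stacked lines of text on a 2D canvas context.
// fillText() renders exactly one line, so the y coordinate is advanced manually.
function fillTextLines(ctx, lines, x, y, lineHeight = 24) {
  lines.forEach((line, i) => ctx.fillText(line, x, y + i * lineHeight));
}

// usage, mirroring the demo's per-face labels:
// fillTextLines(ctx, ['97% face', 'Age: 29', 'iris: 0.61'], face.box[0] + 6, face.box[1] + 24);
```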
@@ -102,11 +107,11 @@ async function drawFace(result, canvas) {

 async function drawBody(result, canvas) {
   if (!result) return;
   const ctx = canvas.getContext('2d');
-  ctx.fillStyle = ui.baseColor;
-  ctx.strokeStyle = ui.baseColor;
-  ctx.font = ui.baseFont;
-  ctx.lineWidth = ui.baseLineWidth;
   for (const pose of result) {
+    ctx.fillStyle = ui.baseColor;
+    ctx.strokeStyle = ui.baseColor;
+    ctx.font = ui.baseFont;
+    ctx.lineWidth = ui.baseLineWidth;
     if (settings.getValue('Draw Points')) {
       for (const point of pose.keypoints) {
         ctx.beginPath();
@@ -164,13 +169,13 @@ async function drawBody(result, canvas) {

 async function drawHand(result, canvas) {
   if (!result) return;
   const ctx = canvas.getContext('2d');
-  ctx.font = ui.baseFont;
-  ctx.lineWidth = ui.baseLineWidth;
-  window.result = result;
   for (const hand of result) {
+    ctx.font = ui.baseFont;
+    ctx.lineWidth = ui.baseLineWidth;
     if (settings.getValue('Draw Boxes')) {
       ctx.lineWidth = ui.baseLineWidth;
       ctx.beginPath();
+      ctx.strokeStyle = ui.baseColor;
       ctx.fillStyle = ui.baseColor;
       ctx.rect(hand.box[0], hand.box[1], hand.box[2], hand.box[3]);
       ctx.fillStyle = ui.baseLabel;
@@ -398,34 +403,74 @@ async function setupCamera() {
   });
 }

-// eslint-disable-next-line no-unused-vars
-async function setupImage() {
-  const image = document.getElementById('image');
-  image.width = window.innerWidth;
-  image.height = window.innerHeight;
+async function processImage(input) {
+  ui.baseColor = 'rgba(200, 255, 255, 0.5)';
+  ui.baseLabel = 'rgba(200, 255, 255, 0.8)';
+  ui.baseFont = 'small-caps 3.5rem "Segoe UI"';
+  ui.baseLineWidth = 16;
+  ui.baseLineHeight = 5;
+  ui.columns = 3;
+  const cfg = {
+    backend: 'webgl',
+    console: true,
+    face: {
+      enabled: true,
+      detector: { maxFaces: 10, skipFrames: 0, minConfidence: 0.1, iouThreshold: 0.3, scoreThreshold: 0.3 },
+      mesh: { enabled: true },
+      iris: { enabled: true },
+      age: { enabled: true, skipFrames: 0 },
+      gender: { enabled: true },
+      emotion: { enabled: true, minConfidence: 0.1, useGrayscale: true },
+    },
+    body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
+    hand: { enabled: true, skipFrames: 0, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.5 },
+  };
   return new Promise((resolve) => {
-    image.onload = () => resolve(image);
-    image.src = 'sample.jpg';
+    const image = document.getElementById('image');
+    image.onload = async () => {
+      log('Processing image:', image.src);
+      const canvas = document.getElementById('canvas');
+      image.width = image.naturalWidth;
+      image.height = image.naturalHeight;
+      canvas.width = image.naturalWidth;
+      canvas.height = image.naturalHeight;
+      const result = await human.detect(image, cfg);
+      await drawResults(image, result, canvas);
+      const thumb = document.createElement('canvas');
+      thumb.width = window.innerWidth / (ui.columns + 0.02);
+      thumb.height = canvas.height / (window.innerWidth / thumb.width);
+      const ctx = thumb.getContext('2d');
+      ctx.drawImage(canvas, 0, 0, canvas.width, canvas.height, 0, 0, thumb.width, thumb.height);
+      document.getElementById('samples').appendChild(thumb);
+      image.src = '';
+      resolve(true);
+    };
+    image.src = input;
   });
 }

+// eslint-disable-next-line no-unused-vars
+async function detectSampleImages() {
+  ui.baseFont = 'small-caps 3rem "Segoe UI"';
+  document.getElementById('canvas').style.display = 'none';
+  log('Running detection of sample images');
+  const samples = ['../assets/sample1.jpg', '../assets/sample2.jpg', '../assets/sample3.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample6.jpg'];
+  for (const sample of samples) await processImage(sample);
+}
+
 async function main() {
   log('Human demo starting ...');

   // setup ui control panel
   await setupUI();
-  // setup webcam
-  await setupCamera();
-
-  // or setup image
-  // const input = await setupImage();

   const msg = `Human ready: version: ${human.version} TensorFlow/JS version: ${human.tf.version_core}`;
   document.getElementById('log').innerText += '\n' + msg;
   log(msg);

-  // run actual detection. if input is video, it will run in a loop else it will run only once
-  // runHumanDetect(video, canvas);
+  // use one of the two:
+  await setupCamera();
+  // await detectSampleImages();
 }

 window.onload = main;

@@ -1,65 +1,77 @@
+/* eslint-disable indent */
+/* eslint-disable no-multi-spaces */
+
 export default {
-  backend: 'webgl',
-  console: true,
+  backend: 'webgl', // select tfjs backend to use
+  console: true, // enable debugging output to console
   face: {
-    enabled: true, // refers to detector, but since all other face modules rely on detector, it should be a global
+    enabled: true, // controls if specified module is enabled
+    // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
+    // (note: module is not loaded until it is required)
     detector: {
-      modelPath: '../models/blazeface/back/model.json', // can be blazeface-front or blazeface-back
-      inputSize: 256, // fixed value: 128 for front and tfhub and 256 for back
-      maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
-      skipFrames: 10, // how many frames to go without running the bounding box detector
+      modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'
+      // 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distant faces
+      inputSize: 256, // fixed value: 128 for 'tfhub' and 'front', 256 for 'back'
+      maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
+      skipFrames: 10, // how many frames to go without re-running the face bounding box detector
+      // if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
+      // as face probably hasn't moved much in short time (10 * 1/25 = 0.4 sec)
       minConfidence: 0.5, // threshold for discarding a prediction
       iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
       scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     },
     mesh: {
       enabled: true,
       modelPath: '../models/facemesh/model.json',
       inputSize: 192, // fixed value
     },
     iris: {
       enabled: true,
       modelPath: '../models/iris/model.json',
       enlargeFactor: 2.3, // empiric tuning
       inputSize: 64, // fixed value
     },
     age: {
       enabled: true,
-      modelPath: '../models/ssrnet-age/imdb/model.json',
-      inputSize: 64, // fixed value
-      skipFrames: 10,
+      modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
+      // which determines training set for model
+      inputSize: 64, // fixed value
+      skipFrames: 10, // how many frames to go without re-running the detector
     },
     gender: {
       enabled: true,
+      minConfidence: 0.8, // threshold for discarding a prediction
       modelPath: '../models/ssrnet-gender/imdb/model.json',
     },
     emotion: {
       enabled: true,
       inputSize: 64, // fixed value
-      minConfidence: 0.5,
-      skipFrames: 10,
-      useGrayscale: true,
+      minConfidence: 0.5, // threshold for discarding a prediction
+      skipFrames: 10, // how many frames to go without re-running the detector
+      useGrayscale: true, // convert image to grayscale before prediction or use highest channel
       modelPath: '../models/emotion/model.json',
     },
   },
   body: {
     enabled: true,
     modelPath: '../models/posenet/model.json',
     inputResolution: 257, // fixed value
     outputStride: 16, // fixed value
-    maxDetections: 5,
-    scoreThreshold: 0.7,
-    nmsRadius: 20,
+    maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
   },
   hand: {
     enabled: true,
     inputSize: 256, // fixed value
-    skipFrames: 10,
-    minConfidence: 0.5,
-    iouThreshold: 0.3,
-    scoreThreshold: 0.7,
-    enlargeFactor: 1.65, // empiric tuning
-    maxHands: 2,
+    skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
+    // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
+    // as hand probably hasn't moved much in short time (10 * 1/25 = 0.4 sec)
+    minConfidence: 0.5, // threshold for discarding a prediction
+    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
+    maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
     detector: {
       anchors: '../models/handdetect/anchors.json',
       modelPath: '../models/handdetect/model.json',
src/index.js | 13
@@ -84,11 +84,11 @@ async function detect(input, userConfig) {
     await tf.ready();
   }
   // explicitly enable depthwiseconv since it's disabled by default due to issues with large shaders
-  let savedWebglPackDepthwiseConvFlag;
-  if (tf.getBackend() === 'webgl') {
-    savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
-    tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
-  }
+  // let savedWebglPackDepthwiseConvFlag;
+  // if (tf.getBackend() === 'webgl') {
+  //   savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
+  //   tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
+  // }

   // load models if enabled
   if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
@@ -149,6 +149,7 @@ async function detect(input, userConfig) {
         annotations: face.annotations,
         age: ssrData.age,
         gender: ssrData.gender,
+        agConfidence: ssrData.confidence,
         emotion: emotionData,
         iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
       });
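The `iris` field above scales a nominal human iris diameter of 11.7 mm by the measured iris size and truncates the ratio to two decimals. A quick check of that arithmetic, with the expression factored into a hypothetical helper for illustration only:

```js
// Same expression as in the result object above, extracted for illustration.
const irisScale = (iris) => (iris !== 0) ? Math.trunc(100 * 11.7 / iris) / 100 : 0;

console.log(irisScale(11.7)); // 1    -> measured size equals the 11.7 mm reference
console.log(irisScale(23.4)); // 0.5  -> measured size is twice the reference
console.log(irisScale(0));    // 0    -> no iris measurement available
```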
@@ -157,7 +158,7 @@ async function detect(input, userConfig) {
   }

   // set depthwiseconv to original value
-  tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
+  // tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);

   // combine and return results
   perf.total = Object.values(perf).reduce((a, b) => a + b);