From 23aeb81b76e68b7fa5bd052020e3828b08812045 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 14 Oct 2020 13:23:02 -0400
Subject: [PATCH] module parametrization and performance monitoring

---
 README.md                    | 25 ++++++++++-
 demo/demo-esm.js             |  1 +
 src/config.js                |  4 +-
 src/handpose/box.js          |  6 +++
 src/handpose/handdetector.js | 18 ++++----
 src/handpose/handpose.js     | 80 ++++++++++++------------------------
 src/index.js                 | 15 ++++++-
 7 files changed, 84 insertions(+), 65 deletions(-)

diff --git a/README.md b/README.md
index adab5513..857a3e5e 100644
--- a/README.md
+++ b/README.md
@@ -294,6 +294,18 @@ result = {
 }
 ```

+Additionally, the `result` object includes internal performance data, with the total time spent and the time per module (measured in ms):
+
+```js
+  result.performance = {
+    body,
+    hand,
+    face,
+    agegender,
+    total,
+  }
+```
+
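For context, a minimal sketch of how this new timing data might be read by a caller, assuming the `human.detect()` API documented earlier in the README; `video` and `logPerformance` are illustrative placeholders, not part of this patch:

```js
// sketch only: `human` is an initialized library instance, `video` any supported input element
async function logPerformance(human, video, config) {
  const result = await human.detect(video, config);
  // values are truncated milliseconds; modules that did not run may report 0 or omit their key
  const { body, hand, face, agegender, total } = result.performance;
  console.log(`body ${body}ms | hand ${hand}ms | face ${face}ms | age/gender ${agegender}ms | total ${total}ms`);
}
```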
 ## Build

@@ -321,7 +333,18 @@ Development dependencies are [eslint](https://github.com/eslint) used for code l
 Performance will vary depending on your hardware, but also on number of resolution of input video/image, enabled modules as well as their parameters

-For example, on a desktop with a low-end nVidia GTX1050 it can perform multiple face detections at 50+ FPS, but drops to 5-10 FPS on a medium complex images if all modules are enabled
+For example, on a desktop with a low-end nVidia GTX1050 it can perform multiple face detections at 60+ FPS, but drops to 10 FPS on medium-complexity images if all modules are enabled
+
+Performance per module:
+
+- Enabled all: 10 FPS
+- Face Detect: 80 FPS
+- Face Geometry: 30 FPS (includes face detect)
+- Face Iris: 25 FPS (includes face detect and face geometry)
+- Age: 60 FPS (includes face detect)
+- Gender: 60 FPS (includes face detect)
+- Hand: 40 FPS
+- Body: 50 FPS

 Library can also be used on mobile devices

diff --git a/demo/demo-esm.js b/demo/demo-esm.js
index 2fc930db..c57dc30c 100644
--- a/demo/demo-esm.js
+++ b/demo/demo-esm.js
@@ -203,6 +203,7 @@ async function runHumanDetect(input, canvas) {
     TFJS Version: ${human.tf.version_core} Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors
     GPU Memory: used ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes free ${Math.floor(1024 * 1024 * engine.backendInstance.numMBBeforeWarning).toLocaleString()} bytes
     Result Object Size: Face: ${(JSON.stringify(result.face)).length.toLocaleString()} bytes Body: ${(JSON.stringify(result.body)).length.toLocaleString()} bytes Hand: ${(JSON.stringify(result.hand)).length.toLocaleString()} bytes
+    Performance: ${JSON.stringify(result.performance)}
   `;
   // rinse & repeate
   // if (input.readyState) setTimeout(() => runHumanDetect(), 1000); // slow loop for debugging purposes
diff --git a/src/config.js b/src/config.js
index 589eaaec..ff72d491 100644
--- a/src/config.js
+++ b/src/config.js
@@ -18,7 +18,8 @@ export default {
     iris: {
       enabled: true,
       modelPath: '../models/iris/model.json',
-      inputSize: 192, // fixed value
+      enlargeFactor: 2.3, // empiric tuning
+      inputSize: 64, // fixed value
     },
     age: {
       enabled: true,
@@ -47,6 +48,7 @@ export default {
     minConfidence: 0.5,
     iouThreshold: 0.3,
     scoreThreshold: 0.7,
+    enlargeFactor: 1.65, // empiric tuning
     maxHands: 2,
     detector: {
       anchors: '../models/handdetect/anchors.json',
diff --git a/src/handpose/box.js b/src/handpose/box.js
index 3450ca7b..c7b23e7e 100644
--- a/src/handpose/box.js
+++ b/src/handpose/box.js
@@ -7,6 +7,7 @@ function getBoxSize(box) {
   ];
 }
 exports.getBoxSize = getBoxSize;
+
 function getBoxCenter(box) {
   return [
     box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2,
@@ -14,6 +15,7 @@ function getBoxCenter(box) {
   ];
 }
 exports.getBoxCenter = getBoxCenter;
+
 function cutBoxFromImageAndResize(box, image, cropSize) {
   const h = image.shape[1];
   const w = image.shape[2];
@@ -24,6 +26,7 @@ function cutBoxFromImageAndResize(box, image, cropSize) {
   return tf.image.cropAndResize(image, boxes, [0], cropSize);
 }
 exports.cutBoxFromImageAndResize = cutBoxFromImageAndResize;
+
 function scaleBoxCoordinates(box, factor) {
   const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
   const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
@@ -34,6 +37,7 @@ function scaleBoxCoordinates(box, factor) {
   return { startPoint, endPoint, palmLandmarks };
 }
 exports.scaleBoxCoordinates = scaleBoxCoordinates;
+
 function enlargeBox(box, factor = 1.5) {
   const center = getBoxCenter(box);
   const size = getBoxSize(box);
@@ -43,6 +47,7 @@ function enlargeBox(box, factor = 1.5) {
   return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
 }
 exports.enlargeBox = enlargeBox;
+
 function squarifyBox(box) {
   const centers = getBoxCenter(box);
   const size = getBoxSize(box);
@@ -53,6 +58,7 @@ function squarifyBox(box) {
   return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
 }
 exports.squarifyBox = squarifyBox;
+
 function shiftBox(box, shiftFactor) {
   const boxSize = [
     box.endPoint[0] - box.startPoint[0], box.endPoint[1] - box.startPoint[1],
diff --git a/src/handpose/handdetector.js b/src/handpose/handdetector.js
index c9f5f924..e2bc28e4 100644
--- a/src/handpose/handdetector.js
+++ b/src/handpose/handdetector.js
@@ -2,17 +2,14 @@ const tf = require('@tensorflow/tfjs');
 const bounding = require('./box');

 class HandDetector {
-  constructor(model, width, height, anchors, iouThreshold, scoreThreshold, maxHands) {
+  constructor(model, anchors, config) {
     this.model = model;
-    this.width = width;
-    this.height = height;
-    this.iouThreshold = iouThreshold;
-    this.scoreThreshold = scoreThreshold;
-    this.maxHands = maxHands;
+    this.width = config.inputSize;
+    this.height = config.inputSize;
     this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]);
     this.anchorsTensor = tf.tensor2d(this.anchors);
-    this.inputSizeTensor = tf.tensor1d([width, height]);
-    this.doubleInputSizeTensor = tf.tensor1d([width * 2, height * 2]);
+    this.inputSizeTensor = tf.tensor1d([config.inputSize, config.inputSize]);
+    this.doubleInputSizeTensor = tf.tensor1d([config.inputSize * 2, config.inputSize * 2]);
   }

   normalizeBoxes(boxes) {
@@ -73,9 +70,12 @@ class HandDetector {
    *
    * @param input The image to classify.
    */
-  async estimateHandBounds(input) {
+  async estimateHandBounds(input, config) {
     const inputHeight = input.shape[1];
     const inputWidth = input.shape[2];
+    this.iouThreshold = config.iouThreshold;
+    this.scoreThreshold = config.scoreThreshold;
+    this.maxHands = config.maxHands;
     const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
     const predictions = await this.getBoundingBoxes(image);
     image.dispose();
diff --git a/src/handpose/handpose.js b/src/handpose/handpose.js
index 19acd9ce..58136fac 100644
--- a/src/handpose/handpose.js
+++ b/src/handpose/handpose.js
@@ -3,71 +3,22 @@ const hand = require('./handdetector');
 const keypoints = require('./keypoints');
 const pipe = require('./pipeline');

-// Load the bounding box detector model.
-async function loadHandDetectorModel(url) {
-  return tf.loadGraphModel(url, { fromTFHub: url.includes('tfhub.dev') });
-}
-
-// Load the mesh detector model.
-async function loadHandPoseModel(url) {
-  return tf.loadGraphModel(url, { fromTFHub: url.includes('tfhub.dev') });
-}
-
-// In single shot detector pipelines, the output space is discretized into a set
-// of bounding boxes, each of which is assigned a score during prediction. The
-// anchors define the coordinates of these boxes.
-async function loadAnchors(url) {
-  if (tf.env().features.IS_NODE) {
-    // eslint-disable-next-line global-require
-    const fs = require('fs');
-    const data = await fs.readFileSync(url.replace('file://', ''));
-    return JSON.parse(data);
-  }
-  return tf.util.fetch(url).then((d) => d.json());
-}
-
-/**
- * Load handpose.
- *
- * @param config A configuration object with the following properties:
- * - `maxContinuousChecks` How many frames to go without running the bounding
- * box detector. Defaults to infinity. Set to a lower value if you want a safety
- * net in case the mesh detector produces consistently flawed predictions.
- * - `detectionConfidence` Threshold for discarding a prediction. Defaults to
- * 0.8.
- * - `iouThreshold` A float representing the threshold for deciding whether
- * boxes overlap too much in non-maximum suppression. Must be between [0, 1].
- * Defaults to 0.3.
- * - `scoreThreshold` A threshold for deciding when to remove boxes based
- * on score in non-maximum suppression. Defaults to 0.75.
- */
-async function load(config) {
-  const [ANCHORS, handDetectorModel, handPoseModel] = await Promise.all([
-    loadAnchors(config.detector.anchors),
-    loadHandDetectorModel(config.detector.modelPath),
-    loadHandPoseModel(config.skeleton.modelPath),
-  ]);
-  const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold, config.maxHands);
-  const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence, config.maxHands);
-  // eslint-disable-next-line no-use-before-define
-  const handpose = new HandPose(pipeline);
-  return handpose;
-}
-exports.load = load;
-
 class HandPose {
   constructor(pipeline) {
     this.pipeline = pipeline;
   }

   async estimateHands(input, config) {
+    this.maxContinuousChecks = config.skipFrames;
+    this.detectionConfidence = config.minConfidence;
+    this.maxHands = config.maxHands;
     const image = tf.tidy(() => {
       if (!(input instanceof tf.Tensor)) {
         input = tf.browser.fromPixels(input);
       }
       return input.toFloat().expandDims(0);
     });
-    const predictions = await this.pipeline.estimateHand(image, config);
+    const predictions = await this.pipeline.estimateHands(image, config);
     image.dispose();
     const hands = [];
     if (!predictions) return hands;
@@ -88,3 +39,26 @@ class HandPose {
   }
 }
 exports.HandPose = HandPose;
+
+async function loadAnchors(url) {
+  if (tf.env().features.IS_NODE) {
+    // eslint-disable-next-line global-require
+    const fs = require('fs');
+    const data = await fs.readFileSync(url.replace('file://', ''));
+    return JSON.parse(data);
+  }
+  return tf.util.fetch(url).then((d) => d.json());
+}
+
+async function load(config) {
+  const [anchors, handDetectorModel, handPoseModel] = await Promise.all([
+    loadAnchors(config.detector.anchors),
+    tf.loadGraphModel(config.detector.modelPath, { fromTFHub: config.detector.modelPath.includes('tfhub.dev') }),
+    tf.loadGraphModel(config.skeleton.modelPath, { fromTFHub: config.skeleton.modelPath.includes('tfhub.dev') }),
+  ]);
+  const detector = new hand.HandDetector(handDetectorModel, anchors, config);
+  const pipeline = new pipe.HandPipeline(detector, handPoseModel, config);
+  const handpose = new HandPose(pipeline);
+  return handpose;
+}
+exports.load = load;
diff --git a/src/index.js b/src/index.js
index f3974fc5..faf36337 100644
--- a/src/index.js
+++ b/src/index.js
@@ -50,21 +50,32 @@ async function detect(input, userConfig) {
       tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
     }

+    const perf = {};
+    let timeStamp;
+
     // run posenet
+    timeStamp = performance.now();
     let poseRes = [];
     if (config.body.enabled) poseRes = await models.posenet.estimatePoses(input, config.body);
+    perf.body = Math.trunc(performance.now() - timeStamp);

     // run handpose
+    timeStamp = performance.now();
     let handRes = [];
     if (config.hand.enabled) handRes = await models.handpose.estimateHands(input, config.hand);
+    perf.hand = Math.trunc(performance.now() - timeStamp);

     // run facemesh, includes blazeface and iris
     const faceRes = [];
     if (config.face.enabled) {
+      timeStamp = performance.now();
       const faces = await models.facemesh.estimateFaces(input, config.face);
+      perf.face = Math.trunc(performance.now() - timeStamp);
       for (const face of faces) {
         // run ssr-net age & gender, inherits face from blazeface
+        timeStamp = performance.now();
         const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
+        perf.agegender = Math.trunc(performance.now() - timeStamp);
         face.image.dispose();
         // iris: array[ bottom, left, top, right, center ]
         const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
@@ -86,7 +97,9 @@ async function detect(input, userConfig) {
     tf.engine().endScope();

     // combine results
-    resolve({ face: faceRes, body: poseRes, hand: handRes });
+    perf.total = Object.values(perf).reduce((a, b) => a + b);
+    console.log('total', perf.total);
+    resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf });
   });
 }
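The instrumentation added to `detect()` follows a single pattern: capture `performance.now()` before each module runs, store the truncated delta afterwards, and sum all entries into `total`. Below is a self-contained sketch of that pattern; `modules` and `runModule` are hypothetical placeholders, not part of the library:

```js
// illustrative only: mirrors the timing approach used in src/index.js
async function timedRun(modules, input) {
  const perf = {};
  for (const [name, runModule] of Object.entries(modules)) {
    const timeStamp = performance.now();
    await runModule(input);                                    // run one detection module
    perf[name] = Math.trunc(performance.now() - timeStamp);    // elapsed time in ms
  }
  perf.total = Object.values(perf).reduce((a, b) => a + b, 0); // sum of per-module timings
  return perf;
}
```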