From 8df844cd7b3eb364a1d7419ec2e28a722a222ad6 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 11 Dec 2020 10:11:49 -0500
Subject: [PATCH] improved caching and warmup

---
 config.js                | 12 +++++++-----
 demo/browser.js          |  3 +--
 src/age/age.js           |  9 +++++----
 src/emotion/emotion.js   |  9 +++++----
 src/gender/gender.js     | 20 +++++---------------
 src/hand/handdetector.js |  2 +-
 src/hand/handpipeline.js | 11 +++++------
 src/human.js             | 38 +++++++++++++++++++++++++++-----------
 wiki                     |  2 +-
 9 files changed, 57 insertions(+), 49 deletions(-)

diff --git a/config.js b/config.js
index 15fe3ee6..cc4a5f9b 100644
--- a/config.js
+++ b/config.js
@@ -26,7 +26,9 @@ export default {
                      // must be disabled for images
                      // basically this skips object box boundary detection for every n frames
                      // while maintaining in-box detection since objects cannot move that fast
-
+  warmup: 'full',    // what to use for human.warmup(), can be 'none', 'face', 'full'
+                     // warmup pre-initializes all models for faster inference but can take
+                     // significant time on startup
   filter: {
     enabled: true,   // enable image pre-processing filters
     width: 0,        // resize input width
@@ -69,7 +71,7 @@ export default {
                      // false means higher performance, but incorrect mesh mapping if face angle is above 20 degrees
     maxFaces: 10,    // maximum number of faces detected in the input
                      // should be set to the minimum number for performance
-    skipFrames: 20,  // how many frames to go without re-running the face bounding box detector
+    skipFrames: 11,  // how many frames to go without re-running the face bounding box detector
                      // only used for video inputs
                      // e.g., if model is running st 25 FPS, we can re-use existing bounding
                      // box for updated face analysis as the head probably hasn't moved much
@@ -99,7 +101,7 @@ export default {
     modelPath: '../models/age-ssrnet-imdb.json', // can be 'age-ssrnet-imdb' or 'age-ssrnet-wiki'
                      // which determines training set for model
     inputSize: 64,   // fixed value
-    skipFrames: 41,  // how many frames to go without re-running the detector
+    skipFrames: 31,  // how many frames to go without re-running the detector
                      // only used for video inputs
   },
 
@@ -108,7 +110,7 @@ export default {
     minConfidence: 0.1, // threshold for discarding a prediction
     modelPath: '../models/gender-ssrnet-imdb.json', // can be 'gender', 'gender-ssrnet-imdb' or 'gender-ssrnet-wiki'
     inputSize: 64,   // fixed value
-    skipFrames: 42,  // how many frames to go without re-running the detector
+    skipFrames: 41,  // how many frames to go without re-running the detector
                      // only used for video inputs
   },
 
@@ -143,7 +145,7 @@ export default {
     rotation: false, // use best-guess rotated hand image or just box with rotation as-is
                      // false means higher performance, but incorrect finger mapping if hand is inverted
     inputSize: 256,  // fixed value
-    skipFrames: 19,  // how many frames to go without re-running the hand bounding box detector
+    skipFrames: 12,  // how many frames to go without re-running the hand bounding box detector
                      // only used for video inputs
                      // e.g., if model is running st 25 FPS, we can re-use existing bounding
                      // box for updated hand skeleton analysis as the hand probably
diff --git a/demo/browser.js b/demo/browser.js
index c0e2556b..14028b0d 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -37,7 +37,6 @@ const ui = {
   console: true,
   maxFPSframes: 10,
   modelsPreload: true,
-  modelsWarmup: true,
   menuWidth: 0,
   menuHeight: 0,
   camera: {},
@@ -518,7 +517,7 @@ async function main() {
     status('loading');
     await human.load(userConfig); // this is not required, just pre-loads all models
   }
-  if (ui.modelsWarmup && !ui.useWorker) {
+  if (!ui.useWorker) {
     status('initializing');
     await human.warmup(userConfig); // this is not required, just pre-warms all models for faster initial inference
   }
diff --git a/src/age/age.js b/src/age/age.js
index 4db844a2..28004386 100644
--- a/src/age/age.js
+++ b/src/age/age.js
@@ -4,7 +4,7 @@ import * as profile from '../profile.js';
 
 const models = {};
 let last = { age: 0 };
-let frame = Number.MAX_SAFE_INTEGER;
+let skipped = Number.MAX_SAFE_INTEGER;
 
 async function load(config) {
   if (!models.age) {
@@ -16,11 +16,12 @@ async function load(config) {
 
 async function predict(image, config) {
   if (!models.age) return null;
-  if ((frame < config.face.age.skipFrames) && config.videoOptimized && last.age && (last.age > 0)) {
-    frame += 1;
+  if ((skipped < config.face.age.skipFrames) && config.videoOptimized && last.age && (last.age > 0)) {
+    skipped++;
     return last;
   }
-  frame = 0;
+  if (config.videoOptimized) skipped = 0;
+  else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
     /*
     const zoom = [0, 0]; // 0..1 meaning 0%..100%
diff --git a/src/emotion/emotion.js b/src/emotion/emotion.js
index 099cba96..88980490 100644
--- a/src/emotion/emotion.js
+++ b/src/emotion/emotion.js
@@ -5,7 +5,7 @@ import * as profile from '../profile.js';
 const annotations = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surpise', 'neutral'];
 const models = {};
 let last = [];
-let frame = Number.MAX_SAFE_INTEGER;
+let skipped = Number.MAX_SAFE_INTEGER;
 
 // tuning values
 const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale
@@ -21,11 +21,12 @@ async function load(config) {
 
 async function predict(image, config) {
   if (!models.emotion) return null;
-  if ((frame < config.face.emotion.skipFrames) && config.videoOptimized && (last.length > 0)) {
-    frame += 1;
+  if ((skipped < config.face.emotion.skipFrames) && config.videoOptimized && (last.length > 0)) {
+    skipped++;
     return last;
   }
-  frame = 0;
+  if (config.videoOptimized) skipped = 0;
+  else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
     /*
     const zoom = [0, 0]; // 0..1 meaning 0%..100%
diff --git a/src/gender/gender.js b/src/gender/gender.js
index f1e7075b..b2d28967 100644
--- a/src/gender/gender.js
+++ b/src/gender/gender.js
@@ -4,7 +4,7 @@ import * as profile from '../profile.js';
 
 const models = {};
 let last = { gender: '' };
-let frame = Number.MAX_SAFE_INTEGER;
+let skipped = Number.MAX_SAFE_INTEGER;
 let alternative = false;
 
 // tuning values
@@ -21,22 +21,13 @@ async function load(config) {
 
 async function predict(image, config) {
   if (!models.gender) return null;
-  if ((frame < config.face.gender.skipFrames) && config.videoOptimized && last.gender !== '') {
-    frame += 1;
+  if ((skipped < config.face.gender.skipFrames) && config.videoOptimized && last.gender !== '') {
+    skipped++;
     return last;
   }
-  frame = 0;
+  if (config.videoOptimized) skipped = 0;
+  else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
-    /*
-    const zoom = [0, 0]; // 0..1 meaning 0%..100%
-    const box = [[
-      (image.shape[1] * zoom[0]) / image.shape[1],
-      (image.shape[2] * zoom[1]) / image.shape[2],
-      (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
-      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
-    ]];
-    const resize = tf.image.cropAndResize(image, box, [0], [config.face.gender.inputSize, config.face.gender.inputSize]);
-    */
     const resize = tf.image.resizeBilinear(image, [config.face.gender.inputSize, config.face.gender.inputSize], false);
     let enhance;
     if (alternative) {
@@ -51,7 +42,6 @@ async function predict(image, config) {
     } else {
       enhance = tf.mul(resize, [255.0]);
     }
-    // const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
     tf.dispose(resize);
 
     let genderT;
diff --git a/src/hand/handdetector.js b/src/hand/handdetector.js
index 8446cdbb..ce6dd8a2 100644
--- a/src/hand/handdetector.js
+++ b/src/hand/handdetector.js
@@ -81,8 +81,8 @@ class HandDetector {
     const image = tf.tidy(() => input.resizeBilinear([config.hand.inputSize, config.hand.inputSize]).div(127.5).sub(1));
     const predictions = await this.getBoxes(image, config);
     image.dispose();
-    if (!predictions || predictions.length === 0) return null;
     const hands = [];
+    if (!predictions || predictions.length === 0) return hands;
     for (const prediction of predictions) {
       const boxes = prediction.box.dataSync();
       const startPoint = boxes.slice(0, 2);
diff --git a/src/hand/handpipeline.js b/src/hand/handpipeline.js
index 79a3e1f9..cebd80cc 100644
--- a/src/hand/handpipeline.js
+++ b/src/hand/handpipeline.js
@@ -35,7 +35,7 @@ class HandPipeline {
     this.landmarkDetector = landmarkDetector;
     this.inputSize = inputSize;
     this.storedBoxes = [];
-    this.skipped = 1000;
+    this.skipped = 0;
     this.detectedHands = 0;
   }
 
@@ -84,16 +84,15 @@ class HandPipeline {
   }
 
   async estimateHands(image, config) {
-    this.skipped++;
     let useFreshBox = false;
 
     // run new detector every skipFrames unless we only want box to start with
     let boxes;
-    if ((this.skipped > config.hand.skipFrames) || !config.hand.landmarks || !config.videoOptimized) {
+    if ((this.skipped === 0) || (this.skipped > config.hand.skipFrames) || !config.hand.landmarks || !config.videoOptimized) {
       boxes = await this.handDetector.estimateHandBounds(image, config);
-      // don't reset on test image
-      if ((image.shape[1] !== 255) && (image.shape[2] !== 255)) this.skipped = 0;
+      this.skipped = 0;
     }
+    if (config.videoOptimized) this.skipped++;
 
     // if detector result count doesn't match current working set, use it to reset current working set
     if (boxes && (boxes.length > 0) && ((boxes.length !== this.detectedHands) && (this.detectedHands !== config.hand.maxHands) || !config.hand.landmarks)) {
@@ -103,7 +102,7 @@ class HandPipeline {
       if (this.storedBoxes.length > 0) useFreshBox = true;
     }
     const hands = [];
-    // log(`skipped: ${this.skipped} max: ${config.hand.maxHands} detected: ${this.detectedHands} stored: ${this.storedBoxes.length} new: ${boxes?.length}`);
+    // log('hand', `skipped: ${this.skipped} max: ${config.hand.maxHands} detected: ${this.detectedHands} stored: ${this.storedBoxes.length} new: ${boxes?.length}`);
 
     // go through working set of boxes
     for (let i = 0; i < this.storedBoxes.length; i++) {
diff --git a/src/human.js b/src/human.js
index 92e6f54c..878a49ef 100644
--- a/src/human.js
+++ b/src/human.js
@@ -418,26 +418,42 @@ class Human {
 
   async warmup(userConfig) {
     if (userConfig) this.config = mergeDeep(this.config, userConfig);
-    const width = 256;
-    const height = 256;
-    const video = this.config.videoOptimized;
-    this.config.videoOptimized = false;
     return new Promise((resolve) => {
-      const img = new Image(width, height);
+      const video = this.config.videoOptimized;
+      this.config.videoOptimized = false;
+      let src;
+      let size;
+      switch (this.config.warmup) {
+        case 'face':
+          size = 256;
+          src = sample.face;
+          break;
+        case 'full':
+          size = 1200;
+          src = sample.body;
+          break;
+        default:
+          size = 0;
+          src = null;
+      }
+      const img = new Image(size, size);
       img.onload = () => {
-        const canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(width, height) : document.createElement('canvas');
-        canvas.width = width;
-        canvas.height = height;
+        const canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(size, size) : document.createElement('canvas');
+        canvas.width = size;
+        canvas.height = size;
         const ctx = canvas.getContext('2d');
         ctx.drawImage(img, 0, 0);
-        const data = ctx.getImageData(0, 0, width, height);
+        const data = ctx.getImageData(0, 0, size, size);
+        const t0 = now();
         this.detect(data, config).then((warmup) => {
-          log('Warmup', warmup);
+          const t1 = now();
+          log('Warmup', this.config.warmup, (t1 - t0), warmup);
           this.config.videoOptimized = video;
           resolve(warmup);
         });
       };
-      img.src = sample.face;
+      if (src) img.src = src;
+      else resolve(null);
     });
   }
 }
diff --git a/wiki b/wiki
index 785bde4c..c4c8b30f 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 785bde4caa1a29d8bfe82a4ae987ffde1d9a0a73
+Subproject commit c4c8b30f6bf211ee267cf1884aaff9725f594631
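
Usage note (editorial addition, not part of the patch above): a minimal sketch of how the new config.warmup option and human.warmup() introduced by this commit might be called from application code. It assumes a browser environment and the import and constructor pattern used by demo/browser.js; the function name initHuman and the inline config overrides are illustrative only.

import Human from '../src/human.js'; // assumed entry point, matching the layout used by demo/browser.js

const human = new Human();

async function initHuman() {
  // optional: pre-load all models before the first detect() call
  await human.load({ warmup: 'face' });
  // pre-warm models; warmup can be 'none', 'face', or 'full' per the new config.js option
  const result = await human.warmup({ warmup: 'face' });
  // warmup() resolves with the detection result for the sample warmup image, or null when warmup is 'none'
  if (result) console.log('human warmup complete', result);
}

initHuman();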