improved caching and warmup

2020-12-11 10:11:49 -05:00 · 2020-12-11 10:11:49 -05:00 · 8df844cd7b
parent b48047109b
commit 8df844cd7b
9 changed files with 57 additions and 49 deletions
--- a/config.js
+++ b/config.js
@ -26,7 +26,9 @@ export default {
                             // must be disabled for images
                             // basically this skips object box boundary detection for every n frames
                             // while maintaining in-box detection since objects cannot move that fast
-
+  warmup: 'full',            // what to use for human.warmup(), can be 'none', 'face', 'full'
+                             // warmup pre-initializes all models for faster inference but can take
+                             // significant time on startup
  filter: {
    enabled: true,           // enable image pre-processing filters
    width: 0,                // resize input width
@ -69,7 +71,7 @@ export default {
                             // false means higher performance, but incorrect mesh mapping if face angle is above 20 degrees
      maxFaces: 10,          // maximum number of faces detected in the input
                             // should be set to the minimum number for performance
-      skipFrames: 20,        // how many frames to go without re-running the face bounding box detector
+      skipFrames: 11,        // how many frames to go without re-running the face bounding box detector
                             // only used for video inputs
                             // e.g., if model is running at 25 FPS, we can re-use existing bounding
                             // box for updated face analysis as the head probably hasn't moved much
@ -99,7 +101,7 @@ export default {
      modelPath: '../models/age-ssrnet-imdb.json', // can be 'age-ssrnet-imdb' or 'age-ssrnet-wiki'
                                                   // which determines training set for model
      inputSize: 64,         // fixed value
-      skipFrames: 41,        // how many frames to go without re-running the detector
+      skipFrames: 31,        // how many frames to go without re-running the detector
                             // only used for video inputs
    },

@ -108,7 +110,7 @@ export default {
      minConfidence: 0.1,    // threshold for discarding a prediction
      modelPath: '../models/gender-ssrnet-imdb.json', // can be 'gender', 'gender-ssrnet-imdb' or 'gender-ssrnet-wiki'
      inputSize: 64,         // fixed value
-      skipFrames: 42,        // how many frames to go without re-running the detector
+      skipFrames: 41,        // how many frames to go without re-running the detector
                             // only used for video inputs
    },

@ -143,7 +145,7 @@ export default {
    rotation: false,         // use best-guess rotated hand image or just box with rotation as-is
                             // false means higher performance, but incorrect finger mapping if hand is inverted
    inputSize: 256,          // fixed value
-    skipFrames: 19,          // how many frames to go without re-running the hand bounding box detector
+    skipFrames: 12,          // how many frames to go without re-running the hand bounding box detector
                             // only used for video inputs
                             // e.g., if model is running at 25 FPS, we can re-use existing bounding
                             // box for updated hand skeleton analysis as the hand probably
--- a/demo/browser.js
+++ b/demo/browser.js
@ -37,7 +37,6 @@ const ui = {
  console: true,
  maxFPSframes: 10,
  modelsPreload: true,
-  modelsWarmup: true,
  menuWidth: 0,
  menuHeight: 0,
  camera: {},
@ -518,7 +517,7 @@ async function main() {
    status('loading');
    await human.load(userConfig); // this is not required, just pre-loads all models
  }
-  if (ui.modelsWarmup && !ui.useWorker) {
+  if (!ui.useWorker) {
    status('initializing');
    await human.warmup(userConfig); // this is not required, just pre-warms all models for faster initial inference
  }
--- a/src/age/age.js
+++ b/src/age/age.js
@ -4,7 +4,7 @@ import * as profile from '../profile.js';

 const models = {};
 let last = { age: 0 };
-let frame = Number.MAX_SAFE_INTEGER;
+let skipped = Number.MAX_SAFE_INTEGER;

 async function load(config) {
  if (!models.age) {
@ -16,11 +16,12 @@ async function load(config) {

 async function predict(image, config) {
  if (!models.age) return null;
-  if ((frame < config.face.age.skipFrames) && config.videoOptimized && last.age && (last.age > 0)) {
-    frame += 1;
+  if ((skipped < config.face.age.skipFrames) && config.videoOptimized && last.age && (last.age > 0)) {
+    skipped++;
    return last;
  }
-  frame = 0;
+  if (config.videoOptimized) skipped = 0;
+  else skipped = Number.MAX_SAFE_INTEGER;
  return new Promise(async (resolve) => {
    /*
    const zoom = [0, 0]; // 0..1 meaning 0%..100%
--- a/src/emotion/emotion.js
+++ b/src/emotion/emotion.js
@ -5,7 +5,7 @@ import * as profile from '../profile.js';
 const annotations = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surpise', 'neutral'];
 const models = {};
 let last = [];
-let frame = Number.MAX_SAFE_INTEGER;
+let skipped = Number.MAX_SAFE_INTEGER;

 // tuning values
 const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale
@ -21,11 +21,12 @@ async function load(config) {

 async function predict(image, config) {
  if (!models.emotion) return null;
-  if ((frame < config.face.emotion.skipFrames) && config.videoOptimized && (last.length > 0)) {
-    frame += 1;
+  if ((skipped < config.face.emotion.skipFrames) && config.videoOptimized && (last.length > 0)) {
+    skipped++;
    return last;
  }
-  frame = 0;
+  if (config.videoOptimized) skipped = 0;
+  else skipped = Number.MAX_SAFE_INTEGER;
  return new Promise(async (resolve) => {
    /*
    const zoom = [0, 0]; // 0..1 meaning 0%..100%
--- a/src/gender/gender.js
+++ b/src/gender/gender.js
@ -4,7 +4,7 @@ import * as profile from '../profile.js';

 const models = {};
 let last = { gender: '' };
-let frame = Number.MAX_SAFE_INTEGER;
+let skipped = Number.MAX_SAFE_INTEGER;
 let alternative = false;

 // tuning values
@ -21,22 +21,13 @@ async function load(config) {

 async function predict(image, config) {
  if (!models.gender) return null;
-  if ((frame < config.face.gender.skipFrames) && config.videoOptimized && last.gender !== '') {
-    frame += 1;
+  if ((skipped < config.face.gender.skipFrames) && config.videoOptimized && last.gender !== '') {
+    skipped++;
    return last;
  }
-  frame = 0;
+  if (config.videoOptimized) skipped = 0;
+  else skipped = Number.MAX_SAFE_INTEGER;
  return new Promise(async (resolve) => {
-    /*
-    const zoom = [0, 0]; // 0..1 meaning 0%..100%
-    const box = [[
-      (image.shape[1] * zoom[0]) / image.shape[1],
-      (image.shape[2] * zoom[1]) / image.shape[2],
-      (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
-      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
-    ]];
-    const resize = tf.image.cropAndResize(image, box, [0], [config.face.gender.inputSize, config.face.gender.inputSize]);
-    */
    const resize = tf.image.resizeBilinear(image, [config.face.gender.inputSize, config.face.gender.inputSize], false);
    let enhance;
    if (alternative) {
@ -51,7 +42,6 @@ async function predict(image, config) {
    } else {
      enhance = tf.mul(resize, [255.0]);
    }
-    // const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
    tf.dispose(resize);

    let genderT;
--- a/src/hand/handdetector.js
+++ b/src/hand/handdetector.js
@ -81,8 +81,8 @@ class HandDetector {
    const image = tf.tidy(() => input.resizeBilinear([config.hand.inputSize, config.hand.inputSize]).div(127.5).sub(1));
    const predictions = await this.getBoxes(image, config);
    image.dispose();
-    if (!predictions || predictions.length === 0) return null;
    const hands = [];
+    if (!predictions || predictions.length === 0) return hands;
    for (const prediction of predictions) {
      const boxes = prediction.box.dataSync();
      const startPoint = boxes.slice(0, 2);
--- a/src/hand/handpipeline.js
+++ b/src/hand/handpipeline.js
@ -35,7 +35,7 @@ class HandPipeline {
    this.landmarkDetector = landmarkDetector;
    this.inputSize = inputSize;
    this.storedBoxes = [];
-    this.skipped = 1000;
+    this.skipped = 0;
    this.detectedHands = 0;
  }

@ -84,16 +84,15 @@ class HandPipeline {
  }

  async estimateHands(image, config) {
-    this.skipped++;
    let useFreshBox = false;

    // run new detector every skipFrames unless we only want box to start with
    let boxes;
-    if ((this.skipped > config.hand.skipFrames) || !config.hand.landmarks || !config.videoOptimized) {
+    if ((this.skipped === 0) || (this.skipped > config.hand.skipFrames) || !config.hand.landmarks || !config.videoOptimized) {
      boxes = await this.handDetector.estimateHandBounds(image, config);
-      // don't reset on test image
-      if ((image.shape[1] !== 255) && (image.shape[2] !== 255)) this.skipped = 0;
+      this.skipped = 0;
    }
+    if (config.videoOptimized) this.skipped++;

    // if detector result count doesn't match current working set, use it to reset current working set
    if (boxes && (boxes.length > 0) && ((boxes.length !== this.detectedHands) && (this.detectedHands !== config.hand.maxHands) || !config.hand.landmarks)) {
@ -103,7 +102,7 @@ class HandPipeline {
      if (this.storedBoxes.length > 0) useFreshBox = true;
    }
    const hands = [];
-    // log(`skipped: ${this.skipped} max: ${config.hand.maxHands} detected: ${this.detectedHands} stored: ${this.storedBoxes.length} new: ${boxes?.length}`);
+    // log('hand', `skipped: ${this.skipped} max: ${config.hand.maxHands} detected: ${this.detectedHands} stored: ${this.storedBoxes.length} new: ${boxes?.length}`);

    // go through working set of boxes
    for (let i = 0; i < this.storedBoxes.length; i++) {
--- a/src/human.js
+++ b/src/human.js
@ -418,26 +418,42 @@ class Human {

  async warmup(userConfig) {
    if (userConfig) this.config = mergeDeep(this.config, userConfig);
-    const width = 256;
-    const height = 256;
-    const video = this.config.videoOptimized;
-    this.config.videoOptimized = false;
    return new Promise((resolve) => {
-      const img = new Image(width, height);
+      const video = this.config.videoOptimized;
+      this.config.videoOptimized = false;
+      let src;
+      let size;
+      switch (this.config.warmup) {
+        case 'face':
+          size = 256;
+          src = sample.face;
+          break;
+        case 'full':
+          size = 1200;
+          src = sample.body;
+          break;
+        default:
+          size = 0;
+          src = null;
+      }
+      const img = new Image(size, size);
      img.onload = () => {
-        const canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(width, height) : document.createElement('canvas');
-        canvas.width = width;
-        canvas.height = height;
+        const canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(size, size) : document.createElement('canvas');
+        canvas.width = size;
+        canvas.height = size;
        const ctx = canvas.getContext('2d');
        ctx.drawImage(img, 0, 0);
-        const data = ctx.getImageData(0, 0, width, height);
+        const data = ctx.getImageData(0, 0, size, size);
+        const t0 = now();
        this.detect(data, config).then((warmup) => {
-          log('Warmup', warmup);
+          const t1 = now();
+          log('Warmup', this.config.warmup, (t1 - t0), warmup);
          this.config.videoOptimized = video;
          resolve(warmup);
        });
      };
-      img.src = sample.face;
+      if (src) img.src = src;
+      else resolve(null);
    });
  }
 }
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 785bde4caa1a29d8bfe82a4ae987ffde1d9a0a73
+Subproject commit c4c8b30f6bf211ee267cf1884aaff9725f594631