From d5b620dbe8e845c3f3856deb65f915091801ded7 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 11 Mar 2021 10:26:14 -0500
Subject: [PATCH] autodetect inputSizes

---
 config.js                    | 12 +-----------
 demo/browser.js              | 17 ++++++-----------
 demo/node.js                 |  6 +++---
 package.json                 |  2 +-
 src/age/age.ts               | 12 +-----------
 src/emotion/emotion.ts       | 12 +-----------
 src/gender/gender.ts         |  2 +-
 src/handpose/handdetector.ts |  6 ++++--
 src/handpose/handpose.ts     |  4 ++--
 src/human.ts                 | 14 +++++++-------
 10 files changed, 27 insertions(+), 60 deletions(-)

diff --git a/config.js b/config.js
index 855b33bf..f3b6c57a 100644
--- a/config.js
+++ b/config.js
@@ -67,7 +67,6 @@ export default {
     // (note: module is not loaded until it is required)
     detector: {
       modelPath: '../models/blazeface-back.json',
-      inputSize: 256, // fixed value
       rotation: true, // use best-guess rotated face image or just box with rotation as-is
                       // false means higher performance, but incorrect mesh mapping if face angle is above 20 degrees
                       // this parameter is not valid in nodejs
@@ -91,19 +90,16 @@
     mesh: {
       enabled: true,
       modelPath: '../models/facemesh.json',
-      inputSize: 192, // fixed value
     },

     iris: {
       enabled: true,
       modelPath: '../models/iris.json',
-      inputSize: 64, // fixed value
     },

     age: {
       enabled: true,
-      modelPath: '../models/age-ssrnet-imdb.json',
-      inputSize: 64, // fixed value
+      modelPath: '../models/age.json',
       skipFrames: 31, // how many frames to go without re-running the detector
                       // only used for video inputs
     },
@@ -112,14 +108,12 @@
       enabled: true,
       minConfidence: 0.1, // threshold for discarding a prediction
       modelPath: '../models/gender.json', // can be 'gender' or 'gender-ssrnet-imdb'
-      inputSize: 64, // fixed value
       skipFrames: 32, // how many frames to go without re-running the detector
                       // only used for video inputs
     },

     emotion: {
       enabled: true,
-      inputSize: 64, // fixed value
       minConfidence: 0.1, // threshold for discarding a prediction
       skipFrames: 33, // how many frames to go without re-running the detector
       modelPath: '../models/emotion.json',
@@ -127,7 +121,6 @@
     embedding: {
       enabled: false,
-      inputSize: 112, // fixed value
       modelPath: '../models/mobilefacenet.json',
     },
   },

@@ -135,7 +128,6 @@
   body: {
     enabled: true,
     modelPath: '../models/posenet.json', // can be 'posenet' or 'blazepose'
-    inputSize: 257, // fixed value, 257 for posenet and 256 for blazepose
     maxDetections: 10, // maximum number of people detected in the input
                        // should be set to the minimum number for performance
                        // only valid for posenet as blazepose only detects single pose
@@ -144,14 +136,12 @@
                        // only valid for posenet as blazepose only detects single pose
     nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
                    // only valid for posenet as blazepose only detects single pose
-    modelType: 'posenet-mobilenet', // can be 'posenet-mobilenet', 'posenet-resnet', 'blazepose'
   },

   hand: {
     enabled: true,
     rotation: false, // use best-guess rotated hand image or just box with rotation as-is
                      // false means higher performance, but incorrect finger mapping if hand is inverted
-    inputSize: 256, // fixed value
     skipFrames: 12, // how many frames to go without re-running the hand bounding box detector
                     // only used for video inputs
                     // e.g., if model is running st 25 FPS, we can re-use existing bounding
diff --git a/demo/browser.js b/demo/browser.js
index 045372ed..e4294947 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -3,20 +3,18 @@
 import Human from '../src/human';
 import Menu from './menu.js';
 import GLBench from './gl-bench.js';

-const userConfig = { backend: 'webgl' }; // add any user configuration overrides
+// const userConfig = { backend: 'webgl' }; // add any user configuration overrides

-/*
 const userConfig = {
-  backend: 'wasm',
+  backend: 'webgl',
   async: false,
-  warmup: 'none',
+  warmup: 'face',
   videoOptimized: false,
-  face: { enabled: true, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } },
+  face: { enabled: true, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: true } },
   hand: { enabled: false },
   gesture: { enabled: false },
-  body: { enabled: false, modelType: 'blazepose', modelPath: '../models/blazepose.json' },
+  body: { enabled: false, modelPath: '../models/blazepose.json' },
 };
-*/

 const human = new Human(userConfig);
@@ -40,7 +38,7 @@
   detectFPS: [], // internal, holds fps values for detection performance
   drawFPS: [], // internal, holds fps values for draw performance
   buffered: false, // experimental, should output be buffered between frames
-  drawWarmup: false, // debug only, should warmup image processing be displayed on startup
+  drawWarmup: true, // debug only, should warmup image processing be displayed on startup
   drawThread: null, // internl, perform draw operations in a separate thread
   detectThread: null, // internl, perform detect operations in a separate thread
   framesDraw: 0, // internal, statistics on frames drawn
@@ -104,9 +102,6 @@
   if (ui.drawFPS.length > ui.maxFPSframes) ui.drawFPS.shift();
   lastDraw = performance.now();

-  // enable for continous performance monitoring
-  // console.log(result.performance);
-
   // draw fps chart
   await menu.process.updateChart('FPS', ui.detectFPS);

diff --git a/demo/node.js b/demo/node.js
index 7d0a83ce..afe00a53 100644
--- a/demo/node.js
+++ b/demo/node.js
@@ -18,12 +18,12 @@ const myConfig = {
     detector: { modelPath: 'file://models/blazeface-back.json', enabled: true },
     mesh: { modelPath: 'file://models/facemesh.json', enabled: true },
     iris: { modelPath: 'file://models/iris.json', enabled: true },
-    age: { modelPath: 'file://models/age-ssrnet-imdb.json', enabled: true },
+    age: { modelPath: 'file://models/age.json', enabled: true },
     gender: { modelPath: 'file://models/gender.json', enabled: true },
     emotion: { modelPath: 'file://models/emotion.json', enabled: true },
   },
-  // body: { modelPath: 'file://models/blazepose.json', modelType: 'blazepose', inputSize: 256, enabled: true },
-  body: { modelPath: 'file://models/posenet.json', modelType: 'posenet', inputSize: 257, enabled: true },
+  // body: { modelPath: 'file://models/blazepose.json', modelType: 'blazepose', enabled: true },
+  body: { modelPath: 'file://models/posenet.json', modelType: 'posenet', enabled: true },
   hand: {
     enabled: true,
     detector: { modelPath: 'file://models/handdetect.json' },
diff --git a/package.json b/package.json
index baa9a624..d7059415 100644
--- a/package.json
+++ b/package.json
@@ -68,7 +68,7 @@
     "eslint-plugin-node": "^11.1.0",
     "eslint-plugin-promise": "^4.3.1",
     "rimraf": "^3.0.2",
-    "simple-git": "^2.36.1",
+    "simple-git": "^2.36.2",
     "tslib": "^2.1.0",
     "typescript": "^4.2.3"
   }
diff --git a/src/age/age.ts b/src/age/age.ts
index b869e197..404b4fc4 100644
--- a/src/age/age.ts
+++ b/src/age/age.ts
@@ -23,17 +23,7 @@ export async function predict(image, config) {
   if (config.videoOptimized) skipped = 0;
   else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
-    /*
-    const zoom = [0, 0]; // 0..1 meaning 0%..100%
-    const box = [[
-      (image.shape[1] * zoom[0]) / image.shape[1],
-      (image.shape[2] * zoom[1]) / image.shape[2],
-      (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
-      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
-    ]];
-    const resize = tf.image.cropAndResize(image, box, [0], [config.face.age.inputSize, config.face.age.inputSize]);
-    */
-    const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
+    const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     const enhance = tf.mul(resize, [255.0]);
     tf.dispose(resize);

diff --git a/src/emotion/emotion.ts b/src/emotion/emotion.ts
index ea1a3b62..5c0744da 100644
--- a/src/emotion/emotion.ts
+++ b/src/emotion/emotion.ts
@@ -27,17 +27,7 @@ export async function predict(image, config) {
   if (config.videoOptimized) skipped = 0;
   else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
-    /*
-    const zoom = [0, 0]; // 0..1 meaning 0%..100%
-    const box = [[
-      (image.shape[1] * zoom[0]) / image.shape[1],
-      (image.shape[2] * zoom[1]) / image.shape[2],
-      (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
-      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
-    ]];
-    const resize = tf.image.cropAndResize(image, box, [0], [config.face.emotion.inputSize, config.face.emotion.inputSize]);
-    */
-    const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
+    const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     const [red, green, blue] = tf.split(resize, 3, 3);
     resize.dispose();
     // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
diff --git a/src/gender/gender.ts b/src/gender/gender.ts
index 88a73986..72733477 100644
--- a/src/gender/gender.ts
+++ b/src/gender/gender.ts
@@ -28,7 +28,7 @@ export async function predict(image, config) {
   if (config.videoOptimized) skipped = 0;
   else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
-    const resize = tf.image.resizeBilinear(image, [config.face.gender.inputSize, config.face.gender.inputSize], false);
+    const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     let enhance;
     if (alternative) {
       enhance = tf.tidy(() => {
diff --git a/src/handpose/handdetector.ts b/src/handpose/handdetector.ts
index 27d50a63..2d861b2d 100644
--- a/src/handpose/handdetector.ts
+++ b/src/handpose/handdetector.ts
@@ -5,6 +5,7 @@ export class HandDetector {
   model: any;
   anchors: any;
   anchorsTensor: any;
+  inputSize: number;
   inputSizeTensor: any;
   doubleInputSizeTensor: any;

@@ -12,6 +13,7 @@
     this.model = model;
     this.anchors = anchorsAnnotated.map((anchor) => [anchor.x_center, anchor.y_center]);
     this.anchorsTensor = tf.tensor2d(this.anchors);
+    this.inputSize = inputSize;
     this.inputSizeTensor = tf.tensor1d([inputSize, inputSize]);
     this.doubleInputSizeTensor = tf.tensor1d([inputSize * 2, inputSize * 2]);
   }
@@ -67,7 +69,7 @@
   async estimateHandBounds(input, config) {
     const inputHeight = input.shape[1];
     const inputWidth = input.shape[2];
-    const image = tf.tidy(() => input.resizeBilinear([config.hand.inputSize, config.hand.inputSize]).div(127.5).sub(1));
+    const image = tf.tidy(() => input.resizeBilinear([this.inputSize, this.inputSize]).div(127.5).sub(1));
     const predictions = await this.getBoxes(image, config);
     image.dispose();
     const hands: Array<{}> = [];
@@ -79,7 +81,7 @@
       const palmLandmarks = prediction.palmLandmarks.arraySync();
       prediction.box.dispose();
       prediction.palmLandmarks.dispose();
-      hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / config.hand.inputSize, inputHeight / config.hand.inputSize]));
+      hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / this.inputSize, inputHeight / this.inputSize]));
     }
     return hands;
   }
diff --git a/src/handpose/handpose.ts b/src/handpose/handpose.ts
index 4595798d..80b6f758 100644
--- a/src/handpose/handpose.ts
+++ b/src/handpose/handpose.ts
@@ -54,8 +54,8 @@ export async function load(config) {
     config.hand.enabled ? tf.loadGraphModel(config.hand.detector.modelPath, { fromTFHub: config.hand.detector.modelPath.includes('tfhub.dev') }) : null,
     config.hand.landmarks ? tf.loadGraphModel(config.hand.skeleton.modelPath, { fromTFHub: config.hand.skeleton.modelPath.includes('tfhub.dev') }) : null,
   ]);
-  const handDetector = new handdetector.HandDetector(handDetectorModel, config.hand.inputSize, anchors.anchors);
-  const handPipeline = new handpipeline.HandPipeline(handDetector, handPoseModel, config.hand.inputSize);
+  const handDetector = new handdetector.HandDetector(handDetectorModel, handDetectorModel?.inputs[0].shape[2], anchors.anchors);
+  const handPipeline = new handpipeline.HandPipeline(handDetector, handPoseModel, handPoseModel?.inputs[0].shape[2]);
   const handPose = new HandPose(handPipeline);
   if (config.hand.enabled && config.debug) log(`load model: ${config.hand.detector.modelPath.match(/\/(.*)\./)[1]}`);
   if (config.hand.landmarks && config.debug) log(`load model: ${config.hand.skeleton.modelPath.match(/\/(.*)\./)[1]}`);
diff --git a/src/human.ts b/src/human.ts
index eecad033..806cb217 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -109,7 +109,7 @@ class Human {
       age,
       gender,
       emotion,
-      body: this.config.body.modelType.startsWith('posenet') ? posenet : blazepose,
+      body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose,
       hand: handpose,
     };
     // include platform info
@@ -186,8 +186,8 @@
         this.models.emotion || ((this.config.face.enabled && this.config.face.emotion.enabled) ? emotion.load(this.config) : null),
         this.models.embedding || ((this.config.face.enabled && this.config.face.embedding.enabled) ? embedding.load(this.config) : null),
         this.models.handpose || (this.config.hand.enabled ? handpose.load(this.config) : null),
-        this.models.posenet || (this.config.body.enabled && this.config.body.modelType.startsWith('posenet') ? posenet.load(this.config) : null),
-        this.models.posenet || (this.config.body.enabled && this.config.body.modelType.startsWith('blazepose') ? blazepose.load(this.config) : null),
+        this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('posenet') ? posenet.load(this.config) : null),
+        this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('blazepose') ? blazepose.load(this.config) : null),
       ]);
     } else {
       if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config);
@@ -196,8 +196,8 @@
       if (this.config.face.enabled && this.config.face.emotion.enabled && !this.models.emotion) this.models.emotion = await emotion.load(this.config);
       if (this.config.face.enabled && this.config.face.embedding.enabled && !this.models.embedding) this.models.embedding = await embedding.load(this.config);
       if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config);
-      if (this.config.body.enabled && !this.models.posenet && this.config.body.modelType.startsWith('posenet')) this.models.posenet = await posenet.load(this.config);
-      if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelType.startsWith('blazepose')) this.models.blazepose = await blazepose.load(this.config);
+      if (this.config.body.enabled && !this.models.posenet && this.config.body.modelPath.includes('posenet')) this.models.posenet = await posenet.load(this.config);
+      if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelPath.includes('blazepose')) this.models.blazepose = await blazepose.load(this.config);
     }

     if (this.#firstRun) {
@@ -477,13 +477,13 @@
     // run body: can be posenet or blazepose
     this.#analyze('Start Body:');
     if (this.config.async) {
-      if (this.config.body.modelType.startsWith('posenet')) bodyRes = this.config.body.enabled ? this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
+      if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
       else bodyRes = this.config.body.enabled ? blazepose.predict(process.tensor, this.config) : [];
       if (this.#perf.body) delete this.#perf.body;
     } else {
       this.state = 'run:body';
       timeStamp = now();
-      if (this.config.body.modelType.startsWith('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
+      if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
       else bodyRes = this.config.body.enabled ? await blazepose.predict(process.tensor, this.config) : [];
       this.#perf.body = Math.trunc(now() - timeStamp);
     }
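
Note on the approach: every call site touched by this patch now derives its resize target from the loaded graph model (model.inputs[0].shape) instead of a fixed inputSize value carried in config. A minimal sketch of that pattern follows, assuming a TFJS graph model whose first input is a static NHWC image tensor; the helper names modelInputSize and resizeForModel are illustrative and not part of this repository.

  import * as tf from '@tensorflow/tfjs';

  // Sketch only: read the spatial input size from the model itself rather than from config.
  // Assumes model.inputs[0].shape is [-1, height, width, channels] (NHWC), as it is for the
  // image models touched by this patch.
  export function modelInputSize(model: tf.GraphModel): [number, number] {
    const shape = model.inputs[0].shape as number[];
    return [shape[1], shape[2]]; // [height, width]
  }

  export function resizeForModel(model: tf.GraphModel, image: tf.Tensor4D): tf.Tensor4D {
    const [height, width] = modelInputSize(model);
    // tf.image.resizeBilinear expects [newHeight, newWidth]; the models replaced in this
    // patch have square inputs, so the index order does not change the result for them.
    return tf.image.resizeBilinear(image, [height, width], false);
  }

The handpose path additionally keeps the detected value on the detector instance (this.inputSize), since estimateHandBounds later needs the same number to scale detected boxes back to the original frame dimensions.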