mirror of https://github.com/vladmandic/human
autodetect inputSizes
parent d5b6c676c9
commit 1b53cd4b6b
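In short: the commit deletes every fixed inputSize value from config and instead reads each model's expected input resolution off the loaded graph via model.inputs[0].shape. A minimal TypeScript sketch of the pattern, assuming a TFJS GraphModel with an NHWC input signature (the helper name is illustrative, not the library's own):

import * as tf from '@tensorflow/tfjs';

// Load a graph model and resize an input tensor to whatever resolution it declares.
// For NHWC models, inputs[0].shape is [batch, height, width, channels].
async function resizeForModel(modelPath: string, image: tf.Tensor4D): Promise<tf.Tensor4D> {
  const model = await tf.loadGraphModel(modelPath);
  const height = model.inputs[0].shape![1];
  const width = model.inputs[0].shape![2];
  // tf.image.resizeBilinear expects the target size as [newHeight, newWidth]
  return tf.image.resizeBilinear(image, [height, width], false);
}

Note that the hunks below pass [shape[2], shape[1]], i.e. width first; all models touched here take square inputs, so the order makes no practical difference.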
config.js
@@ -67,7 +67,6 @@ export default {
     // (note: module is not loaded until it is required)
     detector: {
       modelPath: '../models/blazeface-back.json',
-      inputSize: 256, // fixed value
       rotation: true, // use best-guess rotated face image or just box with rotation as-is
       // false means higher performance, but incorrect mesh mapping if face angle is above 20 degrees
       // this parameter is not valid in nodejs
@@ -91,19 +90,16 @@ export default {
     mesh: {
       enabled: true,
       modelPath: '../models/facemesh.json',
-      inputSize: 192, // fixed value
     },

     iris: {
       enabled: true,
       modelPath: '../models/iris.json',
-      inputSize: 64, // fixed value
     },

     age: {
       enabled: true,
-      modelPath: '../models/age-ssrnet-imdb.json',
-      inputSize: 64, // fixed value
+      modelPath: '../models/age.json',
       skipFrames: 31, // how many frames to go without re-running the detector
       // only used for video inputs
     },
@@ -112,14 +108,12 @@ export default {
       enabled: true,
       minConfidence: 0.1, // threshold for discarding a prediction
       modelPath: '../models/gender.json', // can be 'gender' or 'gender-ssrnet-imdb'
-      inputSize: 64, // fixed value
       skipFrames: 32, // how many frames to go without re-running the detector
       // only used for video inputs
     },

     emotion: {
       enabled: true,
-      inputSize: 64, // fixed value
       minConfidence: 0.1, // threshold for discarding a prediction
       skipFrames: 33, // how many frames to go without re-running the detector
       modelPath: '../models/emotion.json',
@@ -127,7 +121,6 @@ export default {

     embedding: {
       enabled: false,
-      inputSize: 112, // fixed value
       modelPath: '../models/mobilefacenet.json',
     },
   },
@@ -135,7 +128,6 @@ export default {
   body: {
     enabled: true,
     modelPath: '../models/posenet.json', // can be 'posenet' or 'blazepose'
-    inputSize: 257, // fixed value, 257 for posenet and 256 for blazepose
     maxDetections: 10, // maximum number of people detected in the input
     // should be set to the minimum number for performance
     // only valid for posenet as blazepose only detects single pose
@@ -144,14 +136,12 @@ export default {
     // only valid for posenet as blazepose only detects single pose
     nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
     // only valid for posenet as blazepose only detects single pose
-    modelType: 'posenet-mobilenet', // can be 'posenet-mobilenet', 'posenet-resnet', 'blazepose'
   },

   hand: {
     enabled: true,
     rotation: false, // use best-guess rotated hand image or just box with rotation as-is
     // false means higher performance, but incorrect finger mapping if hand is inverted
-    inputSize: 256, // fixed value
     skipFrames: 12, // how many frames to go without re-running the hand bounding box detector
     // only used for video inputs
     // e.g., if model is running at 25 FPS, we can re-use existing bounding
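Net effect on config.js: the per-model inputSize entries and body.modelType are gone, since those values are now derived at load time. An abridged sketch of the resulting face config, condensed from the hunks above (illustration only, not the complete option set):

const faceConfig = {
  detector: { modelPath: '../models/blazeface-back.json', rotation: true },
  mesh: { enabled: true, modelPath: '../models/facemesh.json' },
  iris: { enabled: true, modelPath: '../models/iris.json' },
  age: { enabled: true, modelPath: '../models/age.json', skipFrames: 31 },
};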

demo/browser.js
@@ -3,20 +3,18 @@ import Human from '../src/human';
 import Menu from './menu.js';
 import GLBench from './gl-bench.js';

-const userConfig = { backend: 'webgl' }; // add any user configuration overrides
+// const userConfig = { backend: 'webgl' }; // add any user configuration overrides

-/*
 const userConfig = {
-  backend: 'wasm',
+  backend: 'webgl',
   async: false,
-  warmup: 'none',
+  warmup: 'face',
   videoOptimized: false,
-  face: { enabled: true, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } },
+  face: { enabled: true, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: true } },
   hand: { enabled: false },
   gesture: { enabled: false },
-  body: { enabled: false, modelType: 'blazepose', modelPath: '../models/blazepose.json' },
+  body: { enabled: false, modelPath: '../models/blazepose.json' },
 };
-*/

 const human = new Human(userConfig);
@@ -40,7 +38,7 @@ const ui = {
   detectFPS: [], // internal, holds fps values for detection performance
   drawFPS: [], // internal, holds fps values for draw performance
   buffered: false, // experimental, should output be buffered between frames
-  drawWarmup: false, // debug only, should warmup image processing be displayed on startup
+  drawWarmup: true, // debug only, should warmup image processing be displayed on startup
   drawThread: null, // internal, perform draw operations in a separate thread
   detectThread: null, // internal, perform detect operations in a separate thread
   framesDraw: 0, // internal, statistics on frames drawn
@@ -104,9 +102,6 @@ async function drawResults(input) {
   if (ui.drawFPS.length > ui.maxFPSframes) ui.drawFPS.shift();
   lastDraw = performance.now();

-  // enable for continuous performance monitoring
-  // console.log(result.performance);
-
   // draw fps chart
   await menu.process.updateChart('FPS', ui.detectFPS);

demo/node.js
@@ -18,12 +18,12 @@ const myConfig = {
     detector: { modelPath: 'file://models/blazeface-back.json', enabled: true },
     mesh: { modelPath: 'file://models/facemesh.json', enabled: true },
     iris: { modelPath: 'file://models/iris.json', enabled: true },
-    age: { modelPath: 'file://models/age-ssrnet-imdb.json', enabled: true },
+    age: { modelPath: 'file://models/age.json', enabled: true },
     gender: { modelPath: 'file://models/gender.json', enabled: true },
     emotion: { modelPath: 'file://models/emotion.json', enabled: true },
   },
-  // body: { modelPath: 'file://models/blazepose.json', modelType: 'blazepose', inputSize: 256, enabled: true },
-  body: { modelPath: 'file://models/posenet.json', modelType: 'posenet', inputSize: 257, enabled: true },
+  // body: { modelPath: 'file://models/blazepose.json', modelType: 'blazepose', enabled: true },
+  body: { modelPath: 'file://models/posenet.json', modelType: 'posenet', enabled: true },
   hand: {
     enabled: true,
     detector: { modelPath: 'file://models/handdetect.json' },

package.json
@@ -68,7 +68,7 @@
     "eslint-plugin-node": "^11.1.0",
     "eslint-plugin-promise": "^4.3.1",
     "rimraf": "^3.0.2",
-    "simple-git": "^2.36.1",
+    "simple-git": "^2.36.2",
     "tslib": "^2.1.0",
     "typescript": "^4.2.3"
   }

src/age/age.ts
@@ -23,17 +23,7 @@ export async function predict(image, config) {
   if (config.videoOptimized) skipped = 0;
   else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
-    /*
-    const zoom = [0, 0]; // 0..1 meaning 0%..100%
-    const box = [[
-      (image.shape[1] * zoom[0]) / image.shape[1],
-      (image.shape[2] * zoom[1]) / image.shape[2],
-      (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
-      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
-    ]];
-    const resize = tf.image.cropAndResize(image, box, [0], [config.face.age.inputSize, config.face.age.inputSize]);
-    */
-    const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
+    const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     const enhance = tf.mul(resize, [255.0]);
     tf.dispose(resize);
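A reading note on the replacement line: tf.image.resizeBilinear takes its target size as [newHeight, newWidth], while an NHWC input shape is [batch, height, width, channels], so [model.inputs[0].shape[2], model.inputs[0].shape[1]] is width-first. That is harmless here because the model input is square; a shape-faithful variant would look like this (sketch, assuming an already-loaded tf.GraphModel):

import * as tf from '@tensorflow/tfjs';

// resize to the model's declared input resolution, keeping [height, width] order
function resizeToModel(image: tf.Tensor4D, model: tf.GraphModel): tf.Tensor4D {
  const [, height, width] = model.inputs[0].shape as number[];
  return tf.image.resizeBilinear(image, [height, width], false);
}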

src/emotion/emotion.ts
@@ -27,17 +27,7 @@ export async function predict(image, config) {
   if (config.videoOptimized) skipped = 0;
   else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
-    /*
-    const zoom = [0, 0]; // 0..1 meaning 0%..100%
-    const box = [[
-      (image.shape[1] * zoom[0]) / image.shape[1],
-      (image.shape[2] * zoom[1]) / image.shape[2],
-      (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
-      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
-    ]];
-    const resize = tf.image.cropAndResize(image, box, [0], [config.face.emotion.inputSize, config.face.emotion.inputSize]);
-    */
-    const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
+    const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     const [red, green, blue] = tf.split(resize, 3, 3);
     resize.dispose();
     // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
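The context lines after this hunk split the resized tensor into channels and build a weighted grayscale input. A self-contained sketch of that rgb2gray step, using the 0.2989/0.5870/0.1140 luma weights from the MATLAB reference the comment cites (the module's exact constants may differ):

import * as tf from '@tensorflow/tfjs';

// weighted rgb-to-grayscale over the channel axis of an NHWC tensor
function rgbToGrayscale(rgb: tf.Tensor4D): tf.Tensor {
  const [red, green, blue] = tf.split(rgb, 3, 3); // 3 slices along axis 3 (channels)
  const gray = tf.addN([tf.mul(red, 0.2989), tf.mul(green, 0.5870), tf.mul(blue, 0.1140)]);
  tf.dispose([red, green, blue]);
  return gray;
}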

src/gender/gender.ts
@@ -28,7 +28,7 @@ export async function predict(image, config) {
   if (config.videoOptimized) skipped = 0;
   else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
-    const resize = tf.image.resizeBilinear(image, [config.face.gender.inputSize, config.face.gender.inputSize], false);
+    const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     let enhance;
     if (alternative) {
       enhance = tf.tidy(() => {

src/handpose/handdetector.ts
@@ -5,6 +5,7 @@ export class HandDetector {
   model: any;
   anchors: any;
   anchorsTensor: any;
+  inputSize: number;
   inputSizeTensor: any;
   doubleInputSizeTensor: any;
@@ -12,6 +13,7 @@ export class HandDetector {
     this.model = model;
     this.anchors = anchorsAnnotated.map((anchor) => [anchor.x_center, anchor.y_center]);
     this.anchorsTensor = tf.tensor2d(this.anchors);
+    this.inputSize = inputSize;
     this.inputSizeTensor = tf.tensor1d([inputSize, inputSize]);
     this.doubleInputSizeTensor = tf.tensor1d([inputSize * 2, inputSize * 2]);
   }
@@ -67,7 +69,7 @@ export class HandDetector {
   async estimateHandBounds(input, config) {
     const inputHeight = input.shape[1];
     const inputWidth = input.shape[2];
-    const image = tf.tidy(() => input.resizeBilinear([config.hand.inputSize, config.hand.inputSize]).div(127.5).sub(1));
+    const image = tf.tidy(() => input.resizeBilinear([this.inputSize, this.inputSize]).div(127.5).sub(1));
     const predictions = await this.getBoxes(image, config);
     image.dispose();
     const hands: Array<{}> = [];
@@ -79,7 +81,7 @@ export class HandDetector {
       const palmLandmarks = prediction.palmLandmarks.arraySync();
       prediction.box.dispose();
       prediction.palmLandmarks.dispose();
-      hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / config.hand.inputSize, inputHeight / config.hand.inputSize]));
+      hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / this.inputSize, inputHeight / this.inputSize]));
     }
     return hands;
   }
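The scale factor handed to box.scaleBoxCoordinates maps detector output, which lives in inputSize x inputSize space, back onto the original frame. The underlying arithmetic is simply this (sketch of the idea, not the library's helper):

// a point detected at (x, y) in model space lands at
// (x * originalWidth / inputSize, y * originalHeight / inputSize) in image space
function scalePoint(point: [number, number], scale: [number, number]): [number, number] {
  return [point[0] * scale[0], point[1] * scale[1]];
}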

src/handpose/handpose.ts
@@ -54,8 +54,8 @@ export async function load(config) {
     config.hand.enabled ? tf.loadGraphModel(config.hand.detector.modelPath, { fromTFHub: config.hand.detector.modelPath.includes('tfhub.dev') }) : null,
     config.hand.landmarks ? tf.loadGraphModel(config.hand.skeleton.modelPath, { fromTFHub: config.hand.skeleton.modelPath.includes('tfhub.dev') }) : null,
   ]);
-  const handDetector = new handdetector.HandDetector(handDetectorModel, config.hand.inputSize, anchors.anchors);
-  const handPipeline = new handpipeline.HandPipeline(handDetector, handPoseModel, config.hand.inputSize);
+  const handDetector = new handdetector.HandDetector(handDetectorModel, handDetectorModel?.inputs[0].shape[2], anchors.anchors);
+  const handPipeline = new handpipeline.HandPipeline(handDetector, handPoseModel, handPoseModel?.inputs[0].shape[2]);
   const handPose = new HandPose(handPipeline);
   if (config.hand.enabled && config.debug) log(`load model: ${config.hand.detector.modelPath.match(/\/(.*)\./)[1]}`);
   if (config.hand.landmarks && config.debug) log(`load model: ${config.hand.skeleton.modelPath.match(/\/(.*)\./)[1]}`);
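One caveat in this hunk: when the hand module is disabled, handDetectorModel and handPoseModel are null, so the optional chains pass undefined as the input size. A more defensive variant one might write (illustrative sketch only; the fallback value is an assumption, not the library's behavior):

// fall back to the previous fixed size when the model was not loaded
const detectorSize = handDetectorModel ? handDetectorModel.inputs[0].shape[2] : 256;
const handDetector = new handdetector.HandDetector(handDetectorModel, detectorSize, anchors.anchors);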

src/human.ts
@@ -109,7 +109,7 @@ class Human {
       age,
       gender,
       emotion,
-      body: this.config.body.modelType.startsWith('posenet') ? posenet : blazepose,
+      body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose,
       hand: handpose,
     };
     // include platform info
@@ -186,8 +186,8 @@ class Human {
       this.models.emotion || ((this.config.face.enabled && this.config.face.emotion.enabled) ? emotion.load(this.config) : null),
       this.models.embedding || ((this.config.face.enabled && this.config.face.embedding.enabled) ? embedding.load(this.config) : null),
       this.models.handpose || (this.config.hand.enabled ? handpose.load(this.config) : null),
-      this.models.posenet || (this.config.body.enabled && this.config.body.modelType.startsWith('posenet') ? posenet.load(this.config) : null),
-      this.models.posenet || (this.config.body.enabled && this.config.body.modelType.startsWith('blazepose') ? blazepose.load(this.config) : null),
+      this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('posenet') ? posenet.load(this.config) : null),
+      this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('blazepose') ? blazepose.load(this.config) : null),
     ]);
   } else {
     if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config);
@@ -196,8 +196,8 @@ class Human {
     if (this.config.face.enabled && this.config.face.emotion.enabled && !this.models.emotion) this.models.emotion = await emotion.load(this.config);
     if (this.config.face.enabled && this.config.face.embedding.enabled && !this.models.embedding) this.models.embedding = await embedding.load(this.config);
     if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config);
-    if (this.config.body.enabled && !this.models.posenet && this.config.body.modelType.startsWith('posenet')) this.models.posenet = await posenet.load(this.config);
-    if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelType.startsWith('blazepose')) this.models.blazepose = await blazepose.load(this.config);
+    if (this.config.body.enabled && !this.models.posenet && this.config.body.modelPath.includes('posenet')) this.models.posenet = await posenet.load(this.config);
+    if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelPath.includes('blazepose')) this.models.blazepose = await blazepose.load(this.config);
   }

   if (this.#firstRun) {
@@ -477,13 +477,13 @@ class Human {
     // run body: can be posenet or blazepose
     this.#analyze('Start Body:');
     if (this.config.async) {
-      if (this.config.body.modelType.startsWith('posenet')) bodyRes = this.config.body.enabled ? this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
+      if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
       else bodyRes = this.config.body.enabled ? blazepose.predict(process.tensor, this.config) : [];
       if (this.#perf.body) delete this.#perf.body;
     } else {
       this.state = 'run:body';
       timeStamp = now();
-      if (this.config.body.modelType.startsWith('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
+      if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
       else bodyRes = this.config.body.enabled ? await blazepose.predict(process.tensor, this.config) : [];
       this.#perf.body = Math.trunc(now() - timeStamp);
     }
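The common thread across the src/human.ts hunks: body-model dispatch no longer reads a separate config.body.modelType flag, it keys off the model file name in config.body.modelPath. Restated as a one-liner (sketch):

// a path containing 'posenet' selects the posenet pipeline; anything else falls through
// to blazepose, so custom body models must keep one of those names in their path
const bodyBackend = (modelPath: string): string => modelPath.includes('posenet') ? 'posenet' : 'blazepose';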