autodetect inputSizes

pull/91/head
Vladimir Mandic 2021-03-11 10:26:14 -05:00
parent e2cf948425
commit 3ceb3df73e
46 changed files with 443286 additions and 7050 deletions


@ -67,7 +67,6 @@ export default {
// (note: module is not loaded until it is required)
detector: {
modelPath: '../models/blazeface-back.json',
inputSize: 256, // fixed value
rotation: true, // use best-guess rotated face image or just box with rotation as-is
// false means higher performance, but incorrect mesh mapping if face angle is above 20 degrees
// this parameter is not valid in nodejs
@ -91,19 +90,16 @@ export default {
mesh: {
enabled: true,
modelPath: '../models/facemesh.json',
inputSize: 192, // fixed value
},
iris: {
enabled: true,
modelPath: '../models/iris.json',
inputSize: 64, // fixed value
},
age: {
enabled: true,
modelPath: '../models/age-ssrnet-imdb.json',
inputSize: 64, // fixed value
modelPath: '../models/age.json',
skipFrames: 31, // how many frames to go without re-running the detector
// only used for video inputs
},
@ -112,14 +108,12 @@ export default {
enabled: true,
minConfidence: 0.1, // threshold for discarding a prediction
modelPath: '../models/gender.json', // can be 'gender' or 'gender-ssrnet-imdb'
inputSize: 64, // fixed value
skipFrames: 32, // how many frames to go without re-running the detector
// only used for video inputs
},
emotion: {
enabled: true,
inputSize: 64, // fixed value
minConfidence: 0.1, // threshold for discarding a prediction
skipFrames: 33, // how many frames to go without re-running the detector
modelPath: '../models/emotion.json',
@ -127,7 +121,6 @@ export default {
embedding: {
enabled: false,
inputSize: 112, // fixed value
modelPath: '../models/mobilefacenet.json',
},
},
@ -135,7 +128,6 @@ export default {
body: {
enabled: true,
modelPath: '../models/posenet.json', // can be 'posenet' or 'blazepose'
inputSize: 257, // fixed value, 257 for posenet and 256 for blazepose
maxDetections: 10, // maximum number of people detected in the input
// should be set to the minimum number for performance
// only valid for posenet as blazepose only detects single pose
@ -144,14 +136,12 @@ export default {
// only valid for posenet as blazepose only detects single pose
nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
// only valid for posenet as blazepose only detects single pose
modelType: 'posenet-mobilenet', // can be 'posenet-mobilenet', 'posenet-resnet', 'blazepose'
},
hand: {
enabled: true,
rotation: false, // use best-guess rotated hand image or just box with rotation as-is
// false means higher performance, but incorrect finger mapping if hand is inverted
inputSize: 256, // fixed value
skipFrames: 12, // how many frames to go without re-running the hand bounding box detector
// only used for video inputs
// e.g., if model is running at 25 FPS, we can re-use existing bounding
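
For context on the removals above: every hardcoded inputSize entry is dropped from the configuration because each module now reads the expected resolution from the loaded model itself. A minimal standalone sketch of that pattern (illustration only, not code from this commit; assumes a tfjs graph model with an NHWC input signature):

import * as tf from '@tensorflow/tfjs';

async function autodetectInputSize(modelPath: string): Promise<{ width: number, height: number }> {
  const model = await tf.loadGraphModel(modelPath);
  // graph-model inputs are NHWC: [batch, height, width, channels]; -1 would mean a dynamic dimension
  const shape = model.inputs[0].shape as number[];
  return { width: shape[2], height: shape[1] };
}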


@ -3,20 +3,18 @@ import Human from '../src/human';
import Menu from './menu.js';
import GLBench from './gl-bench.js';
const userConfig = { backend: 'webgl' }; // add any user configuration overrides
// const userConfig = { backend: 'webgl' }; // add any user configuration overrides
/*
const userConfig = {
backend: 'wasm',
backend: 'webgl',
async: false,
warmup: 'none',
warmup: 'face',
videoOptimized: false,
face: { enabled: true, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } },
face: { enabled: true, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: true } },
hand: { enabled: false },
gesture: { enabled: false },
body: { enabled: false, modelType: 'blazepose', modelPath: '../models/blazepose.json' },
body: { enabled: false, modelPath: '../models/blazepose.json' },
};
*/
const human = new Human(userConfig);
@ -40,7 +38,7 @@ const ui = {
detectFPS: [], // internal, holds fps values for detection performance
drawFPS: [], // internal, holds fps values for draw performance
buffered: false, // experimental, should output be buffered between frames
drawWarmup: false, // debug only, should warmup image processing be displayed on startup
drawWarmup: true, // debug only, should warmup image processing be displayed on startup
drawThread: null, // internal, perform draw operations in a separate thread
detectThread: null, // internal, perform detect operations in a separate thread
framesDraw: 0, // internal, statistics on frames drawn
@ -104,9 +102,6 @@ async function drawResults(input) {
if (ui.drawFPS.length > ui.maxFPSframes) ui.drawFPS.shift();
lastDraw = performance.now();
// enable for continuous performance monitoring
// console.log(result.performance);
// draw fps chart
await menu.process.updateChart('FPS', ui.detectFPS);


@ -18,12 +18,12 @@ const myConfig = {
detector: { modelPath: 'file://models/blazeface-back.json', enabled: true },
mesh: { modelPath: 'file://models/facemesh.json', enabled: true },
iris: { modelPath: 'file://models/iris.json', enabled: true },
age: { modelPath: 'file://models/age-ssrnet-imdb.json', enabled: true },
age: { modelPath: 'file://models/age.json', enabled: true },
gender: { modelPath: 'file://models/gender.json', enabled: true },
emotion: { modelPath: 'file://models/emotion.json', enabled: true },
},
// body: { modelPath: 'file://models/blazepose.json', modelType: 'blazepose', inputSize: 256, enabled: true },
body: { modelPath: 'file://models/posenet.json', modelType: 'posenet', inputSize: 257, enabled: true },
// body: { modelPath: 'file://models/blazepose.json', modelType: 'blazepose', enabled: true },
body: { modelPath: 'file://models/posenet.json', modelType: 'posenet', enabled: true },
hand: {
enabled: true,
detector: { modelPath: 'file://models/handdetect.json' },

(Diffs suppressed for regenerated vendored files: dist/human.esm.js, dist/human.js, dist/human.js.map, dist/human.node-gpu.js, dist/human.node.js, dist/tfjs.esm.js, plus other generated bundles, source maps, and one binary file not shown.)


@ -1,12 +1,12 @@
{
"format": "graph-model",
"generatedBy": "2.0.0-dev20190603",
"convertedBy": "TensorFlow.js Converter v1.1.2",
"convertedBy": "https://github.com/vladmandic",
"modelTopology":
{
"node":
[
{"name":"sub_2","op":"Placeholder","attr":{"dtype":{"type":"DT_FLOAT"},"shape":{"shape":{"dim":[{"size":"1"},{"size":"-1"},{"size":"-1"},{"size":"3"}]}}}},
{"name":"sub_2","op":"Placeholder","attr":{"dtype":{"type":"DT_FLOAT"},"shape":{"shape":{"dim":[{"size":"1"},{"size":"257"},{"size":"257"},{"size":"3"}]}}}},
{"name":"MobilenetV1/offset_2/Conv2D_bias","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"34"}]}}},"dtype":{"type":"DT_FLOAT"}}},
{"name":"MobilenetV1/offset_2/weights","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"1"},{"size":"1"},{"size":"1024"},{"size":"34"}]}}},"dtype":{"type":"DT_FLOAT"}}},
{"name":"MobilenetV1/Conv2d_13_pointwise/Conv2D_bias","op":"Const","attr":{"value":{"tensor":{"dtype":"DT_FLOAT","tensorShape":{"dim":[{"size":"1024"}]}}},"dtype":{"type":"DT_FLOAT"}}},

package-lock.json generated

@ -33,7 +33,7 @@
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-promise": "^4.3.1",
"rimraf": "^3.0.2",
"simple-git": "^2.36.1",
"simple-git": "^2.36.2",
"tslib": "^2.1.0",
"typescript": "^4.2.3"
},
@ -3298,9 +3298,9 @@
"dev": true
},
"node_modules/simple-git": {
"version": "2.36.1",
"resolved": "https://registry.npmjs.org/simple-git/-/simple-git-2.36.1.tgz",
"integrity": "sha512-bN18Ea/4IJgqgbZyE9VpVEUkAu9vyP0VWP7acP0CRC1p/N80GGJ0HhIVeFJsm8TdJLBowiJpdLesQuAZ5TFSKw==",
"version": "2.36.2",
"resolved": "https://registry.npmjs.org/simple-git/-/simple-git-2.36.2.tgz",
"integrity": "sha512-orBEf65GfSiQMsYedbJXSiRNnIRvhbeE5rrxZuEimCpWxDZOav0KLy2IEiPi1YJCF+zaC2quiJF8A4TsxI9/tw==",
"dev": true,
"dependencies": {
"@kwsites/file-exists": "^1.1.1",
@ -3880,9 +3880,9 @@
}
},
"node_modules/yargs-parser": {
"version": "20.2.6",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.6.tgz",
"integrity": "sha512-AP1+fQIWSM/sMiET8fyayjx/J+JmTPt2Mr0FkrgqB4todtfa53sOsrSAcIrJRD5XS20bKUwaDIuMkWKCEiQLKA==",
"version": "20.2.7",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.7.tgz",
"integrity": "sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==",
"dev": true,
"engines": {
"node": ">=10"
@ -6422,9 +6422,9 @@
"dev": true
},
"simple-git": {
"version": "2.36.1",
"resolved": "https://registry.npmjs.org/simple-git/-/simple-git-2.36.1.tgz",
"integrity": "sha512-bN18Ea/4IJgqgbZyE9VpVEUkAu9vyP0VWP7acP0CRC1p/N80GGJ0HhIVeFJsm8TdJLBowiJpdLesQuAZ5TFSKw==",
"version": "2.36.2",
"resolved": "https://registry.npmjs.org/simple-git/-/simple-git-2.36.2.tgz",
"integrity": "sha512-orBEf65GfSiQMsYedbJXSiRNnIRvhbeE5rrxZuEimCpWxDZOav0KLy2IEiPi1YJCF+zaC2quiJF8A4TsxI9/tw==",
"dev": true,
"requires": {
"@kwsites/file-exists": "^1.1.1",
@ -6925,9 +6925,9 @@
}
},
"yargs-parser": {
"version": "20.2.6",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.6.tgz",
"integrity": "sha512-AP1+fQIWSM/sMiET8fyayjx/J+JmTPt2Mr0FkrgqB4todtfa53sOsrSAcIrJRD5XS20bKUwaDIuMkWKCEiQLKA==",
"version": "20.2.7",
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.7.tgz",
"integrity": "sha512-FiNkvbeHzB/syOjIUxFDCnhSfzAL8R5vs40MgLFBorXACCOAEaWu0gRZl14vG8MR9AOJIZbmkjhusqBYZ3HTHw==",
"dev": true
}
}


@ -68,7 +68,7 @@
"eslint-plugin-node": "^11.1.0",
"eslint-plugin-promise": "^4.3.1",
"rimraf": "^3.0.2",
"simple-git": "^2.36.1",
"simple-git": "^2.36.2",
"tslib": "^2.1.0",
"typescript": "^4.2.3"
}


@ -23,17 +23,7 @@ export async function predict(image, config) {
if (config.videoOptimized) skipped = 0;
else skipped = Number.MAX_SAFE_INTEGER;
return new Promise(async (resolve) => {
/*
const zoom = [0, 0]; // 0..1 meaning 0%..100%
const box = [[
(image.shape[1] * zoom[0]) / image.shape[1],
(image.shape[2] * zoom[1]) / image.shape[2],
(image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
(image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
]];
const resize = tf.image.cropAndResize(image, box, [0], [config.face.age.inputSize, config.face.age.inputSize]);
*/
const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
const enhance = tf.mul(resize, [255.0]);
tf.dispose(resize);
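
One detail on the new resize call: tf.image.resizeBilinear takes the target size as [newHeight, newWidth], while in an NHWC signature shape[1] is height and shape[2] is width; the face models here use square inputs, so the two orderings coincide. Spelled out (illustrative only):

const [, modelHeight, modelWidth] = model.inputs[0].shape; // NHWC: [batch, height, width, channels]
const resized = tf.image.resizeBilinear(image, [modelHeight, modelWidth], false); // equivalent for square inputs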


@ -57,15 +57,15 @@ export class BlazeFaceModel {
height: number;
anchorsData: any;
anchors: any;
inputSize: number;
inputSize: any;
config: any;
scaleFaces: number;
constructor(model, config) {
this.blazeFaceModel = model;
this.width = config.face.detector.inputSize;
this.height = config.face.detector.inputSize;
this.anchorsData = generateAnchors(config.face.detector.inputSize);
this.width = model.inputs[0].shape[2];
this.height = model.inputs[0].shape[1];
this.anchorsData = generateAnchors(model.inputs[0].shape[1]);
this.anchors = tf.tensor2d(this.anchorsData);
this.inputSize = tf.tensor1d([this.width, this.height]);
this.config = config;


@ -9,7 +9,7 @@ export class MediaPipeFaceMesh {
config: any;
constructor(blazeFace, blazeMeshModel, irisModel, config) {
this.facePipeline = new facepipeline.Pipeline(blazeFace, blazeMeshModel, irisModel, config);
this.facePipeline = new facepipeline.Pipeline(blazeFace, blazeMeshModel, irisModel);
this.config = config;
}


@ -43,22 +43,22 @@ export class Pipeline {
boundingBoxDetector: any;
meshDetector: any;
irisModel: any;
meshWidth: number;
meshHeight: number;
boxSize: number;
meshSize: number;
irisSize: number;
irisEnlarge: number;
skipped: number;
detectedFaces: number;
constructor(boundingBoxDetector, meshDetector, irisModel, config) {
constructor(boundingBoxDetector, meshDetector, irisModel) {
// An array of facial bounding boxes.
this.storedBoxes = [];
this.boundingBoxDetector = boundingBoxDetector;
this.meshDetector = meshDetector;
this.irisModel = irisModel;
this.meshWidth = config.face.mesh.inputSize;
this.meshHeight = config.face.mesh.inputSize;
this.irisSize = config.face.iris.inputSize;
this.boxSize = boundingBoxDetector?.blazeFaceModel?.inputs[0].shape[2] || 0;
this.meshSize = meshDetector?.inputs[0].shape[2] || boundingBoxDetector?.blazeFaceModel?.inputs[0].shape[2];
this.irisSize = irisModel?.inputs[0].shape[1] || 0;
this.irisEnlarge = 2.3;
this.skipped = 0;
this.detectedFaces = 0;
@ -66,10 +66,10 @@ export class Pipeline {
transformRawCoords(rawCoords, box, angle, rotationMatrix) {
const boxSize = bounding.getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint });
const scaleFactor = [boxSize[0] / this.meshWidth, boxSize[1] / this.meshHeight];
const scaleFactor = [boxSize[0] / this.meshSize, boxSize[1] / this.boxSize];
const coordsScaled = rawCoords.map((coord) => ([
scaleFactor[0] * (coord[0] - this.meshWidth / 2),
scaleFactor[1] * (coord[1] - this.meshHeight / 2), coord[2],
scaleFactor[0] * (coord[0] - this.boxSize / 2),
scaleFactor[1] * (coord[1] - this.boxSize / 2), coord[2],
]));
const coordsRotationMatrix = (angle !== 0) ? util.buildRotationMatrix(angle, [0, 0]) : util.IDENTITY_MATRIX;
const coordsRotated = (angle !== 0) ? coordsScaled.map((coord) => ([...util.rotatePoint(coord, coordsRotationMatrix), coord[2]])) : coordsScaled;
@ -93,9 +93,9 @@ export class Pipeline {
const box = bounding.squarifyBox(bounding.enlargeBox(this.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), this.irisEnlarge));
const boxSize = bounding.getBoxSize(box);
let crop = tf.image.cropAndResize(face, [[
box.startPoint[1] / this.meshHeight,
box.startPoint[0] / this.meshWidth, box.endPoint[1] / this.meshHeight,
box.endPoint[0] / this.meshWidth,
box.startPoint[1] / this.meshSize,
box.startPoint[0] / this.meshSize, box.endPoint[1] / this.meshSize,
box.endPoint[0] / this.meshSize,
]], [0], [this.irisSize, this.irisSize]);
if (flip && tf.ENV.flags.IS_BROWSER) {
crop = tf.image.flipLeftRight(crop); // flipLeftRight is not defined for tfjs-node
@ -192,11 +192,11 @@ export class Pipeline {
const faceCenterNormalized = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
const rotatedImage = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
rotationMatrix = util.buildRotationMatrix(-angle, faceCenter);
face = bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotatedImage, [this.meshHeight, this.meshWidth]).div(255);
face = bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotatedImage, [this.meshSize, this.meshSize]).div(255);
} else {
rotationMatrix = util.IDENTITY_MATRIX;
const cloned = input.clone();
face = bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, cloned, [this.meshHeight, this.meshWidth]).div(255);
face = bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, cloned, [this.boxSize, this.boxSize]).div(255);
}
// if we're not going to produce mesh, don't spend time with further processing
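
Context for the getEyeBox crop in this file: tf.image.cropAndResize expects box coordinates normalized to 0..1 in [y1, x1, y2, x2] order, which is why the pixel coordinates are divided by the mesh input size before cropping. A minimal standalone sketch (names are illustrative, not the library's):

import * as tf from '@tensorflow/tfjs';

// crop one normalized region out of a [1, H, W, 3] tensor and resize it to size x size
function cropNormalized(image: tf.Tensor4D, box: [number, number, number, number], size: number) {
  return tf.image.cropAndResize(image, [box], [0], [size, size]); // box = [y1, x1, y2, x2], all in 0..1
}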


@ -8,7 +8,6 @@ let model;
export async function load(config) {
if (!model) {
model = await tf.loadGraphModel(config.body.modelPath);
// blazepose inputSize is 256x256px, but we can find that out dynamically
model.width = parseInt(model.signature.inputs['input_1:0'].tensorShape.dim[2].size);
model.height = parseInt(model.signature.inputs['input_1:0'].tensorShape.dim[1].size);
if (config.debug) log(`load model: ${config.body.modelPath.match(/\/(.*)\./)[1]}`);
@ -20,7 +19,7 @@ export async function predict(image, config) {
if (!model) return null;
if (!config.body.enabled) return null;
const imgSize = { width: image.shape[2], height: image.shape[1] };
const resize = tf.image.resizeBilinear(image, [model.width || config.body.inputSize, model.height || config.body.inputSize], false);
const resize = tf.image.resizeBilinear(image, [model.width, model.height], false);
const normalize = tf.div(resize, [255.0]);
resize.dispose();
let points;
@ -30,7 +29,6 @@ export async function predict(image, config) {
// const segmentation = segmentationT.arraySync(); // array 128 x 128
// segmentationT.dispose();
points = resT.find((t) => (t.size === 195 || t.size === 155)).dataSync(); // order of output tensors may change between models, full has 195 and upper has 155 items
// console.log(resT, points, segmentation);
resT.forEach((t) => t.dispose());
} else {
const profileData = await tf.profile(() => model.predict(normalize));
@ -55,6 +53,5 @@ export async function predict(image, config) {
presence: (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 4])))) / 100, // reverse sigmoid value
});
}
// console.log('POINTS', imgSize, pts.length, pts);
return [{ keypoints }];
}


@ -2,8 +2,9 @@ import { log } from '../log';
import * as tf from '../../dist/tfjs.esm.js';
import * as profile from '../profile';
// based on https://github.com/sirius-ai/MobileFaceNet_TF
// model converted from https://github.com/sirius-ai/MobileFaceNet_TF/files/3551493/FaceMobileNet192_train_false.zip
// original: https://github.com/sirius-ai/MobileFaceNet_TF
// modified: https://github.com/sirius-ai/MobileFaceNet_TF/issues/46
// download: https://github.com/sirius-ai/MobileFaceNet_TF/files/3551493/FaceMobileNet192_train_false.zip
let model;
@ -29,7 +30,7 @@ export function simmilarity(embedding1, embedding2) {
export async function predict(image, config) {
if (!model) return null;
return new Promise(async (resolve) => {
const resize = tf.image.resizeBilinear(image, [config.face.embedding.inputSize, config.face.embedding.inputSize], false);
const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
// const normalize = tf.tidy(() => resize.div(127.5).sub(0.5)); // this is -0.5...0.5 ???
let data: Array<[]> = [];
if (config.face.embedding.enabled) {
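
Since this module produces face embeddings that are later compared, a generic similarity sketch may help readers; this is plain cosine similarity and not necessarily the exact metric the simmilarity export above implements:

function cosineSimilarity(a: number[], b: number[]): number {
  const dot = a.reduce((sum, v, i) => sum + v * b[i], 0);
  const magA = Math.sqrt(a.reduce((sum, v) => sum + v * v, 0));
  const magB = Math.sqrt(b.reduce((sum, v) => sum + v * v, 0));
  return dot / ((magA * magB) || 1); // guard against zero-magnitude vectors
}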


@ -27,17 +27,7 @@ export async function predict(image, config) {
if (config.videoOptimized) skipped = 0;
else skipped = Number.MAX_SAFE_INTEGER;
return new Promise(async (resolve) => {
/*
const zoom = [0, 0]; // 0..1 meaning 0%..100%
const box = [[
(image.shape[1] * zoom[0]) / image.shape[1],
(image.shape[2] * zoom[1]) / image.shape[2],
(image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
(image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
]];
const resize = tf.image.cropAndResize(image, box, [0], [config.face.emotion.inputSize, config.face.emotion.inputSize]);
*/
const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
const [red, green, blue] = tf.split(resize, 3, 3);
resize.dispose();
// weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
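
The rgb2gray link above refers to the standard BT.601 luma weights; a minimal sketch of that weighted conversion, assuming red, green and blue are the single-channel tensors produced by the split (the generic formula, not necessarily the exact constants used in this file):

const grayscale = tf.tidy(() => red.mul(0.2989).add(green.mul(0.5870)).add(blue.mul(0.1140)));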


@ -28,7 +28,7 @@ export async function predict(image, config) {
if (config.videoOptimized) skipped = 0;
else skipped = Number.MAX_SAFE_INTEGER;
return new Promise(async (resolve) => {
const resize = tf.image.resizeBilinear(image, [config.face.gender.inputSize, config.face.gender.inputSize], false);
const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
let enhance;
if (alternative) {
enhance = tf.tidy(() => {


@ -5,6 +5,7 @@ export class HandDetector {
model: any;
anchors: any;
anchorsTensor: any;
inputSize: number;
inputSizeTensor: any;
doubleInputSizeTensor: any;
@ -12,6 +13,7 @@ export class HandDetector {
this.model = model;
this.anchors = anchorsAnnotated.map((anchor) => [anchor.x_center, anchor.y_center]);
this.anchorsTensor = tf.tensor2d(this.anchors);
this.inputSize = inputSize;
this.inputSizeTensor = tf.tensor1d([inputSize, inputSize]);
this.doubleInputSizeTensor = tf.tensor1d([inputSize * 2, inputSize * 2]);
}
@ -67,7 +69,7 @@ export class HandDetector {
async estimateHandBounds(input, config) {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
const image = tf.tidy(() => input.resizeBilinear([config.hand.inputSize, config.hand.inputSize]).div(127.5).sub(1));
const image = tf.tidy(() => input.resizeBilinear([this.inputSize, this.inputSize]).div(127.5).sub(1));
const predictions = await this.getBoxes(image, config);
image.dispose();
const hands: Array<{}> = [];
@ -79,7 +81,7 @@ export class HandDetector {
const palmLandmarks = prediction.palmLandmarks.arraySync();
prediction.box.dispose();
prediction.palmLandmarks.dispose();
hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / config.hand.inputSize, inputHeight / config.hand.inputSize]));
hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / this.inputSize, inputHeight / this.inputSize]));
}
return hands;
}


@ -54,8 +54,8 @@ export async function load(config) {
config.hand.enabled ? tf.loadGraphModel(config.hand.detector.modelPath, { fromTFHub: config.hand.detector.modelPath.includes('tfhub.dev') }) : null,
config.hand.landmarks ? tf.loadGraphModel(config.hand.skeleton.modelPath, { fromTFHub: config.hand.skeleton.modelPath.includes('tfhub.dev') }) : null,
]);
const handDetector = new handdetector.HandDetector(handDetectorModel, config.hand.inputSize, anchors.anchors);
const handPipeline = new handpipeline.HandPipeline(handDetector, handPoseModel, config.hand.inputSize);
const handDetector = new handdetector.HandDetector(handDetectorModel, handDetectorModel?.inputs[0].shape[2], anchors.anchors);
const handPipeline = new handpipeline.HandPipeline(handDetector, handPoseModel, handPoseModel?.inputs[0].shape[2]);
const handPose = new HandPose(handPipeline);
if (config.hand.enabled && config.debug) log(`load model: ${config.hand.detector.modelPath.match(/\/(.*)\./)[1]}`);
if (config.hand.landmarks && config.debug) log(`load model: ${config.hand.skeleton.modelPath.match(/\/(.*)\./)[1]}`);


@ -109,7 +109,7 @@ class Human {
age,
gender,
emotion,
body: this.config.body.modelType.startsWith('posenet') ? posenet : blazepose,
body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose,
hand: handpose,
};
// include platform info
@ -186,8 +186,8 @@ class Human {
this.models.emotion || ((this.config.face.enabled && this.config.face.emotion.enabled) ? emotion.load(this.config) : null),
this.models.embedding || ((this.config.face.enabled && this.config.face.embedding.enabled) ? embedding.load(this.config) : null),
this.models.handpose || (this.config.hand.enabled ? handpose.load(this.config) : null),
this.models.posenet || (this.config.body.enabled && this.config.body.modelType.startsWith('posenet') ? posenet.load(this.config) : null),
this.models.posenet || (this.config.body.enabled && this.config.body.modelType.startsWith('blazepose') ? blazepose.load(this.config) : null),
this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('posenet') ? posenet.load(this.config) : null),
this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('blazepose') ? blazepose.load(this.config) : null),
]);
} else {
if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config);
@ -196,8 +196,8 @@ class Human {
if (this.config.face.enabled && this.config.face.emotion.enabled && !this.models.emotion) this.models.emotion = await emotion.load(this.config);
if (this.config.face.enabled && this.config.face.embedding.enabled && !this.models.embedding) this.models.embedding = await embedding.load(this.config);
if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config);
if (this.config.body.enabled && !this.models.posenet && this.config.body.modelType.startsWith('posenet')) this.models.posenet = await posenet.load(this.config);
if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelType.startsWith('blazepose')) this.models.blazepose = await blazepose.load(this.config);
if (this.config.body.enabled && !this.models.posenet && this.config.body.modelPath.includes('posenet')) this.models.posenet = await posenet.load(this.config);
if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelPath.includes('blazepose')) this.models.blazepose = await blazepose.load(this.config);
}
if (this.#firstRun) {
@ -477,13 +477,13 @@ class Human {
// run body: can be posenet or blazepose
this.#analyze('Start Body:');
if (this.config.async) {
if (this.config.body.modelType.startsWith('posenet')) bodyRes = this.config.body.enabled ? this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
else bodyRes = this.config.body.enabled ? blazepose.predict(process.tensor, this.config) : [];
if (this.#perf.body) delete this.#perf.body;
} else {
this.state = 'run:body';
timeStamp = now();
if (this.config.body.modelType.startsWith('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
else bodyRes = this.config.body.enabled ? await blazepose.predict(process.tensor, this.config) : [];
this.#perf.body = Math.trunc(now() - timeStamp);
}
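
With this change the body model is selected from modelPath alone; the old modelType switch no longer drives these checks. A hedged usage sketch reusing the demo's model paths from earlier in this commit (the canvas input is an assumption for illustration):

import Human from '../src/human';

async function runBody(input: HTMLCanvasElement) {
  // any modelPath containing 'posenet' selects PoseNet; 'blazepose' selects BlazePose
  const human = new Human({ body: { enabled: true, modelPath: '../models/blazepose.json' } });
  return human.detect(input);
}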


@ -20,12 +20,12 @@ function getInstanceScore(existingPoses, squaredNmsRadius, instanceKeypoints) {
return notOverlappedKeypointScores / instanceKeypoints.length;
}
export function decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, config) {
export function decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, nmsRadius, maxDetections, scoreThreshold) {
const poses: Array<{ keypoints: any, score: number }> = [];
const queue = buildParts.buildPartWithScoreQueue(config.body.scoreThreshold, kLocalMaximumRadius, scoresBuffer);
const squaredNmsRadius = config.body.nmsRadius ^ 2;
const queue = buildParts.buildPartWithScoreQueue(scoreThreshold, kLocalMaximumRadius, scoresBuffer);
const squaredNmsRadius = nmsRadius ^ 2;
// Generate at most maxDetections object instances per image in decreasing root part score order.
while (poses.length < config.body.maxDetections && !queue.empty()) {
while (poses.length < maxDetections && !queue.empty()) {
// The top element in the queue is the next root candidate.
const root = queue.dequeue();
// Part-based non-maximum suppression: We reject a root candidate if it is within a disk of `nmsRadius` pixels from the corresponding part of a previously detected instance.
@ -34,7 +34,7 @@ export function decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFw
// Else start a new detection instance at the position of the root.
const keypoints = decodePose.decodePose(root, scoresBuffer, offsetsBuffer, defaultOutputStride, displacementsFwdBuffer, displacementsBwdBuffer);
const score = getInstanceScore(poses, squaredNmsRadius, keypoints);
if (score > config.body.scoreThreshold) poses.push({ keypoints, score });
if (score > scoreThreshold) poses.push({ keypoints, score });
}
return poses;
}


@ -74,7 +74,7 @@ export function decodePose(root, scores, offsets, outputStride, displacementsFwd
return instanceKeypoints;
}
export async function decodeSinglePose(heatmapScores, offsets, config) {
export async function decodeSinglePose(heatmapScores, offsets, minScore) {
let totalScore = 0.0;
const heatmapValues = decoders.argmax2d(heatmapScores);
const allTensorBuffers = await Promise.all([heatmapScores.buffer(), offsets.buffer(), heatmapValues.buffer()]);
@ -95,7 +95,7 @@ export async function decodeSinglePose(heatmapScores, offsets, config) {
score,
};
});
const filteredKeypoints = instanceKeypoints.filter((kpt) => kpt.score > config.body.scoreThreshold);
const filteredKeypoints = instanceKeypoints.filter((kpt) => kpt.score > minScore);
heatmapValues.dispose();
offsetPoints.dispose();
return { keypoints: filteredKeypoints, score: totalScore / instanceKeypoints.length };


@ -1,30 +1,23 @@
import * as tf from '../../dist/tfjs.esm.js';
const imageNetMean = [-123.15, -115.90, -103.06];
function nameOutputResultsMobileNet(results) {
const [offsets, heatmap, displacementFwd, displacementBwd] = results;
return { offsets, heatmap, displacementFwd, displacementBwd };
}
function nameOutputResultsResNet(results) {
const [displacementFwd, displacementBwd, offsets, heatmap] = results;
return { offsets, heatmap, displacementFwd, displacementBwd };
}
export class BaseModel {
model: any;
constructor(model) {
this.model = model;
}
predict(input, config) {
predict(input) {
return tf.tidy(() => {
const asFloat = (config.body.modelType === 'posenet-resnet') ? input.toFloat().add(imageNetMean) : input.toFloat().div(127.5).sub(1.0);
const asFloat = input.toFloat().div(127.5).sub(1.0);
const asBatch = asFloat.expandDims(0);
const results = this.model.predict(asBatch);
const results3d = results.map((y) => y.squeeze([0]));
const namedResults = (config.body.modelType === 'posenet-resnet') ? nameOutputResultsResNet(results3d) : nameOutputResultsMobileNet(results3d);
const namedResults = nameOutputResultsMobileNet(results3d);
return {
heatmapScores: namedResults.heatmap.sigmoid(),
offsets: namedResults.offsets,


@ -5,43 +5,42 @@ import * as decodeMultiple from './decodeMultiple';
import * as decodePose from './decodePose';
import * as util from './util';
async function estimateMultiple(input, res, config) {
async function estimateMultiple(input, res, config, inputSize) {
return new Promise(async (resolve) => {
const height = input.shape[1];
const width = input.shape[2];
const allTensorBuffers = await util.toTensorBuffers3D([res.heatmapScores, res.offsets, res.displacementFwd, res.displacementBwd]);
const scoresBuffer = allTensorBuffers[0];
const offsetsBuffer = allTensorBuffers[1];
const displacementsFwdBuffer = allTensorBuffers[2];
const displacementsBwdBuffer = allTensorBuffers[3];
const poses = await decodeMultiple.decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, config);
const scaled = util.scaleAndFlipPoses(poses, [height, width], [config.body.inputSize, config.body.inputSize]);
const poses = await decodeMultiple.decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, config.body.nmsRadius, config.body.maxDetections, config.body.scoreThreshold);
const scaled = util.scaleAndFlipPoses(poses, [input.shape[1], input.shape[2]], [inputSize, inputSize]);
resolve(scaled);
});
}
async function estimateSingle(input, res, config) {
async function estimateSingle(input, res, config, inputSize) {
return new Promise(async (resolve) => {
const height = input.shape[1];
const width = input.shape[2];
const pose = await decodePose.decodeSinglePose(res.heatmapScores, res.offsets, config);
const poses = [pose];
const scaled = util.scaleAndFlipPoses(poses, [height, width], [config.body.inputSize, config.body.inputSize]);
const pose = await decodePose.decodeSinglePose(res.heatmapScores, res.offsets, config.body.scoreThreshold);
const scaled = util.scaleAndFlipPoses([pose], [input.shape[1], input.shape[2]], [inputSize, inputSize]);
resolve(scaled);
});
}
export class PoseNet {
baseModel: any;
inputSize: number
constructor(model) {
this.baseModel = model;
this.inputSize = model.model.inputs[0].shape[1];
}
async estimatePoses(input, config) {
const resized = util.resizeTo(input, [config.body.inputSize, config.body.inputSize]);
const resized = util.resizeTo(input, [this.inputSize, this.inputSize]);
const res = this.baseModel.predict(resized, config);
const poses = (config.body.maxDetections < 2) ? await estimateSingle(input, res, config) : await estimateMultiple(input, res, config);
const poses = (config.body.maxDetections < 2)
? await estimateSingle(input, res, config, this.inputSize)
: await estimateMultiple(input, res, config, this.inputSize);
res.heatmapScores.dispose();
res.offsets.dispose();
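
The inputSize captured in the constructor above is also what util.scaleAndFlipPoses uses to map keypoints from model space back to the original frame. A hypothetical standalone version of that rescale arithmetic (helper name and shapes are mine, not the library's):

function rescaleKeypoint(point: { x: number, y: number }, frame: { width: number, height: number }, inputSize: number) {
  return { x: (point.x * frame.width) / inputSize, y: (point.y * frame.height) / inputSize };
}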


@ -5,7 +5,7 @@ export declare class BlazeFaceModel {
height: number;
anchorsData: any;
anchors: any;
inputSize: number;
inputSize: any;
config: any;
scaleFaces: number;
constructor(model: any, config: any);


@ -3,13 +3,13 @@ export declare class Pipeline {
boundingBoxDetector: any;
meshDetector: any;
irisModel: any;
meshWidth: number;
meshHeight: number;
boxSize: number;
meshSize: number;
irisSize: number;
irisEnlarge: number;
skipped: number;
detectedFaces: number;
constructor(boundingBoxDetector: any, meshDetector: any, irisModel: any, config: any);
constructor(boundingBoxDetector: any, meshDetector: any, irisModel: any);
transformRawCoords(rawCoords: any, box: any, angle: any, rotationMatrix: any): any;
getLeftToRightEyeDepthDifference(rawCoords: any): number;
getEyeBox(rawCoords: any, face: any, eyeInnerCornerIndex: any, eyeOuterCornerIndex: any, flip?: boolean): {


@ -2,6 +2,7 @@ export declare class HandDetector {
model: any;
anchors: any;
anchorsTensor: any;
inputSize: number;
inputSizeTensor: any;
doubleInputSizeTensor: any;
constructor(model: any, inputSize: any, anchorsAnnotated: any);


@ -1,4 +1,4 @@
export declare function decodeMultiplePoses(scoresBuffer: any, offsetsBuffer: any, displacementsFwdBuffer: any, displacementsBwdBuffer: any, config: any): {
export declare function decodeMultiplePoses(scoresBuffer: any, offsetsBuffer: any, displacementsFwdBuffer: any, displacementsBwdBuffer: any, nmsRadius: any, maxDetections: any, scoreThreshold: any): {
keypoints: any;
score: number;
}[];


@ -1,5 +1,5 @@
export declare function decodePose(root: any, scores: any, offsets: any, outputStride: any, displacementsFwd: any, displacementsBwd: any): any[];
export declare function decodeSinglePose(heatmapScores: any, offsets: any, config: any): Promise<{
export declare function decodeSinglePose(heatmapScores: any, offsets: any, minScore: any): Promise<{
keypoints: {
position: {
y: any;


@ -1,6 +1,6 @@
export declare class BaseModel {
model: any;
constructor(model: any);
predict(input: any, config: any): any;
predict(input: any): any;
dispose(): void;
}


@ -1,5 +1,6 @@
export declare class PoseNet {
baseModel: any;
inputSize: number;
constructor(model: any);
estimatePoses(input: any, config: any): Promise<unknown>;
dispose(): void;