Mirror of https://github.com/vladmandic/human

Commit 9e1776906f: implemented multi-hand support
Parent: f484493b6f
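Below is a brief usage sketch (not part of the diff) of what this commit enables; it assumes the IIFE browser build that registers the global `human`, the new `hand.maxHands` option added in `src/config.js`, and the array-valued `hand` result produced by the reworked `HandPose` class.

```js
// Hypothetical sketch: detecting and iterating over multiple hands.
// Assumes dist/human.js (IIFE build) which registers the global `human`.
const options = { hand: { enabled: true, maxHands: 2 } }; // new option from this commit

async function run(video) {
  const result = await human.detect(video, options);
  // result.hand is now an array with up to maxHands entries
  for (const hand of result.hand) {
    console.log('confidence:', hand.confidence, 'box:', hand.box);
  }
}
```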
@@ -49,6 +49,8 @@
 "promise/catch-or-return": "off",
 "promise/no-nesting": "off",
 "import/no-absolute-path": "off",
+"import/no-extraneous-dependencies": "off",
+"node/no-unpublished-require": "off",
 "no-regex-spaces": "off",
 "radix": "off"
 }
README.md (76 changed lines)
@@ -1,11 +1,14 @@
 # Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking and Age & Gender Prediction

-**Documentation**: <https://github.com/vladmandic/human#readme>
-**Code Repository**: <https://github.com/vladmandic/human>
-**Package**: <https://www.npmjs.com/package/@vladmandic/human>
-**Live Demo**: <https://vladmandic.github.io/human/demo/demo-esm.html>
+- [**Documentation**](https://github.com/vladmandic/human#readme)
+- [**Code Repository**](https://github.com/vladmandic/human)
+- [**Package**](https://www.npmjs.com/package/@vladmandic/human)
+- [**Issues Tracker**](https://github.com/vladmandic/human/issues)
+- [**Live Demo**](https://vladmandic.github.io/human/demo/demo-esm.html)

-Compatible with Browser, WebWorker and NodeJS** execution!
+Compatible with Browser, WebWorker and NodeJS execution!

+*This is a pre-release project, see [issues](https://github.com/vladmandic/human/issues) for list of known limitations*
+
 *Suggestions are welcome!*
@@ -47,7 +50,7 @@ There are multiple ways to use `Human` library, pick one that suits you:
 Simply download `dist/human.js`, include it in your `HTML` file & it's ready to use.

 ```html
 <script src="dist/human.js"><script>
 ```

 IIFE script auto-registers global namespace `human` within global `Window` object
@@ -64,9 +67,17 @@ IIFE script is distributed in minified form with attached sourcemap
 If you're using bundler *(such as rollup, webpack, esbuild)* to package your client application, you can import ESM version of `Human` library which supports full tree shaking

 ```js
-import human from 'dist/human.esm.js';
+import human from '@vladmandic/human'; // points to @vladmandic/human/dist/human.esm.js
 ```

+Or if you prefer to package your version of `tfjs`, you can use `nobundle` version
+
+```js
+import tf from '@tensorflow/tfjs'
+import human from '@vladmandic/human/dist/human.nobundle.js'; // same functionality as default import, but without tfjs bundled
+```
+
+
 #### 2.2 Using Script Module
 You could use same syntax within your main `JS` file if it's imported with `<script type="module">`
@@ -94,11 +105,26 @@ Install with:
 ```

 And then use with:
 ```js
 const tf = require('@tensorflow/tfjs-node');
-const human = require('@vladmandic/human');
+const human = require('@vladmandic/human'); // points to @vladmandic/human/dist/human.node.js
 ```
-*See limitations for NodeJS usage under `demo`*

+Since NodeJS projects load `weights` from local filesystem instead of using `http` calls, you must modify default configuration to include correct paths with `file://` prefix
+For example:
+```js
+const config = {
+body: { enabled: true, modelPath: 'file://models/posenet/model.json' },
+}
+```
+
+Note that when using `Human` in NodeJS, you must load and parse the image *before* you pass it for detection
+For example:
+```js
+const buffer = fs.readFileSync(input);
+const image = tf.node.decodeImage(buffer);
+const result = human.detect(image, config);
+image.dispose();
+```
+
 ### Weights
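The two NodeJS notes added above fit together; here is a hedged, self-contained sketch combining them (the model path and input file name are illustrative, not taken from the repository):

```js
// Sketch of NodeJS usage per the README notes above; paths are illustrative.
const fs = require('fs');
const tf = require('@tensorflow/tfjs-node');
const human = require('@vladmandic/human'); // resolves to dist/human.node.js

const config = {
  // weights are loaded from the local filesystem, so use a file:// prefix
  body: { enabled: true, modelPath: 'file://models/posenet/model.json' },
};

async function main(input) {
  const buffer = fs.readFileSync(input);     // load the image from disk
  const image = tf.node.decodeImage(buffer); // decode it before calling detect
  const result = await human.detect(image, config);
  image.dispose();                           // free the input tensor
  console.log(result.body);
}

main('sample.jpg');
```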
@@ -122,10 +148,6 @@ NodeJS:
 - `demo-node`: Demo using NodeJS with CJS module
 This is a very simple demo as althought `Human` library is compatible with NodeJS execution
 and is able to load images and models from local filesystem,
-`tfjs-node` backend does not implement function required for execution of some models
-
-Currently only body pose detection works while face and hand models are not supported
-See `tfjs-node` issue <https://github.com/tensorflow/tfjs/issues/4066> for details

 <hr>
@@ -137,20 +159,28 @@ All configuration is done in a single JSON object and all model weights will be
 There is only *ONE* method you need:

 ```js
 import * as tf from '@tensorflow/tfjs';
 import human from '@vladmandic/human';

 // 'image': can be of any type of an image object: HTMLImage, HTMLVideo, HTMLMedia, Canvas, Tensor4D
 // 'options': optional parameter used to override any options present in default configuration
 const result = await human.detect(image, options?)
+```
+
+or if you want to use promises
+
+```js
+human.detect(image, options?).then((result) => {
+  // your code
+})
 ```

 Additionally, `Human` library exposes several classes:

 ```js
 human.defaults // default configuration object
 human.models // dynamically maintained object of any loaded models
 human.tf // instance of tfjs used by human
 ```

 <hr>
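As a concrete illustration of the `options` parameter described in this hunk, individual models can be toggled per call; a small sketch follows (the option names assume the default configuration shown elsewhere in this diff):

```js
// Override parts of the default configuration for a single detect() call;
// option names (face/hand/maxHands) follow human.defaults from this commit.
human.detect(image, {
  face: { enabled: false },             // skip face models for this call
  hand: { enabled: true, maxHands: 2 }, // new multi-hand option
}).then((result) => {
  console.log(`${result.hand.length} hand(s) detected`);
});
```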
@@ -299,7 +329,5 @@ Library can also be used on mobile devices

 ## Todo

-- Improve detection of smaller faces
 - Tweak default parameters
 - Verify age/gender models
-- Make it work with multiple hands
@@ -10,15 +10,15 @@ const ui = {

 const config = {
 face: {
-enabled: true,
+enabled: false,
-detector: { maxFaces: 10, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
+detector: { maxFaces: 10, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
 mesh: { enabled: true },
 iris: { enabled: true },
-age: { enabled: true, skipFrames: 5 },
+age: { enabled: true, skipFrames: 10 },
 gender: { enabled: true },
 },
-body: { enabled: true, maxDetections: 5, scoreThreshold: 0.75, nmsRadius: 20 },
+body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
-hand: { enabled: true, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
+hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
 };
 let settings;
@@ -1,7 +1,7 @@
+const tf = require('@tensorflow/tfjs-node');
 const fs = require('fs');
 const process = require('process');
 const console = require('console');
-const tf = require('@tensorflow/tfjs-node');
 const human = require('..'); // this would be '@vladmandic/human'

 const logger = new console.Console({
@@ -54,6 +54,7 @@ async function detect(input, output) {
 const image = tf.node.decodeImage(buffer);
 logger.log('Processing:', image.shape);
 const result = await human.detect(image, config);
+image.dispose();
 logger.log(result);
 // Draw detected data and save processed image
 logger.log('Saving:', output);
@@ -4,19 +4,3 @@ onmessage = async (msg) => {
 const result = await human.detect(msg.data.image, msg.data.config);
 postMessage(result);
 };
-
-/*
-
-web workers are finicky
-- cannot pass HTMLImage or HTMLVideo to web worker, so need to pass canvas instead
-- canvases can execute transferControlToOffscreen() and then become offscreenCanvas which can be passed to worker, but...
-cannot transfer canvas that has a rendering context (basically, first time you execute getContext() on it)
-
-which means that if we pass main Canvas that will be used to render results on,
-then all operations on it must be within webworker and we cannot touch it in the main thread at all.
-doable, but...how to paint a video frame on it before we pass it?
-
-and we create new offscreenCanvas that we drew video frame on and pass it's imageData and return results from worker
-then there is an overhead of creating it and it ends up being slower than executing in the main thread
-
-*/
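The deleted comment above describes the ImageData hand-off a web worker demo would need; a rough sketch of the main-thread side of that approach follows (the element id, worker file name, and sizing are assumptions, not part of this commit):

```js
// Main-thread sketch of the approach outlined in the removed comment:
// draw the current video frame onto a canvas, then post its ImageData to the worker.
// 'video', the canvas sizing and 'worker.js' are illustrative assumptions.
const worker = new Worker('worker.js');
const video = document.getElementById('video');
const canvas = document.createElement('canvas');
canvas.width = video.videoWidth;
canvas.height = video.videoHeight;
const ctx = canvas.getContext('2d');

function detectFrame(config) {
  ctx.drawImage(video, 0, 0, canvas.width, canvas.height);           // paint current frame
  const image = ctx.getImageData(0, 0, canvas.width, canvas.height); // serializable pixel data
  worker.postMessage({ image, config });                             // matches msg.data.image / msg.data.config
}

worker.onmessage = (msg) => {
  console.log('detection result:', msg.data); // worker posts the result back
};
```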
package.json (12 changed lines)
@@ -5,7 +5,7 @@
 "sideEffects": false,
 "main": "dist/human.node.js",
 "module": "dist/human.esm.js",
-"browser": "dist/human.js",
+"browser": "dist/human.esmjs",
 "author": "Vladimir Mandic <mandic00@live.com>",
 "bugs": {
 "url": "https://github.com/vladmandic/human/issues"
@@ -20,11 +20,10 @@
 "url": "git+https://github.com/vladmandic/human.git"
 },
 "dependencies": {},
-"peerDependencies": {
-"@tensorflow/tfjs-node": "^2.6.0"
-},
+"peerDependencies": {},
 "devDependencies": {
 "@tensorflow/tfjs": "^2.6.0",
+"@tensorflow/tfjs-node": "^2.6.0",
 "esbuild": "^0.7.15",
 "eslint": "^7.10.0",
 "eslint-config-airbnb-base": "^14.2.0",
@@ -37,9 +36,10 @@
 "scripts": {
 "start": "node --trace-warnings --trace-uncaught --no-deprecation demo/demo-node.js",
 "lint": "eslint src/*.js demo/*.js",
-"build": "rimraf dist/ && npm run build-esm && npm run build-iife && npm run build-node",
+"build": "rimraf dist/ && npm run build-iife && npm run build-esm && npm run build-nobundle && npm run build-node && ls -l dist/",
-"build-esm": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --minify --external:fs --outfile=dist/human.esm.js src/index.js",
 "build-iife": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=iife --minify --external:fs --global-name=human --outfile=dist/human.js src/index.js",
+"build-esm": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --external:fs --outfile=dist/human.esm.js src/index.js",
+"build-nobundle": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --minify --external:@tensorflow --external:fs --outfile=dist/human.nobundle.js src/index.js",
 "build-node": "esbuild --bundle --platform=node --sourcemap --target=esnext --format=cjs --external:@tensorflow --outfile=dist/human.node.js src/index.js",
 "update": "npm update --depth 20 && npm dedupe && npm prune && npm audit"
 },
@@ -5,10 +5,10 @@ export default {
 modelPath: '../models/blazeface/model.json',
 inputSize: 128, // fixed value
 maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
-skipFrames: 5, // how many frames to go without running the bounding box detector, only relevant if maxFaces > 1
+skipFrames: 10, // how many frames to go without running the bounding box detector
-minConfidence: 0.8, // threshold for discarding a prediction
+minConfidence: 0.5, // threshold for discarding a prediction
-iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression, must be between [0, 1]
+iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
 },
 mesh: {
 enabled: true,
@@ -24,7 +24,7 @@ export default {
 enabled: true,
 modelPath: '../models/ssrnet-age/imdb/model.json',
 inputSize: 64, // fixed value
-skipFrames: 5,
+skipFrames: 10,
 },
 gender: {
 enabled: true,
@@ -37,16 +37,17 @@ export default {
 inputResolution: 257, // fixed value
 outputStride: 16, // fixed value
 maxDetections: 5,
-scoreThreshold: 0.75,
+scoreThreshold: 0.7,
 nmsRadius: 20,
 },
 hand: {
 enabled: true,
 inputSize: 256, // fixed value
-skipFrames: 5,
+skipFrames: 10,
-minConfidence: 0.8,
+minConfidence: 0.5,
 iouThreshold: 0.3,
-scoreThreshold: 0.75,
+scoreThreshold: 0.7,
+maxHands: 2,
 detector: {
 anchors: '../models/handdetect/anchors.json',
 modelPath: '../models/handdetect/model.json',
@@ -2,12 +2,13 @@ const tf = require('@tensorflow/tfjs');
 const bounding = require('./box');

 class HandDetector {
-constructor(model, width, height, anchors, iouThreshold, scoreThreshold) {
+constructor(model, width, height, anchors, iouThreshold, scoreThreshold, maxHands) {
 this.model = model;
 this.width = width;
 this.height = height;
 this.iouThreshold = iouThreshold;
 this.scoreThreshold = scoreThreshold;
+this.maxHands = maxHands;
 this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]);
 this.anchorsTensor = tf.tensor2d(this.anchors);
 this.inputSizeTensor = tf.tensor1d([width, height]);
@@ -35,28 +36,14 @@ class HandDetector {

 async getBoundingBoxes(input) {
 const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
-let batchedPrediction;
-if (tf.getBackend() === 'webgl') {
-// Currently tfjs-core does not pack depthwiseConv because it fails for
-// very large inputs (https://github.com/tensorflow/tfjs/issues/1652).
-// TODO(annxingyuan): call tf.enablePackedDepthwiseConv when available
-// (https://github.com/tensorflow/tfjs/issues/2821)
-const savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
-tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
-// The model returns a tensor with the following shape:
-// [1 (batch), 2944 (anchor points), 19 (data for each anchor)]
-batchedPrediction = this.model.predict(normalizedInput);
-tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
-} else {
-batchedPrediction = this.model.predict(normalizedInput);
-}
+const batchedPrediction = this.model.predict(normalizedInput);
 const prediction = batchedPrediction.squeeze();
 // Regression score for each anchor point.
 const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
 // Bounding box for each anchor point.
 const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
 const boxes = this.normalizeBoxes(rawBoxes);
-const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, 1, this.iouThreshold, this.scoreThreshold);
+const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
 const boxesWithHands = await boxesWithHandsTensor.array();
 const toDispose = [
 normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction,
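The substantive change in this hunk is the third argument to non-max suppression, which now comes from the new `maxHands` option; a standalone sketch of how that argument caps the surviving boxes follows (all values are made up for illustration):

```js
// Illustration of tf.image.nonMaxSuppressionAsync as used above:
// maxHands caps how many candidate palm boxes survive suppression.
const tf = require('@tensorflow/tfjs');

async function nmsDemo() {
  const boxes = tf.tensor2d([        // [y1, x1, y2, x2] per candidate (made-up values)
    [0.10, 0.10, 0.40, 0.40],
    [0.12, 0.11, 0.42, 0.41],        // heavily overlaps the first box
    [0.60, 0.60, 0.90, 0.90],
  ]);
  const scores = tf.tensor1d([0.9, 0.85, 0.8]);
  const maxHands = 2;
  const kept = await tf.image.nonMaxSuppressionAsync(boxes, scores, maxHands, 0.3, 0.7);
  console.log(await kept.array());   // indices of up to maxHands boxes, e.g. [0, 2]
  tf.dispose([boxes, scores, kept]);
}

nmsDemo();
```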
@@ -66,15 +53,18 @@ class HandDetector {
 toDispose.forEach((tensor) => tensor.dispose());
 return null;
 }
-const boxIndex = boxesWithHands[0];
-const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
-const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]);
-const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([
--1, 2,
-]));
-toDispose.push(rawPalmLandmarks);
-toDispose.forEach((tensor) => tensor.dispose());
-return { boxes: matchingBox, palmLandmarks };
+const detectedHands = tf.tidy(() => {
+const detectedBoxes = [];
+for (const i in boxesWithHands) {
+const boxIndex = boxesWithHands[i];
+const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
+const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]);
+const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2]));
+detectedBoxes.push({ boxes: matchingBox, palmLandmarks });
+}
+return detectedBoxes;
+});
+return detectedHands;
 }

 /**
@@ -87,19 +77,21 @@ class HandDetector {
 const inputHeight = input.shape[1];
 const inputWidth = input.shape[2];
 const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
-const prediction = await this.getBoundingBoxes(image);
-if (prediction === null) {
-image.dispose();
-return null;
-}
-const boundingBoxes = await prediction.boxes.array();
-const startPoint = boundingBoxes[0].slice(0, 2);
-const endPoint = boundingBoxes[0].slice(2, 4);
-const palmLandmarks = await prediction.palmLandmarks.array();
+const predictions = await this.getBoundingBoxes(image);
 image.dispose();
-prediction.boxes.dispose();
-prediction.palmLandmarks.dispose();
-return bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]);
+if (!predictions || (predictions.length === 0)) return null;
+const hands = [];
+for (const i in predictions) {
+const prediction = predictions[i];
+const boundingBoxes = await prediction.boxes.array();
+const startPoint = boundingBoxes[0].slice(0, 2);
+const endPoint = boundingBoxes[0].slice(2, 4);
+const palmLandmarks = await prediction.palmLandmarks.array();
+prediction.boxes.dispose();
+prediction.palmLandmarks.dispose();
+hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
+}
+return hands;
 }
 }
 exports.HandDetector = HandDetector;
@@ -1,5 +1,5 @@
 const tf = require('@tensorflow/tfjs');
-const hand = require('./hand');
+const hand = require('./handdetector');
 const keypoints = require('./keypoints');
 const pipe = require('./pipeline');
@@ -47,8 +47,8 @@ async function load(config) {
 loadHandDetectorModel(config.detector.modelPath),
 loadHandPoseModel(config.skeleton.modelPath),
 ]);
-const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold);
+const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold, config.maxHands);
-const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence);
+const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence, config.maxHands);
 // eslint-disable-next-line no-use-before-define
 const handpose = new HandPose(pipeline);
 return handpose;
@@ -67,19 +67,24 @@ class HandPose {
 }
 return input.toFloat().expandDims(0);
 });
-const prediction = await this.pipeline.estimateHand(image, config);
+const predictions = await this.pipeline.estimateHand(image, config);
 image.dispose();
-if (!prediction) return [];
-const annotations = {};
-for (const key of Object.keys(keypoints.MESH_ANNOTATIONS)) {
-annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
+const hands = [];
+if (!predictions) return hands;
+for (const prediction of predictions) {
+if (!prediction) return [];
+const annotations = {};
+for (const key of Object.keys(keypoints.MESH_ANNOTATIONS)) {
+annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
+}
+hands.push({
+confidence: prediction.confidence || 0,
+box: prediction.box ? [prediction.box.topLeft[0], prediction.box.topLeft[1], prediction.box.bottomRight[0] - prediction.box.topLeft[0], prediction.box.bottomRight[1] - prediction.box.topLeft[1]] : 0,
+landmarks: prediction.landmarks,
+annotations,
+});
 }
-return [{
-confidence: prediction.confidence || 0,
-box: prediction.box ? [prediction.box.topLeft[0], prediction.box.topLeft[1], prediction.box.bottomRight[0] - prediction.box.topLeft[0], prediction.box.bottomRight[1] - prediction.box.topLeft[1]] : 0,
-landmarks: prediction.landmarks,
-annotations,
-}];
+return hands;
 }
 }
 exports.HandPose = HandPose;
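Given the reworked estimation path above, each detected hand surfaces as one element of the `hand` array returned by `human.detect()` (see the `resolve()` call in `src/index.js`); a hedged consumption sketch:

```js
// Sketch of consuming the per-hand objects built above; field names
// (confidence, box, landmarks, annotations) are taken from this hunk.
async function logHands(image) {
  const result = await human.detect(image); // hands surface as result.hand
  for (const hand of result.hand) {
    if (!hand.box) continue;                // box is 0 when no bounding box was produced
    const [x, y, width, height] = hand.box; // [left, top, width, height]
    console.log(`hand ${hand.confidence.toFixed(2)} at ${x},${y} size ${width}x${height}`);
    console.log('landmark count:', hand.landmarks.length);
  }
}
```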
src/index.js (18 changed lines)
@@ -1,8 +1,8 @@
 const tf = require('@tensorflow/tfjs');
-const facemesh = require('./facemesh/index.js');
+const facemesh = require('./facemesh/facemesh.js');
-const ssrnet = require('./ssrnet/index.js');
+const ssrnet = require('./ssrnet/ssrnet.js');
-const posenet = require('./posenet/index.js');
+const posenet = require('./posenet/posenet.js');
-const handpose = require('./handpose/index.js');
+const handpose = require('./handpose/handpose.js');
 const defaults = require('./config.js').default;

 const models = {
@@ -44,9 +44,15 @@ async function detect(input, userConfig) {

 tf.engine().startScope();

+let savedWebglPackDepthwiseConvFlag;
+if (tf.getBackend() === 'webgl') {
+savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
+tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
+}
+
 // run posenet
 let poseRes = [];
-if (config.body.enabled) poseRes = await models.posenet.estimateMultiplePoses(input, config.body);
+if (config.body.enabled) poseRes = await models.posenet.estimatePoses(input, config.body);

 // run handpose
 let handRes = [];
@@ -76,6 +82,8 @@ async function detect(input, userConfig) {
 }
 }

+tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
+
 tf.engine().endScope();
 // combine results
 resolve({ face: faceRes, body: poseRes, hand: handRes });
@@ -1,22 +0,0 @@
-const modelMobileNet = require('./modelMobileNet');
-const modelPoseNet = require('./modelPoseNet');
-const decodeMultiple = require('./decodeMultiple');
-const decodeSingle = require('./decodeSingle');
-const keypoints = require('./keypoints');
-const util = require('./util');
-
-exports.load = modelPoseNet.load;
-exports.PoseNet = modelPoseNet.PoseNet;
-
-exports.MobileNet = modelMobileNet.MobileNet;
-exports.decodeMultiplePoses = decodeMultiple.decodeMultiplePoses;
-exports.decodeSinglePose = decodeSingle.decodeSinglePose;
-exports.partChannels = keypoints.partChannels;
-exports.partIds = keypoints.partIds;
-exports.partNames = keypoints.partNames;
-exports.poseChain = keypoints.poseChain;
-exports.getAdjacentKeyPoints = util.getAdjacentKeyPoints;
-exports.getBoundingBox = util.getBoundingBox;
-exports.getBoundingBoxPoints = util.getBoundingBoxPoints;
-exports.scaleAndFlipPoses = util.scaleAndFlipPoses;
-exports.scalePose = util.scalePose;