implemented multi-hand support

pull/280/head
Vladimir Mandic 2020-10-14 11:43:33 -04:00
parent f484493b6f
commit 9e1776906f
11 changed files with 139 additions and 140 deletions

View File

@ -49,6 +49,8 @@
"promise/catch-or-return": "off",
"promise/no-nesting": "off",
"import/no-absolute-path": "off",
"import/no-extraneous-dependencies": "off",
"node/no-unpublished-require": "off",
"no-regex-spaces": "off",
"radix": "off"
}

View File

@ -1,11 +1,14 @@
# Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking and Age & Gender Prediction
**Documentation**: <https://github.com/vladmandic/human#readme>
**Code Repository**: <https://github.com/vladmandic/human>
**Package**: <https://www.npmjs.com/package/@vladmandic/human>
**Live Demo**: <https://vladmandic.github.io/human/demo/demo-esm.html>
- [**Documentation**](https://github.com/vladmandic/human#readme)
- [**Code Repository**](https://github.com/vladmandic/human)
- [**Package**](https://www.npmjs.com/package/@vladmandic/human)
- [**Issues Tracker**](https://github.com/vladmandic/human/issues)
- [**Live Demo**](https://vladmandic.github.io/human/demo/demo-esm.html)
Compatible with Browser, WebWorker and NodeJS** execution!
Compatible with Browser, WebWorker and NodeJS execution!
*This is a pre-release project, see [issues](https://github.com/vladmandic/human/issues) for a list of known limitations*
*Suggestions are welcome!*
@ -47,7 +50,7 @@ There are multiple ways to use `Human` library, pick one that suits you:
Simply download `dist/human.js`, include it in your `HTML` file & it's ready to use.
```html
<script src="dist/human.js"></script>
```
IIFE script auto-registers the global namespace `human` within the global `Window` object
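For example, a minimal sketch of using the IIFE build through that global (the `image` element id is just an illustrative assumption):
```js
// `human` is available as a global once dist/human.js has loaded
const image = document.getElementById('image'); // hypothetical <img> element on the page
human.detect(image).then((result) => console.log(result));
```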
@ -64,9 +67,17 @@ IIFE script is distributed in minified form with attached sourcemap
If you're using a bundler *(such as rollup, webpack or esbuild)* to package your client application, you can import the ESM version of the `Human` library, which supports full tree shaking
```js
import human from 'dist/human.esm.js';
import human from '@vladmandic/human'; // points to @vladmandic/human/dist/human.esm.js
```
Or, if you prefer to package your own version of `tfjs`, you can use the `nobundle` version
```js
import tf from '@tensorflow/tfjs'
import human from '@vladmandic/human/dist/human.nobundle.js'; // same functionality as default import, but without tfjs bundled
```
#### 2.2 Using Script Module
You can use the same syntax within your main `JS` file if it is imported with `<script type="module">`, for example:
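A minimal sketch (assuming `dist/human.esm.js` is served relative to the page and an `<img id="image">` element exists):
```html
<script type="module">
  import human from './dist/human.esm.js';
  const result = await human.detect(document.getElementById('image'));
  console.log(result);
</script>
```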
@ -95,10 +106,25 @@ Install with:
And then use with:
```js
const tf = require('@tensorflow/tfjs-node');
const human = require('@vladmandic/human');
const human = require('@vladmandic/human'); // points to @vladmandic/human/dist/human.node.js
```
*See limitations for NodeJS usage under `demo`*
Since NodeJS projects load `weights` from the local filesystem instead of using `http` calls, you must modify the default configuration to include correct paths with the `file://` prefix
For example:
```js
const config = {
body: { enabled: true, modelPath: 'file://models/posenet/model.json' },
}
```
Note that when using `Human` in NodeJS, you must load and parse the image *before* passing it for detection
For example:
```js
const buffer = fs.readFileSync(input);
const image = tf.node.decodeImage(buffer);
const result = await human.detect(image, config);
image.dispose();
```
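Putting the two snippets above together, a minimal end-to-end NodeJS sketch could look like the following (the input file name and the `main()` wrapper are illustrative assumptions):
```js
const fs = require('fs');
const tf = require('@tensorflow/tfjs-node');
const human = require('@vladmandic/human');

const config = {
  body: { enabled: true, modelPath: 'file://models/posenet/model.json' }, // local weights need the file:// prefix
};

async function main() {
  const buffer = fs.readFileSync('input.jpg'); // hypothetical input image
  const image = tf.node.decodeImage(buffer); // decode before passing to detect()
  const result = await human.detect(image, config);
  image.dispose(); // release the input tensor
  console.log(result);
}

main();
```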
### Weights
@ -122,10 +148,6 @@ NodeJS:
- `demo-node`: Demo using NodeJS with CJS module
This is a very simple demo: although the `Human` library is compatible with NodeJS execution
and is able to load images and models from the local filesystem,
the `tfjs-node` backend does not implement functions required for execution of some models.
Currently only body pose detection works, while face and hand models are not supported.
See `tfjs-node` issue <https://github.com/tensorflow/tfjs/issues/4066> for details
<hr>
@ -137,20 +159,28 @@ All configuration is done in a single JSON object and all model weights will be
There is only *ONE* method you need:
```js
import * as tf from '@tensorflow/tfjs';
import human from '@vladmandic/human';
// 'image': can be any type of image object: HTMLImage, HTMLVideo, HTMLMedia, Canvas, Tensor4D
// 'options': optional parameter used to override any options present in the default configuration
const result = await human.detect(image, options?)
```
or if you want to use promises
```js
human.detect(image, options?).then((result) => {
// your code
})
```
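As a sketch of the `options` override (assuming overrides are merged with the default configuration, using keys shown in `config.js` such as `face.enabled`):
```js
// run only hand detection for this call by disabling face and body
const result = await human.detect(image, { face: { enabled: false }, body: { enabled: false } });
```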
Additionally, the `Human` library exposes several objects:
```js
human.defaults // default configuration object
human.models // dynamically maintained object of any loaded models
human.tf // instance of tfjs used by human
```
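For example, a quick sketch inspecting these exposed objects (output values are illustrative):
```js
console.log(human.defaults.hand.maxHands); // default configuration values, e.g. 2
console.log(Object.keys(human.models)); // names of models loaded so far
console.log(human.tf.getBackend()); // backend used by the tfjs instance, e.g. 'webgl'
```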
<hr>
@ -299,7 +329,5 @@ Library can also be used on mobile devices
## Todo
- Improve detection of smaller faces
- Tweak default parameters
- Verify age/gender models
- Make it work with multiple hands

View File

@ -10,15 +10,15 @@ const ui = {
const config = {
face: {
enabled: true,
detector: { maxFaces: 10, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
enabled: false,
detector: { maxFaces: 10, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
mesh: { enabled: true },
iris: { enabled: true },
age: { enabled: true, skipFrames: 5 },
age: { enabled: true, skipFrames: 10 },
gender: { enabled: true },
},
body: { enabled: true, maxDetections: 5, scoreThreshold: 0.75, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
};
let settings;

View File

@ -1,7 +1,7 @@
const tf = require('@tensorflow/tfjs-node');
const fs = require('fs');
const process = require('process');
const console = require('console');
const tf = require('@tensorflow/tfjs-node');
const human = require('..'); // this would be '@vladmandic/human'
const logger = new console.Console({
@ -54,6 +54,7 @@ async function detect(input, output) {
const image = tf.node.decodeImage(buffer);
logger.log('Processing:', image.shape);
const result = await human.detect(image, config);
image.dispose();
logger.log(result);
// Draw detected data and save processed image
logger.log('Saving:', output);

View File

@ -4,19 +4,3 @@ onmessage = async (msg) => {
const result = await human.detect(msg.data.image, msg.data.config);
postMessage(result);
};
/*
web workers are finicky
- cannot pass HTMLImage or HTMLVideo to web worker, so need to pass canvas instead
- canvases can execute transferControlToOffscreen() and then become offscreenCanvas which can be passed to worker, but...
cannot transfer canvas that has a rendering context (basically, first time you execute getContext() on it)
which means that if we pass main Canvas that will be used to render results on,
then all operations on it must be within webworker and we cannot touch it in the main thread at all.
doable, but...how to paint a video frame on it before we pass it?
and if we create a new offscreenCanvas, draw the video frame on it, pass its imageData and return results from the worker,
then there is an overhead of creating it and it ends up being slower than executing in the main thread
*/

View File

@ -5,7 +5,7 @@
"sideEffects": false,
"main": "dist/human.node.js",
"module": "dist/human.esm.js",
"browser": "dist/human.js",
"browser": "dist/human.esmjs",
"author": "Vladimir Mandic <mandic00@live.com>",
"bugs": {
"url": "https://github.com/vladmandic/human/issues"
@ -20,11 +20,10 @@
"url": "git+https://github.com/vladmandic/human.git"
},
"dependencies": {},
"peerDependencies": {
"@tensorflow/tfjs-node": "^2.6.0"
},
"peerDependencies": {},
"devDependencies": {
"@tensorflow/tfjs": "^2.6.0",
"@tensorflow/tfjs-node": "^2.6.0",
"esbuild": "^0.7.15",
"eslint": "^7.10.0",
"eslint-config-airbnb-base": "^14.2.0",
@ -37,9 +36,10 @@
"scripts": {
"start": "node --trace-warnings --trace-uncaught --no-deprecation demo/demo-node.js",
"lint": "eslint src/*.js demo/*.js",
"build": "rimraf dist/ && npm run build-esm && npm run build-iife && npm run build-node",
"build-esm": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --minify --external:fs --outfile=dist/human.esm.js src/index.js",
"build": "rimraf dist/ && npm run build-iife && npm run build-esm && npm run build-nobundle && npm run build-node && ls -l dist/",
"build-iife": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=iife --minify --external:fs --global-name=human --outfile=dist/human.js src/index.js",
"build-esm": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --external:fs --outfile=dist/human.esm.js src/index.js",
"build-nobundle": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --minify --external:@tensorflow --external:fs --outfile=dist/human.nobundle.js src/index.js",
"build-node": "esbuild --bundle --platform=node --sourcemap --target=esnext --format=cjs --external:@tensorflow --outfile=dist/human.node.js src/index.js",
"update": "npm update --depth 20 && npm dedupe && npm prune && npm audit"
},

View File

@ -5,10 +5,10 @@ export default {
modelPath: '../models/blazeface/model.json',
inputSize: 128, // fixed value
maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
skipFrames: 5, // how many frames to go without running the bounding box detector, only relevant if maxFaces > 1
minConfidence: 0.8, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression, must be between [0, 1]
scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression
skipFrames: 10, // how many frames to go without running the bounding box detector
minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
},
mesh: {
enabled: true,
@ -24,7 +24,7 @@ export default {
enabled: true,
modelPath: '../models/ssrnet-age/imdb/model.json',
inputSize: 64, // fixed value
skipFrames: 5,
skipFrames: 10,
},
gender: {
enabled: true,
@ -37,16 +37,17 @@ export default {
inputResolution: 257, // fixed value
outputStride: 16, // fixed value
maxDetections: 5,
scoreThreshold: 0.75,
scoreThreshold: 0.7,
nmsRadius: 20,
},
hand: {
enabled: true,
inputSize: 256, // fixed value
skipFrames: 5,
minConfidence: 0.8,
skipFrames: 10,
minConfidence: 0.5,
iouThreshold: 0.3,
scoreThreshold: 0.75,
scoreThreshold: 0.7,
maxHands: 2,
detector: {
anchors: '../models/handdetect/anchors.json',
modelPath: '../models/handdetect/model.json',

View File

@ -2,12 +2,13 @@ const tf = require('@tensorflow/tfjs');
const bounding = require('./box');
class HandDetector {
constructor(model, width, height, anchors, iouThreshold, scoreThreshold) {
constructor(model, width, height, anchors, iouThreshold, scoreThreshold, maxHands) {
this.model = model;
this.width = width;
this.height = height;
this.iouThreshold = iouThreshold;
this.scoreThreshold = scoreThreshold;
this.maxHands = maxHands;
this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]);
this.anchorsTensor = tf.tensor2d(this.anchors);
this.inputSizeTensor = tf.tensor1d([width, height]);
@ -35,28 +36,14 @@ class HandDetector {
async getBoundingBoxes(input) {
const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
let batchedPrediction;
if (tf.getBackend() === 'webgl') {
// Currently tfjs-core does not pack depthwiseConv because it fails for
// very large inputs (https://github.com/tensorflow/tfjs/issues/1652).
// TODO(annxingyuan): call tf.enablePackedDepthwiseConv when available
// (https://github.com/tensorflow/tfjs/issues/2821)
const savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
// The model returns a tensor with the following shape:
// [1 (batch), 2944 (anchor points), 19 (data for each anchor)]
batchedPrediction = this.model.predict(normalizedInput);
tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
} else {
batchedPrediction = this.model.predict(normalizedInput);
}
const batchedPrediction = this.model.predict(normalizedInput);
const prediction = batchedPrediction.squeeze();
// Regression score for each anchor point.
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
// Bounding box for each anchor point.
const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
const boxes = this.normalizeBoxes(rawBoxes);
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, 1, this.iouThreshold, this.scoreThreshold);
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
const boxesWithHands = await boxesWithHandsTensor.array();
const toDispose = [
normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction,
@ -66,15 +53,18 @@ class HandDetector {
toDispose.forEach((tensor) => tensor.dispose());
return null;
}
const boxIndex = boxesWithHands[0];
const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]);
const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([
-1, 2,
]));
toDispose.push(rawPalmLandmarks);
toDispose.forEach((tensor) => tensor.dispose());
return { boxes: matchingBox, palmLandmarks };
const detectedHands = tf.tidy(() => {
const detectedBoxes = [];
for (const i in boxesWithHands) {
const boxIndex = boxesWithHands[i];
const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]);
const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2]));
detectedBoxes.push({ boxes: matchingBox, palmLandmarks });
}
return detectedBoxes;
});
return detectedHands;
}
/**
@ -87,19 +77,21 @@ class HandDetector {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
const prediction = await this.getBoundingBoxes(image);
if (prediction === null) {
image.dispose();
return null;
}
const boundingBoxes = await prediction.boxes.array();
const startPoint = boundingBoxes[0].slice(0, 2);
const endPoint = boundingBoxes[0].slice(2, 4);
const palmLandmarks = await prediction.palmLandmarks.array();
const predictions = await this.getBoundingBoxes(image);
image.dispose();
prediction.boxes.dispose();
prediction.palmLandmarks.dispose();
return bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]);
if (!predictions || (predictions.length === 0)) return null;
const hands = [];
for (const i in predictions) {
const prediction = predictions[i];
const boundingBoxes = await prediction.boxes.array();
const startPoint = boundingBoxes[0].slice(0, 2);
const endPoint = boundingBoxes[0].slice(2, 4);
const palmLandmarks = await prediction.palmLandmarks.array();
prediction.boxes.dispose();
prediction.palmLandmarks.dispose();
hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
}
return hands;
}
}
exports.HandDetector = HandDetector;

View File

@ -1,5 +1,5 @@
const tf = require('@tensorflow/tfjs');
const hand = require('./hand');
const hand = require('./handdetector');
const keypoints = require('./keypoints');
const pipe = require('./pipeline');
@ -47,8 +47,8 @@ async function load(config) {
loadHandDetectorModel(config.detector.modelPath),
loadHandPoseModel(config.skeleton.modelPath),
]);
const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold);
const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence);
const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold, config.maxHands);
const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence, config.maxHands);
// eslint-disable-next-line no-use-before-define
const handpose = new HandPose(pipeline);
return handpose;
@ -67,19 +67,24 @@ class HandPose {
}
return input.toFloat().expandDims(0);
});
const prediction = await this.pipeline.estimateHand(image, config);
const predictions = await this.pipeline.estimateHand(image, config);
image.dispose();
if (!prediction) return [];
const annotations = {};
for (const key of Object.keys(keypoints.MESH_ANNOTATIONS)) {
annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
const hands = [];
if (!predictions) return hands;
for (const prediction of predictions) {
if (!prediction) return [];
const annotations = {};
for (const key of Object.keys(keypoints.MESH_ANNOTATIONS)) {
annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
}
hands.push({
confidence: prediction.confidence || 0,
box: prediction.box ? [prediction.box.topLeft[0], prediction.box.topLeft[1], prediction.box.bottomRight[0] - prediction.box.topLeft[0], prediction.box.bottomRight[1] - prediction.box.topLeft[1]] : 0,
landmarks: prediction.landmarks,
annotations,
});
}
return [{
confidence: prediction.confidence || 0,
box: prediction.box ? [prediction.box.topLeft[0], prediction.box.topLeft[1], prediction.box.bottomRight[0] - prediction.box.topLeft[0], prediction.box.bottomRight[1] - prediction.box.topLeft[1]] : 0,
landmarks: prediction.landmarks,
annotations,
}];
return hands;
}
}
exports.HandPose = HandPose;

View File

@ -1,8 +1,8 @@
const tf = require('@tensorflow/tfjs');
const facemesh = require('./facemesh/index.js');
const ssrnet = require('./ssrnet/index.js');
const posenet = require('./posenet/index.js');
const handpose = require('./handpose/index.js');
const facemesh = require('./facemesh/facemesh.js');
const ssrnet = require('./ssrnet/ssrnet.js');
const posenet = require('./posenet/posenet.js');
const handpose = require('./handpose/handpose.js');
const defaults = require('./config.js').default;
const models = {
@ -44,9 +44,15 @@ async function detect(input, userConfig) {
tf.engine().startScope();
let savedWebglPackDepthwiseConvFlag;
if (tf.getBackend() === 'webgl') {
savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
}
// run posenet
let poseRes = [];
if (config.body.enabled) poseRes = await models.posenet.estimateMultiplePoses(input, config.body);
if (config.body.enabled) poseRes = await models.posenet.estimatePoses(input, config.body);
// run handpose
let handRes = [];
@ -76,6 +82,8 @@ async function detect(input, userConfig) {
}
}
tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
tf.engine().endScope();
// combine results
resolve({ face: faceRes, body: poseRes, hand: handRes });

View File

@ -1,22 +0,0 @@
const modelMobileNet = require('./modelMobileNet');
const modelPoseNet = require('./modelPoseNet');
const decodeMultiple = require('./decodeMultiple');
const decodeSingle = require('./decodeSingle');
const keypoints = require('./keypoints');
const util = require('./util');
exports.load = modelPoseNet.load;
exports.PoseNet = modelPoseNet.PoseNet;
exports.MobileNet = modelMobileNet.MobileNet;
exports.decodeMultiplePoses = decodeMultiple.decodeMultiplePoses;
exports.decodeSinglePose = decodeSingle.decodeSinglePose;
exports.partChannels = keypoints.partChannels;
exports.partIds = keypoints.partIds;
exports.partNames = keypoints.partNames;
exports.poseChain = keypoints.poseChain;
exports.getAdjacentKeyPoints = util.getAdjacentKeyPoints;
exports.getBoundingBox = util.getBoundingBox;
exports.getBoundingBoxPoints = util.getBoundingBoxPoints;
exports.scaleAndFlipPoses = util.scaleAndFlipPoses;
exports.scalePose = util.scalePose;