implemented multi-hand support

pull/280/head
Vladimir Mandic 2020-10-14 11:43:33 -04:00
parent f484493b6f
commit 9e1776906f
11 changed files with 139 additions and 140 deletions

View File

@ -49,6 +49,8 @@
"promise/catch-or-return": "off",
"promise/no-nesting": "off",
"import/no-absolute-path": "off",
"import/no-extraneous-dependencies": "off",
"node/no-unpublished-require": "off",
"no-regex-spaces": "off",
"radix": "off"
}

View File

@ -1,11 +1,14 @@
# Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking and Age & Gender Prediction
**Documentation**: <https://github.com/vladmandic/human#readme>
**Code Repository**: <https://github.com/vladmandic/human>
**Package**: <https://www.npmjs.com/package/@vladmandic/human>
**Live Demo**: <https://vladmandic.github.io/human/demo/demo-esm.html>
- [**Documentation**](https://github.com/vladmandic/human#readme)
- [**Code Repository**](https://github.com/vladmandic/human)
- [**Package**](https://www.npmjs.com/package/@vladmandic/human)
- [**Issues Tracker**](https://github.com/vladmandic/human/issues)
- [**Live Demo**](https://vladmandic.github.io/human/demo/demo-esm.html)
Compatible with Browser, WebWorker and NodeJS** execution!
Compatible with Browser, WebWorker and NodeJS execution!
*This is a pre-release project, see [issues](https://github.com/vladmandic/human/issues) for a list of known limitations*
*Suggestions are welcome!*
@ -47,7 +50,7 @@ There are multiple ways to use `Human` library, pick one that suits you:
Simply download `dist/human.js`, include it in your `HTML` file & it's ready to use.
```html
<script src="dist/human.js"></script>
```
IIFE script auto-registers the global namespace `human` within the global `Window` object
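For example, a minimal sketch of using the IIFE build through that global (the `image` element id is just an illustrative assumption):
```js
// `human` is available as a global once dist/human.js has loaded
const image = document.getElementById('image'); // hypothetical <img> element on the page
human.detect(image).then((result) => console.log(result));
```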
@ -64,9 +67,17 @@ IIFE script is distributed in minified form with attached sourcemap
If you're using a bundler *(such as rollup, webpack or esbuild)* to package your client application, you can import the ESM version of the `Human` library, which supports full tree shaking
```js
import human from 'dist/human.esm.js';
import human from '@vladmandic/human'; // points to @vladmandic/human/dist/human.esm.js
```
Or, if you prefer to package your own version of `tfjs`, you can use the `nobundle` version
```js
import tf from '@tensorflow/tfjs'
import human from '@vladmandic/human/dist/human.nobundle.js'; // same functionality as default import, but without tfjs bundled
```
#### 2.2 Using Script Module
You can use the same syntax within your main `JS` file if it is imported with `<script type="module">`, for example:
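A minimal sketch (assuming `dist/human.esm.js` is served relative to the page and an `<img id="image">` element exists):
```html
<script type="module">
  import human from './dist/human.esm.js';
  const result = await human.detect(document.getElementById('image'));
  console.log(result);
</script>
```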
@ -95,10 +106,25 @@ Install with:
And then use with:
```js
const tf = require('@tensorflow/tfjs-node');
const human = require('@vladmandic/human');
const human = require('@vladmandic/human'); // points to @vladmandic/human/dist/human.node.js
```
*See limitations for NodeJS usage under `demo`*
Since NodeJS projects load `weights` from the local filesystem instead of using `http` calls, you must modify the default configuration to include correct paths with the `file://` prefix
For example:
```js
const config = {
body: { enabled: true, modelPath: 'file://models/posenet/model.json' },
}
```
Note that when using `Human` in NodeJS, you must load and parse the image *before* passing it for detection
For example:
```js
const buffer = fs.readFileSync(input);
const image = tf.node.decodeImage(buffer);
const result = await human.detect(image, config);
image.dispose();
```
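Putting the two snippets above together, a minimal end-to-end NodeJS sketch could look like the following (the input file name and the `main()` wrapper are illustrative assumptions):
```js
const fs = require('fs');
const tf = require('@tensorflow/tfjs-node');
const human = require('@vladmandic/human');

const config = {
  body: { enabled: true, modelPath: 'file://models/posenet/model.json' }, // local weights need the file:// prefix
};

async function main() {
  const buffer = fs.readFileSync('input.jpg'); // hypothetical input image
  const image = tf.node.decodeImage(buffer); // decode before passing to detect()
  const result = await human.detect(image, config);
  image.dispose(); // release the input tensor
  console.log(result);
}

main();
```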
### Weights
@ -122,10 +148,6 @@ NodeJS:
- `demo-node`: Demo using NodeJS with CJS module
This is a very simple demo: although the `Human` library is compatible with NodeJS execution
and is able to load images and models from the local filesystem,
the `tfjs-node` backend does not implement functions required for execution of some models.
Currently only body pose detection works, while face and hand models are not supported.
See `tfjs-node` issue <https://github.com/tensorflow/tfjs/issues/4066> for details
<hr>
@ -137,20 +159,28 @@ All configuration is done in a single JSON object and all model weights will be
There is only *ONE* method you need:
```js
import * as tf from '@tensorflow/tfjs';
import human from '@vladmandic/human';
// 'image': can be any type of image object: HTMLImage, HTMLVideo, HTMLMedia, Canvas, Tensor4D
// 'options': optional parameter used to override any options present in the default configuration
const result = await human.detect(image, options?)
```
or if you want to use promises
```js
human.detect(image, options?).then((result) => {
// your code
})
```
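As a sketch of the `options` override (assuming overrides are merged with the default configuration, using keys shown in `config.js` such as `face.enabled`):
```js
// run only hand detection for this call by disabling face and body
const result = await human.detect(image, { face: { enabled: false }, body: { enabled: false } });
```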
Additionally, the `Human` library exposes several objects:
```js
human.defaults // default configuration object
human.models // dynamically maintained object of any loaded models
human.tf // instance of tfjs used by human
```
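For example, a quick sketch inspecting these exposed objects (output values are illustrative):
```js
console.log(human.defaults.hand.maxHands); // default configuration values, e.g. 2
console.log(Object.keys(human.models)); // names of models loaded so far
console.log(human.tf.getBackend()); // backend used by the tfjs instance, e.g. 'webgl'
```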
<hr>
@ -299,7 +329,5 @@ Library can also be used on mobile devices
## Todo
- Improve detection of smaller faces
- Tweak default parameters
- Verify age/gender models
- Make it work with multiple hands

View File

@ -10,15 +10,15 @@ const ui = {
const config = {
face: {
enabled: true,
detector: { maxFaces: 10, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
enabled: false,
detector: { maxFaces: 10, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
mesh: { enabled: true },
iris: { enabled: true },
age: { enabled: true, skipFrames: 5 },
age: { enabled: true, skipFrames: 10 },
gender: { enabled: true },
},
body: { enabled: true, maxDetections: 5, scoreThreshold: 0.75, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
};
let settings;

View File

@ -1,7 +1,7 @@
const tf = require('@tensorflow/tfjs-node');
const fs = require('fs');
const process = require('process');
const console = require('console');
const tf = require('@tensorflow/tfjs-node');
const human = require('..'); // this would be '@vladmandic/human'
const logger = new console.Console({
@ -54,6 +54,7 @@ async function detect(input, output) {
const image = tf.node.decodeImage(buffer);
logger.log('Processing:', image.shape);
const result = await human.detect(image, config);
image.dispose();
logger.log(result);
// Draw detected data and save processed image
logger.log('Saving:', output);

View File

@ -4,19 +4,3 @@ onmessage = async (msg) => {
const result = await human.detect(msg.data.image, msg.data.config);
postMessage(result);
};
/*
web workers are finicky
- cannot pass HTMLImage or HTMLVideo to web worker, so need to pass canvas instead
- canvases can execute transferControlToOffscreen() and then become offscreenCanvas which can be passed to worker, but...
cannot transfer canvas that has a rendering context (basically, first time you execute getContext() on it)
which means that if we pass main Canvas that will be used to render results on,
then all operations on it must be within webworker and we cannot touch it in the main thread at all.
doable, but...how to paint a video frame on it before we pass it?
and if we create a new offscreenCanvas, draw the video frame on it, pass its imageData and return results from the worker,
then there is an overhead of creating it and it ends up being slower than executing in the main thread
*/

View File

@ -5,7 +5,7 @@
"sideEffects": false,
"main": "dist/human.node.js",
"module": "dist/human.esm.js",
"browser": "dist/human.js",
"browser": "dist/human.esmjs",
"author": "Vladimir Mandic <mandic00@live.com>",
"bugs": {
"url": "https://github.com/vladmandic/human/issues"
@ -20,11 +20,10 @@
"url": "git+https://github.com/vladmandic/human.git"
},
"dependencies": {},
"peerDependencies": {
"@tensorflow/tfjs-node": "^2.6.0"
},
"peerDependencies": {},
"devDependencies": {
"@tensorflow/tfjs": "^2.6.0",
"@tensorflow/tfjs-node": "^2.6.0",
"esbuild": "^0.7.15",
"eslint": "^7.10.0",
"eslint-config-airbnb-base": "^14.2.0",
@ -37,9 +36,10 @@
"scripts": {
"start": "node --trace-warnings --trace-uncaught --no-deprecation demo/demo-node.js",
"lint": "eslint src/*.js demo/*.js",
"build": "rimraf dist/ && npm run build-esm && npm run build-iife && npm run build-node",
"build-esm": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --minify --external:fs --outfile=dist/human.esm.js src/index.js",
"build": "rimraf dist/ && npm run build-iife && npm run build-esm && npm run build-nobundle && npm run build-node && ls -l dist/",
"build-iife": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=iife --minify --external:fs --global-name=human --outfile=dist/human.js src/index.js",
"build-esm": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --external:fs --outfile=dist/human.esm.js src/index.js",
"build-nobundle": "esbuild --bundle --platform=browser --sourcemap --target=esnext --format=esm --minify --external:@tensorflow --external:fs --outfile=dist/human.nobundle.js src/index.js",
"build-node": "esbuild --bundle --platform=node --sourcemap --target=esnext --format=cjs --external:@tensorflow --outfile=dist/human.node.js src/index.js",
"update": "npm update --depth 20 && npm dedupe && npm prune && npm audit"
},

View File

@ -5,10 +5,10 @@ export default {
modelPath: '../models/blazeface/model.json',
inputSize: 128, // fixed value
maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
skipFrames: 5, // how many frames to go without running the bounding box detector, only relevant if maxFaces > 1
minConfidence: 0.8, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression, must be between [0, 1]
scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression
skipFrames: 10, // how many frames to go without running the bounding box detector
minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
},
mesh: {
enabled: true,
@ -24,7 +24,7 @@ export default {
enabled: true,
modelPath: '../models/ssrnet-age/imdb/model.json',
inputSize: 64, // fixed value
skipFrames: 5,
skipFrames: 10,
},
gender: {
enabled: true,
@ -37,16 +37,17 @@ export default {
inputResolution: 257, // fixed value
outputStride: 16, // fixed value
maxDetections: 5,
scoreThreshold: 0.75,
scoreThreshold: 0.7,
nmsRadius: 20,
},
hand: {
enabled: true,
inputSize: 256, // fixed value
skipFrames: 5,
minConfidence: 0.8,
skipFrames: 10,
minConfidence: 0.5,
iouThreshold: 0.3,
scoreThreshold: 0.75,
scoreThreshold: 0.7,
maxHands: 2,
detector: {
anchors: '../models/handdetect/anchors.json',
modelPath: '../models/handdetect/model.json',

View File

@ -2,12 +2,13 @@ const tf = require('@tensorflow/tfjs');
const bounding = require('./box');
class HandDetector {
constructor(model, width, height, anchors, iouThreshold, scoreThreshold) {
constructor(model, width, height, anchors, iouThreshold, scoreThreshold, maxHands) {
this.model = model;
this.width = width;
this.height = height;
this.iouThreshold = iouThreshold;
this.scoreThreshold = scoreThreshold;
this.maxHands = maxHands;
this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]);
this.anchorsTensor = tf.tensor2d(this.anchors);
this.inputSizeTensor = tf.tensor1d([width, height]);
@ -35,28 +36,14 @@ class HandDetector {
async getBoundingBoxes(input) {
const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
let batchedPrediction;
if (tf.getBackend() === 'webgl') {
// Currently tfjs-core does not pack depthwiseConv because it fails for
// very large inputs (https://github.com/tensorflow/tfjs/issues/1652).
// TODO(annxingyuan): call tf.enablePackedDepthwiseConv when available
// (https://github.com/tensorflow/tfjs/issues/2821)
const savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
// The model returns a tensor with the following shape:
// [1 (batch), 2944 (anchor points), 19 (data for each anchor)]
batchedPrediction = this.model.predict(normalizedInput);
tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
} else {
batchedPrediction = this.model.predict(normalizedInput);
}
const batchedPrediction = this.model.predict(normalizedInput);
const prediction = batchedPrediction.squeeze();
// Regression score for each anchor point.
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
// Bounding box for each anchor point.
const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
const boxes = this.normalizeBoxes(rawBoxes);
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, 1, this.iouThreshold, this.scoreThreshold);
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
const boxesWithHands = await boxesWithHandsTensor.array();
const toDispose = [
normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction,
@ -66,15 +53,18 @@ class HandDetector {
toDispose.forEach((tensor) => tensor.dispose());
return null;
}
const boxIndex = boxesWithHands[0];
const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]);
const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([
-1, 2,
]));
toDispose.push(rawPalmLandmarks);
toDispose.forEach((tensor) => tensor.dispose());
return { boxes: matchingBox, palmLandmarks };
const detectedHands = tf.tidy(() => {
const detectedBoxes = [];
for (const i in boxesWithHands) {
const boxIndex = boxesWithHands[i];
const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]);
const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2]));
detectedBoxes.push({ boxes: matchingBox, palmLandmarks });
}
return detectedBoxes;
});
return detectedHands;
}
/**
@ -87,19 +77,21 @@ class HandDetector {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
const prediction = await this.getBoundingBoxes(image);
if (prediction === null) {
image.dispose();
return null;
}
const boundingBoxes = await prediction.boxes.array();
const startPoint = boundingBoxes[0].slice(0, 2);
const endPoint = boundingBoxes[0].slice(2, 4);
const palmLandmarks = await prediction.palmLandmarks.array();
const predictions = await this.getBoundingBoxes(image);
image.dispose();
prediction.boxes.dispose();
prediction.palmLandmarks.dispose();
return bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]);
if (!predictions || (predictions.length === 0)) return null;
const hands = [];
for (const i in predictions) {
const prediction = predictions[i];
const boundingBoxes = await prediction.boxes.array();
const startPoint = boundingBoxes[0].slice(0, 2);
const endPoint = boundingBoxes[0].slice(2, 4);
const palmLandmarks = await prediction.palmLandmarks.array();
prediction.boxes.dispose();
prediction.palmLandmarks.dispose();
hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
}
return hands;
}
}
exports.HandDetector = HandDetector;

View File

@ -1,5 +1,5 @@
const tf = require('@tensorflow/tfjs');
const hand = require('./hand');
const hand = require('./handdetector');
const keypoints = require('./keypoints');
const pipe = require('./pipeline');
@ -47,8 +47,8 @@ async function load(config) {
loadHandDetectorModel(config.detector.modelPath),
loadHandPoseModel(config.skeleton.modelPath),
]);
const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold);
const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence);
const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold, config.maxHands);
const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence, config.maxHands);
// eslint-disable-next-line no-use-before-define
const handpose = new HandPose(pipeline);
return handpose;
@ -67,19 +67,24 @@ class HandPose {
}
return input.toFloat().expandDims(0);
});
const prediction = await this.pipeline.estimateHand(image, config);
const predictions = await this.pipeline.estimateHand(image, config);
image.dispose();
if (!prediction) return [];
const annotations = {};
for (const key of Object.keys(keypoints.MESH_ANNOTATIONS)) {
annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
const hands = [];
if (!predictions) return hands;
for (const prediction of predictions) {
if (!prediction) return [];
const annotations = {};
for (const key of Object.keys(keypoints.MESH_ANNOTATIONS)) {
annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
}
hands.push({
confidence: prediction.confidence || 0,
box: prediction.box ? [prediction.box.topLeft[0], prediction.box.topLeft[1], prediction.box.bottomRight[0] - prediction.box.topLeft[0], prediction.box.bottomRight[1] - prediction.box.topLeft[1]] : 0,
landmarks: prediction.landmarks,
annotations,
});
}
return [{
confidence: prediction.confidence || 0,
box: prediction.box ? [prediction.box.topLeft[0], prediction.box.topLeft[1], prediction.box.bottomRight[0] - prediction.box.topLeft[0], prediction.box.bottomRight[1] - prediction.box.topLeft[1]] : 0,
landmarks: prediction.landmarks,
annotations,
}];
return hands;
}
}
exports.HandPose = HandPose;

View File

@ -1,8 +1,8 @@
const tf = require('@tensorflow/tfjs');
const facemesh = require('./facemesh/index.js');
const ssrnet = require('./ssrnet/index.js');
const posenet = require('./posenet/index.js');
const handpose = require('./handpose/index.js');
const facemesh = require('./facemesh/facemesh.js');
const ssrnet = require('./ssrnet/ssrnet.js');
const posenet = require('./posenet/posenet.js');
const handpose = require('./handpose/handpose.js');
const defaults = require('./config.js').default;
const models = {
@ -44,9 +44,15 @@ async function detect(input, userConfig) {
tf.engine().startScope();
let savedWebglPackDepthwiseConvFlag;
if (tf.getBackend() === 'webgl') {
savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
}
// run posenet
let poseRes = [];
if (config.body.enabled) poseRes = await models.posenet.estimateMultiplePoses(input, config.body);
if (config.body.enabled) poseRes = await models.posenet.estimatePoses(input, config.body);
// run handpose
let handRes = [];
@ -76,6 +82,8 @@ async function detect(input, userConfig) {
}
}
tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
tf.engine().endScope();
// combine results
resolve({ face: faceRes, body: poseRes, hand: handRes });

View File

@ -1,22 +0,0 @@
const modelMobileNet = require('./modelMobileNet');
const modelPoseNet = require('./modelPoseNet');
const decodeMultiple = require('./decodeMultiple');
const decodeSingle = require('./decodeSingle');
const keypoints = require('./keypoints');
const util = require('./util');
exports.load = modelPoseNet.load;
exports.PoseNet = modelPoseNet.PoseNet;
exports.MobileNet = modelMobileNet.MobileNet;
exports.decodeMultiplePoses = decodeMultiple.decodeMultiplePoses;
exports.decodeSinglePose = decodeSingle.decodeSinglePose;
exports.partChannels = keypoints.partChannels;
exports.partIds = keypoints.partIds;
exports.partNames = keypoints.partNames;
exports.poseChain = keypoints.poseChain;
exports.getAdjacentKeyPoints = util.getAdjacentKeyPoints;
exports.getBoundingBox = util.getBoundingBox;
exports.getBoundingBoxPoints = util.getBoundingBoxPoints;
exports.scaleAndFlipPoses = util.scaleAndFlipPoses;
exports.scalePose = util.scalePose;