diff --git a/demo/browser.js b/demo/browser.js
index a9122958..fc81fc6e 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -332,7 +332,7 @@ function setupMenu() {
menu.addHTML('<hr>');
menu.addLabel('Model Parameters');
- menu.addRange('Max Objects', config.face.detector, 'maxFaces', 0, 50, 1, (val) => {
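+ // minimum raised from 0 to 1 since a value of 0 would request no detections at all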
+ menu.addRange('Max Objects', config.face.detector, 'maxFaces', 1, 50, 1, (val) => {
config.face.detector.maxFaces = parseInt(val);
config.body.maxDetections = parseInt(val);
config.hand.maxHands = parseInt(val);
diff --git a/src/gesture.js b/src/gesture.js
index 5531975b..23f489d1 100644
--- a/src/gesture.js
+++ b/src/gesture.js
@@ -37,9 +37,11 @@ exports.hand = (res) => {
for (const [finger, pos] of Object.entries(hand['annotations'])) {
if (finger !== 'palmBase') fingers.push({ name: finger.toLowerCase(), position: pos[0] }); // get tip of each finger
}
- const closest = fingers.reduce((best, a) => (best.position[2] < a.position[2] ? best : a));
- const highest = fingers.reduce((best, a) => (best.position[1] < a.position[1] ? best : a));
- gestures.push(`${closest.name} forward ${highest.name} up`);
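+ // guard: Array.prototype.reduce throws on an empty array when no initial value is provided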
+ if (fingers && fingers.length > 0) {
+ const closest = fingers.reduce((best, a) => (best.position[2] < a.position[2] ? best : a));
+ const highest = fingers.reduce((best, a) => (best.position[1] < a.position[1] ? best : a));
+ gestures.push(`${closest.name} forward ${highest.name} up`);
+ }
}
return gestures;
};
diff --git a/src/handpose/handdetector.js b/src/handpose/handdetector.js
index 1a13de42..237c425f 100644
--- a/src/handpose/handdetector.js
+++ b/src/handpose/handdetector.js
@@ -21,8 +21,6 @@ const box = require('./box');
class HandDetector {
constructor(model, inputSize, anchorsAnnotated) {
this.model = model;
- this.width = inputSize;
- this.height = inputSize;
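+ // fixed width/height removed; the detector input size is now taken from config.inputSize at call time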
this.anchors = anchorsAnnotated.map((anchor) => [anchor.x_center, anchor.y_center]);
this.anchorsTensor = tf.tensor2d(this.anchors);
this.inputSizeTensor = tf.tensor1d([inputSize, inputSize]);
@@ -49,16 +47,14 @@ class HandDetector {
}
async getBoundingBoxes(input, config) {
- const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
- const batchedPrediction = this.model.predict(normalizedInput);
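+ // normalization moved out of this method; input now arrives already scaled to [-1,1] by estimateHandBounds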
+ const batchedPrediction = this.model.predict(input);
const prediction = batchedPrediction.squeeze();
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
const boxes = this.normalizeBoxes(rawBoxes);
- const boxesWithHandsTensor = tf.image.nonMaxSuppression(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
+ const boxesWithHandsTensor = tf.image.nonMaxSuppression(boxes, scores, config.maxHands, config.iouThreshold, 0.95); // note: score threshold hardcoded to 0.95 instead of config.scoreThreshold
const boxesWithHands = boxesWithHandsTensor.arraySync();
const toDispose = [
- normalizedInput,
batchedPrediction,
boxesWithHandsTensor,
prediction,
@@ -85,22 +81,19 @@ class HandDetector {
async estimateHandBounds(input, config) {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
- const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
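+ // div(127.5).sub(1) maps [0,255] to [-1,1], equivalent to the removed div(255) followed by (x - 0.5) * 2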
+ const image = tf.tidy(() => input.resizeBilinear([config.inputSize, config.inputSize]).div(127.5).sub(1));
const predictions = await this.getBoundingBoxes(image, config);
- if (!predictions || predictions.length === 0) {
- image.dispose();
- return null;
- }
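+ // the resized image is disposed exactly once here, on every code path (it was previously disposed inside the per-prediction loop)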
+ image.dispose();
+ if (!predictions || predictions.length === 0) return null;
const hands = [];
for (const prediction of predictions) {
- const boundingBoxes = prediction.boxes.arraySync();
- const startPoint = boundingBoxes[0].slice(0, 2);
- const endPoint = boundingBoxes[0].slice(2, 4);
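+ // dataSync returns a flat TypedArray, so coordinates are sliced directly without a row index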
+ const boundingBoxes = prediction.boxes.dataSync();
+ const startPoint = boundingBoxes.slice(0, 2);
+ const endPoint = boundingBoxes.slice(2, 4);
const palmLandmarks = prediction.palmLandmarks.arraySync();
- image.dispose();
prediction.boxes.dispose();
prediction.palmLandmarks.dispose();
- hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
+ hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / config.inputSize, inputHeight / config.inputSize]));
}
return hands;
}
diff --git a/src/handpose/handpipeline.js b/src/handpose/handpipeline.js
index 146d040d..f700ca81 100644
--- a/src/handpose/handpipeline.js
+++ b/src/handpose/handpipeline.js
@@ -35,8 +35,8 @@ class HandPipeline {
this.inputSize = inputSize;
this.regionsOfInterest = [];
this.runsWithoutHandDetector = 0;
- this.maxHandsNumber = 1;
this.skipFrames = 0;
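+ // count of hands found in the previous frame; used to decide when the box detector must re-run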
+ this.detectedHands = 0;
}
getBoxForPalmLandmarks(palmLandmarks, rotationMatrix) {
@@ -87,12 +87,18 @@ class HandPipeline {
async estimateHands(image, config) {
this.skipFrames = config.skipFrames;
- const useFreshBox = this.shouldUpdateRegionsOfInterest();
+ // box detection is unnecessary while we already track a sufficient number of boxes
+ let useFreshBox = (this.detectedHands === 0) || (this.detectedHands !== this.regionsOfInterest.length);
+ let boundingBoxPredictions;
+ // but re-run detection every skipFrames frames to check whether the number of boxes changed
+ if (useFreshBox || this.runsWithoutHandDetector > this.skipFrames) boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
+ // if new boxes arrived and their count differs from the tracked count, use them, unless maxHands is fixed at 1
+ if (config.maxHands > 1 && boundingBoxPredictions && boundingBoxPredictions.length > 0 && boundingBoxPredictions.length !== this.detectedHands) useFreshBox = true;
if (useFreshBox) {
- const boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
this.regionsOfInterest = [];
if (!boundingBoxPredictions || boundingBoxPredictions.length === 0) {
image.dispose();
+ this.detectedHands = 0;
return null;
}
for (const boundingBoxPrediction of boundingBoxPredictions) {
@@ -121,28 +127,38 @@ class HandPipeline {
handImage.dispose();
const confidenceValue = confidence.dataSync()[0];
confidence.dispose();
- if (confidenceValue < config.minConfidence) {
+ if (confidenceValue >= config.minConfidence) {
+ const keypointsReshaped = tf.reshape(keypoints, [-1, 3]);
+ const rawCoords = keypointsReshaped.arraySync();
keypoints.dispose();
- this.regionsOfInterest[i] = null;
- return null;
+ keypointsReshaped.dispose();
+ const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
+ const nextBoundingBox = this.getBoxForHandLandmarks(coords);
+ this.updateRegionsOfInterest(nextBoundingBox, i);
+ const result = {
+ landmarks: coords,
+ handInViewConfidence: confidenceValue,
+ boundingBox: {
+ topLeft: nextBoundingBox.startPoint,
+ bottomRight: nextBoundingBox.endPoint,
+ },
+ };
+ hands.push(result);
+ } else {
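+ // low-confidence hand is dropped; the box-only result below is kept commented out for reference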
+ /*
+ const result = {
+ handInViewConfidence: confidenceValue,
+ boundingBox: {
+ topLeft: currentBox.startPoint,
+ bottomRight: currentBox.endPoint,
+ },
+ };
+ hands.push(result);
+ */
}
- const keypointsReshaped = tf.reshape(keypoints, [-1, 3]);
- const rawCoords = keypointsReshaped.arraySync();
keypoints.dispose();
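+ // note: keypoints may already be disposed inside the if branch above; dispose() on a disposed tensor is a no-op in tfjs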
- keypointsReshaped.dispose();
- const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
- const nextBoundingBox = this.getBoxForHandLandmarks(coords);
- this.updateRegionsOfInterest(nextBoundingBox, i);
- const result = {
- landmarks: coords,
- handInViewConfidence: confidenceValue,
- boundingBox: {
- topLeft: nextBoundingBox.startPoint,
- bottomRight: nextBoundingBox.endPoint,
- },
- };
- hands.push(result);
}
+ this.detectedHands = hands.length;
return hands;
}
@@ -174,10 +190,6 @@ class HandPipeline {
}
this.regionsOfInterest[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
}
-
- shouldUpdateRegionsOfInterest() {
- return !this.regionsOfInterest || (this.regionsOfInterest.length === 0) || (this.runsWithoutHandDetector >= this.skipFrames);
- }
}
exports.HandPipeline = HandPipeline;
diff --git a/src/handpose/handpose.js b/src/handpose/handpose.js
index df069fed..9348f626 100644
--- a/src/handpose/handpose.js
+++ b/src/handpose/handpose.js
@@ -14,6 +14,8 @@
* limitations under the License.
* =============================================================================
*/
+// https://storage.googleapis.com/tfjs-models/demos/handpose/index.html
+
const tf = require('@tensorflow/tfjs');
const handdetector = require('./handdetector');
const pipeline = require('./handpipeline');
@@ -43,12 +45,19 @@ class HandPose {
const hands = [];
for (const prediction of predictions) {
const annotations = {};
- for (const key of Object.keys(MESH_ANNOTATIONS)) {
- annotations[key] = MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
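+ // a prediction may carry only a confidence and bounding box, without landmarks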
+ if (prediction.landmarks) {
+ for (const key of Object.keys(MESH_ANNOTATIONS)) {
+ annotations[key] = MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
+ }
}
hands.push({
confidence: prediction.handInViewConfidence,
- box: prediction.boundingBox ? [prediction.boundingBox.topLeft[0], prediction.boundingBox.topLeft[1], prediction.boundingBox.bottomRight[0] - prediction.boundingBox.topLeft[0], prediction.boundingBox.bottomRight[1] - prediction.boundingBox.topLeft[1]] : 0,
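+ // box reformatted as [x, y, width, height]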
+ box: prediction.boundingBox ? [
+ prediction.boundingBox.topLeft[0],
+ prediction.boundingBox.topLeft[1],
+ prediction.boundingBox.bottomRight[0] - prediction.boundingBox.topLeft[0],
+ prediction.boundingBox.bottomRight[1] - prediction.boundingBox.topLeft[1],
+ ] : 0,
landmarks: prediction.landmarks,
annotations,
});
diff --git a/src/human.js b/src/human.js
index 8e97df57..c3371948 100644
--- a/src/human.js
+++ b/src/human.js
@@ -171,7 +171,7 @@ class Human {
}
// tf.ENV.set('WEBGL_CPU_FORWARD', true);
// tf.ENV.set('WEBGL_FORCE_F16_TEXTURES', true);
- // tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
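+ // enable packed depthwise convolutions on the WebGL backend (performance optimization)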
+ tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
await tf.ready();
}
}