mirror of https://github.com/vladmandic/human
major work on handpose model
parent
479fc2547c
commit
9fa7e3d467
|
@ -332,7 +332,7 @@ function setupMenu() {
|
||||||
|
|
||||||
menu.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
|
menu.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
|
||||||
menu.addLabel('Model Parameters');
|
menu.addLabel('Model Parameters');
|
||||||
menu.addRange('Max Objects', config.face.detector, 'maxFaces', 0, 50, 1, (val) => {
|
menu.addRange('Max Objects', config.face.detector, 'maxFaces', 1, 50, 1, (val) => {
|
||||||
config.face.detector.maxFaces = parseInt(val);
|
config.face.detector.maxFaces = parseInt(val);
|
||||||
config.body.maxDetections = parseInt(val);
|
config.body.maxDetections = parseInt(val);
|
||||||
config.hand.maxHands = parseInt(val);
|
config.hand.maxHands = parseInt(val);
|
||||||
|
|
|
@ -37,9 +37,11 @@ exports.hand = (res) => {
|
||||||
for (const [finger, pos] of Object.entries(hand['annotations'])) {
|
for (const [finger, pos] of Object.entries(hand['annotations'])) {
|
||||||
if (finger !== 'palmBase') fingers.push({ name: finger.toLowerCase(), position: pos[0] }); // get tip of each finger
|
if (finger !== 'palmBase') fingers.push({ name: finger.toLowerCase(), position: pos[0] }); // get tip of each finger
|
||||||
}
|
}
|
||||||
const closest = fingers.reduce((best, a) => (best.position[2] < a.position[2] ? best : a));
|
if (fingers && fingers.length > 0) {
|
||||||
const highest = fingers.reduce((best, a) => (best.position[1] < a.position[1] ? best : a));
|
const closest = fingers.reduce((best, a) => (best.position[2] < a.position[2] ? best : a));
|
||||||
gestures.push(`${closest.name} forward ${highest.name} up`);
|
const highest = fingers.reduce((best, a) => (best.position[1] < a.position[1] ? best : a));
|
||||||
|
gestures.push(`${closest.name} forward ${highest.name} up`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return gestures;
|
return gestures;
|
||||||
};
|
};
|
||||||
|
|
|
@ -21,8 +21,6 @@ const box = require('./box');
|
||||||
class HandDetector {
|
class HandDetector {
|
||||||
constructor(model, inputSize, anchorsAnnotated) {
|
constructor(model, inputSize, anchorsAnnotated) {
|
||||||
this.model = model;
|
this.model = model;
|
||||||
this.width = inputSize;
|
|
||||||
this.height = inputSize;
|
|
||||||
this.anchors = anchorsAnnotated.map((anchor) => [anchor.x_center, anchor.y_center]);
|
this.anchors = anchorsAnnotated.map((anchor) => [anchor.x_center, anchor.y_center]);
|
||||||
this.anchorsTensor = tf.tensor2d(this.anchors);
|
this.anchorsTensor = tf.tensor2d(this.anchors);
|
||||||
this.inputSizeTensor = tf.tensor1d([inputSize, inputSize]);
|
this.inputSizeTensor = tf.tensor1d([inputSize, inputSize]);
|
||||||
|
@ -49,16 +47,14 @@ class HandDetector {
|
||||||
}
|
}
|
||||||
|
|
||||||
async getBoundingBoxes(input, config) {
|
async getBoundingBoxes(input, config) {
|
||||||
const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
|
const batchedPrediction = this.model.predict(input);
|
||||||
const batchedPrediction = this.model.predict(normalizedInput);
|
|
||||||
const prediction = batchedPrediction.squeeze();
|
const prediction = batchedPrediction.squeeze();
|
||||||
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
|
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
|
||||||
const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
|
const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
|
||||||
const boxes = this.normalizeBoxes(rawBoxes);
|
const boxes = this.normalizeBoxes(rawBoxes);
|
||||||
const boxesWithHandsTensor = tf.image.nonMaxSuppression(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
|
const boxesWithHandsTensor = tf.image.nonMaxSuppression(boxes, scores, config.maxHands, config.iouThreshold, 0.95); // config.scoreThreshold
|
||||||
const boxesWithHands = boxesWithHandsTensor.arraySync();
|
const boxesWithHands = boxesWithHandsTensor.arraySync();
|
||||||
const toDispose = [
|
const toDispose = [
|
||||||
normalizedInput,
|
|
||||||
batchedPrediction,
|
batchedPrediction,
|
||||||
boxesWithHandsTensor,
|
boxesWithHandsTensor,
|
||||||
prediction,
|
prediction,
|
||||||
|
@ -85,22 +81,19 @@ class HandDetector {
|
||||||
async estimateHandBounds(input, config) {
|
async estimateHandBounds(input, config) {
|
||||||
const inputHeight = input.shape[1];
|
const inputHeight = input.shape[1];
|
||||||
const inputWidth = input.shape[2];
|
const inputWidth = input.shape[2];
|
||||||
const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
|
const image = tf.tidy(() => input.resizeBilinear([config.inputSize, config.inputSize]).div(127.5).sub(1));
|
||||||
const predictions = await this.getBoundingBoxes(image, config);
|
const predictions = await this.getBoundingBoxes(image, config);
|
||||||
if (!predictions || predictions.length === 0) {
|
image.dispose();
|
||||||
image.dispose();
|
if (!predictions || predictions.length === 0) return null;
|
||||||
return null;
|
|
||||||
}
|
|
||||||
const hands = [];
|
const hands = [];
|
||||||
for (const prediction of predictions) {
|
for (const prediction of predictions) {
|
||||||
const boundingBoxes = prediction.boxes.arraySync();
|
const boundingBoxes = prediction.boxes.dataSync();
|
||||||
const startPoint = boundingBoxes[0].slice(0, 2);
|
const startPoint = boundingBoxes.slice(0, 2);
|
||||||
const endPoint = boundingBoxes[0].slice(2, 4);
|
const endPoint = boundingBoxes.slice(2, 4);
|
||||||
const palmLandmarks = prediction.palmLandmarks.arraySync();
|
const palmLandmarks = prediction.palmLandmarks.arraySync();
|
||||||
image.dispose();
|
|
||||||
prediction.boxes.dispose();
|
prediction.boxes.dispose();
|
||||||
prediction.palmLandmarks.dispose();
|
prediction.palmLandmarks.dispose();
|
||||||
hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
|
hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / config.inputSize, inputHeight / config.inputSize]));
|
||||||
}
|
}
|
||||||
return hands;
|
return hands;
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,8 +35,8 @@ class HandPipeline {
|
||||||
this.inputSize = inputSize;
|
this.inputSize = inputSize;
|
||||||
this.regionsOfInterest = [];
|
this.regionsOfInterest = [];
|
||||||
this.runsWithoutHandDetector = 0;
|
this.runsWithoutHandDetector = 0;
|
||||||
this.maxHandsNumber = 1;
|
|
||||||
this.skipFrames = 0;
|
this.skipFrames = 0;
|
||||||
|
this.detectedHands = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
getBoxForPalmLandmarks(palmLandmarks, rotationMatrix) {
|
getBoxForPalmLandmarks(palmLandmarks, rotationMatrix) {
|
||||||
|
@ -87,12 +87,18 @@ class HandPipeline {
|
||||||
|
|
||||||
async estimateHands(image, config) {
|
async estimateHands(image, config) {
|
||||||
this.skipFrames = config.skipFrames;
|
this.skipFrames = config.skipFrames;
|
||||||
const useFreshBox = this.shouldUpdateRegionsOfInterest();
|
// don't need box detection if we have sufficient number of boxes
|
||||||
|
let useFreshBox = (this.detectedHands === 0) || (this.detectedHands !== this.regionsOfInterest.length);
|
||||||
|
let boundingBoxPredictions;
|
||||||
|
// but every skipFrames check if detect boxes number changed
|
||||||
|
if (useFreshBox || this.runsWithoutHandDetector > this.skipFrames) boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
|
||||||
|
// if there are new boxes and number of boxes doesn't match use new boxes, but not if maxhands is fixed to 1
|
||||||
|
if (config.maxHands > 1 && boundingBoxPredictions && boundingBoxPredictions.length > 0 && boundingBoxPredictions.length !== this.detectedHands) useFreshBox = true;
|
||||||
if (useFreshBox) {
|
if (useFreshBox) {
|
||||||
const boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
|
|
||||||
this.regionsOfInterest = [];
|
this.regionsOfInterest = [];
|
||||||
if (!boundingBoxPredictions || boundingBoxPredictions.length === 0) {
|
if (!boundingBoxPredictions || boundingBoxPredictions.length === 0) {
|
||||||
image.dispose();
|
image.dispose();
|
||||||
|
this.detectedHands = 0;
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
for (const boundingBoxPrediction of boundingBoxPredictions) {
|
for (const boundingBoxPrediction of boundingBoxPredictions) {
|
||||||
|
@ -121,28 +127,38 @@ class HandPipeline {
|
||||||
handImage.dispose();
|
handImage.dispose();
|
||||||
const confidenceValue = confidence.dataSync()[0];
|
const confidenceValue = confidence.dataSync()[0];
|
||||||
confidence.dispose();
|
confidence.dispose();
|
||||||
if (confidenceValue < config.minConfidence) {
|
if (confidenceValue >= config.minConfidence) {
|
||||||
|
const keypointsReshaped = tf.reshape(keypoints, [-1, 3]);
|
||||||
|
const rawCoords = keypointsReshaped.arraySync();
|
||||||
keypoints.dispose();
|
keypoints.dispose();
|
||||||
this.regionsOfInterest[i] = null;
|
keypointsReshaped.dispose();
|
||||||
return null;
|
const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
|
||||||
|
const nextBoundingBox = this.getBoxForHandLandmarks(coords);
|
||||||
|
this.updateRegionsOfInterest(nextBoundingBox, i);
|
||||||
|
const result = {
|
||||||
|
landmarks: coords,
|
||||||
|
handInViewConfidence: confidenceValue,
|
||||||
|
boundingBox: {
|
||||||
|
topLeft: nextBoundingBox.startPoint,
|
||||||
|
bottomRight: nextBoundingBox.endPoint,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
hands.push(result);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
const result = {
|
||||||
|
handInViewConfidence: confidenceValue,
|
||||||
|
boundingBox: {
|
||||||
|
topLeft: currentBox.startPoint,
|
||||||
|
bottomRight: currentBox.endPoint,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
hands.push(result);
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
const keypointsReshaped = tf.reshape(keypoints, [-1, 3]);
|
|
||||||
const rawCoords = keypointsReshaped.arraySync();
|
|
||||||
keypoints.dispose();
|
keypoints.dispose();
|
||||||
keypointsReshaped.dispose();
|
|
||||||
const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
|
|
||||||
const nextBoundingBox = this.getBoxForHandLandmarks(coords);
|
|
||||||
this.updateRegionsOfInterest(nextBoundingBox, i);
|
|
||||||
const result = {
|
|
||||||
landmarks: coords,
|
|
||||||
handInViewConfidence: confidenceValue,
|
|
||||||
boundingBox: {
|
|
||||||
topLeft: nextBoundingBox.startPoint,
|
|
||||||
bottomRight: nextBoundingBox.endPoint,
|
|
||||||
},
|
|
||||||
};
|
|
||||||
hands.push(result);
|
|
||||||
}
|
}
|
||||||
|
this.detectedHands = hands.length;
|
||||||
return hands;
|
return hands;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -174,10 +190,6 @@ class HandPipeline {
|
||||||
}
|
}
|
||||||
this.regionsOfInterest[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
|
this.regionsOfInterest[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
|
||||||
}
|
}
|
||||||
|
|
||||||
shouldUpdateRegionsOfInterest() {
|
|
||||||
return !this.regionsOfInterest || (this.regionsOfInterest.length === 0) || (this.runsWithoutHandDetector >= this.skipFrames);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
exports.HandPipeline = HandPipeline;
|
exports.HandPipeline = HandPipeline;
|
||||||
|
|
|
@ -14,6 +14,8 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
* =============================================================================
|
* =============================================================================
|
||||||
*/
|
*/
|
||||||
|
// https://storage.googleapis.com/tfjs-models/demos/handpose/index.html
|
||||||
|
|
||||||
const tf = require('@tensorflow/tfjs');
|
const tf = require('@tensorflow/tfjs');
|
||||||
const handdetector = require('./handdetector');
|
const handdetector = require('./handdetector');
|
||||||
const pipeline = require('./handpipeline');
|
const pipeline = require('./handpipeline');
|
||||||
|
@ -43,12 +45,19 @@ class HandPose {
|
||||||
const hands = [];
|
const hands = [];
|
||||||
for (const prediction of predictions) {
|
for (const prediction of predictions) {
|
||||||
const annotations = {};
|
const annotations = {};
|
||||||
for (const key of Object.keys(MESH_ANNOTATIONS)) {
|
if (prediction.landmarks) {
|
||||||
annotations[key] = MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
|
for (const key of Object.keys(MESH_ANNOTATIONS)) {
|
||||||
|
annotations[key] = MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
hands.push({
|
hands.push({
|
||||||
confidence: prediction.handInViewConfidence,
|
confidence: prediction.handInViewConfidence,
|
||||||
box: prediction.boundingBox ? [prediction.boundingBox.topLeft[0], prediction.boundingBox.topLeft[1], prediction.boundingBox.bottomRight[0] - prediction.boundingBox.topLeft[0], prediction.boundingBox.bottomRight[1] - prediction.boundingBox.topLeft[1]] : 0,
|
box: prediction.boundingBox ? [
|
||||||
|
prediction.boundingBox.topLeft[0],
|
||||||
|
prediction.boundingBox.topLeft[1],
|
||||||
|
prediction.boundingBox.bottomRight[0] - prediction.boundingBox.topLeft[0],
|
||||||
|
prediction.boundingBox.bottomRight[1] - prediction.boundingBox.topLeft[1],
|
||||||
|
] : 0,
|
||||||
landmarks: prediction.landmarks,
|
landmarks: prediction.landmarks,
|
||||||
annotations,
|
annotations,
|
||||||
});
|
});
|
||||||
|
|
|
@ -171,7 +171,7 @@ class Human {
|
||||||
}
|
}
|
||||||
// tf.ENV.set('WEBGL_CPU_FORWARD', true);
|
// tf.ENV.set('WEBGL_CPU_FORWARD', true);
|
||||||
// tf.ENV.set('WEBGL_FORCE_F16_TEXTURES', true);
|
// tf.ENV.set('WEBGL_FORCE_F16_TEXTURES', true);
|
||||||
// tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
|
tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
|
||||||
await tf.ready();
|
await tf.ready();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue