From 9fa7e3d4679be5e74255a95f6a7823ca71d802cd Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 4 Nov 2020 14:59:30 -0500
Subject: [PATCH] major work on handpose model

---
 demo/browser.js              |  2 +-
 src/gesture.js               |  8 +++--
 src/handpose/handdetector.js | 25 ++++++---------
 src/handpose/handpipeline.js | 62 +++++++++++++++++++++---------------
 src/handpose/handpose.js     | 15 +++++++--
 src/human.js                 |  2 +-
 6 files changed, 65 insertions(+), 49 deletions(-)

diff --git a/demo/browser.js b/demo/browser.js
index a9122958..fc81fc6e 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -332,7 +332,7 @@ function setupMenu() {
   menu.addHTML('');
   menu.addLabel('Model Parameters');
-  menu.addRange('Max Objects', config.face.detector, 'maxFaces', 0, 50, 1, (val) => {
+  menu.addRange('Max Objects', config.face.detector, 'maxFaces', 1, 50, 1, (val) => {
     config.face.detector.maxFaces = parseInt(val);
     config.body.maxDetections = parseInt(val);
     config.hand.maxHands = parseInt(val);
diff --git a/src/gesture.js b/src/gesture.js
index 5531975b..23f489d1 100644
--- a/src/gesture.js
+++ b/src/gesture.js
@@ -37,9 +37,11 @@ exports.hand = (res) => {
     for (const [finger, pos] of Object.entries(hand['annotations'])) {
       if (finger !== 'palmBase') fingers.push({ name: finger.toLowerCase(), position: pos[0] }); // get tip of each finger
     }
-    const closest = fingers.reduce((best, a) => (best.position[2] < a.position[2] ? best : a));
-    const highest = fingers.reduce((best, a) => (best.position[1] < a.position[1] ? best : a));
-    gestures.push(`${closest.name} forward ${highest.name} up`);
+    if (fingers && fingers.length > 0) {
+      const closest = fingers.reduce((best, a) => (best.position[2] < a.position[2] ? best : a));
+      const highest = fingers.reduce((best, a) => (best.position[1] < a.position[1] ? best : a));
+      gestures.push(`${closest.name} forward ${highest.name} up`);
+    }
   }
   return gestures;
 };
diff --git a/src/handpose/handdetector.js b/src/handpose/handdetector.js
index 1a13de42..237c425f 100644
--- a/src/handpose/handdetector.js
+++ b/src/handpose/handdetector.js
@@ -21,8 +21,6 @@ const box = require('./box');
 class HandDetector {
   constructor(model, inputSize, anchorsAnnotated) {
     this.model = model;
-    this.width = inputSize;
-    this.height = inputSize;
     this.anchors = anchorsAnnotated.map((anchor) => [anchor.x_center, anchor.y_center]);
     this.anchorsTensor = tf.tensor2d(this.anchors);
     this.inputSizeTensor = tf.tensor1d([inputSize, inputSize]);
@@ -49,16 +47,14 @@ class HandDetector {
   }
 
   async getBoundingBoxes(input, config) {
-    const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
-    const batchedPrediction = this.model.predict(normalizedInput);
+    const batchedPrediction = this.model.predict(input);
     const prediction = batchedPrediction.squeeze();
     const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
     const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
     const boxes = this.normalizeBoxes(rawBoxes);
-    const boxesWithHandsTensor = tf.image.nonMaxSuppression(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
+    const boxesWithHandsTensor = tf.image.nonMaxSuppression(boxes, scores, config.maxHands, config.iouThreshold, 0.95); // config.scoreThreshold
     const boxesWithHands = boxesWithHandsTensor.arraySync();
     const toDispose = [
-      normalizedInput,
       batchedPrediction,
       boxesWithHandsTensor,
       prediction,
@@ -85,22 +81,19 @@ class HandDetector {
   async estimateHandBounds(input, config) {
     const inputHeight = input.shape[1];
     const inputWidth = input.shape[2];
-    const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
+    const image = tf.tidy(() => input.resizeBilinear([config.inputSize, config.inputSize]).div(127.5).sub(1));
     const predictions = await this.getBoundingBoxes(image, config);
-    if (!predictions || predictions.length === 0) {
-      image.dispose();
-      return null;
-    }
+    image.dispose();
+    if (!predictions || predictions.length === 0) return null;
     const hands = [];
     for (const prediction of predictions) {
-      const boundingBoxes = prediction.boxes.arraySync();
-      const startPoint = boundingBoxes[0].slice(0, 2);
-      const endPoint = boundingBoxes[0].slice(2, 4);
+      const boundingBoxes = prediction.boxes.dataSync();
+      const startPoint = boundingBoxes.slice(0, 2);
+      const endPoint = boundingBoxes.slice(2, 4);
       const palmLandmarks = prediction.palmLandmarks.arraySync();
-      image.dispose();
       prediction.boxes.dispose();
       prediction.palmLandmarks.dispose();
-      hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
+      hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / config.inputSize, inputHeight / config.inputSize]));
     }
     return hands;
   }
diff --git a/src/handpose/handpipeline.js b/src/handpose/handpipeline.js
index 146d040d..f700ca81 100644
--- a/src/handpose/handpipeline.js
+++ b/src/handpose/handpipeline.js
@@ -35,8 +35,8 @@ class HandPipeline {
     this.inputSize = inputSize;
     this.regionsOfInterest = [];
    this.runsWithoutHandDetector = 0;
-    this.maxHandsNumber = 1;
     this.skipFrames = 0;
+    this.detectedHands = 0;
   }
 
   getBoxForPalmLandmarks(palmLandmarks, rotationMatrix) {
@@ -87,12 +87,18 @@ class HandPipeline {
 
   async estimateHands(image, config) {
     this.skipFrames = config.skipFrames;
-    const useFreshBox = this.shouldUpdateRegionsOfInterest();
+    // don't need box detection if we have sufficient number of boxes
+    let useFreshBox = (this.detectedHands === 0) || (this.detectedHands !== this.regionsOfInterest.length);
+    let boundingBoxPredictions;
+    // but every skipFrames check if detect boxes number changed
+    if (useFreshBox || this.runsWithoutHandDetector > this.skipFrames) boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
+    // if there are new boxes and number of boxes doesn't match use new boxes, but not if maxhands is fixed to 1
+    if (config.maxHands > 1 && boundingBoxPredictions && boundingBoxPredictions.length > 0 && boundingBoxPredictions.length !== this.detectedHands) useFreshBox = true;
     if (useFreshBox) {
-      const boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
       this.regionsOfInterest = [];
       if (!boundingBoxPredictions || boundingBoxPredictions.length === 0) {
         image.dispose();
+        this.detectedHands = 0;
         return null;
       }
       for (const boundingBoxPrediction of boundingBoxPredictions) {
@@ -121,28 +127,38 @@ class HandPipeline {
       handImage.dispose();
       const confidenceValue = confidence.dataSync()[0];
       confidence.dispose();
-      if (confidenceValue < config.minConfidence) {
+      if (confidenceValue >= config.minConfidence) {
+        const keypointsReshaped = tf.reshape(keypoints, [-1, 3]);
+        const rawCoords = keypointsReshaped.arraySync();
         keypoints.dispose();
-        this.regionsOfInterest[i] = null;
-        return null;
+        keypointsReshaped.dispose();
+        const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
+        const nextBoundingBox = this.getBoxForHandLandmarks(coords);
+        this.updateRegionsOfInterest(nextBoundingBox, i);
+        const result = {
+          landmarks: coords,
+          handInViewConfidence: confidenceValue,
+          boundingBox: {
+            topLeft: nextBoundingBox.startPoint,
+            bottomRight: nextBoundingBox.endPoint,
+          },
+        };
+        hands.push(result);
+      } else {
+        /*
+        const result = {
+          handInViewConfidence: confidenceValue,
+          boundingBox: {
+            topLeft: currentBox.startPoint,
+            bottomRight: currentBox.endPoint,
+          },
+        };
+        hands.push(result);
+        */
       }
-      const keypointsReshaped = tf.reshape(keypoints, [-1, 3]);
-      const rawCoords = keypointsReshaped.arraySync();
       keypoints.dispose();
-      keypointsReshaped.dispose();
-      const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
-      const nextBoundingBox = this.getBoxForHandLandmarks(coords);
-      this.updateRegionsOfInterest(nextBoundingBox, i);
-      const result = {
-        landmarks: coords,
-        handInViewConfidence: confidenceValue,
-        boundingBox: {
-          topLeft: nextBoundingBox.startPoint,
-          bottomRight: nextBoundingBox.endPoint,
-        },
-      };
-      hands.push(result);
     }
+    this.detectedHands = hands.length;
     return hands;
   }
@@ -174,10 +190,6 @@ class HandPipeline {
     }
     this.regionsOfInterest[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
   }
-
-  shouldUpdateRegionsOfInterest() {
-    return !this.regionsOfInterest || (this.regionsOfInterest.length === 0) || (this.runsWithoutHandDetector >= this.skipFrames);
-  }
 }
 
 exports.HandPipeline = HandPipeline;
diff --git a/src/handpose/handpose.js b/src/handpose/handpose.js
index df069fed..9348f626 100644
--- a/src/handpose/handpose.js
+++ b/src/handpose/handpose.js
@@ -14,6 +14,8 @@
  * limitations under the License.
  * =============================================================================
  */
+// https://storage.googleapis.com/tfjs-models/demos/handpose/index.html
+
 const tf = require('@tensorflow/tfjs');
 const handdetector = require('./handdetector');
 const pipeline = require('./handpipeline');
@@ -43,12 +45,19 @@ class HandPose {
     const hands = [];
     for (const prediction of predictions) {
       const annotations = {};
-      for (const key of Object.keys(MESH_ANNOTATIONS)) {
-        annotations[key] = MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
+      if (prediction.landmarks) {
+        for (const key of Object.keys(MESH_ANNOTATIONS)) {
+          annotations[key] = MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
+        }
       }
       hands.push({
         confidence: prediction.handInViewConfidence,
-        box: prediction.boundingBox ? [prediction.boundingBox.topLeft[0], prediction.boundingBox.topLeft[1], prediction.boundingBox.bottomRight[0] - prediction.boundingBox.topLeft[0], prediction.boundingBox.bottomRight[1] - prediction.boundingBox.topLeft[1]] : 0,
+        box: prediction.boundingBox ? [
+          prediction.boundingBox.topLeft[0],
+          prediction.boundingBox.topLeft[1],
+          prediction.boundingBox.bottomRight[0] - prediction.boundingBox.topLeft[0],
+          prediction.boundingBox.bottomRight[1] - prediction.boundingBox.topLeft[1],
+        ] : 0,
         landmarks: prediction.landmarks,
         annotations,
       });
diff --git a/src/human.js b/src/human.js
index 8e97df57..c3371948 100644
--- a/src/human.js
+++ b/src/human.js
@@ -171,7 +171,7 @@ class Human {
       }
       // tf.ENV.set('WEBGL_CPU_FORWARD', true);
       // tf.ENV.set('WEBGL_FORCE_F16_TEXTURES', true);
-      // tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
+      tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
       await tf.ready();
     }
   }
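
Note on the preprocessing change in src/handpose/handdetector.js: the old path resized, divided by 255, and then remapped to -1..1 inside getBoundingBoxes with tf.mul(tf.sub(input, 0.5), 2); the patch folds the same normalization into a single div(127.5).sub(1) at resize time, so getBoundingBoxes can pass the tensor straight to model.predict(). A minimal standalone sketch of the equivalence (not part of the patch; the sample pixel values are illustrative):

const tf = require('@tensorflow/tfjs');
// a few 8-bit pixel values to normalize
const pixels = tf.tensor1d([0, 64, 127.5, 255]);
// previous two-step path: scale to 0..1, then remap to -1..1
const oldWay = tf.mul(tf.sub(pixels.div(255), 0.5), 2);
// patched single-step path, as done during resizeBilinear in estimateHandBounds
const newWay = pixels.div(127.5).sub(1);
oldWay.print(); // ~[-1, -0.498, 0, 1]
newWay.print(); // ~[-1, -0.498, 0, 1]

The reworked estimateHands flow in src/handpose/handpipeline.js follows the same frame-to-frame caching idea: the palm detector runs only when no hands are tracked, when the tracked count no longer matches the cached regions of interest, or after skipFrames frames without detection; its fresh boxes replace the cache only when config.maxHands > 1 and the detected count actually changed, and detectedHands is updated from the landmark results at the end of each call.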