mirror of https://github.com/vladmandic/human
redo hand detection
parent bfdcb301f4
commit 430a950112
@@ -16,7 +16,6 @@
 <!-- <script src="../assets/tf.es2017.js"></script> -->
 <!-- <script src="../assets/tf-backend-wasm.es2017.js"></script> -->
 <!-- <script src="../assets/tf-backend-webgpu.js"></script> -->
 <!-- <script src='../assets/tfjs-vis.min.js'></script> -->
 <script src="./browser.js" type="module"></script>
 <style>
 body { margin: 0; background: black; color: white; font-family: 'Segoe UI'; font-size: 16px; font-variant: small-caps; overflow-x: hidden; scrollbar-width: none; }
src/hand/box.js
@@ -1,3 +1,19 @@
+/**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
 const tf = require('@tensorflow/tfjs');

 function getBoxSize(box) {
@@ -6,35 +22,32 @@ function getBoxSize(box) {
     Math.abs(box.endPoint[1] - box.startPoint[1]),
   ];
 }
-exports.getBoxSize = getBoxSize;
-
 function getBoxCenter(box) {
   return [
     box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2,
     box.startPoint[1] + (box.endPoint[1] - box.startPoint[1]) / 2,
   ];
 }
-exports.getBoxCenter = getBoxCenter;
-
 function cutBoxFromImageAndResize(box, image, cropSize) {
   const h = image.shape[1];
   const w = image.shape[2];
   const boxes = [[
-    box.startPoint[1] / h, box.startPoint[0] / w, box.endPoint[1] / h,
+    box.startPoint[1] / h,
+    box.startPoint[0] / w,
+    box.endPoint[1] / h,
     box.endPoint[0] / w,
   ]];
   return tf.image.cropAndResize(image, boxes, [0], cropSize);
 }
-exports.cutBoxFromImageAndResize = cutBoxFromImageAndResize;
-
 function scaleBoxCoordinates(box, factor) {
   const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
   const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
-  const palmLandmarks = box.palmLandmarks.map((coord) => [coord[0] * factor[0], coord[1] * factor[1]]);
+  const palmLandmarks = box.palmLandmarks.map((coord) => {
+    const scaledCoord = [coord[0] * factor[0], coord[1] * factor[1]];
+    return scaledCoord;
+  });
   return { startPoint, endPoint, palmLandmarks };
 }
-exports.scaleBoxCoordinates = scaleBoxCoordinates;
-
 function enlargeBox(box, factor = 1.5) {
   const center = getBoxCenter(box);
   const size = getBoxSize(box);
@@ -43,8 +56,6 @@ function enlargeBox(box, factor = 1.5) {
   const endPoint = [center[0] + newHalfSize[0], center[1] + newHalfSize[1]];
   return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
 }
-exports.enlargeBox = enlargeBox;
-
 function squarifyBox(box) {
   const centers = getBoxCenter(box);
   const size = getBoxSize(box);
@@ -54,15 +65,22 @@ function squarifyBox(box) {
   const endPoint = [centers[0] + halfSize, centers[1] + halfSize];
   return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
 }
-exports.squarifyBox = squarifyBox;
-
 function shiftBox(box, shiftFactor) {
   const boxSize = [
-    box.endPoint[0] - box.startPoint[0], box.endPoint[1] - box.startPoint[1],
+    box.endPoint[0] - box.startPoint[0],
+    box.endPoint[1] - box.startPoint[1],
   ];
   const shiftVector = [boxSize[0] * shiftFactor[0], boxSize[1] * shiftFactor[1]];
   const startPoint = [box.startPoint[0] + shiftVector[0], box.startPoint[1] + shiftVector[1]];
   const endPoint = [box.endPoint[0] + shiftVector[0], box.endPoint[1] + shiftVector[1]];
   return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
 }
-exports.shiftBox = shiftBox;
+export {
+  cutBoxFromImageAndResize,
+  enlargeBox,
+  getBoxCenter,
+  getBoxSize,
+  scaleBoxCoordinates,
+  shiftBox,
+  squarifyBox,
+};
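For intuition, these helpers are pure coordinate math and can be exercised standalone. A usage sketch with sample numbers (assuming the new named exports, and that the elided bodies of squarifyBox and enlargeBox follow the upstream tfjs implementation: half of the longer side, and factor times size):

// Usage sketch (sample values, not part of the commit).
import { enlargeBox, getBoxCenter, getBoxSize, squarifyBox } from './box';

const sample = { startPoint: [100, 200], endPoint: [180, 260], palmLandmarks: [] };
getBoxSize(sample);                   // [80, 60]
getBoxCenter(sample);                 // [140, 230]
squarifyBox(sample);                  // pads the short side: [100, 190] .. [180, 270]
enlargeBox(squarifyBox(sample), 1.5); // same center, grown to 120x120: [80, 170] .. [200, 290]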
src/hand/handdetector.js
@@ -1,15 +1,32 @@
+/**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
 const tf = require('@tensorflow/tfjs');
-const bounding = require('./box');
+const box = require('./box');

 class HandDetector {
-  constructor(model, anchors, config) {
+  constructor(model, inputSize, anchorsAnnotated) {
     this.model = model;
-    this.width = config.inputSize;
-    this.height = config.inputSize;
-    this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]);
+    this.width = inputSize;
+    this.height = inputSize;
+    this.anchors = anchorsAnnotated.map((anchor) => [anchor.x_center, anchor.y_center]);
     this.anchorsTensor = tf.tensor2d(this.anchors);
-    this.inputSizeTensor = tf.tensor1d([config.inputSize, config.inputSize]);
-    this.doubleInputSizeTensor = tf.tensor1d([config.inputSize * 2, config.inputSize * 2]);
+    this.inputSizeTensor = tf.tensor1d([inputSize, inputSize]);
+    this.doubleInputSizeTensor = tf.tensor1d([inputSize * 2, inputSize * 2]);
   }

   normalizeBoxes(boxes) {
@@ -31,59 +48,59 @@ class HandDetector {
     });
   }

-  async getBoundingBoxes(input) {
-    const batchedPrediction = this.model.predict(input);
+  async getBoundingBoxes(input, config) {
+    const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
+    const batchedPrediction = this.model.predict(normalizedInput);
     const prediction = batchedPrediction.squeeze();
     // Regression score for each anchor point.
     const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
     // Bounding box for each anchor point.
     const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
     const boxes = this.normalizeBoxes(rawBoxes);
-    const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
+    const boxesWithHandsTensor = tf.image.nonMaxSuppression(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
     const boxesWithHands = boxesWithHandsTensor.arraySync();
-    const detectedHands = tf.tidy(() => {
-      const detectedBoxes = [];
-      for (const i in boxesWithHands) {
-        const boxIndex = boxesWithHands[i];
+    const toDispose = [
+      normalizedInput,
+      batchedPrediction,
+      boxesWithHandsTensor,
+      prediction,
+      boxes,
+      rawBoxes,
+      scores,
+    ];
+    if (boxesWithHands.length === 0) {
+      toDispose.forEach((tensor) => tensor.dispose());
+      return null;
+    }
+    const hands = [];
+    for (const boxIndex of boxesWithHands) {
       const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
       const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]);
       const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2]));
-      detectedBoxes.push({ boxes: matchingBox, palmLandmarks });
+      rawPalmLandmarks.dispose();
+      hands.push({ boxes: matchingBox, palmLandmarks });
     }
-      return detectedBoxes;
-    });
-    [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores].forEach((tensor) => tensor.dispose());
-    return detectedHands;
+    toDispose.forEach((tensor) => tensor.dispose());
+    return hands;
   }
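One detail worth calling out: the detector input is normalized in two steps. estimateHandBounds below scales raw pixels to [0, 1] with div(255), and getBoundingBoxes then maps that range to the [-1, 1] range the palm detector expects via tf.mul(tf.sub(input, 0.5), 2). The same arithmetic on plain numbers:

// The normalization chain on a single value (plain numbers, not tensors):
const pixel = 191;                     // raw 8-bit channel value
const scaled = pixel / 255;            // ~0.749 -> range [0, 1] (estimateHandBounds)
const normalized = (scaled - 0.5) * 2; // ~0.498 -> range [-1, 1] (getBoundingBoxes)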

   /**
    * Returns a Box identifying the bounding box of a hand within the image.
    * Returns null if there is no hand in the image.
    *
    * @param input The image to classify.
    */
   async estimateHandBounds(input, config) {
-    this.iouThreshold = config.iouThreshold;
-    this.scoreThreshold = config.scoreThreshold;
-    this.maxHands = config.maxHands;
-    const resized = input.resizeBilinear([this.width, this.height]);
-    const divided = resized.mul([1 / 127.5]);
-    const image = divided.sub(0.5);
-    resized.dispose();
-    divided.dispose();
-    const predictions = await this.getBoundingBoxes(image);
-    if (!predictions || predictions.length === 0) {
-      return null;
-    }
+    const inputHeight = input.shape[1];
+    const inputWidth = input.shape[2];
+    const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
+    const predictions = await this.getBoundingBoxes(image, config);
+    image.dispose();
+    if (!predictions || (predictions.length === 0)) return null;
     const hands = [];
-    for (const i in predictions) {
-      const prediction = predictions[i];
-      const boundingBoxes = prediction.boxes.dataSync();
-      const startPoint = [boundingBoxes[0], boundingBoxes[1]];
-      const endPoint = [boundingBoxes[2], boundingBoxes[3]];
+    for (const prediction of predictions) {
+      const boundingBoxes = prediction.boxes.arraySync();
+      const startPoint = boundingBoxes[0].slice(0, 2);
+      const endPoint = boundingBoxes[0].slice(2, 4);
       const palmLandmarks = prediction.palmLandmarks.arraySync();
-      image.dispose();
       prediction.boxes.dispose();
       prediction.palmLandmarks.dispose();
-      hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [input.shape[2] / this.width, input.shape[1] / this.height]));
+      hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
     }
     return hands;
   }
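Taken together, a hypothetical caller of the new signatures might look like this (model path, input size of 256, and thresholds are illustrative placeholders, not values from the commit):

// Hypothetical usage sketch; the model path, inputSize, and thresholds are illustrative only.
const tf = require('@tensorflow/tfjs');
const handdetector = require('./handdetector');
const anchors = require('./anchors');

async function detectPalms(inputTensor) { // inputTensor shape: [1, height, width, 3]
  const model = await tf.loadGraphModel('file://models/handdetect.json'); // placeholder path
  const detector = new handdetector.HandDetector(model, 256, anchors.anchors);
  const config = { maxHands: 1, iouThreshold: 0.3, scoreThreshold: 0.5 };
  return detector.estimateHandBounds(inputTensor, config); // boxes + palmLandmarks, or null
}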
src/hand/handpipeline.js
@@ -0,0 +1,184 @@
+/**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
+
+const tf = require('@tensorflow/tfjs');
+const box = require('./box');
+const util = require('./util');
+
+const UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD = 0.8;
+const PALM_BOX_SHIFT_VECTOR = [0, -0.4];
+const PALM_BOX_ENLARGE_FACTOR = 3;
+const HAND_BOX_SHIFT_VECTOR = [0, -0.1];
+const HAND_BOX_ENLARGE_FACTOR = 1.65;
+const PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2];
+const PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0;
+const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;
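These constants drive how a detected palm box is grown into a crop that covers the whole hand: shift up along y, squarify, then enlarge. A worked example with sample numbers (assuming the box helpers behave as in src/hand/box.js above):

// Worked example: growing a palm box into a hand crop (sample numbers).
// palm box [100, 100] .. [200, 200], size 100x100, center [150, 150]
// 1. shiftBox with PALM_BOX_SHIFT_VECTOR [0, -0.4]: y moves by -0.4 * 100 = -40
//    -> [100, 60] .. [200, 160], center now [150, 110]
// 2. squarifyBox: already square, unchanged
// 3. enlargeBox with PALM_BOX_ENLARGE_FACTOR 3: 100x100 -> 300x300 around [150, 110]
//    -> [0, -40] .. [300, 260], roomy enough to contain the full hand and fingers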
+
+class HandPipeline {
+  constructor(boundingBoxDetector, meshDetector, inputSize) {
+    this.boundingBoxDetector = boundingBoxDetector;
+    this.meshDetector = meshDetector;
+    this.inputSize = inputSize;
+    this.regionsOfInterest = [];
+    this.runsWithoutHandDetector = 0;
+    this.maxHandsNumber = 1;
+    this.skipFrames = 0;
+  }
+
+  getBoxForPalmLandmarks(palmLandmarks, rotationMatrix) {
+    const rotatedPalmLandmarks = palmLandmarks.map((coord) => {
+      const homogeneousCoordinate = [...coord, 1];
+      return util.rotatePoint(homogeneousCoordinate, rotationMatrix);
+    });
+    const boxAroundPalm = this.calculateLandmarksBoundingBox(rotatedPalmLandmarks);
+    return box.enlargeBox(box.squarifyBox(box.shiftBox(boxAroundPalm, PALM_BOX_SHIFT_VECTOR)), PALM_BOX_ENLARGE_FACTOR);
+  }
+
+  getBoxForHandLandmarks(landmarks) {
+    const boundingBox = this.calculateLandmarksBoundingBox(landmarks);
+    const boxAroundHand = box.enlargeBox(box.squarifyBox(box.shiftBox(boundingBox, HAND_BOX_SHIFT_VECTOR)), HAND_BOX_ENLARGE_FACTOR);
+    const palmLandmarks = [];
+    for (let i = 0; i < PALM_LANDMARK_IDS.length; i++) {
+      palmLandmarks.push(landmarks[PALM_LANDMARK_IDS[i]].slice(0, 2));
+    }
+    boxAroundHand.palmLandmarks = palmLandmarks;
+    return boxAroundHand;
+  }
+
+  transformRawCoords(rawCoords, box2, angle, rotationMatrix) {
+    const boxSize = box.getBoxSize(box2);
+    const scaleFactor = [boxSize[0] / this.inputSize, boxSize[1] / this.inputSize];
+    const coordsScaled = rawCoords.map((coord) => [
+      scaleFactor[0] * (coord[0] - this.inputSize / 2),
+      scaleFactor[1] * (coord[1] - this.inputSize / 2),
+      coord[2],
+    ]);
+    const coordsRotationMatrix = util.buildRotationMatrix(angle, [0, 0]);
+    const coordsRotated = coordsScaled.map((coord) => {
+      const rotated = util.rotatePoint(coord, coordsRotationMatrix);
+      return [...rotated, coord[2]];
+    });
+    const inverseRotationMatrix = util.invertTransformMatrix(rotationMatrix);
+    const boxCenter = [...box.getBoxCenter(box2), 1];
+    const originalBoxCenter = [
+      util.dot(boxCenter, inverseRotationMatrix[0]),
+      util.dot(boxCenter, inverseRotationMatrix[1]),
+    ];
+    return coordsRotated.map((coord) => [
+      coord[0] + originalBoxCenter[0],
+      coord[1] + originalBoxCenter[1],
+      coord[2],
+    ]);
+  }
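transformRawCoords undoes each distortion introduced when the crop was made: scale from the inputSize crop back to box size, rotate about the crop center, then translate back into original image coordinates. The util helpers it relies on are not part of this diff; a sketch of their presumed 3x3 homogeneous-matrix conventions (an assumption, not the committed util.js):

// Presumed shape of the util helpers used above (sketch, not the committed code).
function buildRotationMatrix(angle, center) {
  const [cx, cy] = center;
  const cos = Math.cos(angle);
  const sin = Math.sin(angle);
  // 3x3 homogeneous rotation about (cx, cy): p' = R(p - c) + c
  return [
    [cos, -sin, cx - cos * cx + sin * cy],
    [sin, cos, cy - sin * cx - cos * cy],
    [0, 0, 1],
  ];
}
function rotatePoint(homogeneousPoint, matrix) {
  // dot the [x, y, 1] point with the first two matrix rows
  return [
    homogeneousPoint[0] * matrix[0][0] + homogeneousPoint[1] * matrix[0][1] + matrix[0][2],
    homogeneousPoint[0] * matrix[1][0] + homogeneousPoint[1] * matrix[1][1] + matrix[1][2],
  ];
}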
+
+  async estimateHands(image, config) {
+    this.skipFrames = config.skipFrames;
+    const useFreshBox = this.shouldUpdateRegionsOfInterest();
+    if (useFreshBox) {
+      const boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
+      this.regionsOfInterest = [];
+      if (!boundingBoxPredictions || boundingBoxPredictions.length === 0) {
+        image.dispose();
+        return null;
+      }
+      for (const boundingBoxPrediction of boundingBoxPredictions) {
+        this.regionsOfInterest.push(boundingBoxPrediction);
+      }
+      this.runsWithoutHandDetector = 0;
+    } else {
+      this.runsWithoutHandDetector++;
+    }
+    const hands = [];
+    for (const i in this.regionsOfInterest) {
+      const currentBox = this.regionsOfInterest[i];
+      if (!currentBox) continue;
+      const angle = util.computeRotation(currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE], currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]);
+      const palmCenter = box.getBoxCenter(currentBox);
+      const palmCenterNormalized = [palmCenter[0] / image.shape[2], palmCenter[1] / image.shape[1]];
+      const rotatedImage = tf.image.rotateWithOffset(image, angle, 0, palmCenterNormalized);
+      const rotationMatrix = util.buildRotationMatrix(-angle, palmCenter);
+      const newBox = useFreshBox ? this.getBoxForPalmLandmarks(currentBox.palmLandmarks, rotationMatrix) : currentBox;
+      const croppedInput = box.cutBoxFromImageAndResize(newBox, rotatedImage, [this.inputSize, this.inputSize]);
+      const handImage = croppedInput.div(255);
+      croppedInput.dispose();
+      rotatedImage.dispose();
+      const prediction = this.meshDetector.predict(handImage);
+      const [confidence, keypoints] = prediction;
+      handImage.dispose();
+      const confidenceValue = confidence.dataSync()[0];
+      confidence.dispose();
+      if (confidenceValue < config.minConfidence) {
+        keypoints.dispose();
+        this.regionsOfInterest[i] = null;
+        return null;
+      }
+      const keypointsReshaped = tf.reshape(keypoints, [-1, 3]);
+      const rawCoords = keypointsReshaped.arraySync();
+      keypoints.dispose();
+      keypointsReshaped.dispose();
+      const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
+      const nextBoundingBox = this.getBoxForHandLandmarks(coords);
+      this.updateRegionsOfInterest(nextBoundingBox, i);
+      const result = {
+        landmarks: coords,
+        handInViewConfidence: confidenceValue,
+        boundingBox: {
+          topLeft: nextBoundingBox.startPoint,
+          bottomRight: nextBoundingBox.endPoint,
+        },
+      };
+      hands.push(result);
+    }
+    return hands;
+  }
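Note the caching strategy: the palm detector only runs when there is no usable region of interest or the cache is older than config.skipFrames frames; otherwise the previous frame's box is reused and only the landmark model executes. A hypothetical config illustrating the trade-off:

// Hypothetical config values (illustrative). Higher skipFrames means faster video
// processing, but a fast-moving hand can escape the cached box between detections.
const handConfig = { skipFrames: 10, minConfidence: 0.5, maxHands: 1, iouThreshold: 0.3, scoreThreshold: 0.7 };
// frame 0:      palm detector + landmark model (no cached region of interest yet)
// frames 1..10: landmark model only; box carried forward by updateRegionsOfInterest()
// frame 11:     runsWithoutHandDetector >= skipFrames, so the palm detector runs again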
+
+  // eslint-disable-next-line class-methods-use-this
+  calculateLandmarksBoundingBox(landmarks) {
+    const xs = landmarks.map((d) => d[0]);
+    const ys = landmarks.map((d) => d[1]);
+    const startPoint = [Math.min(...xs), Math.min(...ys)];
+    const endPoint = [Math.max(...xs), Math.max(...ys)];
+    return { startPoint, endPoint };
+  }
+
+  updateRegionsOfInterest(newBox, i) {
+    const previousBox = this.regionsOfInterest[i];
+    let iou = 0;
+    if (previousBox != null && previousBox.startPoint != null) {
+      const [boxStartX, boxStartY] = newBox.startPoint;
+      const [boxEndX, boxEndY] = newBox.endPoint;
+      const [previousBoxStartX, previousBoxStartY] = previousBox.startPoint;
+      const [previousBoxEndX, previousBoxEndY] = previousBox.endPoint;
+      const xStartMax = Math.max(boxStartX, previousBoxStartX);
+      const yStartMax = Math.max(boxStartY, previousBoxStartY);
+      const xEndMin = Math.min(boxEndX, previousBoxEndX);
+      const yEndMin = Math.min(boxEndY, previousBoxEndY);
+      const intersection = (xEndMin - xStartMax) * (yEndMin - yStartMax);
+      const boxArea = (boxEndX - boxStartX) * (boxEndY - boxStartY);
+      const previousBoxArea = (previousBoxEndX - previousBoxStartX) * (previousBoxEndY - previousBoxStartY);
+      iou = intersection / (boxArea + previousBoxArea - intersection);
+    }
+    this.regionsOfInterest[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
+  }
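The ROI update keeps the previous box whenever the new landmark-derived box overlaps it with IOU above the 0.8 threshold, which stabilizes the crop between frames. Worked numbers (illustrative):

// Worked IOU example (sample boxes, plain numbers):
const prev = { startPoint: [0, 0], endPoint: [100, 100] };
const next = { startPoint: [10, 0], endPoint: [110, 100] };
const inter = (Math.min(100, 110) - Math.max(0, 10)) * (Math.min(100, 100) - Math.max(0, 0)); // 9000
const iou = inter / (100 * 100 + 100 * 100 - inter); // ~0.818 > 0.8 -> previous box is kept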
+
+  shouldUpdateRegionsOfInterest() {
+    return !this.regionsOfInterest || (this.regionsOfInterest.length === 0) || (this.runsWithoutHandDetector >= this.skipFrames);
+  }
+}
+
+exports.HandPipeline = HandPipeline;
src/hand/handpose.js
@@ -1,30 +1,54 @@
+/**
+ * @license
+ * Copyright 2020 Google LLC. All Rights Reserved.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * =============================================================================
+ */
 const tf = require('@tensorflow/tfjs');
-const hand = require('./handdetector');
-const keypoints = require('./keypoints');
-const pipe = require('./pipeline');
-const anchors = require('./anchors.js');
+const handdetector = require('./handdetector');
+const pipeline = require('./handpipeline');
+const anchors = require('./anchors');

+const MESH_ANNOTATIONS = {
+  thumb: [1, 2, 3, 4],
+  indexFinger: [5, 6, 7, 8],
+  middleFinger: [9, 10, 11, 12],
+  ringFinger: [13, 14, 15, 16],
+  pinky: [17, 18, 19, 20],
+  palmBase: [0],
+};
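MESH_ANNOTATIONS groups the 21 hand-mesh keypoints into named fingers (indices 5-8 are the index finger, base to tip). A sketch of how estimateHands applies it:

// Sketch: turning a flat 21-point landmark array into named finger groups.
const landmarks = Array.from({ length: 21 }, (_, i) => [i, i, 0]); // dummy [x, y, z] points
const annotations = {};
for (const key of Object.keys(MESH_ANNOTATIONS)) {
  annotations[key] = MESH_ANNOTATIONS[key].map((index) => landmarks[index]);
}
// annotations.indexFinger -> points 5, 6, 7, 8; annotations.palmBase -> point 0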

 class HandPose {
-  constructor(pipeline) {
-    this.pipeline = pipeline;
+  constructor(pipe) {
+    this.pipeline = pipe;
   }

   static getAnnotations() {
     return MESH_ANNOTATIONS;
   }

   async estimateHands(input, config) {
     this.skipFrames = config.skipFrames;
     this.detectionConfidence = config.minConfidence;
     this.maxHands = config.maxHands;
     const predictions = await this.pipeline.estimateHands(input, config);
-    if (!predictions) return [];
     const hands = [];
+    if (!predictions) return hands;
     for (const prediction of predictions) {
       if (!prediction) return [];
       const annotations = {};
-      for (const key of Object.keys(keypoints.MESH_ANNOTATIONS)) {
-        annotations[key] = keypoints.MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
+      for (const key of Object.keys(MESH_ANNOTATIONS)) {
+        annotations[key] = MESH_ANNOTATIONS[key].map((index) => prediction.landmarks[index]);
       }
       hands.push({
-        confidence: prediction.confidence || 0,
-        box: prediction.box ? [prediction.box.topLeft[0], prediction.box.topLeft[1], prediction.box.bottomRight[0] - prediction.box.topLeft[0], prediction.box.bottomRight[1] - prediction.box.topLeft[1]] : 0,
+        confidence: prediction.handInViewConfidence,
+        box: prediction.boundingBox ? [prediction.boundingBox.topLeft[0], prediction.boundingBox.topLeft[1], prediction.boundingBox.bottomRight[0] - prediction.boundingBox.topLeft[0], prediction.boundingBox.bottomRight[1] - prediction.boundingBox.topLeft[1]] : 0,
         landmarks: prediction.landmarks,
         annotations,
       });
@@ -35,13 +59,14 @@ class HandPose {
 exports.HandPose = HandPose;

 async function load(config) {
   // maxContinuousChecks = Infinity, detectionConfidence = 0.8, iouThreshold = 0.3, scoreThreshold = 0.5
   const [handDetectorModel, handPoseModel] = await Promise.all([
     tf.loadGraphModel(config.detector.modelPath, { fromTFHub: config.detector.modelPath.includes('tfhub.dev') }),
     tf.loadGraphModel(config.skeleton.modelPath, { fromTFHub: config.skeleton.modelPath.includes('tfhub.dev') }),
   ]);
-  const detector = new hand.HandDetector(handDetectorModel, anchors.anchors, config);
-  const pipeline = new pipe.HandPipeline(detector, handPoseModel, config);
-  const handpose = new HandPose(pipeline);
+  const detector = new handdetector.HandDetector(handDetectorModel, config.inputSize, anchors.anchors);
+  const pipe = new pipeline.HandPipeline(detector, handPoseModel, config.inputSize);
+  const handpose = new HandPose(pipe);
   return handpose;
 }
 exports.load = load;
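End to end, a hypothetical consumer of the refactored module (paths, sizes, and thresholds are placeholders, not from the commit):

// Hypothetical end-to-end usage; model paths and values are placeholders.
const handpose = require('./handpose');

const config = {
  inputSize: 256,
  skipFrames: 10,
  minConfidence: 0.5,
  maxHands: 1,
  iouThreshold: 0.3,
  scoreThreshold: 0.7,
  detector: { modelPath: 'file://models/handdetect.json' },
  skeleton: { modelPath: 'file://models/handskeleton.json' },
};

async function main(inputTensor) { // [1, height, width, 3] image tensor
  const model = await handpose.load(config); // loads detector and skeleton graph models
  const hands = await model.estimateHands(inputTensor, config);
  for (const hand of hands) console.log(hand.confidence, hand.box, hand.annotations.indexFinger);
}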
src/human.js
@@ -167,6 +167,7 @@ class Human {
       this.log('Changing WebGL: WEBGL_DELETE_TEXTURE_THRESHOLD:', this.config.deallocate);
       tf.ENV.set('WEBGL_DELETE_TEXTURE_THRESHOLD', this.config.deallocate ? 0 : -1);
     }
+    tf.ENV.set('WEBGL_CPU_FORWARD', true);
     await tf.ready();
   }
 }
@@ -284,32 +285,6 @@ class Human {
     perf.image = Math.trunc(now() - timeStamp);
     const imageTensor = image.tensor;

-    // run posenet
-    if (this.config.async) {
-      poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
-    } else {
-      this.state = 'run:body';
-      timeStamp = now();
-      this.analyze('Start PoseNet');
-      poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
-      this.analyze('End PoseNet:');
-      perf.body = Math.trunc(now() - timeStamp);
-    }
-
-    // run handpose
-    if (this.config.async) {
-      handRes = this.config.hand.enabled ? this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
-    } else {
-      this.state = 'run:hand';
-      timeStamp = now();
-      this.analyze('Start HandPose:');
-      handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
-      this.analyze('End HandPose:');
-      perf.hand = Math.trunc(now() - timeStamp);
-    }
-
-    if (this.config.async) [poseRes, handRes] = await Promise.all([poseRes, handRes]);
-
     // run facemesh, includes blazeface and iris
     const faceRes = [];
     if (this.config.face.enabled) {
@@ -357,6 +332,32 @@ class Human {
       }
     }

+    // run posenet
+    if (this.config.async) {
+      poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
+    } else {
+      this.state = 'run:body';
+      timeStamp = now();
+      this.analyze('Start PoseNet');
+      poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
+      this.analyze('End PoseNet:');
+      perf.body = Math.trunc(now() - timeStamp);
+    }
+
+    // run handpose
+    if (this.config.async) {
+      handRes = this.config.hand.enabled ? this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
+    } else {
+      this.state = 'run:hand';
+      timeStamp = now();
+      this.analyze('Start HandPose:');
+      handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
+      this.analyze('End HandPose:');
+      perf.hand = Math.trunc(now() - timeStamp);
+    }
+
+    if (this.config.async) [poseRes, handRes] = await Promise.all([poseRes, handRes]);
+
     imageTensor.dispose();
     this.state = 'idle';
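The relocated block changes scheduling rather than behavior: in async mode the body and hand models are started without await and resolved together through Promise.all, so they overlap; in sync mode each is awaited and timed individually. The pattern in isolation:

// Pattern sketch: overlap two independent async model runs (async mode)
// versus awaiting each one for per-model timing (sync mode).
async function run(models, input, config) {
  let poseRes;
  let handRes;
  if (config.async) {
    poseRes = models.posenet.estimatePoses(input, config.body);   // promise, not awaited
    handRes = models.handpose.estimateHands(input, config.hand);  // promise, not awaited
    [poseRes, handRes] = await Promise.all([poseRes, handRes]);   // resolve together
  } else {
    poseRes = await models.posenet.estimatePoses(input, config.body);
    handRes = await models.handpose.estimateHands(input, config.hand);
  }
  return { poseRes, handRes };
}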