update hand algorithm

pull/280/head
Vladimir Mandic 2020-11-08 01:17:25 -05:00
parent 0adac25629
commit cd6f1f7e7a
3 changed files with 45 additions and 56 deletions

View File

@ -109,10 +109,10 @@ export default {
// if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
// as the hand probably hasn't moved much in short time (10 * 1/25 = 0.4 sec) // as the hand probably hasn't moved much in short time (10 * 1/25 = 0.4 sec)
minConfidence: 0.5, // threshold for discarding a prediction minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.2, // threshold for deciding whether boxes overlap too much in non-maximum suppression iouThreshold: 0.1, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.5, // threshold for deciding when to remove boxes based on score in non-maximum suppression scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression
enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance maxHands: 1, // maximum number of hands detected in the input, should be set to the minimum number for performance
detector: { detector: {
modelPath: '../models/handdetect.json', modelPath: '../models/handdetect.json',
}, },

View File

@ -46,33 +46,30 @@ class HandDetector {
}); });
} }
async getBoundingBoxes(input, config) { async getBoxes(input, config) {
const batchedPrediction = this.model.predict(input); const batched = this.model.predict(input);
const prediction = batchedPrediction.squeeze(); const predictions = batched.squeeze();
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze()); const scores = tf.tidy(() => tf.sigmoid(tf.slice(predictions, [0, 0], [-1, 1])).squeeze());
const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]); // const scoresVal = scores.dataSync(); // scoresVal[boxIndex] is box confidence
const rawBoxes = tf.slice(predictions, [0, 1], [-1, 4]);
const boxes = this.normalizeBoxes(rawBoxes); const boxes = this.normalizeBoxes(rawBoxes);
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold); const boxesWithHandsT = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
const boxesWithHands = boxesWithHandsTensor.arraySync(); const boxesWithHands = boxesWithHandsT.arraySync();
const toDispose = [ const toDispose = [
batchedPrediction, batched,
boxesWithHandsTensor, boxesWithHandsT,
prediction, predictions,
boxes, boxes,
rawBoxes, rawBoxes,
scores, scores,
]; ];
if (boxesWithHands.length === 0) {
toDispose.forEach((tensor) => tensor.dispose());
return null;
}
const hands = []; const hands = [];
for (const boxIndex of boxesWithHands) { for (const boxIndex of boxesWithHands) {
const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]); const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]); const rawPalmLandmarks = tf.slice(predictions, [boxIndex, 5], [1, 14]);
const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2])); const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2]));
rawPalmLandmarks.dispose(); rawPalmLandmarks.dispose();
hands.push({ boxes: matchingBox, palmLandmarks }); hands.push({ box: matchingBox, palmLandmarks });
} }
toDispose.forEach((tensor) => tensor.dispose()); toDispose.forEach((tensor) => tensor.dispose());
return hands; return hands;
@ -82,16 +79,16 @@ class HandDetector {
const inputHeight = input.shape[1]; const inputHeight = input.shape[1];
const inputWidth = input.shape[2]; const inputWidth = input.shape[2];
const image = tf.tidy(() => input.resizeBilinear([config.inputSize, config.inputSize]).div(127.5).sub(1)); const image = tf.tidy(() => input.resizeBilinear([config.inputSize, config.inputSize]).div(127.5).sub(1));
const predictions = await this.getBoundingBoxes(image, config); const predictions = await this.getBoxes(image, config);
image.dispose(); image.dispose();
if (!predictions || predictions.length === 0) return null; if (!predictions || predictions.length === 0) return null;
const hands = []; const hands = [];
for (const prediction of predictions) { for (const prediction of predictions) {
const boundingBoxes = prediction.boxes.dataSync(); const boundingBoxes = prediction.box.dataSync();
const startPoint = boundingBoxes.slice(0, 2); const startPoint = boundingBoxes.slice(0, 2);
const endPoint = boundingBoxes.slice(2, 4); const endPoint = boundingBoxes.slice(2, 4);
const palmLandmarks = prediction.palmLandmarks.arraySync(); const palmLandmarks = prediction.palmLandmarks.arraySync();
prediction.boxes.dispose(); prediction.box.dispose();
prediction.palmLandmarks.dispose(); prediction.palmLandmarks.dispose();
hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / config.inputSize, inputHeight / config.inputSize])); hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / config.inputSize, inputHeight / config.inputSize]));
} }

View File

@ -30,12 +30,11 @@ const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;
class HandPipeline { class HandPipeline {
constructor(boundingBoxDetector, meshDetector, inputSize) { constructor(boundingBoxDetector, meshDetector, inputSize) {
this.boundingBoxDetector = boundingBoxDetector; this.boxDetector = boundingBoxDetector;
this.meshDetector = meshDetector; this.meshDetector = meshDetector;
this.inputSize = inputSize; this.inputSize = inputSize;
this.regionsOfInterest = []; this.storedBoxes = [];
this.runsWithoutHandDetector = 0; this.skipped = 0;
this.skipFrames = 0;
this.detectedHands = 0; this.detectedHands = 0;
} }
@ -86,30 +85,24 @@ class HandPipeline {
} }
async estimateHands(image, config) { async estimateHands(image, config) {
this.skipFrames = config.skipFrames; this.skipped++;
// don't need box detection if we have sufficient number of boxes let useFreshBox = false;
let useFreshBox = (this.runsWithoutHandDetector > this.skipFrames) || (this.detectedHands !== this.regionsOfInterest.length); // run new detector every skipFrames
let boundingBoxPredictions; const boxes = (this.skipped > config.skipFrames)
// but every skipFrames check if detect boxes number changed ? await this.boxDetector.estimateHandBounds(image, config) : null;
if (useFreshBox) boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config); // if detector result count doesn't match current working set, use it to reset current working set
// if there are new boxes and the number of boxes doesn't match, use new boxes, but not if maxHands is fixed to 1 if (boxes && (boxes.length !== this.detectedHands) && (this.detectedHands !== config.maxHands)) {
if (config.maxHands > 1 && boundingBoxPredictions && boundingBoxPredictions.length > 0 && boundingBoxPredictions.length !== this.detectedHands) useFreshBox = true; // console.log(this.skipped, config.maxHands, this.detectedHands, this.storedBoxes.length, boxes.length);
if (useFreshBox) { this.storedBoxes = [];
this.regionsOfInterest = [];
if (!boundingBoxPredictions || boundingBoxPredictions.length === 0) {
this.detectedHands = 0; this.detectedHands = 0;
return null; for (const possible of boxes) this.storedBoxes.push(possible);
} if (this.storedBoxes.length > 0) useFreshBox = true;
for (const boundingBoxPrediction of boundingBoxPredictions) { this.skipped = 0;
this.regionsOfInterest.push(boundingBoxPrediction);
}
this.runsWithoutHandDetector = 0;
} else {
this.runsWithoutHandDetector++;
} }
const hands = []; const hands = [];
for (const i in this.regionsOfInterest) { // go through working set of boxes
const currentBox = this.regionsOfInterest[i]; for (const i in this.storedBoxes) {
const currentBox = this.storedBoxes[i];
if (!currentBox) continue; if (!currentBox) continue;
const angle = util.computeRotation(currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE], currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]); const angle = util.computeRotation(currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE], currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]);
const palmCenter = box.getBoxCenter(currentBox); const palmCenter = box.getBoxCenter(currentBox);
@ -121,8 +114,7 @@ class HandPipeline {
const handImage = croppedInput.div(255); const handImage = croppedInput.div(255);
croppedInput.dispose(); croppedInput.dispose();
rotatedImage.dispose(); rotatedImage.dispose();
const prediction = this.meshDetector.predict(handImage); const [confidence, keypoints] = await this.meshDetector.predict(handImage);
const [confidence, keypoints] = prediction;
handImage.dispose(); handImage.dispose();
const confidenceValue = confidence.dataSync()[0]; const confidenceValue = confidence.dataSync()[0];
confidence.dispose(); confidence.dispose();
@ -133,7 +125,7 @@ class HandPipeline {
keypointsReshaped.dispose(); keypointsReshaped.dispose();
const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix); const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
const nextBoundingBox = this.getBoxForHandLandmarks(coords); const nextBoundingBox = this.getBoxForHandLandmarks(coords);
this.updateRegionsOfInterest(nextBoundingBox, i); this.updateStoredBoxes(nextBoundingBox, i);
const result = { const result = {
landmarks: coords, landmarks: coords,
handInViewConfidence: confidenceValue, handInViewConfidence: confidenceValue,
@ -144,7 +136,7 @@ class HandPipeline {
}; };
hands.push(result); hands.push(result);
} else { } else {
this.updateRegionsOfInterest(null, i); this.updateStoredBoxes(null, i);
/* /*
const result = { const result = {
handInViewConfidence: confidenceValue, handInViewConfidence: confidenceValue,
@ -158,7 +150,7 @@ class HandPipeline {
} }
keypoints.dispose(); keypoints.dispose();
} }
this.regionsOfInterest = this.regionsOfInterest.filter((a) => a !== null); this.storedBoxes = this.storedBoxes.filter((a) => a !== null);
this.detectedHands = hands.length; this.detectedHands = hands.length;
return hands; return hands;
} }
@ -172,8 +164,8 @@ class HandPipeline {
return { startPoint, endPoint }; return { startPoint, endPoint };
} }
updateRegionsOfInterest(newBox, i) { updateStoredBoxes(newBox, i) {
const previousBox = this.regionsOfInterest[i]; const previousBox = this.storedBoxes[i];
let iou = 0; let iou = 0;
if (newBox && previousBox && previousBox.startPoint) { if (newBox && previousBox && previousBox.startPoint) {
const [boxStartX, boxStartY] = newBox.startPoint; const [boxStartX, boxStartY] = newBox.startPoint;
@ -189,7 +181,7 @@ class HandPipeline {
const previousBoxArea = (previousBoxEndX - previousBoxStartX) * (previousBoxEndY - boxStartY); const previousBoxArea = (previousBoxEndX - previousBoxStartX) * (previousBoxEndY - boxStartY);
iou = intersection / (boxArea + previousBoxArea - intersection); iou = intersection / (boxArea + previousBoxArea - intersection);
} }
this.regionsOfInterest[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox; this.storedBoxes[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
} }
} }