mirror of https://github.com/vladmandic/human
update hand algorithm
parent
0adac25629
commit
cd6f1f7e7a
|
@ -109,10 +109,10 @@ export default {
|
||||||
// if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
|
// if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
|
||||||
// as the hand probably hasn't moved much in short time (10 * 1/25 = 0.4 sec)
|
// as the hand probably hasn't moved much in short time (10 * 1/25 = 0.4 sec)
|
||||||
minConfidence: 0.5, // threshold for discarding a prediction
|
minConfidence: 0.5, // threshold for discarding a prediction
|
||||||
iouThreshold: 0.2, // threshold for deciding whether boxes overlap too much in non-maximum suppression
|
iouThreshold: 0.1, // threshold for deciding whether boxes overlap too much in non-maximum suppression
|
||||||
scoreThreshold: 0.5, // threshold for deciding when to remove boxes based on score in non-maximum suppression
|
scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression
|
||||||
enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
|
enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
|
||||||
maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
|
maxHands: 1, // maximum number of hands detected in the input, should be set to the minimum number for performance
|
||||||
detector: {
|
detector: {
|
||||||
modelPath: '../models/handdetect.json',
|
modelPath: '../models/handdetect.json',
|
||||||
},
|
},
|
||||||
|
|
|
@ -46,33 +46,30 @@ class HandDetector {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async getBoundingBoxes(input, config) {
|
async getBoxes(input, config) {
|
||||||
const batchedPrediction = this.model.predict(input);
|
const batched = this.model.predict(input);
|
||||||
const prediction = batchedPrediction.squeeze();
|
const predictions = batched.squeeze();
|
||||||
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
|
const scores = tf.tidy(() => tf.sigmoid(tf.slice(predictions, [0, 0], [-1, 1])).squeeze());
|
||||||
const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
|
// const scoresVal = scores.dataSync(); // scoresVal[boxIndex] is box confidence
|
||||||
|
const rawBoxes = tf.slice(predictions, [0, 1], [-1, 4]);
|
||||||
const boxes = this.normalizeBoxes(rawBoxes);
|
const boxes = this.normalizeBoxes(rawBoxes);
|
||||||
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
|
const boxesWithHandsT = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
|
||||||
const boxesWithHands = boxesWithHandsTensor.arraySync();
|
const boxesWithHands = boxesWithHandsT.arraySync();
|
||||||
const toDispose = [
|
const toDispose = [
|
||||||
batchedPrediction,
|
batched,
|
||||||
boxesWithHandsTensor,
|
boxesWithHandsT,
|
||||||
prediction,
|
predictions,
|
||||||
boxes,
|
boxes,
|
||||||
rawBoxes,
|
rawBoxes,
|
||||||
scores,
|
scores,
|
||||||
];
|
];
|
||||||
if (boxesWithHands.length === 0) {
|
|
||||||
toDispose.forEach((tensor) => tensor.dispose());
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
const hands = [];
|
const hands = [];
|
||||||
for (const boxIndex of boxesWithHands) {
|
for (const boxIndex of boxesWithHands) {
|
||||||
const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
|
const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
|
||||||
const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]);
|
const rawPalmLandmarks = tf.slice(predictions, [boxIndex, 5], [1, 14]);
|
||||||
const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2]));
|
const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2]));
|
||||||
rawPalmLandmarks.dispose();
|
rawPalmLandmarks.dispose();
|
||||||
hands.push({ boxes: matchingBox, palmLandmarks });
|
hands.push({ box: matchingBox, palmLandmarks });
|
||||||
}
|
}
|
||||||
toDispose.forEach((tensor) => tensor.dispose());
|
toDispose.forEach((tensor) => tensor.dispose());
|
||||||
return hands;
|
return hands;
|
||||||
|
@ -82,16 +79,16 @@ class HandDetector {
|
||||||
const inputHeight = input.shape[1];
|
const inputHeight = input.shape[1];
|
||||||
const inputWidth = input.shape[2];
|
const inputWidth = input.shape[2];
|
||||||
const image = tf.tidy(() => input.resizeBilinear([config.inputSize, config.inputSize]).div(127.5).sub(1));
|
const image = tf.tidy(() => input.resizeBilinear([config.inputSize, config.inputSize]).div(127.5).sub(1));
|
||||||
const predictions = await this.getBoundingBoxes(image, config);
|
const predictions = await this.getBoxes(image, config);
|
||||||
image.dispose();
|
image.dispose();
|
||||||
if (!predictions || predictions.length === 0) return null;
|
if (!predictions || predictions.length === 0) return null;
|
||||||
const hands = [];
|
const hands = [];
|
||||||
for (const prediction of predictions) {
|
for (const prediction of predictions) {
|
||||||
const boundingBoxes = prediction.boxes.dataSync();
|
const boundingBoxes = prediction.box.dataSync();
|
||||||
const startPoint = boundingBoxes.slice(0, 2);
|
const startPoint = boundingBoxes.slice(0, 2);
|
||||||
const endPoint = boundingBoxes.slice(2, 4);
|
const endPoint = boundingBoxes.slice(2, 4);
|
||||||
const palmLandmarks = prediction.palmLandmarks.arraySync();
|
const palmLandmarks = prediction.palmLandmarks.arraySync();
|
||||||
prediction.boxes.dispose();
|
prediction.box.dispose();
|
||||||
prediction.palmLandmarks.dispose();
|
prediction.palmLandmarks.dispose();
|
||||||
hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / config.inputSize, inputHeight / config.inputSize]));
|
hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / config.inputSize, inputHeight / config.inputSize]));
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,12 +30,11 @@ const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;
|
||||||
|
|
||||||
class HandPipeline {
|
class HandPipeline {
|
||||||
constructor(boundingBoxDetector, meshDetector, inputSize) {
|
constructor(boundingBoxDetector, meshDetector, inputSize) {
|
||||||
this.boundingBoxDetector = boundingBoxDetector;
|
this.boxDetector = boundingBoxDetector;
|
||||||
this.meshDetector = meshDetector;
|
this.meshDetector = meshDetector;
|
||||||
this.inputSize = inputSize;
|
this.inputSize = inputSize;
|
||||||
this.regionsOfInterest = [];
|
this.storedBoxes = [];
|
||||||
this.runsWithoutHandDetector = 0;
|
this.skipped = 0;
|
||||||
this.skipFrames = 0;
|
|
||||||
this.detectedHands = 0;
|
this.detectedHands = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,30 +85,24 @@ class HandPipeline {
|
||||||
}
|
}
|
||||||
|
|
||||||
async estimateHands(image, config) {
|
async estimateHands(image, config) {
|
||||||
this.skipFrames = config.skipFrames;
|
this.skipped++;
|
||||||
// don't need box detection if we have sufficient number of boxes
|
let useFreshBox = false;
|
||||||
let useFreshBox = (this.runsWithoutHandDetector > this.skipFrames) || (this.detectedHands !== this.regionsOfInterest.length);
|
// run new detector every skipFrames
|
||||||
let boundingBoxPredictions;
|
const boxes = (this.skipped > config.skipFrames)
|
||||||
// but every skipFrames check if detect boxes number changed
|
? await this.boxDetector.estimateHandBounds(image, config) : null;
|
||||||
if (useFreshBox) boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
|
// if detector result count doesn't match current working set, use it to reset current working set
|
||||||
// if there are new boxes and number of boxes doesn't match use new boxes, but not if maxhands is fixed to 1
|
if (boxes && (boxes.length !== this.detectedHands) && (this.detectedHands !== config.maxHands)) {
|
||||||
if (config.maxHands > 1 && boundingBoxPredictions && boundingBoxPredictions.length > 0 && boundingBoxPredictions.length !== this.detectedHands) useFreshBox = true;
|
// console.log(this.skipped, config.maxHands, this.detectedHands, this.storedBoxes.length, boxes.length);
|
||||||
if (useFreshBox) {
|
this.storedBoxes = [];
|
||||||
this.regionsOfInterest = [];
|
|
||||||
if (!boundingBoxPredictions || boundingBoxPredictions.length === 0) {
|
|
||||||
this.detectedHands = 0;
|
this.detectedHands = 0;
|
||||||
return null;
|
for (const possible of boxes) this.storedBoxes.push(possible);
|
||||||
}
|
if (this.storedBoxes.length > 0) useFreshBox = true;
|
||||||
for (const boundingBoxPrediction of boundingBoxPredictions) {
|
this.skipped = 0;
|
||||||
this.regionsOfInterest.push(boundingBoxPrediction);
|
|
||||||
}
|
|
||||||
this.runsWithoutHandDetector = 0;
|
|
||||||
} else {
|
|
||||||
this.runsWithoutHandDetector++;
|
|
||||||
}
|
}
|
||||||
const hands = [];
|
const hands = [];
|
||||||
for (const i in this.regionsOfInterest) {
|
// go through working set of boxes
|
||||||
const currentBox = this.regionsOfInterest[i];
|
for (const i in this.storedBoxes) {
|
||||||
|
const currentBox = this.storedBoxes[i];
|
||||||
if (!currentBox) continue;
|
if (!currentBox) continue;
|
||||||
const angle = util.computeRotation(currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE], currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]);
|
const angle = util.computeRotation(currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE], currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]);
|
||||||
const palmCenter = box.getBoxCenter(currentBox);
|
const palmCenter = box.getBoxCenter(currentBox);
|
||||||
|
@ -121,8 +114,7 @@ class HandPipeline {
|
||||||
const handImage = croppedInput.div(255);
|
const handImage = croppedInput.div(255);
|
||||||
croppedInput.dispose();
|
croppedInput.dispose();
|
||||||
rotatedImage.dispose();
|
rotatedImage.dispose();
|
||||||
const prediction = this.meshDetector.predict(handImage);
|
const [confidence, keypoints] = await this.meshDetector.predict(handImage);
|
||||||
const [confidence, keypoints] = prediction;
|
|
||||||
handImage.dispose();
|
handImage.dispose();
|
||||||
const confidenceValue = confidence.dataSync()[0];
|
const confidenceValue = confidence.dataSync()[0];
|
||||||
confidence.dispose();
|
confidence.dispose();
|
||||||
|
@ -133,7 +125,7 @@ class HandPipeline {
|
||||||
keypointsReshaped.dispose();
|
keypointsReshaped.dispose();
|
||||||
const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
|
const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
|
||||||
const nextBoundingBox = this.getBoxForHandLandmarks(coords);
|
const nextBoundingBox = this.getBoxForHandLandmarks(coords);
|
||||||
this.updateRegionsOfInterest(nextBoundingBox, i);
|
this.updateStoredBoxes(nextBoundingBox, i);
|
||||||
const result = {
|
const result = {
|
||||||
landmarks: coords,
|
landmarks: coords,
|
||||||
handInViewConfidence: confidenceValue,
|
handInViewConfidence: confidenceValue,
|
||||||
|
@ -144,7 +136,7 @@ class HandPipeline {
|
||||||
};
|
};
|
||||||
hands.push(result);
|
hands.push(result);
|
||||||
} else {
|
} else {
|
||||||
this.updateRegionsOfInterest(null, i);
|
this.updateStoredBoxes(null, i);
|
||||||
/*
|
/*
|
||||||
const result = {
|
const result = {
|
||||||
handInViewConfidence: confidenceValue,
|
handInViewConfidence: confidenceValue,
|
||||||
|
@ -158,7 +150,7 @@ class HandPipeline {
|
||||||
}
|
}
|
||||||
keypoints.dispose();
|
keypoints.dispose();
|
||||||
}
|
}
|
||||||
this.regionsOfInterest = this.regionsOfInterest.filter((a) => a !== null);
|
this.storedBoxes = this.storedBoxes.filter((a) => a !== null);
|
||||||
this.detectedHands = hands.length;
|
this.detectedHands = hands.length;
|
||||||
return hands;
|
return hands;
|
||||||
}
|
}
|
||||||
|
@ -172,8 +164,8 @@ class HandPipeline {
|
||||||
return { startPoint, endPoint };
|
return { startPoint, endPoint };
|
||||||
}
|
}
|
||||||
|
|
||||||
updateRegionsOfInterest(newBox, i) {
|
updateStoredBoxes(newBox, i) {
|
||||||
const previousBox = this.regionsOfInterest[i];
|
const previousBox = this.storedBoxes[i];
|
||||||
let iou = 0;
|
let iou = 0;
|
||||||
if (newBox && previousBox && previousBox.startPoint) {
|
if (newBox && previousBox && previousBox.startPoint) {
|
||||||
const [boxStartX, boxStartY] = newBox.startPoint;
|
const [boxStartX, boxStartY] = newBox.startPoint;
|
||||||
|
@ -189,7 +181,7 @@ class HandPipeline {
|
||||||
const previousBoxArea = (previousBoxEndX - previousBoxStartX) * (previousBoxEndY - boxStartY);
|
const previousBoxArea = (previousBoxEndX - previousBoxStartX) * (previousBoxEndY - boxStartY);
|
||||||
iou = intersection / (boxArea + previousBoxArea - intersection);
|
iou = intersection / (boxArea + previousBoxArea - intersection);
|
||||||
}
|
}
|
||||||
this.regionsOfInterest[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
|
this.storedBoxes[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue