diff --git a/config.js b/config.js
index 716e6766..c49c1999 100644
--- a/config.js
+++ b/config.js
@@ -113,6 +113,7 @@ export default {
scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression
enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
maxHands: 1, // maximum number of hands detected in the input, should be set to the minimum number for performance
+ landmarks: true, // when true detect hand landmarks, when false return only the hand bounding box
detector: {
modelPath: '../models/handdetect.json',
},
diff --git a/demo/browser.js b/demo/browser.js
index 3fa25bc7..a5e436e1 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -27,6 +27,10 @@ const ui = {
maxFrames: 10,
modelsPreload: true,
modelsWarmup: true,
+ menuWidth: 0, // placeholder; setupMenu replaces it with the 'Image width' range control
+ menuHeight: 0, // placeholder; setupMenu replaces it with the 'Image height' range control
+ camera: {}, // replaced in setupCamera with name/width/height/facing of the active video track
+ fps: [], // recent fps samples, capped at maxFrames entries by drawResults
};
// global variables
@@ -34,8 +38,6 @@ let menu;
let menuFX;
let worker;
let timeStamp;
-let camera = {};
-const fps = [];
// helper function: translates json to human readable string
function str(...msg) {
@@ -62,17 +64,22 @@ const status = (msg) => {
// draws processed results and starts processing of a next frame
function drawResults(input, result, canvas) {
// update fps data
- fps.push(1000 / (performance.now() - timeStamp));
- if (fps.length > ui.maxFrames) fps.shift();
+ const elapsed = performance.now() - timeStamp; // milliseconds between the stored timeStamp and now
+ ui.fps.push(1000 / elapsed); // convert the elapsed time into a frames-per-second sample
+ if (ui.fps.length > ui.maxFrames) ui.fps.shift(); // drop the oldest sample once more than ui.maxFrames are stored
// enable for continous performance monitoring
// console.log(result.performance);
- // eslint-disable-next-line no-use-before-define
- if (input.srcObject) requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop before we even draw results
-
+ // schedule the next detection before drawing results, but cap the loop at roughly 30 fps (one frame per 33 ms)
+ if (input.srcObject) {
+ // eslint-disable-next-line no-use-before-define
+ if (elapsed > 33) requestAnimationFrame(() => runHumanDetect(input, canvas)); // frame budget already used up: continue on the next animation frame
+ // eslint-disable-next-line no-use-before-define
+ else setTimeout(() => runHumanDetect(input, canvas), 33 - elapsed); // otherwise wait out the rest of the 33 ms budget first
+ }
// draw fps chart
- menu.updateChart('FPS', fps);
+ menu.updateChart('FPS', ui.fps);
// draw image from video
const ctx = canvas.getContext('2d');
ctx.fillStyle = ui.baseBackground;
@@ -94,9 +101,9 @@ function drawResults(input, result, canvas) {
const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
const memory = `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
const processing = result.canvas ? `processing: ${result.canvas.width} x ${result.canvas.height}` : '';
- const avg = Math.trunc(10 * fps.reduce((a, b) => a + b) / fps.length) / 10;
+ const avg = Math.trunc(10 * ui.fps.reduce((a, b) => a + b) / ui.fps.length) / 10; // mean of the stored fps samples, truncated to one decimal place
document.getElementById('log').innerText = `
- video: ${camera.name} | facing: ${camera.facing} | resolution: ${camera.width} x ${camera.height} ${processing}
+ video: ${ui.camera.name} | facing: ${ui.camera.facing} | resolution: ${ui.camera.width} x ${ui.camera.height} ${processing}
backend: ${human.tf.getBackend()} | ${memory}
performance: ${str(result.performance)} FPS:${avg}
`;
@@ -147,7 +154,7 @@ async function setupCamera() {
const track = stream.getVideoTracks()[0];
const settings = track.getSettings();
log('camera constraints:', constraints, 'window:', { width: window.innerWidth, height: window.innerHeight }, 'settings:', settings, 'track:', track);
- camera = { name: track.label, width: settings.width, height: settings.height, facing: settings.facingMode === 'user' ? 'front' : 'back' };
+ ui.camera = { name: track.label, width: settings.width, height: settings.height, facing: settings.facingMode === 'user' ? 'front' : 'back' }; // any facingMode other than 'user' is reported as 'back'
return new Promise((resolve) => {
video.onloadeddata = async () => {
video.width = video.videoWidth;
@@ -156,6 +163,8 @@ async function setupCamera() {
canvas.height = video.height;
canvas.style.width = canvas.width > canvas.height ? '100vw' : '';
canvas.style.height = canvas.width > canvas.height ? '' : '100vh';
+ if (ui.menuWidth && ui.menuWidth.input) ui.menuWidth.input.setAttribute('value', video.width); // ui.menuWidth stays 0 until setupMenu stores the range control, so skip the sync instead of throwing inside onloadeddata
+ if (ui.menuHeight && ui.menuHeight.input) ui.menuHeight.input.setAttribute('value', video.height); // same guard for the height control
// silly font resizing for paint-on-canvas since viewport can be zoomed
const size = 14 + (6 * canvas.width / window.innerWidth);
ui.baseFont = ui.baseFontProto.replace(/{size}/, `${size}px`);
@@ -351,8 +360,8 @@ function setupMenu() {
menuFX.addHTML('
');
menuFX.addLabel('Image Processing');
menuFX.addBool('Enabled', human.config.filter, 'enabled');
- menuFX.addRange('Image width', human.config.filter, 'width', 0, 3840, 10, (val) => human.config.filter.width = parseInt(val));
- menuFX.addRange('Image height', human.config.filter, 'height', 0, 2160, 10, (val) => human.config.filter.height = parseInt(val));
+ ui.menuWidth = menuFX.addRange('Image width', human.config.filter, 'width', 0, 3840, 10, (val) => human.config.filter.width = parseInt(val, 10)); // keep the returned control so setupCamera can write the video width into it; explicit radix for the decimal slider value
+ ui.menuHeight = menuFX.addRange('Image height', human.config.filter, 'height', 0, 2160, 10, (val) => human.config.filter.height = parseInt(val, 10)); // keep the returned control so setupCamera can write the video height into it
menuFX.addRange('Brightness', human.config.filter, 'brightness', -1.0, 1.0, 0.05, (val) => human.config.filter.brightness = parseFloat(val));
menuFX.addRange('Contrast', human.config.filter, 'contrast', -1.0, 1.0, 0.05, (val) => human.config.filter.contrast = parseFloat(val));
menuFX.addRange('Sharpness', human.config.filter, 'sharpness', 0, 1.0, 0.05, (val) => human.config.filter.sharpness = parseFloat(val));
diff --git a/demo/menu.js b/demo/menu.js
index 2e68ef1a..a5fb0b51 100644
--- a/demo/menu.js
+++ b/demo/menu.js
@@ -219,6 +219,7 @@ class Menu {
evt.target.setAttribute('value', evt.target.value);
if (callback) callback(evt.target.value);
});
+ el.input = el.children[0]; // expose the element's first child as .input on the returned element (presumably the range input itself - confirm against the markup built above)
return el;
}
diff --git a/package.json b/package.json
index 8c502c60..2c1ce5e7 100644
--- a/package.json
+++ b/package.json
@@ -41,16 +41,16 @@
"scripts": {
"start": "node --trace-warnings --unhandled-rejections=strict --trace-uncaught --no-deprecation src/node.js",
"lint": "eslint src/*.js demo/*.js",
- "dev": "npm install && node --trace-warnings --unhandled-rejections=strict --trace-uncaught --no-deprecation dev-server.js",
+ "dev": "npm install && node --trace-warnings --unhandled-rejections=strict --trace-uncaught --no-deprecation dev-server/dev-server.js",
+ "changelog": "node dev-server/changelog.js",
"build-iife": "esbuild --bundle --minify --platform=browser --sourcemap --target=es2018 --format=iife --external:fs --global-name=Human --metafile=dist/human.json --outfile=dist/human.js src/human.js",
"build-esm-bundle": "esbuild --bundle --minify --platform=browser --sourcemap --target=es2018 --format=esm --external:fs --metafile=dist/human.esm.json --outfile=dist/human.esm.js src/human.js",
"build-esm-nobundle": "esbuild --bundle --minify --platform=browser --sourcemap --target=es2018 --format=esm --external:@tensorflow --external:fs --metafile=dist/human.esm-nobundle.json --outfile=dist/human.esm-nobundle.js src/human.js",
"build-node": "esbuild --bundle --minify --platform=node --sourcemap --target=es2018 --format=cjs --metafile=dist/human.node.json --outfile=dist/human.node.js src/human.js",
"build-node-nobundle": "esbuild --bundle --minify --platform=node --sourcemap --target=es2018 --format=cjs --external:@tensorflow --metafile=dist/human.node.json --outfile=dist/human.node-nobundle.js src/human.js",
"build-demo": "esbuild --bundle --log-level=error --platform=browser --sourcemap --target=es2018 --format=esm --external:fs --metafile=dist/demo-browser-index.json --outfile=dist/demo-browser-index.js demo/browser.js",
- "build": "rimraf dist/* && npm run build-iife && npm run build-esm-bundle && npm run build-esm-nobundle && npm run build-node && npm run build-node-nobundle && npm run build-demo",
- "update": "npm update --depth 20 --force && npm dedupe && npm prune && npm audit",
- "changelog": "node changelog.js"
+ "build": "rimraf dist/* && npm run build-iife && npm run build-esm-bundle && npm run build-esm-nobundle && npm run build-node && npm run build-node-nobundle && npm run build-demo && npm run changelog",
+ "update": "npm update --depth 20 --force && npm dedupe && npm prune && npm audit"
},
"keywords": [
"tensorflowjs",
diff --git a/src/hand/box.js b/src/hand/box.js
index 4078f732..e8e1916a 100644
--- a/src/hand/box.js
+++ b/src/hand/box.js
@@ -46,7 +46,7 @@ function scaleBoxCoordinates(box, factor) {
const scaledCoord = [coord[0] * factor[0], coord[1] * factor[1]];
return scaledCoord;
});
- return { startPoint, endPoint, palmLandmarks };
+ return { startPoint, endPoint, palmLandmarks, confidence: box.confidence }; // confidence is copied from the input box as-is; only coordinates are scaled
}
function enlargeBox(box, factor = 1.5) {
const center = getBoxCenter(box);
diff --git a/src/hand/handdetector.js b/src/hand/handdetector.js
index 6df3cace..66d57628 100644
--- a/src/hand/handdetector.js
+++ b/src/hand/handdetector.js
@@ -49,29 +49,28 @@ class HandDetector {
async getBoxes(input, config) {
const batched = this.model.predict(input);
const predictions = batched.squeeze();
+ batched.dispose(); // only the squeezed predictions tensor is used from here on
const scores = tf.tidy(() => tf.sigmoid(tf.slice(predictions, [0, 0], [-1, 1])).squeeze());
- // const scoresVal = scores.dataSync(); // scoresVal[boxIndex] is box confidence
+ const scoresVal = await scores.data(); // scoresVal[boxIndex] is box confidence; awaited read instead of dataSync() so this async method does not block while the values are downloaded
const rawBoxes = tf.slice(predictions, [0, 1], [-1, 4]);
const boxes = this.normalizeBoxes(rawBoxes);
- const boxesWithHandsT = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
- const boxesWithHands = boxesWithHandsT.arraySync();
- const toDispose = [
- batched,
- boxesWithHandsT,
- predictions,
- boxes,
- rawBoxes,
- scores,
- ];
+ rawBoxes.dispose(); // no longer needed once the normalized boxes exist
+ const filteredT = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
+ const filtered = await filteredT.array(); // indices of the boxes kept by non-maximum suppression; awaited read instead of arraySync()
+ scores.dispose(); // values were already copied into scoresVal
+ filteredT.dispose();
const hands = [];
- for (const boxIndex of boxesWithHands) {
- const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
- const rawPalmLandmarks = tf.slice(predictions, [boxIndex, 5], [1, 14]);
- const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2]));
- rawPalmLandmarks.dispose();
- hands.push({ box: matchingBox, palmLandmarks });
+ for (const boxIndex of filtered) {
+ if (scoresVal[boxIndex] >= config.minConfidence) { // skip surviving boxes whose score is below config.minConfidence
+ const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
+ const rawPalmLandmarks = tf.slice(predictions, [boxIndex, 5], [1, 14]);
+ const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2]));
+ rawPalmLandmarks.dispose();
+ hands.push({ box: matchingBox, palmLandmarks, confidence: scoresVal[boxIndex] }); // box and palmLandmarks are tensors the caller has to dispose
+ }
}
- toDispose.forEach((tensor) => tensor.dispose());
+ predictions.dispose();
+ boxes.dispose();
return hands;
}
@@ -84,13 +83,13 @@ class HandDetector {
if (!predictions || predictions.length === 0) return null;
const hands = [];
for (const prediction of predictions) {
- const boundingBoxes = prediction.box.dataSync();
- const startPoint = boundingBoxes.slice(0, 2);
- const endPoint = boundingBoxes.slice(2, 4);
+ const boxes = prediction.box.dataSync(); // flat values of this prediction's box tensor
+ const startPoint = boxes.slice(0, 2); // first two values of the box
+ const endPoint = boxes.slice(2, 4); // last two values of the box
const palmLandmarks = prediction.palmLandmarks.arraySync();
prediction.box.dispose();
prediction.palmLandmarks.dispose();
- hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / config.inputSize, inputHeight / config.inputSize]));
+ hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / config.inputSize, inputHeight / config.inputSize])); // rescale by input size over config.inputSize and carry the detector confidence along
}
return hands;
}
diff --git a/src/hand/handpipeline.js b/src/hand/handpipeline.js
index 276463c4..0439eecb 100644
--- a/src/hand/handpipeline.js
+++ b/src/hand/handpipeline.js
@@ -19,7 +19,6 @@ const tf = require('@tensorflow/tfjs');
const box = require('./box');
const util = require('./util');
-const UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD = 0.8;
const PALM_BOX_SHIFT_VECTOR = [0, -0.4];
const PALM_BOX_ENLARGE_FACTOR = 3;
const HAND_BOX_SHIFT_VECTOR = [0, -0.1]; // move detected hand box by x,y to ease landmark detection
@@ -87,68 +86,75 @@ class HandPipeline {
async estimateHands(image, config) {
this.skipped++;
let useFreshBox = false;
- // run new detector every skipFrames
- const boxes = (this.skipped > config.skipFrames)
- ? await this.boxDetector.estimateHandBounds(image, config) : null;
+ // keeps a working set of hand boxes between calls and produces one result per stored box: landmarks plus box when config.landmarks is set, box only otherwise
+ // run the detector once more than skipFrames calls have passed, or on every call when only boxes are wanted (landmarks disabled)
+ let boxes;
+ if ((this.skipped > config.skipFrames) || !config.landmarks) {
+ boxes = (await this.boxDetector.estimateHandBounds(image, config)) || []; // the detector returns null when nothing is found; normalize that to an empty list
+ this.skipped = 0;
+ }
+ // in box-only mode the working set is replaced on every call, even by an empty result, so boxes of hands that are no longer detected are not reported again
// if detector result count doesn't match current working set, use it to reset current working set
- if (boxes && (boxes.length !== this.detectedHands) && (this.detectedHands !== config.maxHands)) {
- // console.log(this.skipped, config.maxHands, this.detectedHands, this.storedBoxes.length, boxes.length);
+ if (boxes && (!config.landmarks || ((boxes.length > 0) && (boxes.length !== this.detectedHands) && (this.detectedHands !== config.maxHands)))) {
this.storedBoxes = [];
this.detectedHands = 0;
for (const possible of boxes) this.storedBoxes.push(possible);
if (this.storedBoxes.length > 0) useFreshBox = true;
- this.skipped = 0;
}
const hands = [];
+ // console.log(`skipped: ${this.skipped} max: ${config.maxHands} detected: ${this.detectedHands} stored: ${this.storedBoxes.length} new: ${boxes?.length}`);
+
// go through working set of boxes
for (const i in this.storedBoxes) {
const currentBox = this.storedBoxes[i];
if (!currentBox) continue;
- const angle = util.computeRotation(currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE], currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]);
- const palmCenter = box.getBoxCenter(currentBox);
- const palmCenterNormalized = [palmCenter[0] / image.shape[2], palmCenter[1] / image.shape[1]];
- const rotatedImage = tf.image.rotateWithOffset(image, angle, 0, palmCenterNormalized);
- const rotationMatrix = util.buildRotationMatrix(-angle, palmCenter);
- const newBox = useFreshBox ? this.getBoxForPalmLandmarks(currentBox.palmLandmarks, rotationMatrix) : currentBox;
- const croppedInput = box.cutBoxFromImageAndResize(newBox, rotatedImage, [this.inputSize, this.inputSize]);
- const handImage = croppedInput.div(255);
- croppedInput.dispose();
- rotatedImage.dispose();
- const [confidence, keypoints] = await this.meshDetector.predict(handImage);
- handImage.dispose();
- const confidenceValue = confidence.dataSync()[0];
- confidence.dispose();
- if (confidenceValue >= config.minConfidence) {
- const keypointsReshaped = tf.reshape(keypoints, [-1, 3]);
- const rawCoords = keypointsReshaped.arraySync();
+ if (config.landmarks) {
+ const angle = util.computeRotation(currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE], currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]);
+ const palmCenter = box.getBoxCenter(currentBox);
+ const palmCenterNormalized = [palmCenter[0] / image.shape[2], palmCenter[1] / image.shape[1]];
+ const rotatedImage = tf.image.rotateWithOffset(image, angle, 0, palmCenterNormalized);
+ const rotationMatrix = util.buildRotationMatrix(-angle, palmCenter);
+ const newBox = useFreshBox ? this.getBoxForPalmLandmarks(currentBox.palmLandmarks, rotationMatrix) : currentBox; // boxes fresh from the detector are first converted from their palm landmarks
+ const croppedInput = box.cutBoxFromImageAndResize(newBox, rotatedImage, [this.inputSize, this.inputSize]);
+ const handImage = croppedInput.div(255);
+ croppedInput.dispose();
+ rotatedImage.dispose();
+ const [confidence, keypoints] = await this.meshDetector.predict(handImage);
+ handImage.dispose();
+ const confidenceValue = confidence.dataSync()[0];
+ confidence.dispose();
+ if (confidenceValue >= config.minConfidence) {
+ const keypointsReshaped = tf.reshape(keypoints, [-1, 3]);
+ const rawCoords = keypointsReshaped.arraySync();
+ keypoints.dispose(); // NOTE(review): keypoints is disposed again after this if/else
+ keypointsReshaped.dispose();
+ const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
+ const nextBoundingBox = this.getBoxForHandLandmarks(coords);
+ this.storedBoxes[i] = nextBoundingBox; // the landmark-derived box becomes the box used on the next call
+ const result = {
+ landmarks: coords,
+ confidence: confidenceValue,
+ box: {
+ topLeft: nextBoundingBox.startPoint,
+ bottomRight: nextBoundingBox.endPoint,
+ },
+ };
+ hands.push(result);
+ } else {
+ this.storedBoxes[i] = null; // low confidence: mark the slot so the filter after the loop removes it
+ }
keypoints.dispose();
- keypointsReshaped.dispose();
- const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
- const nextBoundingBox = this.getBoxForHandLandmarks(coords);
- this.updateStoredBoxes(nextBoundingBox, i);
- const result = {
- landmarks: coords,
- handInViewConfidence: confidenceValue,
- boundingBox: {
- topLeft: nextBoundingBox.startPoint,
- bottomRight: nextBoundingBox.endPoint,
- },
- };
- hands.push(result);
} else {
- this.updateStoredBoxes(null, i);
- /*
+ const enlarged = box.enlargeBox(box.squarifyBox(box.shiftBox(currentBox, HAND_BOX_SHIFT_VECTOR)), HAND_BOX_ENLARGE_FACTOR); // box-only mode: shift, squarify and enlarge the detector box, no landmark model is run
const result = {
- handInViewConfidence: confidenceValue,
- boundingBox: {
- topLeft: currentBox.startPoint,
- bottomRight: currentBox.endPoint,
+ confidence: currentBox.confidence, // detector score carried on the stored box
+ box: {
+ topLeft: enlarged.startPoint,
+ bottomRight: enlarged.endPoint,
},
};
hands.push(result);
- */
}
- keypoints.dispose();
}
this.storedBoxes = this.storedBoxes.filter((a) => a !== null);
this.detectedHands = hands.length;
@@ -163,26 +169,6 @@ class HandPipeline {
const endPoint = [Math.max(...xs), Math.max(...ys)];
return { startPoint, endPoint };
}
-
- updateStoredBoxes(newBox, i) {
- const previousBox = this.storedBoxes[i];
- let iou = 0;
- if (newBox && previousBox && previousBox.startPoint) {
- const [boxStartX, boxStartY] = newBox.startPoint;
- const [boxEndX, boxEndY] = newBox.endPoint;
- const [previousBoxStartX, previousBoxStartY] = previousBox.startPoint;
- const [previousBoxEndX, previousBoxEndY] = previousBox.endPoint;
- const xStartMax = Math.max(boxStartX, previousBoxStartX);
- const yStartMax = Math.max(boxStartY, previousBoxStartY);
- const xEndMin = Math.min(boxEndX, previousBoxEndX);
- const yEndMin = Math.min(boxEndY, previousBoxEndY);
- const intersection = (xEndMin - xStartMax) * (yEndMin - yStartMax);
- const boxArea = (boxEndX - boxStartX) * (boxEndY - boxStartY);
- const previousBoxArea = (previousBoxEndX - previousBoxStartX) * (previousBoxEndY - boxStartY);
- iou = intersection / (boxArea + previousBoxArea - intersection);
- }
- this.storedBoxes[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
- }
}
exports.HandPipeline = HandPipeline;
diff --git a/src/hand/handpose.js b/src/hand/handpose.js
index 005c2b87..3e769d52 100644
--- a/src/hand/handpose.js
+++ b/src/hand/handpose.js
@@ -51,12 +51,12 @@ class HandPose {
}
}
hands.push({
- confidence: prediction.handInViewConfidence,
- box: prediction.boundingBox ? [
- prediction.boundingBox.topLeft[0],
- prediction.boundingBox.topLeft[1],
- prediction.boundingBox.bottomRight[0] - prediction.boundingBox.topLeft[0],
- prediction.boundingBox.bottomRight[1] - prediction.boundingBox.topLeft[1],
+ confidence: prediction.confidence, // same field name the pipeline result now uses
+ box: prediction.box ? [ // flattened below as [topLeft[0], topLeft[1], extent along index 0, extent along index 1]
+ prediction.box.topLeft[0],
+ prediction.box.topLeft[1],
+ prediction.box.bottomRight[0] - prediction.box.topLeft[0], // bottomRight minus topLeft along index 0
+ prediction.box.bottomRight[1] - prediction.box.topLeft[1], // bottomRight minus topLeft along index 1
] : 0,
landmarks: prediction.landmarks,
annotations,