update hand algorithm

pull/50/head
Vladimir Mandic 2020-11-08 01:17:25 -05:00
parent 8b7594b459
commit 1c0499d797
21 changed files with 143 additions and 158 deletions

@@ -109,10 +109,10 @@ export default {
// if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
// as the hand probably hasn't moved much in short time (10 * 1/25 = 0.25 sec)
minConfidence: 0.5, // threshold for discarding a prediction
- iouThreshold: 0.2, // threshold for deciding whether boxes overlap too much in non-maximum suppression
- scoreThreshold: 0.5, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+ iouThreshold: 0.1, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+ scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression
enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
- maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
+ maxHands: 1, // maximum number of hands detected in the input, should be set to the minimum number for performance
detector: {
modelPath: '../models/handdetect.json',
},
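
The tightened thresholds trade recall for precision: a lower iouThreshold makes non-maximum suppression discard overlapping candidates more aggressively, a higher scoreThreshold drops low-confidence boxes earlier, and maxHands: 1 keeps the skeleton stage cheap. A minimal usage sketch follows, assuming the package's documented detect(input, config) entry point for the 0.8.x line; the function name and the partial-config override are illustrative, not part of this commit:

// hypothetical usage sketch for @vladmandic/human 0.8.x
import human from '@vladmandic/human';

async function detectSingleHand(videoElement) {
  // override only the hand section; unlisted keys keep their config.js defaults
  const result = await human.detect(videoElement, {
    hand: { enabled: true, maxHands: 1, iouThreshold: 0.1, scoreThreshold: 0.8 },
  });
  return result.hand; // at most maxHands entries
}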

@@ -61,7 +61,7 @@ async function build(f, msg) {
if (!es) es = await esbuild.startService();
// common build options
const cfg = {
- minify: true,
+ minify: false,
bundle: true,
sourcemap: true,
logLevel: 'error',
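
Turning minify off keeps the dev-server bundles debuggable while sourcemaps stay on. For context, a sketch of the service-based build loop this file uses, under the esbuild 0.7.x API (entry point and output path are made up for illustration):

// minimal sketch of esbuild's pre-0.9 service API; paths are illustrative
const esbuild = require('esbuild');

let es;
async function rebuild(entry, outfile) {
  if (!es) es = await esbuild.startService(); // reuse one long-lived service across rebuilds
  await es.build({ entryPoints: [entry], outfile, minify: false, bundle: true, sourcemap: true, logLevel: 'error' });
}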

@@ -32769,25 +32769,22 @@ var Mx = we((Px) => {
return et.mul(e, this.inputSizeTensor);
});
}
- async getBoundingBoxes(n, t) {
- const e = this.model.predict(n), r = e.squeeze(), i = et.tidy(() => et.sigmoid(et.slice(r, [0, 0], [-1, 1])).squeeze()), a = et.slice(r, [0, 1], [-1, 4]), s = this.normalizeBoxes(a), o = await et.image.nonMaxSuppressionAsync(s, i, t.maxHands, t.iouThreshold, t.scoreThreshold), c = o.arraySync(), l = [e, o, r, s, a, i];
- if (c.length === 0)
- return l.forEach((h) => h.dispose()), null;
- const u = [];
+ async getBoxes(n, t) {
+ const e = this.model.predict(n), r = e.squeeze(), i = et.tidy(() => et.sigmoid(et.slice(r, [0, 0], [-1, 1])).squeeze()), a = et.slice(r, [0, 1], [-1, 4]), s = this.normalizeBoxes(a), o = await et.image.nonMaxSuppressionAsync(s, i, t.maxHands, t.iouThreshold, t.scoreThreshold), c = o.arraySync(), l = [e, o, r, s, a, i], u = [];
for (const h of c) {
const d = et.slice(s, [h, 0], [1, -1]), p = et.slice(r, [h, 5], [1, 14]), f = et.tidy(() => this.normalizeLandmarks(p, h).reshape([-1, 2]));
- p.dispose(), u.push({boxes: d, palmLandmarks: f});
+ p.dispose(), u.push({box: d, palmLandmarks: f});
}
return l.forEach((h) => h.dispose()), u;
}
async estimateHandBounds(n, t) {
- const e = n.shape[1], r = n.shape[2], i = et.tidy(() => n.resizeBilinear([t.inputSize, t.inputSize]).div(127.5).sub(1)), a = await this.getBoundingBoxes(i, t);
+ const e = n.shape[1], r = n.shape[2], i = et.tidy(() => n.resizeBilinear([t.inputSize, t.inputSize]).div(127.5).sub(1)), a = await this.getBoxes(i, t);
if (i.dispose(), !a || a.length === 0)
return null;
const s = [];
for (const o of a) {
- const c = o.boxes.dataSync(), l = c.slice(0, 2), u = c.slice(2, 4), h = o.palmLandmarks.arraySync();
- o.boxes.dispose(), o.palmLandmarks.dispose(), s.push(UV.scaleBoxCoordinates({startPoint: l, endPoint: u, palmLandmarks: h}, [r / t.inputSize, e / t.inputSize]));
+ const c = o.box.dataSync(), l = c.slice(0, 2), u = c.slice(2, 4), h = o.palmLandmarks.arraySync();
+ o.box.dispose(), o.palmLandmarks.dispose(), s.push(UV.scaleBoxCoordinates({startPoint: l, endPoint: u, palmLandmarks: h}, [r / t.inputSize, e / t.inputSize]));
}
return s;
}
@@ -32841,7 +32838,7 @@ var Xx = we((Kx) => {
const jx = Ut(), Vn = Jp(), Wr = Yx(), GV = 0.8, qV = [0, -0.4], YV = 3, KV = [0, -0.1], jV = 1.65, $x = [0, 5, 9, 13, 17, 1, 2], $V = 0, XV = 2;
class JV {
constructor(n, t, e) {
- this.boundingBoxDetector = n, this.meshDetector = t, this.inputSize = e, this.regionsOfInterest = [], this.runsWithoutHandDetector = 0, this.skipFrames = 0, this.detectedHands = 0;
+ this.boxDetector = n, this.meshDetector = t, this.inputSize = e, this.storedBoxes = [], this.skipped = 0, this.detectedHands = 0;
}
getBoxForPalmLandmarks(n, t) {
const e = n.map((i) => {
@@ -32864,51 +32861,50 @@ var Xx = we((Kx) => {
return c.map((d) => [d[0] + h[0], d[1] + h[1], d[2]]);
}
async estimateHands(n, t) {
- this.skipFrames = t.skipFrames;
- let e = this.runsWithoutHandDetector > this.skipFrames || this.detectedHands !== this.regionsOfInterest.length, r;
- if (e && (r = await this.boundingBoxDetector.estimateHandBounds(n, t)), t.maxHands > 1 && r && r.length > 0 && r.length !== this.detectedHands && (e = true), e) {
- if (this.regionsOfInterest = [], !r || r.length === 0)
- return this.detectedHands = 0, null;
+ this.skipped++;
+ let e = false;
+ const r = this.skipped > t.skipFrames ? await this.boxDetector.estimateHandBounds(n, t) : null;
+ if (r && r.length !== this.detectedHands && this.detectedHands !== t.maxHands) {
+ this.storedBoxes = [], this.detectedHands = 0;
for (const a of r)
- this.regionsOfInterest.push(a);
- this.runsWithoutHandDetector = 0;
- } else
- this.runsWithoutHandDetector++;
+ this.storedBoxes.push(a);
+ this.storedBoxes.length > 0 && (e = true), this.skipped = 0;
}
const i = [];
- for (const a in this.regionsOfInterest) {
- const s = this.regionsOfInterest[a];
+ for (const a in this.storedBoxes) {
+ const s = this.storedBoxes[a];
if (!s)
continue;
const o = Wr.computeRotation(s.palmLandmarks[$V], s.palmLandmarks[XV]), c = Vn.getBoxCenter(s), l = [c[0] / n.shape[2], c[1] / n.shape[1]], u = jx.image.rotateWithOffset(n, o, 0, l), h = Wr.buildRotationMatrix(-o, c), d = e ? this.getBoxForPalmLandmarks(s.palmLandmarks, h) : s, p = Vn.cutBoxFromImageAndResize(d, u, [this.inputSize, this.inputSize]), f = p.div(255);
p.dispose(), u.dispose();
- const m = this.meshDetector.predict(f), [g, y] = m;
+ const [m, g] = await this.meshDetector.predict(f);
f.dispose();
- const w = g.dataSync()[0];
- if (g.dispose(), w >= t.minConfidence) {
- const b = jx.reshape(y, [-1, 3]), L = b.arraySync();
- y.dispose(), b.dispose();
- const x = this.transformRawCoords(L, d, o, h), N = this.getBoxForHandLandmarks(x);
- this.updateRegionsOfInterest(N, a);
- const I = {landmarks: x, handInViewConfidence: w, boundingBox: {topLeft: N.startPoint, bottomRight: N.endPoint}};
- i.push(I);
+ const y = m.dataSync()[0];
+ if (m.dispose(), y >= t.minConfidence) {
+ const w = jx.reshape(g, [-1, 3]), b = w.arraySync();
+ g.dispose(), w.dispose();
+ const L = this.transformRawCoords(b, d, o, h), x = this.getBoxForHandLandmarks(L);
+ this.updateStoredBoxes(x, a);
+ const N = {landmarks: L, handInViewConfidence: y, boundingBox: {topLeft: x.startPoint, bottomRight: x.endPoint}};
+ i.push(N);
} else
- this.updateRegionsOfInterest(null, a);
- y.dispose();
+ this.updateStoredBoxes(null, a);
+ g.dispose();
}
- return this.regionsOfInterest = this.regionsOfInterest.filter((a) => a !== null), this.detectedHands = i.length, i;
+ return this.storedBoxes = this.storedBoxes.filter((a) => a !== null), this.detectedHands = i.length, i;
}
calculateLandmarksBoundingBox(n) {
const t = n.map((a) => a[0]), e = n.map((a) => a[1]), r = [Math.min(...t), Math.min(...e)], i = [Math.max(...t), Math.max(...e)];
return {startPoint: r, endPoint: i};
}
- updateRegionsOfInterest(n, t) {
- const e = this.regionsOfInterest[t];
+ updateStoredBoxes(n, t) {
+ const e = this.storedBoxes[t];
let r = 0;
if (n && e && e.startPoint) {
const [i, a] = n.startPoint, [s, o] = n.endPoint, [c, l] = e.startPoint, [u, h] = e.endPoint, d = Math.max(i, c), p = Math.max(a, l), f = Math.min(s, u), m = Math.min(o, h), g = (f - d) * (m - p), y = (s - i) * (o - a), w = (u - c) * (h - a);
r = g / (y + w - g);
}
- this.regionsOfInterest[t] = r > GV ? e : n;
+ this.storedBoxes[t] = r > GV ? e : n;
}
}
Kx.HandPipeline = JV;
@@ -33159,7 +33155,7 @@ var sL = we((aL) => {
});
var oL = we((sG) => {
Is(sG, {default: () => oG});
- var oG = {backend: "webgl", console: true, async: true, profile: false, deallocate: false, scoped: false, videoOptimized: true, filter: {enabled: true, width: 0, height: 0, return: true, brightness: 0, contrast: 0, sharpness: 0, blur: 0, saturation: 0, hue: 0, negative: false, sepia: false, vintage: false, kodachrome: false, technicolor: false, polaroid: false, pixelate: 0}, gesture: {enabled: true}, face: {enabled: true, detector: {modelPath: "../models/blazeface-back.json", inputSize: 256, maxFaces: 10, skipFrames: 15, minConfidence: 0.1, iouThreshold: 0.1, scoreThreshold: 0.2}, mesh: {enabled: true, modelPath: "../models/facemesh.json", inputSize: 192}, iris: {enabled: true, modelPath: "../models/iris.json", enlargeFactor: 2.3, inputSize: 64}, age: {enabled: true, modelPath: "../models/age-ssrnet-imdb.json", inputSize: 64, skipFrames: 15}, gender: {enabled: true, minConfidence: 0.1, modelPath: "../models/gender-ssrnet-imdb.json", inputSize: 64, skipFrames: 15}, emotion: {enabled: true, inputSize: 64, minConfidence: 0.2, skipFrames: 15, modelPath: "../models/emotion-large.json"}}, body: {enabled: true, modelPath: "../models/posenet.json", inputResolution: 257, outputStride: 16, maxDetections: 10, scoreThreshold: 0.8, nmsRadius: 20}, hand: {enabled: true, inputSize: 256, skipFrames: 15, minConfidence: 0.5, iouThreshold: 0.2, scoreThreshold: 0.5, enlargeFactor: 1.65, maxHands: 10, detector: {modelPath: "../models/handdetect.json"}, skeleton: {modelPath: "../models/handskeleton.json"}}};
+ var oG = {backend: "webgl", console: true, async: true, profile: false, deallocate: false, scoped: false, videoOptimized: true, filter: {enabled: true, width: 0, height: 0, return: true, brightness: 0, contrast: 0, sharpness: 0, blur: 0, saturation: 0, hue: 0, negative: false, sepia: false, vintage: false, kodachrome: false, technicolor: false, polaroid: false, pixelate: 0}, gesture: {enabled: true}, face: {enabled: true, detector: {modelPath: "../models/blazeface-back.json", inputSize: 256, maxFaces: 10, skipFrames: 15, minConfidence: 0.1, iouThreshold: 0.1, scoreThreshold: 0.2}, mesh: {enabled: true, modelPath: "../models/facemesh.json", inputSize: 192}, iris: {enabled: true, modelPath: "../models/iris.json", enlargeFactor: 2.3, inputSize: 64}, age: {enabled: true, modelPath: "../models/age-ssrnet-imdb.json", inputSize: 64, skipFrames: 15}, gender: {enabled: true, minConfidence: 0.1, modelPath: "../models/gender-ssrnet-imdb.json", inputSize: 64, skipFrames: 15}, emotion: {enabled: true, inputSize: 64, minConfidence: 0.2, skipFrames: 15, modelPath: "../models/emotion-large.json"}}, body: {enabled: true, modelPath: "../models/posenet.json", inputResolution: 257, outputStride: 16, maxDetections: 10, scoreThreshold: 0.8, nmsRadius: 20}, hand: {enabled: true, inputSize: 256, skipFrames: 15, minConfidence: 0.5, iouThreshold: 0.1, scoreThreshold: 0.8, enlargeFactor: 1.65, maxHands: 1, detector: {modelPath: "../models/handdetect.json"}, skeleton: {modelPath: "../models/handskeleton.json"}}};
});
var lL = we((pq, cL) => {
cL.exports = {name: "@vladmandic/human", version: "0.8.1", description: "human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking, Age & Gender Prediction, Emotion Prediction & Gesture Recognition", sideEffects: false, main: "dist/human.node.js", module: "dist/human.esm.js", browser: "dist/human.esm.js", author: "Vladimir Mandic <mandic00@live.com>", bugs: {url: "https://github.com/vladmandic/human/issues"}, homepage: "https://github.com/vladmandic/human#readme", license: "MIT", engines: {node: ">=14.0.0"}, repository: {type: "git", url: "git+https://github.com/vladmandic/human.git"}, dependencies: {}, peerDependencies: {}, devDependencies: {"@tensorflow/tfjs": "^2.7.0", "@tensorflow/tfjs-node": "^2.7.0", "@vladmandic/pilogger": "^0.2.7", chokidar: "^3.4.3", dayjs: "^1.9.5", esbuild: "^0.7.22", eslint: "^7.13.0", "eslint-config-airbnb-base": "^14.2.1", "eslint-plugin-import": "^2.22.1", "eslint-plugin-json": "^2.1.2", "eslint-plugin-node": "^11.1.0", "eslint-plugin-promise": "^4.2.1", rimraf: "^3.0.2", seedrandom: "^3.0.5", "simple-git": "^2.21.0"}, scripts: {start: "node --trace-warnings --unhandled-rejections=strict --trace-uncaught --no-deprecation src/node.js", lint: "eslint src/*.js demo/*.js", dev: "npm install && node --trace-warnings --unhandled-rejections=strict --trace-uncaught --no-deprecation dev-server.js", "build-iife": "esbuild --bundle --minify --platform=browser --sourcemap --target=es2018 --format=iife --external:fs --global-name=Human --metafile=dist/human.json --outfile=dist/human.js src/human.js", "build-esm-bundle": "esbuild --bundle --minify --platform=browser --sourcemap --target=es2018 --format=esm --external:fs --metafile=dist/human.esm.json --outfile=dist/human.esm.js src/human.js", "build-esm-nobundle": "esbuild --bundle --minify --platform=browser --sourcemap --target=es2018 --format=esm --external:@tensorflow --external:fs --metafile=dist/human.esm-nobundle.json --outfile=dist/human.esm-nobundle.js src/human.js", "build-node": "esbuild --bundle --minify --platform=node --sourcemap --target=es2018 --format=cjs --metafile=dist/human.node.json --outfile=dist/human.node.js src/human.js", "build-node-nobundle": "esbuild --bundle --minify --platform=node --sourcemap --target=es2018 --format=cjs --external:@tensorflow --metafile=dist/human.node.json --outfile=dist/human.node-nobundle.js src/human.js", "build-demo": "esbuild --bundle --log-level=error --platform=browser --sourcemap --target=es2018 --format=esm --external:fs --metafile=dist/demo-browser-index.json --outfile=dist/demo-browser-index.js demo/browser.js", build: "rimraf dist/* && npm run build-iife && npm run build-esm-bundle && npm run build-esm-nobundle && npm run build-node && npm run build-node-nobundle && npm run build-demo", update: "npm update --depth 20 --force && npm dedupe && npm prune && npm audit", changelog: "node changelog.js"}, keywords: ["tensorflowjs", "face-detection", "face-geometry", "body-tracking", "hand-tracking", "iris-tracking", "age-estimation", "emotion-detection", "gender-prediction", "gesture-recognition"]};

File diff suppressed because one or more lines are too long

@@ -23,7 +23,7 @@
"imports": []
},
"dist/human.esm.js": {
"bytes": 1278535,
"bytes": 1278200,
"imports": []
}
},
@@ -31,13 +31,13 @@
"dist/demo-browser-index.js.map": {
"imports": [],
"inputs": {},
"bytes": 5533079
"bytes": 5532275
},
"dist/demo-browser-index.js": {
"imports": [],
"inputs": {
"dist/human.esm.js": {
"bytesInOutput": 1665000
"bytesInOutput": 1664606
},
"dist/human.esm.js": {
"bytesInOutput": 8716
@@ -52,7 +52,7 @@
"bytesInOutput": 15855
}
},
"bytes": 1709822
"bytes": 1709428
}
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@@ -206,7 +206,7 @@
"imports": []
},
"src/hand/handdetector.js": {
"bytes": 4316,
"bytes": 4220,
"imports": [
{
"path": "src/hand/box.js"
@@ -214,7 +214,7 @@
]
},
"src/hand/handpipeline.js": {
"bytes": 8724,
"bytes": 8214,
"imports": [
{
"path": "src/hand/box.js"
@@ -301,7 +301,7 @@
"dist/human.esm-nobundle.js.map": {
"imports": [],
"inputs": {},
"bytes": 623550
"bytes": 622741
},
"dist/human.esm-nobundle.js": {
"imports": [],
@@ -379,13 +379,13 @@
"bytesInOutput": 1420
},
"src/hand/handdetector.js": {
"bytesInOutput": 1830
"bytesInOutput": 1748
},
"src/hand/util.js": {
"bytesInOutput": 997
},
"src/hand/handpipeline.js": {
"bytesInOutput": 3130
"bytesInOutput": 2878
},
"src/hand/anchors.js": {
"bytesInOutput": 127000
@@ -403,7 +403,7 @@
"bytesInOutput": 2349
},
"config.js": {
"bytesInOutput": 1326
"bytesInOutput": 1325
},
"package.json": {
"bytesInOutput": 3004
@@ -415,7 +415,7 @@
"bytesInOutput": 0
}
},
"bytes": 216865
"bytes": 216530
}
}
}

dist/human.esm.js (vendored): 4 lines changed

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

dist/human.esm.json (vendored): 14 lines changed
@@ -387,7 +387,7 @@
]
},
"src/hand/handdetector.js": {
"bytes": 4316,
"bytes": 4220,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@@ -398,7 +398,7 @@
]
},
"src/hand/handpipeline.js": {
"bytes": 8724,
"bytes": 8214,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@@ -513,7 +513,7 @@
"dist/human.esm.js.map": {
"imports": [],
"inputs": {},
"bytes": 5419535
"bytes": 5418726
},
"dist/human.esm.js": {
"imports": [],
@@ -648,13 +648,13 @@
"bytesInOutput": 1398
},
"src/hand/handdetector.js": {
"bytesInOutput": 1836
"bytesInOutput": 1754
},
"src/hand/util.js": {
"bytesInOutput": 1005
},
"src/hand/handpipeline.js": {
"bytesInOutput": 3128
"bytesInOutput": 2876
},
"src/hand/anchors.js": {
"bytesInOutput": 127001
@@ -672,7 +672,7 @@
"bytesInOutput": 2365
},
"config.js": {
"bytesInOutput": 1327
"bytesInOutput": 1326
},
"package.json": {
"bytesInOutput": 3005
@@ -684,7 +684,7 @@
"bytesInOutput": 0
}
},
"bytes": 1278535
"bytes": 1278200
}
}
}

dist/human.js (vendored): 4 lines changed

File diff suppressed because one or more lines are too long

dist/human.js.map (vendored): 4 lines changed

File diff suppressed because one or more lines are too long

dist/human.json (vendored): 14 lines changed
@@ -387,7 +387,7 @@
]
},
"src/hand/handdetector.js": {
"bytes": 4316,
"bytes": 4220,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@@ -398,7 +398,7 @@
]
},
"src/hand/handpipeline.js": {
"bytes": 8724,
"bytes": 8214,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@@ -513,7 +513,7 @@
"dist/human.js.map": {
"imports": [],
"inputs": {},
"bytes": 5419531
"bytes": 5418722
},
"dist/human.js": {
"imports": [],
@@ -648,13 +648,13 @@
"bytesInOutput": 1398
},
"src/hand/handdetector.js": {
"bytesInOutput": 1836
"bytesInOutput": 1754
},
"src/hand/util.js": {
"bytesInOutput": 1005
},
"src/hand/handpipeline.js": {
"bytesInOutput": 3128
"bytesInOutput": 2876
},
"src/hand/anchors.js": {
"bytesInOutput": 127001
@@ -672,7 +672,7 @@
"bytesInOutput": 2365
},
"config.js": {
"bytesInOutput": 1327
"bytesInOutput": 1326
},
"package.json": {
"bytesInOutput": 3004
@@ -681,7 +681,7 @@
"bytesInOutput": 7257
}
},
"bytes": 1278580
"bytes": 1278245
}
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

dist/human.node.js (vendored): 4 lines changed

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

dist/human.node.json (vendored): 14 lines changed
@@ -206,7 +206,7 @@
"imports": []
},
"src/hand/handdetector.js": {
"bytes": 4316,
"bytes": 4220,
"imports": [
{
"path": "src/hand/box.js"
@@ -214,7 +214,7 @@
]
},
"src/hand/handpipeline.js": {
"bytes": 8724,
"bytes": 8214,
"imports": [
{
"path": "src/hand/box.js"
@@ -301,7 +301,7 @@
"dist/human.node-nobundle.js.map": {
"imports": [],
"inputs": {},
"bytes": 637683
"bytes": 636874
},
"dist/human.node-nobundle.js": {
"imports": [],
@@ -379,13 +379,13 @@
"bytesInOutput": 1419
},
"src/hand/handdetector.js": {
"bytesInOutput": 1830
"bytesInOutput": 1748
},
"src/hand/util.js": {
"bytesInOutput": 996
},
"src/hand/handpipeline.js": {
"bytesInOutput": 3130
"bytesInOutput": 2878
},
"src/hand/anchors.js": {
"bytesInOutput": 127000
@@ -403,7 +403,7 @@
"bytesInOutput": 2349
},
"config.js": {
"bytesInOutput": 1325
"bytesInOutput": 1324
},
"package.json": {
"bytesInOutput": 3004
@@ -415,7 +415,7 @@
"bytesInOutput": 7201
}
},
"bytes": 216872
"bytes": 216537
}
}
}

@@ -46,33 +46,30 @@ class HandDetector {
});
}
- async getBoundingBoxes(input, config) {
- const batchedPrediction = this.model.predict(input);
- const prediction = batchedPrediction.squeeze();
- const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
- const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
+ async getBoxes(input, config) {
+ const batched = this.model.predict(input);
+ const predictions = batched.squeeze();
+ const scores = tf.tidy(() => tf.sigmoid(tf.slice(predictions, [0, 0], [-1, 1])).squeeze());
+ // const scoresVal = scores.dataSync(); // scoresVal[boxIndex] is box confidence
+ const rawBoxes = tf.slice(predictions, [0, 1], [-1, 4]);
const boxes = this.normalizeBoxes(rawBoxes);
- const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
- const boxesWithHands = boxesWithHandsTensor.arraySync();
+ const boxesWithHandsT = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
+ const boxesWithHands = boxesWithHandsT.arraySync();
const toDispose = [
- batchedPrediction,
- boxesWithHandsTensor,
- prediction,
+ batched,
+ boxesWithHandsT,
+ predictions,
boxes,
rawBoxes,
scores,
];
- if (boxesWithHands.length === 0) {
- toDispose.forEach((tensor) => tensor.dispose());
- return null;
- }
const hands = [];
for (const boxIndex of boxesWithHands) {
const matchingBox = tf.slice(boxes, [boxIndex, 0], [1, -1]);
- const rawPalmLandmarks = tf.slice(prediction, [boxIndex, 5], [1, 14]);
+ const rawPalmLandmarks = tf.slice(predictions, [boxIndex, 5], [1, 14]);
const palmLandmarks = tf.tidy(() => this.normalizeLandmarks(rawPalmLandmarks, boxIndex).reshape([-1, 2]));
rawPalmLandmarks.dispose();
- hands.push({ boxes: matchingBox, palmLandmarks });
+ hands.push({ box: matchingBox, palmLandmarks });
}
toDispose.forEach((tensor) => tensor.dispose());
return hands;
@@ -82,16 +79,16 @@ class HandDetector {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
const image = tf.tidy(() => input.resizeBilinear([config.inputSize, config.inputSize]).div(127.5).sub(1));
- const predictions = await this.getBoundingBoxes(image, config);
+ const predictions = await this.getBoxes(image, config);
image.dispose();
if (!predictions || predictions.length === 0) return null;
const hands = [];
for (const prediction of predictions) {
- const boundingBoxes = prediction.boxes.dataSync();
+ const boundingBoxes = prediction.box.dataSync();
const startPoint = boundingBoxes.slice(0, 2);
const endPoint = boundingBoxes.slice(2, 4);
const palmLandmarks = prediction.palmLandmarks.arraySync();
- prediction.boxes.dispose();
+ prediction.box.dispose();
prediction.palmLandmarks.dispose();
hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / config.inputSize, inputHeight / config.inputSize]));
}
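
The detector flow above: scale the frame to the model's square input and normalize to [-1, 1], read a per-anchor confidence via sigmoid on column 0, take columns 1-4 as the raw box, then let NMS keep at most maxHands boxes. A standalone sketch of that score-plus-NMS selection with tfjs; the tensor layout is assumed from the slices above, not taken from the model definition:

// standalone sketch of the score + NMS selection step; shapes are assumed for illustration
const tf = require('@tensorflow/tfjs');

async function selectBoxes(predictions, config) {
  // predictions: [numAnchors, 1 + 4 + ...], column 0 = logit, columns 1-4 = raw box
  const scores = tf.tidy(() => tf.sigmoid(tf.slice(predictions, [0, 0], [-1, 1])).squeeze());
  const boxes = tf.slice(predictions, [0, 1], [-1, 4]);
  const kept = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold);
  const indices = kept.arraySync(); // anchor indices of the surviving hand candidates
  scores.dispose(); boxes.dispose(); kept.dispose();
  return indices;
}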

@@ -30,12 +30,11 @@ const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;
class HandPipeline {
constructor(boundingBoxDetector, meshDetector, inputSize) {
- this.boundingBoxDetector = boundingBoxDetector;
+ this.boxDetector = boundingBoxDetector;
this.meshDetector = meshDetector;
this.inputSize = inputSize;
- this.regionsOfInterest = [];
- this.runsWithoutHandDetector = 0;
- this.skipFrames = 0;
+ this.storedBoxes = [];
+ this.skipped = 0;
this.detectedHands = 0;
}
@@ -86,30 +85,24 @@ class HandPipeline {
}
async estimateHands(image, config) {
- this.skipFrames = config.skipFrames;
- // don't need box detection if we have sufficient number of boxes
- let useFreshBox = (this.runsWithoutHandDetector > this.skipFrames) || (this.detectedHands !== this.regionsOfInterest.length);
- let boundingBoxPredictions;
- // but every skipFrames check if detect boxes number changed
- if (useFreshBox) boundingBoxPredictions = await this.boundingBoxDetector.estimateHandBounds(image, config);
- // if there are new boxes and number of boxes doesn't match use new boxes, but not if maxhands is fixed to 1
- if (config.maxHands > 1 && boundingBoxPredictions && boundingBoxPredictions.length > 0 && boundingBoxPredictions.length !== this.detectedHands) useFreshBox = true;
- if (useFreshBox) {
- this.regionsOfInterest = [];
- if (!boundingBoxPredictions || boundingBoxPredictions.length === 0) {
- this.detectedHands = 0;
- return null;
- }
- for (const boundingBoxPrediction of boundingBoxPredictions) {
- this.regionsOfInterest.push(boundingBoxPrediction);
- }
- this.runsWithoutHandDetector = 0;
- } else {
- this.runsWithoutHandDetector++;
+ this.skipped++;
+ let useFreshBox = false;
+ // run new detector every skipFrames
+ const boxes = (this.skipped > config.skipFrames)
+ ? await this.boxDetector.estimateHandBounds(image, config) : null;
+ // if detector result count doesn't match current working set, use it to reset current working set
+ if (boxes && (boxes.length !== this.detectedHands) && (this.detectedHands !== config.maxHands)) {
+ // console.log(this.skipped, config.maxHands, this.detectedHands, this.storedBoxes.length, boxes.length);
+ this.storedBoxes = [];
+ this.detectedHands = 0;
+ for (const possible of boxes) this.storedBoxes.push(possible);
+ if (this.storedBoxes.length > 0) useFreshBox = true;
+ this.skipped = 0;
}
const hands = [];
- for (const i in this.regionsOfInterest) {
- const currentBox = this.regionsOfInterest[i];
+ // go through working set of boxes
+ for (const i in this.storedBoxes) {
+ const currentBox = this.storedBoxes[i];
if (!currentBox) continue;
const angle = util.computeRotation(currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE], currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]);
const palmCenter = box.getBoxCenter(currentBox);
@@ -121,8 +114,7 @@
const handImage = croppedInput.div(255);
croppedInput.dispose();
rotatedImage.dispose();
- const prediction = this.meshDetector.predict(handImage);
- const [confidence, keypoints] = prediction;
+ const [confidence, keypoints] = await this.meshDetector.predict(handImage);
handImage.dispose();
const confidenceValue = confidence.dataSync()[0];
confidence.dispose();
@@ -133,7 +125,7 @@
keypointsReshaped.dispose();
const coords = this.transformRawCoords(rawCoords, newBox, angle, rotationMatrix);
const nextBoundingBox = this.getBoxForHandLandmarks(coords);
- this.updateRegionsOfInterest(nextBoundingBox, i);
+ this.updateStoredBoxes(nextBoundingBox, i);
const result = {
landmarks: coords,
handInViewConfidence: confidenceValue,
@@ -144,7 +136,7 @@
};
hands.push(result);
} else {
- this.updateRegionsOfInterest(null, i);
+ this.updateStoredBoxes(null, i);
/*
const result = {
handInViewConfidence: confidenceValue,
@@ -158,7 +150,7 @@
}
keypoints.dispose();
}
- this.regionsOfInterest = this.regionsOfInterest.filter((a) => a !== null);
+ this.storedBoxes = this.storedBoxes.filter((a) => a !== null);
this.detectedHands = hands.length;
return hands;
}
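
The rewritten control flow replaces the runsWithoutHandDetector bookkeeping with a single skipped counter: the expensive box detector runs only once skipped exceeds config.skipFrames, and between runs the stored boxes are re-analyzed by the skeleton model. The same idea in isolation, simplified to a generic throttled detector (hypothetical class, not from this commit):

// generic frame-skipping cache around an expensive detector; names are hypothetical
class ThrottledDetector {
  constructor(detectFn, skipFrames) {
    this.detectFn = detectFn;
    this.skipFrames = skipFrames;
    this.skipped = 0;
    this.stored = [];
  }
  async get(input) {
    this.skipped++;
    if (this.skipped > this.skipFrames) {
      const fresh = await this.detectFn(input); // full detection pass
      // reset the working set only when the detected count changes
      if (fresh && fresh.length !== this.stored.length) { this.stored = fresh; this.skipped = 0; }
    }
    return this.stored; // reused between detector runs
  }
}
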
@@ -172,8 +164,8 @@
return { startPoint, endPoint };
}
- updateRegionsOfInterest(newBox, i) {
- const previousBox = this.regionsOfInterest[i];
+ updateStoredBoxes(newBox, i) {
+ const previousBox = this.storedBoxes[i];
let iou = 0;
if (newBox && previousBox && previousBox.startPoint) {
const [boxStartX, boxStartY] = newBox.startPoint;
@@ -189,7 +181,7 @@
const previousBoxArea = (previousBoxEndX - previousBoxStartX) * (previousBoxEndY - boxStartY);
iou = intersection / (boxArea + previousBoxArea - intersection);
}
- this.regionsOfInterest[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
+ this.storedBoxes[i] = iou > UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD ? previousBox : newBox;
}
}
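
updateStoredBoxes keeps the previous box whenever it overlaps the new one by more than UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD (GV = 0.8 in the bundle), which damps per-frame jitter. For reference, a self-contained IoU helper that performs the same intersection-over-union test, with each box area computed from its own corners:

// reference IoU for boxes shaped like { startPoint: [x, y], endPoint: [x, y] }
function iou(a, b) {
  const left = Math.max(a.startPoint[0], b.startPoint[0]);
  const top = Math.max(a.startPoint[1], b.startPoint[1]);
  const right = Math.min(a.endPoint[0], b.endPoint[0]);
  const bottom = Math.min(a.endPoint[1], b.endPoint[1]);
  const intersection = Math.max(0, right - left) * Math.max(0, bottom - top);
  const areaA = (a.endPoint[0] - a.startPoint[0]) * (a.endPoint[1] - a.startPoint[1]);
  const areaB = (b.endPoint[0] - b.startPoint[0]) * (b.endPoint[1] - b.startPoint[1]);
  return intersection / (areaA + areaB - intersection);
}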