From b0bd103db200ffe91d7c687f5888d66fb01ecbef Mon Sep 17 00:00:00 2001
From: Vladimir Mandic <mandic00@live.com>
Date: Thu, 2 Sep 2021 08:50:16 -0400
Subject: [PATCH] update hand detector processing algorithm

---
 CHANGELOG.md                 |  5 ++---
 demo/index.js                | 14 +++++++++----
 package.json                 |  2 +-
 src/config.ts                |  6 +++---
 src/draw/draw.ts             |  1 +
 src/fingerpose/estimator.ts  |  7 +++++--
 src/handpose/handdetector.ts | 38 ++++++++++++++----------------------
 src/handpose/handpipeline.ts |  5 +++--
 src/human.ts                 |  5 +++--
 src/interpolate.ts           |  5 +++--
 10 files changed, 46 insertions(+), 42 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c41c13ec..c6478593 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,9 +11,8 @@ Repository: ****

 ### **HEAD -> main** 2021/08/31 mandic00@live.com

-
-### **origin/main** 2021/08/31 mandic00@live.com
-
+- simplify canvas handling in nodejs
+- full rebuild

 ### **2.1.5** 2021/08/31 mandic00@live.com

diff --git a/demo/index.js b/demo/index.js
index 81811206..8e38ce8d 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -35,7 +35,7 @@ let userConfig = {
   /*
   wasmPath: 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm@3.9.0/dist/',
   async: false,
-  cacheSensitivity: 0,
+  cacheSensitivity: 0.75,
   filter: {
     enabled: false,
     flip: false,
@@ -49,11 +49,12 @@ let userConfig = {
   },
   object: { enabled: false },
   gesture: { enabled: true },
-  hand: { enabled: false },
+  hand: { enabled: true },
   body: { enabled: false },
   // body: { enabled: true, modelPath: 'movenet-multipose.json' },
   // body: { enabled: true, modelPath: 'posenet.json' },
   segmentation: { enabled: false },
+  /*
 */
 };
@@ -82,6 +83,7 @@ const ui = {
   buffered: true, // should output be buffered between frames
   interpolated: true, // should output be interpolated for smoothness between frames
   iconSize: '48px', // ui icon sizes
+  autoPlay: false, // start webcam & detection on load

   // internal variables
   busy: false, // internal camera busy flag
@@ -375,9 +377,9 @@ async function setupCamera() {
       canvas.height = video.videoHeight;
       ui.menuWidth.input.setAttribute('value', video.videoWidth);
       ui.menuHeight.input.setAttribute('value', video.videoHeight);
-      if (live) video.play();
+      if (live || ui.autoPlay) video.play();
       // eslint-disable-next-line no-use-before-define
-      if (live && !ui.detectThread) runHumanDetect(video, canvas);
+      if ((live || ui.autoPlay) && !ui.detectThread) runHumanDetect(video, canvas);
       ui.busy = false;
       resolve();
     };
@@ -936,6 +938,10 @@ async function main() {
     ui.bench = JSON.parse(params.get('bench'));
     log('overriding bench:', ui.bench);
   }
+  if (params.has('play')) {
+    ui.autoPlay = true;
+    log('overriding autoplay:', true);
+  }
   if (params.has('draw')) {
     ui.drawWarmup = JSON.parse(params.get('draw'));
     log('overriding drawWarmup:', ui.drawWarmup);
diff --git a/package.json b/package.json
index d9360b70..4cd6a46e 100644
--- a/package.json
+++ b/package.json
@@ -66,7 +66,7 @@
     "@tensorflow/tfjs-layers": "^3.9.0",
     "@tensorflow/tfjs-node": "^3.9.0",
     "@tensorflow/tfjs-node-gpu": "^3.9.0",
-    "@types/node": "^16.7.8",
+    "@types/node": "^16.7.10",
     "@typescript-eslint/eslint-plugin": "^4.30.0",
     "@typescript-eslint/parser": "^4.30.0",
     "@vladmandic/pilogger": "^0.2.18",
diff --git a/src/config.ts b/src/config.ts
index 2e05c167..f258050e 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -331,9 +331,9 @@ const config: Config = {
     // e.g., if model is running at 25 FPS, we can re-use existing bounding
     // box for updated hand skeleton analysis as the hand probably
     // hasn't moved much in short time (10 * 1/25 = 0.4 sec)
-    minConfidence: 0.1, // threshold for discarding a prediction
-    iouThreshold: 0.1, // amount of overlap between two detected objects before one object is removed
-    maxDetected: 2, // maximum number of hands detected in the input
+    minConfidence: 0.8, // threshold for discarding a prediction
+    iouThreshold: 0.2, // amount of overlap between two detected objects before one object is removed
+    maxDetected: 1, // maximum number of hands detected in the input
     // should be set to the minimum number for performance
     landmarks: true, // detect hand landmarks or just hand boundary box
     detector: {
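The config.ts hunk above tightens the hand defaults considerably (minConfidence 0.1 → 0.8, iouThreshold 0.1 → 0.2, maxDetected 2 → 1). A minimal usage sketch of how a caller picks these up or overrides them, assuming the published @vladmandic/human package; the video element and function name are hypothetical:

```ts
import Human from '@vladmandic/human';

// a partial config is merged over the new defaults from the diff above
const human = new Human({
  hand: {
    enabled: true,
    minConfidence: 0.8, // discard palm detections scoring below this
    iouThreshold: 0.2,  // suppress overlapping palm boxes sooner
    maxDetected: 1,     // single hand; the minimum needed is fastest
  },
});

async function run(video: HTMLVideoElement) {
  const result = await human.detect(video); // runs all enabled modules
  for (const hand of result.hand) console.log(hand.box, hand.keypoints);
}
```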
diff --git a/src/draw/draw.ts b/src/draw/draw.ts
index 0a496cf2..6d4e13d2 100644
--- a/src/draw/draw.ts
+++ b/src/draw/draw.ts
@@ -407,6 +407,7 @@ export async function hand(inCanvas: HTMLCanvasElement, result: Array, dra
     }
     if (localOptions.drawLabels) {
       const addHandLabel = (part, title) => {
+        if (!part) return;
         ctx.fillStyle = localOptions.useDepth ? `rgba(${127.5 + (2 * part[part.length - 1][2])}, ${127.5 - (2 * part[part.length - 1][2])}, 255, 0.5)` : localOptions.color;
         ctx.fillText(title, part[part.length - 1][0] + 4, part[part.length - 1][1] + 4);
       };
diff --git a/src/fingerpose/estimator.ts b/src/fingerpose/estimator.ts
index 33d92a41..7cee2f81 100644
--- a/src/fingerpose/estimator.ts
+++ b/src/fingerpose/estimator.ts
@@ -167,6 +167,11 @@ export function estimate(landmarks) {
   // step 1: calculate slopes
   const slopesXY: Array<number[]> = [];
   const slopesYZ: Array<number[]> = [];
+  const fingerCurls: Array<number> = [];
+  const fingerDirections: Array<number> = [];
+  if (!landmarks) return { curls: fingerCurls, directions: fingerDirections };
+
+  // step 1: calculate slopes
   for (const finger of Finger.all) {
     const points = Finger.getPoints(finger);
     const slopeAtXY: Array<number> = [];
@@ -186,8 +191,6 @@
   }

   // step 2: calculate orientations
-  const fingerCurls: Array<number> = [];
-  const fingerDirections: Array<number> = [];
   for (const finger of Finger.all) {
     // start finger predictions from palm - except for thumb
     const pointIndexAt = (finger === Finger.thumb) ? 1 : 0;
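The estimator.ts hunk hoists fingerCurls/fingerDirections above the slope pass and adds an early return, so a frame with no landmarks now yields empty results instead of throwing inside the loops. The same guard-first shape in isolation, as a sketch (names mirror the diff; the analysis body is elided):

```ts
type Landmark = [number, number, number];

// guard-first: allocate the result containers, return them untouched when
// there is no input, and only then do the expensive per-finger analysis
function estimateSketch(landmarks?: Landmark[]) {
  const fingerCurls: number[] = [];
  const fingerDirections: number[] = [];
  if (!landmarks) return { curls: fingerCurls, directions: fingerDirections };
  // ... slope, curl and direction passes fill the arrays here ...
  return { curls: fingerCurls, directions: fingerDirections };
}

estimateSketch(undefined); // { curls: [], directions: [] } instead of a TypeError
```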
diff --git a/src/handpose/handdetector.ts b/src/handpose/handdetector.ts
index eb32de4e..762a25b1 100644
--- a/src/handpose/handdetector.ts
+++ b/src/handpose/handdetector.ts
@@ -40,31 +40,23 @@ export class HandDetector {
   }

   async getBoxes(input, config) {
-    const batched = this.model.predict(input) as Tensor;
-    const predictions = tf.squeeze(batched);
-    tf.dispose(batched);
-    const scoresT = tf.tidy(() => tf.squeeze(tf.sigmoid(tf.slice(predictions, [0, 0], [-1, 1]))));
-    const scores = await scoresT.data();
-    const rawBoxes = tf.slice(predictions, [0, 1], [-1, 4]);
-    const boxes = this.normalizeBoxes(rawBoxes);
-    tf.dispose(rawBoxes);
-    const filteredT = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
-    const filtered = await filteredT.array();
-
-    tf.dispose(scoresT);
-    tf.dispose(filteredT);
+    const t: Record<string, Tensor> = {};
+    t.batched = this.model.predict(input) as Tensor;
+    t.predictions = tf.squeeze(t.batched);
+    t.scores = tf.tidy(() => tf.squeeze(tf.sigmoid(tf.slice(t.predictions, [0, 0], [-1, 1]))));
+    const scores = await t.scores.data();
+    t.boxes = tf.slice(t.predictions, [0, 1], [-1, 4]);
+    t.norm = this.normalizeBoxes(t.boxes);
+    t.nms = await tf.image.nonMaxSuppressionAsync(t.norm, t.scores, 10 * config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
+    const nms = await t.nms.array() as Array<number>;
     const hands: Array<{ box: Tensor, palmLandmarks: Tensor, confidence: number }> = [];
-    for (const index of filtered) {
-      if (scores[index] >= config.hand.minConfidence) {
-        const matchingBox = tf.slice(boxes, [index, 0], [1, -1]);
-        const rawPalmLandmarks = tf.slice(predictions, [index, 5], [1, 14]);
-        const palmLandmarks = tf.tidy(() => tf.reshape(this.normalizeLandmarks(rawPalmLandmarks, index), [-1, 2]));
-        tf.dispose(rawPalmLandmarks);
-        hands.push({ box: matchingBox, palmLandmarks, confidence: scores[index] });
-      }
+    for (const index of nms) {
+      const palmBox = tf.slice(t.norm, [index, 0], [1, -1]);
+      const palmLandmarks = tf.tidy(() => tf.reshape(this.normalizeLandmarks(tf.slice(t.predictions, [index, 5], [1, 14]), index), [-1, 2]));
+      hands.push({ box: palmBox, palmLandmarks, confidence: scores[index] });
+      // console.log('handdetector:getBoxes', nms.length, index, scores[index], config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence, palmBox.dataSync());
     }
-    tf.dispose(predictions);
-    tf.dispose(boxes);
+    for (const tensor of Object.keys(t)) tf.dispose(t[tensor]); // dispose all
     return hands;
   }
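The getBoxes rewrite above replaces ad-hoc tf.dispose calls with a single map of intermediate tensors, and it now requests 10 * maxDetected candidates from NMS, with minConfidence filtering delegated to the scoreThreshold argument of nonMaxSuppressionAsync. A standalone sketch of the bookkeeping pattern, assuming @tensorflow/tfjs and an already-loaded graph model (the function name is hypothetical):

```ts
import * as tf from '@tensorflow/tfjs';

async function scoresFromModel(model: tf.GraphModel, input: tf.Tensor) {
  const t: Record<string, tf.Tensor> = {}; // every intermediate tensor is registered here
  t.batched = model.predict(input) as tf.Tensor;
  t.predictions = tf.squeeze(t.batched); // drop the batch dimension
  t.scores = tf.tidy(() => tf.squeeze(tf.sigmoid(tf.slice(t.predictions, [0, 0], [-1, 1]))));
  const scores = await t.scores.data(); // copy values out before freeing
  for (const tensor of Object.keys(t)) tf.dispose(t[tensor]); // one loop, no leaks
  return scores;
}
```

Compared to the removed code, a forgotten tf.dispose can no longer leak a tensor: anything assigned to t is released by the final loop.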
diff --git a/src/handpose/handpipeline.ts b/src/handpose/handpipeline.ts
index b178fd41..2b6d941b 100644
--- a/src/handpose/handpipeline.ts
+++ b/src/handpose/handpipeline.ts
@@ -85,7 +85,7 @@ export class HandPipeline {
     // run new detector every skipFrames unless we only want box to start with
     let boxes;
-    // console.log(this.skipped, config.hand.skipFrames, !config.hand.landmarks, !config.skipFrame);
+    // console.log('handpipeline:estimateHands:skip criteria', this.skipped, config.hand.skipFrames, !config.hand.landmarks, !config.skipFrame);
     // should skip hand detector?
     if ((this.skipped === 0) || (this.skipped > config.hand.skipFrames) || !config.hand.landmarks || !config.skipFrame) {
       boxes = await this.handDetector.estimateHandBounds(image, config);
       this.skipped = 0;
@@ -120,7 +120,7 @@ export class HandPipeline {
       tf.dispose(handImage);
       const confidence = (await confidenceT.data())[0];
       tf.dispose(confidenceT);
-      if (confidence >= config.hand.minConfidence) {
+      if (confidence >= config.hand.minConfidence / 4) {
         const keypointsReshaped = tf.reshape(keypoints, [-1, 3]);
         const rawCoords = await keypointsReshaped.array();
         tf.dispose(keypoints);
@@ -135,6 +135,7 @@ export class HandPipeline {
         };
         hands.push(result);
       } else {
+        // console.log('handpipeline:estimateHands low', confidence);
         this.storedBoxes[i] = null;
       }
       tf.dispose(keypoints);
diff --git a/src/human.ts b/src/human.ts
index 7445c08a..fee348c1 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -148,8 +148,8 @@
    * @param userConfig: {@link Config}
    */
   constructor(userConfig?: Config | Record<string, unknown>) {
-    Human.version = app.version;
-    Object.defineProperty(this, 'version', { value: app.version });
+    Human.version = app.version; // expose version property directly on the class itself
+    Object.defineProperty(this, 'version', { value: app.version }); // expose version property on each instance of the class
     defaults.wasmPath = `https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm@${tf.version_core}/dist/`;
     this.config = mergeDeep(defaults, userConfig || {});
     this.tf = tf;
@@ -427,6 +427,7 @@
     const skipFrame = diff < Math.max(this.config.cacheSensitivity, this.#lastCacheDiff);
     // if difference is above 10x threshold, drop the last value to force a cache reset on significant scene or image changes
     this.#lastCacheDiff = diff > 10 * this.config.cacheSensitivity ? 0 : diff;
+    // console.log('skipFrame', skipFrame, this.config.cacheSensitivity, diff);
     return skipFrame;
   }
diff --git a/src/interpolate.ts b/src/interpolate.ts
index 200f7dfd..835fa80f 100644
--- a/src/interpolate.ts
+++ b/src/interpolate.ts
@@ -59,9 +59,10 @@ export function calc(newResult: Result): Result {
         .map((b, j) => ((bufferedFactor - 1) * bufferedResult.hand[i].box[j] + b) / bufferedFactor)) as [number, number, number, number];
       const boxRaw = (newResult.hand[i].boxRaw // update boxRaw
         .map((b, j) => ((bufferedFactor - 1) * bufferedResult.hand[i].boxRaw[j] + b) / bufferedFactor)) as [number, number, number, number];
-      const keypoints = newResult.hand[i].keypoints // update landmarks
+      const keypoints = newResult.hand[i].keypoints ? newResult.hand[i].keypoints // update landmarks
         .map((landmark, j) => landmark
-          .map((coord, k) => (((bufferedFactor - 1) * bufferedResult.hand[i].keypoints[j][k] + coord) / bufferedFactor)) as [number, number, number]);
+          .map((coord, k) => (((bufferedFactor - 1) * bufferedResult.hand[i].keypoints[j][k] + coord) / bufferedFactor)) as [number, number, number])
+        : [];
       const keys = Object.keys(newResult.hand[i].annotations); // update annotations
       const annotations = {};
       for (const key of keys) {
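Most of interpolate.ts, including the hand branch patched above, applies the same running-average formula to every coordinate; the new ternary just skips it when a result arrives without keypoints. The formula on a single value, as a sketch (bufferedFactor is the smoothing factor used throughout the file; the helper name is hypothetical):

```ts
// each buffered value moves 1/bufferedFactor of the way toward the new value:
// smoothed = ((bufferedFactor - 1) * previous + current) / bufferedFactor
function smooth(previous: number, current: number, bufferedFactor: number): number {
  return ((bufferedFactor - 1) * previous + current) / bufferedFactor;
}

smooth(100, 120, 4); // 105: a quarter of the way from 100 to 120
```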