From 48351b153932d694e4a537b7616b96244bef437e Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Wed, 5 May 2021 10:07:44 -0400 Subject: [PATCH] update & fix posenet --- CHANGELOG.md | 7 ++-- demo/index.js | 13 +++---- src/config.ts | 2 +- src/handpose/handpipeline.ts | 46 ++++++++++++------------- src/human.ts | 4 +-- src/posenet/poses.ts | 66 ++++++++++++++++++------------------ src/posenet/utils.ts | 1 - wiki | 2 +- 8 files changed, 70 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9eba2565..cc00fad2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # @vladmandic/human -Version: **1.8.2** +Version: **1.8.3** Description: **Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition** Author: **Vladimir Mandic ** @@ -9,7 +9,10 @@ Repository: **** ## Changelog -### **HEAD -> main** 2021/05/04 mandic00@live.com +### **1.8.3** 2021/05/05 mandic00@live.com + + +### **origin/main** 2021/05/04 mandic00@live.com ### **1.8.2** 2021/05/04 mandic00@live.com diff --git a/demo/index.js b/demo/index.js index 341356e1..654b1414 100644 --- a/demo/index.js +++ b/demo/index.js @@ -9,11 +9,10 @@ import webRTC from './helpers/webrtc.js'; let human; const userConfig = { - warmup: 'full', + warmup: 'none', /* backend: 'webgl', async: true, - videoOptimized: false, filter: { enabled: false, @@ -36,10 +35,12 @@ const userConfig = { // ui options const ui = { - baseBackground: 'rgba(50, 50, 50, 1)', // 'grey' + // configurable items + console: true, // log messages to browser console crop: true, // video mode crop to size or leave full frame - columns: 2, // when processing sample images create this many columns facing: true, // camera facing front or back + baseBackground: 'rgba(50, 50, 50, 1)', // 'grey' + columns: 2, // when processing sample images create this many columns useWorker: false, // use web workers for processing worker: 'index-worker.js', samples: ['../assets/sample6.jpg', '../assets/sample1.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample3.jpg', '../assets/sample2.jpg'], @@ -47,10 +48,10 @@ const ui = { useWebRTC: false, // use webrtc as camera source instead of local webcam webRTCServer: 'http://localhost:8002', webRTCStream: 'reowhite', - console: true, // log messages to browser console maxFPSframes: 10, // keep fps history for how many frames modelsPreload: true, // preload human models on startup modelsWarmup: true, // warmup human models on startup + // internal variables busy: false, // internal camera busy flag menuWidth: 0, // internal menuHeight: 0, // internal @@ -58,7 +59,7 @@ const ui = { detectFPS: [], // internal, holds fps values for detection performance drawFPS: [], // internal, holds fps values for draw performance buffered: true, // should output be buffered between frames - drawWarmup: true, // debug only, should warmup image processing be displayed on startup + drawWarmup: false, // debug only, should warmup image processing be displayed on startup drawThread: null, // internl, perform draw operations in a separate thread detectThread: null, // internl, perform detect operations in a separate thread framesDraw: 0, // internal, statistics on frames drawn diff --git a/src/config.ts b/src/config.ts index 75f41cae..0acaa61c 100644 --- a/src/config.ts +++ b/src/config.ts @@ -318,7 +318,7 @@ const config: Config = { // should skipFrames be reset immediately to force new detection cycle minConfidence: 0.1, // threshold for discarding a prediction iouThreshold: 0.1, // ammount of overlap between two detected objects before one object is removed - maxDetected: 1, // maximum number of hands detected in the input + maxDetected: 2, // maximum number of hands detected in the input // should be set to the minimum number for performance landmarks: true, // detect hand landmarks or just hand boundary box detector: { diff --git a/src/handpose/handpipeline.ts b/src/handpose/handpipeline.ts index fee268fd..991b7326 100644 --- a/src/handpose/handpipeline.ts +++ b/src/handpose/handpipeline.ts @@ -2,13 +2,11 @@ import * as tf from '../../dist/tfjs.esm.js'; import * as box from './box'; import * as util from './util'; -// const PALM_BOX_SHIFT_VECTOR = [0, -0.4]; -const PALM_BOX_ENLARGE_FACTOR = 5; // default 3 -// const HAND_BOX_SHIFT_VECTOR = [0, -0.1]; // move detected hand box by x,y to ease landmark detection -const HAND_BOX_ENLARGE_FACTOR = 1.65; // default 1.65 -const PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2]; -const PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0; -const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2; +const palmBoxEnlargeFactor = 5; // default 3 +const handBoxEnlargeFactor = 1.65; // default 1.65 +const palmLandmarkIds = [0, 5, 9, 13, 17, 1, 2]; +const palmLandmarksPalmBase = 0; +const palmLandmarksMiddleFingerBase = 2; export class HandPipeline { handDetector: any; @@ -27,20 +25,27 @@ export class HandPipeline { this.detectedHands = 0; } + // eslint-disable-next-line class-methods-use-this + calculateLandmarksBoundingBox(landmarks) { + const xs = landmarks.map((d) => d[0]); + const ys = landmarks.map((d) => d[1]); + const startPoint = [Math.min(...xs), Math.min(...ys)]; + const endPoint = [Math.max(...xs), Math.max(...ys)]; + return { startPoint, endPoint }; + } + getBoxForPalmLandmarks(palmLandmarks, rotationMatrix) { const rotatedPalmLandmarks = palmLandmarks.map((coord) => util.rotatePoint([...coord, 1], rotationMatrix)); const boxAroundPalm = this.calculateLandmarksBoundingBox(rotatedPalmLandmarks); - // return box.enlargeBox(box.squarifyBox(box.shiftBox(boxAroundPalm, PALM_BOX_SHIFT_VECTOR)), PALM_BOX_ENLARGE_FACTOR); - return box.enlargeBox(box.squarifyBox(boxAroundPalm), PALM_BOX_ENLARGE_FACTOR); + return box.enlargeBox(box.squarifyBox(boxAroundPalm), palmBoxEnlargeFactor); } getBoxForHandLandmarks(landmarks) { const boundingBox = this.calculateLandmarksBoundingBox(landmarks); - // const boxAroundHand = box.enlargeBox(box.squarifyBox(box.shiftBox(boundingBox, HAND_BOX_SHIFT_VECTOR)), HAND_BOX_ENLARGE_FACTOR); - const boxAroundHand = box.enlargeBox(box.squarifyBox(boundingBox), HAND_BOX_ENLARGE_FACTOR); + const boxAroundHand = box.enlargeBox(box.squarifyBox(boundingBox), handBoxEnlargeFactor); boxAroundHand.palmLandmarks = []; - for (let i = 0; i < PALM_LANDMARK_IDS.length; i++) { - boxAroundHand.palmLandmarks.push(landmarks[PALM_LANDMARK_IDS[i]].slice(0, 2)); + for (let i = 0; i < palmLandmarkIds.length; i++) { + boxAroundHand.palmLandmarks.push(landmarks[palmLandmarkIds[i]].slice(0, 2)); } return boxAroundHand; } @@ -98,7 +103,7 @@ export class HandPipeline { const currentBox = this.storedBoxes[i]; if (!currentBox) continue; if (config.hand.landmarks) { - const angle = config.hand.rotation ? util.computeRotation(currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE], currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]) : 0; + const angle = config.hand.rotation ? util.computeRotation(currentBox.palmLandmarks[palmLandmarksPalmBase], currentBox.palmLandmarks[palmLandmarksMiddleFingerBase]) : 0; const palmCenter = box.getBoxCenter(currentBox); const palmCenterNormalized = [palmCenter[0] / image.shape[2], palmCenter[1] / image.shape[1]]; const rotatedImage = config.hand.rotation ? tf.image.rotateWithOffset(image, angle, 0, palmCenterNormalized) : image.clone(); @@ -131,8 +136,8 @@ export class HandPipeline { } keypoints.dispose(); } else { - // const enlarged = box.enlargeBox(box.squarifyBox(box.shiftBox(currentBox, HAND_BOX_SHIFT_VECTOR)), HAND_BOX_ENLARGE_FACTOR); - const enlarged = box.enlargeBox(box.squarifyBox(currentBox), HAND_BOX_ENLARGE_FACTOR); + // const enlarged = box.enlargeBox(box.squarifyBox(box.shiftBox(currentBox, HAND_BOX_SHIFT_VECTOR)), handBoxEnlargeFactor); + const enlarged = box.enlargeBox(box.squarifyBox(currentBox), handBoxEnlargeFactor); const result = { confidence: currentBox.confidence, box: { topLeft: enlarged.startPoint, bottomRight: enlarged.endPoint }, @@ -144,13 +149,4 @@ export class HandPipeline { this.detectedHands = hands.length; return hands; } - - // eslint-disable-next-line class-methods-use-this - calculateLandmarksBoundingBox(landmarks) { - const xs = landmarks.map((d) => d[0]); - const ys = landmarks.map((d) => d[1]); - const startPoint = [Math.min(...xs), Math.min(...ys)]; - const endPoint = [Math.max(...xs), Math.max(...ys)]; - return { startPoint, endPoint }; - } } diff --git a/src/human.ts b/src/human.ts index af6a91ae..a720c2da 100644 --- a/src/human.ts +++ b/src/human.ts @@ -1,4 +1,6 @@ import { log, now, mergeDeep } from './helpers'; +import { Config, defaults } from './config'; +import { Result } from './result'; import * as sysinfo from './sysinfo'; import * as tf from '../dist/tfjs.esm.js'; import * as backend from './tfjs/backend'; @@ -13,8 +15,6 @@ import * as nanodet from './nanodet/nanodet'; import * as gesture from './gesture/gesture'; import * as image from './image/image'; import * as draw from './draw/draw'; -import { Config, defaults } from './config'; -import { Result } from './result'; import * as sample from './sample'; import * as app from '../package.json'; diff --git a/src/posenet/poses.ts b/src/posenet/poses.ts index 3ffd471b..e1a0dcb1 100644 --- a/src/posenet/poses.ts +++ b/src/posenet/poses.ts @@ -5,7 +5,7 @@ const localMaximumRadius = 1; const outputStride = 16; const squaredNmsRadius = 50 ** 2; -function traverseToTargetKeypoint(edgeId, sourceKeypoint, targetKeypointId, scoresBuffer, offsets, displacements, offsetRefineStep = 2) { +function traverse(edgeId, sourceKeypoint, targetId, scores, offsets, displacements, offsetRefineStep = 2) { const getDisplacement = (point) => ({ y: displacements.get(point.y, point.x, edgeId), x: displacements.get(point.y, point.x, (displacements.shape[2] / 2) + edgeId), @@ -15,7 +15,7 @@ function traverseToTargetKeypoint(edgeId, sourceKeypoint, targetKeypointId, scor x: utils.clamp(Math.round(point.x / outputStride), 0, width - 1), }); - const [height, width] = scoresBuffer.shape; + const [height, width] = scores.shape; // Nearest neighbor interpolation for the source->target displacements. const sourceKeypointIndices = getStridedIndexNearPoint(sourceKeypoint.position, height, width); const displacement = getDisplacement(sourceKeypointIndices); @@ -23,49 +23,48 @@ function traverseToTargetKeypoint(edgeId, sourceKeypoint, targetKeypointId, scor let targetKeypoint = displacedPoint; for (let i = 0; i < offsetRefineStep; i++) { const targetKeypointIndices = getStridedIndexNearPoint(targetKeypoint, height, width); - const offsetPoint = utils.getOffsetPoint(targetKeypointIndices.y, targetKeypointIndices.x, targetKeypointId, offsets); - targetKeypoint = utils.addVectors({ - x: targetKeypointIndices.x * outputStride, - y: targetKeypointIndices.y * outputStride, - }, { x: offsetPoint.x, y: offsetPoint.y }); + const offsetPoint = utils.getOffsetPoint(targetKeypointIndices.y, targetKeypointIndices.x, targetId, offsets); + targetKeypoint = utils.addVectors( + { x: targetKeypointIndices.x * outputStride, y: targetKeypointIndices.y * outputStride }, + { x: offsetPoint.x, y: offsetPoint.y }, + ); } const targetKeyPointIndices = getStridedIndexNearPoint(targetKeypoint, height, width); - const score = scoresBuffer.get(targetKeyPointIndices.y, targetKeyPointIndices.x, targetKeypointId); - return { position: targetKeypoint, part: kpt.partNames[targetKeypointId], score }; + const score = scores.get(targetKeyPointIndices.y, targetKeyPointIndices.x, targetId); + return { position: targetKeypoint, part: kpt.partNames[targetId], score }; } export function decodePose(root, scores, offsets, displacementsFwd, displacementsBwd) { - const parentChildrenTuples = kpt.poseChain.map(([parentJoinName, childJoinName]) => ([kpt.partIds[parentJoinName], kpt.partIds[childJoinName]])); - const parentToChildEdges = parentChildrenTuples.map(([, childJointId]) => childJointId); - const childToParentEdges = parentChildrenTuples.map(([parentJointId]) => parentJointId); + const tuples = kpt.poseChain.map(([parentJoinName, childJoinName]) => ([kpt.partIds[parentJoinName], kpt.partIds[childJoinName]])); + const edgesFwd = tuples.map(([, childJointId]) => childJointId); + const edgesBwd = tuples.map(([parentJointId]) => parentJointId); const numParts = scores.shape[2]; // [21,21,17] - const numEdges = parentToChildEdges.length; - const instanceKeypoints = new Array(numParts); + const numEdges = edgesFwd.length; + const keypoints = new Array(numParts); // Start a new detection instance at the position of the root. - // const { part: rootPart, score: rootScore } = root; const rootPoint = utils.getImageCoords(root.part, outputStride, offsets); - instanceKeypoints[root.part.id] = { + keypoints[root.part.id] = { score: root.score, part: kpt.partNames[root.part.id], position: rootPoint, }; // Decode the part positions upwards in the tree, following the backward displacements. for (let edge = numEdges - 1; edge >= 0; --edge) { - const sourceKeypointId = parentToChildEdges[edge]; - const targetKeypointId = childToParentEdges[edge]; - if (instanceKeypoints[sourceKeypointId] && !instanceKeypoints[targetKeypointId]) { - instanceKeypoints[targetKeypointId] = traverseToTargetKeypoint(edge, instanceKeypoints[sourceKeypointId], targetKeypointId, scores, offsets, displacementsBwd); + const sourceId = edgesFwd[edge]; + const targetId = edgesBwd[edge]; + if (keypoints[sourceId] && !keypoints[targetId]) { + keypoints[targetId] = traverse(edge, keypoints[sourceId], targetId, scores, offsets, displacementsBwd); } } // Decode the part positions downwards in the tree, following the forward displacements. for (let edge = 0; edge < numEdges; ++edge) { - const sourceKeypointId = childToParentEdges[edge]; - const targetKeypointId = parentToChildEdges[edge]; - if (instanceKeypoints[sourceKeypointId] && !instanceKeypoints[targetKeypointId]) { - instanceKeypoints[targetKeypointId] = traverseToTargetKeypoint(edge, instanceKeypoints[sourceKeypointId], targetKeypointId, scores, offsets, displacementsFwd); + const sourceId = edgesBwd[edge]; + const targetId = edgesFwd[edge]; + if (keypoints[sourceId] && !keypoints[targetId]) { + keypoints[targetId] = traverse(edge, keypoints[sourceId], targetId, scores, offsets, displacementsFwd); } } - return instanceKeypoints; + return keypoints; } function scoreIsMaximumInLocalWindow(keypointId, score, heatmapY, heatmapX, scores) { @@ -106,31 +105,32 @@ export function buildPartWithScoreQueue(minConfidence, scores) { function withinRadius(poses, { x, y }, keypointId) { return poses.some(({ keypoints }) => { - const correspondingKeypoint = keypoints[keypointId].position; + const correspondingKeypoint = keypoints[keypointId]?.position; + if (!correspondingKeypoint) return false; return utils.squaredDistance(y, x, correspondingKeypoint.y, correspondingKeypoint.x) <= squaredNmsRadius; }); } -function getInstanceScore(existingPoses, instanceKeypoints) { - const notOverlappedKeypointScores = instanceKeypoints.reduce((result, { position, score }, keypointId) => { +function getInstanceScore(existingPoses, keypoints) { + const notOverlappedKeypointScores = keypoints.reduce((result, { position, score }, keypointId) => { if (!withinRadius(existingPoses, position, keypointId)) result += score; return result; }, 0.0); - return notOverlappedKeypointScores / instanceKeypoints.length; + return notOverlappedKeypointScores / keypoints.length; } -export function decode(offsetsBuffer, scoresBuffer, displacementsFwdBuffer, displacementsBwdBuffer, maxDetected, minConfidence) { +export function decode(offsets, scores, displacementsFwd, displacementsBwd, maxDetected, minConfidence) { const poses: Array<{ keypoints: any, box: any, score: number }> = []; - const queue = buildPartWithScoreQueue(minConfidence, scoresBuffer); + const queue = buildPartWithScoreQueue(minConfidence, scores); // Generate at most maxDetected object instances per image in decreasing root part score order. while (poses.length < maxDetected && !queue.empty()) { // The top element in the queue is the next root candidate. const root = queue.dequeue(); // Part-based non-maximum suppression: We reject a root candidate if it is within a disk of `nmsRadius` pixels from the corresponding part of a previously detected instance. - const rootImageCoords = utils.getImageCoords(root.part, outputStride, offsetsBuffer); + const rootImageCoords = utils.getImageCoords(root.part, outputStride, offsets); if (withinRadius(poses, rootImageCoords, root.part.id)) continue; // Else start a new detection instance at the position of the root. - let keypoints = decodePose(root, scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer); + let keypoints = decodePose(root, scores, offsets, displacementsFwd, displacementsBwd); keypoints = keypoints.filter((a) => a.score > minConfidence); const score = getInstanceScore(poses, keypoints); const box = utils.getBoundingBox(keypoints); diff --git a/src/posenet/utils.ts b/src/posenet/utils.ts index cf63df9b..881cb1fd 100644 --- a/src/posenet/utils.ts +++ b/src/posenet/utils.ts @@ -39,7 +39,6 @@ export function scalePoses(poses, [height, width], [inputResolutionHeight, input position: { x: Math.trunc(position.x * scaleX), y: Math.trunc(position.y * scaleY) }, })), }); - const scaledPoses = poses.map((pose) => scalePose(pose, height / inputResolutionHeight, width / inputResolutionWidth)); return scaledPoses; } diff --git a/wiki b/wiki index f9fc8596..981e1452 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit f9fc859622290183712e7f984db59e3c8494afe9 +Subproject commit 981e14523d86586926d9d1141b04e652decafe5f