mirror of https://github.com/vladmandic/human
update & fix posenet
parent
5546580eda
commit
48351b1539
|
@ -1,6 +1,6 @@
|
|||
# @vladmandic/human
|
||||
|
||||
Version: **1.8.2**
|
||||
Version: **1.8.3**
|
||||
Description: **Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition**
|
||||
|
||||
Author: **Vladimir Mandic <mandic00@live.com>**
|
||||
|
@ -9,7 +9,10 @@ Repository: **<git+https://github.com/vladmandic/human.git>**
|
|||
|
||||
## Changelog
|
||||
|
||||
### **HEAD -> main** 2021/05/04 mandic00@live.com
|
||||
### **1.8.3** 2021/05/05 mandic00@live.com
|
||||
|
||||
|
||||
### **origin/main** 2021/05/04 mandic00@live.com
|
||||
|
||||
|
||||
### **1.8.2** 2021/05/04 mandic00@live.com
|
||||
|
|
|
@ -9,11 +9,10 @@ import webRTC from './helpers/webrtc.js';
|
|||
let human;
|
||||
|
||||
const userConfig = {
|
||||
warmup: 'full',
|
||||
warmup: 'none',
|
||||
/*
|
||||
backend: 'webgl',
|
||||
async: true,
|
||||
|
||||
videoOptimized: false,
|
||||
filter: {
|
||||
enabled: false,
|
||||
|
@ -36,10 +35,12 @@ const userConfig = {
|
|||
|
||||
// ui options
|
||||
const ui = {
|
||||
baseBackground: 'rgba(50, 50, 50, 1)', // 'grey'
|
||||
// configurable items
|
||||
console: true, // log messages to browser console
|
||||
crop: true, // video mode crop to size or leave full frame
|
||||
columns: 2, // when processing sample images create this many columns
|
||||
facing: true, // camera facing front or back
|
||||
baseBackground: 'rgba(50, 50, 50, 1)', // 'grey'
|
||||
columns: 2, // when processing sample images create this many columns
|
||||
useWorker: false, // use web workers for processing
|
||||
worker: 'index-worker.js',
|
||||
samples: ['../assets/sample6.jpg', '../assets/sample1.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample3.jpg', '../assets/sample2.jpg'],
|
||||
|
@ -47,10 +48,10 @@ const ui = {
|
|||
useWebRTC: false, // use webrtc as camera source instead of local webcam
|
||||
webRTCServer: 'http://localhost:8002',
|
||||
webRTCStream: 'reowhite',
|
||||
console: true, // log messages to browser console
|
||||
maxFPSframes: 10, // keep fps history for how many frames
|
||||
modelsPreload: true, // preload human models on startup
|
||||
modelsWarmup: true, // warmup human models on startup
|
||||
// internal variables
|
||||
busy: false, // internal camera busy flag
|
||||
menuWidth: 0, // internal
|
||||
menuHeight: 0, // internal
|
||||
|
@ -58,7 +59,7 @@ const ui = {
|
|||
detectFPS: [], // internal, holds fps values for detection performance
|
||||
drawFPS: [], // internal, holds fps values for draw performance
|
||||
buffered: true, // should output be buffered between frames
|
||||
drawWarmup: true, // debug only, should warmup image processing be displayed on startup
|
||||
drawWarmup: false, // debug only, should warmup image processing be displayed on startup
|
||||
drawThread: null, // internal, perform draw operations in a separate thread
|
||||
detectThread: null, // internal, perform detect operations in a separate thread
|
||||
framesDraw: 0, // internal, statistics on frames drawn
|
||||
|
|
|
@ -318,7 +318,7 @@ const config: Config = {
|
|||
// should skipFrames be reset immediately to force new detection cycle
|
||||
minConfidence: 0.1, // threshold for discarding a prediction
|
||||
iouThreshold: 0.1, // amount of overlap between two detected objects before one object is removed
|
||||
maxDetected: 1, // maximum number of hands detected in the input
|
||||
maxDetected: 2, // maximum number of hands detected in the input
|
||||
// should be set to the minimum number for performance
|
||||
landmarks: true, // detect hand landmarks or just hand boundary box
|
||||
detector: {
|
||||
|
|
|
@ -2,13 +2,11 @@ import * as tf from '../../dist/tfjs.esm.js';
|
|||
import * as box from './box';
|
||||
import * as util from './util';
|
||||
|
||||
// const PALM_BOX_SHIFT_VECTOR = [0, -0.4];
|
||||
const PALM_BOX_ENLARGE_FACTOR = 5; // default 3
|
||||
// const HAND_BOX_SHIFT_VECTOR = [0, -0.1]; // move detected hand box by x,y to ease landmark detection
|
||||
const HAND_BOX_ENLARGE_FACTOR = 1.65; // default 1.65
|
||||
const PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2];
|
||||
const PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0;
|
||||
const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;
|
||||
const palmBoxEnlargeFactor = 5; // default 3
|
||||
const handBoxEnlargeFactor = 1.65; // default 1.65
|
||||
const palmLandmarkIds = [0, 5, 9, 13, 17, 1, 2];
|
||||
const palmLandmarksPalmBase = 0;
|
||||
const palmLandmarksMiddleFingerBase = 2;
|
||||
|
||||
export class HandPipeline {
|
||||
handDetector: any;
|
||||
|
@ -27,20 +25,27 @@ export class HandPipeline {
|
|||
this.detectedHands = 0;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line class-methods-use-this
|
||||
calculateLandmarksBoundingBox(landmarks) {
|
||||
const xs = landmarks.map((d) => d[0]);
|
||||
const ys = landmarks.map((d) => d[1]);
|
||||
const startPoint = [Math.min(...xs), Math.min(...ys)];
|
||||
const endPoint = [Math.max(...xs), Math.max(...ys)];
|
||||
return { startPoint, endPoint };
|
||||
}
|
||||
|
||||
getBoxForPalmLandmarks(palmLandmarks, rotationMatrix) {
|
||||
const rotatedPalmLandmarks = palmLandmarks.map((coord) => util.rotatePoint([...coord, 1], rotationMatrix));
|
||||
const boxAroundPalm = this.calculateLandmarksBoundingBox(rotatedPalmLandmarks);
|
||||
// return box.enlargeBox(box.squarifyBox(box.shiftBox(boxAroundPalm, PALM_BOX_SHIFT_VECTOR)), PALM_BOX_ENLARGE_FACTOR);
|
||||
return box.enlargeBox(box.squarifyBox(boxAroundPalm), PALM_BOX_ENLARGE_FACTOR);
|
||||
return box.enlargeBox(box.squarifyBox(boxAroundPalm), palmBoxEnlargeFactor);
|
||||
}
|
||||
|
||||
getBoxForHandLandmarks(landmarks) {
|
||||
const boundingBox = this.calculateLandmarksBoundingBox(landmarks);
|
||||
// const boxAroundHand = box.enlargeBox(box.squarifyBox(box.shiftBox(boundingBox, HAND_BOX_SHIFT_VECTOR)), HAND_BOX_ENLARGE_FACTOR);
|
||||
const boxAroundHand = box.enlargeBox(box.squarifyBox(boundingBox), HAND_BOX_ENLARGE_FACTOR);
|
||||
const boxAroundHand = box.enlargeBox(box.squarifyBox(boundingBox), handBoxEnlargeFactor);
|
||||
boxAroundHand.palmLandmarks = [];
|
||||
for (let i = 0; i < PALM_LANDMARK_IDS.length; i++) {
|
||||
boxAroundHand.palmLandmarks.push(landmarks[PALM_LANDMARK_IDS[i]].slice(0, 2));
|
||||
for (let i = 0; i < palmLandmarkIds.length; i++) {
|
||||
boxAroundHand.palmLandmarks.push(landmarks[palmLandmarkIds[i]].slice(0, 2));
|
||||
}
|
||||
return boxAroundHand;
|
||||
}
|
||||
|
@ -98,7 +103,7 @@ export class HandPipeline {
|
|||
const currentBox = this.storedBoxes[i];
|
||||
if (!currentBox) continue;
|
||||
if (config.hand.landmarks) {
|
||||
const angle = config.hand.rotation ? util.computeRotation(currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_PALM_BASE], currentBox.palmLandmarks[PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]) : 0;
|
||||
const angle = config.hand.rotation ? util.computeRotation(currentBox.palmLandmarks[palmLandmarksPalmBase], currentBox.palmLandmarks[palmLandmarksMiddleFingerBase]) : 0;
|
||||
const palmCenter = box.getBoxCenter(currentBox);
|
||||
const palmCenterNormalized = [palmCenter[0] / image.shape[2], palmCenter[1] / image.shape[1]];
|
||||
const rotatedImage = config.hand.rotation ? tf.image.rotateWithOffset(image, angle, 0, palmCenterNormalized) : image.clone();
|
||||
|
@ -131,8 +136,8 @@ export class HandPipeline {
|
|||
}
|
||||
keypoints.dispose();
|
||||
} else {
|
||||
// const enlarged = box.enlargeBox(box.squarifyBox(box.shiftBox(currentBox, HAND_BOX_SHIFT_VECTOR)), HAND_BOX_ENLARGE_FACTOR);
|
||||
const enlarged = box.enlargeBox(box.squarifyBox(currentBox), HAND_BOX_ENLARGE_FACTOR);
|
||||
// const enlarged = box.enlargeBox(box.squarifyBox(box.shiftBox(currentBox, HAND_BOX_SHIFT_VECTOR)), handBoxEnlargeFactor);
|
||||
const enlarged = box.enlargeBox(box.squarifyBox(currentBox), handBoxEnlargeFactor);
|
||||
const result = {
|
||||
confidence: currentBox.confidence,
|
||||
box: { topLeft: enlarged.startPoint, bottomRight: enlarged.endPoint },
|
||||
|
@ -144,13 +149,4 @@ export class HandPipeline {
|
|||
this.detectedHands = hands.length;
|
||||
return hands;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line class-methods-use-this
|
||||
calculateLandmarksBoundingBox(landmarks) {
|
||||
const xs = landmarks.map((d) => d[0]);
|
||||
const ys = landmarks.map((d) => d[1]);
|
||||
const startPoint = [Math.min(...xs), Math.min(...ys)];
|
||||
const endPoint = [Math.max(...xs), Math.max(...ys)];
|
||||
return { startPoint, endPoint };
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
import { log, now, mergeDeep } from './helpers';
|
||||
import { Config, defaults } from './config';
|
||||
import { Result } from './result';
|
||||
import * as sysinfo from './sysinfo';
|
||||
import * as tf from '../dist/tfjs.esm.js';
|
||||
import * as backend from './tfjs/backend';
|
||||
|
@ -13,8 +15,6 @@ import * as nanodet from './nanodet/nanodet';
|
|||
import * as gesture from './gesture/gesture';
|
||||
import * as image from './image/image';
|
||||
import * as draw from './draw/draw';
|
||||
import { Config, defaults } from './config';
|
||||
import { Result } from './result';
|
||||
import * as sample from './sample';
|
||||
import * as app from '../package.json';
|
||||
|
||||
|
|
|
@ -5,7 +5,7 @@ const localMaximumRadius = 1;
|
|||
const outputStride = 16;
|
||||
const squaredNmsRadius = 50 ** 2;
|
||||
|
||||
function traverseToTargetKeypoint(edgeId, sourceKeypoint, targetKeypointId, scoresBuffer, offsets, displacements, offsetRefineStep = 2) {
|
||||
function traverse(edgeId, sourceKeypoint, targetId, scores, offsets, displacements, offsetRefineStep = 2) {
|
||||
const getDisplacement = (point) => ({
|
||||
y: displacements.get(point.y, point.x, edgeId),
|
||||
x: displacements.get(point.y, point.x, (displacements.shape[2] / 2) + edgeId),
|
||||
|
@ -15,7 +15,7 @@ function traverseToTargetKeypoint(edgeId, sourceKeypoint, targetKeypointId, scor
|
|||
x: utils.clamp(Math.round(point.x / outputStride), 0, width - 1),
|
||||
});
|
||||
|
||||
const [height, width] = scoresBuffer.shape;
|
||||
const [height, width] = scores.shape;
|
||||
// Nearest neighbor interpolation for the source->target displacements.
|
||||
const sourceKeypointIndices = getStridedIndexNearPoint(sourceKeypoint.position, height, width);
|
||||
const displacement = getDisplacement(sourceKeypointIndices);
|
||||
|
@ -23,49 +23,48 @@ function traverseToTargetKeypoint(edgeId, sourceKeypoint, targetKeypointId, scor
|
|||
let targetKeypoint = displacedPoint;
|
||||
for (let i = 0; i < offsetRefineStep; i++) {
|
||||
const targetKeypointIndices = getStridedIndexNearPoint(targetKeypoint, height, width);
|
||||
const offsetPoint = utils.getOffsetPoint(targetKeypointIndices.y, targetKeypointIndices.x, targetKeypointId, offsets);
|
||||
targetKeypoint = utils.addVectors({
|
||||
x: targetKeypointIndices.x * outputStride,
|
||||
y: targetKeypointIndices.y * outputStride,
|
||||
}, { x: offsetPoint.x, y: offsetPoint.y });
|
||||
const offsetPoint = utils.getOffsetPoint(targetKeypointIndices.y, targetKeypointIndices.x, targetId, offsets);
|
||||
targetKeypoint = utils.addVectors(
|
||||
{ x: targetKeypointIndices.x * outputStride, y: targetKeypointIndices.y * outputStride },
|
||||
{ x: offsetPoint.x, y: offsetPoint.y },
|
||||
);
|
||||
}
|
||||
const targetKeyPointIndices = getStridedIndexNearPoint(targetKeypoint, height, width);
|
||||
const score = scoresBuffer.get(targetKeyPointIndices.y, targetKeyPointIndices.x, targetKeypointId);
|
||||
return { position: targetKeypoint, part: kpt.partNames[targetKeypointId], score };
|
||||
const score = scores.get(targetKeyPointIndices.y, targetKeyPointIndices.x, targetId);
|
||||
return { position: targetKeypoint, part: kpt.partNames[targetId], score };
|
||||
}
|
||||
|
||||
export function decodePose(root, scores, offsets, displacementsFwd, displacementsBwd) {
|
||||
const parentChildrenTuples = kpt.poseChain.map(([parentJoinName, childJoinName]) => ([kpt.partIds[parentJoinName], kpt.partIds[childJoinName]]));
|
||||
const parentToChildEdges = parentChildrenTuples.map(([, childJointId]) => childJointId);
|
||||
const childToParentEdges = parentChildrenTuples.map(([parentJointId]) => parentJointId);
|
||||
const tuples = kpt.poseChain.map(([parentJoinName, childJoinName]) => ([kpt.partIds[parentJoinName], kpt.partIds[childJoinName]]));
|
||||
const edgesFwd = tuples.map(([, childJointId]) => childJointId);
|
||||
const edgesBwd = tuples.map(([parentJointId]) => parentJointId);
|
||||
const numParts = scores.shape[2]; // [21,21,17]
|
||||
const numEdges = parentToChildEdges.length;
|
||||
const instanceKeypoints = new Array(numParts);
|
||||
const numEdges = edgesFwd.length;
|
||||
const keypoints = new Array(numParts);
|
||||
// Start a new detection instance at the position of the root.
|
||||
// const { part: rootPart, score: rootScore } = root;
|
||||
const rootPoint = utils.getImageCoords(root.part, outputStride, offsets);
|
||||
instanceKeypoints[root.part.id] = {
|
||||
keypoints[root.part.id] = {
|
||||
score: root.score,
|
||||
part: kpt.partNames[root.part.id],
|
||||
position: rootPoint,
|
||||
};
|
||||
// Decode the part positions upwards in the tree, following the backward displacements.
|
||||
for (let edge = numEdges - 1; edge >= 0; --edge) {
|
||||
const sourceKeypointId = parentToChildEdges[edge];
|
||||
const targetKeypointId = childToParentEdges[edge];
|
||||
if (instanceKeypoints[sourceKeypointId] && !instanceKeypoints[targetKeypointId]) {
|
||||
instanceKeypoints[targetKeypointId] = traverseToTargetKeypoint(edge, instanceKeypoints[sourceKeypointId], targetKeypointId, scores, offsets, displacementsBwd);
|
||||
const sourceId = edgesFwd[edge];
|
||||
const targetId = edgesBwd[edge];
|
||||
if (keypoints[sourceId] && !keypoints[targetId]) {
|
||||
keypoints[targetId] = traverse(edge, keypoints[sourceId], targetId, scores, offsets, displacementsBwd);
|
||||
}
|
||||
}
|
||||
// Decode the part positions downwards in the tree, following the forward displacements.
|
||||
for (let edge = 0; edge < numEdges; ++edge) {
|
||||
const sourceKeypointId = childToParentEdges[edge];
|
||||
const targetKeypointId = parentToChildEdges[edge];
|
||||
if (instanceKeypoints[sourceKeypointId] && !instanceKeypoints[targetKeypointId]) {
|
||||
instanceKeypoints[targetKeypointId] = traverseToTargetKeypoint(edge, instanceKeypoints[sourceKeypointId], targetKeypointId, scores, offsets, displacementsFwd);
|
||||
const sourceId = edgesBwd[edge];
|
||||
const targetId = edgesFwd[edge];
|
||||
if (keypoints[sourceId] && !keypoints[targetId]) {
|
||||
keypoints[targetId] = traverse(edge, keypoints[sourceId], targetId, scores, offsets, displacementsFwd);
|
||||
}
|
||||
}
|
||||
return instanceKeypoints;
|
||||
return keypoints;
|
||||
}
|
||||
|
||||
function scoreIsMaximumInLocalWindow(keypointId, score, heatmapY, heatmapX, scores) {
|
||||
|
@ -106,31 +105,32 @@ export function buildPartWithScoreQueue(minConfidence, scores) {
|
|||
|
||||
function withinRadius(poses, { x, y }, keypointId) {
|
||||
return poses.some(({ keypoints }) => {
|
||||
const correspondingKeypoint = keypoints[keypointId].position;
|
||||
const correspondingKeypoint = keypoints[keypointId]?.position;
|
||||
if (!correspondingKeypoint) return false;
|
||||
return utils.squaredDistance(y, x, correspondingKeypoint.y, correspondingKeypoint.x) <= squaredNmsRadius;
|
||||
});
|
||||
}
|
||||
|
||||
function getInstanceScore(existingPoses, instanceKeypoints) {
|
||||
const notOverlappedKeypointScores = instanceKeypoints.reduce((result, { position, score }, keypointId) => {
|
||||
function getInstanceScore(existingPoses, keypoints) {
|
||||
const notOverlappedKeypointScores = keypoints.reduce((result, { position, score }, keypointId) => {
|
||||
if (!withinRadius(existingPoses, position, keypointId)) result += score;
|
||||
return result;
|
||||
}, 0.0);
|
||||
return notOverlappedKeypointScores / instanceKeypoints.length;
|
||||
return notOverlappedKeypointScores / keypoints.length;
|
||||
}
|
||||
|
||||
export function decode(offsetsBuffer, scoresBuffer, displacementsFwdBuffer, displacementsBwdBuffer, maxDetected, minConfidence) {
|
||||
export function decode(offsets, scores, displacementsFwd, displacementsBwd, maxDetected, minConfidence) {
|
||||
const poses: Array<{ keypoints: any, box: any, score: number }> = [];
|
||||
const queue = buildPartWithScoreQueue(minConfidence, scoresBuffer);
|
||||
const queue = buildPartWithScoreQueue(minConfidence, scores);
|
||||
// Generate at most maxDetected object instances per image in decreasing root part score order.
|
||||
while (poses.length < maxDetected && !queue.empty()) {
|
||||
// The top element in the queue is the next root candidate.
|
||||
const root = queue.dequeue();
|
||||
// Part-based non-maximum suppression: We reject a root candidate if it is within a disk of `nmsRadius` pixels from the corresponding part of a previously detected instance.
|
||||
const rootImageCoords = utils.getImageCoords(root.part, outputStride, offsetsBuffer);
|
||||
const rootImageCoords = utils.getImageCoords(root.part, outputStride, offsets);
|
||||
if (withinRadius(poses, rootImageCoords, root.part.id)) continue;
|
||||
// Else start a new detection instance at the position of the root.
|
||||
let keypoints = decodePose(root, scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer);
|
||||
let keypoints = decodePose(root, scores, offsets, displacementsFwd, displacementsBwd);
|
||||
keypoints = keypoints.filter((a) => a.score > minConfidence);
|
||||
const score = getInstanceScore(poses, keypoints);
|
||||
const box = utils.getBoundingBox(keypoints);
|
||||
|
|
|
@ -39,7 +39,6 @@ export function scalePoses(poses, [height, width], [inputResolutionHeight, input
|
|||
position: { x: Math.trunc(position.x * scaleX), y: Math.trunc(position.y * scaleY) },
|
||||
})),
|
||||
});
|
||||
|
||||
const scaledPoses = poses.map((pose) => scalePose(pose, height / inputResolutionHeight, width / inputResolutionWidth));
|
||||
return scaledPoses;
|
||||
}
|
||||
|
|
2
wiki
2
wiki
|
@ -1 +1 @@
|
|||
Subproject commit f9fc859622290183712e7f984db59e3c8494afe9
|
||||
Subproject commit 981e14523d86586926d9d1141b04e652decafe5f
|
Loading…
Reference in New Issue