mirror of https://github.com/vladmandic/human
enhanced movenet postprocessing
parent
86a4cedf81
commit
cd35d733d9
|
@ -11,6 +11,7 @@
|
|||
|
||||
### **HEAD -> main** 2021/10/13 mandic00@live.com
|
||||
|
||||
- use transferable buffer for worker messages
|
||||
- add optional anti-spoofing module
|
||||
- add node-match advanced example using worker thread pool
|
||||
- package updates
|
||||
|
|
|
@ -32,7 +32,7 @@ let human;
|
|||
|
||||
let userConfig = {
|
||||
// face: { enabled: false },
|
||||
// body: { enabled: false },
|
||||
// body: { enabled: true },
|
||||
// hand: { enabled: false },
|
||||
/*
|
||||
warmup: 'none',
|
||||
|
|
|
@ -66,15 +66,15 @@
|
|||
"@tensorflow/tfjs-layers": "^3.9.0",
|
||||
"@tensorflow/tfjs-node": "^3.9.0",
|
||||
"@tensorflow/tfjs-node-gpu": "^3.9.0",
|
||||
"@types/node": "^16.10.5",
|
||||
"@types/node": "^16.10.9",
|
||||
"@typescript-eslint/eslint-plugin": "^5.0.0",
|
||||
"@typescript-eslint/parser": "^5.0.0",
|
||||
"@vladmandic/build": "^0.6.0",
|
||||
"@vladmandic/pilogger": "^0.3.3",
|
||||
"canvas": "^2.8.0",
|
||||
"dayjs": "^1.10.7",
|
||||
"esbuild": "^0.13.5",
|
||||
"eslint": "8.0.0",
|
||||
"esbuild": "^0.13.6",
|
||||
"eslint": "8.0.1",
|
||||
"eslint-config-airbnb-base": "^14.2.1",
|
||||
"eslint-plugin-import": "^2.25.2",
|
||||
"eslint-plugin-json": "^3.1.0",
|
||||
|
|
|
@ -8,6 +8,7 @@ import { log, join } from '../util/util';
|
|||
import * as box from '../util/box';
|
||||
import * as tf from '../../dist/tfjs.esm.js';
|
||||
import * as coords from './movenetcoords';
|
||||
import * as fix from './movenetfix';
|
||||
import type { BodyKeypoint, BodyResult, Box, Point } from '../result';
|
||||
import type { GraphModel, Tensor } from '../tfjs/types';
|
||||
import type { Config } from '../config';
|
||||
|
@ -16,19 +17,17 @@ import { env } from '../util/env';
|
|||
|
||||
let model: GraphModel | null;
|
||||
let inputSize = 0;
|
||||
const boxExpandFact = 1.5; // increase to 150%
|
||||
let skipped = Number.MAX_SAFE_INTEGER;
|
||||
// const boxExpandFact = 1.5; // increase to 150%
|
||||
|
||||
const cache: {
|
||||
boxes: Array<Box>,
|
||||
boxes: Array<Box>, // unused
|
||||
bodies: Array<BodyResult>;
|
||||
} = {
|
||||
boxes: [],
|
||||
bodies: [],
|
||||
};
|
||||
|
||||
let skipped = Number.MAX_SAFE_INTEGER;
|
||||
const keypoints: Array<BodyKeypoint> = [];
|
||||
|
||||
export async function load(config: Config): Promise<GraphModel> {
|
||||
if (env.initial) model = null;
|
||||
if (!model) {
|
||||
|
@ -42,23 +41,9 @@ export async function load(config: Config): Promise<GraphModel> {
|
|||
return model;
|
||||
}
|
||||
|
||||
function fixSides() { // model sometimes mixes up left vs right keypoints so we fix them
|
||||
for (const pair of coords.pairs) {
|
||||
let left = keypoints.find((kp) => kp.part === pair[0]);
|
||||
let right = keypoints.find((kp) => kp.part === pair[1]);
|
||||
if (left && right) {
|
||||
if (left.position[0] > right.position[0]) {
|
||||
const tmp = left;
|
||||
left = right;
|
||||
right = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function parseSinglePose(res, config, image, inputBox) {
|
||||
const kpt = res[0][0];
|
||||
keypoints.length = 0;
|
||||
const keypoints: Array<BodyKeypoint> = [];
|
||||
let score = 0;
|
||||
for (let id = 0; id < kpt.length; id++) {
|
||||
score = kpt[id][2];
|
||||
|
@ -78,7 +63,6 @@ async function parseSinglePose(res, config, image, inputBox) {
|
|||
});
|
||||
}
|
||||
}
|
||||
fixSides();
|
||||
score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
|
||||
const bodies: Array<BodyResult> = [];
|
||||
const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]);
|
||||
|
@ -92,7 +76,9 @@ async function parseSinglePose(res, config, image, inputBox) {
|
|||
}
|
||||
annotations[name] = pt;
|
||||
}
|
||||
bodies.push({ id: 0, score, box: newBox.box, boxRaw: newBox.boxRaw, keypoints, annotations });
|
||||
const body: BodyResult = { id: 0, score, box: newBox.box, boxRaw: newBox.boxRaw, keypoints, annotations };
|
||||
fix.bodyParts(body);
|
||||
bodies.push(body);
|
||||
return bodies;
|
||||
}
|
||||
|
||||
|
@ -102,7 +88,7 @@ async function parseMultiPose(res, config, image, inputBox) {
|
|||
const kpt = res[0][id];
|
||||
const totalScore = Math.round(100 * kpt[51 + 4]) / 100;
|
||||
if (totalScore > config.body.minConfidence) {
|
||||
keypoints.length = 0;
|
||||
const keypoints: Array<BodyKeypoint> = [];
|
||||
for (let i = 0; i < 17; i++) {
|
||||
const score = kpt[3 * i + 2];
|
||||
if (score > config.body.minConfidence) {
|
||||
|
@ -118,7 +104,6 @@ async function parseMultiPose(res, config, image, inputBox) {
|
|||
});
|
||||
}
|
||||
}
|
||||
fixSides();
|
||||
const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]);
|
||||
// movenet-multipose has built-in box details
|
||||
// const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
|
||||
|
@ -133,7 +118,9 @@ async function parseMultiPose(res, config, image, inputBox) {
|
|||
}
|
||||
annotations[name] = pt;
|
||||
}
|
||||
bodies.push({ id, score: totalScore, box: newBox.box, boxRaw: newBox.boxRaw, keypoints: [...keypoints], annotations });
|
||||
const body: BodyResult = { id, score: totalScore, box: newBox.box, boxRaw: newBox.boxRaw, keypoints: [...keypoints], annotations };
|
||||
fix.bodyParts(body);
|
||||
bodies.push(body);
|
||||
}
|
||||
}
|
||||
bodies.sort((a, b) => b.score - a.score);
|
||||
|
@ -158,11 +145,14 @@ export async function predict(input: Tensor, config: Config): Promise<BodyResult
|
|||
return new Promise(async (resolve) => {
|
||||
const t: Record<string, Tensor> = {};
|
||||
skipped = 0;
|
||||
// run detection on squared input and cached boxes
|
||||
/*
|
||||
cache.bodies = []; // reset bodies result
|
||||
if (cache.boxes.length >= (config.body.maxDetected || 0)) { // if we have enough cached boxes run detection using cache
|
||||
for (let i = 0; i < cache.boxes.length; i++) { // run detection based on cached boxes
|
||||
t.crop = tf.image.cropAndResize(input, [cache.boxes[i]], [0], [inputSize, inputSize], 'bilinear');
|
||||
t.cast = tf.cast(t.crop, 'int32');
|
||||
// t.input = prepareImage(input);
|
||||
t.res = await model?.predict(t.cast) as Tensor;
|
||||
const res = await t.res.array();
|
||||
const newBodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, cache.boxes[i]) : await parseMultiPose(res, config, input, cache.boxes[i]);
|
||||
|
@ -171,11 +161,11 @@ export async function predict(input: Tensor, config: Config): Promise<BodyResult
|
|||
}
|
||||
}
|
||||
if (cache.bodies.length !== config.body.maxDetected) { // did not find enough bodies based on cached boxes so run detection on full frame
|
||||
t.resized = tf.image.resizeBilinear(input, [inputSize, inputSize], false);
|
||||
t.cast = tf.cast(t.resized, 'int32');
|
||||
t.res = await model?.predict(t.cast) as Tensor;
|
||||
t.input = prepareImage(input);
|
||||
t.res = await model?.predict(t.input) as Tensor;
|
||||
const res = await t.res.array();
|
||||
cache.bodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, [0, 0, 1, 1]) : await parseMultiPose(res, config, input, [0, 0, 1, 1]);
|
||||
for (const body of cache.bodies) rescaleBody(body, [input.shape[2] || 1, input.shape[1] || 1]);
|
||||
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
|
||||
}
|
||||
cache.boxes.length = 0; // reset cache
|
||||
|
@ -186,6 +176,21 @@ export async function predict(input: Tensor, config: Config): Promise<BodyResult
|
|||
cache.boxes.push(cropBox);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// run detection on squared input and no cached boxes
|
||||
t.input = fix.padInput(input, inputSize);
|
||||
t.res = await model?.predict(t.input) as Tensor;
|
||||
const res = await t.res.array();
|
||||
cache.bodies = (t.res.shape[2] === 17)
|
||||
? await parseSinglePose(res, config, input, [0, 0, 1, 1])
|
||||
: await parseMultiPose(res, config, input, [0, 0, 1, 1]);
|
||||
for (const body of cache.bodies) {
|
||||
fix.rescaleBody(body, [input.shape[2] || 1, input.shape[1] || 1]);
|
||||
fix.jitter(body.keypoints);
|
||||
}
|
||||
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
|
||||
|
||||
resolve(cache.bodies);
|
||||
});
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
export const kpt: Array<string> = [
|
||||
export const kpt: Array<string> = [ // used to create part labels
|
||||
'nose',
|
||||
'leftEye',
|
||||
'rightEye',
|
||||
|
@ -18,7 +18,7 @@ export const kpt: Array<string> = [
|
|||
'rightAnkle',
|
||||
];
|
||||
|
||||
export const pairs: Array<string[]> = [
|
||||
export const horizontal: Array<string[]> = [ // used to fix left vs right
|
||||
['leftEye', 'rightEye'],
|
||||
['leftEar', 'rightEar'],
|
||||
['leftShoulder', 'rightShoulder'],
|
||||
|
@ -29,7 +29,19 @@ export const pairs: Array<string[]> = [
|
|||
['leftAnkle', 'rightAnkle'],
|
||||
];
|
||||
|
||||
export const connected: Record<string, string[]> = {
|
||||
export const vertical: Array<string[]> = [ // used to remove unlikely keypoint positions
|
||||
['leftKnee', 'leftShoulder'],
|
||||
['rightKnee', 'rightShoulder'],
|
||||
['leftAnkle', 'leftKnee'],
|
||||
['rightAnkle', 'rightKnee'],
|
||||
];
|
||||
|
||||
export const relative: Array<string[][]> = [ // used to match relative body parts
|
||||
[['leftHip', 'rightHip'], ['leftShoulder', 'rightShoulder']],
|
||||
[['leftElbow', 'rightElbow'], ['leftShoulder', 'rightShoulder']],
|
||||
];
|
||||
|
||||
export const connected: Record<string, string[]> = { // used to create body outline in annotations
|
||||
leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'],
|
||||
rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'],
|
||||
torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'],
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
import type { BodyKeypoint, BodyResult } from '../result';
|
||||
import * as box from '../util/box';
|
||||
import * as coords from './movenetcoords';
|
||||
import * as tf from '../../dist/tfjs.esm.js';
|
||||
import type { Tensor } from '../tfjs/types';
|
||||
|
||||
/** Largest per-axis change in `positionRaw` (0.5%) that is still treated as jitter. */
const maxJitter = 0.005;

/**
 * Module-level state shared by the helpers in this file.
 * - `keypoints`: last accepted keypoint per array index, maintained by `jitter()`
 * - `padding`: padding most recently applied by `padInput()`, read back by `rescaleBody()`;
 *   one [before, after] pair per tensor dimension (batch, height, width, channels)
 */
const cache: {
  keypoints: BodyKeypoint[];
  padding: Array<[number, number]>;
} = {
  keypoints: [],
  padding: [[0, 0], [0, 0], [0, 0], [0, 0]],
};
|
||||
|
||||
/**
 * Post-processes the keypoints of a detected body in place.
 *
 * The model sometimes mixes up left vs right keypoints or reports keypoints at
 * improbable positions, so three passes are applied to `body.keypoints`:
 *  1. horizontal: for each pair in `coords.horizontal`, exchange the two array
 *     entries when the left part has a smaller x than the right part
 *  2. vertical: for each pair in `coords.vertical`, remove the "lower" part when
 *     its y is smaller than the y of the "higher" part
 *  3. relative: for each entry in `coords.relative`, exchange the two array entries
 *     of the pair when either one is horizontally closer to the opposite reference part
 *
 * @param body detected body; its `keypoints` array is modified in place
 */
export function bodyParts(body: BodyResult) {
  // guarded lookup: tolerates empty slots in the array and returns -1 when the part is absent
  const indexOfPart = (part: string): number => body.keypoints.findIndex((kp) => (kp && kp.part === part));

  // exchange two array entries; both indices must refer to existing keypoints
  const swap = (a: number, b: number): void => {
    const held = body.keypoints[a];
    body.keypoints[a] = body.keypoints[b];
    body.keypoints[b] = held;
  };

  for (const [leftPart, rightPart] of coords.horizontal) { // fix body parts left vs right
    const left = indexOfPart(leftPart);
    const right = indexOfPart(rightPart);
    if (left < 0 || right < 0) continue;
    if (body.keypoints[left].position[0] < body.keypoints[right].position[0]) swap(left, right);
  }

  for (const [lowerPart, higherPart] of coords.vertical) { // remove body parts with improbable vertical position
    const lower = indexOfPart(lowerPart);
    const higher = indexOfPart(higherPart);
    if (lower < 0 || higher < 0) continue;
    // indices are looked up again on every iteration, so the shift caused by splice is harmless
    if (body.keypoints[lower].position[1] < body.keypoints[higher].position[1]) body.keypoints.splice(lower, 1);
  }

  for (const [pair, compare] of coords.relative) { // rearrange body parts according to their relative position
    const left = indexOfPart(pair[0]);
    const right = indexOfPart(pair[1]);
    const leftTo = indexOfPart(compare[0]);
    const rightTo = indexOfPart(compare[1]);
    if (leftTo < 0 || rightTo < 0) continue; // only if we have both compare points
    const refLeftX = body.keypoints[leftTo].position[0];
    const refRightX = body.keypoints[rightTo].position[0];
    // a part is misplaced when it is closer to the opposite reference than to its own
    const leftMisplaced = left >= 0
      && Math.abs(refLeftX - body.keypoints[left].position[0]) > Math.abs(refRightX - body.keypoints[left].position[0]);
    const rightMisplaced = right >= 0
      && Math.abs(refRightX - body.keypoints[right].position[0]) > Math.abs(refLeftX - body.keypoints[right].position[0]);
    if (!leftMisplaced && !rightMisplaced) continue;
    // swapping needs both parts: with an index of -1 the exchange would write `undefined`
    // over the existing keypoint and create a stray "-1" property on the array
    if (left >= 0 && right >= 0) swap(left, right);
  }
}
|
||||
|
||||
/**
 * Suppresses small frame-to-frame movement of keypoints.
 *
 * Each keypoint is compared with the keypoint cached at the same array index.
 * When both axes of `positionRaw` moved by less than `maxJitter`, the cached
 * keypoint replaces the new one; otherwise the cache is updated with the new one.
 *
 * The cache is positional, and the array can change shape between calls
 * (keypoints removed or filtered), so the cached entry is only reused when it
 * describes the same body part as the incoming keypoint.
 *
 * @param keypoints keypoints of a body; modified in place
 * @returns the same `keypoints` array
 */
export function jitter(keypoints: Array<BodyKeypoint>): Array<BodyKeypoint> {
  for (let i = 0; i < keypoints.length; i++) {
    const current = keypoints[i];
    const previous = cache.keypoints[i];
    if (current && previous && current.part === previous.part) {
      const dx = Math.abs(current.positionRaw[0] - previous.positionRaw[0]);
      const dy = Math.abs(current.positionRaw[1] - previous.positionRaw[1]);
      if (dx < maxJitter && dy < maxJitter) {
        keypoints[i] = previous; // below jitter so replace keypoint
      } else {
        cache.keypoints[i] = current; // above jitter so update cache
      }
    } else {
      cache.keypoints[i] = current; // no usable cache entry for this slot so (re)create it here
    }
  }
  return keypoints;
}
|
||||
|
||||
/**
 * Pads the input symmetrically along its shorter spatial dimension, resizes it
 * to `inputSize` x `inputSize` and casts it to int32.
 *
 * The applied padding is stored in `cache.padding` so that `rescaleBody()` can
 * map keypoints back to the unpadded frame.
 *
 * @param input image tensor; dimensions 1 and 2 are read as height and width
 * @param inputSize edge length of the square output
 * @returns a new int32 tensor, or `input` itself when its height or width is missing or zero
 */
export function padInput(input: Tensor, inputSize: number): Tensor {
  if (!input.shape || !input.shape[1] || !input.shape[2]) {
    // nothing is padded on this path, so clear padding left over from a previous call;
    // otherwise rescaleBody() would un-pad keypoints with stale values
    cache.padding = [[0, 0], [0, 0], [0, 0], [0, 0]];
    // NOTE(review): the caller-owned tensor is returned as-is; a caller that disposes the result disposes its own input
    return input;
  }
  const height = input.shape[1];
  const width = input.shape[2];
  const padHeight = width > height ? Math.trunc((width - height) / 2) : 0;
  const padWidth = height > width ? Math.trunc((height - width) / 2) : 0;
  cache.padding = [
    [0, 0], // dont touch batch
    [padHeight, padHeight], // height before&after
    [padWidth, padWidth], // width before&after
    [0, 0], // dont touch rgb
  ];
  const t: Record<string, Tensor> = {};
  t.pad = tf.pad(input, cache.padding);
  t.resize = tf.image.resizeBilinear(t.pad, [inputSize, inputSize]);
  const final = tf.cast(t.resize, 'int32');
  Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); // release intermediates, keep only the result
  return final;
}
|
||||
|
||||
/**
 * Maps keypoints from the padded frame back to the original frame and
 * recomputes the body box from the remapped keypoints.
 *
 * Uses the padding stored in `cache.padding`: each coordinate is scaled by
 * (size + total padding) / size and then shifted back by the leading padding.
 * Entries that are empty or have no `position` are dropped first.
 *
 * @param body detected body; modified in place
 * @param outputSize [width, height] of the unpadded frame
 * @returns the same `body` object
 */
export function rescaleBody(body: BodyResult, outputSize: [number, number]): BodyResult {
  const [width, height] = outputSize;
  const [padTop, padBottom] = cache.padding[1]; // height before & after
  const [padLeft, padRight] = cache.padding[2]; // width before & after
  const paddedWidth = width + padLeft + padRight;
  const paddedHeight = height + padTop + padBottom;

  body.keypoints = body.keypoints.filter((kpt) => kpt && kpt.position); // filter invalid keypoints
  body.keypoints.forEach((kpt) => {
    const x = kpt.position[0] * paddedWidth / width - padLeft;
    const y = kpt.position[1] * paddedHeight / height - padTop;
    kpt.position = [x, y];
    kpt.positionRaw = [x / width, y / height]; // normalized against the unpadded frame
  });

  const rescaled = box.calc(body.keypoints.map((pt) => pt.position), outputSize);
  body.box = rescaled.box;
  body.boxRaw = rescaled.boxRaw;
  return body;
}
|
|
@ -426,7 +426,7 @@ const config: Config = {
|
|||
// should be set to the minimum number for performance
|
||||
// only valid for posenet and movenet-multipose as other models detect a single pose
|
||||
// set to -1 to autodetect based on number of detected faces
|
||||
minConfidence: 0.2, // threshold for discarding a prediction
|
||||
minConfidence: 0.3, // threshold for discarding a prediction
|
||||
skipFrames: 1, // how many max frames to go without re-running the detector
|
||||
// only used when cacheSensitivity is not zero
|
||||
},
|
||||
|
|
Loading…
Reference in New Issue