From cd35d733d91cc3676d095102d008fce7f6ae2ec2 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Thu, 14 Oct 2021 12:26:59 -0400 Subject: [PATCH] enhanced movenet postprocessing --- CHANGELOG.md | 1 + demo/index.js | 2 +- package.json | 6 +-- src/body/movenet.ts | 61 ++++++++++++---------- src/body/movenetcoords.ts | 18 +++++-- src/body/movenetfix.ts | 107 ++++++++++++++++++++++++++++++++++++++ src/config.ts | 2 +- 7 files changed, 161 insertions(+), 36 deletions(-) create mode 100644 src/body/movenetfix.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index dc121b73..d20eda97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ ### **HEAD -> main** 2021/10/13 mandic00@live.com +- use transferrable buffer for worker messages - add optional anti-spoofing module - add node-match advanced example using worker thread pool - package updates diff --git a/demo/index.js b/demo/index.js index e7ed7e20..7bd6c0ac 100644 --- a/demo/index.js +++ b/demo/index.js @@ -32,7 +32,7 @@ let human; let userConfig = { // face: { enabled: false }, - // body: { enabled: false }, + // body: { enabled: true }, // hand: { enabled: false }, /* warmup: 'none', diff --git a/package.json b/package.json index 7ac29164..c20a9a2a 100644 --- a/package.json +++ b/package.json @@ -66,15 +66,15 @@ "@tensorflow/tfjs-layers": "^3.9.0", "@tensorflow/tfjs-node": "^3.9.0", "@tensorflow/tfjs-node-gpu": "^3.9.0", - "@types/node": "^16.10.5", + "@types/node": "^16.10.9", "@typescript-eslint/eslint-plugin": "^5.0.0", "@typescript-eslint/parser": "^5.0.0", "@vladmandic/build": "^0.6.0", "@vladmandic/pilogger": "^0.3.3", "canvas": "^2.8.0", "dayjs": "^1.10.7", - "esbuild": "^0.13.5", - "eslint": "8.0.0", + "esbuild": "^0.13.6", + "eslint": "8.0.1", "eslint-config-airbnb-base": "^14.2.1", "eslint-plugin-import": "^2.25.2", "eslint-plugin-json": "^3.1.0", diff --git a/src/body/movenet.ts b/src/body/movenet.ts index 3b06077c..30998c44 100644 --- a/src/body/movenet.ts +++ b/src/body/movenet.ts @@ -8,6 +8,7 @@ import { log, join } from '../util/util'; import * as box from '../util/box'; import * as tf from '../../dist/tfjs.esm.js'; import * as coords from './movenetcoords'; +import * as fix from './movenetfix'; import type { BodyKeypoint, BodyResult, Box, Point } from '../result'; import type { GraphModel, Tensor } from '../tfjs/types'; import type { Config } from '../config'; @@ -16,19 +17,17 @@ import { env } from '../util/env'; let model: GraphModel | null; let inputSize = 0; -const boxExpandFact = 1.5; // increase to 150% +let skipped = Number.MAX_SAFE_INTEGER; +// const boxExpandFact = 1.5; // increase to 150% const cache: { - boxes: Array, + boxes: Array, // unused bodies: Array; } = { boxes: [], bodies: [], }; -let skipped = Number.MAX_SAFE_INTEGER; -const keypoints: Array = []; - export async function load(config: Config): Promise { if (env.initial) model = null; if (!model) { @@ -42,23 +41,9 @@ export async function load(config: Config): Promise { return model; } -function fixSides() { // model sometimes mixes up left vs right keypoints so we fix them - for (const pair of coords.pairs) { - let left = keypoints.find((kp) => kp.part === pair[0]); - let right = keypoints.find((kp) => kp.part === pair[1]); - if (left && right) { - if (left.position[0] > right.position[0]) { - const tmp = left; - left = right; - right = tmp; - } - } - } -} - async function parseSinglePose(res, config, image, inputBox) { const kpt = res[0][0]; - keypoints.length = 0; + const keypoints: Array = []; let score = 0; for (let id = 0; id < kpt.length; id++) { score = kpt[id][2]; @@ -78,7 +63,6 @@ async function parseSinglePose(res, config, image, inputBox) { }); } } - fixSides(); score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0); const bodies: Array = []; const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]); @@ -92,7 +76,9 @@ async function parseSinglePose(res, config, image, inputBox) { } annotations[name] = pt; } - bodies.push({ id: 0, score, box: newBox.box, boxRaw: newBox.boxRaw, keypoints, annotations }); + const body: BodyResult = { id: 0, score, box: newBox.box, boxRaw: newBox.boxRaw, keypoints, annotations }; + fix.bodyParts(body); + bodies.push(body); return bodies; } @@ -102,7 +88,7 @@ async function parseMultiPose(res, config, image, inputBox) { const kpt = res[0][id]; const totalScore = Math.round(100 * kpt[51 + 4]) / 100; if (totalScore > config.body.minConfidence) { - keypoints.length = 0; + const keypoints: Array = []; for (let i = 0; i < 17; i++) { const score = kpt[3 * i + 2]; if (score > config.body.minConfidence) { @@ -118,7 +104,6 @@ async function parseMultiPose(res, config, image, inputBox) { }); } } - fixSides(); const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]); // movenet-multipose has built-in box details // const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]]; @@ -133,7 +118,9 @@ async function parseMultiPose(res, config, image, inputBox) { } annotations[name] = pt; } - bodies.push({ id, score: totalScore, box: newBox.box, boxRaw: newBox.boxRaw, keypoints: [...keypoints], annotations }); + const body: BodyResult = { id, score: totalScore, box: newBox.box, boxRaw: newBox.boxRaw, keypoints: [...keypoints], annotations }; + fix.bodyParts(body); + bodies.push(body); } } bodies.sort((a, b) => b.score - a.score); @@ -158,11 +145,14 @@ export async function predict(input: Tensor, config: Config): Promise { const t: Record = {}; skipped = 0; + // run detection on squared input and cached boxes + /* cache.bodies = []; // reset bodies result if (cache.boxes.length >= (config.body.maxDetected || 0)) { // if we have enough cached boxes run detection using cache for (let i = 0; i < cache.boxes.length; i++) { // run detection based on cached boxes t.crop = tf.image.cropAndResize(input, [cache.boxes[i]], [0], [inputSize, inputSize], 'bilinear'); t.cast = tf.cast(t.crop, 'int32'); + // t.input = prepareImage(input); t.res = await model?.predict(t.cast) as Tensor; const res = await t.res.array(); const newBodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, cache.boxes[i]) : await parseMultiPose(res, config, input, cache.boxes[i]); @@ -171,11 +161,11 @@ export async function predict(input: Tensor, config: Config): Promise tf.dispose(t[tensor])); } cache.boxes.length = 0; // reset cache @@ -186,6 +176,21 @@ export async function predict(input: Tensor, config: Config): Promise tf.dispose(t[tensor])); + resolve(cache.bodies); }); } diff --git a/src/body/movenetcoords.ts b/src/body/movenetcoords.ts index ac49985a..0aa1075c 100644 --- a/src/body/movenetcoords.ts +++ b/src/body/movenetcoords.ts @@ -1,4 +1,4 @@ -export const kpt: Array = [ +export const kpt: Array = [ // used to create part labels 'nose', 'leftEye', 'rightEye', @@ -18,7 +18,7 @@ export const kpt: Array = [ 'rightAnkle', ]; -export const pairs: Array = [ +export const horizontal: Array = [ // used to fix left vs right ['leftEye', 'rightEye'], ['leftEar', 'rightEar'], ['leftShoulder', 'rightShoulder'], @@ -29,7 +29,19 @@ export const pairs: Array = [ ['leftAnkle', 'rightAnkle'], ]; -export const connected: Record = { +export const vertical: Array = [ // used to remove unlikely keypoint positions + ['leftKnee', 'leftShoulder'], + ['rightKnee', 'rightShoulder'], + ['leftAnkle', 'leftKnee'], + ['rightAnkle', 'rightKnee'], +]; + +export const relative: Array = [ // used to match relative body parts + [['leftHip', 'rightHip'], ['leftShoulder', 'rightShoulder']], + [['leftElbow', 'rightElbow'], ['leftShoulder', 'rightShoulder']], +]; + +export const connected: Record = { // used to create body outline in annotations leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'], rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'], torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'], diff --git a/src/body/movenetfix.ts b/src/body/movenetfix.ts new file mode 100644 index 00000000..b09768e7 --- /dev/null +++ b/src/body/movenetfix.ts @@ -0,0 +1,107 @@ +import type { BodyKeypoint, BodyResult } from '../result'; +import * as box from '../util/box'; +import * as coords from './movenetcoords'; +import * as tf from '../../dist/tfjs.esm.js'; +import type { Tensor } from '../tfjs/types'; + +const maxJitter = 0.005; // default allowed jitter is within 0.5% + +const cache: { + keypoints: Array, + padding: [number, number][]; +} = { + keypoints: [], + padding: [[0, 0], [0, 0], [0, 0], [0, 0]], +}; + +export function bodyParts(body: BodyResult) { // model sometimes mixes up left vs right keypoints so we fix them + for (const pair of coords.horizontal) { // fix body parts left vs right + const left = body.keypoints.findIndex((kp) => kp.part === pair[0]); + const right = body.keypoints.findIndex((kp) => kp.part === pair[1]); + if (body.keypoints[left] && body.keypoints[right]) { + if (body.keypoints[left].position[0] < body.keypoints[right].position[0]) { + const tmp = body.keypoints[left]; + body.keypoints[left] = body.keypoints[right]; + body.keypoints[right] = tmp; + } + } + } + for (const pair of coords.vertical) { // remove body parts with improbable vertical position + const lower = body.keypoints.findIndex((kp) => (kp && kp.part === pair[0])); + const higher = body.keypoints.findIndex((kp) => (kp && kp.part === pair[1])); + if (body.keypoints[lower] && body.keypoints[higher]) { + if (body.keypoints[lower].position[1] < body.keypoints[higher].position[1]) { + body.keypoints.splice(lower, 1); + } + } + } + for (const [pair, compare] of coords.relative) { // rearrange body parts according to their relative position + const left = body.keypoints.findIndex((kp) => (kp && kp.part === pair[0])); + const right = body.keypoints.findIndex((kp) => (kp && kp.part === pair[1])); + const leftTo = body.keypoints.findIndex((kp) => (kp && kp.part === compare[0])); + const rightTo = body.keypoints.findIndex((kp) => (kp && kp.part === compare[1])); + if (!body.keypoints[leftTo] || !body.keypoints[rightTo]) continue; // only if we have both compare points + const distanceLeft = body.keypoints[left] ? [ + Math.abs(body.keypoints[leftTo].position[0] - body.keypoints[left].position[0]), + Math.abs(body.keypoints[rightTo].position[0] - body.keypoints[left].position[0]), + ] : [0, 0]; + const distanceRight = body.keypoints[right] ? [ + Math.abs(body.keypoints[rightTo].position[0] - body.keypoints[right].position[0]), + Math.abs(body.keypoints[leftTo].position[0] - body.keypoints[right].position[0]), + ] : [0, 0]; + if (distanceLeft[0] > distanceLeft[1] || distanceRight[0] > distanceRight[1]) { // should flip keypoints + const tmp = body.keypoints[left]; + body.keypoints[left] = body.keypoints[right]; + body.keypoints[right] = tmp; + } + } +} + +export function jitter(keypoints: Array): Array { + for (let i = 0; i < keypoints.length; i++) { + if (keypoints[i] && cache.keypoints[i]) { + const diff = [Math.abs(keypoints[i].positionRaw[0] - cache.keypoints[i].positionRaw[0]), Math.abs(keypoints[i].positionRaw[1] - cache.keypoints[i].positionRaw[1])]; + if (diff[0] < maxJitter && diff[1] < maxJitter) { + keypoints[i] = cache.keypoints[i]; // below jitter so replace keypoint + } else { + cache.keypoints[i] = keypoints[i]; // above jitter so update cache + } + } else { + cache.keypoints[i] = keypoints[i]; // cache for keypoint doesnt exist so create it here + } + } + return keypoints; +} + +export function padInput(input: Tensor, inputSize: number): Tensor { + const t: Record = {}; + if (!input.shape || !input.shape[1] || !input.shape[2]) return input; + cache.padding = [ + [0, 0], // dont touch batch + [input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0, input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0], // height before&after + [input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0, input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0], // width before&after + [0, 0], // dont touch rbg + ]; + t.pad = tf.pad(input, cache.padding); + t.resize = tf.image.resizeBilinear(t.pad, [inputSize, inputSize]); + const final = tf.cast(t.resize, 'int32'); + Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); + return final; +} + +export function rescaleBody(body: BodyResult, outputSize: [number, number]): BodyResult { + body.keypoints = body.keypoints.filter((kpt) => kpt && kpt.position); // filter invalid keypoints + for (const kpt of body.keypoints) { + kpt.position = [ + kpt.position[0] * (outputSize[0] + cache.padding[2][0] + cache.padding[2][1]) / outputSize[0] - cache.padding[2][0], + kpt.position[1] * (outputSize[1] + cache.padding[1][0] + cache.padding[1][1]) / outputSize[1] - cache.padding[1][0], + ]; + kpt.positionRaw = [ + kpt.position[0] / outputSize[0], kpt.position[1] / outputSize[1], + ]; + } + const rescaledBoxes = box.calc(body.keypoints.map((pt) => pt.position), outputSize); + body.box = rescaledBoxes.box; + body.boxRaw = rescaledBoxes.boxRaw; + return body; +} diff --git a/src/config.ts b/src/config.ts index 4f40262a..dc4381f9 100644 --- a/src/config.ts +++ b/src/config.ts @@ -426,7 +426,7 @@ const config: Config = { // should be set to the minimum number for performance // only valid for posenet and movenet-multipose as other models detects single pose // set to -1 to autodetect based on number of detected faces - minConfidence: 0.2, // threshold for discarding a prediction + minConfidence: 0.3, // threshold for discarding a prediction skipFrames: 1, // how many max frames to go without re-running the detector // only used when cacheSensitivity is not zero },