redesign body and hand caching and interpolation

pull/193/head
Vladimir Mandic 2021-10-08 18:39:04 -04:00
parent 99b81c1e61
commit f7c189fd8a
20 changed files with 9185 additions and 2038 deletions


@@ -277,12 +277,10 @@ async function drawResults(input) {
   }
   // draw all results using interpolated results
-  if (ui.interpolated) {
-    const interpolated = human.next(result);
-    human.draw.all(canvas, interpolated, drawOptions);
-  } else {
-    human.draw.all(canvas, result, drawOptions);
-  }
+  let interpolated;
+  if (ui.interpolated) interpolated = human.next(result);
+  else interpolated = result;
+  human.draw.all(canvas, interpolated, drawOptions);
   // show tree with results
   if (ui.results) {
@@ -315,7 +313,7 @@ async function drawResults(input) {
   document.getElementById('log').innerHTML = `
     video: ${ui.camera.name} | facing: ${ui.camera.facing} | screen: ${window.innerWidth} x ${window.innerHeight} camera: ${ui.camera.width} x ${ui.camera.height} ${processing}<br>
     backend: ${backend}<br>
-    performance: ${str(lastDetectedResult.performance)}ms ${fps}<br>
+    performance: ${str(interpolated.performance)}ms ${fps}<br>
     ${warning}<br>
   `;
   ui.framesDraw++;
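With this change the demo always draws from a single `interpolated` result object (which simply aliases the raw result when interpolation is disabled), so the performance log above can read `interpolated.performance` unconditionally. A minimal sketch of the resulting pattern, using the `human`, `canvas`, `ui`, `result`, and `drawOptions` names from the demo code above:

  // pick the result to render once, then use it for both drawing and logging
  const interpolated = ui.interpolated ? human.next(result) : result; // temporally smoothed or raw
  human.draw.all(canvas, interpolated, drawOptions); // draw overlays from the chosen result
  showPerf(interpolated.performance); // showPerf is a hypothetical logger; the demo writes into the #log element instead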

Diffs for the vendored bundles are suppressed (too large or lines too long):
  592  dist/human.esm.js (vendored)
  800  dist/human.js (vendored)
  600  dist/human.node-gpu.js (vendored)
  600  dist/human.node.js (vendored)

@@ -1,6 +1,6 @@
 /* eslint-disable no-multi-spaces */
-export const kpt = [
+export const kpt: Array<string> = [
   'nose',          // 0
   'leftEyeInside', // 1
   'leftEye',       // 2
@@ -42,7 +42,7 @@ export const kpt = [
   'rightHand', // 38
 ];
-export const connected = {
+export const connected: Record<string, string[]> = {
   leftLeg: ['leftHip', 'leftKnee', 'leftAnkle', 'leftHeel', 'leftFoot'],
   rightLeg: ['rightHip', 'rightKnee', 'rightAnkle', 'rightHeel', 'rightFoot'],
   torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'],


@@ -1,4 +1,4 @@
-export const kpt = [
+export const kpt: Array<string> = [
   'head',
   'neck',
   'rightShoulder',
@@ -17,7 +17,7 @@ export const kpt = [
   'leftAnkle',
 ];
-export const connected = {
+export const connected: Record<string, string[]> = {
   leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'],
   rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'],
   torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'],


@@ -5,7 +5,7 @@
 */
 import { log, join } from '../util/util';
-import { scale } from '../util/box';
+import * as box from '../util/box';
 import * as tf from '../../dist/tfjs.esm.js';
 import * as coords from './movenetcoords';
 import type { BodyKeypoint, BodyResult, Box, Point } from '../result';
@@ -16,7 +16,15 @@ import { env } from '../util/env';
 let model: GraphModel | null;
 let inputSize = 0;
-const cachedBoxes: Array<Box> = [];
+const boxExpandFact = 1.5; // increase to 150%
+const cache: {
+  boxes: Array<Box>,
+  bodies: Array<BodyResult>;
+} = {
+  boxes: [],
+  bodies: [],
+};
 let skipped = Number.MAX_SAFE_INTEGER;
 const keypoints: Array<BodyKeypoint> = [];
@@ -34,26 +42,6 @@ export async function load(config: Config): Promise<GraphModel> {
   return model;
 }
-function createBox(points): [Box, Box] {
-  const x = points.map((a) => a.position[0]);
-  const y = points.map((a) => a.position[1]);
-  const box: Box = [
-    Math.min(...x),
-    Math.min(...y),
-    Math.max(...x) - Math.min(...x),
-    Math.max(...y) - Math.min(...y),
-  ];
-  const xRaw = points.map((a) => a.positionRaw[0]);
-  const yRaw = points.map((a) => a.positionRaw[1]);
-  const boxRaw: Box = [
-    Math.min(...xRaw),
-    Math.min(...yRaw),
-    Math.max(...xRaw) - Math.min(...xRaw),
-    Math.max(...yRaw) - Math.min(...yRaw),
-  ];
-  return [box, boxRaw];
-}
 async function parseSinglePose(res, config, image, inputBox) {
   const kpt = res[0][0];
   keypoints.length = 0;
@@ -78,7 +66,7 @@ async function parseSinglePose(res, config, image, inputBox) {
   }
   score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
   const bodies: Array<BodyResult> = [];
-  const [box, boxRaw] = createBox(keypoints);
+  const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]);
   const annotations: Record<string, Point[][]> = {};
   for (const [name, indexes] of Object.entries(coords.connected)) {
     const pt: Array<Point[]> = [];
@@ -89,7 +77,7 @@ async function parseSinglePose(res, config, image, inputBox) {
     }
     annotations[name] = pt;
   }
-  bodies.push({ id: 0, score, box, boxRaw, keypoints, annotations });
+  bodies.push({ id: 0, score, box: newBox.box, boxRaw: newBox.boxRaw, keypoints, annotations });
   return bodies;
 }
@@ -111,14 +99,11 @@ async function parseMultiPose(res, config, image, inputBox) {
         part: coords.kpt[i],
         score: Math.round(100 * score) / 100,
         positionRaw,
-        position: [
-          Math.round((image.shape[2] || 0) * positionRaw[0]),
-          Math.round((image.shape[1] || 0) * positionRaw[1]),
-        ],
+        position: [Math.round((image.shape[2] || 0) * positionRaw[0]), Math.round((image.shape[1] || 0) * positionRaw[1])],
       });
     }
   }
-  const [box, boxRaw] = createBox(keypoints);
+  const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]);
   // movenet-multipose has built-in box details
   // const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
   // const box: Box = [Math.trunc(boxRaw[0] * (image.shape[2] || 0)), Math.trunc(boxRaw[1] * (image.shape[1] || 0)), Math.trunc(boxRaw[2] * (image.shape[2] || 0)), Math.trunc(boxRaw[3] * (image.shape[1] || 0))];
@@ -132,7 +117,7 @@ async function parseMultiPose(res, config, image, inputBox) {
       }
       annotations[name] = pt;
     }
-    bodies.push({ id, score: totalScore, boxRaw, box, keypoints: [...keypoints], annotations });
+    bodies.push({ id, score: totalScore, box: newBox.box, boxRaw: newBox.boxRaw, keypoints: [...keypoints], annotations });
   }
 }
 bodies.sort((a, b) => b.score - a.score);
@@ -141,46 +126,44 @@ async function parseMultiPose(res, config, image, inputBox) {
 }
 export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
-  if (!model || !model?.inputs[0].shape) return [];
+  if (!model || !model?.inputs[0].shape) return []; // something is wrong with the model
+  if (!config.skipFrame) cache.boxes.length = 0; // allowed to use cache or not
+  skipped++; // increment skip frames
+  if (config.skipFrame && (skipped <= (config.body.skipFrames || 0))) {
+    return cache.bodies; // return cached results without running anything
+  }
   return new Promise(async (resolve) => {
     const t: Record<string, Tensor> = {};
-    let bodies: Array<BodyResult> = [];
-    if (!config.skipFrame) cachedBoxes.length = 0; // allowed to use cache or not
-    skipped++;
-    for (let i = 0; i < cachedBoxes.length; i++) { // run detection based on cached boxes
-      t.crop = tf.image.cropAndResize(input, [cachedBoxes[i]], [0], [inputSize, inputSize], 'bilinear');
-      t.cast = tf.cast(t.crop, 'int32');
-      t.res = await model?.predict(t.cast) as Tensor;
-      const res = await t.res.array();
-      const newBodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, cachedBoxes[i]) : await parseMultiPose(res, config, input, cachedBoxes[i]);
-      bodies = bodies.concat(newBodies);
-      Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
-    }
+    skipped = 0;
+    cache.bodies = []; // reset bodies result
+    if (cache.boxes.length >= (config.body.maxDetected || 0)) { // if we have enough cached boxes run detection using cache
+      for (let i = 0; i < cache.boxes.length; i++) { // run detection based on cached boxes
+        t.crop = tf.image.cropAndResize(input, [cache.boxes[i]], [0], [inputSize, inputSize], 'bilinear');
+        t.cast = tf.cast(t.crop, 'int32');
+        t.res = await model?.predict(t.cast) as Tensor;
+        const res = await t.res.array();
+        const newBodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, cache.boxes[i]) : await parseMultiPose(res, config, input, cache.boxes[i]);
+        cache.bodies = cache.bodies.concat(newBodies);
+        Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
+      }
+    }
-    if ((bodies.length !== config.body.maxDetected) && (skipped > (config.body.skipFrames || 0))) { // run detection on full frame
+    if (cache.bodies.length !== config.body.maxDetected) { // did not find enough bodies based on cached boxes so run detection on full frame
       t.resized = tf.image.resizeBilinear(input, [inputSize, inputSize], false);
       t.cast = tf.cast(t.resized, 'int32');
       t.res = await model?.predict(t.cast) as Tensor;
       const res = await t.res.array();
-      bodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, [0, 0, 1, 1]) : await parseMultiPose(res, config, input, [0, 0, 1, 1]);
+      cache.bodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, [0, 0, 1, 1]) : await parseMultiPose(res, config, input, [0, 0, 1, 1]);
+      // cache.bodies = cache.bodies.map((body) => ({ ...body, box: box.scale(body.box, 0.5) }));
       Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
-      cachedBoxes.length = 0; // reset cache
-      skipped = 0;
     }
-    if (config.skipFrame) { // create box cache based on last detections
-      cachedBoxes.length = 0;
-      for (let i = 0; i < bodies.length; i++) {
-        if (bodies[i].keypoints.length > 10) { // only update cache if we detected sufficient number of keypoints
-          const kpts = bodies[i].keypoints.map((kpt) => kpt.position);
-          const newBox = scale(kpts, 1.5, [input.shape[2], input.shape[1]]);
-          cachedBoxes.push([...newBox.yxBox]);
-        }
-      }
-    }
+    cache.boxes.length = 0; // reset cache
+    for (let i = 0; i < cache.bodies.length; i++) {
+      if (cache.bodies[i].keypoints.length > (coords.kpt.length / 2)) { // only update cache if we detected at least half keypoints
+        const scaledBox = box.scale(cache.bodies[i].boxRaw, boxExpandFact);
+        const cropBox = box.crop(scaledBox);
+        cache.boxes.push(cropBox);
+      }
+    }
-    resolve(bodies);
+    resolve(cache.bodies);
   });
 }
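Taken together, the rewritten `predict` turns body detection into a three-stage cache pipeline: serve cached results while inside the `skipFrames` budget, detect from cached crop boxes when enough are available (with a full-frame fallback), then rebuild the box cache from sufficiently complete detections. A condensed sketch of that control flow, where `detectOnBoxes` and `detectFullFrame` are hypothetical helpers standing in for the crop/resize-and-parse blocks above:

  async function predictSketch(input: Tensor, config: Config): Promise<BodyResult[]> {
    skipped++;
    if (config.skipFrame && skipped <= (config.body.skipFrames || 0)) return cache.bodies; // stage 1: reuse cached results
    skipped = 0;
    cache.bodies = (cache.boxes.length >= (config.body.maxDetected || 0))
      ? await detectOnBoxes(input, cache.boxes, config) // stage 2a: detect inside cached crops (hypothetical helper)
      : [];
    if (cache.bodies.length !== config.body.maxDetected) cache.bodies = await detectFullFrame(input, config); // stage 2b: full-frame fallback (hypothetical helper)
    cache.boxes = cache.bodies
      .filter((b) => b.keypoints.length > coords.kpt.length / 2) // only bodies with at least half their keypoints seed the next frame
      .map((b) => box.crop(box.scale(b.boxRaw, boxExpandFact))); // expand by 150% and clamp to the 0..1 y/x crop format
    return cache.bodies;
  }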


@@ -1,4 +1,4 @@
-export const kpt = [
+export const kpt: Array<string> = [
   'nose',
   'leftEye',
   'rightEye',
@@ -18,7 +18,7 @@ export const kpt = [
   'rightAnkle',
 ];
-export const connected = {
+export const connected: Record<string, string[]> = {
   leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'],
   rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'],
   torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'],


@@ -420,12 +420,12 @@ const config: Config = {
     rotation: true, // use best-guess rotated hand image or just box with rotation as-is
                     // false means higher performance, but incorrect finger mapping if hand is inverted
                     // only valid for `handdetect` variation
-    skipFrames: 14, // how many max frames to go without re-running the hand bounding box detector
+    skipFrames: 1, // how many max frames to go without re-running the hand bounding box detector
                     // only used when cacheSensitivity is not zero
                     // e.g., if model is running at 25 FPS, we can re-use existing bounding
                     // box for updated hand skeleton analysis as the hand
                     // hasn't moved much in a short time (10 * 1/25 = 0.4 sec)
-    minConfidence: 0.5, // threshold for discarding a prediction
+    minConfidence: 0.55, // threshold for discarding a prediction
     iouThreshold: 0.2, // amount of overlap between two detected objects before one object is removed
     maxDetected: -1, // maximum number of hands detected in the input
                     // should be set to the minimum number for performance
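These defaults can be overridden per instance. A minimal sketch (assuming the `@vladmandic/human` package entry point and its `Human` constructor, which accepts a partial configuration):

  import { Human } from '@vladmandic/human';
  const human = new Human({
    hand: {
      enabled: true,
      skipFrames: 1,       // new default: re-run the hand box detector almost every frame
      minConfidence: 0.55, // new default: slightly stricter prediction threshold
    },
  });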


@@ -7,7 +7,7 @@
 */
 import { log, join } from '../util/util';
-import { scale } from '../util/box';
+import * as box from '../util/box';
 import * as tf from '../../dist/tfjs.esm.js';
 import type { HandResult, Box, Point } from '../result';
 import type { GraphModel, Tensor } from '../tfjs/types';
@@ -16,7 +16,6 @@ import { env } from '../util/env';
 import * as fingerPose from './fingerpose';
 import { fakeOps } from '../tfjs/backend';
-const boxScaleFact = 1.5; // hand finger model prefers slighly larger box
 const models: [GraphModel | null, GraphModel | null] = [null, null];
 const modelOutputNodes = ['StatefulPartitionedCall/Postprocessor/Slice', 'StatefulPartitionedCall/Postprocessor/ExpandDims_1'];
@@ -24,26 +23,26 @@ const inputSize = [[0, 0], [0, 0]];
 const classes = ['hand', 'fist', 'pinch', 'point', 'face', 'tip', 'pinchtip'];
 const boxExpandFact = 1.6; // increase to 160%
 let skipped = 0;
-let outputSize: Point = [0, 0];
+let outputSize: [number, number] = [0, 0];
 type HandDetectResult = {
   id: number,
   score: number,
   box: Box,
   boxRaw: Box,
+  boxCrop: Box,
   label: string,
-  yxBox: Box,
 }
 const cache: {
-  handBoxes: Array<HandDetectResult>,
-  fingerBoxes: Array<HandDetectResult>
-  tmpBoxes: Array<HandDetectResult>
+  boxes: Array<HandDetectResult>,
+  hands: Array<HandResult>;
 } = {
-  handBoxes: [],
-  fingerBoxes: [],
-  tmpBoxes: [],
+  boxes: [],
+  hands: [],
 };
 const fingerMap = {
@@ -103,35 +102,29 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectRes
   [t.rawScores, t.rawBoxes] = await models[0].executeAsync(t.cast, modelOutputNodes) as Tensor[];
   t.boxes = tf.squeeze(t.rawBoxes, [0, 2]);
   t.scores = tf.squeeze(t.rawScores, [0]);
-  const classScores = tf.unstack(t.scores, 1);
+  const classScores = tf.unstack(t.scores, 1); // unstack scores based on classes
+  classScores.splice(4, 1); // remove faces
+  t.filtered = tf.stack(classScores, 1); // restack
+  tf.dispose(...classScores);
+  t.max = tf.max(t.filtered, 1); // max overall score
+  t.argmax = tf.argMax(t.filtered, 1); // class index of max overall score
   let id = 0;
-  for (let i = 0; i < classScores.length; i++) {
-    if (i === 4) continue; // skip faces
-    t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, classScores[i], config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
-    const nms = await t.nms.data();
-    tf.dispose(t.nms);
-    for (const res of Array.from(nms)) { // generates results for each class
-      const boxSlice = tf.slice(t.boxes, res, 1);
-      let yxBox: Box = [0, 0, 0, 0];
-      if (config.hand.landmarks) { // scale box
-        const detectedBox: Box = await boxSlice.data();
-        const boxCenter: Point = [(detectedBox[0] + detectedBox[2]) / 2, (detectedBox[1] + detectedBox[3]) / 2];
-        const boxDiff: Box = [+boxCenter[0] - detectedBox[0], +boxCenter[1] - detectedBox[1], -boxCenter[0] + detectedBox[2], -boxCenter[1] + detectedBox[3]];
-        yxBox = [boxCenter[0] - boxScaleFact * boxDiff[0], boxCenter[1] - boxScaleFact * boxDiff[1], boxCenter[0] + boxScaleFact * boxDiff[2], boxCenter[1] + boxScaleFact * boxDiff[3]];
-      } else { // use box as-is
-        yxBox = await boxSlice.data();
-      }
-      const boxRaw: Box = [yxBox[1], yxBox[0], yxBox[3] - yxBox[1], yxBox[2] - yxBox[0]];
-      const box: Box = [Math.trunc(boxRaw[0] * outputSize[0]), Math.trunc(boxRaw[1] * outputSize[1]), Math.trunc(boxRaw[2] * outputSize[0]), Math.trunc(boxRaw[3] * outputSize[1])];
-      tf.dispose(boxSlice);
-      const scoreSlice = tf.slice(classScores[i], res, 1);
-      const score = (await scoreSlice.data())[0];
-      tf.dispose(scoreSlice);
-      const hand: HandDetectResult = { id: id++, score, box, boxRaw, label: classes[i], yxBox };
-      hands.push(hand);
-    }
+  t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, t.max, config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
+  const nms = await t.nms.data();
+  const scores = await t.max.data();
+  const classNum = await t.argmax.data();
+  for (const nmsIndex of Array.from(nms)) { // generates results for each class
+    const boxSlice = tf.slice(t.boxes, nmsIndex, 1);
+    const boxData = await boxSlice.data();
+    tf.dispose(boxSlice);
+    const boxInput: Box = [boxData[1], boxData[0], boxData[3] - boxData[1], boxData[2] - boxData[0]];
+    const boxRaw: Box = box.scale(boxInput, 1.2); // handtrack model returns tight box so we expand it a bit
+    const boxFull: Box = [Math.trunc(boxRaw[0] * outputSize[0]), Math.trunc(boxRaw[1] * outputSize[1]), Math.trunc(boxRaw[2] * outputSize[0]), Math.trunc(boxRaw[3] * outputSize[1])];
+    const score = scores[nmsIndex];
+    const label = classes[classNum[nmsIndex]];
+    const hand: HandDetectResult = { id: id++, score, box: boxFull, boxRaw, boxCrop: box.crop(boxRaw), label };
+    hands.push(hand);
   }
-  classScores.forEach((tensor) => tf.dispose(tensor));
   Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
   hands.sort((a, b) => b.score - a.score);
   if (hands.length > (config.hand.maxDetected || 1)) hands.length = (config.hand.maxDetected || 1);
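The rewrite above replaces one non-max-suppression pass per class with a single pass, so there is one NMS call and one GPU-to-CPU download instead of one per class: scores are reduced to the best non-face class per candidate box, the winning class index is kept, and NMS runs once over those best scores. The idea in isolation (tensor names follow the code above):

  // single-pass NMS over per-box best-class scores instead of one NMS call per class
  t.max = tf.max(t.filtered, 1); // best remaining class score for each candidate box
  t.argmax = tf.argMax(t.filtered, 1); // which class produced that score
  t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, t.max, maxDetected, iouThreshold, minConfidence);
  // surviving indexes are then resolved back to labels via classes[classNum[nmsIndex]]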
@@ -139,7 +132,7 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectRes
 }
 async function detectFingers(input: Tensor, h: HandDetectResult, config: Config): Promise<HandResult> {
-  const hand: HandResult = {
+  const hand: HandResult = { // initial values inherited from hand detect
     id: h.id,
     score: Math.round(100 * h.score) / 100,
     boxScore: Math.round(100 * h.score) / 100,
@@ -151,36 +144,27 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
     landmarks: {} as HandResult['landmarks'],
     annotations: {} as HandResult['annotations'],
   };
-  if (input && models[1] && config.hand.landmarks) {
+  if (input && models[1] && config.hand.landmarks && h.score > (config.hand.minConfidence || 0)) {
     const t: Record<string, Tensor> = {};
-    if (!h.yxBox) return hand;
-    t.crop = tf.image.cropAndResize(input, [h.yxBox], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
+    t.crop = tf.image.cropAndResize(input, [box.crop(h.boxRaw)], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
     t.cast = tf.cast(t.crop, 'float32');
     t.div = tf.div(t.cast, 255);
     [t.score, t.keypoints] = models[1].execute(t.div) as Tensor[];
-    // const score = Math.round(100 * (await t.score.data())[0] / 100);
+    const rawScore = (await t.score.data())[0];
+    const score = (100 - Math.trunc(100 / (1 + Math.exp(rawScore)))) / 100; // reverse sigmoid value
     if (score >= (config.hand.minConfidence || 0)) {
       hand.fingerScore = score;
       t.reshaped = tf.reshape(t.keypoints, [-1, 3]);
       const rawCoords = await t.reshaped.array() as Point[];
-      hand.keypoints = (rawCoords as Point[]).map((coord) => [
-        (h.box[2] * coord[0] / inputSize[1][0]) + h.box[0],
-        (h.box[3] * coord[1] / inputSize[1][1]) + h.box[1],
-        (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * (coord[2] || 0),
+      hand.keypoints = (rawCoords as Point[]).map((kpt) => [
+        outputSize[0] * ((h.boxCrop[3] - h.boxCrop[1]) * kpt[0] / inputSize[1][0] + h.boxCrop[1]),
+        outputSize[1] * ((h.boxCrop[2] - h.boxCrop[0]) * kpt[1] / inputSize[1][1] + h.boxCrop[0]),
+        (h.boxCrop[3] + h.boxCrop[3] / 2 * (kpt[2] || 0)),
       ]);
-      const updatedBox = scale(hand.keypoints, boxScaleFact, outputSize); // replace detected box with box calculated around keypoints
-      h.box = updatedBox.box;
-      h.boxRaw = updatedBox.boxRaw;
-      h.yxBox = updatedBox.yxBox;
-      hand.box = h.box;
       hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger landmarks
       for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
         hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
       }
-      const ratioBoxFrame = Math.min(h.box[2] / (input.shape[2] || 1), h.box[3] / (input.shape[1] || 1));
-      if (ratioBoxFrame > 0.05) cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected and box is big enough
     }
     Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
   }
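The new finger score converts the model's raw logit into a 0..1 confidence: since 1 - 1/(1 + e^x) = e^x/(1 + e^x) = sigmoid(x), the expression `(100 - Math.trunc(100 / (1 + Math.exp(rawScore)))) / 100` is a sigmoid kept to two decimals (the truncation nudges it up slightly). A quick check:

  const sigmoid = (x: number) => Math.exp(x) / (1 + Math.exp(x)); // standard logistic function
  const asUsedAbove = (x: number) => (100 - Math.trunc(100 / (1 + Math.exp(x)))) / 100; // form from the code above
  // sigmoid(2) ≈ 0.8808, asUsedAbove(2) = (100 - Math.trunc(11.92)) / 100 = 0.89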
@@ -188,22 +172,37 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
 }
 export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
   if (!models[0] || !models[1] || !models[0]?.inputs[0].shape || !models[1]?.inputs[0].shape) return []; // something is wrong with the model
   outputSize = [input.shape[2] || 0, input.shape[1] || 0];
-  let hands: Array<HandResult> = [];
-  cache.tmpBoxes = []; // clear temp cache
-  if (!config.hand.landmarks) cache.fingerBoxes = cache.handBoxes; // if hand detection only reset finger boxes cache
-  if (!config.skipFrame) cache.fingerBoxes = [];
-  if ((skipped < (config.hand.skipFrames || 0)) && config.skipFrame) { // just run finger detection while reusing cached boxes
-    skipped++;
-    hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
-  } else { // calculate new boxes and run finger detection
-    skipped = 0;
-    hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
-    if (hands.length !== config.hand.maxDetected) { // re-run with hand detection only if we dont have enough hands in cache
-      cache.handBoxes = await detectHands(input, config);
-      hands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config)));
-    }
-  }
+  skipped++; // increment skip frames
+  if (config.skipFrame && (skipped <= (config.hand.skipFrames || 0))) {
+    return cache.hands; // return cached results without running anything
+  }
-  cache.fingerBoxes = [...cache.tmpBoxes]; // repopulate cache with validated hands
-  return hands as HandResult[];
+  return new Promise(async (resolve) => {
+    skipped = 0;
+    if (cache.boxes.length >= (config.hand.maxDetected || 0)) {
+      cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config))); // if we have enough cached boxes run detection using cache
+    } else {
+      cache.hands = []; // reset hands
+    }
+    if (cache.hands.length !== config.hand.maxDetected) { // did not find enough hands based on cached boxes so run detection on full frame
+      cache.boxes = await detectHands(input, config);
+      cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
+    }
+    const oldCache = [...cache.boxes];
+    cache.boxes.length = 0; // reset cache
+    for (let i = 0; i < cache.hands.length; i++) {
+      const boxKpt = box.square(cache.hands[i].keypoints, outputSize);
+      if (boxKpt.box[2] / (input.shape[2] || 1) > 0.05 && boxKpt.box[3] / (input.shape[1] || 1) > 0.05 && cache.hands[i].fingerScore && cache.hands[i].fingerScore > (config.hand.minConfidence || 0)) {
+        const boxScale = box.scale(boxKpt.box, boxExpandFact);
+        const boxScaleRaw = box.scale(boxKpt.boxRaw, boxExpandFact);
+        const boxCrop = box.crop(boxScaleRaw);
+        cache.boxes.push({ ...oldCache[i], box: boxScale, boxRaw: boxScaleRaw, boxCrop });
+      }
+    }
+    resolve(cache.hands);
+  });
 }
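The cache rebuild at the end of `predict` only lets a hand seed the next frame when its keypoint box covers more than 5% of the frame in both dimensions and its finger score clears `minConfidence`. That rule as a standalone predicate (a sketch; `isCacheWorthy` is a hypothetical name and `boxUtil` stands for the `box` helpers module):

  // sketch: should this hand's box be cached for the next frame?
  function isCacheWorthy(hand: HandResult, outputSize: [number, number], minConfidence: number): boolean {
    const boxKpt = boxUtil.square(hand.keypoints, outputSize); // square pixel-space box around keypoints
    const wideEnough = boxKpt.box[2] / outputSize[0] > 0.05; // width > 5% of frame
    const tallEnough = boxKpt.box[3] / outputSize[1] > 0.05; // height > 5% of frame
    return wideEnough && tallEnough && (hand.fingerScore || 0) > minConfidence;
  }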


@@ -359,7 +359,7 @@ export class Human {
    * @returns result: {@link Result}
    */
   next(result: Result = this.result): Result {
-    return interpolate.calc(result) as Result;
+    return interpolate.calc(result, this.config) as Result;
   }
   /** Warmup method pre-initializes all configured models for faster inference
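In practice `next()` is called once per rendered frame with the last detection result, as the demo change above shows; passing `this.config` lets the interpolator rebuild body annotations for whichever body model is configured. A minimal usage sketch:

  const result = await human.detect(video); // run detection at whatever rate the models allow
  const smoothed = human.next(result); // time-interpolated snapshot for rendering
  human.draw.all(canvas, smoothed); // draw from the smoothed result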


@@ -84,7 +84,7 @@ export interface BodyResult {
   score: number,
   box: Box,
   boxRaw: Box,
-  annotations: Record<string, Point[][]>,
+  annotations: Record<string, Array<Point[]>>,
   keypoints: Array<BodyKeypoint>
 }


@@ -1,28 +1,32 @@
-import type { Box } from '../result';
+import type { Point, Box } from '../result';
 // helper function: find box around keypoints, square it and scale it
-export function scale(keypoints, boxScaleFact, outputSize) {
+export function calc(keypoints: Array<Point>, outputSize: [number, number] = [1, 1]) {
   const coords = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all x/y coords
-  const maxmin = [Math.max(...coords[0]), Math.min(...coords[0]), Math.max(...coords[1]), Math.min(...coords[1])]; // find min/max x/y coordinates
-  const center = [(maxmin[0] + maxmin[1]) / 2, (maxmin[2] + maxmin[3]) / 2]; // find center x and y coord of all fingers
-  const diff = Math.max(center[0] - maxmin[1], center[1] - maxmin[3], -center[0] + maxmin[0], -center[1] + maxmin[2]) * boxScaleFact; // largest distance from center in any direction
-  const box = [
-    Math.trunc(center[0] - diff),
-    Math.trunc(center[1] - diff),
-    Math.trunc(2 * diff),
-    Math.trunc(2 * diff),
-  ] as Box;
-  const boxRaw = [ // work backwards
-    box[0] / outputSize[0],
-    box[1] / outputSize[1],
-    box[2] / outputSize[0],
-    box[3] / outputSize[1],
-  ] as Box;
-  const yxBox = [ // work backwards
-    boxRaw[1],
-    boxRaw[0],
-    boxRaw[3] + boxRaw[1],
-    boxRaw[2] + boxRaw[0],
-  ] as Box;
-  return { box, boxRaw, yxBox };
+  const min = [Math.min(...coords[0]), Math.min(...coords[1])];
+  const max = [Math.max(...coords[0]), Math.max(...coords[1])];
+  const box: Box = [min[0], min[1], max[0] - min[0], max[1] - min[1]];
+  const boxRaw: Box = [box[0] / outputSize[0], box[1] / outputSize[1], box[2] / outputSize[0], box[3] / outputSize[1]];
+  return { box, boxRaw };
 }
+export function square(keypoints: Array<Point>, outputSize: [number, number] = [1, 1]) {
+  const coords = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all x/y coords
+  const min = [Math.min(...coords[0]), Math.min(...coords[1])];
+  const max = [Math.max(...coords[0]), Math.max(...coords[1])];
+  const center = [(min[0] + max[0]) / 2, (min[1] + max[1]) / 2]; // find center x and y coord of all fingers
+  const dist = Math.max(center[0] - min[0], center[1] - min[1], -center[0] + max[0], -center[1] + max[1]); // largest distance from center in any direction
+  const box: Box = [Math.trunc(center[0] - dist), Math.trunc(center[1] - dist), Math.trunc(2 * dist), Math.trunc(2 * dist)];
+  const boxRaw: Box = [box[0] / outputSize[0], box[1] / outputSize[1], box[2] / outputSize[0], box[3] / outputSize[1]];
+  return { box, boxRaw };
+}
+export function scale(box: Box, scaleFact: number) {
+  const dist = [box[2] * (scaleFact - 1), box[3] * (scaleFact - 1)];
+  const newBox: Box = [box[0] - dist[0] / 2, box[1] - dist[1] / 2, box[2] + dist[0], box[3] + dist[1]];
+  return newBox;
+}
+export function crop(box: Box) { // [y1, x1, y2, x2] clamped to 0..1
+  const yxBox: Box = [Math.max(0, box[1]), Math.max(0, box[0]), Math.min(1, box[3] + box[1]), Math.min(1, box[2] + box[0])];
+  return yxBox;
+}


@@ -3,10 +3,16 @@
 */
 import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult, Box, Point } from '../result';
+import type { Config } from '../config';
+import * as moveNetCoords from '../body/movenetcoords';
+import * as blazePoseCoords from '../body/blazeposecoords';
+import * as efficientPoseCoords from '../body/efficientposecoords';
 const bufferedResult: Result = { face: [], body: [], hand: [], gesture: [], object: [], persons: [], performance: {}, timestamp: 0 };
-export function calc(newResult: Result): Result {
+export function calc(newResult: Result, config: Config): Result {
+  const t0 = performance.now();
   if (!newResult) return { face: [], body: [], hand: [], gesture: [], object: [], persons: [], performance: {}, timestamp: 0 };
   // each record is only updated using deep clone when number of detected record changes, otherwise it will converge by itself
   // otherwise bufferedResult is a shallow clone of result plus updated local calculated values
@@ -46,7 +52,22 @@ export function calc(newResult: Result): Result {
           bufferedResult.body[i].keypoints[j] ? ((bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].positionRaw[1] + keypoint.positionRaw[1]) / bufferedFactor : keypoint.position[1],
         ],
       }))) as Array<{ score: number, part: string, position: [number, number, number?], positionRaw: [number, number, number?] }>;
-      bufferedResult.body[i] = { ...newResult.body[i], box, boxRaw, keypoints }; // shallow clone plus updated values
+      const annotations: Record<string, Point[][]> = {};
+      let coords = { connected: {} };
+      if (config.body?.modelPath?.includes('efficientpose')) coords = efficientPoseCoords;
+      else if (config.body?.modelPath?.includes('blazepose')) coords = blazePoseCoords;
+      else if (config.body?.modelPath?.includes('movenet')) coords = moveNetCoords;
+      for (const [name, indexes] of Object.entries(coords.connected as Record<string, string[]>)) {
+        const pt: Array<Point[]> = [];
+        for (let j = 0; j < indexes.length - 1; j++) {
+          const pt0 = keypoints.find((kp) => kp.part === indexes[j]);
+          const pt1 = keypoints.find((kp) => kp.part === indexes[j + 1]);
+          if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]);
+        }
+        annotations[name] = pt;
+      }
+      bufferedResult.body[i] = { ...newResult.body[i], box, boxRaw, keypoints, annotations: annotations as BodyResult['annotations'] }; // shallow clone plus updated values
     }
   }
@@ -64,12 +85,16 @@ export function calc(newResult: Result): Result {
         .map((landmark, j) => landmark
           .map((coord, k) => (((bufferedFactor - 1) * (bufferedResult.hand[i].keypoints[j][k] || 1) + (coord || 0)) / bufferedFactor)) as Point)
         : [];
-      const annotations = {};
-      if (Object.keys(bufferedResult.hand[i].annotations).length !== Object.keys(newResult.hand[i].annotations).length) bufferedResult.hand[i].annotations = newResult.hand[i].annotations; // reset annotations as previous frame did not have them
-      if (newResult.hand[i].annotations) {
+      let annotations = {};
+      if (Object.keys(bufferedResult.hand[i].annotations).length !== Object.keys(newResult.hand[i].annotations).length) {
+        bufferedResult.hand[i].annotations = newResult.hand[i].annotations; // reset annotations as previous frame did not have them
+        annotations = bufferedResult.hand[i].annotations;
+      } else if (newResult.hand[i].annotations) {
         for (const key of Object.keys(newResult.hand[i].annotations)) { // update annotations
           annotations[key] = newResult.hand[i].annotations[key] && newResult.hand[i].annotations[key][0]
-            ? newResult.hand[i].annotations[key].map((val, j) => val.map((coord, k) => ((bufferedFactor - 1) * bufferedResult.hand[i].annotations[key][j][k] + coord) / bufferedFactor))
+            ? newResult.hand[i].annotations[key]
+              .map((val, j) => val
+                .map((coord, k) => ((bufferedFactor - 1) * bufferedResult.hand[i].annotations[key][j][k] + coord) / bufferedFactor))
             : null;
         }
       }
@@ -134,7 +159,10 @@ export function calc(newResult: Result): Result {
   // just copy latest gestures without interpolation
   if (newResult.gesture) bufferedResult.gesture = newResult.gesture as GestureResult[];
-  if (newResult.performance) bufferedResult.performance = newResult.performance;
+  // append interpolation performance data
+  const t1 = performance.now();
+  if (newResult.performance) bufferedResult.performance = { ...newResult.performance, interpolate: Math.round(t1 - t0) };
   return bufferedResult;
 }
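All of the interpolation above uses the same exponential-smoothing step: each buffered value moves 1/bufferedFactor of the way toward the new measurement. As a standalone function with a worked example (`smooth` is an illustrative name; in the source the expression is inlined):

  const smooth = (prev: number, curr: number, bufferedFactor: number): number =>
    ((bufferedFactor - 1) * prev + curr) / bufferedFactor;
  // e.g. with bufferedFactor = 4: smooth(100, 120, 4) = (3 * 100 + 120) / 4 = 105
  // larger bufferedFactor means heavier smoothing and more lag; a factor of 1 passes new values through unchanged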
