mirror of https://github.com/vladmandic/human
redesign body and hand caching and interpolation
parent
99b81c1e61
commit
f7c189fd8a
|
@ -277,12 +277,10 @@ async function drawResults(input) {
|
|||
}
|
||||
|
||||
// draw all results using interpolated results
|
||||
if (ui.interpolated) {
|
||||
const interpolated = human.next(result);
|
||||
human.draw.all(canvas, interpolated, drawOptions);
|
||||
} else {
|
||||
human.draw.all(canvas, result, drawOptions);
|
||||
}
|
||||
let interpolated;
|
||||
if (ui.interpolated) interpolated = human.next(result);
|
||||
else interpolated = result;
|
||||
human.draw.all(canvas, interpolated, drawOptions);
|
||||
|
||||
// show tree with results
|
||||
if (ui.results) {
|
||||
|
@ -315,7 +313,7 @@ async function drawResults(input) {
|
|||
document.getElementById('log').innerHTML = `
|
||||
video: ${ui.camera.name} | facing: ${ui.camera.facing} | screen: ${window.innerWidth} x ${window.innerHeight} camera: ${ui.camera.width} x ${ui.camera.height} ${processing}<br>
|
||||
backend: ${backend}<br>
|
||||
performance: ${str(lastDetectedResult.performance)}ms ${fps}<br>
|
||||
performance: ${str(interpolated.performance)}ms ${fps}<br>
|
||||
${warning}<br>
|
||||
`;
|
||||
ui.framesDraw++;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,6 +1,6 @@
|
|||
/* eslint-disable no-multi-spaces */
|
||||
|
||||
export const kpt = [
|
||||
export const kpt: Array<string> = [
|
||||
'nose', // 0
|
||||
'leftEyeInside', // 1
|
||||
'leftEye', // 2
|
||||
|
@ -42,7 +42,7 @@ export const kpt = [
|
|||
'rightHand', // 38
|
||||
];
|
||||
|
||||
export const connected = {
|
||||
export const connected: Record<string, string[]> = {
|
||||
leftLeg: ['leftHip', 'leftKnee', 'leftAnkle', 'leftHeel', 'leftFoot'],
|
||||
rightLeg: ['rightHip', 'rightKnee', 'rightAnkle', 'rightHeel', 'rightFoot'],
|
||||
torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'],
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
export const kpt = [
|
||||
export const kpt: Array<string> = [
|
||||
'head',
|
||||
'neck',
|
||||
'rightShoulder',
|
||||
|
@ -17,7 +17,7 @@ export const kpt = [
|
|||
'leftAnkle',
|
||||
];
|
||||
|
||||
export const connected = {
|
||||
export const connected: Record<string, string[]> = {
|
||||
leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'],
|
||||
rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'],
|
||||
torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'],
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
*/
|
||||
|
||||
import { log, join } from '../util/util';
|
||||
import { scale } from '../util/box';
|
||||
import * as box from '../util/box';
|
||||
import * as tf from '../../dist/tfjs.esm.js';
|
||||
import * as coords from './movenetcoords';
|
||||
import type { BodyKeypoint, BodyResult, Box, Point } from '../result';
|
||||
|
@ -16,7 +16,15 @@ import { env } from '../util/env';
|
|||
|
||||
let model: GraphModel | null;
|
||||
let inputSize = 0;
|
||||
const cachedBoxes: Array<Box> = [];
|
||||
const boxExpandFact = 1.5; // increase to 150%
|
||||
|
||||
const cache: {
|
||||
boxes: Array<Box>,
|
||||
bodies: Array<BodyResult>;
|
||||
} = {
|
||||
boxes: [],
|
||||
bodies: [],
|
||||
};
|
||||
|
||||
let skipped = Number.MAX_SAFE_INTEGER;
|
||||
const keypoints: Array<BodyKeypoint> = [];
|
||||
|
@ -34,26 +42,6 @@ export async function load(config: Config): Promise<GraphModel> {
|
|||
return model;
|
||||
}
|
||||
|
||||
function createBox(points): [Box, Box] {
|
||||
const x = points.map((a) => a.position[0]);
|
||||
const y = points.map((a) => a.position[1]);
|
||||
const box: Box = [
|
||||
Math.min(...x),
|
||||
Math.min(...y),
|
||||
Math.max(...x) - Math.min(...x),
|
||||
Math.max(...y) - Math.min(...y),
|
||||
];
|
||||
const xRaw = points.map((a) => a.positionRaw[0]);
|
||||
const yRaw = points.map((a) => a.positionRaw[1]);
|
||||
const boxRaw: Box = [
|
||||
Math.min(...xRaw),
|
||||
Math.min(...yRaw),
|
||||
Math.max(...xRaw) - Math.min(...xRaw),
|
||||
Math.max(...yRaw) - Math.min(...yRaw),
|
||||
];
|
||||
return [box, boxRaw];
|
||||
}
|
||||
|
||||
async function parseSinglePose(res, config, image, inputBox) {
|
||||
const kpt = res[0][0];
|
||||
keypoints.length = 0;
|
||||
|
@ -78,7 +66,7 @@ async function parseSinglePose(res, config, image, inputBox) {
|
|||
}
|
||||
score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
|
||||
const bodies: Array<BodyResult> = [];
|
||||
const [box, boxRaw] = createBox(keypoints);
|
||||
const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]);
|
||||
const annotations: Record<string, Point[][]> = {};
|
||||
for (const [name, indexes] of Object.entries(coords.connected)) {
|
||||
const pt: Array<Point[]> = [];
|
||||
|
@ -89,7 +77,7 @@ async function parseSinglePose(res, config, image, inputBox) {
|
|||
}
|
||||
annotations[name] = pt;
|
||||
}
|
||||
bodies.push({ id: 0, score, box, boxRaw, keypoints, annotations });
|
||||
bodies.push({ id: 0, score, box: newBox.box, boxRaw: newBox.boxRaw, keypoints, annotations });
|
||||
return bodies;
|
||||
}
|
||||
|
||||
|
@ -111,14 +99,11 @@ async function parseMultiPose(res, config, image, inputBox) {
|
|||
part: coords.kpt[i],
|
||||
score: Math.round(100 * score) / 100,
|
||||
positionRaw,
|
||||
position: [
|
||||
Math.round((image.shape[2] || 0) * positionRaw[0]),
|
||||
Math.round((image.shape[1] || 0) * positionRaw[1]),
|
||||
],
|
||||
position: [Math.round((image.shape[2] || 0) * positionRaw[0]), Math.round((image.shape[1] || 0) * positionRaw[1])],
|
||||
});
|
||||
}
|
||||
}
|
||||
const [box, boxRaw] = createBox(keypoints);
|
||||
const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]);
|
||||
// movenet-multipose has built-in box details
|
||||
// const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
|
||||
// const box: Box = [Math.trunc(boxRaw[0] * (image.shape[2] || 0)), Math.trunc(boxRaw[1] * (image.shape[1] || 0)), Math.trunc(boxRaw[2] * (image.shape[2] || 0)), Math.trunc(boxRaw[3] * (image.shape[1] || 0))];
|
||||
|
@ -132,7 +117,7 @@ async function parseMultiPose(res, config, image, inputBox) {
|
|||
}
|
||||
annotations[name] = pt;
|
||||
}
|
||||
bodies.push({ id, score: totalScore, boxRaw, box, keypoints: [...keypoints], annotations });
|
||||
bodies.push({ id, score: totalScore, box: newBox.box, boxRaw: newBox.boxRaw, keypoints: [...keypoints], annotations });
|
||||
}
|
||||
}
|
||||
bodies.sort((a, b) => b.score - a.score);
|
||||
|
@ -141,46 +126,44 @@ async function parseMultiPose(res, config, image, inputBox) {
|
|||
}
|
||||
|
||||
export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
|
||||
if (!model || !model?.inputs[0].shape) return [];
|
||||
if (!model || !model?.inputs[0].shape) return []; // something is wrong with the model
|
||||
if (!config.skipFrame) cache.boxes.length = 0; // allowed to use cache or not
|
||||
skipped++; // increment skip frames
|
||||
if (config.skipFrame && (skipped <= (config.body.skipFrames || 0))) {
|
||||
return cache.bodies; // return cached results without running anything
|
||||
}
|
||||
return new Promise(async (resolve) => {
|
||||
const t: Record<string, Tensor> = {};
|
||||
|
||||
let bodies: Array<BodyResult> = [];
|
||||
|
||||
if (!config.skipFrame) cachedBoxes.length = 0; // allowed to use cache or not
|
||||
skipped++;
|
||||
|
||||
for (let i = 0; i < cachedBoxes.length; i++) { // run detection based on cached boxes
|
||||
t.crop = tf.image.cropAndResize(input, [cachedBoxes[i]], [0], [inputSize, inputSize], 'bilinear');
|
||||
t.cast = tf.cast(t.crop, 'int32');
|
||||
t.res = await model?.predict(t.cast) as Tensor;
|
||||
const res = await t.res.array();
|
||||
const newBodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, cachedBoxes[i]) : await parseMultiPose(res, config, input, cachedBoxes[i]);
|
||||
bodies = bodies.concat(newBodies);
|
||||
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
|
||||
skipped = 0;
|
||||
cache.bodies = []; // reset bodies result
|
||||
if (cache.boxes.length >= (config.body.maxDetected || 0)) { // if we have enough cached boxes run detection using cache
|
||||
for (let i = 0; i < cache.boxes.length; i++) { // run detection based on cached boxes
|
||||
t.crop = tf.image.cropAndResize(input, [cache.boxes[i]], [0], [inputSize, inputSize], 'bilinear');
|
||||
t.cast = tf.cast(t.crop, 'int32');
|
||||
t.res = await model?.predict(t.cast) as Tensor;
|
||||
const res = await t.res.array();
|
||||
const newBodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, cache.boxes[i]) : await parseMultiPose(res, config, input, cache.boxes[i]);
|
||||
cache.bodies = cache.bodies.concat(newBodies);
|
||||
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
|
||||
}
|
||||
}
|
||||
|
||||
if ((bodies.length !== config.body.maxDetected) && (skipped > (config.body.skipFrames || 0))) { // run detection on full frame
|
||||
if (cache.bodies.length !== config.body.maxDetected) { // did not find enough bodies based on cached boxes so run detection on full frame
|
||||
t.resized = tf.image.resizeBilinear(input, [inputSize, inputSize], false);
|
||||
t.cast = tf.cast(t.resized, 'int32');
|
||||
t.res = await model?.predict(t.cast) as Tensor;
|
||||
const res = await t.res.array();
|
||||
bodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, [0, 0, 1, 1]) : await parseMultiPose(res, config, input, [0, 0, 1, 1]);
|
||||
cache.bodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, [0, 0, 1, 1]) : await parseMultiPose(res, config, input, [0, 0, 1, 1]);
|
||||
// cache.bodies = cache.bodies.map((body) => ({ ...body, box: box.scale(body.box, 0.5) }));
|
||||
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
|
||||
cachedBoxes.length = 0; // reset cache
|
||||
skipped = 0;
|
||||
}
|
||||
|
||||
if (config.skipFrame) { // create box cache based on last detections
|
||||
cachedBoxes.length = 0;
|
||||
for (let i = 0; i < bodies.length; i++) {
|
||||
if (bodies[i].keypoints.length > 10) { // only update cache if we detected sufficient number of keypoints
|
||||
const kpts = bodies[i].keypoints.map((kpt) => kpt.position);
|
||||
const newBox = scale(kpts, 1.5, [input.shape[2], input.shape[1]]);
|
||||
cachedBoxes.push([...newBox.yxBox]);
|
||||
}
|
||||
cache.boxes.length = 0; // reset cache
|
||||
for (let i = 0; i < cache.bodies.length; i++) {
|
||||
if (cache.bodies[i].keypoints.length > (coords.kpt.length / 2)) { // only update cache if we detected at least half keypoints
|
||||
const scaledBox = box.scale(cache.bodies[i].boxRaw, boxExpandFact);
|
||||
const cropBox = box.crop(scaledBox);
|
||||
cache.boxes.push(cropBox);
|
||||
}
|
||||
}
|
||||
resolve(bodies);
|
||||
resolve(cache.bodies);
|
||||
});
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
export const kpt = [
|
||||
export const kpt: Array<string> = [
|
||||
'nose',
|
||||
'leftEye',
|
||||
'rightEye',
|
||||
|
@ -18,7 +18,7 @@ export const kpt = [
|
|||
'rightAnkle',
|
||||
];
|
||||
|
||||
export const connected = {
|
||||
export const connected: Record<string, string[]> = {
|
||||
leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'],
|
||||
rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'],
|
||||
torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'],
|
||||
|
|
|
@ -420,12 +420,12 @@ const config: Config = {
|
|||
rotation: true, // use best-guess rotated hand image or just box with rotation as-is
|
||||
// false means higher performance, but incorrect finger mapping if hand is inverted
|
||||
// only valid for `handdetect` variation
|
||||
skipFrames: 14, // how many max frames to go without re-running the hand bounding box detector
|
||||
skipFrames: 1, // how many max frames to go without re-running the hand bounding box detector
|
||||
// only used when cacheSensitivity is not zero
|
||||
// e.g., if model is running at 25 FPS, we can re-use existing bounding
|
||||
// box for updated hand skeleton analysis as the hand
|
||||
// hasn't moved much in short time (10 * 1/25 = 0.4 sec)
|
||||
minConfidence: 0.5, // threshold for discarding a prediction
|
||||
minConfidence: 0.55, // threshold for discarding a prediction
|
||||
iouThreshold: 0.2, // amount of overlap between two detected objects before one object is removed
|
||||
maxDetected: -1, // maximum number of hands detected in the input
|
||||
// should be set to the minimum number for performance
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
*/
|
||||
|
||||
import { log, join } from '../util/util';
|
||||
import { scale } from '../util/box';
|
||||
import * as box from '../util/box';
|
||||
import * as tf from '../../dist/tfjs.esm.js';
|
||||
import type { HandResult, Box, Point } from '../result';
|
||||
import type { GraphModel, Tensor } from '../tfjs/types';
|
||||
|
@ -16,7 +16,6 @@ import { env } from '../util/env';
|
|||
import * as fingerPose from './fingerpose';
|
||||
import { fakeOps } from '../tfjs/backend';
|
||||
|
||||
const boxScaleFact = 1.5; // hand finger model prefers slighly larger box
|
||||
const models: [GraphModel | null, GraphModel | null] = [null, null];
|
||||
const modelOutputNodes = ['StatefulPartitionedCall/Postprocessor/Slice', 'StatefulPartitionedCall/Postprocessor/ExpandDims_1'];
|
||||
|
||||
|
@ -24,26 +23,26 @@ const inputSize = [[0, 0], [0, 0]];
|
|||
|
||||
const classes = ['hand', 'fist', 'pinch', 'point', 'face', 'tip', 'pinchtip'];
|
||||
|
||||
const boxExpandFact = 1.6; // increase to 160%
|
||||
|
||||
let skipped = 0;
|
||||
let outputSize: Point = [0, 0];
|
||||
let outputSize: [number, number] = [0, 0];
|
||||
|
||||
type HandDetectResult = {
|
||||
id: number,
|
||||
score: number,
|
||||
box: Box,
|
||||
boxRaw: Box,
|
||||
boxCrop: Box,
|
||||
label: string,
|
||||
yxBox: Box,
|
||||
}
|
||||
|
||||
const cache: {
|
||||
handBoxes: Array<HandDetectResult>,
|
||||
fingerBoxes: Array<HandDetectResult>
|
||||
tmpBoxes: Array<HandDetectResult>
|
||||
boxes: Array<HandDetectResult>,
|
||||
hands: Array<HandResult>;
|
||||
} = {
|
||||
handBoxes: [],
|
||||
fingerBoxes: [],
|
||||
tmpBoxes: [],
|
||||
boxes: [],
|
||||
hands: [],
|
||||
};
|
||||
|
||||
const fingerMap = {
|
||||
|
@ -103,35 +102,29 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectRes
|
|||
[t.rawScores, t.rawBoxes] = await models[0].executeAsync(t.cast, modelOutputNodes) as Tensor[];
|
||||
t.boxes = tf.squeeze(t.rawBoxes, [0, 2]);
|
||||
t.scores = tf.squeeze(t.rawScores, [0]);
|
||||
const classScores = tf.unstack(t.scores, 1);
|
||||
const classScores = tf.unstack(t.scores, 1); // unstack scores based on classes
|
||||
classScores.splice(4, 1); // remove faces
|
||||
t.filtered = tf.stack(classScores, 1); // restack
|
||||
tf.dispose(...classScores);
|
||||
t.max = tf.max(t.filtered, 1); // max overall score
|
||||
t.argmax = tf.argMax(t.filtered, 1); // class index of max overall score
|
||||
let id = 0;
|
||||
for (let i = 0; i < classScores.length; i++) {
|
||||
if (i === 4) continue; // skip faces
|
||||
t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, classScores[i], config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
|
||||
const nms = await t.nms.data();
|
||||
tf.dispose(t.nms);
|
||||
for (const res of Array.from(nms)) { // generates results for each class
|
||||
const boxSlice = tf.slice(t.boxes, res, 1);
|
||||
let yxBox: Box = [0, 0, 0, 0];
|
||||
if (config.hand.landmarks) { // scale box
|
||||
const detectedBox: Box = await boxSlice.data();
|
||||
const boxCenter: Point = [(detectedBox[0] + detectedBox[2]) / 2, (detectedBox[1] + detectedBox[3]) / 2];
|
||||
const boxDiff: Box = [+boxCenter[0] - detectedBox[0], +boxCenter[1] - detectedBox[1], -boxCenter[0] + detectedBox[2], -boxCenter[1] + detectedBox[3]];
|
||||
yxBox = [boxCenter[0] - boxScaleFact * boxDiff[0], boxCenter[1] - boxScaleFact * boxDiff[1], boxCenter[0] + boxScaleFact * boxDiff[2], boxCenter[1] + boxScaleFact * boxDiff[3]];
|
||||
} else { // use box as-is
|
||||
yxBox = await boxSlice.data();
|
||||
}
|
||||
const boxRaw: Box = [yxBox[1], yxBox[0], yxBox[3] - yxBox[1], yxBox[2] - yxBox[0]];
|
||||
const box: Box = [Math.trunc(boxRaw[0] * outputSize[0]), Math.trunc(boxRaw[1] * outputSize[1]), Math.trunc(boxRaw[2] * outputSize[0]), Math.trunc(boxRaw[3] * outputSize[1])];
|
||||
tf.dispose(boxSlice);
|
||||
const scoreSlice = tf.slice(classScores[i], res, 1);
|
||||
const score = (await scoreSlice.data())[0];
|
||||
tf.dispose(scoreSlice);
|
||||
const hand: HandDetectResult = { id: id++, score, box, boxRaw, label: classes[i], yxBox };
|
||||
hands.push(hand);
|
||||
}
|
||||
t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, t.max, config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
|
||||
const nms = await t.nms.data();
|
||||
const scores = await t.max.data();
|
||||
const classNum = await t.argmax.data();
|
||||
for (const nmsIndex of Array.from(nms)) { // generates results for each class
|
||||
const boxSlice = tf.slice(t.boxes, nmsIndex, 1);
|
||||
const boxData = await boxSlice.data();
|
||||
tf.dispose(boxSlice);
|
||||
const boxInput: Box = [boxData[1], boxData[0], boxData[3] - boxData[1], boxData[2] - boxData[0]];
|
||||
const boxRaw: Box = box.scale(boxInput, 1.2); // handtrack model returns tight box so we expand it a bit
|
||||
const boxFull: Box = [Math.trunc(boxRaw[0] * outputSize[0]), Math.trunc(boxRaw[1] * outputSize[1]), Math.trunc(boxRaw[2] * outputSize[0]), Math.trunc(boxRaw[3] * outputSize[1])];
|
||||
const score = scores[nmsIndex];
|
||||
const label = classes[classNum[nmsIndex]];
|
||||
const hand: HandDetectResult = { id: id++, score, box: boxFull, boxRaw, boxCrop: box.crop(boxRaw), label };
|
||||
hands.push(hand);
|
||||
}
|
||||
classScores.forEach((tensor) => tf.dispose(tensor));
|
||||
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
|
||||
hands.sort((a, b) => b.score - a.score);
|
||||
if (hands.length > (config.hand.maxDetected || 1)) hands.length = (config.hand.maxDetected || 1);
|
||||
|
@ -139,7 +132,7 @@ async function detectHands(input: Tensor, config: Config): Promise<HandDetectRes
|
|||
}
|
||||
|
||||
async function detectFingers(input: Tensor, h: HandDetectResult, config: Config): Promise<HandResult> {
|
||||
const hand: HandResult = {
|
||||
const hand: HandResult = { // initial values inherited from hand detect
|
||||
id: h.id,
|
||||
score: Math.round(100 * h.score) / 100,
|
||||
boxScore: Math.round(100 * h.score) / 100,
|
||||
|
@ -151,36 +144,27 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
|
|||
landmarks: {} as HandResult['landmarks'],
|
||||
annotations: {} as HandResult['annotations'],
|
||||
};
|
||||
if (input && models[1] && config.hand.landmarks) {
|
||||
if (input && models[1] && config.hand.landmarks && h.score > (config.hand.minConfidence || 0)) {
|
||||
const t: Record<string, Tensor> = {};
|
||||
if (!h.yxBox) return hand;
|
||||
t.crop = tf.image.cropAndResize(input, [h.yxBox], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
|
||||
t.crop = tf.image.cropAndResize(input, [box.crop(h.boxRaw)], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear');
|
||||
t.cast = tf.cast(t.crop, 'float32');
|
||||
t.div = tf.div(t.cast, 255);
|
||||
[t.score, t.keypoints] = models[1].execute(t.div) as Tensor[];
|
||||
// const score = Math.round(100 * (await t.score.data())[0] / 100);
|
||||
const rawScore = (await t.score.data())[0];
|
||||
const score = (100 - Math.trunc(100 / (1 + Math.exp(rawScore)))) / 100; // reverse sigmoid value
|
||||
if (score >= (config.hand.minConfidence || 0)) {
|
||||
hand.fingerScore = score;
|
||||
t.reshaped = tf.reshape(t.keypoints, [-1, 3]);
|
||||
const rawCoords = await t.reshaped.array() as Point[];
|
||||
hand.keypoints = (rawCoords as Point[]).map((coord) => [
|
||||
(h.box[2] * coord[0] / inputSize[1][0]) + h.box[0],
|
||||
(h.box[3] * coord[1] / inputSize[1][1]) + h.box[1],
|
||||
(h.box[2] + h.box[3]) / 2 / inputSize[1][0] * (coord[2] || 0),
|
||||
hand.keypoints = (rawCoords as Point[]).map((kpt) => [
|
||||
outputSize[0] * ((h.boxCrop[3] - h.boxCrop[1]) * kpt[0] / inputSize[1][0] + h.boxCrop[1]),
|
||||
outputSize[1] * ((h.boxCrop[2] - h.boxCrop[0]) * kpt[1] / inputSize[1][1] + h.boxCrop[0]),
|
||||
(h.boxCrop[3] + h.boxCrop[3] / 2 * (kpt[2] || 0)),
|
||||
]);
|
||||
const updatedBox = scale(hand.keypoints, boxScaleFact, outputSize); // replace detected box with box calculated around keypoints
|
||||
h.box = updatedBox.box;
|
||||
h.boxRaw = updatedBox.boxRaw;
|
||||
h.yxBox = updatedBox.yxBox;
|
||||
hand.box = h.box;
|
||||
hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger landmarks
|
||||
for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations
|
||||
hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? hand.keypoints[index] : null));
|
||||
}
|
||||
const ratioBoxFrame = Math.min(h.box[2] / (input.shape[2] || 1), h.box[3] / (input.shape[1] || 1));
|
||||
if (ratioBoxFrame > 0.05) cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected and box is big enough
|
||||
}
|
||||
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
|
||||
}
|
||||
|
@ -188,22 +172,37 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
|
|||
}
|
||||
|
||||
export async function predict(input: Tensor, config: Config): Promise<HandResult[]> {
|
||||
if (!models[0] || !models[1] || !models[0]?.inputs[0].shape || !models[1]?.inputs[0].shape) return []; // something is wrong with the model
|
||||
outputSize = [input.shape[2] || 0, input.shape[1] || 0];
|
||||
let hands: Array<HandResult> = [];
|
||||
cache.tmpBoxes = []; // clear temp cache
|
||||
if (!config.hand.landmarks) cache.fingerBoxes = cache.handBoxes; // if hand detection only reset finger boxes cache
|
||||
if (!config.skipFrame) cache.fingerBoxes = [];
|
||||
if ((skipped < (config.hand.skipFrames || 0)) && config.skipFrame) { // just run finger detection while reusing cached boxes
|
||||
skipped++;
|
||||
hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
|
||||
} else { // calculate new boxes and run finger detection
|
||||
skipped = 0;
|
||||
hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache
|
||||
if (hands.length !== config.hand.maxDetected) { // re-run with hand detection only if we dont have enough hands in cache
|
||||
cache.handBoxes = await detectHands(input, config);
|
||||
hands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config)));
|
||||
}
|
||||
|
||||
skipped++; // increment skip frames
|
||||
if (config.skipFrame && (skipped <= (config.hand.skipFrames || 0))) {
|
||||
return cache.hands; // return cached results without running anything
|
||||
}
|
||||
cache.fingerBoxes = [...cache.tmpBoxes]; // repopulate cache with validated hands
|
||||
return hands as HandResult[];
|
||||
return new Promise(async (resolve) => {
|
||||
skipped = 0;
|
||||
if (cache.boxes.length >= (config.hand.maxDetected || 0)) {
|
||||
cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config))); // if we have enough cached boxes run detection using cache
|
||||
} else {
|
||||
cache.hands = []; // reset hands
|
||||
}
|
||||
|
||||
if (cache.hands.length !== config.hand.maxDetected) { // did not find enough hands based on cached boxes so run detection on full frame
|
||||
cache.boxes = await detectHands(input, config);
|
||||
cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config)));
|
||||
}
|
||||
|
||||
const oldCache = [...cache.boxes];
|
||||
cache.boxes.length = 0; // reset cache
|
||||
for (let i = 0; i < cache.hands.length; i++) {
|
||||
const boxKpt = box.square(cache.hands[i].keypoints, outputSize);
|
||||
if (boxKpt.box[2] / (input.shape[2] || 1) > 0.05 && boxKpt.box[3] / (input.shape[1] || 1) > 0.05 && cache.hands[i].fingerScore && cache.hands[i].fingerScore > (config.hand.minConfidence || 0)) {
|
||||
const boxScale = box.scale(boxKpt.box, boxExpandFact);
|
||||
const boxScaleRaw = box.scale(boxKpt.boxRaw, boxExpandFact);
|
||||
const boxCrop = box.crop(boxScaleRaw);
|
||||
cache.boxes.push({ ...oldCache[i], box: boxScale, boxRaw: boxScaleRaw, boxCrop });
|
||||
}
|
||||
}
|
||||
resolve(cache.hands);
|
||||
});
|
||||
}
|
||||
|
|
|
@ -359,7 +359,7 @@ export class Human {
|
|||
* @returns result: {@link Result}
|
||||
*/
|
||||
next(result: Result = this.result): Result {
|
||||
return interpolate.calc(result) as Result;
|
||||
return interpolate.calc(result, this.config) as Result;
|
||||
}
|
||||
|
||||
/** Warmup method pre-initializes all configured models for faster inference
|
||||
|
|
|
@ -84,7 +84,7 @@ export interface BodyResult {
|
|||
score: number,
|
||||
box: Box,
|
||||
boxRaw: Box,
|
||||
annotations: Record<string, Point[][]>,
|
||||
annotations: Record<string, Array<Point[]>>,
|
||||
keypoints: Array<BodyKeypoint>
|
||||
}
|
||||
|
||||
|
|
|
@ -1,28 +1,32 @@
|
|||
import type { Box } from '../result';
|
||||
import type { Point, Box } from '../result';
|
||||
|
||||
// helper function: find box around keypoints, square it and scale it
|
||||
export function scale(keypoints, boxScaleFact, outputSize) {
|
||||
export function calc(keypoints: Array<Point>, outputSize: [number, number] = [1, 1]) {
|
||||
const coords = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all x/y coords
|
||||
const maxmin = [Math.max(...coords[0]), Math.min(...coords[0]), Math.max(...coords[1]), Math.min(...coords[1])]; // find min/max x/y coordinates
|
||||
const center = [(maxmin[0] + maxmin[1]) / 2, (maxmin[2] + maxmin[3]) / 2]; // find center x and y coord of all fingers
|
||||
const diff = Math.max(center[0] - maxmin[1], center[1] - maxmin[3], -center[0] + maxmin[0], -center[1] + maxmin[2]) * boxScaleFact; // largest distance from center in any direction
|
||||
const box = [
|
||||
Math.trunc(center[0] - diff),
|
||||
Math.trunc(center[1] - diff),
|
||||
Math.trunc(2 * diff),
|
||||
Math.trunc(2 * diff),
|
||||
] as Box;
|
||||
const boxRaw = [ // work backwards
|
||||
box[0] / outputSize[0],
|
||||
box[1] / outputSize[1],
|
||||
box[2] / outputSize[0],
|
||||
box[3] / outputSize[1],
|
||||
] as Box;
|
||||
const yxBox = [ // work backwards
|
||||
boxRaw[1],
|
||||
boxRaw[0],
|
||||
boxRaw[3] + boxRaw[1],
|
||||
boxRaw[2] + boxRaw[0],
|
||||
] as Box;
|
||||
return { box, boxRaw, yxBox };
|
||||
const min = [Math.min(...coords[0]), Math.min(...coords[1])];
|
||||
const max = [Math.max(...coords[0]), Math.max(...coords[1])];
|
||||
const box: Box = [min[0], min[1], max[0] - min[0], max[1] - min[1]];
|
||||
const boxRaw: Box = [box[0] / outputSize[0], box[1] / outputSize[1], box[2] / outputSize[0], box[3] / outputSize[1]];
|
||||
return { box, boxRaw };
|
||||
}
|
||||
|
||||
/**
 * Compute a square bounding box centered on a set of keypoints.
 *
 * The half-side of the square is the largest distance from the center of the
 * keypoints' tight bounding rectangle to any of its edges, so every keypoint
 * lies inside the returned square.
 *
 * @param keypoints points whose first two entries are read as x and y
 * @param outputSize [width, height] used to normalize the box, defaults to [1, 1]
 * @returns `box` as [x, y, width, height] truncated to integers and `boxRaw` as
 * that same box divided by `outputSize`; both are all zeros when `keypoints` is empty
 */
export function square(keypoints: Array<Point>, outputSize: [number, number] = [1, 1]) {
  if (keypoints.length === 0) {
    // Math.min()/Math.max() without arguments return +/-Infinity which would turn every value below into NaN
    const emptyBox: Box = [0, 0, 0, 0];
    const emptyBoxRaw: Box = [0, 0, 0, 0];
    return { box: emptyBox, boxRaw: emptyBoxRaw };
  }
  const coords = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all x/y coords
  const min = [Math.min(...coords[0]), Math.min(...coords[1])];
  const max = [Math.max(...coords[0]), Math.max(...coords[1])];
  const center = [(min[0] + max[0]) / 2, (min[1] + max[1]) / 2]; // center x and y of all keypoints
  const dist = Math.max(center[0] - min[0], center[1] - min[1], max[0] - center[0], max[1] - center[1]); // largest distance from center in any direction
  const box: Box = [Math.trunc(center[0] - dist), Math.trunc(center[1] - dist), Math.trunc(2 * dist), Math.trunc(2 * dist)];
  const boxRaw: Box = [box[0] / outputSize[0], box[1] / outputSize[1], box[2] / outputSize[0], box[3] / outputSize[1]]; // normalized to output size
  return { box, boxRaw };
}
|
||||
|
||||
/**
 * Grow or shrink a box around its own center.
 *
 * @param box box as [x, y, width, height]
 * @param scaleFact multiplier applied to both width and height, e.g. 1.5 enlarges the box to 150%
 * @returns new box as [x, y, width, height]; the input box is not modified
 */
export function scale(box: Box, scaleFact: number) {
  const dist = [box[2] * (scaleFact - 1), box[3] * (scaleFact - 1)]; // how much width and height change
  // shift origin by half of the change so the box stays centered
  // height must grow by the vertical change dist[1], not by the horizontal change dist[0]
  const newBox: Box = [box[0] - dist[0] / 2, box[1] - dist[1] / 2, box[2] + dist[0], box[3] + dist[1]];
  return newBox;
}
|
||||
|
||||
/**
 * Convert a box from [x, y, width, height] to corner form [y1, x1, y2, x2].
 *
 * Every returned coordinate is clamped to the 0..1 range, so a box that lies
 * partly or fully outside of that range never yields values outside of it.
 *
 * @param box box as [x, y, width, height]
 * @returns box as [y1, x1, y2, x2] with all values within 0..1
 */
export function crop(box: Box) { // [y1, x1, y2, x2] clamped to 0..1
  const clamp = (val: number) => Math.min(1, Math.max(0, val)); // limit on both sides, not just one
  const yxBox: Box = [clamp(box[1]), clamp(box[0]), clamp(box[3] + box[1]), clamp(box[2] + box[0])];
  return yxBox;
}
|
||||
|
|
|
@ -3,10 +3,16 @@
|
|||
*/
|
||||
|
||||
import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult, Box, Point } from '../result';
|
||||
import type { Config } from '../config';
|
||||
|
||||
import * as moveNetCoords from '../body/movenetcoords';
|
||||
import * as blazePoseCoords from '../body/blazeposecoords';
|
||||
import * as efficientPoseCoords from '../body/efficientposecoords';
|
||||
|
||||
const bufferedResult: Result = { face: [], body: [], hand: [], gesture: [], object: [], persons: [], performance: {}, timestamp: 0 };
|
||||
|
||||
export function calc(newResult: Result): Result {
|
||||
export function calc(newResult: Result, config: Config): Result {
|
||||
const t0 = performance.now();
|
||||
if (!newResult) return { face: [], body: [], hand: [], gesture: [], object: [], persons: [], performance: {}, timestamp: 0 };
|
||||
// each record is only updated using deep clone when number of detected record changes, otherwise it will converge by itself
|
||||
// otherwise bufferedResult is a shallow clone of result plus updated local calculated values
|
||||
|
@ -46,7 +52,22 @@ export function calc(newResult: Result): Result {
|
|||
bufferedResult.body[i].keypoints[j] ? ((bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].positionRaw[1] + keypoint.positionRaw[1]) / bufferedFactor : keypoint.position[1],
|
||||
],
|
||||
}))) as Array<{ score: number, part: string, position: [number, number, number?], positionRaw: [number, number, number?] }>;
|
||||
bufferedResult.body[i] = { ...newResult.body[i], box, boxRaw, keypoints }; // shallow clone plus updated values
|
||||
const annotations: Record<string, Point[][]> = {};
|
||||
|
||||
let coords = { connected: {} };
|
||||
if (config.body?.modelPath?.includes('efficientpose')) coords = efficientPoseCoords;
|
||||
else if (config.body?.modelPath?.includes('blazepose')) coords = blazePoseCoords;
|
||||
else if (config.body?.modelPath?.includes('movenet')) coords = moveNetCoords;
|
||||
for (const [name, indexes] of Object.entries(coords.connected as Record<string, string[]>)) {
|
||||
const pt: Array<Point[]> = [];
|
||||
for (let j = 0; j < indexes.length - 1; j++) {
|
||||
const pt0 = keypoints.find((kp) => kp.part === indexes[j]);
|
||||
const pt1 = keypoints.find((kp) => kp.part === indexes[j + 1]);
|
||||
if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]);
|
||||
}
|
||||
annotations[name] = pt;
|
||||
}
|
||||
bufferedResult.body[i] = { ...newResult.body[i], box, boxRaw, keypoints, annotations: annotations as BodyResult['annotations'] }; // shallow clone plus updated values
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -64,12 +85,16 @@ export function calc(newResult: Result): Result {
|
|||
.map((landmark, j) => landmark
|
||||
.map((coord, k) => (((bufferedFactor - 1) * (bufferedResult.hand[i].keypoints[j][k] || 1) + (coord || 0)) / bufferedFactor)) as Point)
|
||||
: [];
|
||||
const annotations = {};
|
||||
if (Object.keys(bufferedResult.hand[i].annotations).length !== Object.keys(newResult.hand[i].annotations).length) bufferedResult.hand[i].annotations = newResult.hand[i].annotations; // reset annotations as previous frame did not have them
|
||||
if (newResult.hand[i].annotations) {
|
||||
let annotations = {};
|
||||
if (Object.keys(bufferedResult.hand[i].annotations).length !== Object.keys(newResult.hand[i].annotations).length) {
|
||||
bufferedResult.hand[i].annotations = newResult.hand[i].annotations; // reset annotations as previous frame did not have them
|
||||
annotations = bufferedResult.hand[i].annotations;
|
||||
} else if (newResult.hand[i].annotations) {
|
||||
for (const key of Object.keys(newResult.hand[i].annotations)) { // update annotations
|
||||
annotations[key] = newResult.hand[i].annotations[key] && newResult.hand[i].annotations[key][0]
|
||||
? newResult.hand[i].annotations[key].map((val, j) => val.map((coord, k) => ((bufferedFactor - 1) * bufferedResult.hand[i].annotations[key][j][k] + coord) / bufferedFactor))
|
||||
? newResult.hand[i].annotations[key]
|
||||
.map((val, j) => val
|
||||
.map((coord, k) => ((bufferedFactor - 1) * bufferedResult.hand[i].annotations[key][j][k] + coord) / bufferedFactor))
|
||||
: null;
|
||||
}
|
||||
}
|
||||
|
@ -134,7 +159,10 @@ export function calc(newResult: Result): Result {
|
|||
|
||||
// just copy latest gestures without interpolation
|
||||
if (newResult.gesture) bufferedResult.gesture = newResult.gesture as GestureResult[];
|
||||
if (newResult.performance) bufferedResult.performance = newResult.performance;
|
||||
|
||||
// append interpolation performance data
|
||||
const t1 = performance.now();
|
||||
if (newResult.performance) bufferedResult.performance = { ...newResult.performance, interpolate: Math.round(t1 - t0) };
|
||||
|
||||
return bufferedResult;
|
||||
}
|
||||
|
|
7055
test/build.log
7055
test/build.log
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue