diff --git a/demo/index.js b/demo/index.js index 0b82179d..cfdb14f1 100644 --- a/demo/index.js +++ b/demo/index.js @@ -277,12 +277,10 @@ async function drawResults(input) { } // draw all results using interpolated results - if (ui.interpolated) { - const interpolated = human.next(result); - human.draw.all(canvas, interpolated, drawOptions); - } else { - human.draw.all(canvas, result, drawOptions); - } + let interpolated; + if (ui.interpolated) interpolated = human.next(result); + else interpolated = result; + human.draw.all(canvas, interpolated, drawOptions); // show tree with results if (ui.results) { @@ -315,7 +313,7 @@ async function drawResults(input) { document.getElementById('log').innerHTML = ` video: ${ui.camera.name} | facing: ${ui.camera.facing} | screen: ${window.innerWidth} x ${window.innerHeight} camera: ${ui.camera.width} x ${ui.camera.height} ${processing}
backend: ${backend}
- performance: ${str(lastDetectedResult.performance)}ms ${fps}
+ performance: ${str(interpolated.performance)}ms ${fps}
${warning}
`; ui.framesDraw++; diff --git a/src/body/blazeposecoords.ts b/src/body/blazeposecoords.ts index 721a7e8f..428ffde9 100644 --- a/src/body/blazeposecoords.ts +++ b/src/body/blazeposecoords.ts @@ -1,6 +1,6 @@ /* eslint-disable no-multi-spaces */ -export const kpt = [ +export const kpt: Array = [ 'nose', // 0 'leftEyeInside', // 1 'leftEye', // 2 @@ -42,7 +42,7 @@ export const kpt = [ 'rightHand', // 38 ]; -export const connected = { +export const connected: Record = { leftLeg: ['leftHip', 'leftKnee', 'leftAnkle', 'leftHeel', 'leftFoot'], rightLeg: ['rightHip', 'rightKnee', 'rightAnkle', 'rightHeel', 'rightFoot'], torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'], diff --git a/src/body/efficientposecoords.ts b/src/body/efficientposecoords.ts index d6e66644..9f707b24 100644 --- a/src/body/efficientposecoords.ts +++ b/src/body/efficientposecoords.ts @@ -1,4 +1,4 @@ -export const kpt = [ +export const kpt: Array = [ 'head', 'neck', 'rightShoulder', @@ -17,7 +17,7 @@ export const kpt = [ 'leftAnkle', ]; -export const connected = { +export const connected: Record = { leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'], rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'], torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'], diff --git a/src/body/movenet.ts b/src/body/movenet.ts index 24a6eb87..86a96411 100644 --- a/src/body/movenet.ts +++ b/src/body/movenet.ts @@ -5,7 +5,7 @@ */ import { log, join } from '../util/util'; -import { scale } from '../util/box'; +import * as box from '../util/box'; import * as tf from '../../dist/tfjs.esm.js'; import * as coords from './movenetcoords'; import type { BodyKeypoint, BodyResult, Box, Point } from '../result'; @@ -16,7 +16,15 @@ import { env } from '../util/env'; let model: GraphModel | null; let inputSize = 0; -const cachedBoxes: Array = []; +const boxExpandFact = 1.5; // increase to 150% + +const cache: { + boxes: Array, + bodies: Array; +} = { + boxes: [], + bodies: [], +}; let 
skipped = Number.MAX_SAFE_INTEGER; const keypoints: Array = []; @@ -34,26 +42,6 @@ export async function load(config: Config): Promise { return model; } -function createBox(points): [Box, Box] { - const x = points.map((a) => a.position[0]); - const y = points.map((a) => a.position[1]); - const box: Box = [ - Math.min(...x), - Math.min(...y), - Math.max(...x) - Math.min(...x), - Math.max(...y) - Math.min(...y), - ]; - const xRaw = points.map((a) => a.positionRaw[0]); - const yRaw = points.map((a) => a.positionRaw[1]); - const boxRaw: Box = [ - Math.min(...xRaw), - Math.min(...yRaw), - Math.max(...xRaw) - Math.min(...xRaw), - Math.max(...yRaw) - Math.min(...yRaw), - ]; - return [box, boxRaw]; -} - async function parseSinglePose(res, config, image, inputBox) { const kpt = res[0][0]; keypoints.length = 0; @@ -78,7 +66,7 @@ async function parseSinglePose(res, config, image, inputBox) { } score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0); const bodies: Array = []; - const [box, boxRaw] = createBox(keypoints); + const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]); const annotations: Record = {}; for (const [name, indexes] of Object.entries(coords.connected)) { const pt: Array = []; @@ -89,7 +77,7 @@ async function parseSinglePose(res, config, image, inputBox) { } annotations[name] = pt; } - bodies.push({ id: 0, score, box, boxRaw, keypoints, annotations }); + bodies.push({ id: 0, score, box: newBox.box, boxRaw: newBox.boxRaw, keypoints, annotations }); return bodies; } @@ -111,14 +99,11 @@ async function parseMultiPose(res, config, image, inputBox) { part: coords.kpt[i], score: Math.round(100 * score) / 100, positionRaw, - position: [ - Math.round((image.shape[2] || 0) * positionRaw[0]), - Math.round((image.shape[1] || 0) * positionRaw[1]), - ], + position: [Math.round((image.shape[2] || 0) * positionRaw[0]), Math.round((image.shape[1] || 0) * positionRaw[1])], }); } } - const [box, boxRaw] = 
createBox(keypoints); + const newBox = box.calc(keypoints.map((pt) => pt.position), [image.shape[2], image.shape[1]]); // movenet-multipose has built-in box details // const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]]; // const box: Box = [Math.trunc(boxRaw[0] * (image.shape[2] || 0)), Math.trunc(boxRaw[1] * (image.shape[1] || 0)), Math.trunc(boxRaw[2] * (image.shape[2] || 0)), Math.trunc(boxRaw[3] * (image.shape[1] || 0))]; @@ -132,7 +117,7 @@ async function parseMultiPose(res, config, image, inputBox) { } annotations[name] = pt; } - bodies.push({ id, score: totalScore, boxRaw, box, keypoints: [...keypoints], annotations }); + bodies.push({ id, score: totalScore, box: newBox.box, boxRaw: newBox.boxRaw, keypoints: [...keypoints], annotations }); } } bodies.sort((a, b) => b.score - a.score); @@ -141,46 +126,44 @@ async function parseMultiPose(res, config, image, inputBox) { } export async function predict(input: Tensor, config: Config): Promise { - if (!model || !model?.inputs[0].shape) return []; + if (!model || !model?.inputs[0].shape) return []; // something is wrong with the model + if (!config.skipFrame) cache.boxes.length = 0; // allowed to use cache or not + skipped++; // increment skip frames + if (config.skipFrame && (skipped <= (config.body.skipFrames || 0))) { + return cache.bodies; // return cached results without running anything + } return new Promise(async (resolve) => { const t: Record = {}; - - let bodies: Array = []; - - if (!config.skipFrame) cachedBoxes.length = 0; // allowed to use cache or not - skipped++; - - for (let i = 0; i < cachedBoxes.length; i++) { // run detection based on cached boxes - t.crop = tf.image.cropAndResize(input, [cachedBoxes[i]], [0], [inputSize, inputSize], 'bilinear'); - t.cast = tf.cast(t.crop, 'int32'); - t.res = await model?.predict(t.cast) as Tensor; - const res = await t.res.array(); - const newBodies = (t.res.shape[2] === 17) ? 
await parseSinglePose(res, config, input, cachedBoxes[i]) : await parseMultiPose(res, config, input, cachedBoxes[i]); - bodies = bodies.concat(newBodies); - Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); + skipped = 0; + cache.bodies = []; // reset bodies result + if (cache.boxes.length >= (config.body.maxDetected || 0)) { // if we have enough cached boxes run detection using cache + for (let i = 0; i < cache.boxes.length; i++) { // run detection based on cached boxes + t.crop = tf.image.cropAndResize(input, [cache.boxes[i]], [0], [inputSize, inputSize], 'bilinear'); + t.cast = tf.cast(t.crop, 'int32'); + t.res = await model?.predict(t.cast) as Tensor; + const res = await t.res.array(); + const newBodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, cache.boxes[i]) : await parseMultiPose(res, config, input, cache.boxes[i]); + cache.bodies = cache.bodies.concat(newBodies); + Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); + } } - - if ((bodies.length !== config.body.maxDetected) && (skipped > (config.body.skipFrames || 0))) { // run detection on full frame + if (cache.bodies.length !== config.body.maxDetected) { // did not find enough bodies based on cached boxes so run detection on full frame t.resized = tf.image.resizeBilinear(input, [inputSize, inputSize], false); t.cast = tf.cast(t.resized, 'int32'); t.res = await model?.predict(t.cast) as Tensor; const res = await t.res.array(); - bodies = (t.res.shape[2] === 17) ? await parseSinglePose(res, config, input, [0, 0, 1, 1]) : await parseMultiPose(res, config, input, [0, 0, 1, 1]); + cache.bodies = (t.res.shape[2] === 17) ? 
await parseSinglePose(res, config, input, [0, 0, 1, 1]) : await parseMultiPose(res, config, input, [0, 0, 1, 1]); + // cache.bodies = cache.bodies.map((body) => ({ ...body, box: box.scale(body.box, 0.5) })); Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); - cachedBoxes.length = 0; // reset cache - skipped = 0; } - - if (config.skipFrame) { // create box cache based on last detections - cachedBoxes.length = 0; - for (let i = 0; i < bodies.length; i++) { - if (bodies[i].keypoints.length > 10) { // only update cache if we detected sufficient number of keypoints - const kpts = bodies[i].keypoints.map((kpt) => kpt.position); - const newBox = scale(kpts, 1.5, [input.shape[2], input.shape[1]]); - cachedBoxes.push([...newBox.yxBox]); - } + cache.boxes.length = 0; // reset cache + for (let i = 0; i < cache.bodies.length; i++) { + if (cache.bodies[i].keypoints.length > (coords.kpt.length / 2)) { // only update cache if we detected at least half keypoints + const scaledBox = box.scale(cache.bodies[i].boxRaw, boxExpandFact); + const cropBox = box.crop(scaledBox); + cache.boxes.push(cropBox); } } - resolve(bodies); + resolve(cache.bodies); }); } diff --git a/src/body/movenetcoords.ts b/src/body/movenetcoords.ts index 9c881c2e..8a8fc0a9 100644 --- a/src/body/movenetcoords.ts +++ b/src/body/movenetcoords.ts @@ -1,4 +1,4 @@ -export const kpt = [ +export const kpt: Array = [ 'nose', 'leftEye', 'rightEye', @@ -18,7 +18,7 @@ export const kpt = [ 'rightAnkle', ]; -export const connected = { +export const connected: Record = { leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'], rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'], torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'], diff --git a/src/config.ts b/src/config.ts index ae66be17..f1dd6d75 100644 --- a/src/config.ts +++ b/src/config.ts @@ -420,12 +420,12 @@ const config: Config = { rotation: true, // use best-guess rotated hand image or just box with rotation as-is // false means higher 
performance, but incorrect finger mapping if hand is inverted // only valid for `handdetect` variation - skipFrames: 14, // how many max frames to go without re-running the hand bounding box detector + skipFrames: 1, // how many max frames to go without re-running the hand bounding box detector // only used when cacheSensitivity is not zero // e.g., if model is running st 25 FPS, we can re-use existing bounding // box for updated hand skeleton analysis as the hand // hasn't moved much in short time (10 * 1/25 = 0.25 sec) - minConfidence: 0.5, // threshold for discarding a prediction + minConfidence: 0.55, // threshold for discarding a prediction iouThreshold: 0.2, // ammount of overlap between two detected objects before one object is removed maxDetected: -1, // maximum number of hands detected in the input // should be set to the minimum number for performance diff --git a/src/hand/handtrack.ts b/src/hand/handtrack.ts index 4bc7c6de..75158426 100644 --- a/src/hand/handtrack.ts +++ b/src/hand/handtrack.ts @@ -7,7 +7,7 @@ */ import { log, join } from '../util/util'; -import { scale } from '../util/box'; +import * as box from '../util/box'; import * as tf from '../../dist/tfjs.esm.js'; import type { HandResult, Box, Point } from '../result'; import type { GraphModel, Tensor } from '../tfjs/types'; @@ -16,7 +16,6 @@ import { env } from '../util/env'; import * as fingerPose from './fingerpose'; import { fakeOps } from '../tfjs/backend'; -const boxScaleFact = 1.5; // hand finger model prefers slighly larger box const models: [GraphModel | null, GraphModel | null] = [null, null]; const modelOutputNodes = ['StatefulPartitionedCall/Postprocessor/Slice', 'StatefulPartitionedCall/Postprocessor/ExpandDims_1']; @@ -24,26 +23,26 @@ const inputSize = [[0, 0], [0, 0]]; const classes = ['hand', 'fist', 'pinch', 'point', 'face', 'tip', 'pinchtip']; +const boxExpandFact = 1.6; // increase to 160% + let skipped = 0; -let outputSize: Point = [0, 0]; +let outputSize: [number, number] = 
[0, 0]; type HandDetectResult = { id: number, score: number, box: Box, boxRaw: Box, + boxCrop: Box, label: string, - yxBox: Box, } const cache: { - handBoxes: Array, - fingerBoxes: Array - tmpBoxes: Array + boxes: Array, + hands: Array; } = { - handBoxes: [], - fingerBoxes: [], - tmpBoxes: [], + boxes: [], + hands: [], }; const fingerMap = { @@ -103,35 +102,29 @@ async function detectHands(input: Tensor, config: Config): Promise tf.dispose(tensor)); Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); hands.sort((a, b) => b.score - a.score); if (hands.length > (config.hand.maxDetected || 1)) hands.length = (config.hand.maxDetected || 1); @@ -139,7 +132,7 @@ async function detectHands(input: Tensor, config: Config): Promise { - const hand: HandResult = { + const hand: HandResult = { // initial values inherited from hand detect id: h.id, score: Math.round(100 * h.score) / 100, boxScore: Math.round(100 * h.score) / 100, @@ -151,36 +144,27 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config) landmarks: {} as HandResult['landmarks'], annotations: {} as HandResult['annotations'], }; - if (input && models[1] && config.hand.landmarks) { + if (input && models[1] && config.hand.landmarks && h.score > (config.hand.minConfidence || 0)) { const t: Record = {}; - if (!h.yxBox) return hand; - t.crop = tf.image.cropAndResize(input, [h.yxBox], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear'); + t.crop = tf.image.cropAndResize(input, [box.crop(h.boxRaw)], [0], [inputSize[1][0], inputSize[1][1]], 'bilinear'); t.cast = tf.cast(t.crop, 'float32'); t.div = tf.div(t.cast, 255); [t.score, t.keypoints] = models[1].execute(t.div) as Tensor[]; - // const score = Math.round(100 * (await t.score.data())[0] / 100); const rawScore = (await t.score.data())[0]; const score = (100 - Math.trunc(100 / (1 + Math.exp(rawScore)))) / 100; // reverse sigmoid value if (score >= (config.hand.minConfidence || 0)) { hand.fingerScore = score; t.reshaped = 
tf.reshape(t.keypoints, [-1, 3]); const rawCoords = await t.reshaped.array() as Point[]; - hand.keypoints = (rawCoords as Point[]).map((coord) => [ - (h.box[2] * coord[0] / inputSize[1][0]) + h.box[0], - (h.box[3] * coord[1] / inputSize[1][1]) + h.box[1], - (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * (coord[2] || 0), + hand.keypoints = (rawCoords as Point[]).map((kpt) => [ + outputSize[0] * ((h.boxCrop[3] - h.boxCrop[1]) * kpt[0] / inputSize[1][0] + h.boxCrop[1]), + outputSize[1] * ((h.boxCrop[2] - h.boxCrop[0]) * kpt[1] / inputSize[1][1] + h.boxCrop[0]), + (h.boxCrop[3] + h.boxCrop[3] / 2 * (kpt[2] || 0)), ]); - const updatedBox = scale(hand.keypoints, boxScaleFact, outputSize); // replace detected box with box calculated around keypoints - h.box = updatedBox.box; - h.boxRaw = updatedBox.boxRaw; - h.yxBox = updatedBox.yxBox; - hand.box = h.box; hand.landmarks = fingerPose.analyze(hand.keypoints) as HandResult['landmarks']; // calculate finger landmarks for (const key of Object.keys(fingerMap)) { // map keypoints to per-finger annotations hand.annotations[key] = fingerMap[key].map((index) => (hand.landmarks && hand.keypoints[index] ? 
hand.keypoints[index] : null)); } - const ratioBoxFrame = Math.min(h.box[2] / (input.shape[2] || 1), h.box[3] / (input.shape[1] || 1)); - if (ratioBoxFrame > 0.05) cache.tmpBoxes.push(h); // if finger detection is enabled, only update cache if fingers are detected and box is big enough } Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); } @@ -188,22 +172,37 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config) } export async function predict(input: Tensor, config: Config): Promise { + if (!models[0] || !models[1] || !models[0]?.inputs[0].shape || !models[1]?.inputs[0].shape) return []; // something is wrong with the model outputSize = [input.shape[2] || 0, input.shape[1] || 0]; - let hands: Array = []; - cache.tmpBoxes = []; // clear temp cache - if (!config.hand.landmarks) cache.fingerBoxes = cache.handBoxes; // if hand detection only reset finger boxes cache - if (!config.skipFrame) cache.fingerBoxes = []; - if ((skipped < (config.hand.skipFrames || 0)) && config.skipFrame) { // just run finger detection while reusing cached boxes - skipped++; - hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache - } else { // calculate new boxes and run finger detection - skipped = 0; - hands = await Promise.all(cache.fingerBoxes.map((hand) => detectFingers(input, hand, config))); // run from finger box cache - if (hands.length !== config.hand.maxDetected) { // re-run with hand detection only if we dont have enough hands in cache - cache.handBoxes = await detectHands(input, config); - hands = await Promise.all(cache.handBoxes.map((hand) => detectFingers(input, hand, config))); - } + + skipped++; // increment skip frames + if (config.skipFrame && (skipped <= (config.hand.skipFrames || 0))) { + return cache.hands; // return cached results without running anything } - cache.fingerBoxes = [...cache.tmpBoxes]; // repopulate cache with validated hands - return hands as 
HandResult[]; + return new Promise(async (resolve) => { + skipped = 0; + if (cache.boxes.length >= (config.hand.maxDetected || 0)) { + cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config))); // if we have enough cached boxes run detection using cache + } else { + cache.hands = []; // reset hands + } + + if (cache.hands.length !== config.hand.maxDetected) { // did not find enough hands based on cached boxes so run detection on full frame + cache.boxes = await detectHands(input, config); + cache.hands = await Promise.all(cache.boxes.map((handBox) => detectFingers(input, handBox, config))); + } + + const oldCache = [...cache.boxes]; + cache.boxes.length = 0; // reset cache + for (let i = 0; i < cache.hands.length; i++) { + const boxKpt = box.square(cache.hands[i].keypoints, outputSize); + if (boxKpt.box[2] / (input.shape[2] || 1) > 0.05 && boxKpt.box[3] / (input.shape[1] || 1) > 0.05 && cache.hands[i].fingerScore && cache.hands[i].fingerScore > (config.hand.minConfidence || 0)) { + const boxScale = box.scale(boxKpt.box, boxExpandFact); + const boxScaleRaw = box.scale(boxKpt.boxRaw, boxExpandFact); + const boxCrop = box.crop(boxScaleRaw); + cache.boxes.push({ ...oldCache[i], box: boxScale, boxRaw: boxScaleRaw, boxCrop }); + } + } + resolve(cache.hands); + }); } diff --git a/src/human.ts b/src/human.ts index 33cd3ee5..382416f0 100644 --- a/src/human.ts +++ b/src/human.ts @@ -359,7 +359,7 @@ export class Human { * @returns result: {@link Result} */ next(result: Result = this.result): Result { - return interpolate.calc(result) as Result; + return interpolate.calc(result, this.config) as Result; } /** Warmup method pre-initializes all configured models for faster inference diff --git a/src/result.ts b/src/result.ts index 4dfc16e7..fc75c759 100644 --- a/src/result.ts +++ b/src/result.ts @@ -84,7 +84,7 @@ export interface BodyResult { score: number, box: Box, boxRaw: Box, - annotations: Record, + annotations: Record>, keypoints: 
Array } diff --git a/src/util/box.ts b/src/util/box.ts index c6054f55..ed081e2f 100644 --- a/src/util/box.ts +++ b/src/util/box.ts @@ -1,28 +1,32 @@ -import type { Box } from '../result'; +import type { Point, Box } from '../result'; -// helper function: find box around keypoints, square it and scale it -export function scale(keypoints, boxScaleFact, outputSize) { +export function calc(keypoints: Array, outputSize: [number, number] = [1, 1]) { const coords = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all x/y coords - const maxmin = [Math.max(...coords[0]), Math.min(...coords[0]), Math.max(...coords[1]), Math.min(...coords[1])]; // find min/max x/y coordinates - const center = [(maxmin[0] + maxmin[1]) / 2, (maxmin[2] + maxmin[3]) / 2]; // find center x and y coord of all fingers - const diff = Math.max(center[0] - maxmin[1], center[1] - maxmin[3], -center[0] + maxmin[0], -center[1] + maxmin[2]) * boxScaleFact; // largest distance from center in any direction - const box = [ - Math.trunc(center[0] - diff), - Math.trunc(center[1] - diff), - Math.trunc(2 * diff), - Math.trunc(2 * diff), - ] as Box; - const boxRaw = [ // work backwards - box[0] / outputSize[0], - box[1] / outputSize[1], - box[2] / outputSize[0], - box[3] / outputSize[1], - ] as Box; - const yxBox = [ // work backwards - boxRaw[1], - boxRaw[0], - boxRaw[3] + boxRaw[1], - boxRaw[2] + boxRaw[0], - ] as Box; - return { box, boxRaw, yxBox }; + const min = [Math.min(...coords[0]), Math.min(...coords[1])]; + const max = [Math.max(...coords[0]), Math.max(...coords[1])]; + const box: Box = [min[0], min[1], max[0] - min[0], max[1] - min[1]]; + const boxRaw: Box = [box[0] / outputSize[0], box[1] / outputSize[1], box[2] / outputSize[0], box[3] / outputSize[1]]; + return { box, boxRaw }; +} + +export function square(keypoints: Array, outputSize: [number, number] = [1, 1]) { + const coords = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all x/y coords + const min = 
[Math.min(...coords[0]), Math.min(...coords[1])]; + const max = [Math.max(...coords[0]), Math.max(...coords[1])]; + const center = [(min[0] + max[0]) / 2, (min[1] + max[1]) / 2]; // find center x and y coord of all fingers + const dist = Math.max(center[0] - min[0], center[1] - min[1], -center[0] + max[0], -center[1] + max[1]); // largest distance from center in any direction + const box: Box = [Math.trunc(center[0] - dist), Math.trunc(center[1] - dist), Math.trunc(2 * dist), Math.trunc(2 * dist)]; + const boxRaw: Box = [box[0] / outputSize[0], box[1] / outputSize[1], box[2] / outputSize[0], box[3] / outputSize[1]]; + return { box, boxRaw }; +} + +export function scale(box: Box, scaleFact: number) { + const dist = [box[2] * (scaleFact - 1), box[3] * (scaleFact - 1)]; + const newBox: Box = [box[0] - dist[0] / 2, box[1] - dist[1] / 2, box[2] + dist[0], box[3] + dist[1]]; + return newBox; +} + +export function crop(box: Box) { // [y1, x1, y2, x2] clamped to 0..1 + const yxBox: Box = [Math.max(0, box[1]), Math.max(0, box[0]), Math.min(1, box[3] + box[1]), Math.min(1, box[2] + box[0])]; + return yxBox; } diff --git a/src/util/interpolate.ts b/src/util/interpolate.ts index 903b3b48..f89610da 100644 --- a/src/util/interpolate.ts +++ b/src/util/interpolate.ts @@ -3,10 +3,16 @@ */ import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult, Box, Point } from '../result'; +import type { Config } from '../config'; + +import * as moveNetCoords from '../body/movenetcoords'; +import * as blazePoseCoords from '../body/blazeposecoords'; +import * as efficientPoseCoords from '../body/efficientposecoords'; const bufferedResult: Result = { face: [], body: [], hand: [], gesture: [], object: [], persons: [], performance: {}, timestamp: 0 }; -export function calc(newResult: Result): Result { +export function calc(newResult: Result, config: Config): Result { + const t0 = performance.now(); if (!newResult) return { face: [], body: [], hand: [], 
gesture: [], object: [], persons: [], performance: {}, timestamp: 0 }; // each record is only updated using deep clone when number of detected record changes, otherwise it will converge by itself // otherwise bufferedResult is a shallow clone of result plus updated local calculated values @@ -46,7 +52,22 @@ export function calc(newResult: Result): Result { bufferedResult.body[i].keypoints[j] ? ((bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].positionRaw[1] + keypoint.positionRaw[1]) / bufferedFactor : keypoint.position[1], ], }))) as Array<{ score: number, part: string, position: [number, number, number?], positionRaw: [number, number, number?] }>; - bufferedResult.body[i] = { ...newResult.body[i], box, boxRaw, keypoints }; // shallow clone plus updated values + const annotations: Record = {}; + + let coords = { connected: {} }; + if (config.body?.modelPath?.includes('efficientpose')) coords = efficientPoseCoords; + else if (config.body?.modelPath?.includes('blazepose')) coords = blazePoseCoords; + else if (config.body?.modelPath?.includes('movenet')) coords = moveNetCoords; + for (const [name, indexes] of Object.entries(coords.connected as Record)) { + const pt: Array = []; + for (let j = 0; j < indexes.length - 1; j++) { + const pt0 = keypoints.find((kp) => kp.part === indexes[j]); + const pt1 = keypoints.find((kp) => kp.part === indexes[j + 1]); + if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]); + } + annotations[name] = pt; + } + bufferedResult.body[i] = { ...newResult.body[i], box, boxRaw, keypoints, annotations: annotations as BodyResult['annotations'] }; // shallow clone plus updated values } } @@ -64,12 +85,16 @@ export function calc(newResult: Result): Result { .map((landmark, j) => landmark .map((coord, k) => (((bufferedFactor - 1) * (bufferedResult.hand[i].keypoints[j][k] || 1) + (coord || 0)) / bufferedFactor)) as Point) : []; - const 
annotations = {}; - if (Object.keys(bufferedResult.hand[i].annotations).length !== Object.keys(newResult.hand[i].annotations).length) bufferedResult.hand[i].annotations = newResult.hand[i].annotations; // reset annotations as previous frame did not have them - if (newResult.hand[i].annotations) { + let annotations = {}; + if (Object.keys(bufferedResult.hand[i].annotations).length !== Object.keys(newResult.hand[i].annotations).length) { + bufferedResult.hand[i].annotations = newResult.hand[i].annotations; // reset annotations as previous frame did not have them + annotations = bufferedResult.hand[i].annotations; + } else if (newResult.hand[i].annotations) { for (const key of Object.keys(newResult.hand[i].annotations)) { // update annotations annotations[key] = newResult.hand[i].annotations[key] && newResult.hand[i].annotations[key][0] - ? newResult.hand[i].annotations[key].map((val, j) => val.map((coord, k) => ((bufferedFactor - 1) * bufferedResult.hand[i].annotations[key][j][k] + coord) / bufferedFactor)) + ? newResult.hand[i].annotations[key] + .map((val, j) => val + .map((coord, k) => ((bufferedFactor - 1) * bufferedResult.hand[i].annotations[key][j][k] + coord) / bufferedFactor)) : null; } } @@ -134,7 +159,10 @@ export function calc(newResult: Result): Result { // just copy latest gestures without interpolation if (newResult.gesture) bufferedResult.gesture = newResult.gesture as GestureResult[]; - if (newResult.performance) bufferedResult.performance = newResult.performance; + + // append interpolation performance data + const t1 = performance.now(); + if (newResult.performance) bufferedResult.performance = { ...newResult.performance, interpolate: Math.round(t1 - t0) }; return bufferedResult; }