From bce1d6213538b720cfa9fc302978cc54f887080b Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Sun, 23 May 2021 13:52:49 -0400 Subject: [PATCH] implement experimental drawOptions.bufferedOutput and bufferedFactor --- demo/index.js | 13 +++++--- src/config.ts | 2 +- src/draw/draw.ts | 53 ++++++++++++++++++++++++++++-- src/efficientpose/efficientpose.ts | 45 +++++++++++++++++-------- src/handpose/handpipeline.ts | 1 + src/human.ts | 20 ++++++----- src/posenet/utils.ts | 2 +- src/result.ts | 6 ++-- 8 files changed, 107 insertions(+), 35 deletions(-) diff --git a/demo/index.js b/demo/index.js index b0d4ca51..979c9bd8 100644 --- a/demo/index.js +++ b/demo/index.js @@ -25,14 +25,19 @@ const userConfig = { description: { enabled: false }, emotion: { enabled: false }, }, - hand: { enabled: false }, - gesture: { enabled: false }, + hand: { enabled: true }, + gesture: { enabled: true }, body: { enabled: true, modelPath: 'posenet.json' }, // body: { enabled: true, modelPath: 'blazepose.json' }, object: { enabled: false }, */ }; +const drawOptions = { + bufferedOutput: true, // experimental feature that makes draw functions interpolate results between each detection for smoother movement + bufferedFactor: 3, // speed of interpolation convergence where 1 means 100% immediately, 2 means 50% at each interpolation, etc. 
+}; + // ui options const ui = { // configurable items @@ -223,7 +228,7 @@ async function drawResults(input) { } // draw all results - human.draw.all(canvas, result); + human.draw.all(canvas, result, drawOptions); /* use individual functions human.draw.face(canvas, result.face); human.draw.body(canvas, result.body); @@ -643,7 +648,7 @@ async function drawWarmup(res) { canvas.height = res.canvas.height; const ctx = canvas.getContext('2d'); ctx.drawImage(res.canvas, 0, 0, res.canvas.width, res.canvas.height, 0, 0, canvas.width, canvas.height); - await human.draw.all(canvas, res); + await human.draw.all(canvas, res, drawOptions); } async function main() { diff --git a/src/config.ts b/src/config.ts index 6bc598ce..154e44ce 100644 --- a/src/config.ts +++ b/src/config.ts @@ -201,7 +201,7 @@ const config: Config = { // warmup pre-initializes all models for faster inference but can take // significant time on startup // only used for `webgl` and `humangl` backends - cacheSensitivity: 0.01, // cache sensitivity + cacheSensitivity: 0.75, // cache sensitivity // values 0..1 where 0.01 means reset cache if input changed more than 1% // set to 0 to disable caching filter: { // run input through image filters before inference diff --git a/src/draw/draw.ts b/src/draw/draw.ts index 4b7e02f1..3aa66811 100644 --- a/src/draw/draw.ts +++ b/src/draw/draw.ts @@ -21,6 +21,7 @@ import type { Result, Face, Body, Hand, Item, Gesture } from '../result'; * -useDepth: use z-axis coordinate as color shade, * -useCurves: draw polygons as cures or as lines, * -bufferedOutput: experimental: allows to call draw methods multiple times for each detection and interpolate results between results thus achieving smoother animations + * -bufferedFactor: speed of interpolation convergence where 1 means 100% immediately, 2 means 50% at each interpolation, etc. 
* -useRawBoxes: Boolean: internal: use non-normalized coordinates when performing draw methods, */ export interface DrawOptions { @@ -40,6 +41,7 @@ export interface DrawOptions { useDepth: boolean, useCurves: boolean, bufferedOutput: boolean, + bufferedFactor: number, useRawBoxes: boolean, calculateHandBox: boolean, } @@ -60,12 +62,13 @@ export const options: DrawOptions = { fillPolygons: false, useDepth: true, useCurves: false, - bufferedOutput: false, // not yet implemented + bufferedFactor: 2, + bufferedOutput: false, useRawBoxes: false, calculateHandBox: true, }; -let bufferedResult: Result; +let bufferedResult: Result = { face: [], body: [], hand: [], gesture: [], object: [], performance: {}, timestamp: 0 }; function point(ctx, x, y, z = 0, localOptions) { ctx.fillStyle = localOptions.useDepth && z ? `rgba(${127.5 + (2 * z)}, ${127.5 - (2 * z)}, 255, 0.3)` : localOptions.color; @@ -470,6 +473,50 @@ export async function object(inCanvas: HTMLCanvasElement, result: Array, d } } +function calcBuffered(newResult, localOptions) { + // if (newResult.timestamp !== bufferedResult?.timestamp) bufferedResult = JSON.parse(JSON.stringify(newResult)); // no need to force update + // each record is only updated using deep copy when number of detected record changes, otherwise it will converge by itself + + if (!bufferedResult.body || (newResult.body.length !== bufferedResult.body.length)) bufferedResult.body = JSON.parse(JSON.stringify(newResult.body)); + for (let i = 0; i < newResult.body.length; i++) { // update body: box, boxRaw, keypoints + bufferedResult.body[i].box = newResult.body[i].box + .map((box, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].box[j] + box) / localOptions.bufferedFactor) as [number, number, number, number]; + bufferedResult.body[i].boxRaw = newResult.body[i].boxRaw + .map((box, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].boxRaw[j] + box) / localOptions.bufferedFactor) as [number, number, number, number]; + 
bufferedResult.body[i].keypoints = newResult.body[i].keypoints + .map((keypoint, j) => ({ + score: keypoint.score, + part: keypoint.part, + position: { + x: bufferedResult.body[i].keypoints[j] ? ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].position.x + keypoint.position.x) / localOptions.bufferedFactor : keypoint.position.x, + y: bufferedResult.body[i].keypoints[j] ? ((localOptions.bufferedFactor - 1) * bufferedResult.body[i].keypoints[j].position.y + keypoint.position.y) / localOptions.bufferedFactor : keypoint.position.y, + }, + })); + } + + if (!bufferedResult.hand || (newResult.hand.length !== bufferedResult.hand.length)) bufferedResult.hand = JSON.parse(JSON.stringify(newResult.hand)); + for (let i = 0; i < newResult.hand.length; i++) { // update hand: box, boxRaw, landmarks, annotations + bufferedResult.hand[i].box = newResult.hand[i].box + .map((box, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].box[j] + box) / localOptions.bufferedFactor); + bufferedResult.hand[i].boxRaw = newResult.hand[i].boxRaw + .map((box, j) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].boxRaw[j] + box) / localOptions.bufferedFactor); + bufferedResult.hand[i].landmarks = newResult.hand[i].landmarks + .map((landmark, j) => landmark + .map((coord, k) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].landmarks[j][k] + coord) / localOptions.bufferedFactor)); + const keys = Object.keys(newResult.hand[i].annotations); + for (const key of keys) { + bufferedResult.hand[i].annotations[key] = newResult.hand[i].annotations[key] + .map((val, j) => val + .map((coord, k) => ((localOptions.bufferedFactor - 1) * bufferedResult.hand[i].annotations[key][j][k] + coord) / localOptions.bufferedFactor)); + } + } + + // no buffering implemented for face, object, gesture + bufferedResult.face = JSON.parse(JSON.stringify(newResult.face)); + bufferedResult.object = JSON.parse(JSON.stringify(newResult.object)); + bufferedResult.gesture 
= JSON.parse(JSON.stringify(newResult.gesture)); +} + export async function canvas(inCanvas: HTMLCanvasElement, outCanvas: HTMLCanvasElement) { if (!inCanvas || !outCanvas) return; if (!(inCanvas instanceof HTMLCanvasElement) || !(outCanvas instanceof HTMLCanvasElement)) return; @@ -482,7 +529,7 @@ export async function all(inCanvas: HTMLCanvasElement, result: Result, drawOptio if (!result || !inCanvas) return; if (!(inCanvas instanceof HTMLCanvasElement)) return; if (localOptions.bufferedOutput) { - if (result.timestamp !== bufferedResult?.timestamp) bufferedResult = result; + calcBuffered(result, localOptions); } else { bufferedResult = result; } diff --git a/src/efficientpose/efficientpose.ts b/src/efficientpose/efficientpose.ts index 87b198e0..50f12717 100644 --- a/src/efficientpose/efficientpose.ts +++ b/src/efficientpose/efficientpose.ts @@ -7,7 +7,10 @@ let model: GraphModel; type Keypoints = { score: number, part: string, position: { x: number, y: number }, positionRaw: { x: number, y: number } }; -let keypoints: Array = []; +const keypoints: Array = []; +let box: [number, number, number, number] = [0, 0, 0, 0]; +let boxRaw: [number, number, number, number] = [0, 0, 0, 0]; +let score = 0; let skipped = Number.MAX_SAFE_INTEGER; const bodyParts = ['head', 'neck', 'rightShoulder', 'rightElbow', 'rightWrist', 'chest', 'leftShoulder', 'leftElbow', 'leftWrist', 'pelvis', 'rightHip', 'rightKnee', 'rightAnkle', 'leftHip', 'leftKnee', 'leftAnkle']; @@ -31,23 +34,22 @@ function max2d(inputs, minScore) { // combine all data const reshaped = tf.reshape(inputs, [height * width]); // get highest score - const score = tf.max(reshaped, 0).dataSync()[0]; - if (score > minScore) { + const newScore = tf.max(reshaped, 0).dataSync()[0]; + if (newScore > minScore) { // skip coordinate calculation is score is too low const coords = tf.argMax(reshaped, 0); const x = mod(coords, width).dataSync()[0]; const y = tf.div(coords, tf.scalar(width, 'int32')).dataSync()[0]; - return [x, y, 
score]; + return [x, y, newScore]; } - return [0, 0, score]; + return [0, 0, newScore]; }); } export async function predict(image, config): Promise { if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) { skipped++; - const score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0); - return [{ id: 0, score, keypoints }]; + return [{ id: 0, score, box, boxRaw, keypoints }]; } skipped = 0; return new Promise(async (resolve) => { @@ -64,7 +66,7 @@ export async function predict(image, config): Promise { tensor.dispose(); if (resT) { - const parts: Array = []; + keypoints.length = 0; const squeeze = resT.squeeze(); tf.dispose(resT); // body parts are basically just a stack of 2d tensors @@ -73,10 +75,10 @@ export async function predict(image, config): Promise { // process each unstacked tensor as a separate body part for (let id = 0; id < stack.length; id++) { // actual processing to get coordinates and score - const [x, y, score] = max2d(stack[id], config.body.minConfidence); + const [x, y, partScore] = max2d(stack[id], config.body.minConfidence); - if (score > config.body.minConfidence) { + if (partScore > config.body.minConfidence) { - parts.push({ - score: Math.round(100 * score) / 100, + keypoints.push({ + score: Math.round(100 * partScore) / 100, part: bodyParts[id], positionRaw: { // normalized to 0..1 // @ts-ignore model is not undefined here @@ -90,9 +92,24 @@ export async function predict(image, config): Promise { } } stack.forEach((s) => tf.dispose(s)); - keypoints = parts; } - const score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0); - resolve([{ id: 0, score, keypoints }]); + score = keypoints.reduce((prev, curr) => (curr.score > prev ? 
curr.score : prev), 0); + const x = keypoints.map((a) => a.position.x); + const y = keypoints.map((a) => a.position.y); + box = [ + Math.min(...x), + Math.min(...y), + Math.max(...x) - Math.min(...x), + Math.max(...y) - Math.min(...y), + ]; + const xRaw = keypoints.map((a) => a.positionRaw.x); + const yRaw = keypoints.map((a) => a.positionRaw.y); + boxRaw = [ + Math.min(...xRaw), + Math.min(...yRaw), + Math.max(...xRaw) - Math.min(...xRaw), + Math.max(...yRaw) - Math.min(...yRaw), + ]; + resolve([{ id: 0, score, box, boxRaw, keypoints }]); }); } diff --git a/src/handpose/handpipeline.ts b/src/handpose/handpipeline.ts index 269deaf6..6a44211d 100644 --- a/src/handpose/handpipeline.ts +++ b/src/handpose/handpipeline.ts @@ -85,6 +85,7 @@ export class HandPipeline { // run new detector every skipFrames unless we only want box to start with let boxes; + // console.log(this.skipped, config.hand.skipFrames, !config.hand.landmarks, !config.skipFrame); if ((this.skipped === 0) || (this.skipped > config.hand.skipFrames) || !config.hand.landmarks || !config.skipFrame) { boxes = await this.handDetector.estimateHandBounds(image, config); this.skipped = 0; diff --git a/src/human.ts b/src/human.ts index d67de042..28731ed0 100644 --- a/src/human.ts +++ b/src/human.ts @@ -20,6 +20,7 @@ import * as sample from './sample'; import * as app from '../package.json'; import { Tensor } from './tfjs/types'; +// export types export type { Config } from './config'; export type { Result, Face, Hand, Body, Item, Gesture } from './result'; export type { DrawOptions } from './draw/draw'; @@ -355,26 +356,27 @@ export class Human { /** @hidden */ #skipFrame = async (input) => { if (this.config.cacheSensitivity === 0) return false; - const resizeFact = 40; - const reduced = input.resizeBilinear([Math.trunc(input.shape[1] / resizeFact), Math.trunc(input.shape[2] / resizeFact)]); + const resizeFact = 32; + const reduced: Tensor = input.resizeBilinear([Math.trunc(input.shape[1] / resizeFact), 
Math.trunc(input.shape[2] / resizeFact)]); // use tensor sum + /* const sumT = this.tf.sum(reduced); const sum = sumT.dataSync()[0] as number; sumT.dispose(); - // use js loop sum - /* + */ + // use js loop sum, faster than uploading tensor to gpu calculating and downloading back const reducedData = reduced.dataSync(); let sum = 0; - for (let i = 0; i < reducedData.length; i++) sum += reducedData[i]; - */ + for (let i = 0; i < reducedData.length / 3; i++) sum += reducedData[3 * i + 2]; // look only at green value as each pixel is rgb number triplet + reduced.dispose(); - const diff = Math.max(sum, this.#lastInputSum) / Math.min(sum, this.#lastInputSum) - 1; + const diff = 100 * (Math.max(sum, this.#lastInputSum) / Math.min(sum, this.#lastInputSum) - 1); this.#lastInputSum = sum; // if previous frame was skipped, skip this frame if changed more than cacheSensitivity // if previous frame was not skipped, then look for cacheSensitivity or difference larger than one in previous frame to avoid resetting cache in subsequent frames unnecessarily const skipFrame = diff < Math.max(this.config.cacheSensitivity, this.#lastCacheDiff); - // if difference is above 4x threshold, don't use last value to force reset cache for significant change of scenes or images - this.#lastCacheDiff = diff > 4 * this.config.cacheSensitivity ? 0 : diff; + // if difference is above 10x threshold, don't use last value to force reset cache for significant change of scenes or images + this.#lastCacheDiff = diff > 10 * this.config.cacheSensitivity ? 
0 : diff; return skipFrame; } diff --git a/src/posenet/utils.ts b/src/posenet/utils.ts index aea0092a..d1aa841e 100644 --- a/src/posenet/utils.ts +++ b/src/posenet/utils.ts @@ -35,7 +35,7 @@ export function scalePoses(poses, [height, width], [inputResolutionHeight, input const scalePose = (pose, i) => ({ id: i, score: pose.score, - bowRaw: [pose.box[0] / inputResolutionWidth, pose.box[1] / inputResolutionHeight, pose.box[2] / inputResolutionWidth, pose.box[3] / inputResolutionHeight], + boxRaw: [pose.box[0] / inputResolutionWidth, pose.box[1] / inputResolutionHeight, pose.box[2] / inputResolutionWidth, pose.box[3] / inputResolutionHeight], box: [Math.trunc(pose.box[0] * scaleX), Math.trunc(pose.box[1] * scaleY), Math.trunc(pose.box[2] * scaleX), Math.trunc(pose.box[3] * scaleY)], keypoints: pose.keypoints.map(({ score, part, position }) => ({ score, diff --git a/src/result.ts b/src/result.ts index 8e54fd71..2c222391 100644 --- a/src/result.ts +++ b/src/result.ts @@ -73,8 +73,8 @@ export interface Face { export interface Body { id: number, score: number, - box?: [x: number, y: number, width: number, height: number], - boxRaw?: [x: number, y: number, width: number, height: number], + box: [x: number, y: number, width: number, height: number], + boxRaw: [x: number, y: number, width: number, height: number], keypoints: Array<{ part: string, position: { x: number, y: number, z?: number }, @@ -150,6 +150,6 @@ export interface Result { /** {@link Object}: detection & analysis results */ object: Array performance: Record, - canvas: OffscreenCanvas | HTMLCanvasElement, + canvas?: OffscreenCanvas | HTMLCanvasElement, timestamp: number, }