mirror of https://github.com/vladmandic/human
implement experimental drawOptions.bufferedOutput and bufferedFactor
parent
f0739716e2
commit
bce1d62135
|
@ -25,14 +25,19 @@ const userConfig = {
|
|||
description: { enabled: false },
|
||||
emotion: { enabled: false },
|
||||
},
|
||||
hand: { enabled: false },
|
||||
gesture: { enabled: false },
|
||||
hand: { enabled: true },
|
||||
gesture: { enabled: true },
|
||||
body: { enabled: true, modelPath: 'posenet.json' },
|
||||
// body: { enabled: true, modelPath: 'blazepose.json' },
|
||||
object: { enabled: false },
|
||||
*/
|
||||
};
|
||||
|
||||
// draw options passed as the third argument to the human.draw.all() calls in this demo
const drawOptions = {
|
||||
bufferedOutput: true, // experimental feature that makes draw functions interpolate results between each detection for smoother movement
|
||||
bufferedFactor: 3, // speed of interpolation convergence where 1 means 100% immediately, 2 means 50% at each interpolation, etc.
|
||||
};
|
||||
|
||||
// ui options
|
||||
const ui = {
|
||||
// configurable items
|
||||
|
@ -223,7 +228,7 @@ async function drawResults(input) {
|
|||
}
|
||||
|
||||
// draw all results
|
||||
human.draw.all(canvas, result);
|
||||
human.draw.all(canvas, result, drawOptions);
|
||||
/* use individual functions
|
||||
human.draw.face(canvas, result.face);
|
||||
human.draw.body(canvas, result.body);
|
||||
|
@ -643,7 +648,7 @@ async function drawWarmup(res) {
|
|||
canvas.height = res.canvas.height;
|
||||
const ctx = canvas.getContext('2d');
|
||||
ctx.drawImage(res.canvas, 0, 0, res.canvas.width, res.canvas.height, 0, 0, canvas.width, canvas.height);
|
||||
await human.draw.all(canvas, res);
|
||||
await human.draw.all(canvas, res, drawOptions);
|
||||
}
|
||||
|
||||
async function main() {
|
||||
|
|
|
@ -201,7 +201,7 @@ const config: Config = {
|
|||
// warmup pre-initializes all models for faster inference but can take
|
||||
// significant time on startup
|
||||
// only used for `webgl` and `humangl` backends
|
||||
cacheSensitivity: 0.01, // cache sensitivity
|
||||
cacheSensitivity: 0.75, // cache sensitivity
|
||||
// values 0..1 where 0.01 means reset cache if input changed more than 1%
|
||||
// set to 0 to disable caching
|
||||
filter: { // run input through image filters before inference
|
||||
|
|
|
@ -21,6 +21,7 @@ import type { Result, Face, Body, Hand, Item, Gesture } from '../result';
|
|||
* -useDepth: use z-axis coordinate as color shade,
|
||||
* -useCurves: draw polygons as curves or as lines,
|
||||
* -bufferedOutput: experimental: allows calling draw methods multiple times per detection, interpolating between consecutive results to achieve smoother animations
|
||||
* -bufferedFactor: speed of interpolation convergence where 1 means 100% immediately, 2 means 50% at each interpolation, etc.
|
||||
* -useRawBoxes: Boolean: internal: use non-normalized coordinates when performing draw methods,
|
||||
*/
|
||||
export interface DrawOptions {
|
||||
|
@ -40,6 +41,7 @@ export interface DrawOptions {
|
|||
useDepth: boolean,
|
||||
useCurves: boolean,
|
||||
bufferedOutput: boolean,
|
||||
bufferedFactor: number,
|
||||
useRawBoxes: boolean,
|
||||
calculateHandBox: boolean,
|
||||
}
|
||||
|
@ -60,12 +62,13 @@ export const options: DrawOptions = {
|
|||
fillPolygons: <boolean>false,
|
||||
useDepth: <boolean>true,
|
||||
useCurves: <boolean>false,
|
||||
bufferedOutput: <boolean>false, // not yet implemented
|
||||
bufferedFactor: <number>2,
|
||||
bufferedOutput: <boolean>false,
|
||||
useRawBoxes: <boolean>false,
|
||||
calculateHandBox: <boolean>true,
|
||||
};
|
||||
|
||||
let bufferedResult: Result;
|
||||
let bufferedResult: Result = { face: [], body: [], hand: [], gesture: [], object: [], performance: {}, timestamp: 0 };
|
||||
|
||||
function point(ctx, x, y, z = 0, localOptions) {
|
||||
ctx.fillStyle = localOptions.useDepth && z ? `rgba(${127.5 + (2 * z)}, ${127.5 - (2 * z)}, 255, 0.3)` : localOptions.color;
|
||||
|
@ -470,6 +473,50 @@ export async function object(inCanvas: HTMLCanvasElement, result: Array<Item>, d
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Advances the module-level `bufferedResult` one interpolation step towards `newResult`.
 *
 * Body and hand records are deep-copied from `newResult` only when the number of detected
 * records changes; otherwise each numeric coordinate is blended as
 * `((factor - 1) * previous + next) / factor`, so repeated calls converge on the newest detection.
 * Face, object and gesture results are not interpolated and are deep-copied on every call.
 *
 * @param newResult latest detection result; its `body`, `hand`, `face`, `object` and `gesture` arrays are read
 * @param localOptions draw options; only `bufferedFactor` is read (1 applies new values immediately,
 * 2 moves halfway on each call, etc.); values that are not finite numbers >= 1 are treated as 1
 */
function calcBuffered(newResult, localOptions) {
  // a factor of 0 would divide by zero and a factor below 1 would overshoot, so fall back to "apply immediately"
  const requested = Number(localOptions.bufferedFactor);
  const factor = Number.isFinite(requested) && requested >= 1 ? requested : 1;

  // single interpolation step; when there is no usable previous value the new value is taken as-is
  const step = (prev: unknown, next: number): number => (
    typeof prev === 'number' && Number.isFinite(prev) ? ((factor - 1) * prev + next) / factor : next
  );

  // each record is only replaced by a deep copy when the number of detected records changes, otherwise it converges by itself
  if (!bufferedResult.body || (newResult.body.length !== bufferedResult.body.length)) bufferedResult.body = JSON.parse(JSON.stringify(newResult.body));
  for (let i = 0; i < newResult.body.length; i++) { // update body: box, boxRaw, keypoints
    const next = newResult.body[i];
    const prev = bufferedResult.body[i];
    if (next.box) prev.box = next.box.map((val, j) => step(prev.box?.[j], val)) as [number, number, number, number];
    if (next.boxRaw) prev.boxRaw = next.boxRaw.map((val, j) => step(prev.boxRaw?.[j], val)) as [number, number, number, number];
    if (next.keypoints) {
      prev.keypoints = next.keypoints.map((keypoint, j) => {
        const prevPosition = prev.keypoints?.[j]?.position;
        return {
          ...keypoint, // keep fields that are not interpolated
          position: {
            ...keypoint.position, // keep non-interpolated coordinates such as an optional z
            x: step(prevPosition?.x, keypoint.position.x),
            y: step(prevPosition?.y, keypoint.position.y),
          },
        };
      });
    }
  }

  if (!bufferedResult.hand || (newResult.hand.length !== bufferedResult.hand.length)) bufferedResult.hand = JSON.parse(JSON.stringify(newResult.hand));
  for (let i = 0; i < newResult.hand.length; i++) { // update hand: box, boxRaw, landmarks, annotations
    const next = newResult.hand[i];
    const prev = bufferedResult.hand[i];
    if (next.box) prev.box = next.box.map((val, j) => step(prev.box?.[j], val));
    if (next.boxRaw) prev.boxRaw = next.boxRaw.map((val, j) => step(prev.boxRaw?.[j], val));
    if (next.landmarks) {
      prev.landmarks = next.landmarks
        .map((landmark, j) => landmark
          .map((coord, k) => step(prev.landmarks?.[j]?.[k], coord)));
    }
    if (!prev.annotations) prev.annotations = {};
    for (const key of Object.keys(next.annotations ?? {})) {
      prev.annotations[key] = next.annotations[key]
        .map((val, j) => val
          .map((coord, k) => step(prev.annotations[key]?.[j]?.[k], coord)));
    }
  }

  // no buffering implemented for face, object, gesture
  bufferedResult.face = JSON.parse(JSON.stringify(newResult.face));
  bufferedResult.object = JSON.parse(JSON.stringify(newResult.object));
  bufferedResult.gesture = JSON.parse(JSON.stringify(newResult.gesture));
}
|
||||
|
||||
export async function canvas(inCanvas: HTMLCanvasElement, outCanvas: HTMLCanvasElement) {
|
||||
if (!inCanvas || !outCanvas) return;
|
||||
if (!(inCanvas instanceof HTMLCanvasElement) || !(outCanvas instanceof HTMLCanvasElement)) return;
|
||||
|
@ -482,7 +529,7 @@ export async function all(inCanvas: HTMLCanvasElement, result: Result, drawOptio
|
|||
if (!result || !inCanvas) return;
|
||||
if (!(inCanvas instanceof HTMLCanvasElement)) return;
|
||||
if (localOptions.bufferedOutput) {
|
||||
if (result.timestamp !== bufferedResult?.timestamp) bufferedResult = result;
|
||||
calcBuffered(result, localOptions);
|
||||
} else {
|
||||
bufferedResult = result;
|
||||
}
|
||||
|
|
|
@ -7,7 +7,10 @@ let model: GraphModel;
|
|||
|
||||
type Keypoints = { score: number, part: string, position: { x: number, y: number }, positionRaw: { x: number, y: number } };
|
||||
|
||||
let keypoints: Array<Keypoints> = [];
|
||||
const keypoints: Array<Keypoints> = [];
|
||||
let box: [number, number, number, number] = [0, 0, 0, 0];
|
||||
let boxRaw: [number, number, number, number] = [0, 0, 0, 0];
|
||||
let score = 0;
|
||||
let skipped = Number.MAX_SAFE_INTEGER;
|
||||
|
||||
const bodyParts = ['head', 'neck', 'rightShoulder', 'rightElbow', 'rightWrist', 'chest', 'leftShoulder', 'leftElbow', 'leftWrist', 'pelvis', 'rightHip', 'rightKnee', 'rightAnkle', 'leftHip', 'leftKnee', 'leftAnkle'];
|
||||
|
@ -31,23 +34,22 @@ function max2d(inputs, minScore) {
|
|||
// combine all data
|
||||
const reshaped = tf.reshape(inputs, [height * width]);
|
||||
// get highest score
|
||||
const score = tf.max(reshaped, 0).dataSync()[0];
|
||||
if (score > minScore) {
|
||||
const newScore = tf.max(reshaped, 0).dataSync()[0];
|
||||
if (newScore > minScore) {
|
||||
// skip coordinate calculation if score is too low
|
||||
const coords = tf.argMax(reshaped, 0);
|
||||
const x = mod(coords, width).dataSync()[0];
|
||||
const y = tf.div(coords, tf.scalar(width, 'int32')).dataSync()[0];
|
||||
return [x, y, score];
|
||||
return [x, y, newScore];
|
||||
}
|
||||
return [0, 0, score];
|
||||
return [0, 0, newScore];
|
||||
});
|
||||
}
|
||||
|
||||
export async function predict(image, config): Promise<Body[]> {
|
||||
if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) {
|
||||
skipped++;
|
||||
const score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
|
||||
return [{ id: 0, score, keypoints }];
|
||||
return [{ id: 0, score, box, boxRaw, keypoints }];
|
||||
}
|
||||
skipped = 0;
|
||||
return new Promise(async (resolve) => {
|
||||
|
@ -64,7 +66,7 @@ export async function predict(image, config): Promise<Body[]> {
|
|||
tensor.dispose();
|
||||
|
||||
if (resT) {
|
||||
const parts: Array<Keypoints> = [];
|
||||
keypoints.length = 0;
|
||||
const squeeze = resT.squeeze();
|
||||
tf.dispose(resT);
|
||||
// body parts are basically just a stack of 2d tensors
|
||||
|
@ -73,10 +75,10 @@ export async function predict(image, config): Promise<Body[]> {
|
|||
// process each unstacked tensor as a separate body part
|
||||
for (let id = 0; id < stack.length; id++) {
|
||||
// actual processing to get coordinates and score
|
||||
const [x, y, score] = max2d(stack[id], config.body.minConfidence);
|
||||
const [x, y, partScore] = max2d(stack[id], config.body.minConfidence);
|
||||
if (score > config.body.minConfidence) {
|
||||
parts.push({
|
||||
score: Math.round(100 * score) / 100,
|
||||
keypoints.push({
|
||||
score: Math.round(100 * partScore) / 100,
|
||||
part: bodyParts[id],
|
||||
positionRaw: { // normalized to 0..1
|
||||
// @ts-ignore model is not undefined here
|
||||
|
@ -90,9 +92,24 @@ export async function predict(image, config): Promise<Body[]> {
|
|||
}
|
||||
}
|
||||
stack.forEach((s) => tf.dispose(s));
|
||||
keypoints = parts;
|
||||
}
|
||||
const score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
|
||||
resolve([{ id: 0, score, keypoints }]);
|
||||
score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
|
||||
const x = keypoints.map((a) => a.position.x);
|
||||
const y = keypoints.map((a) => a.position.x);
|
||||
box = [
|
||||
Math.min(...x),
|
||||
Math.min(...y),
|
||||
Math.max(...x) - Math.min(...x),
|
||||
Math.max(...y) - Math.min(...x),
|
||||
];
|
||||
const xRaw = keypoints.map((a) => a.positionRaw.x);
|
||||
const yRaw = keypoints.map((a) => a.positionRaw.x);
|
||||
boxRaw = [
|
||||
Math.min(...xRaw),
|
||||
Math.min(...yRaw),
|
||||
Math.max(...xRaw) - Math.min(...xRaw),
|
||||
Math.max(...yRaw) - Math.min(...xRaw),
|
||||
];
|
||||
resolve([{ id: 0, score, box, boxRaw, keypoints }]);
|
||||
});
|
||||
}
|
||||
|
|
|
@ -85,6 +85,7 @@ export class HandPipeline {
|
|||
// run new detector every skipFrames unless we only want box to start with
|
||||
let boxes;
|
||||
|
||||
// console.log(this.skipped, config.hand.skipFrames, !config.hand.landmarks, !config.skipFrame);
|
||||
if ((this.skipped === 0) || (this.skipped > config.hand.skipFrames) || !config.hand.landmarks || !config.skipFrame) {
|
||||
boxes = await this.handDetector.estimateHandBounds(image, config);
|
||||
this.skipped = 0;
|
||||
|
|
20
src/human.ts
20
src/human.ts
|
@ -20,6 +20,7 @@ import * as sample from './sample';
|
|||
import * as app from '../package.json';
|
||||
import { Tensor } from './tfjs/types';
|
||||
|
||||
// export types
|
||||
export type { Config } from './config';
|
||||
export type { Result, Face, Hand, Body, Item, Gesture } from './result';
|
||||
export type { DrawOptions } from './draw/draw';
|
||||
|
@ -355,26 +356,27 @@ export class Human {
|
|||
/** @hidden */
|
||||
#skipFrame = async (input) => {
|
||||
if (this.config.cacheSensitivity === 0) return false;
|
||||
const resizeFact = 40;
|
||||
const reduced = input.resizeBilinear([Math.trunc(input.shape[1] / resizeFact), Math.trunc(input.shape[2] / resizeFact)]);
|
||||
const resizeFact = 32;
|
||||
const reduced: Tensor = input.resizeBilinear([Math.trunc(input.shape[1] / resizeFact), Math.trunc(input.shape[2] / resizeFact)]);
|
||||
// use tensor sum
|
||||
/*
|
||||
const sumT = this.tf.sum(reduced);
|
||||
const sum = sumT.dataSync()[0] as number;
|
||||
sumT.dispose();
|
||||
// use js loop sum
|
||||
/*
|
||||
*/
|
||||
// use js loop sum, faster than uploading tensor to gpu calculating and downloading back
|
||||
const reducedData = reduced.dataSync();
|
||||
let sum = 0;
|
||||
for (let i = 0; i < reducedData.length; i++) sum += reducedData[i];
|
||||
*/
|
||||
for (let i = 0; i < reducedData.length / 3; i++) sum += reducedData[3 * i + 2]; // look only at the third (blue) value as each pixel is rgb number triplet
|
||||
|
||||
reduced.dispose();
|
||||
const diff = Math.max(sum, this.#lastInputSum) / Math.min(sum, this.#lastInputSum) - 1;
|
||||
const diff = 100 * (Math.max(sum, this.#lastInputSum) / Math.min(sum, this.#lastInputSum) - 1);
|
||||
this.#lastInputSum = sum;
|
||||
// if previous frame was skipped, skip this frame if changed more than cacheSensitivity
|
||||
// if previous frame was not skipped, then look for cacheSensitivity or difference larger than one in previous frame to avoid resetting cache in subsequent frames unnecessarily
|
||||
const skipFrame = diff < Math.max(this.config.cacheSensitivity, this.#lastCacheDiff);
|
||||
// if difference is above 4x threshold, don't use last value to force reset cache for significant change of scenes or images
|
||||
this.#lastCacheDiff = diff > 4 * this.config.cacheSensitivity ? 0 : diff;
|
||||
// if difference is above 10x threshold, don't use last value to force reset cache for significant change of scenes or images
|
||||
this.#lastCacheDiff = diff > 10 * this.config.cacheSensitivity ? 0 : diff;
|
||||
return skipFrame;
|
||||
}
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ export function scalePoses(poses, [height, width], [inputResolutionHeight, input
|
|||
const scalePose = (pose, i) => ({
|
||||
id: i,
|
||||
score: pose.score,
|
||||
bowRaw: [pose.box[0] / inputResolutionWidth, pose.box[1] / inputResolutionHeight, pose.box[2] / inputResolutionWidth, pose.box[3] / inputResolutionHeight],
|
||||
boxRaw: [pose.box[0] / inputResolutionWidth, pose.box[1] / inputResolutionHeight, pose.box[2] / inputResolutionWidth, pose.box[3] / inputResolutionHeight],
|
||||
box: [Math.trunc(pose.box[0] * scaleX), Math.trunc(pose.box[1] * scaleY), Math.trunc(pose.box[2] * scaleX), Math.trunc(pose.box[3] * scaleY)],
|
||||
keypoints: pose.keypoints.map(({ score, part, position }) => ({
|
||||
score,
|
||||
|
|
|
@ -73,8 +73,8 @@ export interface Face {
|
|||
export interface Body {
|
||||
id: number,
|
||||
score: number,
|
||||
box?: [x: number, y: number, width: number, height: number],
|
||||
boxRaw?: [x: number, y: number, width: number, height: number],
|
||||
box: [x: number, y: number, width: number, height: number],
|
||||
boxRaw: [x: number, y: number, width: number, height: number],
|
||||
keypoints: Array<{
|
||||
part: string,
|
||||
position: { x: number, y: number, z?: number },
|
||||
|
@ -150,6 +150,6 @@ export interface Result {
|
|||
/** {@link Object}: detection & analysis results */
|
||||
object: Array<Item>
|
||||
performance: Record<string, unknown>,
|
||||
canvas: OffscreenCanvas | HTMLCanvasElement,
|
||||
canvas?: OffscreenCanvas | HTMLCanvasElement,
|
||||
timestamp: number,
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue