implement experimental drawOptions.bufferedOutput and bufferedFactor

2021-05-23 13:52:49 -04:00 · 2021-05-23 13:52:49 -04:00 · bce1d62135
parent f0739716e2
commit bce1d62135
8 changed files with 107 additions and 35 deletions
--- a/demo/index.js
+++ b/demo/index.js
@ -25,14 +25,19 @@ const userConfig = {
    description: { enabled: false },
    emotion: { enabled: false },
  },
-  hand: { enabled: false },
-  gesture: { enabled: false },
+  hand: { enabled: true },
+  gesture: { enabled: true },
  body: { enabled: true, modelPath: 'posenet.json' },
  // body: { enabled: true, modelPath: 'blazepose.json' },
  object: { enabled: false },
  */
 };

+const drawOptions = {
+  bufferedOutput: true, // experimental feature that makes draw functions interpolate results between each detection for smoother movement
+  bufferedFactor: 3, // speed of interpolation convergence where 1 means 100% immediately, 2 means 50% at each interpolation, etc.
+};
+
 // ui options
 const ui = {
  // configurable items
@ -223,7 +228,7 @@ async function drawResults(input) {
  }

  // draw all results
-  human.draw.all(canvas, result);
+  human.draw.all(canvas, result, drawOptions);
  /* use individual functions
  human.draw.face(canvas, result.face);
  human.draw.body(canvas, result.body);
@ -643,7 +648,7 @@ async function drawWarmup(res) {
  canvas.height = res.canvas.height;
  const ctx = canvas.getContext('2d');
  ctx.drawImage(res.canvas, 0, 0, res.canvas.width, res.canvas.height, 0, 0, canvas.width, canvas.height);
-  await human.draw.all(canvas, res);
+  await human.draw.all(canvas, res, drawOptions);
 }

 async function main() {
--- a/src/config.ts
+++ b/src/config.ts
@ -201,7 +201,7 @@ const config: Config = {
                             // warmup pre-initializes all models for faster inference but can take
                             // significant time on startup
                             // only used for `webgl` and `humangl` backends
-  cacheSensitivity: 0.01,    // cache sensitivity
+  cacheSensitivity: 0.75,    // cache sensitivity
                             // value is a percentage of input change: e.g. 0.75 means reset cache if input changed more than 0.75%
                             // set to 0 to disable caching
  filter: {                  // run input through image filters before inference
--- a/src/draw/draw.ts
+++ b/src/draw/draw.ts
@ -21,6 +21,7 @@ import type { Result, Face, Body, Hand, Item, Gesture } from '../result';
 * -useDepth: use z-axis coordinate as color shade,
 * -useCurves: draw polygons as curves or as lines,
 * -bufferedOutput: experimental: allows calling draw methods multiple times per detection, interpolating between consecutive results to achieve smoother animations
+ * -bufferedFactor: speed of interpolation convergence where 1 means 100% immediately, 2 means 50% at each interpolation, etc.
 * -useRawBoxes: Boolean: internal: use non-normalized coordinates when performing draw methods,
 */
 export interface DrawOptions {
@ -40,6 +41,7 @@ export interface DrawOptions {
  useDepth: boolean,
  useCurves: boolean,
  bufferedOutput: boolean,
+  bufferedFactor: number,
  useRawBoxes: boolean,
  calculateHandBox: boolean,
 }
@ -60,12 +62,13 @@ export const options: DrawOptions = {
  fillPolygons: <boolean>false,
  useDepth: <boolean>true,
  useCurves: <boolean>false,
-  bufferedOutput: <boolean>false, // not yet implemented
+  bufferedFactor: <number>2,
+  bufferedOutput: <boolean>false,
  useRawBoxes: <boolean>false,
  calculateHandBox: <boolean>true,
 };

-let bufferedResult: Result;
+let bufferedResult: Result = { face: [], body: [], hand: [], gesture: [], object: [], performance: {}, timestamp: 0 };

 function point(ctx, x, y, z = 0, localOptions) {
  ctx.fillStyle = localOptions.useDepth && z ? `rgba(${127.5 + (2 * z)}, ${127.5 - (2 * z)}, 255, 0.3)` : localOptions.color;
@ -470,6 +473,50 @@ export async function object(inCanvas: HTMLCanvasElement, result: Array<Item>, d
  }
 }

+/**
+ * Moves the module-level `bufferedResult` one interpolation step towards `newResult`.
+ *
+ * Body and hand coordinates are eased: each buffered value becomes
+ * `((factor - 1) * previous + current) / factor`, so repeated draw calls converge on the latest detection.
+ * A body/hand list is re-seeded with a deep copy whenever the number of detected records changes.
+ * Face, object and gesture results are not interpolated; they are replaced with deep copies.
+ *
+ * @param newResult latest detection result; its `body`, `hand`, `face`, `object` and `gesture` fields are read
+ * @param localOptions draw options; only `bufferedFactor` is read here
+ */
+function calcBuffered(newResult, localOptions) {
+  // a factor below 1 (or a missing/non-numeric one) would divide by zero or overshoot the target, so fall back to 1 which applies new values immediately
+  const factor = localOptions.bufferedFactor >= 1 ? localOptions.bufferedFactor : 1;
+  // one interpolation step; without a usable previous value (missing, null, NaN, Infinity) start from the current one so a bad value cannot stay stuck in the buffer
+  const blend = (prev, curr) => (typeof prev === 'number' && Number.isFinite(prev) ? ((factor - 1) * prev + curr) / factor : curr);
+  // element-wise step over a flat list of numbers and over a list of such lists
+  const blendList = (prev, curr) => (curr ? curr.map((val, j) => blend(prev?.[j], val)) : curr);
+  const blendGrid = (prev, curr) => (curr ? curr.map((row, j) => blendList(prev?.[j], row)) : curr);
+
+  // each record list is only deep-copied when the number of detected records changes, otherwise it converges by itself
+  if (!bufferedResult.body || (newResult.body.length !== bufferedResult.body.length)) bufferedResult.body = JSON.parse(JSON.stringify(newResult.body));
+  for (let i = 0; i < newResult.body.length; i++) { // update body: box, boxRaw, keypoints
+    const prev = bufferedResult.body[i];
+    const curr = newResult.body[i];
+    const keypoints = curr.keypoints.map((keypoint, j) => ({
+      ...keypoint, // keep fields that are not interpolated instead of dropping them
+      position: {
+        ...keypoint.position,
+        x: blend(prev.keypoints?.[j]?.position?.x, keypoint.position.x),
+        y: blend(prev.keypoints?.[j]?.position?.y, keypoint.position.y),
+      },
+    }));
+    prev.box = blendList(prev.box, curr.box) as [number, number, number, number];
+    prev.boxRaw = blendList(prev.boxRaw, curr.boxRaw) as [number, number, number, number];
+    prev.keypoints = keypoints;
+  }
+
+  if (!bufferedResult.hand || (newResult.hand.length !== bufferedResult.hand.length)) bufferedResult.hand = JSON.parse(JSON.stringify(newResult.hand));
+  for (let i = 0; i < newResult.hand.length; i++) { // update hand: box, boxRaw, landmarks, annotations
+    const prev = bufferedResult.hand[i];
+    const curr = newResult.hand[i];
+    prev.box = blendList(prev.box, curr.box);
+    prev.boxRaw = blendList(prev.boxRaw, curr.boxRaw);
+    prev.landmarks = blendGrid(prev.landmarks, curr.landmarks);
+    for (const key of Object.keys(curr.annotations || {})) {
+      // an annotation that is not buffered yet is seeded from the current values rather than throwing
+      if (!prev.annotations) prev.annotations = {};
+      prev.annotations[key] = blendGrid(prev.annotations[key], curr.annotations[key]);
+    }
+  }
+
+  // no buffering implemented for face, object, gesture
+  bufferedResult.face = JSON.parse(JSON.stringify(newResult.face));
+  bufferedResult.object = JSON.parse(JSON.stringify(newResult.object));
+  bufferedResult.gesture = JSON.parse(JSON.stringify(newResult.gesture));
+}
+
 export async function canvas(inCanvas: HTMLCanvasElement, outCanvas: HTMLCanvasElement) {
  if (!inCanvas || !outCanvas) return;
  if (!(inCanvas instanceof HTMLCanvasElement) || !(outCanvas instanceof HTMLCanvasElement)) return;
@ -482,7 +529,7 @@ export async function all(inCanvas: HTMLCanvasElement, result: Result, drawOptio
  if (!result || !inCanvas) return;
  if (!(inCanvas instanceof HTMLCanvasElement)) return;
  if (localOptions.bufferedOutput) {
-    if (result.timestamp !== bufferedResult?.timestamp) bufferedResult = result;
+    calcBuffered(result, localOptions);
  } else {
    bufferedResult = result;
  }
--- a/src/efficientpose/efficientpose.ts
+++ b/src/efficientpose/efficientpose.ts
@ -7,7 +7,10 @@ let model: GraphModel;

 type Keypoints = { score: number, part: string, position: { x: number, y: number }, positionRaw: { x: number, y: number } };

-let keypoints: Array<Keypoints> = [];
+const keypoints: Array<Keypoints> = [];
+let box: [number, number, number, number] = [0, 0, 0, 0];
+let boxRaw: [number, number, number, number] = [0, 0, 0, 0];
+let score = 0;
 let skipped = Number.MAX_SAFE_INTEGER;

 const bodyParts = ['head', 'neck', 'rightShoulder', 'rightElbow', 'rightWrist', 'chest', 'leftShoulder', 'leftElbow', 'leftWrist', 'pelvis', 'rightHip', 'rightKnee', 'rightAnkle', 'leftHip', 'leftKnee', 'leftAnkle'];
@ -31,23 +34,22 @@ function max2d(inputs, minScore) {
    // combine all data
    const reshaped = tf.reshape(inputs, [height * width]);
    // get highest score
-    const score = tf.max(reshaped, 0).dataSync()[0];
-    if (score > minScore) {
+    const newScore = tf.max(reshaped, 0).dataSync()[0];
+    if (newScore > minScore) {
      // skip coordinate calculation if score is too low
      const coords = tf.argMax(reshaped, 0);
      const x = mod(coords, width).dataSync()[0];
      const y = tf.div(coords, tf.scalar(width, 'int32')).dataSync()[0];
-      return [x, y, score];
+      return [x, y, newScore];
    }
-    return [0, 0, score];
+    return [0, 0, newScore];
  });
 }

 export async function predict(image, config): Promise<Body[]> {
  if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) {
    skipped++;
-    const score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
-    return [{ id: 0, score, keypoints }];
+    return [{ id: 0, score, box, boxRaw, keypoints }];
  }
  skipped = 0;
  return new Promise(async (resolve) => {
@ -64,7 +66,7 @@ export async function predict(image, config): Promise<Body[]> {
    tensor.dispose();

    if (resT) {
-      const parts: Array<Keypoints> = [];
+      keypoints.length = 0;
      const squeeze = resT.squeeze();
      tf.dispose(resT);
      // body parts are basically just a stack of 2d tensors
@ -73,10 +75,10 @@ export async function predict(image, config): Promise<Body[]> {
      // process each unstacked tensor as a separate body part
      for (let id = 0; id < stack.length; id++) {
        // actual processing to get coordinates and score
-        const [x, y, score] = max2d(stack[id], config.body.minConfidence);
+        const [x, y, partScore] = max2d(stack[id], config.body.minConfidence);
        if (score > config.body.minConfidence) {
-          parts.push({
-            score: Math.round(100 * score) / 100,
+          keypoints.push({
+            score: Math.round(100 * partScore) / 100,
            part: bodyParts[id],
            positionRaw: { // normalized to 0..1
              // @ts-ignore model is not undefined here
@ -90,9 +92,24 @@ export async function predict(image, config): Promise<Body[]> {
        }
      }
      stack.forEach((s) => tf.dispose(s));
-      keypoints = parts;
    }
-    const score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
-    resolve([{ id: 0, score, keypoints }]);
+    score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
+    const x = keypoints.map((a) => a.position.x);
+    const y = keypoints.map((a) => a.position.x);
+    box = [
+      Math.min(...x),
+      Math.min(...y),
+      Math.max(...x) - Math.min(...x),
+      Math.max(...y) - Math.min(...x),
+    ];
+    const xRaw = keypoints.map((a) => a.positionRaw.x);
+    const yRaw = keypoints.map((a) => a.positionRaw.x);
+    boxRaw = [
+      Math.min(...xRaw),
+      Math.min(...yRaw),
+      Math.max(...xRaw) - Math.min(...xRaw),
+      Math.max(...yRaw) - Math.min(...xRaw),
+    ];
+    resolve([{ id: 0, score, box, boxRaw, keypoints }]);
  });
 }
--- a/src/handpose/handpipeline.ts
+++ b/src/handpose/handpipeline.ts
@ -85,6 +85,7 @@ export class HandPipeline {
    // run new detector every skipFrames unless we only want box to start with
    let boxes;

+    // console.log(this.skipped, config.hand.skipFrames, !config.hand.landmarks, !config.skipFrame);
    if ((this.skipped === 0) || (this.skipped > config.hand.skipFrames) || !config.hand.landmarks || !config.skipFrame) {
      boxes = await this.handDetector.estimateHandBounds(image, config);
      this.skipped = 0;
--- a/src/human.ts
+++ b/src/human.ts
@ -20,6 +20,7 @@ import * as sample from './sample';
 import * as app from '../package.json';
 import { Tensor } from './tfjs/types';

+// export types
 export type { Config } from './config';
 export type { Result, Face, Hand, Body, Item, Gesture } from './result';
 export type { DrawOptions } from './draw/draw';
@ -355,26 +356,27 @@ export class Human {
  /** @hidden */
  #skipFrame = async (input) => {
    if (this.config.cacheSensitivity === 0) return false;
-    const resizeFact = 40;
-    const reduced = input.resizeBilinear([Math.trunc(input.shape[1] / resizeFact), Math.trunc(input.shape[2] / resizeFact)]);
+    const resizeFact = 32;
+    const reduced: Tensor = input.resizeBilinear([Math.trunc(input.shape[1] / resizeFact), Math.trunc(input.shape[2] / resizeFact)]);
    // use tensor sum
+    /*
    const sumT = this.tf.sum(reduced);
    const sum = sumT.dataSync()[0] as number;
    sumT.dispose();
-    // use js loop sum
-    /*
+    */
+    // use js loop sum, faster than uploading tensor to gpu calculating and downloading back
    const reducedData = reduced.dataSync();
    let sum = 0;
-    for (let i = 0; i < reducedData.length; i++) sum += reducedData[i];
-    */
+    for (let i = 0; i < reducedData.length / 3; i++) sum += reducedData[3 * i + 2]; // look only at green value as each pixel is rgb number triplet
+
    reduced.dispose();
-    const diff = Math.max(sum, this.#lastInputSum) / Math.min(sum, this.#lastInputSum) - 1;
+    const diff = 100 * (Math.max(sum, this.#lastInputSum) / Math.min(sum, this.#lastInputSum) - 1);
    this.#lastInputSum = sum;
    // if previous frame was skipped, skip this frame if changed more than cacheSensitivity
    // if previous frame was not skipped, then look for cacheSensitivity or difference larger than one in previous frame to avoid resetting cache in subsequent frames unnecessarily
    const skipFrame = diff < Math.max(this.config.cacheSensitivity, this.#lastCacheDiff);
-    // if difference is above 4x threshold, don't use last value to force reset cache for significant change of scenes or images
-    this.#lastCacheDiff = diff > 4 * this.config.cacheSensitivity ? 0 : diff;
+    // if difference is above 10x threshold, don't use last value to force reset cache for significant change of scenes or images
+    this.#lastCacheDiff = diff > 10 * this.config.cacheSensitivity ? 0 : diff;
    return skipFrame;
  }

--- a/src/posenet/utils.ts
+++ b/src/posenet/utils.ts
@ -35,7 +35,7 @@ export function scalePoses(poses, [height, width], [inputResolutionHeight, input
  const scalePose = (pose, i) => ({
    id: i,
    score: pose.score,
-    bowRaw: [pose.box[0] / inputResolutionWidth, pose.box[1] / inputResolutionHeight, pose.box[2] / inputResolutionWidth, pose.box[3] / inputResolutionHeight],
+    boxRaw: [pose.box[0] / inputResolutionWidth, pose.box[1] / inputResolutionHeight, pose.box[2] / inputResolutionWidth, pose.box[3] / inputResolutionHeight],
    box: [Math.trunc(pose.box[0] * scaleX), Math.trunc(pose.box[1] * scaleY), Math.trunc(pose.box[2] * scaleX), Math.trunc(pose.box[3] * scaleY)],
    keypoints: pose.keypoints.map(({ score, part, position }) => ({
      score,
--- a/src/result.ts
+++ b/src/result.ts
@ -73,8 +73,8 @@ export interface Face {
 export interface Body {
  id: number,
  score: number,
-  box?: [x: number, y: number, width: number, height: number],
-  boxRaw?: [x: number, y: number, width: number, height: number],
+  box: [x: number, y: number, width: number, height: number],
+  boxRaw: [x: number, y: number, width: number, height: number],
  keypoints: Array<{
    part: string,
    position: { x: number, y: number, z?: number },
@ -150,6 +150,6 @@ export interface Result {
  /** {@link Object}: detection & analysis results */
  object: Array<Item>
  performance: Record<string, unknown>,
-  canvas: OffscreenCanvas | HTMLCanvasElement,
+  canvas?: OffscreenCanvas | HTMLCanvasElement,
  timestamp: number,
 }