mirror of https://github.com/vladmandic/human

performance and memory optimizations

parent b64e9ae69f
commit c2dc38793e

TODO.md
@@ -53,3 +53,5 @@ Object detection using CenterNet or NanoDet models is not working when using WAS
<https://github.com/tensorflow/tfjs/issues/4824>

<br><hr><br>

> const mod = (a, b) => tf.sub(a, tf.mul(tf.div(a, tf.scalar(b, 'int32')), tf.scalar(b, 'int32'))); // modulus op implemented in tf
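The `mod` helper noted above emulates a missing modulus kernel by deriving the remainder from integer division; a minimal usage sketch (hypothetical, assuming int32 inputs where `tf.div` truncates):

```ts
import * as tf from '@tensorflow/tfjs'; // assumption: any tfjs bundle exposing sub/mul/div/scalar

// remainder computed as a - (a / b) * b, relying on integer division of int32 tensors
const mod = (a: tf.Tensor, b: number) => tf.sub(a, tf.mul(tf.div(a, tf.scalar(b, 'int32')), tf.scalar(b, 'int32')));

const values = tf.tensor1d([7, 12, 25], 'int32');
const remainders = mod(values, 5); // expected [2, 2, 0] when tf.div truncates int32 inputs
remainders.print();
tf.dispose([values, remainders]);
```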
@@ -327,10 +327,10 @@ async function drawResults(input) {
`;
ui.framesDraw++;
ui.lastFrame = human.now();
// if buffered, immediate loop but limit frame rate although it's going to run slower as JS is singlethreaded
if (ui.buffered) {
if (isLive(input)) {
ui.drawThread = requestAnimationFrame(() => drawResults(input));
// ui.drawThread = requestAnimationFrame(() => drawResults(input));
ui.drawThread = setTimeout(() => drawResults(input), 25);
} else {
cancelAnimationFrame(ui.drawThread);
videoPause();
@@ -23,7 +23,7 @@
<body>
<canvas id="canvas" style="margin: 0 auto; width: 100%"></canvas>
<video id="video" playsinline style="display: none"></video>
<pre id="status" style="position: absolute; top: 20px; right: 20px; background-color: grey; padding: 8px; box-shadow: 2px 2px black"></pre>
<pre id="status" style="position: absolute; top: 12px; right: 20px; background-color: grey; padding: 8px; box-shadow: 2px 2px black"></pre>
<pre id="log" style="padding: 8px"></pre>
<div id="performance" style="position: absolute; bottom: 0; width: 100%; padding: 8px; font-size: 0.8rem;"></div>
</body>
@@ -6,14 +6,13 @@

// demo/typescript/index.ts
import Human from "../../dist/human.esm.js";
var config = {
modelBasePath: "../../models",
backend: "humangl",
async: true
var humanConfig = {
modelBasePath: "../../models"
};
var human = new Human(config);
human.env.perfadd = false;
var result;
var human = new Human(humanConfig);
human.env["perfadd"] = false;
human.draw.options.font = 'small-caps 24px "Lato"';
human.draw.options.lineHeight = 24;
var dom = {
video: document.getElementById("video"),
canvas: document.getElementById("canvas"),
@@ -21,20 +20,17 @@ var dom = {
fps: document.getElementById("status"),
perf: document.getElementById("performance")
};
var timestamp = { detect: 0, draw: 0, tensors: 0 };
var fps = { detect: 0, draw: 0 };
var log = (...msg) => {
dom.log.innerText += msg.join(" ") + "\n";
console.log(...msg);
};
var status = (msg) => {
dom.fps.innerText = msg;
};
var perf = (msg) => {
dom.perf.innerText = "performance: " + JSON.stringify(msg).replace(/"|{|}/g, "").replace(/,/g, " | ");
};
var status = (msg) => dom.fps.innerText = msg;
var perf = (msg) => dom.perf.innerText = "tensors:" + human.tf.memory().numTensors + " | performance: " + JSON.stringify(msg).replace(/"|{|}/g, "").replace(/,/g, " | ");
async function webCam() {
status("starting webcam...");
const options = { audio: false, video: { facingMode: "user", resizeMode: "crop-and-scale", width: { ideal: document.body.clientWidth } } };
const options = { audio: false, video: { facingMode: "user", resizeMode: "none", width: { ideal: document.body.clientWidth } } };
const stream = await navigator.mediaDevices.getUserMedia(options);
const ready = new Promise((resolve) => {
dom.video.onloadeddata = () => resolve(true);
@@ -57,34 +53,39 @@ async function webCam() {
};
}
async function detectionLoop() {
const t0 = human.now();
if (!dom.video.paused) {
result = await human.detect(dom.video);
await human.detect(dom.video);
const tensors = human.tf.memory().numTensors;
if (tensors - timestamp.tensors !== 0)
log("allocated tensors:", tensors - timestamp.tensors);
timestamp.tensors = tensors;
}
const t1 = human.now();
fps.detect = 1e3 / (t1 - t0);
const now = human.now();
fps.detect = 1e3 / (now - timestamp.detect);
timestamp.detect = now;
requestAnimationFrame(detectionLoop);
}
async function drawLoop() {
const t0 = human.now();
if (!dom.video.paused) {
const interpolated = await human.next(result);
const interpolated = await human.next(human.result);
await human.draw.canvas(dom.video, dom.canvas);
await human.draw.all(dom.canvas, interpolated);
perf(interpolated.performance);
}
const t1 = human.now();
fps.draw = 1e3 / (t1 - t0);
status(dom.video.paused ? "paused" : `fps: ${fps.detect.toFixed(1).padStart(5, " ")} detect / ${fps.draw.toFixed(1).padStart(5, " ")} draw`);
requestAnimationFrame(drawLoop);
const now = human.now();
fps.draw = 1e3 / (now - timestamp.draw);
timestamp.draw = now;
status(dom.video.paused ? "paused" : `fps: ${fps.detect.toFixed(1).padStart(5, " ")} detect | ${fps.draw.toFixed(1).padStart(5, " ")} draw`);
setTimeout(drawLoop, 30);
}
async function main() {
log("human version:", human.version, "tfjs:", human.tf.version_core);
log("human version:", human.version, "tfjs version:", human.tf.version_core);
log("platform:", human.env.platform, "agent:", human.env.agent);
status("loading...");
await human.load();
log("backend:", human.tf.getBackend(), "| available:", human.env.backends);
log("loaded models:" + Object.values(human.models).filter((model) => model !== null).length);
status("initializing...");
log("backend:", human.tf.getBackend(), "available:", human.env.backends);
await human.warmup();
await webCam();
await detectionLoop();
@@ -11,46 +11,45 @@

import Human from '../../dist/human.esm.js'; // equivalent of @vladmandic/human

const config = {
const humanConfig = { // user configuration for human, used to fine-tune behavior
modelBasePath: '../../models',
backend: 'humangl',
async: true,
// face: { enabled: true, detector: { rotation: true }, iris: { enabled: false }, description: { enabled: false }, emotion: { enabled: false } },
// backend: 'humangl',
// async: true,
// face: { enabled: false, detector: { rotation: true }, iris: { enabled: false }, description: { enabled: false }, emotion: { enabled: false } },
// body: { enabled: false },
// hand: { enabled: false },
// object: { enabled: false },
// gesture: { enabled: true },
};

const human = new Human(config);
human.env.perfadd = false;
let result;
const human = new Human(humanConfig); // create instance of human with overrides from user configuration

const dom = {
human.env['perfadd'] = false; // is performance data showing instant or total values
human.draw.options.font = 'small-caps 24px "Lato"'; // set font used to draw labels when using draw methods
human.draw.options.lineHeight = 24;

const dom = { // grab instances of dom objects so we don't have to look them up later
video: document.getElementById('video') as HTMLVideoElement,
canvas: document.getElementById('canvas') as HTMLCanvasElement,
log: document.getElementById('log') as HTMLPreElement,
fps: document.getElementById('status') as HTMLPreElement,
perf: document.getElementById('performance') as HTMLDivElement,
};
const timestamp = { detect: 0, draw: 0, tensors: 0 }; // holds information used to calculate performance and possible memory leaks
const fps = { detect: 0, draw: 0 }; // holds calculated fps information for both detect and screen refresh

const fps = { detect: 0, draw: 0 };

const log = (...msg) => {
const log = (...msg) => { // helper method to output messages
dom.log.innerText += msg.join(' ') + '\n';
// eslint-disable-next-line no-console
console.log(...msg);
};
const status = (msg) => {
dom.fps.innerText = msg;
};
const perf = (msg) => {
dom.perf.innerText = 'performance: ' + JSON.stringify(msg).replace(/"|{|}/g, '').replace(/,/g, ' | ');
};
const status = (msg) => dom.fps.innerText = msg; // print status element
const perf = (msg) => dom.perf.innerText = 'tensors:' + human.tf.memory().numTensors + ' | performance: ' + JSON.stringify(msg).replace(/"|{|}/g, '').replace(/,/g, ' | '); // print performance element

async function webCam() {
async function webCam() { // initialize webcam
status('starting webcam...');
const options = { audio: false, video: { facingMode: 'user', resizeMode: 'crop-and-scale', width: { ideal: document.body.clientWidth } } };
// @ts-ignore resizeMode is not yet defined in tslib
const options: MediaStreamConstraints = { audio: false, video: { facingMode: 'user', resizeMode: 'none', width: { ideal: document.body.clientWidth } } };
const stream: MediaStream = await navigator.mediaDevices.getUserMedia(options);
const ready = new Promise((resolve) => { dom.video.onloadeddata = () => resolve(true); });
dom.video.srcObject = stream;
@@ -63,47 +62,53 @@ async function webCam() {
const settings: MediaTrackSettings | string = track.getSettings ? track.getSettings() : '';
const constraints: MediaTrackConstraints | string = track.getConstraints ? track.getConstraints() : '';
log('video:', dom.video.videoWidth, dom.video.videoHeight, track.label, { stream, track, settings, constraints, capabilities });
dom.canvas.onclick = () => {
dom.canvas.onclick = () => { // pause when clicked on screen and resume on next click
if (dom.video.paused) dom.video.play();
else dom.video.pause();
};
}

async function detectionLoop() {
const t0 = human.now();
async function detectionLoop() { // main detection loop
if (!dom.video.paused) {
result = await human.detect(dom.video);
// console.log('profiling data:', await human.profile(dom.video));
await human.detect(dom.video); // actual detection; we're not capturing output in a local variable as it can also be reached via human.result
const tensors = human.tf.memory().numTensors; // check current tensor usage for memory leaks
if (tensors - timestamp.tensors !== 0) log('allocated tensors:', tensors - timestamp.tensors); // printed on start and each time there is a tensor leak
timestamp.tensors = tensors;
}
const t1 = human.now();
fps.detect = 1000 / (t1 - t0);
requestAnimationFrame(detectionLoop);
const now = human.now();
fps.detect = 1000 / (now - timestamp.detect);
timestamp.detect = now;
requestAnimationFrame(detectionLoop); // start new frame immediately
}

async function drawLoop() {
const t0 = human.now();
async function drawLoop() { // main screen refresh loop
if (!dom.video.paused) {
const interpolated = await human.next(result);
await human.draw.canvas(dom.video, dom.canvas);
await human.draw.all(dom.canvas, interpolated);
perf(interpolated.performance);
const interpolated = await human.next(human.result); // smoothen result using last-known results
await human.draw.canvas(dom.video, dom.canvas); // draw canvas to screen
await human.draw.all(dom.canvas, interpolated); // draw labels, boxes, lines, etc.
perf(interpolated.performance); // write performance data
}
const t1 = human.now();
fps.draw = 1000 / (t1 - t0);
status(dom.video.paused ? 'paused' : `fps: ${fps.detect.toFixed(1).padStart(5, ' ')} detect / ${fps.draw.toFixed(1).padStart(5, ' ')} draw`);
requestAnimationFrame(drawLoop);
const now = human.now();
fps.draw = 1000 / (now - timestamp.draw);
timestamp.draw = now;
status(dom.video.paused ? 'paused' : `fps: ${fps.detect.toFixed(1).padStart(5, ' ')} detect | ${fps.draw.toFixed(1).padStart(5, ' ')} draw`); // write status
// requestAnimationFrame(drawLoop); // refresh at screen refresh rate
setTimeout(drawLoop, 30); // use to slow down refresh from max refresh rate to target of 30 fps
}

async function main() {
log('human version:', human.version, 'tfjs:', human.tf.version_core);
async function main() { // main entry point
log('human version:', human.version, 'tfjs version:', human.tf.version_core);
log('platform:', human.env.platform, 'agent:', human.env.agent);
status('loading...');
await human.load();
await human.load(); // preload all models
log('backend:', human.tf.getBackend(), '| available:', human.env.backends);
log('loaded models:' + Object.values(human.models).filter((model) => model !== null).length);
status('initializing...');
log('backend:', human.tf.getBackend(), 'available:', human.env.backends);
await human.warmup();
await webCam();
await detectionLoop();
await drawLoop();
await human.warmup(); // warmup function to initialize backend for future faster detection
await webCam(); // start webcam
await detectionLoop(); // start detection loop
await drawLoop(); // start draw loop
}

window.onload = main;
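The rewritten demo decouples detection from rendering and derives fps from the interval between loop iterations rather than from a single call's duration; a condensed sketch of that pattern (the configuration and element ids are assumptions matching the demo):

```ts
import Human from '@vladmandic/human';

const human = new Human({ modelBasePath: '../../models' }); // assumed model location
const video = document.getElementById('video') as HTMLVideoElement;
const canvas = document.getElementById('canvas') as HTMLCanvasElement;
const timestamp = { detect: 0, draw: 0 };
const fps = { detect: 0, draw: 0 };

async function detectionLoop() { // run detection as fast as the backend allows
  if (!video.paused) await human.detect(video); // result is kept internally as human.result
  const now = human.now();
  fps.detect = 1000 / (now - timestamp.detect); // rate from time between iterations, not call duration
  timestamp.detect = now;
  requestAnimationFrame(detectionLoop);
}

async function drawLoop() { // render interpolated results at a lower, fixed rate
  if (!video.paused) {
    const interpolated = await human.next(human.result); // smooth using last-known results
    await human.draw.canvas(video, canvas);
    await human.draw.all(canvas, interpolated);
  }
  const now = human.now();
  fps.draw = 1000 / (now - timestamp.draw);
  timestamp.draw = now;
  setTimeout(drawLoop, 30); // ~30 fps target instead of full screen refresh rate
}
```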
@@ -339,7 +339,7 @@ const config: Config = {
enabled: true,
rotation: true,
skipFrames: 99,
skipTime: 2000,
skipTime: 1000,
minConfidence: 0.50,
iouThreshold: 0.2,
maxDetected: -1,
@@ -358,7 +358,7 @@ const config: Config = {
iouThreshold: 0.4,
maxDetected: 10,
skipFrames: 99,
skipTime: 1000,
skipTime: 2000,
},
segmentation: {
enabled: false,
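In these config sections, `skipTime` (milliseconds) and `skipFrames` bound how long a module may keep reusing its previously computed result before running its model again; a minimal, hypothetical override using only options shown above:

```ts
import Human from '@vladmandic/human';

// hypothetical override: re-run the face detector model only after 1000 ms
// or 99 frames have passed, whichever limit is exceeded first
const human = new Human({
  modelBasePath: '../../models',
  face: { detector: { skipTime: 1000, skipFrames: 99, minConfidence: 0.50 } },
});
```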
@@ -9,6 +9,7 @@ import * as util from './facemeshutil';
import type { Config } from '../config';
import type { Tensor, GraphModel } from '../tfjs/types';
import { env } from '../util/env';
import type { Point } from '../result';

const keypointsCount = 6;
let model: GraphModel | null;
@@ -34,63 +35,72 @@ export async function load(config: Config): Promise<GraphModel> {
}

function decodeBounds(boxOutputs) {
const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
const centers = tf.add(boxStarts, anchors);
const boxSizes = tf.slice(boxOutputs, [0, 3], [-1, 2]);
const boxSizesNormalized = tf.div(boxSizes, inputSize);
const centersNormalized = tf.div(centers, inputSize);
const halfBoxSize = tf.div(boxSizesNormalized, 2);
const starts = tf.sub(centersNormalized, halfBoxSize);
const ends = tf.add(centersNormalized, halfBoxSize);
const startNormalized = tf.mul(starts, inputSize);
const endNormalized = tf.mul(ends, inputSize);
const concatAxis = 1;
return tf.concat2d([startNormalized, endNormalized], concatAxis);
const t: Record<string, Tensor> = {};
t.boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
t.centers = tf.add(t.boxStarts, anchors);
t.boxSizes = tf.slice(boxOutputs, [0, 3], [-1, 2]);
t.boxSizesNormalized = tf.div(t.boxSizes, inputSize);
t.centersNormalized = tf.div(t.centers, inputSize);
t.halfBoxSize = tf.div(t.boxSizesNormalized, 2);
t.starts = tf.sub(t.centersNormalized, t.halfBoxSize);
t.ends = tf.add(t.centersNormalized, t.halfBoxSize);
t.startNormalized = tf.mul(t.starts, inputSize);
t.endNormalized = tf.mul(t.ends, inputSize);
const boxes = tf.concat2d([t.startNormalized, t.endNormalized], 1);
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
return boxes;
}

export async function getBoxes(inputImage: Tensor, config: Config) {
// sanity check on input
if ((!inputImage) || (inputImage['isDisposedInternal']) || (inputImage.shape.length !== 4) || (inputImage.shape[1] < 1) || (inputImage.shape[2] < 1)) return { boxes: [] };
const [batch, boxes, scores] = tf.tidy(() => {
const resizedImage = tf.image.resizeBilinear(inputImage, [inputSize, inputSize]);
const normalizedImage = tf.sub(tf.div(resizedImage, 127.5), 0.5);
const res = model?.execute(normalizedImage);
let batchOut;
if (Array.isArray(res)) { // are we using tfhub or pinto converted model?
const sorted = res.sort((a, b) => a.size - b.size);
const concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16
const concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16
const concat = tf.concat([concat512, concat384], 1);
batchOut = tf.squeeze(concat, 0);
} else {
batchOut = tf.squeeze(res); // when using tfhub model
}
const boxesOut = decodeBounds(batchOut);
const logits = tf.slice(batchOut, [0, 0], [-1, 1]);
const scoresOut = tf.squeeze(tf.sigmoid(logits)); // inside tf.tidy
return [batchOut, boxesOut, scoresOut];
});
const t: Record<string, Tensor> = {};

const nmsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, (config.face.detector?.maxDetected || 0), (config.face.detector?.iouThreshold || 0), (config.face.detector?.minConfidence || 0));
const nms = await nmsTensor.array();
tf.dispose(nmsTensor);
const annotatedBoxes: Array<{ box: { startPoint: Tensor, endPoint: Tensor }, landmarks: Tensor, anchor: [number, number] | undefined, confidence: number }> = [];
const scoresData = await scores.data();
t.resized = tf.image.resizeBilinear(inputImage, [inputSize, inputSize]);
t.div = tf.div(t.resized, 127.5);
t.normalized = tf.sub(t.div, 0.5);
const res = model?.execute(t.normalized) as Tensor[];
if (Array.isArray(res)) { // are we using tfhub or pinto converted model?
const sorted = res.sort((a, b) => a.size - b.size);
t.concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16
t.concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16
t.concat = tf.concat([t.concat512, t.concat384], 1);
t.batch = tf.squeeze(t.concat, 0);
} else {
t.batch = tf.squeeze(res); // when using tfhub model
}
tf.dispose(res);
t.boxes = decodeBounds(t.batch);
t.logits = tf.slice(t.batch, [0, 0], [-1, 1]);
t.sigmoid = tf.sigmoid(t.logits);
t.scores = tf.squeeze(t.sigmoid);

t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, t.scores, (config.face.detector?.maxDetected || 0), (config.face.detector?.iouThreshold || 0), (config.face.detector?.minConfidence || 0));
const nms = await t.nms.array() as number[];
const boxes: Array<{ box: { startPoint: Point, endPoint: Point }, landmarks: Point[], confidence: number }> = [];
const scores = await t.scores.data();
for (let i = 0; i < nms.length; i++) {
const confidence = scoresData[nms[i]];
const confidence = scores[nms[i]];
if (confidence > (config.face.detector?.minConfidence || 0)) {
const boundingBox = tf.slice(boxes, [nms[i], 0], [1, -1]);
const landmarks = tf.tidy(() => tf.reshape(tf.squeeze(tf.slice(batch, [nms[i], keypointsCount - 1], [1, -1])), [keypointsCount, -1]));
annotatedBoxes.push({ box: util.createBox(boundingBox), landmarks, anchor: anchorsData[nms[i]], confidence });
tf.dispose(boundingBox);
const b: Record<string, Tensor> = {};
b.bbox = tf.slice(t.boxes, [nms[i], 0], [1, -1]);
b.slice = tf.slice(t.batch, [nms[i], keypointsCount - 1], [1, -1]);
b.squeeze = tf.squeeze(b.slice);
b.landmarks = tf.reshape(b.squeeze, [keypointsCount, -1]);
b.startPoint = tf.slice(b.bbox, [0, 0], [-1, 2]);
b.endPoint = tf.slice(b.bbox, [0, 2], [-1, 2]);
boxes.push({
box: {
startPoint: (await b.startPoint.data()) as unknown as Point,
endPoint: (await b.endPoint.data()) as unknown as Point,
},
landmarks: (await b.landmarks.array()) as Point[],
confidence,
});
Object.keys(b).forEach((tensor) => tf.dispose(b[tensor]));
}
}
tf.dispose(batch);
tf.dispose(boxes);
tf.dispose(scores);

return {
boxes: annotatedBoxes,
scaleFactor: [inputImage.shape[2] / inputSize, inputImage.shape[1] / inputSize],
};
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
return { boxes, scaleFactor: [inputImage.shape[2] / inputSize, inputImage.shape[1] / inputSize] };
}
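The pattern used throughout this change keeps every intermediate tensor in a named map and disposes the whole map in one sweep, since `tf.tidy` cannot wrap `await`; a generic sketch of that bookkeeping (function name and input shape are illustrative, not part of the commit):

```ts
import * as tf from '@tensorflow/tfjs';
import type { Tensor } from '@tensorflow/tfjs';

// assumption: an async readback forces us out of tf.tidy, so every intermediate
// tensor is tracked by name and released in a single sweep at the end
async function scoresAboveThreshold(detections: Tensor, threshold: number): Promise<number[]> {
  const t: Record<string, Tensor> = {};
  t.logits = tf.slice(detections, [0, 0], [-1, 1]); // first column of a [n, c] detection output
  t.sigmoid = tf.sigmoid(t.logits);
  t.scores = tf.squeeze(t.sigmoid);
  const scores = await t.scores.data(); // async readback is why tf.tidy is not usable here
  Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); // release all intermediates at once
  return Array.from(scores).filter((score) => score > threshold);
}
```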
@@ -37,14 +37,13 @@ export async function predict(input: Tensor, config: Config): Promise<FaceResult
boxCache = []; // empty cache
for (const possible of possibleBoxes.boxes) { // extract data from detector
const box: BoxCache = {
startPoint: await possible.box.startPoint.data() as unknown as Point,
endPoint: await possible.box.endPoint.data() as unknown as Point,
landmarks: await possible.landmarks.array() as Array<Point>,
startPoint: possible.box.startPoint,
endPoint: possible.box.endPoint,
landmarks: possible.landmarks,
confidence: possible.confidence,
};
boxCache.push(util.squarifyBox(util.enlargeBox(util.scaleBoxCoordinates(box, possibleBoxes.scaleFactor), Math.sqrt(enlargeFact))));
}
possibleBoxes.boxes.forEach((prediction) => tf.dispose([prediction.box.startPoint, prediction.box.endPoint, prediction.landmarks]));
skipped = 0;
} else {
skipped++;
@@ -37,57 +37,50 @@ export async function load(config: Config): Promise<GraphModel> {
}

export function enhance(input): Tensor {
const image = tf.tidy(() => {
// input received from detector is already normalized to 0..1
// input is also assumed to be straightened
const tensor = input.image || input.tensor || input;
if (!(tensor instanceof tf.Tensor)) return null;
// do a tight crop of image and resize it to fit the model
if (!model?.inputs[0].shape) return null; // model has no shape so no point continuing
const crop = tf.image.resizeBilinear(tensor, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
/*
const box = [[0.05, 0.15, 0.85, 0.85]]; // empirical values for top, left, bottom, right
const crop = (tensor.shape.length === 3)
? tf.image.cropAndResize(tf.expandDims(tensor, 0), box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]) // add batch dimension if missing
: tf.image.cropAndResize(tensor, box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
*/
/*
// just resize to fit the embedding model instead of cropping
const crop = tf.image.resizeBilinear(tensor, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
*/
const tensor = (input.image || input.tensor || input) as Tensor; // input received from detector is already normalized to 0..1, input is also assumed to be straightened
if (!model?.inputs[0].shape) return tensor; // model has no shape so no point continuing
// do a tight crop of image and resize it to fit the model
const crop = tf.image.resizeBilinear(tensor, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
/*
const box = [[0.05, 0.15, 0.85, 0.85]]; // empirical values for top, left, bottom, right
const crop = (tensor.shape.length === 3)
? tf.image.cropAndResize(tf.expandDims(tensor, 0), box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]) // add batch dimension if missing
: tf.image.cropAndResize(tensor, box, [0], [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
*/
/*
// just resize to fit the embedding model instead of cropping
const crop = tf.image.resizeBilinear(tensor, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
*/

/*
// convert to black&white to avoid colorization impact
const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
const [red, green, blue] = tf.split(crop, 3, 3);
const redNorm = tf.mul(red, rgb[0]);
const greenNorm = tf.mul(green, rgb[1]);
const blueNorm = tf.mul(blue, rgb[2]);
const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
const merge = tf.stack([grayscale, grayscale, grayscale], 3).squeeze(4);
*/
/*
// convert to black&white to avoid colorization impact
const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
const [red, green, blue] = tf.split(crop, 3, 3);
const redNorm = tf.mul(red, rgb[0]);
const greenNorm = tf.mul(green, rgb[1]);
const blueNorm = tf.mul(blue, rgb[2]);
const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
const merge = tf.stack([grayscale, grayscale, grayscale], 3).squeeze(4);
*/

/*
// increase image pseudo-contrast 100%
// (or do it per-channel so mean is done on each channel)
// (or calculate histogram and do it based on histogram)
const mean = merge.mean();
const factor = 2;
const contrast = merge.sub(mean).mul(factor).add(mean);
*/
/*
// increase image pseudo-contrast 100%
// (or do it per-channel so mean is done on each channel)
// (or calculate histogram and do it based on histogram)
const mean = merge.mean();
const factor = 2;
const contrast = merge.sub(mean).mul(factor).add(mean);
*/

/*
// normalize brightness from 0..1
// silly way of creating pseudo-hdr of image
const darken = crop.sub(crop.min());
const lighten = darken.div(darken.max());
*/

const norm = tf.mul(crop, 255);

return norm;
});
return image;
/*
// normalize brightness from 0..1
// silly way of creating pseudo-hdr of image
const darken = crop.sub(crop.min());
const lighten = darken.div(darken.max());
*/
const norm = tf.mul(crop, 255);
tf.dispose(crop);
return norm;
}

export async function predict(image: Tensor, config: Config, idx, count) {
@@ -126,7 +126,7 @@ export async function augmentIris(rawCoords, face, config, meshSize) {
tf.dispose(rightEyeCrop);
const eyePredictions = model.execute(combined) as Tensor;
tf.dispose(combined);
const eyePredictionsData = await eyePredictions.data(); // inside tf.tidy
const eyePredictionsData = await eyePredictions.data();
tf.dispose(eyePredictions);
const leftEyeData = eyePredictionsData.slice(0, irisLandmarks.numCoordinates * 3);
const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
@@ -43,35 +43,27 @@ export async function predict(image: Tensor, config: Config, idx, count) {
return new Promise(async (resolve) => {
const obj: Array<{ score: number, emotion: string }> = [];
if (config.face.emotion?.enabled) {
const t: Record<string, Tensor> = {};
const inputSize = model?.inputs[0].shape ? model.inputs[0].shape[2] : 0;
const resize = tf.image.resizeBilinear(image, [inputSize, inputSize], false);
t.resize = tf.image.resizeBilinear(image, [inputSize, inputSize], false);
// const box = [[0.15, 0.15, 0.85, 0.85]]; // empirical values for top, left, bottom, right
// const resize = tf.image.cropAndResize(image, box, [0], [inputSize, inputSize]);

const [red, green, blue] = tf.split(resize, 3, 3);
tf.dispose(resize);
[t.red, t.green, t.blue] = tf.split(t.resize, 3, 3);
// weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
const redNorm = tf.mul(red, rgb[0]);
const greenNorm = tf.mul(green, rgb[1]);
const blueNorm = tf.mul(blue, rgb[2]);
tf.dispose(red);
tf.dispose(green);
tf.dispose(blue);
const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
tf.dispose(redNorm);
tf.dispose(greenNorm);
tf.dispose(blueNorm);
const normalize = tf.tidy(() => tf.mul(tf.sub(grayscale, 0.5), 2));
tf.dispose(grayscale);
const emotionT = model?.execute(normalize) as Tensor; // result is already in range 0..1, no need for additional activation
t.redNorm = tf.mul(t.red, rgb[0]);
t.greenNorm = tf.mul(t.green, rgb[1]);
t.blueNorm = tf.mul(t.blue, rgb[2]);
t.grayscale = tf.addN([t.redNorm, t.greenNorm, t.blueNorm]);
t.grayscaleSub = tf.sub(t.grayscale, 0.5);
t.grayscaleMul = tf.mul(t.grayscaleSub, 2);
t.emotion = model?.execute(t.grayscaleMul) as Tensor; // result is already in range 0..1, no need for additional activation
lastTime = now();
const data = await emotionT.data();
tf.dispose(emotionT);
const data = await t.emotion.data();
for (let i = 0; i < data.length; i++) {
if (data[i] > (config.face.emotion?.minConfidence || 0)) obj.push({ score: Math.min(0.99, Math.trunc(100 * data[i]) / 100), emotion: annotations[i] });
}
obj.sort((a, b) => b.score - a.score);
tf.dispose(normalize);
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
}
last[idx] = obj;
lastCount = count;
@@ -7,6 +7,7 @@ import * as tf from '../../dist/tfjs.esm.js';
import * as util from './handposeutil';
import * as anchors from './handposeanchors';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { Point } from '../result';

export class HandDetector {
model: GraphModel;
@@ -26,62 +27,64 @@ export class HandDetector {
}

normalizeBoxes(boxes) {
return tf.tidy(() => {
const boxOffsets = tf.slice(boxes, [0, 0], [-1, 2]);
const boxSizes = tf.slice(boxes, [0, 2], [-1, 2]);
const boxCenterPoints = tf.add(tf.div(boxOffsets, this.inputSizeTensor), this.anchorsTensor);
const halfBoxSizes = tf.div(boxSizes, this.doubleInputSizeTensor);
const startPoints = tf.mul(tf.sub(boxCenterPoints, halfBoxSizes), this.inputSizeTensor);
const endPoints = tf.mul(tf.add(boxCenterPoints, halfBoxSizes), this.inputSizeTensor);
return tf.concat2d([startPoints, endPoints], 1);
});
const t: Record<string, Tensor> = {};
t.boxOffsets = tf.slice(boxes, [0, 0], [-1, 2]);
t.boxSizes = tf.slice(boxes, [0, 2], [-1, 2]);
t.div = tf.div(t.boxOffsets, this.inputSizeTensor);
t.boxCenterPoints = tf.add(t.div, this.anchorsTensor);
t.halfBoxSizes = tf.div(t.boxSizes, this.doubleInputSizeTensor);
t.sub = tf.sub(t.boxCenterPoints, t.halfBoxSizes);
t.startPoints = tf.mul(t.sub, this.inputSizeTensor);
t.add = tf.add(t.boxCenterPoints, t.halfBoxSizes);
t.endPoints = tf.mul(t.add, this.inputSizeTensor);
const res = tf.concat2d([t.startPoints, t.endPoints], 1);
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
return res;
}

normalizeLandmarks(rawPalmLandmarks, index) {
return tf.tidy(() => {
const landmarks = tf.add(tf.div(tf.reshape(rawPalmLandmarks, [-1, 7, 2]), this.inputSizeTensor), this.anchors[index]);
return tf.mul(landmarks, this.inputSizeTensor);
});
const t: Record<string, Tensor> = {};
t.reshape = tf.reshape(rawPalmLandmarks, [-1, 7, 2]);
t.div = tf.div(t.reshape, this.inputSizeTensor);
t.landmarks = tf.add(t.div, this.anchors[index]);
const res = tf.mul(t.landmarks, this.inputSizeTensor);
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
return res;
}

async getBoxes(input, config) {
async predict(input, config): Promise<{ startPoint: Point; endPoint: Point, palmLandmarks: Point[]; confidence: number }[]> {
const t: Record<string, Tensor> = {};
t.batched = this.model.execute(input) as Tensor;
t.resize = tf.image.resizeBilinear(input, [this.inputSize, this.inputSize]);
t.div = tf.div(t.resize, 127.5);
t.image = tf.sub(t.div, 1);
t.batched = this.model.execute(t.image) as Tensor;
t.predictions = tf.squeeze(t.batched);
t.scores = tf.tidy(() => tf.squeeze(tf.sigmoid(tf.slice(t.predictions, [0, 0], [-1, 1]))));
t.slice = tf.slice(t.predictions, [0, 0], [-1, 1]);
t.sigmoid = tf.sigmoid(t.slice);
t.scores = tf.squeeze(t.sigmoid);
const scores = await t.scores.data();
t.boxes = tf.slice(t.predictions, [0, 1], [-1, 4]);
t.norm = this.normalizeBoxes(t.boxes);
// box detection is flaky so we look for 3x more boxes than we need
t.nms = await tf.image.nonMaxSuppressionAsync(t.norm, t.scores, 3 * config.hand.maxDetected, config.hand.iouThreshold, config.hand.minConfidence);
const nms = await t.nms.array() as Array<number>;
const hands: Array<{ box: Tensor, palmLandmarks: Tensor, confidence: number }> = [];
const hands: Array<{ startPoint: Point; endPoint: Point; palmLandmarks: Point[]; confidence: number }> = [];
for (const index of nms) {
const palmBox = tf.slice(t.norm, [index, 0], [1, -1]);
const palmLandmarks = tf.tidy(() => tf.reshape(this.normalizeLandmarks(tf.slice(t.predictions, [index, 5], [1, 14]), index), [-1, 2]));
hands.push({ box: palmBox, palmLandmarks, confidence: scores[index] });
}
for (const tensor of Object.keys(t)) tf.dispose(t[tensor]); // dispose all
return hands;
}

async estimateHandBounds(input, config): Promise<{ startPoint: number[]; endPoint: number[]; palmLandmarks: number[]; confidence: number }[]> {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
const image = tf.tidy(() => tf.sub(tf.div(tf.image.resizeBilinear(input, [this.inputSize, this.inputSize]), 127.5), 1));
const predictions = await this.getBoxes(image, config);
tf.dispose(image);
const hands: Array<{ startPoint: number[]; endPoint: number[]; palmLandmarks: number[]; confidence: number }> = [];
if (!predictions || predictions.length === 0) return hands;
for (const prediction of predictions) {
const boxes = await prediction.box.data();
const startPoint = boxes.slice(0, 2);
const endPoint = boxes.slice(2, 4);
const palmLandmarks = await prediction.palmLandmarks.array();
tf.dispose(prediction.box);
tf.dispose(prediction.palmLandmarks);
hands.push(util.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / this.inputSize, inputHeight / this.inputSize]));
const p: Record<string, Tensor> = {};
p.box = tf.slice(t.norm, [index, 0], [1, -1]);
p.slice = tf.slice(t.predictions, [index, 5], [1, 14]);
p.norm = this.normalizeLandmarks(p.slice, index);
p.palmLandmarks = tf.reshape(p.norm, [-1, 2]);
const box = await p.box.data();
const startPoint = box.slice(0, 2) as unknown as Point;
const endPoint = box.slice(2, 4) as unknown as Point;
const palmLandmarks = await p.palmLandmarks.array();
const hand = { startPoint, endPoint, palmLandmarks, confidence: scores[index] };
const scaled = util.scaleBoxCoordinates(hand, [input.shape[2] / this.inputSize, input.shape[1] / this.inputSize]);
hands.push(scaled);
Object.keys(p).forEach((tensor) => tf.dispose(p[tensor]));
}
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
return hands;
}
}
@@ -9,6 +9,7 @@ import type * as detector from './handposedetector';
import type { Tensor, GraphModel } from '../tfjs/types';
import { env } from '../util/env';
import { now } from '../util/util';
import type { Point } from '../result';

const palmBoxEnlargeFactor = 5; // default 3
const handBoxEnlargeFactor = 1.65; // default 1.65
@@ -21,7 +22,7 @@ export class HandPipeline {
handDetector: detector.HandDetector;
handPoseModel: GraphModel;
inputSize: number;
storedBoxes: Array<{ startPoint: number[]; endPoint: number[]; palmLandmarks: number[]; confidence: number } | null>;
storedBoxes: Array<{ startPoint: Point; endPoint: Point; palmLandmarks: Point[]; confidence: number } | null>;
skipped: number;
detectedHands: number;
@@ -93,7 +94,7 @@ export class HandPipeline {
const skipTime = (config.hand.skipTime || 0) > (now() - lastTime);
const skipFrame = this.skipped < (config.hand.skipFrames || 0);
if (config.skipAllowed && skipTime && skipFrame) {
boxes = await this.handDetector.estimateHandBounds(image, config);
boxes = await this.handDetector.predict(image, config);
this.skipped = 0;
}
if (config.skipAllowed) this.skipped++;
@@ -105,7 +106,7 @@ export class HandPipeline {
// for (const possible of boxes) this.storedBoxes.push(possible);
if (this.storedBoxes.length > 0) useFreshBox = true;
}
const hands: Array<{ landmarks: number[], confidence: number, boxConfidence: number, fingerConfidence: number, box: { topLeft: number[], bottomRight: number[] } }> = [];
const hands: Array<{ landmarks: Point[], confidence: number, boxConfidence: number, fingerConfidence: number, box: { topLeft: Point, bottomRight: Point } }> = [];

// go through working set of boxes
for (let i = 0; i < this.storedBoxes.length; i++) {
@@ -1,4 +1,5 @@
import * as tf from '../../dist/tfjs.esm.js';
import type { Point } from '../result';

export function getBoxSize(box) {
return [
@@ -27,8 +28,8 @@ export function cutBoxFromImageAndResize(box, image, cropSize) {
}

export function scaleBoxCoordinates(box, factor) {
const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]] as Point;
const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]] as Point;
const palmLandmarks = box.palmLandmarks.map((coord) => {
const scaledCoord = [coord[0] * factor[0], coord[1] * factor[1]];
return scaledCoord;
@@ -40,8 +41,8 @@ export function enlargeBox(box, factor = 1.5) {
const center = getBoxCenter(box);
const size = getBoxSize(box);
const newHalfSize = [factor * size[0] / 2, factor * size[1] / 2];
const startPoint = [center[0] - newHalfSize[0], center[1] - newHalfSize[1]];
const endPoint = [center[0] + newHalfSize[0], center[1] + newHalfSize[1]];
const startPoint = [center[0] - newHalfSize[0], center[1] - newHalfSize[1]] as Point;
const endPoint = [center[0] + newHalfSize[0], center[1] + newHalfSize[1]] as Point;
return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
}
@@ -50,8 +51,8 @@ export function squarifyBox(box) {
const size = getBoxSize(box);
const maxEdge = Math.max(...size);
const halfSize = maxEdge / 2;
const startPoint = [centers[0] - halfSize, centers[1] - halfSize];
const endPoint = [centers[0] + halfSize, centers[1] + halfSize];
const startPoint = [centers[0] - halfSize, centers[1] - halfSize] as Point;
const endPoint = [centers[0] + halfSize, centers[1] + halfSize] as Point;
return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
}
@@ -61,8 +62,8 @@ export function shiftBox(box, shiftFactor) {
box.endPoint[1] - box.startPoint[1],
];
const shiftVector = [boxSize[0] * shiftFactor[0], boxSize[1] * shiftFactor[1]];
const startPoint = [box.startPoint[0] + shiftVector[0], box.startPoint[1] + shiftVector[1]];
const endPoint = [box.endPoint[0] + shiftVector[0], box.endPoint[1] + shiftVector[1]];
const startPoint = [box.startPoint[0] + shiftVector[0], box.startPoint[1] + shiftVector[1]] as Point;
const endPoint = [box.endPoint[0] + shiftVector[0], box.endPoint[1] + shiftVector[1]] as Point;
return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
}
@@ -24,7 +24,7 @@ const inputSize = [[0, 0], [0, 0]];
const classes = ['hand', 'fist', 'pinch', 'point', 'face', 'tip', 'pinchtip'];
const faceIndex = 4;

const boxExpandFact = 1.6;
const boxExpandFact = 1.7;
const maxDetectorResolution = 512;
const detectorExpandFact = 1.4;

src/human.ts
@@ -347,6 +347,26 @@ export class Human {
return res;
}

/** Run detect with tensorflow profiling
* - result object will contain total execution time information for top-20 kernels
* - actual detection object can be accessed via `human.result`
*/
async profile(input: Input, userConfig?: Partial<Config>): Promise<Record<string, number>> {
const profile = await this.tf.profile(() => this.detect(input, userConfig));
const kernels = {};
for (const kernel of profile.kernels) { // sum kernel time values per kernel
if (kernels[kernel.name]) kernels[kernel.name] += kernel.kernelTimeMs;
else kernels[kernel.name] = kernel.kernelTimeMs;
}
const kernelArr: Array<{ name, ms }> = [];
Object.entries(kernels).forEach((key) => kernelArr.push({ name: key[0], ms: key[1] })); // convert to array
kernelArr.sort((a, b) => b.ms - a.ms); // sort
kernelArr.length = 20; // crop
const res: Record<string, number> = {};
for (const kernel of kernelArr) res[kernel.name] = kernel.ms; // create perf objects
return res;
}

/** Main detection method
* - Analyze configuration: {@link Config}
* - Pre-process input: {@link Input}
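A hypothetical usage sketch for the new `profile` method (model path and input element are assumptions; the returned object maps kernel names to summed execution time in milliseconds):

```ts
import Human from '@vladmandic/human';

const human = new Human({ modelBasePath: '../../models' }); // assumed model location

async function profileOnce(input: HTMLVideoElement | HTMLImageElement) {
  // runs a full detect pass under tf.profile and returns the top-20 kernels by total time
  const kernels = await human.profile(input);
  for (const [name, ms] of Object.entries(kernels)) console.log(`${name}: ${ms.toFixed(2)} ms`);
  // results of the profiled detection run remain accessible via human.result
}
```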