From 8741695dbd8c66b2bbfe46d836154d6396c2c3c0 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Tue, 18 May 2021 11:26:16 -0400
Subject: [PATCH] human 1.9.0 beta with breaking changes regarding caching

---
 demo/facematch.js                  |  3 +-
 demo/index.js                      |  2 --
 demo/node-multiprocess-worker.js   |  1 -
 demo/node-video.js                 |  1 -
 demo/node.js                       |  1 -
 package.json                       |  2 +-
 src/age/age.ts                     |  5 ++-
 src/config.ts                      | 49 ++++++++++++------------
 src/efficientpose/efficientpose.ts |  5 ++-
 src/emotion/emotion.ts             | 16 +++++-----
 src/{faceall.ts => face.ts}        | 36 +++++++++++-----------
 src/faceres/faceres.ts             | 13 ++++----
 src/gender/gender.ts               |  5 ++-
 src/handpose/handpipeline.ts       |  7 ++---
 src/human.ts                       | 43 ++++++++++++++++++++------
 src/nanodet/nanodet.ts             |  6 ++--
 test/test-node-gpu.js              |  1 -
 test/test-node-wasm.js             |  1 -
 test/test-node.js                  |  1 -
 19 files changed, 100 insertions(+), 98 deletions(-)
 rename src/{faceall.ts => face.ts} (86%)

diff --git a/demo/facematch.js b/demo/facematch.js
index ced4bf54..0e7620d9 100644
--- a/demo/facematch.js
+++ b/demo/facematch.js
@@ -7,7 +7,6 @@ const userConfig = {
   async: false,
   warmup: 'none',
   debug: true,
-  videoOptimized: false,
   face: {
     enabled: true,
     detector: { rotation: true, return: true },
@@ -16,7 +15,7 @@
     iris: { enabled: false },
     age: { enabled: false },
     gender: { enabled: false },
-    emotion: { enabled: false },
+    emotion: { enabled: true },
     description: { enabled: true },
   },
   hand: { enabled: false },
diff --git a/demo/index.js b/demo/index.js
index 31b22d53..44d8acf3 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -13,7 +13,6 @@ const userConfig = {
   /*
   backend: 'webgl',
   async: true,
-  videoOptimized: false,
   filter: {
     enabled: false,
     flip: false,
@@ -487,7 +486,6 @@ async function detectVideo() {
 
 // just initialize everything and call main function
 async function detectSampleImages() {
-  userConfig.videoOptimized = false; // force disable video optimizations
   document.getElementById('canvas').style.display = 'none';
   document.getElementById('samples-container').style.display = 'block';
   log('running detection of sample images');
diff --git a/demo/node-multiprocess-worker.js b/demo/node-multiprocess-worker.js
index 1ee127be..f906a1c6 100644
--- a/demo/node-multiprocess-worker.js
+++ b/demo/node-multiprocess-worker.js
@@ -14,7 +14,6 @@ const myConfig = {
   backend: 'tensorflow',
   modelBasePath: 'file://models/',
   debug: false,
-  videoOptimized: false,
   async: true,
   face: {
     enabled: true,
diff --git a/demo/node-video.js b/demo/node-video.js
index 29fd41ac..564db614 100644
--- a/demo/node-video.js
+++ b/demo/node-video.js
@@ -25,7 +25,6 @@ const humanConfig = {
   backend: 'tensorflow',
   modelBasePath: 'file://node_modules/@vladmandic/human/models/',
   debug: false,
-  videoOptimized: true,
   async: true,
   filter: { enabled: false },
   face: {
diff --git a/demo/node.js b/demo/node.js
index 5c9c01de..9a265aa7 100644
--- a/demo/node.js
+++ b/demo/node.js
@@ -16,7 +16,6 @@ const myConfig = {
   backend: 'tensorflow',
   modelBasePath: 'file://models/',
   debug: true,
-  videoOptimized: false,
   async: false,
   filter: {
     enabled: true,
diff --git a/package.json b/package.json
index 9cf8f9db..908c5fa9 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@vladmandic/human",
-  "version": "1.8.5",
+  "version": "1.9.0",
   "description": "Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition",
   "sideEffects": false,
   "main": "dist/human.node.js",
"dist/human.node.js", diff --git a/src/age/age.ts b/src/age/age.ts index f4d6b360..cd330cbd 100644 --- a/src/age/age.ts +++ b/src/age/age.ts @@ -16,12 +16,11 @@ export async function load(config) { export async function predict(image, config) { if (!model) return null; - if ((skipped < config.face.age.skipFrames) && config.videoOptimized && last.age && (last.age > 0)) { + if ((skipped < config.face.age.skipFrames) && config.skipFrame && last.age && (last.age > 0)) { skipped++; return last; } - if (config.videoOptimized) skipped = 0; - else skipped = Number.MAX_SAFE_INTEGER; + skipped = 0; return new Promise(async (resolve) => { const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false); const enhance = tf.mul(resize, [255.0]); diff --git a/src/config.ts b/src/config.ts index 4380480d..09513477 100644 --- a/src/config.ts +++ b/src/config.ts @@ -19,13 +19,6 @@ export interface Config { /** Perform model loading and inference concurrently or sequentially */ async: boolean, - /** Perform additional optimizations when input is video, - * - must be disabled for images - * - automatically disabled for Image, ImageData, ImageBitmap and Tensor inputs - * - skips boundary detection for every `skipFrames` frames specified for each model - * - while maintaining in-box detection since objects don't change definition as fast */ - videoOptimized: boolean, - /** What to use for `human.warmup()` * - warmup pre-initializes all models for faster inference but can take significant time on startup * - only used for `webgl` and `humangl` backends @@ -37,6 +30,12 @@ export interface Config { */ modelBasePath: string, + /** Cache sensitivity + * - values 0..1 where 0.01 means reset cache if input changed more than 1% + * - set to 0 to disable caching + */ + cacheSensitivity: number; + /** Run input through image filters before inference * - image filters run with near-zero latency as they are executed on the GPU */ @@ -101,8 +100,6 @@ export interface Config { * - iouThreshold: ammount of overlap between two detected objects before one object is removed * - maxDetected: maximum number of faces detected in the input, should be set to the minimum number for performance * - rotation: use calculated rotated face image or just box with rotation as-is, false means higher performance, but incorrect mesh mapping on higher face angles - * - skipFrames: how many frames to go without re-running the face detector and just run modified face mesh analysis, only valid if videoOptimized is set to true - * - skipInitial: if previous detection resulted in no faces detected, should skipFrames be reset immediately to force new detection cycle * - return: return extracted face as tensor for futher user processing */ face: { @@ -112,7 +109,6 @@ export interface Config { rotation: boolean, maxDetected: number, skipFrames: number, - skipInitial: boolean, minConfidence: number, iouThreshold: number, return: boolean, @@ -160,14 +156,11 @@ export interface Config { * - iouThreshold: ammount of overlap between two detected objects before one object is removed * - maxDetected: maximum number of hands detected in the input, should be set to the minimum number for performance * - rotation: use best-guess rotated hand image or just box with rotation as-is, false means higher performance, but incorrect finger mapping if hand is inverted - * - skipFrames: how many frames to go without re-running the hand bounding box detector and just run modified hand skeleton detector, only valid if videoOptimized 
diff --git a/src/config.ts b/src/config.ts
index 4380480d..09513477 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -19,13 +19,6 @@ export interface Config {
   /** Perform model loading and inference concurrently or sequentially */
   async: boolean,
 
-  /** Perform additional optimizations when input is video,
-   * - must be disabled for images
-   * - automatically disabled for Image, ImageData, ImageBitmap and Tensor inputs
-   * - skips boundary detection for every `skipFrames` frames specified for each model
-   * - while maintaining in-box detection since objects don't change definition as fast */
-  videoOptimized: boolean,
-
   /** What to use for `human.warmup()`
    * - warmup pre-initializes all models for faster inference but can take significant time on startup
    * - only used for `webgl` and `humangl` backends
@@ -37,6 +30,12 @@
    */
   modelBasePath: string,
 
+  /** Cache sensitivity
+   * - values 0..1 where 0.01 means reset cache if input changed more than 1%
+   * - set to 0 to disable caching
+   */
+  cacheSensitivity: number;
+
   /** Run input through image filters before inference
    * - image filters run with near-zero latency as they are executed on the GPU */
   filter: {
@@ -101,8 +100,6 @@
    * - iouThreshold: ammount of overlap between two detected objects before one object is removed
    * - maxDetected: maximum number of faces detected in the input, should be set to the minimum number for performance
    * - rotation: use calculated rotated face image or just box with rotation as-is, false means higher performance, but incorrect mesh mapping on higher face angles
-   * - skipFrames: how many frames to go without re-running the face detector and just run modified face mesh analysis, only valid if videoOptimized is set to true
-   * - skipInitial: if previous detection resulted in no faces detected, should skipFrames be reset immediately to force new detection cycle
    * - return: return extracted face as tensor for futher user processing */
   face: {
@@ -112,7 +109,6 @@
       rotation: boolean,
       maxDetected: number,
       skipFrames: number,
-      skipInitial: boolean,
       minConfidence: number,
       iouThreshold: number,
       return: boolean,
@@ -160,14 +156,11 @@
    * - iouThreshold: ammount of overlap between two detected objects before one object is removed
    * - maxDetected: maximum number of hands detected in the input, should be set to the minimum number for performance
    * - rotation: use best-guess rotated hand image or just box with rotation as-is, false means higher performance, but incorrect finger mapping if hand is inverted
-   * - skipFrames: how many frames to go without re-running the hand bounding box detector and just run modified hand skeleton detector, only valid if videoOptimized is set to true
-   * - skipInitial: if previous detection resulted in no hands detected, should skipFrames be reset immediately to force new detection cycle
    */
   hand: {
     enabled: boolean,
     rotation: boolean,
     skipFrames: number,
-    skipInitial: boolean,
     minConfidence: number,
     iouThreshold: number,
     maxDetected: number,
@@ -186,7 +179,6 @@
    * - minConfidence: minimum score that detection must have to return as valid object
    * - iouThreshold: ammount of overlap between two detected objects before one object is removed
    * - maxDetected: maximum number of detections to return
-   * - skipFrames: run object detection every n input frames, only valid if videoOptimized is set to true
    */
   object: {
     enabled: boolean,
@@ -205,14 +197,13 @@ const config: Config = {
   wasmPath: '../node_modules/@tensorflow/tfjs-backend-wasm/dist//', // path for wasm binaries, only used for backend: wasm
   debug: true, // print additional status messages to console
   async: true, // execute enabled models in parallel
-  videoOptimized: true, // perform additional optimizations when input is video,
-  // automatically disabled for Image, ImageData, ImageBitmap
-  // skips boundary detection for every n frames
-  // while maintaining in-box detection since objects cannot move that fast
   warmup: 'full', // what to use for human.warmup(), can be 'none', 'face', 'full'
   // warmup pre-initializes all models for faster inference but can take
   // significant time on startup
   // only used for `webgl` and `humangl` backends
+  cacheSensitivity: 0.005, // cache sensitivity
+  // values 0..1 where 0.01 means reset cache if input changed more than 1%
+  // set to 0 to disable caching
   filter: { // run input through image filters before inference
     // image filters run with near-zero latency as they are executed on the GPU
     enabled: true, // enable image pre-processing filters
@@ -254,13 +245,11 @@
       // this parameter is not valid in nodejs
       maxDetected: 10, // maximum number of faces detected in the input
       // should be set to the minimum number for performance
-      skipFrames: 21, // how many frames to go without re-running the face bounding box detector
-      // only used for video inputs
+      skipFrames: 21, // how many max frames to go without re-running the face bounding box detector
+      // only used when cacheSensitivity is not zero
      // e.g., if model is running st 25 FPS, we can re-use existing bounding
      // box for updated face analysis as the head probably hasn't moved much
      // in short time (10 * 1/25 = 0.25 sec)
-      skipInitial: false, // if previous detection resulted in no faces detected,
-      // should skipFrames be reset immediately to force new detection cycle
       minConfidence: 0.2, // threshold for discarding a prediction
       iouThreshold: 0.1, // ammount of overlap between two detected objects before one object is removed
       return: false, // return extracted face as tensor
@@ -282,15 +271,16 @@
       // recommended to enable detector.rotation and mesh.enabled
       modelPath: 'faceres.json', // face description model
       // can be either absolute path or relative to modelBasePath
-      skipFrames: 31, // how many frames to go without re-running the detector
-      // only used for video inputs
+      skipFrames: 31, // how many max frames to go without re-running the detector
+      // only used when cacheSensitivity is not zero
       minConfidence: 0.1, // threshold for discarding a prediction
     },
 
     emotion: {
       enabled: true,
       minConfidence: 0.1, // threshold for discarding a prediction
-      skipFrames: 32, // how many frames to go without re-running the detector
+      skipFrames: 32, // how max many frames to go without re-running the detector
+      // only used when cacheSensitivity is not zero
       modelPath: 'emotion.json', // face emotion model, can be absolute path or relative to modelBasePath
     },
   },
@@ -309,13 +299,11 @@
     enabled: true,
     rotation: false, // use best-guess rotated hand image or just box with rotation as-is
     // false means higher performance, but incorrect finger mapping if hand is inverted
-    skipFrames: 12, // how many frames to go without re-running the hand bounding box detector
-    // only used for video inputs
+    skipFrames: 12, // how many max frames to go without re-running the hand bounding box detector
+    // only used when cacheSensitivity is not zero
    // e.g., if model is running st 25 FPS, we can re-use existing bounding
    // box for updated hand skeleton analysis as the hand probably
    // hasn't moved much in short time (10 * 1/25 = 0.25 sec)
-    skipInitial: false, // if previous detection resulted in no hands detected,
-    // should skipFrames be reset immediately to force new detection cycle
     minConfidence: 0.1, // threshold for discarding a prediction
     iouThreshold: 0.1, // ammount of overlap between two detected objects before one object is removed
     maxDetected: 2, // maximum number of hands detected in the input
@@ -335,7 +323,8 @@
     minConfidence: 0.2, // threshold for discarding a prediction
     iouThreshold: 0.4, // ammount of overlap between two detected objects before one object is removed
     maxDetected: 10, // maximum number of objects detected in the input
-    skipFrames: 41, // how many frames to go without re-running the detector
+    skipFrames: 41, // how many max frames to go without re-running the detector
+    // only used when cacheSensitivity is not zero
   },
 };
 export { config as defaults };
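Note: with `videoOptimized` gone, caching is now tuned through `cacheSensitivity` plus the per-model `skipFrames` budgets shown above, and setting `cacheSensitivity: 0` disables caching entirely. A rough example of a 1.9.0 user config; the values are illustrative and simply mirror the defaults above:

```ts
// Illustrative 1.9.0 user configuration: caching is controlled by cacheSensitivity
// plus the per-model skipFrames budgets; values below mirror the defaults above.
const userConfig = {
  cacheSensitivity: 0.005, // reset caches when the input changes by more than ~0.5%; 0 disables caching
  face: {
    detector: { skipFrames: 21 },    // max frames to reuse the face bounding box
    description: { skipFrames: 31 }, // max frames to reuse the face description result
    emotion: { skipFrames: 32 },     // max frames to reuse the emotion result
  },
  hand: { skipFrames: 12 },
  object: { skipFrames: 41 },
};
```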
diff --git a/src/efficientpose/efficientpose.ts b/src/efficientpose/efficientpose.ts
index 6f675378..c891e956 100644
--- a/src/efficientpose/efficientpose.ts
+++ b/src/efficientpose/efficientpose.ts
@@ -39,12 +39,11 @@ function max2d(inputs, minScore) {
 
 export async function predict(image, config) {
   if (!model) return null;
-  if ((skipped < config.body.skipFrames) && config.videoOptimized && Object.keys(keypoints).length > 0) {
+  if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) {
     skipped++;
     return keypoints;
   }
-  if (config.videoOptimized) skipped = 0;
-  else skipped = Number.MAX_SAFE_INTEGER;
+  skipped = 0;
   return new Promise(async (resolve) => {
     const tensor = tf.tidy(() => {
       const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
diff --git a/src/emotion/emotion.ts b/src/emotion/emotion.ts
index 2e1c8910..2e4f7a4e 100644
--- a/src/emotion/emotion.ts
+++ b/src/emotion/emotion.ts
@@ -3,7 +3,9 @@ import * as tf from '../../dist/tfjs.esm.js';
 
 const annotations = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'];
 let model;
-let last: Array<{ score: number, emotion: string }> = [];
+// let last: Array<{ score: number, emotion: string }> = [];
+const last: Array<Array<{ score: number, emotion: string }>> = [];
+let lastCount = 0;
 let skipped = Number.MAX_SAFE_INTEGER;
 
 // tuning values
@@ -18,14 +20,13 @@ export async function load(config) {
   return model;
 }
 
-export async function predict(image, config) {
+export async function predict(image, config, idx, count) {
   if (!model) return null;
-  if ((skipped < config.face.emotion.skipFrames) && config.videoOptimized && (last.length > 0)) {
+  if ((skipped < config.face.emotion.skipFrames) && config.skipFrame && (lastCount === count) && last[idx] && (last[idx].length > 0)) {
     skipped++;
-    return last;
+    return last[idx];
   }
-  if (config.videoOptimized) skipped = 0;
-  else skipped = Number.MAX_SAFE_INTEGER;
+  skipped = 0;
   return new Promise(async (resolve) => {
     const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     const [red, green, blue] = tf.split(resize, 3, 3);
@@ -54,7 +55,8 @@
       obj.sort((a, b) => b.score - a.score);
     }
     normalize.dispose();
-    last = obj;
+    last[idx] = obj;
+    lastCount = count;
     resolve(obj);
   });
 }
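Note: emotion (above) and faceres (below) now cache per face rather than keeping a single last result: predict() receives the face index and the total face count, and a cached entry is only trusted when the face count matches the previous run. A compact sketch of that bookkeeping, with the `inferEmotion` helper and `cachedEmotion` wrapper as hypothetical names:

```ts
// Sketch of the per-face cache now used by emotion and faceres: one slot per face index,
// invalidated when the number of detected faces changes. inferEmotion is hypothetical.
type Emotion = { score: number, emotion: string };

const inferEmotion = async (_face: unknown): Promise<Emotion[]> => [{ score: 1, emotion: 'neutral' }];

const last: Array<Emotion[]> = []; // cached result per face index
let lastCount = 0;                 // face count from the previous full run

export async function cachedEmotion(face: unknown, idx: number, count: number, useCache: boolean) {
  // a cached entry is valid only if the face count is unchanged and this slot is populated
  if (useCache && (lastCount === count) && last[idx]?.length) return last[idx];
  last[idx] = await inferEmotion(face);
  lastCount = count;
  return last[idx];
}
```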
await faceres.predict(face.image, parent.config) : []; + descRes = parent.config.face.description.enabled ? await faceres.predict(faces[i].image, parent.config, i, faces.length) : []; parent.perf.embedding = Math.trunc(now() - timeStamp); } parent.analyze('End Description:'); @@ -178,18 +178,18 @@ export const detectFace = async (parent, input): Promise => { // calculate iris distance // iris: array[ center, left, top, right, bottom] - if (!parent.config.face.iris.enabled && face?.annotations?.leftEyeIris && face?.annotations?.rightEyeIris) { - delete face.annotations.leftEyeIris; - delete face.annotations.rightEyeIris; + if (!parent.config.face.iris.enabled && faces[i]?.annotations?.leftEyeIris && faces[i]?.annotations?.rightEyeIris) { + delete faces[i].annotations.leftEyeIris; + delete faces[i].annotations.rightEyeIris; } - const irisSize = (face.annotations?.leftEyeIris && face.annotations?.rightEyeIris) + const irisSize = (faces[i].annotations?.leftEyeIris && faces[i].annotations?.rightEyeIris) /* average human iris size is 11.7mm */ - ? 11.7 * Math.max(Math.abs(face.annotations.leftEyeIris[3][0] - face.annotations.leftEyeIris[1][0]), Math.abs(face.annotations.rightEyeIris[4][1] - face.annotations.rightEyeIris[2][1])) + ? 11.7 * Math.max(Math.abs(faces[i].annotations.leftEyeIris[3][0] - faces[i].annotations.leftEyeIris[1][0]), Math.abs(faces[i].annotations.rightEyeIris[4][1] - faces[i].annotations.rightEyeIris[2][1])) : 0; // combine results faceRes.push({ - ...face, + ...faces[i], age: descRes.age, gender: descRes.gender, genderConfidence: descRes.genderConfidence, @@ -197,10 +197,10 @@ export const detectFace = async (parent, input): Promise => { emotion: emotionRes, iris: (irisSize !== 0) ? Math.trunc(irisSize) / 100 : 0, rotation, - tensor: parent.config.face.detector.return ? face.image?.squeeze() : null, + tensor: parent.config.face.detector.return ? 
diff --git a/src/faceres/faceres.ts b/src/faceres/faceres.ts
index 9b662f1a..a484bbb7 100644
--- a/src/faceres/faceres.ts
+++ b/src/faceres/faceres.ts
@@ -2,7 +2,8 @@ import { log, join } from '../helpers';
 import * as tf from '../../dist/tfjs.esm.js';
 
 let model;
-let last = { age: 0 };
+const last: Array<{ age: number}> = [];
+let lastCount = 0;
 let skipped = Number.MAX_SAFE_INTEGER;
 
 type Tensor = typeof tf.Tensor;
@@ -94,14 +95,13 @@ export function enhance(input): Tensor {
   return image;
 }
 
-export async function predict(image, config) {
+export async function predict(image, config, idx, count) {
   if (!model) return null;
-  if ((skipped < config.face.description.skipFrames) && config.videoOptimized && last.age && (last.age > 0)) {
+  if ((skipped < config.face.description.skipFrames) && config.skipFrame && (lastCount === count) && last[idx]?.age && (last[idx]?.age > 0)) {
    skipped++;
    return last;
   }
-  if (config.videoOptimized) skipped = 0;
-  else skipped = Number.MAX_SAFE_INTEGER;
+  skipped = 0;
   return new Promise(async (resolve) => {
     const enhanced = enhance(image);
 
@@ -136,7 +136,8 @@
       resT.forEach((t) => tf.dispose(t));
     }
 
-    last = obj;
+    last[idx] = obj;
+    lastCount = count;
     resolve(obj);
   });
 }
diff --git a/src/gender/gender.ts b/src/gender/gender.ts
index f8d6d145..0badd472 100644
--- a/src/gender/gender.ts
+++ b/src/gender/gender.ts
@@ -21,12 +21,11 @@ export async function load(config) {
 
 export async function predict(image, config) {
   if (!model) return null;
-  if ((skipped < config.face.gender.skipFrames) && config.videoOptimized && last.gender !== '') {
+  if ((skipped < config.face.gender.skipFrames) && config.skipFrame && last.gender !== '') {
     skipped++;
     return last;
   }
-  if (config.videoOptimized) skipped = 0;
-  else skipped = Number.MAX_SAFE_INTEGER;
+  skipped = 0;
   return new Promise(async (resolve) => {
     const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     let enhance;
diff --git a/src/handpose/handpipeline.ts b/src/handpose/handpipeline.ts
index 991b7326..8bb9e7e7 100644
--- a/src/handpose/handpipeline.ts
+++ b/src/handpose/handpipeline.ts
@@ -81,11 +81,12 @@
     // run new detector every skipFrames unless we only want box to start with
     let boxes;
-    if ((this.skipped === 0) || (this.skipped > config.hand.skipFrames) || !config.hand.landmarks || !config.videoOptimized) {
+
+    if ((this.skipped === 0) || (this.skipped > config.hand.skipFrames) || !config.hand.landmarks || !config.skipFrame) {
       boxes = await this.handDetector.estimateHandBounds(image, config);
       this.skipped = 0;
     }
-    if (config.videoOptimized) this.skipped++;
+    if (config.skipFrame) this.skipped++;
 
     // if detector result count doesn't match current working set, use it to reset current working set
     if (boxes && (boxes.length > 0) && ((boxes.length !== this.detectedHands) && (this.detectedHands !== config.hand.maxDetected) || !config.hand.landmarks)) {
@@ -96,8 +97,6 @@
 
     const hands: Array<{}> = [];
 
-    if (config.hand.skipInitial && this.detectedHands === 0) this.skipped = 0;
-
     // go through working set of boxes
     for (let i = 0; i < this.storedBoxes.length; i++) {
       const currentBox = this.storedBoxes[i];
diff --git a/src/human.ts b/src/human.ts
index a720c2da..af04c615 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -4,7 +4,7 @@ import { Result } from './result';
 import * as sysinfo from './sysinfo';
 import * as tf from '../dist/tfjs.esm.js';
 import * as backend from './tfjs/backend';
-import * as faceall from './faceall';
+import * as face from './face';
 import * as facemesh from './blazeface/facemesh';
 import * as faceres from './faceres/faceres';
 import * as emotion from './emotion/emotion';
@@ -116,6 +116,7 @@ export class Human {
   #analyzeMemoryLeaks: boolean;
   #checkSanity: boolean;
   #firstRun: boolean;
+  #lastInputSum: number
 
   // definition end
 
@@ -165,6 +166,7 @@
     this.faceUVMap = facemesh.uvmap;
     // include platform info
     this.sysinfo = sysinfo.info();
+    this.#lastInputSum = 1;
   }
 
   // helper function: measure tensor leak
@@ -338,6 +340,21 @@
     }
   }
 
+  // check if input changed sufficiently to trigger new detections
+  /** @hidden */
+  #skipFrame = async (input) => {
+    if (this.config.cacheSensitivity === 0) return true;
+    const resizeFact = 32;
+    const reduced = input.resizeBilinear([Math.trunc(input.shape[1] / resizeFact), Math.trunc(input.shape[2] / resizeFact)]);
+    const sumT = this.tf.sum(reduced);
+    reduced.dispose();
+    const sum = sumT.dataSync()[0] as number;
+    sumT.dispose();
+    const diff = Math.max(sum, this.#lastInputSum) / Math.min(sum, this.#lastInputSum) - 1;
+    this.#lastInputSum = sum;
+    return diff < this.config.cacheSensitivity;
+  }
+
   /** Main detection method
    * - Analyze configuration: {@link Config}
    * - Pre-process input: {@link Input}
@@ -369,6 +386,8 @@
       // load models if enabled
       await this.load();
 
+      /*
+      // function disabled in favor of inputChanged
       // disable video optimization for inputs of type image, but skip if inside worker thread
       let previousVideoOptimized;
       // @ts-ignore ignore missing type for WorkerGlobalScope as that is the point
@@ -382,6 +401,7 @@
         previousVideoOptimized = this.config.videoOptimized;
         this.config.videoOptimized = false;
       }
+      */
 
       timeStamp = now();
       const process = image.process(input, this.config);
@@ -393,6 +413,17 @@
       this.perf.image = Math.trunc(now() - timeStamp);
       this.analyze('Get Image:');
 
+      timeStamp = now();
+      // @ts-ignore hidden dynamic property that is not part of definitions
+      this.config.skipFrame = await this.#skipFrame(process.tensor);
+      if (!this.perf.frames) this.perf.frames = 0;
+      if (!this.perf.cached) this.perf.cached = 0;
+      this.perf.frames++;
+      // @ts-ignore hidden dynamic property that is not part of definitions
+      if (this.config.skipFrame) this.perf.cached++;
+      this.perf.changed = Math.trunc(now() - timeStamp);
+      this.analyze('Check Changed:');
+
       // prepare where to store model results
       let bodyRes;
       let handRes;
@@ -402,12 +433,12 @@
 
       // run face detection followed by all models that rely on face bounding box: face mesh, age, gender, emotion
       if (this.config.async) {
-        faceRes = this.config.face.enabled ? faceall.detectFace(this, process.tensor) : [];
+        faceRes = this.config.face.enabled ? face.detectFace(this, process.tensor) : [];
         if (this.perf.face) delete this.perf.face;
       } else {
         this.state = 'run:face';
         timeStamp = now();
-        faceRes = this.config.face.enabled ? await faceall.detectFace(this, process.tensor) : [];
+        faceRes = this.config.face.enabled ? await face.detectFace(this, process.tensor) : [];
         current = Math.trunc(now() - timeStamp);
         if (current > 0) this.perf.face = current;
       }
@@ -471,9 +502,6 @@
         else if (this.perf.gesture) delete this.perf.gesture;
       }
 
-      // restore video optimizations if previously disabled
-      if (previousVideoOptimized) this.config.videoOptimized = previousVideoOptimized;
-
       this.perf.total = Math.trunc(now() - timeStart);
       this.state = 'idle';
       const result = {
@@ -577,13 +605,10 @@
     const t0 = now();
     if (userConfig) this.config = mergeDeep(this.config, userConfig);
     if (!this.config.warmup || this.config.warmup === 'none') return { error: 'null' };
-    const save = this.config.videoOptimized;
-    this.config.videoOptimized = false;
     let res;
     if (typeof createImageBitmap === 'function') res = await this.#warmupBitmap();
     else if (typeof Image !== 'undefined') res = await this.#warmupCanvas();
     else res = await this.#warmupNode();
-    this.config.videoOptimized = save;
     const t1 = now();
     if (this.config.debug) log('Warmup', this.config.warmup, Math.round(t1 - t0), 'ms', res);
     return res;
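Note: the #skipFrame helper added above is what drives `config.skipFrame`: the input is downscaled by a fixed factor, its pixel values are summed, and the relative change of that sum against the previous frame is compared to `cacheSensitivity`. Restated as a standalone function with sample numbers; the function name and the sample sums are illustrative, not part of the patch:

```ts
// Standalone restatement of the relative-change test behind #skipFrame
// (function name and the sample sums are illustrative, not part of the patch):
function unchanged(sum: number, lastSum: number, cacheSensitivity: number): boolean {
  const diff = Math.max(sum, lastSum) / Math.min(sum, lastSum) - 1; // relative change of the downscaled pixel sum
  return diff < cacheSensitivity; // true -> config.skipFrame is set and per-model caches may be reused
}

// with the default cacheSensitivity of 0.005 (0.5%):
unchanged(1_003_000, 1_000_000, 0.005); // true  -> frame treated as unchanged
unchanged(1_010_000, 1_000_000, 0.005); // false -> full detection runs on this frame
```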
diff --git a/src/nanodet/nanodet.ts b/src/nanodet/nanodet.ts
index 08da16b7..9cbae6cd 100644
--- a/src/nanodet/nanodet.ts
+++ b/src/nanodet/nanodet.ts
@@ -97,13 +97,11 @@ async function process(res, inputSize, outputShape, config) {
 
 export async function predict(image, config) {
   if (!model) return null;
-  // console.log(skipped, config.object.skipFrames, config.videoOptimized, ((skipped < config.object.skipFrames) && config.videoOptimized && (last.length > 0)));
-  if ((skipped < config.object.skipFrames) && config.videoOptimized && (last.length > 0)) {
+  if ((skipped < config.object.skipFrames) && config.skipFrame && (last.length > 0)) {
     skipped++;
     return last;
   }
-  if (config.videoOptimized) skipped = 0;
-  else skipped = Number.MAX_SAFE_INTEGER;
+  skipped = 0;
   return new Promise(async (resolve) => {
     const outputSize = [image.shape[2], image.shape[1]];
     const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false);
diff --git a/test/test-node-gpu.js b/test/test-node-gpu.js
index b8bf175f..9b5d83e5 100644
--- a/test/test-node-gpu.js
+++ b/test/test-node-gpu.js
@@ -5,7 +5,6 @@ const config = {
   modelBasePath: 'file://models/',
   backend: 'tensorflow',
   debug: false,
-  videoOptimized: false,
   async: false,
   filter: {
     enabled: true,
diff --git a/test/test-node-wasm.js b/test/test-node-wasm.js
index facc6cdd..74df8afc 100644
--- a/test/test-node-wasm.js
+++ b/test/test-node-wasm.js
@@ -6,7 +6,6 @@ const config = {
   backend: 'wasm',
   wasmPath: 'node_modules/@tensorflow/tfjs-backend-wasm/dist/',
   debug: false,
-  videoOptimized: false,
   async: false,
   filter: {
     enabled: true,
diff --git a/test/test-node.js b/test/test-node.js
index 6fea955a..169ac997 100644
--- a/test/test-node.js
+++ b/test/test-node.js
@@ -5,7 +5,6 @@ const config = {
   modelBasePath: 'file://models/',
   backend: 'tensorflow',
   debug: false,
-  videoOptimized: false,
   async: false,
   filter: {
     enabled: true,
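Note: the effect of the new caching can be observed through the counters that detect() now maintains (`perf.frames`, `perf.cached`, `perf.changed`). A rough usage sketch; the import style, the assumption that `perf` is readable on the instance as it is inside the class, and the video element are illustrative:

```ts
// Illustrative only: watch how often 1.9.0 serves results from per-model caches.
// Assumes perf is readable on the Human instance; adjust to your build if it is not exposed.
import { Human } from '@vladmandic/human';

const human = new Human({ cacheSensitivity: 0.005 });

async function loop(video: HTMLVideoElement) {
  await human.detect(video);
  const { frames, cached, changed } = human.perf as { frames: number, cached: number, changed: number };
  // cached/frames approximates the cache hit rate; changed is the cost of the change check in ms
  if (frames % 100 === 0) console.log(`cache hit rate ${(cached / frames).toFixed(2)}, change check ${changed} ms`);
  requestAnimationFrame(() => loop(video));
}
```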