From 87f8e313444375bd1937e1ab497b393e2a87e572 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Sat, 17 Oct 2020 10:06:02 -0400
Subject: [PATCH] fixed memory leaks and added scoped runs

---
 config.js                    |   3 +
 demo/browser.js              | 119 ++++++++++++++++++-----------
 src/handpose/handdetector.js |  14 ++---
 src/index.js                 |  43 +++++++++----
 4 files changed, 100 insertions(+), 79 deletions(-)

diff --git a/config.js b/config.js
index 0c985298..eb49ce04 100644
--- a/config.js
+++ b/config.js
@@ -4,6 +4,9 @@ export default {
   backend: 'webgl', // select tfjs backend to use
   console: true, // enable debugging output to console
+  scoped: false, // enable scoped runs
+  // some models *may* have memory leaks; this wraps everything in a local scope at a cost of performance
+  // typically not needed
   face: {
     enabled: true, // controls if specified modul is enabled
     // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
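Note on the new `scoped` option: when enabled, src/index.js below wraps the whole detect pass in tf.engine().startScope() / tf.engine().endScope(), so tensors a model allocates but never disposes are reclaimed when the scope ends. A minimal standalone sketch of that pattern, not the library's actual wiring (assuming @tensorflow/tfjs is importable as `tf`; `scopedRun` and `leakyWorkload` are illustrative names):

import * as tf from '@tensorflow/tfjs';

// illustrative workload that forgets to dispose its intermediate tensors
async function leakyWorkload(input) {
  const squared = input.square(); // never disposed explicitly
  return { sum: (await squared.sum().data())[0] }; // returns plain JS data, no tensors
}

// run a workload inside an engine scope; tensors created in the scope
// and not explicitly kept are released when the scope ends
async function scopedRun(workload, input, scoped) {
  if (scoped) tf.engine().startScope();
  try {
    return await workload(input);
  } finally {
    if (scoped) tf.engine().endScope();
  }
}

async function demo() {
  const input = tf.tensor([1, 2, 3]); // created outside the scope, so not reclaimed by it
  const before = tf.memory().numTensors;
  const result = await scopedRun(leakyWorkload, input, true);
  console.log(result, 'leaked:', tf.memory().numTensors - before); // leaked: 0
  input.dispose();
}
demo();

The try/finally is a small hardening over the patch's sequential start/end: it closes the scope even if the workload throws.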
diff --git a/demo/browser.js b/demo/browser.js
index 87250479..35ecca38 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -3,6 +3,7 @@
 import human from '../dist/human.esm.js';
 import draw from './draw.js';
 
+// ui options
 const ui = {
   baseColor: 'rgba(255, 200, 255, 0.3)',
   baseLabel: 'rgba(255, 200, 255, 0.9)',
@@ -20,11 +21,11 @@ const ui = {
   drawPolygons: true,
   fillPolygons: true,
   useDepth: true,
+  console: true,
 };
 
+// configuration overrides
 const config = {
-  backend: 'webgl',
-  console: true,
   face: {
     enabled: true,
     detector: { maxFaces: 10, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
@@ -37,11 +38,14 @@ const config = {
   body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
   hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
 };
+
+// global variables
 let settings;
 let worker;
 let timeStamp;
 const fps = [];
 
+// helper function: translates json to human-readable string
 function str(...msg) {
   if (!Array.isArray(msg)) return msg;
   let line = '';
@@ -52,11 +56,13 @@ function str(...msg) {
   return line;
 }
 
+// helper function: wrapper around console output
 const log = (...msg) => {
   // eslint-disable-next-line no-console
-  if (config.console) console.log(...msg);
+  if (ui.console) console.log(...msg);
 };
 
+// draws processed results and starts processing of the next frame
 async function drawResults(input, result, canvas) {
   // update fps
   settings.setValue('FPS', Math.round(1000 / (performance.now() - timeStamp)));
   `;
 }
 
-// simple wrapper for worker.postmessage that creates worker if one does not exist
-function webWorker(input, image, canvas) {
-  if (!worker) {
-    // create new webworker and add event handler only once
-    log('Creating worker thread');
-    worker = new Worker(ui.worker, { type: 'module' });
-    // after receiving message from webworker, parse&draw results and send new frame for processing
-    worker.addEventListener('message', (msg) => drawResults(input, msg.data, canvas));
-  }
-  // pass image data as arraybuffer to worker by reference to avoid copy
-  worker.postMessage({ image: image.data.buffer, width: canvas.width, height: canvas.height, config }, [image.data.buffer]);
-}
-
-async function runHumanDetect(input, canvas) {
-  timeStamp = performance.now();
-  // perform detect if live video or not video at all
-  if (input.srcObject) {
-    // if video not ready, just redo
-    const live = (input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState > 2) && (!input.paused);
-    if (!live) {
-      if (!input.paused) log(`Video not ready: state: ${input.srcObject.getVideoTracks()[0].readyState} stream state: ${input.readyState}`);
-      setTimeout(() => runHumanDetect(input, canvas), 500);
-      return;
-    }
-    if (ui.useWorker) {
-      // get image data from video as we cannot send html objects to webworker
-      const offscreen = new OffscreenCanvas(canvas.width, canvas.height);
-      const ctx = offscreen.getContext('2d');
-      ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
-      const data = ctx.getImageData(0, 0, canvas.width, canvas.height);
-      // perform detection in worker
-      webWorker(input, data, canvas);
-    } else {
-      let result = {};
-      try {
-        // perform detection
-        result = await human.detect(input, config);
-      } catch (err) {
-        log('Error during execution:', err.message);
-      }
-      if (result.error) log(result.error);
-      else drawResults(input, result, canvas);
-    }
-  }
-}
-
-// eslint-disable-next-line no-unused-vars
+// setup webcam
 async function setupCamera() {
   if (ui.busy) return null;
   ui.busy = true;
   });
 }
 
+// wrapper for worker.postMessage that creates worker if one does not exist
+function webWorker(input, image, canvas) {
+  if (!worker) {
+    // create new webworker and add event handler only once
+    log('Creating worker thread');
+    worker = new Worker(ui.worker, { type: 'module' });
+    // after receiving message from webworker, parse & draw results and send new frame for processing
+    worker.addEventListener('message', (msg) => drawResults(input, msg.data, canvas));
+  }
+  // pass image data as arraybuffer to worker by reference to avoid copy
+  worker.postMessage({ image: image.data.buffer, width: canvas.width, height: canvas.height, config }, [image.data.buffer]);
+}
+
+// main processing function when input is webcam; can use direct invocation or web worker
+async function runHumanDetect(input, canvas) {
+  timeStamp = performance.now();
+  // perform detect if live video or not video at all
+  if (input.srcObject) {
+    // if video not ready, just redo
+    const live = (input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState > 2) && (!input.paused);
+    if (!live) {
+      if (!input.paused) log(`Video not ready: state: ${input.srcObject.getVideoTracks()[0].readyState} stream state: ${input.readyState}`);
+      setTimeout(() => runHumanDetect(input, canvas), 500);
+      return;
+    }
+    if (ui.useWorker) {
+      // get image data from video as we cannot send html objects to webworker
+      const offscreen = new OffscreenCanvas(canvas.width, canvas.height);
+      const ctx = offscreen.getContext('2d');
+      ctx.drawImage(input, 0, 0, input.width, input.height, 0, 0, canvas.width, canvas.height);
+      const data = ctx.getImageData(0, 0, canvas.width, canvas.height);
+      // perform detection in worker
+      webWorker(input, data, canvas);
+    } else {
+      let result = {};
+      try {
+        // perform detection
+        result = await human.detect(input, config);
+      } catch (err) {
+        log('Error during execution:', err.message);
+      }
+      if (result.error) log(result.error);
+      else drawResults(input, result, canvas);
+    }
+  }
+}
+
+// main processing function when input is image; can use direct invocation or web worker
 async function processImage(input) {
-  ui.baseColor = 'rgba(200, 255, 255, 0.5)';
-  ui.baseLabel = 'rgba(200, 255, 255, 0.8)';
-  ui.baseFont = 'small-caps 3.5rem "Segoe UI"';
-  ui.baseLineWidth = 16;
-  ui.columns = 3;
   const cfg = {
     backend: 'webgl',
     console: true,
@@ -218,6 +221,7 @@ async function processImage(input) {
   });
 }
 
+// just initialize everything and call main function
 async function detectVideo() {
   document.getElementById('samples').style.display = 'none';
   document.getElementById('canvas').style.display = 'block';
@@ -236,7 +240,7 @@ async function detectVideo() {
   runHumanDetect(video, canvas);
 }
 
-// eslint-disable-next-line no-unused-vars
+// just initialize everything and call main function
 async function detectSampleImages() {
   ui.baseFont = ui.baseFontProto.replace(/{size}/, `${ui.columns}rem`);
   ui.baseLineHeight = ui.baseLineHeightProto * ui.columns;
@@ -246,8 +250,8 @@ async function detectSampleImages() {
   for (const sample of ui.samples) await processImage(sample);
 }
 
+// setup settings panel
 function setupUI() {
-  // add all variables to ui control panel
   settings = QuickSettings.create(10, 10, 'Settings', document.getElementById('main'));
   const style = document.createElement('style');
   style.innerHTML = `
@@ -314,7 +318,6 @@
 async function main() {
   log('Human demo starting ...');
   setupUI();
-
   const msg = `Human ready: version: ${human.version} TensorFlow/JS version: ${human.tf.version_core}`;
   document.getElementById('log').innerText += '\n' + msg;
   log(msg);
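Note on the worker hand-off above: the frame's pixel buffer is listed as a transferable in postMessage, so ownership moves to the worker without a copy and the buffer is detached on the main thread. A minimal sketch of both sides (assuming a module worker; 'worker.js' and the element id 'canvas' are illustrative names, not the demo's actual ones):

// main thread: move (not copy) the frame's pixels to the worker
const worker = new Worker('worker.js', { type: 'module' });
const canvas = document.getElementById('canvas');
const ctx = canvas.getContext('2d');
const image = ctx.getImageData(0, 0, canvas.width, canvas.height);
// the transfer list detaches image.data.buffer here and hands it to the worker
worker.postMessage({ image: image.data.buffer, width: canvas.width, height: canvas.height }, [image.data.buffer]);

// worker.js: rebuild ImageData from the transferred buffer
onmessage = (msg) => {
  const pixels = new Uint8ClampedArray(msg.data.image);
  const frame = new ImageData(pixels, msg.data.width, msg.data.height);
  // run detection on frame here, then postMessage(results) back
};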
diff --git a/src/handpose/handdetector.js b/src/handpose/handdetector.js
index e2bc28e4..fe0f5710 100644
--- a/src/handpose/handdetector.js
+++ b/src/handpose/handdetector.js
@@ -42,14 +42,11 @@ class HandDetector {
     const boxes = this.normalizeBoxes(rawBoxes);
     const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
     const boxesWithHands = await boxesWithHandsTensor.array();
-    const toDispose = [
-      normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction,
-      boxes, rawBoxes, scores,
-    ];
-    if (boxesWithHands.length === 0) {
-      toDispose.forEach((tensor) => tensor.dispose());
-      return null;
-    }
+    const toDispose = [normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
+    // if (boxesWithHands.length === 0) {
+    //   toDispose.forEach((tensor) => tensor.dispose());
+    //   return null;
+    // }
     const detectedHands = tf.tidy(() => {
       const detectedBoxes = [];
       for (const i in boxesWithHands) {
       }
       return detectedBoxes;
     });
+    toDispose.forEach((tensor) => tensor.dispose());
     return detectedHands;
   }
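The handdetector change keeps a manual dispose list because tf.tidy() cannot wrap async ops such as nonMaxSuppressionAsync: tensors created around an await have to be tracked by hand and released only after their values are read out, which is why the patch moves the disposal to after the tidy block. A standalone sketch of that dispose-list pattern with made-up boxes (assuming @tensorflow/tfjs as `tf`; `pickBoxes` is an illustrative name):

import * as tf from '@tensorflow/tfjs';

// tensors created before/around an await are collected in a list and
// disposed manually, but only once their data has been extracted
async function pickBoxes() {
  const scores = tf.tensor1d([0.9, 0.2, 0.8]);
  const boxes = tf.tensor2d([[0, 0, 1, 1], [0, 0, 0.5, 0.5], [0.5, 0.5, 1, 1]]);
  const picked = await tf.image.nonMaxSuppressionAsync(boxes, scores, 2, 0.5, 0.3);
  const indices = await picked.array(); // copy values out as a plain JS array
  [scores, boxes, picked].forEach((t) => t.dispose()); // now safe to release
  return indices;
}

pickBoxes().then((indices) => console.log('kept boxes:', indices)); // kept boxes: [0, 2]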
diff --git a/src/index.js b/src/index.js
index 2cd2403f..a8c41cd3 100644
--- a/src/index.js
+++ b/src/index.js
@@ -20,17 +20,32 @@ const models = {
   gender: null,
   emotion: null,
 };
+
+// helper function: gets elapsed time in both browser and nodejs
 const now = () => {
   if (typeof performance !== 'undefined') return performance.now();
   return parseInt(Number(process.hrtime.bigint()) / 1000 / 1000);
 };
 
+// helper function: wrapper around console output
 const log = (...msg) => {
   // eslint-disable-next-line no-console
-  if (config.console) console.log(...msg);
+  if (msg && config.console) console.log(...msg);
 };
 
-// helper function that performs deep merge of multiple objects so it allows full inheriance with overrides
+// helper function: measures tensor leaks
+let numTensors = 0;
+const analyzeMemoryLeaks = false;
+const analyze = (...msg) => {
+  if (!analyzeMemoryLeaks) return;
+  const current = tf.engine().state.numTensors;
+  const previous = numTensors;
+  numTensors = current;
+  const leaked = current - previous;
+  if (leaked !== 0) log(...msg, leaked);
+};
+
+// helper function: performs deep merge of multiple objects, allowing full inheritance with overrides
 function mergeDeep(...objects) {
   const isObject = (obj) => obj && typeof obj === 'object';
   return objects.reduce((prev, obj) => {
@@ -97,12 +112,6 @@ async function detect(input, userConfig = {}) {
     await tf.setBackend(config.backend);
     await tf.ready();
   }
-  // explictly enable depthwiseconv since it's diasabled by default due to issues with large shaders
-  // let savedWebglPackDepthwiseConvFlag;
-  // if (tf.getBackend() === 'webgl') {
-  //   savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
-  //   tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
-  // }
 
   // load models if enabled
   state = 'load';
@@ -111,18 +120,24 @@ async function detect(input, userConfig = {}) {
   const perf = {};
   let timeStamp;
 
-  tf.engine().startScope();
+  if (config.scoped) tf.engine().startScope();
+
+  analyze('Start Detect:');
 
   // run posenet
   state = 'run:body';
   timeStamp = now();
+  analyze('Start PoseNet:');
   const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
+  analyze('End PoseNet:');
   perf.body = Math.trunc(now() - timeStamp);
 
   // run handpose
   state = 'run:hand';
   timeStamp = now();
+  analyze('Start HandPose:');
   const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
+  analyze('End HandPose:');
   perf.hand = Math.trunc(now() - timeStamp);
 
   // run facemesh, includes blazeface and iris
@@ -130,6 +145,7 @@ async function detect(input, userConfig = {}) {
   if (config.face.enabled) {
     state = 'run:face';
     timeStamp = now();
+    analyze('Start FaceMesh:');
     const faces = await models.facemesh.estimateFaces(input, config.face);
     perf.face = Math.trunc(now() - timeStamp);
     for (const face of faces) {
@@ -149,6 +165,7 @@ async function detect(input, userConfig = {}) {
       const emotionData = config.face.emotion.enabled ? await emotion.predict(face.image, config) : {};
       perf.emotion = Math.trunc(now() - timeStamp);
       face.image.dispose();
+      delete face.image;
       // calculate iris distance
       // iris: array[ bottom, left, top, right, center ]
       const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
@@ -166,13 +183,13 @@ async function detect(input, userConfig = {}) {
         iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
       });
     }
-    state = 'idle';
+    analyze('End FaceMesh:');
   }
 
-  // set depthwiseconv to original value
-  // tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
+  state = 'idle';
 
-  tf.engine().endScope();
+  if (config.scoped) tf.engine().endScope();
+  analyze('End Scope:');
 
   // combine and return results
   perf.total = Object.values(perf).reduce((a, b) => a + b);
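For reference, the leak accounting added above can be exercised standalone: snapshot the engine's tensor count at checkpoints and report any growth between them. A short sketch restating the analyze() helper outside the library (assuming @tensorflow/tfjs as `tf`; the surrounding script is illustrative):

import * as tf from '@tensorflow/tfjs';

// same bookkeeping as the analyze() helper in src/index.js: diff the
// engine's tensor count against the previous checkpoint
let numTensors = 0;
function analyze(...msg) {
  const current = tf.engine().state.numTensors;
  const leaked = current - numTensors;
  numTensors = current;
  if (leaked !== 0) console.log(...msg, leaked);
}

// usage: bracket a suspect stage with checkpoints
analyze('baseline:'); // establishes the starting count
const a = tf.tensor([1, 2, 3]);
const b = a.square(); // intermediate tensor, never disposed
analyze('after stage:'); // prints: after stage: 2
a.dispose();
b.dispose();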