From 38be53cb7e045cf6b59cc20e38c3c6b41e777ba2 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic <mandic00@live.com>
Date: Sun, 1 Nov 2020 13:07:53 -0500
Subject: [PATCH] implemented memory profiler

---
 README.md              | 38 +++++++++++++++++++++++++++++++++++++-
 config.js              |  6 ++++++
 demo/browser.js        |  5 +++++
 demo/menu.js           | 18 ++++++++----------
 src/emotion/emotion.js | 15 ++++++++++++---
 src/human.js           | 29 +++++++++++++++++++++++------
 src/profile.js         | 24 ++++++++++++++++++++++++
 7 files changed, 115 insertions(+), 20 deletions(-)
 create mode 100644 src/profile.js
diff --git a/README.md b/README.md
index 66d2a185..5b0494ec 100644
--- a/README.md
+++ b/README.md
@@ -238,16 +238,23 @@ Below is output of `human.defaults` object
 Any property can be overriden by passing user object during `human.detect()`  
 Note that user object and default configuration are merged using deep-merge, so you do not need to redefine entire configuration  
 
+All configuration details can be changed in real-time!  
+
 Configurtion object is large, but typically you only need to modify few values:
 
 - `enabled`: Choose which models to use
 - `modelPath`: Update as needed to reflect your application's relative path
 
-
 ```js
 config = {
   backend: 'webgl',          // select tfjs backend to use
   console: true,             // enable debugging output to console
+  profile: true,             // enable tfjs profiling
+                             // this has significant performance impact, only enable for debugging purposes
+                             // currently only implemented for age,gender,emotion models
+  deallocate: true,          // aggressively deallocate gpu memory after each usage
+                             // only valid for webgl backend and only during first call, cannot be changed unless library is reloaded
+                             // this has significant performance impact, only enable on low-memory devices
   scoped: false,             // enable scoped runs
                              // some models *may* have memory leaks, this wrapps everything in a local scope at a cost of performance
                              // typically not needed
@@ -415,6 +422,35 @@ result = {
 
 <hr>
 
+## Profile
+
+If `config.profile` is enabled, a call to `human.profile()` will return detailed profiling data from the last detect invocation.
+
+example:
+```js
+  result = {
+    {age: {…}, gender: {…}, emotion: {…}}
+      age:
+        timeKernelOps: 53.78892800000002
+        newBytes: 4
+        newTensors: 1
+        numKernelOps: 341
+        peakBytes: 46033948
+        largestKernelOps: Array(5)
+          0: {name: "Reshape", bytesAdded: 107648, totalBytesSnapshot: 46033948, tensorsAdded: 1, totalTensorsSnapshot: 1149, …}
+          1: {name: "Reshape", bytesAdded: 0, totalBytesSnapshot: 45818652, tensorsAdded: 1, totalTensorsSnapshot: 1147, …}
+          2: {name: "Reshape", bytesAdded: 0, totalBytesSnapshot: 45633996, tensorsAdded: 1, totalTensorsSnapshot: 1148, …}
+          3: {name: "Reshape", bytesAdded: 0, totalBytesSnapshot: 45389376, tensorsAdded: 1, totalTensorsSnapshot: 1154, …}
+          4: {name: "Reshape", bytesAdded: 53824, totalBytesSnapshot: 45381776, tensorsAdded: 1, totalTensorsSnapshot: 1155, …}
+        slowestKernelOps: Array(5)
+          0: {name: "_FusedMatMul", bytesAdded: 12, totalBytesSnapshot: 44802280, tensorsAdded: 1, totalTensorsSnapshot: 1156, …}
+          1: {name: "_FusedMatMul", bytesAdded: 4, totalBytesSnapshot: 44727564, tensorsAdded: 1, totalTensorsSnapshot: 1152, …}
+          2: {name: "_FusedMatMul", bytesAdded: 12, totalBytesSnapshot: 44789100, tensorsAdded: 1, totalTensorsSnapshot: 1157, …}
+          3: {name: "Add", bytesAdded: 4, totalBytesSnapshot: 44788748, tensorsAdded: 1, totalTensorsSnapshot: 1158, …}
+          4: {name: "Add", bytesAdded: 4, totalBytesSnapshot: 44788748, tensorsAdded: 1, totalTensorsSnapshot: 1158, …}
+  }
+```
+
 ## Build
 
 If you want to modify the library and perform a full rebuild:  
diff --git a/config.js b/config.js
index 703a963c..dea06c13 100644
--- a/config.js
+++ b/config.js
@@ -4,6 +4,12 @@
 export default {
   backend: 'webgl',          // select tfjs backend to use
   console: true,             // enable debugging output to console
+  profile: true,             // enable tfjs profiling
+                             // this has significant performance impact, only enable for debugging purposes
+                             // currently only implemented for age,gender,emotion models
+  deallocate: true,          // aggressively deallocate gpu memory after each usage
+                             // only valid for webgl backend and only during first call, cannot be changed unless library is reloaded
+                             // this has significant performance impact, only enable on low-memory devices
   scoped: false,             // enable scoped runs
                              // some models *may* have memory leaks, this wrapps everything in a local scope at a cost of performance
                              // typically not needed
diff --git a/demo/browser.js b/demo/browser.js
index cd20ca75..931d141d 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -30,6 +30,8 @@ const ui = {
 // configuration overrides
 const config = {
   backend: 'webgl',
+  profile: false,
+  deallocate: false,
   wasm: { path: '../assets' },
   filter: {
     enabled: true,
@@ -199,6 +201,7 @@ function runHumanDetect(input, canvas) {
       human.detect(input, config).then((result) => {
         if (result.error) log(result.error);
         else drawResults(input, result, canvas);
+        if (config.profile) log('Profile data:', human.profile());
       });
     }
   }
@@ -269,6 +272,8 @@ function setupMenu() {
 
   menu.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
   menu.addList('Backend', ['cpu', 'webgl', 'wasm', 'webgpu'], config.backend, (val) => config.backend = val);
+  menu.addBool('Enable Profiler', config, 'profile');
+  menu.addBool('Memory Deallocator', config, 'deallocate');
   menu.addBool('Use Web Worker', ui, 'useWorker');
   menu.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
   menu.addLabel('Enabled Models');
diff --git a/demo/menu.js b/demo/menu.js
index fb2dccee..87934d4a 100644
--- a/demo/menu.js
+++ b/demo/menu.js
@@ -12,6 +12,7 @@ let theme = {
   checkboxOff: 'lightcoral',
   rangeBackground: 'lightblue',
   rangeLabel: 'white',
+  chartColor: 'lightblue',
 };
 
 function createCSS() {
@@ -69,8 +70,6 @@ class Menu {
     instance++;
     this._maxFPS = 0;
     this.hidden = 0;
-    this.chartFGcolor = 'lightblue';
-    this.chartBGcolor = 'lightgray';
   }
 
   createMenu(parent, title = '', position = { top: null, left: null, bottom: null, right: null }) {
@@ -256,13 +255,12 @@ class Menu {
     else this.addValue(title, val);
   }
 
-  addChart(title, id, width = 200, height = 40, fgColor, bgColor) {
-    if (fgColor) this.chartFGcolor = fgColor;
-    if (bgColor) this.chartBGcolor = bgColor;
+  addChart(title, id, width = 200, height = 40, color) {
+    if (color) theme.chartColor = color;
     const el = document.createElement('div');
     el.className = 'menu-item menu-chart-title';
     el.id = this.newID;
-    el.innerHTML = `<font color=${this.chartFGcolor}>${title}</font><canvas id="menu-canvas-${id}" class="menu-chart-canvas" width="${width}px" height="${height}px"></canvas>`;
+    el.innerHTML = `<font color=${theme.chartColor}>${title}</font><canvas id="menu-canvas-${id}" class="menu-chart-canvas" width="${width}px" height="${height}px"></canvas>`;
     this.container.appendChild(el);
   }
 
@@ -272,18 +270,18 @@ class Menu {
     const canvas = document.getElementById(`menu-canvas-${id}`);
     if (!canvas) return;
     const ctx = canvas.getContext('2d');
-    ctx.fillStyle = this.chartBGcolor;
+    ctx.fillStyle = theme.background;
     ctx.fillRect(0, 0, canvas.width, canvas.height);
     const width = canvas.width / values.length;
     const max = 1 + Math.max(...values);
     const height = canvas.height / max;
     for (const i in values) {
       const gradient = ctx.createLinearGradient(0, (max - values[i]) * height, 0, 0);
-      gradient.addColorStop(0.1, this.chartFGcolor);
-      gradient.addColorStop(0.4, this.chartBGcolor);
+      gradient.addColorStop(0.1, theme.chartColor);
+      gradient.addColorStop(0.4, theme.background);
       ctx.fillStyle = gradient;
       ctx.fillRect(i * width, 0, width - 4, canvas.height);
-      ctx.fillStyle = this.chartBGcolor;
+      ctx.fillStyle = theme.background;
       ctx.font = `${width / 1.4}px "Segoe UI"`;
       ctx.fillText(Math.round(values[i]), i * width + 1, canvas.height - 1, width - 1);
     }
diff --git a/src/emotion/emotion.js b/src/emotion/emotion.js
index 3c47c43c..f43f5e55 100644
--- a/src/emotion/emotion.js
+++ b/src/emotion/emotion.js
@@ -1,4 +1,5 @@
 const tf = require('@tensorflow/tfjs');
+const profile = require('../profile.js');
 
 const annotations = ['angry', 'discust', 'fear', 'happy', 'sad', 'surpise', 'neutral'];
 const models = {};
@@ -33,13 +34,21 @@ async function predict(image, config) {
   blueNorm.dispose();
   const obj = [];
   if (config.face.emotion.enabled) {
-    const emotionT = await models.emotion.predict(grayscale);
-    const data = await emotionT.data();
+    let data;
+    if (!config.profile) {
+      const emotionT = await models.emotion.predict(grayscale);
+      data = await emotionT.data();
+      tf.dispose(emotionT);
+    } else {
+      const profileData = await tf.profile(() => models.emotion.predict(grayscale));
+      data = await profileData.result.data();
+      profileData.result.dispose();
+      profile.run('emotion', profileData);
+    }
     for (let i = 0; i < data.length; i++) {
       if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
     }
     obj.sort((a, b) => b.score - a.score);
-    tf.dispose(emotionT);
   }
   tf.dispose(grayscale);
   last = obj;
diff --git a/src/human.js b/src/human.js
index 815b98c0..c3cd16e6 100644
--- a/src/human.js
+++ b/src/human.js
@@ -5,6 +5,7 @@ const emotion = require('./emotion/emotion.js');
 const posenet = require('./posenet/posenet.js');
 const handpose = require('./handpose/handpose.js');
 const fxImage = require('./imagefx.js');
+const profile = require('./profile.js');
 const defaults = require('../config.js').default;
 const app = require('../package.json');
 
@@ -88,6 +89,11 @@ class Human {
     if (msg && this.config.console) console.log('Human:', ...msg);
   }
 
+  profile() { // returns the summaries exported as `data` by ./profile.js, or an empty object when config.profile is off
+    if (this.config.profile) return profile.data;
+    return {};
+  }
+
   // helper function: measure tensor leak
   analyze(...msg) {
     if (!this.analyzeMemoryLeaks) return;
@@ -129,16 +135,27 @@ class Human {
   async checkBackend() {
     if (tf.getBackend() !== this.config.backend) {
       this.state = 'backend';
+      /* force backend reload
       if (this.config.backend in tf.engine().registry) {
-        this.log('Setting backend:', this.config.backend);
-        // const backendFactory = tf.findBackendFactory(backendName);
-        // tf.removeBackend(backendName);
-        // tf.registerBackend(backendName, backendFactory);
-        await tf.setBackend(this.config.backend);
-        await tf.ready();
+        const backendFactory = tf.findBackendFactory(this.config.backend);
+        tf.removeBackend(this.config.backend);
+        tf.registerBackend(this.config.backend, backendFactory);
       } else {
         this.log('Backend not registred:', this.config.backend);
       }
+      */
+      this.log('Setting backend:', this.config.backend);
+      await tf.setBackend(this.config.backend);
+      tf.enableProdMode();
+      /* debug mode is really too much
+      if (this.config.profile) tf.enableDebugMode();
+      else tf.enableProdMode();
+      */
+      if (this.config.deallocate && this.config.backend === 'webgl') {
+        this.log('Changing WebGL: WEBGL_DELETE_TEXTURE_THRESHOLD:', this.config.deallocate);
+        tf.ENV.set('WEBGL_DELETE_TEXTURE_THRESHOLD', this.config.deallocate ? 0 : -1);
+      }
+      await tf.ready();
     }
   }
 
diff --git a/src/profile.js b/src/profile.js
new file mode 100644
index 00000000..1dacbc89
--- /dev/null
+++ b/src/profile.js
@@ -0,0 +1,24 @@
+const profileData = {};
+// Summarizes one profiling result (callers pass the value resolved by tf.profile) and stores it in profileData[name]
+function profile(name, data) {
+  if (!data || !data.kernels) return; // nothing to record without a kernels list
+  const maxResults = 5; // cap on the length of the slowest/largest lists built below
+  const time = data.kernels
+    .filter((a) => a.kernelTimeMs > 0)
+    .reduce((a, b) => a += b.kernelTimeMs, 0); // sum of all positive kernelTimeMs values
+  const slowest = data.kernels
+    .map((a, i) => { a.id = i; return a; }) // note: writes `id` (original index) onto the caller's kernel objects
+    .filter((a) => a.kernelTimeMs > 0)
+    .sort((a, b) => b.kernelTimeMs - a.kernelTimeMs); // descending by kernelTimeMs
+  const largest = data.kernels
+    .map((a, i) => { a.id = i; return a; }) // same in-place index tagging as for `slowest`
+    .filter((a) => a.totalBytesSnapshot > 0)
+    .sort((a, b) => b.totalBytesSnapshot - a.totalBytesSnapshot); // descending by totalBytesSnapshot
+  if (slowest.length > maxResults) slowest.length = maxResults; // truncate to the first maxResults entries
+  if (largest.length > maxResults) largest.length = maxResults; // truncate to the first maxResults entries
+  const res = { newBytes: data.newBytes, newTensors: data.newTensors, peakBytes: data.peakBytes, numKernelOps: data.kernels.length, timeKernelOps: time, slowestKernelOps: slowest, largestKernelOps: largest };
+  profileData[name] = res; // replaces any earlier entry stored under the same name
+}
+
+exports.run = profile;
+exports.data = profileData;