implemented memory profiler

2020-11-01 13:07:53 -05:00 · 2020-11-01 13:07:53 -05:00 · 38be53cb7e
parent 6416e5e327
commit 38be53cb7e
7 changed files with 115 additions and 20 deletions
--- a/README.md
+++ b/README.md
@ -238,16 +238,23 @@ Below is output of `human.defaults` object
 Any property can be overriden by passing user object during `human.detect()`  
 Note that user object and default configuration are merged using deep-merge, so you do not need to redefine entire configuration  

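+Most configuration details can be changed in real-time; options that are only read during the first call (such as `deallocate`) are noted in the comments below.  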
+
 Configurtion object is large, but typically you only need to modify few values:

 - `enabled`: Choose which models to use
 - `modelPath`: Update as needed to reflect your application's relative path

-
 ```js
 config = {
  backend: 'webgl',          // select tfjs backend to use
  console: true,             // enable debugging output to console
+  profile: true,             // enable tfjs profiling
+                             // this has significant performance impact, only enable for debugging purposes
+                             // currently only implemented for age,gender,emotion models
+  deallocate: true,          // aggressively deallocate gpu memory after each usage
+                             // only valid for webgl backend and only during first call, cannot be changed unless library is reloaded
+                             // this has significant performance impact, only enable on low-memory devices
  scoped: false,             // enable scoped runs
                             // some models *may* have memory leaks, this wrapps everything in a local scope at a cost of performance
                             // typically not needed
@ -415,6 +422,35 @@ result = {

 <hr>

+## Profile
+
+If `config.profile` is enabled, a call to `human.profile()` returns detailed profiling data from the last `human.detect()` invocation.
+
+example:
+```js
+  result = {
+    {age: {…}, gender: {…}, emotion: {…}}
+      age:
+        timeKernelOps: 53.78892800000002
+        newBytes: 4
+        newTensors: 1
+        numKernelOps: 341
+        peakBytes: 46033948
+        largestKernelOps: Array(5)
+          0: {name: "Reshape", bytesAdded: 107648, totalBytesSnapshot: 46033948, tensorsAdded: 1, totalTensorsSnapshot: 1149, …}
+          1: {name: "Reshape", bytesAdded: 0, totalBytesSnapshot: 45818652, tensorsAdded: 1, totalTensorsSnapshot: 1147, …}
+          2: {name: "Reshape", bytesAdded: 0, totalBytesSnapshot: 45633996, tensorsAdded: 1, totalTensorsSnapshot: 1148, …}
+          3: {name: "Reshape", bytesAdded: 0, totalBytesSnapshot: 45389376, tensorsAdded: 1, totalTensorsSnapshot: 1154, …}
+          4: {name: "Reshape", bytesAdded: 53824, totalBytesSnapshot: 45381776, tensorsAdded: 1, totalTensorsSnapshot: 1155, …}
+        slowestKernelOps: Array(5)
+          0: {name: "_FusedMatMul", bytesAdded: 12, totalBytesSnapshot: 44802280, tensorsAdded: 1, totalTensorsSnapshot: 1156, …}
+          1: {name: "_FusedMatMul", bytesAdded: 4, totalBytesSnapshot: 44727564, tensorsAdded: 1, totalTensorsSnapshot: 1152, …}
+          2: {name: "_FusedMatMul", bytesAdded: 12, totalBytesSnapshot: 44789100, tensorsAdded: 1, totalTensorsSnapshot: 1157, …}
+          3: {name: "Add", bytesAdded: 4, totalBytesSnapshot: 44788748, tensorsAdded: 1, totalTensorsSnapshot: 1158, …}
+          4: {name: "Add", bytesAdded: 4, totalBytesSnapshot: 44788748, tensorsAdded: 1, totalTensorsSnapshot: 1158, …}
+  }
+```
+
 ## Build

 If you want to modify the library and perform a full rebuild:  
--- a/config.js
+++ b/config.js
@ -4,6 +4,12 @@
 export default {
  backend: 'webgl',          // select tfjs backend to use
  console: true,             // enable debugging output to console
+  profile: true,             // enable tfjs profiling
+                             // this has significant performance impact, only enable for debugging purposes
+                             // currently only implemented for age,gender,emotion models
+  deallocate: true,          // aggressively deallocate gpu memory after each usage
+                             // only valid for webgl backend and only during first call, cannot be changed unless library is reloaded
+                             // this has significant performance impact, only enable on low-memory devices
  scoped: false,             // enable scoped runs
                             // some models *may* have memory leaks, this wrapps everything in a local scope at a cost of performance
                             // typically not needed
--- a/demo/browser.js
+++ b/demo/browser.js
@ -30,6 +30,8 @@ const ui = {
 // configuration overrides
 const config = {
  backend: 'webgl',
+  profile: false,
+  deallocate: false,
  wasm: { path: '../assets' },
  filter: {
    enabled: true,
@ -199,6 +201,7 @@ function runHumanDetect(input, canvas) {
      human.detect(input, config).then((result) => {
        if (result.error) log(result.error);
        else drawResults(input, result, canvas);
+        if (config.profile) log('Profile data:', human.profile());
      });
    }
  }
@ -269,6 +272,8 @@ function setupMenu() {

  menu.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
  menu.addList('Backend', ['cpu', 'webgl', 'wasm', 'webgpu'], config.backend, (val) => config.backend = val);
+  menu.addBool('Enable Profiler', config, 'profile');
+  menu.addBool('Memory Deallocator', config, 'deallocate');
  menu.addBool('Use Web Worker', ui, 'useWorker');
  menu.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
  menu.addLabel('Enabled Models');
--- a/demo/menu.js
+++ b/demo/menu.js
@ -12,6 +12,7 @@ let theme = {
  checkboxOff: 'lightcoral',
  rangeBackground: 'lightblue',
  rangeLabel: 'white',
+  chartColor: 'lightblue',
 };

 function createCSS() {
@ -69,8 +70,6 @@ class Menu {
    instance++;
    this._maxFPS = 0;
    this.hidden = 0;
-    this.chartFGcolor = 'lightblue';
-    this.chartBGcolor = 'lightgray';
  }

  createMenu(parent, title = '', position = { top: null, left: null, bottom: null, right: null }) {
@ -256,13 +255,12 @@ class Menu {
    else this.addValue(title, val);
  }

-  addChart(title, id, width = 200, height = 40, fgColor, bgColor) {
-    if (fgColor) this.chartFGcolor = fgColor;
-    if (bgColor) this.chartBGcolor = bgColor;
+  addChart(title, id, width = 200, height = 40, color) {
+    if (color) theme.chartColor = color;
    const el = document.createElement('div');
    el.className = 'menu-item menu-chart-title';
    el.id = this.newID;
-    el.innerHTML = `<font color=${this.chartFGcolor}>${title}</font><canvas id="menu-canvas-${id}" class="menu-chart-canvas" width="${width}px" height="${height}px"></canvas>`;
+    el.innerHTML = `<font color=${theme.chartColor}>${title}</font><canvas id="menu-canvas-${id}" class="menu-chart-canvas" width="${width}px" height="${height}px"></canvas>`;
    this.container.appendChild(el);
  }

@ -272,18 +270,18 @@ class Menu {
    const canvas = document.getElementById(`menu-canvas-${id}`);
    if (!canvas) return;
    const ctx = canvas.getContext('2d');
-    ctx.fillStyle = this.chartBGcolor;
+    ctx.fillStyle = theme.background;
    ctx.fillRect(0, 0, canvas.width, canvas.height);
    const width = canvas.width / values.length;
    const max = 1 + Math.max(...values);
    const height = canvas.height / max;
    for (const i in values) {
      const gradient = ctx.createLinearGradient(0, (max - values[i]) * height, 0, 0);
-      gradient.addColorStop(0.1, this.chartFGcolor);
-      gradient.addColorStop(0.4, this.chartBGcolor);
+      gradient.addColorStop(0.1, theme.chartColor);
+      gradient.addColorStop(0.4, theme.background);
      ctx.fillStyle = gradient;
      ctx.fillRect(i * width, 0, width - 4, canvas.height);
-      ctx.fillStyle = this.chartBGcolor;
+      ctx.fillStyle = theme.background;
      ctx.font = `${width / 1.4}px "Segoe UI"`;
      ctx.fillText(Math.round(values[i]), i * width + 1, canvas.height - 1, width - 1);
    }
--- a/src/emotion/emotion.js
+++ b/src/emotion/emotion.js
@ -1,4 +1,5 @@
 const tf = require('@tensorflow/tfjs');
+const profile = require('../profile.js');

 const annotations = ['angry', 'discust', 'fear', 'happy', 'sad', 'surpise', 'neutral'];
 const models = {};
@ -33,13 +34,21 @@ async function predict(image, config) {
  blueNorm.dispose();
  const obj = [];
  if (config.face.emotion.enabled) {
-    const emotionT = await models.emotion.predict(grayscale);
-    const data = await emotionT.data();
+    let data;
+    if (!config.profile) {
+      const emotionT = await models.emotion.predict(grayscale);
+      data = await emotionT.data();
+      tf.dispose(emotionT);
+    } else {
+      const profileData = await tf.profile(() => models.emotion.predict(grayscale));
+      data = await profileData.result.data();
+      profileData.result.dispose();
+      profile.run('emotion', profileData);
+    }
    for (let i = 0; i < data.length; i++) {
      if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
    }
    obj.sort((a, b) => b.score - a.score);
-    tf.dispose(emotionT);
  }
  tf.dispose(grayscale);
  last = obj;
--- a/src/human.js
+++ b/src/human.js
@ -5,6 +5,7 @@ const emotion = require('./emotion/emotion.js');
 const posenet = require('./posenet/posenet.js');
 const handpose = require('./handpose/handpose.js');
 const fxImage = require('./imagefx.js');
+const profile = require('./profile.js');
 const defaults = require('../config.js').default;
 const app = require('../package.json');

@ -88,6 +89,11 @@ class Human {
    if (msg && this.config.console) console.log('Human:', ...msg);
  }

+  profile() {
+    if (this.config.profile) return profile.data;
+    return {};
+  }
+
  // helper function: measure tensor leak
  analyze(...msg) {
    if (!this.analyzeMemoryLeaks) return;
@ -129,16 +135,27 @@ class Human {
  async checkBackend() {
    if (tf.getBackend() !== this.config.backend) {
      this.state = 'backend';
+      /* force backend reload
      if (this.config.backend in tf.engine().registry) {
-        this.log('Setting backend:', this.config.backend);
-        // const backendFactory = tf.findBackendFactory(backendName);
-        // tf.removeBackend(backendName);
-        // tf.registerBackend(backendName, backendFactory);
-        await tf.setBackend(this.config.backend);
-        await tf.ready();
+        const backendFactory = tf.findBackendFactory(this.config.backend);
+        tf.removeBackend(this.config.backend);
+        tf.registerBackend(this.config.backend, backendFactory);
      } else {
        this.log('Backend not registred:', this.config.backend);
      }
+      */
+      this.log('Setting backend:', this.config.backend);
+      await tf.setBackend(this.config.backend);
+      tf.enableProdMode();
+      /* debug mode is really too much
+      if (this.config.profile) tf.enableDebugMode();
+      else tf.enableProdMode();
+      */
+      if (this.config.deallocate && this.config.backend === 'webgl') {
+        this.log('Changing WebGL: WEBGL_DELETE_TEXTURE_THRESHOLD:', this.config.deallocate);
+        tf.ENV.set('WEBGL_DELETE_TEXTURE_THRESHOLD', this.config.deallocate ? 0 : -1);
+      }
+      await tf.ready();
    }
  }

--- a/src/profile.js
+++ b/src/profile.js
@ -0,0 +1,24 @@
+const profileData = {};
+
+function profile(name, data) {
+  if (!data || !data.kernels) return;
+  const maxResults = 5;
+  const time = data.kernels
+    .filter((a) => a.kernelTimeMs > 0)
+    .reduce((a, b) => a += b.kernelTimeMs, 0);
+  const slowest = data.kernels
+    .map((a, i) => { a.id = i; return a; })
+    .filter((a) => a.kernelTimeMs > 0)
+    .sort((a, b) => b.kernelTimeMs - a.kernelTimeMs);
+  const largest = data.kernels
+    .map((a, i) => { a.id = i; return a; })
+    .filter((a) => a.totalBytesSnapshot > 0)
+    .sort((a, b) => b.totalBytesSnapshot - a.totalBytesSnapshot);
+  if (slowest.length > maxResults) slowest.length = maxResults;
+  if (largest.length > maxResults) largest.length = maxResults;
+  const res = { newBytes: data.newBytes, newTensors: data.newTensors, peakBytes: data.peakBytes, numKernelOps: data.kernels.length, timeKernelOps: time, slowestKernelOps: slowest, largestKernelOps: largest };
+  profileData[name] = res;
+}
+
+exports.run = profile;
+exports.data = profileData;