major performance improvements for all models

2020-11-02 18:54:03 -05:00 · 2020-11-02 18:54:03 -05:00 · d2bf2aeade
parent 18ec5f211f
commit d2bf2aeade
6 changed files with 103 additions and 80 deletions
--- a/config.js
+++ b/config.js
@ -4,6 +4,9 @@
 export default {
  backend: 'webgl',          // select tfjs backend to use
  console: true,             // enable debugging output to console
+  async: false,               // execute enabled models in parallel
+                             // this disables per-model performance data but slightly increases performance
+                             // cannot be used if profiling is enabled
  profile: false,            // enable tfjs profiling
                             // this has significant performance impact, only enable for debugging purposes
                             // currently only implemented for age,gender,emotion models
--- a/demo/browser.js
+++ b/demo/browser.js
@ -90,14 +90,18 @@ const log = (...msg) => {

 // draws processed results and starts processing of a next frame
 function drawResults(input, result, canvas) {
-  // update fps
+  // update fps data
  fps.push(1000 / (performance.now() - timeStamp));
  if (fps.length > ui.maxFrames) fps.shift();
-  menu.updateChart('FPS', fps);
+
+  // enable for continuous performance monitoring
+  // console.log(result.performance);

  // eslint-disable-next-line no-use-before-define
-  requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop
+  requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop before we even draw results

+  // draw fps chart
+  menu.updateChart('FPS', fps);
  // draw image from video
  const ctx = canvas.getContext('2d');
  ctx.fillStyle = ui.baseBackground;
--- a/src/emotion/emotion.js
+++ b/src/emotion/emotion.js
@ -37,11 +37,11 @@ async function predict(image, config) {
    let data;
    if (!config.profile) {
      const emotionT = await models.emotion.predict(grayscale);
-      data = await emotionT.data();
+      data = emotionT.dataSync();
      tf.dispose(emotionT);
    } else {
      const profileData = await tf.profile(() => models.emotion.predict(grayscale));
-      data = await profileData.result.data();
+      data = profileData.result.dataSync();
      profileData.result.dispose();
      profile.run('emotion', profileData);
    }
--- a/src/handpose/box.js
+++ b/src/handpose/box.js
@ -30,10 +30,7 @@ exports.cutBoxFromImageAndResize = cutBoxFromImageAndResize;
 function scaleBoxCoordinates(box, factor) {
  const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
  const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
-  const palmLandmarks = box.palmLandmarks.map((coord) => {
-    const scaledCoord = [coord[0] * factor[0], coord[1] * factor[1]];
-    return scaledCoord;
-  });
+  const palmLandmarks = box.palmLandmarks.map((coord) => [coord[0] * factor[0], coord[1] * factor[1]]);
  return { startPoint, endPoint, palmLandmarks };
 }
 exports.scaleBoxCoordinates = scaleBoxCoordinates;
--- a/src/handpose/handdetector.js
+++ b/src/handpose/handdetector.js
@ -40,8 +40,7 @@ class HandDetector {
    const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
    const boxes = this.normalizeBoxes(rawBoxes);
    const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
-    const boxesWithHands = await boxesWithHandsTensor.array();
-    const toDispose = [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
+    const boxesWithHands = boxesWithHandsTensor.arraySync();
    const detectedHands = tf.tidy(() => {
      const detectedBoxes = [];
      for (const i in boxesWithHands) {
@ -53,7 +52,7 @@ class HandDetector {
      }
      return detectedBoxes;
    });
-    toDispose.forEach((tensor) => tensor.dispose());
+    [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores].forEach((tensor) => tensor.dispose());
    return detectedHands;
  }

@ -64,28 +63,24 @@ class HandDetector {
     * @param input The image to classify.
     */
  async estimateHandBounds(input, config) {
-    // const inputHeight = input.shape[2];
-    // const inputWidth = input.shape[1];
    this.iouThreshold = config.iouThreshold;
    this.scoreThreshold = config.scoreThreshold;
    this.maxHands = config.maxHands;
    const resized = input.resizeBilinear([this.width, this.height]);
-    const divided = resized.div(255);
-    const normalized = divided.sub(0.5);
-    const image = normalized.mul(2.0);
+    const divided = resized.mul([1 / 127.5]);
+    const image = divided.sub(0.5);
    resized.dispose();
    divided.dispose();
-    normalized.dispose();
    const predictions = await this.getBoundingBoxes(image);
    image.dispose();
    if (!predictions || (predictions.length === 0)) return null;
    const hands = [];
    for (const i in predictions) {
      const prediction = predictions[i];
-      const boundingBoxes = await prediction.boxes.array();
-      const startPoint = boundingBoxes[0].slice(0, 2);
-      const endPoint = boundingBoxes[0].slice(2, 4);
-      const palmLandmarks = await prediction.palmLandmarks.array();
+      const boundingBoxes = prediction.boxes.dataSync();
+      const startPoint = [boundingBoxes[0], boundingBoxes[1]];
+      const endPoint = [boundingBoxes[2], boundingBoxes[3]];
+      const palmLandmarks = prediction.palmLandmarks.arraySync();
      prediction.boxes.dispose();
      prediction.palmLandmarks.dispose();
      hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [input.shape[2] / this.width, input.shape[1] / this.height]));
--- a/src/human.js
+++ b/src/human.js
@ -61,10 +61,13 @@ class Human {
    this.version = app.version;
    this.defaults = defaults;
    this.config = defaults;
-    this.fx = (tf.ENV.flags.IS_BROWSER && (typeof document !== 'undefined')) ? new fxImage.Canvas() : null;
+    this.fx = null;
    this.state = 'idle';
    this.numTensors = 0;
    this.analyzeMemoryLeaks = false;
+    // internal temp canvases
+    this.inCanvas = null;
+    this.outCanvas = null;
    // object that contains all initialized models
    this.models = {
      facemesh: null,
@ -160,56 +163,62 @@ class Human {
  }

  tfImage(input) {
-    // let imageData;
-    let filtered;
-    const originalWidth = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
-    const originalHeight = input.naturalHeight || input.videoHeight || input.height || (input.shape && (input.shape[2] > 0));
-    let targetWidth = originalWidth;
-    let targetHeight = originalHeight;
-    if (this.fx && this.config.filter.enabled && !(input instanceof tf.Tensor)) {
-      if (this.config.filter.width > 0) targetWidth = this.config.filter.width;
-      else if (this.config.filter.height > 0) targetWidth = originalWidth * (this.config.filter.height / originalHeight);
-      if (this.config.filter.height > 0) targetHeight = this.config.filter.height;
-      else if (this.config.filter.width > 0) targetHeight = originalHeight * (this.config.filter.width / originalWidth);
-      const offscreenCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
-      if (offscreenCanvas.width !== targetWidth) offscreenCanvas.width = targetWidth;
-      if (offscreenCanvas.height !== targetHeight) offscreenCanvas.height = targetHeight;
-      const ctx = offscreenCanvas.getContext('2d');
-      if (input instanceof ImageData) ctx.putImageData(input, 0, 0);
-      else ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, offscreenCanvas.width, offscreenCanvas.height);
-      this.fx.reset();
-      this.fx.addFilter('brightness', this.config.filter.brightness); // must have at least one filter enabled
-      if (this.config.filter.contrast !== 0) this.fx.addFilter('contrast', this.config.filter.contrast);
-      if (this.config.filter.sharpness !== 0) this.fx.addFilter('sharpen', this.config.filter.sharpness);
-      if (this.config.filter.blur !== 0) this.fx.addFilter('blur', this.config.filter.blur);
-      if (this.config.filter.saturation !== 0) this.fx.addFilter('saturation', this.config.filter.saturation);
-      if (this.config.filter.hue !== 0) this.fx.addFilter('hue', this.config.filter.hue);
-      if (this.config.filter.negative) this.fx.addFilter('negative');
-      if (this.config.filter.sepia) this.fx.addFilter('sepia');
-      if (this.config.filter.vintage) this.fx.addFilter('brownie');
-      if (this.config.filter.sepia) this.fx.addFilter('sepia');
-      if (this.config.filter.kodachrome) this.fx.addFilter('kodachrome');
-      if (this.config.filter.technicolor) this.fx.addFilter('technicolor');
-      if (this.config.filter.polaroid) this.fx.addFilter('polaroid');
-      if (this.config.filter.pixelate !== 0) this.fx.addFilter('pixelate', this.config.filter.pixelate);
-      filtered = this.fx.apply(offscreenCanvas);
-    }
    let tensor;
    if (input instanceof tf.Tensor) {
      tensor = tf.clone(input);
    } else {
-      const canvas = filtered || input;
+      const originalWidth = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
+      const originalHeight = input.naturalHeight || input.videoHeight || input.height || (input.shape && (input.shape[2] > 0));
+      let targetWidth = originalWidth;
+      let targetHeight = originalHeight;
+      if (this.config.filter.width > 0) targetWidth = this.config.filter.width;
+      else if (this.config.filter.height > 0) targetWidth = originalWidth * (this.config.filter.height / originalHeight);
+      if (this.config.filter.height > 0) targetHeight = this.config.filter.height;
+      else if (this.config.filter.width > 0) targetHeight = originalHeight * (this.config.filter.width / originalWidth);
+      if (!this.inCanvas || (this.inCanvas.width !== originalWidth) || (this.inCanvas.height !== originalHeight)) {
+        this.inCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
+        if (this.inCanvas.width !== targetWidth) this.inCanvas.width = targetWidth;
+        if (this.inCanvas.height !== targetHeight) this.inCanvas.height = targetHeight;
+      }
+      const ctx = this.inCanvas.getContext('2d');
+      if (input instanceof ImageData) ctx.putImageData(input, 0, 0);
+      else ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, this.inCanvas.width, this.inCanvas.height);
+      if (this.config.filter.enabled) {
+        if (!this.outCanvas || (this.inCanvas.width !== this.outCanvas.width) || (this.inCanvas.height !== this.outCanvas.height)) {
+          this.outCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(this.inCanvas.width, this.inCanvas.height) : document.createElement('canvas');
+          if (this.outCanvas.width !== this.inCanvas.width) this.outCanvas.width = this.inCanvas.width;
+          if (this.outCanvas.height !== this.inCanvas.height) this.outCanvas.height = this.inCanvas.height;
+        }
+        if (!this.fx) this.fx = (tf.ENV.flags.IS_BROWSER && (typeof document !== 'undefined')) ? new fxImage.Canvas({ canvas: this.outCanvas }) : null;
+        this.fx.reset();
+        this.fx.addFilter('brightness', this.config.filter.brightness); // must have at least one filter enabled
+        if (this.config.filter.contrast !== 0) this.fx.addFilter('contrast', this.config.filter.contrast);
+        if (this.config.filter.sharpness !== 0) this.fx.addFilter('sharpen', this.config.filter.sharpness);
+        if (this.config.filter.blur !== 0) this.fx.addFilter('blur', this.config.filter.blur);
+        if (this.config.filter.saturation !== 0) this.fx.addFilter('saturation', this.config.filter.saturation);
+        if (this.config.filter.hue !== 0) this.fx.addFilter('hue', this.config.filter.hue);
+        if (this.config.filter.negative) this.fx.addFilter('negative');
+        if (this.config.filter.sepia) this.fx.addFilter('sepia');
+        if (this.config.filter.vintage) this.fx.addFilter('brownie');
+        if (this.config.filter.sepia) this.fx.addFilter('sepia');
+        if (this.config.filter.kodachrome) this.fx.addFilter('kodachrome');
+        if (this.config.filter.technicolor) this.fx.addFilter('technicolor');
+        if (this.config.filter.polaroid) this.fx.addFilter('polaroid');
+        if (this.config.filter.pixelate !== 0) this.fx.addFilter('pixelate', this.config.filter.pixelate);
+        this.fx.apply(this.inCanvas);
+      }
+      if (!this.outCanvas) this.outCanvas = this.inCanvas;
      let pixels;
-      if ((this.config.backend === 'webgl') || (canvas instanceof ImageData)) {
+      if ((this.config.backend === 'webgl') || (this.outCanvas instanceof ImageData)) {
        // tf kernel-optimized method to get imagedata, also if input is imagedata, just use it
-        pixels = tf.browser.fromPixels(canvas);
+        pixels = tf.browser.fromPixels(this.outCanvas);
      } else {
        // cpu and wasm kernels do not implement an efficient fromPixels method, nor can we use the canvas as-is, so we go through one more (silly) temporary canvas
        const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
        tempCanvas.width = targetWidth;
        tempCanvas.height = targetHeight;
        const tempCtx = tempCanvas.getContext('2d');
-        tempCtx.drawImage(canvas, 0, 0);
+        tempCtx.drawImage(this.outCanvas, 0, 0);
        const data = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
        pixels = tf.browser.fromPixels(data);
      }
@ -218,7 +227,7 @@ class Human {
      pixels.dispose();
      casted.dispose();
    }
-    return { tensor, canvas: this.config.filter.return ? filtered : null };
+    return { tensor, canvas: this.config.filter.return ? this.outCanvas : null };
  }

  async detect(input, userConfig = {}) {
@ -239,6 +248,11 @@ class Human {

    // eslint-disable-next-line no-async-promise-executor
    return new Promise(async (resolve) => {
+      let poseRes;
+      let handRes;
+      let ssrRes;
+      let emotionRes;
+
      const timeStart = now();

      // configure backend
@ -270,20 +284,30 @@ class Human {
      const imageTensor = image.tensor;

      // run posenet
-      this.state = 'run:body';
-      timeStamp = now();
-      this.analyze('Start PoseNet');
-      const poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
-      this.analyze('End PoseNet:');
-      perf.body = Math.trunc(now() - timeStamp);
+      if (this.config.async) {
+        poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
+      } else {
+        this.state = 'run:body';
+        timeStamp = now();
+        this.analyze('Start PoseNet');
+        poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
+        this.analyze('End PoseNet:');
+        perf.body = Math.trunc(now() - timeStamp);
+      }

      // run handpose
-      this.state = 'run:hand';
-      timeStamp = now();
-      this.analyze('Start HandPose:');
-      const handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
-      this.analyze('End HandPose:');
-      perf.hand = Math.trunc(now() - timeStamp);
+      if (this.config.async) {
+        handRes = this.config.hand.enabled ? this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
+      } else {
+        this.state = 'run:hand';
+        timeStamp = now();
+        this.analyze('Start HandPose:');
+        handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
+        this.analyze('End HandPose:');
+        perf.hand = Math.trunc(now() - timeStamp);
+      }
+
+      if (this.config.async) [poseRes, handRes] = await Promise.all([poseRes, handRes]);

      // run facemesh, includes blazeface and iris
      const faceRes = [];
@ -302,12 +326,12 @@ class Human {
          // run ssr-net age & gender, inherits face from blazeface
          this.state = 'run:agegender';
          timeStamp = now();
-          const ssrData = (this.config.face.age.enabled || this.config.face.gender.enabled) ? await ssrnet.predict(face.image, this.config) : {};
+          ssrRes = (this.config.face.age.enabled || this.config.face.gender.enabled) ? await ssrnet.predict(face.image, this.config) : {};
          perf.agegender = Math.trunc(now() - timeStamp);
          // run emotion, inherits face from blazeface
          this.state = 'run:emotion';
          timeStamp = now();
-          const emotionData = this.config.face.emotion.enabled ? await emotion.predict(face.image, this.config) : {};
+          emotionRes = this.config.face.emotion.enabled ? await emotion.predict(face.image, this.config) : {};
          perf.emotion = Math.trunc(now() - timeStamp);

          // don't need face anymore
@ -322,10 +346,10 @@ class Human {
            box: face.box,
            mesh: face.mesh,
            annotations: face.annotations,
-            age: ssrData.age,
-            gender: ssrData.gender,
-            agConfidence: ssrData.confidence,
-            emotion: emotionData,
+            age: ssrRes.age,
+            gender: ssrRes.gender,
+            agConfidence: ssrRes.confidence,
+            emotion: emotionRes,
            iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
          });
          this.analyze('End FaceMesh:');