pure tensor pipeline without image conversions

2020-10-18 09:21:53 -04:00 · 2020-10-18 09:21:53 -04:00 · 5884c8cfe4
parent d44ff5dbb2
commit 5884c8cfe4
6 changed files with 54 additions and 54 deletions
--- a/README.md
+++ b/README.md
@ -268,7 +268,6 @@ config = {
      inputSize: 64,         // fixed value
      minConfidence: 0.5,    // threshold for discarding a prediction
      skipFrames: 10,        // how many frames to go without re-running the detector, only used for video inputs
-      useGrayscale: true,    // convert image to grayscale before prediction or use highest channel
      modelPath: '../models/emotion/model.json',
    },
  },
--- a/config.js
+++ b/config.js
@ -51,7 +51,6 @@ export default {
      inputSize: 64,         // fixed value
      minConfidence: 0.5,    // threshold for discarding a prediction
      skipFrames: 10,        // how many frames to go without re-running the detector
-      useGrayscale: true,    // convert image to grayscale before prediction or use highest channel
      modelPath: '../models/emotion/model.json',
    },
  },
--- a/src/emotion/emotion.js
+++ b/src/emotion/emotion.js
@ -6,16 +6,6 @@ let last = [];
 let frame = 0;
 const multiplier = 1.5;

-function getImage(image, size) {
-  const tensor = tf.tidy(() => {
-    const buffer = tf.browser.fromPixels(image, 1);
-    const resize = tf.image.resizeBilinear(buffer, [size, size]);
-    const expand = tf.cast(tf.expandDims(resize, 0), 'float32');
-    return expand;
-  });
-  return tensor;
-}
-
 async function load(config) {
  if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
  return models.emotion;
@ -27,25 +17,23 @@ async function predict(image, config) {
    return last;
  }
  frame = 0;
-  const enhance = tf.tidy(() => {
-    if (image instanceof tf.Tensor) {
-      const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
-      const [r, g, b] = tf.split(resize, 3, 3);
-      if (config.face.emotion.useGrayscale) {
-        // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
-        const r1 = tf.mul(r, [0.2989]);
-        const g1 = tf.mul(g, [0.5870]);
-        const b1 = tf.mul(b, [0.1140]);
-        const grayscale = tf.addN([r1, g1, b1]);
-        return grayscale;
-      }
-      return g;
-    }
-    return getImage(image, config.face.emotion.inputSize);
-  });
+  const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
+  const [red, green, blue] = tf.split(resize, 3, 3);
+  resize.dispose();
+  // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
+  const redNorm = tf.mul(red, [0.2989]);
+  const greenNorm = tf.mul(green, [0.5870]);
+  const blueNorm = tf.mul(blue, [0.1140]);
+  red.dispose();
+  green.dispose();
+  blue.dispose();
+  const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
+  redNorm.dispose();
+  greenNorm.dispose();
+  blueNorm.dispose();
  const obj = [];
  if (config.face.emotion.enabled) {
-    const emotionT = await models.emotion.predict(enhance);
+    const emotionT = await models.emotion.predict(grayscale);
    const data = await emotionT.data();
    for (let i = 0; i < data.length; i++) {
      if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
@ -53,7 +41,7 @@ async function predict(image, config) {
    obj.sort((a, b) => b.score - a.score);
    tf.dispose(emotionT);
  }
-  tf.dispose(enhance);
+  tf.dispose(grayscale);
  last = obj;
  return obj;
 }
--- a/src/handpose/handdetector.js
+++ b/src/handpose/handdetector.js
@ -32,9 +32,9 @@ class HandDetector {
  }

  async getBoundingBoxes(input) {
-    const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
-    const batchedPrediction = this.model.predict(normalizedInput);
+    const batchedPrediction = this.model.predict(input);
    const prediction = batchedPrediction.squeeze();
+    console.log(prediction);
    // Regression score for each anchor point.
    const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
    // Bounding box for each anchor point.
@ -42,11 +42,7 @@ class HandDetector {
    const boxes = this.normalizeBoxes(rawBoxes);
    const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
    const boxesWithHands = await boxesWithHandsTensor.array();
-    const toDispose = [normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
-    // if (boxesWithHands.length === 0) {
-    // toDispose.forEach((tensor) => tensor.dispose());
-    //  return null;
-    // }
+    const toDispose = [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
    const detectedHands = tf.tidy(() => {
      const detectedBoxes = [];
      for (const i in boxesWithHands) {
@ -69,12 +65,18 @@ class HandDetector {
     * @param input The image to classify.
     */
  async estimateHandBounds(input, config) {
-    const inputHeight = input.shape[1];
-    const inputWidth = input.shape[2];
+    // const inputHeight = input.shape[2];
+    // const inputWidth = input.shape[1];
    this.iouThreshold = config.iouThreshold;
    this.scoreThreshold = config.scoreThreshold;
    this.maxHands = config.maxHands;
-    const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
+    const resized = input.resizeBilinear([this.width, this.height]);
+    const divided = resized.div(255);
+    const normalized = divided.sub(0.5);
+    const image = normalized.mul(2.0);
+    resized.dispose();
+    divided.dispose();
+    normalized.dispose();
    const predictions = await this.getBoundingBoxes(image);
    image.dispose();
    if (!predictions || (predictions.length === 0)) return null;
@ -87,7 +89,7 @@ class HandDetector {
      const palmLandmarks = await prediction.palmLandmarks.array();
      prediction.boxes.dispose();
      prediction.palmLandmarks.dispose();
-      hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
+      hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [input.shape[2] / this.width, input.shape[1] / this.height]));
    }
    return hands;
  }
--- a/src/handpose/handpose.js
+++ b/src/handpose/handpose.js
@ -12,14 +12,7 @@ class HandPose {
    this.skipFrames = config.skipFrames;
    this.detectionConfidence = config.minConfidence;
    this.maxHands = config.maxHands;
-    const image = tf.tidy(() => {
-      if (!(input instanceof tf.Tensor)) {
-        input = tf.browser.fromPixels(input);
-      }
-      return input.toFloat().expandDims(0);
-    });
-    const predictions = await this.pipeline.estimateHands(image, config);
-    image.dispose();
+    const predictions = await this.pipeline.estimateHands(input, config);
    const hands = [];
    if (!predictions) return hands;
    for (const prediction of predictions) {
--- a/src/human.js
+++ b/src/human.js
@ -71,7 +71,9 @@ function mergeDeep(...objects) {

 function sanity(input) {
  if (!input) return 'input is not defined';
-  if (tf.ENV.flags.IS_BROWSER && (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement)) {
+  if (!(input instanceof tf.Tensor)
+      || (tf.ENV.flags.IS_BROWSER
+         && (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement))) {
    const width = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
    if (!width || (width === 0)) return 'input is empty';
  }
@ -99,6 +101,20 @@ async function load(userConfig) {
  if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);
 }

+function tfImage(input) {  // normalize input to a tensor: clone a tf.Tensor as-is, otherwise read pixels into a float tensor with a leading axis added
+  let image;
+  if (input instanceof tf.Tensor) {
+    image = tf.clone(input);  // separate handle; presumably so detect() can dispose its copy without disposing the caller's tensor -- confirm
+  } else {
+    const pixels = tf.browser.fromPixels(input);  // input is handed to fromPixels unchanged; no type check is done here
+    const casted = pixels.toFloat();  // cast pixel values to float
+    image = casted.expandDims(0);  // insert a new axis at position 0
+    pixels.dispose();  // intermediates are released by hand (no tf.tidy in this function)
+    casted.dispose();
+  }
+  return image;  // caller is responsible for disposing the result; detect() does so after the face pass
+}
+
 async function detect(input, userConfig = {}) {
  state = 'config';
  const perf = {};
@ -151,11 +167,13 @@ async function detect(input, userConfig = {}) {

    analyze('Start Detect:');

+    const imageTensor = tfImage(input);
+
    // run posenet
    state = 'run:body';
    timeStamp = now();
    analyze('Start PoseNet');
-    const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
+    const poseRes = config.body.enabled ? await models.posenet.estimatePoses(imageTensor, config.body) : [];
    analyze('End PoseNet:');
    perf.body = Math.trunc(now() - timeStamp);

@ -163,7 +181,7 @@ async function detect(input, userConfig = {}) {
    state = 'run:hand';
    timeStamp = now();
    analyze('Start HandPose:');
-    const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
+    const handRes = config.hand.enabled ? await models.handpose.estimateHands(imageTensor, config.hand) : [];
    analyze('End HandPose:');
    perf.hand = Math.trunc(now() - timeStamp);

@ -173,7 +191,7 @@ async function detect(input, userConfig = {}) {
      state = 'run:face';
      timeStamp = now();
      analyze('Start FaceMesh:');
-      const faces = await models.facemesh.estimateFaces(input, config.face);
+      const faces = await models.facemesh.estimateFaces(imageTensor, config.face);
      perf.face = Math.trunc(now() - timeStamp);
      for (const face of faces) {
        // if something went wrong, skip the face
@ -210,10 +228,11 @@ async function detect(input, userConfig = {}) {
          emotion: emotionData,
          iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
        });
+        analyze('End FaceMesh:');
      }
-      analyze('End FaceMesh:');
    }

+    imageTensor.dispose();
    state = 'idle';

    if (config.scoped) tf.engine().endScope();