pure tensor pipeline without image conversions

pull/50/head
Vladimir Mandic 2020-10-18 09:21:53 -04:00
parent 5e90ab0d51
commit b146a0a64e
24 changed files with 721 additions and 868 deletions
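The change removes per-model image conversion: src/human.js now converts the input to a batched float tensor once, and every detector receives that tensor directly. A minimal sketch of the new flow, assuming `tf` is the @tensorflow/tfjs import and `input` is either a tensor or a canvas/image/video element (the helper and call sites are taken from the diff below):

function tfImage(input) {
  // tensors are cloned so the caller keeps ownership of its own tensor
  if (input instanceof tf.Tensor) return tf.clone(input);
  // DOM inputs are converted exactly once: uint8 [height, width, 3] -> float32 [1, height, width, 3]
  const pixels = tf.browser.fromPixels(input);
  const casted = pixels.toFloat();
  const batched = casted.expandDims(0);
  pixels.dispose();
  casted.dispose();
  return batched;
}

// detect() builds the tensor once, feeds it to posenet, handpose and facemesh, then disposes it:
// const imageTensor = tfImage(input);
// const poseRes = await models.posenet.estimatePoses(imageTensor, config.body);
// const handRes = await models.handpose.estimateHands(imageTensor, config.hand);
// const faces = await models.facemesh.estimateFaces(imageTensor, config.face);
// imageTensor.dispose();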

View File

@ -268,7 +268,6 @@ config = {
inputSize: 64, // fixed value
minConfidence: 0.5, // threshold for discarding a prediction
skipFrames: 10, // how many frames to go without re-running the detector, only used for video inputs
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
modelPath: '../models/emotion/model.json',
},
},

View File

@ -51,7 +51,6 @@ export default {
inputSize: 64, // fixed value
minConfidence: 0.5, // threshold for discarding a prediction
skipFrames: 10, // how many frames to go without re-running the detector
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
modelPath: '../models/emotion/model.json',
},
},
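Since grayscale conversion is now always applied before the emotion model, the useGrayscale flag disappears from the emotion config. A hedged usage sketch, assuming the library is imported as `human` and exposes the detect(input, config) call shown later in this diff, with the per-call override merged into the defaults:

// only the remaining emotion options need to be set; useGrayscale is no longer recognized
const result = await human.detect(image, {
  face: { emotion: { enabled: true, minConfidence: 0.5, skipFrames: 10 } },
});
// per the face results assembled later in this diff, each detected face carries
// a sorted list of { score, emotion } entries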

201
dist/human.cjs vendored
View File

@ -144,13 +144,7 @@ var require_blazeface = __commonJS((exports2) => {
};
}
async estimateFaces(input) {
const imageRaw = !(input instanceof tf2.Tensor) ? tf2.browser.fromPixels(input) : input;
const imageCast = imageRaw.toFloat();
const image = imageCast.expandDims(0);
imageRaw.dispose();
imageCast.dispose();
const {boxes, scaleFactor} = await this.getBoundingBoxes(image);
image.dispose();
const {boxes, scaleFactor} = await this.getBoundingBoxes(input);
return Promise.all(boxes.map(async (face) => {
const scaledBox = scaleBoxFromPrediction(face, scaleFactor);
const [landmarkData, boxData, probabilityData] = await Promise.all([face.landmarks, scaledBox, face.probability].map(async (d) => d.array()));
@ -3818,13 +3812,7 @@ var require_facemesh = __commonJS((exports2) => {
async estimateFaces(input, config2) {
if (config2)
this.config = config2;
const imageRaw = !(input instanceof tf2.Tensor) ? tf2.browser.fromPixels(input) : input;
const imageCast = imageRaw.toFloat();
const image = imageCast.expandDims(0);
imageRaw.dispose();
imageCast.dispose();
const predictions = await this.pipeline.predict(image, config2);
tf2.dispose(image);
const predictions = await this.pipeline.predict(input, config2);
const results = [];
for (const prediction of predictions || []) {
if (prediction.isDisposedInternal)
@ -3879,12 +3867,6 @@ var require_ssrnet = __commonJS((exports2) => {
const models2 = {};
let last = {age: 0, gender: ""};
let frame = 0;
async function getImage(image, size) {
const buffer = tf2.browser.fromPixels(image);
const resize = tf2.image.resizeBilinear(buffer, [size, size]);
const expand = tf2.cast(tf2.expandDims(resize, 0), "float32");
return expand;
}
async function loadAge(config2) {
if (!models2.age)
models2.age = await tf2.loadGraphModel(config2.face.age.modelPath);
@ -3901,14 +3883,9 @@ var require_ssrnet = __commonJS((exports2) => {
return last;
}
frame = 0;
let enhance;
if (image instanceof tf2.Tensor) {
const resize = tf2.image.resizeBilinear(image, [config2.face.age.inputSize, config2.face.age.inputSize], false);
enhance = tf2.mul(resize, [255]);
tf2.dispose(resize);
} else {
enhance = await getImage(image, config2.face.age.inputSize);
}
const resize = tf2.image.resizeBilinear(image, [config2.face.age.inputSize, config2.face.age.inputSize], false);
const enhance = tf2.mul(resize, [255]);
tf2.dispose(resize);
const promises = [];
let ageT;
let genderT;
@ -3949,15 +3926,6 @@ var require_emotion = __commonJS((exports2) => {
let last = [];
let frame = 0;
const multiplier = 1.5;
function getImage(image, size) {
const tensor = tf2.tidy(() => {
const buffer = tf2.browser.fromPixels(image, 1);
const resize = tf2.image.resizeBilinear(buffer, [size, size]);
const expand = tf2.cast(tf2.expandDims(resize, 0), "float32");
return expand;
});
return tensor;
}
async function load2(config2) {
if (!models2.emotion)
models2.emotion = await tf2.loadGraphModel(config2.face.emotion.modelPath);
@ -3969,24 +3937,22 @@ var require_emotion = __commonJS((exports2) => {
return last;
}
frame = 0;
const enhance = tf2.tidy(() => {
if (image instanceof tf2.Tensor) {
const resize = tf2.image.resizeBilinear(image, [config2.face.emotion.inputSize, config2.face.emotion.inputSize], false);
const [r, g, b] = tf2.split(resize, 3, 3);
if (config2.face.emotion.useGrayscale) {
const r1 = tf2.mul(r, [0.2989]);
const g1 = tf2.mul(g, [0.587]);
const b1 = tf2.mul(b, [0.114]);
const grayscale = tf2.addN([r1, g1, b1]);
return grayscale;
}
return g;
}
return getImage(image, config2.face.emotion.inputSize);
});
const resize = tf2.image.resizeBilinear(image, [config2.face.emotion.inputSize, config2.face.emotion.inputSize], false);
const [red, green, blue] = tf2.split(resize, 3, 3);
resize.dispose();
const redNorm = tf2.mul(red, [0.2989]);
const greenNorm = tf2.mul(green, [0.587]);
const blueNorm = tf2.mul(blue, [0.114]);
red.dispose();
green.dispose();
blue.dispose();
const grayscale = tf2.addN([redNorm, greenNorm, blueNorm]);
redNorm.dispose();
greenNorm.dispose();
blueNorm.dispose();
const obj = [];
if (config2.face.emotion.enabled) {
const emotionT = await models2.emotion.predict(enhance);
const emotionT = await models2.emotion.predict(grayscale);
const data = await emotionT.data();
for (let i = 0; i < data.length; i++) {
if (multiplier * data[i] > config2.face.emotion.minConfidence)
@ -3995,7 +3961,7 @@ var require_emotion = __commonJS((exports2) => {
obj.sort((a, b) => b.score - a.score);
tf2.dispose(emotionT);
}
tf2.dispose(enhance);
tf2.dispose(grayscale);
last = obj;
return obj;
}
@ -4407,7 +4373,6 @@ var require_decodeMultiple = __commonJS((exports2) => {
// src/posenet/util.js
var require_util2 = __commonJS((exports2) => {
const tf2 = require("@tensorflow/tfjs");
const kpt = require_keypoints2();
function eitherPointDoesntMeetConfidence(a, b, minConfidence) {
return a < minConfidence || b < minConfidence;
@ -4446,70 +4411,26 @@ var require_util2 = __commonJS((exports2) => {
return Promise.all(tensors.map((tensor) => tensor.buffer()));
}
exports2.toTensorBuffers3D = toTensorBuffers3D;
function scalePose(pose, scaleY, scaleX, offsetY = 0, offsetX = 0) {
function scalePose(pose, scaleY, scaleX) {
return {
score: pose.score,
keypoints: pose.keypoints.map(({score, part, position}) => ({
score,
part,
position: {
x: position.x * scaleX + offsetX,
y: position.y * scaleY + offsetY
}
position: {x: position.x * scaleX, y: position.y * scaleY}
}))
};
}
exports2.scalePose = scalePose;
function scalePoses(poses, scaleY, scaleX, offsetY = 0, offsetX = 0) {
if (scaleX === 1 && scaleY === 1 && offsetY === 0 && offsetX === 0) {
return poses;
}
return poses.map((pose) => scalePose(pose, scaleY, scaleX, offsetY, offsetX));
function resizeTo(image, [targetH, targetW]) {
const input = image.squeeze(0);
const resized = input.resizeBilinear([targetH, targetW]);
input.dispose();
return resized;
}
exports2.scalePoses = scalePoses;
function getInputTensorDimensions(input) {
return input instanceof tf2.Tensor ? [input.shape[0], input.shape[1]] : [input.height, input.width];
}
exports2.getInputTensorDimensions = getInputTensorDimensions;
function toInputTensor(input) {
return input instanceof tf2.Tensor ? input : tf2.browser.fromPixels(input);
}
exports2.toInputTensor = toInputTensor;
function toResizedInputTensor(input, resizeHeight, resizeWidth) {
return tf2.tidy(() => {
const imageTensor = toInputTensor(input);
return imageTensor.resizeBilinear([resizeHeight, resizeWidth]);
});
}
exports2.toResizedInputTensor = toResizedInputTensor;
function padAndResizeTo(input, [targetH, targetW]) {
const [height, width] = getInputTensorDimensions(input);
const targetAspect = targetW / targetH;
const aspect = width / height;
let [padT, padB, padL, padR] = [0, 0, 0, 0];
if (aspect < targetAspect) {
padT = 0;
padB = 0;
padL = Math.round(0.5 * (targetAspect * height - width));
padR = Math.round(0.5 * (targetAspect * height - width));
} else {
padT = Math.round(0.5 * (1 / targetAspect * width - height));
padB = Math.round(0.5 * (1 / targetAspect * width - height));
padL = 0;
padR = 0;
}
const resized = tf2.tidy(() => {
let imageTensor = toInputTensor(input);
imageTensor = tf2.pad3d(imageTensor, [[padT, padB], [padL, padR], [0, 0]]);
return imageTensor.resizeBilinear([targetH, targetW]);
});
return {resized, padding: {top: padT, left: padL, right: padR, bottom: padB}};
}
exports2.padAndResizeTo = padAndResizeTo;
function scaleAndFlipPoses(poses, [height, width], [inputResolutionHeight, inputResolutionWidth], padding) {
const scaleY = (height + padding.top + padding.bottom) / inputResolutionHeight;
const scaleX = (width + padding.left + padding.right) / inputResolutionWidth;
const scaledPoses = scalePoses(poses, scaleY, scaleX, -padding.top, -padding.left);
exports2.resizeTo = resizeTo;
function scaleAndFlipPoses(poses, [height, width], [inputResolutionHeight, inputResolutionWidth]) {
const scaledPoses = poses.map((pose) => scalePose(pose, height / inputResolutionHeight, width / inputResolutionWidth));
return scaledPoses;
}
exports2.scaleAndFlipPoses = scaleAndFlipPoses;
@ -4527,8 +4448,9 @@ var require_modelPoseNet = __commonJS((exports2) => {
}
async estimatePoses(input, config2) {
const outputStride = config2.outputStride;
const [height, width] = util.getInputTensorDimensions(input);
const {resized, padding} = util.padAndResizeTo(input, [config2.inputResolution, config2.inputResolution]);
const height = input.shape[1];
const width = input.shape[2];
const resized = util.resizeTo(input, [config2.inputResolution, config2.inputResolution]);
const {heatmapScores, offsets, displacementFwd, displacementBwd} = this.baseModel.predict(resized);
const allTensorBuffers = await util.toTensorBuffers3D([heatmapScores, offsets, displacementFwd, displacementBwd]);
const scoresBuffer = allTensorBuffers[0];
@ -4536,7 +4458,7 @@ var require_modelPoseNet = __commonJS((exports2) => {
const displacementsFwdBuffer = allTensorBuffers[2];
const displacementsBwdBuffer = allTensorBuffers[3];
const poses = await decodeMultiple.decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, outputStride, config2.maxDetections, config2.scoreThreshold, config2.nmsRadius);
const resultPoses = util.scaleAndFlipPoses(poses, [height, width], [config2.inputResolution, config2.inputResolution], padding);
const resultPoses = util.scaleAndFlipPoses(poses, [height, width], [config2.inputResolution, config2.inputResolution]);
heatmapScores.dispose();
offsets.dispose();
displacementFwd.dispose();
@ -4685,15 +4607,15 @@ var require_handdetector = __commonJS((exports2) => {
});
}
async getBoundingBoxes(input) {
const normalizedInput = tf2.tidy(() => tf2.mul(tf2.sub(input, 0.5), 2));
const batchedPrediction = this.model.predict(normalizedInput);
const batchedPrediction = this.model.predict(input);
const prediction = batchedPrediction.squeeze();
console.log(prediction);
const scores = tf2.tidy(() => tf2.sigmoid(tf2.slice(prediction, [0, 0], [-1, 1])).squeeze());
const rawBoxes = tf2.slice(prediction, [0, 1], [-1, 4]);
const boxes = this.normalizeBoxes(rawBoxes);
const boxesWithHandsTensor = await tf2.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
const boxesWithHands = await boxesWithHandsTensor.array();
const toDispose = [normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
const toDispose = [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
const detectedHands = tf2.tidy(() => {
const detectedBoxes = [];
for (const i in boxesWithHands) {
@ -4709,12 +4631,16 @@ var require_handdetector = __commonJS((exports2) => {
return detectedHands;
}
async estimateHandBounds(input, config2) {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
this.iouThreshold = config2.iouThreshold;
this.scoreThreshold = config2.scoreThreshold;
this.maxHands = config2.maxHands;
const image = tf2.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
const resized = input.resizeBilinear([this.width, this.height]);
const divided = resized.div(255);
const normalized = divided.sub(0.5);
const image = normalized.mul(2);
resized.dispose();
divided.dispose();
normalized.dispose();
const predictions = await this.getBoundingBoxes(image);
image.dispose();
if (!predictions || predictions.length === 0)
@ -4728,7 +4654,7 @@ var require_handdetector = __commonJS((exports2) => {
const palmLandmarks = await prediction.palmLandmarks.array();
prediction.boxes.dispose();
prediction.palmLandmarks.dispose();
hands.push(bounding.scaleBoxCoordinates({startPoint, endPoint, palmLandmarks}, [inputWidth / this.width, inputHeight / this.height]));
hands.push(bounding.scaleBoxCoordinates({startPoint, endPoint, palmLandmarks}, [input.shape[2] / this.width, input.shape[1] / this.height]));
}
return hands;
}
@ -4886,7 +4812,7 @@ var require_pipeline2 = __commonJS((exports2) => {
]);
}
async estimateHands(image, config2) {
this.maxContinuousChecks = config2.skipFrames;
this.skipFrames = config2.skipFrames;
this.detectionConfidence = config2.minConfidence;
this.maxHands = config2.maxHands;
this.runsWithoutHandDetector++;
@ -4996,14 +4922,7 @@ var require_handpose = __commonJS((exports2) => {
this.skipFrames = config2.skipFrames;
this.detectionConfidence = config2.minConfidence;
this.maxHands = config2.maxHands;
const image = tf2.tidy(() => {
if (!(input instanceof tf2.Tensor)) {
input = tf2.browser.fromPixels(input);
}
return input.toFloat().expandDims(0);
});
const predictions = await this.pipeline.estimateHands(image, config2);
image.dispose();
const predictions = await this.pipeline.estimateHands(input, config2);
const hands = [];
if (!predictions)
return hands;
@ -5094,7 +5013,6 @@ var require_config = __commonJS((exports2) => {
inputSize: 64,
minConfidence: 0.5,
skipFrames: 10,
useGrayscale: true,
modelPath: "../models/emotion/model.json"
}
},
@ -5258,7 +5176,7 @@ function mergeDeep(...objects) {
function sanity(input) {
if (!input)
return "input is not defined";
if (tf.ENV.flags.IS_BROWSER && (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement)) {
if (!(input instanceof tf.Tensor) || tf.ENV.flags.IS_BROWSER && (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement)) {
const width = input.naturalWidth || input.videoWidth || input.width || input.shape && input.shape[1] > 0;
if (!width || width === 0)
return "input is empty";
@ -5293,6 +5211,19 @@ async function load(userConfig) {
if (config.face.enabled && config.face.emotion.enabled && !models.emotion)
models.emotion = await emotion.load(config);
}
function tfImage(input) {
let image;
if (input instanceof tf.Tensor) {
image = tf.clone(input);
} else {
const pixels = tf.browser.fromPixels(input);
const casted = pixels.toFloat();
image = casted.expandDims(0);
pixels.dispose();
casted.dispose();
}
return image;
}
async function detect(input, userConfig = {}) {
state = "config";
const perf = {};
@ -5332,16 +5263,17 @@ async function detect(input, userConfig = {}) {
if (config.scoped)
tf.engine().startScope();
analyze("Start Detect:");
const imageTensor = tfImage(input);
state = "run:body";
timeStamp = now();
analyze("Start PoseNet");
const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
const poseRes = config.body.enabled ? await models.posenet.estimatePoses(imageTensor, config.body) : [];
analyze("End PoseNet:");
perf.body = Math.trunc(now() - timeStamp);
state = "run:hand";
timeStamp = now();
analyze("Start HandPose:");
const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
const handRes = config.hand.enabled ? await models.handpose.estimateHands(imageTensor, config.hand) : [];
analyze("End HandPose:");
perf.hand = Math.trunc(now() - timeStamp);
const faceRes = [];
@ -5349,7 +5281,7 @@ async function detect(input, userConfig = {}) {
state = "run:face";
timeStamp = now();
analyze("Start FaceMesh:");
const faces = await models.facemesh.estimateFaces(input, config.face);
const faces = await models.facemesh.estimateFaces(imageTensor, config.face);
perf.face = Math.trunc(now() - timeStamp);
for (const face of faces) {
if (!face.image || face.image.isDisposedInternal) {
@ -5377,9 +5309,10 @@ async function detect(input, userConfig = {}) {
emotion: emotionData,
iris: iris !== 0 ? Math.trunc(100 * 11.7 / iris) / 100 : 0
});
analyze("End FaceMesh:");
}
analyze("End FaceMesh:");
}
imageTensor.dispose();
state = "idle";
if (config.scoped)
tf.engine().endScope();

48
dist/human.cjs.json vendored
View File

@ -1,7 +1,7 @@
{
"inputs": {
"config.js": {
"bytes": 4862,
"bytes": 4762,
"imports": []
},
"package.json": {
@ -9,11 +9,11 @@
"imports": []
},
"src/emotion/emotion.js": {
"bytes": 2019,
"bytes": 1646,
"imports": []
},
"src/facemesh/blazeface.js": {
"bytes": 7407,
"bytes": 7161,
"imports": []
},
"src/facemesh/box.js": {
@ -21,7 +21,7 @@
"imports": []
},
"src/facemesh/facemesh.js": {
"bytes": 2816,
"bytes": 2568,
"imports": [
{
"path": "src/facemesh/blazeface.js"
@ -75,7 +75,7 @@
"imports": []
},
"src/handpose/handdetector.js": {
"bytes": 4296,
"bytes": 4277,
"imports": [
{
"path": "src/handpose/box.js"
@ -83,7 +83,7 @@
]
},
"src/handpose/handpose.js": {
"bytes": 2356,
"bytes": 2152,
"imports": [
{
"path": "src/handpose/handdetector.js"
@ -101,7 +101,7 @@
"imports": []
},
"src/handpose/pipeline.js": {
"bytes": 8178,
"bytes": 8169,
"imports": [
{
"path": "src/handpose/box.js"
@ -116,7 +116,7 @@
"imports": []
},
"src/human.js": {
"bytes": 8299,
"bytes": 8784,
"imports": [
{
"path": "src/facemesh/facemesh.js"
@ -195,7 +195,7 @@
]
},
"src/posenet/modelPoseNet.js": {
"bytes": 3472,
"bytes": 3447,
"imports": [
{
"path": "src/posenet/modelMobileNet.js"
@ -229,7 +229,7 @@
]
},
"src/posenet/util.js": {
"bytes": 4202,
"bytes": 2260,
"imports": [
{
"path": "src/posenet/keypoints.js"
@ -245,7 +245,7 @@
]
},
"src/ssrnet/ssrnet.js": {
"bytes": 1937,
"bytes": 1574,
"imports": []
}
},
@ -253,13 +253,13 @@
"dist/human.cjs.map": {
"imports": [],
"inputs": {},
"bytes": 220934
"bytes": 215962
},
"dist/human.cjs": {
"imports": [],
"inputs": {
"src/facemesh/blazeface.js": {
"bytesInOutput": 7398
"bytesInOutput": 7138
},
"src/facemesh/keypoints.js": {
"bytesInOutput": 2771
@ -280,13 +280,13 @@
"bytesInOutput": 23311
},
"src/facemesh/facemesh.js": {
"bytesInOutput": 2950
"bytesInOutput": 2687
},
"src/ssrnet/ssrnet.js": {
"bytesInOutput": 2158
"bytesInOutput": 1768
},
"src/emotion/emotion.js": {
"bytesInOutput": 2133
"bytesInOutput": 1736
},
"src/posenet/modelBase.js": {
"bytesInOutput": 1120
@ -313,10 +313,10 @@
"bytesInOutput": 1992
},
"src/posenet/util.js": {
"bytesInOutput": 4383
"bytesInOutput": 2404
},
"src/posenet/modelPoseNet.js": {
"bytesInOutput": 1976
"bytesInOutput": 1955
},
"src/posenet/posenet.js": {
"bytesInOutput": 917
@ -325,7 +325,7 @@
"bytesInOutput": 2813
},
"src/handpose/handdetector.js": {
"bytesInOutput": 4135
"bytesInOutput": 4161
},
"src/handpose/keypoints.js": {
"bytesInOutput": 265
@ -334,22 +334,22 @@
"bytesInOutput": 2671
},
"src/handpose/pipeline.js": {
"bytesInOutput": 7625
"bytesInOutput": 7616
},
"src/handpose/handpose.js": {
"bytesInOutput": 2509
"bytesInOutput": 2288
},
"config.js": {
"bytesInOutput": 1872
"bytesInOutput": 1844
},
"package.json": {
"bytesInOutput": 2778
},
"src/human.js": {
"bytesInOutput": 7273
"bytesInOutput": 7694
}
},
"bytes": 134728
"bytes": 131607
}
}
}

4
dist/human.cjs.map vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,7 +1,7 @@
{
"inputs": {
"config.js": {
"bytes": 4862,
"bytes": 4762,
"imports": []
},
"package.json": {
@ -9,11 +9,11 @@
"imports": []
},
"src/emotion/emotion.js": {
"bytes": 2019,
"bytes": 1646,
"imports": []
},
"src/facemesh/blazeface.js": {
"bytes": 7407,
"bytes": 7161,
"imports": []
},
"src/facemesh/box.js": {
@ -21,7 +21,7 @@
"imports": []
},
"src/facemesh/facemesh.js": {
"bytes": 2816,
"bytes": 2568,
"imports": [
{
"path": "src/facemesh/blazeface.js"
@ -75,7 +75,7 @@
"imports": []
},
"src/handpose/handdetector.js": {
"bytes": 4296,
"bytes": 4277,
"imports": [
{
"path": "src/handpose/box.js"
@ -83,7 +83,7 @@
]
},
"src/handpose/handpose.js": {
"bytes": 2356,
"bytes": 2152,
"imports": [
{
"path": "src/handpose/handdetector.js"
@ -101,7 +101,7 @@
"imports": []
},
"src/handpose/pipeline.js": {
"bytes": 8178,
"bytes": 8169,
"imports": [
{
"path": "src/handpose/box.js"
@ -116,7 +116,7 @@
"imports": []
},
"src/human.js": {
"bytes": 8299,
"bytes": 8784,
"imports": [
{
"path": "src/facemesh/facemesh.js"
@ -195,7 +195,7 @@
]
},
"src/posenet/modelPoseNet.js": {
"bytes": 3472,
"bytes": 3447,
"imports": [
{
"path": "src/posenet/modelMobileNet.js"
@ -229,7 +229,7 @@
]
},
"src/posenet/util.js": {
"bytes": 4202,
"bytes": 2260,
"imports": [
{
"path": "src/posenet/keypoints.js"
@ -245,7 +245,7 @@
]
},
"src/ssrnet/ssrnet.js": {
"bytes": 1937,
"bytes": 1574,
"imports": []
}
},
@ -253,13 +253,13 @@
"dist/human.esm-nobundle.js.map": {
"imports": [],
"inputs": {},
"bytes": 199199
"bytes": 194427
},
"dist/human.esm-nobundle.js": {
"imports": [],
"inputs": {
"src/facemesh/blazeface.js": {
"bytesInOutput": 3255
"bytesInOutput": 3131
},
"src/facemesh/keypoints.js": {
"bytesInOutput": 1950
@ -280,13 +280,13 @@
"bytesInOutput": 9995
},
"src/facemesh/facemesh.js": {
"bytesInOutput": 1391
"bytesInOutput": 1259
},
"src/ssrnet/ssrnet.js": {
"bytesInOutput": 1142
"bytesInOutput": 934
},
"src/emotion/emotion.js": {
"bytesInOutput": 1147
"bytesInOutput": 944
},
"src/posenet/modelBase.js": {
"bytesInOutput": 597
@ -313,10 +313,10 @@
"bytesInOutput": 608
},
"src/posenet/util.js": {
"bytesInOutput": 1850
"bytesInOutput": 1058
},
"src/posenet/modelPoseNet.js": {
"bytesInOutput": 885
"bytesInOutput": 846
},
"src/posenet/posenet.js": {
"bytesInOutput": 464
@ -325,31 +325,31 @@
"bytesInOutput": 1400
},
"src/handpose/handdetector.js": {
"bytesInOutput": 2040
"bytesInOutput": 2067
},
"src/handpose/keypoints.js": {
"bytesInOutput": 160
},
"src/handpose/util.js": {
"bytesInOutput": 984
"bytesInOutput": 977
},
"src/handpose/pipeline.js": {
"bytesInOutput": 3218
"bytesInOutput": 3209
},
"src/handpose/handpose.js": {
"bytesInOutput": 1317
"bytesInOutput": 1211
},
"config.js": {
"bytesInOutput": 1146
"bytesInOutput": 1130
},
"package.json": {
"bytesInOutput": 2305
},
"src/human.js": {
"bytesInOutput": 4135
"bytesInOutput": 4349
}
},
"bytes": 69965
"bytes": 68570
}
}
}

474
dist/human.esm.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

51
dist/human.esm.json vendored
View File

@ -1,7 +1,7 @@
{
"inputs": {
"config.js": {
"bytes": 4862,
"bytes": 4762,
"imports": []
},
"node_modules/@tensorflow/tfjs-backend-cpu/dist/tf-backend-cpu.node.js": {
@ -153,7 +153,7 @@
"imports": []
},
"src/emotion/emotion.js": {
"bytes": 2019,
"bytes": 1646,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -161,7 +161,7 @@
]
},
"src/facemesh/blazeface.js": {
"bytes": 7407,
"bytes": 7161,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -177,7 +177,7 @@
]
},
"src/facemesh/facemesh.js": {
"bytes": 2816,
"bytes": 2568,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -241,7 +241,7 @@
]
},
"src/handpose/handdetector.js": {
"bytes": 4296,
"bytes": 4277,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -252,7 +252,7 @@
]
},
"src/handpose/handpose.js": {
"bytes": 2356,
"bytes": 2152,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -273,7 +273,7 @@
"imports": []
},
"src/handpose/pipeline.js": {
"bytes": 8178,
"bytes": 8169,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -291,7 +291,7 @@
"imports": []
},
"src/human.js": {
"bytes": 8299,
"bytes": 8784,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -380,7 +380,7 @@
]
},
"src/posenet/modelPoseNet.js": {
"bytes": 3472,
"bytes": 3447,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -417,11 +417,8 @@
]
},
"src/posenet/util.js": {
"bytes": 4202,
"bytes": 2260,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
},
{
"path": "src/posenet/keypoints.js"
}
@ -436,7 +433,7 @@
]
},
"src/ssrnet/ssrnet.js": {
"bytes": 1937,
"bytes": 1574,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -464,7 +461,7 @@
"dist/human.esm.js.map": {
"imports": [],
"inputs": {},
"bytes": 4960250
"bytes": 4955479
},
"dist/human.esm.js": {
"imports": [],
@ -527,7 +524,7 @@
"bytesInOutput": 765
},
"src/facemesh/blazeface.js": {
"bytesInOutput": 3268
"bytesInOutput": 3142
},
"src/facemesh/keypoints.js": {
"bytesInOutput": 1951
@ -548,13 +545,13 @@
"bytesInOutput": 9996
},
"src/facemesh/facemesh.js": {
"bytesInOutput": 1376
"bytesInOutput": 1242
},
"src/ssrnet/ssrnet.js": {
"bytesInOutput": 1143
"bytesInOutput": 930
},
"src/emotion/emotion.js": {
"bytesInOutput": 1142
"bytesInOutput": 932
},
"src/posenet/modelBase.js": {
"bytesInOutput": 575
@ -581,10 +578,10 @@
"bytesInOutput": 609
},
"src/posenet/util.js": {
"bytesInOutput": 1840
"bytesInOutput": 1067
},
"src/posenet/modelPoseNet.js": {
"bytesInOutput": 863
"bytesInOutput": 824
},
"src/posenet/posenet.js": {
"bytesInOutput": 479
@ -593,7 +590,7 @@
"bytesInOutput": 1386
},
"src/handpose/handdetector.js": {
"bytesInOutput": 2050
"bytesInOutput": 2073
},
"src/handpose/keypoints.js": {
"bytesInOutput": 161
@ -602,22 +599,22 @@
"bytesInOutput": 993
},
"src/handpose/pipeline.js": {
"bytesInOutput": 3214
"bytesInOutput": 3205
},
"src/handpose/handpose.js": {
"bytesInOutput": 1303
"bytesInOutput": 1189
},
"config.js": {
"bytesInOutput": 1147
"bytesInOutput": 1131
},
"package.json": {
"bytesInOutput": 2306
},
"src/human.js": {
"bytesInOutput": 4246
"bytesInOutput": 4464
}
},
"bytes": 1106891
"bytes": 1105498
}
}
}

474
dist/human.js vendored

File diff suppressed because one or more lines are too long

4
dist/human.js.map vendored

File diff suppressed because one or more lines are too long

51
dist/human.json vendored
View File

@ -1,7 +1,7 @@
{
"inputs": {
"config.js": {
"bytes": 4862,
"bytes": 4762,
"imports": []
},
"node_modules/@tensorflow/tfjs-backend-cpu/dist/tf-backend-cpu.node.js": {
@ -153,7 +153,7 @@
"imports": []
},
"src/emotion/emotion.js": {
"bytes": 2019,
"bytes": 1646,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -161,7 +161,7 @@
]
},
"src/facemesh/blazeface.js": {
"bytes": 7407,
"bytes": 7161,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -177,7 +177,7 @@
]
},
"src/facemesh/facemesh.js": {
"bytes": 2816,
"bytes": 2568,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -241,7 +241,7 @@
]
},
"src/handpose/handdetector.js": {
"bytes": 4296,
"bytes": 4277,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -252,7 +252,7 @@
]
},
"src/handpose/handpose.js": {
"bytes": 2356,
"bytes": 2152,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -273,7 +273,7 @@
"imports": []
},
"src/handpose/pipeline.js": {
"bytes": 8178,
"bytes": 8169,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -291,7 +291,7 @@
"imports": []
},
"src/human.js": {
"bytes": 8299,
"bytes": 8784,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -380,7 +380,7 @@
]
},
"src/posenet/modelPoseNet.js": {
"bytes": 3472,
"bytes": 3447,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -417,11 +417,8 @@
]
},
"src/posenet/util.js": {
"bytes": 4202,
"bytes": 2260,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
},
{
"path": "src/posenet/keypoints.js"
}
@ -436,7 +433,7 @@
]
},
"src/ssrnet/ssrnet.js": {
"bytes": 1937,
"bytes": 1574,
"imports": [
{
"path": "node_modules/@tensorflow/tfjs/dist/tf.node.js"
@ -464,7 +461,7 @@
"dist/human.js.map": {
"imports": [],
"inputs": {},
"bytes": 4960250
"bytes": 4955479
},
"dist/human.js": {
"imports": [],
@ -527,7 +524,7 @@
"bytesInOutput": 765
},
"src/facemesh/blazeface.js": {
"bytesInOutput": 3268
"bytesInOutput": 3142
},
"src/facemesh/keypoints.js": {
"bytesInOutput": 1951
@ -548,13 +545,13 @@
"bytesInOutput": 9996
},
"src/facemesh/facemesh.js": {
"bytesInOutput": 1376
"bytesInOutput": 1242
},
"src/ssrnet/ssrnet.js": {
"bytesInOutput": 1143
"bytesInOutput": 930
},
"src/emotion/emotion.js": {
"bytesInOutput": 1142
"bytesInOutput": 932
},
"src/posenet/modelBase.js": {
"bytesInOutput": 575
@ -581,10 +578,10 @@
"bytesInOutput": 609
},
"src/posenet/util.js": {
"bytesInOutput": 1840
"bytesInOutput": 1067
},
"src/posenet/modelPoseNet.js": {
"bytesInOutput": 863
"bytesInOutput": 824
},
"src/posenet/posenet.js": {
"bytesInOutput": 479
@ -593,7 +590,7 @@
"bytesInOutput": 1386
},
"src/handpose/handdetector.js": {
"bytesInOutput": 2050
"bytesInOutput": 2073
},
"src/handpose/keypoints.js": {
"bytesInOutput": 161
@ -602,22 +599,22 @@
"bytesInOutput": 993
},
"src/handpose/pipeline.js": {
"bytesInOutput": 3214
"bytesInOutput": 3205
},
"src/handpose/handpose.js": {
"bytesInOutput": 1303
"bytesInOutput": 1189
},
"config.js": {
"bytesInOutput": 1147
"bytesInOutput": 1131
},
"package.json": {
"bytesInOutput": 2306
},
"src/human.js": {
"bytesInOutput": 4246
"bytesInOutput": 4464
}
},
"bytes": 1106900
"bytes": 1105507
}
}
}

View File

@ -6,16 +6,6 @@ let last = [];
let frame = 0;
const multiplier = 1.5;
function getImage(image, size) {
const tensor = tf.tidy(() => {
const buffer = tf.browser.fromPixels(image, 1);
const resize = tf.image.resizeBilinear(buffer, [size, size]);
const expand = tf.cast(tf.expandDims(resize, 0), 'float32');
return expand;
});
return tensor;
}
async function load(config) {
if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
return models.emotion;
@ -27,25 +17,23 @@ async function predict(image, config) {
return last;
}
frame = 0;
const enhance = tf.tidy(() => {
if (image instanceof tf.Tensor) {
const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
const [r, g, b] = tf.split(resize, 3, 3);
if (config.face.emotion.useGrayscale) {
// weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
const r1 = tf.mul(r, [0.2989]);
const g1 = tf.mul(g, [0.5870]);
const b1 = tf.mul(b, [0.1140]);
const grayscale = tf.addN([r1, g1, b1]);
return grayscale;
}
return g;
}
return getImage(image, config.face.emotion.inputSize);
});
const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
const [red, green, blue] = tf.split(resize, 3, 3);
resize.dispose();
// weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
const redNorm = tf.mul(red, [0.2989]);
const greenNorm = tf.mul(green, [0.5870]);
const blueNorm = tf.mul(blue, [0.1140]);
red.dispose();
green.dispose();
blue.dispose();
const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
redNorm.dispose();
greenNorm.dispose();
blueNorm.dispose();
const obj = [];
if (config.face.emotion.enabled) {
const emotionT = await models.emotion.predict(enhance);
const emotionT = await models.emotion.predict(grayscale);
const data = await emotionT.data();
for (let i = 0; i < data.length; i++) {
if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
@ -53,7 +41,7 @@ async function predict(image, config) {
obj.sort((a, b) => b.score - a.score);
tf.dispose(emotionT);
}
tf.dispose(enhance);
tf.dispose(grayscale);
last = obj;
return obj;
}

View File

@ -144,13 +144,7 @@ class BlazeFaceModel {
}
async estimateFaces(input) {
const imageRaw = !(input instanceof tf.Tensor) ? tf.browser.fromPixels(input) : input;
const imageCast = imageRaw.toFloat();
const image = imageCast.expandDims(0);
imageRaw.dispose();
imageCast.dispose();
const { boxes, scaleFactor } = await this.getBoundingBoxes(image);
image.dispose();
const { boxes, scaleFactor } = await this.getBoundingBoxes(input);
return Promise.all(boxes.map(async (face) => {
const scaledBox = scaleBoxFromPrediction(face, scaleFactor);
const [landmarkData, boxData, probabilityData] = await Promise.all([face.landmarks, scaledBox, face.probability].map(async (d) => d.array()));

View File

@ -13,13 +13,7 @@ class MediaPipeFaceMesh {
async estimateFaces(input, config) {
if (config) this.config = config;
const imageRaw = !(input instanceof tf.Tensor) ? tf.browser.fromPixels(input) : input;
const imageCast = imageRaw.toFloat();
const image = imageCast.expandDims(0);
imageRaw.dispose();
imageCast.dispose();
const predictions = await this.pipeline.predict(image, config);
tf.dispose(image);
const predictions = await this.pipeline.predict(input, config);
const results = [];
for (const prediction of (predictions || [])) {
// guard against disposed tensors on long-running operations, such as a pause in the middle of processing

View File

@ -32,9 +32,9 @@ class HandDetector {
}
async getBoundingBoxes(input) {
const normalizedInput = tf.tidy(() => tf.mul(tf.sub(input, 0.5), 2));
const batchedPrediction = this.model.predict(normalizedInput);
const batchedPrediction = this.model.predict(input);
const prediction = batchedPrediction.squeeze();
console.log(prediction);
// Regression score for each anchor point.
const scores = tf.tidy(() => tf.sigmoid(tf.slice(prediction, [0, 0], [-1, 1])).squeeze());
// Bounding box for each anchor point.
@ -42,11 +42,7 @@ class HandDetector {
const boxes = this.normalizeBoxes(rawBoxes);
const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
const boxesWithHands = await boxesWithHandsTensor.array();
const toDispose = [normalizedInput, batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
// if (boxesWithHands.length === 0) {
// toDispose.forEach((tensor) => tensor.dispose());
// return null;
// }
const toDispose = [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
const detectedHands = tf.tidy(() => {
const detectedBoxes = [];
for (const i in boxesWithHands) {
@ -69,12 +65,18 @@ class HandDetector {
* @param input The image to classify.
*/
async estimateHandBounds(input, config) {
const inputHeight = input.shape[1];
const inputWidth = input.shape[2];
// const inputHeight = input.shape[2];
// const inputWidth = input.shape[1];
this.iouThreshold = config.iouThreshold;
this.scoreThreshold = config.scoreThreshold;
this.maxHands = config.maxHands;
const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
const resized = input.resizeBilinear([this.width, this.height]);
const divided = resized.div(255);
const normalized = divided.sub(0.5);
const image = normalized.mul(2.0);
resized.dispose();
divided.dispose();
normalized.dispose();
const predictions = await this.getBoundingBoxes(image);
image.dispose();
if (!predictions || (predictions.length === 0)) return null;
@ -87,7 +89,7 @@ class HandDetector {
const palmLandmarks = await prediction.palmLandmarks.array();
prediction.boxes.dispose();
prediction.palmLandmarks.dispose();
hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [inputWidth / this.width, inputHeight / this.height]));
hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [input.shape[2] / this.width, input.shape[1] / this.height]));
}
return hands;
}

View File

@ -12,14 +12,7 @@ class HandPose {
this.skipFrames = config.skipFrames;
this.detectionConfidence = config.minConfidence;
this.maxHands = config.maxHands;
const image = tf.tidy(() => {
if (!(input instanceof tf.Tensor)) {
input = tf.browser.fromPixels(input);
}
return input.toFloat().expandDims(0);
});
const predictions = await this.pipeline.estimateHands(image, config);
image.dispose();
const predictions = await this.pipeline.estimateHands(input, config);
const hands = [];
if (!predictions) return hands;
for (const prediction of predictions) {

View File

@ -76,7 +76,7 @@ class HandPipeline {
}
async estimateHands(image, config) {
this.maxContinuousChecks = config.skipFrames;
this.skipFrames = config.skipFrames;
this.detectionConfidence = config.minConfidence;
this.maxHands = config.maxHands;
this.runsWithoutHandDetector++;

View File

@ -71,7 +71,9 @@ function mergeDeep(...objects) {
function sanity(input) {
if (!input) return 'input is not defined';
if (tf.ENV.flags.IS_BROWSER && (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement)) {
if (!(input instanceof tf.Tensor)
|| (tf.ENV.flags.IS_BROWSER
&& (input instanceof ImageData || input instanceof HTMLImageElement || input instanceof HTMLCanvasElement || input instanceof HTMLVideoElement || input instanceof HTMLMediaElement))) {
const width = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
if (!width || (width === 0)) return 'input is empty';
}
@ -99,6 +101,20 @@ async function load(userConfig) {
if (config.face.enabled && config.face.emotion.enabled && !models.emotion) models.emotion = await emotion.load(config);
}
function tfImage(input) {
let image;
if (input instanceof tf.Tensor) {
image = tf.clone(input);
} else {
const pixels = tf.browser.fromPixels(input);
const casted = pixels.toFloat();
image = casted.expandDims(0);
pixels.dispose();
casted.dispose();
}
return image;
}
async function detect(input, userConfig = {}) {
state = 'config';
const perf = {};
@ -151,11 +167,13 @@ async function detect(input, userConfig = {}) {
analyze('Start Detect:');
const imageTensor = tfImage(input);
// run posenet
state = 'run:body';
timeStamp = now();
analyze('Start PoseNet');
const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
const poseRes = config.body.enabled ? await models.posenet.estimatePoses(imageTensor, config.body) : [];
analyze('End PoseNet:');
perf.body = Math.trunc(now() - timeStamp);
@ -163,7 +181,7 @@ async function detect(input, userConfig = {}) {
state = 'run:hand';
timeStamp = now();
analyze('Start HandPose:');
const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
const handRes = config.hand.enabled ? await models.handpose.estimateHands(imageTensor, config.hand) : [];
analyze('End HandPose:');
perf.hand = Math.trunc(now() - timeStamp);
@ -173,7 +191,7 @@ async function detect(input, userConfig = {}) {
state = 'run:face';
timeStamp = now();
analyze('Start FaceMesh:');
const faces = await models.facemesh.estimateFaces(input, config.face);
const faces = await models.facemesh.estimateFaces(imageTensor, config.face);
perf.face = Math.trunc(now() - timeStamp);
for (const face of faces) {
// if something went wrong, skip the face
@ -210,10 +228,11 @@ async function detect(input, userConfig = {}) {
emotion: emotionData,
iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
});
analyze('End FaceMesh:');
}
analyze('End FaceMesh:');
}
imageTensor.dispose();
state = 'idle';
if (config.scoped) tf.engine().endScope();

View File

@ -31,8 +31,9 @@ class PoseNet {
async estimatePoses(input, config) {
const outputStride = config.outputStride;
// const inputResolution = config.inputResolution;
const [height, width] = util.getInputTensorDimensions(input);
const { resized, padding } = util.padAndResizeTo(input, [config.inputResolution, config.inputResolution]);
const height = input.shape[1];
const width = input.shape[2];
const resized = util.resizeTo(input, [config.inputResolution, config.inputResolution]);
const { heatmapScores, offsets, displacementFwd, displacementBwd } = this.baseModel.predict(resized);
const allTensorBuffers = await util.toTensorBuffers3D([heatmapScores, offsets, displacementFwd, displacementBwd]);
const scoresBuffer = allTensorBuffers[0];
@ -40,7 +41,7 @@ class PoseNet {
const displacementsFwdBuffer = allTensorBuffers[2];
const displacementsBwdBuffer = allTensorBuffers[3];
const poses = await decodeMultiple.decodeMultiplePoses(scoresBuffer, offsetsBuffer, displacementsFwdBuffer, displacementsBwdBuffer, outputStride, config.maxDetections, config.scoreThreshold, config.nmsRadius);
const resultPoses = util.scaleAndFlipPoses(poses, [height, width], [config.inputResolution, config.inputResolution], padding);
const resultPoses = util.scaleAndFlipPoses(poses, [height, width], [config.inputResolution, config.inputResolution]);
heatmapScores.dispose();
offsets.dispose();
displacementFwd.dispose();

View File

@ -1,4 +1,3 @@
const tf = require('@tensorflow/tfjs');
const kpt = require('./keypoints');
function eitherPointDoesntMeetConfidence(a, b, minConfidence) {
@ -41,78 +40,28 @@ async function toTensorBuffers3D(tensors) {
}
exports.toTensorBuffers3D = toTensorBuffers3D;
function scalePose(pose, scaleY, scaleX, offsetY = 0, offsetX = 0) {
function scalePose(pose, scaleY, scaleX) {
return {
score: pose.score,
keypoints: pose.keypoints.map(({ score, part, position }) => ({
score,
part,
position: {
x: position.x * scaleX + offsetX,
y: position.y * scaleY + offsetY,
},
position: { x: position.x * scaleX, y: position.y * scaleY },
})),
};
}
exports.scalePose = scalePose;
function scalePoses(poses, scaleY, scaleX, offsetY = 0, offsetX = 0) {
if (scaleX === 1 && scaleY === 1 && offsetY === 0 && offsetX === 0) {
return poses;
}
return poses.map((pose) => scalePose(pose, scaleY, scaleX, offsetY, offsetX));
function resizeTo(image, [targetH, targetW]) {
const input = image.squeeze(0);
const resized = input.resizeBilinear([targetH, targetW]);
input.dispose();
return resized;
}
exports.scalePoses = scalePoses;
exports.resizeTo = resizeTo;
function getInputTensorDimensions(input) {
return input instanceof tf.Tensor ? [input.shape[0], input.shape[1]] : [input.height, input.width];
}
exports.getInputTensorDimensions = getInputTensorDimensions;
function toInputTensor(input) {
return input instanceof tf.Tensor ? input : tf.browser.fromPixels(input);
}
exports.toInputTensor = toInputTensor;
function toResizedInputTensor(input, resizeHeight, resizeWidth) {
return tf.tidy(() => {
const imageTensor = toInputTensor(input);
return imageTensor.resizeBilinear([resizeHeight, resizeWidth]);
});
}
exports.toResizedInputTensor = toResizedInputTensor;
function padAndResizeTo(input, [targetH, targetW]) {
const [height, width] = getInputTensorDimensions(input);
const targetAspect = targetW / targetH;
const aspect = width / height;
let [padT, padB, padL, padR] = [0, 0, 0, 0];
if (aspect < targetAspect) {
// pads the width
padT = 0;
padB = 0;
padL = Math.round(0.5 * (targetAspect * height - width));
padR = Math.round(0.5 * (targetAspect * height - width));
} else {
// pads the height
padT = Math.round(0.5 * ((1.0 / targetAspect) * width - height));
padB = Math.round(0.5 * ((1.0 / targetAspect) * width - height));
padL = 0;
padR = 0;
}
const resized = tf.tidy(() => {
let imageTensor = toInputTensor(input);
imageTensor = tf.pad3d(imageTensor, [[padT, padB], [padL, padR], [0, 0]]);
return imageTensor.resizeBilinear([targetH, targetW]);
});
return { resized, padding: { top: padT, left: padL, right: padR, bottom: padB } };
}
exports.padAndResizeTo = padAndResizeTo;
function scaleAndFlipPoses(poses, [height, width], [inputResolutionHeight, inputResolutionWidth], padding) {
const scaleY = (height + padding.top + padding.bottom) / (inputResolutionHeight);
const scaleX = (width + padding.left + padding.right) / (inputResolutionWidth);
const scaledPoses = scalePoses(poses, scaleY, scaleX, -padding.top, -padding.left);
function scaleAndFlipPoses(poses, [height, width], [inputResolutionHeight, inputResolutionWidth]) {
const scaledPoses = poses.map((pose) => scalePose(pose, height / inputResolutionHeight, width / inputResolutionWidth));
return scaledPoses;
}
exports.scaleAndFlipPoses = scaleAndFlipPoses;

View File

@ -4,13 +4,6 @@ const models = {};
let last = { age: 0, gender: '' };
let frame = 0;
async function getImage(image, size) {
const buffer = tf.browser.fromPixels(image);
const resize = tf.image.resizeBilinear(buffer, [size, size]);
const expand = tf.cast(tf.expandDims(resize, 0), 'float32');
return expand;
}
async function loadAge(config) {
if (!models.age) models.age = await tf.loadGraphModel(config.face.age.modelPath);
return models.age;
@ -27,14 +20,9 @@ async function predict(image, config) {
return last;
}
frame = 0;
let enhance;
if (image instanceof tf.Tensor) {
const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
enhance = tf.mul(resize, [255.0]);
tf.dispose(resize);
} else {
enhance = await getImage(image, config.face.age.inputSize);
}
const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
const enhance = tf.mul(resize, [255.0]);
tf.dispose(resize);
const promises = [];
let ageT;