mirror of https://github.com/vladmandic/human
implemented simple gesture recognition
parent 3cec6710d4
commit d94aa0362c

README.md (18 changes)

@@ -1,6 +1,6 @@
# Human Library

## 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking, Age & Gender Prediction & Emotion Prediction
## 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking, Age & Gender Prediction, Emotion Prediction & Gesture Recognition

- [**Documentation**](https://github.com/vladmandic/human#readme)
- [**Code Repository**](https://github.com/vladmandic/human)

@@ -361,6 +361,11 @@ config = {
      modelPath: '../models/handskeleton/model.json',
    },
  },
  gesture: {
    enabled: true, // enable simple gesture recognition
    // takes processed data and based on geometry detects simple gestures
    // easily expandable via code, see `src/gesture.js`
  },
};
```
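To make the new option concrete, here is a minimal usage sketch, assuming `human` is an already-created instance of the library (as in the demo later in this commit) and `video` is a playing video element; the gesture strings in the comments are examples of values produced by `src/gesture.js`, not guaranteed output.

```js
// minimal sketch, not part of the commit: run detection with gesture recognition enabled
// assumes an initialized `human` instance and a playing <video> element named `video`
async function logGestures() {
  const result = await human.detect(video, { gesture: { enabled: true } });
  // result.gesture contains arrays of plain-text descriptions per category
  for (const g of result.gesture.face) console.log('face:', g); // e.g. 'facing left'
  for (const g of result.gesture.body) console.log('body:', g); // e.g. 'raise right hand'
  for (const g of result.gesture.hand) console.log('hand:', g); // e.g. 'indexfinger forward thumb up'
}
```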

@@ -408,10 +413,17 @@ result = {
      emotion, // <string> 'angry', 'discust', 'fear', 'happy', 'sad', 'surpise', 'neutral'
    }
  ],
  gesture: // object containing parsed gestures
  {
    face, // <array of string>
    body, // <array of string>
    hand, // <array of string>
  }
  performance = { // performance data of last execution for each module, measured in milliseconds
    backend, // time to initialize tf backend, valid only during backend startup
    load, // time to load models, valid only during model load
    image, // time for image processing
    gesture, // gesture analysis time
    body, // model time
    hand, // model time
    face, // model time
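Since the gesture strings are produced by the rules in `src/gesture.js` (added later in this commit), a populated `gesture` object could look roughly like the following; the exact values depend entirely on the detected pose and are shown here only as an illustration.

```js
// illustrative sample only: possible contents of result.gesture
const exampleGesture = {
  face: ['facing left'],
  body: ['raise right hand', 'leaning right'],
  hand: ['indexfinger forward thumb up'],
};
```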

@@ -484,6 +496,7 @@ For example, it can perform multiple face detections at 60+ FPS, but drops to ~1

- Enabled all: 15 FPS
- Image filters: 80 FPS (standalone)
- Gesture: 80 FPS (standalone)
- Face Detect: 80 FPS (standalone)
- Face Geometry: 30 FPS (includes face detect)
- Face Iris: 30 FPS (includes face detect and face geometry)

@@ -495,8 +508,9 @@ For example, it can perform multiple face detections at 60+ FPS, but drops to ~1

### Performance per module on a **smartphone** with Snapdragon 855 on a FullHD input:

- Enabled all: 3 FPS
- Enabled all: 5 FPS
- Image filters: 30 FPS (standalone)
- Gesture: 30 FPS (standalone)
- Face Detect: 20 FPS (standalone)
- Face Geometry: 10 FPS (includes face detect)
- Face Iris: 5 FPS (includes face detect and face geometry)

config.js

@@ -41,6 +41,9 @@ export default {
    polaroid: false, // image polaroid camera effect
    pixelate: 0, // range: 0 (no pixelate) to N (number of pixels to pixelate)
  },
  gesture: {
    enabled: true, // enable simple gesture recognition
  },
  face: {
    enabled: true, // controls if specified module is enabled
    // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
@@ -25,6 +25,8 @@ const ui = {
  useDepth: true,
  console: true,
  maxFrames: 10,
  modelsPreload: true,
  modelsWarmup: true,
};

// configuration overrides

@@ -62,6 +64,7 @@ const config = {
  },
  body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
  hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
  gesture: { enabled: true },
};

// global variables
@@ -123,15 +126,17 @@ function drawResults(input, result, canvas) {
  draw.face(result.face, canvas, ui, human.facemesh.triangulation);
  draw.body(result.body, canvas, ui);
  draw.hand(result.hand, canvas, ui);
  draw.gesture(result.gesture, canvas, ui);
  // update log
  const engine = human.tf.engine();
  const gpu = engine.backendInstance ? `gpu: ${(engine.backendInstance.numBytesInGPU ? engine.backendInstance.numBytesInGPU : 0).toLocaleString()} bytes` : '';
  const memory = `system: ${engine.state.numBytes.toLocaleString()} bytes ${gpu} | tensors: ${engine.state.numTensors.toLocaleString()}`;
  const processing = result.canvas ? `processing: ${result.canvas.width} x ${result.canvas.height}` : '';
  const avg = Math.trunc(10 * fps.reduce((a, b) => a + b) / fps.length) / 10;
  document.getElementById('log').innerText = `
    video: ${camera.name} | facing: ${camera.facing} | resolution: ${camera.width} x ${camera.height} ${processing}
    backend: ${human.tf.getBackend()} | ${memory} | object size: ${(str(result)).length.toLocaleString()} bytes
    performance: ${str(result.performance)}
    performance: ${str(result.performance)} FPS:${avg}
  `;
}
@@ -159,7 +164,13 @@ async function setupCamera() {
  try {
    stream = await navigator.mediaDevices.getUserMedia({
      audio: false,
      video: { facingMode: (ui.facing ? 'user' : 'environment'), width: window.innerWidth, height: window.innerHeight, resizeMode: 'none' },
      video: {
        facingMode: (ui.facing ? 'user' : 'environment'),
        width: window.innerWidth,
        height: window.innerHeight,
        resizeMode: 'none',
        contrast: 75,
      },
    });
  } catch (err) {
    output.innerText += '\nCamera permission denied';
@@ -267,7 +278,7 @@ async function detectVideo() {
  document.getElementById('canvas').style.display = 'block';
  const video = document.getElementById('video');
  const canvas = document.getElementById('canvas');
  ui.baseFont = ui.baseFontProto.replace(/{size}/, '1.2rem');
  ui.baseFont = ui.baseFontProto.replace(/{size}/, '1.3rem');
  ui.baseLineHeight = ui.baseLineHeightProto;
  if ((video.srcObject !== null) && !video.paused) {
    document.getElementById('play').style.display = 'block';
@@ -286,7 +297,7 @@ async function detectVideo() {
async function detectSampleImages() {
  document.getElementById('play').style.display = 'none';
  config.videoOptimized = false;
  ui.baseFont = ui.baseFontProto.replace(/{size}/, `${1.2 * ui.columns}rem`);
  ui.baseFont = ui.baseFontProto.replace(/{size}/, `${1.3 * ui.columns}rem`);
  ui.baseLineHeight = ui.baseLineHeightProto * ui.columns;
  document.getElementById('canvas').style.display = 'none';
  document.getElementById('samples-container').style.display = 'block';
@@ -318,6 +329,7 @@ function setupMenu() {
  menu.addBool('Face Emotion', config.face.emotion, 'enabled');
  menu.addBool('Body Pose', config.body, 'enabled');
  menu.addBool('Hand Pose', config.hand, 'enabled');
  menu.addBool('Gesture Analysis', config.gesture, 'enabled');

  menu.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
  menu.addLabel('Model Parameters');
@@ -383,11 +395,15 @@ async function main() {
  setupMenu();
  document.getElementById('log').innerText = `Human: version ${human.version} TensorFlow/JS: version ${human.tf.version_core}`;
  // this is not required, just pre-warms the library
  status('loading');
  await human.load();
  status('initializing');
  const warmup = new ImageData(50, 50);
  await human.detect(warmup);
  if (ui.modelsPreload) {
    status('loading');
    await human.load();
  }
  if (ui.modelsWarmup) {
    status('initializing');
    const warmup = new ImageData(50, 50);
    await human.detect(warmup);
  }
  status('human: ready');
  document.getElementById('loader').style.display = 'none';
  document.getElementById('play').style.display = 'block';
@@ -27,7 +27,7 @@
.status { position: absolute; width: 100vw; top: 100px; text-align: center; font-size: 4rem; font-weight: 100; text-shadow: 2px 2px darkslategrey; }
.thumbnail { margin: 8px; box-shadow: 0 0 4px 4px dimgrey; }
.thumbnail:hover { box-shadow: 0 0 8px 8px dimgrey; filter: grayscale(1); }
.log { position: fixed; bottom: 0; }
.log { position: fixed; bottom: 0; margin: 0.4rem; }
.samples-container { display: flex; flex-wrap: wrap; }
.video { display: none; }
.canvas { margin: 0 auto; width: 100%; }
package.json

@@ -1,7 +1,7 @@
{
  "name": "@vladmandic/human",
  "version": "0.6.6",
  "description": "human: 3D Face Detection, Iris Tracking and Age & Gender Prediction",
  "description": "human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking, Age & Gender Prediction, Emotion Prediction & Gesture Recognition",
  "sideEffects": false,
  "main": "dist/human.node.js",
  "module": "dist/human.esm.js",
src/gesture.js (new file)

@@ -0,0 +1,45 @@
exports.body = (res) => {
  if (!res) return [];
  const gestures = [];
  for (const pose of res) {
    // raising hands
    const leftWrist = pose.keypoints.find((a) => (a.part === 'leftWrist'));
    const rightWrist = pose.keypoints.find((a) => (a.part === 'rightWrist'));
    const nose = pose.keypoints.find((a) => (a.part === 'nose'));
    if (nose && leftWrist && rightWrist && (leftWrist.position.y < nose.position.y) && (rightWrist.position.y < nose.position.y)) gestures.push('i give up');
    else if (nose && leftWrist && (leftWrist.position.y < nose.position.y)) gestures.push('raise left hand');
    else if (nose && rightWrist && (rightWrist.position.y < nose.position.y)) gestures.push('raise right hand');

    // leaning
    const leftShoulder = pose.keypoints.find((a) => (a.part === 'leftShoulder'));
    const rightShoulder = pose.keypoints.find((a) => (a.part === 'rightShoulder'));
    if (leftShoulder && rightShoulder) gestures.push(`leaning ${(leftShoulder.position.y > rightShoulder.position.y) ? 'left' : 'right'}`);
  }
  return gestures;
};

exports.face = (res) => {
  if (!res) return [];
  const gestures = [];
  for (const face of res) {
    if (face.annotations['rightCheek'] && face.annotations['leftCheek'] && (face.annotations['rightCheek'].length > 0) && (face.annotations['leftCheek'].length > 0)) {
      gestures.push(`facing ${((face.annotations['rightCheek'][0][2] > 0) || (face.annotations['leftCheek'][0][2] < 0)) ? 'right' : 'left'}`);
    }
  }
  return gestures;
};

exports.hand = (res) => {
  if (!res) return [];
  const gestures = [];
  for (const hand of res) {
    const fingers = [];
    for (const [finger, pos] of Object.entries(hand['annotations'])) {
      if (finger !== 'palmBase') fingers.push({ name: finger.toLowerCase(), position: pos[0] }); // get tip of each finger
    }
    const closest = fingers.reduce((best, a) => (best.position[2] < a.position[2] ? best : a)); // finger with the smallest z, treated as pointing forward
    const highest = fingers.reduce((best, a) => (best.position[1] < a.position[1] ? best : a)); // finger with the smallest y, treated as pointing up
    gestures.push(`${closest.name} forward ${highest.name} up`);
  }
  return gestures;
};
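Because the README describes the module as easily expandable, here is a hedged sketch of what one additional body rule could look like, written in the same style as `exports.body` above. The rule, the helper name, and the 50-pixel threshold are illustrative and not part of the commit; the `leftHip`/`rightHip` part names follow standard PoseNet keypoint naming.

```js
// illustrative only: an additional rule in the style of exports.body above
// pushes 'hands on hips' when both wrists sit near hip height; the 50px threshold is an arbitrary example
function handsOnHips(pose, gestures) {
  const find = (part) => pose.keypoints.find((a) => (a.part === part));
  const leftWrist = find('leftWrist');
  const rightWrist = find('rightWrist');
  const leftHip = find('leftHip');
  const rightHip = find('rightHip');
  if (leftWrist && rightWrist && leftHip && rightHip
    && (Math.abs(leftWrist.position.y - leftHip.position.y) < 50)
    && (Math.abs(rightWrist.position.y - rightHip.position.y) < 50)) {
    gestures.push('hands on hips');
  }
}
```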

@@ -22,8 +22,8 @@ const util = require('./util');
const UPDATE_REGION_OF_INTEREST_IOU_THRESHOLD = 0.8;
const PALM_BOX_SHIFT_VECTOR = [0, -0.4];
const PALM_BOX_ENLARGE_FACTOR = 3;
const HAND_BOX_SHIFT_VECTOR = [0, -0.1];
const HAND_BOX_ENLARGE_FACTOR = 1.65;
const HAND_BOX_SHIFT_VECTOR = [0, -0.1]; // move detected hand box by x,y to ease landmark detection
const HAND_BOX_ENLARGE_FACTOR = 1.65; // increased from model default 1.65;
const PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2];
const PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0;
const PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2;
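For context on the constants above, a shift vector and enlarge factor are typically applied to a detected bounding box roughly as sketched below; the helper name and the `startPoint`/`endPoint` box shape are assumptions for illustration and are not the repository's actual box utilities.

```js
// rough sketch (not the repository's box helpers): shift a box by a size-relative
// vector and enlarge it around its shifted center, as constants like the above are typically used
function shiftAndEnlargeBox(box, shiftVector, enlargeFactor) {
  const width = box.endPoint[0] - box.startPoint[0];
  const height = box.endPoint[1] - box.startPoint[1];
  const shift = [shiftVector[0] * width, shiftVector[1] * height];
  const center = [
    (box.startPoint[0] + box.endPoint[0]) / 2 + shift[0],
    (box.startPoint[1] + box.endPoint[1]) / 2 + shift[1],
  ];
  const halfSize = [(width * enlargeFactor) / 2, (height * enlargeFactor) / 2];
  return {
    startPoint: [center[0] - halfSize[0], center[1] - halfSize[1]],
    endPoint: [center[0] + halfSize[0], center[1] + halfSize[1]],
  };
}
```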

src/human.js (115 changes)

@@ -4,7 +4,8 @@ const ssrnet = require('./ssrnet/ssrnet.js');
const emotion = require('./emotion/emotion.js');
const posenet = require('./posenet/posenet.js');
const handpose = require('./handpose/handpose.js');
const fxImage = require('./imagefx.js');
const gesture = require('./gesture.js');
const image = require('./image.js');
const profile = require('./profile.js');
const defaults = require('../config.js').default;
const app = require('../package.json');

@@ -52,9 +53,6 @@ class Human {
    this.analyzeMemoryLeaks = false;
    this.checkSanity = false;
    this.firstRun = true;
    // internal temp canvases
    this.inCanvas = null;
    this.outCanvas = null;
    // object that contains all initialized models
    this.models = {
      facemesh: null,
@@ -94,6 +92,7 @@ class Human {
    if (leaked !== 0) this.log(...msg, leaked);
  }

  // quick sanity check on inputs
  sanity(input) {
    if (!this.checkSanity) return null;
    if (!input) return 'input is not defined';

@@ -108,10 +107,12 @@ class Human {
    return null;
  }

  // preload models, not explicitly required as it's done automatically on first use
  async load(userConfig) {
    if (userConfig) this.config = mergeDeep(defaults, userConfig);

    if (this.firstRun) {
      this.checkBackend(true);
      this.log(`version: ${this.version} TensorFlow/JS version: ${tf.version_core}`);
      this.log('configuration:', this.config);
      this.log('flags:', tf.ENV.flags);
@@ -144,8 +145,9 @@ class Human {
    }
  }

  async checkBackend() {
    if (tf.getBackend() !== this.config.backend) {
  // check if backend needs initialization if it changed
  async checkBackend(force) {
    if (force || (tf.getBackend() !== this.config.backend)) {
      this.state = 'backend';
      /* force backend reload
      if (this.config.backend in tf.engine().registry) {

@@ -156,7 +158,7 @@ class Human {
        this.log('Backend not registered:', this.config.backend);
      }
      */
      this.log('Setting backend:', this.config.backend);
      this.log('setting backend:', this.config.backend);
      await tf.setBackend(this.config.backend);
      tf.enableProdMode();
      /* debug mode is really too much

@@ -167,84 +169,20 @@ class Human {
        this.log('Changing WebGL: WEBGL_DELETE_TEXTURE_THRESHOLD:', this.config.deallocate);
        tf.ENV.set('WEBGL_DELETE_TEXTURE_THRESHOLD', this.config.deallocate ? 0 : -1);
      }
      tf.ENV.set('WEBGL_CPU_FORWARD', true);
      // tf.ENV.set('WEBGL_CPU_FORWARD', true);
      // tf.ENV.set('WEBGL_FORCE_F16_TEXTURES', true);
      // tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', true);
      await tf.ready();
    }
  }

  tfImage(input) {
    let tensor;
    if (input instanceof tf.Tensor) {
      tensor = tf.clone(input);
    } else {
      const originalWidth = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
      const originalHeight = input.naturalHeight || input.videoHeight || input.height || (input.shape && (input.shape[2] > 0));
      let targetWidth = originalWidth;
      let targetHeight = originalHeight;
      if (this.config.filter.width > 0) targetWidth = this.config.filter.width;
      else if (this.config.filter.height > 0) targetWidth = originalWidth * (this.config.filter.height / originalHeight);
      if (this.config.filter.height > 0) targetHeight = this.config.filter.height;
      else if (this.config.filter.width > 0) targetHeight = originalHeight * (this.config.filter.width / originalWidth);
      if (!this.inCanvas || (this.inCanvas.width !== targetWidth) || (this.inCanvas.height !== targetHeight)) {
        this.inCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
        if (this.inCanvas.width !== targetWidth) this.inCanvas.width = targetWidth;
        if (this.inCanvas.height !== targetHeight) this.inCanvas.height = targetHeight;
      }
      const ctx = this.inCanvas.getContext('2d');
      if (input instanceof ImageData) ctx.putImageData(input, 0, 0);
      else ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, this.inCanvas.width, this.inCanvas.height);
      if (this.config.filter.enabled) {
        if (!this.fx || !this.outCanvas || (this.inCanvas.width !== this.outCanvas.width) || (this.inCanvas.height !== this.outCanvas.height)) {
          this.outCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(this.inCanvas.width, this.inCanvas.height) : document.createElement('canvas');
          if (this.outCanvas.width !== this.inCanvas.width) this.outCanvas.width = this.inCanvas.width;
          if (this.outCanvas.height !== this.inCanvas.height) this.outCanvas.height = this.inCanvas.height;
          this.fx = (tf.ENV.flags.IS_BROWSER && (typeof document !== 'undefined')) ? new fxImage.Canvas({ canvas: this.outCanvas }) : null;
        }
        this.fx.reset();
        this.fx.addFilter('brightness', this.config.filter.brightness); // must have at least one filter enabled
        if (this.config.filter.contrast !== 0) this.fx.addFilter('contrast', this.config.filter.contrast);
        if (this.config.filter.sharpness !== 0) this.fx.addFilter('sharpen', this.config.filter.sharpness);
        if (this.config.filter.blur !== 0) this.fx.addFilter('blur', this.config.filter.blur);
        if (this.config.filter.saturation !== 0) this.fx.addFilter('saturation', this.config.filter.saturation);
        if (this.config.filter.hue !== 0) this.fx.addFilter('hue', this.config.filter.hue);
        if (this.config.filter.negative) this.fx.addFilter('negative');
        if (this.config.filter.sepia) this.fx.addFilter('sepia');
        if (this.config.filter.vintage) this.fx.addFilter('brownie');
        if (this.config.filter.sepia) this.fx.addFilter('sepia');
        if (this.config.filter.kodachrome) this.fx.addFilter('kodachrome');
        if (this.config.filter.technicolor) this.fx.addFilter('technicolor');
        if (this.config.filter.polaroid) this.fx.addFilter('polaroid');
        if (this.config.filter.pixelate !== 0) this.fx.addFilter('pixelate', this.config.filter.pixelate);
        this.fx.apply(this.inCanvas);
      }
      if (!this.outCanvas) this.outCanvas = this.inCanvas;
      let pixels;
      if ((this.config.backend === 'webgl') || (this.outCanvas instanceof ImageData)) {
        // tf kernel-optimized method to get image data; if input is already ImageData, just use it
        pixels = tf.browser.fromPixels(this.outCanvas);
      } else {
        // cpu and wasm kernels do not implement an efficient fromPixels method and the canvas cannot be used as-is, so copy through one more temporary canvas
        const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
        tempCanvas.width = targetWidth;
        tempCanvas.height = targetHeight;
        const tempCtx = tempCanvas.getContext('2d');
        tempCtx.drawImage(this.outCanvas, 0, 0);
        const data = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
        pixels = tf.browser.fromPixels(data);
      }
      const casted = pixels.toFloat();
      tensor = casted.expandDims(0);
      pixels.dispose();
      casted.dispose();
    }
    return { tensor, canvas: this.config.filter.return ? this.outCanvas : null };
  }

  // main detect function
  async detect(input, userConfig = {}) {
    this.state = 'config';
    const perf = {};
    let timeStamp;

    // update configuration
    this.config = mergeDeep(defaults, userConfig);
    if (!this.config.videoOptimized) this.config = mergeDeep(this.config, override);
@@ -256,6 +194,7 @@ class Human {
      return { error };
    }

    // detection happens inside a promise
    // eslint-disable-next-line no-async-promise-executor
    return new Promise(async (resolve) => {
      let poseRes;

@@ -281,9 +220,8 @@ class Human {
      this.analyze('Start Detect:');

      timeStamp = now();
      const image = this.tfImage(input);
      const process = image.process(input, this.config);
      perf.image = Math.trunc(now() - timeStamp);
      const imageTensor = image.tensor;

      // run facemesh, includes blazeface and iris
      const faceRes = [];
@@ -291,7 +229,7 @@ class Human {
        this.state = 'run:face';
        timeStamp = now();
        this.analyze('Start FaceMesh:');
        const faces = await this.models.facemesh.estimateFaces(imageTensor, this.config.face);
        const faces = await this.models.facemesh.estimateFaces(process.tensor, this.config.face);
        perf.face = Math.trunc(now() - timeStamp);
        for (const face of faces) {
          // if something went wrong, skip the face

@@ -334,38 +272,45 @@ class Human {

      // run posenet
      if (this.config.async) {
        poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
        poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(process.tensor, this.config.body) : [];
      } else {
        this.state = 'run:body';
        timeStamp = now();
        this.analyze('Start PoseNet');
        poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
        poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(process.tensor, this.config.body) : [];
        this.analyze('End PoseNet:');
        perf.body = Math.trunc(now() - timeStamp);
      }

      // run handpose
      if (this.config.async) {
        handRes = this.config.hand.enabled ? this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
        handRes = this.config.hand.enabled ? this.models.handpose.estimateHands(process.tensor, this.config.hand) : [];
      } else {
        this.state = 'run:hand';
        timeStamp = now();
        this.analyze('Start HandPose:');
        handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
        handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(process.tensor, this.config.hand) : [];
        this.analyze('End HandPose:');
        perf.hand = Math.trunc(now() - timeStamp);
      }

      if (this.config.async) [poseRes, handRes] = await Promise.all([poseRes, handRes]);

      imageTensor.dispose();
      process.tensor.dispose();
      this.state = 'idle';

      if (this.config.scoped) tf.engine().endScope();
      this.analyze('End Scope:');

      let gestureRes = [];
      if (this.config.gesture.enabled) {
        timeStamp = now();
        gestureRes = { body: gesture.body(poseRes), hand: gesture.hand(handRes), face: gesture.face(faceRes) };
        perf.gesture = Math.trunc(now() - timeStamp);
      }

      perf.total = Math.trunc(now() - timeStart);
      resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf, canvas: image.canvas });
      resolve({ face: faceRes, body: poseRes, hand: handRes, gesture: gestureRes, performance: perf, canvas: process.canvas });
    });
  }
}
src/image.js (new file)

@@ -0,0 +1,79 @@
const tf = require('@tensorflow/tfjs');
const fxImage = require('./imagefx.js');

// internal temp canvases
let inCanvas = null;
let outCanvas = null;

// process input image and return tensor
// input can be tensor, imagedata, htmlimageelement, htmlvideoelement
// input is resized and run through imagefx filter
function process(input, config) {
  let tensor;
  if (input instanceof tf.Tensor) {
    tensor = tf.clone(input);
  } else {
    const originalWidth = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
    const originalHeight = input.naturalHeight || input.videoHeight || input.height || (input.shape && (input.shape[2] > 0));
    let targetWidth = originalWidth;
    let targetHeight = originalHeight;
    if (config.filter.width > 0) targetWidth = config.filter.width;
    else if (config.filter.height > 0) targetWidth = originalWidth * (config.filter.height / originalHeight);
    if (config.filter.height > 0) targetHeight = config.filter.height;
    else if (config.filter.width > 0) targetHeight = originalHeight * (config.filter.width / originalWidth);
    if (!inCanvas || (inCanvas.width !== targetWidth) || (inCanvas.height !== targetHeight)) {
      inCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
      if (inCanvas.width !== targetWidth) inCanvas.width = targetWidth;
      if (inCanvas.height !== targetHeight) inCanvas.height = targetHeight;
    }
    const ctx = inCanvas.getContext('2d');
    if (input instanceof ImageData) ctx.putImageData(input, 0, 0);
    else ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, inCanvas.width, inCanvas.height);
    if (config.filter.enabled) {
      if (!this.fx || !outCanvas || (inCanvas.width !== outCanvas.width) || (inCanvas.height !== outCanvas.height)) {
        outCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(inCanvas.width, inCanvas.height) : document.createElement('canvas');
        if (outCanvas.width !== inCanvas.width) outCanvas.width = inCanvas.width;
        if (outCanvas.height !== inCanvas.height) outCanvas.height = inCanvas.height;
        this.fx = (tf.ENV.flags.IS_BROWSER && (typeof document !== 'undefined')) ? new fxImage.Canvas({ canvas: outCanvas }) : null;
      }
      this.fx.reset();
      this.fx.addFilter('brightness', config.filter.brightness); // must have at least one filter enabled
      if (config.filter.contrast !== 0) this.fx.addFilter('contrast', config.filter.contrast);
      if (config.filter.sharpness !== 0) this.fx.addFilter('sharpen', config.filter.sharpness);
      if (config.filter.blur !== 0) this.fx.addFilter('blur', config.filter.blur);
      if (config.filter.saturation !== 0) this.fx.addFilter('saturation', config.filter.saturation);
      if (config.filter.hue !== 0) this.fx.addFilter('hue', config.filter.hue);
      if (config.filter.negative) this.fx.addFilter('negative');
      if (config.filter.sepia) this.fx.addFilter('sepia');
      if (config.filter.vintage) this.fx.addFilter('brownie');
      if (config.filter.sepia) this.fx.addFilter('sepia');
      if (config.filter.kodachrome) this.fx.addFilter('kodachrome');
      if (config.filter.technicolor) this.fx.addFilter('technicolor');
      if (config.filter.polaroid) this.fx.addFilter('polaroid');
      if (config.filter.pixelate !== 0) this.fx.addFilter('pixelate', config.filter.pixelate);
      this.fx.apply(inCanvas);
    }
    if (!outCanvas) outCanvas = inCanvas;
    let pixels;
    if ((config.backend === 'webgl') || (outCanvas instanceof ImageData)) {
      // tf kernel-optimized method to get image data; if input is already ImageData, just use it
      pixels = tf.browser.fromPixels(outCanvas);
    } else {
      // cpu and wasm kernels do not implement an efficient fromPixels method and the canvas cannot be used as-is, so copy through one more temporary canvas
      const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
      tempCanvas.width = targetWidth;
      tempCanvas.height = targetHeight;
      const tempCtx = tempCanvas.getContext('2d');
      tempCtx.drawImage(outCanvas, 0, 0);
      const data = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
      pixels = tf.browser.fromPixels(data);
    }
    const casted = pixels.toFloat();
    tensor = casted.expandDims(0);
    pixels.dispose();
    casted.dispose();
  }
  return { tensor, canvas: config.filter.return ? outCanvas : null };
}

exports.process = process;
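For orientation, the `detect()` hunk in `src/human.js` above consumes this new module roughly as follows; this is a paraphrased sketch of that call site, not additional code from the commit.

```js
// sketch of how src/human.js uses the new module (see the detect() hunk above)
const image = require('./image.js');

async function detectSketch(input, config) {
  const processed = image.process(input, config); // returns { tensor, canvas }
  // ... run facemesh / posenet / handpose against processed.tensor ...
  processed.tensor.dispose(); // caller owns and disposes the tensor
  return { canvas: processed.canvas };
}
```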