add meet and selfie models

pull/280/head
Vladimir Mandic 2021-06-04 13:51:01 -04:00
parent 78431fca00
commit c5f0ebe03f
12 changed files with 555 additions and 41 deletions

View File

@@ -11,7 +11,7 @@ N/A
## In Progress
- Switch to TypeScript 4.3
- Add hints to Demo app
- Implement segmentation model
## Known Issues

View File

@@ -27,9 +27,18 @@ onmessage = async (msg) => {
result.error = err.message;
log('worker thread error:', err.message);
}
// must strip canvas from return value as it cannot be transferred from worker thread
if (result.canvas) result.canvas = null;
// @ts-ignore tslint wrong type matching for worker
postMessage({ result });
if (result.canvas) { // convert canvas to imageData and send it by reference
const ctx = result.canvas.getContext('2d');
const img = ctx?.getImageData(0, 0, result.canvas.width, result.canvas.height);
result.canvas = null; // must strip original canvas from return value as it cannot be transferred from worker thread
// @ts-ignore tslint wrong type matching for worker
if (img) postMessage({ result, image: img.data.buffer, width: msg.data.width, height: msg.data.height }, [img?.data.buffer]);
// @ts-ignore tslint wrong type matching for worker
else postMessage({ result });
} else {
// @ts-ignore tslint wrong type matching for worker
postMessage({ result });
}
busy = false;
};
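Since a canvas cannot be transferred out of a worker, the handler above ships the raw RGBA pixels instead and passes the underlying ArrayBuffer in the postMessage transfer list so no copy is made. A minimal sketch of the receiving side, mirroring what the demo's webWorker() handler does later in this commit (the worker variable and the message shape are assumed from the code above):

// main thread: rebuild a canvas from the pixel buffer transferred out of the worker
worker.onmessage = (msg) => {
  const { result, image, width, height } = msg.data;
  if (image) {
    const canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(width, height) : document.createElement('canvas');
    canvas.width = width;
    canvas.height = height;
    const ctx = canvas.getContext('2d');
    // wrap the transferred buffer back into ImageData and paint it onto the rebuilt canvas
    ctx?.putImageData(new ImageData(new Uint8ClampedArray(image), width, height), 0, 0);
    result.canvas = canvas; // reattach the processed canvas to the result object
  }
};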

View File

@@ -38,19 +38,21 @@ const userConfig = {
enabled: false,
flip: false,
},
face: { enabled: true,
face: { enabled: false,
detector: { return: true },
mesh: { enabled: true },
iris: { enabled: false },
description: { enabled: false },
emotion: { enabled: false },
},
hand: { enabled: false },
// body: { enabled: true, modelPath: 'posenet.json' },
// body: { enabled: true, modelPath: 'blazepose.json' },
body: { enabled: false },
object: { enabled: false },
gesture: { enabled: true },
hand: { enabled: false },
body: { enabled: false },
// body: { enabled: true, modelPath: 'posenet.json' },
// body: { enabled: true, modelPath: 'blazepose.json' },
// segmentation: { enabled: true, modelPath: 'meet.json' },
// segmentation: { enabled: true, modelPath: 'selfie.json' },
*/
};
@@ -267,9 +269,11 @@ async function drawResults(input) {
if (ui.buffered) {
ui.drawThread = requestAnimationFrame(() => drawResults(input));
} else {
log('stopping buffered refresh');
if (ui.drawThread) cancelAnimationFrame(ui.drawThread);
ui.drawThread = null;
if (ui.drawThread) {
log('stopping buffered refresh');
cancelAnimationFrame(ui.drawThread);
ui.drawThread = null;
}
}
}
@@ -350,6 +354,8 @@ async function setupCamera() {
video.onloadeddata = () => {
if (settings.width > settings.height) canvas.style.width = '100vw';
else canvas.style.height = '100vh';
canvas.width = video.videoWidth;
canvas.height = video.videoHeight;
ui.menuWidth.input.setAttribute('value', video.videoWidth);
ui.menuHeight.input.setAttribute('value', video.videoHeight);
if (live) video.play();
@@ -400,6 +406,16 @@ function webWorker(input, image, canvas, timestamp) {
}
if (document.getElementById('gl-bench')) document.getElementById('gl-bench').style.display = ui.bench ? 'block' : 'none';
lastDetectedResult = msg.data.result;
if (msg.data.image) {
lastDetectedResult.canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(msg.data.width, msg.data.height) : document.createElement('canvas');
lastDetectedResult.canvas.width = msg.data.width;
lastDetectedResult.canvas.height = msg.data.height;
const ctx = lastDetectedResult.canvas.getContext('2d');
const imageData = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height);
ctx.putImageData(imageData, 0, 0);
}
ui.framesDetect++;
if (!ui.drawThread) drawResults(input);
// eslint-disable-next-line no-use-before-define

BIN  models/selfie.bin  Normal file (binary file not shown)

395  models/selfie.json  Normal file (diff suppressed because one or more lines are too long)

View File

@@ -196,6 +196,15 @@ export interface Config {
maxDetected: number,
skipFrames: number,
},
/** Controls and configures the body segmentation module
* - enabled: true/false
* - modelPath: segmentation model, can be absolute path or relative to modelBasePath
*/
segmentation: {
enabled: boolean,
modelPath: string,
},
}
const config: Config = {
@@ -338,5 +347,11 @@ const config: Config = {
skipFrames: 19, // how many max frames to go without re-running the detector
// only used when cacheSensitivity is not zero
},
segmentation: {
enabled: false,
modelPath: 'selfie.json', // experimental: segmentation model, can be absolute path or relative to modelBasePath
// can be 'selfie' or 'meet'
},
};
export { config as defaults };
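The new segmentation block defaults to disabled, so existing configurations are unaffected. A minimal sketch of opting in from a user config, mirroring the commented-out segmentation lines added to the demo's userConfig above (the human instance and input element are assumed):

const userConfig = {
  modelBasePath: '../models/',
  segmentation: { enabled: true, modelPath: 'meet.json' }, // two-channel meet model; use 'selfie.json' for the single-channel variant
};
const result = await human.detect(input, userConfig); // per-call override of the defaults shown above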

View File

@@ -24,6 +24,7 @@ import * as image from './image/image';
import * as draw from './draw/draw';
import * as persons from './persons';
import * as interpolate from './interpolate';
import * as segmentation from './segmentation/segmentation';
import * as sample from './sample';
import * as app from '../package.json';
import { Tensor } from './tfjs/types';
@@ -114,16 +115,7 @@ export class Human {
nanodet: Model | null,
centernet: Model | null,
faceres: Model | null,
};
/** @internal: Currently loaded classes */
classes: {
facemesh: typeof facemesh;
emotion: typeof emotion;
body: typeof posenet | typeof blazepose | typeof movenet;
hand: typeof handpose;
nanodet: typeof nanodet;
centernet: typeof centernet;
faceres: typeof faceres;
segmentation: Model | null,
};
/** Reference face triangulation array of 468 points, used for triangle references between points */
faceTriangulation: typeof facemesh.triangulation;
@@ -173,20 +165,12 @@ export class Human {
nanodet: null,
centernet: null,
faceres: null,
segmentation: null,
};
// export access to image processing
// @ts-ignore eslint-typescript cannot correctly infer type in anonymous function
this.image = (input: Input) => image.process(input, this.config);
// export raw access to underlying models
this.classes = {
facemesh,
emotion,
faceres,
body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose,
hand: handpose,
nanodet,
centernet,
};
this.faceTriangulation = facemesh.triangulation;
this.faceUVMap = facemesh.uvmap;
// include platform info
@@ -274,8 +258,10 @@
}
if (this.config.async) { // load models concurrently
[
// @ts-ignore async model loading is not correctly inferred
this.models.face,
this.models.emotion,
// @ts-ignore async model loading is not correctly inferred
this.models.handpose,
this.models.posenet,
this.models.blazepose,
@@ -284,6 +270,7 @@
this.models.nanodet,
this.models.centernet,
this.models.faceres,
this.models.segmentation,
] = await Promise.all([
this.models.face || (this.config.face.enabled ? facemesh.load(this.config) : null),
this.models.emotion || ((this.config.face.enabled && this.config.face.emotion.enabled) ? emotion.load(this.config) : null),
@@ -295,6 +282,7 @@
this.models.nanodet || (this.config.object.enabled && this.config.object.modelPath.includes('nanodet') ? nanodet.load(this.config) : null),
this.models.centernet || (this.config.object.enabled && this.config.object.modelPath.includes('centernet') ? centernet.load(this.config) : null),
this.models.faceres || ((this.config.face.enabled && this.config.face.description.enabled) ? faceres.load(this.config) : null),
this.models.segmentation || (this.config.segmentation.enabled ? segmentation.load(this.config) : null),
]);
} else { // load models sequentially
if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config);
@@ -307,6 +295,7 @@
if (this.config.object.enabled && !this.models.nanodet && this.config.object.modelPath.includes('nanodet')) this.models.nanodet = await nanodet.load(this.config);
if (this.config.object.enabled && !this.models.centernet && this.config.object.modelPath.includes('centernet')) this.models.centernet = await centernet.load(this.config);
if (this.config.face.enabled && this.config.face.description.enabled && !this.models.faceres) this.models.faceres = await faceres.load(this.config);
if (this.config.segmentation.enabled && !this.models.segmentation) this.models.segmentation = await segmentation.load(this.config);
}
if (this.#firstRun) { // print memory stats on first run
@@ -568,6 +557,17 @@
else if (this.performance.gesture) delete this.performance.gesture;
}
// run segmentation
if (this.config.segmentation.enabled) {
this.analyze('Start Segmentation:');
this.state = 'run:segmentation';
timeStamp = now();
await segmentation.predict(process, this.config);
elapsedTime = Math.trunc(now() - timeStamp);
if (elapsedTime > 0) this.performance.segmentation = elapsedTime;
this.analyze('End Segmentation:');
}
this.performance.total = Math.trunc(now() - timeStart);
this.state = 'idle';
this.result = {

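With the loader and detect changes above, segmentation behaves like the other optional modules: its model is loaded only when enabled (concurrently in async mode, sequentially otherwise) and it reports its own timing. A small sketch of what that exposes, assuming an already configured human instance and an input element:

await human.load();                           // segmentation.load() runs only when config.segmentation.enabled is true
console.log(human.models.segmentation);       // loaded graph model, or null when disabled
const result = await human.detect(input);
console.log(result.performance.segmentation); // elapsed time recorded by the new 'run:segmentation' step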
View File

@@ -138,7 +138,7 @@ export function process(input, config): { tensor: Tensor | null, canvas: Offscre
const shape = [outCanvas.height, outCanvas.width, 3];
pixels = tf.tensor3d(outCanvas.data, shape, 'int32');
} else if (outCanvas instanceof ImageData) { // if input is imagedata, just use it
pixels = tf.browser.fromPixels(outCanvas);
pixels = tf.browser ? tf.browser.fromPixels(outCanvas) : null;
} else if (config.backend === 'webgl' || config.backend === 'humangl') { // tf kernel-optimized method to get imagedata
// we can use canvas as-is as it already has a context, so we do a silly one more canvas
const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
@@ -146,7 +146,7 @@ export function process(input, config): { tensor: Tensor | null, canvas: Offscre
tempCanvas.height = targetHeight;
const tempCtx = tempCanvas.getContext('2d');
tempCtx?.drawImage(outCanvas, 0, 0);
pixels = tf.browser.fromPixels(tempCanvas);
pixels = tf.browser ? tf.browser.fromPixels(tempCanvas) : null;
} else { // cpu and wasm kernel does not implement efficient fromPixels method
// we can use canvas as-is as it already has a context, so we do a silly one more canvas
const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
@@ -155,12 +155,14 @@ export function process(input, config): { tensor: Tensor | null, canvas: Offscre
const tempCtx = tempCanvas.getContext('2d');
tempCtx?.drawImage(outCanvas, 0, 0);
const data = tempCtx?.getImageData(0, 0, targetWidth, targetHeight);
pixels = tf.browser.fromPixels(data);
pixels = tf.browser ? tf.browser.fromPixels(data) : null;
}
if (pixels) {
const casted = pixels.toFloat();
tensor = casted.expandDims(0);
pixels.dispose();
casted.dispose();
}
const casted = pixels.toFloat();
tensor = casted.expandDims(0);
pixels.dispose();
casted.dispose();
}
const canvas = config.filter.return ? outCanvas : null;
return { tensor, canvas };

View File

@@ -21,6 +21,8 @@ export function calc(newResult: Result): Result {
// - at 1sec delay buffer = 1 which means live data is used
const bufferedFactor = elapsed < 1000 ? 8 - Math.log(elapsed) : 1;
bufferedResult.canvas = newResult.canvas;
// interpolate body results
if (!bufferedResult.body || (newResult.body.length !== bufferedResult.body.length)) {
bufferedResult.body = JSON.parse(JSON.stringify(newResult.body as Body[])); // deep clone once
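For reference, the bufferedFactor formula above decays logarithmically with the time since the last fresh result, reaching 1 (pure live data) at the one-second mark described in the comment. A quick standalone check of those values (not part of the module):

for (const elapsed of [10, 100, 500, 1000]) {
  const bufferedFactor = elapsed < 1000 ? 8 - Math.log(elapsed) : 1;
  console.log(elapsed, bufferedFactor.toFixed(2)); // 10 -> 5.70, 100 -> 3.39, 500 -> 1.79, 1000 -> 1.00
}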

View File

@@ -176,7 +176,7 @@ export interface Result {
/** global performance object with timing values for each operation */
performance: Record<string, unknown>,
/** optional processed canvas that can be used to draw input on screen */
readonly canvas?: OffscreenCanvas | HTMLCanvasElement,
canvas?: OffscreenCanvas | HTMLCanvasElement,
/** timestamp of detection representing the milliseconds elapsed since the UNIX epoch */
readonly timestamp: number,
/** getter property that returns unified persons object */

View File

@@ -0,0 +1,75 @@
/**
* Image Segmentation Module (meet and selfie models)
*/
import { log, join } from '../helpers';
import * as tf from '../../dist/tfjs.esm.js';
import { GraphModel, Tensor } from '../tfjs/types';
import { Config } from '../config';
// import * as blur from './blur';
let model: GraphModel;
// let blurKernel;
export type Segmentation = boolean;
export async function load(config: Config): Promise<GraphModel> {
if (!model) {
// @ts-ignore type mismatch on GraphModel
model = await tf.loadGraphModel(join(config.modelBasePath, config.segmentation.modelPath));
if (!model || !model['modelUrl']) log('load model failed:', config.segmentation.modelPath);
else if (config.debug) log('load model:', model['modelUrl']);
} else if (config.debug) log('cached model:', model['modelUrl']);
// if (!blurKernel) blurKernel = blur.getGaussianKernel(50, 1, 1);
return model;
}
export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement }, config: Config): Promise<Segmentation> {
if (!config.segmentation.enabled || !input.tensor || !input.canvas) return false;
if (!model || !model.inputs[0].shape) return false;
const resizeInput = tf.image.resizeBilinear(input.tensor, [model.inputs[0].shape[1], model.inputs[0].shape[2]], false);
const norm = resizeInput.div(255);
const res = model.predict(norm) as Tensor;
tf.dispose(resizeInput);
tf.dispose(norm);
const overlay = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(input.canvas.width, input.canvas.height) : document.createElement('canvas');
overlay.width = input.canvas.width;
overlay.height = input.canvas.height;
const squeeze = tf.squeeze(res, 0);
let resizeOutput;
if (squeeze.shape[2] === 2) { // the meet model has two channels, for background and foreground
const softmax = squeeze.softmax();
const [bg, fg] = tf.unstack(softmax, 2);
tf.dispose(softmax);
const expand = fg.expandDims(2);
tf.dispose(bg);
tf.dispose(fg);
resizeOutput = tf.image.resizeBilinear(expand, [input.tensor?.shape[1], input.tensor?.shape[2]]);
tf.dispose(expand);
} else { // the selfie model has a single channel
resizeOutput = tf.image.resizeBilinear(squeeze, [input.tensor?.shape[1], input.tensor?.shape[2]]);
}
// const blurred = blur.blur(resizeOutput, blurKernel);
if (tf.browser) await tf.browser.toPixels(resizeOutput, overlay);
// tf.dispose(blurred);
tf.dispose(resizeOutput);
tf.dispose(squeeze);
tf.dispose(res);
const ctx = input.canvas.getContext('2d') as CanvasRenderingContext2D;
// https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D/globalCompositeOperation
// best options are: darken, color-burn, multiply
ctx.globalCompositeOperation = 'darken';
await ctx?.drawImage(overlay, 0, 0);
ctx.globalCompositeOperation = 'source-in';
return true;
}
/* Segmentation todo:
- Smooth the output mask
- Get latest canvas in interpolate
- Buffered mode fetches the latest canvas from video instead of from interpolate
*/
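Taken together, predict() resizes the input tensor to the model's expected shape, normalizes it, runs inference, reduces the meet model's two-channel output to a single foreground mask (the selfie model already outputs one channel), resizes the mask back to the input size, and composites it onto the processed input canvas. A minimal usage sketch through the public API (assuming the published @vladmandic/human package and a browser environment):

import Human from '@vladmandic/human';

const human = new Human({ segmentation: { enabled: true, modelPath: 'selfie.json' } });

async function run(video: HTMLVideoElement, output: HTMLCanvasElement) {
  await human.load(); // loads selfie.json alongside any other enabled models
  const result = await human.detect(video);
  // segmentation.predict() has already composited the mask onto the processed canvas,
  // so drawing result.canvas shows the person with the background darkened
  if (result.canvas) output.getContext('2d')?.drawImage(result.canvas, 0, 0);
}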

2  wiki

@@ -1 +1 @@
Subproject commit 0087af5684c5722b2cf7ffd3db57b8117b7ac8c5
Subproject commit 8e898a636f5254a3fe451b097c633c9965a8a680