From 2d3e81181c70ca4c48a79b1bb15ad2be1d0bc1e2 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 4 Jun 2021 13:51:01 -0400
Subject: [PATCH] add meet and selfie models

---
 TODO.md              |  2 +-
 demo/index-worker.js | 17 +++++++++++++----
 demo/index.js        | 32 ++++++++++++++++++++++++--------
 src/config.ts        | 15 +++++++++++++++
 src/human.ts         | 38 +++++++++++++++++++-------------------
 src/image/image.ts   | 16 +++++++++-------
 src/interpolate.ts   |  2 ++
 src/result.ts        |  2 +-
 wiki                 |  2 +-
 9 files changed, 85 insertions(+), 41 deletions(-)

diff --git a/TODO.md b/TODO.md
index 89381e5a..5c1826b6 100644
--- a/TODO.md
+++ b/TODO.md
@@ -11,7 +11,7 @@ N/A
 
 ## In Progress
 
 - Switch to TypeScript 4.3
-- Add hints to Demo app
+- Implement segmentation model
 
 ## Known Issues
diff --git a/demo/index-worker.js b/demo/index-worker.js
index 231abc85..f4cbb499 100644
--- a/demo/index-worker.js
+++ b/demo/index-worker.js
@@ -27,9 +27,18 @@ onmessage = async (msg) => {
     result.error = err.message;
     log('worker thread error:', err.message);
   }
-  // must strip canvas from return value as it cannot be transfered from worker thread
-  if (result.canvas) result.canvas = null;
-  // @ts-ignore tslint wrong type matching for worker
-  postMessage({ result });
+
+  if (result.canvas) { // convert canvas to imageData and send it by reference
+    const ctx = result.canvas.getContext('2d');
+    const img = ctx?.getImageData(0, 0, result.canvas.width, result.canvas.height);
+    result.canvas = null; // must strip original canvas from return value as it cannot be transfered from worker thread
+    // @ts-ignore tslint wrong type matching for worker
+    if (img) postMessage({ result, image: img.data.buffer, width: msg.data.width, height: msg.data.height }, [img?.data.buffer]);
+    // @ts-ignore tslint wrong type matching for worker
+    else postMessage({ result });
+  } else {
+    // @ts-ignore tslint wrong type matching for worker
+    postMessage({ result });
+  }
   busy = false;
 };
diff --git a/demo/index.js b/demo/index.js
index 5ba17d0d..0f3e8b7a 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -38,19 +38,21 @@ const userConfig = {
     enabled: false,
     flip: false,
   },
-  face: { enabled: true,
+  face: { enabled: false,
     detector: { return: true },
     mesh: { enabled: true },
     iris: { enabled: false },
     description: { enabled: false },
     emotion: { enabled: false },
   },
-  hand: { enabled: false },
-  // body: { enabled: true, modelPath: 'posenet.json' },
-  // body: { enabled: true, modelPath: 'blazepose.json' },
-  body: { enabled: false },
   object: { enabled: false },
   gesture: { enabled: true },
+  hand: { enabled: false },
+  body: { enabled: false },
+  // body: { enabled: true, modelPath: 'posenet.json' },
+  // body: { enabled: true, modelPath: 'blazepose.json' },
+  // segmentation: { enabled: true, modelPath: 'meet.json' },
+  // segmentation: { enabled: true, modelPath: 'selfie.json' },
 
   */
 };
@@ -267,9 +269,11 @@ async function drawResults(input) {
   if (ui.buffered) {
     ui.drawThread = requestAnimationFrame(() => drawResults(input));
   } else {
-    log('stopping buffered refresh');
-    if (ui.drawThread) cancelAnimationFrame(ui.drawThread);
-    ui.drawThread = null;
+    if (ui.drawThread) {
+      log('stopping buffered refresh');
+      cancelAnimationFrame(ui.drawThread);
+      ui.drawThread = null;
+    }
   }
 }
 
@@ -350,6 +354,8 @@ async function setupCamera() {
     video.onloadeddata = () => {
       if (settings.width > settings.height) canvas.style.width = '100vw';
       else canvas.style.height = '100vh';
+      canvas.width = video.videoWidth;
+      canvas.height = video.videoHeight;
       ui.menuWidth.input.setAttribute('value', video.videoWidth);
       ui.menuHeight.input.setAttribute('value', video.videoHeight);
       if (live) video.play();
@@ -400,6 +406,16 @@ function webWorker(input, image, canvas, timestamp) {
     }
     if (document.getElementById('gl-bench')) document.getElementById('gl-bench').style.display = ui.bench ? 'block' : 'none';
     lastDetectedResult = msg.data.result;
+
+    if (msg.data.image) {
+      lastDetectedResult.canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(msg.data.width, msg.data.height) : document.createElement('canvas');
+      lastDetectedResult.canvas.width = msg.data.width;
+      lastDetectedResult.canvas.height = msg.data.height;
+      const ctx = lastDetectedResult.canvas.getContext('2d');
+      const imageData = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height);
+      ctx.putImageData(imageData, 0, 0);
+    }
+
     ui.framesDetect++;
     if (!ui.drawThread) drawResults(input);
     // eslint-disable-next-line no-use-before-define
diff --git a/src/config.ts b/src/config.ts
index c893ab0b..986c63d9 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -196,6 +196,15 @@ export interface Config {
     maxDetected: number,
     skipFrames: number,
   },
+
+  /** Controlls and configures all body segmentation module
+   * - enabled: true/false
+   * - modelPath: object detection model, can be absolute path or relative to modelBasePath
+   */
+  segmentation: {
+    enabled: boolean,
+    modelPath: string,
+  },
 }
 
 const config: Config = {
@@ -338,5 +347,11 @@ const config: Config = {
     skipFrames: 19,             // how many max frames to go without re-running the detector
                                 // only used when cacheSensitivity is not zero
   },
+
+  segmentation: {
+    enabled: false,
+    modelPath: 'selfie.json',   // experimental: object detection model, can be absolute path or relative to modelBasePath
+                                // can be 'selfie' or 'meet'
+  },
 };
 export { config as defaults };
diff --git a/src/human.ts b/src/human.ts
index 475fc5ab..3c2533b5 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -24,6 +24,7 @@ import * as image from './image/image';
 import * as draw from './draw/draw';
 import * as persons from './persons';
 import * as interpolate from './interpolate';
+import * as segmentation from './segmentation/segmentation';
 import * as sample from './sample';
 import * as app from '../package.json';
 import { Tensor } from './tfjs/types';
@@ -114,16 +115,7 @@ export class Human {
     nanodet: Model | null,
     centernet: Model | null,
     faceres: Model | null,
-  };
-  /** @internal: Currently loaded classes */
-  classes: {
-    facemesh: typeof facemesh;
-    emotion: typeof emotion;
-    body: typeof posenet | typeof blazepose | typeof movenet;
-    hand: typeof handpose;
-    nanodet: typeof nanodet;
-    centernet: typeof centernet;
-    faceres: typeof faceres;
+    segmentation: Model | null,
   };
   /** Reference face triangualtion array of 468 points, used for triangle references between points */
   faceTriangulation: typeof facemesh.triangulation;
@@ -173,20 +165,12 @@ export class Human {
       nanodet: null,
       centernet: null,
       faceres: null,
+      segmentation: null,
     };
     // export access to image processing
     // @ts-ignore eslint-typescript cannot correctly infer type in anonymous function
     this.image = (input: Input) => image.process(input, this.config);
     // export raw access to underlying models
-    this.classes = {
-      facemesh,
-      emotion,
-      faceres,
-      body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose,
-      hand: handpose,
-      nanodet,
-      centernet,
-    };
     this.faceTriangulation = facemesh.triangulation;
     this.faceUVMap = facemesh.uvmap;
     // include platform info
@@ -274,8 +258,10 @@ export class Human {
     }
     if (this.config.async) { // load models concurrently
       [
+        // @ts-ignore async model loading is not correctly inferred
         this.models.face,
         this.models.emotion,
+        // @ts-ignore async model loading is not correctly inferred
         this.models.handpose,
         this.models.posenet,
         this.models.blazepose,
@@ -284,6 +270,7 @@ export class Human {
         this.models.nanodet,
         this.models.centernet,
         this.models.faceres,
+        this.models.segmentation,
       ] = await Promise.all([
         this.models.face || (this.config.face.enabled ? facemesh.load(this.config) : null),
         this.models.emotion || ((this.config.face.enabled && this.config.face.emotion.enabled) ? emotion.load(this.config) : null),
@@ -295,6 +282,7 @@ export class Human {
         this.models.nanodet || (this.config.object.enabled && this.config.object.modelPath.includes('nanodet') ? nanodet.load(this.config) : null),
         this.models.centernet || (this.config.object.enabled && this.config.object.modelPath.includes('centernet') ? centernet.load(this.config) : null),
         this.models.faceres || ((this.config.face.enabled && this.config.face.description.enabled) ? faceres.load(this.config) : null),
+        this.models.segmentation || (this.config.segmentation.enabled ? segmentation.load(this.config) : null),
       ]);
     } else { // load models sequentially
       if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config);
@@ -307,6 +295,7 @@ export class Human {
       if (this.config.object.enabled && !this.models.nanodet && this.config.object.modelPath.includes('nanodet')) this.models.nanodet = await nanodet.load(this.config);
       if (this.config.object.enabled && !this.models.centernet && this.config.object.modelPath.includes('centernet')) this.models.centernet = await centernet.load(this.config);
       if (this.config.face.enabled && this.config.face.description.enabled && !this.models.faceres) this.models.faceres = await faceres.load(this.config);
+      if (this.config.segmentation.enabled && !this.models.segmentation) this.models.segmentation = await segmentation.load(this.config);
     }
 
     if (this.#firstRun) { // print memory stats on first run
@@ -568,6 +557,17 @@ export class Human {
       else if (this.performance.gesture) delete this.performance.gesture;
     }
 
+    // run segmentation
+    if (this.config.segmentation.enabled) {
+      this.analyze('Start Segmentation:');
+      this.state = 'run:segmentation';
+      timeStamp = now();
+      await segmentation.predict(process, this.config);
+      elapsedTime = Math.trunc(now() - timeStamp);
+      if (elapsedTime > 0) this.performance.segmentation = elapsedTime;
+      this.analyze('End Segmentation:');
+    }
+
     this.performance.total = Math.trunc(now() - timeStart);
     this.state = 'idle';
     this.result = {
diff --git a/src/image/image.ts b/src/image/image.ts
index 2b446511..653b6b62 100644
--- a/src/image/image.ts
+++ b/src/image/image.ts
@@ -138,7 +138,7 @@ export function process(input, config): { tensor: Tensor | null, canvas: Offscre
       const shape = [outCanvas.height, outCanvas.width, 3];
       pixels = tf.tensor3d(outCanvas.data, shape, 'int32');
     } else if (outCanvas instanceof ImageData) { // if input is imagedata, just use it
-      pixels = tf.browser.fromPixels(outCanvas);
+      pixels = tf.browser ? tf.browser.fromPixels(outCanvas) : null;
     } else if (config.backend === 'webgl' || config.backend === 'humangl') { // tf kernel-optimized method to get imagedata
       // we can use canvas as-is as it already has a context, so we do a silly one more canvas
       const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
@@ -146,7 +146,7 @@ export function process(input, config): { tensor: Tensor | null, canvas: Offscre
       tempCanvas.height = targetHeight;
       const tempCtx = tempCanvas.getContext('2d');
       tempCtx?.drawImage(outCanvas, 0, 0);
-      pixels = tf.browser.fromPixels(tempCanvas);
+      pixels = tf.browser ? tf.browser.fromPixels(tempCanvas) : null;
     } else { // cpu and wasm kernel does not implement efficient fromPixels method
       // we can use canvas as-is as it already has a context, so we do a silly one more canvas
       const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
@@ -155,12 +155,14 @@ export function process(input, config): { tensor: Tensor | null, canvas: Offscre
       const tempCtx = tempCanvas.getContext('2d');
       tempCtx?.drawImage(outCanvas, 0, 0);
       const data = tempCtx?.getImageData(0, 0, targetWidth, targetHeight);
-      pixels = tf.browser.fromPixels(data);
+      pixels = tf.browser ? tf.browser.fromPixels(data) : null;
+    }
+    if (pixels) {
+      const casted = pixels.toFloat();
+      tensor = casted.expandDims(0);
+      pixels.dispose();
+      casted.dispose();
     }
-    const casted = pixels.toFloat();
-    tensor = casted.expandDims(0);
-    pixels.dispose();
-    casted.dispose();
   }
   const canvas = config.filter.return ? outCanvas : null;
   return { tensor, canvas };
diff --git a/src/interpolate.ts b/src/interpolate.ts
index 0009e23a..7459192c 100644
--- a/src/interpolate.ts
+++ b/src/interpolate.ts
@@ -21,6 +21,8 @@ export function calc(newResult: Result): Result {
   // - at 1sec delay buffer = 1 which means live data is used
   const bufferedFactor = elapsed < 1000 ? 8 - Math.log(elapsed) : 1;
 
+  bufferedResult.canvas = newResult.canvas;
+
   // interpolate body results
   if (!bufferedResult.body || (newResult.body.length !== bufferedResult.body.length)) {
     bufferedResult.body = JSON.parse(JSON.stringify(newResult.body as Body[])); // deep clone once
diff --git a/src/result.ts b/src/result.ts
index 203cdd13..d8933576 100644
--- a/src/result.ts
+++ b/src/result.ts
@@ -176,7 +176,7 @@ export interface Result {
   /** global performance object with timing values for each operation */
   performance: Record<string, unknown>,
   /** optional processed canvas that can be used to draw input on screen */
-  readonly canvas?: OffscreenCanvas | HTMLCanvasElement,
+  canvas?: OffscreenCanvas | HTMLCanvasElement,
   /** timestamp of detection representing the milliseconds elapsed since the UNIX epoch */
   readonly timestamp: number,
   /** getter property that returns unified persons object */
diff --git a/wiki b/wiki
index 0087af56..8e898a63 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 0087af5684c5722b2cf7ffd3db57b8117b7ac8c5
+Subproject commit 8e898a636f5254a3fe451b097c633c9965a8a680
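
A minimal usage sketch of the segmentation option this patch introduces, assuming the published @vladmandic/human package; the import path, the preload call, and the appendChild output handling are illustrative, not part of the patch. The model file names 'selfie.json' / 'meet.json' are taken from src/config.ts above.

    // hypothetical usage: enable the new segmentation module via config
    import Human from '@vladmandic/human';

    const human = new Human({
      segmentation: { enabled: true, modelPath: 'selfie.json' }, // or 'meet.json'
      filter: { return: true }, // so result.canvas carries the processed frame
    });

    async function run(video) {
      await human.load(); // optional: preload models before first detect
      const result = await human.detect(video);
      if (result.canvas) document.body.appendChild(result.canvas); // hypothetical output handling
    }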
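
The demo/index-worker.js and demo/index.js changes together form a standard pattern for moving pixels out of a web worker without copying: serialize the canvas to an ImageData, post its underlying ArrayBuffer as a transferable, and rebuild a canvas on the main thread. A stripped-down sketch of just that pattern, standalone from Human; all names are illustrative:

    // worker side: convert a canvas into a transferable pixel buffer
    const ctx = canvas.getContext('2d');
    const img = ctx.getImageData(0, 0, canvas.width, canvas.height);
    // the second argument transfers ownership of the buffer instead of copying it
    postMessage({ width: img.width, height: img.height, image: img.data.buffer }, [img.data.buffer]);

    // main thread: rebuild a canvas from the received buffer
    worker.onmessage = (msg) => {
      const out = document.createElement('canvas');
      out.width = msg.data.width;
      out.height = msg.data.height;
      const imageData = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height);
      out.getContext('2d').putImageData(imageData, 0, 0);
    };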