mirror of https://github.com/vladmandic/human
work on body segmentation
parent 3c43aa57db
commit f0f7e00969

CHANGELOG.md
@@ -11,9 +11,7 @@ Repository: **<git+https://github.com/vladmandic/human.git>**

### **HEAD -> main** 2021/06/04 mandic00@live.com

### **update for tfjs 3.7.0** 2021/06/04 mandic00@live.com

- add meet and selfie models
- add live hints to demo
- switch worker from module to iife importscripts
- release candidate
TODO.md
@@ -11,7 +11,7 @@ N/A

## In Progress

- Switch to TypeScript 4.3
- Implement segmentation model
- Add backgrounds to segmentation

## Known Issues
demo/index.js
@@ -50,9 +50,7 @@ const userConfig = {
  hand: { enabled: false },
  body: { enabled: false },
  // body: { enabled: true, modelPath: 'posenet.json' },
  // body: { enabled: true, modelPath: 'blazepose.json' },
  // segmentation: { enabled: true, modelPath: 'meet.json' },
  // segmentation: { enabled: true, modelPath: 'selfie.json' },
  segmentation: { enabled: true },
  */
};
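For reference, these lines live inside the demo's commented-out userConfig block (note the closing */), so none of them are active by default. Uncommenting a single segmentation entry would yield, as a minimal sketch:

```js
// minimal sketch: demo userConfig with one active segmentation entry
// model file names ('meet.json', 'selfie.json') are the ones referenced above
const userConfig = {
  hand: { enabled: false },
  body: { enabled: false },
  segmentation: { enabled: true, modelPath: 'meet.json' }, // or 'selfie.json', or omit modelPath for the default
};
```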
@@ -211,8 +209,8 @@ async function drawResults(input) {
   // draw fps chart
   await menu.process.updateChart('FPS', ui.detectFPS);

-  // get updated canvas
-  if (ui.buffered || !result.canvas) {
+  // get updated canvas if missing or if we want buffering, but skip if segmentation is enabled
+  if (!result.canvas || (ui.buffered && !human.config.segmentation.enabled)) {
     const image = await human.image(input);
     result.canvas = image.canvas;
     human.tf.dispose(image.tensor);
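The reworked guard matters because, with segmentation enabled, result.canvas already carries the masked output produced by the model, so refreshing it from the raw input on every buffered frame would overwrite the segmented image.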
@@ -489,6 +487,7 @@ async function processImage(input, title) {
   image.onload = async () => {
+    if (ui.hintsThread) clearInterval(ui.hintsThread);
     ui.interpolated = false; // stop interpolating results if input is image
     ui.buffered = false; // stop buffering result if input is image
     status(`processing image: ${title}`);
     const canvas = document.getElementById('canvas');
     image.width = image.naturalWidth;
@@ -676,6 +675,8 @@ function setupMenu() {
   menu.models.addHTML('<hr style="border-style: inset; border-color: dimgray">');
   menu.models.addBool('gestures', human.config.gesture, 'enabled', (val) => human.config.gesture.enabled = val);
   menu.models.addHTML('<hr style="border-style: inset; border-color: dimgray">');
+  menu.models.addBool('body segmentation', human.config.segmentation, 'enabled', (val) => human.config.segmentation.enabled = val);
+  menu.models.addHTML('<hr style="border-style: inset; border-color: dimgray">');
   menu.models.addBool('object detection', human.config.object, 'enabled', (val) => human.config.object.enabled = val);
   menu.models.addHTML('<hr style="border-style: inset; border-color: dimgray">');
   menu.models.addBool('face compare', compare, 'enabled', (val) => {
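As inferred from these call sites, the menu helper follows an addBool(label, target, property, onChange) pattern, so the new 'body segmentation' checkbox toggles human.config.segmentation.enabled in place, just like the neighboring entries.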
src/config.ts
@@ -198,6 +198,8 @@ export interface Config {
  },

  /** Controls and configures the body segmentation module
   * if segmentation is enabled, output result.canvas will be augmented with masked image containing only person output
   *
   * - enabled: true/false
   * - modelPath: segmentation model, can be absolute path or relative to modelBasePath
   */
@@ -349,7 +351,8 @@ const config: Config = {
   },

   segmentation: {
-    enabled: false,
+    enabled: false, // if segmentation is enabled, output result.canvas will be augmented
+                    // with masked image containing only person output
     modelPath: 'selfie.json', // experimental: segmentation model, can be absolute path or relative to modelBasePath
     // can be 'selfie' or 'meet'
   },
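As a usage sketch (not part of this diff), enabling the module through the public API could look like the following; the modelBasePath value and the inputCanvas name are assumptions for illustration:

```js
import Human from '@vladmandic/human';

const human = new Human({
  modelBasePath: '../models', // assumption: model files served from a local path
  segmentation: { enabled: true, modelPath: 'meet.json' }, // or 'selfie.json' (the default above)
});

async function run(inputCanvas) { // inputCanvas: hypothetical canvas holding the source frame
  const result = await human.detect(inputCanvas);
  // with segmentation enabled, result.canvas is augmented with the masked person-only image
  return result.canvas;
}
```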
src/helpers.ts
@@ -39,3 +39,10 @@ export function mergeDeep(...objects) {
     return prev;
   }, {});
 }
+
+// helper function: return min and max from input array
+export const minmax = (data) => data.reduce((acc, val) => {
+  acc[0] = (acc[0] === undefined || val < acc[0]) ? val : acc[0];
+  acc[1] = (acc[1] === undefined || val > acc[1]) ? val : acc[1];
+  return acc;
+}, []);
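A quick behavior note on the new helper: the reduce starts from an empty accumulator, so the first element seeds both slots and an empty input stays empty. For example:

```js
minmax([4, -1, 7, 2]); // [-1, 7]
minmax([5]);           // [5, 5]
minmax([]);            // [] (reduce never runs; the empty accumulator is returned)
```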
src/segmentation/segmentation.ts
@@ -20,7 +20,7 @@ export async function load(config: Config): Promise<GraphModel> {
     if (!model || !model['modelUrl']) log('load model failed:', config.segmentation.modelPath);
     else if (config.debug) log('load model:', model['modelUrl']);
   } else if (config.debug) log('cached model:', model['modelUrl']);
-  // if (!blurKernel) blurKernel = blur.getGaussianKernel(50, 1, 1);
+  // if (!blurKernel) blurKernel = blur.getGaussianKernel(5, 1, 1);
   return model;
 }
@@ -30,6 +30,8 @@ export async function predict(input: { tensor: Tensor | null, canvas: OffscreenC
   const resizeInput = tf.image.resizeBilinear(input.tensor, [model.inputs[0].shape[1], model.inputs[0].shape[2]], false);
   const norm = resizeInput.div(255);
   const res = model.predict(norm) as Tensor;
+  // meet output: 1,144,256,2
+  // selfie output: 1,256,256,1
   tf.dispose(resizeInput);
   tf.dispose(norm);
@@ -39,16 +41,24 @@ export async function predict(input: { tensor: Tensor | null, canvas: OffscreenC

   const squeeze = tf.squeeze(res, 0);
   let resizeOutput;
-  if (squeeze.shape[2] === 2) { // model meet has two channels for fg and bg
+  if (squeeze.shape[2] === 2) {
+    // model meet has two channels for fg and bg
     const softmax = squeeze.softmax();
     const [bg, fg] = tf.unstack(softmax, 2);
-    tf.dispose(softmax);
     const expand = fg.expandDims(2);
+    const pad = expand.expandDims(0);
+    tf.dispose(softmax);
     tf.dispose(bg);
     tf.dispose(fg);
-    resizeOutput = tf.image.resizeBilinear(expand, [input.tensor?.shape[1], input.tensor?.shape[2]]);
+    // running softmax before unstack creates 2x2 matrix so we only take upper-left quadrant
+    const crop = tf.image.cropAndResize(pad, [[0, 0, 0.5, 0.5]], [0], [input.tensor?.shape[1], input.tensor?.shape[2]]);
+    // otherwise run softmax after unstack and use standard resize
+    // resizeOutput = tf.image.resizeBilinear(expand, [input.tensor?.shape[1], input.tensor?.shape[2]]);
+    resizeOutput = crop.squeeze(0);
+    tf.dispose(crop);
     tf.dispose(expand);
-  } else { // model selfie has a single channel
+    tf.dispose(pad);
+  } else { // model selfie has a single channel that we can use directly
     resizeOutput = tf.image.resizeBilinear(squeeze, [input.tensor?.shape[1], input.tensor?.shape[2]]);
   }
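For readers unfamiliar with the call above: tf.image.cropAndResize takes normalized [y1, x1, y2, x2] boxes per image, so [[0, 0, 0.5, 0.5]] selects the upper-left quadrant and bilinearly resizes it to the target size in one op. A standalone sketch, with shapes assumed from the meet model:

```js
const pad = tf.zeros([1, 144, 256, 1]);  // placeholder standing in for the padded fg tensor above
const crop = tf.image.cropAndResize(
  pad,                 // batched image tensor
  [[0, 0, 0.5, 0.5]],  // normalized crop box: upper-left quadrant
  [0],                 // crop applies to batch index 0
  [480, 640],          // hypothetical input frame height/width
);
// crop shape: [1, 480, 640, 1]; crop.squeeze(0) then yields the [480, 640, 1] overlay
```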
@@ -59,17 +69,21 @@ export async function predict(input: { tensor: Tensor | null, canvas: OffscreenC
   tf.dispose(squeeze);
   tf.dispose(res);

-  const ctx = input.canvas.getContext('2d') as CanvasRenderingContext2D;
+  const original = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(input.canvas.width, input.canvas.height) : document.createElement('canvas'); // need one more copy since input may already have a gl context, in which case getting a 2d context fails
+  original.width = input.canvas.width;
+  original.height = input.canvas.height;
+  const ctx = original.getContext('2d') as CanvasRenderingContext2D;

+  await ctx.drawImage(input.canvas, 0, 0);
   // https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D/globalCompositeOperation
   // best options are: darken, color-burn, multiply
   ctx.globalCompositeOperation = 'darken';
-  await ctx?.drawImage(overlay, 0, 0);
-  ctx.globalCompositeOperation = 'source-in';
   ctx.filter = 'blur(8px)'; // use css filter for blurring, can be done with gaussian blur manually instead
+  await ctx.drawImage(overlay, 0, 0);
+  ctx.globalCompositeOperation = 'source-over'; // reset to default
   ctx.filter = 'none'; // reset

+  input.canvas = original;

   return true;
 }

+/* Segmentation todo:
+  - Smoothen
+  - Get latest canvas in interpolate
+  - Buffered fetches latest from video instead of from interpolate
+*/
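To follow the compositing sequence outside the diff, here is a minimal standalone sketch, assuming frame is the original video canvas and overlay is a canvas holding the model's mask (both names hypothetical):

```js
const out = document.createElement('canvas'); // plain 2d canvas, mirroring the 'original' copy above
out.width = frame.width;
out.height = frame.height;
const ctx = out.getContext('2d');
ctx.drawImage(frame, 0, 0);                   // copy the original pixels first
ctx.globalCompositeOperation = 'darken';      // blend mask over image; 'color-burn' and 'multiply' also work
ctx.filter = 'blur(8px)';                     // css filter softens the mask edges
ctx.drawImage(overlay, 0, 0);
ctx.globalCompositeOperation = 'source-over'; // reset compositing to the default
ctx.filter = 'none';                          // reset the filter
```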
wiki

@@ -1 +1 @@
-Subproject commit 8e898a636f5254a3fe451b097c633c9965a8a680
+Subproject commit a69870f5763ae3fddd1243df10559aaf32c8f0da