mirror of https://github.com/vladmandic/human
support segmentation for nodejs
parent: a259b1f0c1
commit: 88a9701d4a
@@ -11,6 +11,7 @@

 ### **HEAD -> main** 2021/09/22 mandic00@live.com

 - redo segmentation and handtracking
 - prototype handtracking
 - automated browser tests
 - support for dynamic backend switching
TODO.md (5 lines changed)

@@ -13,11 +13,6 @@

 <br>

-### Segmentation
-
-- Implement `NodeJS` support
-- Test for leaks
-
 ### Backends

 - Optimize shader packing for WebGL backend:
@@ -245,14 +245,14 @@ async function drawResults(input) {
   if (userConfig.segmentation.enabled && ui.buffered) { // refresh segmentation if using buffered output
     const seg = await human.segmentation(input, ui.background);
     if (seg.alpha) {
-      let c = document.getElementById('segmentation-mask');
-      let ctx = c.getContext('2d');
-      ctx.clearRect(0, 0, c.width, c.height); // need to clear as seg.alpha is an alpha-based canvas, so drawing only adds to it
-      ctx.drawImage(seg.alpha, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, c.width, c.height);
-      c = document.getElementById('segmentation-canvas');
-      ctx = c.getContext('2d');
-      ctx.clearRect(0, 0, c.width, c.height);
-      ctx.drawImage(seg.canvas, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, c.width, c.height);
+      const canvasSegMask = document.getElementById('segmentation-mask');
+      const ctxSegMask = canvasSegMask.getContext('2d');
+      ctxSegMask.clearRect(0, 0, canvasSegMask.width, canvasSegMask.height); // need to clear as seg.alpha is an alpha-based canvas, so drawing only adds to it
+      ctxSegMask.drawImage(seg.alpha, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, canvasSegMask.width, canvasSegMask.height);
+      const canvasSegCanvas = document.getElementById('segmentation-canvas');
+      const ctxSegCanvas = canvasSegCanvas.getContext('2d');
+      ctxSegCanvas.clearRect(0, 0, canvasSegCanvas.width, canvasSegCanvas.height);
+      ctxSegCanvas.drawImage(seg.canvas, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, canvasSegCanvas.width, canvasSegCanvas.height);
     }
     // result.canvas = seg.alpha;
   } else if (!result.canvas || ui.buffered) { // refresh with input if using buffered output or if missing canvas
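The paired `clearRect` calls above matter: `seg.alpha` is an alpha-only canvas, so `drawImage` composites into whatever is already on the target, and alpha would accumulate across buffered frames without an explicit clear. A minimal sketch of that refresh pattern (the `segmentation-mask` element id is taken from the demo code above; everything else is illustrative):

```ts
// sketch: refresh one overlay canvas from an alpha mask each buffered frame
function refreshOverlay(alpha: HTMLCanvasElement) {
  const c = document.getElementById('segmentation-mask') as HTMLCanvasElement;
  const ctx = c.getContext('2d') as CanvasRenderingContext2D;
  ctx.clearRect(0, 0, c.width, c.height); // without this, semi-transparent pixels pile up frame over frame
  ctx.drawImage(alpha, 0, 0, alpha.width, alpha.height, 0, 0, c.width, c.height);
}
```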
@@ -21,11 +21,15 @@ const config = { // just enable all and leave default settings
 async function main() {
   log.header();

+  globalThis.Canvas = canvas.Canvas; // patch global namespace with canvas library
+  globalThis.ImageData = canvas.ImageData; // patch global namespace with canvas library
+  // human.env.Canvas = canvas.Canvas; // alternatively monkey-patch human to use external canvas library
+  // human.env.ImageData = canvas.ImageData; // alternatively monkey-patch human to use external canvas library
+
+  // init
   const human = new Human.Human(config); // create instance of human
   log.info('Human:', human.version);
-  // @ts-ignore
-  human.env.Canvas = canvas.Canvas; // monkey-patch human to use external canvas library

   await human.load(); // pre-load models
   log.info('Loaded models:', Object.keys(human.models).filter((a) => human.models[a]));
   log.info('Memory state:', human.tf.engine().memory());

@@ -46,6 +50,10 @@ async function main() {
   // run detection
   const result = await human.detect(inputCanvas);

+  // run segmentation
+  // const seg = await human.segmentation(inputCanvas);
+  // log.data('Segmentation:', { data: seg.data.length, alpha: typeof seg.alpha, canvas: typeof seg.canvas });
+
   // print results summary
   const persons = result.persons; // invoke persons getter, only used to print summary on console
   for (let i = 0; i < persons.length; i++) {
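The hunk above switches the NodeJS demo from patching `human.env` after construction to patching `globalThis` before it. A minimal end-to-end sketch of that flow, assuming the published `@vladmandic/human` and `canvas` packages (the demo itself requires the local `dist` bundle instead):

```ts
// sketch: NodeJS segmentation using globalThis patching, per the demo above
import * as canvas from 'canvas';
import { Human } from '@vladmandic/human';

(globalThis as any).Canvas = canvas.Canvas;       // patch global namespace with canvas library
(globalThis as any).ImageData = canvas.ImageData; // so human can create/read canvases outside a browser

async function run(imageFile: string) {
  const human = new Human({ segmentation: { enabled: true } });
  await human.load(); // pre-load models
  const img = await canvas.loadImage(imageFile);
  const input = new canvas.Canvas(img.width, img.height);
  input.getContext('2d').drawImage(img, 0, 0);
  const seg = await human.segmentation(input as unknown as HTMLCanvasElement);
  console.log('segmentation:', { data: seg.data.length, alpha: !!seg.alpha, canvas: !!seg.canvas });
}
```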
@@ -34,6 +34,7 @@ export type Env = {
   kernels: string[],
+  Canvas: undefined,
   Image: undefined,
   ImageData: undefined,
 }

 // eslint-disable-next-line import/no-mutable-exports

@@ -69,6 +70,7 @@ export let env: Env = {
   kernels: [],
+  Canvas: undefined,
   Image: undefined,
   ImageData: undefined,
 };

 export async function cpuInfo() {
src/human.ts (16 lines changed)

@@ -297,7 +297,7 @@ export class Human {
   * @param background?: {@link Input}
   * @returns { data, canvas, alpha }
   */
-  async segmentation(input: Input, background?: Input): Promise<{ data: Uint8ClampedArray | null, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
+  async segmentation(input: Input, background?: Input): Promise<{ data: number[], canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
     return segmentation.process(input, background, this.config);
   }
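The only signature change here: `data` is now a plain `number[]` instead of `Uint8ClampedArray | null`, so callers test for an empty array rather than null. A hedged usage sketch, written as if inside an async context (`human`, `inputCanvas`, `backgroundCanvas`, and `targetCtx` are assumed to exist):

```ts
// sketch: consuming the updated segmentation() result shape
const seg = await human.segmentation(inputCanvas, backgroundCanvas);
if (seg.data.length === 0) {
  console.log('no segmentation result (module busy or input not convertible)');
} else if (seg.canvas) {
  targetCtx.drawImage(seg.canvas as HTMLCanvasElement, 0, 0); // input composited over background
}
```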
@@ -441,20 +441,6 @@ export class Human {
   this.performance.image = Math.trunc(now() - timeStamp);
   this.analyze('Get Image:');

-  // segmentation is only run explicitly via human.segmentation() which calls segmentation.process()
-  /*
-  if (this.config.segmentation.enabled && process && img.tensor && img.canvas) {
-    this.analyze('Start Segmentation:');
-    this.state = 'detect:segmentation';
-    timeStamp = now();
-    const seg = await segmentation.predict(img, this.config);
-    img = { canvas: seg.canvas, tensor: seg.tensor };
-    elapsedTime = Math.trunc(now() - timeStamp);
-    if (elapsedTime > 0) this.performance.segmentation = elapsedTime;
-    this.analyze('End Segmentation:');
-  }
-  */
-
   if (!img.tensor) {
     if (this.config.debug) log('could not convert input to tensor');
     resolve({ error: 'could not convert input to tensor' });
@@ -30,7 +30,8 @@ export function canvas(width, height): HTMLCanvasElement | OffscreenCanvas {
     }
   } else {
     // @ts-ignore // env.Canvas is an external monkey-patch
-    c = (typeof env.Canvas !== 'undefined') ? new env.Canvas(width, height) : null;
+    if (typeof env.Canvas !== 'undefined') c = new env.Canvas(width, height);
+    else if (typeof globalThis.Canvas !== 'undefined') c = new globalThis.Canvas(width, height);
   }
   // if (!c) throw new Error('cannot create canvas');
   return c;
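Spelled out, canvas creation now tries the explicit `env.Canvas` patch first and falls back to an ambient `globalThis.Canvas`. A condensed sketch of that resolution order; the browser branch is elided in the hunk above, so its use of `OffscreenCanvas` here is an assumption:

```ts
// sketch: canvas creation fallback order after this change
// `envCanvas` stands in for the library's env.Canvas monkey-patch slot
function createCanvas(width: number, height: number, envCanvas?: new (w: number, h: number) => unknown): HTMLCanvasElement | OffscreenCanvas | null {
  if (typeof OffscreenCanvas !== 'undefined') return new OffscreenCanvas(width, height); // assumed browser branch
  if (typeof envCanvas !== 'undefined') return new envCanvas(width, height) as HTMLCanvasElement; // explicit patch wins
  if (typeof (globalThis as any).Canvas !== 'undefined') return new (globalThis as any).Canvas(width, height); // ambient patch
  return null; // the source keeps its throw commented out, leaving null handling to callers
}
```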
@@ -51,6 +52,7 @@ export function process(input: Input, config: Config): { tensor: Tensor | null,
   !(input instanceof tf.Tensor)
   && !(typeof Image !== 'undefined' && input instanceof Image)
   && !(typeof env.Canvas !== 'undefined' && input instanceof env.Canvas)
+  && !(typeof globalThis.Canvas !== 'undefined' && input instanceof globalThis.Canvas)
   && !(typeof ImageData !== 'undefined' && input instanceof ImageData)
   && !(typeof ImageBitmap !== 'undefined' && input instanceof ImageBitmap)
   && !(typeof HTMLImageElement !== 'undefined' && input instanceof HTMLImageElement)
@@ -23,61 +23,53 @@ export async function load(config: Config): Promise<GraphModel> {
   return model;
 }

-export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null }, config: Config)
-: Promise<{ data: Uint8ClampedArray | null, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
-  const width = input.tensor?.shape[2] || 0;
-  const height = input.tensor?.shape[1] || 0;
-  if (!input.tensor || !model || !model.inputs[0].shape) return { data: null, canvas: null, alpha: null };
-  const resizeInput = tf.image.resizeBilinear(input.tensor, [model.inputs[0].shape[1], model.inputs[0].shape[2]], false);
-  const norm = tf.div(resizeInput, 255);
-  const res = model.predict(norm) as Tensor;
-  // meet output: 1,256,256,1
-  // selfie output: 1,144,256,2
-  tf.dispose(resizeInput);
-  tf.dispose(norm);
+export async function process(input: Input, background: Input | undefined, config: Config)
+: Promise<{ data: Array<number>, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
+  if (busy) return { data: [], canvas: null, alpha: null };
+  busy = true;
+  if (!model) await load(config);
+  const inputImage = image.process(input, config);
+  const width = inputImage.canvas?.width || 0;
+  const height = inputImage.canvas?.height || 0;
+  if (!inputImage.tensor) return { data: [], canvas: null, alpha: null };
+  const t: Record<string, Tensor> = {};

-  const squeeze = tf.squeeze(res, 0);
-  tf.dispose(res);
-  let dataT;
-  if (squeeze.shape[2] === 2) {
-    // model meet has two channels for fg and bg
-    const softmax = squeeze.softmax();
-    const [bg, fg] = tf.unstack(softmax, 2);
-    const expand = tf.expandDims(fg, 2);
-    const pad = tf.expandDims(expand, 0);
-    tf.dispose(softmax);
-    tf.dispose(bg);
-    tf.dispose(fg);
+  t.resize = tf.image.resizeBilinear(inputImage.tensor, [model.inputs[0].shape ? model.inputs[0].shape[1] : 0, model.inputs[0].shape ? model.inputs[0].shape[2] : 0], false);
+  tf.dispose(inputImage.tensor);
+  t.norm = tf.div(t.resize, 255);
+  t.res = model.predict(t.norm) as Tensor;
+
+  t.squeeze = tf.squeeze(t.res, 0); // meet.shape:[1,256,256,1], selfie.shape:[1,144,256,2]
+  if (t.squeeze.shape[2] === 2) {
+    t.softmax = tf.softmax(t.squeeze); // model meet has two channels for fg and bg
+    [t.bg, t.fg] = tf.unstack(t.softmax, 2);
+    t.expand = tf.expandDims(t.fg, 2);
+    t.pad = tf.expandDims(t.expand, 0);
+    t.crop = tf.image.cropAndResize(t.pad, [[0, 0, 0.5, 0.5]], [0], [width, height]);
     // running softmax before unstack creates a 2x2 matrix, so we only take the upper-left quadrant
-    const crop = tf.image.cropAndResize(pad, [[0, 0, 0.5, 0.5]], [0], [width, height]);
     // otherwise run softmax after unstack and use standard resize
     // resizeOutput = tf.image.resizeBilinear(expand, [input.tensor?.shape[1], input.tensor?.shape[2]]);
-    dataT = tf.squeeze(crop, 0);
-    tf.dispose(crop);
-    tf.dispose(expand);
-    tf.dispose(pad);
-  } else { // model selfie has a single channel that we can use directly
-    dataT = tf.image.resizeBilinear(squeeze, [height, width]);
+    t.data = tf.squeeze(t.crop, 0);
+  } else {
+    t.data = tf.image.resizeBilinear(t.squeeze, [height, width]); // model selfie has a single channel that we can use directly
   }
-  tf.dispose(squeeze);
-  const data = await dataT.dataSync();
+  const data = Array.from(await t.data.data());

-  if (env.node) {
-    tf.dispose(dataT);
+  if (env.node && !env.Canvas && (typeof ImageData === 'undefined')) {
+    if (config.debug) log('canvas support missing');
+    Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
     return { data, canvas: null, alpha: null }; // running in nodejs so return alpha array as-is
   }

   const alphaCanvas = image.canvas(width, height);
-  await tf.browser.toPixels(dataT, alphaCanvas);
-  tf.dispose(dataT);
+  await tf.browser.toPixels(t.data, alphaCanvas);
   const alphaCtx = alphaCanvas.getContext('2d') as CanvasRenderingContext2D;
   if (config.segmentation.blur && config.segmentation.blur > 0) alphaCtx.filter = `blur(${config.segmentation.blur}px)`; // use css filter for blurring; could be done manually with a gaussian blur instead
   const alphaData = alphaCtx.getImageData(0, 0, width, height);

   // original canvas where only alpha shows
   const compositeCanvas = image.canvas(width, height);
   const compositeCtx = compositeCanvas.getContext('2d') as CanvasRenderingContext2D;
-  if (input.canvas) compositeCtx.drawImage(input.canvas, 0, 0);
+  if (inputImage.canvas) compositeCtx.drawImage(inputImage.canvas, 0, 0);
   compositeCtx.globalCompositeOperation = 'darken'; // https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D/globalCompositeOperation // best options are: darken, color-burn, multiply
   if (config.segmentation.blur && config.segmentation.blur > 0) compositeCtx.filter = `blur(${config.segmentation.blur}px)`; // use css filter for blurring; could be done manually with a gaussian blur instead
   compositeCtx.drawImage(alphaCanvas, 0, 0);
@@ -87,31 +79,18 @@ export async function predict(input: { tensor: Tensor | null, canvas: OffscreenC
   for (let i = 0; i < width * height; i++) compositeData.data[4 * i + 3] = alphaData.data[4 * i + 0]; // copy original alpha value to new composite canvas
   compositeCtx.putImageData(compositeData, 0, 0);

-  return { data, canvas: compositeCanvas, alpha: alphaCanvas };
-}
-
-export async function process(input: Input, background: Input | undefined, config: Config)
-: Promise<{ data: Uint8ClampedArray | null, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
-  if (busy) return { data: null, canvas: null, alpha: null };
-  busy = true;
-  if (!model) await load(config);
-  const inputImage = image.process(input, config);
-  const segmentation = await predict(inputImage, config);
-  tf.dispose(inputImage.tensor);
   let mergedCanvas: HTMLCanvasElement | OffscreenCanvas | null = null;

-  if (background && segmentation.canvas) { // draw background with segmentation as overlay if background is present
-    mergedCanvas = image.canvas(inputImage.canvas?.width || 0, inputImage.canvas?.height || 0);
+  if (background && compositeCanvas) { // draw background with segmentation as overlay if background is present
+    mergedCanvas = image.canvas(width, height);
     const bgImage = image.process(background, config);
     tf.dispose(bgImage.tensor);
     const ctxMerge = mergedCanvas.getContext('2d') as CanvasRenderingContext2D;
     // ctxMerge.globalCompositeOperation = 'source-over';
     ctxMerge.drawImage(bgImage.canvas as HTMLCanvasElement, 0, 0, mergedCanvas.width, mergedCanvas.height);
     // ctxMerge.globalCompositeOperation = 'source-atop';
-    ctxMerge.drawImage(segmentation.canvas as HTMLCanvasElement, 0, 0);
-    // ctxMerge.globalCompositeOperation = 'source-over';
+    ctxMerge.drawImage(compositeCanvas, 0, 0);
   }

+  Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
   busy = false;
-  return { data: segmentation.data, canvas: mergedCanvas || segmentation.canvas, alpha: segmentation.alpha };
+  return { data, canvas: mergedCanvas || compositeCanvas, alpha: alphaCanvas };
 }
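The rewrite above collapses `predict()` and `process()` into one function and replaces a dozen scattered `tf.dispose()` calls with a single tensor map (`t`) flushed once on every exit path, which also makes the "test for leaks" item retired from TODO.md easier to satisfy. A minimal sketch of that pattern (names are illustrative, not from the source):

```ts
// sketch: keep every intermediate tensor in one map, dispose them all in one place
import * as tf from '@tensorflow/tfjs';

function maskFromLogits(logits: tf.Tensor): number[] {
  const t: Record<string, tf.Tensor> = {};
  try {
    t.squeeze = tf.squeeze(logits, [0]);     // drop batch dimension
    t.softmax = tf.softmax(t.squeeze);       // normalize per-pixel class scores
    return Array.from(t.softmax.dataSync()); // plain number[] survives disposal
  } finally {
    Object.keys(t).forEach((k) => tf.dispose(t[k])); // single exit-path cleanup
  }
}
```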
@@ -327,7 +327,8 @@ async function test(Human, inputConfig) {
   ]);

   // test monkey-patch
-  human.env.Canvas = canvasJS.Canvas; // monkey-patch human to use external canvas library
+  globalThis.Canvas = canvasJS.Canvas; // monkey-patch to use external canvas library
+  globalThis.ImageData = canvasJS.ImageData; // monkey-patch to use external canvas library
   const inputImage = await canvasJS.loadImage('samples/ai-face.jpg'); // load image using canvas library
   const inputCanvas = new canvasJS.Canvas(inputImage.width, inputImage.height); // create canvas
   const ctx = inputCanvas.getContext('2d');

@@ -338,7 +339,7 @@ async function test(Human, inputConfig) {
   // test segmentation
   res = await human.segmentation(inputCanvas, inputCanvas);
-  if (!res || !res.data) log('error', 'failed: segmentation', res);
+  if (!res || !res.data || !res.canvas) log('error', 'failed: segmentation');
   else log('state', 'passed: segmentation', [res.data.length]);
   human.env.Canvas = undefined;
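A condensed version of the segmentation smoke test above, usable standalone (assumes `canvasJS` is the node-canvas package and `human` is a loaded instance, as in the test harness):

```ts
// sketch: standalone segmentation smoke test
async function smokeTest(human: any, canvasJS: any) {
  (globalThis as any).Canvas = canvasJS.Canvas;       // same ambient patch the harness applies
  (globalThis as any).ImageData = canvasJS.ImageData;
  const img = await canvasJS.loadImage('samples/ai-face.jpg');
  const c = new canvasJS.Canvas(img.width, img.height);
  c.getContext('2d').drawImage(img, 0, 0);
  const res = await human.segmentation(c, c);          // image used as its own background
  console.assert(res && res.data.length > 0 && res.canvas, 'segmentation smoke test failed');
}
```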
@@ -5,9 +5,9 @@ const Human = require('../dist/human.node-wasm.js');
 const test = require('./test-main.js').test;

 // @ts-ignore
-Human.env.Canvas = Canvas;
+Human.env.Canvas = Canvas; // requires monkey-patch as wasm does not have tf.browser namespace
 // @ts-ignore
-Human.env.Image = Image;
+Human.env.Image = Image; // requires monkey-patch as wasm does not have tf.browser namespace

 const config = {
   // modelBasePath: 'http://localhost:10030/models/',