diff --git a/CHANGELOG.md b/CHANGELOG.md
index c6d8de2f..870fde98 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@
 
 ### **HEAD -> main** 2021/09/22 mandic00@live.com
 
+- redo segmentation and handtracking
 - prototype handtracking
 - automated browser tests
 - support for dynamic backend switching
diff --git a/TODO.md b/TODO.md
index 057ff3ae..2f65c791 100644
--- a/TODO.md
+++ b/TODO.md
@@ -13,11 +13,6 @@
-### Segmentation
-
-- Implement `NodeJS` support
-- Test for leaks
-
 ### Backends
 
 - Optimize shader packing for WebGL backend:
diff --git a/demo/index.js b/demo/index.js
index fa56ba26..3a8be7f0 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -245,14 +245,14 @@ async function drawResults(input) {
   if (userConfig.segmentation.enabled && ui.buffered) { // refresh segmentation if using buffered output
     const seg = await human.segmentation(input, ui.background);
     if (seg.alpha) {
-      let c = document.getElementById('segmentation-mask');
-      let ctx = c.getContext('2d');
-      ctx.clearRect(0, 0, c.width, c.height); // need to clear as seg.alpha is alpha based canvas so it adds
-      ctx.drawImage(seg.alpha, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, c.width, c.height);
-      c = document.getElementById('segmentation-canvas');
-      ctx = c.getContext('2d');
-      ctx.clearRect(0, 0, c.width, c.height); // need to clear as seg.alpha is alpha based canvas so it adds
-      ctx.drawImage(seg.canvas, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, c.width, c.height);
+      const canvasSegMask = document.getElementById('segmentation-mask');
+      const ctxSegMask = canvasSegMask.getContext('2d');
+      ctxSegMask.clearRect(0, 0, canvasSegMask.width, canvasSegMask.height); // need to clear as seg.alpha is an alpha-based canvas, so repeated draws accumulate
+      ctxSegMask.drawImage(seg.alpha, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, canvasSegMask.width, canvasSegMask.height);
+      const canvasSegCanvas = document.getElementById('segmentation-canvas');
+      const ctxSegCanvas = canvasSegCanvas.getContext('2d');
+      ctxSegCanvas.clearRect(0, 0, canvasSegCanvas.width, canvasSegCanvas.height); // need to clear as seg.alpha is an alpha-based canvas, so repeated draws accumulate
+      ctxSegCanvas.drawImage(seg.canvas, 0, 0, seg.alpha.width, seg.alpha.height, 0, 0, canvasSegCanvas.width, canvasSegCanvas.height);
     }
     // result.canvas = seg.alpha;
   } else if (!result.canvas || ui.buffered) { // refresh with input if using buffered output or if missing canvas
diff --git a/demo/nodejs/node-canvas.js b/demo/nodejs/node-canvas.js
index 72469477..3fa83e04 100644
--- a/demo/nodejs/node-canvas.js
+++ b/demo/nodejs/node-canvas.js
@@ -21,11 +21,15 @@ const config = { // just enable all and leave default settings
 
 async function main() {
   log.header();
+  globalThis.Canvas = canvas.Canvas; // patch global namespace with canvas library
+  globalThis.ImageData = canvas.ImageData; // patch global namespace with canvas library
+  // human.env.Canvas = canvas.Canvas; // alternatively monkey-patch human to use external canvas library
+  // human.env.ImageData = canvas.ImageData; // alternatively monkey-patch human to use external canvas library
+  // init
   const human = new Human.Human(config); // create instance of human
   log.info('Human:', human.version);
 
-  // @ts-ignore
-  human.env.Canvas = canvas.Canvas; // monkey-patch human to use external canvas library
+
   await human.load(); // pre-load models
   log.info('Loaded models:', Object.keys(human.models).filter((a) => human.models[a]));
   log.info('Memory state:', human.tf.engine().memory());
@@ -46,6 +50,10 @@ async function main() {
   // run detection
   const result = await human.detect(inputCanvas);
 
+  // run segmentation
+  // const seg = await human.segmentation(inputCanvas);
+  // log.data('Segmentation:', { data: seg.data.length, alpha: typeof seg.alpha, canvas: typeof seg.canvas });
+
   // print results summary
   const persons = result.persons; // invoke persons getter, only used to print summary on console
   for (let i = 0; i < persons.length; i++) {
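For reference, a minimal standalone sketch of the `globalThis` patch pattern the updated demo uses; the package name, config, and sample image path are illustrative assumptions, not part of this diff:

```js
// minimal sketch, assuming human and the node `canvas` package are installed;
// `@vladmandic/human`, the config, and the sample path below are illustrative
const canvas = require('canvas');
const Human = require('@vladmandic/human');

globalThis.Canvas = canvas.Canvas; // let human create canvases in nodejs
globalThis.ImageData = canvas.ImageData; // let human create image data in nodejs

async function run() {
  const human = new Human.Human({ segmentation: { enabled: true } });
  await human.load();
  const img = await canvas.loadImage('samples/ai-face.jpg'); // illustrative input image
  const input = new canvas.Canvas(img.width, img.height);
  input.getContext('2d').drawImage(img, 0, 0);
  const seg = await human.segmentation(input); // returns { data, canvas, alpha }
  console.log('segmentation alpha values:', seg.data.length);
}

run();
```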
diff --git a/src/env.ts b/src/env.ts
index 3b387caa..cbb0f5f4 100644
--- a/src/env.ts
+++ b/src/env.ts
@@ -34,6 +34,7 @@ export type Env = {
   kernels: string[],
   Canvas: undefined,
   Image: undefined,
+  ImageData: undefined,
 }
 
 // eslint-disable-next-line import/no-mutable-exports
@@ -69,6 +70,7 @@ export let env: Env = {
   kernels: [],
   Canvas: undefined,
   Image: undefined,
+  ImageData: undefined,
 };
 
 export async function cpuInfo() {
diff --git a/src/human.ts b/src/human.ts
index 4935c234..8718444d 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -297,7 +297,7 @@ export class Human {
    * @param background?: {@link Input}
    * @returns { data, canvas, alpha }
    */
-  async segmentation(input: Input, background?: Input): Promise<{ data: Uint8ClampedArray | null, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
+  async segmentation(input: Input, background?: Input): Promise<{ data: number[], canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
     return segmentation.process(input, background, this.config);
   }
 
@@ -441,20 +441,6 @@ export class Human {
       this.performance.image = Math.trunc(now() - timeStamp);
       this.analyze('Get Image:');
 
-      // segmentation is only run explicitly via human.segmentation() which calls segmentation.process()
-      /*
-      if (this.config.segmentation.enabled && process && img.tensor && img.canvas) {
-        this.analyze('Start Segmentation:');
-        this.state = 'detect:segmentation';
-        timeStamp = now();
-        const seg = await segmentation.predict(img, this.config);
-        img = { canvas: seg.canvas, tensor: seg.tensor };
-        elapsedTime = Math.trunc(now() - timeStamp);
-        if (elapsedTime > 0) this.performance.segmentation = elapsedTime;
-        this.analyze('End Segmentation:');
-      }
-      */
-
       if (!img.tensor) {
         if (this.config.debug) log('could not convert input to tensor');
         resolve({ error: 'could not convert input to tensor' });
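The signature change above means `human.segmentation()` now resolves with a plain `number[]` instead of a typed array; a browser-side consumer might look like the sketch below (the element id and helper name are illustrative assumptions):

```js
// sketch of consuming the new { data: number[], canvas, alpha } return shape;
// assumes a loaded human instance and an existing input element
async function drawSegmentation(human, inputElement) {
  const seg = await human.segmentation(inputElement);
  if (seg.canvas) {
    const out = document.getElementById('output'); // illustrative element id
    out.getContext('2d').drawImage(seg.canvas, 0, 0, out.width, out.height);
  }
  console.log('alpha values:', seg.data.length); // one alpha value per output pixel
}
```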
diff --git a/src/image/image.ts b/src/image/image.ts
index 8478e192..40c2b123 100644
--- a/src/image/image.ts
+++ b/src/image/image.ts
@@ -30,7 +30,8 @@ export function canvas(width, height): HTMLCanvasElement | OffscreenCanvas {
     }
   } else {
     // @ts-ignore // env.canvas is an external monkey-patch
-    c = (typeof env.Canvas !== 'undefined') ? new env.Canvas(width, height) : null;
+    if (typeof env.Canvas !== 'undefined') c = new env.Canvas(width, height);
+    else if (typeof globalThis.Canvas !== 'undefined') c = new globalThis.Canvas(width, height);
   }
   // if (!c) throw new Error('cannot create canvas');
   return c;
@@ -51,6 +52,7 @@ export function process(input: Input, config: Config): { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null } {
     !(input instanceof tf.Tensor)
     && !(typeof Image !== 'undefined' && input instanceof Image)
     && !(typeof env.Canvas !== 'undefined' && input instanceof env.Canvas)
+    && !(typeof globalThis.Canvas !== 'undefined' && input instanceof globalThis.Canvas)
     && !(typeof ImageData !== 'undefined' && input instanceof ImageData)
     && !(typeof ImageBitmap !== 'undefined' && input instanceof ImageBitmap)
     && !(typeof HTMLImageElement !== 'undefined' && input instanceof HTMLImageElement)
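To make the new resolution order explicit: an explicit `env.Canvas` monkey-patch still wins, with the `globalThis` patch as fallback. A simplified sketch of just this branch (the real helper also covers the browser `document.createElement` and `OffscreenCanvas` paths):

```js
// simplified sketch of the nodejs branch of image.canvas(); illustrative only
function createCanvas(env, width, height) {
  if (typeof env.Canvas !== 'undefined') return new env.Canvas(width, height); // explicit env patch wins
  if (typeof globalThis.Canvas !== 'undefined') return new globalThis.Canvas(width, height); // global patch as fallback
  return null; // no canvas implementation available, e.g. plain nodejs without the canvas package
}
```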
diff --git a/src/segmentation/segmentation.ts b/src/segmentation/segmentation.ts
index b0bdd5ae..3adf9487 100644
--- a/src/segmentation/segmentation.ts
+++ b/src/segmentation/segmentation.ts
@@ -23,61 +23,53 @@ export async function load(config: Config): Promise<GraphModel> {
   return model;
 }
 
-export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null }, config: Config)
-: Promise<{ data: Uint8ClampedArray | null, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
-  const width = input.tensor?.shape[2] || 0;
-  const height = input.tensor?.shape[1] || 0;
-  if (!input.tensor || !model || !model.inputs[0].shape) return { data: null, canvas: null, alpha: null };
-  const resizeInput = tf.image.resizeBilinear(input.tensor, [model.inputs[0].shape[1], model.inputs[0].shape[2]], false);
-  const norm = tf.div(resizeInput, 255);
-  const res = model.predict(norm) as Tensor;
-  // meet output: 1,256,256,1
-  // selfie output: 1,144,256,2
-  tf.dispose(resizeInput);
-  tf.dispose(norm);
+export async function process(input: Input, background: Input | undefined, config: Config)
+: Promise<{ data: Array<number>, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
+  if (busy) return { data: [], canvas: null, alpha: null };
+  busy = true;
+  if (!model) await load(config);
+  const inputImage = image.process(input, config);
+  const width = inputImage.canvas?.width || 0;
+  const height = inputImage.canvas?.height || 0;
+  if (!inputImage.tensor) return { data: [], canvas: null, alpha: null };
+  const t: Record<string, Tensor> = {};
 
-  const squeeze = tf.squeeze(res, 0);
-  tf.dispose(res);
-  let dataT;
-  if (squeeze.shape[2] === 2) {
-    // model meet has two channels for fg and bg
-    const softmax = squeeze.softmax();
-    const [bg, fg] = tf.unstack(softmax, 2);
-    const expand = tf.expandDims(fg, 2);
-    const pad = tf.expandDims(expand, 0);
-    tf.dispose(softmax);
-    tf.dispose(bg);
-    tf.dispose(fg);
+  t.resize = tf.image.resizeBilinear(inputImage.tensor, [model.inputs[0].shape ? model.inputs[0].shape[1] : 0, model.inputs[0].shape ? model.inputs[0].shape[2] : 0], false);
+  tf.dispose(inputImage.tensor);
+  t.norm = tf.div(t.resize, 255);
+  t.res = model.predict(t.norm) as Tensor;
+
+  t.squeeze = tf.squeeze(t.res, 0); // meet.shape:[1,256,256,1], selfie.shape:[1,144,256,2]
+  if (t.squeeze.shape[2] === 2) {
+    t.softmax = tf.softmax(t.squeeze); // model meet has two channels for fg and bg
+    [t.bg, t.fg] = tf.unstack(t.softmax, 2);
+    t.expand = tf.expandDims(t.fg, 2);
+    t.pad = tf.expandDims(t.expand, 0);
+    t.crop = tf.image.cropAndResize(t.pad, [[0, 0, 0.5, 0.5]], [0], [width, height]); // running softmax before unstack creates a 2x2 matrix so we only take the upper-left quadrant
-    const crop = tf.image.cropAndResize(pad, [[0, 0, 0.5, 0.5]], [0], [width, height]);
     // otherwise run softmax after unstack and use standard resize
     // resizeOutput = tf.image.resizeBilinear(expand, [input.tensor?.shape[1], input.tensor?.shape[2]]);
-    dataT = tf.squeeze(crop, 0);
-    tf.dispose(crop);
-    tf.dispose(expand);
-    tf.dispose(pad);
-  } else { // model selfie has a single channel that we can use directly
-    dataT = tf.image.resizeBilinear(squeeze, [height, width]);
+    t.data = tf.squeeze(t.crop, 0);
+  } else {
+    t.data = tf.image.resizeBilinear(t.squeeze, [height, width]); // model selfie has a single channel that we can use directly
   }
-  tf.dispose(squeeze);
-  const data = await dataT.dataSync();
+  const data = Array.from(await t.data.data());
 
-  if (env.node) {
-    tf.dispose(dataT);
+  if (env.node && !env.Canvas && (typeof ImageData === 'undefined')) {
+    if (config.debug) log('canvas support missing');
+    Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
     return { data, canvas: null, alpha: null }; // running in nodejs so return alpha array as-is
   }
 
   const alphaCanvas = image.canvas(width, height);
-  await tf.browser.toPixels(dataT, alphaCanvas);
-  tf.dispose(dataT);
+  await tf.browser.toPixels(t.data, alphaCanvas);
   const alphaCtx = alphaCanvas.getContext('2d') as CanvasRenderingContext2D;
   if (config.segmentation.blur && config.segmentation.blur > 0) alphaCtx.filter = `blur(${config.segmentation.blur}px)`; // use css filter for blurring, can be done with gaussian blur manually instead
   const alphaData = alphaCtx.getImageData(0, 0, width, height);
 
-  // original canvas where only alpha shows
   const compositeCanvas = image.canvas(width, height);
   const compositeCtx = compositeCanvas.getContext('2d') as CanvasRenderingContext2D;
-  if (input.canvas) compositeCtx.drawImage(input.canvas, 0, 0);
+  if (inputImage.canvas) compositeCtx.drawImage(inputImage.canvas, 0, 0);
   compositeCtx.globalCompositeOperation = 'darken'; // https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D/globalCompositeOperation // best options are: darken, color-burn, multiply
   if (config.segmentation.blur && config.segmentation.blur > 0) compositeCtx.filter = `blur(${config.segmentation.blur}px)`; // use css filter for blurring, can be done with gaussian blur manually instead
   compositeCtx.drawImage(alphaCanvas, 0, 0);
@@ -87,31 +79,18 @@ export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas | HTMLCanvasElement | null }, config: Config)
   for (let i = 0; i < width * height; i++) compositeData.data[4 * i + 3] = alphaData.data[4 * i + 0]; // copy original alpha value to new composite canvas
   compositeCtx.putImageData(compositeData, 0, 0);
 
-  return { data, canvas: compositeCanvas, alpha: alphaCanvas };
-}
-
-export async function process(input: Input, background: Input | undefined, config: Config)
-: Promise<{ data: Uint8ClampedArray | null, canvas: HTMLCanvasElement | OffscreenCanvas | null, alpha: HTMLCanvasElement | OffscreenCanvas | null }> {
-  if (busy) return { data: null, canvas: null, alpha: null };
-  busy = true;
-  if (!model) await load(config);
-  const inputImage = image.process(input, config);
-  const segmentation = await predict(inputImage, config);
-  tf.dispose(inputImage.tensor);
 
   let mergedCanvas: HTMLCanvasElement | OffscreenCanvas | null = null;
-
-  if (background && segmentation.canvas) { // draw background with segmentation as overlay if background is present
-    mergedCanvas = image.canvas(inputImage.canvas?.width || 0, inputImage.canvas?.height || 0);
+  if (background && compositeCanvas) { // draw background with segmentation as overlay if background is present
+    mergedCanvas = image.canvas(width, height);
     const bgImage = image.process(background, config);
     tf.dispose(bgImage.tensor);
     const ctxMerge = mergedCanvas.getContext('2d') as CanvasRenderingContext2D;
-    // ctxMerge.globalCompositeOperation = 'source-over';
     ctxMerge.drawImage(bgImage.canvas as HTMLCanvasElement, 0, 0, mergedCanvas.width, mergedCanvas.height);
-    // ctxMerge.globalCompositeOperation = 'source-atop';
-    ctxMerge.drawImage(segmentation.canvas as HTMLCanvasElement, 0, 0);
-    // ctxMerge.globalCompositeOperation = 'source-over';
+    ctxMerge.drawImage(compositeCanvas, 0, 0);
   }
+  Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
   busy = false;
-  return { data: segmentation.data, canvas: mergedCanvas || segmentation.canvas, alpha: segmentation.alpha };
+
+  return { data, canvas: mergedCanvas || compositeCanvas, alpha: alphaCanvas };
 }
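The rewrite's main memory-management idea is to collect every intermediate tensor in one `Record<string, Tensor>` map and dispose the whole map in a single pass; in isolation the pattern looks like this (the tfjs-node require, shapes, and tensor names are illustrative):

```js
// sketch of the collect-then-dispose pattern used in process() above
const tf = require('@tensorflow/tfjs-node'); // illustrative; any tfjs backend works

function pipeline(inputTensor, model) {
  const t = {}; // holds every intermediate tensor by name
  t.resize = tf.image.resizeBilinear(inputTensor, [256, 256], false);
  t.norm = tf.div(t.resize, 255);
  t.res = model.predict(t.norm);
  const data = t.res.dataSync(); // extract plain values before cleanup
  Object.keys(t).forEach((name) => tf.dispose(t[name])); // one cleanup pass instead of per-tensor dispose calls
  return Array.from(data);
}
```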
diff --git a/test/test-main.js b/test/test-main.js
index 79b43fa2..0792e592 100644
--- a/test/test-main.js
+++ b/test/test-main.js
@@ -327,7 +327,8 @@ async function test(Human, inputConfig) {
   ]);
 
   // test monkey-patch
-  human.env.Canvas = canvasJS.Canvas; // monkey-patch human to use external canvas library
+  globalThis.Canvas = canvasJS.Canvas; // monkey-patch to use external canvas library
+  globalThis.ImageData = canvasJS.ImageData; // monkey-patch to use external canvas library
   const inputImage = await canvasJS.loadImage('samples/ai-face.jpg'); // load image using canvas library
   const inputCanvas = new canvasJS.Canvas(inputImage.width, inputImage.height); // create canvas
   const ctx = inputCanvas.getContext('2d');
@@ -338,7 +339,7 @@ async function test(Human, inputConfig) {
 
   // test segmentation
   res = await human.segmentation(inputCanvas, inputCanvas);
-  if (!res || !res.data) log('error', 'failed: segmentation', res);
+  if (!res || !res.data || !res.canvas) log('error', 'failed: segmentation');
   else log('state', 'passed: segmentation', [res.data.length]);
 
   human.env.Canvas = undefined;
diff --git a/test/test-node-wasm.js b/test/test-node-wasm.js
index 06e92afb..8969ce25 100644
--- a/test/test-node-wasm.js
+++ b/test/test-node-wasm.js
@@ -5,9 +5,9 @@
 const Human = require('../dist/human.node-wasm.js');
 const test = require('./test-main.js').test;
 // @ts-ignore
-Human.env.Canvas = Canvas;
+Human.env.Canvas = Canvas; // requires monkey-patch as wasm does not have tf.browser namespace
 // @ts-ignore
-Human.env.Image = Image;
+Human.env.Image = Image; // requires monkey-patch as wasm does not have tf.browser namespace
 
 const config = {
   // modelBasePath: 'http://localhost:10030/models/',
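For context on what the updated test asserts: passing a second argument routes through the new background-merge path, returning a canvas with the background drawn first and the segmented composite on top. A hedged usage sketch (the helper and file names are illustrative, not from this diff):

```js
// sketch: background replacement via the merge path exercised by the test;
// assumes the globalThis.Canvas/ImageData patches shown above are applied
async function replaceBackground(human, canvasJS) {
  const person = await canvasJS.loadImage('person.jpg'); // illustrative inputs
  const scene = await canvasJS.loadImage('scene.jpg');
  const input = new canvasJS.Canvas(person.width, person.height);
  input.getContext('2d').drawImage(person, 0, 0);
  const res = await human.segmentation(input, scene); // second argument is the background
  return res.canvas; // merged result: scene behind the segmented person
}
```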