diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4c98289e..645160fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,9 +11,7 @@ Repository: ****
 
 ### **HEAD -> main** 2021/06/04 mandic00@live.com
 
-
-### **update for tfjs 3.7.0** 2021/06/04 mandic00@live.com
-
+- add meet and selfie models
 - add live hints to demo
 - switch worker from module to iife importscripts
 - release candidate
diff --git a/TODO.md b/TODO.md
index 5c1826b6..ca121d31 100644
--- a/TODO.md
+++ b/TODO.md
@@ -11,7 +11,7 @@ N/A
 ## In Progress
 
 - Switch to TypeScript 4.3
-- Implement segmentation model
+- Add backgrounds to segmentation
 
 ## Known Issues
 
diff --git a/demo/index.js b/demo/index.js
index 0f3e8b7a..48f5cf62 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -50,9 +50,7 @@ const userConfig = {
   hand: { enabled: false },
   body: { enabled: false },
   // body: { enabled: true, modelPath: 'posenet.json' },
-  // body: { enabled: true, modelPath: 'blazepose.json' },
-  // segmentation: { enabled: true, modelPath: 'meet.json' },
-  // segmentation: { enabled: true, modelPath: 'selfie.json' },
+  segmentation: { enabled: true },
   */
 };
 
@@ -211,8 +209,8 @@ async function drawResults(input) {
   // draw fps chart
   await menu.process.updateChart('FPS', ui.detectFPS);
 
-  // get updated canvas
-  if (ui.buffered || !result.canvas) {
+  // get updated canvas if missing or if we want buffering, but skip if segmentation is enabled
+  if (!result.canvas || (ui.buffered && !human.config.segmentation.enabled)) {
     const image = await human.image(input);
     result.canvas = image.canvas;
     human.tf.dispose(image.tensor);
@@ -489,6 +487,7 @@ async function processImage(input, title) {
     image.onload = async () => {
       if (ui.hintsThread) clearInterval(ui.hintsThread);
       ui.interpolated = false; // stop interpolating results if input is image
+      ui.buffered = false; // stop buffering result if input is image
      status(`processing image: ${title}`);
       const canvas = document.getElementById('canvas');
       image.width = image.naturalWidth;
@@ -676,6 +675,8 @@ function setupMenu() {
   menu.models.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
   menu.models.addBool('gestures', human.config.gesture, 'enabled', (val) => human.config.gesture.enabled = val);
   menu.models.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
+  menu.models.addBool('body segmentation', human.config.segmentation, 'enabled', (val) => human.config.segmentation.enabled = val);
+  menu.models.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
   menu.models.addBool('object detection', human.config.object, 'enabled', (val) => human.config.object.enabled = val);
   menu.models.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
   menu.models.addBool('face compare', compare, 'enabled', (val) => {
diff --git a/src/config.ts b/src/config.ts
index 986c63d9..d5a613ec 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -198,6 +198,8 @@ export interface Config {
   },
 
   /** Controlls and configures all body segmentation module
+   * if segmentation is enabled, output result.canvas will be augmented with masked image containing only person output
+   *
    * - enabled: true/false
    * - modelPath: object detection model, can be absolute path or relative to modelBasePath
    */
@@ -349,7 +351,8 @@ const config: Config = {
   },
 
   segmentation: {
-    enabled: false,
+    enabled: false, // if segmentation is enabled, output result.canvas will be augmented
+                    // with masked image containing only person output
     modelPath: 'selfie.json', // experimental: object detection model, can be absolute path or relative to modelBasePath
                               // can be 'selfie' or 'meet'
   },
diff --git a/src/helpers.ts b/src/helpers.ts
index 663298cc..4d578cad 100644
--- a/src/helpers.ts
+++ b/src/helpers.ts
@@ -39,3 +39,10 @@ export function mergeDeep(...objects) {
     return prev;
   }, {});
 }
+
+// helper function: return min and max from input array
+export const minmax = (data) => data.reduce((acc, val) => {
+  acc[0] = (acc[0] === undefined || val < acc[0]) ? val : acc[0];
+  acc[1] = (acc[1] === undefined || val > acc[1]) ? val : acc[1];
+  return acc;
+}, []);
diff --git a/src/segmentation/segmentation.ts b/src/segmentation/segmentation.ts
index cc84881a..126c07a0 100644
--- a/src/segmentation/segmentation.ts
+++ b/src/segmentation/segmentation.ts
@@ -20,7 +20,7 @@ export async function load(config: Config): Promise<GraphModel> {
     if (!model || !model['modelUrl']) log('load model failed:', config.segmentation.modelPath);
     else if (config.debug) log('load model:', model['modelUrl']);
   } else if (config.debug) log('cached model:', model['modelUrl']);
-  // if (!blurKernel) blurKernel = blur.getGaussianKernel(50, 1, 1);
+  // if (!blurKernel) blurKernel = blur.getGaussianKernel(5, 1, 1);
   return model;
 }
 
@@ -30,6 +30,8 @@ export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas
   const resizeInput = tf.image.resizeBilinear(input.tensor, [model.inputs[0].shape[1], model.inputs[0].shape[2]], false);
   const norm = resizeInput.div(255);
   const res = model.predict(norm) as Tensor;
+  // meet output: 1,256,256,1
+  // selfie output: 1,144,256,2
   tf.dispose(resizeInput);
   tf.dispose(norm);
 
@@ -39,16 +41,24 @@ export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas
 
   const squeeze = tf.squeeze(res, 0);
   let resizeOutput;
-  if (squeeze.shape[2] === 2) { // model meet has two channels for fg and bg
+  if (squeeze.shape[2] === 2) {
+    // model meet has two channels for fg and bg
     const softmax = squeeze.softmax();
     const [bg, fg] = tf.unstack(softmax, 2);
-    tf.dispose(softmax);
     const expand = fg.expandDims(2);
+    const pad = expand.expandDims(0);
+    tf.dispose(softmax);
     tf.dispose(bg);
     tf.dispose(fg);
-    resizeOutput = tf.image.resizeBilinear(expand, [input.tensor?.shape[1], input.tensor?.shape[2]]);
+    // running sofmax before unstack creates 2x2 matrix so we only take upper-left quadrant
+    const crop = tf.image.cropAndResize(pad, [[0, 0, 0.5, 0.5]], [0], [input.tensor?.shape[1], input.tensor?.shape[2]]);
+    // otherwise run softmax after unstack and use standard resize
+    // resizeOutput = tf.image.resizeBilinear(expand, [input.tensor?.shape[1], input.tensor?.shape[2]]);
+    resizeOutput = crop.squeeze(0);
+    tf.dispose(crop);
     tf.dispose(expand);
-  } else { // model selfie has a single channel
+    tf.dispose(pad);
+  } else { // model selfie has a single channel that we can use directly
     resizeOutput = tf.image.resizeBilinear(squeeze, [input.tensor?.shape[1], input.tensor?.shape[2]]);
   }
 
@@ -59,17 +69,21 @@ export async function predict(input: { tensor: Tensor | null, canvas: OffscreenCanvas
   tf.dispose(squeeze);
   tf.dispose(res);
 
-  const ctx = input.canvas.getContext('2d') as CanvasRenderingContext2D;
+  const original = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(input.canvas.width, input.canvas.height) : document.createElement('canvas'); // need one more copy since input may already have gl context so 2d context fails
+  original.width = input.canvas.width;
+  original.height = input.canvas.height;
+  const ctx = original.getContext('2d') as CanvasRenderingContext2D;
+
+  await ctx.drawImage(input.canvas, 0, 0);
   // https://developer.mozilla.org/en-US/docs/Web/API/CanvasRenderingContext2D/globalCompositeOperation
   // best options are: darken, color-burn, multiply
   ctx.globalCompositeOperation = 'darken';
-  await ctx?.drawImage(overlay, 0, 0);
-  ctx.globalCompositeOperation = 'source-in';
+  ctx.filter = 'blur(8px)'; // use css filter for bluring, can be done with gaussian blur manually instead
+  await ctx.drawImage(overlay, 0, 0);
+  ctx.globalCompositeOperation = 'source-in'; // reset
+  ctx.filter = 'none'; // reset
+
+  input.canvas = original;
+
   return true;
 }
-
-/* Segmentation todo:
-- Smoothen
-- Get latest canvas in interpolate
-- Buffered fetches latest from video instead from interpolate
-*/
diff --git a/wiki b/wiki
index 8e898a63..a69870f5 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 8e898a636f5254a3fe451b097c633c9965a8a680
+Subproject commit a69870f5763ae3fddd1243df10559aaf32c8f0da
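
With this change, enabling the segmentation module makes predict() replace input.canvas with a masked copy that keeps only the person (see src/config.ts and src/segmentation/segmentation.ts above), so result.canvas can be drawn directly. A minimal usage sketch, not part of the patch, assuming the published Human API (new Human(config), human.detect(), result.canvas); the element ids and modelBasePath are assumptions:

import Human from '@vladmandic/human';

const human = new Human({
  modelBasePath: '../models',                                // assumption: models are served locally
  segmentation: { enabled: true, modelPath: 'selfie.json' }, // or 'meet.json' for the two-channel meet model
});

async function run() {
  const video = document.getElementById('video');            // assumption: a playing <video> element
  const result = await human.detect(video);
  // with segmentation enabled, result.canvas holds the input with the background masked out
  const output = document.getElementById('output');
  output.getContext('2d').drawImage(result.canvas, 0, 0);
}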