From 2d3e81181c70ca4c48a79b1bb15ad2be1d0bc1e2 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 4 Jun 2021 13:51:01 -0400
Subject: [PATCH] add meet and selfie models

---
 TODO.md              |  2 +-
 demo/index-worker.js | 17 +++++++++++++----
 demo/index.js        | 32 ++++++++++++++++++++++++--------
 src/config.ts        | 15 +++++++++++++++
 src/human.ts         | 38 +++++++++++++++++++-------------------
 src/image/image.ts   | 16 +++++++++-------
 src/interpolate.ts   |  2 ++
 src/result.ts        |  2 +-
 wiki                 |  2 +-
 9 files changed, 85 insertions(+), 41 deletions(-)

diff --git a/TODO.md b/TODO.md
index 89381e5a..5c1826b6 100644
--- a/TODO.md
+++ b/TODO.md
@@ -11,7 +11,7 @@ N/A
 
 ## In Progress
 
 - Switch to TypeScript 4.3
-- Add hints to Demo app
+- Implement segmentation model
 
 ## Known Issues
diff --git a/demo/index-worker.js b/demo/index-worker.js
index 231abc85..f4cbb499 100644
--- a/demo/index-worker.js
+++ b/demo/index-worker.js
@@ -27,9 +27,18 @@ onmessage = async (msg) => {
     result.error = err.message;
     log('worker thread error:', err.message);
   }
-  // must strip canvas from return value as it cannot be transfered from worker thread
-  if (result.canvas) result.canvas = null;
-  // @ts-ignore tslint wrong type matching for worker
-  postMessage({ result });
+
+  if (result.canvas) { // convert canvas to imageData and send it by reference
+    const ctx = result.canvas.getContext('2d');
+    const img = ctx?.getImageData(0, 0, result.canvas.width, result.canvas.height);
+    result.canvas = null; // must strip original canvas from return value as it cannot be transfered from worker thread
+    // @ts-ignore tslint wrong type matching for worker
+    if (img) postMessage({ result, image: img.data.buffer, width: msg.data.width, height: msg.data.height }, [img?.data.buffer]);
+    // @ts-ignore tslint wrong type matching for worker
+    else postMessage({ result });
+  } else {
+    // @ts-ignore tslint wrong type matching for worker
+    postMessage({ result });
+  }
   busy = false;
 };
diff --git a/demo/index.js b/demo/index.js
index 5ba17d0d..0f3e8b7a 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -38,19 +38,21 @@ const userConfig = {
     enabled: false,
     flip: false,
   },
-  face: { enabled: true,
+  face: { enabled: false,
     detector: { return: true },
     mesh: { enabled: true },
     iris: { enabled: false },
     description: { enabled: false },
     emotion: { enabled: false },
   },
-  hand: { enabled: false },
-  // body: { enabled: true, modelPath: 'posenet.json' },
-  // body: { enabled: true, modelPath: 'blazepose.json' },
-  body: { enabled: false },
   object: { enabled: false },
   gesture: { enabled: true },
+  hand: { enabled: false },
+  body: { enabled: false },
+  // body: { enabled: true, modelPath: 'posenet.json' },
+  // body: { enabled: true, modelPath: 'blazepose.json' },
+  // segmentation: { enabled: true, modelPath: 'meet.json' },
+  // segmentation: { enabled: true, modelPath: 'selfie.json' },
 
   */
 };
@@ -267,9 +269,11 @@ async function drawResults(input) {
   if (ui.buffered) {
     ui.drawThread = requestAnimationFrame(() => drawResults(input));
   } else {
-    log('stopping buffered refresh');
-    if (ui.drawThread) cancelAnimationFrame(ui.drawThread);
-    ui.drawThread = null;
+    if (ui.drawThread) {
+      log('stopping buffered refresh');
+      cancelAnimationFrame(ui.drawThread);
+      ui.drawThread = null;
+    }
   }
 }
 
@@ -350,6 +354,8 @@ async function setupCamera() {
     video.onloadeddata = () => {
       if (settings.width > settings.height) canvas.style.width = '100vw';
       else canvas.style.height = '100vh';
+      canvas.width = video.videoWidth;
+      canvas.height = video.videoHeight;
       ui.menuWidth.input.setAttribute('value', video.videoWidth);
       ui.menuHeight.input.setAttribute('value', video.videoHeight);
       if (live) video.play();
@@ -400,6 +406,16 @@ function webWorker(input, image, canvas, timestamp) {
     }
     if (document.getElementById('gl-bench')) document.getElementById('gl-bench').style.display = ui.bench ? 'block' : 'none';
     lastDetectedResult = msg.data.result;
+
+    if (msg.data.image) {
+      lastDetectedResult.canvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(msg.data.width, msg.data.height) : document.createElement('canvas');
+      lastDetectedResult.canvas.width = msg.data.width;
+      lastDetectedResult.canvas.height = msg.data.height;
+      const ctx = lastDetectedResult.canvas.getContext('2d');
+      const imageData = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height);
+      ctx.putImageData(imageData, 0, 0);
+    }
+
     ui.framesDetect++;
     if (!ui.drawThread) drawResults(input);
     // eslint-disable-next-line no-use-before-define
diff --git a/src/config.ts b/src/config.ts
index c893ab0b..986c63d9 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -196,6 +196,15 @@ export interface Config {
     maxDetected: number,
     skipFrames: number,
   },
+
+  /** Controlls and configures all body segmentation module
+   * - enabled: true/false
+   * - modelPath: object detection model, can be absolute path or relative to modelBasePath
+   */
+  segmentation: {
+    enabled: boolean,
+    modelPath: string,
+  },
 }
 
 const config: Config = {
@@ -338,5 +347,11 @@ const config: Config = {
     skipFrames: 19,             // how many max frames to go without re-running the detector
                                 // only used when cacheSensitivity is not zero
   },
+
+  segmentation: {
+    enabled: false,
+    modelPath: 'selfie.json',   // experimental: object detection model, can be absolute path or relative to modelBasePath
+                                // can be 'selfie' or 'meet'
+  },
 };
 export { config as defaults };
diff --git a/src/human.ts b/src/human.ts
index 475fc5ab..3c2533b5 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -24,6 +24,7 @@ import * as image from './image/image';
 import * as draw from './draw/draw';
 import * as persons from './persons';
 import * as interpolate from './interpolate';
+import * as segmentation from './segmentation/segmentation';
 import * as sample from './sample';
 import * as app from '../package.json';
 import { Tensor } from './tfjs/types';
@@ -114,16 +115,7 @@ export class Human {
     nanodet: Model | null,
     centernet: Model | null,
     faceres: Model | null,
-  };
-  /** @internal: Currently loaded classes */
-  classes: {
-    facemesh: typeof facemesh;
-    emotion: typeof emotion;
-    body: typeof posenet | typeof blazepose | typeof movenet;
-    hand: typeof handpose;
-    nanodet: typeof nanodet;
-    centernet: typeof centernet;
-    faceres: typeof faceres;
+    segmentation: Model | null,
   };
   /** Reference face triangualtion array of 468 points, used for triangle references between points */
   faceTriangulation: typeof facemesh.triangulation;
@@ -173,20 +165,12 @@ export class Human {
       nanodet: null,
       centernet: null,
       faceres: null,
+      segmentation: null,
     };
     // export access to image processing
     // @ts-ignore eslint-typescript cannot correctly infer type in anonymous function
     this.image = (input: Input) => image.process(input, this.config);
     // export raw access to underlying models
-    this.classes = {
-      facemesh,
-      emotion,
-      faceres,
-      body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose,
-      hand: handpose,
-      nanodet,
-      centernet,
-    };
     this.faceTriangulation = facemesh.triangulation;
     this.faceUVMap = facemesh.uvmap;
     // include platform info
@@ -274,8 +258,10 @@ export class Human {
     }
     if (this.config.async) { // load models concurrently
       [
+        // @ts-ignore async model loading is not correctly inferred
         this.models.face,
         this.models.emotion,
+        // @ts-ignore async model loading is not correctly inferred
         this.models.handpose,
         this.models.posenet,
         this.models.blazepose,
@@ -284,6 +270,7 @@ export class Human {
         this.models.nanodet,
         this.models.centernet,
         this.models.faceres,
+        this.models.segmentation,
       ] = await Promise.all([
         this.models.face || (this.config.face.enabled ? facemesh.load(this.config) : null),
         this.models.emotion || ((this.config.face.enabled && this.config.face.emotion.enabled) ? emotion.load(this.config) : null),
@@ -295,6 +282,7 @@ export class Human {
         this.models.nanodet || (this.config.object.enabled && this.config.object.modelPath.includes('nanodet') ? nanodet.load(this.config) : null),
         this.models.centernet || (this.config.object.enabled && this.config.object.modelPath.includes('centernet') ? centernet.load(this.config) : null),
         this.models.faceres || ((this.config.face.enabled && this.config.face.description.enabled) ? faceres.load(this.config) : null),
+        this.models.segmentation || (this.config.segmentation.enabled ? segmentation.load(this.config) : null),
       ]);
     } else { // load models sequentially
       if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config);
@@ -307,6 +295,7 @@ export class Human {
       if (this.config.object.enabled && !this.models.nanodet && this.config.object.modelPath.includes('nanodet')) this.models.nanodet = await nanodet.load(this.config);
       if (this.config.object.enabled && !this.models.centernet && this.config.object.modelPath.includes('centernet')) this.models.centernet = await centernet.load(this.config);
       if (this.config.face.enabled && this.config.face.description.enabled && !this.models.faceres) this.models.faceres = await faceres.load(this.config);
+      if (this.config.segmentation.enabled && !this.models.segmentation) this.models.segmentation = await segmentation.load(this.config);
     }
 
     if (this.#firstRun) { // print memory stats on first run
@@ -568,6 +557,17 @@ export class Human {
       else if (this.performance.gesture) delete this.performance.gesture;
     }
 
+    // run segmentation
+    if (this.config.segmentation.enabled) {
+      this.analyze('Start Segmentation:');
+      this.state = 'run:segmentation';
+      timeStamp = now();
+      await segmentation.predict(process, this.config);
+      elapsedTime = Math.trunc(now() - timeStamp);
+      if (elapsedTime > 0) this.performance.segmentation = elapsedTime;
+      this.analyze('End Segmentation:');
+    }
+
     this.performance.total = Math.trunc(now() - timeStart);
     this.state = 'idle';
     this.result = {
diff --git a/src/image/image.ts b/src/image/image.ts
index 2b446511..653b6b62 100644
--- a/src/image/image.ts
+++ b/src/image/image.ts
@@ -138,7 +138,7 @@ export function process(input, config): { tensor: Tensor | null, canvas: Offscre
       const shape = [outCanvas.height, outCanvas.width, 3];
       pixels = tf.tensor3d(outCanvas.data, shape, 'int32');
     } else if (outCanvas instanceof ImageData) { // if input is imagedata, just use it
-      pixels = tf.browser.fromPixels(outCanvas);
+      pixels = tf.browser ? tf.browser.fromPixels(outCanvas) : null;
     } else if (config.backend === 'webgl' || config.backend === 'humangl') { // tf kernel-optimized method to get imagedata
       // we can use canvas as-is as it already has a context, so we do a silly one more canvas
       const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
@@ -146,7 +146,7 @@ export function process(input, config): { tensor: Tensor | null, canvas: Offscre
       tempCanvas.height = targetHeight;
       const tempCtx = tempCanvas.getContext('2d');
       tempCtx?.drawImage(outCanvas, 0, 0);
-      pixels = tf.browser.fromPixels(tempCanvas);
+      pixels = tf.browser ? tf.browser.fromPixels(tempCanvas) : null;
     } else { // cpu and wasm kernel does not implement efficient fromPixels method
       // we can use canvas as-is as it already has a context, so we do a silly one more canvas
       const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
@@ -155,12 +155,14 @@ export function process(input, config): { tensor: Tensor | null, canvas: Offscre
       const tempCtx = tempCanvas.getContext('2d');
       tempCtx?.drawImage(outCanvas, 0, 0);
       const data = tempCtx?.getImageData(0, 0, targetWidth, targetHeight);
-      pixels = tf.browser.fromPixels(data);
+      pixels = tf.browser ? tf.browser.fromPixels(data) : null;
+    }
+    if (pixels) {
+      const casted = pixels.toFloat();
+      tensor = casted.expandDims(0);
+      pixels.dispose();
+      casted.dispose();
     }
-    const casted = pixels.toFloat();
-    tensor = casted.expandDims(0);
-    pixels.dispose();
-    casted.dispose();
   }
   const canvas = config.filter.return ? outCanvas : null;
   return { tensor, canvas };
diff --git a/src/interpolate.ts b/src/interpolate.ts
index 0009e23a..7459192c 100644
--- a/src/interpolate.ts
+++ b/src/interpolate.ts
@@ -21,6 +21,8 @@ export function calc(newResult: Result): Result {
   // - at 1sec delay buffer = 1 which means live data is used
   const bufferedFactor = elapsed < 1000 ? 8 - Math.log(elapsed) : 1;
 
+  bufferedResult.canvas = newResult.canvas;
+
   // interpolate body results
   if (!bufferedResult.body || (newResult.body.length !== bufferedResult.body.length)) {
     bufferedResult.body = JSON.parse(JSON.stringify(newResult.body as Body[])); // deep clone once
diff --git a/src/result.ts b/src/result.ts
index 203cdd13..d8933576 100644
--- a/src/result.ts
+++ b/src/result.ts
@@ -176,7 +176,7 @@ export interface Result {
   /** global performance object with timing values for each operation */
   performance: Record<string, unknown>,
   /** optional processed canvas that can be used to draw input on screen */
-  readonly canvas?: OffscreenCanvas | HTMLCanvasElement,
+  canvas?: OffscreenCanvas | HTMLCanvasElement,
   /** timestamp of detection representing the milliseconds elapsed since the UNIX epoch */
   readonly timestamp: number,
   /** getter property that returns unified persons object */
diff --git a/wiki b/wiki
index 0087af56..8e898a63 160000
--- a/wiki
+++ b/wiki
@@ -1 +1 @@
-Subproject commit 0087af5684c5722b2cf7ffd3db57b8117b7ac8c5
+Subproject commit 8e898a636f5254a3fe451b097c633c9965a8a680
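
A minimal usage sketch of the segmentation option this patch introduces, assuming the published @vladmandic/human package; the import path, the preload call, and the appendChild output handling are illustrative, not part of the patch. The model file names 'selfie.json' / 'meet.json' are taken from src/config.ts above.

    // hypothetical usage: enable the new segmentation module via config
    import Human from '@vladmandic/human';

    const human = new Human({
      segmentation: { enabled: true, modelPath: 'selfie.json' }, // or 'meet.json'
      filter: { return: true }, // so result.canvas carries the processed frame
    });

    async function run(video) {
      await human.load(); // optional: preload models before first detect
      const result = await human.detect(video);
      if (result.canvas) document.body.appendChild(result.canvas); // hypothetical output handling
    }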
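
The demo/index-worker.js and demo/index.js changes together form a standard pattern for moving pixels out of a web worker without copying: serialize the canvas to an ImageData, post its underlying ArrayBuffer as a transferable, and rebuild a canvas on the main thread. A stripped-down sketch of just that pattern, standalone from Human; all names are illustrative:

    // worker side: convert a canvas into a transferable pixel buffer
    const ctx = canvas.getContext('2d');
    const img = ctx.getImageData(0, 0, canvas.width, canvas.height);
    // the second argument transfers ownership of the buffer instead of copying it
    postMessage({ width: img.width, height: img.height, image: img.data.buffer }, [img.data.buffer]);

    // main thread: rebuild a canvas from the received buffer
    worker.onmessage = (msg) => {
      const out = document.createElement('canvas');
      out.width = msg.data.width;
      out.height = msg.data.height;
      const imageData = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height);
      out.getContext('2d').putImageData(imageData, 0, 0);
    };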