From a5b5352ea6ca9424f4c6e53551ed24f4bd56459f Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Wed, 19 May 2021 08:27:28 -0400 Subject: [PATCH] add experimental mb3-centernet object detection --- demo/index.js | 9 ++-- package.json | 2 +- src/config.ts | 3 +- src/draw/draw.ts | 2 +- src/human.ts | 30 +++++++---- src/object/centernet.ts | 80 ++++++++++++++++++++++++++++++ src/{nanodet => object}/labels.ts | 0 src/{nanodet => object}/nanodet.ts | 2 +- wiki | 2 +- 9 files changed, 111 insertions(+), 19 deletions(-) create mode 100644 src/object/centernet.ts rename src/{nanodet => object}/labels.ts (100%) rename src/{nanodet => object}/nanodet.ts (97%) diff --git a/demo/index.js b/demo/index.js index 44d8acf3..f3039707 100644 --- a/demo/index.js +++ b/demo/index.js @@ -9,10 +9,11 @@ import webRTC from './helpers/webrtc.js'; let human; const userConfig = { - warmup: 'none', + warmup: 'full', /* backend: 'webgl', - async: true, + async: false, + cacheSensitivity: 0, filter: { enabled: false, flip: false, @@ -26,9 +27,9 @@ const userConfig = { }, hand: { enabled: false }, gesture: { enabled: false }, - body: { enabled: true, modelPath: 'posenet.json' }, + body: { enabled: false, modelPath: 'posenet.json' }, // body: { enabled: true, modelPath: 'blazepose.json' }, - // object: { enabled: true }, + object: { enabled: false }, */ }; diff --git a/package.json b/package.json index 908c5fa9..088c359e 100644 --- a/package.json +++ b/package.json @@ -68,7 +68,7 @@ "canvas": "^2.8.0", "chokidar": "^3.5.1", "dayjs": "^1.10.4", - "esbuild": "^0.12.0", + "esbuild": "^0.12.1", "eslint": "^7.26.0", "eslint-config-airbnb-base": "^14.2.1", "eslint-plugin-import": "^2.23.2", diff --git a/src/config.ts b/src/config.ts index 0e774638..d2f64c26 100644 --- a/src/config.ts +++ b/src/config.ts @@ -319,7 +319,8 @@ const config: Config = { object: { enabled: false, - modelPath: 'nanodet.json', // experimental: object detection model, can be absolute path or relative to modelBasePath + modelPath: 'mb3-centernet.json', // experimental: object detection model, can be absolute path or relative to modelBasePath + // can be 'mb3-centernet' or 'nanodet' minConfidence: 0.2, // threshold for discarding a prediction iouThreshold: 0.4, // ammount of overlap between two detected objects before one object is removed maxDetected: 10, // maximum number of objects detected in the input diff --git a/src/draw/draw.ts b/src/draw/draw.ts index fdfe01c0..da7b8e67 100644 --- a/src/draw/draw.ts +++ b/src/draw/draw.ts @@ -54,7 +54,7 @@ export const options: DrawOptions = { roundRect: 28, drawPoints: false, drawLabels: true, - drawBoxes: false, + drawBoxes: true, drawPolygons: true, fillPolygons: false, useDepth: true, diff --git a/src/human.ts b/src/human.ts index 95dcdc0b..ad1b0277 100644 --- a/src/human.ts +++ b/src/human.ts @@ -11,7 +11,8 @@ import * as emotion from './emotion/emotion'; import * as posenet from './posenet/posenet'; import * as handpose from './handpose/handpose'; import * as blazepose from './blazepose/blazepose'; -import * as nanodet from './nanodet/nanodet'; +import * as nanodet from './object/nanodet'; +import * as centernet from './object/centernet'; import * as gesture from './gesture/gesture'; import * as image from './image/image'; import * as draw from './draw/draw'; @@ -93,6 +94,7 @@ export class Human { emotion: Model | null, embedding: Model | null, nanodet: Model | null, + centernet: Model | null, faceres: Model | null, }; /** Internal: Currently loaded classes */ @@ -102,6 +104,7 @@ export class Human { body: typeof posenet | typeof blazepose; hand: typeof handpose; nanodet: typeof nanodet; + centernet: typeof centernet; faceres: typeof faceres; }; /** Face triangualtion array of 468 points, used for triangle references between points */ @@ -148,6 +151,7 @@ export class Human { emotion: null, embedding: null, nanodet: null, + centernet: null, faceres: null, }; // export access to image processing @@ -161,6 +165,7 @@ export class Human { body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose, hand: handpose, nanodet, + centernet, }; this.faceTriangulation = facemesh.triangulation; this.faceUVMap = facemesh.uvmap; @@ -231,7 +236,7 @@ export class Human { const timeStamp = now(); if (userConfig) this.config = mergeDeep(this.config, userConfig); - if (this.#firstRun) { + if (this.#firstRun) { // print version info on first run and check for correct backend setup if (this.config.debug) log(`version: ${this.version}`); if (this.config.debug) log(`tfjs version: ${this.tf.version_core}`); if (this.config.debug) log('platform:', this.sysinfo.platform); @@ -243,7 +248,7 @@ export class Human { if (this.config.debug) log('tf flags:', this.tf.ENV.flags); } } - if (this.config.async) { + if (this.config.async) { // load models concurrently [ this.models.face, this.models.emotion, @@ -251,6 +256,7 @@ export class Human { this.models.posenet, this.models.blazepose, this.models.nanodet, + this.models.centernet, this.models.faceres, ] = await Promise.all([ this.models.face || (this.config.face.enabled ? facemesh.load(this.config) : null), @@ -258,20 +264,22 @@ export class Human { this.models.handpose || (this.config.hand.enabled ? handpose.load(this.config) : null), this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('posenet') ? posenet.load(this.config) : null), this.models.blazepose || (this.config.body.enabled && this.config.body.modelPath.includes('blazepose') ? blazepose.load(this.config) : null), - this.models.nanodet || (this.config.object.enabled ? nanodet.load(this.config) : null), + this.models.nanodet || (this.config.object.enabled && this.config.object.modelPath.includes('nanodet') ? nanodet.load(this.config) : null), + this.models.centernet || (this.config.object.enabled && this.config.object.modelPath.includes('centernet') ? centernet.load(this.config) : null), this.models.faceres || ((this.config.face.enabled && this.config.face.description.enabled) ? faceres.load(this.config) : null), ]); - } else { + } else { // load models sequentially if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config); if (this.config.face.enabled && this.config.face.emotion.enabled && !this.models.emotion) this.models.emotion = await emotion.load(this.config); if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config); if (this.config.body.enabled && !this.models.posenet && this.config.body.modelPath.includes('posenet')) this.models.posenet = await posenet.load(this.config); if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelPath.includes('blazepose')) this.models.blazepose = await blazepose.load(this.config); - if (this.config.object.enabled && !this.models.nanodet) this.models.nanodet = await nanodet.load(this.config); + if (this.config.object.enabled && !this.models.nanodet && this.config.object.modelPath.includes('nanodet')) this.models.nanodet = await nanodet.load(this.config); + if (this.config.object.enabled && !this.models.centernet && this.config.object.modelPath.includes('centernet')) this.models.centernet = await centernet.load(this.config); if (this.config.face.enabled && this.config.face.description.enabled && !this.models.faceres) this.models.faceres = await faceres.load(this.config); } - if (this.#firstRun) { + if (this.#firstRun) { // print memory stats on first run if (this.config.debug) log('tf engine state:', this.tf.engine().state.numBytes, 'bytes', this.tf.engine().state.numTensors, 'tensors'); this.#firstRun = false; } @@ -343,7 +351,7 @@ export class Human { // check if input changed sufficiently to trigger new detections /** @hidden */ #skipFrame = async (input) => { - if (this.config.cacheSensitivity === 0) return true; + if (this.config.cacheSensitivity === 0) return false; const resizeFact = 50; const reduced = input.resizeBilinear([Math.trunc(input.shape[1] / resizeFact), Math.trunc(input.shape[2] / resizeFact)]); const sumT = this.tf.sum(reduced); @@ -476,12 +484,14 @@ export class Human { // run nanodet this.analyze('Start Object:'); if (this.config.async) { - objectRes = this.config.object.enabled ? nanodet.predict(process.tensor, this.config) : []; + if (this.config.object.modelPath.includes('nanodet')) objectRes = this.config.object.enabled ? nanodet.predict(process.tensor, this.config) : []; + else if (this.config.object.modelPath.includes('centernet')) objectRes = this.config.object.enabled ? centernet.predict(process.tensor, this.config) : []; if (this.perf.object) delete this.perf.object; } else { this.state = 'run:object'; timeStamp = now(); - objectRes = this.config.object.enabled ? await nanodet.predict(process.tensor, this.config) : []; + if (this.config.object.modelPath.includes('nanodet')) objectRes = this.config.object.enabled ? await nanodet.predict(process.tensor, this.config) : []; + else if (this.config.object.modelPath.includes('centernet')) objectRes = this.config.object.enabled ? await centernet.predict(process.tensor, this.config) : []; current = Math.trunc(now() - timeStamp); if (current > 0) this.perf.object = current; } diff --git a/src/object/centernet.ts b/src/object/centernet.ts new file mode 100644 index 00000000..364db92d --- /dev/null +++ b/src/object/centernet.ts @@ -0,0 +1,80 @@ +import { log, join } from '../helpers'; +import * as tf from '../../dist/tfjs.esm.js'; +import { labels } from './labels'; + +let model; +let last: Array<{}> = []; +let skipped = Number.MAX_SAFE_INTEGER; + +export async function load(config) { + if (!model) { + model = await tf.loadGraphModel(join(config.modelBasePath, config.object.modelPath)); + const inputs = Object.values(model.modelSignature['inputs']); + model.inputSize = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : null; + if (!model.inputSize) throw new Error(`Human: Cannot determine model inputSize: ${config.object.modelPath}`); + if (!model || !model.modelUrl) log('load model failed:', config.object.modelPath); + else if (config.debug) log('load model:', model.modelUrl); + } else if (config.debug) log('cached model:', model.modelUrl); + return model; +} + +async function process(res, inputSize, outputShape, config) { + const results: Array<{ score: number, class: number, label: string, box: number[], boxRaw: number[] }> = []; + const detections = res.arraySync(); + const squeezeT = tf.squeeze(res); + res.dispose(); + const arr = tf.split(squeezeT, 6, 1); // x1, y1, x2, y2, score, class + squeezeT.dispose(); + const stackT = tf.stack([arr[1], arr[0], arr[3], arr[2]], 1); // tf.nms expects y, x + const boxesT = stackT.squeeze(); + const scoresT = arr[4].squeeze(); + const classesT = arr[5].squeeze(); + arr.forEach((t) => t.dispose()); + // @ts-ignore boxesT type is not correctly inferred + const nmsT = await tf.image.nonMaxSuppressionAsync(boxesT, scoresT, config.object.maxDetected, config.object.iouThreshold, config.object.minConfidence); + boxesT.dispose(); + scoresT.dispose(); + classesT.dispose(); + const nms = nmsT.dataSync(); + nmsT.dispose(); + for (const id of nms) { + const score = detections[0][id][4]; + const classVal = detections[0][id][5]; + const label = labels[classVal].label; + const boxRaw = [ + detections[0][id][0] / inputSize, + detections[0][id][1] / inputSize, + detections[0][id][2] / inputSize, + detections[0][id][3] / inputSize, + ]; + const box = [ + Math.trunc(boxRaw[0] * outputShape[0]), + Math.trunc(boxRaw[1] * outputShape[1]), + Math.trunc(boxRaw[2] * outputShape[0]), + Math.trunc(boxRaw[3] * outputShape[1]), + ]; + results.push({ score, class: classVal, label, box, boxRaw }); + } + return results; +} + +export async function predict(image, config) { + if (!model) return null; + if ((skipped < config.object.skipFrames) && config.skipFrame && (last.length > 0)) { + skipped++; + return last; + } + skipped = 0; + return new Promise(async (resolve) => { + const outputSize = [image.shape[2], image.shape[1]]; + const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false); + + let objectT; + if (config.object.enabled) objectT = model.execute(resize, 'tower_0/detections'); + resize.dispose(); + + const obj = await process(objectT, model.inputSize, outputSize, config); + last = obj; + resolve(obj); + }); +} diff --git a/src/nanodet/labels.ts b/src/object/labels.ts similarity index 100% rename from src/nanodet/labels.ts rename to src/object/labels.ts diff --git a/src/nanodet/nanodet.ts b/src/object/nanodet.ts similarity index 97% rename from src/nanodet/nanodet.ts rename to src/object/nanodet.ts index 9cbae6cd..8bee21d6 100644 --- a/src/nanodet/nanodet.ts +++ b/src/object/nanodet.ts @@ -78,7 +78,7 @@ async function process(res, inputSize, outputShape, config) { // normally nms is run on raw results, but since boxes need to be calculated this way we skip calulcation of // unnecessary boxes and run nms only on good candidates (basically it just does IOU analysis as scores are already filtered) - const nmsBoxes = results.map((a) => a.boxRaw); + const nmsBoxes = results.map((a) => [a.boxRaw[1], a.boxRaw[0], a.boxRaw[3], a.boxRaw[2]]); // switches coordinates from x,y to y,x as expected by tf.nms const nmsScores = results.map((a) => a.score); let nmsIdx: any[] = []; if (nmsBoxes && nmsBoxes.length > 0) { diff --git a/wiki b/wiki index 534d4d77..fa896c53 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit 534d4d77d99b0fc71913e8ef6242e4c6461614f5 +Subproject commit fa896c5330432f26839d362b81ea9128db60d86b