add experimental mb3-centernet object detection

2021-05-19 08:27:28 -04:00 · 2021-05-19 08:27:28 -04:00 · fa3ab21215
parent 271b821ab7
commit fa3ab21215
21 changed files with 227798 additions and 5288 deletions
--- a/demo/index.js
+++ b/demo/index.js
@ -9,10 +9,11 @@ import webRTC from './helpers/webrtc.js';
 let human;

 const userConfig = {
-  warmup: 'none',
+  warmup: 'full',
  /*
  backend: 'webgl',
-  async: true,
+  async: false,
+  cacheSensitivity: 0,
  filter: {
    enabled: false,
    flip: false,
@ -26,9 +27,9 @@ const userConfig = {
  },
  hand: { enabled: false },
  gesture: { enabled: false },
-  body: { enabled: true, modelPath: 'posenet.json' },
+  body: { enabled: false, modelPath: 'posenet.json' },
  // body: { enabled: true, modelPath: 'blazepose.json' },
-  // object: { enabled: true },
+  object: { enabled: false },
  */
 };

--- a/dist/human.esm-nobundle.js
+++ b/dist/human.esm-nobundle.js
--- a/dist/human.esm-nobundle.js.map
+++ b/dist/human.esm-nobundle.js.map
--- a/dist/human.esm.js
+++ b/dist/human.esm.js
--- a/dist/human.esm.js.map
+++ b/dist/human.esm.js.map
--- a/dist/human.js
+++ b/dist/human.js
--- a/dist/human.node-gpu.js
+++ b/dist/human.node-gpu.js
--- a/dist/human.node-wasm.js
+++ b/dist/human.node-wasm.js
--- a/dist/human.node.js
+++ b/dist/human.node.js
--- a/dist/tfjs.esm.js
+++ b/dist/tfjs.esm.js
--- a/dist/tfjs.esm.js.map
+++ b/dist/tfjs.esm.js.map
--- a/models/mb3-centernet.bin
+++ b/models/mb3-centernet.bin
--- a/models/mb3-centernet.json
+++ b/models/mb3-centernet.json
--- a/package.json
+++ b/package.json
@ -68,7 +68,7 @@
    "canvas": "^2.8.0",
    "chokidar": "^3.5.1",
    "dayjs": "^1.10.4",
-    "esbuild": "^0.12.0",
+    "esbuild": "^0.12.1",
    "eslint": "^7.26.0",
    "eslint-config-airbnb-base": "^14.2.1",
    "eslint-plugin-import": "^2.23.2",
--- a/src/config.ts
+++ b/src/config.ts
@ -319,7 +319,8 @@ const config: Config = {

  object: {
    enabled: false,
-    modelPath: 'nanodet.json',  // experimental: object detection model, can be absolute path or relative to modelBasePath
+    modelPath: 'mb3-centernet.json',  // experimental: object detection model, can be absolute path or relative to modelBasePath
+                             // can be 'mb3-centernet.json' or 'nanodet.json'
    minConfidence: 0.2,      // threshold for discarding a prediction
    iouThreshold: 0.4,       // amount of overlap between two detected objects before one object is removed
    maxDetected: 10,         // maximum number of objects detected in the input
--- a/src/draw/draw.ts
+++ b/src/draw/draw.ts
@ -54,7 +54,7 @@ export const options: DrawOptions = {
  roundRect: <number>28,
  drawPoints: <Boolean>false,
  drawLabels: <Boolean>true,
-  drawBoxes: <Boolean>false,
+  drawBoxes: <Boolean>true,
  drawPolygons: <Boolean>true,
  fillPolygons: <Boolean>false,
  useDepth: <Boolean>true,
--- a/src/human.ts
+++ b/src/human.ts
@ -11,7 +11,8 @@ import * as emotion from './emotion/emotion';
 import * as posenet from './posenet/posenet';
 import * as handpose from './handpose/handpose';
 import * as blazepose from './blazepose/blazepose';
-import * as nanodet from './nanodet/nanodet';
+import * as nanodet from './object/nanodet';
+import * as centernet from './object/centernet';
 import * as gesture from './gesture/gesture';
 import * as image from './image/image';
 import * as draw from './draw/draw';
@ -93,6 +94,7 @@ export class Human {
    emotion: Model | null,
    embedding: Model | null,
    nanodet: Model | null,
+    centernet: Model | null,
    faceres: Model | null,
  };
  /** Internal: Currently loaded classes */
@ -102,6 +104,7 @@ export class Human {
    body: typeof posenet | typeof blazepose;
    hand: typeof handpose;
    nanodet: typeof nanodet;
+    centernet: typeof centernet;
    faceres: typeof faceres;
  };
  /** Face triangulation array of 468 points, used for triangle references between points */
@ -148,6 +151,7 @@ export class Human {
      emotion: null,
      embedding: null,
      nanodet: null,
+      centernet: null,
      faceres: null,
    };
    // export access to image processing
@ -161,6 +165,7 @@ export class Human {
      body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose,
      hand: handpose,
      nanodet,
+      centernet,
    };
    this.faceTriangulation = facemesh.triangulation;
    this.faceUVMap = facemesh.uvmap;
@ -231,7 +236,7 @@ export class Human {
    const timeStamp = now();
    if (userConfig) this.config = mergeDeep(this.config, userConfig);

-    if (this.#firstRun) {
+    if (this.#firstRun) { // print version info on first run and check for correct backend setup
      if (this.config.debug) log(`version: ${this.version}`);
      if (this.config.debug) log(`tfjs version: ${this.tf.version_core}`);
      if (this.config.debug) log('platform:', this.sysinfo.platform);
@ -243,7 +248,7 @@ export class Human {
        if (this.config.debug) log('tf flags:', this.tf.ENV.flags);
      }
    }
-    if (this.config.async) {
+    if (this.config.async) { // load models concurrently
      [
        this.models.face,
        this.models.emotion,
@ -251,6 +256,7 @@ export class Human {
        this.models.posenet,
        this.models.blazepose,
        this.models.nanodet,
+        this.models.centernet,
        this.models.faceres,
      ] = await Promise.all([
        this.models.face || (this.config.face.enabled ? facemesh.load(this.config) : null),
@ -258,20 +264,22 @@ export class Human {
        this.models.handpose || (this.config.hand.enabled ? handpose.load(this.config) : null),
        this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('posenet') ? posenet.load(this.config) : null),
        this.models.blazepose || (this.config.body.enabled && this.config.body.modelPath.includes('blazepose') ? blazepose.load(this.config) : null),
-        this.models.nanodet || (this.config.object.enabled ? nanodet.load(this.config) : null),
+        this.models.nanodet || (this.config.object.enabled && this.config.object.modelPath.includes('nanodet') ? nanodet.load(this.config) : null),
+        this.models.centernet || (this.config.object.enabled && this.config.object.modelPath.includes('centernet') ? centernet.load(this.config) : null),
        this.models.faceres || ((this.config.face.enabled && this.config.face.description.enabled) ? faceres.load(this.config) : null),
      ]);
-    } else {
+    } else { // load models sequentially
      if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config);
      if (this.config.face.enabled && this.config.face.emotion.enabled && !this.models.emotion) this.models.emotion = await emotion.load(this.config);
      if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config);
      if (this.config.body.enabled && !this.models.posenet && this.config.body.modelPath.includes('posenet')) this.models.posenet = await posenet.load(this.config);
      if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelPath.includes('blazepose')) this.models.blazepose = await blazepose.load(this.config);
-      if (this.config.object.enabled && !this.models.nanodet) this.models.nanodet = await nanodet.load(this.config);
+      if (this.config.object.enabled && !this.models.nanodet && this.config.object.modelPath.includes('nanodet')) this.models.nanodet = await nanodet.load(this.config);
+      if (this.config.object.enabled && !this.models.centernet && this.config.object.modelPath.includes('centernet')) this.models.centernet = await centernet.load(this.config);
      if (this.config.face.enabled && this.config.face.description.enabled && !this.models.faceres) this.models.faceres = await faceres.load(this.config);
    }

-    if (this.#firstRun) {
+    if (this.#firstRun) { // print memory stats on first run
      if (this.config.debug) log('tf engine state:', this.tf.engine().state.numBytes, 'bytes', this.tf.engine().state.numTensors, 'tensors');
      this.#firstRun = false;
    }
@ -343,7 +351,7 @@ export class Human {
  // check if input changed sufficiently to trigger new detections
  /** @hidden */
  #skipFrame = async (input) => {
-    if (this.config.cacheSensitivity === 0) return true;
+    if (this.config.cacheSensitivity === 0) return false;
    const resizeFact = 50;
    const reduced = input.resizeBilinear([Math.trunc(input.shape[1] / resizeFact), Math.trunc(input.shape[2] / resizeFact)]);
    const sumT = this.tf.sum(reduced);
@ -476,12 +484,14 @@ export class Human {
      // run nanodet
      this.analyze('Start Object:');
      if (this.config.async) {
-        objectRes = this.config.object.enabled ? nanodet.predict(process.tensor, this.config) : [];
+        if (this.config.object.modelPath.includes('nanodet')) objectRes = this.config.object.enabled ? nanodet.predict(process.tensor, this.config) : [];
+        else if (this.config.object.modelPath.includes('centernet')) objectRes = this.config.object.enabled ? centernet.predict(process.tensor, this.config) : [];
        if (this.perf.object) delete this.perf.object;
      } else {
        this.state = 'run:object';
        timeStamp = now();
-        objectRes = this.config.object.enabled ? await nanodet.predict(process.tensor, this.config) : [];
+        if (this.config.object.modelPath.includes('nanodet')) objectRes = this.config.object.enabled ? await nanodet.predict(process.tensor, this.config) : [];
+        else if (this.config.object.modelPath.includes('centernet')) objectRes = this.config.object.enabled ? await centernet.predict(process.tensor, this.config) : [];
        current = Math.trunc(now() - timeStamp);
        if (current > 0) this.perf.object = current;
      }
--- a/src/object/centernet.ts
+++ b/src/object/centernet.ts
@ -0,0 +1,80 @@
+import { log, join } from '../helpers';
+import * as tf from '../../dist/tfjs.esm.js';
+import { labels } from './labels';
+
+let model; // loaded graph model; assigned by load() and reused by predict()
+let last: Array<{}> = []; // results of the most recent detection run; returned again by predict() while frames are being skipped
+let skipped = Number.MAX_SAFE_INTEGER; // calls served from cache since the last model run; starts at max so the first predict() always runs the model
+
+/**
+ * Load the object detection graph model once and cache it at module level.
+ *
+ * The model is fetched from `config.modelBasePath` joined with `config.object.modelPath`.
+ * Its input resolution is read from the first entry of the model signature inputs
+ * (`tensorShape.dim[2].size`) and stored on the model as `inputSize`.
+ *
+ * @param config configuration object; reads `modelBasePath`, `object.modelPath` and `debug`
+ * @returns the cached model instance, with `inputSize` attached
+ * @throws Error when the input size cannot be determined from the model signature
+ */
+export async function load(config) {
+  if (model) {
+    if (config.debug) log('cached model:', model.modelUrl);
+    return model;
+  }
+  // keep the candidate in a local until it is validated, so a model that fails validation is never cached
+  const candidate = await tf.loadGraphModel(join(config.modelBasePath, config.object.modelPath));
+  if (!candidate || !candidate.modelUrl) log('load model failed:', config.object.modelPath);
+  else if (config.debug) log('load model:', candidate.modelUrl);
+  // optional chaining turns a missing signature/input into NaN below instead of an unrelated TypeError
+  const signatureInputs = candidate?.modelSignature?.['inputs'] ?? {};
+  const [firstInput] = Object.values<any>(signatureInputs);
+  const inputSize = parseInt(firstInput?.tensorShape?.dim?.[2]?.size, 10);
+  if (!inputSize) throw new Error(`Human: Cannot determine model inputSize: ${config.object.modelPath}`); // NaN and 0 are both rejected
+  model = Object.assign(candidate, { inputSize });
+  return model;
+}
+
+/**
+ * Convert the raw detections tensor into a list of result objects.
+ *
+ * The tensor is squeezed and split into six columns (x1, y1, x2, y2, score, class),
+ * non-max suppression is run on the boxes and scores, and each surviving detection
+ * is returned with its score, class index, label and box.
+ * All tensors created here, and the input tensor `res`, are disposed before returning.
+ *
+ * @param res raw detections tensor produced by the model; disposed by this function
+ * @param inputSize model input resolution; raw coordinates are divided by it to get `boxRaw`
+ * @param outputShape pair of output dimensions; index 0 scales x values, index 1 scales y values
+ * @param config configuration object; reads `object.maxDetected`, `object.iouThreshold` and `object.minConfidence`
+ * @returns array of detections kept by non-max suppression
+ */
+async function process(res, inputSize, outputShape, config) {
+  const results: Array<{ score: number, class: number, label: string, box: number[], boxRaw: number[] }> = [];
+  // download the values up front: results are read from this array after the tensors are released
+  const detections = res.arraySync();
+  const squeezeT = tf.squeeze(res);
+  res.dispose();
+  const arr = tf.split(squeezeT, 6, 1); // x1, y1, x2, y2, score, class
+  squeezeT.dispose();
+  const stackT = tf.stack([arr[1], arr[0], arr[3], arr[2]], 1); // tf.nms expects y, x
+  const boxesT = stackT.squeeze();
+  stackT.dispose(); // squeeze returns a new tensor, so the stacked intermediate must be released separately
+  const scoresT = arr[4].squeeze();
+  arr.forEach((t) => t.dispose());
+  // @ts-ignore boxesT type is not correctly inferred
+  const nmsT = await tf.image.nonMaxSuppressionAsync(boxesT, scoresT, config.object.maxDetected, config.object.iouThreshold, config.object.minConfidence);
+  boxesT.dispose();
+  scoresT.dispose();
+  const nms = nmsT.dataSync(); // indices of the detections that survived suppression
+  nmsT.dispose();
+  for (const id of nms) {
+    const detection = detections[0][id];
+    const score = detection[4];
+    const classVal = detection[5];
+    const label = labels[classVal].label;
+    // coordinates normalized by the model input size
+    const boxRaw = [
+      detection[0] / inputSize,
+      detection[1] / inputSize,
+      detection[2] / inputSize,
+      detection[3] / inputSize,
+    ];
+    // NOTE(review): box holds scaled x1, y1, x2, y2 corner values - confirm that consumers expect corners rather than x, y, width, height
+    const box = [
+      Math.trunc(boxRaw[0] * outputShape[0]),
+      Math.trunc(boxRaw[1] * outputShape[1]),
+      Math.trunc(boxRaw[2] * outputShape[0]),
+      Math.trunc(boxRaw[3] * outputShape[1]),
+    ];
+    results.push({ score, class: classVal, label, box, boxRaw });
+  }
+  return results;
+}
+
+/**
+ * Run object detection on an image tensor, or return cached results while frames are being skipped.
+ *
+ * Cached results are returned when fewer than `config.object.skipFrames` calls have been
+ * served from cache since the last model run, `config.skipFrame` is set, and a previous
+ * non-empty result exists.
+ *
+ * @param image input image tensor; resized to the model input size before execution
+ * @param config configuration object; reads `object.skipFrames`, `object.enabled` and `skipFrame`
+ * @returns `null` when no model is loaded, an empty array when object detection is disabled,
+ *          otherwise the array of detections
+ */
+export async function predict(image, config) {
+  if (!model) return null;
+  if ((skipped < config.object.skipFrames) && config.skipFrame && (last.length > 0)) {
+    skipped++;
+    return last;
+  }
+  skipped = 0;
+  // without this guard the model is not executed and post-processing would receive an undefined tensor
+  if (!config.object.enabled) return [];
+  // presumably [width, height] of an NHWC image tensor - confirm against caller
+  const outputSize = [image.shape[2], image.shape[1]];
+  const resize = tf.image.resizeBilinear(image, [model.inputSize, model.inputSize], false);
+  let objectT;
+  try {
+    objectT = model.execute(resize, 'tower_0/detections');
+  } finally {
+    resize.dispose(); // release the resized input even if execution throws
+  }
+  // a plain async function lets failures reject the returned promise instead of leaving it pending
+  const obj = await process(objectT, model.inputSize, outputSize, config);
+  last = obj;
+  return obj;
+}
--- a/src/nanodet/labels.ts
+++ b/src/nanodet/labels.ts
--- a/src/nanodet/nanodet.ts
+++ b/src/nanodet/nanodet.ts
@ -78,7 +78,7 @@ async function process(res, inputSize, outputShape, config) {

  // normally nms is run on raw results, but since boxes need to be calculated this way we skip calculation of
  // unnecessary boxes and run nms only on good candidates (basically it just does IOU analysis as scores are already filtered)
-  const nmsBoxes = results.map((a) => a.boxRaw);
+  const nmsBoxes = results.map((a) => [a.boxRaw[1], a.boxRaw[0], a.boxRaw[3], a.boxRaw[2]]); // switches coordinates from x,y to y,x as expected by tf.nms
  const nmsScores = results.map((a) => a.score);
  let nmsIdx: any[] = [];
  if (nmsBoxes && nmsBoxes.length > 0) {
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 534d4d77d99b0fc71913e8ef6242e4c6461614f5
+Subproject commit fa896c5330432f26839d362b81ea9128db60d86b