implemented movenet-multipose model

2021-08-20 09:05:07 -04:00 · 2021-08-20 09:05:07 -04:00 · 54d717bbff
parent 4f5ee67431
commit 54d717bbff
6 changed files with 114 additions and 54 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -9,12 +9,13 @@ Repository: **<git+https://github.com/vladmandic/human.git>**
 ## Changelog
 ### **HEAD -> main** 2021/08/19 mandic00@live.com
 ### **2.1.4** 2021/08/19 mandic00@live.com
 - add static type definitions to main class
-
+- fix interpolation overflow
 ### **origin/main** 2021/08/18 mandic00@live.com
 - rebuild full
 - improve face box caching
 - strict type checks
--- a/TODO.md
+++ b/TODO.md
@ -7,10 +7,6 @@ WebGL shader optimizations for faster load and initial detection
 - Implement WebGL uniforms for shaders: <https://github.com/tensorflow/tfjs/issues/5205>
 - Fix shader packing: <https://github.com/tensorflow/tfjs/issues/5343>
 MoveNet MultiPose Model: <https://github.com/vladmandic/movenet>
 - Implementation is ready, but model is 2x size and 0.5x performance
 <br>
 ## Exploring
@ -45,16 +41,24 @@ Feature is automatically disabled in NodeJS without user impact
 - Backend NodeJS missing kernel op `FlipLeftRight`  
  <https://github.com/tensorflow/tfjs/issues/4066>  
-  *Target: `Human` v2.2 with `TFJS` v3.9*
+  *Target: `Human` v2.2 with `TFJS` v3.9*  
 - Backend NodeJS missing kernel op `RotateWithOffset`  
  <https://github.com/tensorflow/tfjs/issues/5473>  
-  *Target: N/A*
+  *Target: N/A*  
 Hand detection using WASM backend has reduced precision due to math rounding errors in backend  
 *Target: N/A*
 <br>
 ### Body Detection
 MoveNet MultiPose model does not work with WASM backend due to missing F32 implementation
 - Backend WASM missing F32 implementation  
  <https://github.com/tensorflow/tfjs/issues/5516>  
  *Target: N/A*  
 ### Object Detection
 Object detection using CenterNet or NanoDet models is not working when using WASM backend due to missing kernel ops in TFJS  
--- a/demo/index.js
+++ b/demo/index.js
@ -51,6 +51,7 @@ let userConfig = {
  gesture: { enabled: false },
  hand: { enabled: false },
  body: { enabled: false },
  // body: { enabled: true, modelPath: 'movenet-multipose.json' },
  // body: { enabled: true, modelPath: 'posenet.json' },
  segmentation: { enabled: false },
  */
--- a/package.json
+++ b/package.json
@ -76,7 +76,7 @@
    "esbuild": "^0.12.21",
    "eslint": "^7.32.0",
    "eslint-config-airbnb-base": "^14.2.1",
-    "eslint-plugin-import": "^2.24.0",
+    "eslint-plugin-import": "^2.24.1",
    "eslint-plugin-json": "^3.1.0",
    "eslint-plugin-node": "^11.1.0",
    "eslint-plugin-promise": "^5.1.0",
--- a/src/movenet/movenet.ts
+++ b/src/movenet/movenet.ts
@ -11,8 +11,9 @@ import { Config } from '../config';
 // Loaded graph model; returned by load() and used by predict() for input shape lookup and inference.
 let model: GraphModel;
 // Single detected body part: confidence score, part name, pixel position and raw (model-space) position.
 type Keypoints = { score: number, part: string, position: [number, number], positionRaw: [number, number] };
 // Module-level keypoint buffer: cleared and refilled by the parse functions;
 // predict() checks that it is non-empty before taking the frame-skip path.
 const keypoints: Array<Keypoints> = [];
 // Single detected person as produced by the parse functions.
 type Person = { id: number, score: number, box: [number, number, number, number], boxRaw: [number, number, number, number], keypoints: Array<Keypoints> }
 // Last computed bounding box in image pixels (written by parseSinglePose).
 let box: [number, number, number, number] = [0, 0, 0, 0];
 // Last computed bounding box in raw model coordinates (written by both parse functions).
 let boxRaw: [number, number, number, number] = [0, 0, 0, 0];
 // Last computed score; reused as scratch for per-keypoint / per-person scores while parsing.
 let score = 0;
@ -29,6 +30,90 @@ export async function load(config: Config): Promise<GraphModel> {
  return model;
 }
 /**
  * Converts the output of a single-pose model into a one-element list of persons.
  *
  * Side effects: refills the module-level `keypoints` buffer and overwrites the
  * module-level `score`, `box` and `boxRaw` values.
  *
  * @param res model output converted to nested arrays; `res[0][0]` is read as a list of
  *   `[y, x, score]` triplets, one per body part (index order matches `bodyParts`)
  * @param config configuration object; only `config.body.minConfidence` is read
  * @param image input tensor; `shape[2]` / `shape[1]` are used to scale x / y
  *   (assumes NHWC layout, i.e. width / height -- TODO confirm against caller)
  * @returns array with exactly one person; when no keypoint passes the confidence
  *   threshold the person has score 0, empty keypoints and all-zero boxes
  */
 async function parseSinglePose(res, config, image) {
  keypoints.length = 0;
  const kpt = res[0][0];
  const scaleX = image.shape[2] || 0;
  const scaleY = image.shape[1] || 0;
  for (let id = 0; id < kpt.length; id++) {
    score = kpt[id][2];
    if (score > config.body.minConfidence) {
      keypoints.push({
        score: Math.round(100 * score) / 100,
        part: bodyParts[id],
        positionRaw: [ // normalized to 0..1
          kpt[id][1],
          kpt[id][0],
        ],
        position: [ // scaled to input image size
          Math.round(scaleX * kpt[id][1]),
          Math.round(scaleY * kpt[id][0]),
        ],
      });
    }
  }
  // overall score is the best individual keypoint score (0 when nothing was detected)
  score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
  // Math.min() / Math.max() of an empty list yield +/-Infinity, so fall back to a zero-sized range
  const range = (values: number[]): [number, number] => (values.length > 0 ? [Math.min(...values), Math.max(...values)] : [0, 0]);
  const [minX, maxX] = range(keypoints.map((a) => a.position[0]));
  const [minY, maxY] = range(keypoints.map((a) => a.position[1]));
  box = [minX, minY, maxX - minX, maxY - minY];
  const [minXRaw, maxXRaw] = range(keypoints.map((a) => a.positionRaw[0]));
  const [minYRaw, maxYRaw] = range(keypoints.map((a) => a.positionRaw[1]));
  boxRaw = [minXRaw, minYRaw, maxXRaw - minXRaw, maxYRaw - minYRaw];
  const persons: Array<Person> = [];
  // hand out a snapshot so that clearing the shared buffer on the next call
  // does not retroactively empty a result that was already returned
  persons.push({ id: 0, score, box, boxRaw, keypoints: [...keypoints] });
  return persons;
 }
 /**
  * Converts the output of a multi-pose model into a list of detected persons.
  *
  * Each entry of `res[0]` is read as a flat vector: 17 `[y, x, score]` triplets
  * (indices 0..50) followed by four box values (indices 51..54, used here as
  * top / left / bottom / right) and an overall person score (index 55).
  * NOTE(review): layout is inferred from the indexing below; verify against the
  * model documentation.
  *
  * Side effects: overwrites the module-level `score` and `boxRaw`, and leaves the
  * module-level `keypoints` buffer holding the keypoints of the last accepted person
  * (untouched when no person is accepted), which predict() relies on for frame skipping.
  *
  * @param res model output converted to nested arrays
  * @param config configuration object; only `config.body.minConfidence` is read
  * @param image input tensor; `shape[2]` / `shape[1]` are used to scale x / y
  *   (assumes NHWC layout, i.e. width / height -- TODO confirm against caller)
  * @returns one entry per person whose score is at least `minConfidence`;
  *   every person owns its own keypoints array
  */
 async function parseMultiPose(res, config, image) {
  const persons: Array<Person> = [];
  const scaleX = image.shape[2] || 0;
  const scaleY = image.shape[1] || 0;
  for (let p = 0; p < res[0].length; p++) {
    const kpt = res[0][p];
    score = Math.round(100 * kpt[51 + 4]) / 100;
    // eslint-disable-next-line no-continue
    if (score < config.body.minConfidence) continue;
    // per-person array: sharing one buffer across iterations would make every
    // returned person point at the keypoints of whichever person was parsed last
    const personKeypoints: Person['keypoints'] = [];
    for (let i = 0; i < 17; i++) {
      const partScore = Math.round(100 * kpt[3 * i + 2]) / 100;
      if (partScore > config.body.minConfidence) {
        personKeypoints.push({
          part: bodyParts[i],
          score: partScore,
          positionRaw: [
            kpt[3 * i + 1],
            kpt[3 * i + 0],
          ],
          position: [
            Math.trunc(kpt[3 * i + 1] * scaleX),
            Math.trunc(kpt[3 * i + 0] * scaleY),
          ],
        });
      }
    }
    // [x, y, width, height] in raw model coordinates
    boxRaw = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
    persons.push({
      id: p,
      score,
      boxRaw,
      box: [
        Math.trunc(boxRaw[0] * scaleX),
        Math.trunc(boxRaw[1] * scaleY),
        Math.trunc(boxRaw[2] * scaleX),
        Math.trunc(boxRaw[3] * scaleY),
      ],
      keypoints: personKeypoints,
    });
    // keep the shared buffer in sync (copy, not alias) so the frame-skip check still sees detections
    keypoints.length = 0;
    keypoints.push(...personKeypoints);
  }
  return persons;
 }
 export async function predict(image: Tensor, config: Config): Promise<Body[]> {
  if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) {
    skipped++;
@ -38,7 +123,9 @@ export async function predict(image: Tensor, config: Config): Promise<Body[]> {
  return new Promise(async (resolve) => {
    const tensor = tf.tidy(() => {
      if (!model.inputs[0].shape) return null;
-      const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
+      let inputSize = model.inputs[0].shape[2];
      if (inputSize === -1) inputSize = 256;
      const resize = tf.image.resizeBilinear(image, [inputSize, inputSize], false);
      const cast = tf.cast(resize, 'int32');
      return cast;
    });
@ -47,46 +134,13 @@ export async function predict(image: Tensor, config: Config): Promise<Body[]> {
    if (config.body.enabled) resT = await model.predict(tensor);
    tf.dispose(tensor);
-    if (resT) {
+    if (!resT) resolve([]);
-      keypoints.length = 0;
+    const res = await resT.array();
-      const res = await resT.array();
+    let persons;
-      tf.dispose(resT);
+    if (resT.shape[2] === 17) persons = await parseSinglePose(res, config, image);
-      const kpt = res[0][0];
+    else if (resT.shape[2] === 56) persons = await parseMultiPose(res, config, image);
-      for (let id = 0; id < kpt.length; id++) {
+    tf.dispose(resT);
-        score = kpt[id][2];
+
-        if (score > config.body.minConfidence) {
+    resolve(persons);
          keypoints.push({
            score: Math.round(100 * score) / 100,
            part: bodyParts[id],
            positionRaw: [ // normalized to 0..1
              kpt[id][1],
              kpt[id][0],
            ],
            position: [ // normalized to input image size
              Math.round((image.shape[2] || 0) * kpt[id][1]),
              Math.round((image.shape[1] || 0) * kpt[id][0]),
            ],
          });
        }
      }
    }
    score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
    const x = keypoints.map((a) => a.position[0]);
    const y = keypoints.map((a) => a.position[1]);
    box = [
      Math.min(...x),
      Math.min(...y),
      Math.max(...x) - Math.min(...x),
      Math.max(...y) - Math.min(...y),
    ];
    const xRaw = keypoints.map((a) => a.positionRaw[0]);
    const yRaw = keypoints.map((a) => a.positionRaw[1]);
    boxRaw = [
      Math.min(...xRaw),
      Math.min(...yRaw),
      Math.max(...xRaw) - Math.min(...xRaw),
      Math.max(...yRaw) - Math.min(...yRaw),
    ];
    resolve([{ id: 0, score, box, boxRaw, keypoints }]);
  });
 }
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit bdc4077a3df07abdf4a2d5b2d2beadf2e573e8d8
+Subproject commit c12e036ac382043f4b3a85cf71f93927af56cfe4
		`@ -1 +1 @@`
			`Subproject commit bdc4077a3df07abdf4a2d5b2d2beadf2e573e8d8`				`Subproject commit c12e036ac382043f4b3a85cf71f93927af56cfe4`