implemented movenet-multipose model

pull/280/head
Vladimir Mandic 2021-08-20 09:05:07 -04:00
parent aabe01f9b0
commit 070bb3a2c1
6 changed files with 114 additions and 54 deletions

CHANGELOG.md

@@ -9,12 +9,13 @@ Repository: **<git+https://github.com/vladmandic/human.git>**
 ## Changelog
+### **HEAD -> main** 2021/08/19 mandic00@live.com
 ### **2.1.4** 2021/08/19 mandic00@live.com
 - add static type definitions to main class
+- fix interpolation overflow
-### **origin/main** 2021/08/18 mandic00@live.com
 - rebuild full
 - improve face box caching
 - strict type checks

TODO.md

@@ -7,10 +7,6 @@ WebGL shader optimizations for faster load and initial detection
 - Implement WebGL uniforms for shaders: <https://github.com/tensorflow/tfjs/issues/5205>
 - Fix shader packing: <https://github.com/tensorflow/tfjs/issues/5343>
-MoveNet MultiPose Model: <https://github.com/vladmandic/movenet>
-- Implementation is ready, but model is 2x size and 0.5x performance
 <br>
 ## Exploring

@@ -55,6 +51,14 @@ Hand detection using WASM backend has reduced precision due to math rounding errors
 <br>
+### Body Detection
+MoveNet MultiPose model does not work with WASM backend due to missing F32 implementation
+- Backend WASM missing F32 implementation
+  <https://github.com/tensorflow/tfjs/issues/5516>
+*Target: N/A*
 ### Object Detection
 Object detection using CenterNet or NanoDet models is not working when using WASM backend due to missing kernel ops in TFJS
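Note on the new Body Detection item: because the WASM backend is missing the F32 kernel ops the multipose model needs (tfjs issue 5516), a caller can guard against running it on WASM. A minimal sketch, assuming the standard tfjs backend API; the helper name is illustrative and not part of this commit:

import * as tf from '@tensorflow/tfjs';

// Illustrative guard: fall back from WASM before loading movenet-multipose,
// since the WASM backend lacks the required F32 kernel implementation
async function ensureBackendForMultiPose(modelPath: string): Promise<void> {
  if (modelPath.includes('multipose') && tf.getBackend() === 'wasm') {
    await tf.setBackend('webgl'); // any backend with full F32 support works
    await tf.ready();
  }
}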

demo/index.js

@@ -51,6 +51,7 @@ let userConfig = {
   gesture: { enabled: false },
   hand: { enabled: false },
   body: { enabled: false },
+  // body: { enabled: true, modelPath: 'movenet-multipose.json' },
   // body: { enabled: true, modelPath: 'posenet.json' },
   segmentation: { enabled: false },
 */
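The commented-out line added above is all that is needed to switch the demo to the new model. For context, a minimal usage sketch against the Human API, assuming default model path resolution; the run helper is illustrative:

import Human from '@vladmandic/human';

// Enable the new multipose model via configuration, mirroring the
// commented-out demo line above
const human = new Human({
  body: { enabled: true, modelPath: 'movenet-multipose.json' },
});

// Illustrative helper: detect and log every person found in one input frame
async function run(input: HTMLVideoElement) {
  const result = await human.detect(input);
  for (const person of result.body) console.log(person.id, person.score, person.box);
}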

package.json

@@ -76,7 +76,7 @@
     "esbuild": "^0.12.21",
     "eslint": "^7.32.0",
     "eslint-config-airbnb-base": "^14.2.1",
-    "eslint-plugin-import": "^2.24.0",
+    "eslint-plugin-import": "^2.24.1",
     "eslint-plugin-json": "^3.1.0",
     "eslint-plugin-node": "^11.1.0",
     "eslint-plugin-promise": "^5.1.0",

src/movenet/movenet.ts

@@ -11,8 +11,9 @@ import { Config } from '../config';
 let model: GraphModel;
 type Keypoints = { score: number, part: string, position: [number, number], positionRaw: [number, number] };
 const keypoints: Array<Keypoints> = [];
+type Person = { id: number, score: number, box: [number, number, number, number], boxRaw: [number, number, number, number], keypoints: Array<Keypoints> }
 let box: [number, number, number, number] = [0, 0, 0, 0];
 let boxRaw: [number, number, number, number] = [0, 0, 0, 0];
 let score = 0;
@@ -29,28 +30,8 @@ export async function load(config: Config): Promise<GraphModel> {
   return model;
 }
-export async function predict(image: Tensor, config: Config): Promise<Body[]> {
-  if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) {
-    skipped++;
-    return [{ id: 0, score, box, boxRaw, keypoints }];
-  }
-  skipped = 0;
-  return new Promise(async (resolve) => {
-    const tensor = tf.tidy(() => {
-      if (!model.inputs[0].shape) return null;
-      const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
-      const cast = tf.cast(resize, 'int32');
-      return cast;
-    });
-    let resT;
-    if (config.body.enabled) resT = await model.predict(tensor);
-    tf.dispose(tensor);
-    if (resT) {
+async function parseSinglePose(res, config, image) {
   keypoints.length = 0;
-  const res = await resT.array();
-  tf.dispose(resT);
   const kpt = res[0][0];
   for (let id = 0; id < kpt.length; id++) {
     score = kpt[id][2];
@@ -69,7 +50,6 @@
       });
     }
   }
-  }
   score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
   const x = keypoints.map((a) => a.position[0]);
   const y = keypoints.map((a) => a.position[1]);
@@ -87,6 +67,80 @@
     Math.max(...xRaw) - Math.min(...xRaw),
     Math.max(...yRaw) - Math.min(...yRaw),
   ];
-  resolve([{ id: 0, score, box, boxRaw, keypoints }]);
+  const persons: Array<Person> = [];
+  persons.push({ id: 0, score, box, boxRaw, keypoints });
+  return persons;
+}
+
+async function parseMultiPose(res, config, image) {
+  const persons: Array<Person> = [];
+  for (let p = 0; p < res[0].length; p++) {
+    const kpt = res[0][p];
+    score = Math.round(100 * kpt[51 + 4]) / 100;
+    // eslint-disable-next-line no-continue
+    if (score < config.body.minConfidence) continue;
+    keypoints.length = 0;
+    for (let i = 0; i < 17; i++) {
+      const partScore = Math.round(100 * kpt[3 * i + 2]) / 100;
+      if (partScore > config.body.minConfidence) {
+        keypoints.push({
+          part: bodyParts[i],
+          score: partScore,
+          positionRaw: [
+            kpt[3 * i + 1],
+            kpt[3 * i + 0],
+          ],
+          position: [
+            Math.trunc(kpt[3 * i + 1] * (image.shape[2] || 0)),
+            Math.trunc(kpt[3 * i + 0] * (image.shape[1] || 0)),
+          ],
+        });
+      }
+    }
+    boxRaw = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
+    persons.push({
+      id: p,
+      score,
+      boxRaw,
+      box: [
+        Math.trunc(boxRaw[0] * (image.shape[2] || 0)),
+        Math.trunc(boxRaw[1] * (image.shape[1] || 0)),
+        Math.trunc(boxRaw[2] * (image.shape[2] || 0)),
+        Math.trunc(boxRaw[3] * (image.shape[1] || 0)),
+      ],
+      keypoints,
+    });
+  }
+  return persons;
+}
+
+export async function predict(image: Tensor, config: Config): Promise<Body[]> {
+  if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) {
+    skipped++;
+    return [{ id: 0, score, box, boxRaw, keypoints }];
+  }
+  skipped = 0;
+  return new Promise(async (resolve) => {
+    const tensor = tf.tidy(() => {
+      if (!model.inputs[0].shape) return null;
+      let inputSize = model.inputs[0].shape[2];
+      if (inputSize === -1) inputSize = 256;
+      const resize = tf.image.resizeBilinear(image, [inputSize, inputSize], false);
+      const cast = tf.cast(resize, 'int32');
+      return cast;
+    });
+    let resT;
+    if (config.body.enabled) resT = await model.predict(tensor);
+    tf.dispose(tensor);
+    if (!resT) resolve([]);
+    const res = await resT.array();
+    let persons;
+    if (resT.shape[2] === 17) persons = await parseSinglePose(res, config, image);
+    else if (resT.shape[2] === 56) persons = await parseMultiPose(res, config, image);
+    tf.dispose(resT);
+    resolve(persons);
 });
 }
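The shape check in predict() is what lets one implementation handle both model families: single-pose MoveNet (Lightning/Thunder) outputs [1, 1, 17, 3], i.e. 17 keypoints as [y, x, score], while MoveNet MultiPose outputs [1, 6, 56], i.e. up to 6 persons, each as 17 * 3 = 51 keypoint values followed by [ymin, xmin, ymax, xmax, score]. A standalone sketch of decoding one 56-value record, mirroring the offsets parseMultiPose uses; the helper and type names are illustrative, not part of the commit:

// Decode one multipose record of 56 floats; coordinates are normalized 0..1
type DecodedKeypoint = { y: number, x: number, score: number };

function decodeMultiPoseRecord(rec: number[]): { keypoints: DecodedKeypoint[], boxRaw: number[], score: number } {
  const keypoints: DecodedKeypoint[] = [];
  for (let i = 0; i < 17; i++) { // 17 keypoints, each stored as [y, x, score]
    keypoints.push({ y: rec[3 * i + 0], x: rec[3 * i + 1], score: rec[3 * i + 2] });
  }
  // bounding box is stored after the keypoints as [ymin, xmin, ymax, xmax, score];
  // converted here to [x, y, width, height] exactly as parseMultiPose does
  const boxRaw = [rec[51 + 1], rec[51 + 0], rec[51 + 3] - rec[51 + 1], rec[51 + 2] - rec[51 + 0]];
  return { keypoints, boxRaw, score: rec[51 + 4] };
}

The skip-frame branch at the top of predict() reuses the previous keypoints for up to config.body.skipFrames frames when config.skipFrame is set, trading detection freshness for per-frame cost on video input.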

wiki

@@ -1 +1 @@
-Subproject commit bdc4077a3df07abdf4a2d5b2d2beadf2e573e8d8
+Subproject commit c12e036ac382043f4b3a85cf71f93927af56cfe4