implemented movenet-multipose model

pull/356/head
Vladimir Mandic 2021-08-20 09:05:07 -04:00
parent 4f5ee67431
commit 54d717bbff
6 changed files with 114 additions and 54 deletions

View File

@ -9,12 +9,13 @@ Repository: **<git+https://github.com/vladmandic/human.git>**
## Changelog ## Changelog
### **HEAD -> main** 2021/08/19 mandic00@live.com
### **2.1.4** 2021/08/19 mandic00@live.com ### **2.1.4** 2021/08/19 mandic00@live.com
- add static type definitions to main class - add static type definitions to main class
- fix interpolation overflow
### **origin/main** 2021/08/18 mandic00@live.com
- rebuild full - rebuild full
- improve face box caching - improve face box caching
- strict type checks - strict type checks

16
TODO.md
View File

@ -7,10 +7,6 @@ WebGL shader optimizations for faster load and initial detection
- Implement WebGL uniforms for shaders: <https://github.com/tensorflow/tfjs/issues/5205> - Implement WebGL uniforms for shaders: <https://github.com/tensorflow/tfjs/issues/5205>
- Fix shader packing: <https://github.com/tensorflow/tfjs/issues/5343> - Fix shader packing: <https://github.com/tensorflow/tfjs/issues/5343>
MoveNet MultiPose Model: <https://github.com/vladmandic/movenet>
- Implementation is ready, but model is 2x size and 0.5x performance
<br> <br>
## Exploring ## Exploring
@ -45,16 +41,24 @@ Feature is automatically disabled in NodeJS without user impact
- Backend NodeJS missing kernel op `FlipLeftRight` - Backend NodeJS missing kernel op `FlipLeftRight`
<https://github.com/tensorflow/tfjs/issues/4066> <https://github.com/tensorflow/tfjs/issues/4066>
*Target: `Human` v2.2 with `TFJS` v3.9* *Target: `Human` v2.2 with `TFJS` v3.9*
- Backend NodeJS missing kernel op `RotateWithOffset` - Backend NodeJS missing kernel op `RotateWithOffset`
<https://github.com/tensorflow/tfjs/issues/5473> <https://github.com/tensorflow/tfjs/issues/5473>
*Target: N/A* *Target: N/A*
Hand detection using WASM backend has reduced precision due to math rounding errors in backend Hand detection using WASM backend has reduced precision due to math rounding errors in backend
*Target: N/A* *Target: N/A*
<br> <br>
### Body Detection
MoveNet MultiPose model does not work with WASM backend due to missing F32 implementation
- Backend WASM missing F32 implementation
<https://github.com/tensorflow/tfjs/issues/5516>
*Target: N/A*
### Object Detection ### Object Detection
Object detection using CenterNet or NanoDet models is not working when using WASM backend due to missing kernel ops in TFJS Object detection using CenterNet or NanoDet models is not working when using WASM backend due to missing kernel ops in TFJS

View File

@ -51,6 +51,7 @@ let userConfig = {
gesture: { enabled: false }, gesture: { enabled: false },
hand: { enabled: false }, hand: { enabled: false },
body: { enabled: false }, body: { enabled: false },
// body: { enabled: true, modelPath: 'movenet-multipose.json' },
// body: { enabled: true, modelPath: 'posenet.json' }, // body: { enabled: true, modelPath: 'posenet.json' },
segmentation: { enabled: false }, segmentation: { enabled: false },
*/ */

View File

@ -76,7 +76,7 @@
"esbuild": "^0.12.21", "esbuild": "^0.12.21",
"eslint": "^7.32.0", "eslint": "^7.32.0",
"eslint-config-airbnb-base": "^14.2.1", "eslint-config-airbnb-base": "^14.2.1",
"eslint-plugin-import": "^2.24.0", "eslint-plugin-import": "^2.24.1",
"eslint-plugin-json": "^3.1.0", "eslint-plugin-json": "^3.1.0",
"eslint-plugin-node": "^11.1.0", "eslint-plugin-node": "^11.1.0",
"eslint-plugin-promise": "^5.1.0", "eslint-plugin-promise": "^5.1.0",

View File

@ -11,8 +11,9 @@ import { Config } from '../config';
let model: GraphModel; let model: GraphModel;
type Keypoints = { score: number, part: string, position: [number, number], positionRaw: [number, number] }; type Keypoints = { score: number, part: string, position: [number, number], positionRaw: [number, number] };
const keypoints: Array<Keypoints> = []; const keypoints: Array<Keypoints> = [];
// Person: one detected body, as assembled by the pose parsers in this module.
// - id: index assigned by the parser (0 for single-pose, loop index for multi-pose)
// - score: confidence value for the whole person
// - box: four numbers expressed in input-image units (raw values scaled by the image tensor shape)
// - boxRaw: four numbers in the model's own output scale (presumably normalized 0..1 - confirm against model docs)
// - keypoints: body parts that passed the configured minimum confidence
type Person = { id: number, score: number, box: [number, number, number, number], boxRaw: [number, number, number, number], keypoints: Array<Keypoints> }
let box: [number, number, number, number] = [0, 0, 0, 0]; let box: [number, number, number, number] = [0, 0, 0, 0];
let boxRaw: [number, number, number, number] = [0, 0, 0, 0]; let boxRaw: [number, number, number, number] = [0, 0, 0, 0];
let score = 0; let score = 0;
@ -29,6 +30,90 @@ export async function load(config: Config): Promise<GraphModel> {
return model; return model;
} }
/**
 * Decode single-pose model output into a one-element list of persons.
 *
 * Reads `res[0][0]` as a list of `[y, x, score]` triplets (one per body part),
 * keeps those whose score exceeds `config.body.minConfidence`, and derives the
 * person's bounding boxes from the extent of the kept keypoints.
 *
 * Side effects: rewrites the module-level `keypoints`, `score`, `box` and
 * `boxRaw`; the returned person references the module-level `keypoints` array
 * itself (not a copy), so it is overwritten by the next call.
 *
 * @param res nested array produced from the model output tensor
 * @param config configuration object; only `config.body.minConfidence` is read
 * @param image input tensor; `shape[2]` scales x and `shape[1]` scales y
 * @returns array holding exactly one person (with zero-sized boxes and no
 *          keypoints when nothing passes the confidence threshold)
 */
async function parseSinglePose(res, config, image) {
  keypoints.length = 0;
  const kpt = res[0][0];
  for (let id = 0; id < kpt.length; id++) {
    score = kpt[id][2];
    if (score > config.body.minConfidence) {
      keypoints.push({
        score: Math.round(100 * score) / 100,
        part: bodyParts[id],
        positionRaw: [ // values as returned by the model (x first, then y)
          kpt[id][1],
          kpt[id][0],
        ],
        position: [ // scaled to input image size
          Math.round((image.shape[2] || 0) * kpt[id][1]),
          Math.round((image.shape[1] || 0) * kpt[id][0]),
        ],
      });
    }
  }
  // overall score is the best individual keypoint score (0 when none were kept)
  score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
  if (keypoints.length === 0) {
    // Math.min()/Math.max() over an empty list yield +/-Infinity; report empty boxes instead
    box = [0, 0, 0, 0];
    boxRaw = [0, 0, 0, 0];
  } else {
    const x = keypoints.map((a) => a.position[0]);
    const y = keypoints.map((a) => a.position[1]);
    const minX = Math.min(...x);
    const minY = Math.min(...y);
    box = [minX, minY, Math.max(...x) - minX, Math.max(...y) - minY];
    const xRaw = keypoints.map((a) => a.positionRaw[0]);
    const yRaw = keypoints.map((a) => a.positionRaw[1]);
    const minXRaw = Math.min(...xRaw);
    const minYRaw = Math.min(...yRaw);
    boxRaw = [minXRaw, minYRaw, Math.max(...xRaw) - minXRaw, Math.max(...yRaw) - minYRaw];
  }
  const persons: Array<Person> = [];
  persons.push({ id: 0, score, box, boxRaw, keypoints });
  return persons;
}
/**
 * Decode multi-pose model output into a list of detected persons.
 *
 * Each entry of `res[0]` is read as a flat array: 17 keypoints stored as
 * consecutive `[y, x, score]` triplets (indices 0..50), followed by four box
 * values at indices 51..54 (used here as yMin, xMin, yMax, xMax) and the
 * overall person score at index 55.
 *
 * Persons scoring below `config.body.minConfidence` are skipped; within a
 * person only keypoints scoring above the threshold are kept.
 *
 * Every returned person owns its own keypoints array. The module-level
 * `keypoints`, `score` and `boxRaw` are still updated so that code relying on
 * that shared state keeps seeing the most recently parsed person.
 *
 * @param res nested array produced from the model output tensor
 * @param config configuration object; only `config.body.minConfidence` is read
 * @param image input tensor; `shape[2]` scales x and `shape[1]` scales y
 * @returns persons that passed the confidence threshold, `id` being the
 *          person's index in the model output
 */
async function parseMultiPose(res, config, image) {
  const persons: Array<Person> = [];
  const numKeypoints = 17;
  const boxOffset = 3 * numKeypoints; // box values start right after the keypoint triplets
  const scaleX = image.shape[2] || 0;
  const scaleY = image.shape[1] || 0;
  for (let p = 0; p < res[0].length; p++) {
    const kpt = res[0][p];
    score = Math.round(100 * kpt[boxOffset + 4]) / 100;
    // eslint-disable-next-line no-continue
    if (score < config.body.minConfidence) continue;
    // fresh array per person: reusing one shared array would make every
    // returned person alias the keypoints of the last person parsed
    const personKeypoints: Array<Keypoints> = [];
    for (let i = 0; i < numKeypoints; i++) {
      const partScore = Math.round(100 * kpt[3 * i + 2]) / 100;
      if (partScore > config.body.minConfidence) {
        personKeypoints.push({
          part: bodyParts[i],
          score: partScore,
          positionRaw: [
            kpt[3 * i + 1],
            kpt[3 * i + 0],
          ],
          position: [
            Math.trunc(kpt[3 * i + 1] * scaleX),
            Math.trunc(kpt[3 * i + 0] * scaleY),
          ],
        });
      }
    }
    // mirror into the module-level array so shared state matches previous behavior
    keypoints.length = 0;
    keypoints.push(...personKeypoints);
    // convert corner coordinates to [x, y, width, height]
    boxRaw = [
      kpt[boxOffset + 1],
      kpt[boxOffset + 0],
      kpt[boxOffset + 3] - kpt[boxOffset + 1],
      kpt[boxOffset + 2] - kpt[boxOffset + 0],
    ];
    persons.push({
      id: p,
      score,
      boxRaw,
      box: [
        Math.trunc(boxRaw[0] * scaleX),
        Math.trunc(boxRaw[1] * scaleY),
        Math.trunc(boxRaw[2] * scaleX),
        Math.trunc(boxRaw[3] * scaleY),
      ],
      keypoints: personKeypoints,
    });
  }
  return persons;
}
export async function predict(image: Tensor, config: Config): Promise<Body[]> { export async function predict(image: Tensor, config: Config): Promise<Body[]> {
if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) { if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) {
skipped++; skipped++;
@ -38,7 +123,9 @@ export async function predict(image: Tensor, config: Config): Promise<Body[]> {
return new Promise(async (resolve) => { return new Promise(async (resolve) => {
const tensor = tf.tidy(() => { const tensor = tf.tidy(() => {
if (!model.inputs[0].shape) return null; if (!model.inputs[0].shape) return null;
const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false); let inputSize = model.inputs[0].shape[2];
if (inputSize === -1) inputSize = 256;
const resize = tf.image.resizeBilinear(image, [inputSize, inputSize], false);
const cast = tf.cast(resize, 'int32'); const cast = tf.cast(resize, 'int32');
return cast; return cast;
}); });
@ -47,46 +134,13 @@ export async function predict(image: Tensor, config: Config): Promise<Body[]> {
if (config.body.enabled) resT = await model.predict(tensor); if (config.body.enabled) resT = await model.predict(tensor);
tf.dispose(tensor); tf.dispose(tensor);
if (resT) { if (!resT) resolve([]);
keypoints.length = 0; const res = await resT.array();
const res = await resT.array(); let persons;
tf.dispose(resT); if (resT.shape[2] === 17) persons = await parseSinglePose(res, config, image);
const kpt = res[0][0]; else if (resT.shape[2] === 56) persons = await parseMultiPose(res, config, image);
for (let id = 0; id < kpt.length; id++) { tf.dispose(resT);
score = kpt[id][2];
if (score > config.body.minConfidence) { resolve(persons);
keypoints.push({
score: Math.round(100 * score) / 100,
part: bodyParts[id],
positionRaw: [ // normalized to 0..1
kpt[id][1],
kpt[id][0],
],
position: [ // normalized to input image size
Math.round((image.shape[2] || 0) * kpt[id][1]),
Math.round((image.shape[1] || 0) * kpt[id][0]),
],
});
}
}
}
score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
const x = keypoints.map((a) => a.position[0]);
const y = keypoints.map((a) => a.position[1]);
box = [
Math.min(...x),
Math.min(...y),
Math.max(...x) - Math.min(...x),
Math.max(...y) - Math.min(...y),
];
const xRaw = keypoints.map((a) => a.positionRaw[0]);
const yRaw = keypoints.map((a) => a.positionRaw[1]);
boxRaw = [
Math.min(...xRaw),
Math.min(...yRaw),
Math.max(...xRaw) - Math.min(...xRaw),
Math.max(...yRaw) - Math.min(...yRaw),
];
resolve([{ id: 0, score, box, boxRaw, keypoints }]);
}); });
} }

2
wiki

@ -1 +1 @@
Subproject commit bdc4077a3df07abdf4a2d5b2d2beadf2e573e8d8 Subproject commit c12e036ac382043f4b3a85cf71f93927af56cfe4