mirror of https://github.com/vladmandic/human
implemented movenet-multipose model
parent aabe01f9b0
commit 070bb3a2c1
@@ -9,12 +9,13 @@ Repository: **<git+https://github.com/vladmandic/human.git>**
 
 ## Changelog
 
+### **HEAD -> main** 2021/08/19 mandic00@live.com
 
 ### **2.1.4** 2021/08/19 mandic00@live.com
 
 - add static type definitions to main class
-### **origin/main** 2021/08/18 mandic00@live.com
+- fix interpolation overflow
 
 - rebuild full
 - improve face box caching
 - strict type checks
TODO.md (16 changed lines)
@@ -7,10 +7,6 @@ WebGL shader optimizations for faster load and initial detection
 - Implement WebGL uniforms for shaders: <https://github.com/tensorflow/tfjs/issues/5205>
 - Fix shader packing: <https://github.com/tensorflow/tfjs/issues/5343>
 
-MoveNet MultiPose Model: <https://github.com/vladmandic/movenet>
-
-- Implementation is ready, but model is 2x size and 0.5x performance
-
 <br>
 
 ## Exploring
@@ -45,16 +41,24 @@ Feature is automatically disabled in NodeJS without user impact
 
 - Backend NodeJS missing kernel op `FlipLeftRight`
 <https://github.com/tensorflow/tfjs/issues/4066>
 *Target: `Human` v2.2 with `TFJS` v3.9*
 - Backend NodeJS missing kernel op `RotateWithOffset`
 <https://github.com/tensorflow/tfjs/issues/5473>
 *Target: N/A*
 
 Hand detection using WASM backend has reduced precision due to math rounding errors in backend
 *Target: N/A*
 
 <br>
 
+### Body Detection
+
+MoveNet MultiPose model does not work with WASM backend due to missing F32 implementation
+
+- Backend WASM missing F32 implementation
+<https://github.com/tensorflow/tfjs/issues/5516>
+*Target: N/A*
+
 ### Object Detection
 
 Object detection using CenterNet or NanoDet models is not working when using WASM backend due to missing kernel ops in TFJS
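Because the multipose model cannot run on the WASM backend, an app that forces `backend: 'wasm'` needs to avoid selecting it. Below is a minimal sketch of such a guard; it assumes the `Human` API surface (default export, `human.tf`, `human.config.body.modelPath`) and the single-pose `movenet-lightning.json` model as fallback, and is not part of this commit:

```ts
import Human from '@vladmandic/human';

async function createHuman() {
  // request the wasm backend together with the new multipose model
  const human = new Human({ backend: 'wasm', body: { enabled: true, modelPath: 'movenet-multipose.json' } });
  await human.load();
  // multipose needs F32 kernels that the wasm backend does not provide (tfjs issue 5516),
  // so fall back to the single-pose model when wasm is the active backend
  if (human.tf.getBackend() === 'wasm') human.config.body.modelPath = 'movenet-lightning.json';
  return human;
}
```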
@@ -51,6 +51,7 @@ let userConfig = {
   gesture: { enabled: false },
   hand: { enabled: false },
   body: { enabled: false },
+  // body: { enabled: true, modelPath: 'movenet-multipose.json' },
   // body: { enabled: true, modelPath: 'posenet.json' },
   segmentation: { enabled: false },
 */
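For reference, uncommenting that new line amounts to running the demo with a user config like the one below. This is an illustrative sketch only; the `video` element and the `webgl` backend choice are assumptions, not part of the commit:

```ts
import Human from '@vladmandic/human';

// hypothetical config enabling the new multipose model
const userConfig = {
  backend: 'webgl',
  body: { enabled: true, modelPath: 'movenet-multipose.json', minConfidence: 0.2 },
};

async function run(video: HTMLVideoElement) {
  const human = new Human(userConfig);
  const result = await human.detect(video);
  // result.body is an array of persons: { id, score, box, boxRaw, keypoints }
  for (const person of result.body) console.log(person.id, person.score, person.box);
}
```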
@@ -76,7 +76,7 @@
     "esbuild": "^0.12.21",
     "eslint": "^7.32.0",
     "eslint-config-airbnb-base": "^14.2.1",
-    "eslint-plugin-import": "^2.24.0",
+    "eslint-plugin-import": "^2.24.1",
     "eslint-plugin-json": "^3.1.0",
     "eslint-plugin-node": "^11.1.0",
     "eslint-plugin-promise": "^5.1.0",
@@ -11,8 +11,9 @@ import { Config } from '../config';
 let model: GraphModel;
 
 type Keypoints = { score: number, part: string, position: [number, number], positionRaw: [number, number] };
 
 const keypoints: Array<Keypoints> = [];
+type Person = { id: number, score: number, box: [number, number, number, number], boxRaw: [number, number, number, number], keypoints: Array<Keypoints> }
 
 let box: [number, number, number, number] = [0, 0, 0, 0];
 let boxRaw: [number, number, number, number] = [0, 0, 0, 0];
 let score = 0;
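Illustrative only: given those type definitions, one detection returned by this module looks roughly like the following (all values invented):

```ts
// invented values; shape matches the Person / Keypoints types above
const examplePerson = {
  id: 0,
  score: 0.82,
  box: [212, 148, 340, 560],          // [x, y, width, height] in input pixels
  boxRaw: [0.33, 0.21, 0.53, 0.78],   // same box normalized to 0..1
  keypoints: [
    { part: 'nose', score: 0.91, position: [388, 176], positionRaw: [0.6, 0.25] },
    // ...remaining keypoints scoring above body.minConfidence
  ],
};
```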
@@ -29,6 +30,90 @@ export async function load(config: Config): Promise<GraphModel> {
   return model;
 }
 
+async function parseSinglePose(res, config, image) {
+  keypoints.length = 0;
+  const kpt = res[0][0];
+  for (let id = 0; id < kpt.length; id++) {
+    score = kpt[id][2];
+    if (score > config.body.minConfidence) {
+      keypoints.push({
+        score: Math.round(100 * score) / 100,
+        part: bodyParts[id],
+        positionRaw: [ // normalized to 0..1
+          kpt[id][1],
+          kpt[id][0],
+        ],
+        position: [ // normalized to input image size
+          Math.round((image.shape[2] || 0) * kpt[id][1]),
+          Math.round((image.shape[1] || 0) * kpt[id][0]),
+        ],
+      });
+    }
+  }
+  score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
+  const x = keypoints.map((a) => a.position[0]);
+  const y = keypoints.map((a) => a.position[1]);
+  box = [
+    Math.min(...x),
+    Math.min(...y),
+    Math.max(...x) - Math.min(...x),
+    Math.max(...y) - Math.min(...y),
+  ];
+  const xRaw = keypoints.map((a) => a.positionRaw[0]);
+  const yRaw = keypoints.map((a) => a.positionRaw[1]);
+  boxRaw = [
+    Math.min(...xRaw),
+    Math.min(...yRaw),
+    Math.max(...xRaw) - Math.min(...xRaw),
+    Math.max(...yRaw) - Math.min(...yRaw),
+  ];
+  const persons: Array<Person> = [];
+  persons.push({ id: 0, score, box, boxRaw, keypoints });
+  return persons;
+}
+
+async function parseMultiPose(res, config, image) {
+  const persons: Array<Person> = [];
+  for (let p = 0; p < res[0].length; p++) {
+    const kpt = res[0][p];
+    score = Math.round(100 * kpt[51 + 4]) / 100;
+    // eslint-disable-next-line no-continue
+    if (score < config.body.minConfidence) continue;
+    keypoints.length = 0;
+    for (let i = 0; i < 17; i++) {
+      const partScore = Math.round(100 * kpt[3 * i + 2]) / 100;
+      if (partScore > config.body.minConfidence) {
+        keypoints.push({
+          part: bodyParts[i],
+          score: partScore,
+          positionRaw: [
+            kpt[3 * i + 1],
+            kpt[3 * i + 0],
+          ],
+          position: [
+            Math.trunc(kpt[3 * i + 1] * (image.shape[2] || 0)),
+            Math.trunc(kpt[3 * i + 0] * (image.shape[1] || 0)),
+          ],
+        });
+      }
+    }
+    boxRaw = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
+    persons.push({
+      id: p,
+      score,
+      boxRaw,
+      box: [
+        Math.trunc(boxRaw[0] * (image.shape[2] || 0)),
+        Math.trunc(boxRaw[1] * (image.shape[1] || 0)),
+        Math.trunc(boxRaw[2] * (image.shape[2] || 0)),
+        Math.trunc(boxRaw[3] * (image.shape[1] || 0)),
+      ],
+      keypoints,
+    });
+  }
+  return persons;
+}
+
 export async function predict(image: Tensor, config: Config): Promise<Body[]> {
   if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) {
     skipped++;
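A note on the indexing in `parseMultiPose` above: each row of the multipose output holds 56 values. The constants below spell out that layout as implied by the parsing code; the names are mine, added for illustration, while the commit itself uses raw offsets:

```ts
// Layout of one row of the multipose output tensor [1, 6, 56], as read from parseMultiPose:
// values 0..50  -> 17 keypoints as [y, x, score] triplets (kpt[3 * i + 0..2])
// values 51..54 -> person bounding box as [ymin, xmin, ymax, xmax], normalized to 0..1
// value  55     -> overall person score (kpt[51 + 4])
const KEYPOINT_COUNT = 17;
const BOX_OFFSET = 3 * KEYPOINT_COUNT; // 51
const SCORE_OFFSET = BOX_OFFSET + 4;   // 55
```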
@@ -38,7 +123,9 @@ export async function predict(image: Tensor, config: Config): Promise<Body[]> {
   return new Promise(async (resolve) => {
     const tensor = tf.tidy(() => {
       if (!model.inputs[0].shape) return null;
-      const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
+      let inputSize = model.inputs[0].shape[2];
+      if (inputSize === -1) inputSize = 256;
+      const resize = tf.image.resizeBilinear(image, [inputSize, inputSize], false);
       const cast = tf.cast(resize, 'int32');
       return cast;
     });
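The dynamic-shape handling above matters because the multipose graph reports a -1 input dimension; `predict` expects an already-batched RGB tensor and does the resize and cast itself. A sketch of producing such an input in the browser, assuming plain TFJS and an HTMLVideoElement (not part of the commit):

```ts
import * as tf from '@tensorflow/tfjs';

// Build a [1, height, width, 3] tensor from a video frame; predict() will then
// resize it to the model input size (256 when the graph reports -1) and cast to int32.
function frameToTensor(video: HTMLVideoElement) {
  return tf.tidy(() => tf.expandDims(tf.browser.fromPixels(video), 0));
}
```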
@@ -47,46 +134,13 @@ export async function predict(image: Tensor, config: Config): Promise<Body[]> {
     if (config.body.enabled) resT = await model.predict(tensor);
     tf.dispose(tensor);
 
-    if (resT) {
-      keypoints.length = 0;
-      const res = await resT.array();
-      tf.dispose(resT);
-      const kpt = res[0][0];
-      for (let id = 0; id < kpt.length; id++) {
-        score = kpt[id][2];
-        if (score > config.body.minConfidence) {
-          keypoints.push({
-            score: Math.round(100 * score) / 100,
-            part: bodyParts[id],
-            positionRaw: [ // normalized to 0..1
-              kpt[id][1],
-              kpt[id][0],
-            ],
-            position: [ // normalized to input image size
-              Math.round((image.shape[2] || 0) * kpt[id][1]),
-              Math.round((image.shape[1] || 0) * kpt[id][0]),
-            ],
-          });
-        }
-      }
-    }
-    score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
-    const x = keypoints.map((a) => a.position[0]);
-    const y = keypoints.map((a) => a.position[1]);
-    box = [
-      Math.min(...x),
-      Math.min(...y),
-      Math.max(...x) - Math.min(...x),
-      Math.max(...y) - Math.min(...y),
-    ];
-    const xRaw = keypoints.map((a) => a.positionRaw[0]);
-    const yRaw = keypoints.map((a) => a.positionRaw[1]);
-    boxRaw = [
-      Math.min(...xRaw),
-      Math.min(...yRaw),
-      Math.max(...xRaw) - Math.min(...xRaw),
-      Math.max(...yRaw) - Math.min(...yRaw),
-    ];
-    resolve([{ id: 0, score, box, boxRaw, keypoints }]);
+    if (!resT) resolve([]);
+    const res = await resT.array();
+    let persons;
+    if (resT.shape[2] === 17) persons = await parseSinglePose(res, config, image);
+    else if (resT.shape[2] === 56) persons = await parseMultiPose(res, config, image);
+    tf.dispose(resT);
+
+    resolve(persons);
   });
 }
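For completeness, this is roughly how the updated module is driven end to end; the import path and the config literal are illustrative assumptions, not part of the diff:

```ts
import type { Tensor } from '@tensorflow/tfjs';
import * as movenet from './src/movenet'; // hypothetical path to the module changed above

// minimal config fragment; the real Config type carries many more fields
const config: any = { skipFrame: false, body: { enabled: true, modelPath: 'movenet-multipose.json', minConfidence: 0.2, skipFrames: 0 } };

async function detectBodies(input: Tensor) {
  await movenet.load(config);                           // loads the graph model once
  const bodies = await movenet.predict(input, config);  // dispatches to single- or multi-pose parsing by output shape (17 vs 56)
  return bodies;                                        // array of { id, score, box, boxRaw, keypoints }
}
```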
wiki (2 changed lines)
@@ -1 +1 @@
-Subproject commit bdc4077a3df07abdf4a2d5b2d2beadf2e573e8d8
+Subproject commit c12e036ac382043f4b3a85cf71f93927af56cfe4