diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b2b42ed..357aad46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,12 +9,13 @@ Repository: **** ## Changelog +### **HEAD -> main** 2021/08/19 mandic00@live.com + + ### **2.1.4** 2021/08/19 mandic00@live.com - add static type definitions to main class - -### **origin/main** 2021/08/18 mandic00@live.com - +- fix interpolation overflow - rebuild full - improve face box caching - strict type checks diff --git a/TODO.md b/TODO.md index 130cc989..ac97e4b2 100644 --- a/TODO.md +++ b/TODO.md @@ -7,10 +7,6 @@ WebGL shader optimizations for faster load and initial detection - Implement WebGL uniforms for shaders: - Fix shader packing: -MoveNet MultiPose Model: - -- Implementation is ready, but model is 2x size and 0.5x performance -
## Exploring @@ -45,16 +41,24 @@ Feature is automatically disabled in NodeJS without user impact - Backend NodeJS missing kernel op `FlipLeftRight` - *Target: `Human` v2.2 with `TFJS` v3.9* + *Target: `Human` v2.2 with `TFJS` v3.9* - Backend NodeJS missing kernel op `RotateWithOffset` - *Target: N/A* + *Target: N/A* Hand detection using WASM backend has reduced precision due to math rounding errors in backend *Target: N/A*
+### Body Detection + +MoveNet MultiPose model does not work with WASM backend due to missing F32 implementation + +- Backend WASM missing F32 implementation + + *Target: N/A* + ### Object Detection Object detection using CenterNet or NanoDet models is not working when using WASM backend due to missing kernel ops in TFJS diff --git a/demo/index.js b/demo/index.js index 9aec5489..4689c487 100644 --- a/demo/index.js +++ b/demo/index.js @@ -51,6 +51,7 @@ let userConfig = { gesture: { enabled: false }, hand: { enabled: false }, body: { enabled: false }, + // body: { enabled: true, modelPath: 'movenet-multipose.json' }, // body: { enabled: true, modelPath: 'posenet.json' }, segmentation: { enabled: false }, */ diff --git a/package.json b/package.json index 1ece6075..fad861a7 100644 --- a/package.json +++ b/package.json @@ -76,7 +76,7 @@ "esbuild": "^0.12.21", "eslint": "^7.32.0", "eslint-config-airbnb-base": "^14.2.1", - "eslint-plugin-import": "^2.24.0", + "eslint-plugin-import": "^2.24.1", "eslint-plugin-json": "^3.1.0", "eslint-plugin-node": "^11.1.0", "eslint-plugin-promise": "^5.1.0", diff --git a/src/movenet/movenet.ts b/src/movenet/movenet.ts index b4cc29ac..9fb4d957 100644 --- a/src/movenet/movenet.ts +++ b/src/movenet/movenet.ts @@ -11,8 +11,9 @@ import { Config } from '../config'; let model: GraphModel; type Keypoints = { score: number, part: string, position: [number, number], positionRaw: [number, number] }; - const keypoints: Array = []; +type Person = { id: number, score: number, box: [number, number, number, number], boxRaw: [number, number, number, number], keypoints: Array } + let box: [number, number, number, number] = [0, 0, 0, 0]; let boxRaw: [number, number, number, number] = [0, 0, 0, 0]; let score = 0; @@ -29,6 +30,90 @@ export async function load(config: Config): Promise { return model; } +async function parseSinglePose(res, config, image) { + keypoints.length = 0; + const kpt = res[0][0]; + for (let id = 0; id < kpt.length; id++) { + score = 
kpt[id][2]; + if (score > config.body.minConfidence) { + keypoints.push({ + score: Math.round(100 * score) / 100, + part: bodyParts[id], + positionRaw: [ // normalized to 0..1 + kpt[id][1], + kpt[id][0], + ], + position: [ // normalized to input image size + Math.round((image.shape[2] || 0) * kpt[id][1]), + Math.round((image.shape[1] || 0) * kpt[id][0]), + ], + }); + } + } + score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0); + const x = keypoints.map((a) => a.position[0]); + const y = keypoints.map((a) => a.position[1]); + box = [ + Math.min(...x), + Math.min(...y), + Math.max(...x) - Math.min(...x), + Math.max(...y) - Math.min(...y), + ]; + const xRaw = keypoints.map((a) => a.positionRaw[0]); + const yRaw = keypoints.map((a) => a.positionRaw[1]); + boxRaw = [ + Math.min(...xRaw), + Math.min(...yRaw), + Math.max(...xRaw) - Math.min(...xRaw), + Math.max(...yRaw) - Math.min(...yRaw), + ]; + const persons: Array = []; + persons.push({ id: 0, score, box, boxRaw, keypoints }); + return persons; +} + +async function parseMultiPose(res, config, image) { + const persons: Array = []; + for (let p = 0; p < res[0].length; p++) { + const kpt = res[0][p]; + score = Math.round(100 * kpt[51 + 4]) / 100; + // eslint-disable-next-line no-continue + if (score < config.body.minConfidence) continue; + keypoints.length = 0; + for (let i = 0; i < 17; i++) { + const partScore = Math.round(100 * kpt[3 * i + 2]) / 100; + if (partScore > config.body.minConfidence) { + keypoints.push({ + part: bodyParts[i], + score: partScore, + positionRaw: [ + kpt[3 * i + 1], + kpt[3 * i + 0], + ], + position: [ + Math.trunc(kpt[3 * i + 1] * (image.shape[2] || 0)), + Math.trunc(kpt[3 * i + 0] * (image.shape[1] || 0)), + ], + }); + } + } + boxRaw = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]]; + persons.push({ + id: p, + score, + boxRaw, + box: [ + Math.trunc(boxRaw[0] * (image.shape[2] || 0)), + Math.trunc(boxRaw[1] * (image.shape[1] || 
0)), + Math.trunc(boxRaw[2] * (image.shape[2] || 0)), + Math.trunc(boxRaw[3] * (image.shape[1] || 0)), + ], + keypoints: [...keypoints], // snapshot: the shared module-level array is cleared and refilled for the next person + }); + } + return persons; +} + export async function predict(image: Tensor, config: Config): Promise { if ((skipped < config.body.skipFrames) && config.skipFrame && Object.keys(keypoints).length > 0) { skipped++; @@ -38,7 +123,9 @@ export async function predict(image: Tensor, config: Config): Promise { return new Promise(async (resolve) => { const tensor = tf.tidy(() => { if (!model.inputs[0].shape) return null; - const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false); + let inputSize = model.inputs[0].shape[2]; + if (inputSize === -1) inputSize = 256; + const resize = tf.image.resizeBilinear(image, [inputSize, inputSize], false); const cast = tf.cast(resize, 'int32'); return cast; }); @@ -47,46 +134,13 @@ export async function predict(image: Tensor, config: Config): Promise { if (config.body.enabled) resT = await model.predict(tensor); tf.dispose(tensor); - if (resT) { - keypoints.length = 0; - const res = await resT.array(); - tf.dispose(resT); - const kpt = res[0][0]; - for (let id = 0; id < kpt.length; id++) { - score = kpt[id][2]; - if (score > config.body.minConfidence) { - keypoints.push({ - score: Math.round(100 * score) / 100, - part: bodyParts[id], - positionRaw: [ // normalized to 0..1 - kpt[id][1], - kpt[id][0], - ], - position: [ // normalized to input image size - Math.round((image.shape[2] || 0) * kpt[id][1]), - Math.round((image.shape[1] || 0) * kpt[id][0]), - ], - }); - } - } - } - score = keypoints.reduce((prev, curr) => (curr.score > prev ? 
curr.score : prev), 0); - const x = keypoints.map((a) => a.position[0]); - const y = keypoints.map((a) => a.position[1]); - box = [ - Math.min(...x), - Math.min(...y), - Math.max(...x) - Math.min(...x), - Math.max(...y) - Math.min(...y), - ]; - const xRaw = keypoints.map((a) => a.positionRaw[0]); - const yRaw = keypoints.map((a) => a.positionRaw[1]); - boxRaw = [ - Math.min(...xRaw), - Math.min(...yRaw), - Math.max(...xRaw) - Math.min(...xRaw), - Math.max(...yRaw) - Math.min(...yRaw), - ]; - resolve([{ id: 0, score, box, boxRaw, keypoints }]); + if (!resT) { resolve([]); return; } // stop here: resT is dereferenced on the following lines + const res = await resT.array(); + let persons; + if (resT.shape[2] === 17) persons = await parseSinglePose(res, config, image); + else if (resT.shape[2] === 56) persons = await parseMultiPose(res, config, image); + tf.dispose(resT); + + resolve(persons || []); // unrecognized output shape leaves persons unset; resolve with an empty list instead of undefined }); } diff --git a/wiki b/wiki index bdc4077a..c12e036a 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit bdc4077a3df07abdf4a2d5b2d2beadf2e573e8d8 +Subproject commit c12e036ac382043f4b3a85cf71f93927af56cfe4