diff --git a/CHANGELOG.md b/CHANGELOG.md index 09ec1c17..fc904112 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,14 +11,13 @@ ### **HEAD -> main** 2021/11/23 mandic00@live.com +- fix face box scaling on detection - cleanup ### **2.5.4** 2021/11/22 mandic00@live.com - prototype blazepose detector - -### **origin/main** 2021/11/21 mandic00@live.com - +- minor fixes - add body 3d interpolation - edit blazepose keypoints - new build process diff --git a/demo/typescript/index.ts b/demo/typescript/index.ts index a58ff182..784dd0e0 100644 --- a/demo/typescript/index.ts +++ b/demo/typescript/index.ts @@ -10,7 +10,7 @@ import { Human, Config } from '../../dist/human.esm.js'; // equivalent of @vladmandic/Human const humanConfig: Partial = { // user configuration for human, used to fine-tune behavior - // backend: 'webgpu' as 'webgpu, + // backend: 'webgpu' as const, // async: true, modelBasePath: '../../models', filter: { enabled: true, equalization: false }, diff --git a/package.json b/package.json index d07ee729..0ad5361d 100644 --- a/package.json +++ b/package.json @@ -65,7 +65,7 @@ "@tensorflow/tfjs-layers": "^3.11.0", "@tensorflow/tfjs-node": "^3.11.0", "@tensorflow/tfjs-node-gpu": "^3.11.0", - "@types/node": "^16.11.9", + "@types/node": "^16.11.10", "@types/offscreencanvas": "^2019.6.4", "@typescript-eslint/eslint-plugin": "^5.4.0", "@typescript-eslint/parser": "^5.4.0", diff --git a/src/body/blazepose.ts b/src/body/blazepose.ts index 17d98e5a..b21f8649 100644 --- a/src/body/blazepose.ts +++ b/src/body/blazepose.ts @@ -10,8 +10,7 @@ import type { GraphModel, Tensor } from '../tfjs/types'; import type { Config } from '../config'; import * as coords from './blazeposecoords'; import * as detect from './blazeposedetector'; - -interface DetectedBox { box: Box, boxRaw: Box, score: number } +import * as box from '../util/box'; const env = { initial: true }; // const models: [GraphModel | null, GraphModel | null] = [null, null]; @@ -24,7 +23,7 @@ const outputNodes: { detector: string[], landmarks: string[] } = { }; let cache: BodyResult | null = null; -let lastBox: Box | undefined; +let cropBox: Box | undefined; let padding: [number, number][] = [[0, 0], [0, 0], [0, 0], [0, 0]]; let lastTime = 0; @@ -63,50 +62,43 @@ export async function load(config: Config): Promise<[GraphModel | null, GraphMod return [models.detector, models.landmarks]; } -function calculateBoxes(keypoints: Array, outputSize: [number, number]): { keypointsBox: Box, keypointsBoxRaw: Box } { - const x = keypoints.map((a) => a.position[0]); - const y = keypoints.map((a) => a.position[1]); - const keypointsBox: Box = [Math.min(...x), Math.min(...y), Math.max(...x) - Math.min(...x), Math.max(...y) - Math.min(...y)]; - const keypointsBoxRaw: Box = [keypointsBox[0] / outputSize[0], keypointsBox[1] / outputSize[1], keypointsBox[2] / outputSize[0], keypointsBox[3] / outputSize[1]]; - return { keypointsBox, keypointsBoxRaw }; -} - -async function prepareImage(input: Tensor, size: number, box?: Box): Promise { +async function prepareImage(input: Tensor, size: number): Promise { const t: Record = {}; if (!input.shape || !input.shape[1] || !input.shape[2]) return input; let final: Tensor; + if (cropBox) { + t.cropped = tf.image.cropAndResize(input, [cropBox], [0], [input.shape[1], input.shape[2]]); // if we have cached box use it to crop input + } if (input.shape[1] !== input.shape[2]) { // only pad if width different than height - const height: [number, number] = box - ? [Math.trunc(input.shape[1] * box[1]), Math.trunc(input.shape[1] * (box[1] + box[3]))] - : [input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0, input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0]; - const width: [number, number] = box - ? [Math.trunc(input.shape[2] * box[0]), Math.trunc(input.shape[2] * (box[0] + box[2]))] - : [input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0, input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0]; + const height: [number, number] = [ + input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0, + input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0, + ]; + const width: [number, number] = [ + input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0, + input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0, + ]; padding = [ [0, 0], // dont touch batch height, // height before&after width, // width before&after [0, 0], // dont touch rbg ]; - if (box) { - t.resize = tf.image.cropAndResize(input, [box], [0], [size, size]); - } else { - t.pad = tf.pad(input, padding); - t.resize = tf.image.resizeBilinear(t.pad, [size, size]); - } + t.pad = tf.pad(t.cropped || input, padding); // use cropped box if it exists + t.resize = tf.image.resizeBilinear(t.pad, [size, size]); final = tf.div(t.resize, constants.tf255); } else if (input.shape[1] !== size) { // if input needs resizing - t.resize = tf.image.resizeBilinear(input, [size, size]); + t.resize = tf.image.resizeBilinear(t.cropped || input, [size, size]); final = tf.div(t.resize, constants.tf255); } else { // if input is already in a correct resolution just normalize it - final = tf.div(input, constants.tf255); + final = tf.div(t.cropped || input, constants.tf255); } Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); return final; } function rescaleKeypoints(keypoints: Array, outputSize: [number, number]): Array { - for (const kpt of keypoints) { + for (const kpt of keypoints) { // first rescale due to padding kpt.position = [ Math.trunc(kpt.position[0] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0] - padding[2][0]), Math.trunc(kpt.position[1] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1] - padding[1][0]), @@ -114,20 +106,21 @@ function rescaleKeypoints(keypoints: Array, outputSize: [number, n ]; kpt.positionRaw = [kpt.position[0] / outputSize[0], kpt.position[1] / outputSize[1], kpt.position[2] as number]; } - return keypoints; -} - -function rescaleBoxes(boxes: Array, outputSize: [number, number]): Array { - for (const box of boxes) { - box.box = [ - Math.trunc(box.box[0] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0]), - Math.trunc(box.box[1] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1]), - Math.trunc(box.box[2] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0]), - Math.trunc(box.box[3] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1]), - ]; - box.boxRaw = [box.box[0] / outputSize[0], box.box[1] / outputSize[1], box.box[2] / outputSize[0], box.box[3] / outputSize[1]]; + if (cropBox) { // second rescale due to cropping + for (const kpt of keypoints) { + kpt.positionRaw = [ + kpt.positionRaw[0] + cropBox[1], // correct offset due to crop + kpt.positionRaw[1] + cropBox[0], // correct offset due to crop + kpt.positionRaw[2] as number, + ]; + kpt.position = [ + Math.trunc(kpt.positionRaw[0] * outputSize[0]), + Math.trunc(kpt.positionRaw[1] * outputSize[1]), + kpt.positionRaw[2] as number, + ]; + } } - return boxes; + return keypoints; } async function detectLandmarks(input: Tensor, config: Config, outputSize: [number, number]): Promise { @@ -155,22 +148,38 @@ async function detectLandmarks(input: Tensor, config: Config, outputSize: [numbe } if (poseScore < (config.body.minConfidence || 0)) return null; const keypoints: Array = rescaleKeypoints(keypointsRelative, outputSize); // keypoints were relative to input image which is padded - const boxes = calculateBoxes(keypoints, [outputSize[0], outputSize[1]]); // now find boxes based on rescaled keypoints + const kpts = keypoints.map((k) => k.position); + const boxes = box.calc(kpts, [outputSize[0], outputSize[1]]); // now find boxes based on rescaled keypoints const annotations: Record = {}; for (const [name, indexes] of Object.entries(coords.connected)) { const pt: Array = []; for (let i = 0; i < indexes.length - 1; i++) { const pt0 = keypoints.find((kpt) => kpt.part === indexes[i]); const pt1 = keypoints.find((kpt) => kpt.part === indexes[i + 1]); - // if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]); if (pt0 && pt1) pt.push([pt0.position, pt1.position]); } annotations[name] = pt; } - const body = { id: 0, score: Math.trunc(100 * poseScore) / 100, box: boxes.keypointsBox, boxRaw: boxes.keypointsBoxRaw, keypoints, annotations }; + const body = { id: 0, score: Math.trunc(100 * poseScore) / 100, box: boxes.box, boxRaw: boxes.boxRaw, keypoints, annotations }; return body; } +/* +interface DetectedBox { box: Box, boxRaw: Box, score: number } + +function rescaleBoxes(boxes: Array, outputSize: [number, number]): Array { + for (const b of boxes) { + b.box = [ + Math.trunc(b.box[0] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0]), + Math.trunc(b.box[1] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1]), + Math.trunc(b.box[2] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0]), + Math.trunc(b.box[3] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1]), + ]; + b.boxRaw = [b.box[0] / outputSize[0], b.box[1] / outputSize[1], b.box[2] / outputSize[0], b.box[3] / outputSize[1]]; + } + return boxes; +} + async function detectBoxes(input: Tensor, config: Config, outputSize: [number, number]) { const t: Record = {}; t.res = models.detector?.execute(input, ['Identity']) as Tensor; // @@ -183,6 +192,7 @@ async function detectBoxes(input: Tensor, config: Config, outputSize: [number, n Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); return boxes; } +*/ export async function predict(input: Tensor, config: Config): Promise { const outputSize: [number, number] = [input.shape[2] || 0, input.shape[1] || 0]; @@ -192,33 +202,31 @@ export async function predict(input: Tensor, config: Config): Promise = {}; + /* if (config.body['detector'] && config.body['detector']['enabled']) { t.detector = await prepareImage(input, 224); const boxes = await detectBoxes(t.detector, config, outputSize); - if (boxes && boxes.length === 1) { - t.landmarks = await prepareImage(input, 256, boxes[0].box); // padded and resized according to detector - cache = await detectLandmarks(t.landmarks, config, outputSize); - } - if (cache) cache.score = boxes[0].score; - } else { - t.landmarks = await prepareImage(input, 256, lastBox); // padded and resized - cache = await detectLandmarks(t.landmarks, config, outputSize); - /* - lastBox = undefined; - if (cache?.box) { - const cx = cache.boxRaw[0] + (cache.boxRaw[2] / 2); - const cy = cache.boxRaw[1] + (cache.boxRaw[3] / 2); - let size = cache.boxRaw[2] > cache.boxRaw[3] ? cache.boxRaw[2] : cache.boxRaw[3]; - size = (size * 1.2) / 2; // enlarge and half it - lastBox = [cx - size, cy - size, 2 * size, 2 * size]; - } - */ } + */ + t.landmarks = await prepareImage(input, 256); // padded and resized + cache = await detectLandmarks(t.landmarks, config, outputSize); + /* + cropBox = [0, 0, 1, 1]; // reset crop coordinates + if (cache?.boxRaw && config.skipAllowed) { + const cx = (2.0 * cache.boxRaw[0] + cache.boxRaw[2]) / 2; + const cy = (2.0 * cache.boxRaw[1] + cache.boxRaw[3]) / 2; + let size = cache.boxRaw[2] > cache.boxRaw[3] ? cache.boxRaw[2] : cache.boxRaw[3]; + size = (size * 1.0) / 2; // enlarge and half it + if (cx > 0.1 && cx < 0.9 && cy > 0.1 && cy < 0.9 && size > 0.1) { // only update if box is sane + const y = 0; // cy - size; + const x = cx - size; + cropBox = [y, x, y + 1, x + 1]; // [y0,x0,y1,x1] used for cropping but width/height are not yet implemented so we only reposition image to center of body + } + } + */ Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); - // if (cache && boxes.length > 0) cache.box = boxes[0].box; lastTime = now(); skipped = 0; } - if (cache) return [cache]; - return []; + return cache ? [cache] : []; } diff --git a/src/face/angles.ts b/src/face/angles.ts index 9e23f5cb..5be7894f 100644 --- a/src/face/angles.ts +++ b/src/face/angles.ts @@ -78,7 +78,7 @@ export const calculateFaceAngle = (face, imageSize): { if (isNaN(thetaX)) thetaX = 0; if (isNaN(thetaY)) thetaY = 0; if (isNaN(thetaZ)) thetaZ = 0; - return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ }; + return { pitch: -thetaX, yaw: -thetaY, roll: -thetaZ }; }; // simple Euler angle calculation based existing 3D mesh // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars diff --git a/src/hand/handtrack.ts b/src/hand/handtrack.ts index b3491714..6af74e69 100644 --- a/src/hand/handtrack.ts +++ b/src/hand/handtrack.ts @@ -51,11 +51,11 @@ const cache: { }; const fingerMap = { - thumb: [1, 2, 3, 4], - index: [5, 6, 7, 8], - middle: [9, 10, 11, 12], - ring: [13, 14, 15, 16], - pinky: [17, 18, 19, 20], + thumb: [0, 1, 2, 3, 4], + index: [0, 5, 6, 7, 8], + middle: [0, 9, 10, 11, 12], + ring: [0, 13, 14, 15, 16], + pinky: [0, 17, 18, 19, 20], palm: [0], };