update blazepose and extend hand annotations

pull/356/head
Vladimir Mandic 2021-11-24 16:17:03 -05:00
parent c919784f68
commit a90e8ee723
6 changed files with 82 additions and 75 deletions

@@ -11,14 +11,13 @@
### **HEAD -> main** 2021/11/23 mandic00@live.com
- fix face box scaling on detection
- cleanup
### **2.5.4** 2021/11/22 mandic00@live.com
- prototype blazepose detector
### **origin/main** 2021/11/21 mandic00@live.com
- minor fixes
- add body 3d interpolation
- edit blazepose keypoints
- new build process

@@ -10,7 +10,7 @@
import { Human, Config } from '../../dist/human.esm.js'; // equivalent of @vladmandic/Human
const humanConfig: Partial<Config> = { // user configuration for human, used to fine-tune behavior
// backend: 'webgpu' as 'webgpu,
// backend: 'webgpu' as const,
// async: true,
modelBasePath: '../../models',
filter: { enabled: true, equalization: false },
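
A note on the typing fix above: a plain string variable widens to type string, which is not assignable to the string-union type of Config['backend'], while "as const" keeps the literal type. A minimal sketch of the difference (the Backend union here is an illustrative stand-in, not Human's actual type):

type Backend = 'cpu' | 'wasm' | 'webgl' | 'webgpu'; // illustrative stand-in for Config['backend']
let widened = 'webgpu'; // inferred as string
let literal = 'webgpu' as const; // inferred as the literal type 'webgpu'
// const bad: Backend = widened; // would not compile: string is not assignable to Backend
const good: Backend = literal; // compiles: 'webgpu' is a member of the union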

@@ -65,7 +65,7 @@
"@tensorflow/tfjs-layers": "^3.11.0",
"@tensorflow/tfjs-node": "^3.11.0",
"@tensorflow/tfjs-node-gpu": "^3.11.0",
"@types/node": "^16.11.9",
"@types/node": "^16.11.10",
"@types/offscreencanvas": "^2019.6.4",
"@typescript-eslint/eslint-plugin": "^5.4.0",
"@typescript-eslint/parser": "^5.4.0",

@@ -10,8 +10,7 @@ import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import * as coords from './blazeposecoords';
import * as detect from './blazeposedetector';
interface DetectedBox { box: Box, boxRaw: Box, score: number }
import * as box from '../util/box';
const env = { initial: true };
// const models: [GraphModel | null, GraphModel | null] = [null, null];
@@ -24,7 +23,7 @@ const outputNodes: { detector: string[], landmarks: string[] } = {
};
let cache: BodyResult | null = null;
let lastBox: Box | undefined;
let cropBox: Box | undefined;
let padding: [number, number][] = [[0, 0], [0, 0], [0, 0], [0, 0]];
let lastTime = 0;
@@ -63,50 +62,43 @@ export async function load(config: Config): Promise<[GraphModel | null, GraphMod
return [models.detector, models.landmarks];
}
function calculateBoxes(keypoints: Array<BodyKeypoint>, outputSize: [number, number]): { keypointsBox: Box, keypointsBoxRaw: Box } {
const x = keypoints.map((a) => a.position[0]);
const y = keypoints.map((a) => a.position[1]);
const keypointsBox: Box = [Math.min(...x), Math.min(...y), Math.max(...x) - Math.min(...x), Math.max(...y) - Math.min(...y)];
const keypointsBoxRaw: Box = [keypointsBox[0] / outputSize[0], keypointsBox[1] / outputSize[1], keypointsBox[2] / outputSize[0], keypointsBox[3] / outputSize[1]];
return { keypointsBox, keypointsBoxRaw };
}
async function prepareImage(input: Tensor, size: number, box?: Box): Promise<Tensor> {
async function prepareImage(input: Tensor, size: number): Promise<Tensor> {
const t: Record<string, Tensor> = {};
if (!input.shape || !input.shape[1] || !input.shape[2]) return input;
let final: Tensor;
if (cropBox) {
t.cropped = tf.image.cropAndResize(input, [cropBox], [0], [input.shape[1], input.shape[2]]); // if we have cached box use it to crop input
}
if (input.shape[1] !== input.shape[2]) { // only pad if width different than height
const height: [number, number] = box
? [Math.trunc(input.shape[1] * box[1]), Math.trunc(input.shape[1] * (box[1] + box[3]))]
: [input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0, input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0];
const width: [number, number] = box
? [Math.trunc(input.shape[2] * box[0]), Math.trunc(input.shape[2] * (box[0] + box[2]))]
: [input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0, input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0];
const height: [number, number] = [
input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0,
input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0,
];
const width: [number, number] = [
input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0,
input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0,
];
padding = [
[0, 0], // don't touch batch
height, // height before & after
width, // width before & after
[0, 0], // don't touch rgb channels
];
if (box) {
t.resize = tf.image.cropAndResize(input, [box], [0], [size, size]);
} else {
t.pad = tf.pad(input, padding);
t.resize = tf.image.resizeBilinear(t.pad, [size, size]);
}
t.pad = tf.pad(t.cropped || input, padding); // use cropped box if it exists
t.resize = tf.image.resizeBilinear(t.pad, [size, size]);
final = tf.div(t.resize, constants.tf255);
} else if (input.shape[1] !== size) { // if input needs resizing
t.resize = tf.image.resizeBilinear(input, [size, size]);
t.resize = tf.image.resizeBilinear(t.cropped || input, [size, size]);
final = tf.div(t.resize, constants.tf255);
} else { // if input is already in a correct resolution just normalize it
final = tf.div(input, constants.tf255);
final = tf.div(t.cropped || input, constants.tf255);
}
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
return final;
}
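
The padding branch above letterboxes a non-square frame into a square before resizing to the model size. A worked sketch of that math, assuming a 1280x720 HD input (illustrative numbers, not taken from the code):

// worked example of the symmetric padding computed in prepareImage
const shape = [1, 720, 1280, 3]; // [batch, height, width, channels]
const pad = Math.trunc((shape[2] - shape[1]) / 2); // (1280 - 720) / 2 = 280
const examplePadding = [[0, 0], [pad, pad], [0, 0], [0, 0]]; // height is padded, width is not
console.log(examplePadding); // [[0,0],[280,280],[0,0],[0,0]]
// tf.pad then yields a 1280x1280 frame, which resizeBilinear shrinks to 256x256
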
function rescaleKeypoints(keypoints: Array<BodyKeypoint>, outputSize: [number, number]): Array<BodyKeypoint> {
for (const kpt of keypoints) {
for (const kpt of keypoints) { // first rescale due to padding
kpt.position = [
Math.trunc(kpt.position[0] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0] - padding[2][0]),
Math.trunc(kpt.position[1] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1] - padding[1][0]),
@@ -114,20 +106,21 @@ function rescaleKeypoints(keypoints: Array<BodyKeypoint>, outputSize: [number, n
];
kpt.positionRaw = [kpt.position[0] / outputSize[0], kpt.position[1] / outputSize[1], kpt.position[2] as number];
}
return keypoints;
}
function rescaleBoxes(boxes: Array<DetectedBox>, outputSize: [number, number]): Array<DetectedBox> {
for (const box of boxes) {
box.box = [
Math.trunc(box.box[0] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0]),
Math.trunc(box.box[1] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1]),
Math.trunc(box.box[2] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0]),
Math.trunc(box.box[3] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1]),
];
box.boxRaw = [box.box[0] / outputSize[0], box.box[1] / outputSize[1], box.box[2] / outputSize[0], box.box[3] / outputSize[1]];
if (cropBox) { // second rescale due to cropping
for (const kpt of keypoints) {
kpt.positionRaw = [
kpt.positionRaw[0] + cropBox[1], // correct offset due to crop
kpt.positionRaw[1] + cropBox[0], // correct offset due to crop
kpt.positionRaw[2] as number,
];
kpt.position = [
Math.trunc(kpt.positionRaw[0] * outputSize[0]),
Math.trunc(kpt.positionRaw[1] * outputSize[1]),
kpt.positionRaw[2] as number,
];
}
}
return boxes;
return keypoints;
}
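
rescaleKeypoints undoes the two transforms in reverse order: first the letterbox padding, then (when a cached cropBox was used) the crop offset. A worked sketch of the un-padding step, reusing the assumed 1280x720 numbers from above (illustrative values, not real model output):

// keypoint y is relative to the padded frame; map it back to the original frame
const outputSize: [number, number] = [1280, 720]; // [width, height] of the source frame
const padY = 280; // height padding applied before & after
const yIn = 200; // example keypoint y coordinate
const yOut = Math.trunc(yIn * (outputSize[1] + padY + padY) / outputSize[1] - padY); // 75
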
async function detectLandmarks(input: Tensor, config: Config, outputSize: [number, number]): Promise<BodyResult | null> {
@@ -155,22 +148,38 @@ async function detectLandmarks(input: Tensor, config: Config, outputSize: [numbe
}
if (poseScore < (config.body.minConfidence || 0)) return null;
const keypoints: Array<BodyKeypoint> = rescaleKeypoints(keypointsRelative, outputSize); // keypoints were relative to input image which is padded
const boxes = calculateBoxes(keypoints, [outputSize[0], outputSize[1]]); // now find boxes based on rescaled keypoints
const kpts = keypoints.map((k) => k.position);
const boxes = box.calc(kpts, [outputSize[0], outputSize[1]]); // now find boxes based on rescaled keypoints
const annotations: Record<string, Point[][]> = {};
for (const [name, indexes] of Object.entries(coords.connected)) {
const pt: Array<Point[]> = [];
for (let i = 0; i < indexes.length - 1; i++) {
const pt0 = keypoints.find((kpt) => kpt.part === indexes[i]);
const pt1 = keypoints.find((kpt) => kpt.part === indexes[i + 1]);
// if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]);
if (pt0 && pt1) pt.push([pt0.position, pt1.position]);
}
annotations[name] = pt;
}
const body = { id: 0, score: Math.trunc(100 * poseScore) / 100, box: boxes.keypointsBox, boxRaw: boxes.keypointsBoxRaw, keypoints, annotations };
const body = { id: 0, score: Math.trunc(100 * poseScore) / 100, box: boxes.box, boxRaw: boxes.boxRaw, keypoints, annotations };
return body;
}
/*
interface DetectedBox { box: Box, boxRaw: Box, score: number }
function rescaleBoxes(boxes: Array<DetectedBox>, outputSize: [number, number]): Array<DetectedBox> {
for (const b of boxes) {
b.box = [
Math.trunc(b.box[0] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0]),
Math.trunc(b.box[1] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1]),
Math.trunc(b.box[2] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0]),
Math.trunc(b.box[3] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1]),
];
b.boxRaw = [b.box[0] / outputSize[0], b.box[1] / outputSize[1], b.box[2] / outputSize[0], b.box[3] / outputSize[1]];
}
return boxes;
}
async function detectBoxes(input: Tensor, config: Config, outputSize: [number, number]) {
const t: Record<string, Tensor> = {};
t.res = models.detector?.execute(input, ['Identity']) as Tensor; //
@@ -183,6 +192,7 @@ async function detectBoxes(input: Tensor, config: Config, outputSize: [number, n
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
return boxes;
}
*/
export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
const outputSize: [number, number] = [input.shape[2] || 0, input.shape[1] || 0];
@@ -192,33 +202,31 @@ export async function predict(input: Tensor, config: Config): Promise<BodyResult
skipped++;
} else {
const t: Record<string, Tensor> = {};
/*
if (config.body['detector'] && config.body['detector']['enabled']) {
t.detector = await prepareImage(input, 224);
const boxes = await detectBoxes(t.detector, config, outputSize);
if (boxes && boxes.length === 1) {
t.landmarks = await prepareImage(input, 256, boxes[0].box); // padded and resized according to detector
cache = await detectLandmarks(t.landmarks, config, outputSize);
}
if (cache) cache.score = boxes[0].score;
} else {
t.landmarks = await prepareImage(input, 256, lastBox); // padded and resized
cache = await detectLandmarks(t.landmarks, config, outputSize);
/*
lastBox = undefined;
if (cache?.box) {
const cx = cache.boxRaw[0] + (cache.boxRaw[2] / 2);
const cy = cache.boxRaw[1] + (cache.boxRaw[3] / 2);
let size = cache.boxRaw[2] > cache.boxRaw[3] ? cache.boxRaw[2] : cache.boxRaw[3];
size = (size * 1.2) / 2; // enlarge and half it
lastBox = [cx - size, cy - size, 2 * size, 2 * size];
}
*/
}
*/
t.landmarks = await prepareImage(input, 256); // padded and resized
cache = await detectLandmarks(t.landmarks, config, outputSize);
/*
cropBox = [0, 0, 1, 1]; // reset crop coordinates
if (cache?.boxRaw && config.skipAllowed) {
const cx = (2.0 * cache.boxRaw[0] + cache.boxRaw[2]) / 2;
const cy = (2.0 * cache.boxRaw[1] + cache.boxRaw[3]) / 2;
let size = cache.boxRaw[2] > cache.boxRaw[3] ? cache.boxRaw[2] : cache.boxRaw[3];
size = (size * 1.0) / 2; // enlarge and half it
if (cx > 0.1 && cx < 0.9 && cy > 0.1 && cy < 0.9 && size > 0.1) { // only update if box is sane
const y = 0; // cy - size;
const x = cx - size;
cropBox = [y, x, y + 1, x + 1]; // [y0,x0,y1,x1] used for cropping but width/height are not yet implemented so we only reposition image to center of body
}
}
*/
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
// if (cache && boxes.length > 0) cache.box = boxes[0].box;
lastTime = now();
skipped = 0;
}
if (cache) return [cache];
return [];
return cache ? [cache] : [];
}
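
For context, a usage sketch of this pipeline through the public Human API (a sketch assuming defaults; selecting the blazepose model via config.body.modelPath is omitted):

// usage sketch; input may also be an image, canvas, or tensor
import { Human } from '@vladmandic/human';
async function run(input: HTMLVideoElement): Promise<void> {
  const human = new Human({ body: { enabled: true } }); // body detection enabled
  const result = await human.detect(input); // runs the configured body model
  for (const body of result.body) console.log(body.score, body.box, Object.keys(body.annotations));
}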

@@ -78,7 +78,7 @@ export const calculateFaceAngle = (face, imageSize): {
if (isNaN(thetaX)) thetaX = 0;
if (isNaN(thetaY)) thetaY = 0;
if (isNaN(thetaZ)) thetaZ = 0;
return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ };
return { pitch: -thetaX, yaw: -thetaY, roll: -thetaZ };
};
// simple Euler angle calculation based on the existing 3D mesh
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
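
The returned pitch/yaw/roll are radians produced by the atan-based math above; a minimal sketch of converting them to degrees for display (rad2deg is a hypothetical helper, and face/imageSize are assumed to be in scope as in the surrounding code):

// minimal sketch: converting the returned radian angles to degrees
const rad2deg = (rad: number): number => Math.round((rad * 180) / Math.PI);
const angle = calculateFaceAngle(face, imageSize);
console.log({ pitch: rad2deg(angle.pitch), yaw: rad2deg(angle.yaw), roll: rad2deg(angle.roll) });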

@@ -51,11 +51,11 @@ const cache: {
};
const fingerMap = {
thumb: [1, 2, 3, 4],
index: [5, 6, 7, 8],
middle: [9, 10, 11, 12],
ring: [13, 14, 15, 16],
pinky: [17, 18, 19, 20],
thumb: [0, 1, 2, 3, 4],
index: [0, 5, 6, 7, 8],
middle: [0, 9, 10, 11, 12],
ring: [0, 13, 14, 15, 16],
pinky: [0, 17, 18, 19, 20],
palm: [0],
};
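
Each finger list now starts at index 0 (the wrist landmark), so annotation lines built from these indexes connect every finger through the wrist. A sketch of how such a map can be consumed (illustrative, not the exact code in this file):

// illustrative sketch: mapping finger names to keypoint coordinates using fingerMap
type Point = [number, number, number?];
function annotate(keypoints: Point[], map: Record<string, number[]>): Record<string, Point[]> {
  const annotations: Record<string, Point[]> = {};
  for (const [finger, indexes] of Object.entries(map)) {
    annotations[finger] = indexes.map((i) => keypoints[i]); // index 0 is the wrist
  }
  return annotations;
}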