add blazepose v2 and add annotations to body results

pull/280/head
Vladimir Mandic 2021-10-04 16:29:15 -04:00
parent 0ee5d5eceb
commit 051ab8c9f5
15 changed files with 300 additions and 234 deletions

View File

@ -9,8 +9,9 @@
## Changelog
### **HEAD -> main** 2021/10/02 mandic00@live.com
### **HEAD -> main** 2021/10/03 mandic00@live.com
- added docker notes
- breaking change: new similarity and match methods
- release candidate
- tweaked default values

View File

@ -31,6 +31,13 @@ import jsonView from './helpers/jsonview.js';
let human;
let userConfig = {
face: { enabled: false },
object: { enabled: false },
gesture: { enabled: true },
hand: { enabled: false },
body: { enabled: true, modelPath: 'https://vladmandic.github.io/human-models/models/blazepose-lite.json' },
segmentation: { enabled: false },
/*
warmup: 'none',
backend: 'humangl',
@ -108,7 +115,7 @@ const ui = {
lastFrame: 0, // time of last frame processing
viewportSet: false, // internal, has custom viewport been set
background: null, // holds instance of segmentation background image
exceptionHandler: true, // should capture all unhandled exceptions
exceptionHandler: false, // should capture all unhandled exceptions
// webrtc
useWebRTC: false, // use webrtc as camera source instead of local webcam

View File

@ -1,60 +1,33 @@
/**
* BlazePose model implementation
*
* Based on : [**BlazePose**](https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection)
*/
import * as tf from '@tensorflow/tfjs';
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import type { BodyResult, Box, Point } from '../result';
import type { BodyKeypoint, BodyResult, Box, Point } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
import * as annotations from './annotations';
import * as coords from './blazeposecoords';
// const boxScaleFact = 1.5; // hand finger model prefers slighly larger box
const env = { initial: true };
const models: [GraphModel | null, GraphModel | null] = [null, null];
const outputNodes = ['ld_3d', 'activation_segmentation', 'activation_heatmap', 'world_3d', 'output_poseflag'];
const inputSize = [[0, 0], [0, 0]];
// let skipped = 0;
let outputSize: [number, number] = [0, 0];
type Keypoints = { score: number, part: string, position: Point, positionRaw: Point };
/*
type BodyDetectResult = {
id: number,
score: number,
box: Box,
boxRaw: Box,
label: string,
yxBox: Box,
}
const cache: {
bodyBoxes: Array<BodyDetectResult>,
partBoxes: Array<BodyDetectResult>
tmpBoxes: Array<BodyDetectResult>
} = {
bodyBoxes: [],
partBoxes: [],
tmpBoxes: [],
};
*/
let skipped = Number.MAX_SAFE_INTEGER;
let outputNodes: string[]; // different for lite/full/heavy
let cache: BodyResult | null = null;
let padding: [number, number][] = [[0, 0], [0, 0], [0, 0], [0, 0]];
/**
 * Loads the optional blazepose detector model (models[0]) and records its input size.
 * Skipped entirely unless config.body.detector.modelPath is configured.
 * Returns the cached model on subsequent calls.
 */
export async function loadDetect(config: Config): Promise<GraphModel> {
  if (env.initial) models[0] = null; // force reload after environment reset
  // fix: original condition `!models[0] && config.body.detector?.modelPath || ''` parsed as
  // `(!models[0] && path) || ''` due to precedence; check the path explicitly instead
  if (!models[0] && (config.body.detector?.modelPath || '').length > 0) {
    models[0] = await tf.loadGraphModel(join(config.modelBasePath, config.body.detector?.modelPath || '')) as unknown as GraphModel;
    const inputs = Object.values(models[0].modelSignature['inputs']);
    inputSize[0][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
    inputSize[0][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
    // fix: failure log previously referenced config.object.modelPath (copy-paste from object detector)
    if (!models[0] || !models[0]['modelUrl']) log('load model failed:', config.body.detector?.modelPath);
    else if (config.debug) log('load model:', models[0]['modelUrl']);
  } else if (config.debug && models[0]) log('cached model:', models[0]['modelUrl']);
  return models[0] as GraphModel;
}
export async function loadPose(config: Config): Promise<GraphModel> {
@ -64,6 +37,8 @@ export async function loadPose(config: Config): Promise<GraphModel> {
const inputs = Object.values(models[1].modelSignature['inputs']);
inputSize[1][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
inputSize[1][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
if (config.body.modelPath?.includes('lite')) outputNodes = ['ld_3d', 'output_segmentation', 'output_heatmap', 'world_3d', 'output_poseflag'];
else outputNodes = ['Identity', 'Identity_2', 'Identity_3', 'Identity_4', 'Identity_1']; // v2 from pinto full and heavy
if (!models[1] || !models[1]['modelUrl']) log('load model failed:', config.object.modelPath);
else if (config.debug) log('load model:', models[1]['modelUrl']);
} else if (config.debug) log('cached model:', models[1]['modelUrl']);
@ -76,86 +51,104 @@ export async function load(config: Config): Promise<[GraphModel | null, GraphMod
return models;
}
/*
async function detectBody(input: Tensor, config: Config): Promise<BodyDetectResult[]> {
if ((config.body.detector?.modelPath.length || 0) > 0 && models[0]) {
const t: Record<string, Tensor> = {};
t.resize = tf.image.resizeBilinear(input, [inputSize[0][0], inputSize[0][1]]);
t.res = await models[0]?.predict(t.resize) as Tensor; // [1,2254,13]
t.logits = tf.slice(t.res, [0, 0, 0], [1, -1, 1]);
t.sigmoid = tf.sigmoid(t.logits);
t.rawBoxes = tf.slice(t.res, [0, 0, 1], [1, -1, -1]);
t.packedBoxes = tf.squeeze(t.rawBoxes); // [2254,12]
t.scores = tf.squeeze(t.sigmoid); // [2254,1]
// boxes need to be decoded based on anchors
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
}
return [];
}
*/
async function detectParts(input: Tensor, config: Config): Promise<BodyResult> {
const t: Record<string, Tensor> = {};
t.resize = tf.image.resizeBilinear(input, [inputSize[1][0], inputSize[1][1]]);
[t.ld/* 1,195 */, t.segmentation/* 1,256,256,1 */, t.heatmap/* 1,64,64,39 */, t.world/* 1,117 */, t.poseflag/* 1,1 */] = await models[1]?.execute(t.resize, outputNodes) as Tensor[]; // [1,2254,13]
const points = await t.ld.data();
const keypoints: Array<Keypoints> = [];
const labels = points?.length === 195 ? annotations.full : annotations.upper; // full model has 39 keypoints, upper has 31 keypoints
const depth = 5; // each points has x,y,z,visibility,presence
for (let i = 0; i < points.length / depth; i++) {
const score = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 3])))) / 100; // reverse sigmoid value
// const presence = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 4])))) / 100; // reverse sigmoid value
if (score > (config.body.minConfidence || 0)) {
keypoints.push({
part: labels[i],
position: [
Math.trunc(outputSize[0] * points[depth * i + 0] / 255), // return normalized x value istead of 0..255
Math.trunc(outputSize[1] * points[depth * i + 1] / 255), // return normalized y value istead of 0..255
Math.trunc(points[depth * i + 2]) + 0, // fix negative zero
],
positionRaw: [
points[depth * i + 0] / 255, // return x value normalized to 0..1
points[depth * i + 1] / 255, // return y value normalized to 0..1
points[depth * i + 2] + 0, // fix negative zero
],
score,
});
}
}
function calculateBoxes(keypoints: Array<BodyKeypoint>, outputSize: [number, number]): { keypointsBox: Box, keypointsBoxRaw: Box } {
const x = keypoints.map((a) => a.position[0]);
const y = keypoints.map((a) => a.position[1]);
const box: Box = [
Math.min(...x),
Math.min(...y),
Math.max(...x) - Math.min(...x),
Math.max(...y) - Math.min(...x),
const keypointsBox: Box = [Math.min(...x), Math.min(...y), Math.max(...x) - Math.min(...x), Math.max(...y) - Math.min(...y)];
const keypointsBoxRaw: Box = [keypointsBox[0] / outputSize[0], keypointsBox[1] / outputSize[1], keypointsBox[2] / outputSize[0], keypointsBox[3] / outputSize[1]];
/*
const leftShoulder = keypoints.find((kpt) => kpt.part === 'leftShoulder');
const rightShoulder = keypoints.find((kpt) => kpt.part === 'rightShoulder');
if (!leftShoulder || !rightShoulder || !config.skipFrame) { // reset cache box coords
cache.box = [0, 0, 1, 1];
cache.boxRaw = cache.box;
} else { // recalculate cache box coords
const size = [leftShoulder.position[0] - rightShoulder.position[0], leftShoulder.position[1] - rightShoulder.position[1]];
const shoulderWidth = Math.sqrt((size[0] * size[0]) + (size[1] * size[1])); // distance between left and right shoulder
const shoulderCenter: Point = [(leftShoulder.position[0] + rightShoulder.position[0]) / 2, (leftShoulder.position[1] + rightShoulder.position[1]) / 2]; // center point between left and right shoulder
const bodyCenter: Point = [shoulderCenter[0], shoulderCenter[0] + (shoulderWidth), 0]; // approximate center of the body
const bodyCenterRaw: Point = [bodyCenter[0] / outputSize[0], bodyCenter[1] / outputSize[1], 0];
const bodyCenterKpt: Keypoint = { part: 'bodyCenter', positionRaw: bodyCenterRaw, position: bodyCenter, score: 1 }; // add virtual keypoint
keypoints.push(bodyCenterKpt);
const scaleFact = 2.5;
cache.box = [Math.trunc(bodyCenter[0] - (scaleFact * shoulderWidth)), Math.trunc(bodyCenter[1] - (scaleFact * shoulderWidth)), Math.trunc(2 * scaleFact * shoulderWidth), Math.trunc(2 * scaleFact * shoulderWidth)];
cache.boxRaw = [cache.box[0] / outputSize[0], cache.box[1] / outputSize[1], cache.box[2] / outputSize[0], cache.box[3] / outputSize[1]];
}
*/
return { keypointsBox, keypointsBoxRaw };
}
/**
 * Pads the input tensor to a square aspect ratio, resizes it to the pose model's
 * input size and normalizes pixel values to 0..1.
 * The applied padding is stored in the module-level `padding` variable so
 * rescaleKeypoints can undo it later.
 * Returns the input unchanged when its shape is unknown or degenerate.
 */
async function prepareImage(input: Tensor): Promise<Tensor> {
  const t: Record<string, Tensor> = {};
  if (!input.shape || !input.shape[1] || !input.shape[2]) return input;
  padding = [
    [0, 0], // dont touch batch
    [input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0, input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0], // height before&after
    [input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0, input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0], // width before&after
    [0, 0], // dont touch rgb
  ];
  t.pad = tf.pad(input as tf.Tensor4D, padding);
  t.resize = tf.image.resizeBilinear(t.pad as tf.Tensor4D, [inputSize[1][0], inputSize[1][1]]);
  const final = tf.div(t.resize, 255); // normalize 0..255 to 0..1
  Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); // release intermediate tensors
  return final;
}
/**
 * Maps keypoint coordinates from the padded/cropped model space back to the
 * original image space, undoing the letterbox padding recorded by prepareImage,
 * and refreshes the normalized positionRaw values.
 * Mutates and returns the same keypoints array.
 */
function rescaleKeypoints(keypoints: Array<BodyKeypoint>, outputSize: [number, number]): Array<BodyKeypoint> {
  const [padTop] = padding[1];
  const [padLeft] = padding[2];
  const paddedHeight = outputSize[1] + padding[1][0] + padding[1][1];
  const paddedWidth = outputSize[0] + padding[2][0] + padding[2][1];
  keypoints.forEach((kpt) => {
    const x = kpt.position[0] * paddedWidth / outputSize[0] - padLeft;
    const y = kpt.position[1] * paddedHeight / outputSize[1] - padTop;
    const z = kpt.position[2] as number;
    kpt.position = [x, y, z];
    kpt.positionRaw = [x / outputSize[0], y / outputSize[1], z];
  });
  return keypoints;
}
/**
 * Runs the blazepose landmark model on an input tensor and builds a single BodyResult.
 * Pipeline: pad/resize/normalize input -> execute model -> decode keypoints ->
 * rescale to image space -> compute box -> build annotation line segments.
 * Returns null when the average keypoint score is below config.body.minConfidence.
 * NOTE(review): assumes models[1] and outputNodes were initialized by loadPose — confirm callers guarantee this.
 */
async function detectParts(input: Tensor, config: Config, outputSize: [number, number]): Promise<BodyResult | null> {
  const t: Record<string, Tensor> = {};
  t.input = await prepareImage(input);
  // output tensor order is defined by outputNodes (differs between lite and full/heavy variants)
  [t.ld/* 1,195 */, t.segmentation/* 1,256,256,1 */, t.heatmap/* 1,64,64,39 */, t.world/* 1,117 */, t.poseflag/* 1,1 */] = await models[1]?.execute(t.input, outputNodes) as Tensor[]; // run model
  const points = await t.ld.data();
  const keypointsRelative: Array<BodyKeypoint> = [];
  const depth = 5; // each points has x,y,z,visibility,presence
  for (let i = 0; i < points.length / depth; i++) {
    // visibility decoded via sigmoid; computed inline since only one of the five values per point needs it
    const score = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 3])))) / 100; // normally this is from tf.sigmoid but no point of running sigmoid on full array which has coords as well
    // const presence = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 4])))) / 100; // reverse sigmoid value
    const positionRaw: Point = [points[depth * i + 0] / inputSize[1][0], points[depth * i + 1] / inputSize[1][1], points[depth * i + 2] + 0]; // normalized to 0..1; `+ 0` fixes negative zero
    const position: Point = [Math.trunc(outputSize[0] * positionRaw[0]), Math.trunc(outputSize[1] * positionRaw[1]), positionRaw[2] as number]; // pixel coordinates
    // if (positionRaw[0] < 0 || positionRaw[1] < 0 || positionRaw[0] > 1 || positionRaw[1] > 1) score = 0;
    keypointsRelative.push({ part: coords.kpt[i], positionRaw, position, score });
  }
  const avgScore = Math.round(100 * keypointsRelative.reduce((prev, curr) => prev += curr.score, 0) / keypointsRelative.length) / 100; // average score of keypoints
  if (avgScore < (config.body.minConfidence || 0)) return null; // low-confidence result discarded early, before box/annotation work
  const keypoints: Array<BodyKeypoint> = rescaleKeypoints(keypointsRelative, outputSize); // keypoints were relative to input image which is cropped
  const boxes = calculateBoxes(keypoints, [outputSize[0], outputSize[1]]); // now find boxes based on rescaled keypoints
  Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); // release all intermediate tensors
  // build named line segments between consecutive connected keypoints for drawing
  const annotations: Record<string, Point[][]> = {};
  for (const [name, indexes] of Object.entries(coords.connected)) {
    const pt: Array<Point[]> = [];
    for (let i = 0; i < indexes.length - 1; i++) {
      const pt0 = keypoints.find((kpt) => kpt.part === indexes[i]);
      const pt1 = keypoints.find((kpt) => kpt.part === indexes[i + 1]);
      // segment included only when both endpoints exist and pass the confidence threshold
      if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]);
    }
    annotations[name] = pt;
  }
  return { id: 0, score: avgScore, box: boxes.keypointsBox, boxRaw: boxes.keypointsBoxRaw, keypoints, annotations };
}
export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
outputSize = [input.shape[2] || 0, input.shape[1] || 0];
const bodies: Array<BodyResult> = [];
const body = await detectParts(input, config);
bodies.push(body);
/*
cache.tmpBoxes = []; // clear temp cache
if ((skipped < (config.body.skipFrames || 0)) && config.skipFrame) { // just run part detection while reusing cached boxes
const outputSize: [number, number] = [input.shape[2] || 0, input.shape[1] || 0];
if ((skipped < (config.body.skipFrames || 0)) && config.skipFrame) {
skipped++;
bodies = await Promise.all(cache.partBoxes.map((body) => detectParts(input, body, config))); // run from parts box cache
} else { // calculate new boxes and run part detection
} else {
cache = await detectParts(input, config, outputSize);
skipped = 0;
bodies = await Promise.all(cache.partBoxes.map((body) => detectParts(input, body, config))); // run from part box cache
if (bodies.length !== config.body.maxDetected) { // run body detection only if we dont have enough bodies in cache
cache.bodyBoxes = await detectBody(input, config);
const newBodies = await Promise.all(cache.bodyBoxes.map((body) => detectParts(input, body, config)));
bodies = bodies.concat(newBodies);
}
}
cache.partBoxes = [...cache.tmpBoxes]; // repopulate cache with validated bodies
*/
return bodies as BodyResult[];
if (cache) return [cache];
return [];
}

View File

@ -0,0 +1,54 @@
/* eslint-disable no-multi-spaces */

/**
 * Keypoint part names for the blazepose model.
 * Array index corresponds to the model's landmark index (39 landmarks; 5 values each in model output).
 */
export const kpt = [
  'nose', // 0
  'leftEyeInside', // 1
  'leftEye', // 2
  'leftEyeOutside', // 3
  'rightEyeInside', // 4
  'rightEye', // 5
  'rightEyeOutside', // 6
  'leftEar', // 7
  'rightEar', // 8
  'leftMouth', // 9
  'rightMouth', // 10
  'leftShoulder', // 11
  'rightShoulder', // 12
  'leftElbow', // 13
  'rightElbow', // 14
  'leftWrist', // 15
  'rightWrist', // 16
  'leftPalm', // 17
  'rightPalm', // 18
  'leftIndex', // 19
  'rightIndex', // 20
  'leftPinky', // 21
  'rightPinky', // 22
  'leftHip', // 23
  'rightHip', // 24
  'leftKnee', // 25
  'rightKnee', // 26
  'leftAnkle', // 27
  'rightAnkle', // 28
  'leftHeel', // 29
  'rightHeel', // 30
  'leftFoot', // 31
  'rightFoot', // 32
  'bodyCenter', // 33
  'bodyTop', // 34
  'leftThumb', // 35
  'leftHand', // 36
  'rightThumb', // 37
  'rightHand', // 38
];

/**
 * Ordered chains of keypoint names used to draw annotation lines between
 * consecutive connected parts. Keys become annotation names in BodyResult.
 * leftHand/rightHand/head are currently empty (no connections defined).
 */
export const connected = {
  leftLeg: ['leftHip', 'leftKnee', 'leftAnkle', 'leftHeel', 'leftFoot'],
  rightLeg: ['rightHip', 'rightKnee', 'rightAnkle', 'rightHeel', 'rightFoot'],
  torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'],
  leftArm: ['leftShoulder', 'leftElbow', 'leftWrist', 'leftPalm'],
  rightArm: ['rightShoulder', 'rightElbow', 'rightWrist', 'rightPalm'],
  leftHand: [],
  rightHand: [],
  head: [],
};

View File

@ -6,23 +6,20 @@
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import type { BodyResult, Box, Point } from '../result';
import * as coords from './efficientposecoords';
import type { BodyKeypoint, BodyResult, Box, Point } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
let model: GraphModel | null;
type Keypoints = { score: number, part: string, position: Point, positionRaw: Point };
const keypoints: Array<Keypoints> = [];
const keypoints: Array<BodyKeypoint> = [];
let box: Box = [0, 0, 0, 0];
let boxRaw: Box = [0, 0, 0, 0];
let score = 0;
let skipped = Number.MAX_SAFE_INTEGER;
const bodyParts = ['head', 'neck', 'rightShoulder', 'rightElbow', 'rightWrist', 'chest', 'leftShoulder', 'leftElbow', 'leftWrist', 'pelvis', 'rightHip', 'rightKnee', 'rightAnkle', 'leftHip', 'leftKnee', 'leftAnkle'];
export async function load(config: Config): Promise<GraphModel> {
if (env.initial) model = null;
if (!model) {
@ -41,9 +38,9 @@ function max2d(inputs, minScore) {
const reshaped = tf.reshape(inputs, [height * width]); // combine all data
const newScore = tf.max(reshaped, 0).dataSync()[0]; // get highest score // inside tf.tidy
if (newScore > minScore) { // skip coordinate calculation is score is too low
const coords = tf.argMax(reshaped, 0);
const x = mod(coords, width).dataSync()[0]; // inside tf.tidy
const y = tf.div(coords, tf.scalar(width, 'int32')).dataSync()[0]; // inside tf.tidy
const coordinates = tf.argMax(reshaped, 0);
const x = mod(coordinates, width).dataSync()[0]; // inside tf.tidy
const y = tf.div(coordinates, tf.scalar(width, 'int32')).dataSync()[0]; // inside tf.tidy
return [x, y, newScore];
}
return [0, 0, newScore];
@ -53,7 +50,7 @@ function max2d(inputs, minScore) {
export async function predict(image: Tensor, config: Config): Promise<BodyResult[]> {
if ((skipped < (config.body?.skipFrames || 0)) && config.skipFrame && Object.keys(keypoints).length > 0) {
skipped++;
return [{ id: 0, score, box, boxRaw, keypoints }];
return [{ id: 0, score, box, boxRaw, keypoints, annotations: {} }];
}
skipped = 0;
return new Promise(async (resolve) => {
@ -83,7 +80,7 @@ export async function predict(image: Tensor, config: Config): Promise<BodyResult
if (score > (config.body?.minConfidence || 0)) {
keypoints.push({
score: Math.round(100 * partScore) / 100,
part: bodyParts[id],
part: coords.kpt[id],
positionRaw: [ // normalized to 0..1
// @ts-ignore model is not undefined here
x / model.inputs[0].shape[2], y / model.inputs[0].shape[1],
@ -114,6 +111,16 @@ export async function predict(image: Tensor, config: Config): Promise<BodyResult
Math.max(...xRaw) - Math.min(...xRaw),
Math.max(...yRaw) - Math.min(...yRaw),
];
resolve([{ id: 0, score, box, boxRaw, keypoints }]);
const annotations: Record<string, Point[][]> = {};
for (const [name, indexes] of Object.entries(coords.connected)) {
const pt: Array<Point[]> = [];
for (let i = 0; i < indexes.length - 1; i++) {
const pt0 = keypoints.find((kpt) => kpt.part === indexes[i]);
const pt1 = keypoints.find((kpt) => kpt.part === indexes[i + 1]);
if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]);
}
annotations[name] = pt;
}
resolve([{ id: 0, score, box, boxRaw, keypoints, annotations }]);
});
}

View File

@ -0,0 +1,27 @@
/**
 * Keypoint part names for the efficientpose model.
 * Array index corresponds to the model's output part index (16 parts).
 */
export const kpt = [
  'head', // 0
  'neck', // 1
  'rightShoulder', // 2
  'rightElbow', // 3
  'rightWrist', // 4
  'chest', // 5
  'leftShoulder', // 6
  'leftElbow', // 7
  'leftWrist', // 8
  'bodyCenter', // 9
  'rightHip', // 10
  'rightKnee', // 11
  'rightAnkle', // 12
  'leftHip', // 13
  'leftKnee', // 14
  'leftAnkle', // 15
];

/**
 * Ordered chains of keypoint names used to draw annotation lines between
 * consecutive connected parts. Keys become annotation names in BodyResult.
 * head is currently empty (no connections defined).
 */
export const connected = {
  leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'],
  rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'],
  torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'],
  leftArm: ['leftShoulder', 'leftElbow', 'leftWrist'],
  rightArm: ['rightShoulder', 'rightElbow', 'rightWrist'],
  head: [],
};

View File

@ -7,7 +7,8 @@
import { log, join } from '../util/util';
import { scale } from '../util/box';
import * as tf from '../../dist/tfjs.esm.js';
import type { BodyResult, Box, Point } from '../result';
import * as coords from './movenetcoords';
import type { BodyKeypoint, BodyResult, Box, Point } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { fakeOps } from '../tfjs/backend';
@ -17,13 +18,8 @@ let model: GraphModel | null;
let inputSize = 0;
const cachedBoxes: Array<Box> = [];
type Keypoints = { score: number, part: string, position: Point, positionRaw: Point };
type Body = { id: number, score: number, box: Box, boxRaw: Box, keypoints: Array<Keypoints> }
let skipped = Number.MAX_SAFE_INTEGER;
const keypoints: Array<Keypoints> = [];
const bodyParts = ['nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder', 'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist', 'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle'];
const keypoints: Array<BodyKeypoint> = [];
export async function load(config: Config): Promise<GraphModel> {
if (env.initial) model = null;
@ -71,7 +67,7 @@ async function parseSinglePose(res, config, image, inputBox) {
];
keypoints.push({
score: Math.round(100 * score) / 100,
part: bodyParts[id],
part: coords.kpt[id],
positionRaw,
position: [ // normalized to input image size
Math.round((image.shape[2] || 0) * positionRaw[0]),
@ -81,14 +77,24 @@ async function parseSinglePose(res, config, image, inputBox) {
}
}
score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
const bodies: Array<Body> = [];
const bodies: Array<BodyResult> = [];
const [box, boxRaw] = createBox(keypoints);
bodies.push({ id: 0, score, box, boxRaw, keypoints });
const annotations: Record<string, Point[][]> = {};
for (const [name, indexes] of Object.entries(coords.connected)) {
const pt: Array<Point[]> = [];
for (let i = 0; i < indexes.length - 1; i++) {
const pt0 = keypoints.find((kp) => kp.part === indexes[i]);
const pt1 = keypoints.find((kp) => kp.part === indexes[i + 1]);
if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]);
}
annotations[name] = pt;
}
bodies.push({ id: 0, score, box, boxRaw, keypoints, annotations });
return bodies;
}
async function parseMultiPose(res, config, image, inputBox) {
const bodies: Array<Body> = [];
const bodies: Array<BodyResult> = [];
for (let id = 0; id < res[0].length; id++) {
const kpt = res[0][id];
const totalScore = Math.round(100 * kpt[51 + 4]) / 100;
@ -102,7 +108,7 @@ async function parseMultiPose(res, config, image, inputBox) {
(inputBox[2] - inputBox[0]) * kpt[3 * i + 0] + inputBox[0],
];
keypoints.push({
part: bodyParts[i],
part: coords.kpt[i],
score: Math.round(100 * score) / 100,
positionRaw,
position: [
@ -112,11 +118,21 @@ async function parseMultiPose(res, config, image, inputBox) {
});
}
}
// const [box, boxRaw] = createBox(keypoints);
const [box, boxRaw] = createBox(keypoints);
// movenet-multipose has built-in box details
const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
const box: Box = [Math.trunc(boxRaw[0] * (image.shape[2] || 0)), Math.trunc(boxRaw[1] * (image.shape[1] || 0)), Math.trunc(boxRaw[2] * (image.shape[2] || 0)), Math.trunc(boxRaw[3] * (image.shape[1] || 0))];
bodies.push({ id, score: totalScore, boxRaw, box, keypoints: [...keypoints] });
// const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]];
// const box: Box = [Math.trunc(boxRaw[0] * (image.shape[2] || 0)), Math.trunc(boxRaw[1] * (image.shape[1] || 0)), Math.trunc(boxRaw[2] * (image.shape[2] || 0)), Math.trunc(boxRaw[3] * (image.shape[1] || 0))];
const annotations: Record<string, Point[][]> = {};
for (const [name, indexes] of Object.entries(coords.connected)) {
const pt: Array<Point[]> = [];
for (let i = 0; i < indexes.length - 1; i++) {
const pt0 = keypoints.find((kp) => kp.part === indexes[i]);
const pt1 = keypoints.find((kp) => kp.part === indexes[i + 1]);
if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]);
}
annotations[name] = pt;
}
bodies.push({ id, score: totalScore, boxRaw, box, keypoints: [...keypoints], annotations });
}
}
bodies.sort((a, b) => b.score - a.score);
@ -129,7 +145,7 @@ export async function predict(input: Tensor, config: Config): Promise<BodyResult
return new Promise(async (resolve) => {
const t: Record<string, Tensor> = {};
let bodies: Array<Body> = [];
let bodies: Array<BodyResult> = [];
if (!config.skipFrame) cachedBoxes.length = 0; // allowed to use cache or not
skipped++;

28
src/body/movenetcoords.ts Normal file
View File

@ -0,0 +1,28 @@
/**
 * Keypoint part names for the movenet model.
 * Array index corresponds to the model's output keypoint index (17 keypoints).
 */
export const kpt = [
  'nose', // 0
  'leftEye', // 1
  'rightEye', // 2
  'leftEar', // 3
  'rightEar', // 4
  'leftShoulder', // 5
  'rightShoulder', // 6
  'leftElbow', // 7
  'rightElbow', // 8
  'leftWrist', // 9
  'rightWrist', // 10
  'leftHip', // 11
  'rightHip', // 12
  'leftKnee', // 13
  'rightKnee', // 14
  'leftAnkle', // 15
  'rightAnkle', // 16
];

/**
 * Ordered chains of keypoint names used to draw annotation lines between
 * consecutive connected parts. Keys become annotation names in BodyResult.
 * head is currently empty (no connections defined).
 */
export const connected = {
  leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'],
  rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'],
  torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'],
  leftArm: ['leftShoulder', 'leftElbow', 'leftWrist'],
  rightArm: ['rightShoulder', 'rightElbow', 'rightWrist'],
  head: [],
};

View File

@ -411,7 +411,7 @@ const config: Config = {
// only valid for posenet and movenet-multipose as other models detects single pose
// set to -1 to autodetect based on number of detected faces
minConfidence: 0.2, // threshold for discarding a prediction
skipFrames: 1, // how many max frames to go without re-running the detector
skipFrames: 5, // how many max frames to go without re-running the detector
// only used when cacheSensitivity is not zero
},

View File

@ -8,7 +8,6 @@ import { defaults } from './config';
import * as tf from '../dist/tfjs.esm.js';
import * as app from '../package.json';
import * as backend from './tfjs/backend';
// import * as blazepose from './body/blazepose-v1';
import * as blazepose from './body/blazepose';
import * as centernet from './object/centernet';
import * as draw from './util/draw';

View File

@ -183,6 +183,8 @@ export function process(input: Input, config: Config): { tensor: Tensor | null,
tempCanvas.height = targetHeight;
const tempCtx = tempCanvas.getContext('2d');
tempCtx?.drawImage(outCanvas, 0, 0);
console.log('PIXELS', tempCanvas);
pixels = (tf.browser && env.browser) ? tf.browser.fromPixels(tempCanvas) : null;
try {
pixels = (tf.browser && env.browser) ? tf.browser.fromPixels(tempCanvas) : null;
} catch (err) {

View File

@ -59,6 +59,13 @@ export interface FaceResult {
tensor?: Tensor,
}
/** A single detected body keypoint, shared by all body models */
export type BodyKeypoint = {
  part: string, // keypoint name, e.g. 'leftShoulder' (per-model coords list)
  position: Point, // position in pixel coordinates of the input image
  positionRaw: Point, // position normalized to 0..1
  score: number, // detection confidence for this keypoint
}
/** Body results
*
* Each results has:
@ -77,13 +84,8 @@ export interface BodyResult {
score: number,
box: Box,
boxRaw: Box,
keypoints: Array<{
part: string,
position: Point,
positionRaw: Point,
score: number,
presence?: number,
}>
annotations: Record<string, Point[][]>,
keypoints: Array<BodyKeypoint>
}
/** Hand results

View File

@ -90,10 +90,8 @@ export async function check(instance, force = false) {
}
// handle webgpu
if (tf.getBackend() === 'humangl') {
if (tf.getBackend() === 'webgpu') {
tf.ENV.set('WEBGPU_USE_GLSL', true);
tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', false);
tf.ENV.set('WEBGL_USE_SHAPES_UNIFORMS', true);
}
// wait for ready

View File

@ -67,7 +67,7 @@ export async function register(instance): Promise<void> {
// log('gpu memory usage:', instance.tf.engine().backendInstance.numBytesInGPU);
log('possible browser memory leak using webgl');
instance.emit('error');
throw new Error('browser webgl error');
// throw new Error('browser webgl error');
/*
log('resetting humangl backend');
env.initial = true;

View File

@ -4,7 +4,7 @@
import { TRI468 as triangulation } from '../face/facemeshcoords';
import { mergeDeep, now } from './util';
import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult } from '../result';
import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult, Point } from '../result';
/**
* Draw Options
@ -102,7 +102,7 @@ function rect(ctx, x, y, width, height, localOptions) {
ctx.stroke();
}
function lines(ctx, points: [number, number, number?][] = [], localOptions) {
function lines(ctx, points: Point[] = [], localOptions) {
if (points === undefined || points.length === 0) return;
ctx.beginPath();
ctx.moveTo(points[0][0], points[0][1]);
@ -119,7 +119,7 @@ function lines(ctx, points: [number, number, number?][] = [], localOptions) {
}
}
function curves(ctx, points: [number, number, number?][] = [], localOptions) {
function curves(ctx, points: Point[] = [], localOptions) {
if (points === undefined || points.length === 0) return;
if (!localOptions.useCurves || points.length <= 2) {
lines(ctx, points, localOptions);
@ -288,91 +288,23 @@ export async function body(inCanvas: HTMLCanvasElement | OffscreenCanvas, result
ctx.fillText(`body ${100 * result[i].score}%`, result[i].box[0] + 2, 0 + result[i].box[1] + localOptions.lineHeight, result[i].box[2]);
}
}
if (localOptions.drawPoints) {
if (localOptions.drawPoints && result[i].keypoints) {
for (let pt = 0; pt < result[i].keypoints.length; pt++) {
ctx.fillStyle = localOptions.useDepth && result[i].keypoints[pt].position[2] ? `rgba(${127.5 + (2 * (result[i].keypoints[pt].position[2] || 0))}, ${127.5 - (2 * (result[i].keypoints[pt].position[2] || 0))}, 255, 0.5)` : localOptions.color;
point(ctx, result[i].keypoints[pt].position[0], result[i].keypoints[pt].position[1], 0, localOptions);
}
}
if (localOptions.drawLabels) {
if (localOptions.drawLabels && result[i].keypoints) {
ctx.font = localOptions.font;
if (result[i].keypoints) {
for (const pt of result[i].keypoints) {
ctx.fillStyle = localOptions.useDepth && pt.position[2] ? `rgba(${127.5 + (2 * pt.position[2])}, ${127.5 - (2 * pt.position[2])}, 255, 0.5)` : localOptions.color;
ctx.fillText(`${pt.part} ${Math.trunc(100 * pt.score)}%`, pt.position[0] + 4, pt.position[1] + 4);
}
for (const pt of result[i].keypoints) {
ctx.fillStyle = localOptions.useDepth && pt.position[2] ? `rgba(${127.5 + (2 * pt.position[2])}, ${127.5 - (2 * pt.position[2])}, 255, 0.5)` : localOptions.color;
ctx.fillText(`${pt.part} ${Math.trunc(100 * pt.score)}%`, pt.position[0] + 4, pt.position[1] + 4);
}
}
if (localOptions.drawPolygons && result[i].keypoints) {
let part;
const points: [number, number, number?][] = [];
// shoulder line
points.length = 0;
part = result[i].keypoints.find((a) => a.part === 'leftShoulder');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'rightShoulder');
if (part) points.push([part.position[0], part.position[1]]);
curves(ctx, points, localOptions);
// torso main
points.length = 0;
part = result[i].keypoints.find((a) => a.part === 'rightShoulder');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'rightHip');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'leftHip');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'leftShoulder');
if (part) points.push([part.position[0], part.position[1]]);
if (points.length === 4) lines(ctx, points, localOptions); // only draw if we have complete torso
// leg left
points.length = 0;
part = result[i].keypoints.find((a) => a.part === 'leftHip');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'leftKnee');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'leftAnkle');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'leftHeel');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'leftFoot');
if (part) points.push([part.position[0], part.position[1]]);
curves(ctx, points, localOptions);
// leg right
points.length = 0;
part = result[i].keypoints.find((a) => a.part === 'rightHip');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'rightKnee');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'rightAnkle');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'rightHeel');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'rightFoot');
if (part) points.push([part.position[0], part.position[1]]);
curves(ctx, points, localOptions);
// arm left
points.length = 0;
part = result[i].keypoints.find((a) => a.part === 'leftShoulder');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'leftElbow');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'leftWrist');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'leftPalm');
if (part) points.push([part.position[0], part.position[1]]);
curves(ctx, points, localOptions);
// arm right
points.length = 0;
part = result[i].keypoints.find((a) => a.part === 'rightShoulder');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'rightElbow');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'rightWrist');
if (part) points.push([part.position[0], part.position[1]]);
part = result[i].keypoints.find((a) => a.part === 'rightPalm');
if (part) points.push([part.position[0], part.position[1]]);
curves(ctx, points, localOptions);
// draw all
if (localOptions.drawPolygons && result[i].keypoints && result[i].annotations) {
for (const part of Object.values(result[i].annotations)) {
for (const connected of part) curves(ctx, connected, localOptions);
}
}
}
}