mirror of https://github.com/vladmandic/human
prototype blazepose detector
parent
02d883c00f
commit
cf304bc514
|
@ -9,8 +9,9 @@
|
||||||
|
|
||||||
## Changelog
|
## Changelog
|
||||||
|
|
||||||
### **HEAD -> main** 2021/11/19 mandic00@live.com
|
### **HEAD -> main** 2021/11/21 mandic00@live.com
|
||||||
|
|
||||||
|
- add body 3d interpolation
|
||||||
- edit blazepose keypoints
|
- edit blazepose keypoints
|
||||||
- new build process
|
- new build process
|
||||||
|
|
||||||
|
|
|
@ -7,9 +7,9 @@
|
||||||
* @license MIT
|
* @license MIT
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { Human } from '../../dist/human.esm.js'; // equivalent of @vladmandic/Human
|
import { Human, Config } from '../../dist/human.esm.js'; // equivalent of @vladmandic/Human
|
||||||
|
|
||||||
const humanConfig = { // user configuration for human, used to fine-tune behavior
|
const humanConfig: Partial<Config> = { // user configuration for human, used to fine-tune behavior
|
||||||
// backend: 'webgpu' as 'webgpu,
|
// backend: 'webgpu' as 'webgpu,
|
||||||
// async: true,
|
// async: true,
|
||||||
modelBasePath: '../../models',
|
modelBasePath: '../../models',
|
||||||
|
|
|
@ -9,48 +9,58 @@ import type { BodyKeypoint, BodyResult, Box, Point } from '../result';
|
||||||
import type { GraphModel, Tensor } from '../tfjs/types';
|
import type { GraphModel, Tensor } from '../tfjs/types';
|
||||||
import type { Config } from '../config';
|
import type { Config } from '../config';
|
||||||
import * as coords from './blazeposecoords';
|
import * as coords from './blazeposecoords';
|
||||||
|
import * as detect from './blazeposedetector';
|
||||||
|
|
||||||
|
interface DetectedBox { box: Box, boxRaw: Box, score: number }
|
||||||
|
|
||||||
const env = { initial: true };
|
const env = { initial: true };
|
||||||
const models: [GraphModel | null, GraphModel | null] = [null, null];
|
// const models: [GraphModel | null, GraphModel | null] = [null, null];
|
||||||
const inputSize = [[0, 0], [0, 0]];
|
const models: { detector: GraphModel | null, landmarks: GraphModel | null } = { detector: null, landmarks: null };
|
||||||
|
const inputSize: { detector: [number, number], landmarks: [number, number] } = { detector: [224, 224], landmarks: [256, 256] };
|
||||||
let skipped = Number.MAX_SAFE_INTEGER;
|
let skipped = Number.MAX_SAFE_INTEGER;
|
||||||
let outputNodes: string[]; // different for lite/full/heavy
|
const outputNodes: { detector: string[], landmarks: string[] } = {
|
||||||
|
landmarks: ['ld_3d', 'activation_segmentation', 'activation_heatmap', 'world_3d', 'output_poseflag'],
|
||||||
|
detector: [],
|
||||||
|
};
|
||||||
|
|
||||||
let cache: BodyResult | null = null;
|
let cache: BodyResult | null = null;
|
||||||
|
let lastBox: Box | undefined;
|
||||||
let padding: [number, number][] = [[0, 0], [0, 0], [0, 0], [0, 0]];
|
let padding: [number, number][] = [[0, 0], [0, 0], [0, 0], [0, 0]];
|
||||||
let lastTime = 0;
|
let lastTime = 0;
|
||||||
|
|
||||||
|
// logistic function written as 1 - 1/(1 + e^x): squashes any real input into the 0..1 range
const sigmoid = (x) => {
  const denominator = 1 + Math.exp(x);
  return 1 - (1 / denominator);
};
|
||||||
|
|
||||||
export async function loadDetect(config: Config): Promise<GraphModel> {
|
export async function loadDetect(config: Config): Promise<GraphModel> {
|
||||||
if (env.initial) models[0] = null;
|
if (env.initial) models.detector = null;
|
||||||
if (!models[0] && config.body.detector?.modelPath || '') {
|
if (!models.detector && config.body['detector'] && config.body['detector']['modelPath'] || '') {
|
||||||
models[0] = await tf.loadGraphModel(join(config.modelBasePath, config.body.detector?.modelPath || '')) as unknown as GraphModel;
|
models.detector = await tf.loadGraphModel(join(config.modelBasePath, config.body['detector']['modelPath'] || '')) as unknown as GraphModel;
|
||||||
const inputs = Object.values(models[0].modelSignature['inputs']);
|
const inputs = Object.values(models.detector.modelSignature['inputs']);
|
||||||
inputSize[0][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
|
inputSize.detector[0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
|
||||||
inputSize[0][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
|
inputSize.detector[1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
|
||||||
if (!models[0] || !models[0]['modelUrl']) log('load model failed:', config.body.detector?.modelPath);
|
if (!models.detector || !models.detector['modelUrl']) log('load model failed:', config.body['detector']['modelPath']);
|
||||||
else if (config.debug) log('load model:', models[0]['modelUrl']);
|
else if (config.debug) log('load model:', models.detector['modelUrl']);
|
||||||
} else if (config.debug && models[0]) log('cached model:', models[0]['modelUrl']);
|
} else if (config.debug && models.detector) log('cached model:', models.detector['modelUrl']);
|
||||||
return models[0] as GraphModel;
|
await detect.createAnchors();
|
||||||
|
return models.detector as GraphModel;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function loadPose(config: Config): Promise<GraphModel> {
|
export async function loadPose(config: Config): Promise<GraphModel> {
|
||||||
if (env.initial) models[1] = null;
|
if (env.initial) models.landmarks = null;
|
||||||
if (!models[1]) {
|
if (!models.landmarks) {
|
||||||
models[1] = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel;
|
models.landmarks = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel;
|
||||||
const inputs = Object.values(models[1].modelSignature['inputs']);
|
const inputs = Object.values(models.landmarks.modelSignature['inputs']);
|
||||||
inputSize[1][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
|
inputSize.landmarks[0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
|
||||||
inputSize[1][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
|
inputSize.landmarks[1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
|
||||||
if (config.body.modelPath?.includes('lite')) outputNodes = ['ld_3d', 'output_segmentation', 'output_heatmap', 'world_3d', 'output_poseflag'];
|
if (!models.landmarks || !models.landmarks['modelUrl']) log('load model failed:', config.body.modelPath);
|
||||||
else outputNodes = ['Identity', 'Identity_2', 'Identity_3', 'Identity_4', 'Identity_1']; // v2 from pinto full and heavy
|
else if (config.debug) log('load model:', models.landmarks['modelUrl']);
|
||||||
if (!models[1] || !models[1]['modelUrl']) log('load model failed:', config.body.modelPath);
|
} else if (config.debug) log('cached model:', models.landmarks['modelUrl']);
|
||||||
else if (config.debug) log('load model:', models[1]['modelUrl']);
|
return models.landmarks;
|
||||||
} else if (config.debug) log('cached model:', models[1]['modelUrl']);
|
|
||||||
return models[1];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function load(config: Config): Promise<[GraphModel | null, GraphModel | null]> {
|
export async function load(config: Config): Promise<[GraphModel | null, GraphModel | null]> {
|
||||||
if (!models[0]) await loadDetect(config);
|
if (!models.detector) await loadDetect(config);
|
||||||
if (!models[1]) await loadPose(config);
|
if (!models.landmarks) await loadPose(config);
|
||||||
return models;
|
return [models.detector, models.landmarks];
|
||||||
}
|
}
|
||||||
|
|
||||||
function calculateBoxes(keypoints: Array<BodyKeypoint>, outputSize: [number, number]): { keypointsBox: Box, keypointsBoxRaw: Box } {
|
function calculateBoxes(keypoints: Array<BodyKeypoint>, outputSize: [number, number]): { keypointsBox: Box, keypointsBoxRaw: Box } {
|
||||||
|
@ -61,22 +71,32 @@ function calculateBoxes(keypoints: Array<BodyKeypoint>, outputSize: [number, num
|
||||||
return { keypointsBox, keypointsBoxRaw };
|
return { keypointsBox, keypointsBoxRaw };
|
||||||
}
|
}
|
||||||
|
|
||||||
async function prepareImage(input: Tensor): Promise<Tensor> {
|
async function prepareImage(input: Tensor, size: number, box?: Box): Promise<Tensor> {
|
||||||
const t: Record<string, Tensor> = {};
|
const t: Record<string, Tensor> = {};
|
||||||
if (!input.shape || !input.shape[1] || !input.shape[2]) return input;
|
if (!input.shape || !input.shape[1] || !input.shape[2]) return input;
|
||||||
let final: Tensor;
|
let final: Tensor;
|
||||||
if (input.shape[1] !== input.shape[2]) { // only pad if width different than height
|
if (input.shape[1] !== input.shape[2]) { // only pad if width different than height
|
||||||
|
const height: [number, number] = box
|
||||||
|
? [Math.trunc(input.shape[1] * box[1]), Math.trunc(input.shape[1] * (box[1] + box[3]))]
|
||||||
|
: [input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0, input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0];
|
||||||
|
const width: [number, number] = box
|
||||||
|
? [Math.trunc(input.shape[2] * box[0]), Math.trunc(input.shape[2] * (box[0] + box[2]))]
|
||||||
|
: [input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0, input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0];
|
||||||
padding = [
|
padding = [
|
||||||
[0, 0], // dont touch batch
|
[0, 0], // dont touch batch
|
||||||
[input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0, input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0], // height before&after
|
height, // height before&after
|
||||||
[input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0, input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0], // width before&after
|
width, // width before&after
|
||||||
[0, 0], // dont touch rbg
|
[0, 0], // dont touch rbg
|
||||||
];
|
];
|
||||||
t.pad = tf.pad(input, padding);
|
if (box) {
|
||||||
t.resize = tf.image.resizeBilinear(t.pad, [inputSize[1][0], inputSize[1][1]]);
|
t.resize = tf.image.cropAndResize(input, [box], [0], [size, size]);
|
||||||
|
} else {
|
||||||
|
t.pad = tf.pad(input, padding);
|
||||||
|
t.resize = tf.image.resizeBilinear(t.pad, [size, size]);
|
||||||
|
}
|
||||||
final = tf.div(t.resize, constants.tf255);
|
final = tf.div(t.resize, constants.tf255);
|
||||||
} else if (input.shape[1] !== inputSize[1][0]) { // if input needs resizing
|
} else if (input.shape[1] !== size) { // if input needs resizing
|
||||||
t.resize = tf.image.resizeBilinear(input, [inputSize[1][0], inputSize[1][1]]);
|
t.resize = tf.image.resizeBilinear(input, [size, size]);
|
||||||
final = tf.div(t.resize, constants.tf255);
|
final = tf.div(t.resize, constants.tf255);
|
||||||
} else { // if input is already in a correct resolution just normalize it
|
} else { // if input is already in a correct resolution just normalize it
|
||||||
final = tf.div(input, constants.tf255);
|
final = tf.div(input, constants.tf255);
|
||||||
|
@ -88,47 +108,54 @@ async function prepareImage(input: Tensor): Promise<Tensor> {
|
||||||
function rescaleKeypoints(keypoints: Array<BodyKeypoint>, outputSize: [number, number]): Array<BodyKeypoint> {
|
function rescaleKeypoints(keypoints: Array<BodyKeypoint>, outputSize: [number, number]): Array<BodyKeypoint> {
|
||||||
for (const kpt of keypoints) {
|
for (const kpt of keypoints) {
|
||||||
kpt.position = [
|
kpt.position = [
|
||||||
kpt.position[0] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0] - padding[2][0],
|
Math.trunc(kpt.position[0] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0] - padding[2][0]),
|
||||||
kpt.position[1] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1] - padding[1][0],
|
Math.trunc(kpt.position[1] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1] - padding[1][0]),
|
||||||
kpt.position[2] as number,
|
kpt.position[2] as number,
|
||||||
];
|
];
|
||||||
kpt.positionRaw = [
|
kpt.positionRaw = [kpt.position[0] / outputSize[0], kpt.position[1] / outputSize[1], kpt.position[2] as number];
|
||||||
kpt.position[0] / outputSize[0], kpt.position[1] / outputSize[1], kpt.position[2] as number,
|
|
||||||
];
|
|
||||||
}
|
}
|
||||||
return keypoints;
|
return keypoints;
|
||||||
}
|
}
|
||||||
|
|
||||||
const sigmoid = (x) => (1 - (1 / (1 + Math.exp(x))));
|
/**
 * Stretch detected boxes by the ratio between the padded frame and the output frame.
 *
 * Reads the module-level `padding` (index 1 = height before/after, index 2 = width before/after)
 * and mutates every entry of `boxes` in place: `box` is rewritten with truncated pixel values
 * and `boxRaw` is recomputed from the new `box` normalized by `outputSize`.
 *
 * @param boxes detected boxes in [x, y, width, height] order
 * @param outputSize [width, height] of the output frame
 * @returns the same `boxes` array that was passed in
 */
function rescaleBoxes(boxes: Array<DetectedBox>, outputSize: [number, number]): Array<DetectedBox> {
  const [outWidth, outHeight] = outputSize;
  const paddedWidth = outWidth + padding[2][0] + padding[2][1];
  const paddedHeight = outHeight + padding[1][0] + padding[1][1];
  boxes.forEach((detected) => {
    const [x, y, w, h] = detected.box;
    // multiply first, divide second: keeps the exact arithmetic order of the per-coordinate formula
    detected.box = [
      Math.trunc(x * paddedWidth / outWidth),
      Math.trunc(y * paddedHeight / outHeight),
      Math.trunc(w * paddedWidth / outWidth),
      Math.trunc(h * paddedHeight / outHeight),
    ];
    detected.boxRaw = [
      detected.box[0] / outWidth,
      detected.box[1] / outHeight,
      detected.box[2] / outWidth,
      detected.box[3] / outHeight,
    ];
  });
  return boxes;
}
|
||||||
|
|
||||||
async function detectParts(input: Tensor, config: Config, outputSize: [number, number]): Promise<BodyResult | null> {
|
async function detectLandmarks(input: Tensor, config: Config, outputSize: [number, number]): Promise<BodyResult | null> {
|
||||||
const t: Record<string, Tensor> = {};
|
|
||||||
t.input = await prepareImage(input);
|
|
||||||
/**
|
/**
|
||||||
* t.ld: 39 keypoints [x,y,z,score,presence] normalized to input size
|
* t.ld: 39 keypoints [x,y,z,score,presence] normalized to input size
|
||||||
* t.segmentation:
|
* t.segmentation:
|
||||||
* t.heatmap:
|
* t.heatmap:
|
||||||
* t.world: 39 keypoints [x,y,z] normalized to -1..1
|
* t.world: 39 keypoints [x,y,z] normalized to -1..1
|
||||||
* t.poseflag: body score
|
* t.poseflag: body score
|
||||||
*/
|
*/
|
||||||
[t.ld/* 1,195(39*5) */, t.segmentation/* 1,256,256,1 */, t.heatmap/* 1,64,64,39 */, t.world/* 1,117(39*3) */, t.poseflag/* 1,1 */] = models[1]?.execute(t.input, outputNodes) as Tensor[]; // run model
|
const t: Record<string, Tensor> = {};
|
||||||
const poseScoreRaw = (await t.poseflag.data())[0];
|
[t.ld/* 1,195(39*5) */, t.segmentation/* 1,256,256,1 */, t.heatmap/* 1,64,64,39 */, t.world/* 1,117(39*3) */, t.poseflag/* 1,1 */] = models.landmarks?.execute(input, outputNodes.landmarks) as Tensor[]; // run model
|
||||||
const poseScore = Math.max(0, (poseScoreRaw - 0.8) / (1 - 0.8)); // blow up score variance 5x
|
const poseScore = (await t.poseflag.data())[0];
|
||||||
const points = await t.ld.data();
|
const points = await t.ld.data();
|
||||||
|
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); // dont need tensors after this
|
||||||
const keypointsRelative: Array<BodyKeypoint> = [];
|
const keypointsRelative: Array<BodyKeypoint> = [];
|
||||||
const depth = 5; // each points has x,y,z,visibility,presence
|
const depth = 5; // each points has x,y,z,visibility,presence
|
||||||
for (let i = 0; i < points.length / depth; i++) {
|
for (let i = 0; i < points.length / depth; i++) {
|
||||||
const score = sigmoid(points[depth * i + 3]);
|
const score = sigmoid(points[depth * i + 3]);
|
||||||
const presence = sigmoid(points[depth * i + 4]);
|
const presence = sigmoid(points[depth * i + 4]);
|
||||||
const adjScore = Math.trunc(100 * score * presence * poseScore) / 100;
|
const adjScore = Math.trunc(100 * score * presence * poseScore) / 100;
|
||||||
const positionRaw: Point = [points[depth * i + 0] / inputSize[1][0], points[depth * i + 1] / inputSize[1][1], points[depth * i + 2] + 0];
|
const positionRaw: Point = [points[depth * i + 0] / inputSize.landmarks[0], points[depth * i + 1] / inputSize.landmarks[1], points[depth * i + 2] + 0];
|
||||||
const position: Point = [Math.trunc(outputSize[0] * positionRaw[0]), Math.trunc(outputSize[1] * positionRaw[1]), positionRaw[2] as number];
|
const position: Point = [Math.trunc(outputSize[0] * positionRaw[0]), Math.trunc(outputSize[1] * positionRaw[1]), positionRaw[2] as number];
|
||||||
keypointsRelative.push({ part: coords.kpt[i], positionRaw, position, score: adjScore });
|
keypointsRelative.push({ part: coords.kpt[i], positionRaw, position, score: adjScore });
|
||||||
}
|
}
|
||||||
if (poseScore < (config.body.minConfidence || 0)) return null;
|
if (poseScore < (config.body.minConfidence || 0)) return null;
|
||||||
const keypoints: Array<BodyKeypoint> = rescaleKeypoints(keypointsRelative, outputSize); // keypoints were relative to input image which is cropped
|
const keypoints: Array<BodyKeypoint> = rescaleKeypoints(keypointsRelative, outputSize); // keypoints were relative to input image which is padded
|
||||||
const boxes = calculateBoxes(keypoints, [outputSize[0], outputSize[1]]); // now find boxes based on rescaled keypoints
|
const boxes = calculateBoxes(keypoints, [outputSize[0], outputSize[1]]); // now find boxes based on rescaled keypoints
|
||||||
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
|
|
||||||
const annotations: Record<string, Point[][]> = {};
|
const annotations: Record<string, Point[][]> = {};
|
||||||
for (const [name, indexes] of Object.entries(coords.connected)) {
|
for (const [name, indexes] of Object.entries(coords.connected)) {
|
||||||
const pt: Array<Point[]> = [];
|
const pt: Array<Point[]> = [];
|
||||||
|
@ -144,6 +171,19 @@ async function detectParts(input: Tensor, config: Config, outputSize: [number, n
|
||||||
return body;
|
return body;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Run the detector model on a prepared input and return the decoded, rescaled boxes.
 *
 * The model's 'Identity' output is split along its last axis: channel 0 is used as the
 * score logits and the remaining channels as raw box data. Both are squeezed and handed
 * to `detect.decode`; the resulting boxes are then adjusted in place by `rescaleBoxes`.
 * Every intermediate tensor created here is disposed before returning.
 *
 * @param input tensor passed directly to the detector model
 * @param config library configuration, forwarded to the decoder
 * @param outputSize [width, height] of the output frame
 */
async function detectBoxes(input: Tensor, config: Config, outputSize: [number, number]) {
  const tensors: Record<string, Tensor> = {};
  tensors.res = models.detector?.execute(input, ['Identity']) as Tensor;
  // first channel holds logits, everything after it holds box data
  tensors.logitsRaw = tf.slice(tensors.res, [0, 0, 0], [1, -1, 1]);
  tensors.boxesRaw = tf.slice(tensors.res, [0, 0, 1], [1, -1, -1]);
  tensors.logits = tf.squeeze(tensors.logitsRaw);
  tensors.boxes = tf.squeeze(tensors.boxesRaw);
  const detected = await detect.decode(tensors.boxes, tensors.logits, config, outputSize);
  rescaleBoxes(detected, outputSize); // mutates the entries of `detected`
  for (const name of Object.keys(tensors)) tf.dispose(tensors[name]);
  return detected;
}
|
||||||
|
|
||||||
export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
|
export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
|
||||||
const outputSize: [number, number] = [input.shape[2] || 0, input.shape[1] || 0];
|
const outputSize: [number, number] = [input.shape[2] || 0, input.shape[1] || 0];
|
||||||
const skipTime = (config.body.skipTime || 0) > (now() - lastTime);
|
const skipTime = (config.body.skipTime || 0) > (now() - lastTime);
|
||||||
|
@ -151,7 +191,31 @@ export async function predict(input: Tensor, config: Config): Promise<BodyResult
|
||||||
if (config.skipAllowed && skipTime && skipFrame && cache !== null) {
|
if (config.skipAllowed && skipTime && skipFrame && cache !== null) {
|
||||||
skipped++;
|
skipped++;
|
||||||
} else {
|
} else {
|
||||||
cache = await detectParts(input, config, outputSize);
|
const t: Record<string, Tensor> = {};
|
||||||
|
if (config.body['detector'] && config.body['detector']['enabled']) {
|
||||||
|
t.detector = await prepareImage(input, 224);
|
||||||
|
const boxes = await detectBoxes(t.detector, config, outputSize);
|
||||||
|
if (boxes && boxes.length === 1) {
|
||||||
|
t.landmarks = await prepareImage(input, 256, boxes[0].box); // padded and resized according to detector
|
||||||
|
cache = await detectLandmarks(t.landmarks, config, outputSize);
|
||||||
|
}
|
||||||
|
if (cache) cache.score = boxes[0].score;
|
||||||
|
} else {
|
||||||
|
t.landmarks = await prepareImage(input, 256, lastBox); // padded and resized
|
||||||
|
cache = await detectLandmarks(t.landmarks, config, outputSize);
|
||||||
|
/*
|
||||||
|
lastBox = undefined;
|
||||||
|
if (cache?.box) {
|
||||||
|
const cx = cache.boxRaw[0] + (cache.boxRaw[2] / 2);
|
||||||
|
const cy = cache.boxRaw[1] + (cache.boxRaw[3] / 2);
|
||||||
|
let size = cache.boxRaw[2] > cache.boxRaw[3] ? cache.boxRaw[2] : cache.boxRaw[3];
|
||||||
|
size = (size * 1.2) / 2; // enlarge and half it
|
||||||
|
lastBox = [cx - size, cy - size, 2 * size, 2 * size];
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
|
||||||
|
// if (cache && boxes.length > 0) cache.box = boxes[0].box;
|
||||||
lastTime = now();
|
lastTime = now();
|
||||||
skipped = 0;
|
skipped = 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,111 @@
|
||||||
|
import * as tf from '../../dist/tfjs.esm.js';
|
||||||
|
import type { Tensor } from '../tfjs/types';
|
||||||
|
import type { Box } from '../result';
|
||||||
|
import type { Config } from '../config';
|
||||||
|
|
||||||
|
interface DetectedBox { box: Box, boxRaw: Box, score: number }
|
||||||
|
|
||||||
|
const inputSize = 224;
|
||||||
|
let anchorTensor: { x, y };
|
||||||
|
const numLayers = 5;
|
||||||
|
const strides = [8, 16, 32, 32, 32];
|
||||||
|
|
||||||
|
/**
 * Build the anchor grid used to decode raw detector output and store it in `anchorTensor`.
 *
 * Layers are walked in runs of consecutive entries of `strides` that share the same value.
 * Each layer in a run contributes two anchors per grid cell, and every anchor of a cell
 * carries the same normalized cell-centre coordinate. The grid for a run has
 * `ceil(inputSize / stride)` cells per side.
 *
 * Any anchor tensors left over from a previous call are disposed before the new ones are
 * stored, so calling this function repeatedly does not leak tensor memory.
 */
export async function createAnchors(): Promise<void> {
  const anchors: Array<{ x: number, y: number }> = [];
  let layerId = 0;
  while (layerId < numLayers) {
    // count anchors for the whole run of layers that share this stride
    let anchorCount = 0;
    let lastSameStrideLayer = layerId;
    while (lastSameStrideLayer < strides.length && strides[lastSameStrideLayer] === strides[layerId]) {
      anchorCount += 2;
      lastSameStrideLayer++;
    }
    const stride = strides[layerId];
    const featureMapHeight = Math.ceil(inputSize / stride);
    const featureMapWidth = Math.ceil(inputSize / stride);
    for (let y = 0; y < featureMapHeight; ++y) {
      for (let x = 0; x < featureMapWidth; ++x) {
        for (let anchorId = 0; anchorId < anchorCount; ++anchorId) {
          anchors.push({ x: (x + 0.5) / featureMapWidth, y: (y + 0.5) / featureMapHeight });
        }
      }
    }
    layerId = lastSameStrideLayer; // jump past the run that was just processed
  }
  // release tensors created by an earlier call before replacing them
  if (anchorTensor) tf.dispose([anchorTensor.x, anchorTensor.y]);
  anchorTensor = { x: tf.tensor1d(anchors.map((a) => a.x)), y: tf.tensor1d(anchors.map((a) => a.y)) };
}
|
||||||
|
|
||||||
|
const cropFactor = [5.0, 5.0];
|
||||||
|
/**
 * Convert raw detector box output into [xMin, yMin, width, height] rows.
 *
 * Centre offsets are divided by `inputSize` and shifted by the matching anchor coordinate;
 * sizes are divided by `inputSize` and enlarged by `cropFactor`. All work happens inside
 * `tf.tidy`, so only the returned tensor survives the call.
 *
 * @param boxesTensor raw box data, split into 12 columns along axis 1
 * @param anchor object with `x` and `y` anchor tensors
 * @returns tensor of boxes stacked along axis 1
 */
function decodeBoxes(boxesTensor, anchor): Tensor {
  return tf.tidy(() => {
    // 12 columns per row: the first 4 are box data [x,y,w,h], the rest are keypoints data [x,y]; only the box columns are used
    const [rawX, rawY, rawW, rawH] = tf.split(boxesTensor, 12, 1).slice(0, 4).map((column) => tf.squeeze(column));
    const centerX = tf.add(tf.div(rawX, inputSize), anchor.x);
    const centerY = tf.add(tf.div(rawY, inputSize), anchor.y);
    const boxWidth = tf.mul(tf.div(rawW, inputSize), cropFactor[0]);
    const boxHeight = tf.mul(tf.div(rawH, inputSize), cropFactor[1]);
    const left = tf.sub(centerX, tf.div(boxWidth, 2));
    const top = tf.sub(centerY, tf.div(boxHeight, 2));
    return tf.stack([left, top, boxWidth, boxHeight], 1);
  });
}
|
||||||
|
|
||||||
|
/**
 * Pick the single highest-scoring detection and return it if it passes the confidence threshold.
 *
 * Boxes are decoded against the module-level `anchorTensor`, logits are turned into scores
 * with a sigmoid, and only the arg-max candidate is considered. The threshold is
 * `config.body.detector.minConfidence` when that value is set and truthy, otherwise 0.
 * All tensors created here are disposed before returning.
 *
 * @param boxesTensor raw box data from the detector
 * @param logitsTensor raw score logits from the detector
 * @param config library configuration
 * @param outputSize [width, height] used to scale the normalized box
 * @returns an array holding at most one detected box
 */
export async function decode(boxesTensor: Tensor, logitsTensor: Tensor, config: Config, outputSize: [number, number]): Promise<DetectedBox[]> {
  const tensors: Record<string, Tensor> = {};
  tensors.boxes = decodeBoxes(boxesTensor, anchorTensor);
  tensors.scores = tf.sigmoid(logitsTensor);
  tensors.argmax = tf.argMax(tensors.scores);
  const best = (await tensors.argmax.data())[0] as number;
  const scores = await tensors.scores.data();
  const found: Array<{ box: Box, boxRaw: Box, score: number }> = [];
  const detectorConfig = config.body['detector'];
  const minScore = (detectorConfig && detectorConfig['minConfidence']) ? detectorConfig['minConfidence'] : 0;
  if (scores[best] >= minScore) {
    const decoded = await tensors.boxes.array();
    const boxRaw: Box = decoded[best];
    const [outWidth, outHeight] = outputSize;
    const box: Box = [boxRaw[0] * outWidth, boxRaw[1] * outHeight, boxRaw[2] * outWidth, boxRaw[3] * outHeight];
    found.push({ box, boxRaw, score: scores[best] });
  }
  /*
  disabled prototype: multi-box selection via non-max suppression
  t.nms = await tf.image.nonMaxSuppressionAsync(t.boxes, t.scores, 1, config.body.detector?.minConfidence || 0.1, config.body.detector?.iouThreshold || 0.1);
  const boxes = t.boxes.arraySync();
  const scores = t.scores.dataSync();
  const nms = t.nms.dataSync();
  const detected: Array<DetectedBox> = [];
  for (const i of Array.from(nms)) {
    const boxRaw: Box = boxes[i];
    const box: Box = [boxRaw[0] * outputSize[0], boxRaw[0] * outputSize[1], boxRaw[3] * outputSize[0], boxRaw[2] * outputSize[1]];
    detected.push({ box, boxRaw, score: scores[i] });
  }
  */
  for (const name of Object.keys(tensors)) tf.dispose(tensors[name]);
  return found;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
const humanConfig: Partial<Config> = {
|
||||||
|
warmup: 'full' as const,
|
||||||
|
modelBasePath: '../../models',
|
||||||
|
cacheSensitivity: 0,
|
||||||
|
filter: { enabled: false },
|
||||||
|
face: { enabled: false },
|
||||||
|
hand: { enabled: false },
|
||||||
|
object: { enabled: false },
|
||||||
|
gesture: { enabled: false },
|
||||||
|
body: {
|
||||||
|
enabled: true,
|
||||||
|
minConfidence: 0.1,
|
||||||
|
modelPath: 'blazepose/blazepose-full.json',
|
||||||
|
detector: {
|
||||||
|
enabled: false,
|
||||||
|
modelPath: 'blazepose/blazepose-detector.json',
|
||||||
|
minConfidence: 0.1,
|
||||||
|
iouThreshold: 0.1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
*/
|
|
@ -78,11 +78,19 @@ export interface BodyConfig extends GenericConfig {
|
||||||
maxDetected: number,
|
maxDetected: number,
|
||||||
/** minimum confidence for a detected body before results are discarded */
|
/** minimum confidence for a detected body before results are discarded */
|
||||||
minConfidence: number,
|
minConfidence: number,
|
||||||
/** detector used for body model before actual analysis */
|
/* experimental
|
||||||
|
/** experimental: detector used for body model before actual analysis
|
||||||
detector?: {
|
detector?: {
|
||||||
/** path to optional body detector model json file */
|
/** experimental: enable body detector before body landmarks
|
||||||
modelPath: string
|
enabled: boolean,
|
||||||
|
/** experimental: path to optional body detector model json file
|
||||||
|
modelPath: string,
|
||||||
|
/** experimental: minimum confidence for a detected body before results are discarded
|
||||||
|
minConfidence: number,
|
||||||
|
/** experimental: minimum overlap between two detected bodies before one is discarded
|
||||||
|
iouThreshold: number
|
||||||
},
|
},
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Configures all hand detection specific options */
|
/** Configures all hand detection specific options */
|
||||||
|
@ -365,9 +373,6 @@ const config: Config = {
|
||||||
body: {
|
body: {
|
||||||
enabled: true,
|
enabled: true,
|
||||||
modelPath: 'movenet-lightning.json',
|
modelPath: 'movenet-lightning.json',
|
||||||
detector: {
|
|
||||||
modelPath: '',
|
|
||||||
},
|
|
||||||
maxDetected: -1,
|
maxDetected: -1,
|
||||||
minConfidence: 0.3,
|
minConfidence: 0.3,
|
||||||
skipFrames: 1,
|
skipFrames: 1,
|
||||||
|
|
Loading…
Reference in New Issue