From 051ab8c9f58a9114bf7aeae1b5986168c2b03ce1 Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Mon, 4 Oct 2021 16:29:15 -0400 Subject: [PATCH] add blazepose v2 and add annotations to body results --- CHANGELOG.md | 3 +- demo/index.js | 9 +- src/body/blazepose.ts | 213 +++++++++++++++----------------- src/body/blazeposecoords.ts | 54 ++++++++ src/body/efficientpose.ts | 31 +++-- src/body/efficientposecoords.ts | 27 ++++ src/body/movenet.ts | 50 +++++--- src/body/movenetcoords.ts | 28 +++++ src/config.ts | 2 +- src/human.ts | 1 - src/image/image.ts | 2 + src/result.ts | 16 +-- src/tfjs/backend.ts | 4 +- src/tfjs/humangl.ts | 2 +- src/util/draw.ts | 92 ++------------ 15 files changed, 300 insertions(+), 234 deletions(-) create mode 100644 src/body/blazeposecoords.ts create mode 100644 src/body/efficientposecoords.ts create mode 100644 src/body/movenetcoords.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index c0cf8881..369751a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,9 @@ ## Changelog -### **HEAD -> main** 2021/10/02 mandic00@live.com +### **HEAD -> main** 2021/10/03 mandic00@live.com +- added docker notes - breaking change: new similarity and match methods - release candidate - tweaked default values diff --git a/demo/index.js b/demo/index.js index 97fec5dc..7586a74d 100644 --- a/demo/index.js +++ b/demo/index.js @@ -31,6 +31,13 @@ import jsonView from './helpers/jsonview.js'; let human; let userConfig = { + face: { enabled: false }, + object: { enabled: false }, + gesture: { enabled: true }, + hand: { enabled: false }, + body: { enabled: true, modelPath: 'https://vladmandic.github.io/human-models/models/blazepose-lite.json' }, + segmentation: { enabled: false }, + /* warmup: 'none', backend: 'humangl', @@ -108,7 +115,7 @@ const ui = { lastFrame: 0, // time of last frame processing viewportSet: false, // internal, has custom viewport been set background: null, // holds instance of segmentation background image - exceptionHandler: true, // should capture all unhandled exceptions + exceptionHandler: false, // should capture all unhandled exceptions // webrtc useWebRTC: false, // use webrtc as camera source instead of local webcam diff --git a/src/body/blazepose.ts b/src/body/blazepose.ts index c151c013..5ffa0226 100644 --- a/src/body/blazepose.ts +++ b/src/body/blazepose.ts @@ -1,60 +1,33 @@ /** * BlazePose model implementation - * - * Based on : [**BlazePose**](https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection) */ +import * as tf from '@tensorflow/tfjs'; import { log, join } from '../util/util'; -import * as tf from '../../dist/tfjs.esm.js'; -import type { BodyResult, Box, Point } from '../result'; +import type { BodyKeypoint, BodyResult, Box, Point } from '../result'; import type { GraphModel, Tensor } from '../tfjs/types'; import type { Config } from '../config'; -import { env } from '../util/env'; -import * as annotations from './annotations'; +import * as coords from './blazeposecoords'; -// const boxScaleFact = 1.5; // hand finger model prefers slighly larger box +const env = { initial: true }; const models: [GraphModel | null, GraphModel | null] = [null, null]; -const outputNodes = ['ld_3d', 'activation_segmentation', 'activation_heatmap', 'world_3d', 'output_poseflag']; - const inputSize = [[0, 0], [0, 0]]; - -// let skipped = 0; -let outputSize: [number, number] = [0, 0]; - -type Keypoints = { score: number, part: string, position: Point, positionRaw: Point }; - -/* -type BodyDetectResult = { - id: number, - score: number, - box: Box, - boxRaw: Box, - label: string, - yxBox: Box, -} - -const cache: { - bodyBoxes: Array, - partBoxes: Array - tmpBoxes: Array -} = { - bodyBoxes: [], - partBoxes: [], - tmpBoxes: [], -}; -*/ +let skipped = Number.MAX_SAFE_INTEGER; +let outputNodes: string[]; // different for lite/full/heavy +let cache: BodyResult | null = null; +let padding: [number, number][] = [[0, 0], [0, 0], [0, 0], [0, 0]]; export async function loadDetect(config: Config): Promise { if (env.initial) models[0] = null; - if (!models[0]) { + if (!models[0] && config.body.detector?.modelPath || '') { models[0] = await tf.loadGraphModel(join(config.modelBasePath, config.body.detector?.modelPath || '')) as unknown as GraphModel; const inputs = Object.values(models[0].modelSignature['inputs']); inputSize[0][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0; inputSize[0][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0; if (!models[0] || !models[0]['modelUrl']) log('load model failed:', config.object.modelPath); else if (config.debug) log('load model:', models[0]['modelUrl']); - } else if (config.debug) log('cached model:', models[0]['modelUrl']); - return models[0]; + } else if (config.debug && models[0]) log('cached model:', models[0]['modelUrl']); + return models[0] as GraphModel; } export async function loadPose(config: Config): Promise { @@ -64,6 +37,8 @@ export async function loadPose(config: Config): Promise { const inputs = Object.values(models[1].modelSignature['inputs']); inputSize[1][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0; inputSize[1][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0; + if (config.body.modelPath?.includes('lite')) outputNodes = ['ld_3d', 'output_segmentation', 'output_heatmap', 'world_3d', 'output_poseflag']; + else outputNodes = ['Identity', 'Identity_2', 'Identity_3', 'Identity_4', 'Identity_1']; // v2 from pinto full and heavy if (!models[1] || !models[1]['modelUrl']) log('load model failed:', config.object.modelPath); else if (config.debug) log('load model:', models[1]['modelUrl']); } else if (config.debug) log('cached model:', models[1]['modelUrl']); @@ -76,86 +51,104 @@ export async function load(config: Config): Promise<[GraphModel | null, GraphMod return models; } -/* -async function detectBody(input: Tensor, config: Config): Promise { - if ((config.body.detector?.modelPath.length || 0) > 0 && models[0]) { - const t: Record = {}; - t.resize = tf.image.resizeBilinear(input, [inputSize[0][0], inputSize[0][1]]); - t.res = await models[0]?.predict(t.resize) as Tensor; // [1,2254,13] - t.logits = tf.slice(t.res, [0, 0, 0], [1, -1, 1]); - t.sigmoid = tf.sigmoid(t.logits); - t.rawBoxes = tf.slice(t.res, [0, 0, 1], [1, -1, -1]); - t.packedBoxes = tf.squeeze(t.rawBoxes); // [2254,12] - t.scores = tf.squeeze(t.sigmoid); // [2254,1] - // boxes need to be decoded based on anchors - Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); - } - return []; -} -*/ - -async function detectParts(input: Tensor, config: Config): Promise { - const t: Record = {}; - t.resize = tf.image.resizeBilinear(input, [inputSize[1][0], inputSize[1][1]]); - [t.ld/* 1,195 */, t.segmentation/* 1,256,256,1 */, t.heatmap/* 1,64,64,39 */, t.world/* 1,117 */, t.poseflag/* 1,1 */] = await models[1]?.execute(t.resize, outputNodes) as Tensor[]; // [1,2254,13] - const points = await t.ld.data(); - const keypoints: Array = []; - const labels = points?.length === 195 ? annotations.full : annotations.upper; // full model has 39 keypoints, upper has 31 keypoints - const depth = 5; // each points has x,y,z,visibility,presence - for (let i = 0; i < points.length / depth; i++) { - const score = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 3])))) / 100; // reverse sigmoid value - // const presence = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 4])))) / 100; // reverse sigmoid value - if (score > (config.body.minConfidence || 0)) { - keypoints.push({ - part: labels[i], - position: [ - Math.trunc(outputSize[0] * points[depth * i + 0] / 255), // return normalized x value istead of 0..255 - Math.trunc(outputSize[1] * points[depth * i + 1] / 255), // return normalized y value istead of 0..255 - Math.trunc(points[depth * i + 2]) + 0, // fix negative zero - ], - positionRaw: [ - points[depth * i + 0] / 255, // return x value normalized to 0..1 - points[depth * i + 1] / 255, // return y value normalized to 0..1 - points[depth * i + 2] + 0, // fix negative zero - ], - score, - }); - } - } +function calculateBoxes(keypoints: Array, outputSize: [number, number]): { keypointsBox: Box, keypointsBoxRaw: Box } { const x = keypoints.map((a) => a.position[0]); const y = keypoints.map((a) => a.position[1]); - const box: Box = [ - Math.min(...x), - Math.min(...y), - Math.max(...x) - Math.min(...x), - Math.max(...y) - Math.min(...x), + const keypointsBox: Box = [Math.min(...x), Math.min(...y), Math.max(...x) - Math.min(...x), Math.max(...y) - Math.min(...y)]; + const keypointsBoxRaw: Box = [keypointsBox[0] / outputSize[0], keypointsBox[1] / outputSize[1], keypointsBox[2] / outputSize[0], keypointsBox[3] / outputSize[1]]; + /* + const leftShoulder = keypoints.find((kpt) => kpt.part === 'leftShoulder'); + const rightShoulder = keypoints.find((kpt) => kpt.part === 'rightShoulder'); + if (!leftShoulder || !rightShoulder || !config.skipFrame) { // reset cache box coords + cache.box = [0, 0, 1, 1]; + cache.boxRaw = cache.box; + } else { // recalculate cache box coords + const size = [leftShoulder.position[0] - rightShoulder.position[0], leftShoulder.position[1] - rightShoulder.position[1]]; + const shoulderWidth = Math.sqrt((size[0] * size[0]) + (size[1] * size[1])); // distance between left and right shoulder + const shoulderCenter: Point = [(leftShoulder.position[0] + rightShoulder.position[0]) / 2, (leftShoulder.position[1] + rightShoulder.position[1]) / 2]; // center point between left and right shoulder + const bodyCenter: Point = [shoulderCenter[0], shoulderCenter[0] + (shoulderWidth), 0]; // approximate center of the body + const bodyCenterRaw: Point = [bodyCenter[0] / outputSize[0], bodyCenter[1] / outputSize[1], 0]; + const bodyCenterKpt: Keypoint = { part: 'bodyCenter', positionRaw: bodyCenterRaw, position: bodyCenter, score: 1 }; // add virtual keypoint + keypoints.push(bodyCenterKpt); + const scaleFact = 2.5; + cache.box = [Math.trunc(bodyCenter[0] - (scaleFact * shoulderWidth)), Math.trunc(bodyCenter[1] - (scaleFact * shoulderWidth)), Math.trunc(2 * scaleFact * shoulderWidth), Math.trunc(2 * scaleFact * shoulderWidth)]; + cache.boxRaw = [cache.box[0] / outputSize[0], cache.box[1] / outputSize[1], cache.box[2] / outputSize[0], cache.box[3] / outputSize[1]]; + } + */ + return { keypointsBox, keypointsBoxRaw }; +} + +async function prepareImage(input: Tensor): Promise { + const t: Record = {}; + if (!input.shape || !input.shape[1] || !input.shape[2]) return input; + padding = [ + [0, 0], // dont touch batch + [input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0, input.shape[2] > input.shape[1] ? Math.trunc((input.shape[2] - input.shape[1]) / 2) : 0], // height before&after + [input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0, input.shape[1] > input.shape[2] ? Math.trunc((input.shape[1] - input.shape[2]) / 2) : 0], // width before&after + [0, 0], // dont touch rbg ]; - const boxRaw: Box = [0, 0, 0, 0]; // not yet implemented - const score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0); + t.pad = tf.pad(input as tf.Tensor4D, padding); + t.resize = tf.image.resizeBilinear(t.pad as tf.Tensor4D, [inputSize[1][0], inputSize[1][1]]); + const final = tf.div(t.resize, 255); Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); - return { id: 0, score, box, boxRaw, keypoints }; + return final; +} + +function rescaleKeypoints(keypoints: Array, outputSize: [number, number]): Array { + for (const kpt of keypoints) { + kpt.position = [ + kpt.position[0] * (outputSize[0] + padding[2][0] + padding[2][1]) / outputSize[0] - padding[2][0], + kpt.position[1] * (outputSize[1] + padding[1][0] + padding[1][1]) / outputSize[1] - padding[1][0], + kpt.position[2] as number, + ]; + kpt.positionRaw = [ + kpt.position[0] / outputSize[0], kpt.position[1] / outputSize[1], kpt.position[2] as number, + ]; + } + return keypoints; +} + +async function detectParts(input: Tensor, config: Config, outputSize: [number, number]): Promise { + const t: Record = {}; + t.input = await prepareImage(input); + [t.ld/* 1,195 */, t.segmentation/* 1,256,256,1 */, t.heatmap/* 1,64,64,39 */, t.world/* 1,117 */, t.poseflag/* 1,1 */] = await models[1]?.execute(t.input, outputNodes) as Tensor[]; // run model + const points = await t.ld.data(); + const keypointsRelative: Array = []; + const depth = 5; // each points has x,y,z,visibility,presence + for (let i = 0; i < points.length / depth; i++) { + const score = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 3])))) / 100; // normally this is from tf.sigmoid but no point of running sigmoid on full array which has coords as well + // const presence = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 4])))) / 100; // reverse sigmoid value + const positionRaw: Point = [points[depth * i + 0] / inputSize[1][0], points[depth * i + 1] / inputSize[1][1], points[depth * i + 2] + 0]; + const position: Point = [Math.trunc(outputSize[0] * positionRaw[0]), Math.trunc(outputSize[1] * positionRaw[1]), positionRaw[2] as number]; + // if (positionRaw[0] < 0 || positionRaw[1] < 0 || positionRaw[0] > 1 || positionRaw[1] > 1) score = 0; + keypointsRelative.push({ part: coords.kpt[i], positionRaw, position, score }); + } + const avgScore = Math.round(100 * keypointsRelative.reduce((prev, curr) => prev += curr.score, 0) / keypointsRelative.length) / 100; // average score of keypoints + if (avgScore < (config.body.minConfidence || 0)) return null; + const keypoints: Array = rescaleKeypoints(keypointsRelative, outputSize); // keypoints were relative to input image which is cropped + const boxes = calculateBoxes(keypoints, [outputSize[0], outputSize[1]]); // now find boxes based on rescaled keypoints + Object.keys(t).forEach((tensor) => tf.dispose(t[tensor])); + const annotations: Record = {}; + for (const [name, indexes] of Object.entries(coords.connected)) { + const pt: Array = []; + for (let i = 0; i < indexes.length - 1; i++) { + const pt0 = keypoints.find((kpt) => kpt.part === indexes[i]); + const pt1 = keypoints.find((kpt) => kpt.part === indexes[i + 1]); + if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]); + } + annotations[name] = pt; + } + return { id: 0, score: avgScore, box: boxes.keypointsBox, boxRaw: boxes.keypointsBoxRaw, keypoints, annotations }; } export async function predict(input: Tensor, config: Config): Promise { - outputSize = [input.shape[2] || 0, input.shape[1] || 0]; - const bodies: Array = []; - const body = await detectParts(input, config); - bodies.push(body); - /* - cache.tmpBoxes = []; // clear temp cache - if ((skipped < (config.body.skipFrames || 0)) && config.skipFrame) { // just run part detection while reusing cached boxes + const outputSize: [number, number] = [input.shape[2] || 0, input.shape[1] || 0]; + if ((skipped < (config.body.skipFrames || 0)) && config.skipFrame) { skipped++; - bodies = await Promise.all(cache.partBoxes.map((body) => detectParts(input, body, config))); // run from parts box cache - } else { // calculate new boxes and run part detection + } else { + cache = await detectParts(input, config, outputSize); skipped = 0; - bodies = await Promise.all(cache.partBoxes.map((body) => detectParts(input, body, config))); // run from part box cache - if (bodies.length !== config.body.maxDetected) { // run body detection only if we dont have enough bodies in cache - cache.bodyBoxes = await detectBody(input, config); - const newBodies = await Promise.all(cache.bodyBoxes.map((body) => detectParts(input, body, config))); - bodies = bodies.concat(newBodies); - } } - cache.partBoxes = [...cache.tmpBoxes]; // repopulate cache with validated bodies - */ - return bodies as BodyResult[]; + if (cache) return [cache]; + return []; } diff --git a/src/body/blazeposecoords.ts b/src/body/blazeposecoords.ts new file mode 100644 index 00000000..721a7e8f --- /dev/null +++ b/src/body/blazeposecoords.ts @@ -0,0 +1,54 @@ +/* eslint-disable no-multi-spaces */ + +export const kpt = [ + 'nose', // 0 + 'leftEyeInside', // 1 + 'leftEye', // 2 + 'leftEyeOutside', // 3 + 'rightEyeInside', // 4 + 'rightEye', // 5 + 'rightEyeOutside', // 6 + 'leftEar', // 7 + 'rightEar', // 8 + 'leftMouth', // 9 + 'rightMouth', // 10 + 'leftShoulder', // 11 + 'rightShoulder', // 12 + 'leftElbow', // 13 + 'rightElbow', // 14 + 'leftWrist', // 15 + 'rightWrist', // 16 + 'leftPalm', // 17 + 'rightPalm', // 18 + 'leftIndex', // 19 + 'rightIndex', // 20 + 'leftPinky', // 21 + 'rightPinky', // 22 + 'leftHip', // 23 + 'rightHip', // 24 + 'leftKnee', // 25 + 'rightKnee', // 26 + 'leftAnkle', // 27 + 'rightAnkle', // 28 + 'leftHeel', // 29 + 'rightHeel', // 30 + 'leftFoot', // 31 + 'rightFoot', // 32 + 'bodyCenter', // 33 + 'bodyTop', // 34 + 'leftThumb', // 35 + 'leftHand', // 36 + 'rightThumb', // 37 + 'rightHand', // 38 +]; + +export const connected = { + leftLeg: ['leftHip', 'leftKnee', 'leftAnkle', 'leftHeel', 'leftFoot'], + rightLeg: ['rightHip', 'rightKnee', 'rightAnkle', 'rightHeel', 'rightFoot'], + torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'], + leftArm: ['leftShoulder', 'leftElbow', 'leftWrist', 'leftPalm'], + rightArm: ['rightShoulder', 'rightElbow', 'rightWrist', 'rightPalm'], + leftHand: [], + rightHand: [], + head: [], +}; diff --git a/src/body/efficientpose.ts b/src/body/efficientpose.ts index e6eff298..bb76cdd9 100644 --- a/src/body/efficientpose.ts +++ b/src/body/efficientpose.ts @@ -6,23 +6,20 @@ import { log, join } from '../util/util'; import * as tf from '../../dist/tfjs.esm.js'; -import type { BodyResult, Box, Point } from '../result'; +import * as coords from './efficientposecoords'; +import type { BodyKeypoint, BodyResult, Box, Point } from '../result'; import type { GraphModel, Tensor } from '../tfjs/types'; import type { Config } from '../config'; import { env } from '../util/env'; let model: GraphModel | null; -type Keypoints = { score: number, part: string, position: Point, positionRaw: Point }; - -const keypoints: Array = []; +const keypoints: Array = []; let box: Box = [0, 0, 0, 0]; let boxRaw: Box = [0, 0, 0, 0]; let score = 0; let skipped = Number.MAX_SAFE_INTEGER; -const bodyParts = ['head', 'neck', 'rightShoulder', 'rightElbow', 'rightWrist', 'chest', 'leftShoulder', 'leftElbow', 'leftWrist', 'pelvis', 'rightHip', 'rightKnee', 'rightAnkle', 'leftHip', 'leftKnee', 'leftAnkle']; - export async function load(config: Config): Promise { if (env.initial) model = null; if (!model) { @@ -41,9 +38,9 @@ function max2d(inputs, minScore) { const reshaped = tf.reshape(inputs, [height * width]); // combine all data const newScore = tf.max(reshaped, 0).dataSync()[0]; // get highest score // inside tf.tidy if (newScore > minScore) { // skip coordinate calculation is score is too low - const coords = tf.argMax(reshaped, 0); - const x = mod(coords, width).dataSync()[0]; // inside tf.tidy - const y = tf.div(coords, tf.scalar(width, 'int32')).dataSync()[0]; // inside tf.tidy + const coordinates = tf.argMax(reshaped, 0); + const x = mod(coordinates, width).dataSync()[0]; // inside tf.tidy + const y = tf.div(coordinates, tf.scalar(width, 'int32')).dataSync()[0]; // inside tf.tidy return [x, y, newScore]; } return [0, 0, newScore]; @@ -53,7 +50,7 @@ function max2d(inputs, minScore) { export async function predict(image: Tensor, config: Config): Promise { if ((skipped < (config.body?.skipFrames || 0)) && config.skipFrame && Object.keys(keypoints).length > 0) { skipped++; - return [{ id: 0, score, box, boxRaw, keypoints }]; + return [{ id: 0, score, box, boxRaw, keypoints, annotations: {} }]; } skipped = 0; return new Promise(async (resolve) => { @@ -83,7 +80,7 @@ export async function predict(image: Tensor, config: Config): Promise (config.body?.minConfidence || 0)) { keypoints.push({ score: Math.round(100 * partScore) / 100, - part: bodyParts[id], + part: coords.kpt[id], positionRaw: [ // normalized to 0..1 // @ts-ignore model is not undefined here x / model.inputs[0].shape[2], y / model.inputs[0].shape[1], @@ -114,6 +111,16 @@ export async function predict(image: Tensor, config: Config): Promise = {}; + for (const [name, indexes] of Object.entries(coords.connected)) { + const pt: Array = []; + for (let i = 0; i < indexes.length - 1; i++) { + const pt0 = keypoints.find((kpt) => kpt.part === indexes[i]); + const pt1 = keypoints.find((kpt) => kpt.part === indexes[i + 1]); + if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]); + } + annotations[name] = pt; + } + resolve([{ id: 0, score, box, boxRaw, keypoints, annotations }]); }); } diff --git a/src/body/efficientposecoords.ts b/src/body/efficientposecoords.ts new file mode 100644 index 00000000..d6e66644 --- /dev/null +++ b/src/body/efficientposecoords.ts @@ -0,0 +1,27 @@ +export const kpt = [ + 'head', + 'neck', + 'rightShoulder', + 'rightElbow', + 'rightWrist', + 'chest', + 'leftShoulder', + 'leftElbow', + 'leftWrist', + 'bodyCenter', + 'rightHip', + 'rightKnee', + 'rightAnkle', + 'leftHip', + 'leftKnee', + 'leftAnkle', +]; + +export const connected = { + leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'], + rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'], + torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'], + leftArm: ['leftShoulder', 'leftElbow', 'leftWrist'], + rightArm: ['rightShoulder', 'rightElbow', 'rightWrist'], + head: [], +}; diff --git a/src/body/movenet.ts b/src/body/movenet.ts index f288b02e..24a6eb87 100644 --- a/src/body/movenet.ts +++ b/src/body/movenet.ts @@ -7,7 +7,8 @@ import { log, join } from '../util/util'; import { scale } from '../util/box'; import * as tf from '../../dist/tfjs.esm.js'; -import type { BodyResult, Box, Point } from '../result'; +import * as coords from './movenetcoords'; +import type { BodyKeypoint, BodyResult, Box, Point } from '../result'; import type { GraphModel, Tensor } from '../tfjs/types'; import type { Config } from '../config'; import { fakeOps } from '../tfjs/backend'; @@ -17,13 +18,8 @@ let model: GraphModel | null; let inputSize = 0; const cachedBoxes: Array = []; -type Keypoints = { score: number, part: string, position: Point, positionRaw: Point }; -type Body = { id: number, score: number, box: Box, boxRaw: Box, keypoints: Array } - let skipped = Number.MAX_SAFE_INTEGER; -const keypoints: Array = []; - -const bodyParts = ['nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder', 'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist', 'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle']; +const keypoints: Array = []; export async function load(config: Config): Promise { if (env.initial) model = null; @@ -71,7 +67,7 @@ async function parseSinglePose(res, config, image, inputBox) { ]; keypoints.push({ score: Math.round(100 * score) / 100, - part: bodyParts[id], + part: coords.kpt[id], positionRaw, position: [ // normalized to input image size Math.round((image.shape[2] || 0) * positionRaw[0]), @@ -81,14 +77,24 @@ async function parseSinglePose(res, config, image, inputBox) { } } score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0); - const bodies: Array = []; + const bodies: Array = []; const [box, boxRaw] = createBox(keypoints); - bodies.push({ id: 0, score, box, boxRaw, keypoints }); + const annotations: Record = {}; + for (const [name, indexes] of Object.entries(coords.connected)) { + const pt: Array = []; + for (let i = 0; i < indexes.length - 1; i++) { + const pt0 = keypoints.find((kp) => kp.part === indexes[i]); + const pt1 = keypoints.find((kp) => kp.part === indexes[i + 1]); + if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]); + } + annotations[name] = pt; + } + bodies.push({ id: 0, score, box, boxRaw, keypoints, annotations }); return bodies; } async function parseMultiPose(res, config, image, inputBox) { - const bodies: Array = []; + const bodies: Array = []; for (let id = 0; id < res[0].length; id++) { const kpt = res[0][id]; const totalScore = Math.round(100 * kpt[51 + 4]) / 100; @@ -102,7 +108,7 @@ async function parseMultiPose(res, config, image, inputBox) { (inputBox[2] - inputBox[0]) * kpt[3 * i + 0] + inputBox[0], ]; keypoints.push({ - part: bodyParts[i], + part: coords.kpt[i], score: Math.round(100 * score) / 100, positionRaw, position: [ @@ -112,11 +118,21 @@ async function parseMultiPose(res, config, image, inputBox) { }); } } - // const [box, boxRaw] = createBox(keypoints); + const [box, boxRaw] = createBox(keypoints); // movenet-multipose has built-in box details - const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]]; - const box: Box = [Math.trunc(boxRaw[0] * (image.shape[2] || 0)), Math.trunc(boxRaw[1] * (image.shape[1] || 0)), Math.trunc(boxRaw[2] * (image.shape[2] || 0)), Math.trunc(boxRaw[3] * (image.shape[1] || 0))]; - bodies.push({ id, score: totalScore, boxRaw, box, keypoints: [...keypoints] }); + // const boxRaw: Box = [kpt[51 + 1], kpt[51 + 0], kpt[51 + 3] - kpt[51 + 1], kpt[51 + 2] - kpt[51 + 0]]; + // const box: Box = [Math.trunc(boxRaw[0] * (image.shape[2] || 0)), Math.trunc(boxRaw[1] * (image.shape[1] || 0)), Math.trunc(boxRaw[2] * (image.shape[2] || 0)), Math.trunc(boxRaw[3] * (image.shape[1] || 0))]; + const annotations: Record = {}; + for (const [name, indexes] of Object.entries(coords.connected)) { + const pt: Array = []; + for (let i = 0; i < indexes.length - 1; i++) { + const pt0 = keypoints.find((kp) => kp.part === indexes[i]); + const pt1 = keypoints.find((kp) => kp.part === indexes[i + 1]); + if (pt0 && pt1 && pt0.score > (config.body.minConfidence || 0) && pt1.score > (config.body.minConfidence || 0)) pt.push([pt0.position, pt1.position]); + } + annotations[name] = pt; + } + bodies.push({ id, score: totalScore, boxRaw, box, keypoints: [...keypoints], annotations }); } } bodies.sort((a, b) => b.score - a.score); @@ -129,7 +145,7 @@ export async function predict(input: Tensor, config: Config): Promise { const t: Record = {}; - let bodies: Array = []; + let bodies: Array = []; if (!config.skipFrame) cachedBoxes.length = 0; // allowed to use cache or not skipped++; diff --git a/src/body/movenetcoords.ts b/src/body/movenetcoords.ts new file mode 100644 index 00000000..9c881c2e --- /dev/null +++ b/src/body/movenetcoords.ts @@ -0,0 +1,28 @@ +export const kpt = [ + 'nose', + 'leftEye', + 'rightEye', + 'leftEar', + 'rightEar', + 'leftShoulder', + 'rightShoulder', + 'leftElbow', + 'rightElbow', + 'leftWrist', + 'rightWrist', + 'leftHip', + 'rightHip', + 'leftKnee', + 'rightKnee', + 'leftAnkle', + 'rightAnkle', +]; + +export const connected = { + leftLeg: ['leftHip', 'leftKnee', 'leftAnkle'], + rightLeg: ['rightHip', 'rightKnee', 'rightAnkle'], + torso: ['leftShoulder', 'rightShoulder', 'rightHip', 'leftHip', 'leftShoulder'], + leftArm: ['leftShoulder', 'leftElbow', 'leftWrist'], + rightArm: ['rightShoulder', 'rightElbow', 'rightWrist'], + head: [], +}; diff --git a/src/config.ts b/src/config.ts index ae66be17..1f498bba 100644 --- a/src/config.ts +++ b/src/config.ts @@ -411,7 +411,7 @@ const config: Config = { // only valid for posenet and movenet-multipose as other models detects single pose // set to -1 to autodetect based on number of detected faces minConfidence: 0.2, // threshold for discarding a prediction - skipFrames: 1, // how many max frames to go without re-running the detector + skipFrames: 5, // how many max frames to go without re-running the detector // only used when cacheSensitivity is not zero }, diff --git a/src/human.ts b/src/human.ts index 8789c014..33cd3ee5 100644 --- a/src/human.ts +++ b/src/human.ts @@ -8,7 +8,6 @@ import { defaults } from './config'; import * as tf from '../dist/tfjs.esm.js'; import * as app from '../package.json'; import * as backend from './tfjs/backend'; -// import * as blazepose from './body/blazepose-v1'; import * as blazepose from './body/blazepose'; import * as centernet from './object/centernet'; import * as draw from './util/draw'; diff --git a/src/image/image.ts b/src/image/image.ts index 8a58db0a..356a1d88 100644 --- a/src/image/image.ts +++ b/src/image/image.ts @@ -183,6 +183,8 @@ export function process(input: Input, config: Config): { tensor: Tensor | null, tempCanvas.height = targetHeight; const tempCtx = tempCanvas.getContext('2d'); tempCtx?.drawImage(outCanvas, 0, 0); + console.log('PIXELS', tempCanvas); + pixels = (tf.browser && env.browser) ? tf.browser.fromPixels(tempCanvas) : null; try { pixels = (tf.browser && env.browser) ? tf.browser.fromPixels(tempCanvas) : null; } catch (err) { diff --git a/src/result.ts b/src/result.ts index 75babe52..4dfc16e7 100644 --- a/src/result.ts +++ b/src/result.ts @@ -59,6 +59,13 @@ export interface FaceResult { tensor?: Tensor, } +export type BodyKeypoint = { + part: string, + position: Point, + positionRaw: Point, + score: number, +} + /** Body results * * Each results has: @@ -77,13 +84,8 @@ export interface BodyResult { score: number, box: Box, boxRaw: Box, - keypoints: Array<{ - part: string, - position: Point, - positionRaw: Point, - score: number, - presence?: number, - }> + annotations: Record, + keypoints: Array } /** Hand results diff --git a/src/tfjs/backend.ts b/src/tfjs/backend.ts index 3b5de6d5..533461bc 100644 --- a/src/tfjs/backend.ts +++ b/src/tfjs/backend.ts @@ -90,10 +90,8 @@ export async function check(instance, force = false) { } // handle webgpu - if (tf.getBackend() === 'humangl') { + if (tf.getBackend() === 'webgpu') { tf.ENV.set('WEBGPU_USE_GLSL', true); - tf.ENV.set('WEBGL_PACK_DEPTHWISECONV', false); - tf.ENV.set('WEBGL_USE_SHAPES_UNIFORMS', true); } // wait for ready diff --git a/src/tfjs/humangl.ts b/src/tfjs/humangl.ts index f6fdec71..7d5033f9 100644 --- a/src/tfjs/humangl.ts +++ b/src/tfjs/humangl.ts @@ -67,7 +67,7 @@ export async function register(instance): Promise { // log('gpu memory usage:', instance.tf.engine().backendInstance.numBytesInGPU); log('possible browser memory leak using webgl'); instance.emit('error'); - throw new Error('browser webgl error'); + // throw new Error('browser webgl error'); /* log('resetting humangl backend'); env.initial = true; diff --git a/src/util/draw.ts b/src/util/draw.ts index cc0b9183..60585961 100644 --- a/src/util/draw.ts +++ b/src/util/draw.ts @@ -4,7 +4,7 @@ import { TRI468 as triangulation } from '../face/facemeshcoords'; import { mergeDeep, now } from './util'; -import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult } from '../result'; +import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult, Point } from '../result'; /** * Draw Options @@ -102,7 +102,7 @@ function rect(ctx, x, y, width, height, localOptions) { ctx.stroke(); } -function lines(ctx, points: [number, number, number?][] = [], localOptions) { +function lines(ctx, points: Point[] = [], localOptions) { if (points === undefined || points.length === 0) return; ctx.beginPath(); ctx.moveTo(points[0][0], points[0][1]); @@ -119,7 +119,7 @@ function lines(ctx, points: [number, number, number?][] = [], localOptions) { } } -function curves(ctx, points: [number, number, number?][] = [], localOptions) { +function curves(ctx, points: Point[] = [], localOptions) { if (points === undefined || points.length === 0) return; if (!localOptions.useCurves || points.length <= 2) { lines(ctx, points, localOptions); @@ -288,91 +288,23 @@ export async function body(inCanvas: HTMLCanvasElement | OffscreenCanvas, result ctx.fillText(`body ${100 * result[i].score}%`, result[i].box[0] + 2, 0 + result[i].box[1] + localOptions.lineHeight, result[i].box[2]); } } - if (localOptions.drawPoints) { + if (localOptions.drawPoints && result[i].keypoints) { for (let pt = 0; pt < result[i].keypoints.length; pt++) { ctx.fillStyle = localOptions.useDepth && result[i].keypoints[pt].position[2] ? `rgba(${127.5 + (2 * (result[i].keypoints[pt].position[2] || 0))}, ${127.5 - (2 * (result[i].keypoints[pt].position[2] || 0))}, 255, 0.5)` : localOptions.color; point(ctx, result[i].keypoints[pt].position[0], result[i].keypoints[pt].position[1], 0, localOptions); } } - if (localOptions.drawLabels) { + if (localOptions.drawLabels && result[i].keypoints) { ctx.font = localOptions.font; - if (result[i].keypoints) { - for (const pt of result[i].keypoints) { - ctx.fillStyle = localOptions.useDepth && pt.position[2] ? `rgba(${127.5 + (2 * pt.position[2])}, ${127.5 - (2 * pt.position[2])}, 255, 0.5)` : localOptions.color; - ctx.fillText(`${pt.part} ${Math.trunc(100 * pt.score)}%`, pt.position[0] + 4, pt.position[1] + 4); - } + for (const pt of result[i].keypoints) { + ctx.fillStyle = localOptions.useDepth && pt.position[2] ? `rgba(${127.5 + (2 * pt.position[2])}, ${127.5 - (2 * pt.position[2])}, 255, 0.5)` : localOptions.color; + ctx.fillText(`${pt.part} ${Math.trunc(100 * pt.score)}%`, pt.position[0] + 4, pt.position[1] + 4); } } - if (localOptions.drawPolygons && result[i].keypoints) { - let part; - const points: [number, number, number?][] = []; - // shoulder line - points.length = 0; - part = result[i].keypoints.find((a) => a.part === 'leftShoulder'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'rightShoulder'); - if (part) points.push([part.position[0], part.position[1]]); - curves(ctx, points, localOptions); - // torso main - points.length = 0; - part = result[i].keypoints.find((a) => a.part === 'rightShoulder'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'rightHip'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'leftHip'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'leftShoulder'); - if (part) points.push([part.position[0], part.position[1]]); - if (points.length === 4) lines(ctx, points, localOptions); // only draw if we have complete torso - // leg left - points.length = 0; - part = result[i].keypoints.find((a) => a.part === 'leftHip'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'leftKnee'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'leftAnkle'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'leftHeel'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'leftFoot'); - if (part) points.push([part.position[0], part.position[1]]); - curves(ctx, points, localOptions); - // leg right - points.length = 0; - part = result[i].keypoints.find((a) => a.part === 'rightHip'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'rightKnee'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'rightAnkle'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'rightHeel'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'rightFoot'); - if (part) points.push([part.position[0], part.position[1]]); - curves(ctx, points, localOptions); - // arm left - points.length = 0; - part = result[i].keypoints.find((a) => a.part === 'leftShoulder'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'leftElbow'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'leftWrist'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'leftPalm'); - if (part) points.push([part.position[0], part.position[1]]); - curves(ctx, points, localOptions); - // arm right - points.length = 0; - part = result[i].keypoints.find((a) => a.part === 'rightShoulder'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'rightElbow'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'rightWrist'); - if (part) points.push([part.position[0], part.position[1]]); - part = result[i].keypoints.find((a) => a.part === 'rightPalm'); - if (part) points.push([part.position[0], part.position[1]]); - curves(ctx, points, localOptions); - // draw all + if (localOptions.drawPolygons && result[i].keypoints && result[i].annotations) { + for (const part of Object.values(result[i].annotations)) { + for (const connected of part) curves(ctx, connected, localOptions); + } } } }