From e3fd7a5b61d3dd049ff6d9208bbaaf712a387714 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic <mandic00@live.com>
Date: Mon, 27 Sep 2021 13:58:13 -0400
Subject: [PATCH] refactoring

---
 CHANGELOG.md                                 |   1 +
 src/body/blazepose.ts                        | 161 ++++++++++++++++++
 src/{efficientpose => body}/efficientpose.ts |   4 +-
 src/{movenet => body}/movenet.ts             |  11 +-
 src/config.ts                                |   7 +
 src/{ => face}/face.ts                       |  14 +-
 src/{faceres => face}/faceres.ts             |   4 +-
 src/{emotion => gear}/emotion.ts             |   4 +-
 src/{ssrnet/age.ts => gear/ssrnet-age.ts}    |   4 +-
 .../gender.ts => gear/ssrnet-gender.ts}      |   4 +-
 src/{handtrack => hand}/handtrack.ts         |   7 +-
 src/handpose/handpipeline.ts                 |   2 +-
 src/handpose/handpose.ts                     |   4 +-
 src/human.ts                                 |  27 +--
 src/image/image.ts                           |   4 +-
 src/models.ts                                |  24 +--
 src/object/centernet.ts                      |   4 +-
 src/object/nanodet.ts                        |   4 +-
 src/segmentation/segmentation.ts             |   4 +-
 src/tfjs/backend.ts                          |   4 +-
 src/tfjs/humangl.ts                          |   2 +-
 src/util/box.ts                              |  28 +++
 src/{ => util}/draw.ts                       |   4 +-
 src/{ => util}/env.ts                        |   4 +-
 src/{ => util}/interpolate.ts                |   2 +-
 src/{ => util}/profile.ts                    |   0
 src/{ => util}/util.ts                       |  29 ----
 src/warmup.ts                                |   4 +-
 28 files changed, 272 insertions(+), 99 deletions(-)
 create mode 100644 src/body/blazepose.ts
 rename src/{efficientpose => body}/efficientpose.ts (98%)
 rename src/{movenet => body}/movenet.ts (95%)
 rename src/{ => face}/face.ts (97%)
 rename src/{faceres => face}/faceres.ts (98%)
 rename src/{emotion => gear}/emotion.ts (97%)
 rename src/{ssrnet/age.ts => gear/ssrnet-age.ts} (96%)
 rename src/{ssrnet/gender.ts => gear/ssrnet-gender.ts} (98%)
 rename src/{handtrack => hand}/handtrack.ts (97%)
 create mode 100644 src/util/box.ts
 rename src/{ => util}/draw.ts (99%)
 rename src/{ => util}/env.ts (98%)
 rename src/{ => util}/interpolate.ts (99%)
 rename src/{ => util}/profile.ts (100%)
 rename src/{ => util}/util.ts (72%)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3779468d..4ba1d415 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@
 
 ### **HEAD -> main** 2021/09/27 mandic00@live.com
 
+- implement box caching for movenet
 - autodetect number of bodies and hands
 - upload new samples
 - new samples gallery and major code folder restructure
diff --git a/src/body/blazepose.ts b/src/body/blazepose.ts
new file mode 100644
index 00000000..c151c013
--- /dev/null
+++ b/src/body/blazepose.ts
@@ -0,0 +1,161 @@
+/**
+ * BlazePose model implementation
+ *
+ * Based on: [**BlazePose**](https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection)
+ */
+
+import { log, join } from '../util/util';
+import * as tf from '../../dist/tfjs.esm.js';
+import type { BodyResult, Box, Point } from '../result';
+import type { GraphModel, Tensor } from '../tfjs/types';
+import type { Config } from '../config';
+import { env } from '../util/env';
+import * as annotations from './annotations';
+
+// const boxScaleFact = 1.5; // hand finger model prefers slightly larger box
+const models: [GraphModel | null, GraphModel | null] = [null, null];
+const outputNodes = ['ld_3d', 'activation_segmentation', 'activation_heatmap', 'world_3d', 'output_poseflag'];
+
+const inputSize = [[0, 0], [0, 0]];
+
+// let skipped = 0;
+let outputSize: [number, number] = [0, 0];
+
+type Keypoints = { score: number, part: string, position: Point, positionRaw: Point };
+
+/*
+type BodyDetectResult = {
+  id: number,
+  score: number,
+  box: Box,
+  boxRaw: Box,
+  label: string,
+  yxBox: Box,
+}
+
+const cache: {
+  bodyBoxes: Array<BodyDetectResult>,
+  partBoxes: Array<BodyDetectResult>,
+  tmpBoxes: Array<BodyDetectResult>,
+} = {
+  bodyBoxes: [],
+  partBoxes: [],
+  tmpBoxes: [],
+};
+*/
+
+export async function loadDetect(config: Config): Promise<GraphModel> {
+  if (env.initial) models[0] = null;
+  if (!models[0]) {
+    models[0] = await tf.loadGraphModel(join(config.modelBasePath, config.body.detector?.modelPath || '')) as unknown as GraphModel;
+    const inputs = Object.values(models[0].modelSignature['inputs']);
+    inputSize[0][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
+    inputSize[0][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
+    if (!models[0] || !models[0]['modelUrl']) log('load model failed:', config.body.detector?.modelPath);
+    else if (config.debug) log('load model:', models[0]['modelUrl']);
+  } else if (config.debug) log('cached model:', models[0]['modelUrl']);
+  return models[0];
+}
+
+export async function loadPose(config: Config): Promise<GraphModel> {
+  if (env.initial) models[1] = null;
+  if (!models[1]) {
+    models[1] = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel;
+    const inputs = Object.values(models[1].modelSignature['inputs']);
+    inputSize[1][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
+    inputSize[1][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
+    if (!models[1] || !models[1]['modelUrl']) log('load model failed:', config.body.modelPath);
+    else if (config.debug) log('load model:', models[1]['modelUrl']);
+  } else if (config.debug) log('cached model:', models[1]['modelUrl']);
+  return models[1];
+}
+
+export async function load(config: Config): Promise<[GraphModel | null, GraphModel | null]> {
+  if (!models[0]) await loadDetect(config);
+  if (!models[1]) await loadPose(config);
+  return models;
+}
+
+/*
+async function detectBody(input: Tensor, config: Config): Promise<BodyDetectResult[]> {
+  if ((config.body.detector?.modelPath.length || 0) > 0 && models[0]) {
+    const t: Record<string, Tensor> = {};
+    t.resize = tf.image.resizeBilinear(input, [inputSize[0][0], inputSize[0][1]]);
+    t.res = await models[0]?.predict(t.resize) as Tensor; // [1,2254,13]
+    t.logits = tf.slice(t.res, [0, 0, 0], [1, -1, 1]);
+    t.sigmoid = tf.sigmoid(t.logits);
+    t.rawBoxes = tf.slice(t.res, [0, 0, 1], [1, -1, -1]);
+    t.packedBoxes = tf.squeeze(t.rawBoxes); // [2254,12]
+    t.scores = tf.squeeze(t.sigmoid); // [2254,1]
+    // boxes need to be decoded based on anchors
+    Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
+  }
+  return [];
+}
+*/
+
+async function detectParts(input: Tensor, config: Config): Promise<BodyResult> {
+  const t: Record<string, Tensor> = {};
+  t.resize = tf.image.resizeBilinear(input, [inputSize[1][0], inputSize[1][1]]);
+  [t.ld/* 1,195 */, t.segmentation/* 1,256,256,1 */, t.heatmap/* 1,64,64,39 */, t.world/* 1,117 */, t.poseflag/* 1,1 */] = await models[1]?.execute(t.resize, outputNodes) as Tensor[];
+  const points = await t.ld.data();
+  const keypoints: Array<Keypoints> = [];
+  const labels = points?.length === 195 ? annotations.full : annotations.upper; // full model has 39 keypoints, upper has 31 keypoints
+  const depth = 5; // each point has x,y,z,visibility,presence
+  for (let i = 0; i < points.length / depth; i++) {
+    const score = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 3])))) / 100; // reverse sigmoid value
+    // const presence = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 4])))) / 100; // reverse sigmoid value
+    if (score > (config.body.minConfidence || 0)) {
+      keypoints.push({
+        part: labels[i],
+        position: [
+          Math.trunc(outputSize[0] * points[depth * i + 0] / 255), // scale x value from 0..255 to output size
+          Math.trunc(outputSize[1] * points[depth * i + 1] / 255), // scale y value from 0..255 to output size
+          Math.trunc(points[depth * i + 2]) + 0, // fix negative zero
+        ],
+        positionRaw: [
+          points[depth * i + 0] / 255, // return x value normalized to 0..1
+          points[depth * i + 1] / 255, // return y value normalized to 0..1
+          points[depth * i + 2] + 0, // fix negative zero
+        ],
+        score,
+      });
+    }
+  }
+  const x = keypoints.map((a) => a.position[0]);
+  const y = keypoints.map((a) => a.position[1]);
+  const box: Box = [
+    Math.min(...x),
+    Math.min(...y),
+    Math.max(...x) - Math.min(...x),
+    Math.max(...y) - Math.min(...y),
+  ];
+  const boxRaw: Box = [0, 0, 0, 0]; // not yet implemented
+  const score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
+  Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
+  return { id: 0, score, box, boxRaw, keypoints };
+}
+
+export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
+  outputSize = [input.shape[2] || 0, input.shape[1] || 0];
+  const bodies: Array<BodyResult> = [];
+  const body = await detectParts(input, config);
+  bodies.push(body);
+  /*
+  cache.tmpBoxes = []; // clear temp cache
+  if ((skipped < (config.body.skipFrames || 0)) && config.skipFrame) { // just run part detection while reusing cached boxes
+    skipped++;
+    bodies = await Promise.all(cache.partBoxes.map((body) => detectParts(input, body, config))); // run from parts box cache
+  } else { // calculate new boxes and run part detection
+    skipped = 0;
+    bodies = await Promise.all(cache.partBoxes.map((body) => detectParts(input, body, config))); // run from part box cache
+    if (bodies.length !== config.body.maxDetected) { // run body detection only if we don't have enough bodies in cache
+      cache.bodyBoxes = await detectBody(input, config);
+      const newBodies = await Promise.all(cache.bodyBoxes.map((body) => detectParts(input, body, config)));
+      bodies = bodies.concat(newBodies);
+    }
+  }
+  cache.partBoxes = [...cache.tmpBoxes]; // repopulate cache with validated bodies
+  */
+  return bodies as BodyResult[];
+}
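The score decoding in `detectParts()` above turns a raw logit into a 0..1 confidence using only `Math.exp`, so no extra sigmoid op runs on the tensor. A standalone sketch (not part of the patch) confirming that the arithmetic tracks a plain sigmoid to within one hundredth:

```ts
// decodeScore mirrors the expression used in detectParts(); sigmoid is the reference
const decodeScore = (logit: number): number => (100 - Math.trunc(100 / (1 + Math.exp(logit)))) / 100;
const sigmoid = (logit: number): number => 1 / (1 + Math.exp(-logit));

for (const logit of [-3, -1, 0, 1, 3]) {
  // e.g. logit=1: decodeScore=0.74, sigmoid~0.73; the two never differ by more than 0.01
  console.log(logit, decodeScore(logit).toFixed(2), sigmoid(logit).toFixed(2));
}
```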
diff --git a/src/efficientpose/efficientpose.ts b/src/body/efficientpose.ts
similarity index 98%
rename from src/efficientpose/efficientpose.ts
rename to src/body/efficientpose.ts
index 2f319005..7b4a5ec4 100644
--- a/src/efficientpose/efficientpose.ts
+++ b/src/body/efficientpose.ts
@@ -4,12 +4,12 @@
  * Based on: [**EfficientPose**](https://github.com/daniegr/EfficientPose)
  */
 
-import { log, join } from '../util';
+import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import type { BodyResult, Box } from '../result';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
-import { env } from '../env';
+import { env } from '../util/env';
 
 let model: GraphModel | null;
 
diff --git a/src/movenet/movenet.ts b/src/body/movenet.ts
similarity index 95%
rename from src/movenet/movenet.ts
rename to src/body/movenet.ts
index e29322bd..a014d73d 100644
--- a/src/movenet/movenet.ts
+++ b/src/body/movenet.ts
@@ -4,19 +4,20 @@
  * Based on: [**MoveNet**](https://blog.tensorflow.org/2021/05/next-generation-pose-detection-with-movenet-and-tensorflowjs.html)
  */
 
-import { log, join, scaleBox } from '../util';
+import { log, join } from '../util/util';
+import { scale } from '../util/box';
 import * as tf from '../../dist/tfjs.esm.js';
-import type { BodyResult, Box } from '../result';
+import type { BodyResult, Box, Point } from '../result';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
 import { fakeOps } from '../tfjs/backend';
-import { env } from '../env';
+import { env } from '../util/env';
 
 let model: GraphModel | null;
 let inputSize = 0;
 const cachedBoxes: Array<Box> = [];
 
-type Keypoints = { score: number, part: string, position: [number, number], positionRaw: [number, number] };
+type Keypoints = { score: number, part: string, position: Point, positionRaw: Point };
 type Body = { id: number, score: number, box: Box, boxRaw: Box, keypoints: Array<Keypoints> }
 
 let skipped = Number.MAX_SAFE_INTEGER;
@@ -157,7 +158,7 @@ export async function predict(input: Tensor, config: Config): Promise<Body[]> {
     if (bodies[i].keypoints.length > 10) { // only update cache if we detected sufficient number of keypoints
       const kpts = bodies[i].keypoints.map((kpt) => kpt.position);
-      const newBox = scaleBox(kpts, 1.5, [input.shape[2], input.shape[1]]);
+      const newBox = scale(kpts, 1.5, [input.shape[2], input.shape[1]]);
       cachedBoxes.push([...newBox.yxBox]);
     }
   }
diff --git a/src/config.ts b/src/config.ts
index 6f4c828e..50e88ab4 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -70,6 +70,7 @@ export interface FaceConfig {
  * - modelPath: body pose model, can be absolute path or relative to modelBasePath
  * - minConfidence: threshold for discarding a prediction
  * - maxDetected: maximum number of people detected in the input, should be set to the minimum number for performance
+ * - detector: optional body detector
  *
  * `maxDetected` is valid for `posenet` and `movenet-multipose` as other models are single-pose only
  * `maxDetected` can be set to -1 to auto-detect based on number of detected faces
@@ -83,6 +84,9 @@ export interface BodyConfig {
   maxDetected: number,
   minConfidence: number,
   skipFrames: number,
+  detector?: {
+    modelPath: string
+  },
 }
 
 /** Controls and configures all hand detection specific options
@@ -399,6 +403,9 @@ const config: Config = {
     enabled: true,
     modelPath: 'movenet-lightning.json', // body model, can be absolute path or relative to modelBasePath
     // can be 'posenet', 'blazepose', 'efficientpose', 'movenet-lightning', 'movenet-thunder'
+    detector: {
+      modelPath: '', // optional body detector
+    },
     maxDetected: -1, // maximum number of people detected in the input
     // should be set to the minimum number for performance
     // only valid for posenet and movenet-multipose as other models detect single pose
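For context, the `detector` block added to `src/config.ts` above feeds `blazepose.loadDetect()` in the new `src/body/blazepose.ts`. A hypothetical user configuration exercising it might look like the sketch below; the detector filename and URL are placeholders, since the patch ships an empty default:

```ts
import { Human } from '@vladmandic/human'; // assumes the published package name

const human = new Human({
  modelBasePath: 'https://example.com/models/', // illustrative URL
  body: {
    enabled: true,
    modelPath: 'blazepose.json',                      // pose model
    detector: { modelPath: 'blazepose-detect.json' }, // optional detector stage; placeholder filename
    maxDetected: 1,
    minConfidence: 0.2,
    skipFrames: 16,
  },
});
```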
diff --git a/src/face.ts b/src/face/face.ts
similarity index 97%
rename from src/face.ts
rename to src/face/face.ts
index dc3b3adb..4745abc2 100644
--- a/src/face.ts
+++ b/src/face/face.ts
@@ -3,13 +3,13 @@
  * Uses FaceMesh, Emotion and FaceRes models to create a unified pipeline
  */
 
-import { log, now } from './util';
-import * as tf from '../dist/tfjs.esm.js';
-import * as facemesh from './blazeface/facemesh';
-import * as emotion from './emotion/emotion';
-import * as faceres from './faceres/faceres';
-import type { FaceResult } from './result';
-import type { Tensor } from './tfjs/types';
+import { log, now } from '../util/util';
+import * as tf from '../../dist/tfjs.esm.js';
+import * as facemesh from '../blazeface/facemesh';
+import * as emotion from '../gear/emotion';
+import * as faceres from './faceres';
+import type { FaceResult } from '../result';
+import type { Tensor } from '../tfjs/types';
 
 // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
 const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);
diff --git a/src/faceres/faceres.ts b/src/face/faceres.ts
similarity index 98%
rename from src/faceres/faceres.ts
rename to src/face/faceres.ts
index 249e4962..7543a35f 100644
--- a/src/faceres/faceres.ts
+++ b/src/face/faceres.ts
@@ -7,11 +7,11 @@
  * Based on: [**HSE-FaceRes**](https://github.com/HSE-asavchenko/HSE_FaceRec_tf)
  */
 
-import { log, join } from '../util';
+import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import type { Tensor, GraphModel } from '../tfjs/types';
 import type { Config } from '../config';
-import { env } from '../env';
+import { env } from '../util/env';
 
 let model: GraphModel | null;
 const last: Array<{
diff --git a/src/emotion/emotion.ts b/src/gear/emotion.ts
similarity index 97%
rename from src/emotion/emotion.ts
rename to src/gear/emotion.ts
index f8012106..f91b58e4 100644
--- a/src/emotion/emotion.ts
+++ b/src/gear/emotion.ts
@@ -4,11 +4,11 @@
  * [**Oarriaga**](https://github.com/oarriaga/face_classification)
  */
 
-import { log, join } from '../util';
+import { log, join } from '../util/util';
 import type { Config } from '../config';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import * as tf from '../../dist/tfjs.esm.js';
-import { env } from '../env';
+import { env } from '../util/env';
 
 const annotations = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'];
 let model: GraphModel | null;
diff --git a/src/ssrnet/age.ts b/src/gear/ssrnet-age.ts
similarity index 96%
rename from src/ssrnet/age.ts
rename to src/gear/ssrnet-age.ts
index 48ffa4fb..1e5f6d0e 100644
--- a/src/ssrnet/age.ts
+++ b/src/gear/ssrnet-age.ts
@@ -6,11 +6,11 @@
  * Obsolete and replaced by `faceres` that performs age/gender/descriptor analysis
  */
 
-import { log, join } from '../util';
+import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import type { Config } from '../config';
 import type { GraphModel, Tensor } from '../tfjs/types';
-import { env } from '../env';
+import { env } from '../util/env';
 
 let model: GraphModel | null;
 
diff --git a/src/ssrnet/gender.ts b/src/gear/ssrnet-gender.ts
similarity index 98%
rename from src/ssrnet/gender.ts
rename to src/gear/ssrnet-gender.ts
index a9b1d91b..9c18716b 100644
--- a/src/ssrnet/gender.ts
+++ b/src/gear/ssrnet-gender.ts
@@ -6,11 +6,11 @@
  * Obsolete and replaced by `faceres` that performs age/gender/descriptor analysis
  */
 
-import { log, join } from '../util';
+import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import type { Config } from '../config';
 import type { GraphModel, Tensor } from '../tfjs/types';
-import { env } from '../env';
+import { env } from '../util/env';
 
 let model: GraphModel | null;
 let last = { gender: '' };
diff --git a/src/handtrack/handtrack.ts b/src/hand/handtrack.ts
similarity index 97%
rename from src/handtrack/handtrack.ts
rename to src/hand/handtrack.ts
index f45d4d94..6ecfb707 100644
--- a/src/handtrack/handtrack.ts
+++ b/src/hand/handtrack.ts
@@ -6,12 +6,13 @@
  * - Hand Tracking: [**HandTracking**](https://github.com/victordibia/handtracking)
  */
 
-import { log, join, scaleBox } from '../util';
+import { log, join } from '../util/util';
+import { scale } from '../util/box';
 import * as tf from '../../dist/tfjs.esm.js';
 import type { HandResult, Box } from '../result';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
-import { env } from '../env';
+import { env } from '../util/env';
 import * as fingerPose from '../fingerpose/fingerpose';
 import { fakeOps } from '../tfjs/backend';
 
@@ -168,7 +169,7 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
       (h.box[3] * coord[1] / inputSize[1][1]) + h.box[1],
       (h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2],
     ]);
-    const updatedBox = scaleBox(hand.keypoints, boxScaleFact, outputSize); // replace detected box with box calculated around keypoints
+    const updatedBox = scale(hand.keypoints, boxScaleFact, outputSize); // replace detected box with box calculated around keypoints
     h.box = updatedBox.box;
     h.boxRaw = updatedBox.boxRaw;
     h.yxBox = updatedBox.yxBox;
diff --git a/src/handpose/handpipeline.ts b/src/handpose/handpipeline.ts
index 57be4a2c..6872da73 100644
--- a/src/handpose/handpipeline.ts
+++ b/src/handpose/handpipeline.ts
@@ -8,7 +8,7 @@ import * as box from './box';
 import * as util from './util';
 import type * as detector from './handdetector';
 import type { Tensor, GraphModel } from '../tfjs/types';
-import { env } from '../env';
+import { env } from '../util/env';
 
 const palmBoxEnlargeFactor = 5; // default 3
 const handBoxEnlargeFactor = 1.65; // default 1.65
diff --git a/src/handpose/handpose.ts b/src/handpose/handpose.ts
index 262eabd9..7fa599a4 100644
--- a/src/handpose/handpose.ts
+++ b/src/handpose/handpose.ts
@@ -4,7 +4,7 @@
  * Based on: [**MediaPipe HandPose**](https://drive.google.com/file/d/1sv4sSb9BSNVZhLzxXJ0jBv9DqD-4jnAz/view)
  */
 
-import { log, join } from '../util';
+import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import * as handdetector from './handdetector';
 import * as handpipeline from './handpipeline';
@@ -12,7 +12,7 @@ import * as fingerPose from '../fingerpose/fingerpose';
 import type { HandResult, Box, Point } from '../result';
 import type { Tensor, GraphModel } from '../tfjs/types';
 import type { Config } from '../config';
-import { env } from '../env';
+import { env } from '../util/env';
 
 const meshAnnotations = {
   thumb: [1, 2, 3, 4],
diff --git a/src/human.ts b/src/human.ts
index 098efe68..762a2f83 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -2,41 +2,42 @@
  * Human main module
 */
 
-import { log, now, mergeDeep, validate } from './util';
+import { log, now, mergeDeep, validate } from './util/util';
 import { Config, defaults } from './config';
 import type { Result, FaceResult, HandResult, BodyResult, ObjectResult, GestureResult, PersonResult } from './result';
 import * as tf from '../dist/tfjs.esm.js';
 import * as models from './models';
-import * as face from './face';
+import * as face from './face/face';
 import * as facemesh from './blazeface/facemesh';
-import * as faceres from './faceres/faceres';
+import * as faceres from './face/faceres';
 import * as posenet from './posenet/posenet';
-import * as handtrack from './handtrack/handtrack';
+import * as handtrack from './hand/handtrack';
 import * as handpose from './handpose/handpose';
-import * as blazepose from './blazepose/blazepose';
-import * as efficientpose from './efficientpose/efficientpose';
-import * as movenet from './movenet/movenet';
+// import * as blazepose from './body/blazepose-v1';
+import * as blazepose from './body/blazepose';
+import * as efficientpose from './body/efficientpose';
+import * as movenet from './body/movenet';
 import * as nanodet from './object/nanodet';
 import * as centernet from './object/centernet';
 import * as segmentation from './segmentation/segmentation';
 import * as gesture from './gesture/gesture';
 import * as image from './image/image';
-import * as draw from './draw';
+import * as draw from './util/draw';
 import * as persons from './persons';
-import * as interpolate from './interpolate';
-import * as env from './env';
+import * as interpolate from './util/interpolate';
+import * as env from './util/env';
 import * as backend from './tfjs/backend';
 import * as humangl from './tfjs/humangl';
 import * as app from '../package.json';
 import * as warmups from './warmup';
 import type { Tensor } from './tfjs/types';
-import type { DrawOptions } from './draw';
+import type { DrawOptions } from './util/draw';
 
 // export types
 export * from './config';
 export * from './result';
-export type { DrawOptions } from './draw';
-export { env, Env } from './env';
+export type { DrawOptions } from './util/draw';
+export { env, Env } from './util/env';
 export { Box, Point } from './result';
 export { Models } from './models';
diff --git a/src/image/image.ts b/src/image/image.ts
index 73e22f92..0a104885 100644
--- a/src/image/image.ts
+++ b/src/image/image.ts
@@ -6,8 +6,8 @@ import * as tf from '../../dist/tfjs.esm.js';
 import * as fxImage from './imagefx';
 import type { Tensor } from '../tfjs/types';
 import type { Config } from '../config';
-import { env } from '../env';
-import { log } from '../util';
+import { env } from '../util/env';
+import { log } from '../util/util';
 
 type Input = Tensor | ImageData | ImageBitmap | HTMLImageElement | HTMLMediaElement | HTMLVideoElement | HTMLCanvasElement | OffscreenCanvas | typeof Image | typeof env.Canvas;
 
diff --git a/src/models.ts b/src/models.ts
index 2e9595b2..ed846bd7 100644
--- a/src/models.ts
+++ b/src/models.ts
@@ -2,23 +2,23 @@
 * Loader and Validator for all models used by Human
 */
 
-import { log } from './util';
+import { log } from './util/util';
 import type { GraphModel } from './tfjs/types';
 import * as facemesh from './blazeface/facemesh';
-import * as faceres from './faceres/faceres';
-import * as emotion from './emotion/emotion';
+import * as faceres from './face/faceres';
+import * as emotion from './gear/emotion';
 import * as posenet from './posenet/posenet';
 import * as handpose from './handpose/handpose';
-import * as handtrack from './handtrack/handtrack';
-import * as blazepose from './blazepose/blazepose';
-import * as efficientpose from './efficientpose/efficientpose';
-import * as movenet from './movenet/movenet';
+import * as handtrack from './hand/handtrack';
+import * as blazepose from './body/blazepose';
+import * as efficientpose from './body/efficientpose';
+import * as movenet from './body/movenet';
 import * as nanodet from './object/nanodet';
 import * as centernet from './object/centernet';
 import * as segmentation from './segmentation/segmentation';
 import type { Human } from './human';
-import { env } from './env';
-import * as agegenderrace from './gear/agegenderrace';
+import { env } from './util/env';
+import * as agegenderrace from './gear/gear-agegenderrace';
 
 /** Instances of all possible TFJS Graph Models used by Human
  * - loaded as needed based on configuration
 */
@@ -29,6 +29,7 @@ import * as agegenderrace from './gear/agegenderrace';
 export class Models {
   age: null | GraphModel | Promise<GraphModel> = null;
   agegenderrace: null | GraphModel | Promise<GraphModel> = null;
+  blazeposedetect: null | GraphModel | Promise<GraphModel> = null;
   blazepose: null | GraphModel | Promise<GraphModel> = null;
   centernet: null | GraphModel | Promise<GraphModel> = null;
   efficientpose: null | GraphModel | Promise<GraphModel> = null;
@@ -69,8 +70,9 @@ export async function load(instance: Human) {
   if (instance.config.hand.enabled && instance.config.hand.landmarks && !instance.models.handskeleton && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handskeleton = handtrack.loadSkeleton(instance.config);
   if (instance.config.body.enabled && !instance.models.posenet && instance.config.body?.modelPath?.includes('posenet')) instance.models.posenet = posenet.load(instance.config);
   if (instance.config.body.enabled && !instance.models.efficientpose && instance.config.body?.modelPath?.includes('efficientpose')) instance.models.efficientpose = efficientpose.load(instance.config);
-  if (instance.config.body.enabled && !instance.models.blazepose && instance.config.body?.modelPath?.includes('blazepose')) instance.models.blazepose = blazepose.load(instance.config);
-  if (instance.config.body.enabled && !instance.models.efficientpose && instance.config.body?.modelPath?.includes('efficientpose')) instance.models.efficientpose = blazepose.load(instance.config);
+  if (instance.config.body.enabled && !instance.models.blazepose && instance.config.body?.modelPath?.includes('blazepose')) instance.models.blazepose = blazepose.loadPose(instance.config);
+  if (instance.config.body.enabled && !instance.models.blazeposedetect && instance.config.body.detector?.modelPath && instance.config.body?.modelPath?.includes('blazepose')) instance.models.blazeposedetect = blazepose.loadDetect(instance.config);
+  if (instance.config.body.enabled && !instance.models.efficientpose && instance.config.body?.modelPath?.includes('efficientpose')) instance.models.efficientpose = efficientpose.load(instance.config);
   if (instance.config.body.enabled && !instance.models.movenet && instance.config.body?.modelPath?.includes('movenet')) instance.models.movenet = movenet.load(instance.config);
   if (instance.config.object.enabled && !instance.models.nanodet && instance.config.object?.modelPath?.includes('nanodet')) instance.models.nanodet = nanodet.load(instance.config);
   if (instance.config.object.enabled && !instance.models.centernet && instance.config.object?.modelPath?.includes('centernet')) instance.models.centernet = centernet.load(instance.config);
diff --git a/src/object/centernet.ts b/src/object/centernet.ts
index d6ab47e4..24294b76 100644
--- a/src/object/centernet.ts
+++ b/src/object/centernet.ts
@@ -4,13 +4,13 @@
  * Based on: [**NanoDet**](https://github.com/RangiLyu/nanodet)
  */
 
-import { log, join } from '../util';
+import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import { labels } from './labels';
 import type { ObjectResult, Box } from '../result';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
-import { env } from '../env';
+import { env } from '../util/env';
 import { fakeOps } from '../tfjs/backend';
 
 let model: GraphModel | null;
diff --git a/src/object/nanodet.ts b/src/object/nanodet.ts
index bbb0c91a..463f9bd7 100644
--- a/src/object/nanodet.ts
+++ b/src/object/nanodet.ts
@@ -4,13 +4,13 @@
  * Based on: [**MB3-CenterNet**](https://github.com/610265158/mobilenetv3_centernet)
  */
 
-import { log, join } from '../util';
+import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import { labels } from './labels';
 import type { ObjectResult, Box } from '../result';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
-import { env } from '../env';
+import { env } from '../util/env';
 
 let model;
 let last: Array<ObjectResult> = [];
diff --git a/src/segmentation/segmentation.ts b/src/segmentation/segmentation.ts
index 7f78163f..d3dafeee 100644
--- a/src/segmentation/segmentation.ts
+++ b/src/segmentation/segmentation.ts
@@ -6,12 +6,12 @@
  * - [**MediaPipe Selfie**](https://drive.google.com/file/d/1dCfozqknMa068vVsO2j_1FgZkW_e3VWv/preview)
 */
 
-import { log, join } from '../util';
+import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import * as image from '../image/image';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
-import { env } from '../env';
+import { env } from '../util/env';
 
 type Input = Tensor | typeof Image | ImageData | ImageBitmap | HTMLImageElement | HTMLMediaElement | HTMLVideoElement | HTMLCanvasElement | OffscreenCanvas;
 
diff --git a/src/tfjs/backend.ts b/src/tfjs/backend.ts
index 7251b97e..7fba0a31 100644
--- a/src/tfjs/backend.ts
+++ b/src/tfjs/backend.ts
@@ -1,8 +1,8 @@
 /** TFJS backend initialization and customization */
 
-import { log, now } from '../util';
+import { log, now } from '../util/util';
 import * as humangl from './humangl';
-import * as env from '../env';
+import * as env from '../util/env';
 import * as tf from '../../dist/tfjs.esm.js';
 
 export async function check(instance, force = false) {
diff --git a/src/tfjs/humangl.ts b/src/tfjs/humangl.ts
index e3b8295e..f3500f49 100644
--- a/src/tfjs/humangl.ts
+++ b/src/tfjs/humangl.ts
@@ -1,6 +1,6 @@
 /** TFJS custom backend registration */
 
-import { log } from '../util';
+import { log } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import * as image from '../image/image';
 import * as models from '../models';
diff --git a/src/util/box.ts b/src/util/box.ts
new file mode 100644
index 00000000..c6054f55
--- /dev/null
+++ b/src/util/box.ts
@@ -0,0 +1,28 @@
+import type { Box } from '../result';
+
+// helper function: find box around keypoints, square it and scale it
+export function scale(keypoints, boxScaleFact, outputSize) {
+  const coords = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all x/y coords
+  const maxmin = [Math.max(...coords[0]), Math.min(...coords[0]), Math.max(...coords[1]), Math.min(...coords[1])]; // find min/max x/y coordinates
+  const center = [(maxmin[0] + maxmin[1]) / 2, (maxmin[2] + maxmin[3]) / 2]; // find center x and y coord of all keypoints
+  const diff = Math.max(center[0] - maxmin[1], center[1] - maxmin[3], -center[0] + maxmin[0], -center[1] + maxmin[2]) * boxScaleFact; // largest distance from center in any direction
+  const box = [
+    Math.trunc(center[0] - diff),
+    Math.trunc(center[1] - diff),
+    Math.trunc(2 * diff),
+    Math.trunc(2 * diff),
+  ] as Box;
+  const boxRaw = [ // work backwards to box normalized to 0..1 by output size
+    box[0] / outputSize[0],
+    box[1] / outputSize[1],
+    box[2] / outputSize[0],
+    box[3] / outputSize[1],
+  ] as Box;
+  const yxBox = [ // work backwards to normalized [y1, x1, y2, x2] corners
+    boxRaw[1],
+    boxRaw[0],
+    boxRaw[3] + boxRaw[1],
+    boxRaw[2] + boxRaw[0],
+  ] as Box;
+  return { box, boxRaw, yxBox };
+}
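The new `src/util/box.ts` above is the former `scaleBox()` from `src/util.ts`, renamed to `scale()` and shared by the movenet and handtrack modules. A standalone sketch of the three layouts it returns, using made-up coordinates:

```ts
import { scale } from './src/util/box'; // path as laid out by this patch

// three keypoints in pixel space of a 640x480 input (illustrative values)
const keypoints = [[100, 120], [180, 200], [140, 260]];
const { box, boxRaw, yxBox } = scale(keypoints, 1.5, [640, 480]);

// box:    [x, y, width, height] in pixels, squared and enlarged 1.5x -> [35, 85, 210, 210]
// boxRaw: the same box with x/width divided by 640 and y/height by 480
// yxBox:  normalized [y1, x1, y2, x2] corners, the layout used when cropping tensors
console.log(box, boxRaw, yxBox);
```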
diff --git a/src/draw.ts b/src/util/draw.ts
similarity index 99%
rename from src/draw.ts
rename to src/util/draw.ts
index b5fe1edb..427be227 100644
--- a/src/draw.ts
+++ b/src/util/draw.ts
@@ -2,9 +2,9 @@
  * Module that implements helper draw functions, exposed as human.draw
 */
 
-import { TRI468 as triangulation } from './blazeface/coords';
+import { TRI468 as triangulation } from '../blazeface/coords';
 import { mergeDeep, now } from './util';
-import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult } from './result';
+import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult } from '../result';
 
 /**
  * Draw Options
diff --git a/src/env.ts b/src/util/env.ts
similarity index 98%
rename from src/env.ts
rename to src/util/env.ts
index 9b71891c..7aec99d1 100644
--- a/src/env.ts
+++ b/src/util/env.ts
@@ -1,5 +1,5 @@
-import * as tf from '../dist/tfjs.esm.js';
-import * as image from './image/image';
+import * as tf from '../../dist/tfjs.esm.js';
+import * as image from '../image/image';
 import { mergeDeep } from './util';
 
 export type Env = {
diff --git a/src/interpolate.ts b/src/util/interpolate.ts
similarity index 99%
rename from src/interpolate.ts
rename to src/util/interpolate.ts
index 4df8bd72..903b3b48 100644
--- a/src/interpolate.ts
+++ b/src/util/interpolate.ts
@@ -2,7 +2,7 @@
 * Results interpolation for smoothing of video detection results in between detected frames
 */
 
-import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult, Box, Point } from './result';
+import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult, Box, Point } from '../result';
 
 const bufferedResult: Result = { face: [], body: [], hand: [], gesture: [], object: [], persons: [], performance: {}, timestamp: 0 };
 
diff --git a/src/profile.ts b/src/util/profile.ts
similarity index 100%
rename from src/profile.ts
rename to src/util/profile.ts
diff --git a/src/util.ts b/src/util/util.ts
similarity index 72%
rename from src/util.ts
rename to src/util/util.ts
index 5f845e45..dc73184c 100644
--- a/src/util.ts
+++ b/src/util/util.ts
@@ -2,8 +2,6 @@
 * Simple helper functions used across codebase
 */
 
-import type { Box } from './result';
-
 // helper function: join two paths
 export function join(folder: string, file: string): string {
   const separator = folder.endsWith('/') ? '' : '/';
@@ -71,30 +69,3 @@ export async function wait(time) {
   const waiting = new Promise((resolve) => setTimeout(() => resolve(true), time));
   await waiting;
 }
-
-// helper function: find box around keypoints, square it and scale it
-export function scaleBox(keypoints, boxScaleFact, outputSize) {
-  const coords = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all x/y coords
-  const maxmin = [Math.max(...coords[0]), Math.min(...coords[0]), Math.max(...coords[1]), Math.min(...coords[1])]; // find min/max x/y coordinates
-  const center = [(maxmin[0] + maxmin[1]) / 2, (maxmin[2] + maxmin[3]) / 2]; // find center x and y coord of all fingers
-  const diff = Math.max(center[0] - maxmin[1], center[1] - maxmin[3], -center[0] + maxmin[0], -center[1] + maxmin[2]) * boxScaleFact; // largest distance from center in any direction
-  const box = [
-    Math.trunc(center[0] - diff),
-    Math.trunc(center[1] - diff),
-    Math.trunc(2 * diff),
-    Math.trunc(2 * diff),
-  ] as Box;
-  const boxRaw = [ // work backwards
-    box[0] / outputSize[0],
-    box[1] / outputSize[1],
-    box[2] / outputSize[0],
-    box[3] / outputSize[1],
-  ] as Box;
-  const yxBox = [ // work backwards
-    boxRaw[1],
-    boxRaw[0],
-    boxRaw[3] + boxRaw[1],
-    boxRaw[2] + boxRaw[0],
-  ] as Box;
-  return { box, boxRaw, yxBox };
-}
diff --git a/src/warmup.ts b/src/warmup.ts
index c19e7c0c..ff0e701a 100644
--- a/src/warmup.ts
+++ b/src/warmup.ts
@@ -2,13 +2,13 @@
 * Warmup algorithm that uses embedded images to exercise loaded models for faster future inference
 */
 
-import { log, now, mergeDeep } from './util';
+import { log, now, mergeDeep } from './util/util';
 import * as sample from './sample';
 import * as tf from '../dist/tfjs.esm.js';
 import * as image from './image/image';
 import type { Config } from './config';
 import type { Result } from './result';
-import { env } from './env';
+import { env } from './util/env';
 
 async function warmupBitmap(instance) {
   const b64toBlob = (base64: string, type = 'application/octet-stream') => fetch(`data:${type};base64,${base64}`).then((res) => res.blob());
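Finally, the changelog entry "implement box caching for movenet" corresponds to the `cachedBoxes.push([...newBox.yxBox])` hunk in `src/body/movenet.ts` above: boxes rebuilt from detected keypoints seed detection on later frames. A condensed sketch of that cache-update step; the surrounding loop and the cache-reset policy are assumptions, since both fall outside the hunk shown:

```ts
import { scale } from './src/util/box';

type Box = [number, number, number, number];
const cachedBoxes: Array<Box> = []; // regions of interest carried across frames

// after a full detection pass, rebuild each body's box from its keypoints and
// stash it for the skip-frame path; the `bodies` shape here is a stand-in
function updateCache(bodies: { keypoints: { position: [number, number, number] }[] }[], outputSize: [number, number]): void {
  cachedBoxes.length = 0; // assumed: cache is refreshed on every full detection
  for (const body of bodies) {
    if (body.keypoints.length > 10) { // only cache near-complete detections, as in the hunk above
      const pts = body.keypoints.map((kpt) => kpt.position);
      const { yxBox } = scale(pts, 1.5, outputSize); // squared box enlarged 1.5x around keypoints
      cachedBoxes.push([...yxBox] as Box);
    }
  }
}
```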