refactoring

pull/280/head
Vladimir Mandic 2021-09-27 13:58:13 -04:00
parent 72aad6e812
commit e3fd7a5b61
28 changed files with 272 additions and 99 deletions

View File

@ -11,6 +11,7 @@
### **HEAD -> main** 2021/09/27 mandic00@live.com
- implement box caching for movenet
- autodetect number of bodies and hands
- upload new samples
- new samples gallery and major code folder restructure

161
src/body/blazepose.ts Normal file
View File

@ -0,0 +1,161 @@
/**
* BlazePose model implementation
*
* Based on : [**BlazePose**](https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection)
*/
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import type { BodyResult, Box, Point } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
import * as annotations from './annotations';
// const boxScaleFact = 1.5; // hand finger model prefers slightly larger box
const models: [GraphModel | null, GraphModel | null] = [null, null];
const outputNodes = ['ld_3d', 'activation_segmentation', 'activation_heatmap', 'world_3d', 'output_poseflag'];
const inputSize = [[0, 0], [0, 0]];
// let skipped = 0;
let outputSize: [number, number] = [0, 0];
type Keypoints = { score: number, part: string, position: Point, positionRaw: Point };
/*
type BodyDetectResult = {
id: number,
score: number,
box: Box,
boxRaw: Box,
label: string,
yxBox: Box,
}
const cache: {
bodyBoxes: Array<BodyDetectResult>,
partBoxes: Array<BodyDetectResult>
tmpBoxes: Array<BodyDetectResult>
} = {
bodyBoxes: [],
partBoxes: [],
tmpBoxes: [],
};
*/
export async function loadDetect(config: Config): Promise<GraphModel> {
if (env.initial) models[0] = null;
if (!models[0]) {
models[0] = await tf.loadGraphModel(join(config.modelBasePath, config.body.detector?.modelPath || '')) as unknown as GraphModel;
const inputs = Object.values(models[0].modelSignature['inputs']);
inputSize[0][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
inputSize[0][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
if (!models[0] || !models[0]['modelUrl']) log('load model failed:', config.body.detector?.modelPath);
else if (config.debug) log('load model:', models[0]['modelUrl']);
} else if (config.debug) log('cached model:', models[0]['modelUrl']);
return models[0];
}
export async function loadPose(config: Config): Promise<GraphModel> {
if (env.initial) models[1] = null;
if (!models[1]) {
models[1] = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel;
const inputs = Object.values(models[1].modelSignature['inputs']);
inputSize[1][0] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[1].size) : 0;
inputSize[1][1] = Array.isArray(inputs) ? parseInt(inputs[0].tensorShape.dim[2].size) : 0;
if (!models[1] || !models[1]['modelUrl']) log('load model failed:', config.body.modelPath);
else if (config.debug) log('load model:', models[1]['modelUrl']);
} else if (config.debug) log('cached model:', models[1]['modelUrl']);
return models[1];
}
export async function load(config: Config): Promise<[GraphModel | null, GraphModel | null]> {
if (!models[0]) await loadDetect(config);
if (!models[1]) await loadPose(config);
return models;
}
/*
async function detectBody(input: Tensor, config: Config): Promise<BodyDetectResult[]> {
if ((config.body.detector?.modelPath.length || 0) > 0 && models[0]) {
const t: Record<string, Tensor> = {};
t.resize = tf.image.resizeBilinear(input, [inputSize[0][0], inputSize[0][1]]);
t.res = await models[0]?.predict(t.resize) as Tensor; // [1,2254,13]
t.logits = tf.slice(t.res, [0, 0, 0], [1, -1, 1]);
t.sigmoid = tf.sigmoid(t.logits);
t.rawBoxes = tf.slice(t.res, [0, 0, 1], [1, -1, -1]);
t.packedBoxes = tf.squeeze(t.rawBoxes); // [2254,12]
t.scores = tf.squeeze(t.sigmoid); // [2254,1]
// boxes need to be decoded based on anchors
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
}
return [];
}
*/
async function detectParts(input: Tensor, config: Config): Promise<BodyResult> {
const t: Record<string, Tensor> = {};
t.resize = tf.image.resizeBilinear(input, [inputSize[1][0], inputSize[1][1]]);
[t.ld/* 1,195 */, t.segmentation/* 1,256,256,1 */, t.heatmap/* 1,64,64,39 */, t.world/* 1,117 */, t.poseflag/* 1,1 */] = await models[1]?.execute(t.resize, outputNodes) as Tensor[];
const points = await t.ld.data();
const keypoints: Array<Keypoints> = [];
const labels = points?.length === 195 ? annotations.full : annotations.upper; // full model has 39 keypoints, upper has 31 keypoints
const depth = 5; // each point has x,y,z,visibility,presence
for (let i = 0; i < points.length / depth; i++) {
const score = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 3])))) / 100; // recover visibility probability from the raw logit (approximately sigmoid of the value, kept to two decimals)
// const presence = (100 - Math.trunc(100 / (1 + Math.exp(points[depth * i + 4])))) / 100; // reverse sigmoid value
if (score > (config.body.minConfidence || 0)) {
keypoints.push({
part: labels[i],
position: [
Math.trunc(outputSize[0] * points[depth * i + 0] / 255), // x coordinate remapped from the 0..255 model space to output width
Math.trunc(outputSize[1] * points[depth * i + 1] / 255), // y coordinate remapped from the 0..255 model space to output height
Math.trunc(points[depth * i + 2]) + 0, // fix negative zero
],
positionRaw: [
points[depth * i + 0] / 255, // return x value normalized to 0..1
points[depth * i + 1] / 255, // return y value normalized to 0..1
points[depth * i + 2] + 0, // fix negative zero
],
score,
});
}
}
const x = keypoints.map((a) => a.position[0]);
const y = keypoints.map((a) => a.position[1]);
const box: Box = [
Math.min(...x),
Math.min(...y),
Math.max(...x) - Math.min(...x),
Math.max(...y) - Math.min(...y),
];
const boxRaw: Box = [0, 0, 0, 0]; // not yet implemented
const score = keypoints.reduce((prev, curr) => (curr.score > prev ? curr.score : prev), 0);
Object.keys(t).forEach((tensor) => tf.dispose(t[tensor]));
return { id: 0, score, box, boxRaw, keypoints };
}
export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
outputSize = [input.shape[2] || 0, input.shape[1] || 0];
const bodies: Array<BodyResult> = [];
const body = await detectParts(input, config);
bodies.push(body);
/*
cache.tmpBoxes = []; // clear temp cache
if ((skipped < (config.body.skipFrames || 0)) && config.skipFrame) { // just run part detection while reusing cached boxes
skipped++;
bodies = await Promise.all(cache.partBoxes.map((body) => detectParts(input, body, config))); // run from parts box cache
} else { // calculate new boxes and run part detection
skipped = 0;
bodies = await Promise.all(cache.partBoxes.map((body) => detectParts(input, body, config))); // run from part box cache
if (bodies.length !== config.body.maxDetected) { // run body detection only if we dont have enough bodies in cache
cache.bodyBoxes = await detectBody(input, config);
const newBodies = await Promise.all(cache.bodyBoxes.map((body) => detectParts(input, body, config)));
bodies = bodies.concat(newBodies);
}
}
cache.partBoxes = [...cache.tmpBoxes]; // repopulate cache with validated bodies
*/
return bodies as BodyResult[];
}
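
A minimal usage sketch for the new module, assuming a preprocessed input tensor and the relative import paths used above; the wrapper name runBlazePose is illustrative and not part of the commit:

import * as blazepose from './body/blazepose';
import type { Config } from './config';
import type { BodyResult } from './result';
import type { Tensor } from './tfjs/types';

// illustrative wrapper: resolve both graph models declared in config.body, then run part detection
async function runBlazePose(input: Tensor, config: Config): Promise<BodyResult[]> {
  await blazepose.load(config); // loads the detector and pose models on first call, reuses cached models afterwards
  return blazepose.predict(input, config); // currently returns a single-element BodyResult[]
}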

View File

@ -4,12 +4,12 @@
* Based on: [**EfficientPose**](https://github.com/daniegr/EfficientPose)
*/
import { log, join } from '../util';
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import type { BodyResult, Box } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../env';
import { env } from '../util/env';
let model: GraphModel | null;

View File

@ -4,19 +4,20 @@
* Based on: [**MoveNet**](https://blog.tensorflow.org/2021/05/next-generation-pose-detection-with-movenet-and-tensorflowjs.html)
*/
import { log, join, scaleBox } from '../util';
import { log, join } from '../util/util';
import { scale } from '../util/box';
import * as tf from '../../dist/tfjs.esm.js';
import type { BodyResult, Box } from '../result';
import type { BodyResult, Box, Point } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { fakeOps } from '../tfjs/backend';
import { env } from '../env';
import { env } from '../util/env';
let model: GraphModel | null;
let inputSize = 0;
const cachedBoxes: Array<Box> = [];
type Keypoints = { score: number, part: string, position: [number, number], positionRaw: [number, number] };
type Keypoints = { score: number, part: string, position: Point, positionRaw: Point };
type Body = { id: number, score: number, box: Box, boxRaw: Box, keypoints: Array<Keypoints> }
let skipped = Number.MAX_SAFE_INTEGER;
@ -157,7 +158,7 @@ export async function predict(input: Tensor, config: Config): Promise<BodyResult
for (let i = 0; i < bodies.length; i++) {
if (bodies[i].keypoints.length > 10) { // only update cache if we detected sufficient number of keypoints
const kpts = bodies[i].keypoints.map((kpt) => kpt.position);
const newBox = scaleBox(kpts, 1.5, [input.shape[2], input.shape[1]]);
const newBox = scale(kpts, 1.5, [input.shape[2], input.shape[1]]);
cachedBoxes.push([...newBox.yxBox]);
}
}

View File

@ -70,6 +70,7 @@ export interface FaceConfig {
* - modelPath: body pose model, can be absolute path or relative to modelBasePath
* - minConfidence: threshold for discarding a prediction
* - maxDetected: maximum number of people detected in the input, should be set to the minimum number for performance
* - detector: optional body detector
*
* `maxDetected` is valid for `posenet` and `movenet-multipose` as other models are single-pose only
* `maxDetected` can be set to -1 to auto-detect based on number of detected faces
@ -83,6 +84,9 @@ export interface BodyConfig {
maxDetected: number,
minConfidence: number,
skipFrames: number,
detector?: {
modelPath: string
},
}
/** Controls and configures all hand detection specific options
@ -399,6 +403,9 @@ const config: Config = {
enabled: true,
modelPath: 'movenet-lightning.json', // body model, can be absolute path or relative to modelBasePath
// can be 'posenet', 'blazepose', 'efficientpose', 'movenet-lightning', 'movenet-thunder'
detector: {
modelPath: '', // optional body detector
},
maxDetected: -1, // maximum number of people detected in the input
// should be set to the minimum number for performance
// only valid for posenet and movenet-multipose as other models detect a single pose
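
As an illustration of the new detector option, a hedged configuration override (model file names here are placeholders, not shipped defaults):

// illustrative user override - file names are hypothetical
const userConfig = {
  body: {
    enabled: true,
    modelPath: 'blazepose.json', // hypothetical pose landmark model
    detector: { modelPath: 'blazepose-detect.json' }, // hypothetical detector model; keep '' to skip detection
    maxDetected: 1,
    minConfidence: 0.2,
  },
};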

View File

@ -3,13 +3,13 @@
* Uses FaceMesh, Emotion and FaceRes models to create a unified pipeline
*/
import { log, now } from './util';
import * as tf from '../dist/tfjs.esm.js';
import * as facemesh from './blazeface/facemesh';
import * as emotion from './emotion/emotion';
import * as faceres from './faceres/faceres';
import type { FaceResult } from './result';
import type { Tensor } from './tfjs/types';
import { log, now } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as facemesh from '../blazeface/facemesh';
import * as emotion from '../gear/emotion';
import * as faceres from './faceres';
import type { FaceResult } from '../result';
import type { Tensor } from '../tfjs/types';
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);

View File

@ -7,11 +7,11 @@
* Based on: [**HSE-FaceRes**](https://github.com/HSE-asavchenko/HSE_FaceRec_tf)
*/
import { log, join } from '../util';
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../env';
import { env } from '../util/env';
let model: GraphModel | null;
const last: Array<{

View File

@ -4,11 +4,11 @@
* [**Oarriaga**](https://github.com/oarriaga/face_classification)
*/
import { log, join } from '../util';
import { log, join } from '../util/util';
import type { Config } from '../config';
import type { GraphModel, Tensor } from '../tfjs/types';
import * as tf from '../../dist/tfjs.esm.js';
import { env } from '../env';
import { env } from '../util/env';
const annotations = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'];
let model: GraphModel | null;

View File

@ -6,11 +6,11 @@
* Obsolete and replaced by `faceres` that performs age/gender/descriptor analysis
*/
import { log, join } from '../util';
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import type { Config } from '../config';
import type { GraphModel, Tensor } from '../tfjs/types';
import { env } from '../env';
import { env } from '../util/env';
let model: GraphModel | null;

View File

@ -6,11 +6,11 @@
* Obsolete and replaced by `faceres` that performs age/gender/descriptor analysis
*/
import { log, join } from '../util';
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import type { Config } from '../config';
import type { GraphModel, Tensor } from '../tfjs/types';
import { env } from '../env';
import { env } from '../util/env';
let model: GraphModel | null;
let last = { gender: '' };

View File

@ -6,12 +6,13 @@
* - Hand Tracking: [**HandTracking**](https://github.com/victordibia/handtracking)
*/
import { log, join, scaleBox } from '../util';
import { log, join } from '../util/util';
import { scale } from '../util/box';
import * as tf from '../../dist/tfjs.esm.js';
import type { HandResult, Box } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../env';
import { env } from '../util/env';
import * as fingerPose from '../fingerpose/fingerpose';
import { fakeOps } from '../tfjs/backend';
@ -168,7 +169,7 @@ async function detectFingers(input: Tensor, h: HandDetectResult, config: Config)
(h.box[3] * coord[1] / inputSize[1][1]) + h.box[1],
(h.box[2] + h.box[3]) / 2 / inputSize[1][0] * coord[2],
]);
const updatedBox = scaleBox(hand.keypoints, boxScaleFact, outputSize); // replace detected box with box calculated around keypoints
const updatedBox = scale(hand.keypoints, boxScaleFact, outputSize); // replace detected box with box calculated around keypoints
h.box = updatedBox.box;
h.boxRaw = updatedBox.boxRaw;
h.yxBox = updatedBox.yxBox;

View File

@ -8,7 +8,7 @@ import * as box from './box';
import * as util from './util';
import type * as detector from './handdetector';
import type { Tensor, GraphModel } from '../tfjs/types';
import { env } from '../env';
import { env } from '../util/env';
const palmBoxEnlargeFactor = 5; // default 3
const handBoxEnlargeFactor = 1.65; // default 1.65

View File

@ -4,7 +4,7 @@
* Based on: [**MediaPipe HandPose**](https://drive.google.com/file/d/1sv4sSb9BSNVZhLzxXJ0jBv9DqD-4jnAz/view)
*/
import { log, join } from '../util';
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as handdetector from './handdetector';
import * as handpipeline from './handpipeline';
@ -12,7 +12,7 @@ import * as fingerPose from '../fingerpose/fingerpose';
import type { HandResult, Box, Point } from '../result';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../env';
import { env } from '../util/env';
const meshAnnotations = {
thumb: [1, 2, 3, 4],

View File

@ -2,41 +2,42 @@
* Human main module
*/
import { log, now, mergeDeep, validate } from './util';
import { log, now, mergeDeep, validate } from './util/util';
import { Config, defaults } from './config';
import type { Result, FaceResult, HandResult, BodyResult, ObjectResult, GestureResult, PersonResult } from './result';
import * as tf from '../dist/tfjs.esm.js';
import * as models from './models';
import * as face from './face';
import * as face from './face/face';
import * as facemesh from './blazeface/facemesh';
import * as faceres from './faceres/faceres';
import * as faceres from './face/faceres';
import * as posenet from './posenet/posenet';
import * as handtrack from './handtrack/handtrack';
import * as handtrack from './hand/handtrack';
import * as handpose from './handpose/handpose';
import * as blazepose from './blazepose/blazepose';
import * as efficientpose from './efficientpose/efficientpose';
import * as movenet from './movenet/movenet';
// import * as blazepose from './body/blazepose-v1';
import * as blazepose from './body/blazepose';
import * as efficientpose from './body/efficientpose';
import * as movenet from './body/movenet';
import * as nanodet from './object/nanodet';
import * as centernet from './object/centernet';
import * as segmentation from './segmentation/segmentation';
import * as gesture from './gesture/gesture';
import * as image from './image/image';
import * as draw from './draw';
import * as draw from './util/draw';
import * as persons from './persons';
import * as interpolate from './interpolate';
import * as env from './env';
import * as interpolate from './util/interpolate';
import * as env from './util/env';
import * as backend from './tfjs/backend';
import * as humangl from './tfjs/humangl';
import * as app from '../package.json';
import * as warmups from './warmup';
import type { Tensor } from './tfjs/types';
import type { DrawOptions } from './draw';
import type { DrawOptions } from './util/draw';
// export types
export * from './config';
export * from './result';
export type { DrawOptions } from './draw';
export { env, Env } from './env';
export type { DrawOptions } from './util/draw';
export { env, Env } from './util/env';
export { Box, Point } from './result';
export { Models } from './models';

View File

@ -6,8 +6,8 @@ import * as tf from '../../dist/tfjs.esm.js';
import * as fxImage from './imagefx';
import type { Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../env';
import { log } from '../util';
import { env } from '../util/env';
import { log } from '../util/util';
type Input = Tensor | ImageData | ImageBitmap | HTMLImageElement | HTMLMediaElement | HTMLVideoElement | HTMLCanvasElement | OffscreenCanvas | typeof Image | typeof env.Canvas;

View File

@ -2,23 +2,23 @@
* Loader and Validator for all models used by Human
*/
import { log } from './util';
import { log } from './util/util';
import type { GraphModel } from './tfjs/types';
import * as facemesh from './blazeface/facemesh';
import * as faceres from './faceres/faceres';
import * as emotion from './emotion/emotion';
import * as faceres from './face/faceres';
import * as emotion from './gear/emotion';
import * as posenet from './posenet/posenet';
import * as handpose from './handpose/handpose';
import * as handtrack from './handtrack/handtrack';
import * as blazepose from './blazepose/blazepose';
import * as efficientpose from './efficientpose/efficientpose';
import * as movenet from './movenet/movenet';
import * as handtrack from './hand/handtrack';
import * as blazepose from './body/blazepose';
import * as efficientpose from './body/efficientpose';
import * as movenet from './body/movenet';
import * as nanodet from './object/nanodet';
import * as centernet from './object/centernet';
import * as segmentation from './segmentation/segmentation';
import type { Human } from './human';
import { env } from './env';
import * as agegenderrace from './gear/agegenderrace';
import { env } from './util/env';
import * as agegenderrace from './gear/gear-agegenderrace';
/** Instances of all possible TFJS Graph Models used by Human
* - loaded as needed based on configuration
@ -29,6 +29,7 @@ import * as agegenderrace from './gear/agegenderrace';
export class Models {
age: null | GraphModel | Promise<GraphModel> = null;
agegenderrace: null | GraphModel | Promise<GraphModel> = null;
blazeposedetect: null | GraphModel | Promise<GraphModel> = null;
blazepose: null | GraphModel | Promise<GraphModel> = null;
centernet: null | GraphModel | Promise<GraphModel> = null;
efficientpose: null | GraphModel | Promise<GraphModel> = null;
@ -69,8 +70,9 @@ export async function load(instance: Human) {
if (instance.config.hand.enabled && instance.config.hand.landmarks && !instance.models.handskeleton && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handskeleton = handtrack.loadSkeleton(instance.config);
if (instance.config.body.enabled && !instance.models.posenet && instance.config.body?.modelPath?.includes('posenet')) instance.models.posenet = posenet.load(instance.config);
if (instance.config.body.enabled && !instance.models.efficientpose && instance.config.body?.modelPath?.includes('efficientpose')) instance.models.efficientpose = efficientpose.load(instance.config);
if (instance.config.body.enabled && !instance.models.blazepose && instance.config.body?.modelPath?.includes('blazepose')) instance.models.blazepose = blazepose.load(instance.config);
if (instance.config.body.enabled && !instance.models.efficientpose && instance.config.body?.modelPath?.includes('efficientpose')) instance.models.efficientpose = blazepose.load(instance.config);
if (instance.config.body.enabled && !instance.models.blazepose && instance.config.body?.modelPath?.includes('blazepose')) instance.models.blazepose = blazepose.loadPose(instance.config);
if (instance.config.body.enabled && !instance.models.blazeposedetect && instance.config.body.detector?.modelPath && instance.config.body?.modelPath?.includes('blazepose')) instance.models.blazeposedetect = blazepose.loadDetect(instance.config);
if (instance.config.body.enabled && !instance.models.efficientpose && instance.config.body?.modelPath?.includes('efficientpose')) instance.models.efficientpose = efficientpose.load(instance.config);
if (instance.config.body.enabled && !instance.models.movenet && instance.config.body?.modelPath?.includes('movenet')) instance.models.movenet = movenet.load(instance.config);
if (instance.config.object.enabled && !instance.models.nanodet && instance.config.object?.modelPath?.includes('nanodet')) instance.models.nanodet = nanodet.load(instance.config);
if (instance.config.object.enabled && !instance.models.centernet && instance.config.object?.modelPath?.includes('centernet')) instance.models.centernet = centernet.load(instance.config);
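
A hedged sketch of how the split blazepose loaders are selected, mirroring the conditions in load() above; the helper name loadBlazePoseModels is illustrative:

import * as blazepose from './body/blazepose';
import type { Config } from './config';

// sketch only: mirrors the lazy-load conditions used above
async function loadBlazePoseModels(config: Config) {
  if (!config.body.enabled || !config.body.modelPath?.includes('blazepose')) return null;
  const pose = await blazepose.loadPose(config); // landmark model, always required for blazepose
  const detect = config.body.detector?.modelPath ? await blazepose.loadDetect(config) : null; // optional detector
  return { pose, detect };
}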

View File

@ -4,13 +4,13 @@
* Based on: [**NanoDet**](https://github.com/RangiLyu/nanodet)
*/
import { log, join } from '../util';
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import { labels } from './labels';
import type { ObjectResult, Box } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../env';
import { env } from '../util/env';
import { fakeOps } from '../tfjs/backend';
let model: GraphModel | null;

View File

@ -4,13 +4,13 @@
* Based on: [**MB3-CenterNet**](https://github.com/610265158/mobilenetv3_centernet)
*/
import { log, join } from '../util';
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import { labels } from './labels';
import type { ObjectResult, Box } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../env';
import { env } from '../util/env';
let model;
let last: Array<ObjectResult> = [];

View File

@ -6,12 +6,12 @@
* - [**MediaPipe Selfie**](https://drive.google.com/file/d/1dCfozqknMa068vVsO2j_1FgZkW_e3VWv/preview)
*/
import { log, join } from '../util';
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as image from '../image/image';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../env';
import { env } from '../util/env';
type Input = Tensor | typeof Image | ImageData | ImageBitmap | HTMLImageElement | HTMLMediaElement | HTMLVideoElement | HTMLCanvasElement | OffscreenCanvas;

View File

@ -1,8 +1,8 @@
/** TFJS backend initialization and customization */
import { log, now } from '../util';
import { log, now } from '../util/util';
import * as humangl from './humangl';
import * as env from '../env';
import * as env from '../util/env';
import * as tf from '../../dist/tfjs.esm.js';
export async function check(instance, force = false) {

View File

@ -1,6 +1,6 @@
/** TFJS custom backend registration */
import { log } from '../util';
import { log } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as image from '../image/image';
import * as models from '../models';

28
src/util/box.ts Normal file
View File

@ -0,0 +1,28 @@
import type { Box } from '../result';
// helper function: find box around keypoints, square it and scale it
export function scale(keypoints, boxScaleFact, outputSize) {
const coords = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all x/y coords
const maxmin = [Math.max(...coords[0]), Math.min(...coords[0]), Math.max(...coords[1]), Math.min(...coords[1])]; // find min/max x/y coordinates
const center = [(maxmin[0] + maxmin[1]) / 2, (maxmin[2] + maxmin[3]) / 2]; // find center x and y coordinates of all keypoints
const diff = Math.max(center[0] - maxmin[1], center[1] - maxmin[3], -center[0] + maxmin[0], -center[1] + maxmin[2]) * boxScaleFact; // largest distance from center in any direction
const box = [
Math.trunc(center[0] - diff),
Math.trunc(center[1] - diff),
Math.trunc(2 * diff),
Math.trunc(2 * diff),
] as Box;
const boxRaw = [ // work backwards
box[0] / outputSize[0],
box[1] / outputSize[1],
box[2] / outputSize[0],
box[3] / outputSize[1],
] as Box;
const yxBox = [ // work backwards
boxRaw[1],
boxRaw[0],
boxRaw[3] + boxRaw[1],
boxRaw[2] + boxRaw[0],
] as Box;
return { box, boxRaw, yxBox };
}
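
A short usage sketch for the new helper, with made-up keypoint coordinates:

import { scale } from './util/box';

// made-up keypoints as [x, y] pixel pairs
const keypoints = [[210, 120], [260, 150], [230, 190]];
const { box, boxRaw, yxBox } = scale(keypoints, 1.5, [640, 480]);
// box:    squared pixel box [x, y, width, height], enlarged by the 1.5 scale factor
// boxRaw: the same box normalized by output size to the 0..1 range
// yxBox:  [y1, x1, y2, x2] normalized, the layout expected by tf.image.cropAndResize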

View File

@ -2,9 +2,9 @@
* Module that implements helper draw functions, exposed as human.draw
*/
import { TRI468 as triangulation } from './blazeface/coords';
import { TRI468 as triangulation } from '../blazeface/coords';
import { mergeDeep, now } from './util';
import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult } from './result';
import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult } from '../result';
/**
* Draw Options

View File

@ -1,5 +1,5 @@
import * as tf from '../dist/tfjs.esm.js';
import * as image from './image/image';
import * as tf from '../../dist/tfjs.esm.js';
import * as image from '../image/image';
import { mergeDeep } from './util';
export type Env = {

View File

@ -2,7 +2,7 @@
* Results interpolation for smoothing of video detection results in between detected frames
*/
import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult, Box, Point } from './result';
import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult, Box, Point } from '../result';
const bufferedResult: Result = { face: [], body: [], hand: [], gesture: [], object: [], persons: [], performance: {}, timestamp: 0 };

View File

@ -2,8 +2,6 @@
* Simple helper functions used across the codebase
*/
import type { Box } from './result';
// helper function: join two paths
export function join(folder: string, file: string): string {
const separator = folder.endsWith('/') ? '' : '/';
@ -71,30 +69,3 @@ export async function wait(time) {
const waiting = new Promise((resolve) => setTimeout(() => resolve(true), time));
await waiting;
}
// helper function: find box around keypoints, square it and scale it
export function scaleBox(keypoints, boxScaleFact, outputSize) {
const coords = [keypoints.map((pt) => pt[0]), keypoints.map((pt) => pt[1])]; // all x/y coords
const maxmin = [Math.max(...coords[0]), Math.min(...coords[0]), Math.max(...coords[1]), Math.min(...coords[1])]; // find min/max x/y coordinates
const center = [(maxmin[0] + maxmin[1]) / 2, (maxmin[2] + maxmin[3]) / 2]; // find center x and y coord of all fingers
const diff = Math.max(center[0] - maxmin[1], center[1] - maxmin[3], -center[0] + maxmin[0], -center[1] + maxmin[2]) * boxScaleFact; // largest distance from center in any direction
const box = [
Math.trunc(center[0] - diff),
Math.trunc(center[1] - diff),
Math.trunc(2 * diff),
Math.trunc(2 * diff),
] as Box;
const boxRaw = [ // work backwards
box[0] / outputSize[0],
box[1] / outputSize[1],
box[2] / outputSize[0],
box[3] / outputSize[1],
] as Box;
const yxBox = [ // work backwards
boxRaw[1],
boxRaw[0],
boxRaw[3] + boxRaw[1],
boxRaw[2] + boxRaw[0],
] as Box;
return { box, boxRaw, yxBox };
}

View File

@ -2,13 +2,13 @@
* Warmup algorithm that uses embedded images to exercise loaded models for faster future inference
*/
import { log, now, mergeDeep } from './util';
import { log, now, mergeDeep } from './util/util';
import * as sample from './sample';
import * as tf from '../dist/tfjs.esm.js';
import * as image from './image/image';
import type { Config } from './config';
import type { Result } from './result';
import { env } from './env';
import { env } from './util/env';
async function warmupBitmap(instance) {
const b64toBlob = (base64: string, type = 'application/octet-stream') => fetch(`data:${type};base64,${base64}`).then((res) => res.blob());