add config.face.detector.square option

2024-09-11 11:16:07 -04:00 · 2024-09-11 11:16:07 -04:00 · 715210db51
parent 9e2c612c1f
commit 715210db51
17 changed files with 4354 additions and 286 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,3 +5,5 @@ package-lock.json
 *.swp
 samples/**/*.mp4
 samples/**/*.webm
+temp
+tmp
--- a/demo/facedetect/facedetect.js
+++ b/demo/facedetect/facedetect.js
@ -10,12 +10,13 @@ import { Human } from '../../dist/human.esm.js';
 let loader;

 const humanConfig = { // user configuration for human, used to fine-tune behavior
+  cacheSensitivity: 0.0001,
  debug: true,
  modelBasePath: 'https://vladmandic.github.io/human-models/models/',
  filter: { enabled: true, equalization: false, flip: false },
  face: {
    enabled: true,
-    detector: { rotation: false, maxDetected: 100, minConfidence: 0.2, return: true },
+    detector: { rotation: false, maxDetected: 100, minConfidence: 0.2, return: true, square: true },
    iris: { enabled: true },
    description: { enabled: true },
    emotion: { enabled: true },
@ -149,7 +150,7 @@ async function main() {
  showLoader('compiling models');
  await human.warmup();
  showLoader('loading images');
-  const images = ['group-1.jpg', 'group-2.jpg', 'group-3.jpg', 'group-4.jpg', 'group-5.jpg', 'group-6.jpg', 'group-7.jpg', 'solvay1927.jpg', 'stock-group-1.jpg', 'stock-group-2.jpg'];
+  const images = ['group-1.jpg', 'group-2.jpg', 'group-3.jpg', 'group-4.jpg', 'group-5.jpg', 'group-6.jpg', 'group-7.jpg', 'solvay1927.jpg', 'stock-group-1.jpg', 'stock-group-2.jpg', 'stock-models-6.jpg', 'stock-models-7.jpg'];
  const imageUris = images.map((a) => `../../samples/in/${a}`);
  for (let i = 0; i < imageUris.length; i++) addImage(imageUris[i]);
  initDragAndDrop();
--- a/dist/human.esm-nobundle.js
+++ b/dist/human.esm-nobundle.js
@ -1463,11 +1463,11 @@ var WebCam = class {
        return "webcam error no stream";
      }
      this.element.srcObject = this.stream;
-      const ready3 = new Promise((resolve) => {
+      const ready4 = new Promise((resolve) => {
        if (!this.element) resolve(false);
        else this.element.onloadeddata = () => resolve(true);
      });
-      await ready3;
+      await ready4;
      await this.element.play();
      if (this.config.debug) {
        log("webcam", {
@ -7367,10 +7367,21 @@ function decodeBoxes2(boxOutputs) {
  return boxes;
 }
 async function getBoxes(inputImage, config3) {
-  var _a, _b, _c, _d, _e, _f, _g;
+  var _a, _b, _c, _d, _e, _f, _g, _h, _i;
  if (!inputImage || inputImage["isDisposedInternal"] || inputImage.shape.length !== 4 || inputImage.shape[1] < 1 || inputImage.shape[2] < 1) return [];
  const t2 = {};
-  t2.resized = tfjs_esm_exports.image.resizeBilinear(inputImage, [inputSize4, inputSize4]);
+  let pad4 = [0, 0];
+  let scale2 = [1, 1];
+  if ((_b = (_a = config3 == null ? void 0 : config3.face) == null ? void 0 : _a.detector) == null ? void 0 : _b.square) {
+    const xy = Math.max(inputImage.shape[2], inputImage.shape[1]);
+    pad4 = [Math.floor((xy - inputImage.shape[2]) / 2), Math.floor((xy - inputImage.shape[1]) / 2)];
+    t2.padded = tfjs_esm_exports.pad(inputImage, [[0, 0], [pad4[1], pad4[1]], [pad4[0], pad4[0]], [0, 0]]);
+    scale2 = [inputImage.shape[2] / xy, inputImage.shape[1] / xy];
+    pad4 = [pad4[0] / inputSize4, pad4[1] / inputSize4];
+  } else {
+    t2.padded = inputImage;
+  }
+  t2.resized = tfjs_esm_exports.image.resizeBilinear(t2.padded, [inputSize4, inputSize4]);
  t2.div = tfjs_esm_exports.div(t2.resized, constants.tf127);
  t2.normalized = tfjs_esm_exports.sub(t2.div, constants.tf1);
  const res = model5 == null ? void 0 : model5.execute(t2.normalized);
@ -7390,31 +7401,38 @@ async function getBoxes(inputImage, config3) {
  t2.logits = tfjs_esm_exports.slice(t2.batch, [0, 0], [-1, 1]);
  t2.sigmoid = tfjs_esm_exports.sigmoid(t2.logits);
  t2.scores = tfjs_esm_exports.squeeze(t2.sigmoid);
-  t2.nms = await tfjs_esm_exports.image.nonMaxSuppressionAsync(t2.boxes, t2.scores, ((_a = config3.face.detector) == null ? void 0 : _a.maxDetected) || 0, ((_b = config3.face.detector) == null ? void 0 : _b.iouThreshold) || 0, ((_c = config3.face.detector) == null ? void 0 : _c.minConfidence) || 0);
+  t2.nms = await tfjs_esm_exports.image.nonMaxSuppressionAsync(t2.boxes, t2.scores, ((_c = config3.face.detector) == null ? void 0 : _c.maxDetected) || 0, ((_d = config3.face.detector) == null ? void 0 : _d.iouThreshold) || 0, ((_e = config3.face.detector) == null ? void 0 : _e.minConfidence) || 0);
  const nms = await t2.nms.array();
  const boxes = [];
  const scores = await t2.scores.data();
  for (let i = 0; i < nms.length; i++) {
    const confidence = scores[nms[i]];
-    if (confidence > (((_d = config3.face.detector) == null ? void 0 : _d.minConfidence) || 0)) {
+    if (confidence > (((_f = config3.face.detector) == null ? void 0 : _f.minConfidence) || 0)) {
      const b = {};
      b.bbox = tfjs_esm_exports.slice(t2.boxes, [nms[i], 0], [1, -1]);
      b.slice = tfjs_esm_exports.slice(t2.batch, [nms[i], keypointsCount - 1], [1, -1]);
      b.squeeze = tfjs_esm_exports.squeeze(b.slice);
      b.landmarks = tfjs_esm_exports.reshape(b.squeeze, [keypointsCount, -1]);
      const points = await b.bbox.data();
+      const unpadded = [
+        // TODO fix this math
+        points[0] * scale2[0] - pad4[0],
+        points[1] * scale2[1] - pad4[1],
+        points[2] * scale2[0] - pad4[0],
+        points[3] * scale2[1] - pad4[1]
+      ];
      const rawBox = {
-        startPoint: [points[0], points[1]],
-        endPoint: [points[2], points[3]],
+        startPoint: [unpadded[0], unpadded[1]],
+        endPoint: [unpadded[2], unpadded[3]],
        landmarks: await b.landmarks.array(),
        confidence
      };
      b.anchor = tfjs_esm_exports.slice(anchors, [nms[i], 0], [1, 2]);
      const anchor = await b.anchor.data();
      const scaledBox = scaleBoxCoordinates(rawBox, [(inputImage.shape[2] || 0) / inputSize4, (inputImage.shape[1] || 0) / inputSize4], anchor);
-      const enlargedBox = enlargeBox(scaledBox, ((_e = config3.face.detector) == null ? void 0 : _e.scale) || 1.4);
+      const enlargedBox = enlargeBox(scaledBox, ((_g = config3.face.detector) == null ? void 0 : _g.scale) || 1.4);
      const squaredBox = squarifyBox(enlargedBox);
-      if (squaredBox.size[0] > (((_f = config3.face.detector) == null ? void 0 : _f["minSize"]) || 0) && squaredBox.size[1] > (((_g = config3.face.detector) == null ? void 0 : _g["minSize"]) || 0)) boxes.push(squaredBox);
+      if (squaredBox.size[0] > (((_h = config3.face.detector) == null ? void 0 : _h["minSize"]) || 0) && squaredBox.size[1] > (((_i = config3.face.detector) == null ? void 0 : _i["minSize"]) || 0)) boxes.push(squaredBox);
      Object.keys(b).forEach((tensor6) => tfjs_esm_exports.dispose(b[tensor6]));
    }
  }
@ -14598,6 +14616,7 @@ async function warmup(instance, userConfig) {
  }
  return new Promise(async (resolve) => {
    await instance.models.load();
+    await tfjs_esm_exports.ready();
    await runCompile(instance);
    const res = await runInference(instance);
    const t1 = now();
--- a/dist/human.esm.js
+++ b/dist/human.esm.js
@ -39140,10 +39140,21 @@ function decodeBoxes2(boxOutputs) {
  return boxes;
 }
 async function getBoxes(inputImage, config3) {
-  var _a, _b, _c2, _d2, _e, _f2, _g2;
+  var _a, _b, _c2, _d2, _e, _f2, _g2, _h2, _i2;
  if (!inputImage || inputImage["isDisposedInternal"] || inputImage.shape.length !== 4 || inputImage.shape[1] < 1 || inputImage.shape[2] < 1) return [];
  const t10 = {};
-  t10.resized = eX.resizeBilinear(inputImage, [inputSize4, inputSize4]);
+  let pad = [0, 0];
+  let scale2 = [1, 1];
+  if ((_b = (_a = config3 == null ? void 0 : config3.face) == null ? void 0 : _a.detector) == null ? void 0 : _b.square) {
+    const xy2 = Math.max(inputImage.shape[2], inputImage.shape[1]);
+    pad = [Math.floor((xy2 - inputImage.shape[2]) / 2), Math.floor((xy2 - inputImage.shape[1]) / 2)];
+    t10.padded = Aa(inputImage, [[0, 0], [pad[1], pad[1]], [pad[0], pad[0]], [0, 0]]);
+    scale2 = [inputImage.shape[2] / xy2, inputImage.shape[1] / xy2];
+    pad = [pad[0] / inputSize4, pad[1] / inputSize4];
+  } else {
+    t10.padded = inputImage;
+  }
+  t10.resized = eX.resizeBilinear(t10.padded, [inputSize4, inputSize4]);
  t10.div = je(t10.resized, constants.tf127);
  t10.normalized = Te(t10.div, constants.tf1);
  const res = model5 == null ? void 0 : model5.execute(t10.normalized);
@ -39163,31 +39174,38 @@ async function getBoxes(inputImage, config3) {
  t10.logits = Xe(t10.batch, [0, 0], [-1, 1]);
  t10.sigmoid = Ea(t10.logits);
  t10.scores = cc(t10.sigmoid);
-  t10.nms = await eX.nonMaxSuppressionAsync(t10.boxes, t10.scores, ((_a = config3.face.detector) == null ? void 0 : _a.maxDetected) || 0, ((_b = config3.face.detector) == null ? void 0 : _b.iouThreshold) || 0, ((_c2 = config3.face.detector) == null ? void 0 : _c2.minConfidence) || 0);
+  t10.nms = await eX.nonMaxSuppressionAsync(t10.boxes, t10.scores, ((_c2 = config3.face.detector) == null ? void 0 : _c2.maxDetected) || 0, ((_d2 = config3.face.detector) == null ? void 0 : _d2.iouThreshold) || 0, ((_e = config3.face.detector) == null ? void 0 : _e.minConfidence) || 0);
  const nms = await t10.nms.array();
  const boxes = [];
  const scores = await t10.scores.data();
  for (let i = 0; i < nms.length; i++) {
    const confidence = scores[nms[i]];
-    if (confidence > (((_d2 = config3.face.detector) == null ? void 0 : _d2.minConfidence) || 0)) {
+    if (confidence > (((_f2 = config3.face.detector) == null ? void 0 : _f2.minConfidence) || 0)) {
      const b = {};
      b.bbox = Xe(t10.boxes, [nms[i], 0], [1, -1]);
      b.slice = Xe(t10.batch, [nms[i], keypointsCount - 1], [1, -1]);
      b.squeeze = cc(b.slice);
      b.landmarks = W(b.squeeze, [keypointsCount, -1]);
      const points = await b.bbox.data();
+      const unpadded = [
+        // TODO fix this math
+        points[0] * scale2[0] - pad[0],
+        points[1] * scale2[1] - pad[1],
+        points[2] * scale2[0] - pad[0],
+        points[3] * scale2[1] - pad[1]
+      ];
      const rawBox = {
-        startPoint: [points[0], points[1]],
-        endPoint: [points[2], points[3]],
+        startPoint: [unpadded[0], unpadded[1]],
+        endPoint: [unpadded[2], unpadded[3]],
        landmarks: await b.landmarks.array(),
        confidence
      };
      b.anchor = Xe(anchors, [nms[i], 0], [1, 2]);
      const anchor = await b.anchor.data();
      const scaledBox = scaleBoxCoordinates(rawBox, [(inputImage.shape[2] || 0) / inputSize4, (inputImage.shape[1] || 0) / inputSize4], anchor);
-      const enlargedBox = enlargeBox(scaledBox, ((_e = config3.face.detector) == null ? void 0 : _e.scale) || 1.4);
+      const enlargedBox = enlargeBox(scaledBox, ((_g2 = config3.face.detector) == null ? void 0 : _g2.scale) || 1.4);
      const squaredBox = squarifyBox(enlargedBox);
-      if (squaredBox.size[0] > (((_f2 = config3.face.detector) == null ? void 0 : _f2["minSize"]) || 0) && squaredBox.size[1] > (((_g2 = config3.face.detector) == null ? void 0 : _g2["minSize"]) || 0)) boxes.push(squaredBox);
+      if (squaredBox.size[0] > (((_h2 = config3.face.detector) == null ? void 0 : _h2["minSize"]) || 0) && squaredBox.size[1] > (((_i2 = config3.face.detector) == null ? void 0 : _i2["minSize"]) || 0)) boxes.push(squaredBox);
      Object.keys(b).forEach((tensor) => Ot(b[tensor]));
    }
  }
@ -46371,6 +46389,7 @@ async function warmup(instance, userConfig) {
  }
  return new Promise(async (resolve) => {
    await instance.models.load();
+    await Ime();
    await runCompile(instance);
    const res = await runInference(instance);
    const t12 = now();
--- a/dist/human.esm.js.map
+++ b/dist/human.esm.js.map
--- a/dist/human.js
+++ b/dist/human.js
--- a/dist/human.node-gpu.js
+++ b/dist/human.node-gpu.js
@ -1511,11 +1511,11 @@ var WebCam = class {
        return "webcam error no stream";
      }
      this.element.srcObject = this.stream;
-      const ready3 = new Promise((resolve) => {
+      const ready4 = new Promise((resolve) => {
        if (!this.element) resolve(false);
        else this.element.onloadeddata = () => resolve(true);
      });
-      await ready3;
+      await ready4;
      await this.element.play();
      if (this.config.debug) {
        log("webcam", {
@ -7443,10 +7443,21 @@ function decodeBoxes2(boxOutputs) {
  return boxes;
 }
 async function getBoxes(inputImage, config3) {
-  var _a, _b, _c, _d, _e, _f, _g;
+  var _a, _b, _c, _d, _e, _f, _g, _h, _i;
  if (!inputImage || inputImage["isDisposedInternal"] || inputImage.shape.length !== 4 || inputImage.shape[1] < 1 || inputImage.shape[2] < 1) return [];
  const t2 = {};
-  t2.resized = tf13.image.resizeBilinear(inputImage, [inputSize4, inputSize4]);
+  let pad4 = [0, 0];
+  let scale2 = [1, 1];
+  if ((_b = (_a = config3 == null ? void 0 : config3.face) == null ? void 0 : _a.detector) == null ? void 0 : _b.square) {
+    const xy = Math.max(inputImage.shape[2], inputImage.shape[1]);
+    pad4 = [Math.floor((xy - inputImage.shape[2]) / 2), Math.floor((xy - inputImage.shape[1]) / 2)];
+    t2.padded = tf13.pad(inputImage, [[0, 0], [pad4[1], pad4[1]], [pad4[0], pad4[0]], [0, 0]]);
+    scale2 = [inputImage.shape[2] / xy, inputImage.shape[1] / xy];
+    pad4 = [pad4[0] / inputSize4, pad4[1] / inputSize4];
+  } else {
+    t2.padded = inputImage;
+  }
+  t2.resized = tf13.image.resizeBilinear(t2.padded, [inputSize4, inputSize4]);
  t2.div = tf13.div(t2.resized, constants.tf127);
  t2.normalized = tf13.sub(t2.div, constants.tf1);
  const res = model5 == null ? void 0 : model5.execute(t2.normalized);
@ -7466,31 +7477,38 @@ async function getBoxes(inputImage, config3) {
  t2.logits = tf13.slice(t2.batch, [0, 0], [-1, 1]);
  t2.sigmoid = tf13.sigmoid(t2.logits);
  t2.scores = tf13.squeeze(t2.sigmoid);
-  t2.nms = await tf13.image.nonMaxSuppressionAsync(t2.boxes, t2.scores, ((_a = config3.face.detector) == null ? void 0 : _a.maxDetected) || 0, ((_b = config3.face.detector) == null ? void 0 : _b.iouThreshold) || 0, ((_c = config3.face.detector) == null ? void 0 : _c.minConfidence) || 0);
+  t2.nms = await tf13.image.nonMaxSuppressionAsync(t2.boxes, t2.scores, ((_c = config3.face.detector) == null ? void 0 : _c.maxDetected) || 0, ((_d = config3.face.detector) == null ? void 0 : _d.iouThreshold) || 0, ((_e = config3.face.detector) == null ? void 0 : _e.minConfidence) || 0);
  const nms = await t2.nms.array();
  const boxes = [];
  const scores = await t2.scores.data();
  for (let i = 0; i < nms.length; i++) {
    const confidence = scores[nms[i]];
-    if (confidence > (((_d = config3.face.detector) == null ? void 0 : _d.minConfidence) || 0)) {
+    if (confidence > (((_f = config3.face.detector) == null ? void 0 : _f.minConfidence) || 0)) {
      const b = {};
      b.bbox = tf13.slice(t2.boxes, [nms[i], 0], [1, -1]);
      b.slice = tf13.slice(t2.batch, [nms[i], keypointsCount - 1], [1, -1]);
      b.squeeze = tf13.squeeze(b.slice);
      b.landmarks = tf13.reshape(b.squeeze, [keypointsCount, -1]);
      const points = await b.bbox.data();
+      const unpadded = [
+        // TODO fix this math
+        points[0] * scale2[0] - pad4[0],
+        points[1] * scale2[1] - pad4[1],
+        points[2] * scale2[0] - pad4[0],
+        points[3] * scale2[1] - pad4[1]
+      ];
      const rawBox = {
-        startPoint: [points[0], points[1]],
-        endPoint: [points[2], points[3]],
+        startPoint: [unpadded[0], unpadded[1]],
+        endPoint: [unpadded[2], unpadded[3]],
        landmarks: await b.landmarks.array(),
        confidence
      };
      b.anchor = tf13.slice(anchors, [nms[i], 0], [1, 2]);
      const anchor = await b.anchor.data();
      const scaledBox = scaleBoxCoordinates(rawBox, [(inputImage.shape[2] || 0) / inputSize4, (inputImage.shape[1] || 0) / inputSize4], anchor);
-      const enlargedBox = enlargeBox(scaledBox, ((_e = config3.face.detector) == null ? void 0 : _e.scale) || 1.4);
+      const enlargedBox = enlargeBox(scaledBox, ((_g = config3.face.detector) == null ? void 0 : _g.scale) || 1.4);
      const squaredBox = squarifyBox(enlargedBox);
-      if (squaredBox.size[0] > (((_f = config3.face.detector) == null ? void 0 : _f["minSize"]) || 0) && squaredBox.size[1] > (((_g = config3.face.detector) == null ? void 0 : _g["minSize"]) || 0)) boxes.push(squaredBox);
+      if (squaredBox.size[0] > (((_h = config3.face.detector) == null ? void 0 : _h["minSize"]) || 0) && squaredBox.size[1] > (((_i = config3.face.detector) == null ? void 0 : _i["minSize"]) || 0)) boxes.push(squaredBox);
      Object.keys(b).forEach((tensor6) => tf13.dispose(b[tensor6]));
    }
  }
@ -14704,6 +14722,7 @@ async function warmup(instance, userConfig) {
  }
  return new Promise(async (resolve) => {
    await instance.models.load();
+    await tf37.ready();
    await runCompile(instance);
    const res = await runInference(instance);
    const t1 = now();
--- a/dist/human.node-wasm.js
+++ b/dist/human.node-wasm.js
@ -1513,11 +1513,11 @@ var WebCam = class {
        return "webcam error no stream";
      }
      this.element.srcObject = this.stream;
-      const ready3 = new Promise((resolve) => {
+      const ready4 = new Promise((resolve) => {
        if (!this.element) resolve(false);
        else this.element.onloadeddata = () => resolve(true);
      });
-      await ready3;
+      await ready4;
      await this.element.play();
      if (this.config.debug) {
        log("webcam", {
@ -7445,10 +7445,21 @@ function decodeBoxes2(boxOutputs) {
  return boxes;
 }
 async function getBoxes(inputImage, config3) {
-  var _a, _b, _c, _d, _e, _f, _g;
+  var _a, _b, _c, _d, _e, _f, _g, _h, _i;
  if (!inputImage || inputImage["isDisposedInternal"] || inputImage.shape.length !== 4 || inputImage.shape[1] < 1 || inputImage.shape[2] < 1) return [];
  const t2 = {};
-  t2.resized = tf13.image.resizeBilinear(inputImage, [inputSize4, inputSize4]);
+  let pad4 = [0, 0];
+  let scale2 = [1, 1];
+  if ((_b = (_a = config3 == null ? void 0 : config3.face) == null ? void 0 : _a.detector) == null ? void 0 : _b.square) {
+    const xy = Math.max(inputImage.shape[2], inputImage.shape[1]);
+    pad4 = [Math.floor((xy - inputImage.shape[2]) / 2), Math.floor((xy - inputImage.shape[1]) / 2)];
+    t2.padded = tf13.pad(inputImage, [[0, 0], [pad4[1], pad4[1]], [pad4[0], pad4[0]], [0, 0]]);
+    scale2 = [inputImage.shape[2] / xy, inputImage.shape[1] / xy];
+    pad4 = [pad4[0] / inputSize4, pad4[1] / inputSize4];
+  } else {
+    t2.padded = inputImage;
+  }
+  t2.resized = tf13.image.resizeBilinear(t2.padded, [inputSize4, inputSize4]);
  t2.div = tf13.div(t2.resized, constants.tf127);
  t2.normalized = tf13.sub(t2.div, constants.tf1);
  const res = model5 == null ? void 0 : model5.execute(t2.normalized);
@ -7468,31 +7479,38 @@ async function getBoxes(inputImage, config3) {
  t2.logits = tf13.slice(t2.batch, [0, 0], [-1, 1]);
  t2.sigmoid = tf13.sigmoid(t2.logits);
  t2.scores = tf13.squeeze(t2.sigmoid);
-  t2.nms = await tf13.image.nonMaxSuppressionAsync(t2.boxes, t2.scores, ((_a = config3.face.detector) == null ? void 0 : _a.maxDetected) || 0, ((_b = config3.face.detector) == null ? void 0 : _b.iouThreshold) || 0, ((_c = config3.face.detector) == null ? void 0 : _c.minConfidence) || 0);
+  t2.nms = await tf13.image.nonMaxSuppressionAsync(t2.boxes, t2.scores, ((_c = config3.face.detector) == null ? void 0 : _c.maxDetected) || 0, ((_d = config3.face.detector) == null ? void 0 : _d.iouThreshold) || 0, ((_e = config3.face.detector) == null ? void 0 : _e.minConfidence) || 0);
  const nms = await t2.nms.array();
  const boxes = [];
  const scores = await t2.scores.data();
  for (let i = 0; i < nms.length; i++) {
    const confidence = scores[nms[i]];
-    if (confidence > (((_d = config3.face.detector) == null ? void 0 : _d.minConfidence) || 0)) {
+    if (confidence > (((_f = config3.face.detector) == null ? void 0 : _f.minConfidence) || 0)) {
      const b = {};
      b.bbox = tf13.slice(t2.boxes, [nms[i], 0], [1, -1]);
      b.slice = tf13.slice(t2.batch, [nms[i], keypointsCount - 1], [1, -1]);
      b.squeeze = tf13.squeeze(b.slice);
      b.landmarks = tf13.reshape(b.squeeze, [keypointsCount, -1]);
      const points = await b.bbox.data();
+      const unpadded = [
+        // TODO fix this math
+        points[0] * scale2[0] - pad4[0],
+        points[1] * scale2[1] - pad4[1],
+        points[2] * scale2[0] - pad4[0],
+        points[3] * scale2[1] - pad4[1]
+      ];
      const rawBox = {
-        startPoint: [points[0], points[1]],
-        endPoint: [points[2], points[3]],
+        startPoint: [unpadded[0], unpadded[1]],
+        endPoint: [unpadded[2], unpadded[3]],
        landmarks: await b.landmarks.array(),
        confidence
      };
      b.anchor = tf13.slice(anchors, [nms[i], 0], [1, 2]);
      const anchor = await b.anchor.data();
      const scaledBox = scaleBoxCoordinates(rawBox, [(inputImage.shape[2] || 0) / inputSize4, (inputImage.shape[1] || 0) / inputSize4], anchor);
-      const enlargedBox = enlargeBox(scaledBox, ((_e = config3.face.detector) == null ? void 0 : _e.scale) || 1.4);
+      const enlargedBox = enlargeBox(scaledBox, ((_g = config3.face.detector) == null ? void 0 : _g.scale) || 1.4);
      const squaredBox = squarifyBox(enlargedBox);
-      if (squaredBox.size[0] > (((_f = config3.face.detector) == null ? void 0 : _f["minSize"]) || 0) && squaredBox.size[1] > (((_g = config3.face.detector) == null ? void 0 : _g["minSize"]) || 0)) boxes.push(squaredBox);
+      if (squaredBox.size[0] > (((_h = config3.face.detector) == null ? void 0 : _h["minSize"]) || 0) && squaredBox.size[1] > (((_i = config3.face.detector) == null ? void 0 : _i["minSize"]) || 0)) boxes.push(squaredBox);
      Object.keys(b).forEach((tensor6) => tf13.dispose(b[tensor6]));
    }
  }
@ -14706,6 +14724,7 @@ async function warmup(instance, userConfig) {
  }
  return new Promise(async (resolve) => {
    await instance.models.load();
+    await tf37.ready();
    await runCompile(instance);
    const res = await runInference(instance);
    const t1 = now();
--- a/dist/human.node.js
+++ b/dist/human.node.js
@ -1511,11 +1511,11 @@ var WebCam = class {
        return "webcam error no stream";
      }
      this.element.srcObject = this.stream;
-      const ready3 = new Promise((resolve) => {
+      const ready4 = new Promise((resolve) => {
        if (!this.element) resolve(false);
        else this.element.onloadeddata = () => resolve(true);
      });
-      await ready3;
+      await ready4;
      await this.element.play();
      if (this.config.debug) {
        log("webcam", {
@ -7443,10 +7443,21 @@ function decodeBoxes2(boxOutputs) {
  return boxes;
 }
 async function getBoxes(inputImage, config3) {
-  var _a, _b, _c, _d, _e, _f, _g;
+  var _a, _b, _c, _d, _e, _f, _g, _h, _i;
  if (!inputImage || inputImage["isDisposedInternal"] || inputImage.shape.length !== 4 || inputImage.shape[1] < 1 || inputImage.shape[2] < 1) return [];
  const t2 = {};
-  t2.resized = tf13.image.resizeBilinear(inputImage, [inputSize4, inputSize4]);
+  let pad4 = [0, 0];
+  let scale2 = [1, 1];
+  if ((_b = (_a = config3 == null ? void 0 : config3.face) == null ? void 0 : _a.detector) == null ? void 0 : _b.square) {
+    const xy = Math.max(inputImage.shape[2], inputImage.shape[1]);
+    pad4 = [Math.floor((xy - inputImage.shape[2]) / 2), Math.floor((xy - inputImage.shape[1]) / 2)];
+    t2.padded = tf13.pad(inputImage, [[0, 0], [pad4[1], pad4[1]], [pad4[0], pad4[0]], [0, 0]]);
+    scale2 = [inputImage.shape[2] / xy, inputImage.shape[1] / xy];
+    pad4 = [pad4[0] / inputSize4, pad4[1] / inputSize4];
+  } else {
+    t2.padded = inputImage;
+  }
+  t2.resized = tf13.image.resizeBilinear(t2.padded, [inputSize4, inputSize4]);
  t2.div = tf13.div(t2.resized, constants.tf127);
  t2.normalized = tf13.sub(t2.div, constants.tf1);
  const res = model5 == null ? void 0 : model5.execute(t2.normalized);
@ -7466,31 +7477,38 @@ async function getBoxes(inputImage, config3) {
  t2.logits = tf13.slice(t2.batch, [0, 0], [-1, 1]);
  t2.sigmoid = tf13.sigmoid(t2.logits);
  t2.scores = tf13.squeeze(t2.sigmoid);
-  t2.nms = await tf13.image.nonMaxSuppressionAsync(t2.boxes, t2.scores, ((_a = config3.face.detector) == null ? void 0 : _a.maxDetected) || 0, ((_b = config3.face.detector) == null ? void 0 : _b.iouThreshold) || 0, ((_c = config3.face.detector) == null ? void 0 : _c.minConfidence) || 0);
+  t2.nms = await tf13.image.nonMaxSuppressionAsync(t2.boxes, t2.scores, ((_c = config3.face.detector) == null ? void 0 : _c.maxDetected) || 0, ((_d = config3.face.detector) == null ? void 0 : _d.iouThreshold) || 0, ((_e = config3.face.detector) == null ? void 0 : _e.minConfidence) || 0);
  const nms = await t2.nms.array();
  const boxes = [];
  const scores = await t2.scores.data();
  for (let i = 0; i < nms.length; i++) {
    const confidence = scores[nms[i]];
-    if (confidence > (((_d = config3.face.detector) == null ? void 0 : _d.minConfidence) || 0)) {
+    if (confidence > (((_f = config3.face.detector) == null ? void 0 : _f.minConfidence) || 0)) {
      const b = {};
      b.bbox = tf13.slice(t2.boxes, [nms[i], 0], [1, -1]);
      b.slice = tf13.slice(t2.batch, [nms[i], keypointsCount - 1], [1, -1]);
      b.squeeze = tf13.squeeze(b.slice);
      b.landmarks = tf13.reshape(b.squeeze, [keypointsCount, -1]);
      const points = await b.bbox.data();
+      const unpadded = [
+        // TODO fix this math
+        points[0] * scale2[0] - pad4[0],
+        points[1] * scale2[1] - pad4[1],
+        points[2] * scale2[0] - pad4[0],
+        points[3] * scale2[1] - pad4[1]
+      ];
      const rawBox = {
-        startPoint: [points[0], points[1]],
-        endPoint: [points[2], points[3]],
+        startPoint: [unpadded[0], unpadded[1]],
+        endPoint: [unpadded[2], unpadded[3]],
        landmarks: await b.landmarks.array(),
        confidence
      };
      b.anchor = tf13.slice(anchors, [nms[i], 0], [1, 2]);
      const anchor = await b.anchor.data();
      const scaledBox = scaleBoxCoordinates(rawBox, [(inputImage.shape[2] || 0) / inputSize4, (inputImage.shape[1] || 0) / inputSize4], anchor);
-      const enlargedBox = enlargeBox(scaledBox, ((_e = config3.face.detector) == null ? void 0 : _e.scale) || 1.4);
+      const enlargedBox = enlargeBox(scaledBox, ((_g = config3.face.detector) == null ? void 0 : _g.scale) || 1.4);
      const squaredBox = squarifyBox(enlargedBox);
-      if (squaredBox.size[0] > (((_f = config3.face.detector) == null ? void 0 : _f["minSize"]) || 0) && squaredBox.size[1] > (((_g = config3.face.detector) == null ? void 0 : _g["minSize"]) || 0)) boxes.push(squaredBox);
+      if (squaredBox.size[0] > (((_h = config3.face.detector) == null ? void 0 : _h["minSize"]) || 0) && squaredBox.size[1] > (((_i = config3.face.detector) == null ? void 0 : _i["minSize"]) || 0)) boxes.push(squaredBox);
      Object.keys(b).forEach((tensor6) => tf13.dispose(b[tensor6]));
    }
  }
@ -14704,6 +14722,7 @@ async function warmup(instance, userConfig) {
  }
  return new Promise(async (resolve) => {
    await instance.models.load();
+    await tf37.ready();
    await runCompile(instance);
    const res = await runInference(instance);
    const t1 = now();
--- a/samples/in/stock-models-6.jpg
+++ b/samples/in/stock-models-6.jpg
--- a/samples/in/stock-models-7.jpg
+++ b/samples/in/stock-models-7.jpg
--- a/src/config.ts
+++ b/src/config.ts
@ -39,6 +39,8 @@ export interface FaceDetectorConfig extends GenericConfig {
  iouThreshold: number,
  /** how much should face box be enlarged over the min/max facial coordinates */
  scale: number,
+  /** automatically pad input image to square (shorter dimension is padded equally on both sides) before it is resized for face detection */
+  square: boolean,
  /** should child models perform on masked image of a face */
  mask: boolean,
  /** should face detection return processed and cropped face tensor that can be used with an external model for additional processing?
--- a/src/face/angles.ts
+++ b/src/face/angles.ts
@ -78,7 +78,6 @@ export const calculateFaceAngle = (face: FaceResult, imageSize: [number, number]
    if (Number.isNaN(thetaZ)) thetaZ = 0;
    return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ };
  };
-
  /*
  const meshToEulerAngle = (mesh) => { // simple Euler angle calculation based existing 3D mesh
    const radians = (a1, a2, b1, b2) => Math.atan2(b2 - a2, b1 - a1);
--- a/src/face/blazeface.ts
+++ b/src/face/blazeface.ts
@ -55,7 +55,18 @@ export async function getBoxes(inputImage: Tensor4D, config: Config): Promise<De
  // sanity check on input
  if ((!inputImage) || (inputImage['isDisposedInternal']) || (inputImage.shape.length !== 4) || (inputImage.shape[1] < 1) || (inputImage.shape[2] < 1)) return [];
  const t: Record<string, Tensor> = {};
-  t.resized = tf.image.resizeBilinear(inputImage, [inputSize, inputSize]);
+  let pad = [0, 0];
+  let scale = [1, 1];
+  if (config?.face?.detector?.square) {
+    const xy = Math.max(inputImage.shape[2], inputImage.shape[1]);
+    pad = [Math.floor((xy - inputImage.shape[2]) / 2), Math.floor((xy - inputImage.shape[1]) / 2)];
+    t.padded = tf.pad(inputImage, [[0, 0], [pad[1], pad[1]], [pad[0], pad[0]], [0, 0]]);
+    scale = [inputImage.shape[2] / xy, inputImage.shape[1] / xy];
+    pad = [pad[0] / inputSize, pad[1] / inputSize];
+  } else {
+    t.padded = inputImage;
+  }
+  t.resized = tf.image.resizeBilinear(t.padded as Tensor4D, [inputSize, inputSize]);
  t.div = tf.div(t.resized, constants.tf127);
  t.normalized = tf.sub(t.div, constants.tf1);
  const res = model?.execute(t.normalized) as Tensor[];
@ -81,7 +92,6 @@ export async function getBoxes(inputImage: Tensor4D, config: Config): Promise<De
  const scores = await t.scores.data();
  for (let i = 0; i < nms.length; i++) {
    const confidence = scores[nms[i]];
-
    if (confidence > (config.face.detector?.minConfidence || 0)) {
      const b: Record<string, Tensor> = {};
      b.bbox = tf.slice(t.boxes, [nms[i], 0], [1, -1]);
@ -89,9 +99,15 @@ export async function getBoxes(inputImage: Tensor4D, config: Config): Promise<De
      b.squeeze = tf.squeeze(b.slice);
      b.landmarks = tf.reshape(b.squeeze, [keypointsCount, -1]);
      const points = await b.bbox.data();
+      const unpadded = [ // TODO fix this math
+        points[0] * scale[0] - pad[0],
+        points[1] * scale[1] - pad[1],
+        points[2] * scale[0] - pad[0],
+        points[3] * scale[1] - pad[1],
+      ];
      const rawBox = {
-        startPoint: [points[0], points[1]] as Point,
-        endPoint: [points[2], points[3]] as Point,
+        startPoint: [unpadded[0], unpadded[1]] as Point,
+        endPoint: [unpadded[2], unpadded[3]] as Point,
        landmarks: (await b.landmarks.array()) as Point[],
        confidence,
      };
--- a/src/image/image.ts
+++ b/src/image/image.ts
@ -290,7 +290,6 @@ const checksum = async (input: Tensor): Promise<number> => { // use tf sum or js
 export async function skip(config: Partial<Config>, input: Tensor) {
  let skipFrame = false;
  if (config.cacheSensitivity === 0 || !input.shape || input.shape.length !== 4 || input.shape[1] > 3840 || input.shape[2] > 2160) return skipFrame; // cache disabled or input is invalid or too large for cache analysis
-
  /*
  const checkSum = await checksum(input);
  const diff = 100 * (Math.max(checkSum, last.inputSum) / Math.min(checkSum, last.inputSum) - 1);
--- a/src/warmup.ts
+++ b/src/warmup.ts
@ -166,6 +166,7 @@ export async function warmup(instance: Human, userConfig?: Partial<Config>): Pro
  }
  return new Promise(async (resolve) => {
    await instance.models.load();
+    await tf.ready();
    await runCompile(instance);
    const res = await runInference(instance);
    const t1 = now();
--- a/test/build.log
+++ b/test/build.log