diff --git a/TODO.md b/TODO.md
index 1fcda431..2794f56c 100644
--- a/TODO.md
+++ b/TODO.md
@@ -10,11 +10,6 @@
- Evaluate and switch default model from `handdetect` to `handtrack`
-#### Body
-
-- Implement new variations of `BlazePose` models
-- Add virtual box frame caching to `MoveNet`
-
#### Face
- Reimplement `BlazeFace`, `FaceMesh`, `Iris` with new pipeline and frame caching
diff --git a/demo/index.js b/demo/index.js
index c17aba49..97fec5dc 100644
--- a/demo/index.js
+++ b/demo/index.js
@@ -67,6 +67,9 @@ const drawOptions = {
drawLabels: true,
drawPolygons: true,
drawPoints: false,
+ fillPolygons: false,
+ useCurves: false,
+ useDepth: true,
};
// ui options
@@ -105,7 +108,7 @@ const ui = {
lastFrame: 0, // time of last frame processing
viewportSet: false, // internal, has custom viewport been set
background: null, // holds instance of segmentation background image
- exceptionHandler: false, // should capture all unhandled exceptions
+ exceptionHandler: true, // should capture all unhandled exceptions
// webrtc
useWebRTC: false, // use webrtc as camera source instead of local webcam
@@ -684,13 +687,13 @@ function setupMenu() {
setupCamera();
});
  menu.display.addHTML('<hr style="min-width: 200px; border-style: inset; border-color: dimgray">');
- menu.display.addBool('use depth', human.draw.options, 'useDepth');
- menu.display.addBool('use curves', human.draw.options, 'useCurves');
- menu.display.addBool('print labels', human.draw.options, 'drawLabels');
- menu.display.addBool('draw points', human.draw.options, 'drawPoints');
- menu.display.addBool('draw boxes', human.draw.options, 'drawBoxes');
- menu.display.addBool('draw polygons', human.draw.options, 'drawPolygons');
- menu.display.addBool('fill polygons', human.draw.options, 'fillPolygons');
+ menu.display.addBool('use depth', drawOptions, 'useDepth');
+ menu.display.addBool('use curves', drawOptions, 'useCurves');
+ menu.display.addBool('print labels', drawOptions, 'drawLabels');
+ menu.display.addBool('draw points', drawOptions, 'drawPoints');
+ menu.display.addBool('draw boxes', drawOptions, 'drawBoxes');
+ menu.display.addBool('draw polygons', drawOptions, 'drawPolygons');
+ menu.display.addBool('fill polygons', drawOptions, 'fillPolygons');
menu.image = new Menu(document.body, '', { top, left: x[1] });
menu.image.addBool('enabled', userConfig.filter, 'enabled', (val) => userConfig.filter.enabled = val);
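
> Reviewer note: with the rebinding above, the draw toggles live on the demo's local `drawOptions` object rather than on the library-internal `human.draw.options`, so the object must be passed explicitly with every draw call. A minimal sketch of the consuming side, assuming the demo's existing `human.draw.all(canvas, result, options)` entry point:

```ts
// sketch only: drawOptions is the local object defined near the top of demo/index.js;
// passing it per call lets the library merge it over its built-in defaults
async function drawResults(canvas: HTMLCanvasElement, result: unknown) {
  await human.draw.all(canvas, result, drawOptions); // menu toggles take effect on the next frame
}
```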
diff --git a/package.json b/package.json
index 51e24130..5063fb9d 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "@vladmandic/human",
- "version": "2.2.3",
+ "version": "2.3.0",
"description": "Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition",
"sideEffects": false,
"main": "dist/human.node.js",
diff --git a/src/posenet/poses.ts b/src/body/posenet.ts
similarity index 72%
rename from src/posenet/poses.ts
rename to src/body/posenet.ts
index 2b8c75a9..6ab241c9 100644
--- a/src/posenet/poses.ts
+++ b/src/body/posenet.ts
@@ -1,11 +1,19 @@
/**
* PoseNet body detection model implementation
- * See `posenet.ts` for entry point
+ *
+ * Based on: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
*/
-import * as utils from './utils';
-import * as kpt from './keypoints';
-import type { Box } from '../result';
+import { log, join } from '../util/util';
+import * as tf from '../../dist/tfjs.esm.js';
+import type { BodyResult, Box } from '../result';
+import type { Tensor, GraphModel } from '../tfjs/types';
+import type { Config } from '../config';
+import { env } from '../util/env';
+import * as utils from './posenetutils';
+
+let model: GraphModel;
+const poseNetOutputs = ['MobilenetV1/offset_2/BiasAdd'/* offsets */, 'MobilenetV1/heatmap_2/BiasAdd'/* heatmapScores */, 'MobilenetV1/displacement_fwd_2/BiasAdd'/* displacementFwd */, 'MobilenetV1/displacement_bwd_2/BiasAdd'/* displacementBwd */];
const localMaximumRadius = 1;
const outputStride = 16;
@@ -37,11 +45,11 @@ function traverse(edgeId, sourceKeypoint, targetId, scores, offsets, displacemen
}
const targetKeyPointIndices = getStridedIndexNearPoint(targetKeypoint, height, width);
const score = scores.get(targetKeyPointIndices.y, targetKeyPointIndices.x, targetId);
- return { position: targetKeypoint, part: kpt.partNames[targetId], score };
+ return { position: targetKeypoint, part: utils.partNames[targetId], score };
}
export function decodePose(root, scores, offsets, displacementsFwd, displacementsBwd) {
- const tuples = kpt.poseChain.map(([parentJoinName, childJoinName]) => ([kpt.partIds[parentJoinName], kpt.partIds[childJoinName]]));
+ const tuples = utils.poseChain.map(([parentJoinName, childJoinName]) => ([utils.partIds[parentJoinName], utils.partIds[childJoinName]]));
const edgesFwd = tuples.map(([, childJointId]) => childJointId);
const edgesBwd = tuples.map(([parentJointId]) => parentJointId);
const numParts = scores.shape[2]; // [21,21,17]
@@ -51,7 +59,7 @@ export function decodePose(root, scores, offsets, displacementsFwd, displacement
const rootPoint = utils.getImageCoords(root.part, outputStride, offsets);
keypoints[root.part.id] = {
score: root.score,
- part: kpt.partNames[root.part.id],
+ part: utils.partNames[root.part.id],
position: rootPoint,
};
// Decode the part positions upwards in the tree, following the backward displacements.
@@ -146,3 +154,32 @@ export function decode(offsets, scores, displacementsFwd, displacementsBwd, maxD
}
return poses;
}
+
+export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
+ const res = tf.tidy(() => {
+ if (!model.inputs[0].shape) return [];
+ const resized = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
+ const normalized = tf.sub(tf.div(tf.cast(resized, 'float32'), 127.5), 1.0);
+    const results: Array<Tensor> = model.execute(normalized, poseNetOutputs) as Array<Tensor>;
+ const results3d = results.map((y) => tf.squeeze(y, [0]));
+ results3d[1] = results3d[1].sigmoid(); // apply sigmoid on scores
+ return results3d;
+ });
+
+ const buffers = await Promise.all(res.map((tensor: Tensor) => tensor.buffer()));
+ for (const t of res) tf.dispose(t);
+
+ const decoded = await decode(buffers[0], buffers[1], buffers[2], buffers[3], config.body.maxDetected, config.body.minConfidence);
+ if (!model.inputs[0].shape) return [];
+ const scaled = utils.scalePoses(decoded, [input.shape[1], input.shape[2]], [model.inputs[0].shape[2], model.inputs[0].shape[1]]) as BodyResult[];
+ return scaled;
+}
+
+export async function load(config: Config): Promise<GraphModel> {
+ if (!model || env.initial) {
+ model = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel;
+ if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath);
+ else if (config.debug) log('load model:', model['modelUrl']);
+ } else if (config.debug) log('cached model:', model['modelUrl']);
+ return model;
+}
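
> Usage keeps the familiar two-step contract after the move: `load()` once (re-loaded only when `env.initial` is set), then `predict()` per frame. A hypothetical standalone invocation, with illustrative config values that are not part of this diff:

```ts
import * as posenet from './src/body/posenet';

// partial config for illustration only; real values come from the library defaults
const config = {
  modelBasePath: 'https://vladmandic.github.io/human/models/',
  debug: false,
  body: { modelPath: 'posenet.json', maxDetected: 1, minConfidence: 0.3 },
} as any;

await posenet.load(config);                                // loads and caches the graph model
const bodies = await posenet.predict(imageTensor, config); // imageTensor: [1, height, width, 3]; returns BodyResult[] scaled to input resolution
```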
diff --git a/src/posenet/utils.ts b/src/body/posenetutils.ts
similarity index 75%
rename from src/posenet/utils.ts
rename to src/body/posenetutils.ts
index a495a17e..1d963692 100644
--- a/src/posenet/utils.ts
+++ b/src/body/posenetutils.ts
@@ -3,15 +3,48 @@
* See `posenet.ts` for entry point
*/
-import * as kpt from './keypoints';
import type { BodyResult } from '../result';
+export const partNames = [
+ 'nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder',
+ 'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist',
+ 'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle',
+];
+
+export const count = partNames.length; // 17 keypoints
+
+export const partIds = partNames.reduce((result, jointName, i) => {
+ result[jointName] = i;
+ return result;
+}, {});
+
+const connectedPartNames = [
+ ['leftHip', 'leftShoulder'], ['leftElbow', 'leftShoulder'],
+ ['leftElbow', 'leftWrist'], ['leftHip', 'leftKnee'],
+ ['leftKnee', 'leftAnkle'], ['rightHip', 'rightShoulder'],
+ ['rightElbow', 'rightShoulder'], ['rightElbow', 'rightWrist'],
+ ['rightHip', 'rightKnee'], ['rightKnee', 'rightAnkle'],
+ ['leftShoulder', 'rightShoulder'], ['leftHip', 'rightHip'],
+];
+export const connectedPartIndices = connectedPartNames.map(([jointNameA, jointNameB]) => ([partIds[jointNameA], partIds[jointNameB]]));
+
+export const poseChain = [
+ ['nose', 'leftEye'], ['leftEye', 'leftEar'], ['nose', 'rightEye'],
+ ['rightEye', 'rightEar'], ['nose', 'leftShoulder'],
+ ['leftShoulder', 'leftElbow'], ['leftElbow', 'leftWrist'],
+ ['leftShoulder', 'leftHip'], ['leftHip', 'leftKnee'],
+ ['leftKnee', 'leftAnkle'], ['nose', 'rightShoulder'],
+ ['rightShoulder', 'rightElbow'], ['rightElbow', 'rightWrist'],
+ ['rightShoulder', 'rightHip'], ['rightHip', 'rightKnee'],
+ ['rightKnee', 'rightAnkle'],
+];
+
export function eitherPointDoesntMeetConfidence(a: number, b: number, minConfidence: number) {
return (a < minConfidence || b < minConfidence);
}
export function getAdjacentKeyPoints(keypoints, minConfidence: number) {
- return kpt.connectedPartIndices.reduce((result, [leftJoint, rightJoint]) => {
+ return connectedPartIndices.reduce((result, [leftJoint, rightJoint]) => {
if (eitherPointDoesntMeetConfidence(keypoints[leftJoint].score, keypoints[rightJoint].score, minConfidence)) {
return result;
}
@@ -123,7 +156,7 @@ export class MaxHeap {
export function getOffsetPoint(y, x, keypoint, offsets) {
return {
y: offsets.get(y, x, keypoint),
- x: offsets.get(y, x, keypoint + kpt.count),
+ x: offsets.get(y, x, keypoint + count),
};
}
diff --git a/src/face/angles.ts b/src/face/angles.ts
new file mode 100644
index 00000000..9e23f5cb
--- /dev/null
+++ b/src/face/angles.ts
@@ -0,0 +1,133 @@
+// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
+const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);
+
+const calculateGaze = (face): { bearing: number, strength: number } => {
+ const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
+ if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 };
+
+ const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes
+ const eyeRatio = 1; // factor to normalize changes x vs y
+
+  const left = face.mesh[33][2] > face.mesh[263][2]; // pick left or right eye depending on which one is closer based on outside point z axis
+ const irisCenter = left ? face.mesh[473] : face.mesh[468];
+ const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so relative point is less precise
+ ? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2]
+ : [(face.mesh[263][0] + face.mesh[362][0]) / 2, (face.mesh[263][1] + face.mesh[362][1]) / 2];
+ const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions
+ ? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]]
+ : [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]];
+
+ const eyeDiff = [ // x distance between extreme point and center point normalized with eye size
+ (eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
+ eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
+ ];
+ let strength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences
+ strength = Math.min(strength, face.boxRaw[2] / 2, face.boxRaw[3] / 2); // limit strength to half of box size to avoid clipping due to low precision
+  const bearing = (radians([0, 0], eyeDiff) + (Math.PI / 2)) % Math.PI; // using eyeDiff instead of eyeCenter/irisCenter combo due to manual adjustments, rotated clockwise 90 degrees
+
+ return { bearing, strength };
+};
+
+export const calculateFaceAngle = (face, imageSize): {
+ angle: { pitch: number, yaw: number, roll: number },
+ matrix: [number, number, number, number, number, number, number, number, number],
+ gaze: { bearing: number, strength: number },
+} => {
+ // const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360);
+ const normalize = (v) => { // normalize vector
+ const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
+ v[0] /= length;
+ v[1] /= length;
+ v[2] /= length;
+ return v;
+ };
+ const subVectors = (a, b) => { // vector subtraction (a - b)
+ const x = a[0] - b[0];
+ const y = a[1] - b[1];
+ const z = a[2] - b[2];
+ return [x, y, z];
+ };
+ const crossVectors = (a, b) => { // vector cross product (a x b)
+ const x = a[1] * b[2] - a[2] * b[1];
+ const y = a[2] * b[0] - a[0] * b[2];
+ const z = a[0] * b[1] - a[1] * b[0];
+ return [x, y, z];
+ };
+ // 3x3 rotation matrix to Euler angles based on https://www.geometrictools.com/Documentation/EulerAngles.pdf
+ const rotationMatrixToEulerAngle = (r) => {
+ // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
+ const [r00, r01, r02, r10, r11, r12, r20, r21, r22] = r;
+ let thetaX: number;
+ let thetaY: number;
+ let thetaZ: number;
+ if (r10 < 1) { // YZX calculation
+ if (r10 > -1) {
+ thetaZ = Math.asin(r10);
+ thetaY = Math.atan2(-r20, r00);
+ thetaX = Math.atan2(-r12, r11);
+ } else {
+ thetaZ = -Math.PI / 2;
+ thetaY = -Math.atan2(r21, r22);
+ thetaX = 0;
+ }
+ } else {
+ thetaZ = Math.PI / 2;
+ thetaY = Math.atan2(r21, r22);
+ thetaX = 0;
+ }
+ if (isNaN(thetaX)) thetaX = 0;
+ if (isNaN(thetaY)) thetaY = 0;
+ if (isNaN(thetaZ)) thetaZ = 0;
+ return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ };
+ };
+  // simple Euler angle calculation based on existing 3D mesh
+ // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
+ const meshToEulerAngle = (mesh) => {
+ const radians = (a1, a2, b1, b2) => Math.atan2(b2 - a2, b1 - a1);
+ // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
+ const angle = {
+ // values are in radians in range of -pi/2 to pi/2 which is -90 to +90 degrees, value of 0 means center
+ // pitch is face move up/down
+ pitch: radians(mesh[10][1], mesh[10][2], mesh[152][1], mesh[152][2]), // looking at y,z of top and bottom points of the face
+ // yaw is face turn left/right
+ yaw: radians(mesh[33][0], mesh[33][2], mesh[263][0], mesh[263][2]), // looking at x,z of outside corners of leftEye and rightEye
+ // roll is face lean left/right
+ roll: radians(mesh[33][0], mesh[33][1], mesh[263][0], mesh[263][1]), // looking at x,y of outside corners of leftEye and rightEye
+ };
+ return angle;
+ };
+
+ // initialize gaze and mesh
+ const mesh = face.meshRaw;
+ if (!mesh || mesh.length < 300) return { angle: { pitch: 0, yaw: 0, roll: 0 }, matrix: [1, 0, 0, 0, 1, 0, 0, 0, 1], gaze: { bearing: 0, strength: 0 } };
+
+ const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5;
+ // top, bottom, left, right
+ const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [
+ // make the xyz coordinates proportional, independent of the image/box size
+ pt[0] * imageSize[0] / size,
+ pt[1] * imageSize[1] / size,
+ pt[2],
+ ]);
+
+ const y_axis = normalize(subVectors(pts[1], pts[0]));
+ let x_axis = normalize(subVectors(pts[3], pts[2]));
+ const z_axis = normalize(crossVectors(x_axis, y_axis));
+ // adjust x_axis to make sure that all axes are perpendicular to each other
+ x_axis = crossVectors(y_axis, z_axis);
+
+ // Rotation Matrix from Axis Vectors - http://renderdan.blogspot.com/2006/05/rotation-matrix-from-axis-vectors.html
+  // 3x3 rotation matrix is flattened to an array in row-major order. Note that the rotation represented by this matrix is inverted.
+ const matrix: [number, number, number, number, number, number, number, number, number] = [
+ x_axis[0], x_axis[1], x_axis[2],
+ y_axis[0], y_axis[1], y_axis[2],
+ z_axis[0], z_axis[1], z_axis[2],
+ ];
+ const angle = rotationMatrixToEulerAngle(matrix);
+ // const angle = meshToEulerAngle(mesh);
+
+ // we have iris keypoints so we can calculate gaze direction
+ const gaze = mesh.length === 478 ? calculateGaze(face) : { bearing: 0, strength: 0 };
+
+ return { angle, matrix, gaze };
+};
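
> All angles returned by the extracted module are in radians; a hypothetical consumer converting them with the same formula as the file's `rad2deg` helper:

```ts
// face: a single FaceResult with meshRaw populated; input: the source tensor — both assumed
const { angle, gaze } = calculateFaceAngle(face, [input.shape[2], input.shape[1]]);
const toDeg = (theta: number) => Math.round((theta * 180) / Math.PI); // identical to rad2deg above
console.log('pitch/yaw/roll:', toDeg(angle.pitch), toDeg(angle.yaw), toDeg(angle.roll)); // degrees
console.log('gaze bearing:', toDeg(gaze.bearing), 'strength:', gaze.strength); // bearing in [0, 180)
```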
diff --git a/src/face/blazeface.ts b/src/face/blazeface.ts
new file mode 100644
index 00000000..afbde8ff
--- /dev/null
+++ b/src/face/blazeface.ts
@@ -0,0 +1,96 @@
+/**
+ * BlazeFace, FaceMesh & Iris model implementation
+ * See `facemesh.ts` for entry point
+ */
+
+import { log, join } from '../util/util';
+import * as tf from '../../dist/tfjs.esm.js';
+import * as util from './facemeshutil';
+import type { Config } from '../config';
+import type { Tensor, GraphModel } from '../tfjs/types';
+import { env } from '../util/env';
+
+const keypointsCount = 6;
+let model: GraphModel | null;
+let anchorsData: [number, number][] = [];
+let anchors: Tensor | null = null;
+let inputSize = 0;
+
+// export const size = () => (model && model.inputs[0].shape ? model.inputs[0].shape[2] : 0);
+export const size = () => inputSize;
+
+export async function load(config: Config): Promise<GraphModel> {
+ if (env.initial) model = null;
+ if (!model) {
+ model = await tf.loadGraphModel(join(config.modelBasePath, config.face.detector?.modelPath || '')) as unknown as GraphModel;
+    if (!model || !model['modelUrl']) log('load model failed:', config.face.detector?.modelPath);
+ else if (config.debug) log('load model:', model['modelUrl']);
+ } else if (config.debug) log('cached model:', model['modelUrl']);
+ inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
+ if (inputSize === -1) inputSize = 64;
+ anchorsData = util.generateAnchors(inputSize);
+ anchors = tf.tensor2d(anchorsData);
+ return model;
+}
+
+function decodeBounds(boxOutputs) {
+ const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
+ const centers = tf.add(boxStarts, anchors);
+ const boxSizes = tf.slice(boxOutputs, [0, 3], [-1, 2]);
+ const boxSizesNormalized = tf.div(boxSizes, inputSize);
+ const centersNormalized = tf.div(centers, inputSize);
+ const halfBoxSize = tf.div(boxSizesNormalized, 2);
+ const starts = tf.sub(centersNormalized, halfBoxSize);
+ const ends = tf.add(centersNormalized, halfBoxSize);
+ const startNormalized = tf.mul(starts, inputSize);
+ const endNormalized = tf.mul(ends, inputSize);
+ const concatAxis = 1;
+ return tf.concat2d([startNormalized, endNormalized], concatAxis);
+}
+
+export async function getBoxes(inputImage: Tensor, config: Config) {
+ // sanity check on input
+ if ((!inputImage) || (inputImage['isDisposedInternal']) || (inputImage.shape.length !== 4) || (inputImage.shape[1] < 1) || (inputImage.shape[2] < 1)) return { boxes: [] };
+ const [batch, boxes, scores] = tf.tidy(() => {
+ const resizedImage = tf.image.resizeBilinear(inputImage, [inputSize, inputSize]);
+ const normalizedImage = tf.sub(tf.div(resizedImage, 127.5), 0.5);
+ const res = model?.execute(normalizedImage);
+ let batchOut;
+ if (Array.isArray(res)) { // are we using tfhub or pinto converted model?
+ const sorted = res.sort((a, b) => a.size - b.size);
+ const concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16
+ const concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16
+ const concat = tf.concat([concat512, concat384], 1);
+ batchOut = tf.squeeze(concat, 0);
+ } else {
+ batchOut = tf.squeeze(res); // when using tfhub model
+ }
+ const boxesOut = decodeBounds(batchOut);
+ const logits = tf.slice(batchOut, [0, 0], [-1, 1]);
+ const scoresOut = tf.squeeze(tf.sigmoid(logits)); // inside tf.tidy
+ return [batchOut, boxesOut, scoresOut];
+ });
+
+ const nmsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, (config.face.detector?.maxDetected || 0), (config.face.detector?.iouThreshold || 0), (config.face.detector?.minConfidence || 0));
+ const nms = await nmsTensor.array();
+ tf.dispose(nmsTensor);
+ const annotatedBoxes: Array<{ box: { startPoint: Tensor, endPoint: Tensor }, landmarks: Tensor, anchor: [number, number] | undefined, confidence: number }> = [];
+ const scoresData = await scores.data();
+ for (let i = 0; i < nms.length; i++) {
+ const confidence = scoresData[nms[i]];
+ if (confidence > (config.face.detector?.minConfidence || 0)) {
+ const boundingBox = tf.slice(boxes, [nms[i], 0], [1, -1]);
+ const landmarks = tf.tidy(() => tf.reshape(tf.squeeze(tf.slice(batch, [nms[i], keypointsCount - 1], [1, -1])), [keypointsCount, -1]));
+ annotatedBoxes.push({ box: util.createBox(boundingBox), landmarks, anchor: anchorsData[nms[i]], confidence });
+ tf.dispose(boundingBox);
+ }
+ }
+ tf.dispose(batch);
+ tf.dispose(boxes);
+ tf.dispose(scores);
+
+ return {
+ boxes: annotatedBoxes,
+ scaleFactor: [inputImage.shape[2] / inputSize, inputImage.shape[1] / inputSize],
+ };
+}
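
> A hypothetical standalone use of the detector; in the library it is only driven through `facemesh.predict()`. The returned box and landmark tensors are owned by the caller:

```ts
import * as blazeface from './src/face/blazeface';

await blazeface.load(config); // also builds the anchor set for the model's input size
const { boxes, scaleFactor } = await blazeface.getBoxes(imageTensor, config); // config/imageTensor assumed
// scaleFactor maps detector coordinates back to the original image resolution
for (const det of boxes) {
  console.log('detector confidence:', det.confidence); // post-NMS score
  tf.dispose([det.box.startPoint, det.box.endPoint, det.landmarks]); // caller must dispose
}
```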
diff --git a/src/face/face.ts b/src/face/face.ts
index 4745abc2..4a17b1a2 100644
--- a/src/face/face.ts
+++ b/src/face/face.ts
@@ -5,145 +5,12 @@
import { log, now } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
-import * as facemesh from '../blazeface/facemesh';
+import * as facemesh from './facemesh';
import * as emotion from '../gear/emotion';
import * as faceres from './faceres';
import type { FaceResult } from '../result';
import type { Tensor } from '../tfjs/types';
-
-// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
-const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);
-
-const calculateGaze = (face): { bearing: number, strength: number } => {
- const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
- if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 };
-
- const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes
- const eyeRatio = 1; // factor to normalize changes x vs y
-
- const left = face.mesh[33][2] > face.mesh[263][2]; // pick left or right eye depending which one is closer bazed on outsize point z axis
- const irisCenter = left ? face.mesh[473] : face.mesh[468];
- const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so relative point is less precise
- ? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2]
- : [(face.mesh[263][0] + face.mesh[362][0]) / 2, (face.mesh[263][1] + face.mesh[362][1]) / 2];
- const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions
- ? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]]
- : [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]];
-
- const eyeDiff = [ // x distance between extreme point and center point normalized with eye size
- (eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
- eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
- ];
- let strength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences
- strength = Math.min(strength, face.boxRaw[2] / 2, face.boxRaw[3] / 2); // limit strength to half of box size to avoid clipping due to low precision
- const bearing = (radians([0, 0], eyeDiff) + (Math.PI / 2)) % Math.PI; // using eyeDiff instead eyeCenter/irisCenter combo due to manual adjustments and rotate clockwise 90degrees
-
- return { bearing, strength };
-};
-
-const calculateFaceAngle = (face, imageSize): {
- angle: { pitch: number, yaw: number, roll: number },
- matrix: [number, number, number, number, number, number, number, number, number],
- gaze: { bearing: number, strength: number },
-} => {
- // const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360);
- const normalize = (v) => { // normalize vector
- const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
- v[0] /= length;
- v[1] /= length;
- v[2] /= length;
- return v;
- };
- const subVectors = (a, b) => { // vector subtraction (a - b)
- const x = a[0] - b[0];
- const y = a[1] - b[1];
- const z = a[2] - b[2];
- return [x, y, z];
- };
- const crossVectors = (a, b) => { // vector cross product (a x b)
- const x = a[1] * b[2] - a[2] * b[1];
- const y = a[2] * b[0] - a[0] * b[2];
- const z = a[0] * b[1] - a[1] * b[0];
- return [x, y, z];
- };
- // 3x3 rotation matrix to Euler angles based on https://www.geometrictools.com/Documentation/EulerAngles.pdf
- const rotationMatrixToEulerAngle = (r) => {
- // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
- const [r00, r01, r02, r10, r11, r12, r20, r21, r22] = r;
- let thetaX: number;
- let thetaY: number;
- let thetaZ: number;
- if (r10 < 1) { // YZX calculation
- if (r10 > -1) {
- thetaZ = Math.asin(r10);
- thetaY = Math.atan2(-r20, r00);
- thetaX = Math.atan2(-r12, r11);
- } else {
- thetaZ = -Math.PI / 2;
- thetaY = -Math.atan2(r21, r22);
- thetaX = 0;
- }
- } else {
- thetaZ = Math.PI / 2;
- thetaY = Math.atan2(r21, r22);
- thetaX = 0;
- }
- if (isNaN(thetaX)) thetaX = 0;
- if (isNaN(thetaY)) thetaY = 0;
- if (isNaN(thetaZ)) thetaZ = 0;
- return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ };
- };
- // simple Euler angle calculation based existing 3D mesh
- // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
- const meshToEulerAngle = (mesh) => {
- const radians = (a1, a2, b1, b2) => Math.atan2(b2 - a2, b1 - a1);
- // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
- const angle = {
- // values are in radians in range of -pi/2 to pi/2 which is -90 to +90 degrees, value of 0 means center
- // pitch is face move up/down
- pitch: radians(mesh[10][1], mesh[10][2], mesh[152][1], mesh[152][2]), // looking at y,z of top and bottom points of the face
- // yaw is face turn left/right
- yaw: radians(mesh[33][0], mesh[33][2], mesh[263][0], mesh[263][2]), // looking at x,z of outside corners of leftEye and rightEye
- // roll is face lean left/right
- roll: radians(mesh[33][0], mesh[33][1], mesh[263][0], mesh[263][1]), // looking at x,y of outside corners of leftEye and rightEye
- };
- return angle;
- };
-
- // initialize gaze and mesh
- const mesh = face.meshRaw;
- if (!mesh || mesh.length < 300) return { angle: { pitch: 0, yaw: 0, roll: 0 }, matrix: [1, 0, 0, 0, 1, 0, 0, 0, 1], gaze: { bearing: 0, strength: 0 } };
-
- const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5;
- // top, bottom, left, right
- const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [
- // make the xyz coordinates proportional, independent of the image/box size
- pt[0] * imageSize[0] / size,
- pt[1] * imageSize[1] / size,
- pt[2],
- ]);
-
- const y_axis = normalize(subVectors(pts[1], pts[0]));
- let x_axis = normalize(subVectors(pts[3], pts[2]));
- const z_axis = normalize(crossVectors(x_axis, y_axis));
- // adjust x_axis to make sure that all axes are perpendicular to each other
- x_axis = crossVectors(y_axis, z_axis);
-
- // Rotation Matrix from Axis Vectors - http://renderdan.blogspot.com/2006/05/rotation-matrix-from-axis-vectors.html
- // 3x3 rotation matrix is flatten to array in row-major order. Note that the rotation represented by this matrix is inverted.
- const matrix: [number, number, number, number, number, number, number, number, number] = [
- x_axis[0], x_axis[1], x_axis[2],
- y_axis[0], y_axis[1], y_axis[2],
- z_axis[0], z_axis[1], z_axis[2],
- ];
- const angle = rotationMatrixToEulerAngle(matrix);
- // const angle = meshToEulerAngle(mesh);
-
- // we have iris keypoints so we can calculate gaze direction
- const gaze = mesh.length === 478 ? calculateGaze(face) : { bearing: 0, strength: 0 };
-
- return { angle, matrix, gaze };
-};
+import { calculateFaceAngle } from './angles';
export const detectFace = async (parent /* instance of human */, input: Tensor): Promise<FaceResult[]> => {
// run facemesh, includes blazeface and iris
@@ -158,6 +25,7 @@ export const detectFace = async (parent /* instance of human */, input: Tensor):
const faceRes: Array = [];
parent.state = 'run:face';
timeStamp = now();
+
const faces = await facemesh.predict(input, parent.config);
parent.performance.face = Math.trunc(now() - timeStamp);
if (!input.shape || input.shape.length !== 4) return [];
@@ -226,7 +94,7 @@ export const detectFace = async (parent /* instance of human */, input: Tensor):
delete faces[i].annotations.leftEyeIris;
delete faces[i].annotations.rightEyeIris;
}
- const irisSize = (faces[i].annotations && faces[i].annotations.leftEyeIris && faces[i].annotations.rightEyeIris
+ const irisSize = (faces[i].annotations && faces[i].annotations.leftEyeIris && faces[i].annotations.leftEyeIris[0] && faces[i].annotations.rightEyeIris && faces[i].annotations.rightEyeIris[0]
&& (faces[i].annotations.leftEyeIris.length > 0) && (faces[i].annotations.rightEyeIris.length > 0)
&& (faces[i].annotations.leftEyeIris[0] !== null) && (faces[i].annotations.rightEyeIris[0] !== null))
? Math.max(Math.abs(faces[i].annotations.leftEyeIris[3][0] - faces[i].annotations.leftEyeIris[1][0]), Math.abs(faces[i].annotations.rightEyeIris[4][1] - faces[i].annotations.rightEyeIris[2][1])) / input.shape[2]
diff --git a/src/face/facemesh.ts b/src/face/facemesh.ts
new file mode 100644
index 00000000..256a9ce6
--- /dev/null
+++ b/src/face/facemesh.ts
@@ -0,0 +1,139 @@
+/**
+ * BlazeFace, FaceMesh & Iris model implementation
+ *
+ * Based on:
+ * - [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
+ * - Facial Spacial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
+ * - Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
+ */
+
+import { log, join } from '../util/util';
+import * as tf from '../../dist/tfjs.esm.js';
+import * as blazeface from './blazeface';
+import * as util from './facemeshutil';
+import * as coords from './facemeshcoords';
+import * as iris from './iris';
+import type { GraphModel, Tensor } from '../tfjs/types';
+import type { FaceResult, Point } from '../result';
+import type { Config } from '../config';
+import { env } from '../util/env';
+
+type BoxCache = { startPoint: Point, endPoint: Point, landmarks: Array<Point>, confidence: number, faceConfidence?: number | undefined };
+let boxCache: Array = [];
+let model: GraphModel | null = null;
+let inputSize = 0;
+let skipped = Number.MAX_SAFE_INTEGER;
+let detectedFaces = 0;
+
+export async function predict(input: Tensor, config: Config): Promise<FaceResult[]> {
+ if (!config.skipFrame || (((detectedFaces !== config.face.detector?.maxDetected) || !config.face.mesh?.enabled)) && (skipped > (config.face.detector?.skipFrames || 0))) { // reset cached boxes
+ const newBoxes = await blazeface.getBoxes(input, config); // get results from blazeface detector
+ boxCache = []; // empty cache
+ for (const possible of newBoxes.boxes) { // extract data from detector
+ const startPoint = await possible.box.startPoint.data() as unknown as Point;
+ const endPoint = await possible.box.endPoint.data() as unknown as Point;
+    const landmarks = await possible.landmarks.array() as Array<Point>;
+ boxCache.push({ startPoint, endPoint, landmarks, confidence: possible.confidence });
+ }
+ newBoxes.boxes.forEach((prediction) => tf.dispose([prediction.box.startPoint, prediction.box.endPoint, prediction.landmarks]));
+ for (let i = 0; i < boxCache.length; i++) { // enlarge and squarify detected boxes
+ const scaledBox = util.scaleBoxCoordinates({ startPoint: boxCache[i].startPoint, endPoint: boxCache[i].endPoint }, newBoxes.scaleFactor);
+ const enlargedBox = util.enlargeBox(scaledBox);
+ const squarifiedBox = util.squarifyBox(enlargedBox);
+ boxCache[i] = { ...squarifiedBox, confidence: boxCache[i].confidence, landmarks: boxCache[i].landmarks };
+ }
+ skipped = 0;
+ } else {
+ skipped++;
+ }
+
+  const faces: Array<FaceResult> = [];
+  const newBoxes: Array<BoxCache> = [];
+ let id = 0;
+ for (let box of boxCache) {
+ let angle = 0;
+ let rotationMatrix;
+ const face: FaceResult = {
+ id: id++,
+ mesh: [],
+ meshRaw: [],
+ box: [0, 0, 0, 0],
+ boxRaw: [0, 0, 0, 0],
+ score: 0,
+ boxScore: 0,
+ faceScore: 0,
+ annotations: {},
+ };
+
+ if (config.face.detector?.rotation && config.face.mesh?.enabled && env.kernels.includes('rotatewithoffset')) {
+ [angle, rotationMatrix, face.tensor] = util.correctFaceRotation(box, input, inputSize);
+ } else {
+ rotationMatrix = util.IDENTITY_MATRIX;
+ const cut = util.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, input, config.face.mesh?.enabled ? [inputSize, inputSize] : [blazeface.size(), blazeface.size()]);
+ face.tensor = tf.div(cut, 255);
+ tf.dispose(cut);
+ }
+ face.boxScore = Math.round(100 * box.confidence) / 100;
+    if (!config.face.mesh?.enabled) { // mesh not enabled, return results from detector only
+ face.box = util.getClampedBox(box, input);
+ face.boxRaw = util.getRawBox(box, input);
+ face.score = Math.round(100 * box.confidence || 0) / 100;
+ face.mesh = box.landmarks.map((pt) => [
+ ((box.startPoint[0] + box.endPoint[0])) / 2 + ((box.endPoint[0] + box.startPoint[0]) * pt[0] / blazeface.size()),
+ ((box.startPoint[1] + box.endPoint[1])) / 2 + ((box.endPoint[1] + box.startPoint[1]) * pt[1] / blazeface.size()),
+ ]);
+ face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
+ for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key]]]; // add annotations
+ } else if (!model) { // mesh enabled, but not loaded
+ if (config.debug) log('face mesh detection requested, but model is not loaded');
+ } else { // mesh enabled
+      const [contours, confidence, contourCoords] = model.execute(face.tensor as Tensor) as Array<Tensor>; // first returned tensor represents facial contours which are already included in the coordinates.
+ tf.dispose(contours);
+ const faceConfidence = (await confidence.data())[0] as number;
+ tf.dispose(confidence);
+ const coordsReshaped = tf.reshape(contourCoords, [-1, 3]);
+ let rawCoords = await coordsReshaped.array();
+ tf.dispose(contourCoords);
+ tf.dispose(coordsReshaped);
+ if (faceConfidence < (config.face.detector?.minConfidence || 1)) {
+ box.confidence = faceConfidence; // reset confidence of cached box
+ tf.dispose(face.tensor);
+ } else {
+ if (config.face.iris?.enabled) rawCoords = await iris.augmentIris(rawCoords, face.tensor, config, inputSize); // augment results with iris
+ face.mesh = util.transformRawCoords(rawCoords, box, angle, rotationMatrix, inputSize); // get processed mesh
+ face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
+ box = { ...util.enlargeBox(util.calculateLandmarksBoundingBox(face.mesh), 1.5), confidence: box.confidence }; // redefine box with mesh calculated one
+ for (const key of Object.keys(coords.meshAnnotations)) face.annotations[key] = coords.meshAnnotations[key].map((index) => face.mesh[index]); // add annotations
+ if (config.face.detector?.rotation && config.face.mesh.enabled && config.face.description?.enabled && env.kernels.includes('rotatewithoffset')) { // do rotation one more time with mesh keypoints if we want to return perfect image
+ tf.dispose(face.tensor); // dispose so we can overwrite original face
+ [angle, rotationMatrix, face.tensor] = util.correctFaceRotation(box, input, inputSize);
+ }
+ face.box = util.getClampedBox(box, input); // update detected box with box around the face mesh
+ face.boxRaw = util.getRawBox(box, input);
+ face.score = Math.round(100 * faceConfidence || 100 * box.confidence || 0) / 100;
+ face.faceScore = Math.round(100 * faceConfidence) / 100;
+ box = { ...util.squarifyBox(box), confidence: box.confidence, faceConfidence }; // updated stored cache values
+ }
+ }
+ faces.push(face);
+ newBoxes.push(box);
+ }
+ if (config.face.mesh?.enabled) boxCache = newBoxes.filter((a) => a.confidence > (config.face.detector?.minConfidence || 0)); // remove cache entries for detected boxes on low confidence
+ detectedFaces = faces.length;
+ return faces;
+}
+
+export async function load(config: Config): Promise<GraphModel> {
+ if (env.initial) model = null;
+ if (!model) {
+ model = await tf.loadGraphModel(join(config.modelBasePath, config.face.mesh?.modelPath || '')) as unknown as GraphModel;
+    if (!model || !model['modelUrl']) log('load model failed:', config.face.mesh?.modelPath);
+ else if (config.debug) log('load model:', model['modelUrl']);
+ } else if (config.debug) log('cached model:', model['modelUrl']);
+ inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
+ if (inputSize === -1) inputSize = 64;
+ return model;
+}
+
+export const triangulation = coords.TRI468;
+export const uvmap = coords.UV468;
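
> Sketch of the call order for the rewritten pipeline, assuming all three face models are enabled; `facemesh.predict()` drives blazeface (with box caching) and optional iris augmentation internally:

```ts
await blazeface.load(config); // detector
await facemesh.load(config);  // mesh
await iris.load(config);      // iris refinement, used only when config.face.iris.enabled
const faces = await facemesh.predict(imageTensor, config);
for (const face of faces) {
  console.log(face.box, face.score, face.mesh.length); // 468 mesh points, or 478 with iris
  tf.dispose(face.tensor);                             // the face crop tensor is handed to the caller
}
```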
diff --git a/src/blazeface/coords.ts b/src/face/facemeshcoords.ts
similarity index 99%
rename from src/blazeface/coords.ts
rename to src/face/facemeshcoords.ts
index 6f420fd9..c06dd6f1 100644
--- a/src/blazeface/coords.ts
+++ b/src/face/facemeshcoords.ts
@@ -3,7 +3,7 @@
* See `facemesh.ts` for entry point
*/
-export const MESH_ANNOTATIONS = {
+export const meshAnnotations = {
silhouette: [
10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
@@ -42,6 +42,22 @@ export const MESH_ANNOTATIONS = {
leftCheek: [425],
};
+export const meshLandmarks = {
+ count: 468,
+ mouth: 13,
+ symmetryLine: [13, meshAnnotations['midwayBetweenEyes'][0]],
+};
+
+export const blazeFaceLandmarks = {
+ leftEye: 0,
+ rightEye: 1,
+ nose: 2,
+ mouth: 3,
+ leftEar: 4,
+ rightEar: 5,
+ symmetryLine: [3, 2],
+};
+
export const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypoints to iris model keypoints.
{ key: 'EyeUpper0', indices: [9, 10, 11, 12, 13, 14, 15] },
{ key: 'EyeUpper1', indices: [25, 26, 27, 28, 29, 30, 31] },
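
> The renamed `meshAnnotations` map and the new `blazeFaceLandmarks` table back the two result paths in `facemesh.ts` above: full-mesh results index groups of 468-point coordinates, while detector-only results index the six blazeface keypoints. Illustrative lookups:

```ts
// mesh enabled: each annotation group is a list of mesh indices
const leftEyePts = coords.meshAnnotations['leftEyeLower0'].map((idx) => face.mesh[idx]);
// detector only: each annotation resolves to a single landmark index
const nose = face.mesh[coords.blazeFaceLandmarks['nose']]; // index 2 of the 6 keypoints
```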
diff --git a/src/face/facemeshutil.ts b/src/face/facemeshutil.ts
new file mode 100644
index 00000000..aa5cdc68
--- /dev/null
+++ b/src/face/facemeshutil.ts
@@ -0,0 +1,166 @@
+/**
+ * BlazeFace, FaceMesh & Iris model implementation
+ * See `facemesh.ts` for entry point
+ */
+
+import * as tf from '../../dist/tfjs.esm.js';
+import * as coords from './facemeshcoords';
+import type { Box, Point } from '../result';
+
+export const createBox = (startEndTensor) => ({ startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]), endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]) });
+
+export const disposeBox = (t) => tf.dispose([t.startPoint, t.endPoint]);
+
+export const getBoxSize = (box): [number, number] => [Math.abs(box.endPoint[0] - box.startPoint[0]), Math.abs(box.endPoint[1] - box.startPoint[1])];
+
+export const getBoxCenter = (box): [number, number] => [box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2, box.startPoint[1] + (box.endPoint[1] - box.startPoint[1]) / 2];
+
+export const getClampedBox = (box, input): Box => (box ? [
+ Math.trunc(Math.max(0, box.startPoint[0])),
+ Math.trunc(Math.max(0, box.startPoint[1])),
+ Math.trunc(Math.min((input.shape[2] || 0), box.endPoint[0]) - Math.max(0, box.startPoint[0])),
+ Math.trunc(Math.min((input.shape[1] || 0), box.endPoint[1]) - Math.max(0, box.startPoint[1])),
+] : [0, 0, 0, 0]);
+
+export const getRawBox = (box, input): Box => (box ? [
+ box.startPoint[0] / (input.shape[2] || 0),
+ box.startPoint[1] / (input.shape[1] || 0),
+ (box.endPoint[0] - box.startPoint[0]) / (input.shape[2] || 0),
+ (box.endPoint[1] - box.startPoint[1]) / (input.shape[1] || 0),
+] : [0, 0, 0, 0]);
+
+export const scaleBoxCoordinates = (box, factor) => {
+ const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
+ const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
+ return { startPoint, endPoint };
+};
+
+export const cutBoxFromImageAndResize = (box, image, cropSize) => {
+ const h = image.shape[1];
+ const w = image.shape[2];
+ return tf.image.cropAndResize(image, [[box.startPoint[1] / h, box.startPoint[0] / w, box.endPoint[1] / h, box.endPoint[0] / w]], [0], cropSize);
+};
+
+export const enlargeBox = (box, factor = 1.5) => {
+ const center = getBoxCenter(box);
+ const size = getBoxSize(box);
+ const halfSize: [number, number] = [factor * size[0] / 2, factor * size[1] / 2];
+ return { startPoint: [center[0] - halfSize[0], center[1] - halfSize[1]] as Point, endPoint: [center[0] + halfSize[0], center[1] + halfSize[1]] as Point, landmarks: box.landmarks };
+};
+
+export const squarifyBox = (box) => {
+ const centers = getBoxCenter(box);
+ const size = getBoxSize(box);
+ const halfSize = Math.max(...size) / 2;
+ return { startPoint: [Math.round(centers[0] - halfSize), Math.round(centers[1] - halfSize)] as Point, endPoint: [Math.round(centers[0] + halfSize), Math.round(centers[1] + halfSize)] as Point, landmarks: box.landmarks };
+};
+
+export const calculateLandmarksBoundingBox = (landmarks) => {
+ const xs = landmarks.map((d) => d[0]);
+ const ys = landmarks.map((d) => d[1]);
+ return { startPoint: [Math.min(...xs), Math.min(...ys)], endPoint: [Math.max(...xs), Math.max(...ys)], landmarks };
+};
+
+export const IDENTITY_MATRIX = [[1, 0, 0], [0, 1, 0], [0, 0, 1]];
+
+export const normalizeRadians = (angle) => angle - 2 * Math.PI * Math.floor((angle + Math.PI) / (2 * Math.PI));
+
+export const computeRotation = (point1, point2) => normalizeRadians(Math.PI / 2 - Math.atan2(-(point2[1] - point1[1]), point2[0] - point1[0]));
+
+export const radToDegrees = (rad) => rad * 180 / Math.PI;
+
+export const buildTranslationMatrix = (x, y) => [[1, 0, x], [0, 1, y], [0, 0, 1]];
+
+export const dot = (v1, v2) => {
+ let product = 0;
+ for (let i = 0; i < v1.length; i++) product += v1[i] * v2[i];
+ return product;
+};
+
+export const getColumnFrom2DArr = (arr, columnIndex) => {
+  const column: Array<number> = [];
+ for (let i = 0; i < arr.length; i++) column.push(arr[i][columnIndex]);
+ return column;
+};
+
+export const multiplyTransformMatrices = (mat1, mat2) => {
+  const product: Array<number[]> = [];
+ const size = mat1.length;
+ for (let row = 0; row < size; row++) {
+ product.push([]);
+ for (let col = 0; col < size; col++) product[row].push(dot(mat1[row], getColumnFrom2DArr(mat2, col)));
+ }
+ return product;
+};
+
+export const buildRotationMatrix = (rotation, center) => {
+ const cosA = Math.cos(rotation);
+ const sinA = Math.sin(rotation);
+ const rotationMatrix = [[cosA, -sinA, 0], [sinA, cosA, 0], [0, 0, 1]];
+ const translationMatrix = buildTranslationMatrix(center[0], center[1]);
+ const translationTimesRotation = multiplyTransformMatrices(translationMatrix, rotationMatrix);
+ const negativeTranslationMatrix = buildTranslationMatrix(-center[0], -center[1]);
+ return multiplyTransformMatrices(translationTimesRotation, negativeTranslationMatrix);
+};
+
+export const invertTransformMatrix = (matrix) => {
+ const rotationComponent = [[matrix[0][0], matrix[1][0]], [matrix[0][1], matrix[1][1]]];
+ const translationComponent = [matrix[0][2], matrix[1][2]];
+ const invertedTranslation = [-dot(rotationComponent[0], translationComponent), -dot(rotationComponent[1], translationComponent)];
+ return [rotationComponent[0].concat(invertedTranslation[0]), rotationComponent[1].concat(invertedTranslation[1]), [0, 0, 1]];
+};
+
+export const rotatePoint = (homogeneousCoordinate, rotationMatrix) => [dot(homogeneousCoordinate, rotationMatrix[0]), dot(homogeneousCoordinate, rotationMatrix[1])];
+
+export const xyDistanceBetweenPoints = (a, b) => Math.sqrt(((a[0] - b[0]) ** 2) + ((a[1] - b[1]) ** 2));
+
+export function generateAnchors(inputSize) {
+ const spec = { strides: [inputSize / 16, inputSize / 8], anchors: [2, 6] };
+ const anchors: Array<[number, number]> = [];
+ for (let i = 0; i < spec.strides.length; i++) {
+ const stride = spec.strides[i];
+ const gridRows = Math.floor((inputSize + stride - 1) / stride);
+ const gridCols = Math.floor((inputSize + stride - 1) / stride);
+ const anchorsNum = spec.anchors[i];
+ for (let gridY = 0; gridY < gridRows; gridY++) {
+ const anchorY = stride * (gridY + 0.5);
+ for (let gridX = 0; gridX < gridCols; gridX++) {
+ const anchorX = stride * (gridX + 0.5);
+ for (let n = 0; n < anchorsNum; n++) anchors.push([anchorX, anchorY]);
+ }
+ }
+ }
+ return anchors;
+}
+
+export function transformRawCoords(rawCoords, box, angle, rotationMatrix, inputSize) {
+ const boxSize = getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint });
+ const coordsScaled = rawCoords.map((coord) => ([
+ boxSize[0] / inputSize * (coord[0] - inputSize / 2),
+ boxSize[1] / inputSize * (coord[1] - inputSize / 2),
+ coord[2] || 0,
+ ]));
+ const coordsRotationMatrix = (angle !== 0) ? buildRotationMatrix(angle, [0, 0]) : IDENTITY_MATRIX;
+ const coordsRotated = (angle !== 0) ? coordsScaled.map((coord) => ([...rotatePoint(coord, coordsRotationMatrix), coord[2]])) : coordsScaled;
+ const inverseRotationMatrix = (angle !== 0) ? invertTransformMatrix(rotationMatrix) : IDENTITY_MATRIX;
+ const boxCenter = [...getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint }), 1];
+ return coordsRotated.map((coord) => ([
+ Math.round(coord[0] + dot(boxCenter, inverseRotationMatrix[0])),
+ Math.round(coord[1] + dot(boxCenter, inverseRotationMatrix[1])),
+ Math.round(coord[2] || 0),
+ ]));
+}
+
+export function correctFaceRotation(box, input, inputSize) {
+ const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= coords.meshLandmarks.count) ? coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
+ const angle: number = computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
+ const faceCenter: Point = getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
+ const faceCenterNormalized: Point = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
+ const rotated = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
+ const rotationMatrix = buildRotationMatrix(-angle, faceCenter);
+ const cut = cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotated, [inputSize, inputSize]);
+ const face = tf.div(cut, 255);
+ tf.dispose(cut);
+ tf.dispose(rotated);
+ return [angle, rotationMatrix, face];
+}
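
> To make `generateAnchors` concrete: for a 128×128 detector input (an assumption; the real size is read from the loaded model), the strides evaluate to [8, 16] with [2, 6] anchors per cell, which is exactly where the 512- and 384-row tensors concatenated in `blazeface.ts` come from:

```ts
import { generateAnchors } from './src/face/facemeshutil';

const anchors = generateAnchors(128);
// stride 8:  16×16 grid × 2 anchors = 512 rows (the concat512 branch in blazeface.ts)
// stride 16:  8×8  grid × 6 anchors = 384 rows (the concat384 branch)
console.log(anchors.length); // 896
```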
diff --git a/src/face/iris.ts b/src/face/iris.ts
new file mode 100644
index 00000000..7e5e3179
--- /dev/null
+++ b/src/face/iris.ts
@@ -0,0 +1,150 @@
+import * as coords from './facemeshcoords';
+import * as util from './facemeshutil';
+import * as tf from '../../dist/tfjs.esm.js';
+import type { Tensor, GraphModel } from '../tfjs/types';
+import { env } from '../util/env';
+import { log, join } from '../util/util';
+import type { Config } from '../config';
+import type { Point } from '../result';
+
+let model: GraphModel | null;
+let inputSize = 0;
+
+const irisEnlarge = 2.3;
+
+const leftOutline = coords.meshAnnotations['leftEyeLower0'];
+const rightOutline = coords.meshAnnotations['rightEyeLower0'];
+
+const eyeLandmarks = {
+ leftBounds: [leftOutline[0], leftOutline[leftOutline.length - 1]],
+ rightBounds: [rightOutline[0], rightOutline[rightOutline.length - 1]],
+};
+
+const irisLandmarks = {
+ upperCenter: 3,
+ lowerCenter: 4,
+ index: 71,
+ numCoordinates: 76,
+};
+
+export async function load(config: Config): Promise<GraphModel> {
+ if (env.initial) model = null;
+ if (!model) {
+ model = await tf.loadGraphModel(join(config.modelBasePath, config.face.iris?.modelPath || '')) as unknown as GraphModel;
+    if (!model || !model['modelUrl']) log('load model failed:', config.face.iris?.modelPath);
+ else if (config.debug) log('load model:', model['modelUrl']);
+ } else if (config.debug) log('cached model:', model['modelUrl']);
+ inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
+ if (inputSize === -1) inputSize = 64;
+ return model;
+}
+
+// Replace the raw coordinates returned by facemesh with refined iris model coordinates
+// Update the z coordinate to be an average of the original and the new.
+function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
+ for (let i = 0; i < coords.MESH_TO_IRIS_INDICES_MAP.length; i++) {
+ const { key, indices } = coords.MESH_TO_IRIS_INDICES_MAP[i];
+ const originalIndices = coords.meshAnnotations[`${prefix}${key}`];
+ if (!keys || keys.includes(key)) {
+ for (let j = 0; j < indices.length; j++) {
+ const index = indices[j];
+ rawCoords[originalIndices[j]] = [
+ newCoords[index][0], newCoords[index][1],
+ (newCoords[index][2] + rawCoords[originalIndices[j]][2]) / 2,
+ ];
+ }
+ }
+ }
+}
+
+// eslint-disable-next-line class-methods-use-this
+export const getLeftToRightEyeDepthDifference = (rawCoords) => {
+ const leftEyeZ = rawCoords[eyeLandmarks.leftBounds[0]][2];
+ const rightEyeZ = rawCoords[eyeLandmarks.rightBounds[0]][2];
+ return leftEyeZ - rightEyeZ;
+};
+
+// Returns a box describing a cropped region around the eye fit for passing to the iris model.
+export const getEyeBox = (rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false, meshSize) => {
+ const box = util.squarifyBox(util.enlargeBox(util.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), irisEnlarge));
+ const boxSize = util.getBoxSize(box);
+ let crop = tf.image.cropAndResize(face, [[
+ box.startPoint[1] / meshSize,
+ box.startPoint[0] / meshSize, box.endPoint[1] / meshSize,
+ box.endPoint[0] / meshSize,
+ ]], [0], [inputSize, inputSize]);
+ if (flip && env.kernels.includes('flipleftright')) {
+ const flipped = tf.image.flipLeftRight(crop); // flipLeftRight is not defined for tfjs-node
+ tf.dispose(crop);
+ crop = flipped;
+ }
+ return { box, boxSize, crop };
+};
+
+// Given a cropped image of an eye, returns the coordinates of the contours surrounding the eye and the iris.
+export const getEyeCoords = (eyeData, eyeBox, eyeBoxSize, flip = false) => {
+  const eyeRawCoords: Array<Point> = [];
+ for (let i = 0; i < irisLandmarks.numCoordinates; i++) {
+ const x = eyeData[i * 3];
+ const y = eyeData[i * 3 + 1];
+ const z = eyeData[i * 3 + 2];
+ eyeRawCoords.push([
+ (flip ? (1 - (x / inputSize)) : (x / inputSize)) * eyeBoxSize[0] + eyeBox.startPoint[0],
+ (y / inputSize) * eyeBoxSize[1] + eyeBox.startPoint[1], z,
+ ]);
+ }
+ return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(irisLandmarks.index) };
+};
+
+// The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints.
+// eslint-disable-next-line class-methods-use-this
+export const getAdjustedIrisCoords = (rawCoords, irisCoords, direction) => {
+ const upperCenterZ = rawCoords[coords.meshAnnotations[`${direction}EyeUpper0`][irisLandmarks.upperCenter]][2];
+ const lowerCenterZ = rawCoords[coords.meshAnnotations[`${direction}EyeLower0`][irisLandmarks.lowerCenter]][2];
+ const averageZ = (upperCenterZ + lowerCenterZ) / 2;
+ // Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below
+ return irisCoords.map((coord, i) => {
+ let z = averageZ;
+ if (i === 2) {
+ z = upperCenterZ;
+ } else if (i === 4) {
+ z = lowerCenterZ;
+ }
+ return [coord[0], coord[1], z];
+ });
+};
+
+export async function augmentIris(rawCoords, face, config, meshSize) {
+ if (!model) {
+ if (config.debug) log('face mesh iris detection requested, but model is not loaded');
+ return rawCoords;
+ }
+ const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = getEyeBox(rawCoords, face, eyeLandmarks.leftBounds[0], eyeLandmarks.leftBounds[1], true, meshSize);
+ const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = getEyeBox(rawCoords, face, eyeLandmarks.rightBounds[0], eyeLandmarks.rightBounds[1], true, meshSize);
+ const combined = tf.concat([leftEyeCrop, rightEyeCrop]);
+ tf.dispose(leftEyeCrop);
+ tf.dispose(rightEyeCrop);
+ const eyePredictions = model.predict(combined) as Tensor;
+ tf.dispose(combined);
+ const eyePredictionsData = await eyePredictions.data(); // inside tf.tidy
+ tf.dispose(eyePredictions);
+ const leftEyeData = eyePredictionsData.slice(0, irisLandmarks.numCoordinates * 3);
+ const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
+ const rightEyeData = eyePredictionsData.slice(irisLandmarks.numCoordinates * 3);
+ const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize);
+ const leftToRightEyeDepthDifference = getLeftToRightEyeDepthDifference(rawCoords);
+ if (Math.abs(leftToRightEyeDepthDifference) < 30) { // User is looking straight ahead.
+ replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', null);
+ replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', null);
+ // If the user is looking to the left or to the right, the iris coordinates tend to diverge too much from the mesh coordinates for them to be merged
+ // So we only update a single contour line above and below the eye.
+ } else if (leftToRightEyeDepthDifference < 1) { // User is looking towards the right.
+ replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', ['EyeUpper0', 'EyeLower0']);
+ } else { // User is looking towards the left.
+ replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', ['EyeUpper0', 'EyeLower0']);
+ }
+ const adjustedLeftIrisCoords = getAdjustedIrisCoords(rawCoords, leftIrisRawCoords, 'left');
+ const adjustedRightIrisCoords = getAdjustedIrisCoords(rawCoords, rightIrisRawCoords, 'right');
+ const newCoords = rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords);
+ return newCoords;
+}
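
> A hypothetical direct call; in the pipeline this runs inside `facemesh.predict()` once the mesh passes its confidence gate. Each eye contributes the 5 iris points at coordinates 71..75 of the 76 returned, growing the mesh from 468 to 478 points:

```ts
await iris.load(config);
const augmented = await iris.augmentIris(rawCoords, faceCropTensor, config, meshInputSize); // inputs assumed
console.log(augmented.length); // 468 mesh points + 5 left iris + 5 right iris = 478
```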
diff --git a/src/gesture/gesture.ts b/src/gesture/gesture.ts
index 1609ea38..a5a1a586 100644
--- a/src/gesture/gesture.ts
+++ b/src/gesture/gesture.ts
@@ -3,7 +3,7 @@
*/
import type { GestureResult } from '../result';
-import * as fingerPose from '../fingerpose/fingerpose';
+import * as fingerPose from '../hand/fingerpose';
/**
* @typedef FaceGesture
@@ -63,7 +63,7 @@ export const face = (res): GestureResult[] => {
if (!res) return [];
const gestures: Array<{ face: number, gesture: FaceGesture }> = [];
for (let i = 0; i < res.length; i++) {
- if (res[i].mesh && res[i].mesh.length > 0) {
+ if (res[i].mesh && res[i].mesh.length > 450) {
const eyeFacing = res[i].mesh[33][2] - res[i].mesh[263][2];
if (Math.abs(eyeFacing) < 10) gestures.push({ face: i, gesture: 'facing center' });
else gestures.push({ face: i, gesture: `facing ${eyeFacing < 0 ? 'left' : 'right'}` });
@@ -84,7 +84,7 @@ export const iris = (res): GestureResult[] => {
if (!res) return [];
const gestures: Array<{ iris: number, gesture: IrisGesture }> = [];
for (let i = 0; i < res.length; i++) {
- if (!res[i].annotations || !res[i].annotations.leftEyeIris || !res[i].annotations.rightEyeIris) continue;
+ if (!res[i].annotations || !res[i].annotations.leftEyeIris || !res[i].annotations.leftEyeIris[0] || !res[i].annotations.rightEyeIris || !res[i].annotations.rightEyeIris[0]) continue;
const sizeXLeft = res[i].annotations.leftEyeIris[3][0] - res[i].annotations.leftEyeIris[1][0];
const sizeYLeft = res[i].annotations.leftEyeIris[4][1] - res[i].annotations.leftEyeIris[2][1];
const areaLeft = Math.abs(sizeXLeft * sizeYLeft);
diff --git a/src/fingerpose/gesture.ts b/src/hand/fingerdef.ts
similarity index 64%
rename from src/fingerpose/gesture.ts
rename to src/hand/fingerdef.ts
index 6ce56751..2d348299 100644
--- a/src/fingerpose/gesture.ts
+++ b/src/hand/fingerdef.ts
@@ -3,7 +3,54 @@
* See `fingerpose.ts` for entry point
*/
-export default class Gesture {
+export const Finger = {
+ thumb: 0,
+ index: 1,
+ middle: 2,
+ ring: 3,
+ pinky: 4,
+ all: [0, 1, 2, 3, 4], // just for convenience
+ nameMapping: { 0: 'thumb', 1: 'index', 2: 'middle', 3: 'ring', 4: 'pinky' },
+ // Describes mapping of joints based on the 21 points returned by handpose.
+ // [0] Palm
+ // [1-4] Thumb
+ // [5-8] Index
+ // [9-12] Middle
+ // [13-16] Ring
+ // [17-20] Pinky
+ pointsMapping: {
+ 0: [[0, 1], [1, 2], [2, 3], [3, 4]],
+ 1: [[0, 5], [5, 6], [6, 7], [7, 8]],
+ 2: [[0, 9], [9, 10], [10, 11], [11, 12]],
+ 3: [[0, 13], [13, 14], [14, 15], [15, 16]],
+ 4: [[0, 17], [17, 18], [18, 19], [19, 20]],
+ },
+ getName: (value) => Finger.nameMapping[value],
+ getPoints: (value) => Finger.pointsMapping[value],
+};
+
+export const FingerCurl = {
+ none: 0,
+ half: 1,
+ full: 2,
+ nameMapping: { 0: 'none', 1: 'half', 2: 'full' },
+ getName: (value) => FingerCurl.nameMapping[value],
+};
+
+export const FingerDirection = {
+ verticalUp: 0,
+ verticalDown: 1,
+ horizontalLeft: 2,
+ horizontalRight: 3,
+ diagonalUpRight: 4,
+ diagonalUpLeft: 5,
+ diagonalDownRight: 6,
+ diagonalDownLeft: 7,
+ nameMapping: { 0: 'verticalUp', 1: 'verticalDown', 2: 'horizontalLeft', 3: 'horizontalRight', 4: 'diagonalUpRight', 5: 'diagonalUpLeft', 6: 'diagonalDownRight', 7: 'diagonalDownLeft' },
+ getName: (value) => FingerDirection.nameMapping[value],
+};
+
+export class FingerGesture {
name;
curls;
directions;
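A quick usage sketch for the lookup helpers defined in `fingerdef.ts` above:

```ts
import { Finger, FingerCurl, FingerDirection } from './fingerdef';

console.log(Finger.getName(Finger.index));        // 'index'
console.log(Finger.getPoints(Finger.thumb));      // [[0, 1], [1, 2], [2, 3], [3, 4]]
console.log(FingerCurl.getName(FingerCurl.half)); // 'half'
console.log(FingerDirection.getName(FingerDirection.verticalUp)); // 'verticalUp'
```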
diff --git a/src/fingerpose/gestures.ts b/src/hand/fingergesture.ts
similarity index 91%
rename from src/fingerpose/gestures.ts
rename to src/hand/fingergesture.ts
index f413d571..f25d1047 100644
--- a/src/fingerpose/gestures.ts
+++ b/src/hand/fingergesture.ts
@@ -3,11 +3,10 @@
* See `fingerpose.ts` for entry point
*/
-import { Finger, FingerCurl, FingerDirection } from './description';
-import Gesture from './gesture';
+import { Finger, FingerCurl, FingerDirection, FingerGesture } from './fingerdef';
// describe thumbs up gesture 👍
-const ThumbsUp = new Gesture('thumbs up');
+const ThumbsUp = new FingerGesture('thumbs up');
ThumbsUp.addCurl(Finger.thumb, FingerCurl.none, 1.0);
ThumbsUp.addDirection(Finger.thumb, FingerDirection.verticalUp, 1.0);
ThumbsUp.addDirection(Finger.thumb, FingerDirection.diagonalUpLeft, 0.25);
@@ -19,7 +18,7 @@ for (const finger of [Finger.index, Finger.middle, Finger.ring, Finger.pinky]) {
}
// describe Victory gesture ✌️
-const Victory = new Gesture('victory');
+const Victory = new FingerGesture('victory');
Victory.addCurl(Finger.thumb, FingerCurl.half, 0.5);
Victory.addCurl(Finger.thumb, FingerCurl.none, 0.5);
Victory.addDirection(Finger.thumb, FingerDirection.verticalUp, 1.0);
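The renamed `FingerGesture` class keeps the same descriptive API (`addCurl` and `addDirection`, each with a weight per entry), so a new gesture can be declared alongside the built-ins. A hypothetical example (the 'point up' description is an illustration, not part of the library):

```ts
import { Finger, FingerCurl, FingerDirection, FingerGesture } from './fingerdef';

// hypothetical gesture: index finger extended upward, all other fingers curled
const PointUp = new FingerGesture('point up');
PointUp.addCurl(Finger.index, FingerCurl.none, 1.0);
PointUp.addDirection(Finger.index, FingerDirection.verticalUp, 1.0);
for (const finger of [Finger.thumb, Finger.middle, Finger.ring, Finger.pinky]) {
  PointUp.addCurl(finger, FingerCurl.full, 1.0);
}
```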
diff --git a/src/fingerpose/estimator.ts b/src/hand/fingerpose.ts
similarity index 88%
rename from src/fingerpose/estimator.ts
rename to src/hand/fingerpose.ts
index 8c2c2884..5aadc35c 100644
--- a/src/fingerpose/estimator.ts
+++ b/src/hand/fingerpose.ts
@@ -1,10 +1,13 @@
/**
- * FingerPose algorithm implementation
- * See `fingerpose.ts` for entry point
+ * FingerPose algorithm implementation constants
+ *
+ * Based on: [**FingerPose**](https://github.com/andypotato/fingerpose)
*/
-import { Finger, FingerCurl, FingerDirection } from './description';
+import { Finger, FingerCurl, FingerDirection } from './fingerdef';
+import Gestures from '../hand/fingergesture';
+const minConfidence = 0.7;
const options = {
// curl estimation
HALF_CURL_START_LIMIT: 60.0,
@@ -169,7 +172,7 @@ function calculateFingerDirection(startPoint, midPoint, endPoint, fingerSlopes)
return estimatedDirection;
}
-export function estimate(landmarks) {
+function estimate(landmarks) {
// step 1: calculate slopes
  const slopesXY: Array<number[]> = [];
  const slopesYZ: Array<number[]> = [];
@@ -212,3 +215,29 @@ export function estimate(landmarks) {
}
return { curls: fingerCurls, directions: fingerDirections };
}
+
+export function analyze(keypoints) { // get estimations of curl / direction for each finger
+ if (!keypoints || keypoints.length === 0) return null;
+ const estimatorRes = estimate(keypoints);
+ const landmarks = {};
+ for (const fingerIdx of Finger.all) {
+ landmarks[Finger.getName(fingerIdx)] = {
+ curl: FingerCurl.getName(estimatorRes.curls[fingerIdx]),
+ direction: FingerDirection.getName(estimatorRes.directions[fingerIdx]),
+ };
+ }
+ // console.log('finger landmarks', landmarks);
+ return landmarks;
+}
+
+export function match(keypoints) { // compare gesture description to each known gesture
+ const poses: Array<{ name: string, confidence: number }> = [];
+ if (!keypoints || keypoints.length === 0) return poses;
+ const estimatorRes = estimate(keypoints);
+ for (const gesture of Gestures) {
+ const confidence = gesture.matchAgainst(estimatorRes.curls, estimatorRes.directions);
+ if (confidence >= minConfidence) poses.push({ name: gesture.name, confidence });
+ }
+ // console.log('finger poses', poses);
+ return poses;
+}
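`estimate()` is now internal; callers use the two new exports, both of which take the 21 `[x, y, z]` hand landmarks. A hedged usage sketch (the keypoint values are placeholders):

```ts
import * as fingerPose from './fingerpose';

// 21 hand landmarks from the hand model (placeholder zeros for illustration)
const keypoints: number[][] = Array.from({ length: 21 }, () => [0, 0, 0]);

const landmarks = fingerPose.analyze(keypoints); // { thumb: { curl, direction }, ... }
const gestures = fingerPose.match(keypoints);    // e.g. [{ name: 'thumbs up', confidence: 0.9 }]
```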
diff --git a/src/hand/handtrack.ts b/src/hand/handtrack.ts
index d4214cb7..46eb750a 100644
--- a/src/hand/handtrack.ts
+++ b/src/hand/handtrack.ts
@@ -13,7 +13,7 @@ import type { HandResult, Box, Point } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
-import * as fingerPose from '../fingerpose/fingerpose';
+import * as fingerPose from './fingerpose';
import { fakeOps } from '../tfjs/backend';
const boxScaleFact = 1.5; // hand finger model prefers slightly larger box
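`boxScaleFact` enlarges the detected palm box before the finger model runs on the crop. A sketch of that kind of center-preserving scaling (the helper name is illustrative, not the module's actual function):

```ts
type Box = [number, number, number, number]; // [x, y, width, height]

// enlarge a box around its center by a given factor (illustrative helper)
function scaleBox(box: Box, fact: number): Box {
  const [x, y, w, h] = box;
  return [x - (w * (fact - 1)) / 2, y - (h * (fact - 1)) / 2, w * fact, h * fact];
}

scaleBox([100, 100, 50, 80], 1.5); // [87.5, 80, 75, 120]
```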
diff --git a/src/handpose/handpose.ts b/src/handpose/handpose.ts
index 7fa599a4..7c9b0a5f 100644
--- a/src/handpose/handpose.ts
+++ b/src/handpose/handpose.ts
@@ -8,7 +8,7 @@ import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as handdetector from './handdetector';
import * as handpipeline from './handpipeline';
-import * as fingerPose from '../fingerpose/fingerpose';
+import * as fingerPose from '../hand/fingerpose';
import type { HandResult, Box, Point } from '../result';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { Config } from '../config';
diff --git a/src/human.ts b/src/human.ts
index 762a2f83..a807c25c 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -8,9 +8,9 @@ import type { Result, FaceResult, HandResult, BodyResult, ObjectResult, GestureR
import * as tf from '../dist/tfjs.esm.js';
import * as models from './models';
import * as face from './face/face';
-import * as facemesh from './blazeface/facemesh';
+import * as facemesh from './face/facemesh';
import * as faceres from './face/faceres';
-import * as posenet from './posenet/posenet';
+import * as posenet from './body/posenet';
import * as handtrack from './hand/handtrack';
import * as handpose from './handpose/handpose';
// import * as blazepose from './body/blazepose-v1';
@@ -23,7 +23,7 @@ import * as segmentation from './segmentation/segmentation';
import * as gesture from './gesture/gesture';
import * as image from './image/image';
import * as draw from './util/draw';
-import * as persons from './persons';
+import * as persons from './util/persons';
import * as interpolate from './util/interpolate';
import * as env from './util/env';
import * as backend from './tfjs/backend';
diff --git a/src/models.ts b/src/models.ts
index ed846bd7..05010bf9 100644
--- a/src/models.ts
+++ b/src/models.ts
@@ -4,10 +4,12 @@
import { log } from './util/util';
import type { GraphModel } from './tfjs/types';
-import * as facemesh from './blazeface/facemesh';
+import * as blazeface from './face/blazeface';
+import * as facemesh from './face/facemesh';
+import * as iris from './face/iris';
import * as faceres from './face/faceres';
import * as emotion from './gear/emotion';
-import * as posenet from './posenet/posenet';
+import * as posenet from './body/posenet';
import * as handpose from './handpose/handpose';
import * as handtrack from './hand/handtrack';
import * as blazepose from './body/blazepose';
@@ -57,15 +59,13 @@ export function reset(instance: Human) {
/** Load method preloads all instance.configured models on-demand */
export async function load(instance: Human) {
if (env.initial) reset(instance);
- if (instance.config.face.enabled) { // face model is a combo that must be loaded as a whole
- if (!instance.models.facedetect) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
- if (instance.config.face.mesh?.enabled && !instance.models.facemesh) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
- if (instance.config.face.iris?.enabled && !instance.models.faceiris) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
- }
if (instance.config.hand.enabled) { // handpose model is a combo that must be loaded as a whole
if (!instance.models.handpose && instance.config.hand.detector?.modelPath?.includes('handdetect')) [instance.models.handpose, instance.models.handskeleton] = await handpose.load(instance.config);
if (!instance.models.handskeleton && instance.config.hand.landmarks && instance.config.hand.detector?.modelPath?.includes('handdetect')) [instance.models.handpose, instance.models.handskeleton] = await handpose.load(instance.config);
}
+ if (instance.config.face.enabled && !instance.models.facedetect) instance.models.facedetect = blazeface.load(instance.config);
+ if (instance.config.face.enabled && instance.config.face.mesh?.enabled && !instance.models.facemesh) instance.models.facemesh = facemesh.load(instance.config);
+ if (instance.config.face.enabled && instance.config.face.iris?.enabled && !instance.models.faceiris) instance.models.faceiris = iris.load(instance.config);
if (instance.config.hand.enabled && !instance.models.handtrack && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handtrack = handtrack.loadDetect(instance.config);
if (instance.config.hand.enabled && instance.config.hand.landmarks && !instance.models.handskeleton && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handskeleton = handtrack.loadSkeleton(instance.config);
if (instance.config.body.enabled && !instance.models.posenet && instance.config.body?.modelPath?.includes('posenet')) instance.models.posenet = posenet.load(instance.config);
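The face combo loader is replaced by independent per-model loads that follow the same guarded pattern as the hand models: start each load at most once, store the pending promise, and let the loads run in parallel. A schematic of that pattern (types and loader names are stand-ins, not the module's API):

```ts
// schematic of the guarded on-demand load pattern (names are stand-ins)
type Model = unknown;
const models: { facedetect?: Promise<Model>, facemesh?: Promise<Model> } = {};

function loadOnDemand(enabled: boolean, meshEnabled: boolean,
  loadDetect: () => Promise<Model>, loadMesh: () => Promise<Model>): void {
  if (enabled && !models.facedetect) models.facedetect = loadDetect(); // start once, keep the promise
  if (enabled && meshEnabled && !models.facemesh) models.facemesh = loadMesh();
}
```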
diff --git a/src/persons.ts b/src/util/persons.ts
similarity index 98%
rename from src/persons.ts
rename to src/util/persons.ts
index 2529edd2..46c5db08 100644
--- a/src/persons.ts
+++ b/src/util/persons.ts
@@ -2,7 +2,7 @@
 * Analyze detection results and sort & combine them into a per-person view
*/
-import type { FaceResult, BodyResult, HandResult, GestureResult, PersonResult, Box } from './result';
+import type { FaceResult, BodyResult, HandResult, GestureResult, PersonResult, Box } from '../result';
export function join(faces: Array<FaceResult>, bodies: Array<BodyResult>, hands: Array<HandResult>, gestures: Array<GestureResult>, shape: Array<number> | undefined): Array<PersonResult> {
let id = 0;
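`join()` consumes the per-category results and groups them into per-person records. A hedged invocation sketch using the relocated module path (placeholder empty inputs):

```ts
import * as persons from './util/persons';

// empty placeholder inputs; shape is the input tensor shape or undefined
const combined = persons.join([], [], [], [], undefined);
console.log(combined); // [] (no detections means no persons)
```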