From 156e857d324263a54f7a789e3ad95757bc620d8f Mon Sep 17 00:00:00 2001 From: Vladimir Mandic Date: Tue, 28 Sep 2021 12:01:48 -0400 Subject: [PATCH] redesign face processing --- TODO.md | 5 - demo/index.js | 19 +- package.json | 2 +- src/{posenet/poses.ts => body/posenet.ts} | 51 +++++- .../utils.ts => body/posenetutils.ts} | 39 +++- src/face/angles.ts | 133 ++++++++++++++ src/face/blazeface.ts | 96 ++++++++++ src/face/face.ts | 140 +-------------- src/face/facemesh.ts | 139 +++++++++++++++ .../coords.ts => face/facemeshcoords.ts} | 18 +- src/face/facemeshutil.ts | 166 ++++++++++++++++++ src/face/iris.ts | 150 ++++++++++++++++ src/gesture/gesture.ts | 6 +- .../gesture.ts => hand/fingerdef.ts} | 49 +++++- .../gestures.ts => hand/fingergesture.ts} | 7 +- .../estimator.ts => hand/fingerpose.ts} | 37 +++- src/hand/handtrack.ts | 2 +- src/handpose/handpose.ts | 2 +- src/human.ts | 6 +- src/models.ts | 14 +- src/{ => util}/persons.ts | 2 +- 21 files changed, 897 insertions(+), 186 deletions(-) rename src/{posenet/poses.ts => body/posenet.ts} (72%) rename src/{posenet/utils.ts => body/posenetutils.ts} (75%) create mode 100644 src/face/angles.ts create mode 100644 src/face/blazeface.ts create mode 100644 src/face/facemesh.ts rename src/{blazeface/coords.ts => face/facemeshcoords.ts} (99%) create mode 100644 src/face/facemeshutil.ts create mode 100644 src/face/iris.ts rename src/{fingerpose/gesture.ts => hand/fingerdef.ts} (64%) rename src/{fingerpose/gestures.ts => hand/fingergesture.ts} (91%) rename src/{fingerpose/estimator.ts => hand/fingerpose.ts} (88%) rename src/{ => util}/persons.ts (98%) diff --git a/TODO.md b/TODO.md index 1fcda431..2794f56c 100644 --- a/TODO.md +++ b/TODO.md @@ -10,11 +10,6 @@ - Evaluate and switch default default model from `handdetect` to `handtrack` -#### Body - -- Implement new variations of `BlazePose` models -- Add virtual box frame caching to `MoveNet` - #### Face - Reimplement `BlazeFace`, `FaceMesh`, `Iris` with new pipeline and frame caching diff --git a/demo/index.js b/demo/index.js index c17aba49..97fec5dc 100644 --- a/demo/index.js +++ b/demo/index.js @@ -67,6 +67,9 @@ const drawOptions = { drawLabels: true, drawPolygons: true, drawPoints: false, + fillPolygons: false, + useCurves: false, + useDepth: true, }; // ui options @@ -105,7 +108,7 @@ const ui = { lastFrame: 0, // time of last frame processing viewportSet: false, // internal, has custom viewport been set background: null, // holds instance of segmentation background image - exceptionHandler: false, // should capture all unhandled exceptions + exceptionHandler: true, // should capture all unhandled exceptions // webrtc useWebRTC: false, // use webrtc as camera source instead of local webcam @@ -684,13 +687,13 @@ function setupMenu() { setupCamera(); }); menu.display.addHTML('
'); - menu.display.addBool('use depth', human.draw.options, 'useDepth'); - menu.display.addBool('use curves', human.draw.options, 'useCurves'); - menu.display.addBool('print labels', human.draw.options, 'drawLabels'); - menu.display.addBool('draw points', human.draw.options, 'drawPoints'); - menu.display.addBool('draw boxes', human.draw.options, 'drawBoxes'); - menu.display.addBool('draw polygons', human.draw.options, 'drawPolygons'); - menu.display.addBool('fill polygons', human.draw.options, 'fillPolygons'); + menu.display.addBool('use depth', drawOptions, 'useDepth'); + menu.display.addBool('use curves', drawOptions, 'useCurves'); + menu.display.addBool('print labels', drawOptions, 'drawLabels'); + menu.display.addBool('draw points', drawOptions, 'drawPoints'); + menu.display.addBool('draw boxes', drawOptions, 'drawBoxes'); + menu.display.addBool('draw polygons', drawOptions, 'drawPolygons'); + menu.display.addBool('fill polygons', drawOptions, 'fillPolygons'); menu.image = new Menu(document.body, '', { top, left: x[1] }); menu.image.addBool('enabled', userConfig.filter, 'enabled', (val) => userConfig.filter.enabled = val); diff --git a/package.json b/package.json index 51e24130..5063fb9d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@vladmandic/human", - "version": "2.2.3", + "version": "2.3.0", "description": "Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition", "sideEffects": false, "main": "dist/human.node.js", diff --git a/src/posenet/poses.ts b/src/body/posenet.ts similarity index 72% rename from src/posenet/poses.ts rename to src/body/posenet.ts index 2b8c75a9..6ab241c9 100644 --- a/src/posenet/poses.ts +++ b/src/body/posenet.ts @@ -1,11 +1,19 @@ /** * PoseNet body detection model implementation - * See `posenet.ts` for entry point + * + * Based on: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5) */ -import * as utils from './utils'; -import * as kpt from './keypoints'; -import type { Box } from '../result'; +import { log, join } from '../util/util'; +import * as tf from '../../dist/tfjs.esm.js'; +import type { BodyResult, Box } from '../result'; +import type { Tensor, GraphModel } from '../tfjs/types'; +import type { Config } from '../config'; +import { env } from '../util/env'; +import * as utils from './posenetutils'; + +let model: GraphModel; +const poseNetOutputs = ['MobilenetV1/offset_2/BiasAdd'/* offsets */, 'MobilenetV1/heatmap_2/BiasAdd'/* heatmapScores */, 'MobilenetV1/displacement_fwd_2/BiasAdd'/* displacementFwd */, 'MobilenetV1/displacement_bwd_2/BiasAdd'/* displacementBwd */]; const localMaximumRadius = 1; const outputStride = 16; @@ -37,11 +45,11 @@ function traverse(edgeId, sourceKeypoint, targetId, scores, offsets, displacemen } const targetKeyPointIndices = getStridedIndexNearPoint(targetKeypoint, height, width); const score = scores.get(targetKeyPointIndices.y, targetKeyPointIndices.x, targetId); - return { position: targetKeypoint, part: kpt.partNames[targetId], score }; + return { position: targetKeypoint, part: utils.partNames[targetId], score }; } export function decodePose(root, scores, offsets, displacementsFwd, displacementsBwd) { - const tuples = kpt.poseChain.map(([parentJoinName, childJoinName]) => ([kpt.partIds[parentJoinName], kpt.partIds[childJoinName]])); + const tuples = 
utils.poseChain.map(([parentJoinName, childJoinName]) => ([utils.partIds[parentJoinName], utils.partIds[childJoinName]])); const edgesFwd = tuples.map(([, childJointId]) => childJointId); const edgesBwd = tuples.map(([parentJointId]) => parentJointId); const numParts = scores.shape[2]; // [21,21,17] @@ -51,7 +59,7 @@ export function decodePose(root, scores, offsets, displacementsFwd, displacement const rootPoint = utils.getImageCoords(root.part, outputStride, offsets); keypoints[root.part.id] = { score: root.score, - part: kpt.partNames[root.part.id], + part: utils.partNames[root.part.id], position: rootPoint, }; // Decode the part positions upwards in the tree, following the backward displacements. @@ -146,3 +154,32 @@ export function decode(offsets, scores, displacementsFwd, displacementsBwd, maxD } return poses; } + +export async function predict(input: Tensor, config: Config): Promise { + const res = tf.tidy(() => { + if (!model.inputs[0].shape) return []; + const resized = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]]); + const normalized = tf.sub(tf.div(tf.cast(resized, 'float32'), 127.5), 1.0); + const results: Array = model.execute(normalized, poseNetOutputs) as Array; + const results3d = results.map((y) => tf.squeeze(y, [0])); + results3d[1] = results3d[1].sigmoid(); // apply sigmoid on scores + return results3d; + }); + + const buffers = await Promise.all(res.map((tensor: Tensor) => tensor.buffer())); + for (const t of res) tf.dispose(t); + + const decoded = await decode(buffers[0], buffers[1], buffers[2], buffers[3], config.body.maxDetected, config.body.minConfidence); + if (!model.inputs[0].shape) return []; + const scaled = utils.scalePoses(decoded, [input.shape[1], input.shape[2]], [model.inputs[0].shape[2], model.inputs[0].shape[1]]) as BodyResult[]; + return scaled; +} + +export async function load(config: Config): Promise { + if (!model || env.initial) { + model = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel; + if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath); + else if (config.debug) log('load model:', model['modelUrl']); + } else if (config.debug) log('cached model:', model['modelUrl']); + return model; +} diff --git a/src/posenet/utils.ts b/src/body/posenetutils.ts similarity index 75% rename from src/posenet/utils.ts rename to src/body/posenetutils.ts index a495a17e..1d963692 100644 --- a/src/posenet/utils.ts +++ b/src/body/posenetutils.ts @@ -3,15 +3,48 @@ * See `posenet.ts` for entry point */ -import * as kpt from './keypoints'; import type { BodyResult } from '../result'; +export const partNames = [ + 'nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder', + 'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist', + 'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle', +]; + +export const count = partNames.length; // 17 keypoints + +export const partIds = partNames.reduce((result, jointName, i) => { + result[jointName] = i; + return result; +}, {}); + +const connectedPartNames = [ + ['leftHip', 'leftShoulder'], ['leftElbow', 'leftShoulder'], + ['leftElbow', 'leftWrist'], ['leftHip', 'leftKnee'], + ['leftKnee', 'leftAnkle'], ['rightHip', 'rightShoulder'], + ['rightElbow', 'rightShoulder'], ['rightElbow', 'rightWrist'], + ['rightHip', 'rightKnee'], ['rightKnee', 'rightAnkle'], + ['leftShoulder', 'rightShoulder'], ['leftHip', 'rightHip'], +]; +export const connectedPartIndices = 
connectedPartNames.map(([jointNameA, jointNameB]) => ([partIds[jointNameA], partIds[jointNameB]]));
+
+export const poseChain = [
+  ['nose', 'leftEye'], ['leftEye', 'leftEar'], ['nose', 'rightEye'],
+  ['rightEye', 'rightEar'], ['nose', 'leftShoulder'],
+  ['leftShoulder', 'leftElbow'], ['leftElbow', 'leftWrist'],
+  ['leftShoulder', 'leftHip'], ['leftHip', 'leftKnee'],
+  ['leftKnee', 'leftAnkle'], ['nose', 'rightShoulder'],
+  ['rightShoulder', 'rightElbow'], ['rightElbow', 'rightWrist'],
+  ['rightShoulder', 'rightHip'], ['rightHip', 'rightKnee'],
+  ['rightKnee', 'rightAnkle'],
+];
+
 export function eitherPointDoesntMeetConfidence(a: number, b: number, minConfidence: number) {
   return (a < minConfidence || b < minConfidence);
 }
 
 export function getAdjacentKeyPoints(keypoints, minConfidence: number) {
-  return kpt.connectedPartIndices.reduce((result, [leftJoint, rightJoint]) => {
+  return connectedPartIndices.reduce((result, [leftJoint, rightJoint]) => {
     if (eitherPointDoesntMeetConfidence(keypoints[leftJoint].score, keypoints[rightJoint].score, minConfidence)) {
       return result;
     }
@@ -123,7 +156,7 @@ export class MaxHeap {
 export function getOffsetPoint(y, x, keypoint, offsets) {
   return {
     y: offsets.get(y, x, keypoint),
-    x: offsets.get(y, x, keypoint + kpt.count),
+    x: offsets.get(y, x, keypoint + count),
   };
 }
diff --git a/src/face/angles.ts b/src/face/angles.ts
new file mode 100644
index 00000000..9e23f5cb
--- /dev/null
+++ b/src/face/angles.ts
@@ -0,0 +1,133 @@
+// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
+const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);
+
+const calculateGaze = (face): { bearing: number, strength: number } => {
+  const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
+  if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 };
+
+  const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes
+  const eyeRatio = 1; // factor to normalize changes x vs y
+
+  const left = face.mesh[33][2] > face.mesh[263][2]; // pick the left or right eye depending on which one is closer, based on the z-axis of the outside corner point
+  const irisCenter = left ? face.mesh[473] : face.mesh[468];
+  const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so the relative point is less precise
+    ? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2]
+    : [(face.mesh[263][0] + face.mesh[362][0]) / 2, (face.mesh[263][1] + face.mesh[362][1]) / 2];
+  const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions
+    ? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]]
+    : [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]];
+
+  const eyeDiff = [ // x distance between extreme point and center point normalized with eye size
+    (eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
+    eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
+  ];
+  let strength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences
+  strength = Math.min(strength, face.boxRaw[2] / 2, face.boxRaw[3] / 2); // limit strength to half of box size to avoid clipping due to low precision
+  const bearing = (radians([0, 0], eyeDiff) + (Math.PI / 2)) % Math.PI; // use eyeDiff instead of the eyeCenter/irisCenter combo due to manual adjustments, and rotate clockwise by 90 degrees
+
+  return { bearing, strength };
+};
+
+export const calculateFaceAngle = (face, imageSize): {
+  angle: { pitch: number, yaw: number, roll: number },
+  matrix: [number, number, number, number, number, number, number, number, number],
+  gaze: { bearing: number, strength: number },
+} => {
+  // const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360);
+  const normalize = (v) => { // normalize vector
+    const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
+    v[0] /= length;
+    v[1] /= length;
+    v[2] /= length;
+    return v;
+  };
+  const subVectors = (a, b) => { // vector subtraction (a - b)
+    const x = a[0] - b[0];
+    const y = a[1] - b[1];
+    const z = a[2] - b[2];
+    return [x, y, z];
+  };
+  const crossVectors = (a, b) => { // vector cross product (a x b)
+    const x = a[1] * b[2] - a[2] * b[1];
+    const y = a[2] * b[0] - a[0] * b[2];
+    const z = a[0] * b[1] - a[1] * b[0];
+    return [x, y, z];
+  };
+  // 3x3 rotation matrix to Euler angles based on https://www.geometrictools.com/Documentation/EulerAngles.pdf
+  const rotationMatrixToEulerAngle = (r) => {
+    // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
+    const [r00, r01, r02, r10, r11, r12, r20, r21, r22] = r;
+    let thetaX: number;
+    let thetaY: number;
+    let thetaZ: number;
+    if (r10 < 1) { // YZX calculation
+      if (r10 > -1) {
+        thetaZ = Math.asin(r10);
+        thetaY = Math.atan2(-r20, r00);
+        thetaX = Math.atan2(-r12, r11);
+      } else {
+        thetaZ = -Math.PI / 2;
+        thetaY = -Math.atan2(r21, r22);
+        thetaX = 0;
+      }
+    } else {
+      thetaZ = Math.PI / 2;
+      thetaY = Math.atan2(r21, r22);
+      thetaX = 0;
+    }
+    if (isNaN(thetaX)) thetaX = 0;
+    if (isNaN(thetaY)) thetaY = 0;
+    if (isNaN(thetaZ)) thetaZ = 0;
+    return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ };
+  };
+  // simple Euler angle calculation based on the existing 3D mesh
+  // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
+  const meshToEulerAngle = (mesh) => {
+    const radians = (a1, a2, b1, b2) => Math.atan2(b2 - a2, b1 - a1);
+    // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
+    const angle = {
+      // values are in radians in range of -pi/2 to pi/2 which is -90 to +90 degrees, value of 0 means center
+      // pitch is face move up/down
+      pitch: radians(mesh[10][1], mesh[10][2], mesh[152][1], mesh[152][2]), // looking at y,z of top and bottom points of the face
+      // yaw is face turn left/right
+      yaw: radians(mesh[33][0], mesh[33][2], mesh[263][0], mesh[263][2]), // looking at x,z of outside corners of leftEye and rightEye
+      // roll is face lean left/right
+      roll: radians(mesh[33][0], mesh[33][1], mesh[263][0], mesh[263][1]), // looking at x,y of outside corners of leftEye and rightEye
+    };
+    return angle;
+  };
+
+  // initialize gaze and mesh
+  const mesh = face.meshRaw;
+  if (!mesh || mesh.length < 300) return { angle: { pitch: 0, yaw: 0, roll: 0 }, matrix: [1, 0, 0, 0, 1, 0, 0, 0, 1], gaze: { bearing: 0, strength: 0 } };
+
+  const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5;
+  // top, bottom, left, right
+  const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [
+    // make the xyz coordinates proportional, independent of the image/box size
+    pt[0] * imageSize[0] / size,
+    pt[1] * imageSize[1] / size,
+    pt[2],
+  ]);
+
+  const y_axis = normalize(subVectors(pts[1], pts[0]));
+  let x_axis = normalize(subVectors(pts[3], pts[2]));
+  const z_axis = normalize(crossVectors(x_axis, y_axis));
+  // adjust x_axis to make sure that all axes are perpendicular to each other
+  x_axis = crossVectors(y_axis, z_axis);
+
+  // Rotation Matrix from Axis Vectors - http://renderdan.blogspot.com/2006/05/rotation-matrix-from-axis-vectors.html
+  // the 3x3 rotation matrix is flattened to an array in row-major order; note that the rotation represented by this matrix is inverted
+  const matrix: [number, number, number, number, number, number, number, number, number] = [
+    x_axis[0], x_axis[1], x_axis[2],
+    y_axis[0], y_axis[1], y_axis[2],
+    z_axis[0], z_axis[1], z_axis[2],
+  ];
+  const angle = rotationMatrixToEulerAngle(matrix);
+  // const angle = meshToEulerAngle(mesh);
+
+  // we have iris keypoints so we can calculate gaze direction
+  const gaze = mesh.length === 478 ? calculateGaze(face) : { bearing: 0, strength: 0 };
+
+  return { angle, matrix, gaze };
+};
diff --git a/src/face/blazeface.ts b/src/face/blazeface.ts
new file mode 100644
index 00000000..afbde8ff
--- /dev/null
+++ b/src/face/blazeface.ts
@@ -0,0 +1,96 @@
+/**
+ * BlazeFace, FaceMesh & Iris model implementation
+ * See `facemesh.ts` for entry point
+ */
+
+import { log, join } from '../util/util';
+import * as tf from '../../dist/tfjs.esm.js';
+import * as util from './facemeshutil';
+import type { Config } from '../config';
+import type { Tensor, GraphModel } from '../tfjs/types';
+import { env } from '../util/env';
+
+const keypointsCount = 6;
+let model: GraphModel | null;
+let anchorsData: [number, number][] = [];
+let anchors: Tensor | null = null;
+let inputSize = 0;
+
+// export const size = () => (model && model.inputs[0].shape ? model.inputs[0].shape[2] : 0);
+export const size = () => inputSize;
+
+export async function load(config: Config): Promise<GraphModel> {
+  if (env.initial) model = null;
+  if (!model) {
+    model = await tf.loadGraphModel(join(config.modelBasePath, config.face.detector?.modelPath || '')) as unknown as GraphModel;
+    if (!model || !model['modelUrl']) log('load model failed:', config.face.detector?.modelPath);
+    else if (config.debug) log('load model:', model['modelUrl']);
+  } else if (config.debug) log('cached model:', model['modelUrl']);
+  inputSize = model.inputs[0].shape ? 
model.inputs[0].shape[2] : 0; + if (inputSize === -1) inputSize = 64; + anchorsData = util.generateAnchors(inputSize); + anchors = tf.tensor2d(anchorsData); + return model; +} + +function decodeBounds(boxOutputs) { + const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]); + const centers = tf.add(boxStarts, anchors); + const boxSizes = tf.slice(boxOutputs, [0, 3], [-1, 2]); + const boxSizesNormalized = tf.div(boxSizes, inputSize); + const centersNormalized = tf.div(centers, inputSize); + const halfBoxSize = tf.div(boxSizesNormalized, 2); + const starts = tf.sub(centersNormalized, halfBoxSize); + const ends = tf.add(centersNormalized, halfBoxSize); + const startNormalized = tf.mul(starts, inputSize); + const endNormalized = tf.mul(ends, inputSize); + const concatAxis = 1; + return tf.concat2d([startNormalized, endNormalized], concatAxis); +} + +export async function getBoxes(inputImage: Tensor, config: Config) { + // sanity check on input + if ((!inputImage) || (inputImage['isDisposedInternal']) || (inputImage.shape.length !== 4) || (inputImage.shape[1] < 1) || (inputImage.shape[2] < 1)) return { boxes: [] }; + const [batch, boxes, scores] = tf.tidy(() => { + const resizedImage = tf.image.resizeBilinear(inputImage, [inputSize, inputSize]); + const normalizedImage = tf.sub(tf.div(resizedImage, 127.5), 0.5); + const res = model?.execute(normalizedImage); + let batchOut; + if (Array.isArray(res)) { // are we using tfhub or pinto converted model? + const sorted = res.sort((a, b) => a.size - b.size); + const concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16 + const concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16 + const concat = tf.concat([concat512, concat384], 1); + batchOut = tf.squeeze(concat, 0); + } else { + batchOut = tf.squeeze(res); // when using tfhub model + } + const boxesOut = decodeBounds(batchOut); + const logits = tf.slice(batchOut, [0, 0], [-1, 1]); + const scoresOut = tf.squeeze(tf.sigmoid(logits)); // inside tf.tidy + return [batchOut, boxesOut, scoresOut]; + }); + + const nmsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, (config.face.detector?.maxDetected || 0), (config.face.detector?.iouThreshold || 0), (config.face.detector?.minConfidence || 0)); + const nms = await nmsTensor.array(); + tf.dispose(nmsTensor); + const annotatedBoxes: Array<{ box: { startPoint: Tensor, endPoint: Tensor }, landmarks: Tensor, anchor: [number, number] | undefined, confidence: number }> = []; + const scoresData = await scores.data(); + for (let i = 0; i < nms.length; i++) { + const confidence = scoresData[nms[i]]; + if (confidence > (config.face.detector?.minConfidence || 0)) { + const boundingBox = tf.slice(boxes, [nms[i], 0], [1, -1]); + const landmarks = tf.tidy(() => tf.reshape(tf.squeeze(tf.slice(batch, [nms[i], keypointsCount - 1], [1, -1])), [keypointsCount, -1])); + annotatedBoxes.push({ box: util.createBox(boundingBox), landmarks, anchor: anchorsData[nms[i]], confidence }); + tf.dispose(boundingBox); + } + } + tf.dispose(batch); + tf.dispose(boxes); + tf.dispose(scores); + + return { + boxes: annotatedBoxes, + scaleFactor: [inputImage.shape[2] / inputSize, inputImage.shape[1] / inputSize], + }; +} diff --git a/src/face/face.ts b/src/face/face.ts index 4745abc2..4a17b1a2 100644 --- a/src/face/face.ts +++ b/src/face/face.ts @@ -5,145 +5,12 @@ import { log, now } from '../util/util'; import * as tf from '../../dist/tfjs.esm.js'; -import * as facemesh from '../blazeface/facemesh'; +import * as facemesh from './facemesh'; import * as 
emotion from '../gear/emotion'; import * as faceres from './faceres'; import type { FaceResult } from '../result'; import type { Tensor } from '../tfjs/types'; - -// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars -const rad2deg = (theta) => Math.round((theta * 180) / Math.PI); - -const calculateGaze = (face): { bearing: number, strength: number } => { - const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points - if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 }; - - const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes - const eyeRatio = 1; // factor to normalize changes x vs y - - const left = face.mesh[33][2] > face.mesh[263][2]; // pick left or right eye depending which one is closer bazed on outsize point z axis - const irisCenter = left ? face.mesh[473] : face.mesh[468]; - const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so relative point is less precise - ? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2] - : [(face.mesh[263][0] + face.mesh[362][0]) / 2, (face.mesh[263][1] + face.mesh[362][1]) / 2]; - const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions - ? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]] - : [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]]; - - const eyeDiff = [ // x distance between extreme point and center point normalized with eye size - (eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0], - eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1], - ]; - let strength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences - strength = Math.min(strength, face.boxRaw[2] / 2, face.boxRaw[3] / 2); // limit strength to half of box size to avoid clipping due to low precision - const bearing = (radians([0, 0], eyeDiff) + (Math.PI / 2)) % Math.PI; // using eyeDiff instead eyeCenter/irisCenter combo due to manual adjustments and rotate clockwise 90degrees - - return { bearing, strength }; -}; - -const calculateFaceAngle = (face, imageSize): { - angle: { pitch: number, yaw: number, roll: number }, - matrix: [number, number, number, number, number, number, number, number, number], - gaze: { bearing: number, strength: number }, -} => { - // const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360); - const normalize = (v) => { // normalize vector - const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); - v[0] /= length; - v[1] /= length; - v[2] /= length; - return v; - }; - const subVectors = (a, b) => { // vector subtraction (a - b) - const x = a[0] - b[0]; - const y = a[1] - b[1]; - const z = a[2] - b[2]; - return [x, y, z]; - }; - const crossVectors = (a, b) => { // vector cross product (a x b) - const x = a[1] * b[2] - a[2] * b[1]; - const y = a[2] * b[0] - a[0] * b[2]; - const z = a[0] * b[1] - a[1] * b[0]; - return [x, y, z]; - }; - // 3x3 rotation matrix to Euler angles based on https://www.geometrictools.com/Documentation/EulerAngles.pdf - const rotationMatrixToEulerAngle = (r) => { - // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars - const [r00, r01, 
r02, r10, r11, r12, r20, r21, r22] = r; - let thetaX: number; - let thetaY: number; - let thetaZ: number; - if (r10 < 1) { // YZX calculation - if (r10 > -1) { - thetaZ = Math.asin(r10); - thetaY = Math.atan2(-r20, r00); - thetaX = Math.atan2(-r12, r11); - } else { - thetaZ = -Math.PI / 2; - thetaY = -Math.atan2(r21, r22); - thetaX = 0; - } - } else { - thetaZ = Math.PI / 2; - thetaY = Math.atan2(r21, r22); - thetaX = 0; - } - if (isNaN(thetaX)) thetaX = 0; - if (isNaN(thetaY)) thetaY = 0; - if (isNaN(thetaZ)) thetaZ = 0; - return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ }; - }; - // simple Euler angle calculation based existing 3D mesh - // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars - const meshToEulerAngle = (mesh) => { - const radians = (a1, a2, b1, b2) => Math.atan2(b2 - a2, b1 - a1); - // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars - const angle = { - // values are in radians in range of -pi/2 to pi/2 which is -90 to +90 degrees, value of 0 means center - // pitch is face move up/down - pitch: radians(mesh[10][1], mesh[10][2], mesh[152][1], mesh[152][2]), // looking at y,z of top and bottom points of the face - // yaw is face turn left/right - yaw: radians(mesh[33][0], mesh[33][2], mesh[263][0], mesh[263][2]), // looking at x,z of outside corners of leftEye and rightEye - // roll is face lean left/right - roll: radians(mesh[33][0], mesh[33][1], mesh[263][0], mesh[263][1]), // looking at x,y of outside corners of leftEye and rightEye - }; - return angle; - }; - - // initialize gaze and mesh - const mesh = face.meshRaw; - if (!mesh || mesh.length < 300) return { angle: { pitch: 0, yaw: 0, roll: 0 }, matrix: [1, 0, 0, 0, 1, 0, 0, 0, 1], gaze: { bearing: 0, strength: 0 } }; - - const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5; - // top, bottom, left, right - const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [ - // make the xyz coordinates proportional, independent of the image/box size - pt[0] * imageSize[0] / size, - pt[1] * imageSize[1] / size, - pt[2], - ]); - - const y_axis = normalize(subVectors(pts[1], pts[0])); - let x_axis = normalize(subVectors(pts[3], pts[2])); - const z_axis = normalize(crossVectors(x_axis, y_axis)); - // adjust x_axis to make sure that all axes are perpendicular to each other - x_axis = crossVectors(y_axis, z_axis); - - // Rotation Matrix from Axis Vectors - http://renderdan.blogspot.com/2006/05/rotation-matrix-from-axis-vectors.html - // 3x3 rotation matrix is flatten to array in row-major order. Note that the rotation represented by this matrix is inverted. - const matrix: [number, number, number, number, number, number, number, number, number] = [ - x_axis[0], x_axis[1], x_axis[2], - y_axis[0], y_axis[1], y_axis[2], - z_axis[0], z_axis[1], z_axis[2], - ]; - const angle = rotationMatrixToEulerAngle(matrix); - // const angle = meshToEulerAngle(mesh); - - // we have iris keypoints so we can calculate gaze direction - const gaze = mesh.length === 478 ? 
calculateGaze(face) : { bearing: 0, strength: 0 }; - - return { angle, matrix, gaze }; -}; +import { calculateFaceAngle } from './angles'; export const detectFace = async (parent /* instance of human */, input: Tensor): Promise => { // run facemesh, includes blazeface and iris @@ -158,6 +25,7 @@ export const detectFace = async (parent /* instance of human */, input: Tensor): const faceRes: Array = []; parent.state = 'run:face'; timeStamp = now(); + const faces = await facemesh.predict(input, parent.config); parent.performance.face = Math.trunc(now() - timeStamp); if (!input.shape || input.shape.length !== 4) return []; @@ -226,7 +94,7 @@ export const detectFace = async (parent /* instance of human */, input: Tensor): delete faces[i].annotations.leftEyeIris; delete faces[i].annotations.rightEyeIris; } - const irisSize = (faces[i].annotations && faces[i].annotations.leftEyeIris && faces[i].annotations.rightEyeIris + const irisSize = (faces[i].annotations && faces[i].annotations.leftEyeIris && faces[i].annotations.leftEyeIris[0] && faces[i].annotations.rightEyeIris && faces[i].annotations.rightEyeIris[0] && (faces[i].annotations.leftEyeIris.length > 0) && (faces[i].annotations.rightEyeIris.length > 0) && (faces[i].annotations.leftEyeIris[0] !== null) && (faces[i].annotations.rightEyeIris[0] !== null)) ? Math.max(Math.abs(faces[i].annotations.leftEyeIris[3][0] - faces[i].annotations.leftEyeIris[1][0]), Math.abs(faces[i].annotations.rightEyeIris[4][1] - faces[i].annotations.rightEyeIris[2][1])) / input.shape[2] diff --git a/src/face/facemesh.ts b/src/face/facemesh.ts new file mode 100644 index 00000000..256a9ce6 --- /dev/null +++ b/src/face/facemesh.ts @@ -0,0 +1,139 @@ +/** + * BlazeFace, FaceMesh & Iris model implementation + * + * Based on: + * - [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view) + * - Facial Spacial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view) + * - Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view) + */ + +import { log, join } from '../util/util'; +import * as tf from '../../dist/tfjs.esm.js'; +import * as blazeface from './blazeface'; +import * as util from './facemeshutil'; +import * as coords from './facemeshcoords'; +import * as iris from './iris'; +import type { GraphModel, Tensor } from '../tfjs/types'; +import type { FaceResult, Point } from '../result'; +import type { Config } from '../config'; +import { env } from '../util/env'; + +type BoxCache = { startPoint: Point, endPoint: Point, landmarks: Array, confidence: number, faceConfidence?: number | undefined }; +let boxCache: Array = []; +let model: GraphModel | null = null; +let inputSize = 0; +let skipped = Number.MAX_SAFE_INTEGER; +let detectedFaces = 0; + +export async function predict(input: Tensor, config: Config): Promise { + if (!config.skipFrame || (((detectedFaces !== config.face.detector?.maxDetected) || !config.face.mesh?.enabled)) && (skipped > (config.face.detector?.skipFrames || 0))) { // reset cached boxes + const newBoxes = await blazeface.getBoxes(input, config); // get results from blazeface detector + boxCache = []; // empty cache + for (const possible of newBoxes.boxes) { // extract data from detector + const startPoint = await possible.box.startPoint.data() as unknown as Point; + const endPoint = await possible.box.endPoint.data() as unknown as Point; + const landmarks = await possible.landmarks.array() as Array; + 
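+        // cache the decoded detector output so that subsequent frames can reuse these boxes and skip the detector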
boxCache.push({ startPoint, endPoint, landmarks, confidence: possible.confidence });
+    }
+    newBoxes.boxes.forEach((prediction) => tf.dispose([prediction.box.startPoint, prediction.box.endPoint, prediction.landmarks]));
+    for (let i = 0; i < boxCache.length; i++) { // enlarge and squarify detected boxes
+      const scaledBox = util.scaleBoxCoordinates({ startPoint: boxCache[i].startPoint, endPoint: boxCache[i].endPoint }, newBoxes.scaleFactor);
+      const enlargedBox = util.enlargeBox(scaledBox);
+      const squarifiedBox = util.squarifyBox(enlargedBox);
+      boxCache[i] = { ...squarifiedBox, confidence: boxCache[i].confidence, landmarks: boxCache[i].landmarks };
+    }
+    skipped = 0;
+  } else {
+    skipped++;
+  }
+
+  const faces: Array<FaceResult> = [];
+  const newBoxes: Array<BoxCache> = [];
+  let id = 0;
+  for (let box of boxCache) {
+    let angle = 0;
+    let rotationMatrix;
+    const face: FaceResult = {
+      id: id++,
+      mesh: [],
+      meshRaw: [],
+      box: [0, 0, 0, 0],
+      boxRaw: [0, 0, 0, 0],
+      score: 0,
+      boxScore: 0,
+      faceScore: 0,
+      annotations: {},
+    };
+
+    if (config.face.detector?.rotation && config.face.mesh?.enabled && env.kernels.includes('rotatewithoffset')) {
+      [angle, rotationMatrix, face.tensor] = util.correctFaceRotation(box, input, inputSize);
+    } else {
+      rotationMatrix = util.IDENTITY_MATRIX;
+      const cut = util.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, input, config.face.mesh?.enabled ? [inputSize, inputSize] : [blazeface.size(), blazeface.size()]);
+      face.tensor = tf.div(cut, 255);
+      tf.dispose(cut);
+    }
+    face.boxScore = Math.round(100 * box.confidence) / 100;
+    if (!config.face.mesh?.enabled) { // mesh not enabled, return results from detector only
+      face.box = util.getClampedBox(box, input);
+      face.boxRaw = util.getRawBox(box, input);
+      face.score = Math.round(100 * box.confidence || 0) / 100;
+      face.mesh = box.landmarks.map((pt) => [
+        ((box.startPoint[0] + box.endPoint[0])) / 2 + ((box.endPoint[0] + box.startPoint[0]) * pt[0] / blazeface.size()),
+        ((box.startPoint[1] + box.endPoint[1])) / 2 + ((box.endPoint[1] + box.startPoint[1]) * pt[1] / blazeface.size()),
+      ]);
+      face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
+      for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key]]]; // add annotations
+    } else if (!model) { // mesh enabled, but not loaded
+      if (config.debug) log('face mesh detection requested, but model is not loaded');
+    } else { // mesh enabled
+      const [contours, confidence, contourCoords] = model.execute(face.tensor as Tensor) as Array<Tensor>; // first returned tensor represents facial contours which are already included in the coordinates.
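+      // dispose intermediate tensors as soon as they are no longer needed to avoid leaking memory between frames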
+      tf.dispose(contours);
+      const faceConfidence = (await confidence.data())[0] as number;
+      tf.dispose(confidence);
+      const coordsReshaped = tf.reshape(contourCoords, [-1, 3]);
+      let rawCoords = await coordsReshaped.array();
+      tf.dispose(contourCoords);
+      tf.dispose(coordsReshaped);
+      if (faceConfidence < (config.face.detector?.minConfidence || 1)) {
+        box.confidence = faceConfidence; // reset confidence of cached box
+        tf.dispose(face.tensor);
+      } else {
+        if (config.face.iris?.enabled) rawCoords = await iris.augmentIris(rawCoords, face.tensor, config, inputSize); // augment results with iris
+        face.mesh = util.transformRawCoords(rawCoords, box, angle, rotationMatrix, inputSize); // get processed mesh
+        face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
+        box = { ...util.enlargeBox(util.calculateLandmarksBoundingBox(face.mesh), 1.5), confidence: box.confidence }; // redefine box with mesh calculated one
+        for (const key of Object.keys(coords.meshAnnotations)) face.annotations[key] = coords.meshAnnotations[key].map((index) => face.mesh[index]); // add annotations
+        if (config.face.detector?.rotation && config.face.mesh.enabled && config.face.description?.enabled && env.kernels.includes('rotatewithoffset')) { // do rotation one more time with mesh keypoints if we want to return perfect image
+          tf.dispose(face.tensor); // dispose so we can overwrite original face
+          [angle, rotationMatrix, face.tensor] = util.correctFaceRotation(box, input, inputSize);
+        }
+        face.box = util.getClampedBox(box, input); // update detected box with box around the face mesh
+        face.boxRaw = util.getRawBox(box, input);
+        face.score = Math.round(100 * faceConfidence || 100 * box.confidence || 0) / 100;
+        face.faceScore = Math.round(100 * faceConfidence) / 100;
+        box = { ...util.squarifyBox(box), confidence: box.confidence, faceConfidence }; // update stored cache values
+      }
+    }
+    faces.push(face);
+    newBoxes.push(box);
+  }
+  if (config.face.mesh?.enabled) boxCache = newBoxes.filter((a) => a.confidence > (config.face.detector?.minConfidence || 0)); // remove cache entries for detected boxes on low confidence
+  detectedFaces = faces.length;
+  return faces;
+}
+
+export async function load(config: Config): Promise<GraphModel> {
+  if (env.initial) model = null;
+  if (!model) {
+    model = await tf.loadGraphModel(join(config.modelBasePath, config.face.mesh?.modelPath || '')) as unknown as GraphModel;
+    if (!model || !model['modelUrl']) log('load model failed:', config.face.mesh?.modelPath);
+    else if (config.debug) log('load model:', model['modelUrl']);
+  } else if (config.debug) log('cached model:', model['modelUrl']);
+  inputSize = model.inputs[0].shape ? 
model.inputs[0].shape[2] : 0; + if (inputSize === -1) inputSize = 64; + return model; +} + +export const triangulation = coords.TRI468; +export const uvmap = coords.UV468; diff --git a/src/blazeface/coords.ts b/src/face/facemeshcoords.ts similarity index 99% rename from src/blazeface/coords.ts rename to src/face/facemeshcoords.ts index 6f420fd9..c06dd6f1 100644 --- a/src/blazeface/coords.ts +++ b/src/face/facemeshcoords.ts @@ -3,7 +3,7 @@ * See `facemesh.ts` for entry point */ -export const MESH_ANNOTATIONS = { +export const meshAnnotations = { silhouette: [ 10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136, @@ -42,6 +42,22 @@ export const MESH_ANNOTATIONS = { leftCheek: [425], }; +export const meshLandmarks = { + count: 468, + mouth: 13, + symmetryLine: [13, meshAnnotations['midwayBetweenEyes'][0]], +}; + +export const blazeFaceLandmarks = { + leftEye: 0, + rightEye: 1, + nose: 2, + mouth: 3, + leftEar: 4, + rightEar: 5, + symmetryLine: [3, 2], +}; + export const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypoints to iris model keypoints. { key: 'EyeUpper0', indices: [9, 10, 11, 12, 13, 14, 15] }, { key: 'EyeUpper1', indices: [25, 26, 27, 28, 29, 30, 31] }, diff --git a/src/face/facemeshutil.ts b/src/face/facemeshutil.ts new file mode 100644 index 00000000..aa5cdc68 --- /dev/null +++ b/src/face/facemeshutil.ts @@ -0,0 +1,166 @@ +/** + * BlazeFace, FaceMesh & Iris model implementation + * See `facemesh.ts` for entry point + */ + +import * as tf from '../../dist/tfjs.esm.js'; +import * as coords from './facemeshcoords'; +import type { Box, Point } from '../result'; + +export const createBox = (startEndTensor) => ({ startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]), endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]) }); + +export const disposeBox = (t) => tf.dispose([t.startPoint, t.endPoint]); + +export const getBoxSize = (box): [number, number] => [Math.abs(box.endPoint[0] - box.startPoint[0]), Math.abs(box.endPoint[1] - box.startPoint[1])]; + +export const getBoxCenter = (box): [number, number] => [box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2, box.startPoint[1] + (box.endPoint[1] - box.startPoint[1]) / 2]; + +export const getClampedBox = (box, input): Box => (box ? [ + Math.trunc(Math.max(0, box.startPoint[0])), + Math.trunc(Math.max(0, box.startPoint[1])), + Math.trunc(Math.min((input.shape[2] || 0), box.endPoint[0]) - Math.max(0, box.startPoint[0])), + Math.trunc(Math.min((input.shape[1] || 0), box.endPoint[1]) - Math.max(0, box.startPoint[1])), +] : [0, 0, 0, 0]); + +export const getRawBox = (box, input): Box => (box ? 
[ + box.startPoint[0] / (input.shape[2] || 0), + box.startPoint[1] / (input.shape[1] || 0), + (box.endPoint[0] - box.startPoint[0]) / (input.shape[2] || 0), + (box.endPoint[1] - box.startPoint[1]) / (input.shape[1] || 0), +] : [0, 0, 0, 0]); + +export const scaleBoxCoordinates = (box, factor) => { + const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]]; + const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]]; + return { startPoint, endPoint }; +}; + +export const cutBoxFromImageAndResize = (box, image, cropSize) => { + const h = image.shape[1]; + const w = image.shape[2]; + return tf.image.cropAndResize(image, [[box.startPoint[1] / h, box.startPoint[0] / w, box.endPoint[1] / h, box.endPoint[0] / w]], [0], cropSize); +}; + +export const enlargeBox = (box, factor = 1.5) => { + const center = getBoxCenter(box); + const size = getBoxSize(box); + const halfSize: [number, number] = [factor * size[0] / 2, factor * size[1] / 2]; + return { startPoint: [center[0] - halfSize[0], center[1] - halfSize[1]] as Point, endPoint: [center[0] + halfSize[0], center[1] + halfSize[1]] as Point, landmarks: box.landmarks }; +}; + +export const squarifyBox = (box) => { + const centers = getBoxCenter(box); + const size = getBoxSize(box); + const halfSize = Math.max(...size) / 2; + return { startPoint: [Math.round(centers[0] - halfSize), Math.round(centers[1] - halfSize)] as Point, endPoint: [Math.round(centers[0] + halfSize), Math.round(centers[1] + halfSize)] as Point, landmarks: box.landmarks }; +}; + +export const calculateLandmarksBoundingBox = (landmarks) => { + const xs = landmarks.map((d) => d[0]); + const ys = landmarks.map((d) => d[1]); + return { startPoint: [Math.min(...xs), Math.min(...ys)], endPoint: [Math.max(...xs), Math.max(...ys)], landmarks }; +}; + +export const IDENTITY_MATRIX = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]; + +export const normalizeRadians = (angle) => angle - 2 * Math.PI * Math.floor((angle + Math.PI) / (2 * Math.PI)); + +export const computeRotation = (point1, point2) => normalizeRadians(Math.PI / 2 - Math.atan2(-(point2[1] - point1[1]), point2[0] - point1[0])); + +export const radToDegrees = (rad) => rad * 180 / Math.PI; + +export const buildTranslationMatrix = (x, y) => [[1, 0, x], [0, 1, y], [0, 0, 1]]; + +export const dot = (v1, v2) => { + let product = 0; + for (let i = 0; i < v1.length; i++) product += v1[i] * v2[i]; + return product; +}; + +export const getColumnFrom2DArr = (arr, columnIndex) => { + const column: Array = []; + for (let i = 0; i < arr.length; i++) column.push(arr[i][columnIndex]); + return column; +}; + +export const multiplyTransformMatrices = (mat1, mat2) => { + const product: Array = []; + const size = mat1.length; + for (let row = 0; row < size; row++) { + product.push([]); + for (let col = 0; col < size; col++) product[row].push(dot(mat1[row], getColumnFrom2DArr(mat2, col))); + } + return product; +}; + +export const buildRotationMatrix = (rotation, center) => { + const cosA = Math.cos(rotation); + const sinA = Math.sin(rotation); + const rotationMatrix = [[cosA, -sinA, 0], [sinA, cosA, 0], [0, 0, 1]]; + const translationMatrix = buildTranslationMatrix(center[0], center[1]); + const translationTimesRotation = multiplyTransformMatrices(translationMatrix, rotationMatrix); + const negativeTranslationMatrix = buildTranslationMatrix(-center[0], -center[1]); + return multiplyTransformMatrices(translationTimesRotation, negativeTranslationMatrix); +}; + +export const invertTransformMatrix = (matrix) => { + const 
rotationComponent = [[matrix[0][0], matrix[1][0]], [matrix[0][1], matrix[1][1]]]; + const translationComponent = [matrix[0][2], matrix[1][2]]; + const invertedTranslation = [-dot(rotationComponent[0], translationComponent), -dot(rotationComponent[1], translationComponent)]; + return [rotationComponent[0].concat(invertedTranslation[0]), rotationComponent[1].concat(invertedTranslation[1]), [0, 0, 1]]; +}; + +export const rotatePoint = (homogeneousCoordinate, rotationMatrix) => [dot(homogeneousCoordinate, rotationMatrix[0]), dot(homogeneousCoordinate, rotationMatrix[1])]; + +export const xyDistanceBetweenPoints = (a, b) => Math.sqrt(((a[0] - b[0]) ** 2) + ((a[1] - b[1]) ** 2)); + +export function generateAnchors(inputSize) { + const spec = { strides: [inputSize / 16, inputSize / 8], anchors: [2, 6] }; + const anchors: Array<[number, number]> = []; + for (let i = 0; i < spec.strides.length; i++) { + const stride = spec.strides[i]; + const gridRows = Math.floor((inputSize + stride - 1) / stride); + const gridCols = Math.floor((inputSize + stride - 1) / stride); + const anchorsNum = spec.anchors[i]; + for (let gridY = 0; gridY < gridRows; gridY++) { + const anchorY = stride * (gridY + 0.5); + for (let gridX = 0; gridX < gridCols; gridX++) { + const anchorX = stride * (gridX + 0.5); + for (let n = 0; n < anchorsNum; n++) anchors.push([anchorX, anchorY]); + } + } + } + return anchors; +} + +export function transformRawCoords(rawCoords, box, angle, rotationMatrix, inputSize) { + const boxSize = getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint }); + const coordsScaled = rawCoords.map((coord) => ([ + boxSize[0] / inputSize * (coord[0] - inputSize / 2), + boxSize[1] / inputSize * (coord[1] - inputSize / 2), + coord[2] || 0, + ])); + const coordsRotationMatrix = (angle !== 0) ? buildRotationMatrix(angle, [0, 0]) : IDENTITY_MATRIX; + const coordsRotated = (angle !== 0) ? coordsScaled.map((coord) => ([...rotatePoint(coord, coordsRotationMatrix), coord[2]])) : coordsScaled; + const inverseRotationMatrix = (angle !== 0) ? invertTransformMatrix(rotationMatrix) : IDENTITY_MATRIX; + const boxCenter = [...getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint }), 1]; + return coordsRotated.map((coord) => ([ + Math.round(coord[0] + dot(boxCenter, inverseRotationMatrix[0])), + Math.round(coord[1] + dot(boxCenter, inverseRotationMatrix[1])), + Math.round(coord[2] || 0), + ])); +} + +export function correctFaceRotation(box, input, inputSize) { + const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= coords.meshLandmarks.count) ? 
coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
+  const angle: number = computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
+  const faceCenter: Point = getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
+  const faceCenterNormalized: Point = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
+  const rotated = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
+  const rotationMatrix = buildRotationMatrix(-angle, faceCenter);
+  const cut = cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotated, [inputSize, inputSize]);
+  const face = tf.div(cut, 255);
+  tf.dispose(cut);
+  tf.dispose(rotated);
+  return [angle, rotationMatrix, face];
+}
diff --git a/src/face/iris.ts b/src/face/iris.ts
new file mode 100644
index 00000000..7e5e3179
--- /dev/null
+++ b/src/face/iris.ts
@@ -0,0 +1,150 @@
+import * as coords from './facemeshcoords';
+import * as util from './facemeshutil';
+import * as tf from '../../dist/tfjs.esm.js';
+import type { Tensor, GraphModel } from '../tfjs/types';
+import { env } from '../util/env';
+import { log, join } from '../util/util';
+import type { Config } from '../config';
+import type { Point } from '../result';
+
+let model: GraphModel | null;
+let inputSize = 0;
+
+const irisEnlarge = 2.3;
+
+const leftOutline = coords.meshAnnotations['leftEyeLower0'];
+const rightOutline = coords.meshAnnotations['rightEyeLower0'];
+
+const eyeLandmarks = {
+  leftBounds: [leftOutline[0], leftOutline[leftOutline.length - 1]],
+  rightBounds: [rightOutline[0], rightOutline[rightOutline.length - 1]],
+};
+
+const irisLandmarks = {
+  upperCenter: 3,
+  lowerCenter: 4,
+  index: 71,
+  numCoordinates: 76,
+};
+
+export async function load(config: Config): Promise<GraphModel> {
+  if (env.initial) model = null;
+  if (!model) {
+    model = await tf.loadGraphModel(join(config.modelBasePath, config.face.iris?.modelPath || '')) as unknown as GraphModel;
+    if (!model || !model['modelUrl']) log('load model failed:', config.face.iris?.modelPath);
+    else if (config.debug) log('load model:', model['modelUrl']);
+  } else if (config.debug) log('cached model:', model['modelUrl']);
+  inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
+  if (inputSize === -1) inputSize = 64;
+  return model;
+}
+
+// Replace the raw coordinates returned by facemesh with refined iris model coordinates
+// Update the z coordinate to be an average of the original and the new.
+function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
+  for (let i = 0; i < coords.MESH_TO_IRIS_INDICES_MAP.length; i++) {
+    const { key, indices } = coords.MESH_TO_IRIS_INDICES_MAP[i];
+    const originalIndices = coords.meshAnnotations[`${prefix}${key}`];
+    if (!keys || keys.includes(key)) {
+      for (let j = 0; j < indices.length; j++) {
+        const index = indices[j];
+        rawCoords[originalIndices[j]] = [
+          newCoords[index][0], newCoords[index][1],
+          (newCoords[index][2] + rawCoords[originalIndices[j]][2]) / 2,
+        ];
+      }
+    }
+  }
+}
+
+// eslint-disable-next-line class-methods-use-this
+export const getLeftToRightEyeDepthDifference = (rawCoords) => {
+  const leftEyeZ = rawCoords[eyeLandmarks.leftBounds[0]][2];
+  const rightEyeZ = rawCoords[eyeLandmarks.rightBounds[0]][2];
+  return leftEyeZ - rightEyeZ;
+};
+
+// Returns a box describing a cropped region around the eye fit for passing to the iris model.
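+// The eye landmarks are in mesh input pixel space, so the crop rectangle is divided by meshSize
+// to produce the normalized [y1, x1, y2, x2] box that tf.image.cropAndResize expects.
+// e.g. with meshSize = 192 and an eye box from (x, y) = (48, 80) to (96, 128),
+// the resulting crop rectangle is [80/192, 48/192, 128/192, 96/192].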
+export const getEyeBox = (rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false, meshSize) => { + const box = util.squarifyBox(util.enlargeBox(util.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), irisEnlarge)); + const boxSize = util.getBoxSize(box); + let crop = tf.image.cropAndResize(face, [[ + box.startPoint[1] / meshSize, + box.startPoint[0] / meshSize, box.endPoint[1] / meshSize, + box.endPoint[0] / meshSize, + ]], [0], [inputSize, inputSize]); + if (flip && env.kernels.includes('flipleftright')) { + const flipped = tf.image.flipLeftRight(crop); // flipLeftRight is not defined for tfjs-node + tf.dispose(crop); + crop = flipped; + } + return { box, boxSize, crop }; +}; + +// Given a cropped image of an eye, returns the coordinates of the contours surrounding the eye and the iris. +export const getEyeCoords = (eyeData, eyeBox, eyeBoxSize, flip = false) => { + const eyeRawCoords: Array = []; + for (let i = 0; i < irisLandmarks.numCoordinates; i++) { + const x = eyeData[i * 3]; + const y = eyeData[i * 3 + 1]; + const z = eyeData[i * 3 + 2]; + eyeRawCoords.push([ + (flip ? (1 - (x / inputSize)) : (x / inputSize)) * eyeBoxSize[0] + eyeBox.startPoint[0], + (y / inputSize) * eyeBoxSize[1] + eyeBox.startPoint[1], z, + ]); + } + return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(irisLandmarks.index) }; +}; + +// The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints. +// eslint-disable-next-line class-methods-use-this +export const getAdjustedIrisCoords = (rawCoords, irisCoords, direction) => { + const upperCenterZ = rawCoords[coords.meshAnnotations[`${direction}EyeUpper0`][irisLandmarks.upperCenter]][2]; + const lowerCenterZ = rawCoords[coords.meshAnnotations[`${direction}EyeLower0`][irisLandmarks.lowerCenter]][2]; + const averageZ = (upperCenterZ + lowerCenterZ) / 2; + // Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below + return irisCoords.map((coord, i) => { + let z = averageZ; + if (i === 2) { + z = upperCenterZ; + } else if (i === 4) { + z = lowerCenterZ; + } + return [coord[0], coord[1], z]; + }); +}; + +export async function augmentIris(rawCoords, face, config, meshSize) { + if (!model) { + if (config.debug) log('face mesh iris detection requested, but model is not loaded'); + return rawCoords; + } + const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = getEyeBox(rawCoords, face, eyeLandmarks.leftBounds[0], eyeLandmarks.leftBounds[1], true, meshSize); + const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = getEyeBox(rawCoords, face, eyeLandmarks.rightBounds[0], eyeLandmarks.rightBounds[1], true, meshSize); + const combined = tf.concat([leftEyeCrop, rightEyeCrop]); + tf.dispose(leftEyeCrop); + tf.dispose(rightEyeCrop); + const eyePredictions = model.predict(combined) as Tensor; + tf.dispose(combined); + const eyePredictionsData = await eyePredictions.data(); // inside tf.tidy + tf.dispose(eyePredictions); + const leftEyeData = eyePredictionsData.slice(0, irisLandmarks.numCoordinates * 3); + const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true); + const rightEyeData = eyePredictionsData.slice(irisLandmarks.numCoordinates * 3); + const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize); + const leftToRightEyeDepthDifference = getLeftToRightEyeDepthDifference(rawCoords); 
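+  // z values are expressed in mesh input pixel units, so a small difference between the two eye
+  // corners means the face is roughly frontal and iris coordinates can be merged for both eyes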
+ if (Math.abs(leftToRightEyeDepthDifference) < 30) { // User is looking straight ahead. + replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', null); + replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', null); + // If the user is looking to the left or to the right, the iris coordinates tend to diverge too much from the mesh coordinates for them to be merged + // So we only update a single contour line above and below the eye. + } else if (leftToRightEyeDepthDifference < 1) { // User is looking towards the right. + replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', ['EyeUpper0', 'EyeLower0']); + } else { // User is looking towards the left. + replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', ['EyeUpper0', 'EyeLower0']); + } + const adjustedLeftIrisCoords = getAdjustedIrisCoords(rawCoords, leftIrisRawCoords, 'left'); + const adjustedRightIrisCoords = getAdjustedIrisCoords(rawCoords, rightIrisRawCoords, 'right'); + const newCoords = rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords); + return newCoords; +} diff --git a/src/gesture/gesture.ts b/src/gesture/gesture.ts index 1609ea38..a5a1a586 100644 --- a/src/gesture/gesture.ts +++ b/src/gesture/gesture.ts @@ -3,7 +3,7 @@ */ import type { GestureResult } from '../result'; -import * as fingerPose from '../fingerpose/fingerpose'; +import * as fingerPose from '../hand/fingerpose'; /** * @typedef FaceGesture @@ -63,7 +63,7 @@ export const face = (res): GestureResult[] => { if (!res) return []; const gestures: Array<{ face: number, gesture: FaceGesture }> = []; for (let i = 0; i < res.length; i++) { - if (res[i].mesh && res[i].mesh.length > 0) { + if (res[i].mesh && res[i].mesh.length > 450) { const eyeFacing = res[i].mesh[33][2] - res[i].mesh[263][2]; if (Math.abs(eyeFacing) < 10) gestures.push({ face: i, gesture: 'facing center' }); else gestures.push({ face: i, gesture: `facing ${eyeFacing < 0 ? 'left' : 'right'}` }); @@ -84,7 +84,7 @@ export const iris = (res): GestureResult[] => { if (!res) return []; const gestures: Array<{ iris: number, gesture: IrisGesture }> = []; for (let i = 0; i < res.length; i++) { - if (!res[i].annotations || !res[i].annotations.leftEyeIris || !res[i].annotations.rightEyeIris) continue; + if (!res[i].annotations || !res[i].annotations.leftEyeIris || !res[i].annotations.leftEyeIris[0] || !res[i].annotations.rightEyeIris || !res[i].annotations.rightEyeIris[0]) continue; const sizeXLeft = res[i].annotations.leftEyeIris[3][0] - res[i].annotations.leftEyeIris[1][0]; const sizeYLeft = res[i].annotations.leftEyeIris[4][1] - res[i].annotations.leftEyeIris[2][1]; const areaLeft = Math.abs(sizeXLeft * sizeYLeft); diff --git a/src/fingerpose/gesture.ts b/src/hand/fingerdef.ts similarity index 64% rename from src/fingerpose/gesture.ts rename to src/hand/fingerdef.ts index 6ce56751..2d348299 100644 --- a/src/fingerpose/gesture.ts +++ b/src/hand/fingerdef.ts @@ -3,7 +3,54 @@ * See `fingerpose.ts` for entry point */ -export default class Gesture { +export const Finger = { + thumb: 0, + index: 1, + middle: 2, + ring: 3, + pinky: 4, + all: [0, 1, 2, 3, 4], // just for convenience + nameMapping: { 0: 'thumb', 1: 'index', 2: 'middle', 3: 'ring', 4: 'pinky' }, + // Describes mapping of joints based on the 21 points returned by handpose. 
diff --git a/src/fingerpose/gesture.ts b/src/hand/fingerdef.ts
similarity index 64%
rename from src/fingerpose/gesture.ts
rename to src/hand/fingerdef.ts
index 6ce56751..2d348299 100644
--- a/src/fingerpose/gesture.ts
+++ b/src/hand/fingerdef.ts
@@ -3,7 +3,54 @@
  * See `fingerpose.ts` for entry point
  */
 
-export default class Gesture {
+export const Finger = {
+  thumb: 0,
+  index: 1,
+  middle: 2,
+  ring: 3,
+  pinky: 4,
+  all: [0, 1, 2, 3, 4], // just for convenience
+  nameMapping: { 0: 'thumb', 1: 'index', 2: 'middle', 3: 'ring', 4: 'pinky' },
+  // Describes mapping of joints based on the 21 points returned by handpose:
+  // [0] Palm
+  // [1-4] Thumb
+  // [5-8] Index
+  // [9-12] Middle
+  // [13-16] Ring
+  // [17-20] Pinky
+  pointsMapping: {
+    0: [[0, 1], [1, 2], [2, 3], [3, 4]],
+    1: [[0, 5], [5, 6], [6, 7], [7, 8]],
+    2: [[0, 9], [9, 10], [10, 11], [11, 12]],
+    3: [[0, 13], [13, 14], [14, 15], [15, 16]],
+    4: [[0, 17], [17, 18], [18, 19], [19, 20]],
+  },
+  getName: (value) => Finger.nameMapping[value],
+  getPoints: (value) => Finger.pointsMapping[value],
+};
+
+export const FingerCurl = {
+  none: 0,
+  half: 1,
+  full: 2,
+  nameMapping: { 0: 'none', 1: 'half', 2: 'full' },
+  getName: (value) => FingerCurl.nameMapping[value],
+};
+
+export const FingerDirection = {
+  verticalUp: 0,
+  verticalDown: 1,
+  horizontalLeft: 2,
+  horizontalRight: 3,
+  diagonalUpRight: 4,
+  diagonalUpLeft: 5,
+  diagonalDownRight: 6,
+  diagonalDownLeft: 7,
+  nameMapping: { 0: 'verticalUp', 1: 'verticalDown', 2: 'horizontalLeft', 3: 'horizontalRight', 4: 'diagonalUpRight', 5: 'diagonalUpLeft', 6: 'diagonalDownRight', 7: 'diagonalDownLeft' },
+  getName: (value) => FingerDirection.nameMapping[value],
+};
+
+export class FingerGesture {
   name;
   curls;
   directions;
diff --git a/src/fingerpose/gestures.ts b/src/hand/fingergesture.ts
similarity index 91%
rename from src/fingerpose/gestures.ts
rename to src/hand/fingergesture.ts
index f413d571..f25d1047 100644
--- a/src/fingerpose/gestures.ts
+++ b/src/hand/fingergesture.ts
@@ -3,11 +3,10 @@
  * See `fingerpose.ts` for entry point
  */
 
-import { Finger, FingerCurl, FingerDirection } from './description';
-import Gesture from './gesture';
+import { Finger, FingerCurl, FingerDirection, FingerGesture } from './fingerdef';
 
 // describe thumbs up gesture 👍
-const ThumbsUp = new Gesture('thumbs up');
+const ThumbsUp = new FingerGesture('thumbs up');
 ThumbsUp.addCurl(Finger.thumb, FingerCurl.none, 1.0);
 ThumbsUp.addDirection(Finger.thumb, FingerDirection.verticalUp, 1.0);
 ThumbsUp.addDirection(Finger.thumb, FingerDirection.diagonalUpLeft, 0.25);
@@ -19,7 +18,7 @@ for (const finger of [Finger.index, Finger.middle, Finger.ring, Finger.pinky]) {
 }
 
 // describe Victory gesture ✌️
-const Victory = new Gesture('victory');
+const Victory = new FingerGesture('victory');
 Victory.addCurl(Finger.thumb, FingerCurl.half, 0.5);
 Victory.addCurl(Finger.thumb, FingerCurl.none, 0.5);
 Victory.addDirection(Finger.thumb, FingerDirection.verticalUp, 1.0);
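With `FingerGesture` now exported from `fingerdef.ts`, new gestures can be described with the same weighted `addCurl`/`addDirection` calls used for `ThumbsUp` and `Victory` above. A sketch of a hypothetical 'point up' gesture (illustrative only, not part of this commit) to show the pattern:

```ts
import { Finger, FingerCurl, FingerDirection, FingerGesture } from './fingerdef';

// hypothetical gesture: index finger extended and pointing up, all other fingers curled
const PointUp = new FingerGesture('point up');
PointUp.addCurl(Finger.index, FingerCurl.none, 1.0);
PointUp.addDirection(Finger.index, FingerDirection.verticalUp, 1.0);
for (const finger of [Finger.thumb, Finger.middle, Finger.ring, Finger.pinky]) {
  PointUp.addCurl(finger, FingerCurl.full, 1.0);
  PointUp.addCurl(finger, FingerCurl.half, 0.5); // half curl still matches, at lower weight
}
```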
diff --git a/src/fingerpose/estimator.ts b/src/hand/fingerpose.ts
similarity index 88%
rename from src/fingerpose/estimator.ts
rename to src/hand/fingerpose.ts
index 8c2c2884..5aadc35c 100644
--- a/src/fingerpose/estimator.ts
+++ b/src/hand/fingerpose.ts
@@ -1,10 +1,13 @@
 /**
- * FingerPose algorithm implementation
- * See `fingerpose.ts` for entry point
+ * FingerPose algorithm implementation
+ *
+ * Based on: [**FingerPose**](https://github.com/andypotato/fingerpose)
  */
 
-import { Finger, FingerCurl, FingerDirection } from './description';
+import { Finger, FingerCurl, FingerDirection } from './fingerdef';
+import Gestures from '../hand/fingergesture';
 
+const minConfidence = 0.7;
 const options = {
   // curl estimation
   HALF_CURL_START_LIMIT: 60.0,
@@ -169,7 +172,7 @@ function calculateFingerDirection(startPoint, midPoint, endPoint, fingerSlopes) {
   return estimatedDirection;
 }
 
-export function estimate(landmarks) {
+function estimate(landmarks) {
   // step 1: calculate slopes
   const slopesXY: Array<number[]> = [];
   const slopesYZ: Array<number[]> = [];
@@ -212,3 +215,29 @@
   }
   return { curls: fingerCurls, directions: fingerDirections };
 }
+
+export function analyze(keypoints) { // get estimations of curl / direction for each finger
+  if (!keypoints || keypoints.length === 0) return null;
+  const estimatorRes = estimate(keypoints);
+  const landmarks = {};
+  for (const fingerIdx of Finger.all) {
+    landmarks[Finger.getName(fingerIdx)] = {
+      curl: FingerCurl.getName(estimatorRes.curls[fingerIdx]),
+      direction: FingerDirection.getName(estimatorRes.directions[fingerIdx]),
+    };
+  }
+  // console.log('finger landmarks', landmarks);
+  return landmarks;
+}
+
+export function match(keypoints) { // compare gesture description to each known gesture
+  const poses: Array<{ name: string, confidence: number }> = [];
+  if (!keypoints || keypoints.length === 0) return poses;
+  const estimatorRes = estimate(keypoints);
+  for (const gesture of Gestures) {
+    const confidence = gesture.matchAgainst(estimatorRes.curls, estimatorRes.directions);
+    if (confidence >= minConfidence) poses.push({ name: gesture.name, confidence });
+  }
+  // console.log('finger poses', poses);
+  return poses;
+}
diff --git a/src/hand/handtrack.ts b/src/hand/handtrack.ts
index d4214cb7..46eb750a 100644
--- a/src/hand/handtrack.ts
+++ b/src/hand/handtrack.ts
@@ -13,7 +13,7 @@ import type { HandResult, Box, Point } from '../result';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
 import { env } from '../util/env';
-import * as fingerPose from '../fingerpose/fingerpose';
+import * as fingerPose from './fingerpose';
 import { fakeOps } from '../tfjs/backend';
 
 const boxScaleFact = 1.5; // hand finger model prefers slightly larger box
diff --git a/src/handpose/handpose.ts b/src/handpose/handpose.ts
index 7fa599a4..7c9b0a5f 100644
--- a/src/handpose/handpose.ts
+++ b/src/handpose/handpose.ts
@@ -8,7 +8,7 @@ import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import * as handdetector from './handdetector';
 import * as handpipeline from './handpipeline';
-import * as fingerPose from '../fingerpose/fingerpose';
+import * as fingerPose from '../hand/fingerpose';
 import type { HandResult, Box, Point } from '../result';
 import type { Tensor, GraphModel } from '../tfjs/types';
 import type { Config } from '../config';
diff --git a/src/human.ts b/src/human.ts
index 762a2f83..a807c25c 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -8,9 +8,9 @@ import type { Result, FaceResult, HandResult, BodyResult, ObjectResult, GestureResult } from './result';
 import * as tf from '../dist/tfjs.esm.js';
 import * as models from './models';
 import * as face from './face/face';
-import * as facemesh from './blazeface/facemesh';
+import * as facemesh from './face/facemesh';
 import * as faceres from './face/faceres';
-import * as posenet from './posenet/posenet';
+import * as posenet from './body/posenet';
 import * as handtrack from './hand/handtrack';
 import * as handpose from './handpose/handpose';
 // import * as blazepose from './body/blazepose-v1';
@@ -23,7 +23,7 @@ import * as segmentation from './segmentation/segmentation';
 import * as gesture from './gesture/gesture';
 import * as image from './image/image';
 import * as draw from './util/draw';
-import * as persons from './persons';
+import * as persons from './util/persons';
 import * as interpolate from './util/interpolate';
 import * as env from './util/env';
 import * as backend from './tfjs/backend';
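The new `analyze` and `match` exports are this module's public surface: both take the 21 `[x, y, z]` hand landmarks, run the shared `estimate` step, and either name each finger's curl and direction or score the keypoints against every known gesture with the `minConfidence = 0.7` cutoff. A usage sketch, assuming landmarks come from one of the hand models (`describeHand` is illustrative):

```ts
import { analyze, match } from './fingerpose';

// keypoints: 21 [x, y, z] hand landmarks (index 0 is the palm base; see fingerdef.ts for the joint mapping)
function describeHand(keypoints: [number, number, number][]) {
  const fingers = analyze(keypoints); // per-finger curl and direction names, or null
  const gestures = match(keypoints); // [{ name, confidence }] for gestures scoring >= 0.7
  return { fingers, gestures };
}
```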
diff --git a/src/models.ts b/src/models.ts
index ed846bd7..05010bf9 100644
--- a/src/models.ts
+++ b/src/models.ts
@@ -4,10 +4,12 @@ import { log } from './util/util';
 import type { GraphModel } from './tfjs/types';
-import * as facemesh from './blazeface/facemesh';
+import * as blazeface from './face/blazeface';
+import * as facemesh from './face/facemesh';
+import * as iris from './face/iris';
 import * as faceres from './face/faceres';
 import * as emotion from './gear/emotion';
-import * as posenet from './posenet/posenet';
+import * as posenet from './body/posenet';
 import * as handpose from './handpose/handpose';
 import * as handtrack from './hand/handtrack';
 import * as blazepose from './body/blazepose';
@@ -57,15 +59,13 @@ export function reset(instance: Human) {
 /** Load method preloads all instance.configured models on-demand */
 export async function load(instance: Human) {
   if (env.initial) reset(instance);
-  if (instance.config.face.enabled) { // face model is a combo that must be loaded as a whole
-    if (!instance.models.facedetect) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
-    if (instance.config.face.mesh?.enabled && !instance.models.facemesh) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
-    if (instance.config.face.iris?.enabled && !instance.models.faceiris) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
-  }
   if (instance.config.hand.enabled) { // handpose model is a combo that must be loaded as a whole
     if (!instance.models.handpose && instance.config.hand.detector?.modelPath?.includes('handdetect')) [instance.models.handpose, instance.models.handskeleton] = await handpose.load(instance.config);
     if (!instance.models.handskeleton && instance.config.hand.landmarks && instance.config.hand.detector?.modelPath?.includes('handdetect')) [instance.models.handpose, instance.models.handskeleton] = await handpose.load(instance.config);
   }
+  if (instance.config.face.enabled && !instance.models.facedetect) instance.models.facedetect = blazeface.load(instance.config);
+  if (instance.config.face.enabled && instance.config.face.mesh?.enabled && !instance.models.facemesh) instance.models.facemesh = facemesh.load(instance.config);
+  if (instance.config.face.enabled && instance.config.face.iris?.enabled && !instance.models.faceiris) instance.models.faceiris = iris.load(instance.config);
   if (instance.config.hand.enabled && !instance.models.handtrack && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handtrack = handtrack.loadDetect(instance.config);
   if (instance.config.hand.enabled && instance.config.hand.landmarks && !instance.models.handskeleton && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handskeleton = handtrack.loadSkeleton(instance.config);
   if (instance.config.body.enabled && !instance.models.posenet && instance.config.body?.modelPath?.includes('posenet')) instance.models.posenet = posenet.load(instance.config);
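The net effect in `models.ts`: face detector, mesh, and iris are no longer loaded as a single combo but individually, each gated by its own config flag, so disabling `mesh` or `iris` skips those model downloads entirely. A sketch of what that looks like from the caller's side (the config values here are illustrative, not mandated by this commit):

```ts
import Human from '@vladmandic/human';

async function preload(): Promise<Human> {
  const human = new Human({
    face: { enabled: true, mesh: { enabled: true }, iris: { enabled: false } }, // iris model is never fetched
    hand: { enabled: true, detector: { modelPath: 'handtrack.json' } },
    body: { enabled: true, modelPath: 'posenet.json' },
  });
  await human.load(); // preloads only the models enabled above
  return human;
}
```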
diff --git a/src/persons.ts b/src/util/persons.ts
similarity index 98%
rename from src/persons.ts
rename to src/util/persons.ts
index 2529edd2..46c5db08 100644
--- a/src/persons.ts
+++ b/src/util/persons.ts
@@ -2,7 +2,7 @@
  * Analyze detection results and sort & combine them into a per-person view
  */
 
-import type { FaceResult, BodyResult, HandResult, GestureResult, PersonResult, Box } from './result';
+import type { FaceResult, BodyResult, HandResult, GestureResult, PersonResult, Box } from '../result';
 
 export function join(faces: Array<FaceResult>, bodies: Array<BodyResult>, hands: Array<HandResult>, gestures: Array<GestureResult>, shape: Array<number> | undefined): Array<PersonResult> {
   let id = 0;