redesign face processing

pull/193/head
Vladimir Mandic 2021-09-28 12:01:48 -04:00
parent 01a3c6802e
commit 9274f42fba
40 changed files with 9976 additions and 6991 deletions


@@ -10,11 +10,6 @@
- Evaluate and switch default model from `handdetect` to `handtrack`
#### Body
- Implement new variations of `BlazePose` models
- Add virtual box frame caching to `MoveNet`
#### Face
- Reimplement `BlazeFace`, `FaceMesh`, `Iris` with new pipeline and frame caching
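
The reimplemented pipeline caches detected face boxes between frames and re-runs the detector only when the cache goes stale; a condensed sketch of the gate (hedged: adapted from `src/face/facemesh.ts` below, with `runDetector` standing in for the actual BlazeFace call):

```ts
let skipped = Number.MAX_SAFE_INTEGER; // force detection on the first frame
let boxCache: BoxCache[] = [];

async function maybeDetect(input: Tensor, config: Config) {
  const cacheStale = skipped > (config.face.detector?.skipFrames || 0);
  if (!config.skipFrame || cacheStale) {
    boxCache = await runDetector(input, config); // full detector pass
    skipped = 0;
  } else {
    skipped++; // reuse boxes cached from a previous frame
  }
}
```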


@@ -67,6 +67,9 @@ const drawOptions = {
drawLabels: true,
drawPolygons: true,
drawPoints: false,
fillPolygons: false,
useCurves: false,
useDepth: true,
};
// ui options
@@ -105,7 +108,7 @@ const ui = {
lastFrame: 0, // time of last frame processing
viewportSet: false, // internal, has custom viewport been set
background: null, // holds instance of segmentation background image
exceptionHandler: false, // should capture all unhandled exceptions
exceptionHandler: true, // should capture all unhandled exceptions
// webrtc
useWebRTC: false, // use webrtc as camera source instead of local webcam
@@ -684,13 +687,13 @@ function setupMenu() {
setupCamera();
});
menu.display.addHTML('<hr style="border-style: inset; border-color: dimgray">');
menu.display.addBool('use depth', human.draw.options, 'useDepth');
menu.display.addBool('use curves', human.draw.options, 'useCurves');
menu.display.addBool('print labels', human.draw.options, 'drawLabels');
menu.display.addBool('draw points', human.draw.options, 'drawPoints');
menu.display.addBool('draw boxes', human.draw.options, 'drawBoxes');
menu.display.addBool('draw polygons', human.draw.options, 'drawPolygons');
menu.display.addBool('fill polygons', human.draw.options, 'fillPolygons');
menu.display.addBool('use depth', drawOptions, 'useDepth');
menu.display.addBool('use curves', drawOptions, 'useCurves');
menu.display.addBool('print labels', drawOptions, 'drawLabels');
menu.display.addBool('draw points', drawOptions, 'drawPoints');
menu.display.addBool('draw boxes', drawOptions, 'drawBoxes');
menu.display.addBool('draw polygons', drawOptions, 'drawPolygons');
menu.display.addBool('fill polygons', drawOptions, 'fillPolygons');
menu.image = new Menu(document.body, '', { top, left: x[1] });
menu.image.addBool('enabled', userConfig.filter, 'enabled', (val) => userConfig.filter.enabled = val);
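
Since the toggles now mutate the demo's local `drawOptions` object rather than the library's built-in defaults, the options presumably have to be forwarded explicitly; a minimal sketch assuming the demo's draw call:

```ts
// hedged usage: pass drawOptions on every draw so menu changes take effect
const interpolated = await human.next(result); // optional result smoothing
await human.draw.all(canvas, interpolated, drawOptions);
```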

Multiple vendored dist bundle diffs are suppressed (too large, or lines too long), including:

dist/human.esm.js (vendored): 2682 changed lines
dist/human.js (vendored): 560 changed lines
dist/human.node-gpu.js (vendored): 2010 changed lines
dist/human.node-wasm.js (vendored): 2010 changed lines
dist/human.node.js (vendored): 2010 changed lines

@@ -1,6 +1,6 @@
{
"name": "@vladmandic/human",
"version": "2.2.3",
"version": "2.3.0",
"description": "Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition",
"sideEffects": false,
"main": "dist/human.node.js",


@@ -1,101 +0,0 @@
/**
* BlazeFace, FaceMesh & Iris model implementation
* See `facemesh.ts` for entry point
*/
import { log, join, mergeDeep } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as box from './box';
import * as util from './util';
import type { Config } from '../config';
import type { Tensor, GraphModel } from '../tfjs/types';
const keypointsCount = 6;
function decodeBounds(boxOutputs, anchors, inputSize) {
const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
const centers = tf.add(boxStarts, anchors);
const boxSizes = tf.slice(boxOutputs, [0, 3], [-1, 2]);
const boxSizesNormalized = tf.div(boxSizes, inputSize);
const centersNormalized = tf.div(centers, inputSize);
const halfBoxSize = tf.div(boxSizesNormalized, 2);
const starts = tf.sub(centersNormalized, halfBoxSize);
const ends = tf.add(centersNormalized, halfBoxSize);
const startNormalized = tf.mul(starts, inputSize);
const endNormalized = tf.mul(ends, inputSize);
const concatAxis = 1;
return tf.concat2d([startNormalized, endNormalized], concatAxis);
}
export class BlazeFaceModel {
model: GraphModel;
anchorsData: [number, number][];
anchors: Tensor;
inputSize: number;
config: Config;
constructor(model, config: Config) {
this.model = model;
this.anchorsData = util.generateAnchors(model.inputs[0].shape[1]);
this.anchors = tf.tensor2d(this.anchorsData);
this.inputSize = model.inputs[0].shape[2];
this.config = config;
}
async getBoundingBoxes(inputImage: Tensor, userConfig: Config) {
// sanity check on input
if ((!inputImage) || (inputImage['isDisposedInternal']) || (inputImage.shape.length !== 4) || (inputImage.shape[1] < 1) || (inputImage.shape[2] < 1)) return { boxes: [] };
const [batch, boxes, scores] = tf.tidy(() => {
const resizedImage = tf.image.resizeBilinear(inputImage, [this.inputSize, this.inputSize]);
const normalizedImage = tf.sub(tf.div(resizedImage, 127.5), 0.5);
const res = this.model.execute(normalizedImage);
let batchOut;
if (Array.isArray(res)) { // are we using tfhub or pinto converted model?
const sorted = res.sort((a, b) => a.size - b.size);
const concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16
const concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16
const concat = tf.concat([concat512, concat384], 1);
batchOut = tf.squeeze(concat, 0);
} else {
batchOut = tf.squeeze(res); // when using tfhub model
}
const boxesOut = decodeBounds(batchOut, this.anchors, [this.inputSize, this.inputSize]);
const logits = tf.slice(batchOut, [0, 0], [-1, 1]);
const scoresOut = tf.squeeze(tf.sigmoid(logits)); // inside tf.tidy
return [batchOut, boxesOut, scoresOut];
});
this.config = mergeDeep(this.config, userConfig) as Config;
const nmsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, (this.config.face.detector?.maxDetected || 0), (this.config.face.detector?.iouThreshold || 0), (this.config.face.detector?.minConfidence || 0));
const nms = await nmsTensor.array();
tf.dispose(nmsTensor);
const annotatedBoxes: Array<{ box: { startPoint: Tensor, endPoint: Tensor }, landmarks: Tensor, anchor: [number, number] | undefined, confidence: number }> = [];
const scoresData = await scores.data();
for (let i = 0; i < nms.length; i++) {
const confidence = scoresData[nms[i]];
if (confidence > (this.config.face.detector?.minConfidence || 0)) {
const boundingBox = tf.slice(boxes, [nms[i], 0], [1, -1]);
const landmarks = tf.tidy(() => tf.reshape(tf.squeeze(tf.slice(batch, [nms[i], keypointsCount - 1], [1, -1])), [keypointsCount, -1]));
annotatedBoxes.push({ box: box.createBox(boundingBox), landmarks, anchor: this.anchorsData[nms[i]], confidence });
tf.dispose(boundingBox);
}
}
tf.dispose(batch);
tf.dispose(boxes);
tf.dispose(scores);
return {
boxes: annotatedBoxes,
scaleFactor: [inputImage.shape[2] / this.inputSize, inputImage.shape[1] / this.inputSize],
};
}
}
export async function load(config: Config) {
const model = await tf.loadGraphModel(join(config.modelBasePath, config.face.detector?.modelPath || ''), { fromTFHub: (config.face.detector?.modelPath || '').includes('tfhub.dev') });
const blazeFace = new BlazeFaceModel(model, config);
if (!model || !model.modelUrl) log('load model failed:', config.face.detector?.modelPath || '');
else if (config.debug) log('load model:', model.modelUrl);
return blazeFace;
}


@@ -1,75 +0,0 @@
/**
* BlazeFace, FaceMesh & Iris model implementation
* See `facemesh.ts` for entry point
*/
import * as tf from '../../dist/tfjs.esm.js';
export function scaleBoxCoordinates(box, factor) {
const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
return { startPoint, endPoint };
}
export function getBoxSize(box): [number, number] {
return [
Math.abs(box.endPoint[0] - box.startPoint[0]),
Math.abs(box.endPoint[1] - box.startPoint[1]),
];
}
export function getBoxCenter(box): [number, number] {
return [
box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2,
box.startPoint[1] + (box.endPoint[1] - box.startPoint[1]) / 2,
];
}
export function cutBoxFromImageAndResize(box, image, cropSize) {
const h = image.shape[1];
const w = image.shape[2];
const boxes = [[
box.startPoint[1] / h,
box.startPoint[0] / w,
box.endPoint[1] / h,
box.endPoint[0] / w,
]];
return tf.image.cropAndResize(image, boxes, [0], cropSize);
}
export function enlargeBox(box, factor = 1.5) {
const center = getBoxCenter(box);
const size = getBoxSize(box);
const newHalfSize: [number, number] = [factor * size[0] / 2, factor * size[1] / 2];
const startPoint = [center[0] - newHalfSize[0], center[1] - newHalfSize[1]];
const endPoint = [center[0] + newHalfSize[0], center[1] + newHalfSize[1]];
return { startPoint, endPoint, landmarks: box.landmarks };
}
export function squarifyBox(box) {
const centers = getBoxCenter(box);
const size = getBoxSize(box);
const maxEdge = Math.max(...size);
const halfSize = maxEdge / 2;
const startPoint = [Math.round(centers[0] - halfSize), Math.round(centers[1] - halfSize)];
const endPoint = [Math.round(centers[0] + halfSize), Math.round(centers[1] + halfSize)];
return { startPoint, endPoint, landmarks: box.landmarks };
}
export function calculateLandmarksBoundingBox(landmarks) {
const xs = landmarks.map((d) => d[0]);
const ys = landmarks.map((d) => d[1]);
const startPoint = [Math.min(...xs), Math.min(...ys)];
const endPoint = [Math.max(...xs), Math.max(...ys)];
return { startPoint, endPoint, landmarks };
}
export const disposeBox = (t) => {
tf.dispose(t.startPoint);
tf.dispose(t.endPoint);
};
export const createBox = (startEndTensor) => ({
startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]),
endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]),
});
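
These helpers, carried over into `facemeshutil.ts` by this commit, compose as pad-then-square before cropping; an illustrative sketch with hypothetical landmark values:

```ts
// landmarks in input-pixel coordinates (made-up values)
const raw = calculateLandmarksBoundingBox([[110, 120], [150, 140], [130, 160]]);
const padded = enlargeBox(raw, 1.5); // grow 50% around the box center
const square = squarifyBox(padded);  // equal width/height for the model input
// square.startPoint/endPoint then feed cutBoxFromImageAndResize()
```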


@@ -1,92 +0,0 @@
/**
* BlazeFace, FaceMesh & Iris model implementation
*
* Based on:
* - [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
* - Facial Spatial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
* - Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
*/
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as blazeface from './blazeface';
import * as facepipeline from './facepipeline';
import * as coords from './coords';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { FaceResult, Box } from '../result';
import type { Config } from '../config';
import { env } from '../util/env';
let faceModels: [blazeface.BlazeFaceModel | null, GraphModel | null, GraphModel | null] = [null, null, null];
let facePipeline;
export async function predict(input: Tensor, config: Config): Promise<FaceResult[]> {
const predictions = await facePipeline.predict(input, config);
const results: Array<FaceResult> = [];
let id = 0;
for (const prediction of (predictions || [])) {
if (!prediction || prediction.isDisposedInternal) continue; // guard against disposed tensors on long-running operations, such as a pause in the middle of processing
const meshRaw = prediction.mesh.map((pt) => [
pt[0] / (input.shape[2] || 0),
pt[1] / (input.shape[1] || 0),
pt[2] / facePipeline.meshSize,
]);
const annotations = {};
if (prediction.mesh && prediction.mesh.length > 0) {
for (const key of Object.keys(coords.MESH_ANNOTATIONS)) annotations[key] = coords.MESH_ANNOTATIONS[key].map((index) => prediction.mesh[index]);
}
const clampedBox: Box = prediction.box ? [
Math.trunc(Math.max(0, prediction.box.startPoint[0])),
Math.trunc(Math.max(0, prediction.box.startPoint[1])),
Math.trunc(Math.min((input.shape[2] || 0), prediction.box.endPoint[0]) - Math.max(0, prediction.box.startPoint[0])),
Math.trunc(Math.min((input.shape[1] || 0), prediction.box.endPoint[1]) - Math.max(0, prediction.box.startPoint[1])),
] : [0, 0, 0, 0];
const boxRaw: Box = prediction.box ? [
prediction.box.startPoint[0] / (input.shape[2] || 0),
prediction.box.startPoint[1] / (input.shape[1] || 0),
(prediction.box.endPoint[0] - prediction.box.startPoint[0]) / (input.shape[2] || 0),
(prediction.box.endPoint[1] - prediction.box.startPoint[1]) / (input.shape[1] || 0),
] : [0, 0, 0, 0];
results.push({
id: id++,
score: Math.round(100 * prediction.faceConfidence || 100 * prediction.boxConfidence || 0) / 100,
boxScore: Math.round(100 * prediction.boxConfidence) / 100,
faceScore: Math.round(100 * prediction.faceConfidence) / 100,
box: clampedBox,
boxRaw,
mesh: prediction.mesh,
meshRaw,
annotations,
tensor: prediction.image,
});
}
return results;
}
export async function load(config): Promise<[GraphModel | null, GraphModel | null, GraphModel | null]> {
if (env.initial) faceModels = [null, null, null];
if ((!faceModels[0] && config.face.enabled) || (!faceModels[1] && config.face.mesh.enabled) || (!faceModels[2] && config.face.iris.enabled) || env.initial) {
faceModels = await Promise.all([
(!faceModels[0] && config.face.enabled) ? blazeface.load(config) : null,
(!faceModels[1] && config.face.mesh.enabled) ? tf.loadGraphModel(join(config.modelBasePath, config.face.mesh.modelPath), { fromTFHub: config.face.mesh.modelPath.includes('tfhub.dev') }) as unknown as GraphModel : null,
(!faceModels[2] && config.face.iris.enabled) ? tf.loadGraphModel(join(config.modelBasePath, config.face.iris.modelPath), { fromTFHub: config.face.iris.modelPath.includes('tfhub.dev') }) as unknown as GraphModel : null,
]);
if (config.face.mesh.enabled) {
if (!faceModels[1] || !faceModels[1]['modelUrl']) log('load model failed:', config.face.mesh.modelPath);
else if (config.debug) log('load model:', faceModels[1]['modelUrl']);
}
if (config.face.iris.enabled) {
if (!faceModels[2] || !faceModels[2]['modelUrl']) log('load model failed:', config.face.iris.modelPath);
else if (config.debug) log('load model:', faceModels[2]['modelUrl']);
}
} else if (config.debug) {
if (faceModels[0]) log('cached model:', faceModels[0].model['modelUrl']);
if (faceModels[1]) log('cached model:', faceModels[1]['modelUrl']);
if (faceModels[2]) log('cached model:', faceModels[2]['modelUrl']);
}
facePipeline = new facepipeline.Pipeline(faceModels[0], faceModels[1], faceModels[2]);
return [faceModels[0]?.model || null, faceModels[1], faceModels[2]];
}
export const triangulation = coords.TRI468;
export const uvmap = coords.UV468;


@@ -1,346 +0,0 @@
/**
* BlazeFace, FaceMesh & Iris model implementation
* See `facemesh.ts` for entry point
*/
import * as tf from '../../dist/tfjs.esm.js';
import * as bounding from './box';
import * as util from './util';
import * as coords from './coords';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { BlazeFaceModel } from './blazeface';
import { env } from '../util/env';
import { log } from '../util/util';
import type { Point } from '../result';
const leftOutline = coords.MESH_ANNOTATIONS['leftEyeLower0'];
const rightOutline = coords.MESH_ANNOTATIONS['rightEyeLower0'];
const eyeLandmarks = {
leftBounds: [leftOutline[0], leftOutline[leftOutline.length - 1]],
rightBounds: [rightOutline[0], rightOutline[rightOutline.length - 1]],
};
const meshLandmarks = {
count: 468,
mouth: 13,
symmetryLine: [13, coords.MESH_ANNOTATIONS['midwayBetweenEyes'][0]],
};
const blazeFaceLandmarks = {
leftEye: 0,
rightEye: 1,
nose: 2,
mouth: 3,
leftEar: 4,
rightEar: 5,
symmetryLine: [3, 2],
};
const irisLandmarks = {
upperCenter: 3,
lowerCenter: 4,
index: 71,
numCoordinates: 76,
};
// Replace the raw coordinates returned by facemesh with refined iris model coordinates
// Update the z coordinate to be an average of the original and the new.
function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
for (let i = 0; i < coords.MESH_TO_IRIS_INDICES_MAP.length; i++) {
const { key, indices } = coords.MESH_TO_IRIS_INDICES_MAP[i];
const originalIndices = coords.MESH_ANNOTATIONS[`${prefix}${key}`];
if (!keys || keys.includes(key)) {
for (let j = 0; j < indices.length; j++) {
const index = indices[j];
rawCoords[originalIndices[j]] = [
newCoords[index][0], newCoords[index][1],
(newCoords[index][2] + rawCoords[originalIndices[j]][2]) / 2,
];
}
}
}
}
// The Pipeline coordinates the bounding box and face mesh models.
export class Pipeline {
storedBoxes: Array<{ startPoint: number[], endPoint: number[], landmarks: Array<number>, confidence: number, faceConfidence?: number | undefined }>;
boundingBoxDetector: BlazeFaceModel; // tf.GraphModel
meshDetector: GraphModel; // tf.GraphModel
irisModel: GraphModel; // tf.GraphModel
boxSize: number;
meshSize: number;
irisSize: number;
irisEnlarge: number;
skipped: number;
detectedFaces: number;
constructor(boundingBoxDetector, meshDetector, irisModel) {
// An array of facial bounding boxes.
this.storedBoxes = [];
this.boundingBoxDetector = boundingBoxDetector;
this.meshDetector = meshDetector;
this.irisModel = irisModel;
this.boxSize = boundingBoxDetector?.model?.inputs[0].shape[2] || 0;
this.meshSize = meshDetector?.inputs[0].shape[2] || boundingBoxDetector?.model?.inputs[0].shape[2];
this.irisSize = irisModel?.inputs[0].shape[1] || 0;
this.irisEnlarge = 2.3;
this.skipped = 0;
this.detectedFaces = 0;
}
transformRawCoords(rawCoords, box, angle, rotationMatrix) {
const boxSize = bounding.getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint });
const coordsScaled = rawCoords.map((coord) => ([
boxSize[0] / this.meshSize * (coord[0] - this.meshSize / 2),
boxSize[1] / this.meshSize * (coord[1] - this.meshSize / 2),
coord[2],
]));
const coordsRotationMatrix = (angle !== 0) ? util.buildRotationMatrix(angle, [0, 0]) : util.IDENTITY_MATRIX;
const coordsRotated = (angle !== 0) ? coordsScaled.map((coord) => ([...util.rotatePoint(coord, coordsRotationMatrix), coord[2]])) : coordsScaled;
const inverseRotationMatrix = (angle !== 0) ? util.invertTransformMatrix(rotationMatrix) : util.IDENTITY_MATRIX;
const boxCenter = [...bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint }), 1];
return coordsRotated.map((coord) => ([
Math.round(coord[0] + util.dot(boxCenter, inverseRotationMatrix[0])),
Math.round(coord[1] + util.dot(boxCenter, inverseRotationMatrix[1])),
Math.round(coord[2]),
]));
}
// eslint-disable-next-line class-methods-use-this
getLeftToRightEyeDepthDifference(rawCoords) {
const leftEyeZ = rawCoords[eyeLandmarks.leftBounds[0]][2];
const rightEyeZ = rawCoords[eyeLandmarks.rightBounds[0]][2];
return leftEyeZ - rightEyeZ;
}
// Returns a box describing a cropped region around the eye fit for passing to the iris model.
getEyeBox(rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false) {
const box = bounding.squarifyBox(bounding.enlargeBox(bounding.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), this.irisEnlarge));
const boxSize = bounding.getBoxSize(box);
let crop = tf.image.cropAndResize(face, [[
box.startPoint[1] / this.meshSize,
box.startPoint[0] / this.meshSize, box.endPoint[1] / this.meshSize,
box.endPoint[0] / this.meshSize,
]], [0], [this.irisSize, this.irisSize]);
if (flip && env.kernels.includes('flipleftright')) {
const flipped = tf.image.flipLeftRight(crop); // flipLeftRight is not defined for tfjs-node
tf.dispose(crop);
crop = flipped;
}
return { box, boxSize, crop };
}
// Given a cropped image of an eye, returns the coordinates of the contours surrounding the eye and the iris.
getEyeCoords(eyeData, eyeBox, eyeBoxSize, flip = false) {
const eyeRawCoords: Array<Point> = [];
for (let i = 0; i < irisLandmarks.numCoordinates; i++) {
const x = eyeData[i * 3];
const y = eyeData[i * 3 + 1];
const z = eyeData[i * 3 + 2];
eyeRawCoords.push([
(flip ? (1 - (x / this.irisSize)) : (x / this.irisSize)) * eyeBoxSize[0] + eyeBox.startPoint[0],
(y / this.irisSize) * eyeBoxSize[1] + eyeBox.startPoint[1], z,
]);
}
return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(irisLandmarks.index) };
}
// The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints.
// eslint-disable-next-line class-methods-use-this
getAdjustedIrisCoords(rawCoords, irisCoords, direction) {
const upperCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeUpper0`][irisLandmarks.upperCenter]][2];
const lowerCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeLower0`][irisLandmarks.lowerCenter]][2];
const averageZ = (upperCenterZ + lowerCenterZ) / 2;
// Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below
return irisCoords.map((coord, i) => {
let z = averageZ;
if (i === 2) {
z = upperCenterZ;
} else if (i === 4) {
z = lowerCenterZ;
}
return [coord[0], coord[1], z];
});
}
correctFaceRotation(config, box, input) {
const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= meshLandmarks.count) ? meshLandmarks.symmetryLine : blazeFaceLandmarks.symmetryLine;
const angle: number = util.computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
const faceCenter: Point = bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
const faceCenterNormalized: Point = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
const rotated = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
const rotationMatrix = util.buildRotationMatrix(-angle, faceCenter);
const cut = config.face.mesh.enabled
? bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotated, [this.meshSize, this.meshSize])
: bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotated, [this.boxSize, this.boxSize]);
const face = tf.div(cut, 255);
tf.dispose(cut);
tf.dispose(rotated);
return [angle, rotationMatrix, face];
}
async augmentIris(rawCoords, face, config) {
if (!this.irisModel) {
if (config.debug) log('face iris detection requested, but model is not loaded');
return rawCoords;
}
const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = this.getEyeBox(rawCoords, face, eyeLandmarks.leftBounds[0], eyeLandmarks.leftBounds[1], true);
const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, eyeLandmarks.rightBounds[0], eyeLandmarks.rightBounds[1]);
const combined = tf.concat([leftEyeCrop, rightEyeCrop]);
tf.dispose(leftEyeCrop);
tf.dispose(rightEyeCrop);
const eyePredictions = this.irisModel.predict(combined) as Tensor;
tf.dispose(combined);
const eyePredictionsData = await eyePredictions.data(); // inside tf.tidy
tf.dispose(eyePredictions);
const leftEyeData = eyePredictionsData.slice(0, irisLandmarks.numCoordinates * 3);
const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = this.getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
const rightEyeData = eyePredictionsData.slice(irisLandmarks.numCoordinates * 3);
const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = this.getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize);
const leftToRightEyeDepthDifference = this.getLeftToRightEyeDepthDifference(rawCoords);
if (Math.abs(leftToRightEyeDepthDifference) < 30) { // User is looking straight ahead.
replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', null);
replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', null);
// If the user is looking to the left or to the right, the iris coordinates tend to diverge too much from the mesh coordinates for them to be merged
// So we only update a single contour line above and below the eye.
} else if (leftToRightEyeDepthDifference < 1) { // User is looking towards the right.
replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', ['EyeUpper0', 'EyeLower0']);
} else { // User is looking towards the left.
replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', ['EyeUpper0', 'EyeLower0']);
}
const adjustedLeftIrisCoords = this.getAdjustedIrisCoords(rawCoords, leftIrisRawCoords, 'left');
const adjustedRightIrisCoords = this.getAdjustedIrisCoords(rawCoords, rightIrisRawCoords, 'right');
const newCoords = rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords);
return newCoords;
}
async predict(input, config) {
let useFreshBox = false;
// run the detector on the first frame, every `skipFrames` frames thereafter, or always when mesh or frame caching is disabled
let detector;
if ((this.skipped === 0) || (this.skipped > config.face.detector.skipFrames) || !config.face.mesh.enabled || !config.skipFrame) {
detector = await this.boundingBoxDetector.getBoundingBoxes(input, config);
this.skipped = 0;
}
if (config.skipFrame) this.skipped++;
// if detector result count doesn't match current working set, use it to reset current working set
if (!config.skipFrame || (detector && detector.boxes && (!config.face.mesh.enabled || (detector.boxes.length !== this.detectedFaces) && (this.detectedFaces !== config.face.detector.maxDetected)))) {
this.storedBoxes = [];
this.detectedFaces = 0;
for (const possible of detector.boxes) {
const startPoint = await possible.box.startPoint.data();
const endPoint = await possible.box.endPoint.data();
const landmarks = await possible.landmarks.array();
this.storedBoxes.push({ startPoint, endPoint, landmarks, confidence: possible.confidence });
}
if (this.storedBoxes.length > 0) useFreshBox = true;
}
if (useFreshBox) {
if (!detector || !detector.boxes || (detector.boxes.length === 0)) {
this.storedBoxes = [];
this.detectedFaces = 0;
return null;
}
for (let i = 0; i < this.storedBoxes.length; i++) {
const scaledBox = bounding.scaleBoxCoordinates({ startPoint: this.storedBoxes[i].startPoint, endPoint: this.storedBoxes[i].endPoint }, detector.scaleFactor);
const enlargedBox = bounding.enlargeBox(scaledBox);
const squarifiedBox = bounding.squarifyBox(enlargedBox);
const landmarks = this.storedBoxes[i].landmarks;
const confidence = this.storedBoxes[i].confidence;
this.storedBoxes[i] = { ...squarifiedBox, confidence, landmarks };
}
}
if (detector && detector.boxes) {
detector.boxes.forEach((prediction) => {
tf.dispose(prediction.box.startPoint);
tf.dispose(prediction.box.endPoint);
tf.dispose(prediction.landmarks);
});
}
const results: Array<{ mesh, box, faceConfidence, boxConfidence, confidence, image }> = [];
// for (let i = 0; i < this.storedBoxes.length; i++) {
const newBoxes: Array<{ startPoint: number[]; endPoint: number[]; landmarks: number[]; confidence: number; faceConfidence?: number | undefined; }> = [];
for (let box of this.storedBoxes) {
// let box = this.storedBoxes[i]; // The facial bounding box landmarks could come either from blazeface (if we are using a fresh box), or from the mesh model (if we are reusing an old box).
let face;
let angle = 0;
let rotationMatrix;
if (config.face.detector.rotation && config.face.mesh.enabled && env.kernels.includes('rotatewithoffset')) {
[angle, rotationMatrix, face] = this.correctFaceRotation(config, box, input);
} else {
rotationMatrix = util.IDENTITY_MATRIX;
const cloned = input.clone();
const cut = config.face.mesh.enabled
? bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, cloned, [this.meshSize, this.meshSize])
: bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, cloned, [this.boxSize, this.boxSize]);
face = tf.div(cut, 255);
tf.dispose(cut);
tf.dispose(cloned);
}
// if we're not going to produce mesh, don't spend time with further processing
if (!config.face.mesh.enabled) {
results.push({
mesh: [],
box,
faceConfidence: null,
boxConfidence: box.confidence,
confidence: box.confidence,
image: face,
});
} else if (!this.meshDetector) {
if (config.debug) log('face mesh detection requested, but model is not loaded');
} else {
const [contours, confidence, contourCoords] = this.meshDetector.execute(face) as Array<Tensor>; // The first returned tensor represents facial contours which are already included in the coordinates.
tf.dispose(contours);
const faceConfidence = (await confidence.data())[0] as number; // inside tf.tidy
tf.dispose(confidence);
const coordsReshaped = tf.reshape(contourCoords, [-1, 3]);
let rawCoords = await coordsReshaped.array();
tf.dispose(contourCoords);
tf.dispose(coordsReshaped);
if (faceConfidence < config.face.detector.minConfidence) {
// if (!this.storedBoxes[i]) console.log('2', i, this.storedBoxes.length, this.storedBoxes[i], box, this.storedBoxes);
// this.storedBoxes[i].confidence = faceConfidence; // reset confidence of cached box
box.confidence = faceConfidence; // reset confidence of cached box
tf.dispose(face);
} else {
if (config.face.iris.enabled) rawCoords = await this.augmentIris(rawCoords, face, config);
// override box from detection with one calculated from mesh
const mesh = this.transformRawCoords(rawCoords, box, angle, rotationMatrix);
box = { ...bounding.enlargeBox(bounding.calculateLandmarksBoundingBox(mesh), 1.5), confidence: box.confidence }; // redefine box with mesh calculated one
// do rotation one more time with mesh keypoints if we want to return perfect image
if (config.face.detector.rotation && config.face.mesh.enabled && config.face.description.enabled && env.kernels.includes('rotatewithoffset')) {
tf.dispose(face); // we'll overwrite original face
[angle, rotationMatrix, face] = this.correctFaceRotation(config, box, input);
}
results.push({
mesh,
box,
faceConfidence,
boxConfidence: box.confidence,
confidence: faceConfidence,
image: face,
});
// update stored cache values
// this.storedBoxes[i] = { ...bounding.squarifyBox(box), confidence: box.confidence, faceConfidence };
box = { ...bounding.squarifyBox(box), confidence: box.confidence, faceConfidence };
}
}
newBoxes.push(box);
}
// results = results.filter((a) => a !== null);
// remove cache entries for detected boxes on low confidence
if (config.face.mesh.enabled) this.storedBoxes = newBoxes.filter((a) => a.confidence > config.face.detector.minConfidence);
this.detectedFaces = results.length;
return results;
}
}


@@ -1,115 +0,0 @@
/**
* BlazeFace, FaceMesh & Iris model implementation
* See `facemesh.ts` for entry point
*/
export const IDENTITY_MATRIX = [[1, 0, 0], [0, 1, 0], [0, 0, 1]];
/**
* Normalizes the provided angle to the range -pi to pi.
* @param angle The angle in radians to be normalized.
*/
export function normalizeRadians(angle) {
return angle - 2 * Math.PI * Math.floor((angle + Math.PI) / (2 * Math.PI));
}
/**
* Computes the angle of rotation between two anchor points.
* @param point1 First anchor point
* @param point2 Second anchor point
*/
export function computeRotation(point1, point2) {
const radians = Math.PI / 2 - Math.atan2(-(point2[1] - point1[1]), point2[0] - point1[0]);
return normalizeRadians(radians);
}
export function radToDegrees(rad) {
return rad * 180 / Math.PI;
}
export function buildTranslationMatrix(x, y) {
return [[1, 0, x], [0, 1, y], [0, 0, 1]];
}
export function dot(v1, v2) {
let product = 0;
for (let i = 0; i < v1.length; i++) {
product += v1[i] * v2[i];
}
return product;
}
export function getColumnFrom2DArr(arr, columnIndex) {
const column: Array<number> = [];
for (let i = 0; i < arr.length; i++) {
column.push(arr[i][columnIndex]);
}
return column;
}
export function multiplyTransformMatrices(mat1, mat2) {
const product: Array<number[]> = [];
const size = mat1.length;
for (let row = 0; row < size; row++) {
product.push([]);
for (let col = 0; col < size; col++) {
product[row].push(dot(mat1[row], getColumnFrom2DArr(mat2, col)));
}
}
return product;
}
export function buildRotationMatrix(rotation, center) {
const cosA = Math.cos(rotation);
const sinA = Math.sin(rotation);
const rotationMatrix = [[cosA, -sinA, 0], [sinA, cosA, 0], [0, 0, 1]];
const translationMatrix = buildTranslationMatrix(center[0], center[1]);
const translationTimesRotation = multiplyTransformMatrices(translationMatrix, rotationMatrix);
const negativeTranslationMatrix = buildTranslationMatrix(-center[0], -center[1]);
return multiplyTransformMatrices(translationTimesRotation, negativeTranslationMatrix);
}
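
The composition implemented above is the standard rotate-about-a-point construction: translate the center to the origin, rotate, then translate back:

$$R_c(\theta) = T(c)\,R(\theta)\,T(-c)$$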
export function invertTransformMatrix(matrix) {
const rotationComponent = [[matrix[0][0], matrix[1][0]], [matrix[0][1], matrix[1][1]]];
const translationComponent = [matrix[0][2], matrix[1][2]];
const invertedTranslation = [
-dot(rotationComponent[0], translationComponent),
-dot(rotationComponent[1], translationComponent),
];
return [
rotationComponent[0].concat(invertedTranslation[0]),
rotationComponent[1].concat(invertedTranslation[1]),
[0, 0, 1],
];
}
export function rotatePoint(homogeneousCoordinate, rotationMatrix) {
return [
dot(homogeneousCoordinate, rotationMatrix[0]),
dot(homogeneousCoordinate, rotationMatrix[1]),
];
}
export function xyDistanceBetweenPoints(a, b) {
return Math.sqrt(((a[0] - b[0]) ** 2) + ((a[1] - b[1]) ** 2));
}
export function generateAnchors(inputSize) {
const spec = { strides: [inputSize / 16, inputSize / 8], anchors: [2, 6] };
const anchors: Array<[number, number]> = [];
for (let i = 0; i < spec.strides.length; i++) {
const stride = spec.strides[i];
const gridRows = Math.floor((inputSize + stride - 1) / stride);
const gridCols = Math.floor((inputSize + stride - 1) / stride);
const anchorsNum = spec.anchors[i];
for (let gridY = 0; gridY < gridRows; gridY++) {
const anchorY = stride * (gridY + 0.5);
for (let gridX = 0; gridX < gridCols; gridX++) {
const anchorX = stride * (gridX + 0.5);
for (let n = 0; n < anchorsNum; n++) {
anchors.push([anchorX, anchorY]);
}
}
}
}
return anchors;
}
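
As a sanity check on the anchor layout (assuming the default 128×128 detector input), the per-stride counts line up with the two tensors concatenated in `blazeface.ts`:

```ts
// inputSize = 128 → strides [128/16, 128/8] = [8, 16], anchors per cell [2, 6]
const counts = [8, 16].map((stride, i) => ((128 / stride) ** 2) * [2, 6][i]);
console.log(counts);                // [512, 384]
console.log(counts[0] + counts[1]); // 896 total anchors,
// matching the concat512 + concat384 outputs assembled in blazeface.ts
```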


@@ -1,11 +1,19 @@
/**
* PoseNet body detection model implementation
* See `posenet.ts` for entry point
*
* Based on: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
*/
import * as utils from './utils';
import * as kpt from './keypoints';
import type { Box } from '../result';
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import type { BodyResult, Box } from '../result';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
import * as utils from './posenetutils';
let model: GraphModel;
const poseNetOutputs = ['MobilenetV1/offset_2/BiasAdd'/* offsets */, 'MobilenetV1/heatmap_2/BiasAdd'/* heatmapScores */, 'MobilenetV1/displacement_fwd_2/BiasAdd'/* displacementFwd */, 'MobilenetV1/displacement_bwd_2/BiasAdd'/* displacementBwd */];
const localMaximumRadius = 1;
const outputStride = 16;
@@ -37,11 +45,11 @@ function traverse(edgeId, sourceKeypoint, targetId, scores, offsets, displacemen
}
const targetKeyPointIndices = getStridedIndexNearPoint(targetKeypoint, height, width);
const score = scores.get(targetKeyPointIndices.y, targetKeyPointIndices.x, targetId);
return { position: targetKeypoint, part: kpt.partNames[targetId], score };
return { position: targetKeypoint, part: utils.partNames[targetId], score };
}
export function decodePose(root, scores, offsets, displacementsFwd, displacementsBwd) {
const tuples = kpt.poseChain.map(([parentJoinName, childJoinName]) => ([kpt.partIds[parentJoinName], kpt.partIds[childJoinName]]));
const tuples = utils.poseChain.map(([parentJoinName, childJoinName]) => ([utils.partIds[parentJoinName], utils.partIds[childJoinName]]));
const edgesFwd = tuples.map(([, childJointId]) => childJointId);
const edgesBwd = tuples.map(([parentJointId]) => parentJointId);
const numParts = scores.shape[2]; // [21,21,17]
@@ -51,7 +59,7 @@ export function decodePose(root, scores, offsets, displacementsFwd, displacement
const rootPoint = utils.getImageCoords(root.part, outputStride, offsets);
keypoints[root.part.id] = {
score: root.score,
part: kpt.partNames[root.part.id],
part: utils.partNames[root.part.id],
position: rootPoint,
};
// Decode the part positions upwards in the tree, following the backward displacements.
@@ -146,3 +154,32 @@ export function decode(offsets, scores, displacementsFwd, displacementsBwd, maxD
}
return poses;
}
export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
const res = tf.tidy(() => {
if (!model.inputs[0].shape) return [];
const resized = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
const normalized = tf.sub(tf.div(tf.cast(resized, 'float32'), 127.5), 1.0);
const results: Array<Tensor> = model.execute(normalized, poseNetOutputs) as Array<Tensor>;
const results3d = results.map((y) => tf.squeeze(y, [0]));
results3d[1] = results3d[1].sigmoid(); // apply sigmoid on scores
return results3d;
});
const buffers = await Promise.all(res.map((tensor: Tensor) => tensor.buffer()));
for (const t of res) tf.dispose(t);
const decoded = await decode(buffers[0], buffers[1], buffers[2], buffers[3], config.body.maxDetected, config.body.minConfidence);
if (!model.inputs[0].shape) return [];
const scaled = utils.scalePoses(decoded, [input.shape[1], input.shape[2]], [model.inputs[0].shape[2], model.inputs[0].shape[1]]) as BodyResult[];
return scaled;
}
export async function load(config: Config): Promise<GraphModel> {
if (!model || env.initial) {
model = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel;
if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath);
else if (config.debug) log('load model:', model['modelUrl']);
} else if (config.debug) log('cached model:', model['modelUrl']);
return model;
}
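
A minimal usage sketch for the added entry points (hedged: assumes a 4D image tensor and a populated `config.body`):

```ts
// hypothetical wiring; imageTensor has shape [1, height, width, 3]
await load(config);                                // loads and caches the graph model
const bodies = await predict(imageTensor, config); // BodyResult[] scaled to input size
for (const body of bodies) console.log(body.score, body.box);
```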


@@ -3,15 +3,48 @@
* See `posenet.ts` for entry point
*/
import * as kpt from './keypoints';
import type { BodyResult } from '../result';
export const partNames = [
'nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder',
'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist',
'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle',
];
export const count = partNames.length; // 17 keypoints
export const partIds = partNames.reduce((result, jointName, i) => {
result[jointName] = i;
return result;
}, {});
const connectedPartNames = [
['leftHip', 'leftShoulder'], ['leftElbow', 'leftShoulder'],
['leftElbow', 'leftWrist'], ['leftHip', 'leftKnee'],
['leftKnee', 'leftAnkle'], ['rightHip', 'rightShoulder'],
['rightElbow', 'rightShoulder'], ['rightElbow', 'rightWrist'],
['rightHip', 'rightKnee'], ['rightKnee', 'rightAnkle'],
['leftShoulder', 'rightShoulder'], ['leftHip', 'rightHip'],
];
export const connectedPartIndices = connectedPartNames.map(([jointNameA, jointNameB]) => ([partIds[jointNameA], partIds[jointNameB]]));
export const poseChain = [
['nose', 'leftEye'], ['leftEye', 'leftEar'], ['nose', 'rightEye'],
['rightEye', 'rightEar'], ['nose', 'leftShoulder'],
['leftShoulder', 'leftElbow'], ['leftElbow', 'leftWrist'],
['leftShoulder', 'leftHip'], ['leftHip', 'leftKnee'],
['leftKnee', 'leftAnkle'], ['nose', 'rightShoulder'],
['rightShoulder', 'rightElbow'], ['rightElbow', 'rightWrist'],
['rightShoulder', 'rightHip'], ['rightHip', 'rightKnee'],
['rightKnee', 'rightAnkle'],
];
export function eitherPointDoesntMeetConfidence(a: number, b: number, minConfidence: number) {
return (a < minConfidence || b < minConfidence);
}
export function getAdjacentKeyPoints(keypoints, minConfidence: number) {
return kpt.connectedPartIndices.reduce((result, [leftJoint, rightJoint]) => {
return connectedPartIndices.reduce((result, [leftJoint, rightJoint]) => {
if (eitherPointDoesntMeetConfidence(keypoints[leftJoint].score, keypoints[rightJoint].score, minConfidence)) {
return result;
}
@@ -123,7 +156,7 @@ export class MaxHeap {
export function getOffsetPoint(y, x, keypoint, offsets) {
return {
y: offsets.get(y, x, keypoint),
x: offsets.get(y, x, keypoint + kpt.count),
x: offsets.get(y, x, keypoint + count),
};
}
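
`getOffsetPoint` assumes (worth stating explicitly) that the offsets tensor has shape `[h, w, 2 * count]`, with y-displacements in channels `0..count-1` and x-displacements in channels `count..2*count-1`:

```ts
// layout sketch under that assumption, for one keypoint id:
const yOff = offsets.get(y, x, keypointId);         // y displacement channel
const xOff = offsets.get(y, x, keypointId + count); // x channel, shifted by 17
```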

src/face/angles.ts (new file, 133 lines)

@@ -0,0 +1,133 @@
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);
const calculateGaze = (face): { bearing: number, strength: number } => {
const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 };
const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes
const eyeRatio = 1; // factor to normalize changes x vs y
const left = face.mesh[33][2] > face.mesh[263][2]; // pick left or right eye depending on which one is closer, based on the outside point z-axis
const irisCenter = left ? face.mesh[473] : face.mesh[468];
const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so relative point is less precise
? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2]
: [(face.mesh[263][0] + face.mesh[362][0]) / 2, (face.mesh[263][1] + face.mesh[362][1]) / 2];
const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions
? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]]
: [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]];
const eyeDiff = [ // x distance between extreme point and center point normalized with eye size
(eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
];
let strength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences
strength = Math.min(strength, face.boxRaw[2] / 2, face.boxRaw[3] / 2); // limit strength to half of box size to avoid clipping due to low precision
const bearing = (radians([0, 0], eyeDiff) + (Math.PI / 2)) % Math.PI; // use eyeDiff instead of the eyeCenter/irisCenter combo due to manual adjustments, rotated clockwise by 90 degrees
return { bearing, strength };
};
export const calculateFaceAngle = (face, imageSize): {
angle: { pitch: number, yaw: number, roll: number },
matrix: [number, number, number, number, number, number, number, number, number],
gaze: { bearing: number, strength: number },
} => {
// const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360);
const normalize = (v) => { // normalize vector
const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
v[0] /= length;
v[1] /= length;
v[2] /= length;
return v;
};
const subVectors = (a, b) => { // vector subtraction (a - b)
const x = a[0] - b[0];
const y = a[1] - b[1];
const z = a[2] - b[2];
return [x, y, z];
};
const crossVectors = (a, b) => { // vector cross product (a x b)
const x = a[1] * b[2] - a[2] * b[1];
const y = a[2] * b[0] - a[0] * b[2];
const z = a[0] * b[1] - a[1] * b[0];
return [x, y, z];
};
// 3x3 rotation matrix to Euler angles based on https://www.geometrictools.com/Documentation/EulerAngles.pdf
const rotationMatrixToEulerAngle = (r) => {
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const [r00, r01, r02, r10, r11, r12, r20, r21, r22] = r;
let thetaX: number;
let thetaY: number;
let thetaZ: number;
if (r10 < 1) { // YZX calculation
if (r10 > -1) {
thetaZ = Math.asin(r10);
thetaY = Math.atan2(-r20, r00);
thetaX = Math.atan2(-r12, r11);
} else {
thetaZ = -Math.PI / 2;
thetaY = -Math.atan2(r21, r22);
thetaX = 0;
}
} else {
thetaZ = Math.PI / 2;
thetaY = Math.atan2(r21, r22);
thetaX = 0;
}
if (isNaN(thetaX)) thetaX = 0;
if (isNaN(thetaY)) thetaY = 0;
if (isNaN(thetaZ)) thetaZ = 0;
return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ };
};
// simple Euler angle calculation based on the existing 3D mesh
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const meshToEulerAngle = (mesh) => {
const radians = (a1, a2, b1, b2) => Math.atan2(b2 - a2, b1 - a1);
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const angle = {
// values are in radians in range of -pi/2 to pi/2 which is -90 to +90 degrees, value of 0 means center
// pitch is face move up/down
pitch: radians(mesh[10][1], mesh[10][2], mesh[152][1], mesh[152][2]), // looking at y,z of top and bottom points of the face
// yaw is face turn left/right
yaw: radians(mesh[33][0], mesh[33][2], mesh[263][0], mesh[263][2]), // looking at x,z of outside corners of leftEye and rightEye
// roll is face lean left/right
roll: radians(mesh[33][0], mesh[33][1], mesh[263][0], mesh[263][1]), // looking at x,y of outside corners of leftEye and rightEye
};
return angle;
};
// initialize gaze and mesh
const mesh = face.meshRaw;
if (!mesh || mesh.length < 300) return { angle: { pitch: 0, yaw: 0, roll: 0 }, matrix: [1, 0, 0, 0, 1, 0, 0, 0, 1], gaze: { bearing: 0, strength: 0 } };
const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5;
// top, bottom, left, right
const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [
// make the xyz coordinates proportional, independent of the image/box size
pt[0] * imageSize[0] / size,
pt[1] * imageSize[1] / size,
pt[2],
]);
const y_axis = normalize(subVectors(pts[1], pts[0]));
let x_axis = normalize(subVectors(pts[3], pts[2]));
const z_axis = normalize(crossVectors(x_axis, y_axis));
// adjust x_axis to make sure that all axes are perpendicular to each other
x_axis = crossVectors(y_axis, z_axis);
// Rotation Matrix from Axis Vectors - http://renderdan.blogspot.com/2006/05/rotation-matrix-from-axis-vectors.html
// 3x3 rotation matrix is flattened to an array in row-major order. Note that the rotation represented by this matrix is inverted.
const matrix: [number, number, number, number, number, number, number, number, number] = [
x_axis[0], x_axis[1], x_axis[2],
y_axis[0], y_axis[1], y_axis[2],
z_axis[0], z_axis[1], z_axis[2],
];
const angle = rotationMatrixToEulerAngle(matrix);
// const angle = meshToEulerAngle(mesh);
// we have iris keypoints so we can calculate gaze direction
const gaze = mesh.length === 478 ? calculateGaze(face) : { bearing: 0, strength: 0 };
return { angle, matrix, gaze };
};
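
A minimal usage sketch (hedged: assumes a face result with `meshRaw` populated by the mesh model, plus the source image dimensions):

```ts
const { angle, matrix, gaze } = calculateFaceAngle(face, [imageWidth, imageHeight]);
const toDeg = (rad: number) => Math.round((rad * 180) / Math.PI);
console.log(`yaw ${toDeg(angle.yaw)} pitch ${toDeg(angle.pitch)} roll ${toDeg(angle.roll)}`);
if (gaze.strength > 0) console.log('gaze bearing (radians):', gaze.bearing);
```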

src/face/blazeface.ts (new file, 96 lines)

@@ -0,0 +1,96 @@
/**
* BlazeFace, FaceMesh & Iris model implementation
* See `facemesh.ts` for entry point
*/
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as util from './facemeshutil';
import type { Config } from '../config';
import type { Tensor, GraphModel } from '../tfjs/types';
import { env } from '../util/env';
const keypointsCount = 6;
let model: GraphModel | null;
let anchorsData: [number, number][] = [];
let anchors: Tensor | null = null;
let inputSize = 0;
// export const size = () => (model && model.inputs[0].shape ? model.inputs[0].shape[2] : 0);
export const size = () => inputSize;
export async function load(config: Config): Promise<GraphModel> {
if (env.initial) model = null;
if (!model) {
model = await tf.loadGraphModel(join(config.modelBasePath, config.face.detector?.modelPath || '')) as unknown as GraphModel;
if (!model || !model['modelUrl']) log('load model failed:', config.face.detector?.modelPath || '');
else if (config.debug) log('load model:', model['modelUrl']);
} else if (config.debug) log('cached model:', model['modelUrl']);
inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
if (inputSize === -1) inputSize = 64;
anchorsData = util.generateAnchors(inputSize);
anchors = tf.tensor2d(anchorsData);
return model;
}
function decodeBounds(boxOutputs) {
const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
const centers = tf.add(boxStarts, anchors);
const boxSizes = tf.slice(boxOutputs, [0, 3], [-1, 2]);
const boxSizesNormalized = tf.div(boxSizes, inputSize);
const centersNormalized = tf.div(centers, inputSize);
const halfBoxSize = tf.div(boxSizesNormalized, 2);
const starts = tf.sub(centersNormalized, halfBoxSize);
const ends = tf.add(centersNormalized, halfBoxSize);
const startNormalized = tf.mul(starts, inputSize);
const endNormalized = tf.mul(ends, inputSize);
const concatAxis = 1;
return tf.concat2d([startNormalized, endNormalized], concatAxis);
}
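
Net of the matched divide/multiply by `inputSize`, `decodeBounds` converts each raw prediction from anchor-relative center/size form to corner form; a scalar sketch of the same arithmetic (hypothetical helper, not part of the commit):

```ts
// raw row layout: [logit, cxOffset, cyOffset, width, height, ...keypoints]
function decodeOne(raw: number[], anchor: [number, number]): number[] {
  const cx = raw[1] + anchor[0]; // centers = boxStarts + anchors
  const cy = raw[2] + anchor[1];
  const [w, h] = [raw[3], raw[4]];
  return [cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2]; // [x1, y1, x2, y2]
}
```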
export async function getBoxes(inputImage: Tensor, config: Config) {
// sanity check on input
if ((!inputImage) || (inputImage['isDisposedInternal']) || (inputImage.shape.length !== 4) || (inputImage.shape[1] < 1) || (inputImage.shape[2] < 1)) return { boxes: [] };
const [batch, boxes, scores] = tf.tidy(() => {
const resizedImage = tf.image.resizeBilinear(inputImage, [inputSize, inputSize]);
const normalizedImage = tf.sub(tf.div(resizedImage, 127.5), 0.5);
const res = model?.execute(normalizedImage);
let batchOut;
if (Array.isArray(res)) { // are we using tfhub or pinto converted model?
const sorted = res.sort((a, b) => a.size - b.size);
const concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16
const concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16
const concat = tf.concat([concat512, concat384], 1);
batchOut = tf.squeeze(concat, 0);
} else {
batchOut = tf.squeeze(res); // when using tfhub model
}
const boxesOut = decodeBounds(batchOut);
const logits = tf.slice(batchOut, [0, 0], [-1, 1]);
const scoresOut = tf.squeeze(tf.sigmoid(logits)); // inside tf.tidy
return [batchOut, boxesOut, scoresOut];
});
const nmsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, (config.face.detector?.maxDetected || 0), (config.face.detector?.iouThreshold || 0), (config.face.detector?.minConfidence || 0));
const nms = await nmsTensor.array();
tf.dispose(nmsTensor);
const annotatedBoxes: Array<{ box: { startPoint: Tensor, endPoint: Tensor }, landmarks: Tensor, anchor: [number, number] | undefined, confidence: number }> = [];
const scoresData = await scores.data();
for (let i = 0; i < nms.length; i++) {
const confidence = scoresData[nms[i]];
if (confidence > (config.face.detector?.minConfidence || 0)) {
const boundingBox = tf.slice(boxes, [nms[i], 0], [1, -1]);
const landmarks = tf.tidy(() => tf.reshape(tf.squeeze(tf.slice(batch, [nms[i], keypointsCount - 1], [1, -1])), [keypointsCount, -1]));
annotatedBoxes.push({ box: util.createBox(boundingBox), landmarks, anchor: anchorsData[nms[i]], confidence });
tf.dispose(boundingBox);
}
}
tf.dispose(batch);
tf.dispose(boxes);
tf.dispose(scores);
return {
boxes: annotatedBoxes,
scaleFactor: [inputImage.shape[2] / inputSize, inputImage.shape[1] / inputSize],
};
}
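
A usage sketch for the rewritten detector (hedged: the tensors inside each returned box are the caller's to dispose, as `facemesh.ts` below does):

```ts
const detected = await getBoxes(imageTensor, config); // imageTensor: [1, h, w, 3]
for (const item of detected.boxes) {
  console.log('face candidate confidence:', item.confidence);
  tf.dispose([item.box.startPoint, item.box.endPoint, item.landmarks]);
}
```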


@@ -5,145 +5,12 @@
import { log, now } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as facemesh from '../blazeface/facemesh';
import * as facemesh from './facemesh';
import * as emotion from '../gear/emotion';
import * as faceres from './faceres';
import type { FaceResult } from '../result';
import type { Tensor } from '../tfjs/types';
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);
const calculateGaze = (face): { bearing: number, strength: number } => {
const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 };
const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes
const eyeRatio = 1; // factor to normalize changes x vs y
const left = face.mesh[33][2] > face.mesh[263][2]; // pick left or right eye depending on which one is closer, based on the outside point z-axis
const irisCenter = left ? face.mesh[473] : face.mesh[468];
const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so relative point is less precise
? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2]
: [(face.mesh[263][0] + face.mesh[362][0]) / 2, (face.mesh[263][1] + face.mesh[362][1]) / 2];
const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions
? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]]
: [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]];
const eyeDiff = [ // x distance between extreme point and center point normalized with eye size
(eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
];
let strength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences
strength = Math.min(strength, face.boxRaw[2] / 2, face.boxRaw[3] / 2); // limit strength to half of box size to avoid clipping due to low precision
const bearing = (radians([0, 0], eyeDiff) + (Math.PI / 2)) % Math.PI; // use eyeDiff instead of the eyeCenter/irisCenter combo due to manual adjustments, rotated clockwise by 90 degrees
return { bearing, strength };
};
const calculateFaceAngle = (face, imageSize): {
angle: { pitch: number, yaw: number, roll: number },
matrix: [number, number, number, number, number, number, number, number, number],
gaze: { bearing: number, strength: number },
} => {
// const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360);
const normalize = (v) => { // normalize vector
const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
v[0] /= length;
v[1] /= length;
v[2] /= length;
return v;
};
const subVectors = (a, b) => { // vector subtraction (a - b)
const x = a[0] - b[0];
const y = a[1] - b[1];
const z = a[2] - b[2];
return [x, y, z];
};
const crossVectors = (a, b) => { // vector cross product (a x b)
const x = a[1] * b[2] - a[2] * b[1];
const y = a[2] * b[0] - a[0] * b[2];
const z = a[0] * b[1] - a[1] * b[0];
return [x, y, z];
};
// 3x3 rotation matrix to Euler angles based on https://www.geometrictools.com/Documentation/EulerAngles.pdf
const rotationMatrixToEulerAngle = (r) => {
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const [r00, r01, r02, r10, r11, r12, r20, r21, r22] = r;
let thetaX: number;
let thetaY: number;
let thetaZ: number;
if (r10 < 1) { // YZX calculation
if (r10 > -1) {
thetaZ = Math.asin(r10);
thetaY = Math.atan2(-r20, r00);
thetaX = Math.atan2(-r12, r11);
} else {
thetaZ = -Math.PI / 2;
thetaY = -Math.atan2(r21, r22);
thetaX = 0;
}
} else {
thetaZ = Math.PI / 2;
thetaY = Math.atan2(r21, r22);
thetaX = 0;
}
if (isNaN(thetaX)) thetaX = 0;
if (isNaN(thetaY)) thetaY = 0;
if (isNaN(thetaZ)) thetaZ = 0;
return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ };
};
// simple Euler angle calculation based on the existing 3D mesh
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const meshToEulerAngle = (mesh) => {
const radians = (a1, a2, b1, b2) => Math.atan2(b2 - a2, b1 - a1);
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const angle = {
// values are in radians in range of -pi/2 to pi/2 which is -90 to +90 degrees, value of 0 means center
// pitch is face move up/down
pitch: radians(mesh[10][1], mesh[10][2], mesh[152][1], mesh[152][2]), // looking at y,z of top and bottom points of the face
// yaw is face turn left/right
yaw: radians(mesh[33][0], mesh[33][2], mesh[263][0], mesh[263][2]), // looking at x,z of outside corners of leftEye and rightEye
// roll is face lean left/right
roll: radians(mesh[33][0], mesh[33][1], mesh[263][0], mesh[263][1]), // looking at x,y of outside corners of leftEye and rightEye
};
return angle;
};
// initialize gaze and mesh
const mesh = face.meshRaw;
if (!mesh || mesh.length < 300) return { angle: { pitch: 0, yaw: 0, roll: 0 }, matrix: [1, 0, 0, 0, 1, 0, 0, 0, 1], gaze: { bearing: 0, strength: 0 } };
const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5;
// top, bottom, left, right
const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [
// make the xyz coordinates proportional, independent of the image/box size
pt[0] * imageSize[0] / size,
pt[1] * imageSize[1] / size,
pt[2],
]);
const y_axis = normalize(subVectors(pts[1], pts[0]));
let x_axis = normalize(subVectors(pts[3], pts[2]));
const z_axis = normalize(crossVectors(x_axis, y_axis));
// adjust x_axis to make sure that all axes are perpendicular to each other
x_axis = crossVectors(y_axis, z_axis);
// Rotation Matrix from Axis Vectors - http://renderdan.blogspot.com/2006/05/rotation-matrix-from-axis-vectors.html
// 3x3 rotation matrix is flattened to an array in row-major order. Note that the rotation represented by this matrix is inverted.
const matrix: [number, number, number, number, number, number, number, number, number] = [
x_axis[0], x_axis[1], x_axis[2],
y_axis[0], y_axis[1], y_axis[2],
z_axis[0], z_axis[1], z_axis[2],
];
const angle = rotationMatrixToEulerAngle(matrix);
// const angle = meshToEulerAngle(mesh);
// we have iris keypoints so we can calculate gaze direction
const gaze = mesh.length === 478 ? calculateGaze(face) : { bearing: 0, strength: 0 };
return { angle, matrix, gaze };
};
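For a face looking straight at the camera the three axis vectors come out near-orthonormal and the derived angles near zero, which allows a quick sanity check. A minimal usage sketch, assuming this function is exported as `calculateFaceAngle` (as the import below suggests) and that `face`, `imageWidth` and `imageHeight` come from an earlier detection:

// hypothetical usage sketch: angle values are returned in radians
const { angle, matrix, gaze } = calculateFaceAngle(face, [imageWidth, imageHeight]);
const deg = (rad: number) => Math.round((rad * 180) / Math.PI); // radians to degrees for display
console.log(`pitch ${deg(angle.pitch)} yaw ${deg(angle.yaw)} roll ${deg(angle.roll)}`, matrix, gaze);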
import { calculateFaceAngle } from './angles';
export const detectFace = async (parent /* instance of human */, input: Tensor): Promise<FaceResult[]> => {
// run facemesh, includes blazeface and iris
@ -158,6 +25,7 @@ export const detectFace = async (parent /* instance of human */, input: Tensor):
const faceRes: Array<FaceResult> = [];
parent.state = 'run:face';
timeStamp = now();
const faces = await facemesh.predict(input, parent.config);
parent.performance.face = Math.trunc(now() - timeStamp);
if (!input.shape || input.shape.length !== 4) return [];
@ -226,7 +94,7 @@ export const detectFace = async (parent /* instance of human */, input: Tensor):
delete faces[i].annotations.leftEyeIris;
delete faces[i].annotations.rightEyeIris;
}
const irisSize = (faces[i].annotations && faces[i].annotations.leftEyeIris && faces[i].annotations.rightEyeIris
const irisSize = (faces[i].annotations && faces[i].annotations.leftEyeIris && faces[i].annotations.leftEyeIris[0] && faces[i].annotations.rightEyeIris && faces[i].annotations.rightEyeIris[0]
&& (faces[i].annotations.leftEyeIris.length > 0) && (faces[i].annotations.rightEyeIris.length > 0)
&& (faces[i].annotations.leftEyeIris[0] !== null) && (faces[i].annotations.rightEyeIris[0] !== null))
? Math.max(Math.abs(faces[i].annotations.leftEyeIris[3][0] - faces[i].annotations.leftEyeIris[1][0]), Math.abs(faces[i].annotations.rightEyeIris[4][1] - faces[i].annotations.rightEyeIris[2][1])) / input.shape[2]

139
src/face/facemesh.ts Normal file
View File

@ -0,0 +1,139 @@
/**
* BlazeFace, FaceMesh & Iris model implementation
*
* Based on:
* - [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
* - Facial Spacial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
* - Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
*/
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as blazeface from './blazeface';
import * as util from './facemeshutil';
import * as coords from './facemeshcoords';
import * as iris from './iris';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { FaceResult, Point } from '../result';
import type { Config } from '../config';
import { env } from '../util/env';
type BoxCache = { startPoint: Point, endPoint: Point, landmarks: Array<Point>, confidence: number, faceConfidence?: number | undefined };
let boxCache: Array<BoxCache> = [];
let model: GraphModel | null = null;
let inputSize = 0;
let skipped = Number.MAX_SAFE_INTEGER;
let detectedFaces = 0;
export async function predict(input: Tensor, config: Config): Promise<FaceResult[]> {
if (!config.skipFrame || (((detectedFaces !== config.face.detector?.maxDetected) || !config.face.mesh?.enabled)) && (skipped > (config.face.detector?.skipFrames || 0))) { // reset cached boxes
const newBoxes = await blazeface.getBoxes(input, config); // get results from blazeface detector
boxCache = []; // empty cache
for (const possible of newBoxes.boxes) { // extract data from detector
const startPoint = await possible.box.startPoint.data() as unknown as Point;
const endPoint = await possible.box.endPoint.data() as unknown as Point;
const landmarks = await possible.landmarks.array() as Array<Point>;
boxCache.push({ startPoint, endPoint, landmarks, confidence: possible.confidence });
}
newBoxes.boxes.forEach((prediction) => tf.dispose([prediction.box.startPoint, prediction.box.endPoint, prediction.landmarks]));
for (let i = 0; i < boxCache.length; i++) { // enlarge and squarify detected boxes
const scaledBox = util.scaleBoxCoordinates({ startPoint: boxCache[i].startPoint, endPoint: boxCache[i].endPoint }, newBoxes.scaleFactor);
const enlargedBox = util.enlargeBox(scaledBox);
const squarifiedBox = util.squarifyBox(enlargedBox);
boxCache[i] = { ...squarifiedBox, confidence: boxCache[i].confidence, landmarks: boxCache[i].landmarks };
}
skipped = 0;
} else {
skipped++;
}
const faces: Array<FaceResult> = [];
const newBoxes: Array<BoxCache> = [];
let id = 0;
for (let box of boxCache) {
let angle = 0;
let rotationMatrix;
const face: FaceResult = {
id: id++,
mesh: [],
meshRaw: [],
box: [0, 0, 0, 0],
boxRaw: [0, 0, 0, 0],
score: 0,
boxScore: 0,
faceScore: 0,
annotations: {},
};
if (config.face.detector?.rotation && config.face.mesh?.enabled && env.kernels.includes('rotatewithoffset')) {
[angle, rotationMatrix, face.tensor] = util.correctFaceRotation(box, input, inputSize);
} else {
rotationMatrix = util.IDENTITY_MATRIX;
const cut = util.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, input, config.face.mesh?.enabled ? [inputSize, inputSize] : [blazeface.size(), blazeface.size()]);
face.tensor = tf.div(cut, 255);
tf.dispose(cut);
}
face.boxScore = Math.round(100 * box.confidence) / 100;
if (!config.face.mesh?.enabled) { // mesh not enabled, return results from detector only
face.box = util.getClampedBox(box, input);
face.boxRaw = util.getRawBox(box, input);
face.score = Math.round(100 * box.confidence || 0) / 100;
face.mesh = box.landmarks.map((pt) => [
((box.startPoint[0] + box.endPoint[0])) / 2 + ((box.endPoint[0] + box.startPoint[0]) * pt[0] / blazeface.size()),
((box.startPoint[1] + box.endPoint[1])) / 2 + ((box.endPoint[1] + box.startPoint[1]) * pt[1] / blazeface.size()),
]);
face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key]]]; // add annotations
} else if (!model) { // mesh enabled, but not loaded
if (config.debug) log('face mesh detection requested, but model is not loaded');
} else { // mesh enabled
const [contours, confidence, contourCoords] = model.execute(face.tensor as Tensor) as Array<Tensor>; // first returned tensor represents facial contours which are already included in the coordinates.
tf.dispose(contours);
const faceConfidence = (await confidence.data())[0] as number;
tf.dispose(confidence);
const coordsReshaped = tf.reshape(contourCoords, [-1, 3]);
let rawCoords = await coordsReshaped.array();
tf.dispose(contourCoords);
tf.dispose(coordsReshaped);
if (faceConfidence < (config.face.detector?.minConfidence || 1)) {
box.confidence = faceConfidence; // reset confidence of cached box
tf.dispose(face.tensor);
} else {
if (config.face.iris?.enabled) rawCoords = await iris.augmentIris(rawCoords, face.tensor, config, inputSize); // augment results with iris
face.mesh = util.transformRawCoords(rawCoords, box, angle, rotationMatrix, inputSize); // get processed mesh
face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
box = { ...util.enlargeBox(util.calculateLandmarksBoundingBox(face.mesh), 1.5), confidence: box.confidence }; // redefine box using the mesh-calculated bounding box
for (const key of Object.keys(coords.meshAnnotations)) face.annotations[key] = coords.meshAnnotations[key].map((index) => face.mesh[index]); // add annotations
if (config.face.detector?.rotation && config.face.mesh.enabled && config.face.description?.enabled && env.kernels.includes('rotatewithoffset')) { // do rotation one more time with mesh keypoints if we want to return perfect image
tf.dispose(face.tensor); // dispose so we can overwrite original face
[angle, rotationMatrix, face.tensor] = util.correctFaceRotation(box, input, inputSize);
}
face.box = util.getClampedBox(box, input); // update detected box with box around the face mesh
face.boxRaw = util.getRawBox(box, input);
face.score = Math.round(100 * faceConfidence || 100 * box.confidence || 0) / 100;
face.faceScore = Math.round(100 * faceConfidence) / 100;
box = { ...util.squarifyBox(box), confidence: box.confidence, faceConfidence }; // update stored cache values
}
}
faces.push(face);
newBoxes.push(box);
}
if (config.face.mesh?.enabled) boxCache = newBoxes.filter((a) => a.confidence > (config.face.detector?.minConfidence || 0)); // drop cached boxes that fell below minimum confidence
detectedFaces = faces.length;
return faces;
}
export async function load(config: Config): Promise<GraphModel> {
if (env.initial) model = null;
if (!model) {
model = await tf.loadGraphModel(join(config.modelBasePath, config.face.mesh?.modelPath || '')) as unknown as GraphModel;
if (!model || !model['modelUrl']) log('load model failed:', config.face.mesh?.modelPath);
else if (config.debug) log('load model:', model['modelUrl']);
} else if (config.debug) log('cached model:', model['modelUrl']);
inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
if (inputSize === -1) inputSize = 64;
return model;
}
export const triangulation = coords.TRI468;
export const uvmap = coords.UV468;
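For reference, a minimal sketch of driving this module directly; it assumes `config` is a populated `Config`, `inputTensor` is a 4D image tensor, and that the separate detector and iris models are loaded as well (see `models.ts` below). The `Human` pipeline normally performs these steps inside `detectFace`:

// hypothetical standalone usage of the facemesh module
await facemesh.load(config); // loads the mesh model and determines inputSize
const faces = await facemesh.predict(inputTensor, config); // runs detector + mesh (+ iris when enabled)
for (const f of faces) console.log(f.score, f.box, f.mesh.length); // 468 points, or 478 with iris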

View File

@ -3,7 +3,7 @@
* See `facemesh.ts` for entry point
*/
export const MESH_ANNOTATIONS = {
export const meshAnnotations = {
silhouette: [
10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
@ -42,6 +42,22 @@ export const MESH_ANNOTATIONS = {
leftCheek: [425],
};
export const meshLandmarks = {
count: 468,
mouth: 13,
symmetryLine: [13, meshAnnotations['midwayBetweenEyes'][0]],
};
export const blazeFaceLandmarks = {
leftEye: 0,
rightEye: 1,
nose: 2,
mouth: 3,
leftEar: 4,
rightEar: 5,
symmetryLine: [3, 2],
};
export const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypoints to iris model keypoints.
{ key: 'EyeUpper0', indices: [9, 10, 11, 12, 13, 14, 15] },
{ key: 'EyeUpper1', indices: [25, 26, 27, 28, 29, 30, 31] },

166
src/face/facemeshutil.ts Normal file
View File

@ -0,0 +1,166 @@
/**
* BlazeFace, FaceMesh & Iris model implementation
* See `facemesh.ts` for entry point
*/
import * as tf from '../../dist/tfjs.esm.js';
import * as coords from './facemeshcoords';
import type { Box, Point } from '../result';
export const createBox = (startEndTensor) => ({ startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]), endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]) });
export const disposeBox = (t) => tf.dispose([t.startPoint, t.endPoint]);
export const getBoxSize = (box): [number, number] => [Math.abs(box.endPoint[0] - box.startPoint[0]), Math.abs(box.endPoint[1] - box.startPoint[1])];
export const getBoxCenter = (box): [number, number] => [box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2, box.startPoint[1] + (box.endPoint[1] - box.startPoint[1]) / 2];
export const getClampedBox = (box, input): Box => (box ? [
Math.trunc(Math.max(0, box.startPoint[0])),
Math.trunc(Math.max(0, box.startPoint[1])),
Math.trunc(Math.min((input.shape[2] || 0), box.endPoint[0]) - Math.max(0, box.startPoint[0])),
Math.trunc(Math.min((input.shape[1] || 0), box.endPoint[1]) - Math.max(0, box.startPoint[1])),
] : [0, 0, 0, 0]);
export const getRawBox = (box, input): Box => (box ? [
box.startPoint[0] / (input.shape[2] || 0),
box.startPoint[1] / (input.shape[1] || 0),
(box.endPoint[0] - box.startPoint[0]) / (input.shape[2] || 0),
(box.endPoint[1] - box.startPoint[1]) / (input.shape[1] || 0),
] : [0, 0, 0, 0]);
export const scaleBoxCoordinates = (box, factor) => {
const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
return { startPoint, endPoint };
};
export const cutBoxFromImageAndResize = (box, image, cropSize) => {
const h = image.shape[1];
const w = image.shape[2];
return tf.image.cropAndResize(image, [[box.startPoint[1] / h, box.startPoint[0] / w, box.endPoint[1] / h, box.endPoint[0] / w]], [0], cropSize);
};
export const enlargeBox = (box, factor = 1.5) => {
const center = getBoxCenter(box);
const size = getBoxSize(box);
const halfSize: [number, number] = [factor * size[0] / 2, factor * size[1] / 2];
return { startPoint: [center[0] - halfSize[0], center[1] - halfSize[1]] as Point, endPoint: [center[0] + halfSize[0], center[1] + halfSize[1]] as Point, landmarks: box.landmarks };
};
export const squarifyBox = (box) => {
const centers = getBoxCenter(box);
const size = getBoxSize(box);
const halfSize = Math.max(...size) / 2;
return { startPoint: [Math.round(centers[0] - halfSize), Math.round(centers[1] - halfSize)] as Point, endPoint: [Math.round(centers[0] + halfSize), Math.round(centers[1] + halfSize)] as Point, landmarks: box.landmarks };
};
export const calculateLandmarksBoundingBox = (landmarks) => {
const xs = landmarks.map((d) => d[0]);
const ys = landmarks.map((d) => d[1]);
return { startPoint: [Math.min(...xs), Math.min(...ys)], endPoint: [Math.max(...xs), Math.max(...ys)], landmarks };
};
export const IDENTITY_MATRIX = [[1, 0, 0], [0, 1, 0], [0, 0, 1]];
export const normalizeRadians = (angle) => angle - 2 * Math.PI * Math.floor((angle + Math.PI) / (2 * Math.PI));
export const computeRotation = (point1, point2) => normalizeRadians(Math.PI / 2 - Math.atan2(-(point2[1] - point1[1]), point2[0] - point1[0]));
export const radToDegrees = (rad) => rad * 180 / Math.PI;
export const buildTranslationMatrix = (x, y) => [[1, 0, x], [0, 1, y], [0, 0, 1]];
export const dot = (v1, v2) => {
let product = 0;
for (let i = 0; i < v1.length; i++) product += v1[i] * v2[i];
return product;
};
export const getColumnFrom2DArr = (arr, columnIndex) => {
const column: Array<number> = [];
for (let i = 0; i < arr.length; i++) column.push(arr[i][columnIndex]);
return column;
};
export const multiplyTransformMatrices = (mat1, mat2) => {
const product: Array<number[]> = [];
const size = mat1.length;
for (let row = 0; row < size; row++) {
product.push([]);
for (let col = 0; col < size; col++) product[row].push(dot(mat1[row], getColumnFrom2DArr(mat2, col)));
}
return product;
};
export const buildRotationMatrix = (rotation, center) => {
const cosA = Math.cos(rotation);
const sinA = Math.sin(rotation);
const rotationMatrix = [[cosA, -sinA, 0], [sinA, cosA, 0], [0, 0, 1]];
const translationMatrix = buildTranslationMatrix(center[0], center[1]);
const translationTimesRotation = multiplyTransformMatrices(translationMatrix, rotationMatrix);
const negativeTranslationMatrix = buildTranslationMatrix(-center[0], -center[1]);
return multiplyTransformMatrices(translationTimesRotation, negativeTranslationMatrix);
};
export const invertTransformMatrix = (matrix) => {
const rotationComponent = [[matrix[0][0], matrix[1][0]], [matrix[0][1], matrix[1][1]]];
const translationComponent = [matrix[0][2], matrix[1][2]];
const invertedTranslation = [-dot(rotationComponent[0], translationComponent), -dot(rotationComponent[1], translationComponent)];
return [rotationComponent[0].concat(invertedTranslation[0]), rotationComponent[1].concat(invertedTranslation[1]), [0, 0, 1]];
};
export const rotatePoint = (homogeneousCoordinate, rotationMatrix) => [dot(homogeneousCoordinate, rotationMatrix[0]), dot(homogeneousCoordinate, rotationMatrix[1])];
export const xyDistanceBetweenPoints = (a, b) => Math.sqrt(((a[0] - b[0]) ** 2) + ((a[1] - b[1]) ** 2));
export function generateAnchors(inputSize) {
const spec = { strides: [inputSize / 16, inputSize / 8], anchors: [2, 6] };
const anchors: Array<[number, number]> = [];
for (let i = 0; i < spec.strides.length; i++) {
const stride = spec.strides[i];
const gridRows = Math.floor((inputSize + stride - 1) / stride);
const gridCols = Math.floor((inputSize + stride - 1) / stride);
const anchorsNum = spec.anchors[i];
for (let gridY = 0; gridY < gridRows; gridY++) {
const anchorY = stride * (gridY + 0.5);
for (let gridX = 0; gridX < gridCols; gridX++) {
const anchorX = stride * (gridX + 0.5);
for (let n = 0; n < anchorsNum; n++) anchors.push([anchorX, anchorY]);
}
}
}
return anchors;
}
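A worked check of the anchor spec above for the 128x128 BlazeFace input: the strides evaluate to [8, 16], so a 16x16 grid contributes 2 anchors per cell and an 8x8 grid contributes 6 per cell:

const anchors = generateAnchors(128);
// 16 * 16 * 2 + 8 * 8 * 6 = 512 + 384 = 896 anchors, one per row of the detector output tensor
console.log(anchors.length); // 896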
export function transformRawCoords(rawCoords, box, angle, rotationMatrix, inputSize) {
const boxSize = getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint });
const coordsScaled = rawCoords.map((coord) => ([
boxSize[0] / inputSize * (coord[0] - inputSize / 2),
boxSize[1] / inputSize * (coord[1] - inputSize / 2),
coord[2] || 0,
]));
const coordsRotationMatrix = (angle !== 0) ? buildRotationMatrix(angle, [0, 0]) : IDENTITY_MATRIX;
const coordsRotated = (angle !== 0) ? coordsScaled.map((coord) => ([...rotatePoint(coord, coordsRotationMatrix), coord[2]])) : coordsScaled;
const inverseRotationMatrix = (angle !== 0) ? invertTransformMatrix(rotationMatrix) : IDENTITY_MATRIX;
const boxCenter = [...getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint }), 1];
return coordsRotated.map((coord) => ([
Math.round(coord[0] + dot(boxCenter, inverseRotationMatrix[0])),
Math.round(coord[1] + dot(boxCenter, inverseRotationMatrix[1])),
Math.round(coord[2] || 0),
]));
}
export function correctFaceRotation(box, input, inputSize) {
const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= coords.meshLandmarks.count) ? coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
const angle: number = computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
const faceCenter: Point = getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
const faceCenterNormalized: Point = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
const rotated = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
const rotationMatrix = buildRotationMatrix(-angle, faceCenter);
const cut = cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotated, [inputSize, inputSize]);
const face = tf.div(cut, 255);
tf.dispose(cut);
tf.dispose(rotated);
return [angle, rotationMatrix, face];
}
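To make the rotation round trip concrete: `correctFaceRotation` rotates the whole input so the mouth-to-forehead symmetry line is vertical before cropping, while `transformRawCoords` applies the inverse rotation when mapping mesh output back into image space. A minimal sketch, assuming `box` is a cached detector box with landmarks, `input` is a 4D image tensor, and `meshInputSize` matches the mesh model input:

// hypothetical: rotate and crop, run the mesh model externally, then map coordinates back
const [angle, rotationMatrix, faceTensor] = correctFaceRotation(box, input, meshInputSize);
// ... run the mesh model on faceTensor to obtain rawCoords ...
const mesh = transformRawCoords(rawCoords, box, angle, rotationMatrix, meshInputSize);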

150
src/face/iris.ts Normal file
View File

@ -0,0 +1,150 @@
import * as coords from './facemeshcoords';
import * as util from './facemeshutil';
import * as tf from '../../dist/tfjs.esm.js';
import type { Tensor, GraphModel } from '../tfjs/types';
import { env } from '../util/env';
import { log, join } from '../util/util';
import type { Config } from '../config';
import type { Point } from '../result';
let model: GraphModel | null;
let inputSize = 0;
const irisEnlarge = 2.3;
const leftOutline = coords.meshAnnotations['leftEyeLower0'];
const rightOutline = coords.meshAnnotations['rightEyeLower0'];
const eyeLandmarks = {
leftBounds: [leftOutline[0], leftOutline[leftOutline.length - 1]],
rightBounds: [rightOutline[0], rightOutline[rightOutline.length - 1]],
};
const irisLandmarks = {
upperCenter: 3,
lowerCenter: 4,
index: 71,
numCoordinates: 76,
};
export async function load(config: Config): Promise<GraphModel> {
if (env.initial) model = null;
if (!model) {
model = await tf.loadGraphModel(join(config.modelBasePath, config.face.iris?.modelPath || '')) as unknown as GraphModel;
if (!model || !model['modelUrl']) log('load model failed:', config.face.iris?.modelPath);
else if (config.debug) log('load model:', model['modelUrl']);
} else if (config.debug) log('cached model:', model['modelUrl']);
inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
if (inputSize === -1) inputSize = 64;
return model;
}
// Replace the raw coordinates returned by facemesh with refined iris model coordinates
// Update the z coordinate to be an average of the original and the new.
function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
for (let i = 0; i < coords.MESH_TO_IRIS_INDICES_MAP.length; i++) {
const { key, indices } = coords.MESH_TO_IRIS_INDICES_MAP[i];
const originalIndices = coords.meshAnnotations[`${prefix}${key}`];
if (!keys || keys.includes(key)) {
for (let j = 0; j < indices.length; j++) {
const index = indices[j];
rawCoords[originalIndices[j]] = [
newCoords[index][0], newCoords[index][1],
(newCoords[index][2] + rawCoords[originalIndices[j]][2]) / 2,
];
}
}
}
}
export const getLeftToRightEyeDepthDifference = (rawCoords) => {
const leftEyeZ = rawCoords[eyeLandmarks.leftBounds[0]][2];
const rightEyeZ = rawCoords[eyeLandmarks.rightBounds[0]][2];
return leftEyeZ - rightEyeZ;
};
// Returns a box describing a cropped region around the eye fit for passing to the iris model.
export const getEyeBox = (rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false, meshSize) => {
const box = util.squarifyBox(util.enlargeBox(util.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), irisEnlarge));
const boxSize = util.getBoxSize(box);
let crop = tf.image.cropAndResize(face, [[
box.startPoint[1] / meshSize,
box.startPoint[0] / meshSize, box.endPoint[1] / meshSize,
box.endPoint[0] / meshSize,
]], [0], [inputSize, inputSize]);
if (flip && env.kernels.includes('flipleftright')) {
const flipped = tf.image.flipLeftRight(crop); // flipLeftRight is not defined for tfjs-node
tf.dispose(crop);
crop = flipped;
}
return { box, boxSize, crop };
};
// Given a cropped image of an eye, returns the coordinates of the contours surrounding the eye and the iris.
export const getEyeCoords = (eyeData, eyeBox, eyeBoxSize, flip = false) => {
const eyeRawCoords: Array<Point> = [];
for (let i = 0; i < irisLandmarks.numCoordinates; i++) {
const x = eyeData[i * 3];
const y = eyeData[i * 3 + 1];
const z = eyeData[i * 3 + 2];
eyeRawCoords.push([
(flip ? (1 - (x / inputSize)) : (x / inputSize)) * eyeBoxSize[0] + eyeBox.startPoint[0],
(y / inputSize) * eyeBoxSize[1] + eyeBox.startPoint[1], z,
]);
}
return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(irisLandmarks.index) };
};
// The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints.
export const getAdjustedIrisCoords = (rawCoords, irisCoords, direction) => {
const upperCenterZ = rawCoords[coords.meshAnnotations[`${direction}EyeUpper0`][irisLandmarks.upperCenter]][2];
const lowerCenterZ = rawCoords[coords.meshAnnotations[`${direction}EyeLower0`][irisLandmarks.lowerCenter]][2];
const averageZ = (upperCenterZ + lowerCenterZ) / 2;
// Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below
return irisCoords.map((coord, i) => {
let z = averageZ;
if (i === 2) {
z = upperCenterZ;
} else if (i === 4) {
z = lowerCenterZ;
}
return [coord[0], coord[1], z];
});
};
export async function augmentIris(rawCoords, face, config, meshSize) {
if (!model) {
if (config.debug) log('face mesh iris detection requested, but model is not loaded');
return rawCoords;
}
const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = getEyeBox(rawCoords, face, eyeLandmarks.leftBounds[0], eyeLandmarks.leftBounds[1], true, meshSize);
const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = getEyeBox(rawCoords, face, eyeLandmarks.rightBounds[0], eyeLandmarks.rightBounds[1], true, meshSize);
const combined = tf.concat([leftEyeCrop, rightEyeCrop]);
tf.dispose(leftEyeCrop);
tf.dispose(rightEyeCrop);
const eyePredictions = model.predict(combined) as Tensor;
tf.dispose(combined);
const eyePredictionsData = await eyePredictions.data(); // read prediction data before disposing the tensor
tf.dispose(eyePredictions);
const leftEyeData = eyePredictionsData.slice(0, irisLandmarks.numCoordinates * 3);
const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
const rightEyeData = eyePredictionsData.slice(irisLandmarks.numCoordinates * 3);
const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize);
const leftToRightEyeDepthDifference = getLeftToRightEyeDepthDifference(rawCoords);
if (Math.abs(leftToRightEyeDepthDifference) < 30) { // User is looking straight ahead.
replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', null);
replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', null);
// If the user is looking to the left or to the right, the iris coordinates tend to diverge too much from the mesh coordinates for them to be merged
// So we only update a single contour line above and below the eye.
} else if (leftToRightEyeDepthDifference < 1) { // User is looking towards the right.
replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', ['EyeUpper0', 'EyeLower0']);
} else { // User is looking towards the left.
replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', ['EyeUpper0', 'EyeLower0']);
}
const adjustedLeftIrisCoords = getAdjustedIrisCoords(rawCoords, leftIrisRawCoords, 'left');
const adjustedRightIrisCoords = getAdjustedIrisCoords(rawCoords, rightIrisRawCoords, 'right');
const newCoords = rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords);
return newCoords;
}
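In the redesigned pipeline `augmentIris` is invoked from `facemesh.predict` once raw mesh coordinates are available; a minimal sketch of that call, assuming the iris model has been loaded and `face.tensor` holds the normalized face crop of size `meshSize`:

// hypothetical: refine eye contours and append 2 x 5 iris keypoints
await load(config); // iris model
rawCoords = await augmentIris(rawCoords, face.tensor, config, meshSize);
// with iris enabled the final mesh grows from 468 to 478 keypoints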

View File

@ -1,53 +0,0 @@
/**
* FingerPose algorithm implementation constants
* See `fingerpose.ts` for entry point
*/
const Finger = {
thumb: 0,
index: 1,
middle: 2,
ring: 3,
pinky: 4,
all: [0, 1, 2, 3, 4], // just for convenience
nameMapping: { 0: 'thumb', 1: 'index', 2: 'middle', 3: 'ring', 4: 'pinky' },
// Describes mapping of joints based on the 21 points returned by handpose.
// [0] Palm
// [1-4] Thumb
// [5-8] Index
// [9-12] Middle
// [13-16] Ring
// [17-20] Pinky
pointsMapping: {
0: [[0, 1], [1, 2], [2, 3], [3, 4]],
1: [[0, 5], [5, 6], [6, 7], [7, 8]],
2: [[0, 9], [9, 10], [10, 11], [11, 12]],
3: [[0, 13], [13, 14], [14, 15], [15, 16]],
4: [[0, 17], [17, 18], [18, 19], [19, 20]],
},
getName: (value) => Finger.nameMapping[value],
getPoints: (value) => Finger.pointsMapping[value],
};
const FingerCurl = {
none: 0,
half: 1,
full: 2,
nameMapping: { 0: 'none', 1: 'half', 2: 'full' },
getName: (value) => FingerCurl.nameMapping[value],
};
const FingerDirection = {
verticalUp: 0,
verticalDown: 1,
horizontalLeft: 2,
horizontalRight: 3,
diagonalUpRight: 4,
diagonalUpLeft: 5,
diagonalDownRight: 6,
diagonalDownLeft: 7,
nameMapping: { 0: 'verticalUp', 1: 'verticalDown', 2: 'horizontalLeft', 3: 'horizontalRight', 4: 'diagonalUpRight', 5: 'diagonalUpLeft', 6: 'diagonalDownRight', 7: 'diagonalDownLeft' },
getName: (value) => FingerDirection.nameMapping[value],
};
export { Finger, FingerCurl, FingerDirection };

View File

@ -1,37 +0,0 @@
/**
* FingerPose algorithm implementation
*
* Based on: [**FingerPose**](https://github.com/andypotato/fingerpose)
*/
import * as estimator from './estimator';
import { Finger, FingerCurl, FingerDirection } from './description';
import Gestures from './gestures';
const minConfidence = 0.7;
export function analyze(keypoints) { // get estimations of curl / direction for each finger
if (!keypoints || keypoints.length === 0) return null;
const estimatorRes = estimator.estimate(keypoints);
const landmarks = {};
for (const fingerIdx of Finger.all) {
landmarks[Finger.getName(fingerIdx)] = {
curl: FingerCurl.getName(estimatorRes.curls[fingerIdx]),
direction: FingerDirection.getName(estimatorRes.directions[fingerIdx]),
};
}
// console.log('finger landmarks', landmarks);
return landmarks;
}
export function match(keypoints) { // compare gesture description to each known gesture
const poses: Array<{ name: string, confidence: number }> = [];
if (!keypoints || keypoints.length === 0) return poses;
const estimatorRes = estimator.estimate(keypoints);
for (const gesture of Gestures) {
const confidence = gesture.matchAgainst(estimatorRes.curls, estimatorRes.directions);
if (confidence >= minConfidence) poses.push({ name: gesture.name, confidence });
}
// console.log('finger poses', poses);
return poses;
}

View File

@ -3,7 +3,7 @@
*/
import type { GestureResult } from '../result';
import * as fingerPose from '../fingerpose/fingerpose';
import * as fingerPose from '../hand/fingerpose';
/**
* @typedef FaceGesture
@ -63,7 +63,7 @@ export const face = (res): GestureResult[] => {
if (!res) return [];
const gestures: Array<{ face: number, gesture: FaceGesture }> = [];
for (let i = 0; i < res.length; i++) {
if (res[i].mesh && res[i].mesh.length > 0) {
if (res[i].mesh && res[i].mesh.length > 450) {
const eyeFacing = res[i].mesh[33][2] - res[i].mesh[263][2];
if (Math.abs(eyeFacing) < 10) gestures.push({ face: i, gesture: 'facing center' });
else gestures.push({ face: i, gesture: `facing ${eyeFacing < 0 ? 'left' : 'right'}` });
@ -84,7 +84,7 @@ export const iris = (res): GestureResult[] => {
if (!res) return [];
const gestures: Array<{ iris: number, gesture: IrisGesture }> = [];
for (let i = 0; i < res.length; i++) {
if (!res[i].annotations || !res[i].annotations.leftEyeIris || !res[i].annotations.rightEyeIris) continue;
if (!res[i].annotations || !res[i].annotations.leftEyeIris || !res[i].annotations.leftEyeIris[0] || !res[i].annotations.rightEyeIris || !res[i].annotations.rightEyeIris[0]) continue;
const sizeXLeft = res[i].annotations.leftEyeIris[3][0] - res[i].annotations.leftEyeIris[1][0];
const sizeYLeft = res[i].annotations.leftEyeIris[4][1] - res[i].annotations.leftEyeIris[2][1];
const areaLeft = Math.abs(sizeXLeft * sizeYLeft);

View File

@ -3,7 +3,54 @@
* See `fingerpose.ts` for entry point
*/
export default class Gesture {
export const Finger = {
thumb: 0,
index: 1,
middle: 2,
ring: 3,
pinky: 4,
all: [0, 1, 2, 3, 4], // just for convenience
nameMapping: { 0: 'thumb', 1: 'index', 2: 'middle', 3: 'ring', 4: 'pinky' },
// Describes mapping of joints based on the 21 points returned by handpose.
// [0] Palm
// [1-4] Thumb
// [5-8] Index
// [9-12] Middle
// [13-16] Ring
// [17-20] Pinky
pointsMapping: {
0: [[0, 1], [1, 2], [2, 3], [3, 4]],
1: [[0, 5], [5, 6], [6, 7], [7, 8]],
2: [[0, 9], [9, 10], [10, 11], [11, 12]],
3: [[0, 13], [13, 14], [14, 15], [15, 16]],
4: [[0, 17], [17, 18], [18, 19], [19, 20]],
},
getName: (value) => Finger.nameMapping[value],
getPoints: (value) => Finger.pointsMapping[value],
};
export const FingerCurl = {
none: 0,
half: 1,
full: 2,
nameMapping: { 0: 'none', 1: 'half', 2: 'full' },
getName: (value) => FingerCurl.nameMapping[value],
};
export const FingerDirection = {
verticalUp: 0,
verticalDown: 1,
horizontalLeft: 2,
horizontalRight: 3,
diagonalUpRight: 4,
diagonalUpLeft: 5,
diagonalDownRight: 6,
diagonalDownLeft: 7,
nameMapping: { 0: 'verticalUp', 1: 'verticalDown', 2: 'horizontalLeft', 3: 'horizontalRight', 4: 'diagonalUpRight', 5: 'diagonalUpLeft', 6: 'diagonalDownRight', 7: 'diagonalDownLeft' },
getName: (value) => FingerDirection.nameMapping[value],
};
export class FingerGesture {
name;
curls;
directions;

View File

@ -3,11 +3,10 @@
* See `fingerpose.ts` for entry point
*/
import { Finger, FingerCurl, FingerDirection } from './description';
import Gesture from './gesture';
import { Finger, FingerCurl, FingerDirection, FingerGesture } from './fingerdef';
// describe thumbs up gesture 👍
const ThumbsUp = new Gesture('thumbs up');
const ThumbsUp = new FingerGesture('thumbs up');
ThumbsUp.addCurl(Finger.thumb, FingerCurl.none, 1.0);
ThumbsUp.addDirection(Finger.thumb, FingerDirection.verticalUp, 1.0);
ThumbsUp.addDirection(Finger.thumb, FingerDirection.diagonalUpLeft, 0.25);
@ -19,7 +18,7 @@ for (const finger of [Finger.index, Finger.middle, Finger.ring, Finger.pinky]) {
}
// describe Victory gesture ✌️
const Victory = new Gesture('victory');
const Victory = new FingerGesture('victory');
Victory.addCurl(Finger.thumb, FingerCurl.half, 0.5);
Victory.addCurl(Finger.thumb, FingerCurl.none, 0.5);
Victory.addDirection(Finger.thumb, FingerDirection.verticalUp, 1.0);

View File

@ -1,10 +1,13 @@
/**
* FingerPose algorithm implementation
* See `fingerpose.ts` for entry point
* FingerPose algorithm implementation
*
* Based on: [**FingerPose**](https://github.com/andypotato/fingerpose)
*/
import { Finger, FingerCurl, FingerDirection } from './description';
import { Finger, FingerCurl, FingerDirection } from './fingerdef';
import Gestures from '../hand/fingergesture';
const minConfidence = 0.7;
const options = {
// curl estimation
HALF_CURL_START_LIMIT: 60.0,
@ -169,7 +172,7 @@ function calculateFingerDirection(startPoint, midPoint, endPoint, fingerSlopes)
return estimatedDirection;
}
export function estimate(landmarks) {
function estimate(landmarks) {
// step 1: calculate slopes
const slopesXY: Array<number[]> = [];
const slopesYZ: Array<number[]> = [];
@ -212,3 +215,29 @@ export function estimate(landmarks) {
}
return { curls: fingerCurls, directions: fingerDirections };
}
export function analyze(keypoints) { // get estimations of curl / direction for each finger
if (!keypoints || keypoints.length === 0) return null;
const estimatorRes = estimate(keypoints);
const landmarks = {};
for (const fingerIdx of Finger.all) {
landmarks[Finger.getName(fingerIdx)] = {
curl: FingerCurl.getName(estimatorRes.curls[fingerIdx]),
direction: FingerDirection.getName(estimatorRes.directions[fingerIdx]),
};
}
// console.log('finger landmarks', landmarks);
return landmarks;
}
export function match(keypoints) { // compare gesture description to each known gesture
const poses: Array<{ name: string, confidence: number }> = [];
if (!keypoints || keypoints.length === 0) return poses;
const estimatorRes = estimate(keypoints);
for (const gesture of Gestures) {
const confidence = gesture.matchAgainst(estimatorRes.curls, estimatorRes.directions);
if (confidence >= minConfidence) poses.push({ name: gesture.name, confidence });
}
// console.log('finger poses', poses);
return poses;
}
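Both exports operate on the 21 keypoints produced by the hand models; a minimal usage sketch, assuming `hand` is a `HandResult` with populated `keypoints`:

// hypothetical: per-finger curl/direction estimates plus matched gestures
const landmarks = analyze(hand.keypoints); // e.g. { thumb: { curl: 'none', direction: 'verticalUp' }, ... }
const gestures = match(hand.keypoints); // e.g. [{ name: 'thumbs up', confidence: 0.9 }]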

View File

@ -13,7 +13,7 @@ import type { HandResult, Box, Point } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
import * as fingerPose from '../fingerpose/fingerpose';
import * as fingerPose from './fingerpose';
import { fakeOps } from '../tfjs/backend';
const boxScaleFact = 1.5; // hand finger model prefers slightly larger box

View File

@ -8,7 +8,7 @@ import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as handdetector from './handdetector';
import * as handpipeline from './handpipeline';
import * as fingerPose from '../fingerpose/fingerpose';
import * as fingerPose from '../hand/fingerpose';
import type { HandResult, Box, Point } from '../result';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { Config } from '../config';

View File

@ -8,9 +8,9 @@ import type { Result, FaceResult, HandResult, BodyResult, ObjectResult, GestureR
import * as tf from '../dist/tfjs.esm.js';
import * as models from './models';
import * as face from './face/face';
import * as facemesh from './blazeface/facemesh';
import * as facemesh from './face/facemesh';
import * as faceres from './face/faceres';
import * as posenet from './posenet/posenet';
import * as posenet from './body/posenet';
import * as handtrack from './hand/handtrack';
import * as handpose from './handpose/handpose';
// import * as blazepose from './body/blazepose-v1';
@ -23,7 +23,7 @@ import * as segmentation from './segmentation/segmentation';
import * as gesture from './gesture/gesture';
import * as image from './image/image';
import * as draw from './util/draw';
import * as persons from './persons';
import * as persons from './util/persons';
import * as interpolate from './util/interpolate';
import * as env from './util/env';
import * as backend from './tfjs/backend';

View File

@ -4,10 +4,12 @@
import { log } from './util/util';
import type { GraphModel } from './tfjs/types';
import * as facemesh from './blazeface/facemesh';
import * as blazeface from './face/blazeface';
import * as facemesh from './face/facemesh';
import * as iris from './face/iris';
import * as faceres from './face/faceres';
import * as emotion from './gear/emotion';
import * as posenet from './posenet/posenet';
import * as posenet from './body/posenet';
import * as handpose from './handpose/handpose';
import * as handtrack from './hand/handtrack';
import * as blazepose from './body/blazepose';
@ -57,15 +59,13 @@ export function reset(instance: Human) {
/** Load method preloads all instance.configured models on-demand */
export async function load(instance: Human) {
if (env.initial) reset(instance);
if (instance.config.face.enabled) { // face model is a combo that must be loaded as a whole
if (!instance.models.facedetect) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
if (instance.config.face.mesh?.enabled && !instance.models.facemesh) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
if (instance.config.face.iris?.enabled && !instance.models.faceiris) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
}
if (instance.config.hand.enabled) { // handpose model is a combo that must be loaded as a whole
if (!instance.models.handpose && instance.config.hand.detector?.modelPath?.includes('handdetect')) [instance.models.handpose, instance.models.handskeleton] = await handpose.load(instance.config);
if (!instance.models.handskeleton && instance.config.hand.landmarks && instance.config.hand.detector?.modelPath?.includes('handdetect')) [instance.models.handpose, instance.models.handskeleton] = await handpose.load(instance.config);
}
if (instance.config.face.enabled && !instance.models.facedetect) instance.models.facedetect = blazeface.load(instance.config);
if (instance.config.face.enabled && instance.config.face.mesh?.enabled && !instance.models.facemesh) instance.models.facemesh = facemesh.load(instance.config);
if (instance.config.face.enabled && instance.config.face.iris?.enabled && !instance.models.faceiris) instance.models.faceiris = iris.load(instance.config);
if (instance.config.hand.enabled && !instance.models.handtrack && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handtrack = handtrack.loadDetect(instance.config);
if (instance.config.hand.enabled && instance.config.hand.landmarks && !instance.models.handskeleton && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handskeleton = handtrack.loadSkeleton(instance.config);
if (instance.config.body.enabled && !instance.models.posenet && instance.config.body?.modelPath?.includes('posenet')) instance.models.posenet = posenet.load(instance.config);
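Note the loaders above are assigned without `await`, so `instance.models` may briefly hold pending promises. A hypothetical helper, illustrative only and not part of this commit, that resolves any outstanding loads before first inference:

// wait for model loads still in flight and replace promises with resolved models
async function awaitModels(instance) {
  for (const [name, model] of Object.entries(instance.models)) {
    if (model instanceof Promise) instance.models[name] = await model;
  }
}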

View File

@ -1,38 +0,0 @@
/**
* PoseNet body detection model implementation constants
* See `posenet.ts` for entry point
*/
export const partNames = [
'nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder',
'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist',
'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle',
];
export const count = partNames.length; // 17 keypoints
export const partIds = partNames.reduce((result, jointName, i) => {
result[jointName] = i;
return result;
}, {});
const connectedPartNames = [
['leftHip', 'leftShoulder'], ['leftElbow', 'leftShoulder'],
['leftElbow', 'leftWrist'], ['leftHip', 'leftKnee'],
['leftKnee', 'leftAnkle'], ['rightHip', 'rightShoulder'],
['rightElbow', 'rightShoulder'], ['rightElbow', 'rightWrist'],
['rightHip', 'rightKnee'], ['rightKnee', 'rightAnkle'],
['leftShoulder', 'rightShoulder'], ['leftHip', 'rightHip'],
];
export const connectedPartIndices = connectedPartNames.map(([jointNameA, jointNameB]) => ([partIds[jointNameA], partIds[jointNameB]]));
export const poseChain = [
['nose', 'leftEye'], ['leftEye', 'leftEar'], ['nose', 'rightEye'],
['rightEye', 'rightEar'], ['nose', 'leftShoulder'],
['leftShoulder', 'leftElbow'], ['leftElbow', 'leftWrist'],
['leftShoulder', 'leftHip'], ['leftHip', 'leftKnee'],
['leftKnee', 'leftAnkle'], ['nose', 'rightShoulder'],
['rightShoulder', 'rightElbow'], ['rightElbow', 'rightWrist'],
['rightShoulder', 'rightHip'], ['rightHip', 'rightKnee'],
['rightKnee', 'rightAnkle'],
];

View File

@ -1,46 +0,0 @@
/**
* PoseNet body detection model implementation
*
* Based on: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
*/
import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as poses from './poses';
import * as util from './utils';
import type { BodyResult } from '../result';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
let model: GraphModel;
const poseNetOutputs = ['MobilenetV1/offset_2/BiasAdd'/* offsets */, 'MobilenetV1/heatmap_2/BiasAdd'/* heatmapScores */, 'MobilenetV1/displacement_fwd_2/BiasAdd'/* displacementFwd */, 'MobilenetV1/displacement_bwd_2/BiasAdd'/* displacementBwd */];
export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
const res = tf.tidy(() => {
if (!model.inputs[0].shape) return [];
const resized = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
const normalized = tf.sub(tf.div(tf.cast(resized, 'float32'), 127.5), 1.0);
const results: Array<Tensor> = model.execute(normalized, poseNetOutputs) as Array<Tensor>;
const results3d = results.map((y) => tf.squeeze(y, [0]));
results3d[1] = results3d[1].sigmoid(); // apply sigmoid on scores
return results3d;
});
const buffers = await Promise.all(res.map((tensor: Tensor) => tensor.buffer()));
for (const t of res) tf.dispose(t);
const decoded = await poses.decode(buffers[0], buffers[1], buffers[2], buffers[3], config.body.maxDetected, config.body.minConfidence);
if (!model.inputs[0].shape) return [];
const scaled = util.scalePoses(decoded, [input.shape[1], input.shape[2]], [model.inputs[0].shape[2], model.inputs[0].shape[1]]) as BodyResult[];
return scaled;
}
export async function load(config: Config): Promise<GraphModel> {
if (!model || env.initial) {
model = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel;
if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath);
else if (config.debug) log('load model:', model['modelUrl']);
} else if (config.debug) log('cached model:', model['modelUrl']);
return model;
}

View File

@ -2,7 +2,7 @@
* Module that implements helper draw functions, exposed as human.draw
*/
import { TRI468 as triangulation } from '../blazeface/coords';
import { TRI468 as triangulation } from '../face/facemeshcoords';
import { mergeDeep, now } from './util';
import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult } from '../result';
@ -204,20 +204,21 @@ export async function face(inCanvas: HTMLCanvasElement | OffscreenCanvas, result
if (f.mesh && f.mesh.length > 0) {
if (localOptions.drawPoints) {
for (const pt of f.mesh) point(ctx, pt[0], pt[1], pt[2], localOptions);
// for (const pt of f.meshRaw) point(ctx, pt[0] * inCanvas.offsetWidth, pt[1] * inCanvas.offsetHeight, pt[2]);
}
if (localOptions.drawPolygons) {
ctx.lineWidth = 1;
for (let i = 0; i < triangulation.length / 3; i++) {
const points = [
triangulation[i * 3 + 0],
triangulation[i * 3 + 1],
triangulation[i * 3 + 2],
].map((index) => f.mesh[index]);
lines(ctx, points, localOptions);
if (f.mesh.length > 450) {
for (let i = 0; i < triangulation.length / 3; i++) {
const points = [
triangulation[i * 3 + 0],
triangulation[i * 3 + 1],
triangulation[i * 3 + 2],
].map((index) => f.mesh[index]);
lines(ctx, points, localOptions);
}
}
// iris: array[center, left, top, right, bottom]
if (f.annotations && f.annotations['leftEyeIris']) {
if (f.annotations && f.annotations['leftEyeIris'] && f.annotations['leftEyeIris'][0]) {
ctx.strokeStyle = localOptions.useDepth ? 'rgba(255, 200, 255, 0.3)' : localOptions.color;
ctx.beginPath();
const sizeX = Math.abs(f.annotations['leftEyeIris'][3][0] - f.annotations['leftEyeIris'][1][0]) / 2;
@ -229,7 +230,7 @@ export async function face(inCanvas: HTMLCanvasElement | OffscreenCanvas, result
ctx.fill();
}
}
if (f.annotations && f.annotations['rightEyeIris']) {
if (f.annotations && f.annotations['rightEyeIris'] && f.annotations['rightEyeIris'][0]) {
ctx.strokeStyle = localOptions.useDepth ? 'rgba(255, 200, 255, 0.3)' : localOptions.color;
ctx.beginPath();
const sizeX = Math.abs(f.annotations['rightEyeIris'][3][0] - f.annotations['rightEyeIris'][1][0]) / 2;

View File

@ -2,7 +2,7 @@
* Analyze detection results and sort & combine them into a per-person view
*/
import type { FaceResult, BodyResult, HandResult, GestureResult, PersonResult, Box } from './result';
import type { FaceResult, BodyResult, HandResult, GestureResult, PersonResult, Box } from '../result';
export function join(faces: Array<FaceResult>, bodies: Array<BodyResult>, hands: Array<HandResult>, gestures: Array<GestureResult>, shape: Array<number> | undefined): Array<PersonResult> {
let id = 0;

File diff suppressed because it is too large Load Diff