mirror of https://github.com/vladmandic/human
redesign face processing
parent 28a957316b
commit 156e857d32

TODO.md (5 changed lines)
@@ -10,11 +10,6 @@

- Evaluate and switch default model from `handdetect` to `handtrack`

#### Body

- Implement new variations of `BlazePose` models
- Add virtual box frame caching to `MoveNet`

#### Face

- Reimplement `BlazeFace`, `FaceMesh`, `Iris` with new pipeline and frame caching
@@ -67,6 +67,9 @@ const drawOptions = {
  drawLabels: true,
  drawPolygons: true,
  drawPoints: false,
  fillPolygons: false,
  useCurves: false,
  useDepth: true,
};

// ui options

@@ -105,7 +108,7 @@ const ui = {
  lastFrame: 0, // time of last frame processing
  viewportSet: false, // internal, has custom viewport been set
  background: null, // holds instance of segmentation background image
-  exceptionHandler: false, // should capture all unhandled exceptions
+  exceptionHandler: true, // should capture all unhandled exceptions

  // webrtc
  useWebRTC: false, // use webrtc as camera source instead of local webcam

@@ -684,13 +687,13 @@ function setupMenu() {
    setupCamera();
  });
  menu.display.addHTML('<hr style="border-style: inset; border-color: dimgray">');
-  menu.display.addBool('use depth', human.draw.options, 'useDepth');
-  menu.display.addBool('use curves', human.draw.options, 'useCurves');
-  menu.display.addBool('print labels', human.draw.options, 'drawLabels');
-  menu.display.addBool('draw points', human.draw.options, 'drawPoints');
-  menu.display.addBool('draw boxes', human.draw.options, 'drawBoxes');
-  menu.display.addBool('draw polygons', human.draw.options, 'drawPolygons');
-  menu.display.addBool('fill polygons', human.draw.options, 'fillPolygons');
+  menu.display.addBool('use depth', drawOptions, 'useDepth');
+  menu.display.addBool('use curves', drawOptions, 'useCurves');
+  menu.display.addBool('print labels', drawOptions, 'drawLabels');
+  menu.display.addBool('draw points', drawOptions, 'drawPoints');
+  menu.display.addBool('draw boxes', drawOptions, 'drawBoxes');
+  menu.display.addBool('draw polygons', drawOptions, 'drawPolygons');
+  menu.display.addBool('fill polygons', drawOptions, 'fillPolygons');

  menu.image = new Menu(document.body, '', { top, left: x[1] });
  menu.image.addBool('enabled', userConfig.filter, 'enabled', (val) => userConfig.filter.enabled = val);
@@ -1,6 +1,6 @@
{
  "name": "@vladmandic/human",
-  "version": "2.2.3",
+  "version": "2.3.0",
  "description": "Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition",
  "sideEffects": false,
  "main": "dist/human.node.js",
@@ -1,11 +1,19 @@
/**
 * PoseNet body detection model implementation
 * See `posenet.ts` for entry point
 *
 * Based on: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
 */

-import * as utils from './utils';
-import * as kpt from './keypoints';
-import type { Box } from '../result';
+import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
+import type { BodyResult, Box } from '../result';
+import type { Tensor, GraphModel } from '../tfjs/types';
+import type { Config } from '../config';
+import { env } from '../util/env';
+import * as utils from './posenetutils';

+let model: GraphModel;
+const poseNetOutputs = ['MobilenetV1/offset_2/BiasAdd'/* offsets */, 'MobilenetV1/heatmap_2/BiasAdd'/* heatmapScores */, 'MobilenetV1/displacement_fwd_2/BiasAdd'/* displacementFwd */, 'MobilenetV1/displacement_bwd_2/BiasAdd'/* displacementBwd */];

const localMaximumRadius = 1;
const outputStride = 16;

@@ -37,11 +45,11 @@ function traverse(edgeId, sourceKeypoint, targetId, scores, offsets, displacemen
  }
  const targetKeyPointIndices = getStridedIndexNearPoint(targetKeypoint, height, width);
  const score = scores.get(targetKeyPointIndices.y, targetKeyPointIndices.x, targetId);
-  return { position: targetKeypoint, part: kpt.partNames[targetId], score };
+  return { position: targetKeypoint, part: utils.partNames[targetId], score };
}

export function decodePose(root, scores, offsets, displacementsFwd, displacementsBwd) {
-  const tuples = kpt.poseChain.map(([parentJoinName, childJoinName]) => ([kpt.partIds[parentJoinName], kpt.partIds[childJoinName]]));
+  const tuples = utils.poseChain.map(([parentJoinName, childJoinName]) => ([utils.partIds[parentJoinName], utils.partIds[childJoinName]]));
  const edgesFwd = tuples.map(([, childJointId]) => childJointId);
  const edgesBwd = tuples.map(([parentJointId]) => parentJointId);
  const numParts = scores.shape[2]; // [21,21,17]

@@ -51,7 +59,7 @@ export function decodePose(root, scores, offsets, displacementsFwd, displacementsBwd) {
  const rootPoint = utils.getImageCoords(root.part, outputStride, offsets);
  keypoints[root.part.id] = {
    score: root.score,
-    part: kpt.partNames[root.part.id],
+    part: utils.partNames[root.part.id],
    position: rootPoint,
  };
  // Decode the part positions upwards in the tree, following the backward displacements.

@@ -146,3 +154,32 @@ export function decode(offsets, scores, displacementsFwd, displacementsBwd, maxDetected, minConfidence) {
  }
  return poses;
}

+export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
+  const res = tf.tidy(() => {
+    if (!model.inputs[0].shape) return [];
+    const resized = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
+    const normalized = tf.sub(tf.div(tf.cast(resized, 'float32'), 127.5), 1.0);
+    const results: Array<Tensor> = model.execute(normalized, poseNetOutputs) as Array<Tensor>;
+    const results3d = results.map((y) => tf.squeeze(y, [0]));
+    results3d[1] = results3d[1].sigmoid(); // apply sigmoid on scores
+    return results3d;
+  });
+
+  const buffers = await Promise.all(res.map((tensor: Tensor) => tensor.buffer()));
+  for (const t of res) tf.dispose(t);
+
+  const decoded = await decode(buffers[0], buffers[1], buffers[2], buffers[3], config.body.maxDetected, config.body.minConfidence);
+  if (!model.inputs[0].shape) return [];
+  const scaled = utils.scalePoses(decoded, [input.shape[1], input.shape[2]], [model.inputs[0].shape[2], model.inputs[0].shape[1]]) as BodyResult[];
+  return scaled;
+}
+
+export async function load(config: Config): Promise<GraphModel> {
+  if (!model || env.initial) {
+    model = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel;
+    if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath);
+    else if (config.debug) log('load model:', model['modelUrl']);
+  } else if (config.debug) log('cached model:', model['modelUrl']);
+  return model;
+}
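For orientation, a minimal usage sketch of the `load`/`predict` pair added above. Hedged: `config` stands for a populated `Config` object and `frame` for an image tensor; both names are placeholders, not part of the diff.

```ts
// Sketch only, assuming this module is imported as `posenet` and that `config`
// provides modelBasePath plus body.modelPath, body.maxDetected, body.minConfidence.
await posenet.load(config);                           // loads the graph model once, then reuses the cached instance
const frame: Tensor = getFrameTensor();               // hypothetical helper: any [1, height, width, 3] image tensor
const bodies = await posenet.predict(frame, config);  // decode + scale back to input resolution
console.log(`detected ${bodies.length} bodies`);
```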
@@ -3,15 +3,48 @@
 * See `posenet.ts` for entry point
 */

-import * as kpt from './keypoints';
import type { BodyResult } from '../result';

+export const partNames = [
+  'nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder',
+  'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist',
+  'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle',
+];
+
+export const count = partNames.length; // 17 keypoints
+
+export const partIds = partNames.reduce((result, jointName, i) => {
+  result[jointName] = i;
+  return result;
+}, {});
+
+const connectedPartNames = [
+  ['leftHip', 'leftShoulder'], ['leftElbow', 'leftShoulder'],
+  ['leftElbow', 'leftWrist'], ['leftHip', 'leftKnee'],
+  ['leftKnee', 'leftAnkle'], ['rightHip', 'rightShoulder'],
+  ['rightElbow', 'rightShoulder'], ['rightElbow', 'rightWrist'],
+  ['rightHip', 'rightKnee'], ['rightKnee', 'rightAnkle'],
+  ['leftShoulder', 'rightShoulder'], ['leftHip', 'rightHip'],
+];
+export const connectedPartIndices = connectedPartNames.map(([jointNameA, jointNameB]) => ([partIds[jointNameA], partIds[jointNameB]]));
+
+export const poseChain = [
+  ['nose', 'leftEye'], ['leftEye', 'leftEar'], ['nose', 'rightEye'],
+  ['rightEye', 'rightEar'], ['nose', 'leftShoulder'],
+  ['leftShoulder', 'leftElbow'], ['leftElbow', 'leftWrist'],
+  ['leftShoulder', 'leftHip'], ['leftHip', 'leftKnee'],
+  ['leftKnee', 'leftAnkle'], ['nose', 'rightShoulder'],
+  ['rightShoulder', 'rightElbow'], ['rightElbow', 'rightWrist'],
+  ['rightShoulder', 'rightHip'], ['rightHip', 'rightKnee'],
+  ['rightKnee', 'rightAnkle'],
+];

export function eitherPointDoesntMeetConfidence(a: number, b: number, minConfidence: number) {
  return (a < minConfidence || b < minConfidence);
}

export function getAdjacentKeyPoints(keypoints, minConfidence: number) {
-  return kpt.connectedPartIndices.reduce((result, [leftJoint, rightJoint]) => {
+  return connectedPartIndices.reduce((result, [leftJoint, rightJoint]) => {
    if (eitherPointDoesntMeetConfidence(keypoints[leftJoint].score, keypoints[rightJoint].score, minConfidence)) {
      return result;
    }

@@ -123,7 +156,7 @@ export class MaxHeap {
export function getOffsetPoint(y, x, keypoint, offsets) {
  return {
    y: offsets.get(y, x, keypoint),
-    x: offsets.get(y, x, keypoint + kpt.count),
+    x: offsets.get(y, x, keypoint + count),
  };
}
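The `keypoint + count` read in `getOffsetPoint` is easy to misparse, so a small sketch of the channel layout it relies on; this layout is standard for PoseNet offset tensors and matches the code above.

```ts
// The offsets tensor has 2 * count channels per grid cell: channels 0..16
// carry y-offsets and channels 17..33 carry x-offsets, so the x component
// of keypoint k lives at channel k + count.
const count = 17;
const offsetChannels = (keypoint: number) => ({ y: keypoint, x: keypoint + count });
console.log(offsetChannels(0));  // { y: 0, x: 17 }  -> nose
console.log(offsetChannels(16)); // { y: 16, x: 33 } -> rightAnkle
```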
src/face/angles.ts (new file)

@@ -0,0 +1,133 @@
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);

const calculateGaze = (face): { bearing: number, strength: number } => {
  const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
  if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 };

  const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes
  const eyeRatio = 1; // factor to normalize changes x vs y

  const left = face.mesh[33][2] > face.mesh[263][2]; // pick left or right eye depending on which one is closer, based on the z-axis of the outside point
  const irisCenter = left ? face.mesh[473] : face.mesh[468];
  const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so relative point is less precise
    ? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2]
    : [(face.mesh[263][0] + face.mesh[362][0]) / 2, (face.mesh[263][1] + face.mesh[362][1]) / 2];
  const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions
    ? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]]
    : [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]];

  const eyeDiff = [ // x distance between extreme point and center point normalized with eye size
    (eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
    eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
  ];
  let strength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences
  strength = Math.min(strength, face.boxRaw[2] / 2, face.boxRaw[3] / 2); // limit strength to half of box size to avoid clipping due to low precision
  const bearing = (radians([0, 0], eyeDiff) + (Math.PI / 2)) % Math.PI; // using eyeDiff instead of the eyeCenter/irisCenter combo due to manual adjustments; rotate clockwise by 90 degrees

  return { bearing, strength };
};

export const calculateFaceAngle = (face, imageSize): {
  angle: { pitch: number, yaw: number, roll: number },
  matrix: [number, number, number, number, number, number, number, number, number],
  gaze: { bearing: number, strength: number },
} => {
  // const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360);
  const normalize = (v) => { // normalize vector
    const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
    v[0] /= length;
    v[1] /= length;
    v[2] /= length;
    return v;
  };
  const subVectors = (a, b) => { // vector subtraction (a - b)
    const x = a[0] - b[0];
    const y = a[1] - b[1];
    const z = a[2] - b[2];
    return [x, y, z];
  };
  const crossVectors = (a, b) => { // vector cross product (a x b)
    const x = a[1] * b[2] - a[2] * b[1];
    const y = a[2] * b[0] - a[0] * b[2];
    const z = a[0] * b[1] - a[1] * b[0];
    return [x, y, z];
  };
  // 3x3 rotation matrix to Euler angles based on https://www.geometrictools.com/Documentation/EulerAngles.pdf
  const rotationMatrixToEulerAngle = (r) => {
    // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
    const [r00, r01, r02, r10, r11, r12, r20, r21, r22] = r;
    let thetaX: number;
    let thetaY: number;
    let thetaZ: number;
    if (r10 < 1) { // YZX calculation
      if (r10 > -1) {
        thetaZ = Math.asin(r10);
        thetaY = Math.atan2(-r20, r00);
        thetaX = Math.atan2(-r12, r11);
      } else {
        thetaZ = -Math.PI / 2;
        thetaY = -Math.atan2(r21, r22);
        thetaX = 0;
      }
    } else {
      thetaZ = Math.PI / 2;
      thetaY = Math.atan2(r21, r22);
      thetaX = 0;
    }
    if (isNaN(thetaX)) thetaX = 0;
    if (isNaN(thetaY)) thetaY = 0;
    if (isNaN(thetaZ)) thetaZ = 0;
    return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ };
  };
  // simple Euler angle calculation based on existing 3D mesh
  // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
  const meshToEulerAngle = (mesh) => {
    const radians = (a1, a2, b1, b2) => Math.atan2(b2 - a2, b1 - a1);
    // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
    const angle = {
      // values are in radians in range of -pi/2 to pi/2 which is -90 to +90 degrees, value of 0 means center
      // pitch is face move up/down
      pitch: radians(mesh[10][1], mesh[10][2], mesh[152][1], mesh[152][2]), // looking at y,z of top and bottom points of the face
      // yaw is face turn left/right
      yaw: radians(mesh[33][0], mesh[33][2], mesh[263][0], mesh[263][2]), // looking at x,z of outside corners of leftEye and rightEye
      // roll is face lean left/right
      roll: radians(mesh[33][0], mesh[33][1], mesh[263][0], mesh[263][1]), // looking at x,y of outside corners of leftEye and rightEye
    };
    return angle;
  };

  // initialize gaze and mesh
  const mesh = face.meshRaw;
  if (!mesh || mesh.length < 300) return { angle: { pitch: 0, yaw: 0, roll: 0 }, matrix: [1, 0, 0, 0, 1, 0, 0, 0, 1], gaze: { bearing: 0, strength: 0 } };

  const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5;
  // top, bottom, left, right
  const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [
    // make the xyz coordinates proportional, independent of the image/box size
    pt[0] * imageSize[0] / size,
    pt[1] * imageSize[1] / size,
    pt[2],
  ]);

  const y_axis = normalize(subVectors(pts[1], pts[0]));
  let x_axis = normalize(subVectors(pts[3], pts[2]));
  const z_axis = normalize(crossVectors(x_axis, y_axis));
  // adjust x_axis to make sure that all axes are perpendicular to each other
  x_axis = crossVectors(y_axis, z_axis);

  // Rotation Matrix from Axis Vectors - http://renderdan.blogspot.com/2006/05/rotation-matrix-from-axis-vectors.html
  // 3x3 rotation matrix is flattened to an array in row-major order. Note that the rotation represented by this matrix is inverted.
  const matrix: [number, number, number, number, number, number, number, number, number] = [
    x_axis[0], x_axis[1], x_axis[2],
    y_axis[0], y_axis[1], y_axis[2],
    z_axis[0], z_axis[1], z_axis[2],
  ];
  const angle = rotationMatrixToEulerAngle(matrix);
  // const angle = meshToEulerAngle(mesh);

  // we have iris keypoints so we can calculate gaze direction
  const gaze = mesh.length === 478 ? calculateGaze(face) : { bearing: 0, strength: 0 };

  return { angle, matrix, gaze };
};
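A quick sanity check of `rotationMatrixToEulerAngle` on a known rotation; standalone sketch that reuses only the math shown above.

```ts
// For a pure in-plane rotation by theta, r10 = sin(theta), so thetaZ recovers
// theta while pitch and yaw stay 0; the function then reports roll = 2 * -theta.
const theta = Math.PI / 12; // 15 degrees
const rz = [ // row-major 3x3 rotation about the z axis
  Math.cos(theta), -Math.sin(theta), 0,
  Math.sin(theta), Math.cos(theta), 0,
  0, 0, 1,
];
const thetaZ = Math.asin(rz[3]);             // rz[3] is r10, recovers theta
console.log((2 * -thetaZ * 180) / Math.PI);  // -30: doubled and sign-flipped 15 degrees
```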
src/face/blazeface.ts (new file)

@@ -0,0 +1,96 @@
/**
 * BlazeFace, FaceMesh & Iris model implementation
 * See `facemesh.ts` for entry point
 */

import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as util from './facemeshutil';
import type { Config } from '../config';
import type { Tensor, GraphModel } from '../tfjs/types';
import { env } from '../util/env';

const keypointsCount = 6;
let model: GraphModel | null;
let anchorsData: [number, number][] = [];
let anchors: Tensor | null = null;
let inputSize = 0;

// export const size = () => (model && model.inputs[0].shape ? model.inputs[0].shape[2] : 0);
export const size = () => inputSize;

export async function load(config: Config): Promise<GraphModel> {
  if (env.initial) model = null;
  if (!model) {
    model = await tf.loadGraphModel(join(config.modelBasePath, config.face.detector?.modelPath || '')) as unknown as GraphModel;
    if (!model || !model['modelUrl']) log('load model failed:', config.face.detector?.modelPath);
    else if (config.debug) log('load model:', model['modelUrl']);
  } else if (config.debug) log('cached model:', model['modelUrl']);
  inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
  if (inputSize === -1) inputSize = 64;
  anchorsData = util.generateAnchors(inputSize);
  anchors = tf.tensor2d(anchorsData);
  return model;
}

function decodeBounds(boxOutputs) {
  const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
  const centers = tf.add(boxStarts, anchors);
  const boxSizes = tf.slice(boxOutputs, [0, 3], [-1, 2]);
  const boxSizesNormalized = tf.div(boxSizes, inputSize);
  const centersNormalized = tf.div(centers, inputSize);
  const halfBoxSize = tf.div(boxSizesNormalized, 2);
  const starts = tf.sub(centersNormalized, halfBoxSize);
  const ends = tf.add(centersNormalized, halfBoxSize);
  const startNormalized = tf.mul(starts, inputSize);
  const endNormalized = tf.mul(ends, inputSize);
  const concatAxis = 1;
  return tf.concat2d([startNormalized, endNormalized], concatAxis);
}

export async function getBoxes(inputImage: Tensor, config: Config) {
  // sanity check on input
  if ((!inputImage) || (inputImage['isDisposedInternal']) || (inputImage.shape.length !== 4) || (inputImage.shape[1] < 1) || (inputImage.shape[2] < 1)) return { boxes: [] };
  const [batch, boxes, scores] = tf.tidy(() => {
    const resizedImage = tf.image.resizeBilinear(inputImage, [inputSize, inputSize]);
    const normalizedImage = tf.sub(tf.div(resizedImage, 127.5), 0.5);
    const res = model?.execute(normalizedImage);
    let batchOut;
    if (Array.isArray(res)) { // are we using tfhub or pinto converted model?
      const sorted = res.sort((a, b) => a.size - b.size);
      const concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16
      const concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16
      const concat = tf.concat([concat512, concat384], 1);
      batchOut = tf.squeeze(concat, 0);
    } else {
      batchOut = tf.squeeze(res); // when using tfhub model
    }
    const boxesOut = decodeBounds(batchOut);
    const logits = tf.slice(batchOut, [0, 0], [-1, 1]);
    const scoresOut = tf.squeeze(tf.sigmoid(logits)); // inside tf.tidy
    return [batchOut, boxesOut, scoresOut];
  });

  const nmsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, (config.face.detector?.maxDetected || 0), (config.face.detector?.iouThreshold || 0), (config.face.detector?.minConfidence || 0));
  const nms = await nmsTensor.array();
  tf.dispose(nmsTensor);
  const annotatedBoxes: Array<{ box: { startPoint: Tensor, endPoint: Tensor }, landmarks: Tensor, anchor: [number, number] | undefined, confidence: number }> = [];
  const scoresData = await scores.data();
  for (let i = 0; i < nms.length; i++) {
    const confidence = scoresData[nms[i]];
    if (confidence > (config.face.detector?.minConfidence || 0)) {
      const boundingBox = tf.slice(boxes, [nms[i], 0], [1, -1]);
      const landmarks = tf.tidy(() => tf.reshape(tf.squeeze(tf.slice(batch, [nms[i], keypointsCount - 1], [1, -1])), [keypointsCount, -1]));
      annotatedBoxes.push({ box: util.createBox(boundingBox), landmarks, anchor: anchorsData[nms[i]], confidence });
      tf.dispose(boundingBox);
    }
  }
  tf.dispose(batch);
  tf.dispose(boxes);
  tf.dispose(scores);

  return {
    boxes: annotatedBoxes,
    scaleFactor: [inputImage.shape[2] / inputSize, inputImage.shape[1] / inputSize],
  };
}
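The 512/384 dimensions in the branch above fall straight out of the anchor layout; a worked check, assuming a 256x256 detector input (the math mirrors `generateAnchors` in `facemeshutil.ts` below).

```ts
const inputSize = 256;
const strides = [inputSize / 16, inputSize / 8]; // [16, 32]
const anchorsPerCell = [2, 6];
const counts = strides.map((stride, i) => {
  const grid = Math.floor((inputSize + stride - 1) / stride); // ceil(inputSize / stride)
  return grid * grid * anchorsPerCell[i]; // 16*16*2 and 8*8*6
});
console.log(counts, counts[0] + counts[1]); // [512, 384] 896 total anchor rows
```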
src/face/face.ts (140 changed lines)
@@ -5,145 +5,12 @@

import { log, now } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
-import * as facemesh from '../blazeface/facemesh';
+import * as facemesh from './facemesh';
import * as emotion from '../gear/emotion';
import * as faceres from './faceres';
import type { FaceResult } from '../result';
import type { Tensor } from '../tfjs/types';
-[... 133 removed lines: the local `rad2deg`, `calculateGaze`, and `calculateFaceAngle` implementations, moved verbatim into the new `src/face/angles.ts` shown above ...]
+import { calculateFaceAngle } from './angles';

export const detectFace = async (parent /* instance of human */, input: Tensor): Promise<FaceResult[]> => {
  // run facemesh, includes blazeface and iris

@@ -158,6 +25,7 @@ export const detectFace = async (parent /* instance of human */, input: Tensor): Promise<FaceResult[]> => {
  const faceRes: Array<FaceResult> = [];
  parent.state = 'run:face';
  timeStamp = now();

  const faces = await facemesh.predict(input, parent.config);
  parent.performance.face = Math.trunc(now() - timeStamp);
  if (!input.shape || input.shape.length !== 4) return [];

@@ -226,7 +94,7 @@ export const detectFace = async (parent /* instance of human */, input: Tensor): Promise<FaceResult[]> => {
      delete faces[i].annotations.leftEyeIris;
      delete faces[i].annotations.rightEyeIris;
    }
-    const irisSize = (faces[i].annotations && faces[i].annotations.leftEyeIris && faces[i].annotations.rightEyeIris
+    const irisSize = (faces[i].annotations && faces[i].annotations.leftEyeIris && faces[i].annotations.leftEyeIris[0] && faces[i].annotations.rightEyeIris && faces[i].annotations.rightEyeIris[0]
      && (faces[i].annotations.leftEyeIris.length > 0) && (faces[i].annotations.rightEyeIris.length > 0)
      && (faces[i].annotations.leftEyeIris[0] !== null) && (faces[i].annotations.rightEyeIris[0] !== null))
      ? Math.max(Math.abs(faces[i].annotations.leftEyeIris[3][0] - faces[i].annotations.leftEyeIris[1][0]), Math.abs(faces[i].annotations.rightEyeIris[4][1] - faces[i].annotations.rightEyeIris[2][1])) / input.shape[2]
src/face/facemesh.ts (new file)

@@ -0,0 +1,139 @@
/**
 * BlazeFace, FaceMesh & Iris model implementation
 *
 * Based on:
 * - [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
 * - Facial Spatial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
 * - Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
 */

import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as blazeface from './blazeface';
import * as util from './facemeshutil';
import * as coords from './facemeshcoords';
import * as iris from './iris';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { FaceResult, Point } from '../result';
import type { Config } from '../config';
import { env } from '../util/env';

type BoxCache = { startPoint: Point, endPoint: Point, landmarks: Array<Point>, confidence: number, faceConfidence?: number | undefined };
let boxCache: Array<BoxCache> = [];
let model: GraphModel | null = null;
let inputSize = 0;
let skipped = Number.MAX_SAFE_INTEGER;
let detectedFaces = 0;

export async function predict(input: Tensor, config: Config): Promise<FaceResult[]> {
  if (!config.skipFrame || (((detectedFaces !== config.face.detector?.maxDetected) || !config.face.mesh?.enabled)) && (skipped > (config.face.detector?.skipFrames || 0))) { // reset cached boxes
    const newBoxes = await blazeface.getBoxes(input, config); // get results from blazeface detector
    boxCache = []; // empty cache
    for (const possible of newBoxes.boxes) { // extract data from detector
      const startPoint = await possible.box.startPoint.data() as unknown as Point;
      const endPoint = await possible.box.endPoint.data() as unknown as Point;
      const landmarks = await possible.landmarks.array() as Array<Point>;
      boxCache.push({ startPoint, endPoint, landmarks, confidence: possible.confidence });
    }
    newBoxes.boxes.forEach((prediction) => tf.dispose([prediction.box.startPoint, prediction.box.endPoint, prediction.landmarks]));
    for (let i = 0; i < boxCache.length; i++) { // enlarge and squarify detected boxes
      const scaledBox = util.scaleBoxCoordinates({ startPoint: boxCache[i].startPoint, endPoint: boxCache[i].endPoint }, newBoxes.scaleFactor);
      const enlargedBox = util.enlargeBox(scaledBox);
      const squarifiedBox = util.squarifyBox(enlargedBox);
      boxCache[i] = { ...squarifiedBox, confidence: boxCache[i].confidence, landmarks: boxCache[i].landmarks };
    }
    skipped = 0;
  } else {
    skipped++;
  }

  const faces: Array<FaceResult> = [];
  const newBoxes: Array<BoxCache> = [];
  let id = 0;
  for (let box of boxCache) {
    let angle = 0;
    let rotationMatrix;
    const face: FaceResult = {
      id: id++,
      mesh: [],
      meshRaw: [],
      box: [0, 0, 0, 0],
      boxRaw: [0, 0, 0, 0],
      score: 0,
      boxScore: 0,
      faceScore: 0,
      annotations: {},
    };

    if (config.face.detector?.rotation && config.face.mesh?.enabled && env.kernels.includes('rotatewithoffset')) {
      [angle, rotationMatrix, face.tensor] = util.correctFaceRotation(box, input, inputSize);
    } else {
      rotationMatrix = util.IDENTITY_MATRIX;
      const cut = util.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, input, config.face.mesh?.enabled ? [inputSize, inputSize] : [blazeface.size(), blazeface.size()]);
      face.tensor = tf.div(cut, 255);
      tf.dispose(cut);
    }
    face.boxScore = Math.round(100 * box.confidence) / 100;
    if (!config.face.mesh?.enabled) { // mesh not enabled, return results from detector only
      face.box = util.getClampedBox(box, input);
      face.boxRaw = util.getRawBox(box, input);
      face.score = Math.round(100 * box.confidence || 0) / 100;
      face.mesh = box.landmarks.map((pt) => [
        ((box.startPoint[0] + box.endPoint[0])) / 2 + ((box.endPoint[0] + box.startPoint[0]) * pt[0] / blazeface.size()),
        ((box.startPoint[1] + box.endPoint[1])) / 2 + ((box.endPoint[1] + box.startPoint[1]) * pt[1] / blazeface.size()),
      ]);
      face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
      for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key]]]; // add annotations
    } else if (!model) { // mesh enabled, but not loaded
      if (config.debug) log('face mesh detection requested, but model is not loaded');
    } else { // mesh enabled
      const [contours, confidence, contourCoords] = model.execute(face.tensor as Tensor) as Array<Tensor>; // first returned tensor represents facial contours which are already included in the coordinates
      tf.dispose(contours);
      const faceConfidence = (await confidence.data())[0] as number;
      tf.dispose(confidence);
      const coordsReshaped = tf.reshape(contourCoords, [-1, 3]);
      let rawCoords = await coordsReshaped.array();
      tf.dispose(contourCoords);
      tf.dispose(coordsReshaped);
      if (faceConfidence < (config.face.detector?.minConfidence || 1)) {
        box.confidence = faceConfidence; // reset confidence of cached box
        tf.dispose(face.tensor);
      } else {
        if (config.face.iris?.enabled) rawCoords = await iris.augmentIris(rawCoords, face.tensor, config, inputSize); // augment results with iris
        face.mesh = util.transformRawCoords(rawCoords, box, angle, rotationMatrix, inputSize); // get processed mesh
        face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
        box = { ...util.enlargeBox(util.calculateLandmarksBoundingBox(face.mesh), 1.5), confidence: box.confidence }; // redefine box with mesh calculated one
        for (const key of Object.keys(coords.meshAnnotations)) face.annotations[key] = coords.meshAnnotations[key].map((index) => face.mesh[index]); // add annotations
        if (config.face.detector?.rotation && config.face.mesh.enabled && config.face.description?.enabled && env.kernels.includes('rotatewithoffset')) { // do rotation one more time with mesh keypoints if we want to return perfect image
          tf.dispose(face.tensor); // dispose so we can overwrite original face
          [angle, rotationMatrix, face.tensor] = util.correctFaceRotation(box, input, inputSize);
        }
        face.box = util.getClampedBox(box, input); // update detected box with box around the face mesh
        face.boxRaw = util.getRawBox(box, input);
        face.score = Math.round(100 * faceConfidence || 100 * box.confidence || 0) / 100;
        face.faceScore = Math.round(100 * faceConfidence) / 100;
        box = { ...util.squarifyBox(box), confidence: box.confidence, faceConfidence }; // update stored cache values
      }
    }
    faces.push(face);
    newBoxes.push(box);
  }
  if (config.face.mesh?.enabled) boxCache = newBoxes.filter((a) => a.confidence > (config.face.detector?.minConfidence || 0)); // remove cache entries for detected boxes on low confidence
  detectedFaces = faces.length;
  return faces;
}

export async function load(config: Config): Promise<GraphModel> {
  if (env.initial) model = null;
  if (!model) {
    model = await tf.loadGraphModel(join(config.modelBasePath, config.face.mesh?.modelPath || '')) as unknown as GraphModel;
    if (!model || !model['modelUrl']) log('load model failed:', config.face.mesh?.modelPath);
    else if (config.debug) log('load model:', model['modelUrl']);
  } else if (config.debug) log('cached model:', model['modelUrl']);
  inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
  if (inputSize === -1) inputSize = 64;
  return model;
}

export const triangulation = coords.TRI468;
export const uvmap = coords.UV468;
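The cache-reset condition in `predict` reads awkwardly because of operator precedence; a paraphrase with the same semantics (no new behavior, just the condition restated).

```ts
// Detector re-runs when frame skipping is disabled entirely, OR when
// (the tracked face count drifted from maxDetected, or mesh is disabled)
// AND more than skipFrames frames have passed since the last detector run.
const runDetector = !config.skipFrame
  || (((detectedFaces !== config.face.detector?.maxDetected) || !config.face.mesh?.enabled)
    && (skipped > (config.face.detector?.skipFrames || 0)));
```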
@@ -3,7 +3,7 @@
 * See `facemesh.ts` for entry point
 */

-export const MESH_ANNOTATIONS = {
+export const meshAnnotations = {
  silhouette: [
    10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
    397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,

@@ -42,6 +42,22 @@ export const MESH_ANNOTATIONS = {
  leftCheek: [425],
};

+export const meshLandmarks = {
+  count: 468,
+  mouth: 13,
+  symmetryLine: [13, meshAnnotations['midwayBetweenEyes'][0]],
+};
+
+export const blazeFaceLandmarks = {
+  leftEye: 0,
+  rightEye: 1,
+  nose: 2,
+  mouth: 3,
+  leftEar: 4,
+  rightEar: 5,
+  symmetryLine: [3, 2],
+};

export const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypoints to iris model keypoints.
  { key: 'EyeUpper0', indices: [9, 10, 11, 12, 13, 14, 15] },
  { key: 'EyeUpper1', indices: [25, 26, 27, 28, 29, 30, 31] },
src/face/facemeshutil.ts (new file)

@@ -0,0 +1,166 @@
/**
 * BlazeFace, FaceMesh & Iris model implementation
 * See `facemesh.ts` for entry point
 */

import * as tf from '../../dist/tfjs.esm.js';
import * as coords from './facemeshcoords';
import type { Box, Point } from '../result';

export const createBox = (startEndTensor) => ({ startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]), endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]) });

export const disposeBox = (t) => tf.dispose([t.startPoint, t.endPoint]);

export const getBoxSize = (box): [number, number] => [Math.abs(box.endPoint[0] - box.startPoint[0]), Math.abs(box.endPoint[1] - box.startPoint[1])];

export const getBoxCenter = (box): [number, number] => [box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2, box.startPoint[1] + (box.endPoint[1] - box.startPoint[1]) / 2];

export const getClampedBox = (box, input): Box => (box ? [
  Math.trunc(Math.max(0, box.startPoint[0])),
  Math.trunc(Math.max(0, box.startPoint[1])),
  Math.trunc(Math.min((input.shape[2] || 0), box.endPoint[0]) - Math.max(0, box.startPoint[0])),
  Math.trunc(Math.min((input.shape[1] || 0), box.endPoint[1]) - Math.max(0, box.startPoint[1])),
] : [0, 0, 0, 0]);

export const getRawBox = (box, input): Box => (box ? [
  box.startPoint[0] / (input.shape[2] || 0),
  box.startPoint[1] / (input.shape[1] || 0),
  (box.endPoint[0] - box.startPoint[0]) / (input.shape[2] || 0),
  (box.endPoint[1] - box.startPoint[1]) / (input.shape[1] || 0),
] : [0, 0, 0, 0]);

export const scaleBoxCoordinates = (box, factor) => {
  const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
  const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
  return { startPoint, endPoint };
};

export const cutBoxFromImageAndResize = (box, image, cropSize) => {
  const h = image.shape[1];
  const w = image.shape[2];
  return tf.image.cropAndResize(image, [[box.startPoint[1] / h, box.startPoint[0] / w, box.endPoint[1] / h, box.endPoint[0] / w]], [0], cropSize);
};

export const enlargeBox = (box, factor = 1.5) => {
  const center = getBoxCenter(box);
  const size = getBoxSize(box);
  const halfSize: [number, number] = [factor * size[0] / 2, factor * size[1] / 2];
  return { startPoint: [center[0] - halfSize[0], center[1] - halfSize[1]] as Point, endPoint: [center[0] + halfSize[0], center[1] + halfSize[1]] as Point, landmarks: box.landmarks };
};

export const squarifyBox = (box) => {
  const centers = getBoxCenter(box);
  const size = getBoxSize(box);
  const halfSize = Math.max(...size) / 2;
  return { startPoint: [Math.round(centers[0] - halfSize), Math.round(centers[1] - halfSize)] as Point, endPoint: [Math.round(centers[0] + halfSize), Math.round(centers[1] + halfSize)] as Point, landmarks: box.landmarks };
};

export const calculateLandmarksBoundingBox = (landmarks) => {
  const xs = landmarks.map((d) => d[0]);
  const ys = landmarks.map((d) => d[1]);
  return { startPoint: [Math.min(...xs), Math.min(...ys)], endPoint: [Math.max(...xs), Math.max(...ys)], landmarks };
};

export const IDENTITY_MATRIX = [[1, 0, 0], [0, 1, 0], [0, 0, 1]];

export const normalizeRadians = (angle) => angle - 2 * Math.PI * Math.floor((angle + Math.PI) / (2 * Math.PI));

export const computeRotation = (point1, point2) => normalizeRadians(Math.PI / 2 - Math.atan2(-(point2[1] - point1[1]), point2[0] - point1[0]));

export const radToDegrees = (rad) => rad * 180 / Math.PI;

export const buildTranslationMatrix = (x, y) => [[1, 0, x], [0, 1, y], [0, 0, 1]];

export const dot = (v1, v2) => {
  let product = 0;
  for (let i = 0; i < v1.length; i++) product += v1[i] * v2[i];
  return product;
};

export const getColumnFrom2DArr = (arr, columnIndex) => {
  const column: Array<number> = [];
  for (let i = 0; i < arr.length; i++) column.push(arr[i][columnIndex]);
  return column;
};

export const multiplyTransformMatrices = (mat1, mat2) => {
  const product: Array<number[]> = [];
  const size = mat1.length;
  for (let row = 0; row < size; row++) {
    product.push([]);
    for (let col = 0; col < size; col++) product[row].push(dot(mat1[row], getColumnFrom2DArr(mat2, col)));
  }
  return product;
};

export const buildRotationMatrix = (rotation, center) => {
  const cosA = Math.cos(rotation);
  const sinA = Math.sin(rotation);
  const rotationMatrix = [[cosA, -sinA, 0], [sinA, cosA, 0], [0, 0, 1]];
  const translationMatrix = buildTranslationMatrix(center[0], center[1]);
  const translationTimesRotation = multiplyTransformMatrices(translationMatrix, rotationMatrix);
  const negativeTranslationMatrix = buildTranslationMatrix(-center[0], -center[1]);
  return multiplyTransformMatrices(translationTimesRotation, negativeTranslationMatrix);
};

export const invertTransformMatrix = (matrix) => {
  const rotationComponent = [[matrix[0][0], matrix[1][0]], [matrix[0][1], matrix[1][1]]];
  const translationComponent = [matrix[0][2], matrix[1][2]];
  const invertedTranslation = [-dot(rotationComponent[0], translationComponent), -dot(rotationComponent[1], translationComponent)];
  return [rotationComponent[0].concat(invertedTranslation[0]), rotationComponent[1].concat(invertedTranslation[1]), [0, 0, 1]];
};

export const rotatePoint = (homogeneousCoordinate, rotationMatrix) => [dot(homogeneousCoordinate, rotationMatrix[0]), dot(homogeneousCoordinate, rotationMatrix[1])];

export const xyDistanceBetweenPoints = (a, b) => Math.sqrt(((a[0] - b[0]) ** 2) + ((a[1] - b[1]) ** 2));

export function generateAnchors(inputSize) {
  const spec = { strides: [inputSize / 16, inputSize / 8], anchors: [2, 6] };
  const anchors: Array<[number, number]> = [];
  for (let i = 0; i < spec.strides.length; i++) {
    const stride = spec.strides[i];
    const gridRows = Math.floor((inputSize + stride - 1) / stride);
    const gridCols = Math.floor((inputSize + stride - 1) / stride);
    const anchorsNum = spec.anchors[i];
    for (let gridY = 0; gridY < gridRows; gridY++) {
      const anchorY = stride * (gridY + 0.5);
      for (let gridX = 0; gridX < gridCols; gridX++) {
        const anchorX = stride * (gridX + 0.5);
        for (let n = 0; n < anchorsNum; n++) anchors.push([anchorX, anchorY]);
      }
    }
  }
  return anchors;
}

export function transformRawCoords(rawCoords, box, angle, rotationMatrix, inputSize) {
  const boxSize = getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint });
  const coordsScaled = rawCoords.map((coord) => ([
    boxSize[0] / inputSize * (coord[0] - inputSize / 2),
    boxSize[1] / inputSize * (coord[1] - inputSize / 2),
    coord[2] || 0,
  ]));
  const coordsRotationMatrix = (angle !== 0) ? buildRotationMatrix(angle, [0, 0]) : IDENTITY_MATRIX;
  const coordsRotated = (angle !== 0) ? coordsScaled.map((coord) => ([...rotatePoint(coord, coordsRotationMatrix), coord[2]])) : coordsScaled;
  const inverseRotationMatrix = (angle !== 0) ? invertTransformMatrix(rotationMatrix) : IDENTITY_MATRIX;
  const boxCenter = [...getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint }), 1];
  return coordsRotated.map((coord) => ([
    Math.round(coord[0] + dot(boxCenter, inverseRotationMatrix[0])),
    Math.round(coord[1] + dot(boxCenter, inverseRotationMatrix[1])),
    Math.round(coord[2] || 0),
  ]));
}

export function correctFaceRotation(box, input, inputSize) {
  const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= coords.meshLandmarks.count) ? coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
  const angle: number = computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
  const faceCenter: Point = getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
  const faceCenterNormalized: Point = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
  const rotated = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
  const rotationMatrix = buildRotationMatrix(-angle, faceCenter);
  const cut = cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotated, [inputSize, inputSize]);
  const face = tf.div(cut, 255);
  tf.dispose(cut);
  tf.dispose(rotated);
  return [angle, rotationMatrix, face];
}
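A self-contained sketch of the rotation helpers above: rotate a point 90 degrees around a pivot, then undo it (expect tiny floating-point noise in the printed values).

```ts
const center = [100, 100];
const m = buildRotationMatrix(Math.PI / 2, center);  // rotates around the pivot, not the origin
const p = rotatePoint([110, 100, 1], m);             // homogeneous [x, y, 1]
console.log(p);                                      // ~[100, 110]
const inv = invertTransformMatrix(m);
console.log(rotatePoint([p[0], p[1], 1], inv));      // back to ~[110, 100]
```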
@ -0,0 +1,150 @@
|
|||
import * as coords from './facemeshcoords';
|
||||
import * as util from './facemeshutil';
|
||||
import * as tf from '../../dist/tfjs.esm.js';
|
||||
import type { Tensor, GraphModel } from '../tfjs/types';
|
||||
import { env } from '../util/env';
|
||||
import { log, join } from '../util/util';
|
||||
import type { Config } from '../config';
|
||||
import type { Point } from '../result';
|
||||
|
||||
let model: GraphModel | null;
|
||||
let inputSize = 0;
|
||||
|
||||
const irisEnlarge = 2.3;
|
||||
|
||||
const leftOutline = coords.meshAnnotations['leftEyeLower0'];
|
||||
const rightOutline = coords.meshAnnotations['rightEyeLower0'];
|
||||
|
||||
const eyeLandmarks = {
|
||||
leftBounds: [leftOutline[0], leftOutline[leftOutline.length - 1]],
|
||||
rightBounds: [rightOutline[0], rightOutline[rightOutline.length - 1]],
|
||||
};
|
||||
|
||||
const irisLandmarks = {
|
||||
upperCenter: 3,
|
||||
lowerCenter: 4,
|
||||
index: 71,
|
||||
numCoordinates: 76,
|
||||
};
|
||||
|
||||
export async function load(config: Config): Promise<GraphModel> {
|
||||
if (env.initial) model = null;
|
||||
if (!model) {
|
||||
model = await tf.loadGraphModel(join(config.modelBasePath, config.face.iris?.modelPath || '')) as unknown as GraphModel;
|
||||
if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath);
|
||||
else if (config.debug) log('load model:', model['modelUrl']);
|
||||
} else if (config.debug) log('cached model:', model['modelUrl']);
|
||||
inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
|
||||
if (inputSize === -1) inputSize = 64;
|
||||
return model;
|
||||
}
|
||||
|
||||
// Replace the raw coordinates returned by facemesh with refined iris model coordinates
|
||||
// Update the z coordinate to be an average of the original and the new.
|
||||
function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
|
||||
for (let i = 0; i < coords.MESH_TO_IRIS_INDICES_MAP.length; i++) {
|
||||
const { key, indices } = coords.MESH_TO_IRIS_INDICES_MAP[i];
|
||||
const originalIndices = coords.meshAnnotations[`${prefix}${key}`];
|
||||
if (!keys || keys.includes(key)) {
|
||||
for (let j = 0; j < indices.length; j++) {
|
||||
const index = indices[j];
|
||||
rawCoords[originalIndices[j]] = [
|
||||
newCoords[index][0], newCoords[index][1],
|
||||
(newCoords[index][2] + rawCoords[originalIndices[j]][2]) / 2,
|
||||
];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// eslint-disable-next-line class-methods-use-this
|
||||
export const getLeftToRightEyeDepthDifference = (rawCoords) => {
|
||||
const leftEyeZ = rawCoords[eyeLandmarks.leftBounds[0]][2];
|
||||
const rightEyeZ = rawCoords[eyeLandmarks.rightBounds[0]][2];
|
||||
return leftEyeZ - rightEyeZ;
|
||||
};
|
||||
|
||||
// Returns a box describing a cropped region around the eye fit for passing to the iris model.
|
||||
export const getEyeBox = (rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false, meshSize) => {
|
||||
const box = util.squarifyBox(util.enlargeBox(util.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), irisEnlarge));
|
||||
const boxSize = util.getBoxSize(box);
|
||||
let crop = tf.image.cropAndResize(face, [[
|
||||
box.startPoint[1] / meshSize,
|
||||
box.startPoint[0] / meshSize, box.endPoint[1] / meshSize,
|
||||
box.endPoint[0] / meshSize,
|
||||
]], [0], [inputSize, inputSize]);
|
||||
if (flip && env.kernels.includes('flipleftright')) {
|
||||
const flipped = tf.image.flipLeftRight(crop); // flipLeftRight is not defined for tfjs-node
|
||||
tf.dispose(crop);
|
||||
crop = flipped;
|
||||
}
|
||||
return { box, boxSize, crop };
|
||||
};
|
||||
|
||||
// Given a cropped image of an eye, returns the coordinates of the contours surrounding the eye and the iris.
|
||||
export const getEyeCoords = (eyeData, eyeBox, eyeBoxSize, flip = false) => {
|
||||
const eyeRawCoords: Array<Point> = [];
|
||||
for (let i = 0; i < irisLandmarks.numCoordinates; i++) {
|
||||
const x = eyeData[i * 3];
|
||||
const y = eyeData[i * 3 + 1];
|
||||
const z = eyeData[i * 3 + 2];
|
||||
eyeRawCoords.push([
|
||||
(flip ? (1 - (x / inputSize)) : (x / inputSize)) * eyeBoxSize[0] + eyeBox.startPoint[0],
|
||||
(y / inputSize) * eyeBoxSize[1] + eyeBox.startPoint[1], z,
|
||||
]);
|
||||
}
|
||||
return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(irisLandmarks.index) };
|
||||
};
|
||||
|
||||
// The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints.
|
||||
// eslint-disable-next-line class-methods-use-this
|
||||
export const getAdjustedIrisCoords = (rawCoords, irisCoords, direction) => {
|
||||
const upperCenterZ = rawCoords[coords.meshAnnotations[`${direction}EyeUpper0`][irisLandmarks.upperCenter]][2];
|
||||
const lowerCenterZ = rawCoords[coords.meshAnnotations[`${direction}EyeLower0`][irisLandmarks.lowerCenter]][2];
|
||||
const averageZ = (upperCenterZ + lowerCenterZ) / 2;
|
||||
// Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below
|
||||
return irisCoords.map((coord, i) => {
|
||||
let z = averageZ;
|
||||
if (i === 2) {
|
||||
z = upperCenterZ;
|
||||
} else if (i === 4) {
|
||||
z = lowerCenterZ;
|
||||
}
|
||||
return [coord[0], coord[1], z];
|
||||
});
|
||||
};
|
||||
|
||||
export async function augmentIris(rawCoords, face, config, meshSize) {
  if (!model) {
    if (config.debug) log('face mesh iris detection requested, but model is not loaded');
    return rawCoords;
  }
  const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = getEyeBox(rawCoords, face, eyeLandmarks.leftBounds[0], eyeLandmarks.leftBounds[1], true, meshSize);
  const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = getEyeBox(rawCoords, face, eyeLandmarks.rightBounds[0], eyeLandmarks.rightBounds[1], true, meshSize);
  const combined = tf.concat([leftEyeCrop, rightEyeCrop]);
  tf.dispose(leftEyeCrop);
  tf.dispose(rightEyeCrop);
  const eyePredictions = model.predict(combined) as Tensor;
  tf.dispose(combined);
  const eyePredictionsData = await eyePredictions.data();
  tf.dispose(eyePredictions);
  const leftEyeData = eyePredictionsData.slice(0, irisLandmarks.numCoordinates * 3);
  const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
  const rightEyeData = eyePredictionsData.slice(irisLandmarks.numCoordinates * 3);
  const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize);
  const leftToRightEyeDepthDifference = getLeftToRightEyeDepthDifference(rawCoords);
  if (Math.abs(leftToRightEyeDepthDifference) < 30) { // user is looking straight ahead
    replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', null);
    replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', null);
    // if the user is looking to the left or to the right, the iris coordinates tend to diverge too much from the mesh coordinates to be merged,
    // so only a single contour line above and below the eye is updated
  } else if (leftToRightEyeDepthDifference < 1) { // user is looking towards the right
    replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', ['EyeUpper0', 'EyeLower0']);
  } else { // user is looking towards the left
    replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', ['EyeUpper0', 'EyeLower0']);
  }
  const adjustedLeftIrisCoords = getAdjustedIrisCoords(rawCoords, leftIrisRawCoords, 'left');
  const adjustedRightIrisCoords = getAdjustedIrisCoords(rawCoords, rightIrisRawCoords, 'right');
  const newCoords = rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords);
  return newCoords;
}
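A sketch of where this augmentation would sit in the mesh pipeline; the surrounding helper name is hypothetical, only augmentIris and its signature come from the code above:

// Hypothetical call site: rawCoords holds the mesh points, faceTensor the cropped face
let rawCoords = await runFaceMesh(faceTensor); // hypothetical mesh inference helper
if (config.face.iris?.enabled) {
  rawCoords = await augmentIris(rawCoords, faceTensor, config, meshSize); // appends adjusted iris points to the mesh
}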
@@ -3,7 +3,7 @@
  */

 import type { GestureResult } from '../result';
-import * as fingerPose from '../fingerpose/fingerpose';
+import * as fingerPose from '../hand/fingerpose';

 /**
  * @typedef FaceGesture
@@ -63,7 +63,7 @@ export const face = (res): GestureResult[] => {
   if (!res) return [];
   const gestures: Array<{ face: number, gesture: FaceGesture }> = [];
   for (let i = 0; i < res.length; i++) {
-    if (res[i].mesh && res[i].mesh.length > 0) {
+    if (res[i].mesh && res[i].mesh.length > 450) {
       const eyeFacing = res[i].mesh[33][2] - res[i].mesh[263][2];
       if (Math.abs(eyeFacing) < 10) gestures.push({ face: i, gesture: 'facing center' });
       else gestures.push({ face: i, gesture: `facing ${eyeFacing < 0 ? 'left' : 'right'}` });
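The tightened > 450 guard ensures a full mesh result is present (the complete mesh has 468 points) before indexing mesh points 33 and 263, the outer corners of the two eyes in the mesh topology, whose z-difference serves as a proxy for head yaw.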
@@ -84,7 +84,7 @@ export const iris = (res): GestureResult[] => {
   if (!res) return [];
   const gestures: Array<{ iris: number, gesture: IrisGesture }> = [];
   for (let i = 0; i < res.length; i++) {
-    if (!res[i].annotations || !res[i].annotations.leftEyeIris || !res[i].annotations.rightEyeIris) continue;
+    if (!res[i].annotations || !res[i].annotations.leftEyeIris || !res[i].annotations.leftEyeIris[0] || !res[i].annotations.rightEyeIris || !res[i].annotations.rightEyeIris[0]) continue;
     const sizeXLeft = res[i].annotations.leftEyeIris[3][0] - res[i].annotations.leftEyeIris[1][0];
     const sizeYLeft = res[i].annotations.leftEyeIris[4][1] - res[i].annotations.leftEyeIris[2][1];
     const areaLeft = Math.abs(sizeXLeft * sizeYLeft);
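The index arithmetic matches the iris ring layout noted earlier (0: center, 1: right, 2: above, 3: left, 4: below): iris width is the x-distance between the left and right points, iris height is the y-distance between the below and above points, and their product approximates the visible iris area.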
@@ -3,7 +3,54 @@
  * See `fingerpose.ts` for entry point
  */

-export default class Gesture {
+export const Finger = {
+  thumb: 0,
+  index: 1,
+  middle: 2,
+  ring: 3,
+  pinky: 4,
+  all: [0, 1, 2, 3, 4], // just for convenience
+  nameMapping: { 0: 'thumb', 1: 'index', 2: 'middle', 3: 'ring', 4: 'pinky' },
+  // describes the mapping of joints based on the 21 points returned by handpose:
+  // [0] palm, [1-4] thumb, [5-8] index, [9-12] middle, [13-16] ring, [17-20] pinky
+  pointsMapping: {
+    0: [[0, 1], [1, 2], [2, 3], [3, 4]],
+    1: [[0, 5], [5, 6], [6, 7], [7, 8]],
+    2: [[0, 9], [9, 10], [10, 11], [11, 12]],
+    3: [[0, 13], [13, 14], [14, 15], [15, 16]],
+    4: [[0, 17], [17, 18], [18, 19], [19, 20]],
+  },
+  getName: (value) => Finger.nameMapping[value],
+  getPoints: (value) => Finger.pointsMapping[value],
+};
+
+export const FingerCurl = {
+  none: 0,
+  half: 1,
+  full: 2,
+  nameMapping: { 0: 'none', 1: 'half', 2: 'full' },
+  getName: (value) => FingerCurl.nameMapping[value],
+};
+
+export const FingerDirection = {
+  verticalUp: 0,
+  verticalDown: 1,
+  horizontalLeft: 2,
+  horizontalRight: 3,
+  diagonalUpRight: 4,
+  diagonalUpLeft: 5,
+  diagonalDownRight: 6,
+  diagonalDownLeft: 7,
+  nameMapping: { 0: 'verticalUp', 1: 'verticalDown', 2: 'horizontalLeft', 3: 'horizontalRight', 4: 'diagonalUpRight', 5: 'diagonalUpLeft', 6: 'diagonalDownRight', 7: 'diagonalDownLeft' },
+  getName: (value) => FingerDirection.nameMapping[value],
+};
+
+export class FingerGesture {
+  name;
+  curls;
+  directions;
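A quick standalone sketch of how these lookup tables resolve numeric estimator outputs into labels:

console.log(Finger.getName(1)); // 'index'
console.log(Finger.getPoints(1)); // [[0, 5], [5, 6], [6, 7], [7, 8]]
console.log(FingerCurl.getName(2)); // 'full'
console.log(FingerDirection.getName(0)); // 'verticalUp'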
@@ -3,11 +3,10 @@
  * See `fingerpose.ts` for entry point
  */

-import { Finger, FingerCurl, FingerDirection } from './description';
-import Gesture from './gesture';
+import { Finger, FingerCurl, FingerDirection, FingerGesture } from './fingerdef';

 // describe thumbs up gesture 👍
-const ThumbsUp = new Gesture('thumbs up');
+const ThumbsUp = new FingerGesture('thumbs up');
 ThumbsUp.addCurl(Finger.thumb, FingerCurl.none, 1.0);
 ThumbsUp.addDirection(Finger.thumb, FingerDirection.verticalUp, 1.0);
 ThumbsUp.addDirection(Finger.thumb, FingerDirection.diagonalUpLeft, 0.25);
@@ -19,7 +18,7 @@ for (const finger of [Finger.index, Finger.middle, Finger.ring, Finger.pinky]) {
 }

 // describe Victory gesture ✌️
-const Victory = new Gesture('victory');
+const Victory = new FingerGesture('victory');
 Victory.addCurl(Finger.thumb, FingerCurl.half, 0.5);
 Victory.addCurl(Finger.thumb, FingerCurl.none, 0.5);
 Victory.addDirection(Finger.thumb, FingerDirection.verticalUp, 1.0);
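Additional gestures follow the same pattern. A hypothetical 'point up' gesture as a sketch (not part of this commit; it would also need to be added to the exported gesture list before match could report it):

// Hypothetical gesture: index finger extended and pointing up, remaining fingers fully curled
const PointUp = new FingerGesture('point up');
PointUp.addCurl(Finger.index, FingerCurl.none, 1.0);
PointUp.addDirection(Finger.index, FingerDirection.verticalUp, 1.0);
for (const finger of [Finger.middle, Finger.ring, Finger.pinky]) {
  PointUp.addCurl(finger, FingerCurl.full, 1.0);
}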
@@ -1,10 +1,13 @@
 /**
- * FingerPose algorithm implementation
- * See `fingerpose.ts` for entry point
+ * FingerPose algorithm implementation constants
+ *
+ * Based on: [**FingerPose**](https://github.com/andypotato/fingerpose)
  */

-import { Finger, FingerCurl, FingerDirection } from './description';
+import { Finger, FingerCurl, FingerDirection } from './fingerdef';
+import Gestures from '../hand/fingergesture';

 const minConfidence = 0.7;
 const options = {
   // curl estimation
   HALF_CURL_START_LIMIT: 60.0,
@@ -169,7 +172,7 @@ function calculateFingerDirection(startPoint, midPoint, endPoint, fingerSlopes)
   return estimatedDirection;
 }

-export function estimate(landmarks) {
+function estimate(landmarks) {
   // step 1: calculate slopes
   const slopesXY: Array<number[]> = [];
   const slopesYZ: Array<number[]> = [];
@@ -212,3 +215,29 @@ export function estimate(landmarks) {
   }
   return { curls: fingerCurls, directions: fingerDirections };
 }
+
+export function analyze(keypoints) { // get estimations of curl / direction for each finger
+  if (!keypoints || keypoints.length === 0) return null;
+  const estimatorRes = estimate(keypoints);
+  const landmarks = {};
+  for (const fingerIdx of Finger.all) {
+    landmarks[Finger.getName(fingerIdx)] = {
+      curl: FingerCurl.getName(estimatorRes.curls[fingerIdx]),
+      direction: FingerDirection.getName(estimatorRes.directions[fingerIdx]),
+    };
+  }
+  // console.log('finger landmarks', landmarks);
+  return landmarks;
+}
+
+export function match(keypoints) { // compare gesture description to each known gesture
+  const poses: Array<{ name: string, confidence: number }> = [];
+  if (!keypoints || keypoints.length === 0) return poses;
+  const estimatorRes = estimate(keypoints);
+  for (const gesture of Gestures) {
+    const confidence = gesture.matchAgainst(estimatorRes.curls, estimatorRes.directions);
+    if (confidence >= minConfidence) poses.push({ name: gesture.name, confidence });
+  }
+  // console.log('finger poses', poses);
+  return poses;
+}
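A sketch of consuming these helpers with a 21-point hand landmark array; the zero-filled keypoints are placeholders for real detector output:

const keypoints: Array<[number, number, number]> = Array.from({ length: 21 }, () => [0, 0, 0]);
const landmarks = analyze(keypoints); // per-finger curl and direction labels, or null for empty input
const poses = match(keypoints); // known gestures scoring at or above minConfidence (0.7)
console.log(landmarks, poses);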
@@ -13,7 +13,7 @@ import type { HandResult, Box, Point } from '../result';
 import type { GraphModel, Tensor } from '../tfjs/types';
 import type { Config } from '../config';
 import { env } from '../util/env';
-import * as fingerPose from '../fingerpose/fingerpose';
+import * as fingerPose from './fingerpose';
 import { fakeOps } from '../tfjs/backend';

 const boxScaleFact = 1.5; // hand finger model prefers slightly larger box
@@ -8,7 +8,7 @@ import { log, join } from '../util/util';
 import * as tf from '../../dist/tfjs.esm.js';
 import * as handdetector from './handdetector';
 import * as handpipeline from './handpipeline';
-import * as fingerPose from '../fingerpose/fingerpose';
+import * as fingerPose from '../hand/fingerpose';
 import type { HandResult, Box, Point } from '../result';
 import type { Tensor, GraphModel } from '../tfjs/types';
 import type { Config } from '../config';
@@ -8,9 +8,9 @@ import type { Result, FaceResult, HandResult, BodyResult, ObjectResult, GestureResult } from './result';
 import * as tf from '../dist/tfjs.esm.js';
 import * as models from './models';
 import * as face from './face/face';
-import * as facemesh from './blazeface/facemesh';
+import * as facemesh from './face/facemesh';
 import * as faceres from './face/faceres';
-import * as posenet from './posenet/posenet';
+import * as posenet from './body/posenet';
 import * as handtrack from './hand/handtrack';
 import * as handpose from './handpose/handpose';
 // import * as blazepose from './body/blazepose-v1';
@@ -23,7 +23,7 @@ import * as segmentation from './segmentation/segmentation';
 import * as gesture from './gesture/gesture';
 import * as image from './image/image';
 import * as draw from './util/draw';
-import * as persons from './persons';
+import * as persons from './util/persons';
 import * as interpolate from './util/interpolate';
 import * as env from './util/env';
 import * as backend from './tfjs/backend';
@@ -4,10 +4,12 @@

 import { log } from './util/util';
 import type { GraphModel } from './tfjs/types';
-import * as facemesh from './blazeface/facemesh';
+import * as blazeface from './face/blazeface';
+import * as facemesh from './face/facemesh';
+import * as iris from './face/iris';
 import * as faceres from './face/faceres';
 import * as emotion from './gear/emotion';
-import * as posenet from './posenet/posenet';
+import * as posenet from './body/posenet';
 import * as handpose from './handpose/handpose';
 import * as handtrack from './hand/handtrack';
 import * as blazepose from './body/blazepose';
@@ -57,15 +59,13 @@ export function reset(instance: Human) {

 /** Load method preloads all instance.configured models on-demand */
 export async function load(instance: Human) {
   if (env.initial) reset(instance);
-  if (instance.config.face.enabled) { // face model is a combo that must be loaded as a whole
-    if (!instance.models.facedetect) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
-    if (instance.config.face.mesh?.enabled && !instance.models.facemesh) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
-    if (instance.config.face.iris?.enabled && !instance.models.faceiris) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
-  }
-  if (instance.config.hand.enabled) { // handpose model is a combo that must be loaded as a whole
-    if (!instance.models.handpose && instance.config.hand.detector?.modelPath?.includes('handdetect')) [instance.models.handpose, instance.models.handskeleton] = await handpose.load(instance.config);
-    if (!instance.models.handskeleton && instance.config.hand.landmarks && instance.config.hand.detector?.modelPath?.includes('handdetect')) [instance.models.handpose, instance.models.handskeleton] = await handpose.load(instance.config);
-  }
+  if (instance.config.face.enabled && !instance.models.facedetect) instance.models.facedetect = blazeface.load(instance.config);
+  if (instance.config.face.enabled && instance.config.face.mesh?.enabled && !instance.models.facemesh) instance.models.facemesh = facemesh.load(instance.config);
+  if (instance.config.face.enabled && instance.config.face.iris?.enabled && !instance.models.faceiris) instance.models.faceiris = iris.load(instance.config);
+  if (instance.config.hand.enabled && !instance.models.handtrack && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handtrack = handtrack.loadDetect(instance.config);
+  if (instance.config.hand.enabled && instance.config.hand.landmarks && !instance.models.handskeleton && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handskeleton = handtrack.loadSkeleton(instance.config);
 if (instance.config.body.enabled && !instance.models.posenet && instance.config.body?.modelPath?.includes('posenet')) instance.models.posenet = posenet.load(instance.config);
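A sketch of how this on-demand loading is driven from the public API; the config keys mirror the checks above, while the model path values are illustrative:

import { Human } from '@vladmandic/human';

const human = new Human({
  face: { enabled: true, mesh: { enabled: true }, iris: { enabled: true } },
  hand: { enabled: true, landmarks: true, detector: { modelPath: 'handtrack.json' } }, // 'handtrack' selects loadDetect/loadSkeleton
  body: { enabled: true, modelPath: 'movenet-lightning.json' },
});
await human.load(); // inside an async context; preloads only the models enabled above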
@@ -2,7 +2,7 @@
  * Analyze detection results and sort & combine them into a per-person view
  */

-import type { FaceResult, BodyResult, HandResult, GestureResult, PersonResult, Box } from './result';
+import type { FaceResult, BodyResult, HandResult, GestureResult, PersonResult, Box } from '../result';

 export function join(faces: Array<FaceResult>, bodies: Array<BodyResult>, hands: Array<HandResult>, gestures: Array<GestureResult>, shape: Array<number> | undefined): Array<PersonResult> {
   let id = 0;
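A brief usage sketch for join; result stands for a hypothetical per-frame detection result and shape is assumed to be the processed input dimensions:

const people = join(result.face, result.body, result.hand, result.gesture, [input.height, input.width]);
for (const person of people) console.log(person.id, person.face, person.body);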