human/src/blazeface/facepipeline.ts

279 lines
14 KiB
TypeScript
Raw Normal View History

2020-10-12 01:22:43 +02:00
/* eslint-disable class-methods-use-this */
2020-11-18 14:26:28 +01:00
import * as tf from '../../dist/tfjs.esm.js';
2020-11-10 02:13:38 +01:00
import * as bounding from './box';
import * as util from './util';
2021-02-13 15:16:41 +01:00
import * as coords from './coords';
2020-10-12 01:22:43 +02:00
// Landmark-count threshold: a cached box whose `landmarks` array has at least this many
// entries is treated as carrying mesh landmarks rather than blazeface keypoints.
const LANDMARKS_COUNT = 468;

// Pairs of landmark indices (mouth first) handed to util.computeRotation to estimate the face angle.
// One pair indexes mesh landmarks, the other indexes blazeface keypoints.
const MESH_MOUTH_INDEX = 13;
const MESH_KEYPOINTS_LINE_OF_SYMMETRY_INDICES = [MESH_MOUTH_INDEX, coords.MESH_ANNOTATIONS['midwayBetweenEyes'][0]];
const BLAZEFACE_MOUTH_INDEX = 3;
const BLAZEFACE_NOSE_INDEX = 2;
const BLAZEFACE_KEYPOINTS_LINE_OF_SYMMETRY_INDICES = [BLAZEFACE_MOUTH_INDEX, BLAZEFACE_NOSE_INDEX];

// First and last index of each eye's `EyeLower0` annotation; used as the two corner
// landmarks from which the eye crop for the iris model is derived.
const LEFT_EYE_OUTLINE = coords.MESH_ANNOTATIONS['leftEyeLower0'];
const LEFT_EYE_BOUNDS = [LEFT_EYE_OUTLINE[0], LEFT_EYE_OUTLINE[LEFT_EYE_OUTLINE.length - 1]];
const RIGHT_EYE_OUTLINE = coords.MESH_ANNOTATIONS['rightEyeLower0'];
const RIGHT_EYE_BOUNDS = [RIGHT_EYE_OUTLINE[0], RIGHT_EYE_OUTLINE[RIGHT_EYE_OUTLINE.length - 1]];

// Positions inside the `<side>EyeUpper0` / `<side>EyeLower0` annotations whose z values
// are borrowed for the iris points.
const IRIS_UPPER_CENTER_INDEX = 3;
const IRIS_LOWER_CENTER_INDEX = 4;
// Index of the first iris point within the per-eye iris-model output.
const IRIS_IRIS_INDEX = 71;
// Number of [x, y, z] points read from the iris-model output for each eye.
const IRIS_NUM_COORDINATES = 76;
2021-03-06 16:38:04 +01:00
/**
 * Replaces facemesh coordinates in `rawCoords` with the refined coordinates produced by the iris model.
 * The array is updated in place: x and y are taken from `newCoords`, while z becomes the average of
 * the refined z and the z that was already stored.
 *
 * @param rawCoords mesh coordinates as [x, y, z] triplets; mutated in place
 * @param newCoords iris-model coordinates for one eye as [x, y, z] triplets
 * @param prefix annotation-name prefix combined with each map key (callers pass 'left' or 'right')
 * @param keys when given, only entries of MESH_TO_IRIS_INDICES_MAP whose key is listed are applied;
 *             null or undefined applies every entry
 */
function replaceRawCoordinates(rawCoords: number[][], newCoords: number[][], prefix: string, keys?: string[] | null): void {
  for (const { key, indices } of coords.MESH_TO_IRIS_INDICES_MAP) {
    // skip contours the caller did not ask for before doing any lookups
    if (keys && !keys.includes(key)) continue;
    const originalIndices = coords.MESH_ANNOTATIONS[`${prefix}${key}`];
    for (let j = 0; j < indices.length; j++) {
      const refined = newCoords[indices[j]];
      const target = originalIndices[j];
      rawCoords[target] = [
        refined[0],
        refined[1],
        (refined[2] + rawCoords[target][2]) / 2,
      ];
    }
  }
}
/**
 * Coordinates the bounding-box detector, the face-mesh model and the optional iris model.
 * Face boxes are cached in `storedBoxes` between `predict` calls so that the detector does
 * not have to run on every call.
 */
export class Pipeline {
  /** Cached face boxes (start/end point, landmarks, confidences) reused across `predict` calls. */
  storedBoxes: any;
  boundingBoxDetector: any;
  meshDetector: any;
  irisModel: any;
  /** Input size read from the bounding-box model (`inputs[0].shape[2]`), 0 when unavailable. */
  boxSize: number;
  /** Input size read from the mesh model (`inputs[0].shape[2]`); falls back to `boxSize`. */
  meshSize: number;
  /** Input size read from the iris model (`inputs[0].shape[1]`), 0 when unavailable. */
  irisSize: number;
  /** Factor passed to `bounding.enlargeBox` when building the eye crop. */
  irisEnlarge: number;
  /** Calls counted since the detector last ran (only incremented when `config.videoOptimized`). */
  skipped: number;
  /** Number of results returned by the most recent `predict` call. */
  detectedFaces: number;

  constructor(boundingBoxDetector, meshDetector, irisModel) {
    this.storedBoxes = [];
    this.boundingBoxDetector = boundingBoxDetector;
    this.meshDetector = meshDetector;
    this.irisModel = irisModel;
    this.boxSize = boundingBoxDetector?.blazeFaceModel?.inputs[0].shape[2] || 0;
    // fall back to the detector size so meshSize is always a number
    this.meshSize = meshDetector?.inputs[0].shape[2] || this.boxSize;
    this.irisSize = irisModel?.inputs[0].shape[1] || 0;
    this.irisEnlarge = 2.3;
    this.skipped = 0;
    this.detectedFaces = 0;
  }

  /**
   * Maps mesh-model coordinates (expressed in mesh-input pixels) back into the coordinate
   * space of `box`, undoing the rotation that was applied before cropping.
   *
   * @param rawCoords [x, y, z] triplets in mesh-input pixel space
   * @param box object with `startPoint` / `endPoint` of the face crop
   * @param angle rotation that was applied to the image; 0 skips all rotation handling
   * @param rotationMatrix matrix used to rotate the image; inverted here when `angle` is non-zero
   * @returns new array of [x, y, z] triplets; z is passed through unchanged
   */
  transformRawCoords(rawCoords, box, angle, rotationMatrix) {
    const boxSize = bounding.getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint });
    // raw coordinates live in mesh-input space, so both axes are scaled and centred with meshSize
    const scaleFactor = [boxSize[0] / this.meshSize, boxSize[1] / this.meshSize];
    const meshCenter = this.meshSize / 2;
    const coordsScaled = rawCoords.map((coord) => ([
      scaleFactor[0] * (coord[0] - meshCenter),
      scaleFactor[1] * (coord[1] - meshCenter),
      coord[2],
    ]));
    const isRotated = angle !== 0;
    const coordsRotationMatrix = isRotated ? util.buildRotationMatrix(angle, [0, 0]) : util.IDENTITY_MATRIX;
    const coordsRotated = isRotated
      ? coordsScaled.map((coord) => ([...util.rotatePoint(coord, coordsRotationMatrix), coord[2]]))
      : coordsScaled;
    const inverseRotationMatrix = isRotated ? util.invertTransformMatrix(rotationMatrix) : util.IDENTITY_MATRIX;
    const boxCenter = [...bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint }), 1];
    // the translation is the same for every landmark, so compute it once
    const offsetX = util.dot(boxCenter, inverseRotationMatrix[0]);
    const offsetY = util.dot(boxCenter, inverseRotationMatrix[1]);
    return coordsRotated.map((coord) => ([
      coord[0] + offsetX,
      coord[1] + offsetY,
      coord[2],
    ]));
  }

  /** Returns the z of the first left-eye bound landmark minus the z of the first right-eye bound landmark. */
  getLeftToRightEyeDepthDifference(rawCoords) {
    const leftEyeZ = rawCoords[LEFT_EYE_BOUNDS[0]][2];
    const rightEyeZ = rawCoords[RIGHT_EYE_BOUNDS[0]][2];
    return leftEyeZ - rightEyeZ;
  }

  /**
   * Returns a box describing a cropped region around the eye fit for passing to the iris model.
   *
   * @param rawCoords mesh coordinates in mesh-input pixel space
   * @param face face crop tensor the eye is cut from
   * @param eyeInnerCornerIndex index into `rawCoords` of one eye corner
   * @param eyeOuterCornerIndex index into `rawCoords` of the other eye corner
   * @param flip when true (and running in a browser) the crop is mirrored horizontally
   * @returns the squarified eye box, its size and the cropped tensor
   */
  getEyeBox(rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false) {
    const cornersBox = this.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]);
    const box = bounding.squarifyBox(bounding.enlargeBox(cornersBox, this.irisEnlarge));
    const boxSize = bounding.getBoxSize(box);
    // cropAndResize takes normalized [y1, x1, y2, x2]
    const normalizedBox = [
      box.startPoint[1] / this.meshSize,
      box.startPoint[0] / this.meshSize,
      box.endPoint[1] / this.meshSize,
      box.endPoint[0] / this.meshSize,
    ];
    let crop = tf.image.cropAndResize(face, [normalizedBox], [0], [this.irisSize, this.irisSize]);
    if (flip && tf.ENV.flags.IS_BROWSER) {
      crop = tf.image.flipLeftRight(crop); // flipLeftRight is not defined for tfjs-node
    }
    return { box, boxSize, crop };
  }

  /**
   * Given the iris-model output for one eye, returns the coordinates of the contours surrounding the eye and the iris.
   *
   * @param eyeData flat iris-model output, read as IRIS_NUM_COORDINATES consecutive x, y, z values
   * @param eyeBox eye box whose `startPoint` offsets the result
   * @param eyeBoxSize [width, height] of the eye box
   * @param flip when true the x coordinate is mirrored within the crop
   * @returns `rawCoords` with every point and `iris` with the points from IRIS_IRIS_INDEX onwards
   */
  getEyeCoords(eyeData, eyeBox, eyeBoxSize, flip = false) {
    // NOTE(review): callers mirror x here even when getEyeBox skipped the image flip
    // (non-browser); confirm whether that combination is intended.
    const eyeRawCoords: Array<any[]> = [];
    for (let i = 0; i < IRIS_NUM_COORDINATES; i++) {
      const offset = i * 3;
      const normalizedX = eyeData[offset] / this.irisSize;
      const normalizedY = eyeData[offset + 1] / this.irisSize;
      eyeRawCoords.push([
        (flip ? (1 - normalizedX) : normalizedX) * eyeBoxSize[0] + eyeBox.startPoint[0],
        normalizedY * eyeBoxSize[1] + eyeBox.startPoint[1],
        eyeData[offset + 2],
      ]);
    }
    return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(IRIS_IRIS_INDEX) };
  }

  /**
   * The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints.
   *
   * @param rawCoords mesh coordinates the z values are read from
   * @param irisCoords iris points for one eye
   * @param direction annotation prefix, callers pass 'left' or 'right'
   * @returns iris points with x and y kept and z replaced
   */
  getAdjustedIrisCoords(rawCoords, irisCoords, direction) {
    const upperCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeUpper0`][IRIS_UPPER_CENTER_INDEX]][2];
    const lowerCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeLower0`][IRIS_LOWER_CENTER_INDEX]][2];
    const averageZ = (upperCenterZ + lowerCenterZ) / 2;
    // Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below
    return irisCoords.map((coord, i) => {
      let z = averageZ;
      if (i === 2) z = upperCenterZ;
      else if (i === 4) z = lowerCenterZ;
      return [coord[0], coord[1], z];
    });
  }

  /**
   * Runs both eyes through the iris model, merges the refined eye contours into `rawCoords`
   * (in place) and returns `rawCoords` with the adjusted left and right iris points appended.
   */
  private refineEyesWithIris(rawCoords, face) {
    const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = this.getEyeBox(rawCoords, face, LEFT_EYE_BOUNDS[0], LEFT_EYE_BOUNDS[1], true);
    const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, RIGHT_EYE_BOUNDS[0], RIGHT_EYE_BOUNDS[1]);
    const eyePredictions = this.irisModel.predict(tf.concat([leftEyeCrop, rightEyeCrop]));
    const eyePredictionsData = eyePredictions.dataSync();
    // the batch holds the left eye first, then the right eye
    const leftEyeData = eyePredictionsData.slice(0, IRIS_NUM_COORDINATES * 3);
    const rightEyeData = eyePredictionsData.slice(IRIS_NUM_COORDINATES * 3);
    const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = this.getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
    const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = this.getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize);
    const leftToRightEyeDepthDifference = this.getLeftToRightEyeDepthDifference(rawCoords);
    if (Math.abs(leftToRightEyeDepthDifference) < 30) { // User is looking straight ahead.
      replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', null);
      replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', null);
    // If the user is looking to the left or to the right, the iris coordinates tend to diverge too much
    // from the mesh coordinates for them to be merged, so only a single contour line above and below
    // the eye is updated.
    } else if (leftToRightEyeDepthDifference < 1) { // User is looking towards the right.
      replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', ['EyeUpper0', 'EyeLower0']);
    } else { // User is looking towards the left.
      replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', ['EyeUpper0', 'EyeLower0']);
    }
    const adjustedLeftIrisCoords = this.getAdjustedIrisCoords(rawCoords, leftIrisRawCoords, 'left');
    const adjustedRightIrisCoords = this.getAdjustedIrisCoords(rawCoords, rightIrisRawCoords, 'right');
    return rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords);
  }

  /**
   * Detects faces in `input` and, when the mesh is enabled, computes landmarks for each of them.
   *
   * @param input image tensor; `shape[1]` and `shape[2]` are read as its height and width
   * @param config configuration; reads `videoOptimized`, `face.detector.*`, `face.mesh.enabled` and `face.iris.enabled`
   * @returns array of prediction objects; entries below `face.detector.minConfidence` are dropped
   */
  async predict(input, config) {
    let useFreshBox = false;
    // run new detector every skipFrames unless we only want box to start with
    let detector;
    const detectorDue = (this.skipped === 0) || (this.skipped > config.face.detector.skipFrames);
    if (detectorDue || !config.face.mesh.enabled || !config.videoOptimized) {
      detector = await this.boundingBoxDetector.getBoundingBoxes(input);
      this.skipped = 0;
    }
    if (config.videoOptimized) this.skipped++;

    // if detector result count doesn't match current working set, use it to reset current working set
    if (detector && detector.boxes) {
      const workingSetStale = (detector.boxes.length !== this.detectedFaces) && (this.detectedFaces !== config.face.detector.maxFaces);
      if (!config.face.mesh.enabled || workingSetStale) {
        this.storedBoxes = [];
        this.detectedFaces = 0;
        for (const possible of detector.boxes) {
          this.storedBoxes.push({ startPoint: possible.box.startPoint.dataSync(), endPoint: possible.box.endPoint.dataSync(), landmarks: possible.landmarks, confidence: possible.confidence });
        }
        if (this.storedBoxes.length > 0) useFreshBox = true;
      }
    }
    if (config.face.detector.skipInitial && this.detectedFaces === 0) this.skipped = 0;

    if (useFreshBox) {
      // fresh boxes are scaled back to input space, enlarged and squarified;
      // landmarks are downloaded before the detector tensors are disposed below
      for (let i = 0; i < this.storedBoxes.length; i++) {
        const stored = this.storedBoxes[i];
        const scaledBox = bounding.scaleBoxCoordinates({ startPoint: stored.startPoint, endPoint: stored.endPoint }, detector.scaleFactor);
        const squarifiedBox = bounding.squarifyBox(bounding.enlargeBox(scaledBox));
        const landmarks = stored.landmarks.arraySync();
        const confidence = stored.confidence;
        this.storedBoxes[i] = { ...squarifiedBox, confidence, landmarks };
      }
    }
    if (detector && detector.boxes) {
      for (const prediction of detector.boxes) {
        prediction.box.startPoint.dispose();
        prediction.box.endPoint.dispose();
        prediction.landmarks.dispose();
      }
    }

    let results = tf.tidy(() => this.storedBoxes.map((box, i) => {
      // The facial bounding box landmarks could come either from blazeface (if we are using a fresh box), or from the mesh model (if we are reusing an old box).
      const boxCorners = { startPoint: box.startPoint, endPoint: box.endPoint };
      let face;
      let angle = 0;
      let rotationMatrix;
      if (config.face.detector.rotation && config.face.mesh.enabled && tf.ENV.flags.IS_BROWSER) {
        const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= LANDMARKS_COUNT) ? MESH_KEYPOINTS_LINE_OF_SYMMETRY_INDICES : BLAZEFACE_KEYPOINTS_LINE_OF_SYMMETRY_INDICES;
        angle = util.computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
        const faceCenter = bounding.getBoxCenter(boxCorners);
        const faceCenterNormalized = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
        const rotatedImage = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
        rotationMatrix = util.buildRotationMatrix(-angle, faceCenter);
        face = bounding.cutBoxFromImageAndResize(boxCorners, rotatedImage, [this.meshSize, this.meshSize]).div(255);
      } else {
        rotationMatrix = util.IDENTITY_MATRIX;
        const cloned = input.clone();
        // the crop feeds the mesh model whenever the mesh is enabled, so it must use the mesh input size
        const cropSize = config.face.mesh.enabled ? this.meshSize : this.boxSize;
        face = bounding.cutBoxFromImageAndResize(boxCorners, cloned, [cropSize, cropSize]).div(255);
      }

      // if we're not going to produce mesh, don't spend time with further processing
      if (!config.face.mesh.enabled) {
        return {
          coords: null,
          box,
          faceConfidence: null,
          boxConfidence: box.confidence,
          confidence: box.confidence,
          image: face,
        };
      }

      const [, confidence, contourCoords] = this.meshDetector.predict(face); // The first returned tensor represents facial contours which are already included in the coordinates.
      const faceConfidence = confidence.dataSync()[0];
      if (faceConfidence < config.face.detector.minConfidence) {
        // record the new score so the cache filter below evicts this box instead of keeping a stale high score
        this.storedBoxes[i] = { ...box, faceConfidence };
        return null; // if below confidence just exit
      }
      const coordsReshaped = tf.reshape(contourCoords, [-1, 3]);
      let rawCoords = coordsReshaped.arraySync();
      if (config.face.iris.enabled) rawCoords = this.refineEyesWithIris(rawCoords, face);

      const transformedCoordsData = this.transformRawCoords(rawCoords, box, angle, rotationMatrix);
      const landmarksBox = bounding.enlargeBox(this.calculateLandmarksBoundingBox(transformedCoordsData));
      const squarifiedLandmarksBox = bounding.squarifyBox(landmarksBox);
      const transformedCoords = tf.tensor2d(transformedCoordsData);
      // cache the landmark-derived box so the next call can reuse it without running the detector
      this.storedBoxes[i] = { ...squarifiedLandmarksBox, landmarks: transformedCoordsData, confidence: box.confidence, faceConfidence };
      return {
        coords: transformedCoords,
        box: landmarksBox,
        faceConfidence,
        boxConfidence: box.confidence,
        image: face,
        rawCoords,
      };
    }));
    results = results.filter((a) => a !== null);
    // remove cache entries for detected boxes on low confidence
    if (config.face.mesh.enabled) this.storedBoxes = this.storedBoxes.filter((a) => a.faceConfidence > config.face.detector.minConfidence);
    this.detectedFaces = results.length;
    return results;
  }

  /**
   * Computes the axis-aligned bounding box of a list of points.
   *
   * @param landmarks points whose first two components are x and y
   * @returns `startPoint` (min x, min y), `endPoint` (max x, max y) and the input `landmarks`
   */
  calculateLandmarksBoundingBox(landmarks) {
    let minX = Infinity;
    let minY = Infinity;
    let maxX = -Infinity;
    let maxY = -Infinity;
    for (const point of landmarks) {
      minX = Math.min(minX, point[0]);
      minY = Math.min(minY, point[1]);
      maxX = Math.max(maxX, point[0]);
      maxY = Math.max(maxY, point[1]);
    }
    const startPoint = [minX, minY];
    const endPoint = [maxX, maxY];
    return { startPoint, endPoint, landmarks };
  }
}