human/src/face/facepipeline.js

/* eslint-disable class-methods-use-this */
import { tf } from '../tf.js';
import * as bounding from './box.js';
import * as keypoints from './keypoints.js';
import * as util from './util.js';
const LANDMARKS_COUNT = 468;
const MESH_MOUTH_INDEX = 13;
const MESH_KEYPOINTS_LINE_OF_SYMMETRY_INDICES = [MESH_MOUTH_INDEX, keypoints.MESH_ANNOTATIONS['midwayBetweenEyes'][0]];
const BLAZEFACE_MOUTH_INDEX = 3;
const BLAZEFACE_NOSE_INDEX = 2;
const BLAZEFACE_KEYPOINTS_LINE_OF_SYMMETRY_INDICES = [BLAZEFACE_MOUTH_INDEX, BLAZEFACE_NOSE_INDEX];
const LEFT_EYE_OUTLINE = keypoints.MESH_ANNOTATIONS['leftEyeLower0'];
const LEFT_EYE_BOUNDS = [LEFT_EYE_OUTLINE[0], LEFT_EYE_OUTLINE[LEFT_EYE_OUTLINE.length - 1]];
const RIGHT_EYE_OUTLINE = keypoints.MESH_ANNOTATIONS['rightEyeLower0'];
const RIGHT_EYE_BOUNDS = [RIGHT_EYE_OUTLINE[0], RIGHT_EYE_OUTLINE[RIGHT_EYE_OUTLINE.length - 1]];
const IRIS_UPPER_CENTER_INDEX = 3;
const IRIS_LOWER_CENTER_INDEX = 4;
const IRIS_IRIS_INDEX = 71;
const IRIS_NUM_COORDINATES = 76;
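// The iris model emits IRIS_NUM_COORDINATES (x, y, z) triplets per eye; the five entries from IRIS_IRIS_INDEX onward are the iris points, the rest trace the surrounding eye contour.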
// Replace the raw coordinates returned by facemesh with refined iris model coordinates. Update the z coordinate to be an average of the original and the new. This produces the best visual effect.
function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
for (let i = 0; i < keypoints.MESH_TO_IRIS_INDICES_MAP.length; i++) {
const { key, indices } = keypoints.MESH_TO_IRIS_INDICES_MAP[i];
const originalIndices = keypoints.MESH_ANNOTATIONS[`${prefix}${key}`];
const shouldReplaceAllKeys = keys == null;
if (shouldReplaceAllKeys || keys.includes(key)) {
for (let j = 0; j < indices.length; j++) {
const index = indices[j];
rawCoords[originalIndices[j]] = [
newCoords[index][0], newCoords[index][1],
(newCoords[index][2] + rawCoords[originalIndices[j]][2]) / 2,
];
}
}
}
}
// The Pipeline coordinates between the bounding box and skeleton models.
class Pipeline {
constructor(boundingBoxDetector, meshDetector, irisModel, config) {
// An array of facial bounding boxes.
this.storedBoxes = [];
this.runsWithoutFaceDetector = 0;
this.boundingBoxDetector = boundingBoxDetector;
this.meshDetector = meshDetector;
this.irisModel = irisModel;
this.meshWidth = config.mesh.inputSize;
this.meshHeight = config.mesh.inputSize;
this.irisSize = config.iris.inputSize;
this.irisEnlarge = 2.3;
this.skipped = 1000;
this.detectedFaces = 0;
}
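// Maps raw mesh coordinates from crop space back to input-image space: scale by the box size, rotate about the crop center by `angle`, then translate to the original (un-rotated) box center.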
transformRawCoords(rawCoords, box, angle, rotationMatrix) {
const boxSize = bounding.getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint });
const scaleFactor = [boxSize[0] / this.meshWidth, boxSize[1] / this.meshHeight];
const coordsScaled = rawCoords.map((coord) => ([
scaleFactor[0] * (coord[0] - this.meshWidth / 2),
scaleFactor[1] * (coord[1] - this.meshHeight / 2), coord[2],
]));
const coordsRotationMatrix = util.buildRotationMatrix(angle, [0, 0]);
const coordsRotated = coordsScaled.map((coord) => ([...util.rotatePoint(coord, coordsRotationMatrix), coord[2]]));
const inverseRotationMatrix = util.invertTransformMatrix(rotationMatrix);
const boxCenter = [...bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint }), 1];
const originalBoxCenter = [
util.dot(boxCenter, inverseRotationMatrix[0]),
util.dot(boxCenter, inverseRotationMatrix[1]),
];
return coordsRotated.map((coord) => ([
coord[0] + originalBoxCenter[0],
coord[1] + originalBoxCenter[1], coord[2],
]));
}
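// Returns the z-depth difference between a matching pair of left- and right-eye landmarks; predict() uses it to judge whether the user is facing the camera straight on.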
getLeftToRightEyeDepthDifference(rawCoords) {
const leftEyeZ = rawCoords[LEFT_EYE_BOUNDS[0]][2];
const rightEyeZ = rawCoords[RIGHT_EYE_BOUNDS[0]][2];
return leftEyeZ - rightEyeZ;
}
// Returns a box describing a cropped region around the eye fit for passing to the iris model.
getEyeBox(rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false) {
const box = bounding.squarifyBox(bounding.enlargeBox(this.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), this.irisEnlarge));
const boxSize = bounding.getBoxSize(box);
let crop = tf.image.cropAndResize(face, [[
box.startPoint[1] / this.meshHeight,
box.startPoint[0] / this.meshWidth,
box.endPoint[1] / this.meshHeight,
box.endPoint[0] / this.meshWidth,
]], [0], [this.irisSize, this.irisSize]);
if (flip) {
crop = tf.image.flipLeftRight(crop);
}
return { box, boxSize, crop };
}
// Given a cropped image of an eye, returns the coordinates of the contours surrounding the eye and the iris.
getEyeCoords(eyeData, eyeBox, eyeBoxSize, flip = false) {
const eyeRawCoords = [];
for (let i = 0; i < IRIS_NUM_COORDINATES; i++) {
const x = eyeData[i * 3];
const y = eyeData[i * 3 + 1];
const z = eyeData[i * 3 + 2];
eyeRawCoords.push([
(flip ? (1 - (x / this.irisSize)) : (x / this.irisSize)) * eyeBoxSize[0] + eyeBox.startPoint[0],
(y / this.irisSize) * eyeBoxSize[1] + eyeBox.startPoint[1],
z,
]);
}
return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(IRIS_IRIS_INDEX) };
}
// The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints.
getAdjustedIrisCoords(rawCoords, irisCoords, direction) {
const upperCenterZ = rawCoords[keypoints.MESH_ANNOTATIONS[`${direction}EyeUpper0`][IRIS_UPPER_CENTER_INDEX]][2];
const lowerCenterZ = rawCoords[keypoints.MESH_ANNOTATIONS[`${direction}EyeLower0`][IRIS_LOWER_CENTER_INDEX]][2];
const averageZ = (upperCenterZ + lowerCenterZ) / 2;
// Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below
return irisCoords.map((coord, i) => {
let z = averageZ;
if (i === 2) {
z = upperCenterZ;
} else if (i === 4) {
z = lowerCenterZ;
}
return [coord[0], coord[1], z];
});
}
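// Runs the full pipeline on one input frame: re-runs the box detector when the working set is stale, then runs the mesh model (and optionally the iris model) on every stored box.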
async predict(input, config) {
this.skipped++;
let useFreshBox = false;
// run the box detector every `skipFrames` frames; when the mesh model is disabled, run it on every frame since its boxes are the only output
let detector;
if ((this.skipped > config.detector.skipFrames) || !config.mesh.enabled) {
detector = await this.boundingBoxDetector.getBoundingBoxes(input);
// don't reset on test image
if ((input.shape[1] !== 255) && (input.shape[2] !== 255)) this.skipped = 0;
}
// if the detector result count doesn't match the current working set, reset the working set from the detector results
if (detector && detector.boxes && (detector.boxes.length > 0) && (!config.mesh.enabled || ((detector.boxes.length !== this.detectedFaces) && (this.detectedFaces !== config.detector.maxFaces)))) {
this.storedBoxes = [];
this.detectedFaces = 0;
for (const possible of detector.boxes) {
this.storedBoxes.push({ startPoint: possible.box.startPoint.dataSync(), endPoint: possible.box.endPoint.dataSync(), landmarks: possible.landmarks, confidence: possible.confidence });
}
if (this.storedBoxes.length > 0) useFreshBox = true;
}
if (useFreshBox) {
if (!detector || !detector.boxes || (detector.boxes.length === 0)) {
this.storedBoxes = [];
this.detectedFaces = 0;
return null;
}
for (const i in this.storedBoxes) {
const scaledBox = bounding.scaleBoxCoordinates({ startPoint: this.storedBoxes[i].startPoint, endPoint: this.storedBoxes[i].endPoint }, detector.scaleFactor);
const enlargedBox = bounding.enlargeBox(scaledBox);
const landmarks = this.storedBoxes[i].landmarks.arraySync();
const confidence = this.storedBoxes[i].confidence;
this.storedBoxes[i] = { ...enlargedBox, confidence, landmarks };
}
this.runsWithoutFaceDetector = 0;
}
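// the detector's box and landmark tensors have been copied into storedBoxes where needed, so they can be released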
if (detector && detector.boxes) {
detector.boxes.forEach((prediction) => {
prediction.box.startPoint.dispose();
prediction.box.endPoint.dispose();
prediction.landmarks.dispose();
});
}
let results = tf.tidy(() => this.storedBoxes.map((box, i) => {
// The facial bounding box landmarks could come either from blazeface (if we are using a fresh box) or from the mesh model (if we are reusing an old box).
const boxLandmarksFromMeshModel = box.landmarks.length >= LANDMARKS_COUNT;
const [indexOfMouth, indexOfForehead] = boxLandmarksFromMeshModel ? MESH_KEYPOINTS_LINE_OF_SYMMETRY_INDICES : BLAZEFACE_KEYPOINTS_LINE_OF_SYMMETRY_INDICES;
const angle = util.computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
const faceCenter = bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
const faceCenterNormalized = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
let rotatedImage = input;
let rotationMatrix = util.IDENTITY_MATRIX;
if (angle !== 0) {
rotatedImage = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized);
rotationMatrix = util.buildRotationMatrix(-angle, faceCenter);
}
const boxCPU = { startPoint: box.startPoint, endPoint: box.endPoint };
const face = bounding.cutBoxFromImageAndResize(boxCPU, rotatedImage, [this.meshHeight, this.meshWidth]).div(255);
// if we're not going to produce a mesh, don't spend time on further processing
if (!config.mesh.enabled) {
const prediction = {
coords: null,
box,
faceConfidence: null,
confidence: box.confidence,
image: face,
};
return prediction;
}
// The first returned tensor represents facial contours, which are included in the coordinates.
const [, confidence, coords] = this.meshDetector.predict(face);
const confidenceVal = confidence.dataSync()[0];
confidence.dispose();
if (confidenceVal < config.detector.minConfidence) {
coords.dispose();
return null;
}
const coordsReshaped = tf.reshape(coords, [-1, 3]);
let rawCoords = coordsReshaped.arraySync();
if (config.iris.enabled) {
const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = this.getEyeBox(rawCoords, face, LEFT_EYE_BOUNDS[0], LEFT_EYE_BOUNDS[1], true);
const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, RIGHT_EYE_BOUNDS[0], RIGHT_EYE_BOUNDS[1]);
const eyePredictions = (this.irisModel.predict(tf.concat([leftEyeCrop, rightEyeCrop])));
const eyePredictionsData = eyePredictions.dataSync();
eyePredictions.dispose();
const leftEyeData = eyePredictionsData.slice(0, IRIS_NUM_COORDINATES * 3);
const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = this.getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
const rightEyeData = eyePredictionsData.slice(IRIS_NUM_COORDINATES * 3);
const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = this.getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize);
const leftToRightEyeDepthDifference = this.getLeftToRightEyeDepthDifference(rawCoords);
if (Math.abs(leftToRightEyeDepthDifference) < 30) { // User is looking straight ahead.
replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left');
replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right');
// If the user is looking to the left or to the right, the iris coordinates tend to diverge too much from the mesh coordinates for them to be merged. So we only update a single contour line above and below the eye.
} else if (leftToRightEyeDepthDifference < 1) { // User is looking towards the right.
replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', ['EyeUpper0', 'EyeLower0']);
} else { // User is looking towards the left.
replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', ['EyeUpper0', 'EyeLower0']);
}
const adjustedLeftIrisCoords = this.getAdjustedIrisCoords(rawCoords, leftIrisRawCoords, 'left');
const adjustedRightIrisCoords = this.getAdjustedIrisCoords(rawCoords, rightIrisRawCoords, 'right');
rawCoords = rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords);
}
const transformedCoordsData = this.transformRawCoords(rawCoords, box, angle, rotationMatrix);
// rawCoords is a plain array at this point, so dispose the tensors it was derived from instead
coords.dispose();
coordsReshaped.dispose();
const landmarksBox = bounding.enlargeBox(this.calculateLandmarksBoundingBox(transformedCoordsData));
const transformedCoords = tf.tensor2d(transformedCoordsData);
const prediction = {
coords: transformedCoords,
box: landmarksBox,
faceConfidence: confidenceVal,
confidence: box.confidence,
image: face,
};
this.storedBoxes[i] = { ...landmarksBox, landmarks: transformedCoords.arraySync(), confidence: box.confidence, faceConfidence: confidenceVal };
return prediction;
}));
results = results.filter((a) => a !== null);
this.detectedFaces = results.length;
return results;
}
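// Computes the axis-aligned bounding box that encloses the given landmarks.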
calculateLandmarksBoundingBox(landmarks) {
const xs = landmarks.map((d) => d[0]);
const ys = landmarks.map((d) => d[1]);
const startPoint = [Math.min(...xs), Math.min(...ys)];
const endPoint = [Math.max(...xs), Math.max(...ys)];
return { startPoint, endPoint, landmarks };
}
}
export { Pipeline };
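// Example usage (a minimal sketch, not part of the pipeline itself; `boxDetector`, `meshModel`,
// and `irisModel` stand for hypothetical, already-loaded model instances, and the config values
// shown are illustrative):
//
// const config = {
//   detector: { skipFrames: 21, minConfidence: 0.5, maxFaces: 10 },
//   mesh: { enabled: true, inputSize: 192 },
//   iris: { enabled: true, inputSize: 64 },
// };
// const pipeline = new Pipeline(boxDetector, meshModel, irisModel, config);
// const input = tf.browser.fromPixels(video).toFloat().expandDims(0); // [1, height, width, 3]
// const faces = await pipeline.predict(input, config); // array of predictions, or null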