mirror of https://github.com/vladmandic/human

redesign face processing

parent 01a3c6802e
commit 9274f42fba

TODO.md (5 lines changed)
@@ -10,11 +10,6 @@

- Evaluate and switch default model from `handdetect` to `handtrack`

#### Body

- Implement new variations of `BlazePose` models
- Add virtual box frame caching to `MoveNet`

#### Face

- Reimplement `BlazeFace`, `FaceMesh`, `Iris` with new pipeline and frame caching
@@ -67,6 +67,9 @@ const drawOptions = {
  drawLabels: true,
  drawPolygons: true,
  drawPoints: false,
  fillPolygons: false,
  useCurves: false,
  useDepth: true,
};

// ui options
@@ -105,7 +108,7 @@ const ui = {
  lastFrame: 0, // time of last frame processing
  viewportSet: false, // internal, has custom viewport been set
  background: null, // holds instance of segmentation background image
- exceptionHandler: false, // should capture all unhandled exceptions
+ exceptionHandler: true, // should capture all unhandled exceptions

  // webrtc
  useWebRTC: false, // use webrtc as camera source instead of local webcam
@@ -684,13 +687,13 @@ function setupMenu() {
    setupCamera();
  });
  menu.display.addHTML('<hr style="border-style: inset; border-color: dimgray">');
- menu.display.addBool('use depth', human.draw.options, 'useDepth');
- menu.display.addBool('use curves', human.draw.options, 'useCurves');
- menu.display.addBool('print labels', human.draw.options, 'drawLabels');
- menu.display.addBool('draw points', human.draw.options, 'drawPoints');
- menu.display.addBool('draw boxes', human.draw.options, 'drawBoxes');
- menu.display.addBool('draw polygons', human.draw.options, 'drawPolygons');
- menu.display.addBool('fill polygons', human.draw.options, 'fillPolygons');
+ menu.display.addBool('use depth', drawOptions, 'useDepth');
+ menu.display.addBool('use curves', drawOptions, 'useCurves');
+ menu.display.addBool('print labels', drawOptions, 'drawLabels');
+ menu.display.addBool('draw points', drawOptions, 'drawPoints');
+ menu.display.addBool('draw boxes', drawOptions, 'drawBoxes');
+ menu.display.addBool('draw polygons', drawOptions, 'drawPolygons');
+ menu.display.addBool('fill polygons', drawOptions, 'fillPolygons');

  menu.image = new Menu(document.body, '', { top, left: x[1] });
  menu.image.addBool('enabled', userConfig.filter, 'enabled', (val) => userConfig.filter.enabled = val);
(diffs for several generated files suppressed: too large or lines too long)
@@ -1,6 +1,6 @@
{
  "name": "@vladmandic/human",
- "version": "2.2.3",
+ "version": "2.3.0",
  "description": "Human: AI-powered 3D Face Detection & Rotation Tracking, Face Description & Recognition, Body Pose Tracking, 3D Hand & Finger Tracking, Iris Analysis, Age & Gender & Emotion Prediction, Gesture Recognition",
  "sideEffects": false,
  "main": "dist/human.node.js",
@@ -1,101 +0,0 @@
/**
 * BlazeFace, FaceMesh & Iris model implementation
 * See `facemesh.ts` for entry point
 */

import { log, join, mergeDeep } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as box from './box';
import * as util from './util';
import type { Config } from '../config';
import type { Tensor, GraphModel } from '../tfjs/types';

const keypointsCount = 6;

function decodeBounds(boxOutputs, anchors, inputSize) {
  const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
  const centers = tf.add(boxStarts, anchors);
  const boxSizes = tf.slice(boxOutputs, [0, 3], [-1, 2]);
  const boxSizesNormalized = tf.div(boxSizes, inputSize);
  const centersNormalized = tf.div(centers, inputSize);
  const halfBoxSize = tf.div(boxSizesNormalized, 2);
  const starts = tf.sub(centersNormalized, halfBoxSize);
  const ends = tf.add(centersNormalized, halfBoxSize);
  const startNormalized = tf.mul(starts, inputSize);
  const endNormalized = tf.mul(ends, inputSize);
  const concatAxis = 1;
  return tf.concat2d([startNormalized, endNormalized], concatAxis);
}
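A minimal standalone sketch (not part of the repo, values assumed) of the per-anchor arithmetic that decodeBounds performs batched as tensor ops; note the divide-by-inputSize and multiply-by-inputSize steps cancel numerically:

// one anchor at (64, 64); raw output row is [score, dx, dy, w, h, ...landmarks]
const anchor = [64, 64];
const rawOffset = [4, -2]; // boxOutputs columns 1..2
const rawSize = [32, 40];  // boxOutputs columns 3..4
const center = [anchor[0] + rawOffset[0], anchor[1] + rawOffset[1]]; // [68, 62]
const half = [rawSize[0] / 2, rawSize[1] / 2];                       // [16, 20]
const startPoint = [center[0] - half[0], center[1] - half[1]];       // [52, 42]
const endPoint = [center[0] + half[0], center[1] + half[1]];         // [84, 82]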
export class BlazeFaceModel {
  model: GraphModel;
  anchorsData: [number, number][];
  anchors: Tensor;
  inputSize: number;
  config: Config;

  constructor(model, config: Config) {
    this.model = model;
    this.anchorsData = util.generateAnchors(model.inputs[0].shape[1]);
    this.anchors = tf.tensor2d(this.anchorsData);
    this.inputSize = model.inputs[0].shape[2];
    this.config = config;
  }

  async getBoundingBoxes(inputImage: Tensor, userConfig: Config) {
    // sanity check on input
    if ((!inputImage) || (inputImage['isDisposedInternal']) || (inputImage.shape.length !== 4) || (inputImage.shape[1] < 1) || (inputImage.shape[2] < 1)) return { boxes: [] };
    const [batch, boxes, scores] = tf.tidy(() => {
      const resizedImage = tf.image.resizeBilinear(inputImage, [this.inputSize, this.inputSize]);
      const normalizedImage = tf.sub(tf.div(resizedImage, 127.5), 0.5);
      const res = this.model.execute(normalizedImage);
      let batchOut;
      if (Array.isArray(res)) { // are we using tfhub or pinto converted model?
        const sorted = res.sort((a, b) => a.size - b.size);
        const concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16
        const concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16
        const concat = tf.concat([concat512, concat384], 1);
        batchOut = tf.squeeze(concat, 0);
      } else {
        batchOut = tf.squeeze(res); // when using tfhub model
      }
      const boxesOut = decodeBounds(batchOut, this.anchors, [this.inputSize, this.inputSize]);
      const logits = tf.slice(batchOut, [0, 0], [-1, 1]);
      const scoresOut = tf.squeeze(tf.sigmoid(logits)); // inside tf.tidy
      return [batchOut, boxesOut, scoresOut];
    });

    this.config = mergeDeep(this.config, userConfig) as Config;

    const nmsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, (this.config.face.detector?.maxDetected || 0), (this.config.face.detector?.iouThreshold || 0), (this.config.face.detector?.minConfidence || 0));
    const nms = await nmsTensor.array();
    tf.dispose(nmsTensor);
    const annotatedBoxes: Array<{ box: { startPoint: Tensor, endPoint: Tensor }, landmarks: Tensor, anchor: [number, number] | undefined, confidence: number }> = [];
    const scoresData = await scores.data();
    for (let i = 0; i < nms.length; i++) {
      const confidence = scoresData[nms[i]];
      if (confidence > (this.config.face.detector?.minConfidence || 0)) {
        const boundingBox = tf.slice(boxes, [nms[i], 0], [1, -1]);
        const landmarks = tf.tidy(() => tf.reshape(tf.squeeze(tf.slice(batch, [nms[i], keypointsCount - 1], [1, -1])), [keypointsCount, -1]));
        annotatedBoxes.push({ box: box.createBox(boundingBox), landmarks, anchor: this.anchorsData[nms[i]], confidence });
        tf.dispose(boundingBox);
      }
    }
    tf.dispose(batch);
    tf.dispose(boxes);
    tf.dispose(scores);

    return {
      boxes: annotatedBoxes,
      scaleFactor: [inputImage.shape[2] / this.inputSize, inputImage.shape[1] / this.inputSize],
    };
  }
}

export async function load(config: Config) {
  const model = await tf.loadGraphModel(join(config.modelBasePath, config.face.detector?.modelPath || ''), { fromTFHub: (config.face.detector?.modelPath || '').includes('tfhub.dev') });
  const blazeFace = new BlazeFaceModel(model, config);
  if (!model || !model.modelUrl) log('load model failed:', config.face.detector?.modelPath || '');
  else if (config.debug) log('load model:', model.modelUrl);
  return blazeFace;
}
@@ -1,75 +0,0 @@
/**
 * BlazeFace, FaceMesh & Iris model implementation
 * See `facemesh.ts` for entry point
 */

import * as tf from '../../dist/tfjs.esm.js';

export function scaleBoxCoordinates(box, factor) {
  const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
  const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
  return { startPoint, endPoint };
}

export function getBoxSize(box): [number, number] {
  return [
    Math.abs(box.endPoint[0] - box.startPoint[0]),
    Math.abs(box.endPoint[1] - box.startPoint[1]),
  ];
}

export function getBoxCenter(box): [number, number] {
  return [
    box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2,
    box.startPoint[1] + (box.endPoint[1] - box.startPoint[1]) / 2,
  ];
}

export function cutBoxFromImageAndResize(box, image, cropSize) {
  const h = image.shape[1];
  const w = image.shape[2];
  const boxes = [[
    box.startPoint[1] / h,
    box.startPoint[0] / w,
    box.endPoint[1] / h,
    box.endPoint[0] / w,
  ]];
  return tf.image.cropAndResize(image, boxes, [0], cropSize);
}

export function enlargeBox(box, factor = 1.5) {
  const center = getBoxCenter(box);
  const size = getBoxSize(box);
  const newHalfSize: [number, number] = [factor * size[0] / 2, factor * size[1] / 2];
  const startPoint = [center[0] - newHalfSize[0], center[1] - newHalfSize[1]];
  const endPoint = [center[0] + newHalfSize[0], center[1] + newHalfSize[1]];
  return { startPoint, endPoint, landmarks: box.landmarks };
}

export function squarifyBox(box) {
  const centers = getBoxCenter(box);
  const size = getBoxSize(box);
  const maxEdge = Math.max(...size);
  const halfSize = maxEdge / 2;
  const startPoint = [Math.round(centers[0] - halfSize), Math.round(centers[1] - halfSize)];
  const endPoint = [Math.round(centers[0] + halfSize), Math.round(centers[1] + halfSize)];
  return { startPoint, endPoint, landmarks: box.landmarks };
}

export function calculateLandmarksBoundingBox(landmarks) {
  const xs = landmarks.map((d) => d[0]);
  const ys = landmarks.map((d) => d[1]);
  const startPoint = [Math.min(...xs), Math.min(...ys)];
  const endPoint = [Math.max(...xs), Math.max(...ys)];
  return { startPoint, endPoint, landmarks };
}

export const disposeBox = (t) => {
  tf.dispose(t.startPoint);
  tf.dispose(t.endPoint);
};

export const createBox = (startEndTensor) => ({
  startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]),
  endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]),
});
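A standalone numeric sketch (assumed values) of how a detector box is expanded and squared before cropping:

const b = { startPoint: [100, 120], endPoint: [180, 220], landmarks: [] };
// enlargeBox(b): center (140, 170), size (80, 100), half size scaled by 1.5 to (60, 75)
//   -> { startPoint: [80, 95], endPoint: [200, 245] }
// squarifyBox of that result: size (120, 150), maxEdge 150, halfSize 75
//   -> { startPoint: [65, 95], endPoint: [215, 245] }, a 150x150 square on the same center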
@@ -1,92 +0,0 @@
/**
 * BlazeFace, FaceMesh & Iris model implementation
 *
 * Based on:
 * - [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
 * - Facial Spatial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
 * - Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
 */

import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as blazeface from './blazeface';
import * as facepipeline from './facepipeline';
import * as coords from './coords';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { FaceResult, Box } from '../result';
import type { Config } from '../config';
import { env } from '../util/env';

let faceModels: [blazeface.BlazeFaceModel | null, GraphModel | null, GraphModel | null] = [null, null, null];
let facePipeline;

export async function predict(input: Tensor, config: Config): Promise<FaceResult[]> {
  const predictions = await facePipeline.predict(input, config);
  const results: Array<FaceResult> = [];
  let id = 0;
  for (const prediction of (predictions || [])) {
    if (!prediction || prediction.isDisposedInternal) continue; // guard against disposed tensors on long-running operations, such as a pause in the middle of processing
    const meshRaw = prediction.mesh.map((pt) => [
      pt[0] / (input.shape[2] || 0),
      pt[1] / (input.shape[1] || 0),
      pt[2] / facePipeline.meshSize,
    ]);
    const annotations = {};
    if (prediction.mesh && prediction.mesh.length > 0) {
      for (const key of Object.keys(coords.MESH_ANNOTATIONS)) annotations[key] = coords.MESH_ANNOTATIONS[key].map((index) => prediction.mesh[index]);
    }
    const clampedBox: Box = prediction.box ? [
      Math.trunc(Math.max(0, prediction.box.startPoint[0])),
      Math.trunc(Math.max(0, prediction.box.startPoint[1])),
      Math.trunc(Math.min((input.shape[2] || 0), prediction.box.endPoint[0]) - Math.max(0, prediction.box.startPoint[0])),
      Math.trunc(Math.min((input.shape[1] || 0), prediction.box.endPoint[1]) - Math.max(0, prediction.box.startPoint[1])),
    ] : [0, 0, 0, 0];
    const boxRaw: Box = prediction.box ? [
      prediction.box.startPoint[0] / (input.shape[2] || 0),
      prediction.box.startPoint[1] / (input.shape[1] || 0),
      (prediction.box.endPoint[0] - prediction.box.startPoint[0]) / (input.shape[2] || 0),
      (prediction.box.endPoint[1] - prediction.box.startPoint[1]) / (input.shape[1] || 0),
    ] : [0, 0, 0, 0];
    results.push({
      id: id++,
      score: Math.round(100 * prediction.faceConfidence || 100 * prediction.boxConfidence || 0) / 100,
      boxScore: Math.round(100 * prediction.boxConfidence) / 100,
      faceScore: Math.round(100 * prediction.faceConfidence) / 100,
      box: clampedBox,
      boxRaw,
      mesh: prediction.mesh,
      meshRaw,
      annotations,
      tensor: prediction.image,
    });
  }
  return results;
}
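A standalone sketch (assumed values) of the two box representations produced above, for a 640x480 input where the detector box spills out of frame:

const w = 640, h = 480;
const startPoint = [-10, 40], endPoint = [150, 200];
const box = [ // clamped, pixel units: [x, y, width, height]
  Math.trunc(Math.max(0, startPoint[0])),                            // 0
  Math.trunc(Math.max(0, startPoint[1])),                            // 40
  Math.trunc(Math.min(w, endPoint[0]) - Math.max(0, startPoint[0])), // 150
  Math.trunc(Math.min(h, endPoint[1]) - Math.max(0, startPoint[1])), // 160
];
const boxRaw = [ // normalized 0..1, unclamped
  startPoint[0] / w,                  // -0.015625
  startPoint[1] / h,                  // 0.0833
  (endPoint[0] - startPoint[0]) / w,  // 0.25
  (endPoint[1] - startPoint[1]) / h,  // 0.3333
];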
export async function load(config): Promise<[GraphModel | null, GraphModel | null, GraphModel | null]> {
  if (env.initial) faceModels = [null, null, null];
  if ((!faceModels[0] && config.face.enabled) || (!faceModels[1] && config.face.mesh.enabled) || (!faceModels[2] && config.face.iris.enabled) || env.initial) {
    faceModels = await Promise.all([
      (!faceModels[0] && config.face.enabled) ? blazeface.load(config) : null,
      (!faceModels[1] && config.face.mesh.enabled) ? tf.loadGraphModel(join(config.modelBasePath, config.face.mesh.modelPath), { fromTFHub: config.face.mesh.modelPath.includes('tfhub.dev') }) as unknown as GraphModel : null,
      (!faceModels[2] && config.face.iris.enabled) ? tf.loadGraphModel(join(config.modelBasePath, config.face.iris.modelPath), { fromTFHub: config.face.iris.modelPath.includes('tfhub.dev') }) as unknown as GraphModel : null,
    ]);
    if (config.face.mesh.enabled) {
      if (!faceModels[1] || !faceModels[1]['modelUrl']) log('load model failed:', config.face.mesh.modelPath);
      else if (config.debug) log('load model:', faceModels[1]['modelUrl']);
    }
    if (config.face.iris.enabled) {
      if (!faceModels[2] || !faceModels[2]['modelUrl']) log('load model failed:', config.face.iris.modelPath);
      else if (config.debug) log('load model:', faceModels[2]['modelUrl']);
    }
  } else if (config.debug) {
    if (faceModels[0]) log('cached model:', faceModels[0].model['modelUrl']);
    if (faceModels[1]) log('cached model:', faceModels[1]['modelUrl']);
    if (faceModels[2]) log('cached model:', faceModels[2]['modelUrl']);
  }
  facePipeline = new facepipeline.Pipeline(faceModels[0], faceModels[1], faceModels[2]);
  return [faceModels[0]?.model || null, faceModels[1], faceModels[2]];
}

export const triangulation = coords.TRI468;
export const uvmap = coords.UV468;
@@ -1,346 +0,0 @@
/**
 * BlazeFace, FaceMesh & Iris model implementation
 * See `facemesh.ts` for entry point
 */

import * as tf from '../../dist/tfjs.esm.js';
import * as bounding from './box';
import * as util from './util';
import * as coords from './coords';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { BlazeFaceModel } from './blazeface';
import { env } from '../util/env';
import { log } from '../util/util';
import type { Point } from '../result';

const leftOutline = coords.MESH_ANNOTATIONS['leftEyeLower0'];
const rightOutline = coords.MESH_ANNOTATIONS['rightEyeLower0'];

const eyeLandmarks = {
  leftBounds: [leftOutline[0], leftOutline[leftOutline.length - 1]],
  rightBounds: [rightOutline[0], rightOutline[rightOutline.length - 1]],
};

const meshLandmarks = {
  count: 468,
  mouth: 13,
  symmetryLine: [13, coords.MESH_ANNOTATIONS['midwayBetweenEyes'][0]],
};

const blazeFaceLandmarks = {
  leftEye: 0,
  rightEye: 1,
  nose: 2,
  mouth: 3,
  leftEar: 4,
  rightEar: 5,
  symmetryLine: [3, 2],
};

const irisLandmarks = {
  upperCenter: 3,
  lowerCenter: 4,
  index: 71,
  numCoordinates: 76,
};

// Replace the raw coordinates returned by facemesh with refined iris model coordinates.
// Update the z coordinate to be an average of the original and the new.
function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
  for (let i = 0; i < coords.MESH_TO_IRIS_INDICES_MAP.length; i++) {
    const { key, indices } = coords.MESH_TO_IRIS_INDICES_MAP[i];
    const originalIndices = coords.MESH_ANNOTATIONS[`${prefix}${key}`];
    if (!keys || keys.includes(key)) {
      for (let j = 0; j < indices.length; j++) {
        const index = indices[j];
        rawCoords[originalIndices[j]] = [
          newCoords[index][0], newCoords[index][1],
          (newCoords[index][2] + rawCoords[originalIndices[j]][2]) / 2,
        ];
      }
    }
  }
}

// The Pipeline coordinates between the bounding box and skeleton models.
export class Pipeline {
  storedBoxes: Array<{ startPoint: number[], endPoint: number[], landmarks: Array<number>, confidence: number, faceConfidence?: number | undefined }>;
  boundingBoxDetector: BlazeFaceModel; // tf.GraphModel
  meshDetector: GraphModel; // tf.GraphModel
  irisModel: GraphModel; // tf.GraphModel
  boxSize: number;
  meshSize: number;
  irisSize: number;
  irisEnlarge: number;
  skipped: number;
  detectedFaces: number;

  constructor(boundingBoxDetector, meshDetector, irisModel) {
    // An array of facial bounding boxes.
    this.storedBoxes = [];
    this.boundingBoxDetector = boundingBoxDetector;
    this.meshDetector = meshDetector;
    this.irisModel = irisModel;
    this.boxSize = boundingBoxDetector?.model?.inputs[0].shape[2] || 0;
    this.meshSize = meshDetector?.inputs[0].shape[2] || boundingBoxDetector?.model?.inputs[0].shape[2];
    this.irisSize = irisModel?.inputs[0].shape[1] || 0;
    this.irisEnlarge = 2.3;
    this.skipped = 0;
    this.detectedFaces = 0;
  }

  transformRawCoords(rawCoords, box, angle, rotationMatrix) {
    const boxSize = bounding.getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint });
    const coordsScaled = rawCoords.map((coord) => ([
      boxSize[0] / this.meshSize * (coord[0] - this.meshSize / 2),
      boxSize[1] / this.meshSize * (coord[1] - this.meshSize / 2),
      coord[2],
    ]));
    const coordsRotationMatrix = (angle !== 0) ? util.buildRotationMatrix(angle, [0, 0]) : util.IDENTITY_MATRIX;
    const coordsRotated = (angle !== 0) ? coordsScaled.map((coord) => ([...util.rotatePoint(coord, coordsRotationMatrix), coord[2]])) : coordsScaled;
    const inverseRotationMatrix = (angle !== 0) ? util.invertTransformMatrix(rotationMatrix) : util.IDENTITY_MATRIX;
    const boxCenter = [...bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint }), 1];
    return coordsRotated.map((coord) => ([
      Math.round(coord[0] + util.dot(boxCenter, inverseRotationMatrix[0])),
      Math.round(coord[1] + util.dot(boxCenter, inverseRotationMatrix[1])),
      Math.round(coord[2]),
    ]));
  }

  // eslint-disable-next-line class-methods-use-this
  getLeftToRightEyeDepthDifference(rawCoords) {
    const leftEyeZ = rawCoords[eyeLandmarks.leftBounds[0]][2];
    const rightEyeZ = rawCoords[eyeLandmarks.rightBounds[0]][2];
    return leftEyeZ - rightEyeZ;
  }

  // Returns a box describing a cropped region around the eye fit for passing to the iris model.
  getEyeBox(rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false) {
    const box = bounding.squarifyBox(bounding.enlargeBox(bounding.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), this.irisEnlarge));
    const boxSize = bounding.getBoxSize(box);
    let crop = tf.image.cropAndResize(face, [[
      box.startPoint[1] / this.meshSize,
      box.startPoint[0] / this.meshSize, box.endPoint[1] / this.meshSize,
      box.endPoint[0] / this.meshSize,
    ]], [0], [this.irisSize, this.irisSize]);
    if (flip && env.kernels.includes('flipleftright')) {
      const flipped = tf.image.flipLeftRight(crop); // flipLeftRight is not defined for tfjs-node
      tf.dispose(crop);
      crop = flipped;
    }
    return { box, boxSize, crop };
  }

  // Given a cropped image of an eye, returns the coordinates of the contours surrounding the eye and the iris.
  getEyeCoords(eyeData, eyeBox, eyeBoxSize, flip = false) {
    const eyeRawCoords: Array<Point> = [];
    for (let i = 0; i < irisLandmarks.numCoordinates; i++) {
      const x = eyeData[i * 3];
      const y = eyeData[i * 3 + 1];
      const z = eyeData[i * 3 + 2];
      eyeRawCoords.push([
        (flip ? (1 - (x / this.irisSize)) : (x / this.irisSize)) * eyeBoxSize[0] + eyeBox.startPoint[0],
        (y / this.irisSize) * eyeBoxSize[1] + eyeBox.startPoint[1], z,
      ]);
    }
    return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(irisLandmarks.index) };
  }

  // The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints.
  // eslint-disable-next-line class-methods-use-this
  getAdjustedIrisCoords(rawCoords, irisCoords, direction) {
    const upperCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeUpper0`][irisLandmarks.upperCenter]][2];
    const lowerCenterZ = rawCoords[coords.MESH_ANNOTATIONS[`${direction}EyeLower0`][irisLandmarks.lowerCenter]][2];
    const averageZ = (upperCenterZ + lowerCenterZ) / 2;
    // Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below
    return irisCoords.map((coord, i) => {
      let z = averageZ;
      if (i === 2) {
        z = upperCenterZ;
      } else if (i === 4) {
        z = lowerCenterZ;
      }
      return [coord[0], coord[1], z];
    });
  }

  correctFaceRotation(config, box, input) {
    const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= meshLandmarks.count) ? meshLandmarks.symmetryLine : blazeFaceLandmarks.symmetryLine;
    const angle: number = util.computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
    const faceCenter: Point = bounding.getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
    const faceCenterNormalized: Point = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
    const rotated = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
    const rotationMatrix = util.buildRotationMatrix(-angle, faceCenter);
    const cut = config.face.mesh.enabled
      ? bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotated, [this.meshSize, this.meshSize])
      : bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotated, [this.boxSize, this.boxSize]);
    const face = tf.div(cut, 255);
    tf.dispose(cut);
    tf.dispose(rotated);
    return [angle, rotationMatrix, face];
  }

  async augmentIris(rawCoords, face, config) {
    if (!this.irisModel) {
      if (config.debug) log('face mesh detection requested, but model is not loaded');
      return rawCoords;
    }
    const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = this.getEyeBox(rawCoords, face, eyeLandmarks.leftBounds[0], eyeLandmarks.leftBounds[1], true);
    const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = this.getEyeBox(rawCoords, face, eyeLandmarks.rightBounds[0], eyeLandmarks.rightBounds[1]);
    const combined = tf.concat([leftEyeCrop, rightEyeCrop]);
    tf.dispose(leftEyeCrop);
    tf.dispose(rightEyeCrop);
    const eyePredictions = this.irisModel.predict(combined) as Tensor;
    tf.dispose(combined);
    const eyePredictionsData = await eyePredictions.data(); // inside tf.tidy
    tf.dispose(eyePredictions);
    const leftEyeData = eyePredictionsData.slice(0, irisLandmarks.numCoordinates * 3);
    const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = this.getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
    const rightEyeData = eyePredictionsData.slice(irisLandmarks.numCoordinates * 3);
    const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = this.getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize);
    const leftToRightEyeDepthDifference = this.getLeftToRightEyeDepthDifference(rawCoords);
    if (Math.abs(leftToRightEyeDepthDifference) < 30) { // User is looking straight ahead.
      replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', null);
      replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', null);
      // If the user is looking to the left or to the right, the iris coordinates tend to diverge too much from the mesh coordinates for them to be merged,
      // so we only update a single contour line above and below the eye.
    } else if (leftToRightEyeDepthDifference < 1) { // User is looking towards the right.
      replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', ['EyeUpper0', 'EyeLower0']);
    } else { // User is looking towards the left.
      replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', ['EyeUpper0', 'EyeLower0']);
    }
    const adjustedLeftIrisCoords = this.getAdjustedIrisCoords(rawCoords, leftIrisRawCoords, 'left');
    const adjustedRightIrisCoords = this.getAdjustedIrisCoords(rawCoords, rightIrisRawCoords, 'right');
    const newCoords = rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords);
    return newCoords;
  }

  async predict(input, config) {
    let useFreshBox = false;
    // run the full detector every skipFrames, unless we only want the box to start with
    let detector;
    if ((this.skipped === 0) || (this.skipped > config.face.detector.skipFrames) || !config.face.mesh.enabled || !config.skipFrame) {
      detector = await this.boundingBoxDetector.getBoundingBoxes(input, config);
      this.skipped = 0;
    }
    if (config.skipFrame) this.skipped++;

    // if the detector result count doesn't match the current working set, use it to reset the current working set
    if (!config.skipFrame || (detector && detector.boxes && (!config.face.mesh.enabled || (detector.boxes.length !== this.detectedFaces) && (this.detectedFaces !== config.face.detector.maxDetected)))) {
      this.storedBoxes = [];
      this.detectedFaces = 0;
      for (const possible of detector.boxes) {
        const startPoint = await possible.box.startPoint.data();
        const endPoint = await possible.box.endPoint.data();
        const landmarks = await possible.landmarks.array();
        this.storedBoxes.push({ startPoint, endPoint, landmarks, confidence: possible.confidence });
      }
      if (this.storedBoxes.length > 0) useFreshBox = true;
    }

    if (useFreshBox) {
      if (!detector || !detector.boxes || (detector.boxes.length === 0)) {
        this.storedBoxes = [];
        this.detectedFaces = 0;
        return null;
      }
      for (let i = 0; i < this.storedBoxes.length; i++) {
        const scaledBox = bounding.scaleBoxCoordinates({ startPoint: this.storedBoxes[i].startPoint, endPoint: this.storedBoxes[i].endPoint }, detector.scaleFactor);
        const enlargedBox = bounding.enlargeBox(scaledBox);
        const squarifiedBox = bounding.squarifyBox(enlargedBox);
        const landmarks = this.storedBoxes[i].landmarks;
        const confidence = this.storedBoxes[i].confidence;
        this.storedBoxes[i] = { ...squarifiedBox, confidence, landmarks };
      }
    }
    if (detector && detector.boxes) {
      detector.boxes.forEach((prediction) => {
        tf.dispose(prediction.box.startPoint);
        tf.dispose(prediction.box.endPoint);
        tf.dispose(prediction.landmarks);
      });
    }

    const results: Array<{ mesh, box, faceConfidence, boxConfidence, confidence, image }> = [];
    // for (let i = 0; i < this.storedBoxes.length; i++) {
    const newBoxes: Array<{ startPoint: number[]; endPoint: number[]; landmarks: number[]; confidence: number; faceConfidence?: number | undefined; }> = [];
    for (let box of this.storedBoxes) {
      // let box = this.storedBoxes[i]; // The facial bounding box landmarks could come either from blazeface (if we are using a fresh box), or from the mesh model (if we are reusing an old box).
      let face;
      let angle = 0;
      let rotationMatrix;

      if (config.face.detector.rotation && config.face.mesh.enabled && env.kernels.includes('rotatewithoffset')) {
        [angle, rotationMatrix, face] = this.correctFaceRotation(config, box, input);
      } else {
        rotationMatrix = util.IDENTITY_MATRIX;
        const cloned = input.clone();
        const cut = config.face.mesh.enabled
          ? bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, cloned, [this.meshSize, this.meshSize])
          : bounding.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, cloned, [this.boxSize, this.boxSize]);
        face = tf.div(cut, 255);
        tf.dispose(cut);
        tf.dispose(cloned);
      }
      // if we're not going to produce a mesh, don't spend time on further processing
      if (!config.face.mesh.enabled) {
        results.push({
          mesh: [],
          box,
          faceConfidence: null,
          boxConfidence: box.confidence,
          confidence: box.confidence,
          image: face,
        });
      } else if (!this.meshDetector) {
        if (config.debug) log('face mesh detection requested, but model is not loaded');
      } else {
        const [contours, confidence, contourCoords] = this.meshDetector.execute(face) as Array<Tensor>; // The first returned tensor represents facial contours, which are already included in the coordinates.
        tf.dispose(contours);
        const faceConfidence = (await confidence.data())[0] as number; // inside tf.tidy
        tf.dispose(confidence);
        const coordsReshaped = tf.reshape(contourCoords, [-1, 3]);
        let rawCoords = await coordsReshaped.array();
        tf.dispose(contourCoords);
        tf.dispose(coordsReshaped);
        if (faceConfidence < config.face.detector.minConfidence) {
          // if (!this.storedBoxes[i]) console.log('2', i, this.storedBoxes.length, this.storedBoxes[i], box, this.storedBoxes);
          // this.storedBoxes[i].confidence = faceConfidence; // reset confidence of cached box
          box.confidence = faceConfidence; // reset confidence of cached box
          tf.dispose(face);
        } else {
          if (config.face.iris.enabled) rawCoords = await this.augmentIris(rawCoords, face, config);

          // override box from detection with one calculated from mesh
          const mesh = this.transformRawCoords(rawCoords, box, angle, rotationMatrix);
          box = { ...bounding.enlargeBox(bounding.calculateLandmarksBoundingBox(mesh), 1.5), confidence: box.confidence }; // redefine box with mesh-calculated one

          // do rotation one more time with mesh keypoints if we want to return a perfect image
          if (config.face.detector.rotation && config.face.mesh.enabled && config.face.description.enabled && env.kernels.includes('rotatewithoffset')) {
            tf.dispose(face); // we'll overwrite original face
            [angle, rotationMatrix, face] = this.correctFaceRotation(config, box, input);
          }
          results.push({
            mesh,
            box,
            faceConfidence,
            boxConfidence: box.confidence,
            confidence: faceConfidence,
            image: face,
          });

          // update stored cache values
          // this.storedBoxes[i] = { ...bounding.squarifyBox(box), confidence: box.confidence, faceConfidence };
          box = { ...bounding.squarifyBox(box), confidence: box.confidence, faceConfidence };
        }
      }
      newBoxes.push(box);
    }

    // results = results.filter((a) => a !== null);
    // remove cache entries for detected boxes on low confidence
    if (config.face.mesh.enabled) this.storedBoxes = newBoxes.filter((a) => a.confidence > config.face.detector.minConfidence);
    this.detectedFaces = results.length;

    return results;
  }
}
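A standalone sketch of the frame-caching decision at the top of Pipeline.predict, with assumed config values; the full detector runs only on the first frame, after the cache expires, or when caching is disabled:

function shouldRunDetector(skipped: number, skipFrames: number, meshEnabled: boolean, skipFrame: boolean): boolean {
  return (skipped === 0) || (skipped > skipFrames) || !meshEnabled || !skipFrame;
}
shouldRunDetector(0, 21, true, true);  // true: first frame
shouldRunDetector(10, 21, true, true); // false: reuse stored boxes refined by the mesh model
shouldRunDetector(22, 21, true, true); // true: cache expired, run full detection again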
@@ -1,115 +0,0 @@
/**
 * BlazeFace, FaceMesh & Iris model implementation
 * See `facemesh.ts` for entry point
 */

export const IDENTITY_MATRIX = [[1, 0, 0], [0, 1, 0], [0, 0, 1]];

/**
 * Normalizes the provided angle to the range -pi to pi.
 * @param angle The angle in radians to be normalized.
 */
export function normalizeRadians(angle) {
  return angle - 2 * Math.PI * Math.floor((angle + Math.PI) / (2 * Math.PI));
}

/**
 * Computes the angle of rotation between two anchor points.
 * @param point1 First anchor point
 * @param point2 Second anchor point
 */
export function computeRotation(point1, point2) {
  const radians = Math.PI / 2 - Math.atan2(-(point2[1] - point1[1]), point2[0] - point1[0]);
  return normalizeRadians(radians);
}
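A standalone check (assumed points) of what computeRotation measures: the deviation of the mouth-to-forehead axis from vertical, in image coordinates where y grows downward:

computeRotation([100, 150], [100, 50]);  // 0: forehead directly above the mouth, upright face
computeRotation([100, 150], [200, 150]); // Math.PI / 2: forehead directly to the right, face lying on its side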
export function radToDegrees(rad) {
  return rad * 180 / Math.PI;
}

export function buildTranslationMatrix(x, y) {
  return [[1, 0, x], [0, 1, y], [0, 0, 1]];
}

export function dot(v1, v2) {
  let product = 0;
  for (let i = 0; i < v1.length; i++) {
    product += v1[i] * v2[i];
  }
  return product;
}

export function getColumnFrom2DArr(arr, columnIndex) {
  const column: Array<number> = [];
  for (let i = 0; i < arr.length; i++) {
    column.push(arr[i][columnIndex]);
  }
  return column;
}

export function multiplyTransformMatrices(mat1, mat2) {
  const product: Array<number[]> = [];
  const size = mat1.length;
  for (let row = 0; row < size; row++) {
    product.push([]);
    for (let col = 0; col < size; col++) {
      product[row].push(dot(mat1[row], getColumnFrom2DArr(mat2, col)));
    }
  }
  return product;
}

export function buildRotationMatrix(rotation, center) {
  const cosA = Math.cos(rotation);
  const sinA = Math.sin(rotation);
  const rotationMatrix = [[cosA, -sinA, 0], [sinA, cosA, 0], [0, 0, 1]];
  const translationMatrix = buildTranslationMatrix(center[0], center[1]);
  const translationTimesRotation = multiplyTransformMatrices(translationMatrix, rotationMatrix);
  const negativeTranslationMatrix = buildTranslationMatrix(-center[0], -center[1]);
  return multiplyTransformMatrices(translationTimesRotation, negativeTranslationMatrix);
}
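A standalone check of the composition: buildRotationMatrix translates the center to the origin, rotates, then translates back, so rotating the point [2, 1] by 90 degrees about [1, 1] should land on [1, 2]:

const m = buildRotationMatrix(Math.PI / 2, [1, 1]);
rotatePoint([2, 1, 1], m); // [1, 2] (input is a homogeneous coordinate [x, y, 1])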
export function invertTransformMatrix(matrix) {
  const rotationComponent = [[matrix[0][0], matrix[1][0]], [matrix[0][1], matrix[1][1]]];
  const translationComponent = [matrix[0][2], matrix[1][2]];
  const invertedTranslation = [
    -dot(rotationComponent[0], translationComponent),
    -dot(rotationComponent[1], translationComponent),
  ];
  return [
    rotationComponent[0].concat(invertedTranslation[0]),
    rotationComponent[1].concat(invertedTranslation[1]),
    [0, 0, 1],
  ];
}

export function rotatePoint(homogeneousCoordinate, rotationMatrix) {
  return [
    dot(homogeneousCoordinate, rotationMatrix[0]),
    dot(homogeneousCoordinate, rotationMatrix[1]),
  ];
}

export function xyDistanceBetweenPoints(a, b) {
  return Math.sqrt(((a[0] - b[0]) ** 2) + ((a[1] - b[1]) ** 2));
}

export function generateAnchors(inputSize) {
  const spec = { strides: [inputSize / 16, inputSize / 8], anchors: [2, 6] };
  const anchors: Array<[number, number]> = [];
  for (let i = 0; i < spec.strides.length; i++) {
    const stride = spec.strides[i];
    const gridRows = Math.floor((inputSize + stride - 1) / stride);
    const gridCols = Math.floor((inputSize + stride - 1) / stride);
    const anchorsNum = spec.anchors[i];
    for (let gridY = 0; gridY < gridRows; gridY++) {
      const anchorY = stride * (gridY + 0.5);
      for (let gridX = 0; gridX < gridCols; gridX++) {
        const anchorX = stride * (gridX + 0.5);
        for (let n = 0; n < anchorsNum; n++) {
          anchors.push([anchorX, anchorY]);
        }
      }
    }
  }
  return anchors;
}
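A worked count, assuming the typical 128x128 BlazeFace input: stride 128/16 = 8 gives a 16x16 grid with 2 anchors per cell (512), and stride 128/8 = 16 gives an 8x8 grid with 6 anchors per cell (384), matching the model's 896 output rows:

generateAnchors(128).length; // 896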
@@ -1,11 +1,19 @@
/**
 * PoseNet body detection model implementation
 * See `posenet.ts` for entry point
 *
 * Based on: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
 */

- import * as utils from './utils';
- import * as kpt from './keypoints';
- import type { Box } from '../result';
+ import { log, join } from '../util/util';
+ import * as tf from '../../dist/tfjs.esm.js';
+ import type { BodyResult, Box } from '../result';
+ import type { Tensor, GraphModel } from '../tfjs/types';
+ import type { Config } from '../config';
+ import { env } from '../util/env';
+ import * as utils from './posenetutils';

let model: GraphModel;
const poseNetOutputs = ['MobilenetV1/offset_2/BiasAdd'/* offsets */, 'MobilenetV1/heatmap_2/BiasAdd'/* heatmapScores */, 'MobilenetV1/displacement_fwd_2/BiasAdd'/* displacementFwd */, 'MobilenetV1/displacement_bwd_2/BiasAdd'/* displacementBwd */];

const localMaximumRadius = 1;
const outputStride = 16;

@@ -37,11 +45,11 @@ function traverse(edgeId, sourceKeypoint, targetId, scores, offsets, displacemen
  }
  const targetKeyPointIndices = getStridedIndexNearPoint(targetKeypoint, height, width);
  const score = scores.get(targetKeyPointIndices.y, targetKeyPointIndices.x, targetId);
- return { position: targetKeypoint, part: kpt.partNames[targetId], score };
+ return { position: targetKeypoint, part: utils.partNames[targetId], score };
}

export function decodePose(root, scores, offsets, displacementsFwd, displacementsBwd) {
- const tuples = kpt.poseChain.map(([parentJoinName, childJoinName]) => ([kpt.partIds[parentJoinName], kpt.partIds[childJoinName]]));
+ const tuples = utils.poseChain.map(([parentJoinName, childJoinName]) => ([utils.partIds[parentJoinName], utils.partIds[childJoinName]]));
  const edgesFwd = tuples.map(([, childJointId]) => childJointId);
  const edgesBwd = tuples.map(([parentJointId]) => parentJointId);
  const numParts = scores.shape[2]; // [21,21,17]

@@ -51,7 +59,7 @@ export function decodePose(root, scores, offsets, displacementsFwd, displacement
  const rootPoint = utils.getImageCoords(root.part, outputStride, offsets);
  keypoints[root.part.id] = {
    score: root.score,
-   part: kpt.partNames[root.part.id],
+   part: utils.partNames[root.part.id],
    position: rootPoint,
  };
  // Decode the part positions upwards in the tree, following the backward displacements.

@@ -146,3 +154,32 @@ export function decode(offsets, scores, displacementsFwd, displacementsBwd, maxD
  }
  return poses;
}

export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
  const res = tf.tidy(() => {
    if (!model.inputs[0].shape) return [];
    const resized = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
    const normalized = tf.sub(tf.div(tf.cast(resized, 'float32'), 127.5), 1.0);
    const results: Array<Tensor> = model.execute(normalized, poseNetOutputs) as Array<Tensor>;
    const results3d = results.map((y) => tf.squeeze(y, [0]));
    results3d[1] = results3d[1].sigmoid(); // apply sigmoid on scores
    return results3d;
  });

  const buffers = await Promise.all(res.map((tensor: Tensor) => tensor.buffer()));
  for (const t of res) tf.dispose(t);

  const decoded = await decode(buffers[0], buffers[1], buffers[2], buffers[3], config.body.maxDetected, config.body.minConfidence);
  if (!model.inputs[0].shape) return [];
  const scaled = utils.scalePoses(decoded, [input.shape[1], input.shape[2]], [model.inputs[0].shape[2], model.inputs[0].shape[1]]) as BodyResult[];
  return scaled;
}

export async function load(config: Config): Promise<GraphModel> {
  if (!model || env.initial) {
    model = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel;
    if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath);
    else if (config.debug) log('load model:', model['modelUrl']);
  } else if (config.debug) log('cached model:', model['modelUrl']);
  return model;
}
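A hypothetical usage sketch (config values assumed, not from the source): load once, then call predict per frame with a 4D input tensor:

// const config = { modelBasePath: '/models', body: { modelPath: 'posenet.json', maxDetected: 1, minConfidence: 0.3 }, debug: false } as Config;
// await load(config);
// const poses = await predict(imageTensor, config); // BodyResult[], keypoints scaled back to input dimensions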
@@ -3,15 +3,48 @@
 * See `posenet.ts` for entry point
 */

- import * as kpt from './keypoints';
import type { BodyResult } from '../result';

export const partNames = [
  'nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder',
  'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist',
  'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle',
];

export const count = partNames.length; // 17 keypoints

export const partIds = partNames.reduce((result, jointName, i) => {
  result[jointName] = i;
  return result;
}, {});

const connectedPartNames = [
  ['leftHip', 'leftShoulder'], ['leftElbow', 'leftShoulder'],
  ['leftElbow', 'leftWrist'], ['leftHip', 'leftKnee'],
  ['leftKnee', 'leftAnkle'], ['rightHip', 'rightShoulder'],
  ['rightElbow', 'rightShoulder'], ['rightElbow', 'rightWrist'],
  ['rightHip', 'rightKnee'], ['rightKnee', 'rightAnkle'],
  ['leftShoulder', 'rightShoulder'], ['leftHip', 'rightHip'],
];
export const connectedPartIndices = connectedPartNames.map(([jointNameA, jointNameB]) => ([partIds[jointNameA], partIds[jointNameB]]));

export const poseChain = [
  ['nose', 'leftEye'], ['leftEye', 'leftEar'], ['nose', 'rightEye'],
  ['rightEye', 'rightEar'], ['nose', 'leftShoulder'],
  ['leftShoulder', 'leftElbow'], ['leftElbow', 'leftWrist'],
  ['leftShoulder', 'leftHip'], ['leftHip', 'leftKnee'],
  ['leftKnee', 'leftAnkle'], ['nose', 'rightShoulder'],
  ['rightShoulder', 'rightElbow'], ['rightElbow', 'rightWrist'],
  ['rightShoulder', 'rightHip'], ['rightHip', 'rightKnee'],
  ['rightKnee', 'rightAnkle'],
];

export function eitherPointDoesntMeetConfidence(a: number, b: number, minConfidence: number) {
  return (a < minConfidence || b < minConfidence);
}

export function getAdjacentKeyPoints(keypoints, minConfidence: number) {
- return kpt.connectedPartIndices.reduce((result, [leftJoint, rightJoint]) => {
+ return connectedPartIndices.reduce((result, [leftJoint, rightJoint]) => {
    if (eitherPointDoesntMeetConfidence(keypoints[leftJoint].score, keypoints[rightJoint].score, minConfidence)) {
      return result;
    }

@@ -123,7 +156,7 @@ export class MaxHeap {
export function getOffsetPoint(y, x, keypoint, offsets) {
  return {
    y: offsets.get(y, x, keypoint),
-   x: offsets.get(y, x, keypoint + kpt.count),
+   x: offsets.get(y, x, keypoint + count),
  };
}
@@ -0,0 +1,133 @@
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);

const calculateGaze = (face): { bearing: number, strength: number } => {
  const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
  if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 };

  const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes
  const eyeRatio = 1; // factor to normalize changes x vs y

  const left = face.mesh[33][2] > face.mesh[263][2]; // pick left or right eye depending on which one is closer, based on outside point z-axis
  const irisCenter = left ? face.mesh[473] : face.mesh[468];
  const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so relative point is less precise
    ? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2]
    : [(face.mesh[263][0] + face.mesh[362][0]) / 2, (face.mesh[263][1] + face.mesh[362][1]) / 2];
  const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions
    ? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]]
    : [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]];

  const eyeDiff = [ // x distance between extreme point and center point normalized with eye size
    (eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
    eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
  ];
  let strength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences
  strength = Math.min(strength, face.boxRaw[2] / 2, face.boxRaw[3] / 2); // limit strength to half of box size to avoid clipping due to low precision
  const bearing = (radians([0, 0], eyeDiff) + (Math.PI / 2)) % Math.PI; // using eyeDiff instead of the eyeCenter/irisCenter combo due to manual adjustments; rotate clockwise 90 degrees

  return { bearing, strength };
};
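A hypothetical follow-up sketch (the helper name and the 0.1 threshold are assumed, not from the source) showing how the returned vector might be consumed:

const describeGaze = (gaze: { bearing: number, strength: number }) => ({
  bearingDegrees: rad2deg(gaze.bearing), // bearing is in radians
  lookingAway: gaze.strength > 0.1,      // strength is normalized and capped at half the face box
});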
export const calculateFaceAngle = (face, imageSize): {
  angle: { pitch: number, yaw: number, roll: number },
  matrix: [number, number, number, number, number, number, number, number, number],
  gaze: { bearing: number, strength: number },
} => {
  // const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360);
  const normalize = (v) => { // normalize vector
    const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
    v[0] /= length;
    v[1] /= length;
    v[2] /= length;
    return v;
  };
  const subVectors = (a, b) => { // vector subtraction (a - b)
    const x = a[0] - b[0];
    const y = a[1] - b[1];
    const z = a[2] - b[2];
    return [x, y, z];
  };
  const crossVectors = (a, b) => { // vector cross product (a x b)
    const x = a[1] * b[2] - a[2] * b[1];
    const y = a[2] * b[0] - a[0] * b[2];
    const z = a[0] * b[1] - a[1] * b[0];
    return [x, y, z];
  };
  // 3x3 rotation matrix to Euler angles, based on https://www.geometrictools.com/Documentation/EulerAngles.pdf
  const rotationMatrixToEulerAngle = (r) => {
    // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
    const [r00, r01, r02, r10, r11, r12, r20, r21, r22] = r;
    let thetaX: number;
    let thetaY: number;
    let thetaZ: number;
    if (r10 < 1) { // YZX calculation
      if (r10 > -1) {
        thetaZ = Math.asin(r10);
        thetaY = Math.atan2(-r20, r00);
        thetaX = Math.atan2(-r12, r11);
      } else {
        thetaZ = -Math.PI / 2;
        thetaY = -Math.atan2(r21, r22);
        thetaX = 0;
      }
    } else {
      thetaZ = Math.PI / 2;
      thetaY = Math.atan2(r21, r22);
      thetaX = 0;
    }
    if (isNaN(thetaX)) thetaX = 0;
    if (isNaN(thetaY)) thetaY = 0;
    if (isNaN(thetaZ)) thetaZ = 0;
    return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ };
  };
  // simple Euler angle calculation based on the existing 3D mesh
  // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
  const meshToEulerAngle = (mesh) => {
    const radians = (a1, a2, b1, b2) => Math.atan2(b2 - a2, b1 - a1);
    // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
    const angle = {
      // values are in radians in the range -pi/2 to pi/2, which is -90 to +90 degrees; a value of 0 means center
      // pitch is face move up/down
      pitch: radians(mesh[10][1], mesh[10][2], mesh[152][1], mesh[152][2]), // looking at y,z of top and bottom points of the face
      // yaw is face turn left/right
      yaw: radians(mesh[33][0], mesh[33][2], mesh[263][0], mesh[263][2]), // looking at x,z of outside corners of leftEye and rightEye
      // roll is face lean left/right
      roll: radians(mesh[33][0], mesh[33][1], mesh[263][0], mesh[263][1]), // looking at x,y of outside corners of leftEye and rightEye
    };
    return angle;
  };

  // initialize gaze and mesh
  const mesh = face.meshRaw;
  if (!mesh || mesh.length < 300) return { angle: { pitch: 0, yaw: 0, roll: 0 }, matrix: [1, 0, 0, 0, 1, 0, 0, 0, 1], gaze: { bearing: 0, strength: 0 } };

  const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5;
  // top, bottom, left, right
  const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [
    // make the xyz coordinates proportional, independent of the image/box size
    pt[0] * imageSize[0] / size,
    pt[1] * imageSize[1] / size,
    pt[2],
  ]);

  const y_axis = normalize(subVectors(pts[1], pts[0]));
  let x_axis = normalize(subVectors(pts[3], pts[2]));
  const z_axis = normalize(crossVectors(x_axis, y_axis));
  // adjust x_axis to make sure that all axes are perpendicular to each other
  x_axis = crossVectors(y_axis, z_axis);

  // Rotation Matrix from Axis Vectors - http://renderdan.blogspot.com/2006/05/rotation-matrix-from-axis-vectors.html
  // 3x3 rotation matrix is flattened to an array in row-major order. Note that the rotation represented by this matrix is inverted.
  const matrix: [number, number, number, number, number, number, number, number, number] = [
    x_axis[0], x_axis[1], x_axis[2],
    y_axis[0], y_axis[1], y_axis[2],
    z_axis[0], z_axis[1], z_axis[2],
  ];
  const angle = rotationMatrixToEulerAngle(matrix);
  // const angle = meshToEulerAngle(mesh);

  // we have iris keypoints, so we can calculate gaze direction
  const gaze = mesh.length === 478 ? calculateGaze(face) : { bearing: 0, strength: 0 };

  return { angle, matrix, gaze };
};
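A hypothetical usage sketch (variable names assumed): meshRaw comes from the face result and imageSize from the input tensor; the returned angles are radians and can be mapped through rad2deg:

// const { angle, matrix, gaze } = calculateFaceAngle(faceResult, [inputTensor.shape[2], inputTensor.shape[1]]);
// const yawDegrees = rad2deg(angle.yaw); // 0 when facing the camera straight on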
|
|
@ -0,0 +1,96 @@
|
|||
/**
|
||||
* BlazeFace, FaceMesh & Iris model implementation
|
||||
* See `facemesh.ts` for entry point
|
||||
*/
|
||||
|
||||
import { log, join } from '../util/util';
|
||||
import * as tf from '../../dist/tfjs.esm.js';
|
||||
import * as util from './facemeshutil';
|
||||
import type { Config } from '../config';
|
||||
import type { Tensor, GraphModel } from '../tfjs/types';
|
||||
import { env } from '../util/env';
|
||||
|
||||
const keypointsCount = 6;
|
||||
let model: GraphModel | null;
|
||||
let anchorsData: [number, number][] = [];
|
||||
let anchors: Tensor | null = null;
|
||||
let inputSize = 0;
|
||||
|
||||
// export const size = () => (model && model.inputs[0].shape ? model.inputs[0].shape[2] : 0);
|
||||
export const size = () => inputSize;
|
||||
|
||||
export async function load(config: Config): Promise<GraphModel> {
|
||||
if (env.initial) model = null;
|
||||
if (!model) {
|
||||
model = await tf.loadGraphModel(join(config.modelBasePath, config.face.detector?.modelPath || '')) as unknown as GraphModel;
|
||||
if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath);
|
||||
else if (config.debug) log('load model:', model['modelUrl']);
|
||||
} else if (config.debug) log('cached model:', model['modelUrl']);
|
||||
inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
|
||||
if (inputSize === -1) inputSize = 64;
|
||||
anchorsData = util.generateAnchors(inputSize);
|
||||
anchors = tf.tensor2d(anchorsData);
|
||||
return model;
|
||||
}
|
||||
|
||||
function decodeBounds(boxOutputs) {
|
||||
const boxStarts = tf.slice(boxOutputs, [0, 1], [-1, 2]);
|
||||
const centers = tf.add(boxStarts, anchors);
|
||||
const boxSizes = tf.slice(boxOutputs, [0, 3], [-1, 2]);
|
||||
const boxSizesNormalized = tf.div(boxSizes, inputSize);
|
||||
const centersNormalized = tf.div(centers, inputSize);
|
||||
const halfBoxSize = tf.div(boxSizesNormalized, 2);
|
||||
const starts = tf.sub(centersNormalized, halfBoxSize);
|
||||
const ends = tf.add(centersNormalized, halfBoxSize);
|
||||
const startNormalized = tf.mul(starts, inputSize);
|
||||
const endNormalized = tf.mul(ends, inputSize);
|
||||
const concatAxis = 1;
|
||||
return tf.concat2d([startNormalized, endNormalized], concatAxis);
|
||||
}

export async function getBoxes(inputImage: Tensor, config: Config) {
  // sanity check on input
  if ((!inputImage) || (inputImage['isDisposedInternal']) || (inputImage.shape.length !== 4) || (inputImage.shape[1] < 1) || (inputImage.shape[2] < 1)) return { boxes: [] };
  const [batch, boxes, scores] = tf.tidy(() => {
    const resizedImage = tf.image.resizeBilinear(inputImage, [inputSize, inputSize]);
    const normalizedImage = tf.sub(tf.div(resizedImage, 127.5), 0.5);
    const res = model?.execute(normalizedImage);
    let batchOut;
    if (Array.isArray(res)) { // are we using tfhub or pinto converted model?
      const sorted = res.sort((a, b) => a.size - b.size);
      const concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16
      const concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16
      const concat = tf.concat([concat512, concat384], 1);
      batchOut = tf.squeeze(concat, 0);
    } else {
      batchOut = tf.squeeze(res); // when using tfhub model
    }
    const boxesOut = decodeBounds(batchOut);
    const logits = tf.slice(batchOut, [0, 0], [-1, 1]);
    const scoresOut = tf.squeeze(tf.sigmoid(logits)); // inside tf.tidy
    return [batchOut, boxesOut, scoresOut];
  });

  const nmsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, (config.face.detector?.maxDetected || 0), (config.face.detector?.iouThreshold || 0), (config.face.detector?.minConfidence || 0));
  const nms = await nmsTensor.array();
  tf.dispose(nmsTensor);
  const annotatedBoxes: Array<{ box: { startPoint: Tensor, endPoint: Tensor }, landmarks: Tensor, anchor: [number, number] | undefined, confidence: number }> = [];
  const scoresData = await scores.data();
  for (let i = 0; i < nms.length; i++) {
    const confidence = scoresData[nms[i]];
    if (confidence > (config.face.detector?.minConfidence || 0)) {
      const boundingBox = tf.slice(boxes, [nms[i], 0], [1, -1]);
      const landmarks = tf.tidy(() => tf.reshape(tf.squeeze(tf.slice(batch, [nms[i], keypointsCount - 1], [1, -1])), [keypointsCount, -1]));
      annotatedBoxes.push({ box: util.createBox(boundingBox), landmarks, anchor: anchorsData[nms[i]], confidence });
      tf.dispose(boundingBox);
    }
  }
  tf.dispose(batch);
  tf.dispose(boxes);
  tf.dispose(scores);

  return {
    boxes: annotatedBoxes,
    scaleFactor: [inputImage.shape[2] / inputSize, inputImage.shape[1] / inputSize],
  };
}
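
// usage sketch (illustrative only, not part of this module): hypothetical config values for paths and thresholds,
// assuming a 4D [1, height, width, 3] input tensor
// const cfg = { modelBasePath: '/models', face: { detector: { modelPath: 'blazeface.json', maxDetected: 10, iouThreshold: 0.1, minConfidence: 0.2 } } } as unknown as Config;
// await load(cfg);
// const frame: Tensor = tf.zeros([1, 720, 1280, 3]); // stand-in for a camera frame
// const { boxes } = await getBoxes(frame, cfg); // annotated boxes with landmarks and confidences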
140
src/face/face.ts
@@ -5,145 +5,12 @@
import { log, now } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as facemesh from '../blazeface/facemesh';
import * as facemesh from './facemesh';
import * as emotion from '../gear/emotion';
import * as faceres from './faceres';
import type { FaceResult } from '../result';
import type { Tensor } from '../tfjs/types';

||||
// eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
|
||||
const rad2deg = (theta) => Math.round((theta * 180) / Math.PI);
|
||||
|
||||
const calculateGaze = (face): { bearing: number, strength: number } => {
|
||||
const radians = (pt1, pt2) => Math.atan2(pt1[1] - pt2[1], pt1[0] - pt2[0]); // function to calculate angle between any two points
|
||||
if (!face.annotations['rightEyeIris'] || !face.annotations['leftEyeIris']) return { bearing: 0, strength: 0 };
|
||||
|
||||
const offsetIris = [0, -0.1]; // iris center may not align with average of eye extremes
|
||||
const eyeRatio = 1; // factor to normalize changes x vs y
|
||||
|
||||
const left = face.mesh[33][2] > face.mesh[263][2]; // pick left or right eye depending which one is closer bazed on outsize point z axis
|
||||
const irisCenter = left ? face.mesh[473] : face.mesh[468];
|
||||
const eyeCenter = left // eye center is average of extreme points on x axis for both x and y, ignoring y extreme points as eyelids naturally open/close more when gazing up/down so relative point is less precise
|
||||
? [(face.mesh[133][0] + face.mesh[33][0]) / 2, (face.mesh[133][1] + face.mesh[33][1]) / 2]
|
||||
: [(face.mesh[263][0] + face.mesh[362][0]) / 2, (face.mesh[263][1] + face.mesh[362][1]) / 2];
|
||||
const eyeSize = left // eye size is difference between extreme points for both x and y, used to normalize & squarify eye dimensions
|
||||
? [face.mesh[133][0] - face.mesh[33][0], face.mesh[23][1] - face.mesh[27][1]]
|
||||
: [face.mesh[263][0] - face.mesh[362][0], face.mesh[253][1] - face.mesh[257][1]];
|
||||
|
||||
const eyeDiff = [ // x distance between extreme point and center point normalized with eye size
|
||||
(eyeCenter[0] - irisCenter[0]) / eyeSize[0] - offsetIris[0],
|
||||
eyeRatio * (irisCenter[1] - eyeCenter[1]) / eyeSize[1] - offsetIris[1],
|
||||
];
|
||||
let strength = Math.sqrt((eyeDiff[0] ** 2) + (eyeDiff[1] ** 2)); // vector length is a diagonal between two differences
|
||||
strength = Math.min(strength, face.boxRaw[2] / 2, face.boxRaw[3] / 2); // limit strength to half of box size to avoid clipping due to low precision
|
||||
const bearing = (radians([0, 0], eyeDiff) + (Math.PI / 2)) % Math.PI; // using eyeDiff instead eyeCenter/irisCenter combo due to manual adjustments and rotate clockwise 90degrees
|
||||
|
||||
return { bearing, strength };
|
||||
};
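
// worked example of the bearing math above: for eyeDiff = [0, 0.1] (iris slightly below center after offset),
// radians([0, 0], eyeDiff) = atan2(-0.1, 0) = -PI/2, so bearing = (-PI/2 + PI/2) % PI = 0,
// while a purely horizontal eyeDiff = [0.1, 0] gives atan2(0, -0.1) = PI and bearing = (PI + PI/2) % PI = PI/2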

const calculateFaceAngle = (face, imageSize): {
  angle: { pitch: number, yaw: number, roll: number },
  matrix: [number, number, number, number, number, number, number, number, number],
  gaze: { bearing: number, strength: number },
} => {
  // const degrees = (theta) => Math.abs(((theta * 180) / Math.PI) % 360);
  const normalize = (v) => { // normalize vector
    const length = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
    v[0] /= length;
    v[1] /= length;
    v[2] /= length;
    return v;
  };
  const subVectors = (a, b) => { // vector subtraction (a - b)
    const x = a[0] - b[0];
    const y = a[1] - b[1];
    const z = a[2] - b[2];
    return [x, y, z];
  };
  const crossVectors = (a, b) => { // vector cross product (a x b)
    const x = a[1] * b[2] - a[2] * b[1];
    const y = a[2] * b[0] - a[0] * b[2];
    const z = a[0] * b[1] - a[1] * b[0];
    return [x, y, z];
  };
  // 3x3 rotation matrix to Euler angles based on https://www.geometrictools.com/Documentation/EulerAngles.pdf
  const rotationMatrixToEulerAngle = (r) => {
    // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
    const [r00, r01, r02, r10, r11, r12, r20, r21, r22] = r;
    let thetaX: number;
    let thetaY: number;
    let thetaZ: number;
    if (r10 < 1) { // YZX calculation
      if (r10 > -1) {
        thetaZ = Math.asin(r10);
        thetaY = Math.atan2(-r20, r00);
        thetaX = Math.atan2(-r12, r11);
      } else {
        thetaZ = -Math.PI / 2;
        thetaY = -Math.atan2(r21, r22);
        thetaX = 0;
      }
    } else {
      thetaZ = Math.PI / 2;
      thetaY = Math.atan2(r21, r22);
      thetaX = 0;
    }
    if (isNaN(thetaX)) thetaX = 0;
    if (isNaN(thetaY)) thetaY = 0;
    if (isNaN(thetaZ)) thetaZ = 0;
    return { pitch: 2 * -thetaX, yaw: 2 * -thetaY, roll: 2 * -thetaZ };
  };
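
  // sanity check for the conversion above: the identity matrix r = [1, 0, 0, 0, 1, 0, 0, 0, 1]
  // takes the r10 < 1 && r10 > -1 branch with thetaZ = asin(0) = 0, thetaY = atan2(0, 1) = 0, thetaX = atan2(0, 1) = 0,
  // so pitch = yaw = roll = 0 as expected for an unrotated face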
  // simple Euler angle calculation based on the existing 3D mesh
  // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
  const meshToEulerAngle = (mesh) => {
    const radians = (a1, a2, b1, b2) => Math.atan2(b2 - a2, b1 - a1);
    // eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars
    const angle = {
      // values are in radians in range of -pi/2 to pi/2 which is -90 to +90 degrees, value of 0 means center
      // pitch is face move up/down
      pitch: radians(mesh[10][1], mesh[10][2], mesh[152][1], mesh[152][2]), // looking at y,z of top and bottom points of the face
      // yaw is face turn left/right
      yaw: radians(mesh[33][0], mesh[33][2], mesh[263][0], mesh[263][2]), // looking at x,z of outside corners of leftEye and rightEye
      // roll is face lean left/right
      roll: radians(mesh[33][0], mesh[33][1], mesh[263][0], mesh[263][1]), // looking at x,y of outside corners of leftEye and rightEye
    };
    return angle;
  };

  // initialize gaze and mesh
  const mesh = face.meshRaw;
  if (!mesh || mesh.length < 300) return { angle: { pitch: 0, yaw: 0, roll: 0 }, matrix: [1, 0, 0, 0, 1, 0, 0, 0, 1], gaze: { bearing: 0, strength: 0 } };

  const size = Math.max(face.boxRaw[2] * imageSize[0], face.boxRaw[3] * imageSize[1]) / 1.5;
  // top, bottom, left, right
  const pts = [mesh[10], mesh[152], mesh[234], mesh[454]].map((pt) => [
    // make the xyz coordinates proportional, independent of the image/box size
    pt[0] * imageSize[0] / size,
    pt[1] * imageSize[1] / size,
    pt[2],
  ]);

  const y_axis = normalize(subVectors(pts[1], pts[0]));
  let x_axis = normalize(subVectors(pts[3], pts[2]));
  const z_axis = normalize(crossVectors(x_axis, y_axis));
  // adjust x_axis to make sure that all axes are perpendicular to each other
  x_axis = crossVectors(y_axis, z_axis);

  // Rotation Matrix from Axis Vectors - http://renderdan.blogspot.com/2006/05/rotation-matrix-from-axis-vectors.html
  // 3x3 rotation matrix is flattened to an array in row-major order. Note that the rotation represented by this matrix is inverted.
  const matrix: [number, number, number, number, number, number, number, number, number] = [
    x_axis[0], x_axis[1], x_axis[2],
    y_axis[0], y_axis[1], y_axis[2],
    z_axis[0], z_axis[1], z_axis[2],
  ];
  const angle = rotationMatrixToEulerAngle(matrix);
  // const angle = meshToEulerAngle(mesh);

  // we have iris keypoints so we can calculate gaze direction
  const gaze = mesh.length === 478 ? calculateGaze(face) : { bearing: 0, strength: 0 };

  return { angle, matrix, gaze };
};

import { calculateFaceAngle } from './angles';

export const detectFace = async (parent /* instance of human */, input: Tensor): Promise<FaceResult[]> => {
  // run facemesh, includes blazeface and iris

@@ -158,6 +25,7 @@ export const detectFace = async (parent /* instance of human */, input: Tensor):
  const faceRes: Array<FaceResult> = [];
  parent.state = 'run:face';
  timeStamp = now();

  const faces = await facemesh.predict(input, parent.config);
  parent.performance.face = Math.trunc(now() - timeStamp);
  if (!input.shape || input.shape.length !== 4) return [];

@@ -226,7 +94,7 @@ export const detectFace = async (parent /* instance of human */, input: Tensor):
        delete faces[i].annotations.leftEyeIris;
        delete faces[i].annotations.rightEyeIris;
      }
      const irisSize = (faces[i].annotations && faces[i].annotations.leftEyeIris && faces[i].annotations.rightEyeIris
      const irisSize = (faces[i].annotations && faces[i].annotations.leftEyeIris && faces[i].annotations.leftEyeIris[0] && faces[i].annotations.rightEyeIris && faces[i].annotations.rightEyeIris[0]
        && (faces[i].annotations.leftEyeIris.length > 0) && (faces[i].annotations.rightEyeIris.length > 0)
        && (faces[i].annotations.leftEyeIris[0] !== null) && (faces[i].annotations.rightEyeIris[0] !== null))
        ? Math.max(Math.abs(faces[i].annotations.leftEyeIris[3][0] - faces[i].annotations.leftEyeIris[1][0]), Math.abs(faces[i].annotations.rightEyeIris[4][1] - faces[i].annotations.rightEyeIris[2][1])) / input.shape[2]
@@ -0,0 +1,139 @@
/**
 * BlazeFace, FaceMesh & Iris model implementation
 *
 * Based on:
 * - [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
 * - Facial Spatial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
 * - Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
 */

import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as blazeface from './blazeface';
import * as util from './facemeshutil';
import * as coords from './facemeshcoords';
import * as iris from './iris';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { FaceResult, Point } from '../result';
import type { Config } from '../config';
import { env } from '../util/env';

type BoxCache = { startPoint: Point, endPoint: Point, landmarks: Array<Point>, confidence: number, faceConfidence?: number | undefined };
let boxCache: Array<BoxCache> = [];
let model: GraphModel | null = null;
let inputSize = 0;
let skipped = Number.MAX_SAFE_INTEGER;
let detectedFaces = 0;

export async function predict(input: Tensor, config: Config): Promise<FaceResult[]> {
  if (!config.skipFrame || (((detectedFaces !== config.face.detector?.maxDetected) || !config.face.mesh?.enabled)) && (skipped > (config.face.detector?.skipFrames || 0))) { // reset cached boxes
    const newBoxes = await blazeface.getBoxes(input, config); // get results from blazeface detector
    boxCache = []; // empty cache
    for (const possible of newBoxes.boxes) { // extract data from detector
      const startPoint = await possible.box.startPoint.data() as unknown as Point;
      const endPoint = await possible.box.endPoint.data() as unknown as Point;
      const landmarks = await possible.landmarks.array() as Array<Point>;
      boxCache.push({ startPoint, endPoint, landmarks, confidence: possible.confidence });
    }
    newBoxes.boxes.forEach((prediction) => tf.dispose([prediction.box.startPoint, prediction.box.endPoint, prediction.landmarks]));
    for (let i = 0; i < boxCache.length; i++) { // enlarge and squarify detected boxes
      const scaledBox = util.scaleBoxCoordinates({ startPoint: boxCache[i].startPoint, endPoint: boxCache[i].endPoint }, newBoxes.scaleFactor);
      const enlargedBox = util.enlargeBox(scaledBox);
      const squarifiedBox = util.squarifyBox(enlargedBox);
      boxCache[i] = { ...squarifiedBox, confidence: boxCache[i].confidence, landmarks: boxCache[i].landmarks };
    }
    skipped = 0;
  } else {
    skipped++;
  }

  const faces: Array<FaceResult> = [];
  const newBoxes: Array<BoxCache> = [];
  let id = 0;
  for (let box of boxCache) {
    let angle = 0;
    let rotationMatrix;
    const face: FaceResult = {
      id: id++,
      mesh: [],
      meshRaw: [],
      box: [0, 0, 0, 0],
      boxRaw: [0, 0, 0, 0],
      score: 0,
      boxScore: 0,
      faceScore: 0,
      annotations: {},
    };

    if (config.face.detector?.rotation && config.face.mesh?.enabled && env.kernels.includes('rotatewithoffset')) {
      [angle, rotationMatrix, face.tensor] = util.correctFaceRotation(box, input, inputSize);
    } else {
      rotationMatrix = util.IDENTITY_MATRIX;
      const cut = util.cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, input, config.face.mesh?.enabled ? [inputSize, inputSize] : [blazeface.size(), blazeface.size()]);
      face.tensor = tf.div(cut, 255);
      tf.dispose(cut);
    }
    face.boxScore = Math.round(100 * box.confidence) / 100;
    if (!config.face.mesh?.enabled) { // mesh not enabled, return results from detector only
      face.box = util.getClampedBox(box, input);
      face.boxRaw = util.getRawBox(box, input);
      face.score = Math.round(100 * box.confidence || 0) / 100;
      face.mesh = box.landmarks.map((pt) => [
        ((box.startPoint[0] + box.endPoint[0])) / 2 + ((box.endPoint[0] + box.startPoint[0]) * pt[0] / blazeface.size()),
        ((box.startPoint[1] + box.endPoint[1])) / 2 + ((box.endPoint[1] + box.startPoint[1]) * pt[1] / blazeface.size()),
      ]);
      face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
      for (const key of Object.keys(coords.blazeFaceLandmarks)) face.annotations[key] = [face.mesh[coords.blazeFaceLandmarks[key]]]; // add annotations
    } else if (!model) { // mesh enabled, but not loaded
      if (config.debug) log('face mesh detection requested, but model is not loaded');
    } else { // mesh enabled
      const [contours, confidence, contourCoords] = model.execute(face.tensor as Tensor) as Array<Tensor>; // first returned tensor represents facial contours, which are already included in the mesh coordinates
      tf.dispose(contours);
      const faceConfidence = (await confidence.data())[0] as number;
      tf.dispose(confidence);
      const coordsReshaped = tf.reshape(contourCoords, [-1, 3]);
      let rawCoords = await coordsReshaped.array();
      tf.dispose(contourCoords);
      tf.dispose(coordsReshaped);
      if (faceConfidence < (config.face.detector?.minConfidence || 1)) {
        box.confidence = faceConfidence; // reset confidence of cached box
        tf.dispose(face.tensor);
      } else {
        if (config.face.iris?.enabled) rawCoords = await iris.augmentIris(rawCoords, face.tensor, config, inputSize); // augment results with iris
        face.mesh = util.transformRawCoords(rawCoords, box, angle, rotationMatrix, inputSize); // get processed mesh
        face.meshRaw = face.mesh.map((pt) => [pt[0] / (input.shape[2] || 0), pt[1] / (input.shape[1] || 0), (pt[2] || 0) / inputSize]);
        box = { ...util.enlargeBox(util.calculateLandmarksBoundingBox(face.mesh), 1.5), confidence: box.confidence }; // redefine box with the one calculated from the mesh
        for (const key of Object.keys(coords.meshAnnotations)) face.annotations[key] = coords.meshAnnotations[key].map((index) => face.mesh[index]); // add annotations
        if (config.face.detector?.rotation && config.face.mesh.enabled && config.face.description?.enabled && env.kernels.includes('rotatewithoffset')) { // do rotation one more time with mesh keypoints if we want to return a perfect image
          tf.dispose(face.tensor); // dispose so we can overwrite original face
          [angle, rotationMatrix, face.tensor] = util.correctFaceRotation(box, input, inputSize);
        }
        face.box = util.getClampedBox(box, input); // update detected box with box around the face mesh
        face.boxRaw = util.getRawBox(box, input);
        face.score = Math.round(100 * faceConfidence || 100 * box.confidence || 0) / 100;
        face.faceScore = Math.round(100 * faceConfidence) / 100;
        box = { ...util.squarifyBox(box), confidence: box.confidence, faceConfidence }; // update stored cache values
      }
    }
    faces.push(face);
    newBoxes.push(box);
  }
  if (config.face.mesh?.enabled) boxCache = newBoxes.filter((a) => a.confidence > (config.face.detector?.minConfidence || 0)); // remove cache entries for detected boxes with low confidence
  detectedFaces = faces.length;
  return faces;
}

export async function load(config: Config): Promise<GraphModel> {
  if (env.initial) model = null;
  if (!model) {
    model = await tf.loadGraphModel(join(config.modelBasePath, config.face.mesh?.modelPath || '')) as unknown as GraphModel;
    if (!model || !model['modelUrl']) log('load model failed:', config.face.mesh?.modelPath); // report the face mesh path, not the body model path
    else if (config.debug) log('load model:', model['modelUrl']);
  } else if (config.debug) log('cached model:', model['modelUrl']);
  inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
  if (inputSize === -1) inputSize = 64;
  return model;
}

export const triangulation = coords.TRI468;
export const uvmap = coords.UV468;
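
// usage sketch (illustrative only, with hypothetical config values; assumes the detector and mesh models are reachable):
// const cfg = { modelBasePath: '/models', face: { detector: { modelPath: 'blazeface.json' }, mesh: { enabled: true, modelPath: 'facemesh.json' }, iris: { enabled: false } } } as unknown as Config;
// await blazeface.load(cfg); await load(cfg);
// const faces = await predict(tf.zeros([1, 720, 1280, 3]), cfg); // FaceResult[] with boxes, 468/478-point meshes and annotations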
@@ -3,7 +3,7 @@
 * See `facemesh.ts` for entry point
 */

export const MESH_ANNOTATIONS = {
export const meshAnnotations = {
  silhouette: [
    10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
    397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
@@ -42,6 +42,22 @@ export const MESH_ANNOTATIONS = {
  leftCheek: [425],
};

export const meshLandmarks = {
  count: 468,
  mouth: 13,
  symmetryLine: [13, meshAnnotations['midwayBetweenEyes'][0]],
};

export const blazeFaceLandmarks = {
  leftEye: 0,
  rightEye: 1,
  nose: 2,
  mouth: 3,
  leftEar: 4,
  rightEar: 5,
  symmetryLine: [3, 2],
};

export const MESH_TO_IRIS_INDICES_MAP = [ // A mapping from facemesh model keypoints to iris model keypoints.
  { key: 'EyeUpper0', indices: [9, 10, 11, 12, 13, 14, 15] },
  { key: 'EyeUpper1', indices: [25, 26, 27, 28, 29, 30, 31] },
@@ -0,0 +1,166 @@
/**
 * BlazeFace, FaceMesh & Iris model implementation
 * See `facemesh.ts` for entry point
 */

import * as tf from '../../dist/tfjs.esm.js';
import * as coords from './facemeshcoords';
import type { Box, Point } from '../result';

export const createBox = (startEndTensor) => ({ startPoint: tf.slice(startEndTensor, [0, 0], [-1, 2]), endPoint: tf.slice(startEndTensor, [0, 2], [-1, 2]) });

export const disposeBox = (t) => tf.dispose([t.startPoint, t.endPoint]);

export const getBoxSize = (box): [number, number] => [Math.abs(box.endPoint[0] - box.startPoint[0]), Math.abs(box.endPoint[1] - box.startPoint[1])];

export const getBoxCenter = (box): [number, number] => [box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2, box.startPoint[1] + (box.endPoint[1] - box.startPoint[1]) / 2];

export const getClampedBox = (box, input): Box => (box ? [
  Math.trunc(Math.max(0, box.startPoint[0])),
  Math.trunc(Math.max(0, box.startPoint[1])),
  Math.trunc(Math.min((input.shape[2] || 0), box.endPoint[0]) - Math.max(0, box.startPoint[0])),
  Math.trunc(Math.min((input.shape[1] || 0), box.endPoint[1]) - Math.max(0, box.startPoint[1])),
] : [0, 0, 0, 0]);

export const getRawBox = (box, input): Box => (box ? [
  box.startPoint[0] / (input.shape[2] || 0),
  box.startPoint[1] / (input.shape[1] || 0),
  (box.endPoint[0] - box.startPoint[0]) / (input.shape[2] || 0),
  (box.endPoint[1] - box.startPoint[1]) / (input.shape[1] || 0),
] : [0, 0, 0, 0]);

export const scaleBoxCoordinates = (box, factor) => {
  const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
  const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
  return { startPoint, endPoint };
};

export const cutBoxFromImageAndResize = (box, image, cropSize) => {
  const h = image.shape[1];
  const w = image.shape[2];
  return tf.image.cropAndResize(image, [[box.startPoint[1] / h, box.startPoint[0] / w, box.endPoint[1] / h, box.endPoint[0] / w]], [0], cropSize);
};

export const enlargeBox = (box, factor = 1.5) => {
  const center = getBoxCenter(box);
  const size = getBoxSize(box);
  const halfSize: [number, number] = [factor * size[0] / 2, factor * size[1] / 2];
  return { startPoint: [center[0] - halfSize[0], center[1] - halfSize[1]] as Point, endPoint: [center[0] + halfSize[0], center[1] + halfSize[1]] as Point, landmarks: box.landmarks };
};

export const squarifyBox = (box) => {
  const centers = getBoxCenter(box);
  const size = getBoxSize(box);
  const halfSize = Math.max(...size) / 2;
  return { startPoint: [Math.round(centers[0] - halfSize), Math.round(centers[1] - halfSize)] as Point, endPoint: [Math.round(centers[0] + halfSize), Math.round(centers[1] + halfSize)] as Point, landmarks: box.landmarks };
};

export const calculateLandmarksBoundingBox = (landmarks) => {
  const xs = landmarks.map((d) => d[0]);
  const ys = landmarks.map((d) => d[1]);
  return { startPoint: [Math.min(...xs), Math.min(...ys)], endPoint: [Math.max(...xs), Math.max(...ys)], landmarks };
};

export const IDENTITY_MATRIX = [[1, 0, 0], [0, 1, 0], [0, 0, 1]];

export const normalizeRadians = (angle) => angle - 2 * Math.PI * Math.floor((angle + Math.PI) / (2 * Math.PI));

export const computeRotation = (point1, point2) => normalizeRadians(Math.PI / 2 - Math.atan2(-(point2[1] - point1[1]), point2[0] - point1[0]));

export const radToDegrees = (rad) => rad * 180 / Math.PI;

export const buildTranslationMatrix = (x, y) => [[1, 0, x], [0, 1, y], [0, 0, 1]];

export const dot = (v1, v2) => {
  let product = 0;
  for (let i = 0; i < v1.length; i++) product += v1[i] * v2[i];
  return product;
};

export const getColumnFrom2DArr = (arr, columnIndex) => {
  const column: Array<number> = [];
  for (let i = 0; i < arr.length; i++) column.push(arr[i][columnIndex]);
  return column;
};

export const multiplyTransformMatrices = (mat1, mat2) => {
  const product: Array<number[]> = [];
  const size = mat1.length;
  for (let row = 0; row < size; row++) {
    product.push([]);
    for (let col = 0; col < size; col++) product[row].push(dot(mat1[row], getColumnFrom2DArr(mat2, col)));
  }
  return product;
};

export const buildRotationMatrix = (rotation, center) => {
  const cosA = Math.cos(rotation);
  const sinA = Math.sin(rotation);
  const rotationMatrix = [[cosA, -sinA, 0], [sinA, cosA, 0], [0, 0, 1]];
  const translationMatrix = buildTranslationMatrix(center[0], center[1]);
  const translationTimesRotation = multiplyTransformMatrices(translationMatrix, rotationMatrix);
  const negativeTranslationMatrix = buildTranslationMatrix(-center[0], -center[1]);
  return multiplyTransformMatrices(translationTimesRotation, negativeTranslationMatrix);
};

export const invertTransformMatrix = (matrix) => {
  const rotationComponent = [[matrix[0][0], matrix[1][0]], [matrix[0][1], matrix[1][1]]];
  const translationComponent = [matrix[0][2], matrix[1][2]];
  const invertedTranslation = [-dot(rotationComponent[0], translationComponent), -dot(rotationComponent[1], translationComponent)];
  return [rotationComponent[0].concat(invertedTranslation[0]), rotationComponent[1].concat(invertedTranslation[1]), [0, 0, 1]];
};

export const rotatePoint = (homogeneousCoordinate, rotationMatrix) => [dot(homogeneousCoordinate, rotationMatrix[0]), dot(homogeneousCoordinate, rotationMatrix[1])];

export const xyDistanceBetweenPoints = (a, b) => Math.sqrt(((a[0] - b[0]) ** 2) + ((a[1] - b[1]) ** 2));

export function generateAnchors(inputSize) {
  const spec = { strides: [inputSize / 16, inputSize / 8], anchors: [2, 6] };
  const anchors: Array<[number, number]> = [];
  for (let i = 0; i < spec.strides.length; i++) {
    const stride = spec.strides[i];
    const gridRows = Math.floor((inputSize + stride - 1) / stride);
    const gridCols = Math.floor((inputSize + stride - 1) / stride);
    const anchorsNum = spec.anchors[i];
    for (let gridY = 0; gridY < gridRows; gridY++) {
      const anchorY = stride * (gridY + 0.5);
      for (let gridX = 0; gridX < gridCols; gridX++) {
        const anchorX = stride * (gridX + 0.5);
        for (let n = 0; n < anchorsNum; n++) anchors.push([anchorX, anchorY]);
      }
    }
  }
  return anchors;
}
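
// worked example: for inputSize = 256 the strides are [16, 32], giving a 16x16 grid with 2 anchors (512 anchors)
// plus an 8x8 grid with 6 anchors (384 anchors), i.e. 896 anchors total; 512 and 384 are the same dimensions
// seen in the output concat inside getBoxes() in blazeface.ts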

export function transformRawCoords(rawCoords, box, angle, rotationMatrix, inputSize) {
  const boxSize = getBoxSize({ startPoint: box.startPoint, endPoint: box.endPoint });
  const coordsScaled = rawCoords.map((coord) => ([
    boxSize[0] / inputSize * (coord[0] - inputSize / 2),
    boxSize[1] / inputSize * (coord[1] - inputSize / 2),
    coord[2] || 0,
  ]));
  const coordsRotationMatrix = (angle !== 0) ? buildRotationMatrix(angle, [0, 0]) : IDENTITY_MATRIX;
  const coordsRotated = (angle !== 0) ? coordsScaled.map((coord) => ([...rotatePoint(coord, coordsRotationMatrix), coord[2]])) : coordsScaled;
  const inverseRotationMatrix = (angle !== 0) ? invertTransformMatrix(rotationMatrix) : IDENTITY_MATRIX;
  const boxCenter = [...getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint }), 1];
  return coordsRotated.map((coord) => ([
    Math.round(coord[0] + dot(boxCenter, inverseRotationMatrix[0])),
    Math.round(coord[1] + dot(boxCenter, inverseRotationMatrix[1])),
    Math.round(coord[2] || 0),
  ]));
}

export function correctFaceRotation(box, input, inputSize) {
  const [indexOfMouth, indexOfForehead] = (box.landmarks.length >= coords.meshLandmarks.count) ? coords.meshLandmarks.symmetryLine : coords.blazeFaceLandmarks.symmetryLine;
  const angle: number = computeRotation(box.landmarks[indexOfMouth], box.landmarks[indexOfForehead]);
  const faceCenter: Point = getBoxCenter({ startPoint: box.startPoint, endPoint: box.endPoint });
  const faceCenterNormalized: Point = [faceCenter[0] / input.shape[2], faceCenter[1] / input.shape[1]];
  const rotated = tf.image.rotateWithOffset(input, angle, 0, faceCenterNormalized); // rotateWithOffset is not defined for tfjs-node
  const rotationMatrix = buildRotationMatrix(-angle, faceCenter);
  const cut = cutBoxFromImageAndResize({ startPoint: box.startPoint, endPoint: box.endPoint }, rotated, [inputSize, inputSize]);
  const face = tf.div(cut, 255);
  tf.dispose(cut);
  tf.dispose(rotated);
  return [angle, rotationMatrix, face];
}
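
// quick sanity sketch for the rotation helpers above (pure math, runnable standalone):
const demoMatrix = buildRotationMatrix(Math.PI / 2, [0, 0]); // 90deg rotation around the origin
const demoPoint = rotatePoint([1, 0, 1], demoMatrix); // homogeneous [x, y, 1] input
// demoPoint is ~[0, 1]: x maps to cos(90deg) = 0 and y to sin(90deg) = 1, up to floating-point error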
@@ -0,0 +1,150 @@
import * as coords from './facemeshcoords';
import * as util from './facemeshutil';
import * as tf from '../../dist/tfjs.esm.js';
import type { Tensor, GraphModel } from '../tfjs/types';
import { env } from '../util/env';
import { log, join } from '../util/util';
import type { Config } from '../config';
import type { Point } from '../result';

let model: GraphModel | null;
let inputSize = 0;

const irisEnlarge = 2.3;

const leftOutline = coords.meshAnnotations['leftEyeLower0'];
const rightOutline = coords.meshAnnotations['rightEyeLower0'];

const eyeLandmarks = {
  leftBounds: [leftOutline[0], leftOutline[leftOutline.length - 1]],
  rightBounds: [rightOutline[0], rightOutline[rightOutline.length - 1]],
};

const irisLandmarks = {
  upperCenter: 3,
  lowerCenter: 4,
  index: 71,
  numCoordinates: 76,
};

export async function load(config: Config): Promise<GraphModel> {
  if (env.initial) model = null;
  if (!model) {
    model = await tf.loadGraphModel(join(config.modelBasePath, config.face.iris?.modelPath || '')) as unknown as GraphModel;
    if (!model || !model['modelUrl']) log('load model failed:', config.face.iris?.modelPath); // report the iris model path, not the body model path
    else if (config.debug) log('load model:', model['modelUrl']);
  } else if (config.debug) log('cached model:', model['modelUrl']);
  inputSize = model.inputs[0].shape ? model.inputs[0].shape[2] : 0;
  if (inputSize === -1) inputSize = 64;
  return model;
}

// Replace the raw coordinates returned by facemesh with refined iris model coordinates
// Update the z coordinate to be an average of the original and the new.
function replaceRawCoordinates(rawCoords, newCoords, prefix, keys) {
  for (let i = 0; i < coords.MESH_TO_IRIS_INDICES_MAP.length; i++) {
    const { key, indices } = coords.MESH_TO_IRIS_INDICES_MAP[i];
    const originalIndices = coords.meshAnnotations[`${prefix}${key}`];
    if (!keys || keys.includes(key)) {
      for (let j = 0; j < indices.length; j++) {
        const index = indices[j];
        rawCoords[originalIndices[j]] = [
          newCoords[index][0], newCoords[index][1],
          (newCoords[index][2] + rawCoords[originalIndices[j]][2]) / 2,
        ];
      }
    }
  }
}

// eslint-disable-next-line class-methods-use-this
export const getLeftToRightEyeDepthDifference = (rawCoords) => {
  const leftEyeZ = rawCoords[eyeLandmarks.leftBounds[0]][2];
  const rightEyeZ = rawCoords[eyeLandmarks.rightBounds[0]][2];
  return leftEyeZ - rightEyeZ;
};

// Returns a box describing a cropped region around the eye fit for passing to the iris model.
export const getEyeBox = (rawCoords, face, eyeInnerCornerIndex, eyeOuterCornerIndex, flip = false, meshSize) => {
  const box = util.squarifyBox(util.enlargeBox(util.calculateLandmarksBoundingBox([rawCoords[eyeInnerCornerIndex], rawCoords[eyeOuterCornerIndex]]), irisEnlarge));
  const boxSize = util.getBoxSize(box);
  let crop = tf.image.cropAndResize(face, [[box.startPoint[1] / meshSize, box.startPoint[0] / meshSize, box.endPoint[1] / meshSize, box.endPoint[0] / meshSize]], [0], [inputSize, inputSize]);
  if (flip && env.kernels.includes('flipleftright')) {
    const flipped = tf.image.flipLeftRight(crop); // flipLeftRight is not defined for tfjs-node
    tf.dispose(crop);
    crop = flipped;
  }
  return { box, boxSize, crop };
};

// Given a cropped image of an eye, returns the coordinates of the contours surrounding the eye and the iris.
export const getEyeCoords = (eyeData, eyeBox, eyeBoxSize, flip = false) => {
  const eyeRawCoords: Array<Point> = [];
  for (let i = 0; i < irisLandmarks.numCoordinates; i++) {
    const x = eyeData[i * 3];
    const y = eyeData[i * 3 + 1];
    const z = eyeData[i * 3 + 2];
    eyeRawCoords.push([
      (flip ? (1 - (x / inputSize)) : (x / inputSize)) * eyeBoxSize[0] + eyeBox.startPoint[0],
      (y / inputSize) * eyeBoxSize[1] + eyeBox.startPoint[1], z,
    ]);
  }
  return { rawCoords: eyeRawCoords, iris: eyeRawCoords.slice(irisLandmarks.index) };
};

// The z-coordinates returned for the iris are unreliable, so we take the z values from the surrounding keypoints.
// eslint-disable-next-line class-methods-use-this
export const getAdjustedIrisCoords = (rawCoords, irisCoords, direction) => {
  const upperCenterZ = rawCoords[coords.meshAnnotations[`${direction}EyeUpper0`][irisLandmarks.upperCenter]][2];
  const lowerCenterZ = rawCoords[coords.meshAnnotations[`${direction}EyeLower0`][irisLandmarks.lowerCenter]][2];
  const averageZ = (upperCenterZ + lowerCenterZ) / 2;
  // Iris indices: 0: center | 1: right | 2: above | 3: left | 4: below
  return irisCoords.map((coord, i) => {
    let z = averageZ;
    if (i === 2) {
      z = upperCenterZ;
    } else if (i === 4) {
      z = lowerCenterZ;
    }
    return [coord[0], coord[1], z];
  });
};

export async function augmentIris(rawCoords, face, config, meshSize) {
  if (!model) {
    if (config.debug) log('face mesh iris detection requested, but model is not loaded');
    return rawCoords;
  }
  const { box: leftEyeBox, boxSize: leftEyeBoxSize, crop: leftEyeCrop } = getEyeBox(rawCoords, face, eyeLandmarks.leftBounds[0], eyeLandmarks.leftBounds[1], true, meshSize);
  const { box: rightEyeBox, boxSize: rightEyeBoxSize, crop: rightEyeCrop } = getEyeBox(rawCoords, face, eyeLandmarks.rightBounds[0], eyeLandmarks.rightBounds[1], true, meshSize);
  const combined = tf.concat([leftEyeCrop, rightEyeCrop]);
  tf.dispose(leftEyeCrop);
  tf.dispose(rightEyeCrop);
  const eyePredictions = model.predict(combined) as Tensor;
  tf.dispose(combined);
  const eyePredictionsData = await eyePredictions.data();
  tf.dispose(eyePredictions);
  const leftEyeData = eyePredictionsData.slice(0, irisLandmarks.numCoordinates * 3);
  const { rawCoords: leftEyeRawCoords, iris: leftIrisRawCoords } = getEyeCoords(leftEyeData, leftEyeBox, leftEyeBoxSize, true);
  const rightEyeData = eyePredictionsData.slice(irisLandmarks.numCoordinates * 3);
  const { rawCoords: rightEyeRawCoords, iris: rightIrisRawCoords } = getEyeCoords(rightEyeData, rightEyeBox, rightEyeBoxSize);
  const leftToRightEyeDepthDifference = getLeftToRightEyeDepthDifference(rawCoords);
  if (Math.abs(leftToRightEyeDepthDifference) < 30) { // User is looking straight ahead.
    replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', null);
    replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', null);
    // If the user is looking to the left or to the right, the iris coordinates tend to diverge too much from the mesh coordinates for them to be merged,
    // so we only update a single contour line above and below the eye.
  } else if (leftToRightEyeDepthDifference < 1) { // User is looking towards the right.
    replaceRawCoordinates(rawCoords, leftEyeRawCoords, 'left', ['EyeUpper0', 'EyeLower0']);
  } else { // User is looking towards the left.
    replaceRawCoordinates(rawCoords, rightEyeRawCoords, 'right', ['EyeUpper0', 'EyeLower0']);
  }
  const adjustedLeftIrisCoords = getAdjustedIrisCoords(rawCoords, leftIrisRawCoords, 'left');
  const adjustedRightIrisCoords = getAdjustedIrisCoords(rawCoords, rightIrisRawCoords, 'right');
  const newCoords = rawCoords.concat(adjustedLeftIrisCoords).concat(adjustedRightIrisCoords);
  return newCoords;
}
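
// note on sizes: the mesh model returns 468 points, and augmentIris() appends 5 left-iris plus 5 right-iris
// keypoints (indices 71..75 of the 76 per-eye coordinates), which is where the mesh.length === 478 checks
// elsewhere in the face pipeline come from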
@@ -1,53 +0,0 @@
/**
 * FingerPose algorithm implementation constants
 * See `fingerpose.ts` for entry point
 */

const Finger = {
  thumb: 0,
  index: 1,
  middle: 2,
  ring: 3,
  pinky: 4,
  all: [0, 1, 2, 3, 4], // just for convenience
  nameMapping: { 0: 'thumb', 1: 'index', 2: 'middle', 3: 'ring', 4: 'pinky' },
  // Describes mapping of joints based on the 21 points returned by handpose.
  // [0] Palm
  // [1-4] Thumb
  // [5-8] Index
  // [9-12] Middle
  // [13-16] Ring
  // [17-20] Pinky
  pointsMapping: {
    0: [[0, 1], [1, 2], [2, 3], [3, 4]],
    1: [[0, 5], [5, 6], [6, 7], [7, 8]],
    2: [[0, 9], [9, 10], [10, 11], [11, 12]],
    3: [[0, 13], [13, 14], [14, 15], [15, 16]],
    4: [[0, 17], [17, 18], [18, 19], [19, 20]],
  },
  getName: (value) => Finger.nameMapping[value],
  getPoints: (value) => Finger.pointsMapping[value],
};

const FingerCurl = {
  none: 0,
  half: 1,
  full: 2,
  nameMapping: { 0: 'none', 1: 'half', 2: 'full' },
  getName: (value) => FingerCurl.nameMapping[value],
};

const FingerDirection = {
  verticalUp: 0,
  verticalDown: 1,
  horizontalLeft: 2,
  horizontalRight: 3,
  diagonalUpRight: 4,
  diagonalUpLeft: 5,
  diagonalDownRight: 6,
  diagonalDownLeft: 7,
  nameMapping: { 0: 'verticalUp', 1: 'verticalDown', 2: 'horizontalLeft', 3: 'horizontalRight', 4: 'diagonalUpRight', 5: 'diagonalUpLeft', 6: 'diagonalDownRight', 7: 'diagonalDownLeft' },
  getName: (value) => FingerDirection.nameMapping[value],
};

export { Finger, FingerCurl, FingerDirection };
@@ -1,37 +0,0 @@
/**
 * FingerPose algorithm implementation constants
 *
 * Based on: [**FingerPose**](https://github.com/andypotato/fingerpose)
 */

import * as estimator from './estimator';
import { Finger, FingerCurl, FingerDirection } from './description';
import Gestures from './gestures';

const minConfidence = 0.7;

export function analyze(keypoints) { // get estimations of curl / direction for each finger
  if (!keypoints || keypoints.length === 0) return null;
  const estimatorRes = estimator.estimate(keypoints);
  const landmarks = {};
  for (const fingerIdx of Finger.all) {
    landmarks[Finger.getName(fingerIdx)] = {
      curl: FingerCurl.getName(estimatorRes.curls[fingerIdx]),
      direction: FingerDirection.getName(estimatorRes.directions[fingerIdx]),
    };
  }
  // console.log('finger landmarks', landmarks);
  return landmarks;
}

export function match(keypoints) { // compare gesture description to each known gesture
  const poses: Array<{ name: string, confidence: number }> = [];
  if (!keypoints || keypoints.length === 0) return poses;
  const estimatorRes = estimator.estimate(keypoints);
  for (const gesture of Gestures) {
    const confidence = gesture.matchAgainst(estimatorRes.curls, estimatorRes.directions);
    if (confidence >= minConfidence) poses.push({ name: gesture.name, confidence });
  }
  // console.log('finger poses', poses);
  return poses;
}
@@ -3,7 +3,7 @@
 */

import type { GestureResult } from '../result';
import * as fingerPose from '../fingerpose/fingerpose';
import * as fingerPose from '../hand/fingerpose';

/**
 * @typedef FaceGesture
@@ -63,7 +63,7 @@ export const face = (res): GestureResult[] => {
  if (!res) return [];
  const gestures: Array<{ face: number, gesture: FaceGesture }> = [];
  for (let i = 0; i < res.length; i++) {
    if (res[i].mesh && res[i].mesh.length > 0) {
    if (res[i].mesh && res[i].mesh.length > 450) {
      const eyeFacing = res[i].mesh[33][2] - res[i].mesh[263][2];
      if (Math.abs(eyeFacing) < 10) gestures.push({ face: i, gesture: 'facing center' });
      else gestures.push({ face: i, gesture: `facing ${eyeFacing < 0 ? 'left' : 'right'}` });
@@ -84,7 +84,7 @@ export const iris = (res): GestureResult[] => {
  if (!res) return [];
  const gestures: Array<{ iris: number, gesture: IrisGesture }> = [];
  for (let i = 0; i < res.length; i++) {
    if (!res[i].annotations || !res[i].annotations.leftEyeIris || !res[i].annotations.rightEyeIris) continue;
    if (!res[i].annotations || !res[i].annotations.leftEyeIris || !res[i].annotations.leftEyeIris[0] || !res[i].annotations.rightEyeIris || !res[i].annotations.rightEyeIris[0]) continue;
    const sizeXLeft = res[i].annotations.leftEyeIris[3][0] - res[i].annotations.leftEyeIris[1][0];
    const sizeYLeft = res[i].annotations.leftEyeIris[4][1] - res[i].annotations.leftEyeIris[2][1];
    const areaLeft = Math.abs(sizeXLeft * sizeYLeft);
@@ -3,7 +3,54 @@
 * See `fingerpose.ts` for entry point
 */

export default class Gesture {
export const Finger = {
  thumb: 0,
  index: 1,
  middle: 2,
  ring: 3,
  pinky: 4,
  all: [0, 1, 2, 3, 4], // just for convenience
  nameMapping: { 0: 'thumb', 1: 'index', 2: 'middle', 3: 'ring', 4: 'pinky' },
  // Describes mapping of joints based on the 21 points returned by handpose.
  // [0] Palm
  // [1-4] Thumb
  // [5-8] Index
  // [9-12] Middle
  // [13-16] Ring
  // [17-20] Pinky
  pointsMapping: {
    0: [[0, 1], [1, 2], [2, 3], [3, 4]],
    1: [[0, 5], [5, 6], [6, 7], [7, 8]],
    2: [[0, 9], [9, 10], [10, 11], [11, 12]],
    3: [[0, 13], [13, 14], [14, 15], [15, 16]],
    4: [[0, 17], [17, 18], [18, 19], [19, 20]],
  },
  getName: (value) => Finger.nameMapping[value],
  getPoints: (value) => Finger.pointsMapping[value],
};

export const FingerCurl = {
  none: 0,
  half: 1,
  full: 2,
  nameMapping: { 0: 'none', 1: 'half', 2: 'full' },
  getName: (value) => FingerCurl.nameMapping[value],
};

export const FingerDirection = {
  verticalUp: 0,
  verticalDown: 1,
  horizontalLeft: 2,
  horizontalRight: 3,
  diagonalUpRight: 4,
  diagonalUpLeft: 5,
  diagonalDownRight: 6,
  diagonalDownLeft: 7,
  nameMapping: { 0: 'verticalUp', 1: 'verticalDown', 2: 'horizontalLeft', 3: 'horizontalRight', 4: 'diagonalUpRight', 5: 'diagonalUpLeft', 6: 'diagonalDownRight', 7: 'diagonalDownLeft' },
  getName: (value) => FingerDirection.nameMapping[value],
};

export class FingerGesture {
  name;
  curls;
  directions;
@@ -3,11 +3,10 @@
 * See `fingerpose.ts` for entry point
 */

import { Finger, FingerCurl, FingerDirection } from './description';
import Gesture from './gesture';
import { Finger, FingerCurl, FingerDirection, FingerGesture } from './fingerdef';

// describe thumbs up gesture 👍
const ThumbsUp = new Gesture('thumbs up');
const ThumbsUp = new FingerGesture('thumbs up');
ThumbsUp.addCurl(Finger.thumb, FingerCurl.none, 1.0);
ThumbsUp.addDirection(Finger.thumb, FingerDirection.verticalUp, 1.0);
ThumbsUp.addDirection(Finger.thumb, FingerDirection.diagonalUpLeft, 0.25);
@@ -19,7 +18,7 @@ for (const finger of [Finger.index, Finger.middle, Finger.ring, Finger.pinky]) {
}

// describe Victory gesture ✌️
const Victory = new Gesture('victory');
const Victory = new FingerGesture('victory');
Victory.addCurl(Finger.thumb, FingerCurl.half, 0.5);
Victory.addCurl(Finger.thumb, FingerCurl.none, 0.5);
Victory.addDirection(Finger.thumb, FingerDirection.verticalUp, 1.0);
@@ -1,10 +1,13 @@
/**
 * FingerPose algorithm implementation
 * See `fingerpose.ts` for entry point
 * FingerPose algorithm implementation constants
 *
 * Based on: [**FingerPose**](https://github.com/andypotato/fingerpose)
 */

import { Finger, FingerCurl, FingerDirection } from './description';
import { Finger, FingerCurl, FingerDirection } from './fingerdef';
import Gestures from '../hand/fingergesture';

const minConfidence = 0.7;
const options = {
  // curl estimation
  HALF_CURL_START_LIMIT: 60.0,
@@ -169,7 +172,7 @@ function calculateFingerDirection(startPoint, midPoint, endPoint, fingerSlopes)
  return estimatedDirection;
}

export function estimate(landmarks) {
function estimate(landmarks) {
  // step 1: calculate slopes
  const slopesXY: Array<number[]> = [];
  const slopesYZ: Array<number[]> = [];
@@ -212,3 +215,29 @@ export function estimate(landmarks) {
  }
  return { curls: fingerCurls, directions: fingerDirections };
}

export function analyze(keypoints) { // get estimations of curl / direction for each finger
  if (!keypoints || keypoints.length === 0) return null;
  const estimatorRes = estimate(keypoints);
  const landmarks = {};
  for (const fingerIdx of Finger.all) {
    landmarks[Finger.getName(fingerIdx)] = {
      curl: FingerCurl.getName(estimatorRes.curls[fingerIdx]),
      direction: FingerDirection.getName(estimatorRes.directions[fingerIdx]),
    };
  }
  // console.log('finger landmarks', landmarks);
  return landmarks;
}

export function match(keypoints) { // compare gesture description to each known gesture
  const poses: Array<{ name: string, confidence: number }> = [];
  if (!keypoints || keypoints.length === 0) return poses;
  const estimatorRes = estimate(keypoints);
  for (const gesture of Gestures) {
    const confidence = gesture.matchAgainst(estimatorRes.curls, estimatorRes.directions);
    if (confidence >= minConfidence) poses.push({ name: gesture.name, confidence });
  }
  // console.log('finger poses', poses);
  return poses;
}
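
// usage sketch (illustrative only): feed 21 [x, y, z] keypoints as returned by the hand models
const demoKeypoints = Array.from({ length: 21 }, (_, i) => [i * 2, i, 0] as [number, number, number]);
const demoCurls = analyze(demoKeypoints); // per-finger { curl, direction } name map
const demoPoses = match(demoKeypoints); // [{ name, confidence }] for gestures scoring above minConfidence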
@@ -13,7 +13,7 @@ import type { HandResult, Box, Point } from '../result';
import type { GraphModel, Tensor } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';
import * as fingerPose from '../fingerpose/fingerpose';
import * as fingerPose from './fingerpose';
import { fakeOps } from '../tfjs/backend';

const boxScaleFact = 1.5; // hand finger model prefers slightly larger box
@@ -8,7 +8,7 @@ import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as handdetector from './handdetector';
import * as handpipeline from './handpipeline';
import * as fingerPose from '../fingerpose/fingerpose';
import * as fingerPose from '../hand/fingerpose';
import type { HandResult, Box, Point } from '../result';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { Config } from '../config';
@@ -8,9 +8,9 @@ import type { Result, FaceResult, HandResult, BodyResult, ObjectResult, GestureR
import * as tf from '../dist/tfjs.esm.js';
import * as models from './models';
import * as face from './face/face';
import * as facemesh from './blazeface/facemesh';
import * as facemesh from './face/facemesh';
import * as faceres from './face/faceres';
import * as posenet from './posenet/posenet';
import * as posenet from './body/posenet';
import * as handtrack from './hand/handtrack';
import * as handpose from './handpose/handpose';
// import * as blazepose from './body/blazepose-v1';
@@ -23,7 +23,7 @@ import * as segmentation from './segmentation/segmentation';
import * as gesture from './gesture/gesture';
import * as image from './image/image';
import * as draw from './util/draw';
import * as persons from './persons';
import * as persons from './util/persons';
import * as interpolate from './util/interpolate';
import * as env from './util/env';
import * as backend from './tfjs/backend';
@@ -4,10 +4,12 @@

import { log } from './util/util';
import type { GraphModel } from './tfjs/types';
import * as facemesh from './blazeface/facemesh';
import * as blazeface from './face/blazeface';
import * as facemesh from './face/facemesh';
import * as iris from './face/iris';
import * as faceres from './face/faceres';
import * as emotion from './gear/emotion';
import * as posenet from './posenet/posenet';
import * as posenet from './body/posenet';
import * as handpose from './handpose/handpose';
import * as handtrack from './hand/handtrack';
import * as blazepose from './body/blazepose';
@@ -57,15 +59,13 @@ export function reset(instance: Human) {
/** Load method preloads all instance.configured models on-demand */
export async function load(instance: Human) {
  if (env.initial) reset(instance);
  if (instance.config.face.enabled) { // face model is a combo that must be loaded as a whole
    if (!instance.models.facedetect) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
    if (instance.config.face.mesh?.enabled && !instance.models.facemesh) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
    if (instance.config.face.iris?.enabled && !instance.models.faceiris) [instance.models.facedetect, instance.models.facemesh, instance.models.faceiris] = await facemesh.load(instance.config);
  }
  if (instance.config.hand.enabled) { // handpose model is a combo that must be loaded as a whole
    if (!instance.models.handpose && instance.config.hand.detector?.modelPath?.includes('handdetect')) [instance.models.handpose, instance.models.handskeleton] = await handpose.load(instance.config);
    if (!instance.models.handskeleton && instance.config.hand.landmarks && instance.config.hand.detector?.modelPath?.includes('handdetect')) [instance.models.handpose, instance.models.handskeleton] = await handpose.load(instance.config);
  }
  if (instance.config.face.enabled && !instance.models.facedetect) instance.models.facedetect = blazeface.load(instance.config);
  if (instance.config.face.enabled && instance.config.face.mesh?.enabled && !instance.models.facemesh) instance.models.facemesh = facemesh.load(instance.config);
  if (instance.config.face.enabled && instance.config.face.iris?.enabled && !instance.models.faceiris) instance.models.faceiris = iris.load(instance.config);
  if (instance.config.hand.enabled && !instance.models.handtrack && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handtrack = handtrack.loadDetect(instance.config);
  if (instance.config.hand.enabled && instance.config.hand.landmarks && !instance.models.handskeleton && instance.config.hand.detector?.modelPath?.includes('handtrack')) instance.models.handskeleton = handtrack.loadSkeleton(instance.config);
  if (instance.config.body.enabled && !instance.models.posenet && instance.config.body?.modelPath?.includes('posenet')) instance.models.posenet = posenet.load(instance.config);
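  // note: unlike the previous combo loaders above, each enabled model is now requested independently,
  // so toggling a single feature (for example iris) no longer forces reloading the whole face pipeline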
@@ -1,38 +0,0 @@
/**
 * PoseNet body detection model implementation constants
 * See `posenet.ts` for entry point
 */

export const partNames = [
  'nose', 'leftEye', 'rightEye', 'leftEar', 'rightEar', 'leftShoulder',
  'rightShoulder', 'leftElbow', 'rightElbow', 'leftWrist', 'rightWrist',
  'leftHip', 'rightHip', 'leftKnee', 'rightKnee', 'leftAnkle', 'rightAnkle',
];

export const count = partNames.length; // 17 keypoints

export const partIds = partNames.reduce((result, jointName, i) => {
  result[jointName] = i;
  return result;
}, {});

const connectedPartNames = [
  ['leftHip', 'leftShoulder'], ['leftElbow', 'leftShoulder'],
  ['leftElbow', 'leftWrist'], ['leftHip', 'leftKnee'],
  ['leftKnee', 'leftAnkle'], ['rightHip', 'rightShoulder'],
  ['rightElbow', 'rightShoulder'], ['rightElbow', 'rightWrist'],
  ['rightHip', 'rightKnee'], ['rightKnee', 'rightAnkle'],
  ['leftShoulder', 'rightShoulder'], ['leftHip', 'rightHip'],
];
export const connectedPartIndices = connectedPartNames.map(([jointNameA, jointNameB]) => ([partIds[jointNameA], partIds[jointNameB]]));

export const poseChain = [
  ['nose', 'leftEye'], ['leftEye', 'leftEar'], ['nose', 'rightEye'],
  ['rightEye', 'rightEar'], ['nose', 'leftShoulder'],
  ['leftShoulder', 'leftElbow'], ['leftElbow', 'leftWrist'],
  ['leftShoulder', 'leftHip'], ['leftHip', 'leftKnee'],
  ['leftKnee', 'leftAnkle'], ['nose', 'rightShoulder'],
  ['rightShoulder', 'rightElbow'], ['rightElbow', 'rightWrist'],
  ['rightShoulder', 'rightHip'], ['rightHip', 'rightKnee'],
  ['rightKnee', 'rightAnkle'],
];
@@ -1,46 +0,0 @@
/**
 * PoseNet body detection model implementation
 *
 * Based on: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
 */

import { log, join } from '../util/util';
import * as tf from '../../dist/tfjs.esm.js';
import * as poses from './poses';
import * as util from './utils';
import type { BodyResult } from '../result';
import type { Tensor, GraphModel } from '../tfjs/types';
import type { Config } from '../config';
import { env } from '../util/env';

let model: GraphModel;
const poseNetOutputs = ['MobilenetV1/offset_2/BiasAdd'/* offsets */, 'MobilenetV1/heatmap_2/BiasAdd'/* heatmapScores */, 'MobilenetV1/displacement_fwd_2/BiasAdd'/* displacementFwd */, 'MobilenetV1/displacement_bwd_2/BiasAdd'/* displacementBwd */];

export async function predict(input: Tensor, config: Config): Promise<BodyResult[]> {
  const res = tf.tidy(() => {
    if (!model.inputs[0].shape) return [];
    const resized = tf.image.resizeBilinear(input, [model.inputs[0].shape[2], model.inputs[0].shape[1]]);
    const normalized = tf.sub(tf.div(tf.cast(resized, 'float32'), 127.5), 1.0);
    const results: Array<Tensor> = model.execute(normalized, poseNetOutputs) as Array<Tensor>;
    const results3d = results.map((y) => tf.squeeze(y, [0]));
    results3d[1] = results3d[1].sigmoid(); // apply sigmoid on scores
    return results3d;
  });

  const buffers = await Promise.all(res.map((tensor: Tensor) => tensor.buffer()));
  for (const t of res) tf.dispose(t);

  const decoded = await poses.decode(buffers[0], buffers[1], buffers[2], buffers[3], config.body.maxDetected, config.body.minConfidence);
  if (!model.inputs[0].shape) return [];
  const scaled = util.scalePoses(decoded, [input.shape[1], input.shape[2]], [model.inputs[0].shape[2], model.inputs[0].shape[1]]) as BodyResult[];
  return scaled;
}

export async function load(config: Config): Promise<GraphModel> {
  if (!model || env.initial) {
    model = await tf.loadGraphModel(join(config.modelBasePath, config.body.modelPath || '')) as unknown as GraphModel;
    if (!model || !model['modelUrl']) log('load model failed:', config.body.modelPath);
    else if (config.debug) log('load model:', model['modelUrl']);
  } else if (config.debug) log('cached model:', model['modelUrl']);
  return model;
}
@@ -2,7 +2,7 @@
 * Module that implements helper draw functions, exposed as human.draw
 */

import { TRI468 as triangulation } from '../blazeface/coords';
import { TRI468 as triangulation } from '../face/facemeshcoords';
import { mergeDeep, now } from './util';
import type { Result, FaceResult, BodyResult, HandResult, ObjectResult, GestureResult, PersonResult } from '../result';

@@ -204,10 +204,10 @@ export async function face(inCanvas: HTMLCanvasElement | OffscreenCanvas, result
    if (f.mesh && f.mesh.length > 0) {
      if (localOptions.drawPoints) {
        for (const pt of f.mesh) point(ctx, pt[0], pt[1], pt[2], localOptions);
        // for (const pt of f.meshRaw) point(ctx, pt[0] * inCanvas.offsetWidth, pt[1] * inCanvas.offsetHeight, pt[2]);
      }
      if (localOptions.drawPolygons) {
        ctx.lineWidth = 1;
        if (f.mesh.length > 450) {
          for (let i = 0; i < triangulation.length / 3; i++) {
            const points = [
              triangulation[i * 3 + 0],
@@ -216,8 +216,9 @@ export async function face(inCanvas: HTMLCanvasElement | OffscreenCanvas, result
            ].map((index) => f.mesh[index]);
            lines(ctx, points, localOptions);
          }
        }
        // iris: array[center, left, top, right, bottom]
        if (f.annotations && f.annotations['leftEyeIris']) {
        if (f.annotations && f.annotations['leftEyeIris'] && f.annotations['leftEyeIris'][0]) {
          ctx.strokeStyle = localOptions.useDepth ? 'rgba(255, 200, 255, 0.3)' : localOptions.color;
          ctx.beginPath();
          const sizeX = Math.abs(f.annotations['leftEyeIris'][3][0] - f.annotations['leftEyeIris'][1][0]) / 2;
@@ -229,7 +230,7 @@ export async function face(inCanvas: HTMLCanvasElement | OffscreenCanvas, result
            ctx.fill();
          }
        }
        if (f.annotations && f.annotations['rightEyeIris']) {
        if (f.annotations && f.annotations['rightEyeIris'] && f.annotations['rightEyeIris'][0]) {
          ctx.strokeStyle = localOptions.useDepth ? 'rgba(255, 200, 255, 0.3)' : localOptions.color;
          ctx.beginPath();
          const sizeX = Math.abs(f.annotations['rightEyeIris'][3][0] - f.annotations['rightEyeIris'][1][0]) / 2;
@@ -2,7 +2,7 @@
 * Analyze detection Results and sort&combine them into per-person view
 */

import type { FaceResult, BodyResult, HandResult, GestureResult, PersonResult, Box } from './result';
import type { FaceResult, BodyResult, HandResult, GestureResult, PersonResult, Box } from '../result';

export function join(faces: Array<FaceResult>, bodies: Array<BodyResult>, hands: Array<HandResult>, gestures: Array<GestureResult>, shape: Array<number> | undefined): Array<PersonResult> {
  let id = 0;
3740
test/build.log
File diff suppressed because it is too large
Load Diff