added blazeface back and front models

pull/50/head
Vladimir Mandic 2020-10-15 20:20:37 -04:00
parent f6059df802
commit 86bb3f1c87
22 changed files with 105 additions and 90 deletions

View File

@@ -218,12 +218,13 @@ human.defaults = {
   face: {
     enabled: true, // controls if specified modul is enabled (note: module is not loaded until it is required)
     detector: {
-      modelPath: '../models/blazeface/model.json', // path to specific pre-trained model
+      modelPath: '../models/blazeface/tfhub/model.json', // can be 'tfhub', 'front' or 'back'
+      inputSize: 128, // 128 for tfhub and front models, 256 for back
       maxFaces: 10, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
       skipFrames: 10, // how many frames to skip before re-running bounding box detection
-      minConfidence: 0.8, // threshold for discarding a prediction
+      minConfidence: 0.5, // threshold for discarding a prediction
       iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-      scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+      scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     },
     mesh: {
       enabled: true,
@@ -235,12 +236,12 @@ human.defaults = {
     },
     age: {
       enabled: true,
-      modelPath: '../models/ssrnet-imdb-age/model.json',
+      modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
       skipFrames: 10, // how many frames to skip before re-running bounding box detection
     },
     gender: {
       enabled: true,
-      modelPath: '../models/ssrnet-imdb-gender/model.json',
+      modelPath: '../models/ssrnet-gender/imdb/model.json', // can be 'imdb' or 'wiki'
     },
     emotion: {
       enabled: true,
@@ -254,15 +255,15 @@ human.defaults = {
     enabled: true,
     modelPath: '../models/posenet/model.json',
     maxDetections: 5, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
-    scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
   },
   hand: {
     enabled: true,
     skipFrames: 10, // how many frames to skip before re-running bounding box detection
-    minConfidence: 0.8, // threshold for discarding a prediction
+    minConfidence: 0.5, // threshold for discarding a prediction
     iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-    scoreThreshold: 0.75, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     detector: {
       anchors: '../models/handdetect/anchors.json',
       modelPath: '../models/handdetect/model.json',
@@ -361,14 +362,14 @@ For example, on a desktop with a low-end nVidia GTX1050 it can perform multiple
 Performance per module:
 - Enabled all: 10 FPS
-- Face Detect: 80 FPS
+- Face Detect: 80 FPS (standalone)
 - Face Geometry: 30 FPS (includes face detect)
 - Face Iris: 25 FPS (includes face detect and face geometry)
 - Age: 60 FPS (includes face detect)
 - Gender: 60 FPS (includes face detect)
 - Emotion: 60 FPS (includes face detect)
-- Hand: 40 FPS
-- Body: 50 FPS
+- Hand: 40 FPS (standalone)
+- Body: 50 FPS (standalone)
 Library can also be used on mobile devices
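As a usage illustration (not part of this commit), the defaults shown above can be tightened per call; this is a minimal sketch, assuming human.detect(input, config) accepts a partial config and merges it over human.defaults:

// hedged sketch: restrict detection to one face, raise the confidence cut-offs,
// and skip the hand and body models for this call; the merge-over-defaults
// behaviour is assumed, not shown in this diff
const result = await human.detect(image, {
  face: { detector: { maxFaces: 1, minConfidence: 0.8, scoreThreshold: 0.8 } },
  hand: { enabled: false },
  body: { enabled: false },
});
console.log(result);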

View File

@@ -13,8 +13,7 @@ onmessage = async (msg) => {
   config = msg.data.config;
   let result = {};
   try {
-    // result = await human.detect(image, config);
-    result = {};
+    result = await human.detect(image, config);
   } catch (err) {
     result.error = err.message;
     log('Worker thread error:', err.message);
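The rest of the worker handler is outside this hunk; the following is a hedged reconstruction of the surrounding flow only (rebuild the transferred buffer into ImageData, run detection, post the result back), not the exact demo code:

// hedged sketch of the onmessage handler around the hunk above; names (image,
// config, log) follow the visible lines, the rest is assumed
onmessage = async (msg) => {
  // rebuild ImageData from the ArrayBuffer transferred by the main thread
  const image = new ImageData(new Uint8ClampedArray(msg.data.image), msg.data.width, msg.data.height);
  config = msg.data.config;
  let result = {};
  try {
    result = await human.detect(image, config);
  } catch (err) {
    result.error = err.message;
    log('Worker thread error:', err.message);
  }
  postMessage(result); // main thread receives this in msg.data and draws it
};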

View File

@@ -60,7 +60,7 @@ async function drawFace(result, canvas) {
     const labelIris = face.iris ? `iris: ${face.iris}` : '';
     const labelEmotion = face.emotion && face.emotion[0] ? `emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : '';
     ctx.fillStyle = ui.baseLabel;
-    ctx.fillText(`face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22, face.box[2]);
+    ctx.fillText(`${Math.trunc(100 * face.confidence)}% face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22);
     ctx.stroke();
     ctx.lineWidth = 1;
     if (face.mesh) {
@@ -238,7 +238,7 @@ function webWorker(input, image, canvas) {
     log('Creating worker thread');
     worker = new Worker('demo-esm-webworker.js', { type: 'module' });
     // after receiving message from webworker, parse&draw results and send new frame for processing
-    worker.addEventListener('message', async (msg) => drawResults(input, msg.data, canvas));
+    worker.addEventListener('message', (msg) => drawResults(input, msg.data, canvas));
   }
   // pass image data as arraybuffer to worker by reference to avoid copy
   worker.postMessage({ image: image.data.buffer, width: canvas.width, height: canvas.height, config }, [image.data.buffer]);
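One detail behind the postMessage call above, as an explanatory aside rather than code from this commit: listing image.data.buffer in the transfer list moves ownership of the buffer to the worker instead of copying it, so the sender's copy is detached and cannot be reused for the next frame:

// explanatory sketch of ArrayBuffer transfer semantics
const before = image.data.buffer.byteLength; // width * height * 4
worker.postMessage({ image: image.data.buffer, width: canvas.width, height: canvas.height, config }, [image.data.buffer]);
const after = image.data.buffer.byteLength;  // 0 - the buffer now belongs to the worker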

View File

@@ -23,12 +23,12 @@ var require_blazeface = __commonJS((exports2) => {
     anchors: [2, 6]
   };
   const NUM_LANDMARKS = 6;
-  function generateAnchors(width, height, outputSpec) {
+  function generateAnchors(anchorSize, outputSpec) {
     const anchors = [];
     for (let i = 0; i < outputSpec.strides.length; i++) {
       const stride = outputSpec.strides[i];
-      const gridRows = Math.floor((height + stride - 1) / stride);
-      const gridCols = Math.floor((width + stride - 1) / stride);
+      const gridRows = Math.floor((anchorSize + stride - 1) / stride);
+      const gridCols = Math.floor((anchorSize + stride - 1) / stride);
       const anchorsNum = outputSpec.anchors[i];
       for (let gridY = 0; gridY < gridRows; gridY++) {
         const anchorY = stride * (gridY + 0.5);
@@ -83,11 +83,11 @@ var require_blazeface = __commonJS((exports2) => {
       this.blazeFaceModel = model;
       this.width = config2.detector.inputSize;
       this.height = config2.detector.inputSize;
+      this.anchorSize = config2.detector.anchorSize;
       this.maxFaces = config2.detector.maxFaces;
-      this.anchorsData = generateAnchors(config2.detector.inputSize, config2.detector.inputSize, ANCHORS_CONFIG);
+      this.anchorsData = generateAnchors(config2.detector.anchorSize, ANCHORS_CONFIG);
       this.anchors = tf2.tensor2d(this.anchorsData);
-      this.inputSizeData = [config2.detector.inputSize, config2.detector.inputSize];
-      this.inputSize = tf2.tensor1d([config2.detector.inputSize, config2.detector.inputSize]);
+      this.inputSize = tf2.tensor1d([this.width, this.height]);
       this.iouThreshold = config2.detector.iouThreshold;
       this.scaleFaces = 0.8;
       this.scoreThreshold = config2.detector.scoreThreshold;
@@ -97,7 +97,16 @@ var require_blazeface = __commonJS((exports2) => {
       const resizedImage = inputImage.resizeBilinear([this.width, this.height]);
       const normalizedImage = tf2.mul(tf2.sub(resizedImage.div(255), 0.5), 2);
       const batchedPrediction = this.blazeFaceModel.predict(normalizedImage);
-      const prediction = batchedPrediction.squeeze();
+      let prediction;
+      if (Array.isArray(batchedPrediction)) {
+        const sorted = batchedPrediction.sort((a, b) => a.size - b.size);
+        const concat384 = tf2.concat([sorted[0], sorted[2]], 2);
+        const concat512 = tf2.concat([sorted[1], sorted[3]], 2);
+        const concat = tf2.concat([concat512, concat384], 1);
+        prediction = concat.squeeze(0);
+      } else {
+        prediction = batchedPrediction.squeeze();
+      }
       const decodedBounds = decodeBounds(prediction, this.anchors, this.inputSize);
       const logits = tf2.slice(prediction, [0, 0], [-1, 1]);
       const scoresOut = tf2.sigmoid(logits).squeeze();
@@ -130,7 +139,7 @@ var require_blazeface = __commonJS((exports2) => {
       detectedOutputs.dispose();
       return {
         boxes: annotatedBoxes,
-        scaleFactor: [inputImage.shape[2] / this.inputSizeData[0], inputImage.shape[1] / this.inputSizeData[1]]
+        scaleFactor: [inputImage.shape[2] / this.width, inputImage.shape[1] / this.height]
       };
     }
     async estimateFaces(input) {
@@ -5041,7 +5050,8 @@ var require_config = __commonJS((exports2) => {
     face: {
       enabled: true,
       detector: {
-        modelPath: "../models/blazeface/model.json",
+        modelPath: "../models/blazeface/tfhub/model.json",
+        anchorSize: 128,
         inputSize: 128,
         maxFaces: 10,
         skipFrames: 10,
@@ -5112,7 +5122,7 @@ var require_config = __commonJS((exports2) => {
 var require_package = __commonJS((exports2, module2) => {
   module2.exports = {
     name: "@vladmandic/human",
-    version: "0.3.1",
+    version: "0.3.2",
     description: "human: 3D Face Detection, Iris Tracking and Age & Gender Prediction",
     sideEffects: false,
     main: "dist/human.cjs",
@@ -5242,17 +5252,13 @@ async function detect(input, userConfig) {
   const perf = {};
   let timeStamp;
   timeStamp = performance.now();
-  let poseRes = [];
   tf.engine().startScope();
-  if (config.body.enabled)
-    poseRes = await models.posenet.estimatePoses(input, config.body);
+  const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
   tf.engine().endScope();
   perf.body = Math.trunc(performance.now() - timeStamp);
   timeStamp = performance.now();
-  let handRes = [];
   tf.engine().startScope();
-  if (config.hand.enabled)
-    handRes = await models.handpose.estimateHands(input, config.hand);
+  const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
   tf.engine().endScope();
   perf.hand = Math.trunc(performance.now() - timeStamp);
   const faceRes = [];
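The scaleFactor change earlier in this file (dividing by this.width/this.height instead of the removed inputSizeData) keeps the same meaning: boxes come out in detector input space and are scaled back to the original frame. A small illustrative sketch, not code from this commit, and the [x1, y1, x2, y2] box layout is an assumption:

// illustrative only: map a detector-space box back to original image coordinates
function scaleBox(box, scaleFactor) {
  const [sx, sy] = scaleFactor; // [originalWidth / inputSize, originalHeight / inputSize]
  return [box[0] * sx, box[1] * sy, box[2] * sx, box[3] * sy];
}
// e.g. a 1280x720 frame with inputSize 128 gives scaleFactor [10, 5.625]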

File diffs suppressed because one or more lines are too long:
- dist/human.cjs (22 changes, vendored)
- dist/human.cjs.map (4 changes, vendored)
- dist/human.esm.js (22 changes, vendored)
- dist/human.js (22 changes, vendored)
- dist/human.js.map (4 changes, vendored)
- several additional generated files (names not shown in the rendered page)

View File

@@ -2,8 +2,9 @@ export default {
   face: {
     enabled: true, // refers to detector, but since all other face modules rely on detector, it should be a global
     detector: {
-      modelPath: '../models/blazeface/model.json',
-      inputSize: 128, // fixed value
+      modelPath: '../models/blazeface/tfhub/model.json', // can be blazeface-front or blazeface-back
+      anchorSize: 128, // fixed regardless of model
+      inputSize: 128, // fixed value: 128 for front and tfhub and 256 for back
       maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
       skipFrames: 10, // how many frames to go without running the bounding box detector
       minConfidence: 0.5, // threshold for discarding a prediction
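To select one of the newly added model variants, only modelPath and inputSize change; anchorSize stays at 128, as the comment above notes. A hedged example follows; the exact published path of the back model is an assumption based on the tfhub/front/back naming:

// hedged sketch: detector settings for the blazeface-back variant
const backDetectorConfig = {
  modelPath: '../models/blazeface/back/model.json', // hypothetical path for the 'back' variant
  anchorSize: 128, // fixed regardless of model
  inputSize: 256,  // 256 for back, vs 128 for front/tfhub
  maxFaces: 10,
  skipFrames: 10,
  minConfidence: 0.5,
  iouThreshold: 0.3,
  scoreThreshold: 0.7,
};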

View File

@@ -6,12 +6,12 @@ const ANCHORS_CONFIG = {
 };
 const NUM_LANDMARKS = 6;
-function generateAnchors(width, height, outputSpec) {
+function generateAnchors(anchorSize, outputSpec) {
   const anchors = [];
   for (let i = 0; i < outputSpec.strides.length; i++) {
     const stride = outputSpec.strides[i];
-    const gridRows = Math.floor((height + stride - 1) / stride);
-    const gridCols = Math.floor((width + stride - 1) / stride);
+    const gridRows = Math.floor((anchorSize + stride - 1) / stride);
+    const gridCols = Math.floor((anchorSize + stride - 1) / stride);
     const anchorsNum = outputSpec.anchors[i];
     for (let gridY = 0; gridY < gridRows; gridY++) {
       const anchorY = stride * (gridY + 0.5);
@@ -72,11 +72,11 @@ class BlazeFaceModel {
     this.blazeFaceModel = model;
     this.width = config.detector.inputSize;
     this.height = config.detector.inputSize;
+    this.anchorSize = config.detector.anchorSize;
     this.maxFaces = config.detector.maxFaces;
-    this.anchorsData = generateAnchors(config.detector.inputSize, config.detector.inputSize, ANCHORS_CONFIG);
+    this.anchorsData = generateAnchors(config.detector.anchorSize, ANCHORS_CONFIG);
     this.anchors = tf.tensor2d(this.anchorsData);
-    this.inputSizeData = [config.detector.inputSize, config.detector.inputSize];
-    this.inputSize = tf.tensor1d([config.detector.inputSize, config.detector.inputSize]);
+    this.inputSize = tf.tensor1d([this.width, this.height]);
     this.iouThreshold = config.detector.iouThreshold;
     this.scaleFaces = 0.8;
     this.scoreThreshold = config.detector.scoreThreshold;
@@ -87,11 +87,21 @@ class BlazeFaceModel {
     const resizedImage = inputImage.resizeBilinear([this.width, this.height]);
     const normalizedImage = tf.mul(tf.sub(resizedImage.div(255), 0.5), 2);
     const batchedPrediction = this.blazeFaceModel.predict(normalizedImage);
-    // todo: add handler for blazeface-front and blazeface-back
-    const prediction = batchedPrediction.squeeze();
+    let prediction;
+    // are we using tfhub or pinto converted model?
+    if (Array.isArray(batchedPrediction)) {
+      const sorted = batchedPrediction.sort((a, b) => a.size - b.size);
+      const concat384 = tf.concat([sorted[0], sorted[2]], 2); // dim: 384, 1 + 16
+      const concat512 = tf.concat([sorted[1], sorted[3]], 2); // dim: 512, 1 + 16
+      const concat = tf.concat([concat512, concat384], 1);
+      prediction = concat.squeeze(0);
+    } else {
+      prediction = batchedPrediction.squeeze(); // when using tfhub model
+    }
     const decodedBounds = decodeBounds(prediction, this.anchors, this.inputSize);
     const logits = tf.slice(prediction, [0, 0], [-1, 1]);
     const scoresOut = tf.sigmoid(logits).squeeze();
+    // console.log(prediction, decodedBounds, logits, scoresOut);
     return [prediction, decodedBounds, scoresOut];
   });
@@ -125,7 +135,7 @@ class BlazeFaceModel {
     detectedOutputs.dispose();
     return {
       boxes: annotatedBoxes,
-      scaleFactor: [inputImage.shape[2] / this.inputSizeData[0], inputImage.shape[1] / this.inputSizeData[1]],
+      scaleFactor: [inputImage.shape[2] / this.width, inputImage.shape[1] / this.height],
     };
   }
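The concat384/concat512 names follow directly from the anchor grid: with anchorSize 128 and ANCHORS_CONFIG anchors [2, 6] (strides of 8 and 16 are assumed here, since they are not visible in this hunk), the two output scales contribute 512 and 384 anchors, 896 in total, each carrying one score logit plus 16 box/landmark values. A small sketch of that arithmetic:

// worked sketch of the anchor counts behind the concat above; strides [8, 16]
// are an assumption (this diff only shows anchors: [2, 6])
const anchorSize = 128;
const strides = [8, 16];
const anchorsPerCell = [2, 6];
let total = 0;
for (let i = 0; i < strides.length; i++) {
  const grid = Math.floor((anchorSize + strides[i] - 1) / strides[i]); // 16, then 8
  total += grid * grid * anchorsPerCell[i]; // 16*16*2 = 512, then 8*8*6 = 384
}
console.log(total); // 896 -> prediction tensor of shape [896, 17] (1 score + 16 coords)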

View File

@@ -79,17 +79,15 @@ async function detect(input, userConfig) {
   // run posenet
   timeStamp = performance.now();
-  let poseRes = [];
   tf.engine().startScope();
-  if (config.body.enabled) poseRes = await models.posenet.estimatePoses(input, config.body);
+  const poseRes = config.body.enabled ? await models.posenet.estimatePoses(input, config.body) : [];
   tf.engine().endScope();
   perf.body = Math.trunc(performance.now() - timeStamp);
   // run handpose
   timeStamp = performance.now();
-  let handRes = [];
   tf.engine().startScope();
-  if (config.hand.enabled) handRes = await models.handpose.estimateHands(input, config.hand);
+  const handRes = config.hand.enabled ? await models.handpose.estimateHands(input, config.hand) : [];
   tf.engine().endScope();
   perf.hand = Math.trunc(performance.now() - timeStamp);
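Wrapping each stage in tf.engine().startScope() / endScope() is what keeps the refactor above leak-free: tensors allocated inside the scope are released when it ends. A small standalone sketch of the pattern, using the public TensorFlow.js memory API (illustration only, not code from this commit):

// standalone illustration of the scoping pattern used above
import * as tf from '@tensorflow/tfjs';

async function scopedStage() {
  const before = tf.memory().numTensors;
  tf.engine().startScope();
  const a = tf.ones([256, 256]);
  const b = a.mul(2); // intermediate tensors are tracked by the scope
  const sum = (await b.sum().data())[0]; // read the result out as a plain number
  tf.engine().endScope();
  const after = tf.memory().numTensors;
  console.log({ before, after, sum }); // before === after: intermediates were disposed
}

scopedStage();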