added emotion backend

pull/293/head
Vladimir Mandic 2020-10-14 18:22:38 -04:00
parent 23aeb81b76
commit 3ca9edeaa5
6 changed files with 180 additions and 53 deletions

View File

@ -1,4 +1,4 @@
# Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking and Age & Gender Prediction
# Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking, Age & Gender Prediction & Emotion Prediction
- [**Documentation**](https://github.com/vladmandic/human#readme)
- [**Code Repository**](https://github.com/vladmandic/human)
@ -22,19 +22,6 @@ Compatible with Browser, WebWorker and NodeJS execution!
<hr>
## Credits
This is an amalgamation of multiple existing models:
- Face Detection: [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
- Facial Spacial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
- Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
- Hand Detection & Skeleton: [**MediaPipe HandPose**](https://drive.google.com/file/d/1sv4sSb9BSNVZhLzxXJ0jBv9DqD-4jnAz/view)
- Body Pose Detection: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
- Age & Gender Prediction: [**SSR-Net**](https://github.com/shamangary/SSR-Net)
<hr>
## Installation
**Important**
@ -198,7 +185,7 @@ human.defaults = {
detector: {
modelPath: '../models/blazeface/model.json',
maxFaces: 10,
skipFrames: 5,
skipFrames: 10,
minConfidence: 0.8,
iouThreshold: 0.3,
scoreThreshold: 0.75,
@ -214,12 +201,19 @@ human.defaults = {
age: {
enabled: true,
modelPath: '../models/ssrnet-imdb-age/model.json',
skipFrames: 5,
skipFrames: 10,
},
gender: {
enabled: true,
modelPath: '../models/ssrnet-imdb-gender/model.json',
},
emotion: {
enabled: true,
minConfidence: 0.5,
skipFrames: 10,
useGrayscale: true,
modelPath: '../models/emotion/model.json',
},
},
body: {
enabled: true,
@ -230,7 +224,7 @@ human.defaults = {
},
hand: {
enabled: true,
skipFrames: 5,
skipFrames: 10,
minConfidence: 0.8,
iouThreshold: 0.3,
scoreThreshold: 0.75,
@ -253,6 +247,7 @@ Where:
- `minConfidence`: threshold for discarding a prediction
- `iouThreshold`: threshold for deciding whether boxes overlap too much in non-maximum suppression
- `scoreThreshold`: threshold for deciding when to remove boxes based on score in non-maximum suppression
- `useGrayscale`: convert color input to grayscale before processing or use single channels when color input is not supported
- `nmsRadius`: radius for deciding points are too close in non-maximum suppression
<hr>
@ -268,18 +263,18 @@ result = {
{
confidence, // <number>
box, // <array [x, y, width, height]>
mesh, // <array of 3D points [x, y, z]> (468 base points & 10 iris points)
annotations, // <list of object { landmark: array of points }> (32 base annotated landmarks & 2 iris annotations)
iris, // <number> (relative distance of iris to camera, multiple by focal lenght to get actual distance)
age, // <number> (estimated age)
gender, // <string> (male or female)
mesh, // <array of 3D points [x, y, z]> 468 base points & 10 iris points
annotations, // <list of object { landmark: array of points }> 32 base annotated landmarks & 2 iris annotations
iris, // <number> relative distance of iris to camera, multiple by focal lenght to get actual distance
age, // <number> estimated age
gender, // <string> 'male', 'female'
}
],
body: // <array of detected objects>
[
{
score, // <number>,
keypoints, // <array of 2D landmarks [ score, landmark, position [x, y] ]> (17 annotated landmarks)
keypoints, // <array of 2D landmarks [ score, landmark, position [x, y] ]> 17 annotated landmarks
}
],
hand: // <array of detected objects>
@ -287,8 +282,15 @@ result = {
{
confidence, // <number>,
box, // <array [x, y, width, height]>,
landmarks, // <array of 3D points [x, y,z]> (21 points)
annotations, // <array of 3D landmarks [ landmark: <array of points> ]> (5 annotated landmakrs)
landmarks, // <array of 3D points [x, y,z]> 21 points
annotations, // <array of 3D landmarks [ landmark: <array of points> ]> 5 annotated landmakrs
}
],
emotion: // <array of emotions>
[
{
score, // <number> probabily of emotion
emotion, // <string> 'angry', 'discust', 'fear', 'happy', 'sad', 'surpise', 'neutral'
}
],
}
@ -302,6 +304,7 @@ Additionally, `result` object includes internal performance data - total time sp
hand,
face,
agegender,
emotion,
total,
}
```
@ -343,6 +346,7 @@ Performance per module:
- Face Iris: 25 FPS (includes face detect and face geometry)
- Age: 60 FPS (includes face detect)
- Gender: 60 FPS (includes face detect)
- Emotion: 60 FPS (includes face detect)
- Hand: 40 FPS
- Body: 50 FPS
@ -350,7 +354,19 @@ Library can also be used on mobile devices
<hr>
## Credits
- Face Detection: [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
- Facial Spacial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
- Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
- Hand Detection & Skeleton: [**MediaPipe HandPose**](https://drive.google.com/file/d/1sv4sSb9BSNVZhLzxXJ0jBv9DqD-4jnAz/view)
- Body Pose Detection: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
- Age & Gender Prediction: [**SSR-Net**](https://github.com/shamangary/SSR-Net)
- Emotion Prediction: [**Oarriaga**](https://github.com/oarriaga/face_classification)
<hr>
## Todo
- Tweak default parameters
- Tweak default parameters and factorization for age/gender/emotion
- Verify age/gender models

View File

@ -4,36 +4,52 @@ import human from '../dist/human.esm.js';
const ui = {
baseColor: 'rgba(255, 200, 255, 0.3)',
baseLabel: 'rgba(255, 200, 255, 0.8)',
baseFont: 'small-caps 1.2rem "Segoe UI"',
baseLineWidth: 16,
};
const config = {
face: {
enabled: false,
enabled: true,
detector: { maxFaces: 10, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
mesh: { enabled: true },
iris: { enabled: true },
age: { enabled: true, skipFrames: 10 },
gender: { enabled: true },
emotion: { enabled: true, minConfidence: 0.5, useGrayscale: true },
},
body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
};
let settings;
function str(...msg) {
if (!Array.isArray(msg)) return msg;
let line = '';
for (const entry of msg) {
if (typeof entry === 'object') line += JSON.stringify(entry).replace(/{|}|"|\[|\]/g, '').replace(/,/g, ', ');
else line += entry;
}
return line;
}
async function drawFace(result, canvas) {
const ctx = canvas.getContext('2d');
ctx.fillStyle = ui.baseColor;
ctx.strokeStyle = ui.baseColor;
ctx.font = ui.baseFont;
for (const face of result) {
ctx.fillStyle = ui.baseColor;
ctx.lineWidth = ui.baseLineWidth;
ctx.beginPath();
if (settings.getValue('Draw Boxes')) {
ctx.rect(face.box[0], face.box[1], face.box[2], face.box[3]);
}
ctx.fillText(`face ${face.gender || ''} ${face.age || ''} ${face.iris ? 'iris: ' + face.iris : ''}`, face.box[0] + 2, face.box[1] + 22, face.box[2]);
const labelAgeGender = `${face.gender || ''} ${face.age || ''}`;
const labelIris = face.iris ? `iris: ${face.iris}` : '';
const labelEmotion = face.emotion && face.emotion[0] ? `emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : '';
ctx.fillStyle = ui.baseLabel;
ctx.fillText(`face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22, face.box[2]);
ctx.stroke();
ctx.lineWidth = 1;
if (face.mesh) {
@ -140,7 +156,9 @@ async function drawHand(result, canvas) {
if (settings.getValue('Draw Boxes')) {
ctx.lineWidth = ui.baseLineWidth;
ctx.beginPath();
ctx.fillStyle = ui.baseColor;
ctx.rect(hand.box[0], hand.box[1], hand.box[2], hand.box[3]);
ctx.fillStyle = ui.baseLabel;
ctx.fillText('hand', hand.box[0] + 2, hand.box[1] + 22, hand.box[2]);
ctx.stroke();
}
@ -199,11 +217,10 @@ async function runHumanDetect(input, canvas) {
drawHand(result.hand, canvas);
// update log
const engine = await human.tf.engine();
const memory = `Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors`;
log.innerText = `
TFJS Version: ${human.tf.version_core} Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors
GPU Memory: used ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes free ${Math.floor(1024 * 1024 * engine.backendInstance.numMBBeforeWarning).toLocaleString()} bytes
Result Object Size: Face: ${(JSON.stringify(result.face)).length.toLocaleString()} bytes Body: ${(JSON.stringify(result.body)).length.toLocaleString()} bytes Hand: ${(JSON.stringify(result.hand)).length.toLocaleString()} bytes
Performance: ${JSON.stringify(result.performance)}
TFJS Version: ${human.tf.version_core} | ${memory} | GPU: ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes
Performance: ${str(result.performance)} | Object size: ${(str(result)).length.toLocaleString()} bytes
`;
// rinse & repeate
// if (input.readyState) setTimeout(() => runHumanDetect(), 1000); // slow loop for debugging purposes
@ -214,28 +231,36 @@ async function runHumanDetect(input, canvas) {
function setupGUI() {
// add all variables to ui control panel
settings = QuickSettings.create(10, 10, 'Settings', document.getElementById('main'));
settings.addRange('FPS', 0, 100, 0, 1);
settings.addBoolean('Pause', false, (val) => {
const style = document.createElement('style');
// style.type = 'text/css';
style.innerHTML = `
.qs_main { font: 1rem "Segoe UI"; }
.qs_label { font: 0.8rem "Segoe UI"; }
.qs_title_bar { display: none; }
.qs_content { background: darkslategray; }
.qs_container { background: transparent; color: white; margin: 6px; padding: 6px; }
.qs_checkbox_label { top: 2px; }
.qs_button { width: -webkit-fill-available; font: 1rem "Segoe UI"; cursor: pointer; }
`;
document.getElementsByTagName('head')[0].appendChild(style);
settings.addButton('Play/Pause', () => {
const video = document.getElementById('video');
const canvas = document.getElementById('canvas');
if (val) video.pause();
else video.play();
if (!video.paused) {
document.getElementById('log').innerText = 'Paused ...';
video.pause();
} else {
document.getElementById('log').innerText = 'Starting Human Library ...';
video.play();
}
runHumanDetect(video, canvas);
});
settings.addHTML('line1', '<hr>'); settings.hideTitle('line1');
settings.addBoolean('Draw Boxes', false);
settings.addBoolean('Draw Points', true);
settings.addBoolean('Draw Polygons', true);
settings.addBoolean('Fill Polygons', true);
settings.bindText('baseColor', ui.baseColor, config);
settings.bindText('baseFont', ui.baseFont, config);
settings.bindRange('baseLineWidth', 1, 100, ui.baseLineWidth, 1, config);
settings.addHTML('line2', '<hr>'); settings.hideTitle('line2');
settings.addBoolean('Face Detect', config.face.enabled, (val) => config.face.enabled = val);
settings.addBoolean('Face Mesh', config.face.mesh.enabled, (val) => config.face.mesh.enabled = val);
settings.addBoolean('Face Iris', config.face.iris.enabled, (val) => config.face.iris.enabled = val);
settings.addBoolean('Face Age', config.face.age.enabled, (val) => config.face.age.enabled = val);
settings.addBoolean('Face Gender', config.face.gender.enabled, (val) => config.face.gender.enabled = val);
settings.addBoolean('Face Emotion', config.face.emotion.enabled, (val) => config.face.emotion.enabled = val);
settings.addBoolean('Body Pose', config.body.enabled, (val) => config.body.enabled = val);
settings.addBoolean('Hand Pose', config.hand.enabled, (val) => config.hand.enabled = val);
settings.addHTML('line3', '<hr>'); settings.hideTitle('line3');
@ -245,11 +270,13 @@ function setupGUI() {
});
settings.addRange('Skip Frames', 1, 20, config.face.detector.skipFrames, 1, (val) => {
config.face.detector.skipFrames = parseInt(val);
config.face.emotion.skipFrames = parseInt(val);
config.face.age.skipFrames = parseInt(val);
config.hand.skipFrames = parseInt(val);
});
settings.addRange('Min Confidence', 0.1, 1.0, config.face.detector.minConfidence, 0.05, (val) => {
config.face.detector.minConfidence = parseFloat(val);
config.face.emotion.minConfidence = parseFloat(val);
config.hand.minConfidence = parseFloat(val);
});
settings.addRange('Score Threshold', 0.1, 1.0, config.face.detector.scoreThreshold, 0.05, (val) => {
@ -261,6 +288,13 @@ function setupGUI() {
config.face.detector.iouThreshold = parseFloat(val);
config.hand.iouThreshold = parseFloat(val);
});
settings.addHTML('line1', '<hr>'); settings.hideTitle('line1');
settings.addBoolean('Draw Boxes', true);
settings.addBoolean('Draw Points', true);
settings.addBoolean('Draw Polygons', true);
settings.addBoolean('Fill Polygons', true);
settings.addHTML('line1', '<hr>'); settings.hideTitle('line1');
settings.addRange('FPS', 0, 100, 0, 1);
}
async function setupCanvas(input) {
@ -289,6 +323,7 @@ async function setupCamera() {
video.width = video.videoWidth;
video.height = video.videoHeight;
video.play();
video.pause();
resolve(video);
};
});
@ -316,9 +351,9 @@ async function main() {
// or setup image
// const image = await setupImage();
// setup output canvas from input object, select video or image
const canvas = await setupCanvas(video);
await setupCanvas(video);
// run actual detection. if input is video, it will run in a loop else it will run only once
runHumanDetect(video, canvas);
// runHumanDetect(video, canvas);
}
window.onload = main;

View File

@ -26,7 +26,7 @@ const logger = new console.Console({
const config = {
face: {
enabled: false,
detector: { modelPath: 'file://models/blazeface/model.json', inputSize: 128, maxFaces: 10, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
detector: { modelPath: 'file://models/blazeface/model.json', inputSize: 128, maxFaces: 10, skipFrames: 10, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
mesh: { enabled: true, modelPath: 'file://models/facemesh/model.json', inputSize: 192 },
iris: { enabled: true, modelPath: 'file://models/iris/model.json', inputSize: 192 },
age: { enabled: true, modelPath: 'file://models/ssrnet-age/imdb/model.json', inputSize: 64, skipFrames: 5 },
@ -36,7 +36,7 @@ const config = {
hand: {
enabled: false,
inputSize: 256,
skipFrames: 5,
skipFrames: 10,
minConfidence: 0.8,
iouThreshold: 0.3,
scoreThreshold: 0.75,

View File

@ -31,6 +31,14 @@ export default {
enabled: true,
modelPath: '../models/ssrnet-gender/imdb/model.json',
},
emotion: {
enabled: true,
inputSize: 64, // fixed value
minConfidence: 0.5,
skipFrames: 10,
useGrayscale: true,
modelPath: '../models/emotion/model.json',
},
},
body: {
enabled: true,

61
src/emotion/emotion.js Normal file
View File

@ -0,0 +1,61 @@
const tf = require('@tensorflow/tfjs');
const annotations = ['angry', 'discust', 'fear', 'happy', 'sad', 'surpise', 'neutral'];
const models = {};
let last = [];
let frame = 0;
const multiplier = 1.5;
function getImage(image, size) {
const tensor = tf.tidy(() => {
const buffer = tf.browser.fromPixels(image, 1);
const resize = tf.image.resizeBilinear(buffer, [size, size]);
const expand = tf.cast(tf.expandDims(resize, 0), 'float32');
return expand;
});
return tensor;
}
async function load(config) {
if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
}
async function predict(image, config) {
frame += 1;
if (frame >= config.face.emotion.skipFrames) {
frame = 0;
return last;
}
const enhance = tf.tidy(() => {
if (image instanceof tf.Tensor) {
const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
const [r, g, b] = tf.split(resize, 3, 3);
if (config.face.emotion.useGrayscale) {
// 0.2989 * R + 0.5870 * G + 0.1140 * B // https://www.mathworks.com/help/matlab/ref/rgb2gray.html
const r1 = tf.mul(r, [0.2989]);
const g1 = tf.mul(g, [0.5870]);
const b1 = tf.mul(b, [0.1140]);
const grayscale = tf.addN([r1, g1, b1]);
return grayscale;
}
return g;
}
return getImage(image, config.face.emotion.inputSize);
});
const obj = [];
if (config.face.emotion.enabled) {
const emotionT = await models.emotion.predict(enhance);
const data = await emotionT.data();
for (let i = 0; i < data.length; i++) {
if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
}
obj.sort((a, b) => b.score - a.score);
tf.dispose(emotionT);
}
tf.dispose(enhance);
last = obj;
return obj;
}
exports.predict = predict;
exports.load = load;

View File

@ -1,6 +1,7 @@
const tf = require('@tensorflow/tfjs');
const facemesh = require('./facemesh/facemesh.js');
const ssrnet = require('./ssrnet/ssrnet.js');
const emotion = require('./emotion/emotion.js');
const posenet = require('./posenet/posenet.js');
const handpose = require('./handpose/handpose.js');
const defaults = require('./config.js').default;
@ -38,6 +39,7 @@ async function detect(input, userConfig) {
// load models if enabled
if (config.face.age.enabled) await ssrnet.loadAge(config);
if (config.face.gender.enabled) await ssrnet.loadGender(config);
if (config.face.emotion.enabled) await emotion.load(config);
if (config.body.enabled && !models.posenet) models.posenet = await posenet.load(config.body);
if (config.hand.enabled && !models.handpose) models.handpose = await handpose.load(config.hand);
if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
@ -76,7 +78,12 @@ async function detect(input, userConfig) {
timeStamp = performance.now();
const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
perf.agegender = Math.trunc(performance.now() - timeStamp);
// run emotion, inherits face from blazeface
timeStamp = performance.now();
const emotiondata = config.face.emotion.enabled ? await emotion.predict(face.image, config) : {};
perf.emotion = Math.trunc(performance.now() - timeStamp);
face.image.dispose();
// calculate iris distance
// iris: array[ bottom, left, top, right, center ]
const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
? Math.max(face.annotations.leftEyeIris[3][0] - face.annotations.leftEyeIris[1][0], face.annotations.rightEyeIris[3][0] - face.annotations.rightEyeIris[1][0])
@ -88,7 +95,8 @@ async function detect(input, userConfig) {
annotations: face.annotations,
age: ssrdata.age,
gender: ssrdata.gender,
iris: (iris !== 0) ? Math.trunc(100 * 11.7 / iris) / 100 : 0,
emotion: emotiondata,
iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
});
}
}
@ -98,7 +106,6 @@ async function detect(input, userConfig) {
tf.engine().endScope();
// combine results
perf.total = Object.values(perf).reduce((a, b) => a + b);
console.log('total', perf.total);
resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf });
});
}