diff --git a/README.md b/README.md
index 857a3e5e..c26baa93 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking and Age & Gender Prediction
+# Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking, Age & Gender Prediction & Emotion Prediction
- [**Documentation**](https://github.com/vladmandic/human#readme)
- [**Code Repository**](https://github.com/vladmandic/human)
@@ -22,19 +22,6 @@ Compatible with Browser, WebWorker and NodeJS execution!
-## Credits
-
-This is an amalgamation of multiple existing models:
-
-- Face Detection: [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
-- Facial Spacial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
-- Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
-- Hand Detection & Skeleton: [**MediaPipe HandPose**](https://drive.google.com/file/d/1sv4sSb9BSNVZhLzxXJ0jBv9DqD-4jnAz/view)
-- Body Pose Detection: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
-- Age & Gender Prediction: [**SSR-Net**](https://github.com/shamangary/SSR-Net)
-
-
-
## Installation
**Important**
@@ -198,7 +185,7 @@ human.defaults = {
detector: {
modelPath: '../models/blazeface/model.json',
maxFaces: 10,
- skipFrames: 5,
+ skipFrames: 10,
minConfidence: 0.8,
iouThreshold: 0.3,
scoreThreshold: 0.75,
@@ -214,12 +201,19 @@ human.defaults = {
age: {
enabled: true,
modelPath: '../models/ssrnet-imdb-age/model.json',
- skipFrames: 5,
+ skipFrames: 10,
},
gender: {
enabled: true,
modelPath: '../models/ssrnet-imdb-gender/model.json',
},
+ emotion: {
+ enabled: true,
+ minConfidence: 0.5,
+ skipFrames: 10,
+ useGrayscale: true,
+ modelPath: '../models/emotion/model.json',
+ },
},
body: {
enabled: true,
@@ -230,7 +224,7 @@ human.defaults = {
},
hand: {
enabled: true,
- skipFrames: 5,
+ skipFrames: 10,
minConfidence: 0.8,
iouThreshold: 0.3,
scoreThreshold: 0.75,
@@ -253,6 +247,7 @@ Where:
- `minConfidence`: threshold for discarding a prediction
- `iouThreshold`: threshold for deciding whether boxes overlap too much in non-maximum suppression
- `scoreThreshold`: threshold for deciding when to remove boxes based on score in non-maximum suppression
+- `useGrayscale`: convert color input to grayscale before processing; when disabled, a single color channel is used instead, since color input is not supported
- `nmsRadius`: radius for deciding points are too close in non-maximum suppression
@@ -268,18 +263,18 @@ result = {
{
confidence, //
box, //
- mesh, // (468 base points & 10 iris points)
- annotations, // (32 base annotated landmarks & 2 iris annotations)
- iris, // (relative distance of iris to camera, multiple by focal lenght to get actual distance)
- age, // (estimated age)
- gender, // (male or female)
+ mesh, // 468 base points & 10 iris points
+ annotations, // 32 base annotated landmarks & 2 iris annotations
+ iris, // relative distance of iris to camera, multiply by focal length to get actual distance
+ age, // estimated age
+ gender, // 'male', 'female'
}
],
body: //
[
{
score, // ,
- keypoints, // (17 annotated landmarks)
+ keypoints, // 17 annotated landmarks
}
],
hand: //
@@ -287,8 +282,15 @@ result = {
{
confidence, // ,
box, // ,
- landmarks, // (21 points)
- annotations, // ]> (5 annotated landmakrs)
+ landmarks, // 21 points
+ annotations, // ]> 5 annotated landmarks
+ }
+ ],
+ emotion: //
+ [
+ {
+ score, // probability of emotion
+ emotion, // 'angry', 'discust', 'fear', 'happy', 'sad', 'surpise', 'neutral'
}
],
}
@@ -302,6 +304,7 @@ Additionally, `result` object includes internal performance data - total time sp
hand,
face,
agegender,
+ emotion,
total,
}
```
@@ -343,6 +346,7 @@ Performance per module:
- Face Iris: 25 FPS (includes face detect and face geometry)
- Age: 60 FPS (includes face detect)
- Gender: 60 FPS (includes face detect)
+- Emotion: 60 FPS (includes face detect)
- Hand: 40 FPS
- Body: 50 FPS
@@ -350,7 +354,19 @@ Library can also be used on mobile devices
+## Credits
+
+- Face Detection: [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
+- Facial Spatial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
+- Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
+- Hand Detection & Skeleton: [**MediaPipe HandPose**](https://drive.google.com/file/d/1sv4sSb9BSNVZhLzxXJ0jBv9DqD-4jnAz/view)
+- Body Pose Detection: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
+- Age & Gender Prediction: [**SSR-Net**](https://github.com/shamangary/SSR-Net)
+- Emotion Prediction: [**Oarriaga**](https://github.com/oarriaga/face_classification)
+
+
+
## Todo
-- Tweak default parameters
+- Tweak default parameters and factorization for age/gender/emotion
- Verify age/gender models
diff --git a/demo/demo-esm.js b/demo/demo-esm.js
index c57dc30c..f8bdf669 100644
--- a/demo/demo-esm.js
+++ b/demo/demo-esm.js
@@ -4,36 +4,52 @@ import human from '../dist/human.esm.js';
const ui = {
baseColor: 'rgba(255, 200, 255, 0.3)',
+ baseLabel: 'rgba(255, 200, 255, 0.8)',
baseFont: 'small-caps 1.2rem "Segoe UI"',
baseLineWidth: 16,
};
const config = {
face: {
- enabled: false,
+ enabled: true,
detector: { maxFaces: 10, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
mesh: { enabled: true },
iris: { enabled: true },
age: { enabled: true, skipFrames: 10 },
gender: { enabled: true },
+ emotion: { enabled: true, minConfidence: 0.5, useGrayscale: true },
},
- body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
+ body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
};
let settings;
+// Builds a single display string from all arguments: objects are JSON-encoded with braces, brackets and quotes
+// removed and a space added after each comma; every other value is appended via string concatenation.
+function str(...msg) {
+  // rest parameters always arrive as an array, so the former Array.isArray guard could never trigger and was dropped
+  const flatten = (entry) => JSON.stringify(entry).replace(/{|}|"|\[|\]/g, '').replace(/,/g, ', ');
+  return msg.reduce((line, entry) => line + (typeof entry === 'object' ? flatten(entry) : entry), '');
+}
+
async function drawFace(result, canvas) {
const ctx = canvas.getContext('2d');
- ctx.fillStyle = ui.baseColor;
ctx.strokeStyle = ui.baseColor;
ctx.font = ui.baseFont;
for (const face of result) {
+ ctx.fillStyle = ui.baseColor;
ctx.lineWidth = ui.baseLineWidth;
ctx.beginPath();
if (settings.getValue('Draw Boxes')) {
ctx.rect(face.box[0], face.box[1], face.box[2], face.box[3]);
}
- ctx.fillText(`face ${face.gender || ''} ${face.age || ''} ${face.iris ? 'iris: ' + face.iris : ''}`, face.box[0] + 2, face.box[1] + 22, face.box[2]);
+ const labelAgeGender = `${face.gender || ''} ${face.age || ''}`;
+ const labelIris = face.iris ? `iris: ${face.iris}` : '';
+ const labelEmotion = face.emotion && face.emotion[0] ? `emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : '';
+ ctx.fillStyle = ui.baseLabel;
+ ctx.fillText(`face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22, face.box[2]);
ctx.stroke();
ctx.lineWidth = 1;
if (face.mesh) {
@@ -140,7 +156,9 @@ async function drawHand(result, canvas) {
if (settings.getValue('Draw Boxes')) {
ctx.lineWidth = ui.baseLineWidth;
ctx.beginPath();
+ ctx.fillStyle = ui.baseColor;
ctx.rect(hand.box[0], hand.box[1], hand.box[2], hand.box[3]);
+ ctx.fillStyle = ui.baseLabel;
ctx.fillText('hand', hand.box[0] + 2, hand.box[1] + 22, hand.box[2]);
ctx.stroke();
}
@@ -199,11 +217,10 @@ async function runHumanDetect(input, canvas) {
drawHand(result.hand, canvas);
// update log
const engine = await human.tf.engine();
+ const memory = `Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors`;
log.innerText = `
- TFJS Version: ${human.tf.version_core} Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors
- GPU Memory: used ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes free ${Math.floor(1024 * 1024 * engine.backendInstance.numMBBeforeWarning).toLocaleString()} bytes
- Result Object Size: Face: ${(JSON.stringify(result.face)).length.toLocaleString()} bytes Body: ${(JSON.stringify(result.body)).length.toLocaleString()} bytes Hand: ${(JSON.stringify(result.hand)).length.toLocaleString()} bytes
- Performance: ${JSON.stringify(result.performance)}
+ TFJS Version: ${human.tf.version_core} | ${memory} | GPU: ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes
+ Performance: ${str(result.performance)} | Object size: ${(str(result)).length.toLocaleString()} bytes
`;
// rinse & repeate
// if (input.readyState) setTimeout(() => runHumanDetect(), 1000); // slow loop for debugging purposes
@@ -214,28 +231,36 @@ async function runHumanDetect(input, canvas) {
function setupGUI() {
// add all variables to ui control panel
settings = QuickSettings.create(10, 10, 'Settings', document.getElementById('main'));
- settings.addRange('FPS', 0, 100, 0, 1);
- settings.addBoolean('Pause', false, (val) => {
+ const style = document.createElement('style');
+ // style.type = 'text/css';
+ style.innerHTML = `
+ .qs_main { font: 1rem "Segoe UI"; }
+ .qs_label { font: 0.8rem "Segoe UI"; }
+ .qs_title_bar { display: none; }
+ .qs_content { background: darkslategray; }
+ .qs_container { background: transparent; color: white; margin: 6px; padding: 6px; }
+ .qs_checkbox_label { top: 2px; }
+ .qs_button { width: -webkit-fill-available; font: 1rem "Segoe UI"; cursor: pointer; }
+ `;
+ document.getElementsByTagName('head')[0].appendChild(style);
+ settings.addButton('Play/Pause', () => {
const video = document.getElementById('video');
const canvas = document.getElementById('canvas');
- if (val) video.pause();
- else video.play();
+ if (!video.paused) {
+ document.getElementById('log').innerText = 'Paused ...';
+ video.pause();
+ } else {
+ document.getElementById('log').innerText = 'Starting Human Library ...';
+ video.play();
+ }
runHumanDetect(video, canvas);
});
- settings.addHTML('line1', '
'); settings.hideTitle('line1');
- settings.addBoolean('Draw Boxes', false);
- settings.addBoolean('Draw Points', true);
- settings.addBoolean('Draw Polygons', true);
- settings.addBoolean('Fill Polygons', true);
- settings.bindText('baseColor', ui.baseColor, config);
- settings.bindText('baseFont', ui.baseFont, config);
- settings.bindRange('baseLineWidth', 1, 100, ui.baseLineWidth, 1, config);
- settings.addHTML('line2', '
'); settings.hideTitle('line2');
settings.addBoolean('Face Detect', config.face.enabled, (val) => config.face.enabled = val);
settings.addBoolean('Face Mesh', config.face.mesh.enabled, (val) => config.face.mesh.enabled = val);
settings.addBoolean('Face Iris', config.face.iris.enabled, (val) => config.face.iris.enabled = val);
settings.addBoolean('Face Age', config.face.age.enabled, (val) => config.face.age.enabled = val);
settings.addBoolean('Face Gender', config.face.gender.enabled, (val) => config.face.gender.enabled = val);
+ settings.addBoolean('Face Emotion', config.face.emotion.enabled, (val) => config.face.emotion.enabled = val);
settings.addBoolean('Body Pose', config.body.enabled, (val) => config.body.enabled = val);
settings.addBoolean('Hand Pose', config.hand.enabled, (val) => config.hand.enabled = val);
settings.addHTML('line3', '
'); settings.hideTitle('line3');
@@ -245,11 +270,13 @@ function setupGUI() {
});
settings.addRange('Skip Frames', 1, 20, config.face.detector.skipFrames, 1, (val) => {
config.face.detector.skipFrames = parseInt(val);
+ config.face.emotion.skipFrames = parseInt(val);
config.face.age.skipFrames = parseInt(val);
config.hand.skipFrames = parseInt(val);
});
settings.addRange('Min Confidence', 0.1, 1.0, config.face.detector.minConfidence, 0.05, (val) => {
config.face.detector.minConfidence = parseFloat(val);
+ config.face.emotion.minConfidence = parseFloat(val);
config.hand.minConfidence = parseFloat(val);
});
settings.addRange('Score Threshold', 0.1, 1.0, config.face.detector.scoreThreshold, 0.05, (val) => {
@@ -261,6 +288,13 @@ function setupGUI() {
config.face.detector.iouThreshold = parseFloat(val);
config.hand.iouThreshold = parseFloat(val);
});
+ settings.addHTML('line1', '
'); settings.hideTitle('line1');
+ settings.addBoolean('Draw Boxes', true);
+ settings.addBoolean('Draw Points', true);
+ settings.addBoolean('Draw Polygons', true);
+ settings.addBoolean('Fill Polygons', true);
+ settings.addHTML('line1', '
'); settings.hideTitle('line1');
+ settings.addRange('FPS', 0, 100, 0, 1);
}
async function setupCanvas(input) {
@@ -289,6 +323,7 @@ async function setupCamera() {
video.width = video.videoWidth;
video.height = video.videoHeight;
video.play();
+ video.pause();
resolve(video);
};
});
@@ -316,9 +351,9 @@ async function main() {
// or setup image
// const image = await setupImage();
// setup output canvas from input object, select video or image
- const canvas = await setupCanvas(video);
+ await setupCanvas(video);
// run actual detection. if input is video, it will run in a loop else it will run only once
- runHumanDetect(video, canvas);
+ // runHumanDetect(video, canvas);
}
window.onload = main;
diff --git a/demo/demo-node.js b/demo/demo-node.js
index 64cdd2ed..7862fc68 100644
--- a/demo/demo-node.js
+++ b/demo/demo-node.js
@@ -26,7 +26,7 @@ const logger = new console.Console({
const config = {
face: {
enabled: false,
- detector: { modelPath: 'file://models/blazeface/model.json', inputSize: 128, maxFaces: 10, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
+ detector: { modelPath: 'file://models/blazeface/model.json', inputSize: 128, maxFaces: 10, skipFrames: 10, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
mesh: { enabled: true, modelPath: 'file://models/facemesh/model.json', inputSize: 192 },
iris: { enabled: true, modelPath: 'file://models/iris/model.json', inputSize: 192 },
age: { enabled: true, modelPath: 'file://models/ssrnet-age/imdb/model.json', inputSize: 64, skipFrames: 5 },
@@ -36,7 +36,7 @@ const config = {
hand: {
enabled: false,
inputSize: 256,
- skipFrames: 5,
+ skipFrames: 10,
minConfidence: 0.8,
iouThreshold: 0.3,
scoreThreshold: 0.75,
diff --git a/src/config.js b/src/config.js
index ff72d491..883282e7 100644
--- a/src/config.js
+++ b/src/config.js
@@ -31,6 +31,14 @@ export default {
enabled: true,
modelPath: '../models/ssrnet-gender/imdb/model.json',
},
+ emotion: {
+ enabled: true,
+ inputSize: 64, // fixed value
+ minConfidence: 0.5,
+ skipFrames: 10,
+ useGrayscale: true,
+ modelPath: '../models/emotion/model.json',
+ },
},
body: {
enabled: true,
diff --git a/src/emotion/emotion.js b/src/emotion/emotion.js
new file mode 100644
index 00000000..ff607728
--- /dev/null
+++ b/src/emotion/emotion.js
@@ -0,0 +1,61 @@
+const tf = require('@tensorflow/tfjs');
+
+const annotations = ['angry', 'discust', 'fear', 'happy', 'sad', 'surpise', 'neutral'];
+const models = {};
+let last = [];
+let frame = 0;
+const multiplier = 1.5;
+
+/**
+ * Turns a pixel source into an input tensor for the emotion model.
+ * Reads `image` through tf.browser.fromPixels requesting one channel, resizes it bilinearly to size x size,
+ * prepends a dimension at axis 0 and casts the result to float32.
+ * All work happens inside tf.tidy; the tensor returned from the callback is handed back to the caller.
+ */
+function getImage(image, size) {
+  return tf.tidy(() => {
+    const pixels = tf.browser.fromPixels(image, 1);
+    const scaled = tf.image.resizeBilinear(pixels, [size, size]);
+    return tf.cast(tf.expandDims(scaled, 0), 'float32');
+  });
+}
+
+// Loads the emotion graph model from config.face.emotion.modelPath unless models.emotion is already set.
+async function load(config) {
+  if (models.emotion) return;
+  models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
+}
+
+/**
+ * Runs the emotion model on a single face image.
+ *
+ * @param image tf.Tensor (split into three channels along axis 3) or a pixel source that is passed to getImage()
+ * @param config full configuration; reads config.face.emotion.{enabled, skipFrames, inputSize, useGrayscale, minConfidence}
+ * @returns array of { score, emotion } sorted by descending score; on skipped calls the array cached from the last model run
+ */
+async function predict(image, config) {
+  const options = config.face.emotion;
+  // Run the model on the first call of every cycle of `skipFrames` calls and serve the cached result on the others.
+  // The previous check was inverted: it served the cache on only one call per cycle and never ran the model when skipFrames was 1.
+  const cycle = options.skipFrames > 0 ? options.skipFrames : 1; // missing or non-positive value means "run on every call"
+  const useCached = frame > 0;
+  frame = (frame + 1) % cycle;
+  if (useCached) return last;
+  const obj = [];
+  if (options.enabled) {
+    // single-channel input tensor; built only when the model is actually going to run
+    const enhance = tf.tidy(() => {
+      if (!(image instanceof tf.Tensor)) return getImage(image, options.inputSize);
+      const resize = tf.image.resizeBilinear(image, [options.inputSize, options.inputSize], false);
+      const [r, g, b] = tf.split(resize, 3, 3);
+      if (!options.useGrayscale) return g; // without grayscale conversion only the second (green) channel is used
+      // 0.2989 * R + 0.5870 * G + 0.1140 * B // https://www.mathworks.com/help/matlab/ref/rgb2gray.html
+      return tf.addN([tf.mul(r, [0.2989]), tf.mul(g, [0.5870]), tf.mul(b, [0.1140])]);
+    });
+    let emotionT;
+    try {
+      emotionT = await models.emotion.predict(enhance);
+      const data = await emotionT.data();
+      for (let i = 0; i < data.length; i++) {
+        // raw model outputs are boosted by `multiplier` before thresholding; reported score is capped at 0.99
+        if (multiplier * data[i] > options.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
+      }
+      obj.sort((a, b) => b.score - a.score);
+    } finally {
+      // release tensors even when prediction or the data download throws
+      if (emotionT) tf.dispose(emotionT);
+      tf.dispose(enhance);
+    }
+  }
+  last = obj;
+  return obj;
+}
+
+exports.predict = predict;
+exports.load = load;
diff --git a/src/index.js b/src/index.js
index faf36337..545e11a9 100644
--- a/src/index.js
+++ b/src/index.js
@@ -1,6 +1,7 @@
const tf = require('@tensorflow/tfjs');
const facemesh = require('./facemesh/facemesh.js');
const ssrnet = require('./ssrnet/ssrnet.js');
+const emotion = require('./emotion/emotion.js');
const posenet = require('./posenet/posenet.js');
const handpose = require('./handpose/handpose.js');
const defaults = require('./config.js').default;
@@ -38,6 +39,7 @@ async function detect(input, userConfig) {
// load models if enabled
if (config.face.age.enabled) await ssrnet.loadAge(config);
if (config.face.gender.enabled) await ssrnet.loadGender(config);
+ if (config.face.emotion.enabled) await emotion.load(config);
if (config.body.enabled && !models.posenet) models.posenet = await posenet.load(config.body);
if (config.hand.enabled && !models.handpose) models.handpose = await handpose.load(config.hand);
if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
@@ -76,7 +78,12 @@ async function detect(input, userConfig) {
timeStamp = performance.now();
const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
perf.agegender = Math.trunc(performance.now() - timeStamp);
+ // run emotion, inherits face from blazeface
+ timeStamp = performance.now();
+ const emotiondata = config.face.emotion.enabled ? await emotion.predict(face.image, config) : {};
+ perf.emotion = Math.trunc(performance.now() - timeStamp);
face.image.dispose();
+ // calculate iris distance
// iris: array[ bottom, left, top, right, center ]
const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
? Math.max(face.annotations.leftEyeIris[3][0] - face.annotations.leftEyeIris[1][0], face.annotations.rightEyeIris[3][0] - face.annotations.rightEyeIris[1][0])
@@ -88,7 +95,8 @@ async function detect(input, userConfig) {
annotations: face.annotations,
age: ssrdata.age,
gender: ssrdata.gender,
- iris: (iris !== 0) ? Math.trunc(100 * 11.7 / iris) / 100 : 0,
+ emotion: emotiondata,
+ iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
});
}
}
@@ -98,7 +106,6 @@ async function detect(input, userConfig) {
tf.engine().endScope();
// combine results
perf.total = Object.values(perf).reduce((a, b) => a + b);
- console.log('total', perf.total);
resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf });
});
}