diff --git a/README.md b/README.md
index 857a3e5e..c26baa93 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking and Age & Gender Prediction
+# Human: 3D Face Detection, Body Pose, Hand & Finger Tracking, Iris Tracking, Age & Gender & Emotion Prediction
 
 - [**Documentation**](https://github.com/vladmandic/human#readme)
 - [**Code Repository**](https://github.com/vladmandic/human)
@@ -22,19 +22,6 @@ Compatible with Browser, WebWorker and NodeJS execution!
-## Credits - -This is an amalgamation of multiple existing models: - -- Face Detection: [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view) -- Facial Spacial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view) -- Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view) -- Hand Detection & Skeleton: [**MediaPipe HandPose**](https://drive.google.com/file/d/1sv4sSb9BSNVZhLzxXJ0jBv9DqD-4jnAz/view) -- Body Pose Detection: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5) -- Age & Gender Prediction: [**SSR-Net**](https://github.com/shamangary/SSR-Net) - -
-
 ## Installation
 
 **Important**
 
@@ -198,7 +185,7 @@ human.defaults = {
     detector: {
       modelPath: '../models/blazeface/model.json',
       maxFaces: 10,
-      skipFrames: 5,
+      skipFrames: 10,
       minConfidence: 0.8,
       iouThreshold: 0.3,
       scoreThreshold: 0.75,
@@ -214,12 +201,19 @@ human.defaults = {
     age: {
       enabled: true,
       modelPath: '../models/ssrnet-imdb-age/model.json',
-      skipFrames: 5,
+      skipFrames: 10,
     },
     gender: {
       enabled: true,
       modelPath: '../models/ssrnet-imdb-gender/model.json',
     },
+    emotion: {
+      enabled: true,
+      minConfidence: 0.5,
+      skipFrames: 10,
+      useGrayscale: true,
+      modelPath: '../models/emotion/model.json',
+    },
   },
   body: {
     enabled: true,
@@ -230,7 +224,7 @@ human.defaults = {
   },
   hand: {
     enabled: true,
-    skipFrames: 5,
+    skipFrames: 10,
     minConfidence: 0.8,
     iouThreshold: 0.3,
     scoreThreshold: 0.75,
@@ -253,6 +247,7 @@ Where:
 - `minConfidence`: threshold for discarding a prediction
 - `iouThreshold`: threshold for deciding whether boxes overlap too much in non-maximum suppression
 - `scoreThreshold`: threshold for deciding when to remove boxes based on score in non-maximum suppression
+- `useGrayscale`: convert color input to grayscale before processing, or use a single channel when color input is not supported
 - `nmsRadius`: radius for deciding points are too close in non-maximum suppression
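For example, any of these defaults can be overridden per call by passing a partial configuration object as the second argument to `detect()`. A minimal sketch, assuming `detect()` merges the overrides with `human.defaults` and that the bundle path matches your build output:

```js
import human from './dist/human.esm.js'; // path is an assumption; use your build location

async function analyzeFrame(video) {
  // only the listed values differ from human.defaults
  const result = await human.detect(video, {
    face: {
      detector: { maxFaces: 1 },
      emotion: { enabled: true, minConfidence: 0.5, skipFrames: 10, useGrayscale: true },
    },
    body: { enabled: false },
    hand: { enabled: false },
  });
  return result;
}
```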
@@ -268,18 +263,19 @@ result = {
       {
         confidence,   //
         box,          //
-        mesh,         // (468 base points & 10 iris points)
-        annotations,  // (32 base annotated landmarks & 2 iris annotations)
-        iris,         // (relative distance of iris to camera, multiple by focal lenght to get actual distance)
-        age,          // (estimated age)
-        gender,       // (male or female)
+        mesh,         // 468 base points & 10 iris points
+        annotations,  // 32 base annotated landmarks & 2 iris annotations
+        iris,         // relative distance of iris to camera; multiply by focal length to get actual distance
+        age,          // estimated age
+        gender,       // 'male' or 'female'
+        emotion,      // array of { score, emotion }, sorted by score
       }
     ],
   body: //
     [
       {
         score,       //
-        keypoints,   // (17 annotated landmarks)
+        keypoints,   // 17 annotated landmarks
       }
     ],
   hand: //
@@ -287,8 +283,8 @@ result = {
       {
         confidence,  //
         box,         //
-        landmarks,   // (21 points)
-        annotations, // (5 annotated landmakrs)
+        landmarks,   // 21 points
+        annotations, // 5 annotated landmarks
       }
     ],
 }
@@ -302,6 +298,7 @@ Additionally, `result` object includes internal performance data - total time sp
 {
   hand,
   face,
   agegender,
+  emotion,
   total,
 }
@@ -343,6 +340,7 @@ Performance per module:
 - Face Iris: 25 FPS (includes face detect and face geometry)
 - Age: 60 FPS (includes face detect)
 - Gender: 60 FPS (includes face detect)
+- Emotion: 60 FPS (includes face detect)
 - Hand: 40 FPS
 - Body: 50 FPS
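A sketch of consuming the per-face result fields documented above, assuming a `video` element and that models load on the first `detect()` call:

```js
const result = await human.detect(video);
for (const face of result.face) {
  const top = (face.emotion && face.emotion[0]) || null; // highest-scoring emotion, if any
  const who = `${face.gender || 'unknown'} age ${face.age || 'n/a'}`;
  console.log(who, top ? `${top.emotion} (${Math.trunc(100 * top.score)}%)` : 'no emotion above threshold');
}
console.log('timings in ms:', result.performance);
```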
@@ -350,7 +348,19 @@ Library can also be used on mobile devices
 
+## Credits
+
+- Face Detection: [**MediaPipe BlazeFace**](https://drive.google.com/file/d/1f39lSzU5Oq-j_OXgS67KfN5wNsoeAZ4V/view)
+- Facial Spatial Geometry: [**MediaPipe FaceMesh**](https://drive.google.com/file/d/1VFC_wIpw4O7xBOiTgUldl79d9LA-LsnA/view)
+- Eye Iris Details: [**MediaPipe Iris**](https://drive.google.com/file/d/1bsWbokp9AklH2ANjCfmjqEzzxO1CNbMu/view)
+- Hand Detection & Skeleton: [**MediaPipe HandPose**](https://drive.google.com/file/d/1sv4sSb9BSNVZhLzxXJ0jBv9DqD-4jnAz/view)
+- Body Pose Detection: [**PoseNet**](https://medium.com/tensorflow/real-time-human-pose-estimation-in-the-browser-with-tensorflow-js-7dd0bc881cd5)
+- Age & Gender Prediction: [**SSR-Net**](https://github.com/shamangary/SSR-Net)
+- Emotion Prediction: [**Oarriaga**](https://github.com/oarriaga/face_classification)
+
+
+
 ## Todo
 
-- Tweak default parameters
+- Tweak default parameters and refactor age/gender/emotion processing
 - Verify age/gender models
diff --git a/demo/demo-esm.js b/demo/demo-esm.js
index c57dc30c..f8bdf669 100644
--- a/demo/demo-esm.js
+++ b/demo/demo-esm.js
@@ -4,36 +4,52 @@ import human from '../dist/human.esm.js';
 const ui = {
   baseColor: 'rgba(255, 200, 255, 0.3)',
+  baseLabel: 'rgba(255, 200, 255, 0.8)',
   baseFont: 'small-caps 1.2rem "Segoe UI"',
   baseLineWidth: 16,
 };
 
 const config = {
   face: {
-    enabled: false,
+    enabled: true,
     detector: { maxFaces: 10, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
     mesh: { enabled: true },
     iris: { enabled: true },
     age: { enabled: true, skipFrames: 10 },
     gender: { enabled: true },
+    emotion: { enabled: true, minConfidence: 0.5, useGrayscale: true },
   },
-  body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
+  body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
   hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
 };
 let settings;
 
+// flatten objects into a compact single-line string for the log overlay
+function str(...msg) {
+  let line = '';
+  for (const entry of msg) {
+    if (typeof entry === 'object') line += JSON.stringify(entry).replace(/{|}|"|\[|\]/g, '').replace(/,/g, ', ');
+    else line += entry;
+  }
+  return line;
+}
+
 async function drawFace(result, canvas) {
   const ctx = canvas.getContext('2d');
-  ctx.fillStyle = ui.baseColor;
   ctx.strokeStyle = ui.baseColor;
   ctx.font = ui.baseFont;
   for (const face of result) {
+    ctx.fillStyle = ui.baseColor;
     ctx.lineWidth = ui.baseLineWidth;
     ctx.beginPath();
     if (settings.getValue('Draw Boxes')) {
       ctx.rect(face.box[0], face.box[1], face.box[2], face.box[3]);
     }
-    ctx.fillText(`face ${face.gender || ''} ${face.age || ''} ${face.iris ? 'iris: ' + face.iris : ''}`, face.box[0] + 2, face.box[1] + 22, face.box[2]);
+    const labelAgeGender = `${face.gender || ''} ${face.age || ''}`;
+    const labelIris = face.iris ? `iris: ${face.iris}` : '';
+    const labelEmotion = face.emotion && face.emotion[0] ? 
`emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : ''; + ctx.fillStyle = ui.baseLabel; + ctx.fillText(`face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22, face.box[2]); ctx.stroke(); ctx.lineWidth = 1; if (face.mesh) { @@ -140,7 +156,9 @@ async function drawHand(result, canvas) { if (settings.getValue('Draw Boxes')) { ctx.lineWidth = ui.baseLineWidth; ctx.beginPath(); + ctx.fillStyle = ui.baseColor; ctx.rect(hand.box[0], hand.box[1], hand.box[2], hand.box[3]); + ctx.fillStyle = ui.baseLabel; ctx.fillText('hand', hand.box[0] + 2, hand.box[1] + 22, hand.box[2]); ctx.stroke(); } @@ -199,11 +217,10 @@ async function runHumanDetect(input, canvas) { drawHand(result.hand, canvas); // update log const engine = await human.tf.engine(); + const memory = `Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors`; log.innerText = ` - TFJS Version: ${human.tf.version_core} Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors - GPU Memory: used ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes free ${Math.floor(1024 * 1024 * engine.backendInstance.numMBBeforeWarning).toLocaleString()} bytes - Result Object Size: Face: ${(JSON.stringify(result.face)).length.toLocaleString()} bytes Body: ${(JSON.stringify(result.body)).length.toLocaleString()} bytes Hand: ${(JSON.stringify(result.hand)).length.toLocaleString()} bytes - Performance: ${JSON.stringify(result.performance)} + TFJS Version: ${human.tf.version_core} | ${memory} | GPU: ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes + Performance: ${str(result.performance)} | Object size: ${(str(result)).length.toLocaleString()} bytes `; // rinse & repeate // if (input.readyState) setTimeout(() => runHumanDetect(), 1000); // slow loop for debugging purposes @@ -214,28 +231,36 @@ async function runHumanDetect(input, canvas) { function setupGUI() { // add all variables to ui control panel settings = QuickSettings.create(10, 10, 'Settings', document.getElementById('main')); - settings.addRange('FPS', 0, 100, 0, 1); - settings.addBoolean('Pause', false, (val) => { + const style = document.createElement('style'); + // style.type = 'text/css'; + style.innerHTML = ` + .qs_main { font: 1rem "Segoe UI"; } + .qs_label { font: 0.8rem "Segoe UI"; } + .qs_title_bar { display: none; } + .qs_content { background: darkslategray; } + .qs_container { background: transparent; color: white; margin: 6px; padding: 6px; } + .qs_checkbox_label { top: 2px; } + .qs_button { width: -webkit-fill-available; font: 1rem "Segoe UI"; cursor: pointer; } + `; + document.getElementsByTagName('head')[0].appendChild(style); + settings.addButton('Play/Pause', () => { const video = document.getElementById('video'); const canvas = document.getElementById('canvas'); - if (val) video.pause(); - else video.play(); + if (!video.paused) { + document.getElementById('log').innerText = 'Paused ...'; + video.pause(); + } else { + document.getElementById('log').innerText = 'Starting Human Library ...'; + video.play(); + } runHumanDetect(video, canvas); }); - settings.addHTML('line1', '
'); settings.hideTitle('line1'); - settings.addBoolean('Draw Boxes', false); - settings.addBoolean('Draw Points', true); - settings.addBoolean('Draw Polygons', true); - settings.addBoolean('Fill Polygons', true); - settings.bindText('baseColor', ui.baseColor, config); - settings.bindText('baseFont', ui.baseFont, config); - settings.bindRange('baseLineWidth', 1, 100, ui.baseLineWidth, 1, config); - settings.addHTML('line2', '
'); settings.hideTitle('line2'); settings.addBoolean('Face Detect', config.face.enabled, (val) => config.face.enabled = val); settings.addBoolean('Face Mesh', config.face.mesh.enabled, (val) => config.face.mesh.enabled = val); settings.addBoolean('Face Iris', config.face.iris.enabled, (val) => config.face.iris.enabled = val); settings.addBoolean('Face Age', config.face.age.enabled, (val) => config.face.age.enabled = val); settings.addBoolean('Face Gender', config.face.gender.enabled, (val) => config.face.gender.enabled = val); + settings.addBoolean('Face Emotion', config.face.emotion.enabled, (val) => config.face.emotion.enabled = val); settings.addBoolean('Body Pose', config.body.enabled, (val) => config.body.enabled = val); settings.addBoolean('Hand Pose', config.hand.enabled, (val) => config.hand.enabled = val); settings.addHTML('line3', '
'); settings.hideTitle('line3'); @@ -245,11 +270,13 @@ function setupGUI() { }); settings.addRange('Skip Frames', 1, 20, config.face.detector.skipFrames, 1, (val) => { config.face.detector.skipFrames = parseInt(val); + config.face.emotion.skipFrames = parseInt(val); config.face.age.skipFrames = parseInt(val); config.hand.skipFrames = parseInt(val); }); settings.addRange('Min Confidence', 0.1, 1.0, config.face.detector.minConfidence, 0.05, (val) => { config.face.detector.minConfidence = parseFloat(val); + config.face.emotion.minConfidence = parseFloat(val); config.hand.minConfidence = parseFloat(val); }); settings.addRange('Score Threshold', 0.1, 1.0, config.face.detector.scoreThreshold, 0.05, (val) => { @@ -261,6 +288,13 @@ function setupGUI() { config.face.detector.iouThreshold = parseFloat(val); config.hand.iouThreshold = parseFloat(val); }); + settings.addHTML('line1', '
'); settings.hideTitle('line1');
+  settings.addBoolean('Draw Boxes', true);
+  settings.addBoolean('Draw Points', true);
+  settings.addBoolean('Draw Polygons', true);
+  settings.addBoolean('Fill Polygons', true);
+  settings.addHTML('line2', '
'); settings.hideTitle('line2');
+  settings.addRange('FPS', 0, 100, 0, 1);
 }
 
 async function setupCanvas(input) {
@@ -289,6 +323,7 @@ async function setupCamera() {
       video.width = video.videoWidth;
       video.height = video.videoHeight;
       video.play();
+      video.pause();
       resolve(video);
     };
   });
@@ -316,9 +351,9 @@ async function main() {
   // or setup image
   // const image = await setupImage();
   // setup output canvas from input object, select video or image
-  const canvas = await setupCanvas(video);
+  await setupCanvas(video);
   // run actual detection. if input is video, it will run in a loop else it will run only once
-  runHumanDetect(video, canvas);
+  // runHumanDetect(video, canvas);
 }
 
 window.onload = main;
diff --git a/demo/demo-node.js b/demo/demo-node.js
index 64cdd2ed..7862fc68 100644
--- a/demo/demo-node.js
+++ b/demo/demo-node.js
@@ -26,7 +26,7 @@ const logger = new console.Console({
 const config = {
   face: {
     enabled: false,
-    detector: { modelPath: 'file://models/blazeface/model.json', inputSize: 128, maxFaces: 10, skipFrames: 5, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
+    detector: { modelPath: 'file://models/blazeface/model.json', inputSize: 128, maxFaces: 10, skipFrames: 10, minConfidence: 0.8, iouThreshold: 0.3, scoreThreshold: 0.75 },
     mesh: { enabled: true, modelPath: 'file://models/facemesh/model.json', inputSize: 192 },
     iris: { enabled: true, modelPath: 'file://models/iris/model.json', inputSize: 192 },
     age: { enabled: true, modelPath: 'file://models/ssrnet-age/imdb/model.json', inputSize: 64, skipFrames: 5 },
@@ -36,7 +36,7 @@ const config = {
   hand: {
     enabled: false,
     inputSize: 256,
-    skipFrames: 5,
+    skipFrames: 10,
     minConfidence: 0.8,
     iouThreshold: 0.3,
     scoreThreshold: 0.75,
diff --git a/src/config.js b/src/config.js
index ff72d491..883282e7 100644
--- a/src/config.js
+++ b/src/config.js
@@ -31,6 +31,14 @@ export default {
       enabled: true,
       modelPath: '../models/ssrnet-gender/imdb/model.json',
     },
+    emotion: {
+      enabled: true,
+      inputSize: 64, // fixed value
+      minConfidence: 0.5,
+      skipFrames: 10,
+      useGrayscale: true,
+      modelPath: '../models/emotion/model.json',
+    },
   },
   body: {
     enabled: true,
diff --git a/src/emotion/emotion.js b/src/emotion/emotion.js
new file mode 100644
index 00000000..ff607728
--- /dev/null
+++ b/src/emotion/emotion.js
@@ -0,0 +1,64 @@
+const tf = require('@tensorflow/tfjs');
+
+const annotations = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'];
+const models = {};
+let last = [];
+let frame = 0;
+const multiplier = 1.5;
+
+// read a DOM image source into a single-channel, resized, batched float tensor
+function getImage(image, size) {
+  const tensor = tf.tidy(() => {
+    const buffer = tf.browser.fromPixels(image, 1);
+    const resize = tf.image.resizeBilinear(buffer, [size, size]);
+    const expand = tf.cast(tf.expandDims(resize, 0), 'float32');
+    return expand;
+  });
+  return tensor;
+}
+
+async function load(config) {
+  if (!models.emotion) models.emotion = await tf.loadGraphModel(config.face.emotion.modelPath);
+}
+
+async function predict(image, config) {
+  // throttle: reuse the cached result until skipFrames frames have passed
+  if ((frame < config.face.emotion.skipFrames) && (last.length > 0)) {
+    frame += 1;
+    return last;
+  }
+  frame = 0;
+  const enhance = tf.tidy(() => {
+    if (image instanceof tf.Tensor) {
+      const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
+      const [r, g, b] = tf.split(resize, 3, 3);
+      if (config.face.emotion.useGrayscale) {
+        // luminosity: 0.2989 * R + 0.5870 * G + 0.1140 * B // https://www.mathworks.com/help/matlab/ref/rgb2gray.html
+        const r1 = tf.mul(r, [0.2989]);
+        const g1 = tf.mul(g, [0.5870]);
+        const b1 = tf.mul(b, [0.1140]);
+        const grayscale = tf.addN([r1, g1, b1]);
+        return grayscale;
+      }
+      // model input is a single channel, so fall back to green only
+      return g;
+    }
+    return getImage(image, config.face.emotion.inputSize);
+  });
+  const obj = [];
+  if (config.face.emotion.enabled) {
+    const emotionT = await models.emotion.predict(enhance);
+    const data = await emotionT.data();
+    for (let i = 0; i < data.length; i++) {
+      if (multiplier * data[i] > config.face.emotion.minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * multiplier * data[i]) / 100), emotion: annotations[i] });
+    }
+    obj.sort((a, b) => b.score - a.score);
+    tf.dispose(emotionT);
+  }
+  tf.dispose(enhance);
+  last = obj;
+  return obj;
+}
+
+exports.predict = predict;
+exports.load = load;
diff --git a/src/index.js b/src/index.js
index faf36337..545e11a9 100644
--- a/src/index.js
+++ b/src/index.js
@@ -1,6 +1,7 @@
 const tf = require('@tensorflow/tfjs');
 const facemesh = require('./facemesh/facemesh.js');
 const ssrnet = require('./ssrnet/ssrnet.js');
+const emotion = require('./emotion/emotion.js');
 const posenet = require('./posenet/posenet.js');
 const handpose = require('./handpose/handpose.js');
 const defaults = require('./config.js').default;
@@ -38,6 +39,7 @@ async function detect(input, userConfig) {
   // load models if enabled
   if (config.face.age.enabled) await ssrnet.loadAge(config);
   if (config.face.gender.enabled) await ssrnet.loadGender(config);
+  if (config.face.emotion.enabled) await emotion.load(config);
   if (config.body.enabled && !models.posenet) models.posenet = await posenet.load(config.body);
   if (config.hand.enabled && !models.handpose) models.handpose = await handpose.load(config.hand);
   if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
@@ -76,7 +78,12 @@ async function detect(input, userConfig) {
       timeStamp = performance.now();
       const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
       perf.agegender = Math.trunc(performance.now() - timeStamp);
+      // run emotion detection on the face image cropped by blazeface
+      timeStamp = performance.now();
+      const emotiondata = config.face.emotion.enabled ? await emotion.predict(face.image, config) : [];
+      perf.emotion = Math.trunc(performance.now() - timeStamp);
       face.image.dispose();
+      // calculate iris distance
       // iris: array[ bottom, left, top, right, center ]
       const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
         ? Math.max(face.annotations.leftEyeIris[3][0] - face.annotations.leftEyeIris[1][0], face.annotations.rightEyeIris[3][0] - face.annotations.rightEyeIris[1][0])
@@ -88,7 +95,8 @@ async function detect(input, userConfig) {
         annotations: face.annotations,
         age: ssrdata.age,
         gender: ssrdata.gender,
-        iris: (iris !== 0) ? Math.trunc(100 * 11.7 / iris) / 100 : 0,
+        emotion: emotiondata,
+        iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
       });
     }
   }
@@ -98,7 +106,6 @@ async function detect(input, userConfig) {
   tf.engine().endScope();
   // combine results
   perf.total = Object.values(perf).reduce((a, b) => a + b);
-  console.log('total', perf.total);
   resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf });
 });
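For reference, the score post-processing in `src/emotion/emotion.js` reduces to this pure function; `labels` and `multiplier` mirror the module's constants, and the input vector in the usage comment is illustrative only:

```js
const labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral'];
const multiplier = 1.5; // empirical boost applied to raw model outputs before thresholding

function toEmotions(data, minConfidence = 0.5) {
  const obj = [];
  for (let i = 0; i < data.length; i++) {
    const boosted = multiplier * data[i];
    // keep scores above the threshold, truncated to two decimals and capped at 0.99
    if (boosted > minConfidence) obj.push({ score: Math.min(0.99, Math.trunc(100 * boosted) / 100), emotion: labels[i] });
  }
  return obj.sort((a, b) => b.score - a.score);
}

// toEmotions([0.05, 0.01, 0.02, 0.50, 0.10, 0.02, 0.30]) -> [{ score: 0.75, emotion: 'happy' }]
```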