diff --git a/README.md b/README.md
index 050e6b8f..9be06d20 100644
--- a/README.md
+++ b/README.md
@@ -16,11 +16,13 @@ Compatible with Browser, WebWorker and NodeJS execution!
-**Example using static image:**
-
+## Examples
-**Example using webcam:**
-
+**Using static images:**
+
+
+**Using webcam:**
+
@@ -211,59 +213,85 @@ Below is output of `human.defaults` object
Any property can be overridden by passing a user object during `human.detect()`
Note that the user object and the default configuration are merged using deep-merge, so you do not need to redefine the entire configuration
+The configuration object is large, but typically you only need to modify a few values (see the example below this list):
+
+- `enabled`: Choose which models to use
+- `skipFrames`: Must be set to 0 for static images
+- `modelPath`: Update as needed to reflect your application's relative path
+
+
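+For example, to process a static image you only need to pass the values that differ from the defaults. A minimal sketch, assuming `image` is an already loaded `<img>` element:
+
+```js
+// the user object is deep-merged with human.defaults, so only overrides are needed
+const result = await human.detect(image, {
+  face: {
+    detector: { skipFrames: 0 }, // static image: do not re-use bounding boxes across frames
+    age: { skipFrames: 0 },
+    emotion: { skipFrames: 0 },
+  },
+  hand: { enabled: false }, // disable models you do not need
+});
+```
+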
```js
-human.defaults = {
- console: true, // enable debugging output to console
- backend: 'webgl', // select tfjs backend to use
+export default {
+ backend: 'webgl', // select tfjs backend to use
+ console: true, // enable debugging output to console
face: {
- enabled: true, // controls if specified modul is enabled (note: module is not loaded until it is required)
+    enabled: true, // controls if specified module is enabled
+ // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
+ // note: module is not loaded until it is required
detector: {
- modelPath: '../models/blazeface/tfhub/model.json', // can be 'tfhub', 'front' or 'back'
- inputSize: 128, // 128 for tfhub and front models, 256 for back
- maxFaces: 10, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
- skipFrames: 10, // how many frames to skip before re-running bounding box detection
- minConfidence: 0.5, // threshold for discarding a prediction
- iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
- scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+ modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
+      // 'front' is optimized for large faces such as those from a front-facing camera, while 'back' is optimized for distant faces.
+      inputSize: 256, // fixed value: 128 for 'tfhub' and 'front', 256 for 'back'
+ maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
+ skipFrames: 10, // how many frames to go without re-running the face bounding box detector
+      // if the model is running at 25 FPS, we can re-use the existing bounding box for updated face mesh analysis,
+      // as the face probably hasn't moved much in that short time (10 * 1/25 = 0.25 sec)
+ minConfidence: 0.5, // threshold for discarding a prediction
+ iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+ scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
},
mesh: {
enabled: true,
modelPath: '../models/facemesh/model.json',
+ inputSize: 192, // fixed value
},
iris: {
enabled: true,
modelPath: '../models/iris/model.json',
+      enlargeFactor: 2.3, // empirical tuning
+ inputSize: 64, // fixed value
},
age: {
enabled: true,
modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
- skipFrames: 10, // how many frames to skip before re-running bounding box detection
+      // which determines the training set used for the model
+ inputSize: 64, // fixed value
+ skipFrames: 10, // how many frames to go without re-running the detector
},
gender: {
enabled: true,
- modelPath: '../models/ssrnet-gender/imdb/model.json', // can be 'imdb' or 'wiki'
+ minConfidence: 0.8, // threshold for discarding a prediction
+ modelPath: '../models/ssrnet-gender/imdb/model.json',
},
emotion: {
enabled: true,
- minConfidence: 0.5, // threshold for discarding a prediction
- skipFrames: 10, // how many frames to skip before re-running bounding box detection
- useGrayscale: true, // convert color input to grayscale before processing or use single channels when color input is not supported
+ inputSize: 64, // fixed value
+ minConfidence: 0.5, // threshold for discarding a prediction
+ skipFrames: 10, // how many frames to go without re-running the detector
+      useGrayscale: true, // convert image to grayscale before prediction, or use the highest channel when color input is not supported
modelPath: '../models/emotion/model.json',
},
},
body: {
enabled: true,
modelPath: '../models/posenet/model.json',
- maxDetections: 5, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
- scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
- nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
+ inputResolution: 257, // fixed value
+ outputStride: 16, // fixed value
+ maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
+ scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+ nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
},
hand: {
enabled: true,
- skipFrames: 10, // how many frames to skip before re-running bounding box detection
- minConfidence: 0.5, // threshold for discarding a prediction
- iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
- scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+ inputSize: 256, // fixed value
+ skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
+    // if the model is running at 25 FPS, we can re-use the existing bounding box for updated hand skeleton analysis,
+    // as the hand probably hasn't moved much in that short time (10 * 1/25 = 0.25 sec)
+ minConfidence: 0.5, // threshold for discarding a prediction
+ iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+ scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    enlargeFactor: 1.65, // empirical tuning, as skeleton prediction prefers a hand box with some whitespace
+ maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
detector: {
anchors: '../models/handdetect/anchors.json',
modelPath: '../models/handdetect/model.json',
diff --git a/demo/demo-esm.js b/demo/demo-esm.js
index 73b64bca..7a505050 100644
--- a/demo/demo-esm.js
+++ b/demo/demo-esm.js
@@ -4,9 +4,11 @@ import human from '../dist/human.esm.js';
const ui = {
baseColor: 'rgba(255, 200, 255, 0.3)',
- baseLabel: 'rgba(255, 200, 255, 0.8)',
+ baseLabel: 'rgba(255, 200, 255, 0.9)',
baseFont: 'small-caps 1.2rem "Segoe UI"',
baseLineWidth: 16,
+ baseLineHeight: 2,
+ columns: 3,
busy: false,
facing: 'user',
};
@@ -23,8 +25,8 @@ const config = {
gender: { enabled: true },
emotion: { enabled: true, minConfidence: 0.5, useGrayscale: true },
},
- body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
- hand: { enabled: false, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
+ body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
+ hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
};
let settings;
let worker;
@@ -49,20 +51,23 @@ const log = (...msg) => {
async function drawFace(result, canvas) {
if (!result) return;
const ctx = canvas.getContext('2d');
- ctx.strokeStyle = ui.baseColor;
- ctx.font = ui.baseFont;
for (const face of result) {
+ ctx.font = ui.baseFont;
+ ctx.strokeStyle = ui.baseColor;
ctx.fillStyle = ui.baseColor;
ctx.lineWidth = ui.baseLineWidth;
ctx.beginPath();
if (settings.getValue('Draw Boxes')) {
ctx.rect(face.box[0], face.box[1], face.box[2], face.box[3]);
}
- const labelAgeGender = `${face.gender || ''} ${face.age || ''}`;
- const labelIris = face.iris ? `iris: ${face.iris}` : '';
- const labelEmotion = face.emotion && face.emotion[0] ? `emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : '';
+    // silly hack since fillText does not support newlines
+ const labels = [];
+ if (face.agConfidence) labels.push(`${Math.trunc(100 * face.agConfidence)}% ${face.gender || ''}`);
+ if (face.age) labels.push(`Age:${face.age || ''}`);
+ if (face.iris) labels.push(`iris: ${face.iris}`);
+ if (face.emotion && face.emotion[0]) labels.push(`${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}`);
ctx.fillStyle = ui.baseLabel;
- ctx.fillText(`${Math.trunc(100 * face.confidence)}% face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22);
+    // use a numeric index: for..in yields string keys, so (i + 1) would concatenate instead of add; spacing assumes ~10px per baseLineHeight unit
+    for (let i = 0; i < labels.length; i++) ctx.fillText(labels[i], face.box[0] + 6, face.box[1] + 24 + ((i + 1) * 10 * ui.baseLineHeight));
ctx.stroke();
ctx.lineWidth = 1;
if (face.mesh) {
@@ -102,11 +107,11 @@ async function drawFace(result, canvas) {
async function drawBody(result, canvas) {
if (!result) return;
const ctx = canvas.getContext('2d');
- ctx.fillStyle = ui.baseColor;
- ctx.strokeStyle = ui.baseColor;
- ctx.font = ui.baseFont;
- ctx.lineWidth = ui.baseLineWidth;
for (const pose of result) {
+ ctx.fillStyle = ui.baseColor;
+ ctx.strokeStyle = ui.baseColor;
+ ctx.font = ui.baseFont;
+ ctx.lineWidth = ui.baseLineWidth;
if (settings.getValue('Draw Points')) {
for (const point of pose.keypoints) {
ctx.beginPath();
@@ -164,13 +169,13 @@ async function drawBody(result, canvas) {
async function drawHand(result, canvas) {
if (!result) return;
const ctx = canvas.getContext('2d');
- ctx.font = ui.baseFont;
- ctx.lineWidth = ui.baseLineWidth;
- window.result = result;
for (const hand of result) {
+ ctx.font = ui.baseFont;
+ ctx.lineWidth = ui.baseLineWidth;
if (settings.getValue('Draw Boxes')) {
ctx.lineWidth = ui.baseLineWidth;
ctx.beginPath();
+ ctx.strokeStyle = ui.baseColor;
ctx.fillStyle = ui.baseColor;
ctx.rect(hand.box[0], hand.box[1], hand.box[2], hand.box[3]);
ctx.fillStyle = ui.baseLabel;
@@ -398,34 +403,74 @@ async function setupCamera() {
});
}
-// eslint-disable-next-line no-unused-vars
-async function setupImage() {
- const image = document.getElementById('image');
- image.width = window.innerWidth;
- image.height = window.innerHeight;
+async function processImage(input) {
+ ui.baseColor = 'rgba(200, 255, 255, 0.5)';
+ ui.baseLabel = 'rgba(200, 255, 255, 0.8)';
+ ui.baseFont = 'small-caps 3.5rem "Segoe UI"';
+ ui.baseLineWidth = 16;
+ ui.baseLineHeight = 5;
+ ui.columns = 3;
+ const cfg = {
+ backend: 'webgl',
+ console: true,
+ face: {
+ enabled: true,
+ detector: { maxFaces: 10, skipFrames: 0, minConfidence: 0.1, iouThreshold: 0.3, scoreThreshold: 0.3 },
+ mesh: { enabled: true },
+ iris: { enabled: true },
+ age: { enabled: true, skipFrames: 0 },
+ gender: { enabled: true },
+ emotion: { enabled: true, minConfidence: 0.1, useGrayscale: true },
+ },
+ body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
+ hand: { enabled: true, skipFrames: 0, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.5 },
+ };
return new Promise((resolve) => {
- image.onload = () => resolve(image);
- image.src = 'sample.jpg';
+ const image = document.getElementById('image');
+ image.onload = async () => {
+ log('Processing image:', image.src);
+ const canvas = document.getElementById('canvas');
+ image.width = image.naturalWidth;
+ image.height = image.naturalHeight;
+ canvas.width = image.naturalWidth;
+ canvas.height = image.naturalHeight;
+ const result = await human.detect(image, cfg);
+ await drawResults(image, result, canvas);
+ const thumb = document.createElement('canvas');
+      thumb.width = window.innerWidth / (ui.columns + 0.02); // slightly narrower than a full column to leave a small gap
+      thumb.height = canvas.height / (canvas.width / thumb.width); // scale height by the same factor to preserve the canvas aspect ratio
+ const ctx = thumb.getContext('2d');
+ ctx.drawImage(canvas, 0, 0, canvas.width, canvas.height, 0, 0, thumb.width, thumb.height);
+ document.getElementById('samples').appendChild(thumb);
+ image.src = '';
+ resolve(true);
+ };
+ image.src = input;
});
}
+// eslint-disable-next-line no-unused-vars
+async function detectSampleImages() {
+ ui.baseFont = 'small-caps 3rem "Segoe UI"';
+ document.getElementById('canvas').style.display = 'none';
+ log('Running detection of sample images');
+ const samples = ['../assets/sample1.jpg', '../assets/sample2.jpg', '../assets/sample3.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample6.jpg'];
+ for (const sample of samples) await processImage(sample);
+}
+
async function main() {
log('Human demo starting ...');
// setup ui control panel
await setupUI();
- // setup webcam
- await setupCamera();
-
- // or setup image
- // const input = await setupImage();
const msg = `Human ready: version: ${human.version} TensorFlow/JS version: ${human.tf.version_core}`;
document.getElementById('log').innerText += '\n' + msg;
log(msg);
- // run actual detection. if input is video, it will run in a loop else it will run only once
- // runHumanDetect(video, canvas);
+ // use one of the two:
+ await setupCamera();
+ // await detectSampleImages();
}
window.onload = main;
diff --git a/src/config.js b/src/config.js
index 1b4b00db..39448db0 100644
--- a/src/config.js
+++ b/src/config.js
@@ -1,65 +1,77 @@
+/* eslint-disable indent */
+/* eslint-disable no-multi-spaces */
+
export default {
- backend: 'webgl',
- console: true,
+ backend: 'webgl', // select tfjs backend to use
+ console: true, // enable debugging output to console
face: {
- enabled: true, // refers to detector, but since all other face modules rely on detector, it should be a global
+    enabled: true, // controls if specified module is enabled
+ // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
+ // (note: module is not loaded until it is required)
detector: {
- modelPath: '../models/blazeface/back/model.json', // can be blazeface-front or blazeface-back
- inputSize: 256, // fixed value: 128 for front and tfhub and 256 for back
- maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
- skipFrames: 10, // how many frames to go without running the bounding box detector
- minConfidence: 0.5, // threshold for discarding a prediction
- iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
- scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+ modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
+      // 'front' is optimized for large faces such as those from a front-facing camera, while 'back' is optimized for distant faces.
+      inputSize: 256, // fixed value: 128 for 'tfhub' and 'front', 256 for 'back'
+ maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
+ skipFrames: 10, // how many frames to go without re-running the face bounding box detector
+      // if the model is running at 25 FPS, we can re-use the existing bounding box for updated face mesh analysis,
+      // as the face probably hasn't moved much in that short time (10 * 1/25 = 0.25 sec)
+ minConfidence: 0.5, // threshold for discarding a prediction
+ iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+ scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
},
mesh: {
enabled: true,
modelPath: '../models/facemesh/model.json',
- inputSize: 192, // fixed value
+ inputSize: 192, // fixed value
},
iris: {
enabled: true,
modelPath: '../models/iris/model.json',
- enlargeFactor: 2.3, // empiric tuning
- inputSize: 64, // fixed value
+      enlargeFactor: 2.3, // empirical tuning
+ inputSize: 64, // fixed value
},
age: {
enabled: true,
- modelPath: '../models/ssrnet-age/imdb/model.json',
- inputSize: 64, // fixed value
- skipFrames: 10,
+ modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
+      // which determines the training set used for the model
+ inputSize: 64, // fixed value
+ skipFrames: 10, // how many frames to go without re-running the detector
},
gender: {
enabled: true,
+ minConfidence: 0.8, // threshold for discarding a prediction
modelPath: '../models/ssrnet-gender/imdb/model.json',
},
emotion: {
enabled: true,
- inputSize: 64, // fixed value
- minConfidence: 0.5,
- skipFrames: 10,
- useGrayscale: true,
+ inputSize: 64, // fixed value
+ minConfidence: 0.5, // threshold for discarding a prediction
+ skipFrames: 10, // how many frames to go without re-running the detector
+      useGrayscale: true, // convert image to grayscale before prediction, or use the highest channel when color input is not supported
modelPath: '../models/emotion/model.json',
},
},
body: {
enabled: true,
modelPath: '../models/posenet/model.json',
- inputResolution: 257, // fixed value
- outputStride: 16, // fixed value
- maxDetections: 5,
- scoreThreshold: 0.7,
- nmsRadius: 20,
+ inputResolution: 257, // fixed value
+ outputStride: 16, // fixed value
+ maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
+ scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+ nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
},
hand: {
enabled: true,
- inputSize: 256, // fixed value
- skipFrames: 10,
- minConfidence: 0.5,
- iouThreshold: 0.3,
- scoreThreshold: 0.7,
- enlargeFactor: 1.65, // empiric tuning
- maxHands: 2,
+ inputSize: 256, // fixed value
+ skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
+    // if the model is running at 25 FPS, we can re-use the existing bounding box for updated hand skeleton analysis,
+    // as the hand probably hasn't moved much in that short time (10 * 1/25 = 0.25 sec)
+ minConfidence: 0.5, // threshold for discarding a prediction
+ iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+ scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    enlargeFactor: 1.65, // empirical tuning, as skeleton prediction prefers a hand box with some whitespace
+ maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
detector: {
anchors: '../models/handdetect/anchors.json',
modelPath: '../models/handdetect/model.json',
diff --git a/src/index.js b/src/index.js
index 1ea656b2..2456104d 100644
--- a/src/index.js
+++ b/src/index.js
@@ -84,11 +84,11 @@ async function detect(input, userConfig) {
await tf.ready();
}
  // explicitly enable depthwiseconv since it's disabled by default due to issues with large shaders
- let savedWebglPackDepthwiseConvFlag;
- if (tf.getBackend() === 'webgl') {
- savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
- tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
- }
+ // let savedWebglPackDepthwiseConvFlag;
+ // if (tf.getBackend() === 'webgl') {
+ // savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
+ // tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
+ // }
// load models if enabled
if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
@@ -149,6 +149,7 @@ async function detect(input, userConfig) {
annotations: face.annotations,
age: ssrData.age,
gender: ssrData.gender,
+      agConfidence: ssrData.confidence, // confidence level of the age/gender prediction
emotion: emotionData,
iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
});
@@ -157,7 +158,7 @@ async function detect(input, userConfig) {
}
// set depthwiseconv to original value
- tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
+ // tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
// combine and return results
perf.total = Object.values(perf).reduce((a, b) => a + b);