updated examples plus bugfixes

pull/293/head
Vladimir Mandic 2020-10-16 15:04:51 -04:00
parent 924eb3eb25
commit c82b1698d5
4 changed files with 181 additions and 95 deletions

View File

@ -16,11 +16,13 @@ Compatible with Browser, WebWorker and NodeJS execution!
<hr>
**Example using static image:**
![Example Using Image](demo/sample-image.jpg)
## Examples
**Example using webcam:**
![Example Using WebCam](demo/sample-video.jpg)
**Using static images:**
![Example Using Image](assets/screenshot1.jpg)
**Using webcam:**
![Example Using WebCam](assets/screenshot2.jpg)
<hr>
@ -211,17 +213,29 @@ Below is output of `human.defaults` object
Any property can be overridden by passing a user object during `human.detect()`
Note that user object and default configuration are merged using deep-merge, so you do not need to redefine the entire configuration (see the short override sketch after the list below)
Configuration object is large, but typically you only need to modify a few values:
- `enabled`: Choose which models to use
- `skipFrames`: Must be set to 0 for static images
- `modelPath`: Update as needed to reflect your application's relative path
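For example, a minimal override only needs the values that differ from the defaults. The snippet below is a sketch; the `image` element id is an illustrative assumption:

```js
// pass only the values you want to change; they are deep-merged with the defaults shown below
const image = document.getElementById('image'); // hypothetical input element
const result = await human.detect(image, {
  face: { detector: { skipFrames: 0 } }, // static image, so never skip frames
  hand: { enabled: false },              // disable hand detection entirely
});
```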
```js
human.defaults = {
console: true, // enable debugging output to console
export default {
backend: 'webgl', // select tfjs backend to use
console: true, // enable debugging output to console
face: {
enabled: true, // controls if specified module is enabled (note: module is not loaded until it is required)
enabled: true, // controls if specified module is enabled
// face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
// note: module is not loaded until it is required
detector: {
modelPath: '../models/blazeface/tfhub/model.json', // can be 'tfhub', 'front' or 'back'
inputSize: 128, // 128 for tfhub and front models, 256 for back
maxFaces: 10, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
skipFrames: 10, // how many frames to skip before re-running bounding box detection
modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
// 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distant faces.
inputSize: 256, // fixed value: 128 for 'front' and 'tfhub' models, 256 for 'back'
maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
skipFrames: 10, // how many frames to go without re-running the face bounding box detector
// if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
// as face probably hasn't moved much in short time (10 * 1/25 = 0.25 sec)
minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
@ -229,41 +243,55 @@ human.defaults = {
mesh: {
enabled: true,
modelPath: '../models/facemesh/model.json',
inputSize: 192, // fixed value
},
iris: {
enabled: true,
modelPath: '../models/iris/model.json',
enlargeFactor: 2.3, // empiric tuning
inputSize: 64, // fixed value
},
age: {
enabled: true,
modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
skipFrames: 10, // how many frames to skip before re-running bounding box detection
// which determines training set for model
inputSize: 64, // fixed value
skipFrames: 10, // how many frames to go without re-running the detector
},
gender: {
enabled: true,
modelPath: '../models/ssrnet-gender/imdb/model.json', // can be 'imdb' or 'wiki'
minConfidence: 0.8, // threshold for discarding a prediction
modelPath: '../models/ssrnet-gender/imdb/model.json',
},
emotion: {
enabled: true,
inputSize: 64, // fixed value
minConfidence: 0.5, // threshold for discarding a prediction
skipFrames: 10, // how many frames to skip before re-running bounding box detection
useGrayscale: true, // convert color input to grayscale before processing or use single channels when color input is not supported
skipFrames: 10, // how many frames to go without re-running the detector
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
modelPath: '../models/emotion/model.json',
},
},
body: {
enabled: true,
modelPath: '../models/posenet/model.json',
maxDetections: 5, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
inputResolution: 257, // fixed value
outputStride: 16, // fixed value
maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
},
hand: {
enabled: true,
skipFrames: 10, // how many frames to skip before re-running bounding box detection
inputSize: 256, // fixed value
skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
// if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
// as hand probably hasn't moved much in short time (10 * 1/25 = 0.25 sec)
minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
detector: {
anchors: '../models/handdetect/anchors.json',
modelPath: '../models/handdetect/model.json',

View File

@ -4,9 +4,11 @@ import human from '../dist/human.esm.js';
const ui = {
baseColor: 'rgba(255, 200, 255, 0.3)',
baseLabel: 'rgba(255, 200, 255, 0.8)',
baseLabel: 'rgba(255, 200, 255, 0.9)',
baseFont: 'small-caps 1.2rem "Segoe UI"',
baseLineWidth: 16,
baseLineHeight: 2,
columns: 3,
busy: false,
facing: 'user',
};
@ -23,8 +25,8 @@ const config = {
gender: { enabled: true },
emotion: { enabled: true, minConfidence: 0.5, useGrayscale: true },
},
body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
hand: { enabled: false, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
};
let settings;
let worker;
@ -49,20 +51,23 @@ const log = (...msg) => {
async function drawFace(result, canvas) {
if (!result) return;
const ctx = canvas.getContext('2d');
ctx.strokeStyle = ui.baseColor;
ctx.font = ui.baseFont;
for (const face of result) {
ctx.font = ui.baseFont;
ctx.strokeStyle = ui.baseColor;
ctx.fillStyle = ui.baseColor;
ctx.lineWidth = ui.baseLineWidth;
ctx.beginPath();
if (settings.getValue('Draw Boxes')) {
ctx.rect(face.box[0], face.box[1], face.box[2], face.box[3]);
}
const labelAgeGender = `${face.gender || ''} ${face.age || ''}`;
const labelIris = face.iris ? `iris: ${face.iris}` : '';
const labelEmotion = face.emotion && face.emotion[0] ? `emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : '';
// silly hack since fillText does not support new line
const labels = [];
if (face.agConfidence) labels.push(`${Math.trunc(100 * face.agConfidence)}% ${face.gender || ''}`);
if (face.age) labels.push(`Age:${face.age || ''}`);
if (face.iris) labels.push(`iris: ${face.iris}`);
if (face.emotion && face.emotion[0]) labels.push(`${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}`);
ctx.fillStyle = ui.baseLabel;
ctx.fillText(`${Math.trunc(100 * face.confidence)}% face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22);
for (let i = 0; i < labels.length; i++) ctx.fillText(labels[i], face.box[0] + 6, face.box[1] + 24 + ((i + 1) * ui.baseLineHeight));
ctx.stroke();
ctx.lineWidth = 1;
if (face.mesh) {
@ -102,11 +107,11 @@ async function drawFace(result, canvas) {
async function drawBody(result, canvas) {
if (!result) return;
const ctx = canvas.getContext('2d');
for (const pose of result) {
ctx.fillStyle = ui.baseColor;
ctx.strokeStyle = ui.baseColor;
ctx.font = ui.baseFont;
ctx.lineWidth = ui.baseLineWidth;
for (const pose of result) {
if (settings.getValue('Draw Points')) {
for (const point of pose.keypoints) {
ctx.beginPath();
@ -164,13 +169,13 @@ async function drawBody(result, canvas) {
async function drawHand(result, canvas) {
if (!result) return;
const ctx = canvas.getContext('2d');
for (const hand of result) {
ctx.font = ui.baseFont;
ctx.lineWidth = ui.baseLineWidth;
window.result = result;
for (const hand of result) {
if (settings.getValue('Draw Boxes')) {
ctx.lineWidth = ui.baseLineWidth;
ctx.beginPath();
ctx.strokeStyle = ui.baseColor;
ctx.fillStyle = ui.baseColor;
ctx.rect(hand.box[0], hand.box[1], hand.box[2], hand.box[3]);
ctx.fillStyle = ui.baseLabel;
@ -398,34 +403,74 @@ async function setupCamera() {
});
}
// eslint-disable-next-line no-unused-vars
async function setupImage() {
const image = document.getElementById('image');
image.width = window.innerWidth;
image.height = window.innerHeight;
async function processImage(input) {
ui.baseColor = 'rgba(200, 255, 255, 0.5)';
ui.baseLabel = 'rgba(200, 255, 255, 0.8)';
ui.baseFont = 'small-caps 3.5rem "Segoe UI"';
ui.baseLineWidth = 16;
ui.baseLineHeight = 5;
ui.columns = 3;
const cfg = {
backend: 'webgl',
console: true,
face: {
enabled: true,
detector: { maxFaces: 10, skipFrames: 0, minConfidence: 0.1, iouThreshold: 0.3, scoreThreshold: 0.3 },
mesh: { enabled: true },
iris: { enabled: true },
age: { enabled: true, skipFrames: 0 },
gender: { enabled: true },
emotion: { enabled: true, minConfidence: 0.1, useGrayscale: true },
},
body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
hand: { enabled: true, skipFrames: 0, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.5 },
};
return new Promise((resolve) => {
image.onload = () => resolve(image);
image.src = 'sample.jpg';
const image = document.getElementById('image');
image.onload = async () => {
log('Processing image:', image.src);
const canvas = document.getElementById('canvas');
image.width = image.naturalWidth;
image.height = image.naturalHeight;
canvas.width = image.naturalWidth;
canvas.height = image.naturalHeight;
const result = await human.detect(image, cfg);
await drawResults(image, result, canvas);
const thumb = document.createElement('canvas');
thumb.width = window.innerWidth / (ui.columns + 0.02);
thumb.height = canvas.height / (window.innerWidth / thumb.width);
const ctx = thumb.getContext('2d');
ctx.drawImage(canvas, 0, 0, canvas.width, canvas.height, 0, 0, thumb.width, thumb.height);
document.getElementById('samples').appendChild(thumb);
image.src = '';
resolve(true);
};
image.src = input;
});
}
// eslint-disable-next-line no-unused-vars
async function detectSampleImages() {
ui.baseFont = 'small-caps 3rem "Segoe UI"';
document.getElementById('canvas').style.display = 'none';
log('Running detection of sample images');
const samples = ['../assets/sample1.jpg', '../assets/sample2.jpg', '../assets/sample3.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample6.jpg'];
for (const sample of samples) await processImage(sample);
}
async function main() {
log('Human demo starting ...');
// setup ui control panel
await setupUI();
// setup webcam
await setupCamera();
// or setup image
// const input = await setupImage();
const msg = `Human ready: version: ${human.version} TensorFlow/JS version: ${human.tf.version_core}`;
document.getElementById('log').innerText += '\n' + msg;
log(msg);
// run actual detection. if input is video, it will run in a loop else it will run only once
// runHumanDetect(video, canvas);
// use one of the two:
await setupCamera();
// await detectSampleImages();
}
window.onload = main;

View File

@ -1,13 +1,21 @@
/* eslint-disable indent */
/* eslint-disable no-multi-spaces */
export default {
backend: 'webgl',
console: true,
backend: 'webgl', // select tfjs backend to use
console: true, // enable debugging output to console
face: {
enabled: true, // refers to detector, but since all other face modules rely on detector, it should be a global
enabled: true, // controls if specified module is enabled
// face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
// (note: module is not loaded until it is required)
detector: {
modelPath: '../models/blazeface/back/model.json', // can be blazeface-front or blazeface-back
inputSize: 256, // fixed value: 128 for front and tfhub and 256 for back
modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
// 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distant faces.
inputSize: 256, // fixed value: 128 for 'front' and 'tfhub' models, 256 for 'back'
maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
skipFrames: 10, // how many frames to go without running the bounding box detector
skipFrames: 10, // how many frames to go without re-running the face bounding box detector
// if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
// as face probably hasn't moved much in short time (10 * 1/25 = 0.25 sec)
minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
@ -25,20 +33,22 @@ export default {
},
age: {
enabled: true,
modelPath: '../models/ssrnet-age/imdb/model.json',
modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
// which determines training set for model
inputSize: 64, // fixed value
skipFrames: 10,
skipFrames: 10, // how many frames to go without re-running the detector
},
gender: {
enabled: true,
minConfidence: 0.8, // threshold for discarding a prediction
modelPath: '../models/ssrnet-gender/imdb/model.json',
},
emotion: {
enabled: true,
inputSize: 64, // fixed value
minConfidence: 0.5,
skipFrames: 10,
useGrayscale: true,
minConfidence: 0.5, // threshold for discarding a prediction
skipFrames: 10, // how many frames to go without re-running the detector
useGrayscale: true, // convert image to grayscale before prediction or use highest channel
modelPath: '../models/emotion/model.json',
},
},
@ -47,19 +57,21 @@ export default {
modelPath: '../models/posenet/model.json',
inputResolution: 257, // fixed value
outputStride: 16, // fixed value
maxDetections: 5,
scoreThreshold: 0.7,
nmsRadius: 20,
maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
},
hand: {
enabled: true,
inputSize: 256, // fixed value
skipFrames: 10,
minConfidence: 0.5,
iouThreshold: 0.3,
scoreThreshold: 0.7,
enlargeFactor: 1.65, // empiric tuning
maxHands: 2,
skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
// if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
// as hand probably hasn't moved much in short time (10 * 1/25 = 0.25 sec)
minConfidence: 0.5, // threshold for discarding a prediction
iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
detector: {
anchors: '../models/handdetect/anchors.json',
modelPath: '../models/handdetect/model.json',

View File

@ -84,11 +84,11 @@ async function detect(input, userConfig) {
await tf.ready();
}
// explicitly enable depthwiseconv since it's disabled by default due to issues with large shaders
let savedWebglPackDepthwiseConvFlag;
if (tf.getBackend() === 'webgl') {
savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
}
// let savedWebglPackDepthwiseConvFlag;
// if (tf.getBackend() === 'webgl') {
// savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
// tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
// }
// load models if enabled
if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
@ -149,6 +149,7 @@ async function detect(input, userConfig) {
annotations: face.annotations,
age: ssrData.age,
gender: ssrData.gender,
agConfidence: ssrData.confidence,
emotion: emotionData,
iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
});
@ -157,7 +158,7 @@ async function detect(input, userConfig) {
}
// set depthwiseconv to original value
tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
// tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
// combine and return results
perf.total = Object.values(perf).reduce((a, b) => a + b);
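As a quick illustration of the aggregation above, the reduce simply sums every recorded per-stage timing into `perf.total`; the stage names and numbers below are made up:

```js
// hypothetical per-stage timings in milliseconds
const perf = { face: 41, body: 27, hand: 12 };
perf.total = Object.values(perf).reduce((a, b) => a + b); // 41 + 27 + 12 = 80
```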