From c82b1698d533eeaf8813f554b464ca64901f2ae7 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 16 Oct 2020 15:04:51 -0400
Subject: [PATCH] updated examples plus bugfixes

---
 README.md        |  82 ++++++++++++++++++++++++------------
 demo/demo-esm.js | 105 +++++++++++++++++++++++++++++++++--------------
 src/config.js    |  76 +++++++++++++++++++---------------
 src/index.js     |  13 +++---
 4 files changed, 181 insertions(+), 95 deletions(-)

diff --git a/README.md b/README.md
index 050e6b8f..9be06d20 100644
--- a/README.md
+++ b/README.md
@@ -16,11 +16,13 @@ Compatible with Browser, WebWorker and NodeJS execution!
-**Example using static image:**
-![Example Using Image](demo/sample-image.jpg)
+## Examples
-**Example using webcam:**
-![Example Using WebCam](demo/sample-video.jpg)
+**Using static images:**
+![Example Using Image](assets/screenshot1.jpg)
+
+**Using webcam:**
+![Example Using WebCam](assets/screenshot2.jpg)
@@ -211,59 +213,85 @@ Below is output of `human.defaults` object
 Any property can be overriden by passing user object during `human.detect()`
 Note that user object and default configuration are merged using deep-merge, so you do not need to redefine entire configuration
+Configuration object is large, but typically you only need to modify a few values:
+
+- `enabled`: Choose which models to use
+- `skipFrames`: Must be set to 0 for static images
+- `modelPath`: Update as needed to reflect your application's relative path
+
+
 ```js
-human.defaults = {
-  console: true, // enable debugging output to console
-  backend: 'webgl', // select tfjs backend to use
+export default {
+  backend: 'webgl', // select tfjs backend to use
+  console: true, // enable debugging output to console
   face: {
-    enabled: true, // controls if specified modul is enabled (note: module is not loaded until it is required)
+    enabled: true, // controls if specified module is enabled
+    // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
+    // note: module is not loaded until it is required
     detector: {
-      modelPath: '../models/blazeface/tfhub/model.json', // can be 'tfhub', 'front' or 'back'
-      inputSize: 128, // 128 for tfhub and front models, 256 for back
-      maxFaces: 10, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
-      skipFrames: 10, // how many frames to skip before re-running bounding box detection
-      minConfidence: 0.5, // threshold for discarding a prediction
-      iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-      scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+      modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
+      // 'front' is optimized for large faces such as a front-facing camera and 'back' is optimized for distant faces.
+      inputSize: 256, // fixed value: 128 for 'front' and 'tfhub', 256 for 'back'
+      maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
+      skipFrames: 10, // how many frames to go without re-running the face bounding box detector
+      // if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
+      // as the face probably hasn't moved much in such a short time (10 * 1/25 = 0.25 sec)
+      minConfidence: 0.5, // threshold for discarding a prediction
+      iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+      scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     },
     mesh: {
       enabled: true,
       modelPath: '../models/facemesh/model.json',
+      inputSize: 192, // fixed value
     },
     iris: {
       enabled: true,
       modelPath: '../models/iris/model.json',
+      enlargeFactor: 2.3, // empiric tuning
+      inputSize: 64, // fixed value
     },
     age: {
       enabled: true,
       modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
-      skipFrames: 10, // how many frames to skip before re-running bounding box detection
+      // which determines training set for model
+      inputSize: 64, // fixed value
+      skipFrames: 10, // how many frames to go without re-running the detector
     },
     gender: {
       enabled: true,
-      modelPath: '../models/ssrnet-gender/imdb/model.json', // can be 'imdb' or 'wiki'
+      minConfidence: 0.8, // threshold for discarding a prediction
+      modelPath: '../models/ssrnet-gender/imdb/model.json',
     },
     emotion: {
       enabled: true,
-      minConfidence: 0.5, // threshold for discarding a prediction
-      skipFrames: 10, // how many frames to skip before re-running bounding box detection
-      useGrayscale: true, // convert color input to grayscale before processing or use single channels when color input is not supported
+      inputSize: 64, // fixed value
+      minConfidence: 0.5, // threshold for discarding a prediction
+      skipFrames: 10, // how many frames to go without re-running the detector
+      useGrayscale: true, // convert image to grayscale before prediction or use highest channel
       modelPath: '../models/emotion/model.json',
     },
   },
   body: {
     enabled: true,
     modelPath: '../models/posenet/model.json',
-    maxDetections: 5, // how many faces are we trying to analyze. limiting number in busy scenes will result in higher performance
-    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
-    nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
+    inputResolution: 257, // fixed value
+    outputStride: 16, // fixed value
+    maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
   },
   hand: {
     enabled: true,
-    skipFrames: 10, // how many frames to skip before re-running bounding box detection
-    minConfidence: 0.5, // threshold for discarding a prediction
-    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    inputSize: 256, // fixed value
+    skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
+    // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
+    // as the hand probably hasn't moved much in such a short time (10 * 1/25 = 0.25 sec)
+    minConfidence: 0.5, // threshold for discarding a prediction
+    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
+    maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
     detector: {
       anchors: '../models/handdetect/anchors.json',
       modelPath: '../models/handdetect/model.json',
diff --git a/demo/demo-esm.js b/demo/demo-esm.js
index 73b64bca..7a505050 100644
--- a/demo/demo-esm.js
+++ b/demo/demo-esm.js
@@ -4,9 +4,11 @@ import human from '../dist/human.esm.js';
 const ui = {
   baseColor: 'rgba(255, 200, 255, 0.3)',
-  baseLabel: 'rgba(255, 200, 255, 0.8)',
+  baseLabel: 'rgba(255, 200, 255, 0.9)',
   baseFont: 'small-caps 1.2rem "Segoe UI"',
   baseLineWidth: 16,
+  baseLineHeight: 2,
+  columns: 3,
   busy: false,
   facing: 'user',
 };
@@ -23,8 +25,8 @@ const config = {
     gender: { enabled: true },
     emotion: { enabled: true, minConfidence: 0.5, useGrayscale: true },
   },
-  body: { enabled: false, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
-  hand: { enabled: false, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
+  body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
+  hand: { enabled: true, skipFrames: 10, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.7 },
 };
 let settings;
 let worker;
@@ -49,20 +51,23 @@ const log = (...msg) => {
 async function drawFace(result, canvas) {
   if (!result) return;
   const ctx = canvas.getContext('2d');
-  ctx.strokeStyle = ui.baseColor;
-  ctx.font = ui.baseFont;
   for (const face of result) {
+    ctx.font = ui.baseFont;
+    ctx.strokeStyle = ui.baseColor;
     ctx.fillStyle = ui.baseColor;
     ctx.lineWidth = ui.baseLineWidth;
     ctx.beginPath();
     if (settings.getValue('Draw Boxes')) {
       ctx.rect(face.box[0], face.box[1], face.box[2], face.box[3]);
     }
-    const labelAgeGender = `${face.gender || ''} ${face.age || ''}`;
-    const labelIris = face.iris ? `iris: ${face.iris}` : '';
-    const labelEmotion = face.emotion && face.emotion[0] ? `emotion: ${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}` : '';
+    // silly hack since fillText does not support newlines
+    const labels = [];
+    if (face.agConfidence) labels.push(`${Math.trunc(100 * face.agConfidence)}% ${face.gender || ''}`);
+    if (face.age) labels.push(`Age:${face.age || ''}`);
+    if (face.iris) labels.push(`iris: ${face.iris}`);
+    if (face.emotion && face.emotion[0]) labels.push(`${Math.trunc(100 * face.emotion[0].score)}% ${face.emotion[0].emotion}`);
     ctx.fillStyle = ui.baseLabel;
-    ctx.fillText(`${Math.trunc(100 * face.confidence)}% face ${labelAgeGender} ${labelIris} ${labelEmotion}`, face.box[0] + 2, face.box[1] + 22);
+    for (const i in labels) ctx.fillText(labels[i], face.box[0] + 6, face.box[1] + 24 + ((i + 1) * ui.baseLineHeight));
     ctx.stroke();
     ctx.lineWidth = 1;
     if (face.mesh) {
@@ -102,11 +107,11 @@ async function drawFace(result, canvas) {
 async function drawBody(result, canvas) {
   if (!result) return;
   const ctx = canvas.getContext('2d');
-  ctx.fillStyle = ui.baseColor;
-  ctx.strokeStyle = ui.baseColor;
-  ctx.font = ui.baseFont;
-  ctx.lineWidth = ui.baseLineWidth;
   for (const pose of result) {
+    ctx.fillStyle = ui.baseColor;
+    ctx.strokeStyle = ui.baseColor;
+    ctx.font = ui.baseFont;
+    ctx.lineWidth = ui.baseLineWidth;
     if (settings.getValue('Draw Points')) {
       for (const point of pose.keypoints) {
         ctx.beginPath();
@@ -164,13 +169,13 @@ async function drawBody(result, canvas) {
 async function drawHand(result, canvas) {
   if (!result) return;
   const ctx = canvas.getContext('2d');
-  ctx.font = ui.baseFont;
-  ctx.lineWidth = ui.baseLineWidth;
-  window.result = result;
   for (const hand of result) {
+    ctx.font = ui.baseFont;
+    ctx.lineWidth = ui.baseLineWidth;
     if (settings.getValue('Draw Boxes')) {
       ctx.lineWidth = ui.baseLineWidth;
       ctx.beginPath();
+      ctx.strokeStyle = ui.baseColor;
       ctx.fillStyle = ui.baseColor;
       ctx.rect(hand.box[0], hand.box[1], hand.box[2], hand.box[3]);
       ctx.fillStyle = ui.baseLabel;
@@ -398,34 +403,74 @@ async function setupCamera() {
   });
 }
-// eslint-disable-next-line no-unused-vars
-async function setupImage() {
-  const image = document.getElementById('image');
-  image.width = window.innerWidth;
-  image.height = window.innerHeight;
+async function processImage(input) {
+  ui.baseColor = 'rgba(200, 255, 255, 0.5)';
+  ui.baseLabel = 'rgba(200, 255, 255, 0.8)';
+  ui.baseFont = 'small-caps 3.5rem "Segoe UI"';
+  ui.baseLineWidth = 16;
+  ui.baseLineHeight = 5;
+  ui.columns = 3;
+  const cfg = {
+    backend: 'webgl',
+    console: true,
+    face: {
+      enabled: true,
+      detector: { maxFaces: 10, skipFrames: 0, minConfidence: 0.1, iouThreshold: 0.3, scoreThreshold: 0.3 },
+      mesh: { enabled: true },
+      iris: { enabled: true },
+      age: { enabled: true, skipFrames: 0 },
+      gender: { enabled: true },
+      emotion: { enabled: true, minConfidence: 0.1, useGrayscale: true },
+    },
+    body: { enabled: true, maxDetections: 10, scoreThreshold: 0.7, nmsRadius: 20 },
+    hand: { enabled: true, skipFrames: 0, minConfidence: 0.5, iouThreshold: 0.3, scoreThreshold: 0.5 },
+  };
   return new Promise((resolve) => {
-    image.onload = () => resolve(image);
-    image.src = 'sample.jpg';
+    const image = document.getElementById('image');
+    image.onload = async () => {
+      log('Processing image:', image.src);
+      const canvas = document.getElementById('canvas');
+      image.width = image.naturalWidth;
+      image.height = image.naturalHeight;
+      canvas.width = image.naturalWidth;
+      canvas.height = image.naturalHeight;
+      const result = await human.detect(image, cfg);
+      await drawResults(image, result, canvas);
+      const thumb = document.createElement('canvas');
+      thumb.width = window.innerWidth / (ui.columns + 0.02);
+      thumb.height = canvas.height / (window.innerWidth / thumb.width);
+      const ctx = thumb.getContext('2d');
+      ctx.drawImage(canvas, 0, 0, canvas.width, canvas.height, 0, 0, thumb.width, thumb.height);
+      document.getElementById('samples').appendChild(thumb);
+      image.src = '';
+      resolve(true);
+    };
+    image.src = input;
   });
 }
+// eslint-disable-next-line no-unused-vars
+async function detectSampleImages() {
+  ui.baseFont = 'small-caps 3rem "Segoe UI"';
+  document.getElementById('canvas').style.display = 'none';
+  log('Running detection of sample images');
+  const samples = ['../assets/sample1.jpg', '../assets/sample2.jpg', '../assets/sample3.jpg', '../assets/sample4.jpg', '../assets/sample5.jpg', '../assets/sample6.jpg'];
+  for (const sample of samples) await processImage(sample);
+}
+
 async function main() {
   log('Human demo starting ...');
   // setup ui control panel
   await setupUI();
-  // setup webcam
-  await setupCamera();
-
-  // or setup image
-  // const input = await setupImage();
   const msg = `Human ready: version: ${human.version} TensorFlow/JS version: ${human.tf.version_core}`;
   document.getElementById('log').innerText += '\n' + msg;
   log(msg);
-  // run actual detection. if input is video, it will run in a loop else it will run only once
-  // runHumanDetect(video, canvas);
+  // use one of the two:
+  await setupCamera();
+  // await detectSampleImages();
 }
 window.onload = main;
diff --git a/src/config.js b/src/config.js
index 1b4b00db..39448db0 100644
--- a/src/config.js
+++ b/src/config.js
@@ -1,65 +1,77 @@
+/* eslint-disable indent */
+/* eslint-disable no-multi-spaces */
+
 export default {
-  backend: 'webgl',
-  console: true,
+  backend: 'webgl', // select tfjs backend to use
+  console: true, // enable debugging output to console
   face: {
-    enabled: true, // refers to detector, but since all other face modules rely on detector, it should be a global
+    enabled: true, // controls if specified module is enabled
+    // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion
+    // (note: module is not loaded until it is required)
     detector: {
-      modelPath: '../models/blazeface/back/model.json', // can be blazeface-front or blazeface-back
-      inputSize: 256, // fixed value: 128 for front and tfhub and 256 for back
-      maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
-      skipFrames: 10, // how many frames to go without running the bounding box detector
-      minConfidence: 0.5, // threshold for discarding a prediction
-      iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-      scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+      modelPath: '../models/blazeface/back/model.json', // can be 'tfhub', 'front' or 'back'.
+      // 'front' is optimized for large faces such as a front-facing camera and 'back' is optimized for distant faces.
+      inputSize: 256, // fixed value: 128 for 'front' and 'tfhub', 256 for 'back'
+      maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance
+      skipFrames: 10, // how many frames to go without re-running the face bounding box detector
+      // if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
+      // as the face probably hasn't moved much in such a short time (10 * 1/25 = 0.25 sec)
+      minConfidence: 0.5, // threshold for discarding a prediction
+      iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+      scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     },
     mesh: {
       enabled: true,
       modelPath: '../models/facemesh/model.json',
-      inputSize: 192, // fixed value
+      inputSize: 192, // fixed value
     },
     iris: {
       enabled: true,
       modelPath: '../models/iris/model.json',
-      enlargeFactor: 2.3, // empiric tuning
-      inputSize: 64, // fixed value
+      enlargeFactor: 2.3, // empiric tuning
+      inputSize: 64, // fixed value
     },
     age: {
       enabled: true,
-      modelPath: '../models/ssrnet-age/imdb/model.json',
-      inputSize: 64, // fixed value
-      skipFrames: 10,
+      modelPath: '../models/ssrnet-age/imdb/model.json', // can be 'imdb' or 'wiki'
+      // which determines training set for model
+      inputSize: 64, // fixed value
+      skipFrames: 10, // how many frames to go without re-running the detector
     },
     gender: {
       enabled: true,
+      minConfidence: 0.8, // threshold for discarding a prediction
       modelPath: '../models/ssrnet-gender/imdb/model.json',
     },
     emotion: {
       enabled: true,
-      inputSize: 64, // fixed value
-      minConfidence: 0.5,
-      skipFrames: 10,
-      useGrayscale: true,
+      inputSize: 64, // fixed value
+      minConfidence: 0.5, // threshold for discarding a prediction
+      skipFrames: 10, // how many frames to go without re-running the detector
+      useGrayscale: true, // convert image to grayscale before prediction or use highest channel
       modelPath: '../models/emotion/model.json',
     },
   },
   body: {
     enabled: true,
     modelPath: '../models/posenet/model.json',
-    inputResolution: 257, // fixed value
-    outputStride: 16, // fixed value
-    maxDetections: 5,
-    scoreThreshold: 0.7,
-    nmsRadius: 20,
+    inputResolution: 257, // fixed value
+    outputStride: 16, // fixed value
+    maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
   },
   hand: {
     enabled: true,
-    inputSize: 256, // fixed value
-    skipFrames: 10,
-    minConfidence: 0.5,
-    iouThreshold: 0.3,
-    scoreThreshold: 0.7,
-    enlargeFactor: 1.65, // empiric tuning
-    maxHands: 2,
+    inputSize: 256, // fixed value
+    skipFrames: 10, // how many frames to go without re-running the hand bounding box detector
+    // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
+    // as the hand probably hasn't moved much in such a short time (10 * 1/25 = 0.25 sec)
+    minConfidence: 0.5, // threshold for discarding a prediction
+    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+    scoreThreshold: 0.7, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
+    maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
     detector: {
       anchors: '../models/handdetect/anchors.json',
       modelPath: '../models/handdetect/model.json',
diff --git a/src/index.js b/src/index.js
index 1ea656b2..2456104d 100644
--- a/src/index.js
+++ b/src/index.js
@@ -84,11 +84,11 @@ async function detect(input, userConfig) {
     await tf.ready();
   }
   // explictly enable depthwiseconv since it's diasabled by default due to issues with large shaders
-  let savedWebglPackDepthwiseConvFlag;
-  if (tf.getBackend() === 'webgl') {
-    savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
-    tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
-  }
+  // let savedWebglPackDepthwiseConvFlag;
+  // if (tf.getBackend() === 'webgl') {
+  //   savedWebglPackDepthwiseConvFlag = tf.env().get('WEBGL_PACK_DEPTHWISECONV');
+  //   tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
+  // }
   // load models if enabled
   if (config.face.enabled && !models.facemesh) models.facemesh = await facemesh.load(config.face);
@@ -149,6 +149,7 @@ async function detect(input, userConfig) {
         annotations: face.annotations,
         age: ssrData.age,
         gender: ssrData.gender,
+        agConfidence: ssrData.confidence,
         emotion: emotionData,
         iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
       });
@@ -157,7 +158,7 @@
   }
   // set depthwiseconv to original value
-  tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
+  // tf.env().set('WEBGL_PACK_DEPTHWISECONV', savedWebglPackDepthwiseConvFlag);
   // combine and return results
   perf.total = Object.values(perf).reduce((a, b) => a + b);
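
The README and demo changes above both rely on passing a partial configuration object as the second argument to `human.detect()`, which is deep-merged with the defaults in `src/config.js`. Below is a minimal sketch of that pattern for a single static image; the `analyzeStillImage` helper name and the `image` element id are illustrative (borrowed from the demo page), and it assumes the resolved result exposes a `face` array, as the demo's `drawFace()` usage suggests.

```js
import human from '../dist/human.esm.js';

// Sketch only: override just the values that matter for still images and let
// deep-merge fill in everything else from the defaults.
async function analyzeStillImage() {
  const image = document.getElementById('image'); // assumed element id, as in the demo page
  const result = await human.detect(image, {
    face: {
      detector: { skipFrames: 0 }, // static input: do not re-use a cached bounding box
      age: { skipFrames: 0 },
      emotion: { skipFrames: 0 },
    },
    hand: { skipFrames: 0 },
  });
  for (const face of result.face || []) {
    // agConfidence is the age/gender confidence added to results in src/index.js above
    console.log(face.gender, face.age, face.agConfidence, face.iris);
  }
  return result;
}
```

Setting every `skipFrames` to 0 matters here because, per the config comments, non-zero values re-use cached detections across frames, which only makes sense for video input.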