From 5ecc072f0f87c80131ba582ac44d21570099dfcb Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Fri, 6 Nov 2020 15:35:58 -0500
Subject: [PATCH] model tuning

---
 config.js                           | 22 ++++++++--------
 demo/browser.js                     |  5 ++--
 demo/menu.js                        |  2 +-
 src/age/{ssrnet.js => age.js}       |  0
 src/gender/{ssrnet.js => gender.js} | 39 +++++++++++++++++++++++------
 src/human.js                        | 12 ++++-----
 6 files changed, 53 insertions(+), 27 deletions(-)
 rename src/age/{ssrnet.js => age.js} (100%)
 rename src/gender/{ssrnet.js => gender.js} (53%)

diff --git a/config.js b/config.js
index 68381f3b..0f218540 100644
--- a/config.js
+++ b/config.js
@@ -56,9 +56,9 @@ export default {
     skipFrames: 15, // how many frames to go without re-running the face bounding box detector, only used for video inputs
                     // if model is running at 25 FPS, we can re-use existing bounding box for updated face mesh analysis
                     // as face probably hasn't moved much in short time (15 * 1/25 = 0.6 sec)
-    minConfidence: 0.5, // threshold for discarding a prediction
-    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-    scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    minConfidence: 0.1, // threshold for discarding a prediction
+    iouThreshold: 0.1, // threshold for deciding whether boxes overlap too much in non-maximum suppression (0.1 means drop if overlap 10%)
+    scoreThreshold: 0.1, // threshold for deciding when to remove boxes based on score in non-maximum suppression, this is applied on detection objects only and before minConfidence
   },
   mesh: {
     enabled: true,
@@ -73,20 +73,22 @@ export default {
   },
   age: {
     enabled: true,
-    modelPath: '../models/ssrnet-age-imdb.json', // can be 'imdb' or 'wiki'
+    modelPath: '../models/age-ssrnet-imdb.json', // can be 'age-ssrnet-imdb' or 'age-ssrnet-wiki' which determines training set for model
     inputSize: 64, // fixed value
     skipFrames: 15, // how many frames to go without re-running the detector, only used for video inputs
   },
   gender: {
     enabled: true,
-    minConfidence: 0.5, // threshold for discarding a prediction
-    modelPath: '../models/ssrnet-gender-imdb.json',
+    minConfidence: 0.1, // threshold for discarding a prediction
+    modelPath: '../models/gender-ssrnet-imdb.json', // can be 'gender', 'gender-ssrnet-imdb' or 'gender-ssrnet-wiki'
+    inputSize: 64, // fixed value
+    skipFrames: 15, // how many frames to go without re-running the detector, only used for video inputs
   },
   emotion: {
     enabled: true,
     inputSize: 64, // fixed value
-    minConfidence: 0.5, // threshold for discarding a prediction
+    minConfidence: 0.2, // threshold for discarding a prediction
     skipFrames: 15, // how many frames to go without re-running the detector
     modelPath: '../models/emotion-large.json', // can be 'mini', 'large'
   },
@@ -106,9 +108,9 @@ export default {
     skipFrames: 15, // how many frames to go without re-running the hand bounding box detector, only used for video inputs
                     // if model is running at 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis
                     // as the hand probably hasn't moved much in short time (15 * 1/25 = 0.6 sec)
-    minConfidence: 0.5, // threshold for discarding a prediction
-    iouThreshold: 0.3, // threshold for deciding whether boxes overlap too much in non-maximum suppression
-    scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression
+    minConfidence: 0.1, // threshold for discarding a prediction
+    iouThreshold: 0.2, // threshold for deciding whether boxes overlap too much in non-maximum suppression
+    scoreThreshold: 0.1, // threshold for deciding when to remove boxes based on score in non-maximum suppression
     enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace
     maxHands: 10, // maximum number of hands detected in the input, should be set to the minimum number for performance
     detector: {
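A note on the retuned thresholds: `iouThreshold` and `scoreThreshold` are consumed by TensorFlow.js non-maximum suppression, while `minConfidence` is applied to the predictions that survive it, which is why the config comment stresses that `scoreThreshold` runs before `minConfidence`. A minimal sketch of that two-stage flow; the `filterDetections` helper, the `boxes`/`scores` inputs, and the cap of 10 boxes are illustrative, not taken from this patch:

```js
const tf = require('@tensorflow/tfjs');

// illustrative two-stage filter: boxes is a [n, 4] tensor, scores a [n] tensor
async function filterDetections(boxes, scores, config) {
  // stage 1: NMS drops boxes overlapping more than iouThreshold (0.1 = 10%)
  // and boxes scoring below scoreThreshold
  const nms = await tf.image.nonMaxSuppressionAsync(
    boxes, scores, 10, // 10 is an illustrative cap on kept boxes
    config.face.detector.iouThreshold,
    config.face.detector.scoreThreshold);
  const indices = await nms.data();
  nms.dispose();
  // stage 2: minConfidence then discards surviving predictions individually
  const all = await scores.data();
  return Array.from(indices).filter((i) => all[i] >= config.face.detector.minConfidence);
}
```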
diff --git a/demo/browser.js b/demo/browser.js
index f2015ba8..90277c1e 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -69,7 +69,7 @@ function drawResults(input, result, canvas) {
   // console.log(result.performance);

   // eslint-disable-next-line no-use-before-define
-  requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop before we even draw results
+  if (input.srcObject) requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop before we even draw results

   // draw fps chart
   menu.updateChart('FPS', fps);
@@ -187,7 +187,7 @@ function runHumanDetect(input, canvas) {
   timeStamp = performance.now();
   // if live video
   const live = input.srcObject && (input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState > 2) && (!input.paused);
-  if (!live) {
+  if (!live && input.srcObject) {
     // if we want to continue and camera not ready, retry in 0.5sec, else just give up
     if ((input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState <= 2)) setTimeout(() => runHumanDetect(input, canvas), 500);
     else log(`camera not ready: track state: ${input.srcObject?.getVideoTracks()[0].readyState} stream state: ${input.readyState}`);
@@ -317,6 +317,7 @@ function setupMenu() {
   });
   menu.addRange('Min Confidence', human.config.face.detector, 'minConfidence', 0.0, 1.0, 0.05, (val) => {
     human.config.face.detector.minConfidence = parseFloat(val);
+    human.config.face.gender.minConfidence = parseFloat(val);
     human.config.face.emotion.minConfidence = parseFloat(val);
     human.config.hand.minConfidence = parseFloat(val);
   });
diff --git a/demo/menu.js b/demo/menu.js
index e3758785..9375859f 100644
--- a/demo/menu.js
+++ b/demo/menu.js
@@ -213,7 +213,7 @@ class Menu {
     el.innerHTML = `${title}`;
     this.container.appendChild(el);
     el.addEventListener('change', (evt) => {
-      object[variable] = evt.target.value;
+      object[variable] = parseInt(evt.target.value) === parseFloat(evt.target.value) ? parseInt(evt.target.value) : parseFloat(evt.target.value);
       evt.target.setAttribute('value', evt.target.value);
       if (callback) callback(evt.target.value);
     });
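The demo/menu.js change deserves a word: a range input always reports its value as a string, so the old assignment silently turned numeric config fields such as `skipFrames` into strings once the user touched a slider. The new ternary keeps integers as integers and fractions as floats. A standalone illustration of the coercion (the helper name is hypothetical, not part of the patch):

```js
// range inputs report strings; coerce back to the numeric type the config expects
function coerceRangeValue(value) {
  // '15'   -> parseInt (15) === parseFloat (15)   -> integer 15 (e.g. skipFrames)
  // '0.05' -> parseInt (0)  !== parseFloat (0.05) -> float 0.05 (e.g. minConfidence)
  return parseInt(value) === parseFloat(value) ? parseInt(value) : parseFloat(value);
}

console.log(coerceRangeValue('15'));   // 15
console.log(coerceRangeValue('0.05')); // 0.05
```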
diff --git a/src/age/ssrnet.js b/src/age/age.js
similarity index 100%
rename from src/age/ssrnet.js
rename to src/age/age.js
diff --git a/src/gender/ssrnet.js b/src/gender/gender.js
similarity index 53%
rename from src/gender/ssrnet.js
rename to src/gender/gender.js
index fcc5ec52..5524d18a 100644
--- a/src/gender/ssrnet.js
+++ b/src/gender/gender.js
@@ -4,17 +4,20 @@ const profile = require('../profile.js');
 const models = {};
 let last = { gender: '' };
 let frame = Number.MAX_SAFE_INTEGER;
+let alternative = false;

 // tuning values
 const zoom = [0, 0]; // 0..1 meaning 0%..100%
+const rgb = [0.2989, 0.5870, 0.1140]; // factors for red/green/blue colors when converting to grayscale

 async function load(config) {
   if (!models.gender) models.gender = await tf.loadGraphModel(config.face.gender.modelPath);
+  alternative = models.gender.inputs[0].shape[3] === 1;
   return models.gender;
 }

 async function predict(image, config) {
-  if ((frame < config.face.age.skipFrames) && last.gender !== '') {
+  if ((frame < config.face.gender.skipFrames) && last.gender !== '') {
     frame += 1;
     return last;
   }
@@ -26,9 +29,21 @@ async function predict(image, config) {
     (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
     (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
   ]];
-  const resize = tf.image.cropAndResize(image, box, [0], [config.face.age.inputSize, config.face.age.inputSize]);
+  const resize = tf.image.cropAndResize(image, box, [0], [config.face.gender.inputSize, config.face.gender.inputSize]);
+  let enhance;
+  if (alternative) {
+    enhance = tf.tidy(() => {
+      const [red, green, blue] = tf.split(resize, 3, 3);
+      const redNorm = tf.mul(red, rgb[0]);
+      const greenNorm = tf.mul(green, rgb[1]);
+      const blueNorm = tf.mul(blue, rgb[2]);
+      const grayscale = tf.addN([redNorm, greenNorm, blueNorm]);
+      return grayscale.sub(0.5).mul(2);
+    });
+  } else {
+    enhance = tf.mul(resize, [255.0]);
+  }
   // const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
-  const enhance = tf.mul(resize, [255.0]);
   tf.dispose(resize);

   let genderT;
@@ -46,10 +61,20 @@ async function predict(image, config) {

   if (genderT) {
     const data = genderT.dataSync();
-    const confidence = Math.trunc(Math.abs(1.9 * 100 * (data[0] - 0.5))) / 100;
-    if (confidence > config.face.gender.minConfidence) {
-      obj.gender = data[0] <= 0.5 ? 'female' : 'male';
-      obj.confidence = confidence;
+    if (alternative) {
+      // returns two values 0..1, bigger one is prediction
+      const confidence = Math.trunc(100 * Math.abs(data[0] - data[1])) / 100;
+      if (confidence > config.face.gender.minConfidence) {
+        obj.gender = data[0] > data[1] ? 'female' : 'male';
+        obj.confidence = confidence;
+      }
+    } else {
+      // returns one value 0..1, .5 is prediction threshold
+      const confidence = Math.trunc(200 * Math.abs((data[0] - 0.5))) / 100;
+      if (confidence > config.face.gender.minConfidence) {
+        obj.gender = data[0] <= 0.5 ? 'female' : 'male';
+        obj.confidence = confidence;
+      }
     }
   }
   genderT.dispose();
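To make the two confidence formulas in src/gender/gender.js concrete: the grayscale ('alternative') model emits two values whose gap is taken as confidence, while the original model emits a single value where distance from the 0.5 decision boundary is rescaled; the scaling factor also moves from 1.9 to 2.0, so a maximally certain output now maps to 1.0 instead of 0.95. A worked example under that reading (the helper name is hypothetical):

```js
// data comes from genderT.dataSync(); its length tells the model variant apart
function genderConfidence(data) {
  return data.length === 2
    ? Math.trunc(100 * Math.abs(data[0] - data[1])) / 100 // e.g. [0.92, 0.08] -> 0.84
    : Math.trunc(200 * Math.abs(data[0] - 0.5)) / 100;    // e.g. [0.10]       -> 0.80
}

console.log(genderConfidence([0.92, 0.08])); // 0.84, 'female' since data[0] > data[1]
console.log(genderConfidence([0.10]));       // 0.8,  'female' since data[0] <= 0.5
```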
diff --git a/src/human.js b/src/human.js
index 13b6ecab..69406544 100644
--- a/src/human.js
+++ b/src/human.js
@@ -1,7 +1,7 @@
 const tf = require('@tensorflow/tfjs');
 const facemesh = require('./face/facemesh.js');
-const age = require('./age/ssrnet.js');
-const gender = require('./gender/ssrnet.js');
+const age = require('./age/age.js');
+const gender = require('./gender/gender.js');
 const emotion = require('./emotion/emotion.js');
 const posenet = require('./body/posenet.js');
 const handpose = require('./hand/handpose.js');
@@ -13,8 +13,7 @@ const app = require('../package.json');

 // static config override for non-video detection
 const override = {
-  face: { detector: { skipFrames: 0 }, age: { skipFrames: 0 }, emotion: { skipFrames: 0 } },
-  hand: { skipFrames: 0 },
+  face: { detector: { skipFrames: 0 }, age: { skipFrames: 0 }, gender: { skipFrames: 0 }, emotion: { skipFrames: 0 } }, hand: { skipFrames: 0 },
 };

 // helper function: gets elapsed time on both browser and nodejs
@@ -46,7 +45,6 @@ class Human {
   constructor() {
     this.tf = tf;
     this.version = app.version;
-    this.defaults = defaults;
     this.config = defaults;
     this.fx = null;
     this.state = 'idle';
@@ -114,7 +112,7 @@ class Human {
   async load(userConfig) {
     this.state = 'load';
     const timeStamp = now();
-    if (userConfig) this.config = mergeDeep(defaults, userConfig);
+    if (userConfig) this.config = mergeDeep(this.config, userConfig);

     if (this.firstRun) {
       this.checkBackend(true);
@@ -300,7 +298,7 @@ class Human {
     let timeStamp;

     // update configuration
-    this.config = mergeDeep(defaults, userConfig);
+    this.config = mergeDeep(this.config, userConfig);
     if (!this.config.videoOptimized) this.config = mergeDeep(this.config, override);

     // sanity checks
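Finally, the src/human.js switch from `mergeDeep(defaults, userConfig)` to `mergeDeep(this.config, userConfig)` changes semantics: partial overrides now accumulate across `load()` and `detect()` calls instead of being rebased onto defaults every time (the patch also removes the `this.defaults` alias). A minimal sketch of that behavior, with an illustrative recursive merge standing in for the library's own `mergeDeep`, whose implementation is not shown in this patch:

```js
// illustrative deep merge; the library's actual mergeDeep may differ in details
function mergeDeep(target, source) {
  const out = { ...target };
  for (const key of Object.keys(source || {})) {
    const val = source[key];
    out[key] = (val && typeof val === 'object' && !Array.isArray(val))
      ? mergeDeep(target[key] || {}, val)
      : val;
  }
  return out;
}

let config = { face: { detector: { minConfidence: 0.1 }, enabled: true } };
config = mergeDeep(config, { face: { detector: { minConfidence: 0.5 } } });
config = mergeDeep(config, { hand: { skipFrames: 0 } }); // unrelated partial override
console.log(config.face.detector.minConfidence); // still 0.5: earlier override is sticky
```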