diff --git a/config.js b/config.js index c49c1999..32768142 100644 --- a/config.js +++ b/config.js @@ -5,20 +5,27 @@ export default { backend: 'webgl', // select tfjs backend to use console: true, // enable debugging output to console async: true, // execute enabled models in parallel - // this disables per-model performance data but slightly increases performance + // this disables per-model performance data but + // slightly increases performance // cannot be used if profiling is enabled profile: false, // enable tfjs profiling - // this has significant performance impact, only enable for debugging purposes + // this has significant performance impact + // only enable for debugging purposes // currently only implemented for age,gender,emotion models deallocate: false, // aggresively deallocate gpu memory after each usage - // only valid for webgl backend and only during first call, cannot be changed unless library is reloaded - // this has significant performance impact, only enable on low-memory devices + // only valid for webgl backend and only during first call + // cannot be changed unless library is reloaded + // this has significant performance impact + // only enable on low-memory devices scoped: false, // enable scoped runs - // some models *may* have memory leaks, this wrapps everything in a local scope at a cost of performance + // some models *may* have memory leaks, + // this wraps everything in a local scope at a cost of performance // typically not needed - videoOptimized: true, // perform additional optimizations when input is video, must be disabled for images + videoOptimized: true, // perform additional optimizations when input is video, + // must be disabled for images // basically this skips object box boundary detection for every n frames // while maintaining in-box detection since objects cannot move that fast + filter: { enabled: true, // enable image pre-processing filters width: 0, // resize input width
{ polaroid: false, // image polaroid camera effect pixelate: 0, // range: 0 (no pixelate) to N (number of pixels to pixelate) }, + gesture: { enabled: true, // enable simple gesture recognition }, + face: { enabled: true, // controls if specified modul is enabled - // face.enabled is required for all face models: detector, mesh, iris, age, gender, emotion + // face.enabled is required for all face models: + // detector, mesh, iris, age, gender, emotion // (note: module is not loaded until it is required) detector: { modelPath: '../models/blazeface-back.json', // can be 'front' or 'back'. - // 'front' is optimized for large faces such as front-facing camera and 'back' is optimized for distanct faces. + // 'front' is optimized for large faces + // such as front-facing camera and + // 'back' is optimized for distant faces. inputSize: 256, // fixed value: 128 for front and 256 for 'back' - maxFaces: 10, // maximum number of faces detected in the input, should be set to the minimum number for performance - skipFrames: 15, // how many frames to go without re-running the face bounding box detector, only used for video inputs - // if model is running st 25 FPS, we can re-use existing bounding box for updated face mesh analysis - // as face probably hasn't moved much in short time (10 * 1/25 = 0.25 sec) + maxFaces: 10, // maximum number of faces detected in the input + // should be set to the minimum number for performance + skipFrames: 15, // how many frames to go without re-running the face bounding box detector + // only used for video inputs + // e.g., if model is running at 25 FPS, we can re-use existing bounding + // box for updated face analysis as the head probably hasn't moved much + // in short time (10 * 1/25 = 0.25 sec) minConfidence: 0.1, // threshold for discarding a prediction - iouThreshold: 0.1, // threshold for deciding whether boxes overlap too much in non-maximum suppression (0.1 means drop if overlap 10%) - scoreThreshold: 0.2, // threshold for 
deciding when to remove boxes based on score in non-maximum suppression, this is applied on detection objects only and before minConfidence + iouThreshold: 0.1, // threshold for deciding whether boxes overlap too much in + // non-maximum suppression (0.1 means drop if overlap 10%) + scoreThreshold: 0.2, // threshold for deciding when to remove boxes based on score + // in non-maximum suppression, + // this is applied on detection objects only and before minConfidence }, + mesh: { enabled: true, modelPath: '../models/facemesh.json', inputSize: 192, // fixed value }, + iris: { enabled: true, modelPath: '../models/iris.json', - enlargeFactor: 2.3, // empiric tuning inputSize: 64, // fixed value }, + age: { enabled: true, modelPath: '../models/age-ssrnet-imdb.json', // can be 'age-ssrnet-imdb' or 'age-ssrnet-wiki' // which determines training set for model inputSize: 64, // fixed value - skipFrames: 15, // how many frames to go without re-running the detector, only used for video inputs + skipFrames: 15, // how many frames to go without re-running the detector + // only used for video inputs }, + gender: { enabled: true, minConfidence: 0.1, // threshold for discarding a prediction modelPath: '../models/gender-ssrnet-imdb.json', // can be 'gender', 'gender-ssrnet-imdb' or 'gender-ssrnet-wiki' inputSize: 64, // fixed value - skipFrames: 15, // how many frames to go without re-running the detector, only used for video inputs + skipFrames: 15, // how many frames to go without re-running the detector + // only used for video inputs }, + emotion: { enabled: true, inputSize: 64, // fixed value @@ -93,26 +117,33 @@ export default { modelPath: '../models/emotion-large.json', // can be 'mini', 'large' }, }, + body: { enabled: true, modelPath: '../models/posenet.json', inputResolution: 257, // fixed value - outputStride: 16, // fixed value - maxDetections: 10, // maximum number of people detected in the input, should be set to the minimum number for performance - scoreThreshold: 
0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression + maxDetections: 10, // maximum number of people detected in the input + // should be set to the minimum number for performance + scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score + // in non-maximum suppression nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression }, + hand: { enabled: true, inputSize: 256, // fixed value - skipFrames: 15, // how many frames to go without re-running the hand bounding box detector, only used for video inputs - // if model is running st 25 FPS, we can re-use existing bounding box for updated hand skeleton analysis - // as the hand probably hasn't moved much in short time (10 * 1/25 = 0.25 sec) + skipFrames: 15, // how many frames to go without re-running the hand bounding box detector + // only used for video inputs + // e.g., if model is running at 25 FPS, we can re-use existing bounding + // box for updated hand skeleton analysis as the hand probably + // hasn't moved much in short time (10 * 1/25 = 0.25 sec) minConfidence: 0.5, // threshold for discarding a prediction - iouThreshold: 0.1, // threshold for deciding whether boxes overlap too much in non-maximum suppression - scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on score in non-maximum suppression - enlargeFactor: 1.65, // empiric tuning as skeleton prediction prefers hand box with some whitespace - maxHands: 1, // maximum number of hands detected in the input, should be set to the minimum number for performance + iouThreshold: 0.1, // threshold for deciding whether boxes overlap too much + // in non-maximum suppression + scoreThreshold: 0.8, // threshold for deciding when to remove boxes based on + // score in non-maximum suppression + maxHands: 1, // maximum number of hands detected in the input + // should be set to the minimum number for performance landmarks: true, // detect hand 
landmarks or just hand boundary box detector: { modelPath: '../models/handdetect.json', diff --git a/demo/browser.js b/demo/browser.js index a5e436e1..2e5a0d7b 100644 --- a/demo/browser.js +++ b/demo/browser.js @@ -12,6 +12,7 @@ const ui = { baseFontProto: 'small-caps {size} "Segoe UI"', baseLineWidth: 12, baseLineHeightProto: 2, + crop: true, columns: 2, busy: false, facing: true, @@ -21,7 +22,7 @@ const ui = { drawBoxes: true, drawPoints: false, drawPolygons: true, - fillPolygons: true, + fillPolygons: false, useDepth: true, console: true, maxFrames: 10, @@ -132,7 +133,7 @@ async function setupCamera() { audio: false, video: { facingMode: (ui.facing ? 'user' : 'environment'), - resizeMode: 'none', + resizeMode: ui.crop ? 'crop-and-scale' : 'none', width: { ideal: window.innerWidth }, height: { ideal: window.innerHeight }, }, @@ -206,7 +207,8 @@ function runHumanDetect(input, canvas) { const live = input.srcObject && (input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState > 2) && (!input.paused); if (!live && input.srcObject) { // if we want to continue and camera not ready, retry in 0.5sec, else just give up - if ((input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState <= 2)) setTimeout(() => runHumanDetect(input, canvas), 500); + if (input.paused) log('camera paused'); + else if ((input.srcObject.getVideoTracks()[0].readyState === 'live') && (input.readyState <= 2)) setTimeout(() => runHumanDetect(input, canvas), 500); else log(`camera not ready: track state: ${input.srcObject?.getVideoTracks()[0].readyState} stream state: ${input.readyState}`); return; } @@ -223,7 +225,6 @@ function runHumanDetect(input, canvas) { human.detect(input).then((result) => { if (result.error) log(result.error); else drawResults(input, result, canvas); - if (human.config.profile) log('profile data:', human.profile()); }); } } @@ -300,48 +301,48 @@ function setupMenu() { document.getElementById('play').addEventListener('click', () => 
btn.click()); menu.addHTML('
'); - menu.addList('Backend', ['cpu', 'webgl', 'wasm', 'webgpu'], human.config.backend, (val) => human.config.backend = val); - menu.addBool('Async Operations', human.config, 'async', (val) => human.config.async = val); - menu.addBool('Enable Profiler', human.config, 'profile', (val) => human.config.profile = val); - menu.addBool('Memory Shield', human.config, 'deallocate', (val) => human.config.deallocate = val); - menu.addBool('Use Web Worker', ui, 'useWorker'); + menu.addList('backend', ['cpu', 'webgl', 'wasm', 'webgpu'], human.config.backend, (val) => human.config.backend = val); + menu.addBool('async operations', human.config, 'async', (val) => human.config.async = val); + menu.addBool('enable profiler', human.config, 'profile', (val) => human.config.profile = val); + menu.addBool('memory shield', human.config, 'deallocate', (val) => human.config.deallocate = val); + menu.addBool('use web worker', ui, 'useWorker'); menu.addHTML('
'); - menu.addLabel('Enabled Models'); - menu.addBool('Face Detect', human.config.face, 'enabled'); - menu.addBool('Face Mesh', human.config.face.mesh, 'enabled'); - menu.addBool('Face Iris', human.config.face.iris, 'enabled'); - menu.addBool('Face Age', human.config.face.age, 'enabled'); - menu.addBool('Face Gender', human.config.face.gender, 'enabled'); - menu.addBool('Face Emotion', human.config.face.emotion, 'enabled'); - menu.addBool('Body Pose', human.config.body, 'enabled'); - menu.addBool('Hand Pose', human.config.hand, 'enabled'); - menu.addBool('Gesture Analysis', human.config.gesture, 'enabled'); + menu.addLabel('enabled models'); + menu.addBool('face detect', human.config.face, 'enabled'); + menu.addBool('face mesh', human.config.face.mesh, 'enabled'); + menu.addBool('face iris', human.config.face.iris, 'enabled'); + menu.addBool('face age', human.config.face.age, 'enabled'); + menu.addBool('face gender', human.config.face.gender, 'enabled'); + menu.addBool('face emotion', human.config.face.emotion, 'enabled'); + menu.addBool('body pose', human.config.body, 'enabled'); + menu.addBool('hand pose', human.config.hand, 'enabled'); + menu.addBool('gesture analysis', human.config.gesture, 'enabled'); menu.addHTML('
'); - menu.addLabel('Model Parameters'); - menu.addRange('Max Objects', human.config.face.detector, 'maxFaces', 1, 50, 1, (val) => { + menu.addLabel('model parameters'); + menu.addRange('max objects', human.config.face.detector, 'maxFaces', 1, 50, 1, (val) => { human.config.face.detector.maxFaces = parseInt(val); human.config.body.maxDetections = parseInt(val); human.config.hand.maxHands = parseInt(val); }); - menu.addRange('Skip Frames', human.config.face.detector, 'skipFrames', 0, 50, 1, (val) => { + menu.addRange('skip frames', human.config.face.detector, 'skipFrames', 0, 50, 1, (val) => { human.config.face.detector.skipFrames = parseInt(val); human.config.face.emotion.skipFrames = parseInt(val); human.config.face.age.skipFrames = parseInt(val); human.config.hand.skipFrames = parseInt(val); }); - menu.addRange('Min Confidence', human.config.face.detector, 'minConfidence', 0.0, 1.0, 0.05, (val) => { + menu.addRange('min confidence', human.config.face.detector, 'minConfidence', 0.0, 1.0, 0.05, (val) => { human.config.face.detector.minConfidence = parseFloat(val); human.config.face.gender.minConfidence = parseFloat(val); human.config.face.emotion.minConfidence = parseFloat(val); human.config.hand.minConfidence = parseFloat(val); }); - menu.addRange('Score Threshold', human.config.face.detector, 'scoreThreshold', 0.1, 1.0, 0.05, (val) => { + menu.addRange('score threshold', human.config.face.detector, 'scoreThreshold', 0.1, 1.0, 0.05, (val) => { human.config.face.detector.scoreThreshold = parseFloat(val); human.config.hand.scoreThreshold = parseFloat(val); human.config.body.scoreThreshold = parseFloat(val); }); - menu.addRange('IOU Threshold', human.config.face.detector, 'iouThreshold', 0.1, 1.0, 0.05, (val) => { + menu.addRange('overlap', human.config.face.detector, 'iouThreshold', 0.1, 1.0, 0.05, (val) => { human.config.face.detector.iouThreshold = parseFloat(val); human.config.hand.iouThreshold = parseFloat(val); }); @@ -350,31 +351,32 @@ function setupMenu() { 
menu.addChart('FPS', 'FPS'); menuFX = new Menu(document.body, '...', { top: '1rem', right: '18rem' }); - menuFX.addLabel('UI Options'); - menuFX.addBool('Camera Front/Back', ui, 'facing', () => setupCamera()); - menuFX.addBool('Use 3D Depth', ui, 'useDepth'); - menuFX.addBool('Draw Boxes', ui, 'drawBoxes'); - menuFX.addBool('Draw Points', ui, 'drawPoints'); - menuFX.addBool('Draw Polygons', ui, 'drawPolygons'); + menuFX.addLabel('ui options'); + menuFX.addBool('crop & scale', ui, 'crop', () => setupCamera()); + menuFX.addBool('camera front/back', ui, 'facing', () => setupCamera()); + menuFX.addBool('use 3D depth', ui, 'useDepth'); + menuFX.addBool('draw boxes', ui, 'drawBoxes'); + menuFX.addBool('draw polygons', ui, 'drawPolygons'); menuFX.addBool('Fill Polygons', ui, 'fillPolygons'); + menuFX.addBool('draw points', ui, 'drawPoints'); menuFX.addHTML('
'); - menuFX.addLabel('Image Processing'); - menuFX.addBool('Enabled', human.config.filter, 'enabled'); - ui.menuWidth = menuFX.addRange('Image width', human.config.filter, 'width', 0, 3840, 10, (val) => human.config.filter.width = parseInt(val)); - ui.menuHeight = menuFX.addRange('Image height', human.config.filter, 'height', 0, 2160, 10, (val) => human.config.filter.height = parseInt(val)); - menuFX.addRange('Brightness', human.config.filter, 'brightness', -1.0, 1.0, 0.05, (val) => human.config.filter.brightness = parseFloat(val)); - menuFX.addRange('Contrast', human.config.filter, 'contrast', -1.0, 1.0, 0.05, (val) => human.config.filter.contrast = parseFloat(val)); - menuFX.addRange('Sharpness', human.config.filter, 'sharpness', 0, 1.0, 0.05, (val) => human.config.filter.sharpness = parseFloat(val)); - menuFX.addRange('Blur', human.config.filter, 'blur', 0, 20, 1, (val) => human.config.filter.blur = parseInt(val)); - menuFX.addRange('Saturation', human.config.filter, 'saturation', -1.0, 1.0, 0.05, (val) => human.config.filter.saturation = parseFloat(val)); - menuFX.addRange('Hue', human.config.filter, 'hue', 0, 360, 5, (val) => human.config.filter.hue = parseInt(val)); - menuFX.addRange('Pixelate', human.config.filter, 'pixelate', 0, 32, 1, (val) => human.config.filter.pixelate = parseInt(val)); - menuFX.addBool('Negative', human.config.filter, 'negative'); - menuFX.addBool('Sepia', human.config.filter, 'sepia'); - menuFX.addBool('Vintage', human.config.filter, 'vintage'); - menuFX.addBool('Kodachrome', human.config.filter, 'kodachrome'); - menuFX.addBool('Technicolor', human.config.filter, 'technicolor'); - menuFX.addBool('Polaroid', human.config.filter, 'polaroid'); + menuFX.addLabel('image processing'); + menuFX.addBool('enabled', human.config.filter, 'enabled'); + ui.menuWidth = menuFX.addRange('image width', human.config.filter, 'width', 0, 3840, 10, (val) => human.config.filter.width = parseInt(val)); + ui.menuHeight = menuFX.addRange('image height', 
human.config.filter, 'height', 0, 2160, 10, (val) => human.config.filter.height = parseInt(val)); + menuFX.addRange('brightness', human.config.filter, 'brightness', -1.0, 1.0, 0.05, (val) => human.config.filter.brightness = parseFloat(val)); + menuFX.addRange('contrast', human.config.filter, 'contrast', -1.0, 1.0, 0.05, (val) => human.config.filter.contrast = parseFloat(val)); + menuFX.addRange('sharpness', human.config.filter, 'sharpness', 0, 1.0, 0.05, (val) => human.config.filter.sharpness = parseFloat(val)); + menuFX.addRange('blur', human.config.filter, 'blur', 0, 20, 1, (val) => human.config.filter.blur = parseInt(val)); + menuFX.addRange('saturation', human.config.filter, 'saturation', -1.0, 1.0, 0.05, (val) => human.config.filter.saturation = parseFloat(val)); + menuFX.addRange('hue', human.config.filter, 'hue', 0, 360, 5, (val) => human.config.filter.hue = parseInt(val)); + menuFX.addRange('pixelate', human.config.filter, 'pixelate', 0, 32, 1, (val) => human.config.filter.pixelate = parseInt(val)); + menuFX.addBool('negative', human.config.filter, 'negative'); + menuFX.addBool('sepia', human.config.filter, 'sepia'); + menuFX.addBool('vintage', human.config.filter, 'vintage'); + menuFX.addBool('kodachrome', human.config.filter, 'kodachrome'); + menuFX.addBool('technicolor', human.config.filter, 'technicolor'); + menuFX.addBool('polaroid', human.config.filter, 'polaroid'); } async function main() { diff --git a/demo/menu.js b/demo/menu.js index a5fb0b51..7067e89e 100644 --- a/demo/menu.js +++ b/demo/menu.js @@ -29,7 +29,7 @@ function createCSS() { .menu-item { display: flex; white-space: nowrap; padding: 0.2rem; width: max-content; cursor: default; } .menu-title { text-align: right; cursor: pointer; } .menu-hr { margin: 0.2rem; border: 1px solid rgba(0, 0, 0, 0.5) } - .menu-label { padding: 0; } + .menu-label { padding: 0; font-weight: 800; } .menu-list { margin-right: 0.8rem; } select:focus { outline: none; } diff --git a/src/emotion/emotion.js 
b/src/emotion/emotion.js index b7ed811d..771aca50 100644 --- a/src/emotion/emotion.js +++ b/src/emotion/emotion.js @@ -58,7 +58,7 @@ async function predict(image, config) { data = emotionT.dataSync(); tf.dispose(emotionT); } else { - const profileData = await tf.profile(() => models.emotion.predict(grayscale)); + const profileData = await tf.profile(() => models.emotion.predict(normalize)); data = profileData.result.dataSync(); profileData.result.dispose(); profile.run('emotion', profileData); diff --git a/src/hand/handdetector.js b/src/hand/handdetector.js index 66d57628..5170f93c 100644 --- a/src/hand/handdetector.js +++ b/src/hand/handdetector.js @@ -57,6 +57,7 @@ class HandDetector { rawBoxes.dispose(); const filteredT = await tf.image.nonMaxSuppressionAsync(boxes, scores, config.maxHands, config.iouThreshold, config.scoreThreshold); const filtered = filteredT.arraySync(); + scores.dispose(); filteredT.dispose(); const hands = []; diff --git a/src/human.js b/src/human.js index 4ea797d0..b23fa91d 100644 --- a/src/human.js +++ b/src/human.js @@ -134,12 +134,12 @@ class Human { this.models.gender || gender.load(this.config), this.models.emotion || emotion.load(this.config), this.models.facemesh || facemesh.load(this.config.face), - this.models.posenet || posenet.load(this.config.body), + this.models.posenet || posenet.load(this.config), this.models.handpose || handpose.load(this.config.hand), ]); } else { if (this.config.face.enabled && !this.models.facemesh) this.models.facemesh = await facemesh.load(this.config.face); - if (this.config.body.enabled && !this.models.posenet) this.models.posenet = await posenet.load(this.config.body); + if (this.config.body.enabled && !this.models.posenet) this.models.posenet = await posenet.load(this.config); if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config.hand); if (this.config.face.enabled && this.config.face.age.enabled && !this.models.age) this.models.age = await 
age.load(this.config); if (this.config.face.enabled && this.config.face.gender.enabled && !this.models.gender) this.models.gender = await gender.load(this.config); @@ -327,12 +327,12 @@ class Human { // run posenet this.analyze('Start Body:'); if (this.config.async) { - poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(process.tensor, this.config.body) : []; + poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(process.tensor, this.config) : []; if (this.perf.body) delete this.perf.body; } else { this.state = 'run:body'; timeStamp = now(); - poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(process.tensor, this.config.body) : []; + poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(process.tensor, this.config) : []; this.perf.body = Math.trunc(now() - timeStamp); } this.analyze('End Body:'); diff --git a/src/profile.js b/src/profile.js index 1dacbc89..f5cddce7 100644 --- a/src/profile.js +++ b/src/profile.js @@ -18,7 +18,8 @@ function profile(name, data) { if (largest.length > maxResults) largest.length = maxResults; const res = { newBytes: data.newBytes, newTensors: data.newTensors, peakBytes: data.peakBytes, numKernelOps: data.kernels.length, timeKernelOps: time, slowestKernelOps: slowest, largestKernelOps: largest }; profileData[name] = res; + // eslint-disable-next-line no-console + console.log('Human profiler', name, res); } exports.run = profile; -exports.data = profileData; diff --git a/wiki b/wiki index cb7e64e4..e73a55ab 160000 --- a/wiki +++ b/wiki @@ -1 +1 @@ -Subproject commit cb7e64e4ff87f5288d9a9e2b4250c33e74911c68 +Subproject commit e73a55ab96efd7d1672d4c71dcef27dd1bee9f1d