From d5b620dbe8e845c3f3856deb65f915091801ded7 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Thu, 11 Mar 2021 10:26:14 -0500
Subject: [PATCH] autodetect inputSizes

---
 config.js                    | 12 +-----------
 demo/browser.js              | 17 ++++++-----------
 demo/node.js                 |  6 +++---
 package.json                 |  2 +-
 src/age/age.ts               | 12 +-----------
 src/emotion/emotion.ts       | 12 +-----------
 src/gender/gender.ts         |  2 +-
 src/handpose/handdetector.ts |  6 ++++--
 src/handpose/handpose.ts     |  4 ++--
 src/human.ts                 | 14 +++++++-------
 10 files changed, 27 insertions(+), 60 deletions(-)

diff --git a/config.js b/config.js
index 855b33bf..f3b6c57a 100644
--- a/config.js
+++ b/config.js
@@ -67,7 +67,6 @@ export default {
     // (note: module is not loaded until it is required)
     detector: {
       modelPath: '../models/blazeface-back.json',
-      inputSize: 256, // fixed value
       rotation: true, // use best-guess rotated face image or just box with rotation as-is
                       // false means higher performance, but incorrect mesh mapping if face angle is above 20 degrees
                       // this parameter is not valid in nodejs
@@ -91,19 +90,16 @@
     mesh: {
       enabled: true,
       modelPath: '../models/facemesh.json',
-      inputSize: 192, // fixed value
     },

     iris: {
       enabled: true,
       modelPath: '../models/iris.json',
-      inputSize: 64, // fixed value
     },

     age: {
       enabled: true,
-      modelPath: '../models/age-ssrnet-imdb.json',
-      inputSize: 64, // fixed value
+      modelPath: '../models/age.json',
       skipFrames: 31, // how many frames to go without re-running the detector
                       // only used for video inputs
     },
@@ -112,14 +108,12 @@
       enabled: true,
       minConfidence: 0.1, // threshold for discarding a prediction
       modelPath: '../models/gender.json', // can be 'gender' or 'gender-ssrnet-imdb'
-      inputSize: 64, // fixed value
       skipFrames: 32, // how many frames to go without re-running the detector
                       // only used for video inputs
     },

     emotion: {
       enabled: true,
-      inputSize: 64, // fixed value
       minConfidence: 0.1, // threshold for discarding a prediction
       skipFrames: 33, // how many frames to go without re-running the detector
       modelPath: '../models/emotion.json',
@@ -127,7 +121,6 @@
     embedding: {
       enabled: false,
-      inputSize: 112, // fixed value
       modelPath: '../models/mobilefacenet.json',
     },
   },

@@ -135,7 +128,6 @@
   body: {
     enabled: true,
     modelPath: '../models/posenet.json', // can be 'posenet' or 'blazepose'
-    inputSize: 257, // fixed value, 257 for posenet and 256 for blazepose
     maxDetections: 10, // maximum number of people detected in the input
                        // should be set to the minimum number for performance
                        // only valid for posenet as blazepose only detects single pose
@@ -144,14 +136,12 @@
                        // only valid for posenet as blazepose only detects single pose
     nmsRadius: 20, // radius for deciding points are too close in non-maximum suppression
                    // only valid for posenet as blazepose only detects single pose
-    modelType: 'posenet-mobilenet', // can be 'posenet-mobilenet', 'posenet-resnet', 'blazepose'
   },

   hand: {
     enabled: true,
     rotation: false, // use best-guess rotated hand image or just box with rotation as-is
                      // false means higher performance, but incorrect finger mapping if hand is inverted
-    inputSize: 256, // fixed value
     skipFrames: 12, // how many frames to go without re-running the hand bounding box detector
                     // only used for video inputs
                     // e.g., if model is running st 25 FPS, we can re-use existing bounding
diff --git a/demo/browser.js b/demo/browser.js
index 045372ed..e4294947 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -3,20 +3,18 @@
 import Human from '../src/human';
 import Menu from './menu.js';
 import GLBench from './gl-bench.js';

-const userConfig = { backend: 'webgl' }; // add any user configuration overrides
+// const userConfig = { backend: 'webgl' }; // add any user configuration overrides

-/*
 const userConfig = {
-  backend: 'wasm',
+  backend: 'webgl',
   async: false,
-  warmup: 'none',
+  warmup: 'face',
   videoOptimized: false,
-  face: { enabled: true, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: false } },
+  face: { enabled: true, mesh: { enabled: false }, iris: { enabled: false }, age: { enabled: false }, gender: { enabled: false }, emotion: { enabled: false }, embedding: { enabled: true } },
   hand: { enabled: false },
   gesture: { enabled: false },
-  body: { enabled: false, modelType: 'blazepose', modelPath: '../models/blazepose.json' },
+  body: { enabled: false, modelPath: '../models/blazepose.json' },
 };
-*/

 const human = new Human(userConfig);
@@ -40,7 +38,7 @@
   detectFPS: [], // internal, holds fps values for detection performance
   drawFPS: [], // internal, holds fps values for draw performance
   buffered: false, // experimental, should output be buffered between frames
-  drawWarmup: false, // debug only, should warmup image processing be displayed on startup
+  drawWarmup: true, // debug only, should warmup image processing be displayed on startup
   drawThread: null, // internl, perform draw operations in a separate thread
   detectThread: null, // internl, perform detect operations in a separate thread
   framesDraw: 0, // internal, statistics on frames drawn
@@ -104,9 +102,6 @@
   if (ui.drawFPS.length > ui.maxFPSframes) ui.drawFPS.shift();
   lastDraw = performance.now();

-  // enable for continous performance monitoring
-  // console.log(result.performance);
-
   // draw fps chart
   await menu.process.updateChart('FPS', ui.detectFPS);

diff --git a/demo/node.js b/demo/node.js
index 7d0a83ce..afe00a53 100644
--- a/demo/node.js
+++ b/demo/node.js
@@ -18,12 +18,12 @@ const myConfig = {
     detector: { modelPath: 'file://models/blazeface-back.json', enabled: true },
     mesh: { modelPath: 'file://models/facemesh.json', enabled: true },
     iris: { modelPath: 'file://models/iris.json', enabled: true },
-    age: { modelPath: 'file://models/age-ssrnet-imdb.json', enabled: true },
+    age: { modelPath: 'file://models/age.json', enabled: true },
     gender: { modelPath: 'file://models/gender.json', enabled: true },
     emotion: { modelPath: 'file://models/emotion.json', enabled: true },
   },
-  // body: { modelPath: 'file://models/blazepose.json', modelType: 'blazepose', inputSize: 256, enabled: true },
-  body: { modelPath: 'file://models/posenet.json', modelType: 'posenet', inputSize: 257, enabled: true },
+  // body: { modelPath: 'file://models/blazepose.json', modelType: 'blazepose', enabled: true },
+  body: { modelPath: 'file://models/posenet.json', modelType: 'posenet', enabled: true },
   hand: {
     enabled: true,
     detector: { modelPath: 'file://models/handdetect.json' },
diff --git a/package.json b/package.json
index baa9a624..d7059415 100644
--- a/package.json
+++ b/package.json
@@ -68,7 +68,7 @@
     "eslint-plugin-node": "^11.1.0",
     "eslint-plugin-promise": "^4.3.1",
     "rimraf": "^3.0.2",
-    "simple-git": "^2.36.1",
+    "simple-git": "^2.36.2",
     "tslib": "^2.1.0",
     "typescript": "^4.2.3"
   }
diff --git a/src/age/age.ts b/src/age/age.ts
index b869e197..404b4fc4 100644
--- a/src/age/age.ts
+++ b/src/age/age.ts
@@ -23,17 +23,7 @@ export async function predict(image, config) {
   if (config.videoOptimized) skipped = 0;
   else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
-    /*
-    const zoom = [0, 0]; // 0..1 meaning 0%..100%
-    const box = [[
-      (image.shape[1] * zoom[0]) / image.shape[1],
-      (image.shape[2] * zoom[1]) / image.shape[2],
-      (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
-      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
-    ]];
-    const resize = tf.image.cropAndResize(image, box, [0], [config.face.age.inputSize, config.face.age.inputSize]);
-    */
-    const resize = tf.image.resizeBilinear(image, [config.face.age.inputSize, config.face.age.inputSize], false);
+    const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     const enhance = tf.mul(resize, [255.0]);
     tf.dispose(resize);

diff --git a/src/emotion/emotion.ts b/src/emotion/emotion.ts
index ea1a3b62..5c0744da 100644
--- a/src/emotion/emotion.ts
+++ b/src/emotion/emotion.ts
@@ -27,17 +27,7 @@ export async function predict(image, config) {
   if (config.videoOptimized) skipped = 0;
   else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
-    /*
-    const zoom = [0, 0]; // 0..1 meaning 0%..100%
-    const box = [[
-      (image.shape[1] * zoom[0]) / image.shape[1],
-      (image.shape[2] * zoom[1]) / image.shape[2],
-      (image.shape[1] - (image.shape[1] * zoom[0])) / image.shape[1],
-      (image.shape[2] - (image.shape[2] * zoom[1])) / image.shape[2],
-    ]];
-    const resize = tf.image.cropAndResize(image, box, [0], [config.face.emotion.inputSize, config.face.emotion.inputSize]);
-    */
-    const resize = tf.image.resizeBilinear(image, [config.face.emotion.inputSize, config.face.emotion.inputSize], false);
+    const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     const [red, green, blue] = tf.split(resize, 3, 3);
     resize.dispose();
     // weighted rgb to grayscale: https://www.mathworks.com/help/matlab/ref/rgb2gray.html
diff --git a/src/gender/gender.ts b/src/gender/gender.ts
index 88a73986..72733477 100644
--- a/src/gender/gender.ts
+++ b/src/gender/gender.ts
@@ -28,7 +28,7 @@ export async function predict(image, config) {
   if (config.videoOptimized) skipped = 0;
   else skipped = Number.MAX_SAFE_INTEGER;
   return new Promise(async (resolve) => {
-    const resize = tf.image.resizeBilinear(image, [config.face.gender.inputSize, config.face.gender.inputSize], false);
+    const resize = tf.image.resizeBilinear(image, [model.inputs[0].shape[2], model.inputs[0].shape[1]], false);
     let enhance;
     if (alternative) {
       enhance = tf.tidy(() => {
diff --git a/src/handpose/handdetector.ts b/src/handpose/handdetector.ts
index 27d50a63..2d861b2d 100644
--- a/src/handpose/handdetector.ts
+++ b/src/handpose/handdetector.ts
@@ -5,6 +5,7 @@ export class HandDetector {
   model: any;
   anchors: any;
   anchorsTensor: any;
+  inputSize: number;
   inputSizeTensor: any;
   doubleInputSizeTensor: any;

@@ -12,6 +13,7 @@
     this.model = model;
     this.anchors = anchorsAnnotated.map((anchor) => [anchor.x_center, anchor.y_center]);
     this.anchorsTensor = tf.tensor2d(this.anchors);
+    this.inputSize = inputSize;
     this.inputSizeTensor = tf.tensor1d([inputSize, inputSize]);
     this.doubleInputSizeTensor = tf.tensor1d([inputSize * 2, inputSize * 2]);
   }
@@ -67,7 +69,7 @@
   async estimateHandBounds(input, config) {
     const inputHeight = input.shape[1];
     const inputWidth = input.shape[2];
-    const image = tf.tidy(() => input.resizeBilinear([config.hand.inputSize, config.hand.inputSize]).div(127.5).sub(1));
+    const image = tf.tidy(() => input.resizeBilinear([this.inputSize, this.inputSize]).div(127.5).sub(1));
     const predictions = await this.getBoxes(image, config);
     image.dispose();
     const hands: Array<{}> = [];
@@ -79,7 +81,7 @@
       const palmLandmarks = prediction.palmLandmarks.arraySync();
       prediction.box.dispose();
       prediction.palmLandmarks.dispose();
-      hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / config.hand.inputSize, inputHeight / config.hand.inputSize]));
+      hands.push(box.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks, confidence: prediction.confidence }, [inputWidth / this.inputSize, inputHeight / this.inputSize]));
     }
     return hands;
   }
diff --git a/src/handpose/handpose.ts b/src/handpose/handpose.ts
index 4595798d..80b6f758 100644
--- a/src/handpose/handpose.ts
+++ b/src/handpose/handpose.ts
@@ -54,8 +54,8 @@ export async function load(config) {
     config.hand.enabled ? tf.loadGraphModel(config.hand.detector.modelPath, { fromTFHub: config.hand.detector.modelPath.includes('tfhub.dev') }) : null,
     config.hand.landmarks ? tf.loadGraphModel(config.hand.skeleton.modelPath, { fromTFHub: config.hand.skeleton.modelPath.includes('tfhub.dev') }) : null,
   ]);
-  const handDetector = new handdetector.HandDetector(handDetectorModel, config.hand.inputSize, anchors.anchors);
-  const handPipeline = new handpipeline.HandPipeline(handDetector, handPoseModel, config.hand.inputSize);
+  const handDetector = new handdetector.HandDetector(handDetectorModel, handDetectorModel?.inputs[0].shape[2], anchors.anchors);
+  const handPipeline = new handpipeline.HandPipeline(handDetector, handPoseModel, handPoseModel?.inputs[0].shape[2]);
   const handPose = new HandPose(handPipeline);
   if (config.hand.enabled && config.debug) log(`load model: ${config.hand.detector.modelPath.match(/\/(.*)\./)[1]}`);
   if (config.hand.landmarks && config.debug) log(`load model: ${config.hand.skeleton.modelPath.match(/\/(.*)\./)[1]}`);
diff --git a/src/human.ts b/src/human.ts
index eecad033..806cb217 100644
--- a/src/human.ts
+++ b/src/human.ts
@@ -109,7 +109,7 @@ class Human {
       age,
       gender,
       emotion,
-      body: this.config.body.modelType.startsWith('posenet') ? posenet : blazepose,
+      body: this.config.body.modelPath.includes('posenet') ? posenet : blazepose,
       hand: handpose,
     };
     // include platform info
@@ -186,8 +186,8 @@
         this.models.emotion || ((this.config.face.enabled && this.config.face.emotion.enabled) ? emotion.load(this.config) : null),
         this.models.embedding || ((this.config.face.enabled && this.config.face.embedding.enabled) ? embedding.load(this.config) : null),
         this.models.handpose || (this.config.hand.enabled ? handpose.load(this.config) : null),
-        this.models.posenet || (this.config.body.enabled && this.config.body.modelType.startsWith('posenet') ? posenet.load(this.config) : null),
-        this.models.posenet || (this.config.body.enabled && this.config.body.modelType.startsWith('blazepose') ? blazepose.load(this.config) : null),
+        this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('posenet') ? posenet.load(this.config) : null),
+        this.models.posenet || (this.config.body.enabled && this.config.body.modelPath.includes('blazepose') ? blazepose.load(this.config) : null),
       ]);
     } else {
       if (this.config.face.enabled && !this.models.face) this.models.face = await facemesh.load(this.config);
@@ -196,8 +196,8 @@
       if (this.config.face.enabled && this.config.face.emotion.enabled && !this.models.emotion) this.models.emotion = await emotion.load(this.config);
       if (this.config.face.enabled && this.config.face.embedding.enabled && !this.models.embedding) this.models.embedding = await embedding.load(this.config);
       if (this.config.hand.enabled && !this.models.handpose) this.models.handpose = await handpose.load(this.config);
-      if (this.config.body.enabled && !this.models.posenet && this.config.body.modelType.startsWith('posenet')) this.models.posenet = await posenet.load(this.config);
-      if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelType.startsWith('blazepose')) this.models.blazepose = await blazepose.load(this.config);
+      if (this.config.body.enabled && !this.models.posenet && this.config.body.modelPath.includes('posenet')) this.models.posenet = await posenet.load(this.config);
+      if (this.config.body.enabled && !this.models.blazepose && this.config.body.modelPath.includes('blazepose')) this.models.blazepose = await blazepose.load(this.config);
     }

     if (this.#firstRun) {
@@ -477,13 +477,13 @@
     // run body: can be posenet or blazepose
     this.#analyze('Start Body:');
     if (this.config.async) {
-      if (this.config.body.modelType.startsWith('posenet')) bodyRes = this.config.body.enabled ? this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
+      if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
       else bodyRes = this.config.body.enabled ? blazepose.predict(process.tensor, this.config) : [];
       if (this.#perf.body) delete this.#perf.body;
     } else {
       this.state = 'run:body';
       timeStamp = now();
-      if (this.config.body.modelType.startsWith('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
+      if (this.config.body.modelPath.includes('posenet')) bodyRes = this.config.body.enabled ? await this.models.posenet?.estimatePoses(process.tensor, this.config) : [];
       else bodyRes = this.config.body.enabled ? await blazepose.predict(process.tensor, this.config) : [];
       this.#perf.body = Math.trunc(now() - timeStamp);
     }
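
Note on the approach: every call site touched by this patch now derives its resize target from the loaded graph model (model.inputs[0].shape) instead of a fixed inputSize value carried in config. A minimal sketch of that pattern follows, assuming a TFJS graph model whose first input is a static NHWC image tensor; the helper names modelInputSize and resizeForModel are illustrative and not part of this repository.

  import * as tf from '@tensorflow/tfjs';

  // Sketch only: read the spatial input size from the model itself rather than from config.
  // Assumes model.inputs[0].shape is [-1, height, width, channels] (NHWC), as it is for the
  // image models touched by this patch.
  export function modelInputSize(model: tf.GraphModel): [number, number] {
    const shape = model.inputs[0].shape as number[];
    return [shape[1], shape[2]]; // [height, width]
  }

  export function resizeForModel(model: tf.GraphModel, image: tf.Tensor4D): tf.Tensor4D {
    const [height, width] = modelInputSize(model);
    // tf.image.resizeBilinear expects [newHeight, newWidth]; the models replaced in this
    // patch have square inputs, so the index order does not change the result for them.
    return tf.image.resizeBilinear(image, [height, width], false);
  }

The handpose path additionally keeps the detected value on the detector instance (this.inputSize), since estimateHandBounds later needs the same number to scale detected boxes back to the original frame dimensions.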