From 23aeb81b76e68b7fa5bd052020e3828b08812045 Mon Sep 17 00:00:00 2001
From: Vladimir Mandic
Date: Wed, 14 Oct 2020 13:23:02 -0400
Subject: [PATCH] module parametrization and performance monitoring

---
 README.md                    | 25 ++++++++++-
 demo/demo-esm.js             |  1 +
 src/config.js                |  4 +-
 src/handpose/box.js          |  6 +++
 src/handpose/handdetector.js | 18 ++++----
 src/handpose/handpose.js     | 80 ++++++++++++------------------------
 src/index.js                 | 15 ++++++-
 7 files changed, 84 insertions(+), 65 deletions(-)

diff --git a/README.md b/README.md
index adab5513..857a3e5e 100644
--- a/README.md
+++ b/README.md
@@ -294,6 +294,18 @@ result = {
 }
 ```

+Additionally, the `result` object includes internal performance data, with the total time spent and the time per module (measured in ms):
+
+```js
+  result.performance = {
+    body,
+    hand,
+    face,
+    agegender,
+    total,
+  }
+```
+
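For context, a minimal sketch of how this new timing data might be read by a caller, assuming the `human.detect()` API documented earlier in the README; `video` and `logPerformance` are illustrative placeholders, not part of this patch:

```js
// sketch only: `human` is an initialized library instance, `video` any supported input element
async function logPerformance(human, video, config) {
  const result = await human.detect(video, config);
  // values are truncated milliseconds; modules that did not run may report 0 or omit their key
  const { body, hand, face, agegender, total } = result.performance;
  console.log(`body ${body}ms | hand ${hand}ms | face ${face}ms | age/gender ${agegender}ms | total ${total}ms`);
}
```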
 ## Build

@@ -321,7 +333,18 @@ Development dependencies are [eslint](https://github.com/eslint) used for code l
 Performance will vary depending on your hardware, but also on number of resolution of input video/image, enabled modules as well as their parameters

-For example, on a desktop with a low-end nVidia GTX1050 it can perform multiple face detections at 50+ FPS, but drops to 5-10 FPS on a medium complex images if all modules are enabled
+For example, on a desktop with a low-end nVidia GTX1050 it can perform multiple face detections at 60+ FPS, but drops to 10 FPS on medium-complexity images if all modules are enabled
+
+Performance per module:
+
+- Enabled all: 10 FPS
+- Face Detect: 80 FPS
+- Face Geometry: 30 FPS (includes face detect)
+- Face Iris: 25 FPS (includes face detect and face geometry)
+- Age: 60 FPS (includes face detect)
+- Gender: 60 FPS (includes face detect)
+- Hand: 40 FPS
+- Body: 50 FPS

 Library can also be used on mobile devices

diff --git a/demo/demo-esm.js b/demo/demo-esm.js
index 2fc930db..c57dc30c 100644
--- a/demo/demo-esm.js
+++ b/demo/demo-esm.js
@@ -203,6 +203,7 @@ async function runHumanDetect(input, canvas) {
     TFJS Version: ${human.tf.version_core} Memory: ${engine.state.numBytes.toLocaleString()} bytes ${engine.state.numDataBuffers.toLocaleString()} buffers ${engine.state.numTensors.toLocaleString()} tensors
     GPU Memory: used ${engine.backendInstance.numBytesInGPU.toLocaleString()} bytes free ${Math.floor(1024 * 1024 * engine.backendInstance.numMBBeforeWarning).toLocaleString()} bytes
     Result Object Size: Face: ${(JSON.stringify(result.face)).length.toLocaleString()} bytes Body: ${(JSON.stringify(result.body)).length.toLocaleString()} bytes Hand: ${(JSON.stringify(result.hand)).length.toLocaleString()} bytes
+    Performance: ${JSON.stringify(result.performance)}
   `;
   // rinse & repeate
   // if (input.readyState) setTimeout(() => runHumanDetect(), 1000); // slow loop for debugging purposes
diff --git a/src/config.js b/src/config.js
index 589eaaec..ff72d491 100644
--- a/src/config.js
+++ b/src/config.js
@@ -18,7 +18,8 @@ export default {
     iris: {
       enabled: true,
       modelPath: '../models/iris/model.json',
-      inputSize: 192, // fixed value
+      enlargeFactor: 2.3, // empiric tuning
+      inputSize: 64, // fixed value
     },
     age: {
       enabled: true,
@@ -47,6 +48,7 @@ export default {
     minConfidence: 0.5,
     iouThreshold: 0.3,
     scoreThreshold: 0.7,
+    enlargeFactor: 1.65, // empiric tuning
     maxHands: 2,
     detector: {
       anchors: '../models/handdetect/anchors.json',
diff --git a/src/handpose/box.js b/src/handpose/box.js
index 3450ca7b..c7b23e7e 100644
--- a/src/handpose/box.js
+++ b/src/handpose/box.js
@@ -7,6 +7,7 @@ function getBoxSize(box) {
   ];
 }
 exports.getBoxSize = getBoxSize;
+
 function getBoxCenter(box) {
   return [
     box.startPoint[0] + (box.endPoint[0] - box.startPoint[0]) / 2,
@@ -14,6 +15,7 @@ function getBoxCenter(box) {
   ];
 }
 exports.getBoxCenter = getBoxCenter;
+
 function cutBoxFromImageAndResize(box, image, cropSize) {
   const h = image.shape[1];
   const w = image.shape[2];
@@ -24,6 +26,7 @@ function cutBoxFromImageAndResize(box, image, cropSize) {
   return tf.image.cropAndResize(image, boxes, [0], cropSize);
 }
 exports.cutBoxFromImageAndResize = cutBoxFromImageAndResize;
+
 function scaleBoxCoordinates(box, factor) {
   const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
   const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
@@ -34,6 +37,7 @@ function scaleBoxCoordinates(box, factor) {
   return { startPoint, endPoint, palmLandmarks };
 }
 exports.scaleBoxCoordinates = scaleBoxCoordinates;
+
 function enlargeBox(box, factor = 1.5) {
   const center = getBoxCenter(box);
   const size = getBoxSize(box);
@@ -43,6 +47,7 @@ function enlargeBox(box, factor = 1.5) {
   return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
 }
 exports.enlargeBox = enlargeBox;
+
 function squarifyBox(box) {
   const centers = getBoxCenter(box);
   const size = getBoxSize(box);
@@ -53,6 +58,7 @@ function squarifyBox(box) {
   return { startPoint, endPoint, palmLandmarks: box.palmLandmarks };
 }
 exports.squarifyBox = squarifyBox;
+
 function shiftBox(box, shiftFactor) {
   const boxSize = [
     box.endPoint[0] - box.startPoint[0], box.endPoint[1] - box.startPoint[1],
diff --git a/src/handpose/handdetector.js b/src/handpose/handdetector.js
index c9f5f924..e2bc28e4 100644
--- a/src/handpose/handdetector.js
+++ b/src/handpose/handdetector.js
@@ -2,17 +2,14 @@ const tf = require('@tensorflow/tfjs');
 const bounding = require('./box');

 class HandDetector {
-  constructor(model, width, height, anchors, iouThreshold, scoreThreshold, maxHands) {
+  constructor(model, anchors, config) {
     this.model = model;
-    this.width = width;
-    this.height = height;
-    this.iouThreshold = iouThreshold;
-    this.scoreThreshold = scoreThreshold;
-    this.maxHands = maxHands;
+    this.width = config.inputSize;
+    this.height = config.inputSize;
     this.anchors = anchors.map((anchor) => [anchor.x_center, anchor.y_center]);
     this.anchorsTensor = tf.tensor2d(this.anchors);
-    this.inputSizeTensor = tf.tensor1d([width, height]);
-    this.doubleInputSizeTensor = tf.tensor1d([width * 2, height * 2]);
+    this.inputSizeTensor = tf.tensor1d([config.inputSize, config.inputSize]);
+    this.doubleInputSizeTensor = tf.tensor1d([config.inputSize * 2, config.inputSize * 2]);
   }

   normalizeBoxes(boxes) {
@@ -73,9 +70,12 @@ class HandDetector {
    *
    * @param input The image to classify.
    */
-  async estimateHandBounds(input) {
+  async estimateHandBounds(input, config) {
     const inputHeight = input.shape[1];
     const inputWidth = input.shape[2];
+    this.iouThreshold = config.iouThreshold;
+    this.scoreThreshold = config.scoreThreshold;
+    this.maxHands = config.maxHands;
     const image = tf.tidy(() => input.resizeBilinear([this.width, this.height]).div(255));
     const predictions = await this.getBoundingBoxes(image);
     image.dispose();
diff --git a/src/handpose/handpose.js b/src/handpose/handpose.js
index 19acd9ce..58136fac 100644
--- a/src/handpose/handpose.js
+++ b/src/handpose/handpose.js
@@ -3,71 +3,22 @@ const hand = require('./handdetector');
 const keypoints = require('./keypoints');
 const pipe = require('./pipeline');

-// Load the bounding box detector model.
-async function loadHandDetectorModel(url) {
-  return tf.loadGraphModel(url, { fromTFHub: url.includes('tfhub.dev') });
-}
-
-// Load the mesh detector model.
-async function loadHandPoseModel(url) {
-  return tf.loadGraphModel(url, { fromTFHub: url.includes('tfhub.dev') });
-}
-
-// In single shot detector pipelines, the output space is discretized into a set
-// of bounding boxes, each of which is assigned a score during prediction. The
-// anchors define the coordinates of these boxes.
-async function loadAnchors(url) {
-  if (tf.env().features.IS_NODE) {
-    // eslint-disable-next-line global-require
-    const fs = require('fs');
-    const data = await fs.readFileSync(url.replace('file://', ''));
-    return JSON.parse(data);
-  }
-  return tf.util.fetch(url).then((d) => d.json());
-}
-
-/**
- * Load handpose.
- *
- * @param config A configuration object with the following properties:
- * - `maxContinuousChecks` How many frames to go without running the bounding
- * box detector. Defaults to infinity. Set to a lower value if you want a safety
- * net in case the mesh detector produces consistently flawed predictions.
- * - `detectionConfidence` Threshold for discarding a prediction. Defaults to
- * 0.8.
- * - `iouThreshold` A float representing the threshold for deciding whether
- * boxes overlap too much in non-maximum suppression. Must be between [0, 1].
- * Defaults to 0.3.
- * - `scoreThreshold` A threshold for deciding when to remove boxes based
- * on score in non-maximum suppression. Defaults to 0.75.
- */
-async function load(config) {
-  const [ANCHORS, handDetectorModel, handPoseModel] = await Promise.all([
-    loadAnchors(config.detector.anchors),
-    loadHandDetectorModel(config.detector.modelPath),
-    loadHandPoseModel(config.skeleton.modelPath),
-  ]);
-  const detector = new hand.HandDetector(handDetectorModel, config.inputSize, config.inputSize, ANCHORS, config.iouThreshold, config.scoreThreshold, config.maxHands);
-  const pipeline = new pipe.HandPipeline(detector, handPoseModel, config.inputSize, config.inputSize, config.skipFrames, config.minConfidence, config.maxHands);
-  // eslint-disable-next-line no-use-before-define
-  const handpose = new HandPose(pipeline);
-  return handpose;
-}
-exports.load = load;
-
 class HandPose {
   constructor(pipeline) {
     this.pipeline = pipeline;
   }

   async estimateHands(input, config) {
+    this.maxContinuousChecks = config.skipFrames;
+    this.detectionConfidence = config.minConfidence;
+    this.maxHands = config.maxHands;
     const image = tf.tidy(() => {
       if (!(input instanceof tf.Tensor)) {
         input = tf.browser.fromPixels(input);
       }
       return input.toFloat().expandDims(0);
     });
-    const predictions = await this.pipeline.estimateHand(image, config);
+    const predictions = await this.pipeline.estimateHands(image, config);
     image.dispose();
     const hands = [];
     if (!predictions) return hands;
@@ -88,3 +39,26 @@ class HandPose {
   }
 }
 exports.HandPose = HandPose;
+
+async function loadAnchors(url) {
+  if (tf.env().features.IS_NODE) {
+    // eslint-disable-next-line global-require
+    const fs = require('fs');
+    const data = await fs.readFileSync(url.replace('file://', ''));
+    return JSON.parse(data);
+  }
+  return tf.util.fetch(url).then((d) => d.json());
+}
+
+async function load(config) {
+  const [anchors, handDetectorModel, handPoseModel] = await Promise.all([
+    loadAnchors(config.detector.anchors),
+    tf.loadGraphModel(config.detector.modelPath, { fromTFHub: config.detector.modelPath.includes('tfhub.dev') }),
+    tf.loadGraphModel(config.skeleton.modelPath, { fromTFHub: config.skeleton.modelPath.includes('tfhub.dev') }),
+  ]);
+  const detector = new hand.HandDetector(handDetectorModel, anchors, config);
+  const pipeline = new pipe.HandPipeline(detector, handPoseModel, config);
+  const handpose = new HandPose(pipeline);
+  return handpose;
+}
+exports.load = load;
diff --git a/src/index.js b/src/index.js
index f3974fc5..faf36337 100644
--- a/src/index.js
+++ b/src/index.js
@@ -50,21 +50,32 @@ async function detect(input, userConfig) {
       tf.env().set('WEBGL_PACK_DEPTHWISECONV', true);
     }

+    const perf = {};
+    let timeStamp;
+
     // run posenet
+    timeStamp = performance.now();
     let poseRes = [];
     if (config.body.enabled) poseRes = await models.posenet.estimatePoses(input, config.body);
+    perf.body = Math.trunc(performance.now() - timeStamp);

     // run handpose
+    timeStamp = performance.now();
     let handRes = [];
     if (config.hand.enabled) handRes = await models.handpose.estimateHands(input, config.hand);
+    perf.hand = Math.trunc(performance.now() - timeStamp);

     // run facemesh, includes blazeface and iris
     const faceRes = [];
     if (config.face.enabled) {
+      timeStamp = performance.now();
       const faces = await models.facemesh.estimateFaces(input, config.face);
+      perf.face = Math.trunc(performance.now() - timeStamp);
       for (const face of faces) {
         // run ssr-net age & gender, inherits face from blazeface
+        timeStamp = performance.now();
         const ssrdata = (config.face.age.enabled || config.face.gender.enabled) ? await ssrnet.predict(face.image, config) : {};
+        perf.agegender = Math.trunc(performance.now() - timeStamp);
         face.image.dispose();
         // iris: array[ bottom, left, top, right, center ]
         const iris = (face.annotations.leftEyeIris && face.annotations.rightEyeIris)
@@ -86,7 +97,9 @@ async function detect(input, userConfig) {
     tf.engine().endScope();

     // combine results
-    resolve({ face: faceRes, body: poseRes, hand: handRes });
+    perf.total = Object.values(perf).reduce((a, b) => a + b);
+    console.log('total', perf.total);
+    resolve({ face: faceRes, body: poseRes, hand: handRes, performance: perf });
   });
 }
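The instrumentation added to `detect()` follows a single pattern: capture `performance.now()` before each module runs, store the truncated delta afterwards, and sum all entries into `total`. Below is a self-contained sketch of that pattern; `modules` and `runModule` are hypothetical placeholders, not part of the library:

```js
// illustrative only: mirrors the timing approach used in src/index.js
async function timedRun(modules, input) {
  const perf = {};
  for (const [name, runModule] of Object.entries(modules)) {
    const timeStamp = performance.now();
    await runModule(input);                                    // run one detection module
    perf[name] = Math.trunc(performance.now() - timeStamp);    // elapsed time in ms
  }
  perf.total = Object.values(perf).reduce((a, b) => a + b, 0); // sum of per-module timings
  return perf;
}
```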