diff --git a/config.js b/config.js
index 673a16a5..f4ec9f23 100644
--- a/config.js
+++ b/config.js
@@ -4,6 +4,9 @@ export default {
   backend: 'webgl', // select tfjs backend to use
   console: true, // enable debugging output to console
+  async: false, // execute enabled models in parallel
+  // this disables per-model performance data but slightly increases performance
+  // cannot be used if profiling is enabled
   profile: false, // enable tfjs profiling
   // this has significant performance impact, only enable for debugging purposes
   // currently only implemented for age,gender,emotion models
diff --git a/demo/browser.js b/demo/browser.js
index 36f7f1cf..9c6fa699 100644
--- a/demo/browser.js
+++ b/demo/browser.js
@@ -90,14 +90,18 @@ const log = (...msg) => {
 
 // draws processed results and starts processing of a next frame
 function drawResults(input, result, canvas) {
-  // update fps
+  // update fps data
   fps.push(1000 / (performance.now() - timeStamp));
   if (fps.length > ui.maxFrames) fps.shift();
-  menu.updateChart('FPS', fps);
+
+  // enable for continuous performance monitoring
+  // console.log(result.performance);
 
   // eslint-disable-next-line no-use-before-define
-  requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop
+  requestAnimationFrame(() => runHumanDetect(input, canvas)); // immediate loop before we even draw results
+
+  // draw fps chart
+  menu.updateChart('FPS', fps);
 
   // draw image from video
   const ctx = canvas.getContext('2d');
   ctx.fillStyle = ui.baseBackground;
diff --git a/src/emotion/emotion.js b/src/emotion/emotion.js
index f43f5e55..13112e93 100644
--- a/src/emotion/emotion.js
+++ b/src/emotion/emotion.js
@@ -37,11 +37,11 @@ async function predict(image, config) {
   let data;
   if (!config.profile) {
     const emotionT = await models.emotion.predict(grayscale);
-    data = await emotionT.data();
+    data = emotionT.dataSync();
     tf.dispose(emotionT);
   } else {
     const profileData = await tf.profile(() => models.emotion.predict(grayscale));
-    data = await profileData.result.data();
+    data = profileData.result.dataSync();
     profileData.result.dispose();
     profile.run('emotion', profileData);
   }
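The emotion hunk above swaps `await tensor.data()` for the blocking `tensor.dataSync()`. A minimal standalone sketch of the tradeoff; `readScores`, `model`, and `input` are illustrative names, not part of the patch:

```js
// Sketch: reading a small output tensor back from the backend.
// For a handful of emotion scores, a blocking dataSync() avoids the
// promise/microtask round-trip of data(), at the cost of briefly stalling
// the thread - acceptable inside an already-async pipeline step.
import * as tf from '@tensorflow/tfjs';

function readScores(model, input) {
  const t = model.predict(input);
  const scores = t.dataSync(); // synchronous download of tensor values
  t.dispose(); // release backend memory once values are on the JS side
  return scores;
}
```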
diff --git a/src/handpose/box.js b/src/handpose/box.js
index c7b23e7e..072a55ca 100644
--- a/src/handpose/box.js
+++ b/src/handpose/box.js
@@ -30,10 +30,7 @@ exports.cutBoxFromImageAndResize = cutBoxFromImageAndResize;
 function scaleBoxCoordinates(box, factor) {
   const startPoint = [box.startPoint[0] * factor[0], box.startPoint[1] * factor[1]];
   const endPoint = [box.endPoint[0] * factor[0], box.endPoint[1] * factor[1]];
-  const palmLandmarks = box.palmLandmarks.map((coord) => {
-    const scaledCoord = [coord[0] * factor[0], coord[1] * factor[1]];
-    return scaledCoord;
-  });
+  const palmLandmarks = box.palmLandmarks.map((coord) => [coord[0] * factor[0], coord[1] * factor[1]]);
   return { startPoint, endPoint, palmLandmarks };
 }
 exports.scaleBoxCoordinates = scaleBoxCoordinates;
diff --git a/src/handpose/handdetector.js b/src/handpose/handdetector.js
index 4642501f..771931c4 100644
--- a/src/handpose/handdetector.js
+++ b/src/handpose/handdetector.js
@@ -40,8 +40,7 @@ class HandDetector {
     const rawBoxes = tf.slice(prediction, [0, 1], [-1, 4]);
     const boxes = this.normalizeBoxes(rawBoxes);
     const boxesWithHandsTensor = await tf.image.nonMaxSuppressionAsync(boxes, scores, this.maxHands, this.iouThreshold, this.scoreThreshold);
-    const boxesWithHands = await boxesWithHandsTensor.array();
-    const toDispose = [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores];
+    const boxesWithHands = boxesWithHandsTensor.arraySync();
     const detectedHands = tf.tidy(() => {
       const detectedBoxes = [];
       for (const i in boxesWithHands) {
@@ -53,7 +52,7 @@ class HandDetector {
       }
       return detectedBoxes;
     });
-    toDispose.forEach((tensor) => tensor.dispose());
+    [batchedPrediction, boxesWithHandsTensor, prediction, boxes, rawBoxes, scores].forEach((tensor) => tensor.dispose());
     return detectedHands;
   }
 
@@ -64,28 +63,24 @@ class HandDetector {
    * @param input The image to classify.
    */
   async estimateHandBounds(input, config) {
-    // const inputHeight = input.shape[2];
-    // const inputWidth = input.shape[1];
     this.iouThreshold = config.iouThreshold;
     this.scoreThreshold = config.scoreThreshold;
     this.maxHands = config.maxHands;
     const resized = input.resizeBilinear([this.width, this.height]);
-    const divided = resized.div(255);
-    const normalized = divided.sub(0.5);
-    const image = normalized.mul(2.0);
+    const divided = resized.mul([1 / 127.5]);
+    const image = divided.sub(1); // normalize to [-1..1], matching the removed div/sub/mul chain
     resized.dispose();
     divided.dispose();
-    normalized.dispose();
     const predictions = await this.getBoundingBoxes(image);
     image.dispose();
     if (!predictions || (predictions.length === 0)) return null;
     const hands = [];
     for (const i in predictions) {
       const prediction = predictions[i];
-      const boundingBoxes = await prediction.boxes.array();
-      const startPoint = boundingBoxes[0].slice(0, 2);
-      const endPoint = boundingBoxes[0].slice(2, 4);
-      const palmLandmarks = await prediction.palmLandmarks.array();
+      const boundingBoxes = prediction.boxes.dataSync();
+      const startPoint = [boundingBoxes[0], boundingBoxes[1]];
+      const endPoint = [boundingBoxes[2], boundingBoxes[3]];
+      const palmLandmarks = prediction.palmLandmarks.arraySync();
       prediction.boxes.dispose();
       prediction.palmLandmarks.dispose();
       hands.push(bounding.scaleBoxCoordinates({ startPoint, endPoint, palmLandmarks }, [input.shape[2] / this.width, input.shape[1] / this.height]));
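The preprocessing change above collapses the three-op `div(255).sub(0.5).mul(2.0)` chain into two ops. Since the original chain maps 8-bit pixels onto [-1, 1], the two-op form must subtract 1 after scaling by 1/127.5; a quick standalone check of the arithmetic:

```js
// Verify both normalization forms map [0..255] pixels onto [-1..1].
import * as tf from '@tensorflow/tfjs';

const px = tf.tensor1d([0, 127.5, 255]);
px.div(255).sub(0.5).mul(2.0).print(); // [-1, 0, 1]  original three-op chain
px.mul(1 / 127.5).sub(1).print();      // [-1, 0, 1]  equivalent two-op form
```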
diff --git a/src/human.js b/src/human.js
index adbbcac3..f14d094b 100644
--- a/src/human.js
+++ b/src/human.js
@@ -61,10 +61,13 @@ class Human {
     this.version = app.version;
     this.defaults = defaults;
     this.config = defaults;
-    this.fx = (tf.ENV.flags.IS_BROWSER && (typeof document !== 'undefined')) ? new fxImage.Canvas() : null;
+    this.fx = null;
     this.state = 'idle';
     this.numTensors = 0;
     this.analyzeMemoryLeaks = false;
+    // internal temp canvases
+    this.inCanvas = null;
+    this.outCanvas = null;
     // object that contains all initialized models
     this.models = {
       facemesh: null,
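The constructor now only declares the scratch canvases and the fx filter chain; `tfImage()` allocates them lazily on first use and reuses them across frames. The caching pattern in isolation; `getScratchCanvas` is a hypothetical helper, not code from this patch:

```js
// Allocate a canvas once, reuse it every frame, reallocate only on resize.
function getScratchCanvas(cached, width, height) {
  if (!cached || cached.width !== width || cached.height !== height) {
    cached = (typeof OffscreenCanvas !== 'undefined')
      ? new OffscreenCanvas(width, height)
      : document.createElement('canvas');
    cached.width = width;
    cached.height = height;
  }
  return cached;
}

// per-frame usage, e.g. inside tfImage():
// this.inCanvas = getScratchCanvas(this.inCanvas, targetWidth, targetHeight);
```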
@@ -160,56 +163,62 @@ }
 
   tfImage(input) {
-    // let imageData;
-    let filtered;
-    const originalWidth = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
-    const originalHeight = input.naturalHeight || input.videoHeight || input.height || (input.shape && (input.shape[2] > 0));
-    let targetWidth = originalWidth;
-    let targetHeight = originalHeight;
-    if (this.fx && this.config.filter.enabled && !(input instanceof tf.Tensor)) {
-      if (this.config.filter.width > 0) targetWidth = this.config.filter.width;
-      else if (this.config.filter.height > 0) targetWidth = originalWidth * (this.config.filter.height / originalHeight);
-      if (this.config.filter.height > 0) targetHeight = this.config.filter.height;
-      else if (this.config.filter.width > 0) targetHeight = originalHeight * (this.config.filter.width / originalWidth);
-      const offscreenCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
-      if (offscreenCanvas.width !== targetWidth) offscreenCanvas.width = targetWidth;
-      if (offscreenCanvas.height !== targetHeight) offscreenCanvas.height = targetHeight;
-      const ctx = offscreenCanvas.getContext('2d');
-      if (input instanceof ImageData) ctx.putImageData(input, 0, 0);
-      else ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, offscreenCanvas.width, offscreenCanvas.height);
-      this.fx.reset();
-      this.fx.addFilter('brightness', this.config.filter.brightness); // must have at least one filter enabled
-      if (this.config.filter.contrast !== 0) this.fx.addFilter('contrast', this.config.filter.contrast);
-      if (this.config.filter.sharpness !== 0) this.fx.addFilter('sharpen', this.config.filter.sharpness);
-      if (this.config.filter.blur !== 0) this.fx.addFilter('blur', this.config.filter.blur);
-      if (this.config.filter.saturation !== 0) this.fx.addFilter('saturation', this.config.filter.saturation);
-      if (this.config.filter.hue !== 0) this.fx.addFilter('hue', this.config.filter.hue);
-      if (this.config.filter.negative) this.fx.addFilter('negative');
-      if (this.config.filter.sepia) this.fx.addFilter('sepia');
-      if (this.config.filter.vintage) this.fx.addFilter('brownie');
-      if (this.config.filter.sepia) this.fx.addFilter('sepia');
-      if (this.config.filter.kodachrome) this.fx.addFilter('kodachrome');
-      if (this.config.filter.technicolor) this.fx.addFilter('technicolor');
-      if (this.config.filter.polaroid) this.fx.addFilter('polaroid');
-      if (this.config.filter.pixelate !== 0) this.fx.addFilter('pixelate', this.config.filter.pixelate);
-      filtered = this.fx.apply(offscreenCanvas);
-    }
     let tensor;
     if (input instanceof tf.Tensor) {
       tensor = tf.clone(input);
     } else {
-      const canvas = filtered || input;
+      const originalWidth = input.naturalWidth || input.videoWidth || input.width || (input.shape && (input.shape[1] > 0));
+      const originalHeight = input.naturalHeight || input.videoHeight || input.height || (input.shape && (input.shape[2] > 0));
+      let targetWidth = originalWidth;
+      let targetHeight = originalHeight;
+      if (this.config.filter.width > 0) targetWidth = this.config.filter.width;
+      else if (this.config.filter.height > 0) targetWidth = originalWidth * (this.config.filter.height / originalHeight);
+      if (this.config.filter.height > 0) targetHeight = this.config.filter.height;
+      else if (this.config.filter.width > 0) targetHeight = originalHeight * (this.config.filter.width / originalWidth);
+      if (!this.inCanvas || (this.inCanvas.width !== targetWidth) || (this.inCanvas.height !== targetHeight)) {
+        this.inCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
+        if (this.inCanvas.width !== targetWidth) this.inCanvas.width = targetWidth;
+        if (this.inCanvas.height !== targetHeight) this.inCanvas.height = targetHeight;
+      }
+      const ctx = this.inCanvas.getContext('2d');
+      if (input instanceof ImageData) ctx.putImageData(input, 0, 0);
+      else ctx.drawImage(input, 0, 0, originalWidth, originalHeight, 0, 0, this.inCanvas.width, this.inCanvas.height);
+      if (this.config.filter.enabled) {
+        if (!this.outCanvas || (this.inCanvas.width !== this.outCanvas.width) || (this.inCanvas.height !== this.outCanvas.height)) {
+          this.outCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(this.inCanvas.width, this.inCanvas.height) : document.createElement('canvas');
+          if (this.outCanvas.width !== this.inCanvas.width) this.outCanvas.width = this.inCanvas.width;
+          if (this.outCanvas.height !== this.inCanvas.height) this.outCanvas.height = this.inCanvas.height;
+        }
+        if (!this.fx) this.fx = (tf.ENV.flags.IS_BROWSER && (typeof document !== 'undefined')) ? new fxImage.Canvas({ canvas: this.outCanvas }) : null;
+        this.fx.reset();
+        this.fx.addFilter('brightness', this.config.filter.brightness); // must have at least one filter enabled
+        if (this.config.filter.contrast !== 0) this.fx.addFilter('contrast', this.config.filter.contrast);
+        if (this.config.filter.sharpness !== 0) this.fx.addFilter('sharpen', this.config.filter.sharpness);
+        if (this.config.filter.blur !== 0) this.fx.addFilter('blur', this.config.filter.blur);
+        if (this.config.filter.saturation !== 0) this.fx.addFilter('saturation', this.config.filter.saturation);
+        if (this.config.filter.hue !== 0) this.fx.addFilter('hue', this.config.filter.hue);
+        if (this.config.filter.negative) this.fx.addFilter('negative');
+        if (this.config.filter.sepia) this.fx.addFilter('sepia');
+        if (this.config.filter.vintage) this.fx.addFilter('brownie');
+        if (this.config.filter.kodachrome) this.fx.addFilter('kodachrome');
+        if (this.config.filter.technicolor) this.fx.addFilter('technicolor');
+        if (this.config.filter.polaroid) this.fx.addFilter('polaroid');
+        if (this.config.filter.pixelate !== 0) this.fx.addFilter('pixelate', this.config.filter.pixelate);
+        this.fx.apply(this.inCanvas);
+      }
+      if (!this.outCanvas) this.outCanvas = this.inCanvas;
       let pixels;
-      if ((this.config.backend === 'webgl') || (canvas instanceof ImageData)) {
+      if ((this.config.backend === 'webgl') || (this.outCanvas instanceof ImageData)) {
         // tf kernel-optimized method to get imagedata, also if input is imagedata, just use it
-        pixels = tf.browser.fromPixels(canvas);
+        pixels = tf.browser.fromPixels(this.outCanvas);
       } else {
         // cpu and wasm kernels do not implement an efficient fromPixels method, nor can we use the canvas as-is, so we draw through one more temporary canvas
         const tempCanvas = (typeof OffscreenCanvas !== 'undefined') ? new OffscreenCanvas(targetWidth, targetHeight) : document.createElement('canvas');
         tempCanvas.width = targetWidth;
         tempCanvas.height = targetHeight;
         const tempCtx = tempCanvas.getContext('2d');
-        tempCtx.drawImage(canvas, 0, 0);
+        tempCtx.drawImage(this.outCanvas, 0, 0);
         const data = tempCtx.getImageData(0, 0, targetWidth, targetHeight);
         pixels = tf.browser.fromPixels(data);
       }
@@ -218,7 +227,7 @@ class Human {
       pixels.dispose();
       casted.dispose();
     }
-    return { tensor, canvas: this.config.filter.return ? filtered : null };
+    return { tensor, canvas: this.config.filter.return ? this.outCanvas : null };
   }
 
   async detect(input, userConfig = {}) {
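Since `outCanvas` is now a long-lived scratch buffer, the canvas returned when `config.filter.return` is set will be overwritten on the next frame. A usage sketch, assuming the object resolved by `detect()` exposes this canvas as `result.canvas`; `human`, `video`, and `preview` are page-level objects, not names from the patch:

```js
// Consume the filtered frame immediately; do not hold the reference,
// since the library reuses the same canvas on the next frame.
async function drawFilteredFrame() {
  const result = await human.detect(video, { filter: { enabled: true, return: true } });
  if (result.canvas) preview.getContext('2d').drawImage(result.canvas, 0, 0);
}
```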
@@ -239,6 +248,11 @@
 
     // eslint-disable-next-line no-async-promise-executor
     return new Promise(async (resolve) => {
+      let poseRes;
+      let handRes;
+      let ssrRes;
+      let emotionRes;
+
       const timeStart = now();
 
       // configure backend
@@ -270,20 +284,30 @@
       const imageTensor = image.tensor;
 
       // run posenet
-      this.state = 'run:body';
-      timeStamp = now();
-      this.analyze('Start PoseNet');
-      const poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
-      this.analyze('End PoseNet:');
-      perf.body = Math.trunc(now() - timeStamp);
+      if (this.config.async) {
+        poseRes = this.config.body.enabled ? this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
+      } else {
+        this.state = 'run:body';
+        timeStamp = now();
+        this.analyze('Start PoseNet');
+        poseRes = this.config.body.enabled ? await this.models.posenet.estimatePoses(imageTensor, this.config.body) : [];
+        this.analyze('End PoseNet:');
+        perf.body = Math.trunc(now() - timeStamp);
+      }
 
       // run handpose
-      this.state = 'run:hand';
-      timeStamp = now();
-      this.analyze('Start HandPose:');
-      const handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
-      this.analyze('End HandPose:');
-      perf.hand = Math.trunc(now() - timeStamp);
+      if (this.config.async) {
+        handRes = this.config.hand.enabled ? this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
+      } else {
+        this.state = 'run:hand';
+        timeStamp = now();
+        this.analyze('Start HandPose:');
+        handRes = this.config.hand.enabled ? await this.models.handpose.estimateHands(imageTensor, this.config.hand) : [];
+        this.analyze('End HandPose:');
+        perf.hand = Math.trunc(now() - timeStamp);
+      }
+
+      if (this.config.async) [poseRes, handRes] = await Promise.all([poseRes, handRes]);
 
       // run facemesh, includes blazeface and iris
       const faceRes = [];
@@ -302,12 +326,12 @@
           // run ssr-net age & gender, inherits face from blazeface
           this.state = 'run:agegender';
           timeStamp = now();
-          const ssrData = (this.config.face.age.enabled || this.config.face.gender.enabled) ? await ssrnet.predict(face.image, this.config) : {};
+          ssrRes = (this.config.face.age.enabled || this.config.face.gender.enabled) ? await ssrnet.predict(face.image, this.config) : {};
           perf.agegender = Math.trunc(now() - timeStamp);
           // run emotion, inherits face from blazeface
           this.state = 'run:emotion';
           timeStamp = now();
-          const emotionData = this.config.face.emotion.enabled ? await emotion.predict(face.image, this.config) : {};
+          emotionRes = this.config.face.emotion.enabled ? await emotion.predict(face.image, this.config) : {};
           perf.emotion = Math.trunc(now() - timeStamp);
 
           // dont need face anymore
@@ -322,10 +346,10 @@
             box: face.box,
             mesh: face.mesh,
             annotations: face.annotations,
-            age: ssrData.age,
-            gender: ssrData.gender,
-            agConfidence: ssrData.confidence,
-            emotion: emotionData,
+            age: ssrRes.age,
+            gender: ssrRes.gender,
+            agConfidence: ssrRes.confidence,
+            emotion: emotionRes,
             iris: (iris !== 0) ? Math.trunc(100 * 11.7 /* human iris size in mm */ / iris) / 100 : 0,
           });
           this.analyze('End FaceMesh:');
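Net effect of the `detect()` changes: with the new `async` flag enabled, posenet and handpose inference start without awaiting and are joined via `Promise.all`, trading per-model timings for concurrency. A usage sketch, assuming a constructed `human` instance and a `video` element, and that the resolved result exposes `face`, `body`, `hand`, and `performance`:

```js
async function compareModes() {
  // serial (default): models run one after another and result.performance
  // carries per-model timings such as body and hand
  const serial = await human.detect(video, { async: false });
  console.log(serial.performance);

  // parallel: pose and hand inference overlap; per-model timings are not
  // collected, and profiling must stay disabled in this mode
  const parallel = await human.detect(video, { async: true, profile: false });
  console.log(parallel.face, parallel.body, parallel.hand);
}
```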